[SOMA][BOT-3][INFRA] skeleton: project folder structure and all package manifests

This commit is contained in:
Ramakrishna Mamidi
2026-03-23 01:13:11 +05:30
parent 00e314eb89
commit f857845dd2
18 changed files with 1024 additions and 0 deletions

View File

@@ -0,0 +1,114 @@
#!/usr/bin/env python3
"""
admin_recovery.py
Emergency TOTP reset tool. Run ONLY via SSH on the EC2 instance.
This script is NOT exposed via any HTTP endpoint.
It resets the TOTP secret for the admin account, prints a new QR code
and backup codes, which must be scanned in Google Authenticator.
Usage (on EC2):
docker compose exec backend python /app/scripts/admin_recovery.py
Or directly:
ssh ubuntu@your-ec2-ip
cd ~/soma
docker compose exec backend python /app/scripts/admin_recovery.py
"""
import asyncio
import sys
import os
import secrets
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
async def reset_totp():
    """Interactively rotate the admin user's TOTP secret and backup codes.

    Flow (prompts on stdin, output on stdout):
      1. Load the single user row (single-tenant: exactly one user expected).
      2. Require the literal word 'RESET' as confirmation.
      3. Generate a fresh base32 TOTP secret and 8 one-time backup codes.
      4. Display the provisioning QR (ASCII) or raw otpauth URI, plus codes.
      5. Require one valid TOTP code from the new secret before persisting.
      6. Save the secret/codes and deactivate all existing auth sessions.

    Exits with status 1 if no user exists or verification fails, 0 on user
    abort. Nothing is written to the database unless step 5 succeeds.
    """
    # App imports are function-local so this script only loads app config
    # when actually run, not on import.
    from app.database import AsyncSessionLocal
    from app.models.user import User
    from app.config import settings
    from sqlalchemy import select
    import pyotp
    import qrcode
    import io
    async with AsyncSessionLocal() as db:
        # Find the admin user (single-tenant: only one user exists)
        result = await db.execute(select(User).limit(1))
        user = result.scalar_one_or_none()
        if not user:
            print("ERROR: No user found in database. Run the setup flow first.")
            sys.exit(1)
        print(f"\nResetting TOTP for: {user.email}")
        print("=" * 60)
        # Confirm action — require the exact word RESET to prevent accidents.
        confirm = input("Type 'RESET' to confirm TOTP reset: ").strip()
        if confirm != "RESET":
            print("Aborted.")
            sys.exit(0)
        # Generate new TOTP secret (base32, per RFC 6238 authenticator apps).
        new_secret = pyotp.random_base32()
        # Generate new backup codes (8 codes, 8 hex chars each)
        backup_codes = [secrets.token_hex(4).upper() for _ in range(8)]
        # Build the otpauth:// provisioning URI that authenticator apps scan.
        totp = pyotp.TOTP(new_secret)
        provisioning_uri = totp.provisioning_uri(
            name=user.email,
            issuer_name="SOMA"
        )
        # Print QR code as ASCII art (requires 'qrcode' library); fall back
        # to printing the raw URI if rendering fails for any reason.
        try:
            qr = qrcode.QRCode(version=1, box_size=1, border=1)
            qr.add_data(provisioning_uri)
            qr.make(fit=True)
            print("\nScan this QR code with Google Authenticator / Authy:")
            print("=" * 60)
            qr.print_ascii(invert=True)
            print("=" * 60)
        except Exception:
            print(f"\nProvisioning URI (paste into authenticator app):")
            print(provisioning_uri)
        print(f"\nManual entry secret: {new_secret}")
        print("\nBACKUP CODES (save these offline — one-time use):")
        print("-" * 40)
        for i, code in enumerate(backup_codes, 1):
            print(f" {i:2}. {code}")
        print("-" * 40)
        # Verify the new TOTP code before saving — prevents locking the admin
        # out with a secret that was never successfully scanned.
        print("\nEnter a TOTP code from your authenticator app to verify setup:")
        test_code = input("6-digit code: ").strip()
        # valid_window=1 accepts the adjacent 30s step to tolerate clock skew.
        if not totp.verify(test_code, valid_window=1):
            print("ERROR: TOTP code verification failed. TOTP NOT reset.")
            sys.exit(1)
        # Save to database (only reached after successful verification).
        user.totp_secret = new_secret
        user.totp_backup_codes = backup_codes  # stored as plain text; hash in production if desired
        # Invalidate all existing sessions so prior logins cannot outlive the reset.
        from app.models.user import AuthSession
        from sqlalchemy import update
        await db.execute(
            update(AuthSession)
            .where(AuthSession.user_id == user.id)
            .values(is_active=False)
        )
        await db.commit()
        print("\nTOTP reset successful. All existing sessions have been invalidated.")
        print("Log in at your SOMA URL to start a new session.")
if __name__ == '__main__':
    # Entry point: drive the interactive reset coroutine to completion.
    asyncio.run(reset_totp())

60
soma/scripts/backup_db.sh Normal file
View File

@@ -0,0 +1,60 @@
#!/bin/bash
# backup_db.sh
# Dumps SOMA PostgreSQL database, compresses it, uploads to S3 with timestamp.
# Run inside the backend container or on the EC2 host with Docker access.
#
# Required environment variables (from .env):
# POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_DB
# AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_REGION
# S3_BACKUP_BUCKET (set this in .env — not in TID default, add it)
#
# Usage: bash /app/scripts/backup_db.sh

# Abort on any error, unset variable, or failure in any pipeline stage.
set -euo pipefail

TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
BACKUP_DIR="/tmp/soma_backups"
FILENAME="somadb_${TIMESTAMP}.sql.gz"
FILEPATH="${BACKUP_DIR}/${FILENAME}"

# Defaults if env not set.
# NOTE(review): the password fallback below is a development default only —
# production must always inject POSTGRES_PASSWORD via the environment.
POSTGRES_USER="${POSTGRES_USER:-soma}"
POSTGRES_PASSWORD="${POSTGRES_PASSWORD:-somapass}"
POSTGRES_DB="${POSTGRES_DB:-somadb}"
POSTGRES_HOST="${POSTGRES_HOST:-postgres}"
S3_BUCKET="${S3_BACKUP_BUCKET:-soma-backups}"

mkdir -p "${BACKUP_DIR}"
echo "[backup] Starting database dump at ${TIMESTAMP}"

# Dump and compress. --clean/--if-exists emit DROP ... IF EXISTS so the dump
# can be restored over an existing schema; --no-password forces
# non-interactive auth via the PGPASSWORD env var set on this command.
PGPASSWORD="${POSTGRES_PASSWORD}" pg_dump \
    -h "${POSTGRES_HOST}" \
    -U "${POSTGRES_USER}" \
    -d "${POSTGRES_DB}" \
    --no-password \
    --format=plain \
    --clean \
    --if-exists \
    | gzip > "${FILEPATH}"

FILESIZE=$(du -sh "${FILEPATH}" | cut -f1)
echo "[backup] Dump complete: ${FILENAME} (${FILESIZE})"

# Upload to S3 under backups/YYYYMM/ (first 6 chars of TIMESTAMP) with
# server-side AES256 encryption.
if command -v aws &> /dev/null; then
    S3_KEY="backups/${TIMESTAMP:0:6}/${FILENAME}"
    aws s3 cp "${FILEPATH}" "s3://${S3_BUCKET}/${S3_KEY}" \
        --sse AES256 \
        --region "${AWS_REGION:-ap-south-1}"
    echo "[backup] Uploaded to s3://${S3_BUCKET}/${S3_KEY}"
else
    echo "[backup] WARNING: aws CLI not found. Backup saved locally at ${FILEPATH}"
    echo "[backup] Install aws CLI or add boto3 call to upload manually."
fi

# Prune local dumps older than 7 days. NOTE: -mtime +7 selects by file AGE,
# not by count — this is not "keep the last 7 backups"; a burst of manual
# runs keeps every dump until each is a week old.
find "${BACKUP_DIR}" -name "somadb_*.sql.gz" -mtime +7 -delete
echo "[backup] Done."

View File

@@ -0,0 +1,177 @@
#!/usr/bin/env python3
"""
seed_from_markdown.py
Reads a Markdown file, extracts all unchecked task items (- [ ] lines),
parses optional metadata from the same line or the preceding heading,
and inserts them as tasks into the SOMA database.
Usage:
python seed_from_markdown.py --file /path/to/life_plan.md [--dry-run]
Format recognized:
# Project Name
## Sub-section
- [ ] Task title @tag #priority due:2026-04-01
- [x] Already done task (skipped)
Metadata extraction:
@tag → maps to project_tag (e.g., @openclaw, @mak)
#priority → maps to priority (low|medium|high|critical)
due:YYYY-MM-DD → maps to due_date
Default tag: derived from nearest parent heading (slugified, max 50 chars)
Default priority: medium
"""
import argparse
import re
import sys
import os
import asyncio
from datetime import date
# Add parent directory to path so we can import app modules
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
def parse_markdown_tasks(filepath: str) -> list[dict]:
    """Extract every unchecked ``- [ ]`` task item from a Markdown file.

    The nearest H1–H3 heading above a task supplies its section label and a
    default project tag (a known keyword match, else the slugified heading).
    Inline metadata is stripped out of the title:

        @tag              -> project_tag
        #priority         -> priority (low|medium|high|critical)
        due:YYYY-MM-DD    -> due_date (invalid dates are dropped to None)

    Returns a list of dicts shaped for direct insertion as Task rows.
    """
    # Heading keywords that map to canonical project tags (checked in order).
    known_tags = {
        'openclaw': 'openclaw', 'mak': 'mak', 'rarity': 'rarity-media',
        'japan': 'japan', 'germany': 'germany', 'personal': 'personal',
        'habit': 'habits', 'retreat': 'retreat'
    }
    # XP reward scales with priority.
    xp_for = {'low': 25, 'medium': 50, 'high': 100, 'critical': 200}

    parsed: list[dict] = []
    active_tag = "general"
    active_section = ""

    with open(filepath, 'r', encoding='utf-8') as fh:
        content = fh.readlines()

    for raw_line in content:
        text = raw_line.rstrip()

        # H1–H3 headings update the default tag and section label.
        heading = re.match(r'^#{1,3}\s+(.+)$', text)
        if heading:
            heading_title = heading.group(1).strip()
            lowered = heading_title.lower()
            # Slugify: lowercase, collapse non-alphanumerics to '-', cap at 50.
            slug = re.sub(r'[^a-z0-9]+', '-', lowered).strip('-')[:50]
            # Prefer a known keyword tag; otherwise fall back to the slug.
            active_tag = next(
                (tag for keyword, tag in known_tags.items() if keyword in lowered),
                slug or "general",
            )
            active_section = heading_title
            continue

        # Only unchecked items count: "- [ ] ..." or "* [ ] ...".
        item = re.match(r'^\s*[-*]\s+\[ \]\s+(.+)$', text)
        if item is None:
            continue

        title = item.group(1).strip()
        tag, priority, due = active_tag, "medium", None

        # @tag overrides the heading-derived tag.
        at_tag = re.search(r'@(\S+)', title)
        if at_tag:
            tag = at_tag.group(1)
            title = title.replace(at_tag.group(0), '').strip()

        # #priority (case-insensitive, normalized to lowercase).
        prio = re.search(r'#(low|medium|high|critical)', title, re.IGNORECASE)
        if prio:
            priority = prio.group(1).lower()
            title = title.replace(prio.group(0), '').strip()

        # due:YYYY-MM-DD — validated; malformed dates become None but the
        # marker is still removed from the title.
        due_m = re.search(r'due:(\d{4}-\d{2}-\d{2})', title)
        if due_m:
            try:
                due = due_m.group(1)
                date.fromisoformat(due)  # validate
            except ValueError:
                due = None
            title = title.replace(due_m.group(0), '').strip()

        # Collapse leftover whitespace from the removed markers.
        title = re.sub(r'\s+', ' ', title).strip()
        if not title:
            continue

        parsed.append({
            'title': title,
            'description': f"Imported from: {active_section}" if active_section else None,
            'status': 'backlog',
            'priority': priority,
            'project_tag': tag,
            'due_date': due,
            'xp_reward': xp_for[priority],
            'position': 0,
            'checklist': [],
        })
    return parsed
async def insert_tasks(tasks: list[dict], dry_run: bool = False):
    """Persist parsed task dicts, or just preview them when *dry_run* is set.

    In dry-run mode, prints a formatted listing and touches nothing. Otherwise
    each dict becomes a Task row with a fresh UUID; ``position`` is rewritten
    to the insertion index (the input dicts are mutated in place). All rows
    are committed in a single transaction.
    """
    # Imports are function-local so parsing can run without the app installed.
    import uuid
    from app.database import AsyncSessionLocal
    from app.models.task import Task

    if dry_run:
        print(f"DRY RUN: Would insert {len(tasks)} tasks:")
        for i, t in enumerate(tasks, 1):
            print(f" {i:3}. [{t['priority'].upper():8}] [{t['project_tag']:20}] {t['title']}")
        return

    async with AsyncSessionLocal() as db:
        skipped = 0
        inserted = 0
        for position, payload in enumerate(tasks):
            # Position reflects overall insertion order across the batch.
            payload['position'] = position
            db.add(Task(id=uuid.uuid4(), **payload))
            inserted += 1
        await db.commit()
        print(f"Inserted {inserted} tasks, skipped {skipped}.")
def main():
    """CLI entry point: validate args, parse the Markdown file, seed the DB."""
    parser = argparse.ArgumentParser(description='Seed SOMA tasks from a Markdown file')
    parser.add_argument('--file', required=True, help='Path to the Markdown file')
    parser.add_argument('--dry-run', action='store_true', help='Parse only, do not insert')
    args = parser.parse_args()

    # Fail early with a clear message rather than an open() traceback.
    if not os.path.exists(args.file):
        print(f"Error: File not found: {args.file}", file=sys.stderr)
        sys.exit(1)

    print(f"Parsing: {args.file}")
    parsed = parse_markdown_tasks(args.file)
    print(f"Found {len(parsed)} unchecked tasks")
    if not parsed:
        print("No tasks found. Check that the file contains '- [ ] ' lines.")
        sys.exit(0)

    asyncio.run(insert_tasks(parsed, dry_run=args.dry_run))
if __name__ == '__main__':
    # Script entry point (see module docstring for usage).
    main()