[SOMA][BOT-3][INFRA] skeleton: project folder structure and all package manifests
This commit is contained in:
177
soma/scripts/seed_from_markdown.py
Normal file
177
soma/scripts/seed_from_markdown.py
Normal file
@@ -0,0 +1,177 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
seed_from_markdown.py
|
||||
|
||||
Reads a Markdown file, extracts all unchecked task items (- [ ] lines),
|
||||
parses optional metadata from the same line or the preceding heading,
|
||||
and inserts them as tasks into the SOMA database.
|
||||
|
||||
Usage:
|
||||
python seed_from_markdown.py --file /path/to/life_plan.md [--dry-run]
|
||||
|
||||
Format recognized:
|
||||
# Project Name
|
||||
## Sub-section
|
||||
- [ ] Task title @tag #priority due:2026-04-01
|
||||
- [x] Already done task (skipped)
|
||||
|
||||
Metadata extraction:
|
||||
@tag → maps to project_tag (e.g., @openclaw, @mak)
|
||||
#priority → maps to priority (low|medium|high|critical)
|
||||
due:YYYY-MM-DD → maps to due_date
|
||||
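Default tag: derived from the nearest preceding H1-H3 heading (known keywords such as "openclaw" or "habit" map to fixed tags; otherwise the heading is slugified, max 50 chars; "general" if no heading precedes the task)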
|
||||
Default priority: medium
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
import asyncio
|
||||
from datetime import date
|
||||
|
||||
# Add the parent directory (the project root that holds the `app` package) to
# the import path. The path is made absolute and normalised because the app
# modules are imported lazily at call time: a relative entry would silently
# stop resolving if the working directory changed in the meantime.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
# Heading keywords that map straight to a known project tag. Matching is by
# substring on the lowercased heading and the first hit (in this order) wins.
_HEADING_TAG_KEYWORDS = {
    'openclaw': 'openclaw', 'mak': 'mak', 'rarity': 'rarity-media',
    'japan': 'japan', 'germany': 'germany', 'personal': 'personal',
    'habit': 'habits', 'retreat': 'retreat',
}

# XP awarded per priority level.
_PRIORITY_XP = {'low': 25, 'medium': 50, 'high': 100, 'critical': 200}

# Only H1-H3 headings update the current section / default tag.
_HEADING_RE = re.compile(r'^#{1,3}\s+(.+)$')
# Unchecked task item: "- [ ] ..." or "* [ ] ...", optionally indented.
_TASK_RE = re.compile(r'^\s*[-*]\s+\[ \]\s+(.+)$')
# "@tag" must start a whitespace-delimited word so e-mail addresses
# (bob@example.com) are not mistaken for tags.
_TAG_RE = re.compile(r'(?<!\S)@(\S+)')
# "#priority" must be a complete token: "#lowercase" or "#high-level" are
# ordinary hashtags, not priorities.
_PRIORITY_RE = re.compile(
    r'(?<!\w)#(low|medium|high|critical)(?![\w-])', re.IGNORECASE
)
# "due:YYYY-MM-DD" as a standalone token (not e.g. "overdue:..." or a longer
# digit run).
_DUE_RE = re.compile(r'(?<!\w)due:(\d{4}-\d{2}-\d{2})\b')


def _default_tag_for_heading(heading_text):
    """Return the default project tag implied by a heading's text."""
    lowered = heading_text.lower()
    for keyword, tag in _HEADING_TAG_KEYWORDS.items():
        if keyword in lowered:
            return tag
    # Slugify: lowercase, collapse non-alphanumerics to '-', cap at 50 chars.
    # Strip again after truncating so the cut cannot leave a trailing '-'.
    slug = re.sub(r'[^a-z0-9]+', '-', lowered).strip('-')[:50].rstrip('-')
    return slug or "general"


def _pop_token(pattern, text):
    """Find the first match of *pattern* and remove exactly that occurrence.

    Returns ``(match_or_None, remaining_text)``. Only the matched span is cut
    out, so identical text elsewhere in the title is left untouched.
    """
    match = pattern.search(text)
    if match is None:
        return None, text
    return match, (text[:match.start()] + text[match.end():]).strip()


def parse_markdown_tasks(filepath: str) -> list[dict]:
    """Parse all unchecked ``[ ]`` tasks from a Markdown file.

    Args:
        filepath: Path to a UTF-8 encoded Markdown file.

    Returns:
        One dict per unchecked task, in file order, with the keys ``title``,
        ``description``, ``status`` (always ``'backlog'``), ``priority``,
        ``project_tag``, ``due_date`` (ISO date string or ``None``),
        ``xp_reward``, ``position`` (always ``0`` here) and ``checklist``
        (always an empty list).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.

    Inline metadata is removed from the title: ``@tag`` overrides the tag
    derived from the nearest preceding H1-H3 heading, ``#low|medium|high|
    critical`` sets the priority (default ``medium``), and
    ``due:YYYY-MM-DD`` sets the due date. A syntactically matching but invalid
    date (e.g. ``2026-02-30``) is dropped from the title and leaves
    ``due_date`` as ``None``. Tasks whose title is empty after metadata
    removal are skipped.
    """
    tasks = []
    current_tag = "general"
    current_section = ""

    with open(filepath, 'r', encoding='utf-8') as f:
        for raw_line in f:
            line = raw_line.rstrip()

            # Headings only update the running context; they are never tasks.
            heading_match = _HEADING_RE.match(line)
            if heading_match:
                current_section = heading_match.group(1).strip()
                current_tag = _default_tag_for_heading(current_section)
                continue

            task_match = _TASK_RE.match(line)
            if not task_match:
                continue

            text = task_match.group(1).strip()
            tag = current_tag
            priority = "medium"
            due_date = None

            tag_match, text = _pop_token(_TAG_RE, text)
            if tag_match:
                tag = tag_match.group(1)

            priority_match, text = _pop_token(_PRIORITY_RE, text)
            if priority_match:
                priority = priority_match.group(1).lower()

            due_match, text = _pop_token(_DUE_RE, text)
            if due_match:
                candidate = due_match.group(1)
                try:
                    date.fromisoformat(candidate)  # validate the calendar date
                except ValueError:
                    pass  # malformed date: token is removed, due_date stays None
                else:
                    due_date = candidate

            # Removing tokens can leave runs of spaces behind.
            title = re.sub(r'\s+', ' ', text).strip()
            if not title:
                continue

            tasks.append({
                'title': title,
                'description': f"Imported from: {current_section}" if current_section else None,
                'status': 'backlog',
                'priority': priority,
                'project_tag': tag,
                'due_date': due_date,
                'xp_reward': _PRIORITY_XP[priority],
                'position': 0,
                'checklist': [],
            })

    return tasks
|
||||
|
||||
|
||||
async def insert_tasks(tasks: list[dict], dry_run: bool = False):
    """Insert parsed tasks into the database, or just list them.

    Args:
        tasks: Task dicts as produced by the Markdown parser. Each dict is
            passed as keyword arguments to the ``Task`` model, so its keys
            must match the model's constructor (presumably the column names;
            verify against ``app.models.task``).
        dry_run: When true, print what would be inserted and return without
            importing or touching the database layer.

    Returns:
        None. Progress is reported on stdout.

    The input dicts are not modified; the stored ``position`` is the task's
    index in *tasks* (a single global ordering, not one per status/tag).
    """
    if dry_run:
        print(f"DRY RUN: Would insert {len(tasks)} tasks:")
        for i, t in enumerate(tasks, 1):
            print(f"  {i:3}. [{t['priority'].upper():8}] [{t['project_tag']:20}] {t['title']}")
        return

    # Imported only on the real-insert path so that --dry-run works on a
    # machine where the app package or its database stack is unavailable.
    import uuid

    from app.database import AsyncSessionLocal
    from app.models.task import Task

    async with AsyncSessionLocal() as db:
        inserted = 0
        # Nothing is currently skipped (there is no duplicate detection); the
        # counter is kept so the summary line stays stable.
        skipped = 0
        for position, task_data in enumerate(tasks):
            # Build the row from a copy so the caller's dict is left untouched.
            # NOTE(review): 'due_date' is an ISO string here; confirm the model
            # column accepts a string rather than a datetime.date.
            row = {**task_data, 'position': position}
            db.add(Task(id=uuid.uuid4(), **row))
            inserted += 1

        # Single commit: either every task is stored or none is.
        await db.commit()
        print(f"Inserted {inserted} tasks, skipped {skipped}.")
|
||||
|
||||
|
||||
def main(argv=None):
    """Command-line entry point: parse a Markdown file and seed its tasks.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). Defaults to ``sys.argv[1:]`` when omitted, so existing
            ``main()`` callers behave as before.

    Exits with status 1 if ``--file`` is not an existing regular file, and
    with status 0 if the file contains no unchecked tasks. Otherwise the
    parsed tasks are inserted (or only listed with ``--dry-run``).
    """
    parser = argparse.ArgumentParser(description='Seed SOMA tasks from a Markdown file')
    parser.add_argument('--file', required=True, help='Path to the Markdown file')
    parser.add_argument('--dry-run', action='store_true', help='Parse only, do not insert')
    args = parser.parse_args(argv)

    # isfile (not exists): a directory path would otherwise pass this check
    # and then fail inside open() with an unhandled traceback.
    if not os.path.isfile(args.file):
        print(f"Error: File not found: {args.file}", file=sys.stderr)
        sys.exit(1)

    print(f"Parsing: {args.file}")
    tasks = parse_markdown_tasks(args.file)
    print(f"Found {len(tasks)} unchecked tasks")

    if not tasks:
        print("No tasks found. Check that the file contains '- [ ] ' lines.")
        sys.exit(0)

    asyncio.run(insert_tasks(tasks, dry_run=args.dry_run))
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user