Init project

main
Vinayak Ambigapathy committed 1 week ago
parent e4f3044d95
commit b2aa5dd6a6
  1 changed file, 610 additions
      doc_think.py

@@ -0,0 +1,610 @@
#!/usr/bin/env python3
"""
Python Documentation Generator using Ollama LLM
Automatically generates comprehensive markdown documentation for Python projects.
"""
import os
import ast
import json
import argparse
import subprocess
import sys
from pathlib import Path
from typing import Dict, List, Set, Tuple, Optional
import requests
import re
from urllib.parse import quote
class PythonAnalyzer:
"""Analyzes Python files to extract structural information."""
def __init__(self):
self.imports = set()
self.classes = []
self.functions = []
self.constants = []
self.module_docstring = None
def analyze_file(self, file_path: str) -> Dict:
"""Analyze a Python file and extract its structure."""
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
tree = ast.parse(content)
# Reset for each file
self.imports = set()
self.classes = []
self.functions = []
self.constants = []
self.module_docstring = ast.get_docstring(tree)
for node in ast.walk(tree):
if isinstance(node, ast.Import):
for alias in node.names:
self.imports.add(alias.name)
elif isinstance(node, ast.ImportFrom):
module = node.module or ""
for alias in node.names:
self.imports.add(f"{module}.{alias.name}")
elif isinstance(node, ast.ClassDef):
self.classes.append({
'name': node.name,
'bases': [ast.unparse(base) for base in node.bases],
'docstring': ast.get_docstring(node),
'methods': [n.name for n in node.body if isinstance(n, ast.FunctionDef)],
'lineno': node.lineno
})
elif isinstance(node, ast.FunctionDef):
# Only top-level functions (not methods)
parent_classes = [n for n in ast.walk(tree) if isinstance(n, ast.ClassDef)]
is_method = False
for cls in parent_classes:
if hasattr(cls, 'body') and node in cls.body:
is_method = True
break
if not is_method:
self.functions.append({
'name': node.name,
'args': [arg.arg for arg in node.args.args],
'docstring': ast.get_docstring(node),
'lineno': node.lineno,
'returns': ast.unparse(node.returns) if node.returns else None
})
elif isinstance(node, ast.Assign):
# Top-level constants (ALL_CAPS variables)
for target in node.targets:
if isinstance(target, ast.Name) and target.id.isupper():
self.constants.append({
'name': target.id,
'value': ast.unparse(node.value),
'lineno': node.lineno
})
return {
'file_path': file_path,
'content': content,
'module_docstring': self.module_docstring,
'imports': list(self.imports),
'classes': self.classes,
'functions': self.functions,
'constants': self.constants,
'lines_of_code': len(content.splitlines())
}
except Exception as e:
print(f"Error analyzing {file_path}: {e}")
return None
class OllamaDocGenerator:
"""Generates documentation using Ollama LLM."""
def __init__(self, model_name: str = "deepseek-r1:latest", ollama_url: str = "http://localhost:11434", thinking: bool = False):
self.model_name = model_name
self.ollama_url = ollama_url
self.session = requests.Session()
self.thinking = thinking
def check_ollama_connection(self) -> bool:
"""Check if Ollama is running and accessible."""
try:
response = self.session.get(f"{self.ollama_url}/api/tags")
return response.status_code == 200
except requests.exceptions.RequestException:
return False
def check_model_availability(self) -> bool:
"""Check if the specified model is available."""
try:
response = self.session.get(f"{self.ollama_url}/api/tags")
if response.status_code == 200:
models = response.json().get('models', [])
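                # A bare model name such as "deepseek-r1" should also match tags like "deepseek-r1:latest"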
return any(model['name'].startswith(self.model_name) for model in models)
return False
except requests.exceptions.RequestException:
return False
def generate_documentation(self, file_analysis: Dict, project_context: Dict) -> str:
"""Generate documentation for a single Python file."""
# Create comprehensive prompt with context
prompt = self.create_documentation_prompt(file_analysis, project_context)
# Check if this is a thinking model (o1-like models)
is_thinking_model = self.thinking
try:
if is_thinking_model:
print("Thinking model chosen")
# For thinking models, use chat format and handle thinking tokens
response = self.session.post(
f"{self.ollama_url}/api/chat",
json={
"model": self.model_name,
"messages": [
{
"role": "user",
"content": prompt
}
],
"stream": False,
"options": {
"temperature": 0.1,
"top_p": 0.9,
}
},
timeout=600 # 10 minute timeout for thinking models
)
if response.status_code == 200:
result = response.json()
message = result.get('message', {})
content = message.get('content', '')
# Parse and display thinking process
thinking_content, final_answer = self.parse_thinking_response(content)
if thinking_content:
print(f" 🧠 Model thinking process:")
print(f" {thinking_content[:200]}..." if len(thinking_content) > 200 else f" {thinking_content}")
return final_answer if final_answer else content
else:
print(f"Error generating documentation: {response.status_code}")
return None
else:
print("None thinking model chosen")
# Standard generation for regular models
response = self.session.post(
f"{self.ollama_url}/api/generate",
json={
"model": self.model_name,
"prompt": prompt,
"stream": False,
"think": False,
"options": {
"temperature": 0.1,
"top_p": 0.9,
}
},
timeout=300 # 5 minute timeout
)
if response.status_code == 200:
return response.json()['response']
else:
print(f"Error generating documentation: {response.status_code}")
return None
except requests.exceptions.RequestException as e:
print(f"Error communicating with Ollama: {e}")
return None
def parse_thinking_response(self, content: str) -> Tuple[Optional[str], str]:
"""Parse thinking model response to extract thinking process and final answer."""
import re
# Try different thinking tag patterns
thinking_patterns = [
r'<thinking>(.*?)</thinking>',
r'<think>(.*?)</think>',
r'<reasoning>(.*?)</reasoning>',
r'<analysis>(.*?)</analysis>'
]
thinking_content = None
final_answer = content
for pattern in thinking_patterns:
match = re.search(pattern, content, re.DOTALL)
if match:
thinking_content = match.group(1).strip()
# Remove thinking section from final answer
final_answer = re.sub(pattern, '', content, flags=re.DOTALL).strip()
break
# If no thinking tags found, check for other patterns like "I need to think about..."
if not thinking_content:
# Look for thinking indicators at the start
thinking_indicators = [
r'^(Let me think about.*?(?=\n\n|\n#|\nI\'ll))',
r'^(I need to analyze.*?(?=\n\n|\n#|\nI\'ll))',
r'^(First, let me understand.*?(?=\n\n|\n#|\nI\'ll))',
r'^(To document this.*?(?=\n\n|\n#|\nI\'ll))'
]
for pattern in thinking_indicators:
match = re.search(pattern, content, re.DOTALL | re.MULTILINE)
if match:
thinking_content = match.group(1).strip()
final_answer = content[match.end():].strip()
break
return thinking_content, final_answer
def create_documentation_prompt(self, file_analysis: Dict, project_context: Dict) -> str:
"""Create a comprehensive prompt for documentation generation."""
file_path = file_analysis['file_path']
relative_path = os.path.relpath(file_path, project_context['root_path'])
prompt = f"""You are a technical documentation expert. Generate comprehensive markdown documentation for the Python file: `{relative_path}`
## PROJECT CONTEXT:
- **Project Root**: {project_context['root_path']}
- **Total Python Files**: {len(project_context['all_files'])}
- **External Dependencies**: {', '.join(project_context['external_dependencies']) if project_context['external_dependencies'] else 'None detected'}
- **Project Structure**:
{self.format_project_structure(project_context['file_structure'])}
## FILE ANALYSIS:
- **File Path**: `{relative_path}`
- **Lines of Code**: {file_analysis['lines_of_code']}
- **Module Docstring**: {file_analysis['module_docstring'] or 'None'}
### Imports ({len(file_analysis['imports'])} total):
{chr(10).join(f'- `{imp}`' for imp in file_analysis['imports'])}
### Classes ({len(file_analysis['classes'])} total):
{self.format_classes(file_analysis['classes'])}
### Functions ({len(file_analysis['functions'])} total):
{self.format_functions(file_analysis['functions'])}
### Constants ({len(file_analysis['constants'])} total):
{self.format_constants(file_analysis['constants'])}
## RELATED FILES:
{self.format_related_files(file_analysis, project_context)}
## FULL SOURCE CODE:
```python
{file_analysis['content']}
```
## DOCUMENTATION REQUIREMENTS:
Generate a complete markdown documentation file that includes:
1. **File Header**: Title ('Documentation ' + file), purpose, and brief description
2. **Overview**: What this module/file does and its role in the project
3. **Dependencies**: External and internal dependencies with explanations
4. **API Reference**: Detailed documentation of all classes, functions, and constants
5. **Usage Examples**: Practical code examples where applicable
6. **Cross-References**: Links to related files using relative markdown links
7. **Implementation Notes**: Architecture decisions, patterns used, etc.
## FORMATTING GUIDELINES:
- You are **NOT ALLOWED** to use markdown code blocks!
- Use proper markdown syntax, so no **# title** or other non-standard markdown features
- Be careful with indentation
- Limit the use of unnecessary newlines
- Include code blocks with syntax highlighting
- Add tables for parameter/return value documentation
- Use relative links to other documentation files: `[filename](./filename.md)`
- Include line number references where helpful
- Make it professional and comprehensive
- Focus on clarity and usefulness for developers
Generate the complete markdown documentation now:"""
return prompt
def format_project_structure(self, file_structure: Dict) -> str:
"""Format project structure for the prompt."""
lines = []
for root, dirs, files in file_structure:
level = root.replace(file_structure[0][0], '').count(os.sep)
indent = ' ' * level
lines.append(f"{indent}- {os.path.basename(root)}/")
subindent = ' ' * (level + 1)
for file in files:
if file.endswith('.py'):
lines.append(f"{subindent}- {file}")
return '\n'.join(lines[:20]) # Limit to first 20 lines
def format_classes(self, classes: List[Dict]) -> str:
"""Format class information for the prompt."""
if not classes:
return "None"
lines = []
for cls in classes:
lines.append(f"- **{cls['name']}** (line {cls['lineno']})")
if cls['bases']:
lines.append(f" - Inherits from: {', '.join(cls['bases'])}")
if cls['methods']:
lines.append(f" - Methods: {', '.join(cls['methods'])}")
if cls['docstring']:
lines.append(f" - Description: {cls['docstring'][:100]}...")
return '\n'.join(lines)
def format_functions(self, functions: List[Dict]) -> str:
"""Format function information for the prompt."""
if not functions:
return "None"
lines = []
for func in functions:
args_str = ', '.join(func['args']) if func['args'] else ''
lines.append(f"- **{func['name']}({args_str})** (line {func['lineno']})")
if func['returns']:
lines.append(f" - Returns: {func['returns']}")
if func['docstring']:
lines.append(f" - Description: {func['docstring'][:100]}...")
return '\n'.join(lines)
def format_constants(self, constants: List[Dict]) -> str:
"""Format constant information for the prompt."""
if not constants:
return "None"
lines = []
for const in constants:
lines.append(f"- **{const['name']}** = {const['value']} (line {const['lineno']})")
return '\n'.join(lines)
def format_related_files(self, file_analysis: Dict, project_context: Dict) -> str:
"""Format related files information."""
current_imports = set(file_analysis['imports'])
related_files = []
for other_file in project_context['all_files']:
if other_file != file_analysis['file_path']:
rel_path = os.path.relpath(other_file, project_context['root_path'])
module_name = rel_path.replace('/', '.').replace('\\', '.').replace('.py', '')
# Check if this file imports the other or vice versa
if any(imp.startswith(module_name) for imp in current_imports):
related_files.append(f"- `{rel_path}` (imported by this file)")
return '\n'.join(related_files) if related_files else "None detected"
class ProjectAnalyzer:
"""Analyzes the entire project structure."""
def __init__(self, root_path: str):
self.root_path = Path(root_path).resolve()
self.python_files = []
self.external_dependencies = set()
def scan_project(self, exclude_dirs: List[str] = None) -> Dict:
"""Scan the project and collect all Python files."""
        default_excludes = ['.git', '__pycache__', '.pytest_cache', 'venv', 'env', '.venv', 'node_modules']
        exclude_dirs = (exclude_dirs or []) + default_excludes
self.python_files = []
file_structure = []
for root, dirs, files in os.walk(self.root_path):
# Remove excluded directories
dirs[:] = [d for d in dirs if d not in exclude_dirs]
files[:] = [f for f in files if f not in exclude_dirs]
file_structure.append((root, dirs, files))
for file in files:
if file.endswith('.py'):
self.python_files.append(os.path.join(root, file))
# Analyze dependencies
self.analyze_dependencies()
return {
'root_path': str(self.root_path),
'all_files': self.python_files,
'file_structure': file_structure,
'external_dependencies': list(self.external_dependencies)
}
def analyze_dependencies(self):
"""Analyze external dependencies across all Python files."""
analyzer = PythonAnalyzer()
for file_path in self.python_files:
analysis = analyzer.analyze_file(file_path)
if analysis:
for imp in analysis['imports']:
# Check if it's an external dependency (not local)
if not self.is_local_import(imp):
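                        # e.g. an import of "requests.Session" is recorded as the top-level package "requests"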
self.external_dependencies.add(imp.split('.')[0])
def is_local_import(self, import_name: str) -> bool:
"""Check if an import is local to the project."""
# Simple heuristic: if the import starts with a relative path or matches a local file
if import_name.startswith('.'):
return True
# Check if it matches any of our Python files
for py_file in self.python_files:
rel_path = os.path.relpath(py_file, self.root_path)
module_path = rel_path.replace('/', '.').replace('\\', '.').replace('.py', '')
if import_name.startswith(module_path):
return True
return False
class DocumentationManager:
"""Manages the documentation generation process."""
def __init__(self, output_dir: str = "./pydocs"):
self.output_dir = Path(output_dir)
self.output_dir.mkdir(exist_ok=True)
def generate_index(self, project_context: Dict, generated_docs: List[str]):
"""Generate an index.md file linking to all documentation."""
index_content = f"""# Project Documentation
Auto-generated documentation for Python project: `{os.path.basename(project_context['root_path'])}`
## Project Overview
- **Total Python Files**: {len(project_context['all_files'])}
- **External Dependencies**: {len(project_context['external_dependencies'])}
- **Documentation Files**: {len(generated_docs)}
## External Dependencies
{chr(10).join(f'- `{dep}`' for dep in sorted(project_context['external_dependencies']))}
## File Documentation
"""
for doc_file in sorted(generated_docs):
rel_path = os.path.relpath(doc_file.replace('.md', '.py'), '.')
doc_name = os.path.basename(doc_file)
index_content += f"- [`{rel_path}`](./{doc_name})\n"
index_content += f"""
## Project Structure
```
{self.generate_tree_structure(project_context)}
```
---
*Documentation generated automatically using Ollama LLM*
"""
with open(self.output_dir / "index.md", 'w', encoding='utf-8') as f:
f.write(index_content)
def generate_tree_structure(self, project_context: Dict, max_depth: int = 3) -> str:
"""Generate a tree-like structure of the project."""
lines = []
root_path = project_context['root_path']
for py_file in sorted(project_context['all_files']):
rel_path = os.path.relpath(py_file, root_path)
depth = rel_path.count(os.sep)
if depth <= max_depth:
indent = " " * depth
filename = os.path.basename(rel_path)
lines.append(f"{indent}{filename}")
return '\n'.join(lines[:50]) # Limit output
def sanitize_filename(self, file_path: str, root_path: str) -> str:
"""Convert file path to a safe markdown filename."""
rel_path = os.path.relpath(file_path, root_path)
# Replace path separators and special characters
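        # e.g. "src/utils/helpers.py" (relative to the project root) becomes "src/utils/helpers.md"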
safe_name = rel_path.replace('\\', '/').replace('.py', '.md')
return safe_name
def main():
parser = argparse.ArgumentParser(description="Generate documentation for Python project using Ollama")
parser.add_argument("path", help="Path to Python project directory")
parser.add_argument("--model", default="deepseek-r1:latest", help="Ollama model to use (default: deepseek-r1:latest). For thinking models use 'thinking' in name")
parser.add_argument("--thinking", action=argparse.BooleanOptionalAction, help="Does the model think", type=bool)
parser.add_argument("--output", default="./pydocs", help="Output directory for documentation (default: ./pydocs)")
parser.add_argument("--ollama-url", default="http://localhost:11434", help="Ollama server URL")
parser.add_argument("--exclude", nargs="*", default=[], help="Directories to exclude from scanning")
parser.add_argument("--max-files", type=int, default=400, help="Maximum number of files to process")
args = parser.parse_args()
# Validate project path
if not os.path.exists(args.path):
print(f"Error: Path '{args.path}' does not exist")
sys.exit(1)
# Initialize components
doc_generator = OllamaDocGenerator(args.model, args.ollama_url, args.thinking)
project_analyzer = ProjectAnalyzer(args.path)
doc_manager = DocumentationManager(args.output)
analyzer = PythonAnalyzer()
# Check Ollama connection
print("Checking Ollama connection...")
if not doc_generator.check_ollama_connection():
print(f"Error: Cannot connect to Ollama at {args.ollama_url}")
print("Make sure Ollama is running: ollama serve")
sys.exit(1)
# Check model availability
print(f"Checking model availability: {args.model}")
if not doc_generator.check_model_availability():
print(f"Error: Model '{args.model}' is not available")
print(f"Install it with: ollama pull {args.model}")
sys.exit(1)
print(f"✓ Ollama connection established with model: {args.model}")
# Scan project
print("Scanning project...")
project_context = project_analyzer.scan_project(args.exclude)
if not project_context['all_files']:
print("No Python files found in the project")
sys.exit(1)
print(f"Found {len(project_context['all_files'])} Python files")
# Limit files if specified
files_to_process = project_context['all_files'][:args.max_files]
if len(files_to_process) < len(project_context['all_files']):
print(f"Processing first {args.max_files} files (use --max-files to change)")
# Generate documentation for each file
generated_docs = []
for i, file_path in enumerate(files_to_process, 1):
rel_path = os.path.relpath(file_path, args.path)
print(f"[{i}/{len(files_to_process)}] Documenting {rel_path}...")
# Analyze file
file_analysis = analyzer.analyze_file(file_path)
if not file_analysis:
print(f" ⚠ Skipped due to analysis error")
continue
# Generate documentation
        if not file_analysis['content'].strip(" \n\t"):
            print(" ⚠ No document generated because no code was found in the file")
            continue
        documentation = doc_generator.generate_documentation(file_analysis, project_context)
        if not documentation:
            print(" ⚠ Failed to generate documentation")
            continue
# Save documentation
doc_filename = doc_manager.sanitize_filename(file_path, args.path)
doc_path = doc_manager.output_dir / doc_filename
os.makedirs(os.path.dirname(doc_path), exist_ok=True)
with open(doc_path, 'w', encoding='utf-8') as f:
f.write(documentation)
generated_docs.append(doc_filename)
print(f" ✓ Generated: {doc_filename}")
# Generate index file
if generated_docs:
print("Generating index file...")
doc_manager.generate_index(project_context, generated_docs)
print(f"✓ Documentation complete! Check {args.output}/index.md")
print(f"Generated {len(generated_docs)} documentation files")
else:
print("No documentation files were generated")
if __name__ == "__main__":
main()