#!/usr/bin/env python3
"""
Python Documentation Generator using Ollama LLM
Automatically generates comprehensive markdown documentation for Python projects.
"""

import os
import ast
import json
import argparse
import subprocess
import sys
import re
from pathlib import Path
from typing import Dict, List, Set, Tuple, Optional
from urllib.parse import quote

import requests


# Directory names that are never scanned, in addition to any user-supplied ones.
DEFAULT_EXCLUDE_DIRS = (
    '.git', '__pycache__', '.pytest_cache', 'venv', 'env', '.venv', 'node_modules',
)

# AST node types that represent a function definition.
_FUNCTION_NODES = (ast.FunctionDef, ast.AsyncFunctionDef)


def _module_name_from_path(rel_path: str) -> str:
    """Convert a project-relative ``.py`` path into a dotted module name.

    ``pkg/sub/mod.py`` becomes ``pkg.sub.mod`` and ``pkg/__init__.py`` becomes
    ``pkg``. Only the final suffix is removed, so a directory that happens to
    be called ``py`` is not mangled. Returns an empty string for a root-level
    ``__init__.py``.
    """
    parts = list(Path(rel_path.replace('\\', '/')).with_suffix('').parts)
    if parts and parts[-1] == '__init__':
        parts.pop()
    return '.'.join(parts)


def _import_matches_module(import_name: str, module_name: str) -> bool:
    """Return True if *import_name* refers to *module_name* or a name inside it.

    The match is done on dotted-name boundaries so that a local ``a.py`` does
    not claim an import of ``ast``.
    """
    if not module_name:
        return False
    return import_name == module_name or import_name.startswith(module_name + '.')


def _resolve_relative_import(import_name: str, package_parts: Tuple[str, ...]) -> str:
    """Turn a relative import (leading dots) into an absolute dotted name.

    Args:
        import_name: Import as recorded by ``PythonAnalyzer`` (e.g. ``.foo.bar``).
        package_parts: Directory components of the importing file, relative to
            the project root.

    Returns:
        The absolute dotted name; absolute imports are returned unchanged. If
        the relative import climbs above the project root, the leading dots
        are simply dropped (best effort).
    """
    stripped = import_name.lstrip('.')
    level = len(import_name) - len(stripped)
    if level == 0:
        return import_name
    keep = len(package_parts) - (level - 1)
    if keep < 0:
        return stripped
    return '.'.join((*package_parts[:keep], stripped))


def _truncate(text: str, limit: int = 100) -> str:
    """Shorten *text* to *limit* characters, adding ``...`` only if it was cut."""
    if len(text) <= limit:
        return text
    return text[:limit] + '...'


class PythonAnalyzer:
    """Analyzes Python files to extract structural information."""

    def __init__(self):
        self.imports: Set[str] = set()
        self.classes: List[Dict] = []
        self.functions: List[Dict] = []
        self.constants: List[Dict] = []
        self.module_docstring: Optional[str] = None

    def analyze_file(self, file_path: str) -> Optional[Dict]:
        """Analyze a Python file and extract its structure.

        Args:
            file_path: Path of the file to parse (read as UTF-8).

        Returns:
            A dict with the keys ``file_path``, ``content``,
            ``module_docstring``, ``imports``, ``classes``, ``functions``,
            ``constants`` and ``lines_of_code``; or ``None`` if the file could
            not be read or parsed (the error is printed).
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            tree = ast.parse(content)

            # Reset for each file
            self.imports = set()
            self.classes = []
            self.functions = []
            self.constants = []
            self.module_docstring = ast.get_docstring(tree)

            # Collect the identity of every function defined directly in a
            # class body once, instead of re-walking the tree per function.
            method_ids = {
                id(child)
                for node in ast.walk(tree)
                if isinstance(node, ast.ClassDef)
                for child in node.body
                if isinstance(child, _FUNCTION_NODES)
            }

            for node in ast.walk(tree):
                if isinstance(node, ast.Import):
                    for alias in node.names:
                        self.imports.add(alias.name)

                elif isinstance(node, ast.ImportFrom):
                    # Keep the leading dots of relative imports so they can be
                    # recognised as project-local later on.
                    base = '.' * node.level + (node.module or "")
                    for alias in node.names:
                        if node.module:
                            self.imports.add(f"{base}.{alias.name}")
                        else:
                            self.imports.add(f"{base}{alias.name}")

                elif isinstance(node, ast.ClassDef):
                    self.classes.append({
                        'name': node.name,
                        'bases': [ast.unparse(base) for base in node.bases],
                        'docstring': ast.get_docstring(node),
                        'methods': [n.name for n in node.body
                                    if isinstance(n, _FUNCTION_NODES)],
                        'lineno': node.lineno
                    })

                elif isinstance(node, _FUNCTION_NODES):
                    # Every function that is not a direct method of a class.
                    # Functions nested inside other functions are included.
                    if id(node) not in method_ids:
                        self.functions.append({
                            'name': node.name,
                            'args': [arg.arg for arg in node.args.args],
                            'docstring': ast.get_docstring(node),
                            'lineno': node.lineno,
                            'returns': ast.unparse(node.returns) if node.returns else None
                        })

                elif isinstance(node, ast.Assign):
                    self._record_constants(node.targets, node.value, node.lineno)

                elif isinstance(node, ast.AnnAssign) and node.value is not None:
                    self._record_constants([node.target], node.value, node.lineno)

            return {
                'file_path': file_path,
                'content': content,
                'module_docstring': self.module_docstring,
                # Sorted so the generated prompt is reproducible between runs.
                'imports': sorted(self.imports),
                'classes': self.classes,
                'functions': self.functions,
                'constants': self.constants,
                'lines_of_code': len(content.splitlines())
            }

        except (OSError, SyntaxError, ValueError, RecursionError) as e:
            # ValueError also covers UnicodeDecodeError and sources with NUL bytes.
            print(f"Error analyzing {file_path}: {e}")
            return None

    def _record_constants(self, targets: List[ast.expr], value: ast.expr, lineno: int) -> None:
        """Record every ALL_CAPS name among *targets* as a constant."""
        for target in targets:
            if isinstance(target, ast.Name) and target.id.isupper():
                self.constants.append({
                    'name': target.id,
                    'value': ast.unparse(value),
                    'lineno': lineno
                })


class OllamaDocGenerator:
    """Generates documentation using Ollama LLM."""

    # Seconds to wait for the lightweight /api/tags health checks.
    STATUS_TIMEOUT = 10

    def __init__(self, model_name: str = "deepseek-r1:latest",
                 ollama_url: str = "http://localhost:11434",
                 thinking: bool = False):
        self.model_name = model_name
        # Drop a trailing slash so the endpoint URLs never contain "//api".
        self.ollama_url = ollama_url.rstrip('/')
        self.session = requests.Session()
        self.thinking = thinking

    def check_ollama_connection(self) -> bool:
        """Check if Ollama is running and accessible."""
        try:
            response = self.session.get(f"{self.ollama_url}/api/tags",
                                        timeout=self.STATUS_TIMEOUT)
            return response.status_code == 200
        except requests.exceptions.RequestException:
            return False

    def check_model_availability(self) -> bool:
        """Check if the specified model is available.

        Returns True when any model name reported by ``/api/tags`` starts with
        ``self.model_name``; False on any HTTP, network or decoding problem.
        """
        try:
            response = self.session.get(f"{self.ollama_url}/api/tags",
                                        timeout=self.STATUS_TIMEOUT)
            if response.status_code != 200:
                return False
            models = response.json().get('models', [])
            return any(model.get('name', '').startswith(self.model_name)
                       for model in models)
        except (requests.exceptions.RequestException, ValueError):
            # ValueError: the body was not valid JSON.
            return False

    def generate_documentation(self, file_analysis: Dict, project_context: Dict) -> Optional[str]:
        """Generate documentation for a single Python file.

        Args:
            file_analysis: Result of ``PythonAnalyzer.analyze_file``.
            project_context: Result of ``ProjectAnalyzer.scan_project``.

        Returns:
            The generated markdown, or ``None`` if the request failed (the
            error is printed).
        """
        # Create comprehensive prompt with context
        prompt = self.create_documentation_prompt(file_analysis, project_context)

        try:
            if self.thinking:
                print("Thinking model chosen")
                return self._generate_with_chat(prompt)
            print("None thinking model chosen")
            return self._generate_plain(prompt)
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError: the server answered with a body that is not JSON.
            print(f"Error communicating with Ollama: {e}")
            return None

    def _generate_with_chat(self, prompt: str) -> Optional[str]:
        """Query a thinking model via /api/chat and strip its reasoning."""
        response = self.session.post(
            f"{self.ollama_url}/api/chat",
            json={
                "model": self.model_name,
                "messages": [
                    {
                        "role": "user",
                        "content": prompt
                    }
                ],
                "stream": False,
                "options": {
                    "temperature": 0.1,
                    "top_p": 0.9,
                }
            },
            timeout=600  # 10 minute timeout for thinking models
        )

        if response.status_code != 200:
            print(f"Error generating documentation: {response.status_code}")
            return None

        message = response.json().get('message') or {}
        content = message.get('content', '')

        # Parse and display thinking process
        thinking_content, final_answer = self.parse_thinking_response(content)

        if thinking_content:
            print(" 🧠 Model thinking process:")
            if len(thinking_content) > 200:
                print(f" {thinking_content[:200]}...")
            else:
                print(f" {thinking_content}")

        return final_answer if final_answer else content

    def _generate_plain(self, prompt: str) -> Optional[str]:
        """Query a regular model via /api/generate."""
        response = self.session.post(
            f"{self.ollama_url}/api/generate",
            json={
                "model": self.model_name,
                "prompt": prompt,
                "stream": False,
                "think": False,
                "options": {
                    "temperature": 0.1,
                    "top_p": 0.9,
                }
            },
            timeout=300  # 5 minute timeout
        )

        if response.status_code != 200:
            print(f"Error generating documentation: {response.status_code}")
            return None

        # .get() so a response without the key is reported as a failed
        # generation by the caller instead of raising KeyError.
        return response.json().get('response')

    def parse_thinking_response(self, content: str) -> Tuple[Optional[str], str]:
        """Parse thinking model response to extract thinking process and final answer.

        Args:
            content: Raw text returned by the model.

        Returns:
            ``(thinking, answer)``. ``thinking`` is ``None`` when no reasoning
            section was recognised (or it was empty); ``answer`` is the text
            with the reasoning section removed.
        """
        # NOTE(review): the tag names below are reconstructed. The patterns in
        # the file as received were four identical r'(.*?)' entries, which
        # always match the empty string and therefore strip nothing.
        thinking_patterns = [
            r'<think>(.*?)</think>',
            r'<thinking>(.*?)</thinking>',
            r'<thought>(.*?)</thought>',
            r'<reasoning>(.*?)</reasoning>',
        ]

        for pattern in thinking_patterns:
            match = re.search(pattern, content, re.DOTALL)
            if match:
                thinking_content = match.group(1).strip()
                # Remove thinking section from final answer
                final_answer = re.sub(pattern, '', content, flags=re.DOTALL).strip()
                return (thinking_content or None), final_answer

        # If no thinking tags found, look for prose-style thinking at the very
        # start of the answer. No re.MULTILINE here: "^" must only match the
        # beginning of the text, otherwise everything before a mid-document
        # match would be thrown away.
        thinking_indicators = [
            r'^(Let me think about.*?(?=\n\n|\n#|\nI\'ll))',
            r'^(I need to analyze.*?(?=\n\n|\n#|\nI\'ll))',
            r'^(First, let me understand.*?(?=\n\n|\n#|\nI\'ll))',
            r'^(To document this.*?(?=\n\n|\n#|\nI\'ll))'
        ]

        text = content.lstrip()
        for pattern in thinking_indicators:
            match = re.search(pattern, text, re.DOTALL)
            if match:
                return match.group(1).strip(), text[match.end():].strip()

        return None, content

    def create_documentation_prompt(self, file_analysis: Dict, project_context: Dict) -> str:
        """Create a comprehensive prompt for documentation generation.

        Args:
            file_analysis: Result of ``PythonAnalyzer.analyze_file``.
            project_context: Result of ``ProjectAnalyzer.scan_project``.

        Returns:
            The full prompt text, including the file's source code.
        """
        root_path = project_context['root_path']
        relative_path = os.path.relpath(file_analysis['file_path'], root_path)

        dependencies = project_context['external_dependencies']
        dependency_text = ', '.join(dependencies) if dependencies else 'None detected'
        structure_text = self.format_project_structure(project_context['file_structure'])
        total_files = len(project_context['all_files'])

        imports = file_analysis['imports']
        classes = file_analysis['classes']
        functions = file_analysis['functions']
        constants = file_analysis['constants']

        import_text = '\n'.join(f'- `{imp}`' for imp in imports)
        class_text = self.format_classes(classes)
        function_text = self.format_functions(functions)
        constant_text = self.format_constants(constants)
        related_text = self.format_related_files(file_analysis, project_context)

        lines_of_code = file_analysis['lines_of_code']
        module_docstring = file_analysis['module_docstring'] or 'None'
        source = file_analysis['content']

        # NOTE(review): the formatting guidelines below both forbid markdown
        # code blocks and ask for code blocks with syntax highlighting. The
        # wording is kept as-is; confirm which instruction is intended.
        prompt = f"""You are a technical documentation expert. Generate comprehensive markdown documentation for the Python file: `{relative_path}`

## PROJECT CONTEXT:
- **Project Root**: {root_path}
- **Total Python Files**: {total_files}
- **External Dependencies**: {dependency_text}
- **Project Structure**:
{structure_text}

## FILE ANALYSIS:
- **File Path**: `{relative_path}`
- **Lines of Code**: {lines_of_code}
- **Module Docstring**: {module_docstring}

### Imports ({len(imports)} total):
{import_text}

### Classes ({len(classes)} total):
{class_text}

### Functions ({len(functions)} total):
{function_text}

### Constants ({len(constants)} total):
{constant_text}

## RELATED FILES:
{related_text}

## FULL SOURCE CODE:
```python
{source}
```

## DOCUMENTATION REQUIREMENTS:

Generate a complete markdown documentation file that includes:

1. **File Header**: Title ('Documentation ' + file), purpose, and brief description
2. **Overview**: What this module/file does and its role in the project
3. **Dependencies**: External and internal dependencies with explanations
4. **API Reference**: Detailed documentation of all classes, functions, and constants
5. **Usage Examples**: Practical code examples where applicable
6. **Cross-References**: Links to related files using relative markdown links
7. **Implementation Notes**: Architecture decisions, patterns used, etc.

## FORMATTING GUIDELINES:

- YOUR ARE **NOT ALLOWED** TO USE markdown CODE BLOCKS!
- Use proper markdown syntax, so no **# title** or other none standard markdown features
- Be carefull with indentation
- Limite the use of unecessary newlines
- Include code blocks with syntax highlighting
- Add tables for parameter/return value documentation
- Use relative links to other documentation files: `[filename](./filename.md)`
- Include line number references where helpful
- Make it professional and comprehensive
- Focus on clarity and usefulness for developers

Generate the complete markdown documentation now:"""

        return prompt

    def format_project_structure(self, file_structure: List[Tuple[str, List[str], List[str]]]) -> str:
        """Format project structure for the prompt.

        Args:
            file_structure: ``(root, dirs, files)`` tuples as produced by
                ``os.walk``; the first entry is the project root.

        Returns:
            An indented markdown list of directories and ``.py`` files,
            limited to the first 20 lines. Empty string for an empty
            structure.
        """
        if not file_structure:
            return ''

        base = file_structure[0][0]
        lines = []
        for root, _dirs, files in file_structure:
            rel_root = os.path.relpath(root, base)
            level = 0 if rel_root == os.curdir else rel_root.count(os.sep) + 1
            indent = '  ' * level
            lines.append(f"{indent}- {os.path.basename(root)}/")
            subindent = '  ' * (level + 1)
            lines.extend(f"{subindent}- {file}" for file in files if file.endswith('.py'))
        return '\n'.join(lines[:20])  # Limit to first 20 lines

    def format_classes(self, classes: List[Dict]) -> str:
        """Format class information for the prompt ("None" if there are none)."""
        if not classes:
            return "None"

        lines = []
        for cls in classes:
            lines.append(f"- **{cls['name']}** (line {cls['lineno']})")
            if cls['bases']:
                lines.append(f"  - Inherits from: {', '.join(cls['bases'])}")
            if cls['methods']:
                lines.append(f"  - Methods: {', '.join(cls['methods'])}")
            if cls['docstring']:
                lines.append(f"  - Description: {_truncate(cls['docstring'])}")
        return '\n'.join(lines)

    def format_functions(self, functions: List[Dict]) -> str:
        """Format function information for the prompt ("None" if there are none)."""
        if not functions:
            return "None"

        lines = []
        for func in functions:
            args_str = ', '.join(func['args']) if func['args'] else ''
            lines.append(f"- **{func['name']}({args_str})** (line {func['lineno']})")
            if func['returns']:
                lines.append(f"  - Returns: {func['returns']}")
            if func['docstring']:
                lines.append(f"  - Description: {_truncate(func['docstring'])}")
        return '\n'.join(lines)

    def format_constants(self, constants: List[Dict]) -> str:
        """Format constant information for the prompt ("None" if there are none)."""
        if not constants:
            return "None"

        return '\n'.join(
            f"- **{const['name']}** = {const['value']} (line {const['lineno']})"
            for const in constants
        )

    def format_related_files(self, file_analysis: Dict, project_context: Dict) -> str:
        """Format related files information.

        Lists the project files that the analysed file imports. Relative
        imports are resolved against the analysed file's own package before
        comparison.

        Returns:
            One markdown bullet per related file, or "None detected".
        """
        root_path = project_context['root_path']
        current_path = file_analysis['file_path']
        package_parts = Path(os.path.relpath(current_path, root_path)).parent.parts
        current_imports = {
            _resolve_relative_import(imp, package_parts)
            for imp in file_analysis['imports']
        }

        related_files = []
        for other_file in project_context['all_files']:
            if other_file == current_path:
                continue
            rel_path = os.path.relpath(other_file, root_path)
            module_name = _module_name_from_path(rel_path)
            if any(_import_matches_module(imp, module_name) for imp in current_imports):
                related_files.append(f"- `{rel_path}` (imported by this file)")

        return '\n'.join(related_files) if related_files else "None detected"


class ProjectAnalyzer:
    """Analyzes the entire project structure."""

    def __init__(self, root_path: str):
        self.root_path = Path(root_path).resolve()
        self.python_files: List[str] = []
        self.external_dependencies: Set[str] = set()
        # Lazily built set of dotted module names for the files in python_files.
        self._local_modules: Optional[Set[str]] = None

    def scan_project(self, exclude_dirs: List[str] = None) -> Dict:
        """Scan the project and collect all Python files.

        Args:
            exclude_dirs: Extra directory (or file) names to skip, on top of
                ``DEFAULT_EXCLUDE_DIRS``.

        Returns:
            A dict with ``root_path``, ``all_files``, ``file_structure`` and
            ``external_dependencies``.
        """
        excluded = set(DEFAULT_EXCLUDE_DIRS)
        if exclude_dirs:
            excluded.update(exclude_dirs)

        self.python_files = []
        self.external_dependencies = set()
        file_structure = []

        for root, dirs, files in os.walk(str(self.root_path)):
            # Prune excluded names in place so os.walk does not descend into
            # them; sort so the traversal (and therefore --max-files) is
            # deterministic.
            dirs[:] = sorted(d for d in dirs if d not in excluded)
            files[:] = sorted(f for f in files if f not in excluded)

            file_structure.append((root, dirs, files))

            for file in files:
                if file.endswith('.py'):
                    self.python_files.append(os.path.join(root, file))

        # Analyze dependencies
        self.analyze_dependencies()

        return {
            'root_path': str(self.root_path),
            'all_files': self.python_files,
            'file_structure': file_structure,
            'external_dependencies': list(self.external_dependencies)
        }

    def analyze_dependencies(self):
        """Analyze external dependencies across all Python files.

        Adds the top-level package of every non-local import to
        ``self.external_dependencies``. Files that cannot be analysed are
        skipped.
        """
        # The file list may have changed since the last call.
        self._local_modules = None
        analyzer = PythonAnalyzer()

        for file_path in self.python_files:
            analysis = analyzer.analyze_file(file_path)
            if not analysis:
                continue
            for imp in analysis['imports']:
                # Check if it's an external dependency (not local)
                if not self.is_local_import(imp):
                    self.external_dependencies.add(imp.split('.')[0])

    def is_local_import(self, import_name: str) -> bool:
        """Check if an import is local to the project.

        An import is local when it is relative (starts with ``.``) or when it,
        or one of its dotted prefixes, names a module found in the project.
        """
        if import_name.startswith('.'):
            return True

        if self._local_modules is None:
            modules = {
                _module_name_from_path(os.path.relpath(py_file, self.root_path))
                for py_file in self.python_files
            }
            modules.discard('')  # a root-level __init__.py has no module name
            self._local_modules = modules

        parts = import_name.split('.')
        return any(
            '.'.join(parts[:end]) in self._local_modules
            for end in range(1, len(parts) + 1)
        )


class DocumentationManager:
    """Manages the documentation generation process."""

    def __init__(self, output_dir: str = "./pydocs"):
        self.output_dir = Path(output_dir)
        # parents=True so a nested output path such as build/docs/api works.
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def generate_index(self, project_context: Dict, generated_docs: List[str]):
        """Generate an index.md file linking to all documentation.

        Args:
            project_context: Result of ``ProjectAnalyzer.scan_project``.
            generated_docs: Documentation file names relative to the output
                directory, as returned by ``sanitize_filename``.
        """
        project_name = os.path.basename(project_context['root_path'])
        dependencies = project_context['external_dependencies']
        dependency_text = '\n'.join(f'- `{dep}`' for dep in sorted(dependencies))

        link_lines = []
        for doc_file in sorted(generated_docs):
            doc_rel = doc_file.replace('\\', '/')
            source_rel = Path(doc_rel).with_suffix('.py').as_posix()
            # Link to the full relative path (docs for nested modules live in
            # subdirectories) and URL-quote it so names with spaces work.
            link_lines.append(f"- [`{source_rel}`](./{quote(doc_rel)})\n")

        index_content = f"""# Project Documentation

Auto-generated documentation for Python project: `{project_name}`

## Project Overview

- **Total Python Files**: {len(project_context['all_files'])}
- **External Dependencies**: {len(dependencies)}
- **Documentation Files**: {len(generated_docs)}

## External Dependencies

{dependency_text}

## File Documentation

{''.join(link_lines)}
## Project Structure

```
{self.generate_tree_structure(project_context)}
```

---
*Documentation generated automatically using Ollama LLM*
"""

        with open(self.output_dir / "index.md", 'w', encoding='utf-8') as f:
            f.write(index_content)

    def generate_tree_structure(self, project_context: Dict, max_depth: int = 3) -> str:
        """Generate a tree-like structure of the project.

        Lists the file name of every Python file whose depth below the root
        is at most *max_depth*, indented by depth, limited to 50 lines.
        """
        lines = []
        root_path = project_context['root_path']

        for py_file in sorted(project_context['all_files']):
            rel_path = os.path.relpath(py_file, root_path)
            depth = rel_path.count(os.sep)

            if depth <= max_depth:
                indent = "  " * depth
                filename = os.path.basename(rel_path)
                lines.append(f"{indent}{filename}")

        return '\n'.join(lines[:50])  # Limit output

    def sanitize_filename(self, file_path: str, root_path: str) -> str:
        """Convert file path to a safe markdown filename.

        Returns the path of *file_path* relative to *root_path*, using ``/``
        separators, with only its final suffix replaced by ``.md``.
        """
        rel_path = os.path.relpath(file_path, root_path).replace('\\', '/')
        return Path(rel_path).with_suffix('.md').as_posix()


def main():
    """Command-line entry point: scan a project and write one .md per file."""
    parser = argparse.ArgumentParser(description="Generate documentation for Python project using Ollama")
    parser.add_argument("path", help="Path to Python project directory")
    parser.add_argument("--model", default="deepseek-r1:latest",
                        help="Ollama model to use (default: deepseek-r1:latest). For thinking models use 'thinking' in name")
    # No type= here: BooleanOptionalAction takes no value, and passing
    # type/choices/metavar to it is deprecated.
    parser.add_argument("--thinking", action=argparse.BooleanOptionalAction, default=False,
                        help="Does the model think")
    parser.add_argument("--output", default="./pydocs",
                        help="Output directory for documentation (default: ./pydocs)")
    parser.add_argument("--ollama-url", default="http://localhost:11434",
                        help="Ollama server URL")
    parser.add_argument("--exclude", nargs="*", default=[],
                        help="Directories to exclude from scanning")
    parser.add_argument("--max-files", type=int, default=400,
                        help="Maximum number of files to process")

    args = parser.parse_args()

    # Validate project path
    if not os.path.exists(args.path):
        print(f"Error: Path '{args.path}' does not exist")
        sys.exit(1)
    if not os.path.isdir(args.path):
        print(f"Error: Path '{args.path}' is not a directory")
        sys.exit(1)

    # Initialize components
    doc_generator = OllamaDocGenerator(args.model, args.ollama_url, args.thinking)
    project_analyzer = ProjectAnalyzer(args.path)
    doc_manager = DocumentationManager(args.output)
    analyzer = PythonAnalyzer()

    # Check Ollama connection
    print("Checking Ollama connection...")
    if not doc_generator.check_ollama_connection():
        print(f"Error: Cannot connect to Ollama at {args.ollama_url}")
        print("Make sure Ollama is running: ollama serve")
        sys.exit(1)

    # Check model availability
    print(f"Checking model availability: {args.model}")
    if not doc_generator.check_model_availability():
        print(f"Error: Model '{args.model}' is not available")
        print(f"Install it with: ollama pull {args.model}")
        sys.exit(1)

    print(f"✓ Ollama connection established with model: {args.model}")

    # Scan project
    print("Scanning project...")
    project_context = project_analyzer.scan_project(args.exclude)

    if not project_context['all_files']:
        print("No Python files found in the project")
        sys.exit(1)

    print(f"Found {len(project_context['all_files'])} Python files")

    # File paths are built from the resolved root, so relative paths must be
    # computed against that same root (args.path may be a symlink or contain
    # "..", which would place output outside the output directory).
    root_path = project_context['root_path']

    # Limit files if specified
    files_to_process = project_context['all_files'][:args.max_files]
    if len(files_to_process) < len(project_context['all_files']):
        print(f"Processing first {args.max_files} files (use --max-files to change)")

    # Generate documentation for each file
    generated_docs = []

    for i, file_path in enumerate(files_to_process, 1):
        rel_path = os.path.relpath(file_path, root_path)
        print(f"[{i}/{len(files_to_process)}] Documenting {rel_path}...")

        # Analyze file
        file_analysis = analyzer.analyze_file(file_path)
        if not file_analysis:
            print("  ⚠ Skipped due to analysis error")
            continue

        # Nothing to send to the model for an empty file.
        if not file_analysis['content'].strip(" \n\t"):
            print("  ⚠ No document generated because no code was found in the file")
            continue

        # Generate documentation
        documentation = doc_generator.generate_documentation(file_analysis, project_context)
        if not documentation:
            print("  ⚠ Failed to generate documentation")
            continue

        # Save documentation
        doc_filename = doc_manager.sanitize_filename(file_path, root_path)
        doc_path = doc_manager.output_dir / doc_filename
        doc_path.parent.mkdir(parents=True, exist_ok=True)

        with open(doc_path, 'w', encoding='utf-8') as f:
            f.write(documentation)

        generated_docs.append(doc_filename)
        print(f"  ✓ Generated: {doc_filename}")

    # Generate index file
    if generated_docs:
        print("Generating index file...")
        doc_manager.generate_index(project_context, generated_docs)
        print(f"✓ Documentation complete! Check {args.output}/index.md")
        print(f"Generated {len(generated_docs)} documentation files")
    else:
        print("No documentation files were generated")


if __name__ == "__main__":
    main()