#!/usr/bin/env python3
"""
Python Documentation Generator using Ollama LLM

Automatically generates comprehensive markdown documentation for Python projects.
"""

import os
import ast
import json
import argparse
import subprocess
import sys
from pathlib import Path
from typing import Dict, List, Set, Tuple, Optional

import requests
import re
from urllib.parse import quote


class PythonAnalyzer:
    """Analyzes Python files to extract structural information via the `ast` module."""

    def __init__(self):
        # Per-file accumulators; reset at the start of every analyze_file() call.
        self.imports = set()
        self.classes = []
        self.functions = []
        self.constants = []
        self.module_docstring = None

    def analyze_file(self, file_path: str) -> Optional[Dict]:
        """Analyze a Python file and extract its structure.

        Returns a dict with the file's imports, classes, top-level functions,
        ALL_CAPS constants and raw content, or None if the file cannot be
        read or parsed (the error is printed, not raised).
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            tree = ast.parse(content)

            # Reset for each file so the same analyzer instance can be reused.
            self.imports = set()
            self.classes = []
            self.functions = []
            self.constants = []
            self.module_docstring = ast.get_docstring(tree)

            # Collect every function that is a direct child of a class ONCE.
            # (The original re-walked the whole tree per function — O(n^2).)
            method_nodes = set()
            for maybe_cls in ast.walk(tree):
                if isinstance(maybe_cls, ast.ClassDef):
                    for child in maybe_cls.body:
                        if isinstance(child, ast.FunctionDef):
                            method_nodes.add(child)

            for node in ast.walk(tree):
                if isinstance(node, ast.Import):
                    for alias in node.names:
                        self.imports.add(alias.name)
                elif isinstance(node, ast.ImportFrom):
                    module = node.module or ""
                    for alias in node.names:
                        self.imports.add(f"{module}.{alias.name}")
                elif isinstance(node, ast.ClassDef):
                    self.classes.append({
                        'name': node.name,
                        'bases': [ast.unparse(base) for base in node.bases],
                        'docstring': ast.get_docstring(node),
                        'methods': [n.name for n in node.body
                                    if isinstance(n, ast.FunctionDef)],
                        'lineno': node.lineno
                    })
                elif isinstance(node, ast.FunctionDef):
                    # Only functions that are NOT methods of some class.
                    if node not in method_nodes:
                        self.functions.append({
                            'name': node.name,
                            'args': [arg.arg for arg in node.args.args],
                            'docstring': ast.get_docstring(node),
                            'lineno': node.lineno,
                            'returns': ast.unparse(node.returns) if node.returns else None
                        })
                elif isinstance(node, ast.Assign):
                    # Top-level constants (ALL_CAPS variables)
                    for target in node.targets:
                        if isinstance(target, ast.Name) and target.id.isupper():
                            self.constants.append({
                                'name': target.id,
                                'value': ast.unparse(node.value),
                                'lineno': node.lineno
                            })

            return {
                'file_path': file_path,
                'content': content,
                'module_docstring': self.module_docstring,
                'imports': list(self.imports),
                'classes': self.classes,
                'functions': self.functions,
                'constants': self.constants,
                'lines_of_code': len(content.splitlines())
            }
        except Exception as e:
            # Best-effort: a single unparsable file should not abort the run.
            print(f"Error analyzing {file_path}: {e}")
            return None


class OllamaDocGenerator:
    """Generates documentation using an Ollama-hosted LLM."""

    def __init__(self, model_name: str = "deepseek-r1:latest",
                 ollama_url: str = "http://localhost:11434",
                 thinking: bool = False):
        self.model_name = model_name
        self.ollama_url = ollama_url
        self.session = requests.Session()
        # When True, use the /api/chat endpoint and parse <think> blocks.
        self.thinking = thinking

    def check_ollama_connection(self) -> bool:
        """Check if Ollama is running and accessible."""
        try:
            response = self.session.get(f"{self.ollama_url}/api/tags")
            return response.status_code == 200
        except requests.exceptions.RequestException:
            return False

    def check_model_availability(self) -> bool:
        """Check if the specified model is available on the Ollama server."""
        try:
            response = self.session.get(f"{self.ollama_url}/api/tags")
            if response.status_code == 200:
                models = response.json().get('models', [])
                # startswith() so "deepseek-r1" matches "deepseek-r1:latest".
                return any(model['name'].startswith(self.model_name)
                           for model in models)
            return False
        except requests.exceptions.RequestException:
            return False

    def generate_documentation(self, file_analysis: Dict,
                               project_context: Dict) -> Optional[str]:
        """Generate markdown documentation for a single analyzed Python file.

        Returns the generated markdown, or None on any HTTP/transport error.
        """
        # Create comprehensive prompt with context
        prompt = self.create_documentation_prompt(file_analysis, project_context)

        is_thinking_model = self.thinking

        try:
            if is_thinking_model:
                print("Thinking model chosen")
                # For thinking models, use chat format and handle thinking tokens.
                # NOTE(review): a tool is advertised here but tool calls are
                # never executed/answered — the model's tool requests are only
                # printed. Confirm whether a tool-execution loop was intended.
                response = self.session.post(
                    f"{self.ollama_url}/api/chat",
                    json={
                        "model": self.model_name,
                        "messages": [
                            {"role": "user", "content": prompt}
                        ],
                        "tools": [
                            {
                                'type': 'function',
                                'function': {
                                    'name': 'analyze_file',
                                    'description': 'This tool allows you to examine other Python files in the project and it returns the same structured information you received for the current file (imports, classes, functions, constants, etc.).',
                                    'parameters': {
                                        'type': 'object',
                                        'properties': {
                                            'path': {
                                                'type': 'string',
                                                'description': 'Relative (from the root of the current project) path to the file',
                                            },
                                        },
                                        'required': ['path'],
                                    },
                                },
                            },
                        ],
                        "stream": False,
                        "options": {
                            "temperature": 0.1,
                            "top_p": 0.9,
                        }
                    },
                    timeout=60 * 60 * 24  # generous: local models can be slow
                )
                if response.status_code == 200:
                    result = response.json()
                    tool_calls = result.get('tool_calls', [])
                    print(result, tool_calls)
                    message = result.get('message', {})
                    content = message.get('content', '')

                    # Parse and display thinking process
                    thinking_content, final_answer = self.parse_thinking_response(content)
                    if thinking_content:
                        print(f"    🧠 Model thinking process:")
                        print(f"    {thinking_content[:200]}..."
                              if len(thinking_content) > 200
                              else f"    {thinking_content}")
                    return final_answer if final_answer else content
                else:
                    print(f"Error generating documentation: {response.status_code} {response.text}")
                    return None
            else:
                print("None thinking model chosen")
                response = self.session.post(
                    f"{self.ollama_url}/api/generate",
                    json={
                        "model": self.model_name,
                        "prompt": prompt,
                        "stream": False,
                        "think": False,
                        "options": {
                            "temperature": 0.1,
                            "top_p": 0.9,
                        }
                    },
                    timeout=60 * 60 * 12
                )
                if response.status_code == 200:
                    return response.json()['response']
                else:
                    print(f"Error generating documentation: {response.status_code}")
                    return None
        except requests.exceptions.RequestException as e:
            print(f"Error communicating with Ollama: {e}")
            return None

    def parse_thinking_response(self, content: str) -> Tuple[Optional[str], str]:
        """Parse a thinking-model response into (thinking_process, final_answer).

        First tries explicit tag pairs (e.g. <think>...</think>); the
        originals were corrupted to r'(.*?)' which matched the empty string
        and never extracted anything. Falls back to heuristic prose openers.
        Returns (None, content) when no thinking section is found.
        """
        thinking_patterns = [
            r'<think>(.*?)</think>',
            r'<thinking>(.*?)</thinking>',
            r'<thought>(.*?)</thought>',
            r'<reasoning>(.*?)</reasoning>',
        ]

        thinking_content = None
        final_answer = content

        for pattern in thinking_patterns:
            match = re.search(pattern, content, re.DOTALL)
            if match:
                thinking_content = match.group(1).strip()
                final_answer = re.sub(pattern, '', content, flags=re.DOTALL).strip()
                break

        if not thinking_content:
            # Heuristic: leading "reasoning prose" up to the first blank line,
            # heading, or "I'll ..." sentence.
            thinking_indicators = [
                r'^(Let me think about.*?(?=\n\n|\n#|\nI\'ll))',
                r'^(I need to analyze.*?(?=\n\n|\n#|\nI\'ll))',
                r'^(First, let me understand.*?(?=\n\n|\n#|\nI\'ll))',
                r'^(To document this.*?(?=\n\n|\n#|\nI\'ll))'
            ]
            for pattern in thinking_indicators:
                match = re.search(pattern, content, re.DOTALL | re.MULTILINE)
                if match:
                    thinking_content = match.group(1).strip()
                    final_answer = content[match.end():].strip()
                    break

        return thinking_content, final_answer

    def get_code_documentation(self, file_analysis: Dict, project_context: Dict):
        """Build the structured context section of the prompt for one file."""
        file_path = file_analysis['file_path']
        relative_path = os.path.relpath(file_path, project_context['root_path'])
        return f"""## PROJECT CONTEXT:
- **Project Root**: {project_context['root_path']}
- **Total Python Files**: {len(project_context['all_files'])}
- **External Dependencies**: {', '.join(project_context['external_dependencies']) if project_context['external_dependencies'] else 'None detected'}
- **Project Structure**:
{self.format_project_structure(project_context['file_structure'])}

## FILE ANALYSIS:
- **File Path**: `{relative_path}`
- **Lines of Code**: {file_analysis['lines_of_code']}
- **Module Docstring**: {file_analysis['module_docstring'] or 'None'}

### Imports ({len(file_analysis['imports'])} total):
{chr(10).join(f'- `{imp}`' for imp in file_analysis['imports'])}

### Classes ({len(file_analysis['classes'])} total):
{self.format_classes(file_analysis['classes'])}

### Functions ({len(file_analysis['functions'])} total):
{self.format_functions(file_analysis['functions'])}

### Constants ({len(file_analysis['constants'])} total):
{self.format_constants(file_analysis['constants'])}

## RELATED FILES:
{self.format_related_files(file_analysis, project_context)}

## FULL SOURCE CODE:
```python
{file_analysis['content']}
```
"""

    def create_documentation_prompt(self, file_analysis: Dict,
                                    project_context: Dict) -> str:
        """Assemble the full LLM prompt: instructions + file/project context."""
        file_path = file_analysis['file_path']
        relative_path = os.path.relpath(file_path, project_context['root_path'])

        prompt = f"""You are a technical documentation expert. Generate comprehensive markdown documentation for the Python file: `{relative_path}`

## AVAILABLE TOOLS
You have access to a `analyze_file` tool that allows you to examine other Python files in the project. This tool returns the same structured information you received for the current file (imports, classes, functions, constants, etc.).

**When to use this tool:**
- When you need to understand how other files interact with the current file
- To verify import relationships and dependencies
- To provide more accurate cross-references in your documentation
- To understand the broader context of classes or functions used in the current file
- TRY TO USE IT NOW!

{self.get_code_documentation(file_analysis, project_context)}

## DOCUMENTATION REQUIREMENTS:
**Generate a complete markdown documentation file that includes:**
1. **File Header**: Title ('Documentation ' + file), purpose, and brief description
2. **Overview**: What this module/file does and its role in the project
3. **Dependencies**: External and internal dependencies with explanations
4. **API Reference**: Detailed documentation of all classes, functions, and constants
5. **Usage Examples**: Practical code examples where applicable
6. **Cross-References**: Links to related files using relative markdown links
7. **Implementation Notes**: Architecture decisions, patterns used, etc.

## FORMATTING GUIDELINES:
- YOUR ARE **NOT ALLOWED** TO USE markdown CODE BLOCKS!
- Use proper markdown syntax, so no **# title** or other none standard markdown features
- Be carefull with indentation
- Limite the use of unecessary newlines
- Include code blocks with syntax highlighting
- Add tables for parameter/return value documentation
- Use relative links to other documentation files: `[filename](./filename.md)`
- Include line number references where helpful
- Make it professional and comprehensive
- Focus on clarity and usefulness for developers

Generate the complete markdown documentation now:"""
        return prompt

    def format_project_structure(self, file_structure: Dict) -> str:
        """Format the os.walk()-style structure as an indented bullet tree."""
        lines = []
        for root, dirs, files in file_structure:
            level = root.replace(file_structure[0][0], '').count(os.sep)
            indent = '  ' * level
            lines.append(f"{indent}- {os.path.basename(root)}/")
            subindent = '  ' * (level + 1)
            for file in files:
                if file.endswith('.py'):
                    lines.append(f"{subindent}- {file}")
        return '\n'.join(lines[:20])  # cap prompt size

    def format_classes(self, classes: List[Dict]) -> str:
        """Format class information for the prompt."""
        if not classes:
            return "None"
        lines = []
        for cls in classes:
            lines.append(f"- **{cls['name']}** (line {cls['lineno']})")
            if cls['bases']:
                lines.append(f"  - Inherits from: {', '.join(cls['bases'])}")
            if cls['methods']:
                lines.append(f"  - Methods: {', '.join(cls['methods'])}")
            if cls['docstring']:
                lines.append(f"  - Description: {cls['docstring'][:100]}...")
        return '\n'.join(lines)

    def format_functions(self, functions: List[Dict]) -> str:
        """Format function information for the prompt."""
        if not functions:
            return "None"
        lines = []
        for func in functions:
            args_str = ', '.join(func['args']) if func['args'] else ''
            lines.append(f"- **{func['name']}({args_str})** (line {func['lineno']})")
            if func['returns']:
                lines.append(f"  - Returns: {func['returns']}")
            if func['docstring']:
                lines.append(f"  - Description: {func['docstring'][:100]}...")
        return '\n'.join(lines)

    def format_constants(self, constants: List[Dict]) -> str:
        """Format module-level constants for the prompt."""
        if not constants:
            return "None"
        lines = []
        for const in constants:
            lines.append(f"- **{const['name']}** = {const['value']} (line {const['lineno']})")
        return '\n'.join(lines)

    def format_related_files(self, file_analysis: Dict, project_context: Dict) -> str:
        """List project files whose module path is imported by the current file."""
        current_imports = set(file_analysis['imports'])
        related_files = []
        for other_file in project_context['all_files']:
            if other_file != file_analysis['file_path']:
                rel_path = os.path.relpath(other_file, project_context['root_path'])
                module_name = rel_path.replace('/', '.').replace('\\', '.').replace('.py', '')
                if any(imp.startswith(module_name) for imp in current_imports):
                    related_files.append(f"- `{rel_path}` (imported by this file)")
        return '\n'.join(related_files) if related_files else "None detected"


class ProjectAnalyzer:
    """Analyzes the entire project structure (directory tree or single file)."""

    DEFAULT_EXCLUDES = ['.git', '__pycache__', '.pytest_cache',
                        'venv', 'env', '.venv', 'node_modules']

    def __init__(self, root_path: str):
        self.root_path = Path(root_path).resolve()
        self.python_files = []
        self.external_dependencies = set()

    def scan_project(self, exclude_dirs: List[str] = None) -> Dict:
        """Walk the project and return {root_path, all_files, file_structure,
        external_dependencies}. Also handles the case where root_path is a
        single .py file rather than a directory."""
        if os.path.isdir(self.root_path):
            if exclude_dirs is None:
                exclude_dirs = list(self.DEFAULT_EXCLUDES)
            else:
                exclude_dirs = exclude_dirs + list(self.DEFAULT_EXCLUDES)

            self.python_files = []
            file_structure = []
            for root, dirs, files in os.walk(self.root_path):
                # In-place prune so os.walk never descends into excluded dirs.
                dirs[:] = [d for d in dirs if d not in exclude_dirs]
                files[:] = [f for f in files if f not in exclude_dirs]
                file_structure.append((root, dirs, files))
                for file in files:
                    if file.endswith('.py'):
                        self.python_files.append(os.path.join(root, file))

            self.analyze_dependencies()
            return {
                'root_path': str(self.root_path),
                'all_files': self.python_files,
                'file_structure': file_structure,
                'external_dependencies': list(self.external_dependencies)
            }
        else:
            # Single-file mode. Keep the ABSOLUTE path (the original stored
            # only the basename, which broke analyze_dependencies whenever
            # the cwd differed from the file's directory).
            single_file = str(self.root_path)
            self.root_path = Path(os.path.dirname(single_file))
            self.python_files = [single_file]
            self.analyze_dependencies()
            return {
                'root_path': str(self.root_path),
                'all_files': self.python_files,
                'file_structure': [],
                'external_dependencies': list(self.external_dependencies)
            }

    def analyze_dependencies(self):
        """Populate self.external_dependencies from every scanned file's imports."""
        analyzer = PythonAnalyzer()
        for file_path in self.python_files:
            analysis = analyzer.analyze_file(file_path)
            if analysis:
                for imp in analysis['imports']:
                    if not self.is_local_import(imp):
                        # Keep only the top-level package name.
                        self.external_dependencies.add(imp.split('.')[0])

    def is_local_import(self, import_name: str) -> bool:
        """Return True if the import refers to a file inside this project."""
        if import_name.startswith('.'):
            return True
        for py_file in self.python_files:
            rel_path = os.path.relpath(py_file, self.root_path)
            module_path = rel_path.replace('/', '.').replace('\\', '.').replace('.py', '')
            if import_name.startswith(module_path):
                return True
        return False


class DocumentationManager:
    """Writes generated markdown files and the top-level index."""

    def __init__(self, output_dir: str = "./pydocs"):
        self.output_dir = Path(output_dir)
        os.makedirs(self.output_dir, exist_ok=True)

    def generate_index(self, project_context: Dict, generated_docs: List[str]):
        """Write index.md linking to every generated documentation file."""
        index_content = f"""# Project Documentation

Auto-generated documentation for Python project: `{os.path.basename(project_context['root_path'])}`

## Project Overview
- **Total Python Files**: {len(project_context['all_files'])}
- **External Dependencies**: {len(project_context['external_dependencies'])}
- **Documentation Files**: {len(generated_docs)}

## External Dependencies
{chr(10).join(f'- `{dep}`' for dep in sorted(project_context['external_dependencies']))}

## File Documentation
"""
        for doc_file in sorted(generated_docs):
            # Display the source file's path but link to the generated .md
            # (the original linked to the .py path, which does not exist
            # inside the documentation directory).
            if doc_file.endswith('.md'):
                source_rel = doc_file[:-len('.md')] + '.py'
            else:
                source_rel = doc_file
            index_content += f"- [`{source_rel}`](./{doc_file})\n"

        index_content += f"""
## Project Structure
```
{self.generate_tree_structure(project_context)}
```

---
*Documentation generated automatically using Ollama LLM*
"""
        with open(self.output_dir / "index.md", 'w', encoding='utf-8') as f:
            f.write(index_content)

    def generate_tree_structure(self, project_context: Dict, max_depth: int = 5) -> str:
        """Generate a tree-like structure of the project."""
        lines = []
        root_path = project_context['root_path']
        for py_file in sorted(project_context['all_files']):
            rel_path = os.path.relpath(py_file, root_path)
            depth = rel_path.count(os.sep)
            if depth <= max_depth:
                indent = ("  " * depth) + "└────"
                filename = os.path.basename(rel_path)
                # Fixed: the original emitted the literal "(unknown)" here
                # and never used `filename`.
                lines.append(f"{indent} [`{filename}`](./{rel_path})")
        return '\n'.join(lines[:50])  # Limit output

    def sanitize_filename(self, file_path: str, root_path: str) -> str:
        """Convert a source path to the matching markdown doc path.

        Only the trailing '.py' extension is swapped for '.md' — the original
        replaced every '.py' occurrence, corrupting paths like 'mypy.tools/'.
        """
        rel_path = os.path.relpath(file_path, root_path)
        safe_name = rel_path.replace('\\', '/')
        if safe_name.endswith('.py'):
            safe_name = safe_name[:-len('.py')] + '.md'
        return safe_name


def main():
    """CLI entry point: scan a project and document each Python file."""
    parser = argparse.ArgumentParser(
        description="Generate documentation for Python project using Ollama")
    parser.add_argument("path", help="Path to Python project directory")
    parser.add_argument("--model", default="deepseek-r1:latest",
                        help="Ollama model to use (default: deepseek-r1:latest). For thinking models use 'thinking' in name")
    # Fixed: BooleanOptionalAction must not be combined with type=bool
    # (bool('false') is True, and newer argparse rejects the combination);
    # default=False so a real bool always reaches OllamaDocGenerator.
    parser.add_argument("--thinking", action=argparse.BooleanOptionalAction,
                        default=False, help="Does the model think")
    parser.add_argument("--output", default="./pydocs",
                        help="Output directory for documentation (default: ./pydocs)")
    parser.add_argument("--ollama-url", default="http://localhost:11434",
                        help="Ollama server URL")
    parser.add_argument("--exclude", nargs="*", default=[],
                        help="Directories to exclude from scanning")
    parser.add_argument("--max-files", type=int, default=400,
                        help="Maximum number of files to process")
    args = parser.parse_args()

    # Validate project path
    if not os.path.exists(args.path):
        print(f"Error: Path '{args.path}' does not exist")
        sys.exit(1)

    # Initialize components
    doc_generator = OllamaDocGenerator(args.model, args.ollama_url, args.thinking)
    project_analyzer = ProjectAnalyzer(args.path)
    doc_manager = DocumentationManager(args.output)
    analyzer = PythonAnalyzer()

    # Check Ollama connection
    print("Checking Ollama connection...")
    if not doc_generator.check_ollama_connection():
        print(f"Error: Cannot connect to Ollama at {args.ollama_url}")
        print("Make sure Ollama is running: ollama serve")
        sys.exit(1)

    # Check model availability
    print(f"Checking model availability: {args.model}")
    if not doc_generator.check_model_availability():
        print(f"Error: Model '{args.model}' is not available")
        print(f"Install it with: ollama pull {args.model}")
        sys.exit(1)

    print(f"✓ Ollama connection established with model: {args.model}")

    # Scan project
    print("Scanning project...")
    project_context = project_analyzer.scan_project(args.exclude)
    if not project_context['all_files']:
        print("No Python files found in the project")
        sys.exit(1)
    print(f"Found {len(project_context['all_files'])} Python files")

    # Limit files if specified
    files_to_process = project_context['all_files'][:args.max_files]
    if len(files_to_process) < len(project_context['all_files']):
        print(f"Processing first {args.max_files} files (use --max-files to change)")

    # Generate documentation for each file
    generated_docs = []
    for i, file_path in enumerate(files_to_process, 1):
        rel_path = os.path.relpath(file_path, args.path)
        print(f"[{i}/{len(files_to_process)}] Documenting {rel_path}...")

        # Analyze file
        file_analysis = analyzer.analyze_file(file_path)
        if not file_analysis:
            print(f"  ⚠ Skipped due to analysis error")
            continue

        # Skip the LLM call entirely for effectively-empty files.
        has_code = bool(file_analysis['content'].strip(" \n\t"))
        documentation = (doc_generator.generate_documentation(file_analysis, project_context)
                         if has_code else "")
        if not documentation:
            print(f"  ⚠ Failed to generate documentation"
                  if has_code
                  else "  ⚠ No document generated because no code was found in the file")
            continue

        # Save documentation (mirroring the source tree under the output dir)
        doc_filename = doc_manager.sanitize_filename(file_path, args.path)
        doc_path = doc_manager.output_dir / doc_filename
        os.makedirs(os.path.dirname(doc_path), exist_ok=True)
        with open(doc_path, 'w', encoding='utf-8') as f:
            f.write(documentation)
        generated_docs.append(doc_filename)
        print(f"  ✓ Generated: {doc_filename}")

    # Generate index file
    if generated_docs:
        print("Generating index file...")
        doc_manager.generate_index(project_context, generated_docs)
        print(f"✓ Documentation complete! Check {args.output}/index.md")
        print(f"Generated {len(generated_docs)} documentation files")
    else:
        print("No documentation files were generated")


if __name__ == "__main__":
    main()