You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and dots ('.'), can be up to 35 characters long. Letters must be lowercase.
610 lines
25 KiB
610 lines
25 KiB
#!/usr/bin/env python3 |
|
""" |
|
Python Documentation Generator using Ollama LLM |
|
Automatically generates comprehensive markdown documentation for Python projects. |
|
""" |
|
|
|
import os |
|
import ast |
|
import json |
|
import argparse |
|
import subprocess |
|
import sys |
|
from pathlib import Path |
|
from typing import Dict, List, Set, Tuple, Optional |
|
import requests |
|
import re |
|
from urllib.parse import quote |
|
|
|
class PythonAnalyzer:
    """Analyzes Python files to extract structural information.

    State is reset on every call to :meth:`analyze_file`, so one instance
    can be reused across many files.
    """

    def __init__(self):
        # Per-file state; repopulated by analyze_file().
        self.imports: Set[str] = set()
        self.classes: List[Dict] = []
        self.functions: List[Dict] = []
        self.constants: List[Dict] = []
        self.module_docstring: Optional[str] = None

    def analyze_file(self, file_path: str) -> Optional[Dict]:
        """Analyze a Python file and extract its structure.

        Args:
            file_path: Path to the ``.py`` file to read and parse.

        Returns:
            A dict with keys ``file_path``, ``content``, ``module_docstring``,
            ``imports``, ``classes``, ``functions``, ``constants`` and
            ``lines_of_code``; ``None`` if the file cannot be read or parsed
            (the error is printed, not raised, so a bad file does not abort a
            whole project scan).
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            tree = ast.parse(content)

            # Reset for each file
            self.imports = set()
            self.classes = []
            self.functions = []
            self.constants = []
            self.module_docstring = ast.get_docstring(tree)

            # Precompute which function nodes sit directly inside a class body
            # so the walk below can distinguish methods from plain functions in
            # O(1). The original re-walked the entire tree for every single
            # FunctionDef, which was accidentally quadratic.
            method_nodes = {
                id(item)
                for cls_node in ast.walk(tree)
                if isinstance(cls_node, ast.ClassDef)
                for item in cls_node.body
                if isinstance(item, ast.FunctionDef)
            }

            for node in ast.walk(tree):
                if isinstance(node, ast.Import):
                    for alias in node.names:
                        self.imports.add(alias.name)
                elif isinstance(node, ast.ImportFrom):
                    module = node.module or ""
                    for alias in node.names:
                        self.imports.add(f"{module}.{alias.name}")
                elif isinstance(node, ast.ClassDef):
                    self.classes.append({
                        'name': node.name,
                        'bases': [ast.unparse(base) for base in node.bases],
                        'docstring': ast.get_docstring(node),
                        'methods': [n.name for n in node.body if isinstance(n, ast.FunctionDef)],
                        'lineno': node.lineno
                    })
                elif isinstance(node, ast.FunctionDef):
                    # Record only functions that are not methods of a class.
                    if id(node) not in method_nodes:
                        self.functions.append({
                            'name': node.name,
                            'args': [arg.arg for arg in node.args.args],
                            'docstring': ast.get_docstring(node),
                            'lineno': node.lineno,
                            'returns': ast.unparse(node.returns) if node.returns else None
                        })
                elif isinstance(node, ast.Assign):
                    # ALL_CAPS assignment targets are treated as constants.
                    for target in node.targets:
                        if isinstance(target, ast.Name) and target.id.isupper():
                            self.constants.append({
                                'name': target.id,
                                'value': ast.unparse(node.value),
                                'lineno': node.lineno
                            })

            return {
                'file_path': file_path,
                'content': content,
                'module_docstring': self.module_docstring,
                'imports': list(self.imports),
                'classes': self.classes,
                'functions': self.functions,
                'constants': self.constants,
                'lines_of_code': len(content.splitlines())
            }

        except Exception as e:
            # Broad catch is deliberate: a single unreadable/unparsable file
            # must not abort the whole project scan.
            print(f"Error analyzing {file_path}: {e}")
            return None
|
|
|
class OllamaDocGenerator:
    """Generates documentation by prompting an Ollama LLM over HTTP.

    ``thinking=True`` routes requests through the chat endpoint and parses
    out the model's embedded reasoning (e.g. ``<think>...</think>`` blocks);
    otherwise the plain generate endpoint is used.
    """

    def __init__(self, model_name: str = "deepseek-r1:latest", ollama_url: str = "http://localhost:11434", thinking: bool = False):
        self.model_name = model_name
        self.ollama_url = ollama_url
        # One pooled HTTP session for all calls to the server.
        self.session = requests.Session()
        self.thinking = thinking

    def check_ollama_connection(self) -> bool:
        """Return True if the Ollama server answers on /api/tags."""
        try:
            # Bounded timeout so a dead server fails fast instead of hanging
            # forever (requests has no default timeout).
            response = self.session.get(f"{self.ollama_url}/api/tags", timeout=10)
            return response.status_code == 200
        except requests.exceptions.RequestException:
            return False

    def check_model_availability(self) -> bool:
        """Return True if the configured model is installed on the server."""
        try:
            response = self.session.get(f"{self.ollama_url}/api/tags", timeout=10)
            if response.status_code == 200:
                models = response.json().get('models', [])
                # startswith() tolerates tag suffixes such as ':latest'.
                return any(model['name'].startswith(self.model_name) for model in models)
            return False
        except requests.exceptions.RequestException:
            return False

    def generate_documentation(self, file_analysis: Dict, project_context: Dict) -> Optional[str]:
        """Generate markdown documentation for a single analyzed file.

        Args:
            file_analysis: Output of ``PythonAnalyzer.analyze_file``.
            project_context: Output of ``ProjectAnalyzer.scan_project``.

        Returns:
            The generated markdown text, or ``None`` on any HTTP or
            connection error (the error is printed).
        """
        # Create comprehensive prompt with context
        prompt = self.create_documentation_prompt(file_analysis, project_context)

        try:
            if self.thinking:
                print("Thinking model chosen")
                # Thinking models go through the chat endpoint; the reply may
                # embed a reasoning section that we strip out below.
                response = self.session.post(
                    f"{self.ollama_url}/api/chat",
                    json={
                        "model": self.model_name,
                        "messages": [
                            {
                                "role": "user",
                                "content": prompt
                            }
                        ],
                        "stream": False,
                        "options": {
                            # Low temperature for consistent, factual docs.
                            "temperature": 0.1,
                            "top_p": 0.9,
                        }
                    },
                    timeout=600  # 10 minute timeout for thinking models
                )

                if response.status_code == 200:
                    result = response.json()
                    message = result.get('message', {})
                    content = message.get('content', '')
                    # Separate the model's reasoning from the final answer.
                    thinking_content, final_answer = self.parse_thinking_response(content)

                    if thinking_content:
                        print(" 🧠 Model thinking process:")
                        print(f" {thinking_content[:200]}..." if len(thinking_content) > 200 else f" {thinking_content}")

                    return final_answer if final_answer else content
                else:
                    print(f"Error generating documentation: {response.status_code}")
                    return None
            else:
                print("Non-thinking model chosen")
                # Standard generation for regular models.
                response = self.session.post(
                    f"{self.ollama_url}/api/generate",
                    json={
                        "model": self.model_name,
                        "prompt": prompt,
                        "stream": False,
                        "think": False,
                        "options": {
                            "temperature": 0.1,
                            "top_p": 0.9,
                        }
                    },
                    timeout=300  # 5 minute timeout
                )

                if response.status_code == 200:
                    return response.json()['response']
                else:
                    print(f"Error generating documentation: {response.status_code}")
                    return None

        except requests.exceptions.RequestException as e:
            print(f"Error communicating with Ollama: {e}")
            return None

    def parse_thinking_response(self, content: str) -> Tuple[Optional[str], str]:
        """Split a thinking-model reply into (reasoning, final answer).

        Returns ``(None, content)`` unchanged when no reasoning section is
        recognized. Uses the module-level ``re`` import.
        """
        # Known explicit reasoning-tag formats, tried in order.
        thinking_patterns = [
            r'<thinking>(.*?)</thinking>',
            r'<think>(.*?)</think>',
            r'<reasoning>(.*?)</reasoning>',
            r'<analysis>(.*?)</analysis>'
        ]

        thinking_content = None
        final_answer = content

        for pattern in thinking_patterns:
            match = re.search(pattern, content, re.DOTALL)
            if match:
                thinking_content = match.group(1).strip()
                # Remove the reasoning section from the final answer.
                final_answer = re.sub(pattern, '', content, flags=re.DOTALL).strip()
                break

        # Fall back to prose-style openers ("Let me think about ...") when no
        # explicit tags were found.
        if not thinking_content:
            thinking_indicators = [
                r'^(Let me think about.*?(?=\n\n|\n#|\nI\'ll))',
                r'^(I need to analyze.*?(?=\n\n|\n#|\nI\'ll))',
                r'^(First, let me understand.*?(?=\n\n|\n#|\nI\'ll))',
                r'^(To document this.*?(?=\n\n|\n#|\nI\'ll))'
            ]

            for pattern in thinking_indicators:
                match = re.search(pattern, content, re.DOTALL | re.MULTILINE)
                if match:
                    thinking_content = match.group(1).strip()
                    final_answer = content[match.end():].strip()
                    break

        return thinking_content, final_answer

    def create_documentation_prompt(self, file_analysis: Dict, project_context: Dict) -> str:
        """Build the full LLM prompt: project context, file analysis, source, task."""
        file_path = file_analysis['file_path']
        relative_path = os.path.relpath(file_path, project_context['root_path'])

        prompt = f"""You are a technical documentation expert. Generate comprehensive markdown documentation for the Python file: `{relative_path}`

## PROJECT CONTEXT:
- **Project Root**: {project_context['root_path']}
- **Total Python Files**: {len(project_context['all_files'])}
- **External Dependencies**: {', '.join(project_context['external_dependencies']) if project_context['external_dependencies'] else 'None detected'}
- **Project Structure**:
{self.format_project_structure(project_context['file_structure'])}

## FILE ANALYSIS:
- **File Path**: `{relative_path}`
- **Lines of Code**: {file_analysis['lines_of_code']}
- **Module Docstring**: {file_analysis['module_docstring'] or 'None'}

### Imports ({len(file_analysis['imports'])} total):
{chr(10).join(f'- `{imp}`' for imp in file_analysis['imports'])}

### Classes ({len(file_analysis['classes'])} total):
{self.format_classes(file_analysis['classes'])}

### Functions ({len(file_analysis['functions'])} total):
{self.format_functions(file_analysis['functions'])}

### Constants ({len(file_analysis['constants'])} total):
{self.format_constants(file_analysis['constants'])}

## RELATED FILES:
{self.format_related_files(file_analysis, project_context)}

## FULL SOURCE CODE:
```python
{file_analysis['content']}
```

## DOCUMENTATION REQUIREMENTS:

Generate a complete markdown documentation file that includes:

1. **File Header**: Title ('Documentation ' + file), purpose, and brief description
2. **Overview**: What this module/file does and its role in the project
3. **Dependencies**: External and internal dependencies with explanations
4. **API Reference**: Detailed documentation of all classes, functions, and constants
5. **Usage Examples**: Practical code examples where applicable
6. **Cross-References**: Links to related files using relative markdown links
7. **Implementation Notes**: Architecture decisions, patterns used, etc.

## FORMATTING GUIDELINES:
- You are **NOT ALLOWED** TO USE markdown CODE BLOCKS!
- Use proper markdown syntax, so no **# title** or other non-standard markdown features
- Be careful with indentation
- Limit the use of unnecessary newlines
- Include code blocks with syntax highlighting
- Add tables for parameter/return value documentation
- Use relative links to other documentation files: `[filename](./filename.md)`
- Include line number references where helpful
- Make it professional and comprehensive
- Focus on clarity and usefulness for developers

Generate the complete markdown documentation now:"""

        return prompt

    def format_project_structure(self, file_structure: List) -> str:
        """Format the os.walk()-style structure as an indented bullet list."""
        if not file_structure:
            # Guard: the original indexed file_structure[0] unconditionally,
            # which raised IndexError for an empty scan.
            return ""
        root_prefix = file_structure[0][0]
        lines = []
        for root, dirs, files in file_structure:
            level = root.replace(root_prefix, '').count(os.sep)
            indent = ' ' * level
            lines.append(f"{indent}- {os.path.basename(root)}/")
            subindent = ' ' * (level + 1)
            for file in files:
                if file.endswith('.py'):
                    lines.append(f"{subindent}- {file}")
        return '\n'.join(lines[:20])  # Limit to first 20 lines

    def format_classes(self, classes: List[Dict]) -> str:
        """Format class information for the prompt."""
        if not classes:
            return "None"

        lines = []
        for cls in classes:
            lines.append(f"- **{cls['name']}** (line {cls['lineno']})")
            if cls['bases']:
                lines.append(f" - Inherits from: {', '.join(cls['bases'])}")
            if cls['methods']:
                lines.append(f" - Methods: {', '.join(cls['methods'])}")
            if cls['docstring']:
                # Truncate long docstrings to keep the prompt compact.
                lines.append(f" - Description: {cls['docstring'][:100]}...")
        return '\n'.join(lines)

    def format_functions(self, functions: List[Dict]) -> str:
        """Format function information for the prompt."""
        if not functions:
            return "None"

        lines = []
        for func in functions:
            args_str = ', '.join(func['args']) if func['args'] else ''
            lines.append(f"- **{func['name']}({args_str})** (line {func['lineno']})")
            if func['returns']:
                lines.append(f" - Returns: {func['returns']}")
            if func['docstring']:
                lines.append(f" - Description: {func['docstring'][:100]}...")
        return '\n'.join(lines)

    def format_constants(self, constants: List[Dict]) -> str:
        """Format constant information for the prompt."""
        if not constants:
            return "None"

        lines = []
        for const in constants:
            lines.append(f"- **{const['name']}** = {const['value']} (line {const['lineno']})")
        return '\n'.join(lines)

    def format_related_files(self, file_analysis: Dict, project_context: Dict) -> str:
        """List project files whose module path matches one of this file's imports."""
        current_imports = set(file_analysis['imports'])
        related_files = []

        for other_file in project_context['all_files']:
            if other_file != file_analysis['file_path']:
                rel_path = os.path.relpath(other_file, project_context['root_path'])
                module_name = rel_path.replace('/', '.').replace('\\', '.').replace('.py', '')

                # Does this file import that module (or a submodule of it)?
                if any(imp.startswith(module_name) for imp in current_imports):
                    related_files.append(f"- `{rel_path}` (imported by this file)")

        return '\n'.join(related_files) if related_files else "None detected"
|
|
|
class ProjectAnalyzer:
    """Analyzes the entire project structure.

    Walks a directory tree, collects Python files, and classifies imports
    as local (inside the project) or external.
    """

    # Directories that are always skipped, regardless of user excludes.
    DEFAULT_EXCLUDES = ('.git', '__pycache__', '.pytest_cache', 'venv', 'env', '.venv', 'node_modules')

    def __init__(self, root_path: str):
        self.root_path = Path(root_path).resolve()
        self.python_files: List[str] = []
        self.external_dependencies: Set[str] = set()

    def scan_project(self, exclude_dirs: Optional[List[str]] = None) -> Dict:
        """Scan the project and collect all Python files.

        Args:
            exclude_dirs: Extra directory names to skip, merged with
                ``DEFAULT_EXCLUDES``.

        Returns:
            A dict with keys ``root_path``, ``all_files``, ``file_structure``
            (list of ``os.walk`` tuples) and ``external_dependencies``.
        """
        excluded = set(self.DEFAULT_EXCLUDES)
        if exclude_dirs:
            excluded.update(exclude_dirs)

        self.python_files = []
        file_structure = []

        for root, dirs, files in os.walk(self.root_path):
            # Prune excluded directories in place so os.walk skips them.
            # (The original also filtered *files* against directory names,
            # which was a category mistake and is dropped here.)
            dirs[:] = [d for d in dirs if d not in excluded]
            file_structure.append((root, dirs, files))

            for file in files:
                if file.endswith('.py'):
                    self.python_files.append(os.path.join(root, file))

        # Analyze dependencies
        self.analyze_dependencies()

        return {
            'root_path': str(self.root_path),
            'all_files': self.python_files,
            'file_structure': file_structure,
            'external_dependencies': list(self.external_dependencies)
        }

    def analyze_dependencies(self):
        """Collect top-level names of external (non-local) imports."""
        analyzer = PythonAnalyzer()
        # Reset so repeated scans don't accumulate stale entries.
        self.external_dependencies = set()

        for file_path in self.python_files:
            analysis = analyzer.analyze_file(file_path)
            if analysis:
                for imp in analysis['imports']:
                    # Only record imports that do not resolve to project files.
                    if not self.is_local_import(imp):
                        self.external_dependencies.add(imp.split('.')[0])

    def is_local_import(self, import_name: str) -> bool:
        """Check whether an import refers to a module inside this project."""
        # Relative imports are local by definition.
        if import_name.startswith('.'):
            return True

        # Check against the module path of every collected Python file.
        for py_file in self.python_files:
            rel_path = os.path.relpath(py_file, self.root_path)
            module_path = rel_path.replace('/', '.').replace('\\', '.')
            # Strip only the trailing extension; a bare .replace('.py', '')
            # would also corrupt names containing '.py' mid-string.
            if module_path.endswith('.py'):
                module_path = module_path[:-3]
            # Exact module match or a dotted-submodule prefix. A plain
            # startswith() wrongly matched e.g. 'utilities' against 'util'.
            if import_name == module_path or import_name.startswith(module_path + '.'):
                return True

        return False
|
|
|
class DocumentationManager:
    """Manages writing generated documentation files and the index."""

    def __init__(self, output_dir: str = "./pydocs"):
        self.output_dir = Path(output_dir)
        # parents=True so nested output paths (e.g. docs/api) also work.
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def generate_index(self, project_context: Dict, generated_docs: List[str]):
        """Generate an index.md file linking to all documentation files."""
        index_content = f"""# Project Documentation

Auto-generated documentation for Python project: `{os.path.basename(project_context['root_path'])}`

## Project Overview

- **Total Python Files**: {len(project_context['all_files'])}
- **External Dependencies**: {len(project_context['external_dependencies'])}
- **Documentation Files**: {len(generated_docs)}

## External Dependencies

{chr(10).join(f'- `{dep}`' for dep in sorted(project_context['external_dependencies']))}

## File Documentation

"""

        for doc_file in sorted(generated_docs):
            # Swap only the trailing extension back to .py; str.replace would
            # also mangle any '.md' occurring earlier in the name.
            source_path = doc_file[:-3] + '.py' if doc_file.endswith('.md') else doc_file
            rel_path = os.path.relpath(source_path, '.')
            doc_name = os.path.basename(doc_file)
            index_content += f"- [`{rel_path}`](./{doc_name})\n"

        index_content += f"""
## Project Structure

```
{self.generate_tree_structure(project_context)}
```

---

*Documentation generated automatically using Ollama LLM*
"""

        with open(self.output_dir / "index.md", 'w', encoding='utf-8') as f:
            f.write(index_content)

    def generate_tree_structure(self, project_context: Dict, max_depth: int = 3) -> str:
        """Generate a tree-like listing of the project's Python files."""
        lines = []
        root_path = project_context['root_path']

        for py_file in sorted(project_context['all_files']):
            rel_path = os.path.relpath(py_file, root_path)
            depth = rel_path.count(os.sep)
            if depth <= max_depth:
                indent = "  " * depth
                # Show the actual file name; the original computed it and then
                # appended the literal placeholder "(unknown)" instead.
                lines.append(f"{indent}{os.path.basename(rel_path)}")

        return '\n'.join(lines[:50])  # Limit output

    def sanitize_filename(self, file_path: str, root_path: str) -> str:
        """Convert a source path into the matching relative .md path."""
        rel_path = os.path.relpath(file_path, root_path)
        safe_name = rel_path.replace('\\', '/')
        # Replace only the trailing extension; .replace('.py', '.md') would
        # also corrupt names like 'my.python.helpers.py'.
        if safe_name.endswith('.py'):
            safe_name = safe_name[:-3] + '.md'
        return safe_name
|
|
|
def main():
    """CLI entry point: scan a project, document each file, write an index.

    Exits with status 1 on a missing path, unreachable Ollama server,
    missing model, or an empty project.
    """
    parser = argparse.ArgumentParser(description="Generate documentation for Python project using Ollama")
    parser.add_argument("path", help="Path to Python project directory")
    parser.add_argument("--model", default="deepseek-r1:latest", help="Ollama model to use (default: deepseek-r1:latest). For thinking models use 'thinking' in name")
    # BooleanOptionalAction already yields a real bool and adds a
    # --no-thinking counterpart; combining it with type=bool is deprecated
    # in Python 3.12 and an error in newer releases, so we pass an explicit
    # default instead (the old code also left the default as None).
    parser.add_argument("--thinking", action=argparse.BooleanOptionalAction, default=False, help="Does the model think")
    parser.add_argument("--output", default="./pydocs", help="Output directory for documentation (default: ./pydocs)")
    parser.add_argument("--ollama-url", default="http://localhost:11434", help="Ollama server URL")
    parser.add_argument("--exclude", nargs="*", default=[], help="Directories to exclude from scanning")
    parser.add_argument("--max-files", type=int, default=400, help="Maximum number of files to process")

    args = parser.parse_args()

    # Validate project path
    if not os.path.exists(args.path):
        print(f"Error: Path '{args.path}' does not exist")
        sys.exit(1)

    # Initialize components
    doc_generator = OllamaDocGenerator(args.model, args.ollama_url, args.thinking)
    project_analyzer = ProjectAnalyzer(args.path)
    doc_manager = DocumentationManager(args.output)
    analyzer = PythonAnalyzer()

    # Check Ollama connection
    print("Checking Ollama connection...")
    if not doc_generator.check_ollama_connection():
        print(f"Error: Cannot connect to Ollama at {args.ollama_url}")
        print("Make sure Ollama is running: ollama serve")
        sys.exit(1)

    # Check model availability
    print(f"Checking model availability: {args.model}")
    if not doc_generator.check_model_availability():
        print(f"Error: Model '{args.model}' is not available")
        print(f"Install it with: ollama pull {args.model}")
        sys.exit(1)

    print(f"✓ Ollama connection established with model: {args.model}")

    # Scan project
    print("Scanning project...")
    project_context = project_analyzer.scan_project(args.exclude)

    if not project_context['all_files']:
        print("No Python files found in the project")
        sys.exit(1)

    print(f"Found {len(project_context['all_files'])} Python files")

    # Limit files if specified
    files_to_process = project_context['all_files'][:args.max_files]
    if len(files_to_process) < len(project_context['all_files']):
        print(f"Processing first {args.max_files} files (use --max-files to change)")

    # Generate documentation for each file
    generated_docs = []

    for i, file_path in enumerate(files_to_process, 1):
        rel_path = os.path.relpath(file_path, args.path)
        print(f"[{i}/{len(files_to_process)}] Documenting {rel_path}...")

        # Analyze file
        file_analysis = analyzer.analyze_file(file_path)
        if not file_analysis:
            print(" ⚠ Skipped due to analysis error")
            continue

        # Skip the LLM round-trip entirely for files with no code.
        has_code = bool(file_analysis['content'].strip(" \n\t"))
        documentation = doc_generator.generate_documentation(file_analysis, project_context) if has_code else ""
        if not documentation:
            print(" ⚠ Failed to generate documentation" if has_code else " ⚠ No document generated because no code was found in the file")
            continue

        # Save documentation, mirroring the source tree under the output dir.
        doc_filename = doc_manager.sanitize_filename(file_path, args.path)
        doc_path = doc_manager.output_dir / doc_filename
        os.makedirs(os.path.dirname(doc_path), exist_ok=True)
        with open(doc_path, 'w', encoding='utf-8') as f:
            f.write(documentation)

        generated_docs.append(doc_filename)
        print(f" ✓ Generated: {doc_filename}")

    # Generate index file
    if generated_docs:
        print("Generating index file...")
        doc_manager.generate_index(project_context, generated_docs)
        print(f"✓ Documentation complete! Check {args.output}/index.md")
        print(f"Generated {len(generated_docs)} documentation files")
    else:
        print("No documentation files were generated")
|
|
|
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()