Init project

main
Vinayak Ambigapathy committed 1 week ago
parent e4f3044d95
commit b2aa5dd6a6
  1 changed file, 610 additions
      doc_think.py

@@ -0,0 +1,610 @@
#!/usr/bin/env python3
"""
Python Documentation Generator using Ollama LLM
Automatically generates comprehensive markdown documentation for Python projects.
"""
import os
import ast
import json
import argparse
import subprocess
import sys
from pathlib import Path
from typing import Dict, List, Set, Tuple, Optional
import requests
import re
from urllib.parse import quote
class PythonAnalyzer:
"""Analyzes Python files to extract structural information."""
def __init__(self):
self.imports = set()
self.classes = []
self.functions = []
self.constants = []
self.module_docstring = None
def analyze_file(self, file_path: str) -> Dict:
"""Analyze a Python file and extract its structure."""
try:
with open(file_path, 'r', encoding='utf-8') as f:
content = f.read()
tree = ast.parse(content)
# Reset for each file
self.imports = set()
self.classes = []
self.functions = []
self.constants = []
self.module_docstring = ast.get_docstring(tree)
for node in ast.walk(tree):
if isinstance(node, ast.Import):
for alias in node.names:
self.imports.add(alias.name)
elif isinstance(node, ast.ImportFrom):
module = node.module or ""
for alias in node.names:
self.imports.add(f"{module}.{alias.name}")
elif isinstance(node, ast.ClassDef):
self.classes.append({
'name': node.name,
'bases': [ast.unparse(base) for base in node.bases],
'docstring': ast.get_docstring(node),
'methods': [n.name for n in node.body if isinstance(n, ast.FunctionDef)],
'lineno': node.lineno
})
elif isinstance(node, ast.FunctionDef):
# Only top-level functions (not methods)
parent_classes = [n for n in ast.walk(tree) if isinstance(n, ast.ClassDef)]
is_method = False
for cls in parent_classes:
if hasattr(cls, 'body') and node in cls.body:
is_method = True
break
if not is_method:
self.functions.append({
'name': node.name,
'args': [arg.arg for arg in node.args.args],
'docstring': ast.get_docstring(node),
'lineno': node.lineno,
'returns': ast.unparse(node.returns) if node.returns else None
})
elif isinstance(node, ast.Assign):
# Top-level constants (ALL_CAPS variables)
for target in node.targets:
if isinstance(target, ast.Name) and target.id.isupper():
self.constants.append({
'name': target.id,
'value': ast.unparse(node.value),
'lineno': node.lineno
})
return {
'file_path': file_path,
'content': content,
'module_docstring': self.module_docstring,
'imports': list(self.imports),
'classes': self.classes,
'functions': self.functions,
'constants': self.constants,
'lines_of_code': len(content.splitlines())
}
except Exception as e:
print(f"Error analyzing {file_path}: {e}")
return None
class OllamaDocGenerator:
"""Generates documentation using Ollama LLM."""
def __init__(self, model_name: str = "deepseek-r1:latest", ollama_url: str = "http://localhost:11434", thinking: bool = False):
self.model_name = model_name
self.ollama_url = ollama_url
self.session = requests.Session()
self.thinking = thinking
def check_ollama_connection(self) -> bool:
"""Check if Ollama is running and accessible."""
try:
response = self.session.get(f"{self.ollama_url}/api/tags")
return response.status_code == 200
except requests.exceptions.RequestException:
return False
def check_model_availability(self) -> bool:
"""Check if the specified model is available."""
try:
response = self.session.get(f"{self.ollama_url}/api/tags")
if response.status_code == 200:
models = response.json().get('models', [])
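                # A bare model name such as "deepseek-r1" should also match tags like "deepseek-r1:latest"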
return any(model['name'].startswith(self.model_name) for model in models)
return False
except requests.exceptions.RequestException:
return False
def generate_documentation(self, file_analysis: Dict, project_context: Dict) -> str:
"""Generate documentation for a single Python file."""
# Create comprehensive prompt with context
prompt = self.create_documentation_prompt(file_analysis, project_context)
# Check if this is a thinking model (o1-like models)
is_thinking_model = self.thinking
try:
if is_thinking_model:
print("Thinking model chosen")
# For thinking models, use chat format and handle thinking tokens
response = self.session.post(
f"{self.ollama_url}/api/chat",
json={
"model": self.model_name,
"messages": [
{
"role": "user",
"content": prompt
}
],
"stream": False,
"options": {
"temperature": 0.1,
"top_p": 0.9,
}
},
timeout=600 # 10 minute timeout for thinking models
)
if response.status_code == 200:
result = response.json()
message = result.get('message', {})
content = message.get('content', '')
# Parse and display thinking process
thinking_content, final_answer = self.parse_thinking_response(content)
if thinking_content:
print(f" 🧠 Model thinking process:")
print(f" {thinking_content[:200]}..." if len(thinking_content) > 200 else f" {thinking_content}")
return final_answer if final_answer else content
else:
print(f"Error generating documentation: {response.status_code}")
return None
else:
print("None thinking model chosen")
# Standard generation for regular models
response = self.session.post(
f"{self.ollama_url}/api/generate",
json={
"model": self.model_name,
"prompt": prompt,
"stream": False,
"think": False,
"options": {
"temperature": 0.1,
"top_p": 0.9,
}
},
timeout=300 # 5 minute timeout
)
if response.status_code == 200:
return response.json()['response']
else:
print(f"Error generating documentation: {response.status_code}")
return None
except requests.exceptions.RequestException as e:
print(f"Error communicating with Ollama: {e}")
return None
def parse_thinking_response(self, content: str) -> Tuple[Optional[str], str]:
"""Parse thinking model response to extract thinking process and final answer."""
import re
# Try different thinking tag patterns
thinking_patterns = [
r'<thinking>(.*?)</thinking>',
r'<think>(.*?)</think>',
r'<reasoning>(.*?)</reasoning>',
r'<analysis>(.*?)</analysis>'
]
thinking_content = None
final_answer = content
for pattern in thinking_patterns:
match = re.search(pattern, content, re.DOTALL)
if match:
thinking_content = match.group(1).strip()
# Remove thinking section from final answer
final_answer = re.sub(pattern, '', content, flags=re.DOTALL).strip()
break
# If no thinking tags found, check for other patterns like "I need to think about..."
if not thinking_content:
# Look for thinking indicators at the start
thinking_indicators = [
r'^(Let me think about.*?(?=\n\n|\n#|\nI\'ll))',
r'^(I need to analyze.*?(?=\n\n|\n#|\nI\'ll))',
r'^(First, let me understand.*?(?=\n\n|\n#|\nI\'ll))',
r'^(To document this.*?(?=\n\n|\n#|\nI\'ll))'
]
for pattern in thinking_indicators:
match = re.search(pattern, content, re.DOTALL | re.MULTILINE)
if match:
thinking_content = match.group(1).strip()
final_answer = content[match.end():].strip()
break
return thinking_content, final_answer
def create_documentation_prompt(self, file_analysis: Dict, project_context: Dict) -> str:
"""Create a comprehensive prompt for documentation generation."""
file_path = file_analysis['file_path']
relative_path = os.path.relpath(file_path, project_context['root_path'])
prompt = f"""You are a technical documentation expert. Generate comprehensive markdown documentation for the Python file: `{relative_path}`
## PROJECT CONTEXT:
- **Project Root**: {project_context['root_path']}
- **Total Python Files**: {len(project_context['all_files'])}
- **External Dependencies**: {', '.join(project_context['external_dependencies']) if project_context['external_dependencies'] else 'None detected'}
- **Project Structure**:
{self.format_project_structure(project_context['file_structure'])}
## FILE ANALYSIS:
- **File Path**: `{relative_path}`
- **Lines of Code**: {file_analysis['lines_of_code']}
- **Module Docstring**: {file_analysis['module_docstring'] or 'None'}
### Imports ({len(file_analysis['imports'])} total):
{chr(10).join(f'- `{imp}`' for imp in file_analysis['imports'])}
### Classes ({len(file_analysis['classes'])} total):
{self.format_classes(file_analysis['classes'])}
### Functions ({len(file_analysis['functions'])} total):
{self.format_functions(file_analysis['functions'])}
### Constants ({len(file_analysis['constants'])} total):
{self.format_constants(file_analysis['constants'])}
## RELATED FILES:
{self.format_related_files(file_analysis, project_context)}
## FULL SOURCE CODE:
```python
{file_analysis['content']}
```
## DOCUMENTATION REQUIREMENTS:
Generate a complete markdown documentation file that includes:
1. **File Header**: Title ('Documentation ' + file), purpose, and brief description
2. **Overview**: What this module/file does and its role in the project
3. **Dependencies**: External and internal dependencies with explanations
4. **API Reference**: Detailed documentation of all classes, functions, and constants
5. **Usage Examples**: Practical code examples where applicable
6. **Cross-References**: Links to related files using relative markdown links
7. **Implementation Notes**: Architecture decisions, patterns used, etc.
## FORMATTING GUIDELINES:
- You are **NOT ALLOWED** to use markdown code blocks!
- Use proper markdown syntax, so no **# title** or other non-standard markdown features
- Be careful with indentation
- Limit the use of unnecessary newlines
- Include code blocks with syntax highlighting
- Add tables for parameter/return value documentation
- Use relative links to other documentation files: `[filename](./filename.md)`
- Include line number references where helpful
- Make it professional and comprehensive
- Focus on clarity and usefulness for developers
Generate the complete markdown documentation now:"""
return prompt
def format_project_structure(self, file_structure: Dict) -> str:
"""Format project structure for the prompt."""
lines = []
for root, dirs, files in file_structure:
level = root.replace(file_structure[0][0], '').count(os.sep)
indent = ' ' * level
lines.append(f"{indent}- {os.path.basename(root)}/")
subindent = ' ' * (level + 1)
for file in files:
if file.endswith('.py'):
lines.append(f"{subindent}- {file}")
return '\n'.join(lines[:20]) # Limit to first 20 lines
def format_classes(self, classes: List[Dict]) -> str:
"""Format class information for the prompt."""
if not classes:
return "None"
lines = []
for cls in classes:
lines.append(f"- **{cls['name']}** (line {cls['lineno']})")
if cls['bases']:
lines.append(f" - Inherits from: {', '.join(cls['bases'])}")
if cls['methods']:
lines.append(f" - Methods: {', '.join(cls['methods'])}")
if cls['docstring']:
lines.append(f" - Description: {cls['docstring'][:100]}...")
return '\n'.join(lines)
def format_functions(self, functions: List[Dict]) -> str:
"""Format function information for the prompt."""
if not functions:
return "None"
lines = []
for func in functions:
args_str = ', '.join(func['args']) if func['args'] else ''
lines.append(f"- **{func['name']}({args_str})** (line {func['lineno']})")
if func['returns']:
lines.append(f" - Returns: {func['returns']}")
if func['docstring']:
lines.append(f" - Description: {func['docstring'][:100]}...")
return '\n'.join(lines)
def format_constants(self, constants: List[Dict]) -> str:
"""Format constant information for the prompt."""
if not constants:
return "None"
lines = []
for const in constants:
lines.append(f"- **{const['name']}** = {const['value']} (line {const['lineno']})")
return '\n'.join(lines)
def format_related_files(self, file_analysis: Dict, project_context: Dict) -> str:
"""Format related files information."""
current_imports = set(file_analysis['imports'])
related_files = []
for other_file in project_context['all_files']:
if other_file != file_analysis['file_path']:
rel_path = os.path.relpath(other_file, project_context['root_path'])
module_name = rel_path.replace('/', '.').replace('\\', '.').replace('.py', '')
# Check if this file imports the other or vice versa
if any(imp.startswith(module_name) for imp in current_imports):
related_files.append(f"- `{rel_path}` (imported by this file)")
return '\n'.join(related_files) if related_files else "None detected"
class ProjectAnalyzer:
"""Analyzes the entire project structure."""
def __init__(self, root_path: str):
self.root_path = Path(root_path).resolve()
self.python_files = []
self.external_dependencies = set()
def scan_project(self, exclude_dirs: List[str] = None) -> Dict:
"""Scan the project and collect all Python files."""
        default_excludes = ['.git', '__pycache__', '.pytest_cache', 'venv', 'env', '.venv', 'node_modules']
        exclude_dirs = (exclude_dirs or []) + default_excludes
self.python_files = []
file_structure = []
for root, dirs, files in os.walk(self.root_path):
# Remove excluded directories
dirs[:] = [d for d in dirs if d not in exclude_dirs]
files[:] = [f for f in files if f not in exclude_dirs]
file_structure.append((root, dirs, files))
for file in files:
if file.endswith('.py'):
self.python_files.append(os.path.join(root, file))
# Analyze dependencies
self.analyze_dependencies()
return {
'root_path': str(self.root_path),
'all_files': self.python_files,
'file_structure': file_structure,
'external_dependencies': list(self.external_dependencies)
}
def analyze_dependencies(self):
"""Analyze external dependencies across all Python files."""
analyzer = PythonAnalyzer()
for file_path in self.python_files:
analysis = analyzer.analyze_file(file_path)
if analysis:
for imp in analysis['imports']:
# Check if it's an external dependency (not local)
if not self.is_local_import(imp):
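                        # e.g. an import of "requests.Session" is recorded as the top-level package "requests"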
self.external_dependencies.add(imp.split('.')[0])
def is_local_import(self, import_name: str) -> bool:
"""Check if an import is local to the project."""
# Simple heuristic: if the import starts with a relative path or matches a local file
if import_name.startswith('.'):
return True
# Check if it matches any of our Python files
for py_file in self.python_files:
rel_path = os.path.relpath(py_file, self.root_path)
module_path = rel_path.replace('/', '.').replace('\\', '.').replace('.py', '')
if import_name.startswith(module_path):
return True
return False
class DocumentationManager:
"""Manages the documentation generation process."""
def __init__(self, output_dir: str = "./pydocs"):
self.output_dir = Path(output_dir)
self.output_dir.mkdir(exist_ok=True)
def generate_index(self, project_context: Dict, generated_docs: List[str]):
"""Generate an index.md file linking to all documentation."""
index_content = f"""# Project Documentation
Auto-generated documentation for Python project: `{os.path.basename(project_context['root_path'])}`
## Project Overview
- **Total Python Files**: {len(project_context['all_files'])}
- **External Dependencies**: {len(project_context['external_dependencies'])}
- **Documentation Files**: {len(generated_docs)}
## External Dependencies
{chr(10).join(f'- `{dep}`' for dep in sorted(project_context['external_dependencies']))}
## File Documentation
"""
for doc_file in sorted(generated_docs):
rel_path = os.path.relpath(doc_file.replace('.md', '.py'), '.')
doc_name = os.path.basename(doc_file)
index_content += f"- [`{rel_path}`](./{doc_name})\n"
index_content += f"""
## Project Structure
```
{self.generate_tree_structure(project_context)}
```
---
*Documentation generated automatically using Ollama LLM*
"""
with open(self.output_dir / "index.md", 'w', encoding='utf-8') as f:
f.write(index_content)
def generate_tree_structure(self, project_context: Dict, max_depth: int = 3) -> str:
"""Generate a tree-like structure of the project."""
lines = []
root_path = project_context['root_path']
for py_file in sorted(project_context['all_files']):
rel_path = os.path.relpath(py_file, root_path)
depth = rel_path.count(os.sep)
if depth <= max_depth:
indent = " " * depth
filename = os.path.basename(rel_path)
lines.append(f"{indent}{filename}")
return '\n'.join(lines[:50]) # Limit output
def sanitize_filename(self, file_path: str, root_path: str) -> str:
"""Convert file path to a safe markdown filename."""
rel_path = os.path.relpath(file_path, root_path)
# Replace path separators and special characters
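        # e.g. "src/utils/helpers.py" (relative to the project root) becomes "src/utils/helpers.md"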
safe_name = rel_path.replace('\\', '/').replace('.py', '.md')
return safe_name
def main():
parser = argparse.ArgumentParser(description="Generate documentation for Python project using Ollama")
parser.add_argument("path", help="Path to Python project directory")
parser.add_argument("--model", default="deepseek-r1:latest", help="Ollama model to use (default: deepseek-r1:latest). For thinking models use 'thinking' in name")
parser.add_argument("--thinking", action=argparse.BooleanOptionalAction, help="Does the model think", type=bool)
parser.add_argument("--output", default="./pydocs", help="Output directory for documentation (default: ./pydocs)")
parser.add_argument("--ollama-url", default="http://localhost:11434", help="Ollama server URL")
parser.add_argument("--exclude", nargs="*", default=[], help="Directories to exclude from scanning")
parser.add_argument("--max-files", type=int, default=400, help="Maximum number of files to process")
args = parser.parse_args()
# Validate project path
if not os.path.exists(args.path):
print(f"Error: Path '{args.path}' does not exist")
sys.exit(1)
# Initialize components
doc_generator = OllamaDocGenerator(args.model, args.ollama_url, args.thinking)
project_analyzer = ProjectAnalyzer(args.path)
doc_manager = DocumentationManager(args.output)
analyzer = PythonAnalyzer()
# Check Ollama connection
print("Checking Ollama connection...")
if not doc_generator.check_ollama_connection():
print(f"Error: Cannot connect to Ollama at {args.ollama_url}")
print("Make sure Ollama is running: ollama serve")
sys.exit(1)
# Check model availability
print(f"Checking model availability: {args.model}")
if not doc_generator.check_model_availability():
print(f"Error: Model '{args.model}' is not available")
print(f"Install it with: ollama pull {args.model}")
sys.exit(1)
print(f"✓ Ollama connection established with model: {args.model}")
# Scan project
print("Scanning project...")
project_context = project_analyzer.scan_project(args.exclude)
if not project_context['all_files']:
print("No Python files found in the project")
sys.exit(1)
print(f"Found {len(project_context['all_files'])} Python files")
# Limit files if specified
files_to_process = project_context['all_files'][:args.max_files]
if len(files_to_process) < len(project_context['all_files']):
print(f"Processing first {args.max_files} files (use --max-files to change)")
# Generate documentation for each file
generated_docs = []
for i, file_path in enumerate(files_to_process, 1):
rel_path = os.path.relpath(file_path, args.path)
print(f"[{i}/{len(files_to_process)}] Documenting {rel_path}...")
# Analyze file
file_analysis = analyzer.analyze_file(file_path)
if not file_analysis:
print(f" ⚠ Skipped due to analysis error")
continue
# Generate documentation
        if not file_analysis['content'].strip(" \n\t"):
            print(" ⚠ No document generated because no code was found in the file")
            continue
        documentation = doc_generator.generate_documentation(file_analysis, project_context)
        if not documentation:
            print(" ⚠ Failed to generate documentation")
            continue
# Save documentation
doc_filename = doc_manager.sanitize_filename(file_path, args.path)
doc_path = doc_manager.output_dir / doc_filename
os.makedirs(os.path.dirname(doc_path), exist_ok=True)
with open(doc_path, 'w', encoding='utf-8') as f:
f.write(documentation)
generated_docs.append(doc_filename)
print(f" ✓ Generated: {doc_filename}")
# Generate index file
if generated_docs:
print("Generating index file...")
doc_manager.generate_index(project_context, generated_docs)
print(f"✓ Documentation complete! Check {args.output}/index.md")
print(f"Generated {len(generated_docs)} documentation files")
else:
print("No documentation files were generated")
if __name__ == "__main__":
main()