#!/usr/bin/env python3
"""
Python Documentation Generator using Ollama LLM
Automatically generates comprehensive markdown documentation for Python projects.
"""

import os
import ast
import json
import argparse
import subprocess
import sys
from pathlib import Path
from typing import Dict, List, Set, Tuple, Optional
import requests
import re
from urllib.parse import quote

# Directory names that are always skipped while scanning a project.
DEFAULT_EXCLUDED_DIRS = ('.git', '__pycache__', '.pytest_cache', 'venv', 'env', '.venv', 'node_modules')

# Seconds to wait for the lightweight Ollama status endpoint before giving up.
STATUS_TIMEOUT = 10

# Tag names tried, in order, when looking for an inline reasoning section
# (e.g. an opening "think" tag followed by the matching closing tag).
THINKING_TAGS = ('think', 'thinking', 'thought', 'reasoning')


def _path_parts(rel_path: str) -> List[str]:
    """Split a project-relative ``.py`` path into its path components.

    Only a trailing ``.py`` suffix is removed, so directory or file names that
    merely contain ``.py`` (or start with ``py``) are left intact.
    """
    normalized = rel_path.replace('\\', '/')
    if normalized.endswith('.py'):
        normalized = normalized[:-3]
    return [part for part in normalized.split('/') if part and part != '.']


def _module_name_from_path(rel_path: str) -> str:
    """Return the dotted module name for a project-relative ``.py`` path.

    ``pkg/__init__.py`` maps to ``pkg``; a root-level ``__init__.py`` maps to
    the empty string.
    """
    parts = _path_parts(rel_path)
    if parts and parts[-1] == '__init__':
        parts.pop()
    return '.'.join(parts)


def _resolve_import(import_name: str, package_parts: List[str]) -> str:
    """Turn a recorded import into an absolute dotted name.

    Args:
        import_name: Name as recorded by ``PythonAnalyzer`` (relative imports
            carry one leading dot per level).
        package_parts: Components of the package containing the importing file.

    Returns:
        ``import_name`` unchanged when it is absolute, otherwise the name
        resolved against ``package_parts``.
    """
    level = len(import_name) - len(import_name.lstrip('.'))
    if level == 0:
        return import_name
    # Level 1 is the containing package; every extra dot climbs one package up.
    keep = max(0, len(package_parts) - (level - 1))
    resolved = list(package_parts[:keep])
    remainder = import_name[level:]
    if remainder:
        resolved.append(remainder)
    return '.'.join(resolved)


def _import_targets_module(import_name: str, module_name: str) -> bool:
    """Return True if ``import_name`` is ``module_name`` or a name inside it.

    Matching happens on dotted-name boundaries so that, for example,
    ``setuptools`` is not mistaken for a local ``setup`` module.
    """
    if not module_name:
        return False
    return import_name == module_name or import_name.startswith(f"{module_name}.")


def _shorten(text: str, limit: int = 100) -> str:
    """Return ``text`` cut to ``limit`` characters, adding ``...`` only if cut."""
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."


class PythonAnalyzer:
    """Analyzes Python files to extract structural information."""

    def __init__(self):
        self.imports = set()
        self.classes = []
        self.functions = []
        self.constants = []
        self.module_docstring = None

    def analyze_file(self, file_path: str) -> Optional[Dict]:
        """Analyze a Python file and extract its structure.

        Args:
            file_path: Path of the Python source file to parse.

        Returns:
            A dict with the keys ``file_path``, ``content``,
            ``module_docstring``, ``imports``, ``classes``, ``functions``,
            ``constants`` and ``lines_of_code``, or ``None`` when the file
            cannot be read or parsed (the error is printed).
        """
        function_types = (ast.FunctionDef, ast.AsyncFunctionDef)
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            tree = ast.parse(content, filename=file_path)

            # Reset for each file
            self.imports = set()
            self.classes = []
            self.functions = []
            self.constants = []
            self.module_docstring = ast.get_docstring(tree)

            nodes = list(ast.walk(tree))

            # Collect the identities of all methods once, instead of re-walking
            # the whole tree for every function encountered.
            method_ids = {
                id(member)
                for node in nodes
                if isinstance(node, ast.ClassDef)
                for member in node.body
                if isinstance(member, function_types)
            }

            for node in nodes:
                if isinstance(node, ast.Import):
                    for alias in node.names:
                        self.imports.add(alias.name)
                elif isinstance(node, ast.ImportFrom):
                    # Keep one leading dot per relative level so relative
                    # imports stay recognisable as project-local.
                    prefix = '.' * node.level
                    module = node.module or ""
                    for alias in node.names:
                        if module:
                            self.imports.add(f"{prefix}{module}.{alias.name}")
                        else:
                            self.imports.add(f"{prefix or '.'}{alias.name}")
                elif isinstance(node, ast.ClassDef):
                    self.classes.append({
                        'name': node.name,
                        'bases': [ast.unparse(base) for base in node.bases],
                        'docstring': ast.get_docstring(node),
                        'methods': [n.name for n in node.body if isinstance(n, function_types)],
                        'lineno': node.lineno
                    })
                elif isinstance(node, function_types):
                    # Skip methods; they are reported with their class.
                    if id(node) not in method_ids:
                        self.functions.append({
                            'name': node.name,
                            'args': [arg.arg for arg in node.args.args],
                            'docstring': ast.get_docstring(node),
                            'lineno': node.lineno,
                            'returns': ast.unparse(node.returns) if node.returns else None
                        })
                elif isinstance(node, ast.Assign):
                    # Constants: ALL_CAPS names assigned anywhere in the file
                    for target in node.targets:
                        if isinstance(target, ast.Name) and target.id.isupper():
                            self.constants.append({
                                'name': target.id,
                                'value': ast.unparse(node.value),
                                'lineno': node.lineno
                            })
                elif isinstance(node, ast.AnnAssign):
                    # Annotated constants such as ``LIMIT: int = 5``
                    target = node.target
                    if isinstance(target, ast.Name) and target.id.isupper() and node.value is not None:
                        self.constants.append({
                            'name': target.id,
                            'value': ast.unparse(node.value),
                            'lineno': node.lineno
                        })

            return {
                'file_path': file_path,
                'content': content,
                'module_docstring': self.module_docstring,
                # Sorted so prompts are reproducible between runs.
                'imports': sorted(self.imports),
                'classes': self.classes,
                'functions': self.functions,
                'constants': self.constants,
                'lines_of_code': len(content.splitlines())
            }

        except Exception as e:
            # Best-effort boundary: one unreadable file must not stop the run.
            print(f"Error analyzing {file_path}: {e}")
            return None


class OllamaDocGenerator:
    """Generates documentation using Ollama LLM."""

    def __init__(self, model_name: str = "deepseek-r1:latest", ollama_url: str = "http://localhost:11434", thinking: bool = False):
        self.model_name = model_name
        # Drop trailing slashes so endpoint URLs never contain "//api".
        self.ollama_url = ollama_url.rstrip('/')
        self.session = requests.Session()
        self.thinking = thinking

    def check_ollama_connection(self) -> bool:
        """Check if Ollama is running and accessible."""
        try:
            response = self.session.get(f"{self.ollama_url}/api/tags", timeout=STATUS_TIMEOUT)
            return response.status_code == 200
        except requests.exceptions.RequestException:
            return False

    def check_model_availability(self) -> bool:
        """Check if the specified model is available.

        Returns:
            True when any model listed by ``/api/tags`` has a name starting
            with ``self.model_name``; False on any HTTP, network or decoding
            problem.
        """
        try:
            response = self.session.get(f"{self.ollama_url}/api/tags", timeout=STATUS_TIMEOUT)
            if response.status_code != 200:
                return False
            models = response.json().get('models', [])
            return any(str(model.get('name', '')).startswith(self.model_name) for model in models)
        except (requests.exceptions.RequestException, ValueError, AttributeError):
            # ValueError: body was not JSON; AttributeError: unexpected JSON shape.
            return False

    def generate_documentation(self, file_analysis: Dict, project_context: Dict) -> Optional[str]:
        """Generate documentation for a single Python file.

        Args:
            file_analysis: Result of ``PythonAnalyzer.analyze_file``.
            project_context: Result of ``ProjectAnalyzer.scan_project``.

        Returns:
            The generated markdown, or ``None`` when the request fails, the
            server answers with a non-200 status, or the body cannot be used.
        """
        # Create comprehensive prompt with context
        prompt = self.create_documentation_prompt(file_analysis, project_context)
        options = {
            "temperature": 0.1,
            "top_p": 0.9,
        }

        try:
            if self.thinking:
                print("Thinking model chosen")
                # For thinking models, use chat format and handle thinking tokens
                response = self.session.post(
                    f"{self.ollama_url}/api/chat",
                    json={
                        "model": self.model_name,
                        "messages": [
                            {
                                "role": "user",
                                "content": prompt
                            }
                        ],
                        "stream": False,
                        "options": options
                    },
                    timeout=600  # 10 minute timeout for thinking models
                )

                if response.status_code != 200:
                    print(f"Error generating documentation: {response.status_code}")
                    return None

                message = response.json().get('message', {})
                content = message.get('content', '')
                # Parse and display thinking process
                thinking_content, final_answer = self.parse_thinking_response(content)

                if thinking_content:
                    print(f" 🧠 Model thinking process:")
                    print(f" {thinking_content[:200]}..." if len(thinking_content) > 200 else f" {thinking_content}")

                return final_answer if final_answer else content

            print("None thinking model chosen")
            # Standard generation for regular models
            response = self.session.post(
                f"{self.ollama_url}/api/generate",
                json={
                    "model": self.model_name,
                    "prompt": prompt,
                    "stream": False,
                    "think": False,
                    "options": options
                },
                timeout=300  # 5 minute timeout
            )

            if response.status_code != 200:
                print(f"Error generating documentation: {response.status_code}")
                return None

            # A missing key yields None, which callers treat as a failure.
            return response.json().get('response')

        except requests.exceptions.RequestException as e:
            print(f"Error communicating with Ollama: {e}")
            return None
        except (ValueError, AttributeError) as e:
            # The server answered, but not with the expected JSON object.
            print(f"Error communicating with Ollama: {e}")
            return None

    def parse_thinking_response(self, content: str) -> Tuple[Optional[str], str]:
        """Parse thinking model response to extract thinking process and final answer.

        Args:
            content: Raw text returned by the model.

        Returns:
            ``(thinking, answer)``. ``thinking`` is ``None`` (or empty) when no
            reasoning section is recognised; ``answer`` is ``content`` with the
            recognised reasoning removed and surrounding whitespace stripped
            only when something was removed.
        """
        thinking_content = None
        final_answer = content

        # Tag-delimited reasoning, e.g. an opening and closing "think" tag.
        for tag in THINKING_TAGS:
            pattern = rf'<{tag}>(.*?)</{tag}>'
            match = re.search(pattern, content, re.DOTALL)
            if match:
                thinking_content = match.group(1).strip()
                # Remove thinking section from final answer
                final_answer = re.sub(pattern, '', content, flags=re.DOTALL).strip()
                break

        # If no thinking tags found, check for other patterns like "I need to think about..."
        if not thinking_content:
            # Look for thinking indicators at the start of the response only;
            # matching later lines would discard real documentation before them.
            thinking_indicators = [
                r"^(Let me think about.*?(?=\n\n|\n#|\nI'll))",
                r"^(I need to analyze.*?(?=\n\n|\n#|\nI'll))",
                r"^(First, let me understand.*?(?=\n\n|\n#|\nI'll))",
                r"^(To document this.*?(?=\n\n|\n#|\nI'll))"
            ]
            leading_trimmed = content.lstrip()

            for pattern in thinking_indicators:
                match = re.search(pattern, leading_trimmed, re.DOTALL)
                if match:
                    thinking_content = match.group(1).strip()
                    final_answer = leading_trimmed[match.end():].strip()
                    break

        return thinking_content, final_answer

    def create_documentation_prompt(self, file_analysis: Dict, project_context: Dict) -> str:
        """Create a comprehensive prompt for documentation generation.

        Args:
            file_analysis: Result of ``PythonAnalyzer.analyze_file``.
            project_context: Result of ``ProjectAnalyzer.scan_project``.

        Returns:
            The complete prompt text sent to the model.
        """
        file_path = file_analysis['file_path']
        relative_path = os.path.relpath(file_path, project_context['root_path'])

        dependencies = project_context['external_dependencies']
        dependency_text = ', '.join(dependencies) if dependencies else 'None detected'
        imports_text = '\n'.join(f'- `{imp}`' for imp in file_analysis['imports'])

        # NOTE(review): the formatting guidelines below both forbid markdown code
        # blocks and ask for code blocks with syntax highlighting. The text is
        # kept verbatim; confirm the intended rule before rewording it.
        prompt = f"""You are a technical documentation expert. Generate comprehensive markdown documentation for the Python file: `{relative_path}`

## PROJECT CONTEXT:
- **Project Root**: {project_context['root_path']}
- **Total Python Files**: {len(project_context['all_files'])}
- **External Dependencies**: {dependency_text}
- **Project Structure**:
{self.format_project_structure(project_context['file_structure'])}

## FILE ANALYSIS:
- **File Path**: `{relative_path}`
- **Lines of Code**: {file_analysis['lines_of_code']}
- **Module Docstring**: {file_analysis['module_docstring'] or 'None'}

### Imports ({len(file_analysis['imports'])} total):
{imports_text}

### Classes ({len(file_analysis['classes'])} total):
{self.format_classes(file_analysis['classes'])}

### Functions ({len(file_analysis['functions'])} total):
{self.format_functions(file_analysis['functions'])}

### Constants ({len(file_analysis['constants'])} total):
{self.format_constants(file_analysis['constants'])}

## RELATED FILES:
{self.format_related_files(file_analysis, project_context)}

## FULL SOURCE CODE:
```python
{file_analysis['content']}
```

## DOCUMENTATION REQUIREMENTS:

Generate a complete markdown documentation file that includes:

1. **File Header**: Title ('Documentation ' + file), purpose, and brief description
2. **Overview**: What this module/file does and its role in the project
3. **Dependencies**: External and internal dependencies with explanations
4. **API Reference**: Detailed documentation of all classes, functions, and constants
5. **Usage Examples**: Practical code examples where applicable
6. **Cross-References**: Links to related files using relative markdown links
7. **Implementation Notes**: Architecture decisions, patterns used, etc.

## FORMATTING GUIDELINES:
- YOUR ARE **NOT ALLOWED** TO USE markdown CODE BLOCKS!
- Use proper markdown syntax, so no **# title** or other none standard markdown features
- Be carefull with indentation
- Limite the use of unecessary newlines
- Include code blocks with syntax highlighting
- Add tables for parameter/return value documentation
- Use relative links to other documentation files: `[filename](./filename.md)`
- Include line number references where helpful
- Make it professional and comprehensive
- Focus on clarity and usefulness for developers

Generate the complete markdown documentation now:"""

        return prompt

    def format_project_structure(self, file_structure: List[Tuple[str, List[str], List[str]]]) -> str:
        """Format project structure for the prompt.

        Args:
            file_structure: ``(root, dirs, files)`` tuples as collected from
                ``os.walk``; the first entry is the project root.

        Returns:
            An indented bullet list of directories and ``.py`` files, limited
            to the first 20 lines.
        """
        if not file_structure:
            return ''

        base = file_structure[0][0]
        lines = []
        for root, _dirs, files in file_structure:
            # Depth relative to the project root (the root itself is level 0).
            relative_root = os.path.relpath(root, base)
            level = 0 if relative_root == os.curdir else relative_root.count(os.sep) + 1
            indent = ' ' * level
            lines.append(f"{indent}- {os.path.basename(root)}/")
            subindent = ' ' * (level + 1)
            for file in files:
                if file.endswith('.py'):
                    lines.append(f"{subindent}- {file}")
        return '\n'.join(lines[:20])  # Limit to first 20 lines

    def format_classes(self, classes: List[Dict]) -> str:
        """Format class information for the prompt."""
        if not classes:
            return "None"

        lines = []
        for cls in classes:
            lines.append(f"- **{cls['name']}** (line {cls['lineno']})")
            if cls['bases']:
                lines.append(f" - Inherits from: {', '.join(cls['bases'])}")
            if cls['methods']:
                lines.append(f" - Methods: {', '.join(cls['methods'])}")
            if cls['docstring']:
                lines.append(f" - Description: {_shorten(cls['docstring'])}")
        return '\n'.join(lines)

    def format_functions(self, functions: List[Dict]) -> str:
        """Format function information for the prompt."""
        if not functions:
            return "None"

        lines = []
        for func in functions:
            args_str = ', '.join(func['args']) if func['args'] else ''
            lines.append(f"- **{func['name']}({args_str})** (line {func['lineno']})")
            if func['returns']:
                lines.append(f" - Returns: {func['returns']}")
            if func['docstring']:
                lines.append(f" - Description: {_shorten(func['docstring'])}")
        return '\n'.join(lines)

    def format_constants(self, constants: List[Dict]) -> str:
        """Format constant information for the prompt."""
        if not constants:
            return "None"

        return '\n'.join(
            f"- **{const['name']}** = {const['value']} (line {const['lineno']})"
            for const in constants
        )

    def format_related_files(self, file_analysis: Dict, project_context: Dict) -> str:
        """Format related files information.

        Lists the project files that the analyzed file imports. Relative
        imports are resolved against the analyzed file's package first.

        Returns:
            One bullet per imported project file, or ``"None detected"``.
        """
        root_path = project_context['root_path']
        current_file = file_analysis['file_path']

        # Package containing the analyzed file, used to resolve relative imports.
        package_parts = _path_parts(os.path.relpath(current_file, root_path))[:-1]
        current_imports = {
            _resolve_import(imp, package_parts) for imp in file_analysis['imports']
        }

        related_files = []
        for other_file in project_context['all_files']:
            if other_file == current_file:
                continue
            rel_path = os.path.relpath(other_file, root_path)
            module_name = _module_name_from_path(rel_path)

            # Check if this file imports the other one
            if any(_import_targets_module(imp, module_name) for imp in current_imports):
                related_files.append(f"- `{rel_path}` (imported by this file)")

        return '\n'.join(related_files) if related_files else "None detected"


class ProjectAnalyzer:
    """Analyzes the entire project structure."""

    def __init__(self, root_path: str):
        self.root_path = Path(root_path).resolve()
        self.python_files = []
        self.external_dependencies = set()
        # Dotted names of the project's own modules; built lazily, reset per scan.
        self._local_modules = None

    def scan_project(self, exclude_dirs: Optional[List[str]] = None) -> Dict:
        """Scan the project and collect all Python files.

        Args:
            exclude_dirs: Extra names to skip, in addition to
                ``DEFAULT_EXCLUDED_DIRS``.

        Returns:
            A dict with the keys ``root_path``, ``all_files``,
            ``file_structure`` and ``external_dependencies``.
        """
        excluded = set(DEFAULT_EXCLUDED_DIRS)
        excluded.update(exclude_dirs or [])

        # Start from a clean slate so repeated scans do not accumulate results.
        self.python_files = []
        self.external_dependencies = set()
        self._local_modules = None
        file_structure = []

        for root, dirs, files in os.walk(self.root_path):
            # Remove excluded names in place (this also prunes the walk) and
            # sort so the traversal order is reproducible.
            dirs[:] = sorted(d for d in dirs if d not in excluded)
            files[:] = sorted(f for f in files if f not in excluded)
            file_structure.append((root, dirs, files))

            self.python_files.extend(
                os.path.join(root, file) for file in files if file.endswith('.py')
            )

        # Analyze dependencies
        self.analyze_dependencies()

        return {
            'root_path': str(self.root_path),
            'all_files': self.python_files,
            'file_structure': file_structure,
            'external_dependencies': sorted(self.external_dependencies)
        }

    def analyze_dependencies(self):
        """Analyze external dependencies across all Python files.

        Adds the top-level package name of every non-local import to
        ``self.external_dependencies``.
        """
        analyzer = PythonAnalyzer()
        self._local_modules = self._collect_local_modules()

        for file_path in self.python_files:
            analysis = analyzer.analyze_file(file_path)
            if not analysis:
                continue
            for imp in analysis['imports']:
                # Check if it's an external dependency (not local)
                if not self.is_local_import(imp):
                    self.external_dependencies.add(imp.split('.')[0])

    def _collect_local_modules(self) -> Set[str]:
        """Return the dotted module names of all files in ``self.python_files``."""
        names = set()
        for py_file in self.python_files:
            module_name = _module_name_from_path(os.path.relpath(py_file, self.root_path))
            if module_name:
                names.add(module_name)
        return names

    def is_local_import(self, import_name: str) -> bool:
        """Check if an import is local to the project.

        Args:
            import_name: Dotted name as recorded by ``PythonAnalyzer``.

        Returns:
            True for relative imports and for names that are, or live inside,
            one of the project's modules.
        """
        if import_name.startswith('.'):
            return True

        if self._local_modules is None:
            self._local_modules = self._collect_local_modules()

        # "a.b.c" is local if "a", "a.b" or "a.b.c" is one of our modules.
        parts = import_name.split('.')
        return any(
            '.'.join(parts[:end]) in self._local_modules
            for end in range(1, len(parts) + 1)
        )


class DocumentationManager:
    """Manages the documentation generation process."""

    def __init__(self, output_dir: str = "./pydocs"):
        self.output_dir = Path(output_dir)
        # parents=True so nested output paths such as "build/docs" work.
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def generate_index(self, project_context: Dict, generated_docs: List[str]):
        """Generate an index.md file linking to all documentation.

        Args:
            project_context: Result of ``ProjectAnalyzer.scan_project``.
            generated_docs: Documentation file paths relative to the output
                directory, as returned by ``sanitize_filename``.
        """
        dependency_lines = '\n'.join(
            f'- `{dep}`' for dep in sorted(project_context['external_dependencies'])
        )

        entries = []
        for doc_file in sorted(generated_docs):
            doc_link = doc_file.replace('\\', '/')
            source_label = f"{doc_link[:-3]}.py" if doc_link.endswith('.md') else doc_link
            # Link with the full relative path (docs mirror the source tree)
            # and percent-encode it so names with spaces still resolve.
            entries.append(f"- [`{source_label}`](./{quote(doc_link)})\n")

        index_content = f"""# Project Documentation

Auto-generated documentation for Python project: `{os.path.basename(project_context['root_path'])}`

## Project Overview

- **Total Python Files**: {len(project_context['all_files'])}
- **External Dependencies**: {len(project_context['external_dependencies'])}
- **Documentation Files**: {len(generated_docs)}

## External Dependencies

{dependency_lines}

## File Documentation

"""

        index_content += ''.join(entries)

        index_content += f"""
## Project Structure

```
{self.generate_tree_structure(project_context)}
```

---

*Documentation generated automatically using Ollama LLM*
"""

        with open(self.output_dir / "index.md", 'w', encoding='utf-8') as f:
            f.write(index_content)

    def generate_tree_structure(self, project_context: Dict, max_depth: int = 3) -> str:
        """Generate a tree-like structure of the project.

        Returns:
            File names indented by directory depth, skipping files deeper than
            ``max_depth`` and limited to 50 lines.
        """
        lines = []
        root_path = project_context['root_path']

        for py_file in sorted(project_context['all_files']):
            rel_path = os.path.relpath(py_file, root_path)
            depth = rel_path.count(os.sep)
            if depth <= max_depth:
                indent = " " * depth
                filename = os.path.basename(rel_path)
                lines.append(f"{indent}{filename}")

        return '\n'.join(lines[:50])  # Limit output

    def sanitize_filename(self, file_path: str, root_path: str) -> str:
        """Convert file path to a safe markdown filename.

        Returns:
            The path of ``file_path`` relative to ``root_path``, using forward
            slashes, with a trailing ``.py`` replaced by ``.md``.
        """
        rel_path = os.path.relpath(file_path, root_path).replace('\\', '/')
        # Only swap the suffix; ".py" elsewhere in the path must stay untouched.
        if rel_path.endswith('.py'):
            rel_path = rel_path[:-3]
        return f"{rel_path}.md"


def main():
    """Command-line entry point: scan a project and document every Python file."""
    parser = argparse.ArgumentParser(description="Generate documentation for Python project using Ollama")
    parser.add_argument("path", help="Path to Python project directory")
    parser.add_argument("--model", default="deepseek-r1:latest", help="Ollama model to use (default: deepseek-r1:latest). For thinking models also pass --thinking")
    # No type= here: BooleanOptionalAction takes no value, and the parameter is
    # rejected by newer argparse versions.
    parser.add_argument("--thinking", action=argparse.BooleanOptionalAction, default=False, help="Does the model think")
    parser.add_argument("--output", default="./pydocs", help="Output directory for documentation (default: ./pydocs)")
    parser.add_argument("--ollama-url", default="http://localhost:11434", help="Ollama server URL")
    parser.add_argument("--exclude", nargs="*", default=[], help="Directories to exclude from scanning")
    parser.add_argument("--max-files", type=int, default=400, help="Maximum number of files to process")

    args = parser.parse_args()

    # Validate project path
    if not os.path.exists(args.path):
        print(f"Error: Path '{args.path}' does not exist")
        sys.exit(1)
    if not os.path.isdir(args.path):
        print(f"Error: Path '{args.path}' is not a directory")
        sys.exit(1)

    # Initialize components
    doc_generator = OllamaDocGenerator(args.model, args.ollama_url, args.thinking)
    project_analyzer = ProjectAnalyzer(args.path)
    analyzer = PythonAnalyzer()

    # Check Ollama connection
    print("Checking Ollama connection...")
    if not doc_generator.check_ollama_connection():
        print(f"Error: Cannot connect to Ollama at {args.ollama_url}")
        print("Make sure Ollama is running: ollama serve")
        sys.exit(1)

    # Check model availability
    print(f"Checking model availability: {args.model}")
    if not doc_generator.check_model_availability():
        print(f"Error: Model '{args.model}' is not available")
        print(f"Install it with: ollama pull {args.model}")
        sys.exit(1)

    print(f"✓ Ollama connection established with model: {args.model}")

    # Scan project
    print("Scanning project...")
    project_context = project_analyzer.scan_project(args.exclude)

    if not project_context['all_files']:
        print("No Python files found in the project")
        sys.exit(1)

    print(f"Found {len(project_context['all_files'])} Python files")

    # Limit files if specified
    files_to_process = project_context['all_files'][:args.max_files]
    if len(files_to_process) < len(project_context['all_files']):
        print(f"Processing first {args.max_files} files (use --max-files to change)")

    # Create the output directory only once there is something to write.
    doc_manager = DocumentationManager(args.output)

    # Scanned paths live under the resolved root, so relative paths must be
    # computed against it (not the raw CLI argument) to stay inside the tree.
    root_path = project_context['root_path']

    # Generate documentation for each file
    generated_docs = []

    for i, file_path in enumerate(files_to_process, 1):
        rel_path = os.path.relpath(file_path, root_path)
        print(f"[{i}/{len(files_to_process)}] Documenting {rel_path}...")

        # Analyze file
        file_analysis = analyzer.analyze_file(file_path)
        if not file_analysis:
            print(f" ⚠ Skipped due to analysis error")
            continue

        # Empty files are skipped without bothering the model.
        if not file_analysis['content'].strip(" \n\t"):
            print(" ⚠ No document generated because no code was found in the file")
            continue

        # Generate documentation
        documentation = doc_generator.generate_documentation(file_analysis, project_context)
        if not documentation:
            print(f" ⚠ Failed to generate documentation")
            continue

        # Save documentation
        doc_filename = doc_manager.sanitize_filename(file_path, root_path)
        doc_path = doc_manager.output_dir / doc_filename
        doc_path.parent.mkdir(parents=True, exist_ok=True)
        with open(doc_path, 'w', encoding='utf-8') as f:
            f.write(documentation)

        generated_docs.append(doc_filename)
        print(f" ✓ Generated: {doc_filename}")

    # Generate index file
    if generated_docs:
        print("Generating index file...")
        doc_manager.generate_index(project_context, generated_docs)
        print(f"✓ Documentation complete! Check {args.output}/index.md")
        print(f"Generated {len(generated_docs)} documentation files")
    else:
        print("No documentation files were generated")


if __name__ == "__main__":
    main()