diff --git a/doc_think.py b/doc_think.py
new file mode 100644
index 0000000..6330a43
--- /dev/null
+++ b/doc_think.py
@@ -0,0 +1,610 @@
+#!/usr/bin/env python3
+"""
+Python Documentation Generator using Ollama LLM
+Automatically generates comprehensive markdown documentation for Python projects.
+"""
+
+import os
+import ast
+import json
+import argparse
+import subprocess
+import sys
+from pathlib import Path
+from typing import Dict, List, Set, Tuple, Optional
+import requests
+import re
+from urllib.parse import quote
+
class PythonAnalyzer:
    """Analyzes Python files to extract structural information.

    Attributes (repopulated on every analyze_file() call):
        imports: set of imported module / name strings (any nesting level).
        classes: list of dicts describing module-level classes.
        functions: list of dicts describing module-level functions.
        constants: list of dicts describing module-level ALL_CAPS assignments.
        module_docstring: the module docstring, or None.
    """

    # Function-like AST nodes: both sync and async defs count.
    _FUNC_NODES = (ast.FunctionDef, ast.AsyncFunctionDef)

    def __init__(self):
        self.imports = set()
        self.classes = []
        self.functions = []
        self.constants = []
        self.module_docstring = None

    def analyze_file(self, file_path: str) -> Optional[Dict]:
        """Analyze a Python file and extract its structure.

        Args:
            file_path: Path to the .py file to parse.

        Returns:
            Dict with keys file_path, content, module_docstring, imports,
            classes, functions, constants and lines_of_code — or None when
            the file cannot be read or parsed.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            tree = ast.parse(content)

            # Reset state so one analyzer instance can be reused per file.
            self.imports = set()
            self.classes = []
            self.functions = []
            self.constants = []
            self.module_docstring = ast.get_docstring(tree)

            # Imports may appear at any nesting level, so walk the whole tree.
            for node in ast.walk(tree):
                if isinstance(node, ast.Import):
                    for alias in node.names:
                        self.imports.add(alias.name)
                elif isinstance(node, ast.ImportFrom):
                    module = node.module or ""
                    for alias in node.names:
                        self.imports.add(f"{module}.{alias.name}")

            # Classes, functions and constants are collected at module level
            # only. Iterating tree.body (instead of ast.walk) guarantees that
            # nested functions and function-local assignments are not
            # misreported as top-level definitions, and avoids the previous
            # quadratic re-walk used to filter out methods.
            for node in tree.body:
                if isinstance(node, ast.ClassDef):
                    self.classes.append({
                        'name': node.name,
                        'bases': [ast.unparse(base) for base in node.bases],
                        'docstring': ast.get_docstring(node),
                        'methods': [n.name for n in node.body
                                    if isinstance(n, self._FUNC_NODES)],
                        'lineno': node.lineno
                    })
                elif isinstance(node, self._FUNC_NODES):
                    self.functions.append({
                        'name': node.name,
                        'args': [arg.arg for arg in node.args.args],
                        'docstring': ast.get_docstring(node),
                        'lineno': node.lineno,
                        'returns': ast.unparse(node.returns) if node.returns else None
                    })
                elif isinstance(node, ast.Assign):
                    # Module-level constants follow the ALL_CAPS convention.
                    for target in node.targets:
                        if isinstance(target, ast.Name) and target.id.isupper():
                            self.constants.append({
                                'name': target.id,
                                'value': ast.unparse(node.value),
                                'lineno': node.lineno
                            })

            return {
                'file_path': file_path,
                'content': content,
                'module_docstring': self.module_docstring,
                'imports': list(self.imports),
                'classes': self.classes,
                'functions': self.functions,
                'constants': self.constants,
                'lines_of_code': len(content.splitlines())
            }

        except Exception as e:
            # Best-effort: report and skip unreadable/unparseable files
            # instead of aborting the whole project scan.
            print(f"Error analyzing {file_path}: {e}")
            return None
+
class OllamaDocGenerator:
    """Generates documentation using Ollama LLM.

    Talks to a local Ollama server over its HTTP API. Thinking-capable
    models are driven through /api/chat so their reasoning tokens can be
    separated from the final answer; regular models use /api/generate.
    """

    def __init__(self, model_name: str = "deepseek-r1:latest", ollama_url: str = "http://localhost:11434", thinking: bool = False):
        self.model_name = model_name
        self.ollama_url = ollama_url
        # One session reuses the TCP connection across requests.
        self.session = requests.Session()
        self.thinking = thinking

    def check_ollama_connection(self) -> bool:
        """Check if Ollama is running and accessible."""
        try:
            response = self.session.get(f"{self.ollama_url}/api/tags")
            return response.status_code == 200
        except requests.exceptions.RequestException:
            return False

    def check_model_availability(self) -> bool:
        """Check if the specified model is available on the server."""
        try:
            response = self.session.get(f"{self.ollama_url}/api/tags")
            if response.status_code == 200:
                models = response.json().get('models', [])
                # startswith() lets e.g. "llama3" match "llama3:latest".
                return any(model['name'].startswith(self.model_name) for model in models)
            return False
        except requests.exceptions.RequestException:
            return False

    def generate_documentation(self, file_analysis: Dict, project_context: Dict) -> Optional[str]:
        """Generate markdown documentation for a single Python file.

        Args:
            file_analysis: dict produced by PythonAnalyzer.analyze_file().
            project_context: dict produced by ProjectAnalyzer.scan_project().

        Returns:
            The generated markdown text, or None on any API failure.
        """
        prompt = self.create_documentation_prompt(file_analysis, project_context)

        try:
            if self.thinking:
                print("Thinking model chosen")
                # Thinking models go through the chat endpoint so the
                # reasoning tokens can be parsed out of the reply.
                response = self.session.post(
                    f"{self.ollama_url}/api/chat",
                    json={
                        "model": self.model_name,
                        "messages": [
                            {
                                "role": "user",
                                "content": prompt
                            }
                        ],
                        "stream": False,
                        "options": {
                            "temperature": 0.1,
                            "top_p": 0.9,
                        }
                    },
                    timeout=600  # 10 minute timeout for thinking models
                )

                if response.status_code != 200:
                    print(f"Error generating documentation: {response.status_code}")
                    return None

                content = response.json().get('message', {}).get('content', '')
                # Split reasoning tokens from the usable answer.
                thinking_content, final_answer = self.parse_thinking_response(content)

                if thinking_content:
                    print(f" 🧠Model thinking process:")
                    print(f" {thinking_content[:200]}..." if len(thinking_content) > 200 else f" {thinking_content}")

                return final_answer if final_answer else content

            print("None thinking model chosen")
            # Standard generation endpoint for regular models.
            response = self.session.post(
                f"{self.ollama_url}/api/generate",
                json={
                    "model": self.model_name,
                    "prompt": prompt,
                    "stream": False,
                    "think": False,
                    "options": {
                        "temperature": 0.1,
                        "top_p": 0.9,
                    }
                },
                timeout=300  # 5 minute timeout
            )

            if response.status_code == 200:
                return response.json()['response']
            print(f"Error generating documentation: {response.status_code}")
            return None

        except requests.exceptions.RequestException as e:
            print(f"Error communicating with Ollama: {e}")
            return None

    def parse_thinking_response(self, content: str) -> Tuple[Optional[str], str]:
        """Split a thinking model's reply into (thinking, final_answer).

        Fix: the tag patterns had degenerated into empty r'(.*?)' strings,
        which match the empty string at position 0 and extract nothing. The
        XML-style tags emitted by thinking models (e.g. deepseek-r1's
        <think>...</think>) are restored here.

        Returns:
            (thinking_content, final_answer); thinking_content is None when
            no thinking section could be identified.
        """
        thinking_patterns = [
            r'<think>(.*?)</think>',
            r'<thinking>(.*?)</thinking>',
            r'<thought>(.*?)</thought>',
            r'<reasoning>(.*?)</reasoning>',
        ]

        thinking_content = None
        final_answer = content

        for pattern in thinking_patterns:
            match = re.search(pattern, content, re.DOTALL)
            if match:
                thinking_content = match.group(1).strip()
                # Remove the whole thinking section from the final answer.
                final_answer = re.sub(pattern, '', content, flags=re.DOTALL).strip()
                break

        # No explicit tags: fall back to prose-style thinking openers.
        if not thinking_content:
            thinking_indicators = [
                r'^(Let me think about.*?(?=\n\n|\n#|\nI\'ll))',
                r'^(I need to analyze.*?(?=\n\n|\n#|\nI\'ll))',
                r'^(First, let me understand.*?(?=\n\n|\n#|\nI\'ll))',
                r'^(To document this.*?(?=\n\n|\n#|\nI\'ll))'
            ]

            for pattern in thinking_indicators:
                match = re.search(pattern, content, re.DOTALL | re.MULTILINE)
                if match:
                    thinking_content = match.group(1).strip()
                    final_answer = content[match.end():].strip()
                    break

        return thinking_content, final_answer

    def create_documentation_prompt(self, file_analysis: Dict, project_context: Dict) -> str:
        """Assemble the full LLM prompt: project context, structural summary,
        related files, the full source, and the formatting contract."""

        file_path = file_analysis['file_path']
        relative_path = os.path.relpath(file_path, project_context['root_path'])

        prompt = f"""You are a technical documentation expert. Generate comprehensive markdown documentation for the Python file: `{relative_path}`

## PROJECT CONTEXT:
- **Project Root**: {project_context['root_path']}
- **Total Python Files**: {len(project_context['all_files'])}
- **External Dependencies**: {', '.join(project_context['external_dependencies']) if project_context['external_dependencies'] else 'None detected'}
- **Project Structure**:
{self.format_project_structure(project_context['file_structure'])}

## FILE ANALYSIS:
- **File Path**: `{relative_path}`
- **Lines of Code**: {file_analysis['lines_of_code']}
- **Module Docstring**: {file_analysis['module_docstring'] or 'None'}

### Imports ({len(file_analysis['imports'])} total):
{chr(10).join(f'- `{imp}`' for imp in file_analysis['imports'])}

### Classes ({len(file_analysis['classes'])} total):
{self.format_classes(file_analysis['classes'])}

### Functions ({len(file_analysis['functions'])} total):
{self.format_functions(file_analysis['functions'])}

### Constants ({len(file_analysis['constants'])} total):
{self.format_constants(file_analysis['constants'])}

## RELATED FILES:
{self.format_related_files(file_analysis, project_context)}

## FULL SOURCE CODE:
```python
{file_analysis['content']}
```

## DOCUMENTATION REQUIREMENTS:

Generate a complete markdown documentation file that includes:

1. **File Header**: Title ('Documentation ' + file), purpose, and brief description
2. **Overview**: What this module/file does and its role in the project
3. **Dependencies**: External and internal dependencies with explanations
4. **API Reference**: Detailed documentation of all classes, functions, and constants
5. **Usage Examples**: Practical code examples where applicable
6. **Cross-References**: Links to related files using relative markdown links
7. **Implementation Notes**: Architecture decisions, patterns used, etc.

## FORMATTING GUIDELINES:
- YOUR ARE **NOT ALLOWED** TO USE markdown CODE BLOCKS!
- Use proper markdown syntax, so no **# title** or other none standard markdown features
- Be carefull with indentation
- Limite the use of unecessary newlines
- Include code blocks with syntax highlighting
- Add tables for parameter/return value documentation
- Use relative links to other documentation files: `[filename](./filename.md)`
- Include line number references where helpful
- Make it professional and comprehensive
- Focus on clarity and usefulness for developers

Generate the complete markdown documentation now:"""

        return prompt

    def format_project_structure(self, file_structure: Dict) -> str:
        """Format os.walk() tuples into an indented bullet list (first 20 lines)."""
        if not file_structure:
            # Guard: the previous code indexed file_structure[0] unconditionally.
            return ""
        lines = []
        root_prefix = file_structure[0][0]  # walk root; hoisted out of the loop
        for root, dirs, files in file_structure:
            level = root.replace(root_prefix, '').count(os.sep)
            indent = ' ' * level
            lines.append(f"{indent}- {os.path.basename(root)}/")
            subindent = ' ' * (level + 1)
            for file in files:
                if file.endswith('.py'):
                    lines.append(f"{subindent}- {file}")
        return '\n'.join(lines[:20])  # Limit to first 20 lines

    def format_classes(self, classes: List[Dict]) -> str:
        """Format class summaries (name, bases, methods, truncated docstring)."""
        if not classes:
            return "None"

        lines = []
        for cls in classes:
            lines.append(f"- **{cls['name']}** (line {cls['lineno']})")
            if cls['bases']:
                lines.append(f"  - Inherits from: {', '.join(cls['bases'])}")
            if cls['methods']:
                lines.append(f"  - Methods: {', '.join(cls['methods'])}")
            if cls['docstring']:
                lines.append(f"  - Description: {cls['docstring'][:100]}...")
        return '\n'.join(lines)

    def format_functions(self, functions: List[Dict]) -> str:
        """Format function summaries (signature, return annotation, truncated docstring)."""
        if not functions:
            return "None"

        lines = []
        for func in functions:
            args_str = ', '.join(func['args']) if func['args'] else ''
            lines.append(f"- **{func['name']}({args_str})** (line {func['lineno']})")
            if func['returns']:
                lines.append(f"  - Returns: {func['returns']}")
            if func['docstring']:
                lines.append(f"  - Description: {func['docstring'][:100]}...")
        return '\n'.join(lines)

    def format_constants(self, constants: List[Dict]) -> str:
        """Format constant summaries (name = value, line number)."""
        if not constants:
            return "None"

        lines = []
        for const in constants:
            lines.append(f"- **{const['name']}** = {const['value']} (line {const['lineno']})")
        return '\n'.join(lines)

    def format_related_files(self, file_analysis: Dict, project_context: Dict) -> str:
        """List project files whose module path is imported by this file."""
        current_imports = set(file_analysis['imports'])
        related_files = []

        for other_file in project_context['all_files']:
            if other_file != file_analysis['file_path']:
                rel_path = os.path.relpath(other_file, project_context['root_path'])
                module_name = rel_path.replace('/', '.').replace('\\', '.').replace('.py', '')

                # An import starting with the other file's module path means
                # this file depends on it.
                if any(imp.startswith(module_name) for imp in current_imports):
                    related_files.append(f"- `{rel_path}` (imported by this file)")

        return '\n'.join(related_files) if related_files else "None detected"
+
class ProjectAnalyzer:
    """Analyzes the entire project structure."""

    # Directories that should never be scanned for project sources.
    ALWAYS_EXCLUDE = ['.git', '__pycache__', '.pytest_cache', 'venv', 'env', '.venv', 'node_modules']

    def __init__(self, root_path: str):
        self.root_path = Path(root_path).resolve()
        self.python_files = []
        self.external_dependencies = set()

    def scan_project(self, exclude_dirs: List[str] = None) -> Dict:
        """Scan the project and collect all Python files.

        Args:
            exclude_dirs: extra directory (or file) names to skip, merged
                with ALWAYS_EXCLUDE.

        Returns:
            Dict with root_path, all_files, file_structure (list of
            os.walk() tuples) and external_dependencies.
        """
        # Set membership is O(1) and de-duplicates user-supplied entries.
        excluded = set(self.ALWAYS_EXCLUDE)
        if exclude_dirs:
            excluded.update(exclude_dirs)

        self.python_files = []
        file_structure = []

        for root, dirs, files in os.walk(self.root_path):
            # Prune dirs in place so os.walk never descends into them; file
            # names are filtered against the same set for parity with the
            # original behaviour (--exclude entries may name files too).
            dirs[:] = [d for d in dirs if d not in excluded]
            files[:] = [f for f in files if f not in excluded]
            file_structure.append((root, dirs, files))

            for file in files:
                if file.endswith('.py'):
                    self.python_files.append(os.path.join(root, file))

        # Classify imports across the collected files.
        self.analyze_dependencies()

        return {
            'root_path': str(self.root_path),
            'all_files': self.python_files,
            'file_structure': file_structure,
            'external_dependencies': list(self.external_dependencies)
        }

    def analyze_dependencies(self):
        """Collect external (non-local) top-level package names across all files."""
        analyzer = PythonAnalyzer()

        for file_path in self.python_files:
            analysis = analyzer.analyze_file(file_path)
            if analysis:
                for imp in analysis['imports']:
                    if not self.is_local_import(imp):
                        # Record only the top-level package name.
                        self.external_dependencies.add(imp.split('.')[0])

    def is_local_import(self, import_name: str) -> bool:
        """Check if an import refers to a module inside this project."""
        # Relative imports are local by definition.
        if import_name.startswith('.'):
            return True

        for py_file in self.python_files:
            rel_path = os.path.relpath(py_file, self.root_path)
            # splitext strips only the trailing extension (a blanket
            # replace('.py', '') would corrupt paths containing ".py"
            # mid-name).
            module_path = os.path.splitext(rel_path.replace('\\', '/'))[0].replace('/', '.')
            # Match on whole dotted components only: the previous bare
            # startswith() made the external module `utils2` look local
            # whenever the project contained `utils.py`.
            if import_name == module_path or import_name.startswith(module_path + '.'):
                return True

        return False
+
class DocumentationManager:
    """Manages the documentation generation process (output files + index)."""

    def __init__(self, output_dir: str = "./pydocs"):
        self.output_dir = Path(output_dir)
        # parents=True: allow a nested output path that does not exist yet
        # (exist_ok alone raised FileNotFoundError for those).
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def generate_index(self, project_context: Dict, generated_docs: List[str]):
        """Generate an index.md file linking to all documentation.

        Args:
            project_context: dict produced by ProjectAnalyzer.scan_project().
            generated_docs: relative .md filenames written this run.
        """

        index_content = f"""# Project Documentation

Auto-generated documentation for Python project: `{os.path.basename(project_context['root_path'])}`

## Project Overview

- **Total Python Files**: {len(project_context['all_files'])}
- **External Dependencies**: {len(project_context['external_dependencies'])}
- **Documentation Files**: {len(generated_docs)}

## External Dependencies

{chr(10).join(f'- `{dep}`' for dep in sorted(project_context['external_dependencies']))}

## File Documentation

"""

        for doc_file in sorted(generated_docs):
            # Map the doc file back to its source: strip only the trailing
            # ".md" (a blanket replace('.md', '.py') corrupted names that
            # contain ".md" elsewhere).
            source_file = doc_file[:-3] + '.py' if doc_file.endswith('.md') else doc_file
            rel_path = os.path.relpath(source_file, '.')
            doc_name = os.path.basename(doc_file)
            index_content += f"- [`{rel_path}`](./{doc_name})\n"

        index_content += f"""
## Project Structure

```
{self.generate_tree_structure(project_context)}
```

---

*Documentation generated automatically using Ollama LLM*
"""

        with open(self.output_dir / "index.md", 'w', encoding='utf-8') as f:
            f.write(index_content)

    def generate_tree_structure(self, project_context: Dict, max_depth: int = 3) -> str:
        """Generate a tree-like structure of the project (first 50 lines)."""
        lines = []
        root_path = project_context['root_path']

        for py_file in sorted(project_context['all_files']):
            rel_path = os.path.relpath(py_file, root_path)
            depth = rel_path.count(os.sep)
            if depth <= max_depth:
                indent = "    " * depth
                filename = os.path.basename(rel_path)
                # Fix: the filename placeholder had been lost (literal
                # "(unknown)"); emit the actual file name at its depth.
                lines.append(f"{indent}{filename}")

        return '\n'.join(lines[:50])  # Limit output

    def sanitize_filename(self, file_path: str, root_path: str) -> str:
        """Convert a source path to the matching markdown filename.

        Only the trailing ".py" is swapped for ".md": the previous
        replace('.py', '.md') also rewrote ".py" occurrences inside
        directory names.
        """
        rel_path = os.path.relpath(file_path, root_path)
        safe_name = rel_path.replace('\\', '/')
        if safe_name.endswith('.py'):
            safe_name = safe_name[:-3] + '.md'
        return safe_name
+
def main():
    """CLI entry point: scan a project, document each file, build an index.

    Exits non-zero when the path is invalid, Ollama is unreachable, the
    model is missing, or no Python files are found.
    """
    parser = argparse.ArgumentParser(description="Generate documentation for Python project using Ollama")
    parser.add_argument("path", help="Path to Python project directory")
    parser.add_argument("--model", default="deepseek-r1:latest", help="Ollama model to use (default: deepseek-r1:latest). For thinking models use 'thinking' in name")
    # BooleanOptionalAction already provides --thinking/--no-thinking; the
    # previous type=bool kwarg is deprecated (3.12) and rejected by newer
    # argparse, and default=False avoids passing None downstream.
    parser.add_argument("--thinking", action=argparse.BooleanOptionalAction, default=False, help="Does the model think")
    parser.add_argument("--output", default="./pydocs", help="Output directory for documentation (default: ./pydocs)")
    parser.add_argument("--ollama-url", default="http://localhost:11434", help="Ollama server URL")
    parser.add_argument("--exclude", nargs="*", default=[], help="Directories to exclude from scanning")
    parser.add_argument("--max-files", type=int, default=400, help="Maximum number of files to process")

    args = parser.parse_args()

    # Validate the project path before doing any network work.
    if not os.path.exists(args.path):
        print(f"Error: Path '{args.path}' does not exist")
        sys.exit(1)

    # Initialize components.
    doc_generator = OllamaDocGenerator(args.model, args.ollama_url, args.thinking)
    project_analyzer = ProjectAnalyzer(args.path)
    doc_manager = DocumentationManager(args.output)
    analyzer = PythonAnalyzer()

    # Check Ollama connection.
    print("Checking Ollama connection...")
    if not doc_generator.check_ollama_connection():
        print(f"Error: Cannot connect to Ollama at {args.ollama_url}")
        print("Make sure Ollama is running: ollama serve")
        sys.exit(1)

    # Check model availability.
    print(f"Checking model availability: {args.model}")
    if not doc_generator.check_model_availability():
        print(f"Error: Model '{args.model}' is not available")
        print(f"Install it with: ollama pull {args.model}")
        sys.exit(1)

    print(f"✓ Ollama connection established with model: {args.model}")

    # Scan project.
    print("Scanning project...")
    project_context = project_analyzer.scan_project(args.exclude)

    if not project_context['all_files']:
        print("No Python files found in the project")
        sys.exit(1)

    print(f"Found {len(project_context['all_files'])} Python files")

    # Limit files if requested.
    files_to_process = project_context['all_files'][:args.max_files]
    if len(files_to_process) < len(project_context['all_files']):
        print(f"Processing first {args.max_files} files (use --max-files to change)")

    # Generate documentation for each file.
    generated_docs = []

    for i, file_path in enumerate(files_to_process, 1):
        rel_path = os.path.relpath(file_path, args.path)
        print(f"[{i}/{len(files_to_process)}] Documenting {rel_path}...")

        # Analyze file.
        file_analysis = analyzer.analyze_file(file_path)
        if not file_analysis:
            print(f" ⚠ Skipped due to analysis error")
            continue

        # Skip the LLM round-trip entirely for files with no actual code.
        if not file_analysis['content'].strip(" \n\t"):
            print(" ⚠ No document generated because no code was found in the file")
            continue

        documentation = doc_generator.generate_documentation(file_analysis, project_context)
        if not documentation:
            print(f" ⚠ Failed to generate documentation")
            continue

        # Save documentation, creating any per-package subdirectories.
        doc_filename = doc_manager.sanitize_filename(file_path, args.path)
        doc_path = doc_manager.output_dir / doc_filename
        os.makedirs(os.path.dirname(doc_path), exist_ok=True)
        with open(doc_path, 'w', encoding='utf-8') as f:
            f.write(documentation)

        generated_docs.append(doc_filename)
        print(f" ✓ Generated: {doc_filename}")

    # Generate index file.
    if generated_docs:
        print("Generating index file...")
        doc_manager.generate_index(project_context, generated_docs)
        print(f"✓ Documentation complete! Check {args.output}/index.md")
        print(f"Generated {len(generated_docs)} documentation files")
    else:
        print("No documentation files were generated")

if __name__ == "__main__":
    main()
\ No newline at end of file