#!/usr/bin/env python3
"""
Python Documentation Generator using Ollama LLM

Automatically generates comprehensive markdown documentation for Python projects.
"""

import os
import ast
import json
import argparse
import subprocess
import sys
from pathlib import Path
from typing import Dict, List, Set, Tuple, Optional

import requests
import re
from urllib.parse import quote


class PythonAnalyzer:
    """Analyzes Python files to extract structural information via the `ast` module."""

    def __init__(self):
        # Per-file accumulators; reset at the start of every analyze_file() call.
        self.imports = set()
        self.classes = []
        self.functions = []
        self.constants = []
        self.module_docstring = None

    def analyze_file(self, file_path: str) -> Optional[Dict]:
        """Analyze a Python file and extract its structure.

        Returns a dict with the file's imports, classes, top-level functions,
        ALL_CAPS constants and raw content, or None if the file cannot be
        read or parsed (the error is printed, not raised).
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            tree = ast.parse(content)

            # Reset for each file so the same analyzer instance can be reused.
            self.imports = set()
            self.classes = []
            self.functions = []
            self.constants = []
            self.module_docstring = ast.get_docstring(tree)

            # Collect every function that is a direct child of a class ONCE.
            # (The original re-walked the whole tree per function — O(n^2).)
            method_nodes = set()
            for maybe_cls in ast.walk(tree):
                if isinstance(maybe_cls, ast.ClassDef):
                    for child in maybe_cls.body:
                        if isinstance(child, ast.FunctionDef):
                            method_nodes.add(child)

            for node in ast.walk(tree):
                if isinstance(node, ast.Import):
                    for alias in node.names:
                        self.imports.add(alias.name)
                elif isinstance(node, ast.ImportFrom):
                    module = node.module or ""
                    for alias in node.names:
                        self.imports.add(f"{module}.{alias.name}")
                elif isinstance(node, ast.ClassDef):
                    self.classes.append({
                        'name': node.name,
                        'bases': [ast.unparse(base) for base in node.bases],
                        'docstring': ast.get_docstring(node),
                        'methods': [n.name for n in node.body
                                    if isinstance(n, ast.FunctionDef)],
                        'lineno': node.lineno
                    })
                elif isinstance(node, ast.FunctionDef):
                    # Only functions that are NOT methods of some class.
                    if node not in method_nodes:
                        self.functions.append({
                            'name': node.name,
                            'args': [arg.arg for arg in node.args.args],
                            'docstring': ast.get_docstring(node),
                            'lineno': node.lineno,
                            'returns': ast.unparse(node.returns) if node.returns else None
                        })
                elif isinstance(node, ast.Assign):
                    # Top-level constants (ALL_CAPS variables)
                    for target in node.targets:
                        if isinstance(target, ast.Name) and target.id.isupper():
                            self.constants.append({
                                'name': target.id,
                                'value': ast.unparse(node.value),
                                'lineno': node.lineno
                            })

            return {
                'file_path': file_path,
                'content': content,
                'module_docstring': self.module_docstring,
                'imports': list(self.imports),
                'classes': self.classes,
                'functions': self.functions,
                'constants': self.constants,
                'lines_of_code': len(content.splitlines())
            }
        except Exception as e:
            # Best-effort: a single unparsable file should not abort the run.
            print(f"Error analyzing {file_path}: {e}")
            return None


class OllamaDocGenerator:
    """Generates documentation using an Ollama-hosted LLM."""

    def __init__(self, model_name: str = "deepseek-r1:latest",
                 ollama_url: str = "http://localhost:11434",
                 thinking: bool = False):
        self.model_name = model_name
        self.ollama_url = ollama_url
        self.session = requests.Session()
        # When True, use the /api/chat endpoint and parse <think> blocks.
        self.thinking = thinking

    def check_ollama_connection(self) -> bool:
        """Check if Ollama is running and accessible."""
        try:
            response = self.session.get(f"{self.ollama_url}/api/tags")
            return response.status_code == 200
        except requests.exceptions.RequestException:
            return False

    def check_model_availability(self) -> bool:
        """Check if the specified model is available on the Ollama server."""
        try:
            response = self.session.get(f"{self.ollama_url}/api/tags")
            if response.status_code == 200:
                models = response.json().get('models', [])
                # startswith() so "deepseek-r1" matches "deepseek-r1:latest".
                return any(model['name'].startswith(self.model_name)
                           for model in models)
            return False
        except requests.exceptions.RequestException:
            return False

    def generate_documentation(self, file_analysis: Dict,
                               project_context: Dict) -> Optional[str]:
        """Generate markdown documentation for a single analyzed Python file.

        Returns the generated markdown, or None on any HTTP/transport error.
        """
        # Create comprehensive prompt with context
        prompt = self.create_documentation_prompt(file_analysis, project_context)

        is_thinking_model = self.thinking

        try:
            if is_thinking_model:
                print("Thinking model chosen")
                # For thinking models, use chat format and handle thinking tokens.
                # NOTE(review): a tool is advertised here but tool calls are
                # never executed/answered — the model's tool requests are only
                # printed. Confirm whether a tool-execution loop was intended.
                response = self.session.post(
                    f"{self.ollama_url}/api/chat",
                    json={
                        "model": self.model_name,
                        "messages": [
                            {"role": "user", "content": prompt}
                        ],
                        "tools": [
                            {
                                'type': 'function',
                                'function': {
                                    'name': 'analyze_file',
                                    'description': 'This tool allows you to examine other Python files in the project and it returns the same structured information you received for the current file (imports, classes, functions, constants, etc.).',
                                    'parameters': {
                                        'type': 'object',
                                        'properties': {
                                            'path': {
                                                'type': 'string',
                                                'description': 'Relative (from the root of the current project) path to the file',
                                            },
                                        },
                                        'required': ['path'],
                                    },
                                },
                            },
                        ],
                        "stream": False,
                        "options": {
                            "temperature": 0.1,
                            "top_p": 0.9,
                        }
                    },
                    timeout=60 * 60 * 24  # generous: local models can be slow
                )
                if response.status_code == 200:
                    result = response.json()
                    tool_calls = result.get('tool_calls', [])
                    print(result, tool_calls)
                    message = result.get('message', {})
                    content = message.get('content', '')

                    # Parse and display thinking process
                    thinking_content, final_answer = self.parse_thinking_response(content)
                    if thinking_content:
                        print(f"    🧠 Model thinking process:")
                        print(f"    {thinking_content[:200]}..."
                              if len(thinking_content) > 200
                              else f"    {thinking_content}")
                    return final_answer if final_answer else content
                else:
                    print(f"Error generating documentation: {response.status_code} {response.text}")
                    return None
            else:
                print("None thinking model chosen")
                response = self.session.post(
                    f"{self.ollama_url}/api/generate",
                    json={
                        "model": self.model_name,
                        "prompt": prompt,
                        "stream": False,
                        "think": False,
                        "options": {
                            "temperature": 0.1,
                            "top_p": 0.9,
                        }
                    },
                    timeout=60 * 60 * 12
                )
                if response.status_code == 200:
                    return response.json()['response']
                else:
                    print(f"Error generating documentation: {response.status_code}")
                    return None
        except requests.exceptions.RequestException as e:
            print(f"Error communicating with Ollama: {e}")
            return None

    def parse_thinking_response(self, content: str) -> Tuple[Optional[str], str]:
        """Parse a thinking-model response into (thinking_process, final_answer).

        First tries explicit tag pairs (e.g. <think>...</think>); the
        originals were corrupted to r'(.*?)' which matched the empty string
        and never extracted anything. Falls back to heuristic prose openers.
        Returns (None, content) when no thinking section is found.
        """
        thinking_patterns = [
            r'<think>(.*?)</think>',
            r'<thinking>(.*?)</thinking>',
            r'<thought>(.*?)</thought>',
            r'<reasoning>(.*?)</reasoning>',
        ]

        thinking_content = None
        final_answer = content

        for pattern in thinking_patterns:
            match = re.search(pattern, content, re.DOTALL)
            if match:
                thinking_content = match.group(1).strip()
                final_answer = re.sub(pattern, '', content, flags=re.DOTALL).strip()
                break

        if not thinking_content:
            # Heuristic: leading "reasoning prose" up to the first blank line,
            # heading, or "I'll ..." sentence.
            thinking_indicators = [
                r'^(Let me think about.*?(?=\n\n|\n#|\nI\'ll))',
                r'^(I need to analyze.*?(?=\n\n|\n#|\nI\'ll))',
                r'^(First, let me understand.*?(?=\n\n|\n#|\nI\'ll))',
                r'^(To document this.*?(?=\n\n|\n#|\nI\'ll))'
            ]
            for pattern in thinking_indicators:
                match = re.search(pattern, content, re.DOTALL | re.MULTILINE)
                if match:
                    thinking_content = match.group(1).strip()
                    final_answer = content[match.end():].strip()
                    break

        return thinking_content, final_answer

    def get_code_documentation(self, file_analysis: Dict, project_context: Dict):
        """Build the structured context section of the prompt for one file."""
        file_path = file_analysis['file_path']
        relative_path = os.path.relpath(file_path, project_context['root_path'])
        return f"""## PROJECT CONTEXT:
- **Project Root**: {project_context['root_path']}
- **Total Python Files**: {len(project_context['all_files'])}
- **External Dependencies**: {', '.join(project_context['external_dependencies']) if project_context['external_dependencies'] else 'None detected'}
- **Project Structure**:
{self.format_project_structure(project_context['file_structure'])}

## FILE ANALYSIS:
- **File Path**: `{relative_path}`
- **Lines of Code**: {file_analysis['lines_of_code']}
- **Module Docstring**: {file_analysis['module_docstring'] or 'None'}

### Imports ({len(file_analysis['imports'])} total):
{chr(10).join(f'- `{imp}`' for imp in file_analysis['imports'])}

### Classes ({len(file_analysis['classes'])} total):
{self.format_classes(file_analysis['classes'])}

### Functions ({len(file_analysis['functions'])} total):
{self.format_functions(file_analysis['functions'])}

### Constants ({len(file_analysis['constants'])} total):
{self.format_constants(file_analysis['constants'])}

## RELATED FILES:
{self.format_related_files(file_analysis, project_context)}

## FULL SOURCE CODE:
```python
{file_analysis['content']}
```
"""

    def create_documentation_prompt(self, file_analysis: Dict,
                                    project_context: Dict) -> str:
        """Assemble the full LLM prompt: instructions + file/project context."""
        file_path = file_analysis['file_path']
        relative_path = os.path.relpath(file_path, project_context['root_path'])

        prompt = f"""You are a technical documentation expert. Generate comprehensive markdown documentation for the Python file: `{relative_path}`

## AVAILABLE TOOLS
You have access to a `analyze_file` tool that allows you to examine other Python files in the project. This tool returns the same structured information you received for the current file (imports, classes, functions, constants, etc.).

**When to use this tool:**
- When you need to understand how other files interact with the current file
- To verify import relationships and dependencies
- To provide more accurate cross-references in your documentation
- To understand the broader context of classes or functions used in the current file
- TRY TO USE IT NOW!

{self.get_code_documentation(file_analysis, project_context)}

## DOCUMENTATION REQUIREMENTS:
**Generate a complete markdown documentation file that includes:**
1. **File Header**: Title ('Documentation ' + file), purpose, and brief description
2. **Overview**: What this module/file does and its role in the project
3. **Dependencies**: External and internal dependencies with explanations
4. **API Reference**: Detailed documentation of all classes, functions, and constants
5. **Usage Examples**: Practical code examples where applicable
6. **Cross-References**: Links to related files using relative markdown links
7. **Implementation Notes**: Architecture decisions, patterns used, etc.

## FORMATTING GUIDELINES:
- YOUR ARE **NOT ALLOWED** TO USE markdown CODE BLOCKS!
- Use proper markdown syntax, so no **# title** or other none standard markdown features
- Be carefull with indentation
- Limite the use of unecessary newlines
- Include code blocks with syntax highlighting
- Add tables for parameter/return value documentation
- Use relative links to other documentation files: `[filename](./filename.md)`
- Include line number references where helpful
- Make it professional and comprehensive
- Focus on clarity and usefulness for developers

Generate the complete markdown documentation now:"""
        return prompt

    def format_project_structure(self, file_structure: Dict) -> str:
        """Format the os.walk()-style structure as an indented bullet tree."""
        lines = []
        for root, dirs, files in file_structure:
            level = root.replace(file_structure[0][0], '').count(os.sep)
            indent = '  ' * level
            lines.append(f"{indent}- {os.path.basename(root)}/")
            subindent = '  ' * (level + 1)
            for file in files:
                if file.endswith('.py'):
                    lines.append(f"{subindent}- {file}")
        return '\n'.join(lines[:20])  # cap prompt size

    def format_classes(self, classes: List[Dict]) -> str:
        """Format class information for the prompt."""
        if not classes:
            return "None"
        lines = []
        for cls in classes:
            lines.append(f"- **{cls['name']}** (line {cls['lineno']})")
            if cls['bases']:
                lines.append(f"  - Inherits from: {', '.join(cls['bases'])}")
            if cls['methods']:
                lines.append(f"  - Methods: {', '.join(cls['methods'])}")
            if cls['docstring']:
                lines.append(f"  - Description: {cls['docstring'][:100]}...")
        return '\n'.join(lines)

    def format_functions(self, functions: List[Dict]) -> str:
        """Format function information for the prompt."""
        if not functions:
            return "None"
        lines = []
        for func in functions:
            args_str = ', '.join(func['args']) if func['args'] else ''
            lines.append(f"- **{func['name']}({args_str})** (line {func['lineno']})")
            if func['returns']:
                lines.append(f"  - Returns: {func['returns']}")
            if func['docstring']:
                lines.append(f"  - Description: {func['docstring'][:100]}...")
        return '\n'.join(lines)

    def format_constants(self, constants: List[Dict]) -> str:
        """Format module-level constants for the prompt."""
        if not constants:
            return "None"
        lines = []
        for const in constants:
            lines.append(f"- **{const['name']}** = {const['value']} (line {const['lineno']})")
        return '\n'.join(lines)

    def format_related_files(self, file_analysis: Dict, project_context: Dict) -> str:
        """List project files whose module path is imported by the current file."""
        current_imports = set(file_analysis['imports'])
        related_files = []
        for other_file in project_context['all_files']:
            if other_file != file_analysis['file_path']:
                rel_path = os.path.relpath(other_file, project_context['root_path'])
                module_name = rel_path.replace('/', '.').replace('\\', '.').replace('.py', '')
                if any(imp.startswith(module_name) for imp in current_imports):
                    related_files.append(f"- `{rel_path}` (imported by this file)")
        return '\n'.join(related_files) if related_files else "None detected"


class ProjectAnalyzer:
    """Analyzes the entire project structure (directory tree or single file)."""

    DEFAULT_EXCLUDES = ['.git', '__pycache__', '.pytest_cache',
                        'venv', 'env', '.venv', 'node_modules']

    def __init__(self, root_path: str):
        self.root_path = Path(root_path).resolve()
        self.python_files = []
        self.external_dependencies = set()

    def scan_project(self, exclude_dirs: List[str] = None) -> Dict:
        """Walk the project and return {root_path, all_files, file_structure,
        external_dependencies}. Also handles the case where root_path is a
        single .py file rather than a directory."""
        if os.path.isdir(self.root_path):
            if exclude_dirs is None:
                exclude_dirs = list(self.DEFAULT_EXCLUDES)
            else:
                exclude_dirs = exclude_dirs + list(self.DEFAULT_EXCLUDES)

            self.python_files = []
            file_structure = []
            for root, dirs, files in os.walk(self.root_path):
                # In-place prune so os.walk never descends into excluded dirs.
                dirs[:] = [d for d in dirs if d not in exclude_dirs]
                files[:] = [f for f in files if f not in exclude_dirs]
                file_structure.append((root, dirs, files))
                for file in files:
                    if file.endswith('.py'):
                        self.python_files.append(os.path.join(root, file))

            self.analyze_dependencies()
            return {
                'root_path': str(self.root_path),
                'all_files': self.python_files,
                'file_structure': file_structure,
                'external_dependencies': list(self.external_dependencies)
            }
        else:
            # Single-file mode. Keep the ABSOLUTE path (the original stored
            # only the basename, which broke analyze_dependencies whenever
            # the cwd differed from the file's directory).
            single_file = str(self.root_path)
            self.root_path = Path(os.path.dirname(single_file))
            self.python_files = [single_file]
            self.analyze_dependencies()
            return {
                'root_path': str(self.root_path),
                'all_files': self.python_files,
                'file_structure': [],
                'external_dependencies': list(self.external_dependencies)
            }

    def analyze_dependencies(self):
        """Populate self.external_dependencies from every scanned file's imports."""
        analyzer = PythonAnalyzer()
        for file_path in self.python_files:
            analysis = analyzer.analyze_file(file_path)
            if analysis:
                for imp in analysis['imports']:
                    if not self.is_local_import(imp):
                        # Keep only the top-level package name.
                        self.external_dependencies.add(imp.split('.')[0])

    def is_local_import(self, import_name: str) -> bool:
        """Return True if the import refers to a file inside this project."""
        if import_name.startswith('.'):
            return True
        for py_file in self.python_files:
            rel_path = os.path.relpath(py_file, self.root_path)
            module_path = rel_path.replace('/', '.').replace('\\', '.').replace('.py', '')
            if import_name.startswith(module_path):
                return True
        return False


class DocumentationManager:
    """Writes generated markdown files and the top-level index."""

    def __init__(self, output_dir: str = "./pydocs"):
        self.output_dir = Path(output_dir)
        os.makedirs(self.output_dir, exist_ok=True)

    def generate_index(self, project_context: Dict, generated_docs: List[str]):
        """Write index.md linking to every generated documentation file."""
        index_content = f"""# Project Documentation

Auto-generated documentation for Python project: `{os.path.basename(project_context['root_path'])}`

## Project Overview
- **Total Python Files**: {len(project_context['all_files'])}
- **External Dependencies**: {len(project_context['external_dependencies'])}
- **Documentation Files**: {len(generated_docs)}

## External Dependencies
{chr(10).join(f'- `{dep}`' for dep in sorted(project_context['external_dependencies']))}

## File Documentation
"""
        for doc_file in sorted(generated_docs):
            # Display the source file's path but link to the generated .md
            # (the original linked to the .py path, which does not exist
            # inside the documentation directory).
            if doc_file.endswith('.md'):
                source_rel = doc_file[:-len('.md')] + '.py'
            else:
                source_rel = doc_file
            index_content += f"- [`{source_rel}`](./{doc_file})\n"

        index_content += f"""
## Project Structure
```
{self.generate_tree_structure(project_context)}
```

---
*Documentation generated automatically using Ollama LLM*
"""
        with open(self.output_dir / "index.md", 'w', encoding='utf-8') as f:
            f.write(index_content)

    def generate_tree_structure(self, project_context: Dict, max_depth: int = 5) -> str:
        """Generate a tree-like structure of the project."""
        lines = []
        root_path = project_context['root_path']
        for py_file in sorted(project_context['all_files']):
            rel_path = os.path.relpath(py_file, root_path)
            depth = rel_path.count(os.sep)
            if depth <= max_depth:
                indent = ("  " * depth) + "└────"
                filename = os.path.basename(rel_path)
                # Fixed: the original emitted the literal "(unknown)" here
                # and never used `filename`.
                lines.append(f"{indent} [`{filename}`](./{rel_path})")
        return '\n'.join(lines[:50])  # Limit output

    def sanitize_filename(self, file_path: str, root_path: str) -> str:
        """Convert a source path to the matching markdown doc path.

        Only the trailing '.py' extension is swapped for '.md' — the original
        replaced every '.py' occurrence, corrupting paths like 'mypy.tools/'.
        """
        rel_path = os.path.relpath(file_path, root_path)
        safe_name = rel_path.replace('\\', '/')
        if safe_name.endswith('.py'):
            safe_name = safe_name[:-len('.py')] + '.md'
        return safe_name


def main():
    """CLI entry point: scan a project and document each Python file."""
    parser = argparse.ArgumentParser(
        description="Generate documentation for Python project using Ollama")
    parser.add_argument("path", help="Path to Python project directory")
    parser.add_argument("--model", default="deepseek-r1:latest",
                        help="Ollama model to use (default: deepseek-r1:latest). For thinking models use 'thinking' in name")
    # Fixed: BooleanOptionalAction must not be combined with type=bool
    # (bool('false') is True, and newer argparse rejects the combination);
    # default=False so a real bool always reaches OllamaDocGenerator.
    parser.add_argument("--thinking", action=argparse.BooleanOptionalAction,
                        default=False, help="Does the model think")
    parser.add_argument("--output", default="./pydocs",
                        help="Output directory for documentation (default: ./pydocs)")
    parser.add_argument("--ollama-url", default="http://localhost:11434",
                        help="Ollama server URL")
    parser.add_argument("--exclude", nargs="*", default=[],
                        help="Directories to exclude from scanning")
    parser.add_argument("--max-files", type=int, default=400,
                        help="Maximum number of files to process")
    args = parser.parse_args()

    # Validate project path
    if not os.path.exists(args.path):
        print(f"Error: Path '{args.path}' does not exist")
        sys.exit(1)

    # Initialize components
    doc_generator = OllamaDocGenerator(args.model, args.ollama_url, args.thinking)
    project_analyzer = ProjectAnalyzer(args.path)
    doc_manager = DocumentationManager(args.output)
    analyzer = PythonAnalyzer()

    # Check Ollama connection
    print("Checking Ollama connection...")
    if not doc_generator.check_ollama_connection():
        print(f"Error: Cannot connect to Ollama at {args.ollama_url}")
        print("Make sure Ollama is running: ollama serve")
        sys.exit(1)

    # Check model availability
    print(f"Checking model availability: {args.model}")
    if not doc_generator.check_model_availability():
        print(f"Error: Model '{args.model}' is not available")
        print(f"Install it with: ollama pull {args.model}")
        sys.exit(1)

    print(f"✓ Ollama connection established with model: {args.model}")

    # Scan project
    print("Scanning project...")
    project_context = project_analyzer.scan_project(args.exclude)
    if not project_context['all_files']:
        print("No Python files found in the project")
        sys.exit(1)
    print(f"Found {len(project_context['all_files'])} Python files")

    # Limit files if specified
    files_to_process = project_context['all_files'][:args.max_files]
    if len(files_to_process) < len(project_context['all_files']):
        print(f"Processing first {args.max_files} files (use --max-files to change)")

    # Generate documentation for each file
    generated_docs = []
    for i, file_path in enumerate(files_to_process, 1):
        rel_path = os.path.relpath(file_path, args.path)
        print(f"[{i}/{len(files_to_process)}] Documenting {rel_path}...")

        # Analyze file
        file_analysis = analyzer.analyze_file(file_path)
        if not file_analysis:
            print(f"  ⚠ Skipped due to analysis error")
            continue

        # Skip the LLM call entirely for effectively-empty files.
        has_code = bool(file_analysis['content'].strip(" \n\t"))
        documentation = (doc_generator.generate_documentation(file_analysis, project_context)
                         if has_code else "")
        if not documentation:
            print(f"  ⚠ Failed to generate documentation"
                  if has_code
                  else "  ⚠ No document generated because no code was found in the file")
            continue

        # Save documentation (mirroring the source tree under the output dir)
        doc_filename = doc_manager.sanitize_filename(file_path, args.path)
        doc_path = doc_manager.output_dir / doc_filename
        os.makedirs(os.path.dirname(doc_path), exist_ok=True)
        with open(doc_path, 'w', encoding='utf-8') as f:
            f.write(documentation)
        generated_docs.append(doc_filename)
        print(f"  ✓ Generated: {doc_filename}")

    # Generate index file
    if generated_docs:
        print("Generating index file...")
        doc_manager.generate_index(project_context, generated_docs)
        print(f"✓ Documentation complete! Check {args.output}/index.md")
        print(f"Generated {len(generated_docs)} documentation files")
    else:
        print("No documentation files were generated")


if __name__ == "__main__":
    main()