import warnings
warnings.filterwarnings("ignore", category=Warning, message=".*urllib3 v2 only supports OpenSSL.*")
import os
import re
import json
import time
import uuid
import tempfile
import shutil
import urllib.parse
import concurrent.futures
import threading
from datetime import datetime
from pathlib import Path
from functools import wraps
from threading import Lock
from concurrent.futures import ThreadPoolExecutor
import requests
import difflib
from bs4 import BeautifulSoup
import zipfile
from flask import Flask, render_template, request, redirect, url_for, jsonify, flash, abort
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.interval import IntervalTrigger
import secrets
from werkzeug.utils import secure_filename
import logging
from logging.handlers import RotatingFileHandler
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address
import atexit
import argparse
import hashlib
import warnings
import random
import socket


# Credentials compared by check_basic_auth(); both are unset (None) at import time.
# NOTE(review): presumably assigned elsewhere in the file (e.g. from CLI arguments) — confirm.
BASIC_AUTH_USERNAME = None
BASIC_AUTH_PASSWORD = None

# Allow-lists: AI backend names accepted for library repos, and `source` tags
# accepted on analysis records (anything else falls back to a default).
VALID_AI_SERVICES = frozenset(['ollama', 'openai', 'deepseek', 'claude'])
VALID_SOURCES = frozenset(['library_auto', 'products', 'folder', 'direct'])

# On-disk locations. PRODUCTS_DIR and SAVED_ANALYSES_DIR sit next to this module;
# AI_CONFIG_FILE is a bare relative name, so it resolves against the process's
# current working directory.
PRODUCTS_DIR = os.path.join(os.path.dirname(__file__), 'products')
AI_CONFIG_FILE = 'ai_config.json'
PRODUCTS_FILE = os.path.join(PRODUCTS_DIR, 'products.json')
LIBRARY_FILE = os.path.join(PRODUCTS_DIR, 'library.json')
SAVED_ANALYSES_DIR = os.path.join(os.path.dirname(__file__), 'saved_analyses')

# Network behaviour. DEFAULT_TIMEOUT is the AI request timeout; None means the
# AI client passes no timeout at all. CVE_FETCH_TIMEOUT is passed as the
# `timeout` argument of the NVD page fetch. The remaining values drive the AI
# client's retry loop and its capped backoff.
DEFAULT_TIMEOUT = None
CVE_FETCH_TIMEOUT = 60
MAX_RETRY_ATTEMPTS = 3
RATE_LIMIT_BACKOFF_BASE = 2
MAX_BACKOFF_TIME = 60

# Size limits in bytes: read_file() refuses files larger than MAX_FILE_SIZE.
# NOTE(review): MAX_DIFF_FILE_SIZE is not referenced in the visible part of the file.
MAX_FILE_SIZE = 10 * 1024 * 1024
MAX_DIFF_FILE_SIZE = 100 * 1024 * 1024

# Defaults used when building the AI configuration.
# NOTE(review): DEFAULT_MAX_TOKENS is not referenced in the visible part of the file.
DEFAULT_AI_SERVICE = 'ollama'
DEFAULT_TEMPERATURE = 1.0
DEFAULT_CONTEXT_SIZE = 8192
DEFAULT_MAX_TOKENS = 1000

# Built-in per-service defaults: base URL, default model and the API endpoint
# path appended to the base URL when a request is built. The Claude entry also
# carries the value sent in the `anthropic-version` request header.
AI_SERVICE_CONFIGS = {
    'ollama': {
        'url': 'http://localhost:11434',
        'model': 'qwen2.5-coder:3b',
        'endpoint': '/api/generate'
    },
    'openai': {
        'base_url': 'https://api.openai.com/v1',
        'model': 'gpt-4-turbo',
        'endpoint': '/chat/completions'
    },
    'deepseek': {
        'base_url': 'https://api.deepseek.com/v1',
        'model': 'deepseek-chat',
        'endpoint': '/chat/completions'
    },
    'claude': {
        'base_url': 'https://api.anthropic.com/v1',
        'model': 'claude-3-opus-20240229',
        'endpoint': '/messages',
        'version': '2023-06-01'
    }
}

# Default prompt templates, filled in with str.format():
#   'main_analysis' expects {file_path} and {diff_content};
#   'cve_analysis'  expects {ai_response} and {cve_description}.
# load_ai_config() uses these for any prompt key missing from the config file.
DEFAULT_PROMPTS = {
    'main_analysis': """Analyze the provided code diff for security fixes.

Instructions:
1. Your answer MUST strictly follow the answer format outlined below.
2. Always include the vulnerability name if one exists.
3. There may be multiple vulnerabilities. For each, provide a separate entry following the structure.
4. Even if you are uncertain whether a vulnerability exists, follow the structure and indicate your uncertainty.

Answer Format for Each Vulnerability:
    Vulnerability Existed: [yes/no/not sure]
    [Vulnerability Name] [File] [Lines]
    [Old Code]
    [Fixed Code]

Additional Details:
    File: {file_path}
    Diff Content:
    {diff_content}""",
    'cve_analysis': """Analysis:
{ai_response}

Question: Do any of the vulnerabilities identified in the analysis match the description?
Reply strictly in this format: 'Description Matches: Yes/No' 

Description:
{cve_description}"""
}

# HTTP status codes referred to by name; HTTP_RATE_LIMITED triggers the AI
# client's backoff-and-retry path.
HTTP_OK = 200
HTTP_RATE_LIMITED = 429
HTTP_UNAUTHORIZED = 401

# User-Agent header sent when fetching CVE pages from NVD.
USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:137.0) Gecko/20100101 Firefox/137.0"

# NOTE(review): presumably the bind address/port defaults for the server entry
# point, which is outside the visible part of the file — confirm.
DEFAULT_HOST = "127.0.0.1"
DEFAULT_PORT = None

# Flask application. The secret key is regenerated on every start, so anything
# signed with it (e.g. session cookies / flash messages) does not survive a restart.
app = Flask(__name__)
app.secret_key = secrets.token_urlsafe(32)

# Per-client-address rate limiting with in-memory counters (reset on restart).
limiter = Limiter(
    key_func=get_remote_address,
    default_limits=["200 per day", "50 per hour"],
    storage_uri="memory://"
)
limiter.init_app(app)

# Outside debug mode, log to a rotating file under ./logs (relative to the
# current working directory): 10 KiB per file, 10 backups kept.
if not app.debug:
    if not os.path.exists('logs'):
        os.mkdir('logs')
    file_handler = RotatingFileHandler('logs/patchleaks.log', maxBytes=10240, backupCount=10)
    file_handler.setFormatter(logging.Formatter(
        '%(asctime)s %(levelname)s: %(message)s [in %(pathname)s:%(lineno)d]'
    ))
    file_handler.setLevel(logging.INFO)
    app.logger.addHandler(file_handler)
    app.logger.setLevel(logging.INFO)
    app.logger.info('PatchLeaks startup')

# Background scheduler is started at import time and shut down at interpreter exit.
scheduler = BackgroundScheduler()
scheduler.start()
atexit.register(lambda: scheduler.shutdown())

# Make sure the directory for saved analysis records exists and is rwxr-xr-x.
os.makedirs(SAVED_ANALYSES_DIR, exist_ok=True)
os.chmod(SAVED_ANALYSES_DIR, 0o755)

def check_basic_auth(username, password):
    """Return True only when both credentials match the configured ones.

    Args:
        username: User name taken from the request's Authorization data
            (may be None when the header is not a Basic credential).
        password: Password taken from the same place (may be None).

    Returns:
        bool: True on an exact match of both values, False otherwise.
        Always False while BASIC_AUTH_USERNAME / BASIC_AUTH_PASSWORD are
        unset, so "nothing configured" can never authenticate a request
        whose credentials are also missing (None == None).
    """
    if BASIC_AUTH_USERNAME is None or BASIC_AUTH_PASSWORD is None:
        return False
    if username is None or password is None:
        return False

    # Compare as bytes: compare_digest rejects non-ASCII str, and it runs in
    # constant time so the comparison does not leak how many characters matched.
    user_ok = secrets.compare_digest(
        str(username).encode('utf-8'), str(BASIC_AUTH_USERNAME).encode('utf-8')
    )
    pass_ok = secrets.compare_digest(
        str(password).encode('utf-8'), str(BASIC_AUTH_PASSWORD).encode('utf-8')
    )
    # Both comparisons are evaluated before combining so timing does not
    # reveal which of the two failed.
    return user_ok and pass_ok

def requires_basic_auth(f):
    """Decorator: run the wrapped view only for requests with valid Basic auth.

    Requests without credentials, or with credentials rejected by
    check_basic_auth, receive a 401 response carrying a Basic challenge.
    """
    @wraps(f)
    def guarded_view(*args, **kwargs):
        credentials = request.authorization
        if credentials and check_basic_auth(credentials.username, credentials.password):
            return f(*args, **kwargs)
        challenge = {'WWW-Authenticate': 'Basic realm="Login Required"'}
        return ('Authentication required', 401, challenge)
    return guarded_view

def conditional_auth(methods_to_protect):
    """Decorator factory: demand Basic auth only for the listed HTTP methods.

    Args:
        methods_to_protect: Container of HTTP method names; a request whose
            method is in it must pass check_basic_auth, any other method is
            forwarded to the view unchecked.
    """
    def decorator(f):
        @wraps(f)
        def guarded_view(*args, **kwargs):
            if request.method not in methods_to_protect:
                return f(*args, **kwargs)
            credentials = request.authorization
            if credentials and check_basic_auth(credentials.username, credentials.password):
                return f(*args, **kwargs)
            challenge = {'WWW-Authenticate': 'Basic realm="Login Required"'}
            return ('Authentication required', 401, challenge)
        return guarded_view
    return decorator

def validate_input(input_str, max_length=1000, pattern=None):
    """Sanitize a short free-text value.

    Control characters (C0, DEL and C1 ranges) are removed, the text is cut
    to *max_length* characters, optionally checked against *pattern* with
    re.match, and finally stripped of surrounding whitespace.

    Returns:
        str: the cleaned text, or "" for falsy input or a failed pattern check.
    """
    if not input_str:
        return ""

    text = re.sub(r'[\x00-\x1f\x7f-\x9f]', '', str(input_str))
    if len(text) > max_length:
        text = text[:max_length]

    if pattern:
        # The pattern is checked before the final strip, as the cut text stands.
        if re.match(pattern, text) is None:
            return ""
    return text.strip()

def validate_prompt(prompt_str, max_length=5000):
    """Sanitize a prompt template while keeping its layout characters.

    Removes control characters except tab, line feed and carriage return,
    cuts the text to *max_length* characters and strips surrounding
    whitespace.

    Returns:
        str: the cleaned prompt, or "" for falsy input.
    """
    if not prompt_str:
        return ""

    cleaned = re.sub(r'[\x00\x08\x0B\x0C\x0E-\x1F\x7F-\x9F]', '', str(prompt_str))
    too_long = len(cleaned) > max_length
    bounded = cleaned[:max_length] if too_long else cleaned
    return bounded.strip()

def validate_url(url):
    """Return True for http(s) URLs on github.com that have a non-trivial path.

    Anything falsy, unparsable, on another host or scheme, or with a path of
    at most one character yields False.
    """
    if not url:
        return False
    try:
        parts = urllib.parse.urlparse(url)
        if parts.scheme not in ['http', 'https']:
            return False
        if parts.netloc not in ['github.com', 'www.github.com']:
            return False
        return len(parts.path) > 1
    except Exception:
        return False

def validate_version(version):
    """Return True when *version* is a safe version/tag string.

    A valid version is a non-empty str of at most 50 characters drawn from
    letters, digits, '.', '_' and '-'.

    The whole string must match: re.fullmatch is used because '$' with
    re.match also accepts a trailing newline, which would let "1.0\\n"
    through into URLs and file-system paths built from the version.

    Returns:
        bool: always a real bool (never None or a match object).
    """
    if not version or not isinstance(version, str):
        return False
    if len(version) > 50:
        return False
    return re.fullmatch(r'[a-zA-Z0-9._-]+', version) is not None

def validate_filename(filename):
    """Return True when a relative file name looks safe to join onto a folder.

    Rejects falsy names, names containing '..' or ':', names starting with
    '/', and names longer than 255 characters.
    """
    if not filename:
        return False
    if '..' in filename:
        return False
    if filename.startswith('/'):
        return False
    if ':' in filename:
        return False
    return len(filename) <= 255

def validate_uuid(uuid_str):
    """Return True when *uuid_str* can be parsed by uuid.UUID.

    Never raises: besides ValueError for malformed text, uuid.UUID raises
    TypeError for None and AttributeError for non-string values such as
    ints; all of these mean "not a valid id" here.
    """
    try:
        uuid.UUID(uuid_str)
    except (ValueError, TypeError, AttributeError):
        return False
    return True

def load_json_safe(filepath, default=None):
    """Load a JSON document from *filepath*, falling back to *default*.

    Args:
        filepath: Path of the JSON file to read (decoded as UTF-8).
        default: Value returned when the file cannot be used.

    Returns:
        The parsed dict or list; *default* when the path cannot be opened or
        read (missing, a directory, permission denied, other OS errors), the
        content is not valid UTF-8 / JSON, or the top-level value is neither
        a dict nor a list.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers FileNotFoundError, PermissionError, IsADirectoryError;
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return default
    return data if isinstance(data, (dict, list)) else default

def save_json_safe(filepath, data):
    """Write *data* to *filepath* as indented JSON.

    The document is serialized to a string before the file is opened, so
    data that cannot be serialized no longer truncates an existing file.

    Args:
        filepath: Destination path.
        data: JSON-serializable object.

    Returns:
        bool: True on success; False (after logging the error) on any failure.
    """
    try:
        payload = json.dumps(data, indent=2)
        with open(filepath, 'w') as f:
            f.write(payload)
        return True
    except Exception as e:
        app.logger.error(f"Failed to save {filepath}: {str(e)}")
        return False

def load_library():
    """Return the repository entries stored in LIBRARY_FILE.

    Only dict entries are kept; a missing, unreadable or non-list file
    yields an empty list.
    """
    entries = load_json_safe(LIBRARY_FILE, [])
    if not isinstance(entries, list):
        return []
    return [entry for entry in entries if isinstance(entry, dict)]

def save_library(library_data):
    """Persist the library list to LIBRARY_FILE.

    Raises:
        ValueError: if *library_data* is not a list.
        Exception: if the file could not be written.
    """
    if not isinstance(library_data, list):
        raise ValueError("Invalid library data format")

    written = save_json_safe(LIBRARY_FILE, library_data)
    if not written:
        raise Exception("Failed to save library data")

def add_library_repo(name, repo_url, ai_service):
    """Add a repository to the library file.

    Args:
        name: Repository name; must consist of letters, digits, '.', '_' or '-'.
        repo_url: Repository URL; must pass validate_url.
        ai_service: AI backend name; unknown values fall back to 'ollama'.

    Returns:
        tuple[bool, str]: success flag and a human-readable message.
    """
    clean_name = validate_input(name, 100, r'^[a-zA-Z0-9._-]+$')
    if not clean_name:
        return False, "Invalid repository name"
    if not validate_url(repo_url):
        return False, "Invalid repository URL"
    if ai_service not in VALID_AI_SERVICES:
        ai_service = 'ollama'

    library = load_library()
    for existing in library:
        if existing.get('name') == clean_name:
            return False, "Repository already exists"

    # New entries start with auto-scan enabled and no version seen yet.
    entry = {
        'id': str(uuid.uuid4()),
        'name': clean_name,
        'repo_url': repo_url,
        'ai_service': ai_service,
        'created_at': datetime.now().isoformat(),
        'last_checked': None,
        'last_version': None,
        'auto_scan': True
    }
    library.append(entry)
    save_library(library)
    return True, "Repository added successfully"

def remove_library_repo(repo_id):
    """Delete the library entry whose id equals *repo_id*.

    Returns:
        bool: False when *repo_id* is not a valid UUID; True otherwise (the
        library is re-saved even if no entry matched).
    """
    if validate_uuid(repo_id):
        remaining = [entry for entry in load_library() if entry.get('id') != repo_id]
        save_library(remaining)
        return True
    return False

def update_library_repo(repo_id, **kwargs):
    """Update selected fields of one library entry and save the library.

    Only 'last_checked', 'last_version' and 'auto_scan' may be changed; other
    keyword arguments are ignored. Only the first entry with a matching id
    is touched.

    Returns:
        bool: False when *repo_id* is not a valid UUID; True otherwise (the
        library is re-saved even if no entry matched).
    """
    if not validate_uuid(repo_id):
        return False

    allowed_fields = frozenset(['last_checked', 'last_version', 'auto_scan'])
    library = load_library()
    target = next((entry for entry in library if entry.get('id') == repo_id), None)
    if target is not None:
        target.update({key: value for key, value in kwargs.items() if key in allowed_fields})

    save_library(library)
    return True

def check_for_new_versions():
    """Poll every auto-scan library repo for a new latest version.

    For each repo with 'auto_scan' enabled and a valid URL, the first entry
    returned by get_github_versions is treated as the latest version:

    * unchanged  -> only 'last_checked' is refreshed;
    * changed    -> 'last_version' and 'last_checked' are stored in a single
      update, and an automatic analysis from the previous to the new version
      is triggered when a previous version was recorded.

    Errors for one repo are logged and do not stop the remaining repos; the
    function itself never raises.
    """
    try:
        app.logger.info("Checking for new versions in library")

        for repo in load_library():
            if not repo.get('auto_scan', True):
                continue

            try:
                repo_url = repo.get('repo_url')
                if not validate_url(repo_url):
                    continue

                versions = get_github_versions(repo_url)
                if not versions:
                    continue

                latest_version = versions[0]
                # `repo` is the snapshot loaded above, so this is still the
                # value from before any update made below.
                previous_version = repo.get('last_version')
                checked_at = datetime.now().isoformat()

                if previous_version == latest_version:
                    update_library_repo(repo['id'], last_checked=checked_at)
                    continue

                app.logger.info(f"New version detected for {repo.get('name')}: {latest_version}")
                # One write records both fields; a first sighting (no previous
                # version) needs no second save and no analysis.
                update_library_repo(repo['id'],
                                    last_version=latest_version,
                                    last_checked=checked_at)
                if previous_version is not None:
                    trigger_auto_analysis(repo, previous_version, latest_version)
            except Exception as e:
                app.logger.error(f"Error checking versions for {repo.get('name')}: {str(e)}")
    except Exception as e:
        app.logger.error(f"Error in check_for_new_versions: {str(e)}")

def trigger_auto_analysis(repo, old_version, new_version):
    """Create an analysis record for a version bump and run it in a thread.

    Nothing happens when either version fails validate_version. Any error is
    logged and swallowed.

    Args:
        repo: Library entry dict (reads 'name', 'repo_url', 'ai_service').
        old_version: Previously recorded version.
        new_version: Newly detected version.
    """
    try:
        if not (validate_version(old_version) and validate_version(new_version)):
            return

        repo_label = validate_input(repo.get('name', ''), 100)
        service_name = validate_input(repo.get('ai_service', 'ollama'), 50)
        params = {
            'repo_name': repo_label,
            'repo_url': repo.get('repo_url'),
            'old_version': old_version,
            'new_version': new_version,
            'ai_service': service_name,
            'extension': None,
            'enable_ai': 'on',
            'special_keywords': '',
            'cve_ids': ''
        }

        analysis_id = create_new_analysis_record(params, source='library_auto', ai_enabled=True)
        worker = threading.Thread(
            target=run_library_analysis_background,
            args=(analysis_id, params),
        )
        worker.start()
        app.logger.info(f"Auto-analysis triggered for {repo.get('name')} ({old_version} → {new_version})")
    except Exception as e:
        app.logger.error(f"Failed to trigger auto-analysis for {repo.get('name')}: {str(e)}")

def create_new_analysis_record(params, source, ai_enabled):
    """Create a new 'running' analysis record on disk and return its id.

    Args:
        params: Analysis parameters, stored verbatim under meta.params.
        source: Origin tag; values outside VALID_SOURCES become 'direct'.
        ai_enabled: When truthy, the currently configured AI service and
            model are recorded in the metadata.

    Returns:
        str: the UUID of the new record (also its file name stem).

    Raises:
        Exception: re-raised after logging when the record cannot be written.
    """
    analysis_id = str(uuid.uuid4())
    if source not in VALID_SOURCES:
        source = 'direct'

    ai_service = ai_model = None
    if ai_enabled:
        config = load_ai_config()
        ai_service = config.get('service', 'ollama')
        if ai_service in config:
            ai_model = config[ai_service].get('model', 'Unknown')

    meta = {
        'created_at': datetime.now().isoformat(),
        'source': source,
        'ai_enabled': bool(ai_enabled),
        'ai_service': ai_service,
        'ai_model': ai_model,
        'params': params,
        'status': 'running'
    }
    record = {'meta': meta, 'results': {}}

    record_path = os.path.join(SAVED_ANALYSES_DIR, f"{analysis_id}.json")
    try:
        with open(record_path, 'w') as handle:
            json.dump(record, handle, indent=2)
        os.chmod(record_path, 0o644)
    except Exception as e:
        app.logger.error(f"Failed to create analysis record: {str(e)}")
        raise

    return analysis_id

def run_library_analysis_background(analysis_id, params):
    """Run a library analysis end to end and store the outcome in its record.

    Steps: download and unpack any missing version archives from the repo's
    tag URLs, diff the two version folders, run the keyword analysis,
    optionally run the AI analysis, then mark the saved record 'completed'.
    On any failure the error is logged and the record is marked 'failed'
    with the error text. The function never raises.

    Args:
        analysis_id: Id of a record created by create_new_analysis_record.
        params: Dict with 'repo_name', 'repo_url', 'old_version',
            'new_version', 'special_keywords', 'enable_ai', 'ai_service',
            'cve_ids' and optionally 'extension'.
    """
    # Computed before the try block so the failure handler can always use it.
    analysis_path = os.path.join(SAVED_ANALYSES_DIR, f"{analysis_id}.json")
    try:
        repo_name = params['repo_name']
        repo_url = params['repo_url']
        old_ver = params['old_version']
        new_ver = params['new_version']

        products_data = load_json_safe(PRODUCTS_FILE, {})

        product_name = repo_name.lower().replace('.', '').replace(' ', '').replace('-', '').replace('_', '')

        if repo_name in products_data:
            product_name = repo_name
            app.logger.info(f"Found {repo_name} in products file, using exact name")
        else:
            app.logger.info(f"Using transformed product_name: {product_name} for repo: {repo_name}")

        download_dir = os.path.join(PRODUCTS_DIR, f"{product_name}_downloads")
        product_versions_file = os.path.join(PRODUCTS_DIR, f"{product_name}.json")

        app.logger.info(f"Library analysis using product_name: {product_name}, repo_name: {repo_name}")
        os.makedirs(download_dir, exist_ok=True)

        versions = load_json_safe(product_versions_file, [])

        for ver in [old_ver, new_ver]:
            if any(v['version'] == ver for v in versions):
                continue  # already downloaded earlier
            try:
                zip_url = f"{repo_url}/archive/refs/tags/{ver}.zip"
                zip_path = os.path.join(download_dir, f"{ver}.zip")
                extract_path = os.path.join(download_dir, ver)

                # The context manager releases the streamed connection; the
                # timeout (this file's 60 s network timeout constant) applies
                # per connect/read, so a stalled server cannot hang the worker
                # thread forever while large archives still download.
                with requests.get(zip_url, stream=True, timeout=CVE_FETCH_TIMEOUT) as response:
                    response.raise_for_status()
                    with open(zip_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            f.write(chunk)

                with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                    zip_ref.extractall(extract_path)

                os.remove(zip_path)
                # The first entry of the extraction folder is taken as the
                # archive's root directory.
                final_path = os.path.join(extract_path, os.listdir(extract_path)[0])

                versions.append({
                    'version': ver,
                    'path': final_path,
                    'timestamp': datetime.now().isoformat()
                })
            except Exception as e:
                app.logger.error(f"Failed to download version {ver} for {repo_name}: {str(e)}")

        save_json_safe(product_versions_file, versions)

        try:
            old_path = next(v['path'] for v in versions if v['version'] == old_ver)
            new_path = next(v['path'] for v in versions if v['version'] == new_ver)
        except StopIteration:
            app.logger.error(f"Could not find downloaded versions for {repo_name}. Available versions: {[v['version'] for v in versions]}")
            raise Exception(f"Required versions {old_ver} and/or {new_ver} not found in downloaded versions")

        app.logger.info(f"Starting folder comparison for {repo_name}: {old_path} -> {new_path}")
        special_keywords = params['special_keywords'].split(',') if params['special_keywords'] and params['special_keywords'].strip() else None
        compare_folders(old_path, new_path, params.get('extension'), special_keywords)
        diffs = parse_diff_file("special.txt")
        app.logger.info(f"Found {len(diffs)} diff files for {repo_name}")
        analyzed_results = analyze_diffs_with_keywords(diffs, special_keywords)
        app.logger.info(f"Analyzed {len(analyzed_results)} files for {repo_name}")

        if params['enable_ai'] == 'on' and analyzed_results:
            # The AI service is selected by temporarily rewriting the shared
            # config file and restoring it afterwards.
            # NOTE(review): concurrent analyses share this file — confirm that
            # overlapping runs cannot pick up each other's temporary service.
            original_config = load_ai_config()
            temp_config = original_config.copy()
            temp_config['service'] = params['ai_service']

            with open(AI_CONFIG_FILE, 'w') as f:
                json.dump(temp_config, f)

            try:
                analyzed_results = process_ai_analysis(analyzed_results, diffs, params['cve_ids'])
            finally:
                with open(AI_CONFIG_FILE, 'w') as f:
                    json.dump(original_config, f)

        with open(analysis_path, 'r') as f:
            analysis_data = json.load(f)

        analysis_data['meta']['status'] = 'completed'
        analysis_data['results'] = analyzed_results

        if params['enable_ai'] == 'on':
            config = load_ai_config()
            ai_service = params.get('ai_service', config.get('service', 'ollama'))
            analysis_data['meta']['ai_service'] = ai_service
            if ai_service in config:
                analysis_data['meta']['ai_model'] = config[ai_service].get('model', 'Unknown')

        with open(analysis_path, 'w') as f:
            json.dump(analysis_data, f, indent=2)

        app.logger.info(f"Auto-analysis completed for {repo_name}")

    except Exception as e:
        # Log the failure itself: the record update below may also fail, and
        # without this line the error would vanish silently.
        app.logger.error(f"Library analysis {analysis_id} failed: {str(e)}")
        try:
            with open(analysis_path, 'r') as f:
                analysis_data = json.load(f)
            analysis_data['meta']['status'] = 'failed'
            analysis_data['meta']['error'] = str(e)
            with open(analysis_path, 'w') as f:
                json.dump(analysis_data, f, indent=2)
        except Exception as record_error:
            # Best effort only: a broken record must not crash the worker thread.
            app.logger.error(f"Could not mark analysis {analysis_id} as failed: {str(record_error)}")

# Register the periodic library poll on the background scheduler: runs
# check_for_new_versions every 30 minutes. replace_existing=True replaces any
# job already registered under the id 'version_checker'.
scheduler.add_job(
    func=check_for_new_versions,
    trigger=IntervalTrigger(minutes=30),
    id='version_checker',
    name='Check for new versions',
    replace_existing=True
)

def load_ai_config():
    """Load the AI configuration, back-filled with built-in defaults.

    Reads AI_CONFIG_FILE; when it is missing, unreadable or not a JSON
    object, the full default configuration is returned. Otherwise every
    service section ('ollama', 'openai', 'deepseek', 'claude') and the
    'parameters' section is guaranteed to exist as a dict, with keys that
    are missing or None replaced by their default. Missing prompt templates
    are filled in from DEFAULT_PROMPTS.

    Returns:
        dict: configuration that can be indexed safely for any supported
        service, for 'parameters' and for 'prompts'.
    """
    default = {
        'service': DEFAULT_AI_SERVICE,
        'ollama': {
            'url': AI_SERVICE_CONFIGS['ollama']['url'],
            'model': AI_SERVICE_CONFIGS['ollama']['model']
        },
        'openai': {
            'key': '',
            'model': AI_SERVICE_CONFIGS['openai']['model'],
            'base_url': AI_SERVICE_CONFIGS['openai']['base_url']
        },
        'deepseek': {
            'key': '',
            'model': AI_SERVICE_CONFIGS['deepseek']['model'],
            'base_url': AI_SERVICE_CONFIGS['deepseek']['base_url']
        },
        'claude': {
            'key': '',
            'model': AI_SERVICE_CONFIGS['claude']['model'],
            'base_url': AI_SERVICE_CONFIGS['claude']['base_url']
        },
        'parameters': {
            'temperature': DEFAULT_TEMPERATURE,
            'num_ctx': DEFAULT_CONTEXT_SIZE
        },
        'prompts': DEFAULT_PROMPTS.copy()
    }

    config = load_json_safe(AI_CONFIG_FILE, default)
    if not isinstance(config, dict):
        return default

    # 'ollama' and 'parameters' are merged as well: the AI client indexes
    # both directly, so a partial config file must not leave them absent.
    for section in ('ollama', 'openai', 'deepseek', 'claude', 'parameters'):
        current = config.get(section)
        if not isinstance(current, dict):
            config[section] = default[section]
            continue
        for key, value in default[section].items():
            if current.get(key) is None:
                current[key] = value

    prompts = config.get('prompts')
    if not isinstance(prompts, dict):
        config['prompts'] = default['prompts']
    else:
        for key, value in default['prompts'].items():
            prompts.setdefault(key, value)

    return config

class AIServiceClient:
    """Minimal HTTP client for the configured AI backend.

    Supports 'ollama', 'openai', 'deepseek' and 'claude'. Failures are
    reported as descriptive strings rather than exceptions, so callers
    always receive text.
    """

    def __init__(self, config):
        """Store *config* (as returned by load_ai_config) and retry settings."""
        self.config = config
        self.service = config.get('service', DEFAULT_AI_SERVICE)
        self.timeout = DEFAULT_TIMEOUT
        self.max_retries = MAX_RETRY_ATTEMPTS

    def _calculate_safe_backoff(self, retry_count):
        """Return the delay in seconds before the next retry.

        Exponential in *retry_count* up to 10, linear beyond that (avoids
        huge powers), and always capped at MAX_BACKOFF_TIME.
        """
        try:
            if retry_count <= 10:
                backoff_time = RATE_LIMIT_BACKOFF_BASE ** retry_count
            else:
                backoff_time = retry_count * RATE_LIMIT_BACKOFF_BASE
            return min(backoff_time, MAX_BACKOFF_TIME)
        except (OverflowError, ValueError):
            return MAX_BACKOFF_TIME

    def _with_timeout(self, request_data):
        """Add the 'timeout' argument to *request_data* when one is configured."""
        if self.timeout is not None:
            request_data['timeout'] = self.timeout
        return request_data

    def _build_chat_completion_request(self, service, prompt, temperature, max_tokens):
        """Build requests.post kwargs for a bearer-token chat-completions API.

        Shared by the OpenAI and DeepSeek builders, whose requests differ
        only in which config section they read.
        """
        settings = self.config[service]
        return self._with_timeout({
            'url': f"{settings['base_url']}{AI_SERVICE_CONFIGS[service]['endpoint']}",
            'headers': {'Authorization': f"Bearer {settings['key']}"},
            'json': {
                'model': settings['model'],
                'messages': [{'role': 'user', 'content': prompt}],
                'temperature': temperature,
                'max_tokens': max_tokens
            }
        })

    def _get_ollama_request(self, prompt, temperature, max_tokens):
        """Build requests.post kwargs for Ollama (max_tokens is sent as num_ctx)."""
        settings = self.config['ollama']
        return self._with_timeout({
            'url': f"{settings['url']}{AI_SERVICE_CONFIGS['ollama']['endpoint']}",
            'json': {
                'model': settings['model'],
                'prompt': prompt,
                'stream': False,
                'options': {
                    'temperature': temperature,
                    'num_ctx': max_tokens
                }
            }
        })

    def _get_openai_request(self, prompt, temperature, max_tokens):
        """Build requests.post kwargs for the OpenAI chat-completions endpoint."""
        return self._build_chat_completion_request('openai', prompt, temperature, max_tokens)

    def _get_deepseek_request(self, prompt, temperature, max_tokens):
        """Build requests.post kwargs for the DeepSeek chat-completions endpoint."""
        return self._build_chat_completion_request('deepseek', prompt, temperature, max_tokens)

    def _get_claude_request(self, prompt, temperature, max_tokens):
        """Build requests.post kwargs for the Claude messages endpoint."""
        settings = self.config['claude']
        return self._with_timeout({
            'url': f"{settings['base_url']}{AI_SERVICE_CONFIGS['claude']['endpoint']}",
            'headers': {
                'x-api-key': settings['key'],
                'anthropic-version': AI_SERVICE_CONFIGS['claude']['version']
            },
            'json': {
                'model': settings['model'],
                'max_tokens': max_tokens,
                'temperature': temperature,
                'messages': [{'role': 'user', 'content': prompt}]
            }
        })

    def _parse_response(self, response, service):
        """Extract the generated text from *response* for *service*.

        Returns:
            str: the model output, "Error: <body>" for a non-OK response,
            "Unsupported AI service" for an unknown service, or
            "Error parsing response: ..." when the payload does not have the
            expected shape (missing keys, wrong container types, bad JSON).
        """
        if not response.ok:
            return f"Error: {response.text}"

        try:
            if service == 'ollama':
                return response.json().get('response', 'No AI response')
            elif service in ['openai', 'deepseek']:
                return response.json()['choices'][0]['message']['content']
            elif service == 'claude':
                return response.json()['content'][0]['text']
            else:
                return "Unsupported AI service"
        except (KeyError, IndexError, ValueError, TypeError, AttributeError) as e:
            # TypeError/AttributeError: the JSON was valid but not the
            # expected dict/list nesting (e.g. a top-level list or null).
            return f"Error parsing response: {str(e)}"

    def _handle_rate_limiting(self, response, retry_count):
        """Sleep for the backoff delay when *response* is HTTP 429.

        Returns:
            bool: True when the response was rate limited (after sleeping).
        """
        if response.status_code == HTTP_RATE_LIMITED:
            backoff_time = self._calculate_safe_backoff(retry_count)
            time.sleep(backoff_time)
            return True
        return False

    def generate_response(self, prompt):
        """Send *prompt* to the configured service and return its text answer.

        Rate-limited responses and connection errors are retried up to
        self.max_retries attempts with capped backoff.

        Returns:
            str: the model output, or an error description; never raises for
            request or parsing problems.
        """
        params = self.config.get('parameters') or {}
        temperature = params.get('temperature', DEFAULT_TEMPERATURE)
        # NOTE(review): the configured context size ('num_ctx') is also what
        # gets sent as 'max_tokens' to the chat APIs — confirm this is intended.
        max_tokens = params.get('num_ctx', DEFAULT_CONTEXT_SIZE)

        builders = {
            'ollama': self._get_ollama_request,
            'openai': self._get_openai_request,
            'deepseek': self._get_deepseek_request,
            'claude': self._get_claude_request,
        }
        try:
            builder = builders.get(self.service)
            if builder is None:
                return "Invalid AI service configuration"
            # The request does not change between attempts, so build it once.
            request_data = builder(prompt, temperature, max_tokens)
        except Exception as e:
            return f"Unexpected error: {str(e)}"

        last_attempt = self.max_retries - 1
        for retry_count in range(self.max_retries):
            try:
                response = requests.post(**request_data)

                if response.status_code == HTTP_RATE_LIMITED:
                    # Back off only if another attempt follows; sleeping after
                    # the final attempt would just delay the error.
                    if retry_count < last_attempt:
                        self._handle_rate_limiting(response, retry_count)
                    continue

                return self._parse_response(response, self.service)

            except requests.exceptions.RequestException as e:
                if retry_count == last_attempt:
                    return f"Connection failed: {str(e)}"
                time.sleep(self._calculate_safe_backoff(retry_count))
            except Exception as e:
                return f"Unexpected error: {str(e)}"

        return "Maximum retry attempts exceeded"

def get_ai_analysis(file_path, diff_content):
    """Ask the configured AI service to analyze one file's diff.

    The 'main_analysis' prompt template is filled with *file_path* and
    *diff_content* and sent through AIServiceClient.

    Returns:
        str: the client's answer (model text or an error description).
    """
    config = load_ai_config()
    template = config['prompts']['main_analysis']
    filled_prompt = template.format(file_path=file_path, diff_content=diff_content)
    return AIServiceClient(config).generate_response(filled_prompt)

def get_cve_description(cve_id):
    """Fetch the description text of *cve_id* from its NVD detail page.

    Returns:
        str: the description, "No description found" when the page has no
        description element, or "Failed to fetch CVE description: ..." on
        any error.
    """
    try:
        page = requests.get(
            f"https://nvd.nist.gov/vuln/detail/{cve_id}",
            timeout=CVE_FETCH_TIMEOUT,
            headers={"User-Agent": USER_AGENT},
        )
        page.raise_for_status()
        document = BeautifulSoup(page.text, 'html.parser')
        description_tag = document.find('p', {'data-testid': 'vuln-description'})
        if description_tag:
            return description_tag.text.strip()
        return "No description found"
    except Exception as e:
        return f"Failed to fetch CVE description: {str(e)}"

def analyze_with_cve(ai_response, cve_description):
    """Ask the AI service whether an earlier analysis matches a CVE description.

    The 'cve_analysis' prompt template is filled with *ai_response* and
    *cve_description* and sent through AIServiceClient.

    Returns:
        str: the client's answer (model text or an error description).
    """
    config = load_ai_config()
    template = config['prompts']['cve_analysis']
    question = template.format(ai_response=ai_response, cve_description=cve_description)
    return AIServiceClient(config).generate_response(question)

def extract_context(diff_lines, match_indices, context=15):
    """Return the lines around each matched index, with overlaps merged.

    Each index in *match_indices* contributes a window of *context* lines on
    either side, clamped to the bounds of *diff_lines*. Overlapping or
    directly adjacent windows are merged; separate windows are joined by a
    "..." marker line.
    """
    last_index = len(diff_lines) - 1
    windows = sorted(
        (max(0, idx - context), min(last_index, idx + context))
        for idx in match_indices
    )

    merged = []
    for low, high in windows:
        if merged and low <= merged[-1][1] + 1:
            # Touches or overlaps the previous window: extend it.
            if high > merged[-1][1]:
                merged[-1][1] = high
        else:
            merged.append([low, high])

    result_lines = []
    for position, (low, high) in enumerate(merged):
        if position:
            result_lines.append("...")
        result_lines.extend(diff_lines[low:high + 1])
    return result_lines

def get_files(folder):
    """Return the set of all file paths under *folder*, relative to it."""
    return {
        os.path.relpath(os.path.join(root, name), folder)
        for root, _subdirs, names in os.walk(folder)
        for name in names
    }

def read_file(file_path):
    """Read a text file located under the products or saved-analyses folder.

    Args:
        file_path: Path of the file to read.

    Returns:
        list[str]: the file's lines (UTF-8, line endings kept), or [] when
        the path is empty or missing, resolves outside the two allowed
        folders, exceeds MAX_FILE_SIZE, or cannot be read or decoded.
    """
    try:
        if not file_path or not os.path.exists(file_path):
            return []

        real_path = os.path.realpath(file_path)
        allowed_roots = (
            os.path.realpath(PRODUCTS_DIR),
            os.path.realpath(SAVED_ANALYSES_DIR),
        )
        # Compare against "root + separator": a bare prefix test would also
        # accept sibling folders such as "<root>_evil/...".
        inside_allowed_root = any(
            real_path == root or real_path.startswith(os.path.join(root, ''))
            for root in allowed_roots
        )
        if not inside_allowed_root:
            return []

        if os.path.getsize(file_path) > MAX_FILE_SIZE:
            return []

        with open(file_path, "r", encoding="utf-8") as file:
            return file.readlines()
    except Exception as e:
        app.logger.warning(f"Error reading {file_path}: {e}")
        return []



def _keywords_present(lines, manual_keywords):
    """Return True when *lines* should be flagged for the special report.

    Without keywords every file is flagged; otherwise at least one sanitized
    keyword must occur in at least one line.
    """
    if not manual_keywords:
        return True
    keywords = [validate_input(k.strip(), 50) for k in manual_keywords if k.strip()]
    keywords = [k for k in keywords if k]
    return any(k in line for line in lines for k in keywords)


def compare_single_file(file_info):
    """Build the diff entry for one file of a folder comparison.

    Args:
        file_info: Tuple ``(file, old_folder, new_folder, ext_filter,
            manual_keywords, file_type)``. *file* is the path relative to
            both folders; *file_type* is 'deleted', 'added' or anything else
            for a file present on both sides. *ext_filter* is accepted but
            not used here.

    Returns:
        dict | None: ``{'file', 'diff', 'save_special', 'type'}`` where
        'diff' is a list of unified-diff lines, or None when the file name
        is rejected, a side is missing or empty/unreadable, nothing changed,
        or an error occurred (logged as a warning).
    """
    file, old_folder, new_folder, ext_filter, manual_keywords, file_type = file_info

    if not validate_filename(file):
        return None

    old_path = os.path.join(old_folder, file)
    new_path = os.path.join(new_folder, file)

    try:
        if file_type == 'deleted':
            if not os.path.exists(old_path):
                return None

            old_code = read_file(old_path)
            if not old_code:
                return None

            # The hunk header must carry the real line count; it was
            # previously emitted as the literal text "{len(old_code)}".
            diff = [f"--- {old_path}\n", "+++ /dev/null\n", f"@@ -1,{len(old_code)} +0,0 @@\n"]
            diff.extend(f"-{line}" for line in old_code)

            return {
                'file': file,
                'diff': diff,
                'save_special': _keywords_present(old_code, manual_keywords),
                'type': 'deleted',
            }

        elif file_type == 'added':
            if not os.path.exists(new_path):
                return None

            new_code = read_file(new_path)
            if not new_code:
                return None

            diff = ["--- /dev/null\n", f"+++ {new_path}\n", f"@@ -0,0 +1,{len(new_code)} @@\n"]
            diff.extend(f"+{line}" for line in new_code)

            return {
                'file': file,
                'diff': diff,
                'save_special': _keywords_present(new_code, manual_keywords),
                'type': 'added',
            }

        else:
            if not (os.path.exists(old_path) and os.path.exists(new_path)):
                return None

            old_code = read_file(old_path)
            new_code = read_file(new_path)

            if not old_code or not new_code:
                return None

            diff = list(difflib.unified_diff(old_code, new_code, fromfile=old_path, tofile=new_path, lineterm="\n"))

            if not diff:
                return None

            # For modified files the keywords are searched in the diff itself.
            return {
                'file': file,
                'diff': diff,
                'save_special': _keywords_present(diff, manual_keywords),
                'type': 'modified',
            }

    except Exception as e:
        app.logger.warning(f"Error comparing file {file}: {str(e)}")
        return None

def compare_folders(old_folder, new_folder, ext_filter=None, manual_keywords=None):
    """Diff two folders and write the result to ``diff.txt`` and ``special.txt``.

    Files are classified as modified (present in both folders), deleted (only
    in ``old_folder``) or added (only in ``new_folder``) and handed to
    ``compare_single_file`` on a thread pool. Every non-empty result is
    written to the full diff; results flagged ``save_special`` are also
    written to the special diff. Both files are built in a temporary
    directory and then copied into the current working directory.

    Args:
        old_folder: Path of the older tree; must resolve inside PRODUCTS_DIR.
        new_folder: Path of the newer tree; must resolve inside PRODUCTS_DIR.
        ext_filter: Optional comma-separated extensions (leading dot optional,
            case-insensitive). When at least one extension is given, only
            files with a matching extension are compared.
        manual_keywords: Optional keywords forwarded to ``compare_single_file``.

    Returns:
        None. The function returns silently without writing anything when a
        folder is missing or lies outside PRODUCTS_DIR.
    """
    if not old_folder or not new_folder:
        return

    if not (os.path.exists(old_folder) and os.path.exists(new_folder)):
        return

    products_real = os.path.realpath(PRODUCTS_DIR)
    # join(path, '') appends exactly one trailing separator.
    products_prefix = os.path.join(products_real, '')

    def _inside_products(path):
        # Compare on a path-component boundary: a bare startswith() would also
        # accept sibling directories whose name merely begins with the
        # products directory name (e.g. "<products>_backup").
        real = os.path.realpath(path)
        return real == products_real or real.startswith(products_prefix)

    if not (_inside_products(old_folder) and _inside_products(new_folder)):
        return

    def _write_entry(handle, result):
        # Entry layout: "<file> (<type>)", a rule of 8 '=', the diff lines,
        # a rule of 9 '=' and a blank line.
        handle.write(f"{result['file']} ({result.get('type', 'modified')})\n")
        handle.write("=" * 8 + "\n")
        handle.writelines(result['diff'])
        handle.write("=" * 9 + "\n\n")

    temp_dir = tempfile.mkdtemp()
    diff_file = os.path.join(temp_dir, "diff.txt")
    special_file = os.path.join(temp_dir, "special.txt")

    try:
        old_files = get_files(old_folder)
        new_files = get_files(new_folder)
        common_files = old_files & new_files
        deleted_files = old_files - new_files
        added_files = new_files - old_files

        app.logger.info(f"compare_folders: old_files={len(old_files)}, new_files={len(new_files)}, common_files={len(common_files)}, deleted_files={len(deleted_files)}, added_files={len(added_files)}")

        if ext_filter:
            normalized_extensions = set()
            for ext in ext_filter.split(','):
                ext = ext.strip()
                if not ext:
                    continue
                if not ext.startswith('.'):
                    ext = '.' + ext
                normalized_extensions.add(ext.lower())

            # A filter made only of separators/whitespace filters nothing.
            if normalized_extensions:
                def _matches(name):
                    return os.path.splitext(name)[1].lower() in normalized_extensions

                common_files = {name for name in common_files if _matches(name)}
                deleted_files = {name for name in deleted_files if _matches(name)}
                added_files = {name for name in added_files if _matches(name)}

                app.logger.info(f"Extension filter applied: common={len(common_files)}, deleted={len(deleted_files)}, added={len(added_files)} files match extensions {ext_filter}")

        all_files_to_process = []
        for file_type, names in (('modified', common_files),
                                 ('deleted', deleted_files),
                                 ('added', added_files)):
            for file in names:
                all_files_to_process.append((file, old_folder, new_folder, ext_filter, manual_keywords, file_type))

        # ThreadPoolExecutor raises ValueError for max_workers=0, which used
        # to happen whenever nothing was left to compare.
        max_workers = max(1, min(32, len(all_files_to_process)))

        app.logger.info(f"compare_folders: processing {len(all_files_to_process)} files with {max_workers} workers")

        files_with_diffs = 0
        files_with_keywords = 0

        # Open each output once; the files exist (possibly empty) even when
        # no file produced a diff.
        with open(diff_file, "w", encoding="utf-8") as diff_out, \
                open(special_file, "w", encoding="utf-8") as special_out:
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                future_to_file = {executor.submit(compare_single_file, task): task[0] for task in all_files_to_process}

                for future in concurrent.futures.as_completed(future_to_file):
                    result = future.result()
                    if not result:
                        continue

                    files_with_diffs += 1
                    _write_entry(diff_out, result)

                    if result['save_special']:
                        files_with_keywords += 1
                        _write_entry(special_out, result)

        app.logger.info(f"compare_folders: {files_with_diffs} files had differences, {files_with_keywords} files matched keywords")

        if os.path.exists(diff_file):
            shutil.copy2(diff_file, "diff.txt")
        if os.path.exists(special_file):
            shutil.copy2(special_file, "special.txt")

    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)

def parse_diff_file(diff_path):
    """Parse a diff file in the layout written by ``compare_folders``.

    Each entry consists of a header line ``<filename> (<type>)`` (the
    `` (<type>)`` suffix is optional and defaults to ``modified``), an
    optional rule of 8 ``=``, the diff body, and a closing rule of 9 ``=``.
    Blank lines between entries are ignored.

    Args:
        diff_path: Path of the diff file to read.

    Returns:
        A list of ``{'filename', 'diff', 'type'}`` dicts, where ``diff`` holds
        at most 10000 body lines without trailing newlines. An empty list is
        returned when the path is empty or missing, the file exceeds
        MAX_DIFF_FILE_SIZE, or it cannot be read as UTF-8.
    """
    diffs = []
    if not diff_path or not os.path.exists(diff_path):
        return diffs

    try:
        if os.path.getsize(diff_path) > MAX_DIFF_FILE_SIZE:
            app.logger.warning(f"Diff file too large: {diff_path}")
            return diffs
    except OSError:
        return diffs

    try:
        with open(diff_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except (IOError, UnicodeDecodeError) as e:
        app.logger.error(f"Error reading diff file {diff_path}: {str(e)}")
        return diffs

    header_rule = "=" * 8
    footer_rule = "=" * 9
    max_diff_lines = 10000
    total = len(lines)

    i = 0
    while i < total:
        if lines[i].strip() == "":
            i += 1
            continue

        filename_line = lines[i].rstrip("\n")
        i += 1

        filename = filename_line
        file_type = 'modified'
        if ' (' in filename_line and filename_line.endswith(')'):
            filename, type_part = filename_line.rsplit(' (', 1)
            file_type = type_part.rstrip(')')

        has_header_rule = i < total and lines[i].strip() == header_rule
        if has_header_rule:
            i += 1

        keep = validate_filename(filename)
        if not keep and not has_header_rule:
            # A stray line that is not an entry header: skip only this line.
            continue

        # Always walk to the closing rule. Stopping at the line cap (or right
        # after a rejected filename) would leave the cursor inside the body,
        # and the remaining body lines would then be parsed as entry headers.
        diff_lines = []
        while i < total and lines[i].strip() != footer_rule:
            if keep and len(diff_lines) < max_diff_lines:
                diff_lines.append(lines[i].rstrip("\n"))
            i += 1

        if i < total:
            i += 1  # consume the closing rule

        if keep:
            diffs.append({'filename': filename, 'diff': diff_lines, 'type': file_type})

    return diffs

def analyze_diffs_with_keywords(diffs, manual_keywords):
    """Collect the context lines of every diff, optionally narrowed by keywords.

    Args:
        diffs: Iterable of diff entries; entries that are not dicts carrying
            both ``filename`` and ``diff`` are ignored, as are entries whose
            filename fails ``validate_filename``.
        manual_keywords: Optional keywords. Each one is passed through
            ``validate_input`` (limit 50) and empty results are dropped. When
            no keyword remains, every diff line counts as a match.

    Returns:
        Dict mapping filename to ``{'context': [...]}`` with at most 1000
        non-blank context lines. The first entry seen for a filename wins.
    """
    if manual_keywords:
        sanitized = [validate_input(raw, 50) for raw in manual_keywords]
        manual_keywords = [kw for kw in sanitized if kw]

    results = {}

    for entry in diffs:
        usable = isinstance(entry, dict) and 'filename' in entry and 'diff' in entry
        if not usable:
            continue

        filename = entry['filename']
        diff_lines = entry['diff']

        if not validate_filename(filename):
            continue

        if manual_keywords:
            match_indices = [
                idx for idx, text in enumerate(diff_lines)
                if any(kw in text for kw in manual_keywords)
            ]
        else:
            match_indices = list(range(len(diff_lines)))

        if not match_indices:
            continue

        context_lines = extract_context(diff_lines, sorted(set(match_indices)))
        # Drop blank lines, strip trailing newlines, then cap at 1000 lines.
        context_lines = [text.rstrip('\n') for text in context_lines if text.strip()][:1000]

        results.setdefault(filename, {'context': context_lines})

    return results

def get_github_versions(repo_url):
    """Fetch the version refs of a repository and return them sorted.

    Requests ``<repo_url>/refs?tag_name=&experimental=1`` and reads the
    ``refs`` list of the JSON response. Entries that are not strings or fail
    ``validate_version`` are dropped and duplicates are removed.

    Ordering: versions starting with ``major.minor.patch`` (an optional
    leading ``v`` is ignored) come first, highest version first, with a plain
    release ahead of a ``-suffix`` variant of the same number. Any other
    version follows in ascending string order. If no version at all has the
    ``major.minor.patch`` form, the list is returned in descending string
    order instead.

    Returns:
        List of version strings; empty on an invalid URL, an unexpected
        payload or any error (which is logged).
    """
    def sort_key(version):
        semver = re.match(r'^(\d+)\.(\d+)\.(\d+)(?:-(.+))?', version.lstrip('v'))
        if not semver:
            return (1, version)
        major, minor, patch, suffix = semver.groups()
        # Negated numbers give a descending order under an ascending sort.
        return (0, -int(major), -int(minor), -int(patch), 0 if suffix is None else 1, suffix or "")

    try:
        if not validate_url(repo_url):
            return []

        response = requests.get(
            f"{repo_url}/refs?tag_name=&experimental=1",
            headers={
                'User-Agent': USER_AGENT,
                'X-Requested-With': 'XMLHttpRequest'
            },
            timeout=CVE_FETCH_TIMEOUT,
        )
        response.raise_for_status()

        payload = response.json()
        if not isinstance(payload, dict):
            return []

        candidates = []
        refs = payload.get('refs')
        if isinstance(refs, list):
            for ref in refs:
                if not isinstance(ref, str):
                    continue
                tag = ref.strip()
                if validate_version(tag):
                    candidates.append(tag)

        # dict.fromkeys de-duplicates while keeping first-seen order.
        versions = sorted(dict.fromkeys(candidates), key=sort_key)
        if versions and all(sort_key(tag)[0] == 1 for tag in versions):
            versions.reverse()
        return versions

    except Exception as e:
        app.logger.error(f"Version fetch error: {str(e)}")
        return []

def count_vulnerabilities(results):
    """Sum the vulnerability counts found in ``vulnerability_status`` fields.

    Only statuses starting with ``AI:`` are considered. The text after that
    prefix is skipped when it starts with ``Not sure`` or
    ``No vulnerabilities``; otherwise its first whitespace-separated token is
    parsed as an integer and added to the total.

    Args:
        results: Dict mapping filename to a per-file result dict.

    Returns:
        The summed count as an int. Entries that are not dicts, statuses that
        are not strings, and statuses without a leading integer contribute 0.
    """
    prefix = 'AI:'
    total_count = 0

    for result in results.values():
        if not isinstance(result, dict):
            continue

        status = result.get('vulnerability_status')
        if not isinstance(status, str) or not status.startswith(prefix):
            continue

        # Slice the prefix off instead of splitting on 'AI: ': a status
        # without the space after the colon used to raise IndexError.
        vuln_text = status[len(prefix):].strip()
        if vuln_text.startswith(('Not sure', 'No vulnerabilities')):
            continue

        try:
            total_count += int(vuln_text.split()[0])
        except (ValueError, IndexError):
            # No leading number: nothing to count for this file.
            continue

    return total_count

def process_ai_analysis(analyzed_results, diffs, cve_ids):
    """Run the AI analysis for every analysed diff and record the verdicts.

    For each diff whose filename is a key of ``analyzed_results``,
    ``get_ai_analysis`` is run on a thread pool. The response is stored under
    ``ai_response`` and summarised into ``vulnerability_status`` and
    ``vuln_severity``. When ``cve_ids`` is given, the response is also
    compared against each CVE description and the outcome is stored under
    ``cve_matches``.

    Args:
        analyzed_results: Dict mapping filename to a result dict; updated in
            place.
        diffs: List of ``{'filename', 'diff'}`` dicts, ``diff`` being a list
            of lines.
        cve_ids: Optional comma-separated CVE identifiers.

    Returns:
        The same ``analyzed_results`` object. A file whose processing raised
        gets ``vulnerability_status`` set to ``"Analysis error"``.
    """
    ai_tasks = [(diff['filename'], '\n'.join(diff['diff'])) for diff in diffs if diff['filename'] in analyzed_results]

    # The CVE list is the same for every file, so parse it once up front.
    cve_id_list = [part.strip() for part in cve_ids.split(',') if part.strip()] if cve_ids else []
    # Descriptions already fetched during this call, so each CVE is looked up
    # once rather than once per file. Empty results are not kept and are
    # therefore retried.
    cve_descriptions = {}

    with ThreadPoolExecutor(max_workers=100) as executor:
        futures = {executor.submit(get_ai_analysis, task[0], task[1]): task[0] for task in ai_tasks}

        for future in concurrent.futures.as_completed(futures):
            filename = futures[future]
            try:
                ai_response = future.result()
                analyzed_results[filename]['ai_response'] = ai_response

                if cve_ids:
                    cve_results = {}
                    for cve_id in cve_id_list:
                        cve_description = cve_descriptions.get(cve_id)
                        if not cve_description:
                            cve_description = get_cve_description(cve_id)
                            if cve_description:
                                cve_descriptions[cve_id] = cve_description
                        cve_analysis = analyze_with_cve(ai_response, cve_description)
                        match = re.search(r'Description Matches:\s*(Yes|No)', cve_analysis, re.I)
                        cve_results[cve_id] = {
                            'result': match.group(1).capitalize() if match else 'Unknown',
                            'description': cve_description
                        }
                    analyzed_results[filename]['cve_matches'] = cve_results

                vuln_matches = re.findall(r'(?i)(Vulnerability\s+Existed|Vuln\s+Existence).*?:\s*.*?\b(not[\s\-_]?sure|yes|no)\b', ai_response)
                verdicts = [match[1].lower() for match in vuln_matches]
                yes_count = verdicts.count('yes')
                # The pattern also accepts "not-sure", "not_sure" and
                # "notsure". Testing for the literal 'not sure' missed those
                # and reported them as "No vulnerabilities".
                not_sure = any(verdict.startswith('not') for verdict in verdicts)

                if yes_count > 0:
                    status, severity = f"AI: {yes_count} vulnerabilities", 'yes'
                elif not_sure:
                    status, severity = "AI: Not sure", 'not sure'
                else:
                    status, severity = "AI: No vulnerabilities", 'no'
                analyzed_results[filename]['vulnerability_status'] = status
                analyzed_results[filename]['vuln_severity'] = severity
            except Exception as e:
                analyzed_results[filename]['ai_response'] = f"AI analysis failed: {str(e)}"
                analyzed_results[filename]['vulnerability_status'] = "Analysis error"
                analyzed_results[filename]['vuln_severity'] = 'no'

    return analyzed_results



@app.route('/save-analysis', methods=['POST'])
@limiter.limit("10 per minute")
@requires_basic_auth
def save_analysis():
    """Store a posted analysis as ``<uuid>.json`` in SAVED_ANALYSES_DIR.

    The JSON body must be an object. Its ``source``, ``enable_ai``,
    ``params`` and ``results`` entries are saved together with a creation
    timestamp and, when AI is enabled, the service and model currently
    configured.

    Returns:
        ``{'id': <uuid>}`` on success, a 400 JSON error when the body is not
        a JSON object, or a 500 JSON error when the file cannot be written.
    """
    try:
        data = request.json
    except Exception:
        data = None
    if not isinstance(data, dict):
        return jsonify({'error': 'Invalid JSON data'}), 400

    analysis_id = str(uuid.uuid4())

    # Service and model are recorded only for AI-enabled analyses.
    ai_service = ai_model = None
    if data.get('enable_ai', False):
        config = load_ai_config()
        ai_service = config.get('service', 'ollama')
        if ai_service in config:
            ai_model = config[ai_service].get('model', 'Unknown')

    meta = {
        'created_at': datetime.now().isoformat(),
        'source': validate_input(data.get('source', 'direct'), 20),
        'ai_enabled': bool(data.get('enable_ai', False)),
        'ai_service': ai_service,
        'ai_model': ai_model,
        'params': data.get('params', {})
    }
    analysis_data = {'meta': meta, 'results': data.get('results', {})}

    try:
        analysis_path = os.path.join(SAVED_ANALYSES_DIR, f"{analysis_id}.json")
        with open(analysis_path, 'w') as out:
            json.dump(analysis_data, out, indent=2)
        os.chmod(analysis_path, 0o644)
    except Exception as e:
        app.logger.error(f"Error saving analysis: {str(e)}")
        return jsonify({'error': 'Failed to save analysis'}), 500

    return jsonify({'id': analysis_id})

@app.route('/analysis/<analysis_id>')
@limiter.limit("30 per minute")
def view_analysis(analysis_id):
    """Render a saved analysis with filtering, search and pagination.

    Query parameters:
        page: 1-based page number (default 1, clamped to the available pages).
        per_page: Page size (default 20, clamped to 5..100).
        filter: ``all``, ``cve`` (files with at least one CVE match whose
            result is ``Yes``) or ``vuln`` (files whose ``vuln_severity`` is
            ``yes``). Any other value selects nothing.
        search: Case-insensitive substring matched against the file name, the
            AI response and the vulnerability status.

    Returns:
        The rendered ``analysis.html`` page. An invalid id aborts with 404;
        a missing or malformed file, or any other error, renders
        ``error.html`` with status 404.
    """
    if not validate_uuid(analysis_id):
        abort(404)

    try:
        # abort() raises an HTTPException, which is an Exception subclass, so
        # every abort inside this try ends up in the handler at the bottom.
        analysis_path = os.path.join(SAVED_ANALYSES_DIR, f"{analysis_id}.json")

        if not os.path.exists(analysis_path):
            abort(404)

        real_path = os.path.realpath(analysis_path)
        if not real_path.startswith(os.path.realpath(SAVED_ANALYSES_DIR)):
            abort(404)

        with open(analysis_path) as f:
            analysis = json.load(f)

        if not isinstance(analysis, dict) or 'meta' not in analysis:
            abort(404)

        status = analysis['meta'].get('status', 'completed')

        # type=int already yields an int or the default, so no further
        # conversion is needed before clamping.
        page = max(1, request.args.get('page', 1, type=int))
        per_page = max(5, min(100, request.args.get('per_page', 20, type=int)))
        filter_type = request.args.get('filter', 'all')
        search_term = request.args.get('search', '').strip().lower()

        results = analysis.get('results', {})
        filtered_results = {}

        if filter_type == 'all':
            filtered_results = results
        elif filter_type == 'cve':
            for filename, result in results.items():
                cve_matches = result.get('cve_matches') if isinstance(result, dict) else None
                if not isinstance(cve_matches, dict):
                    continue
                if any(isinstance(cve_data, dict) and cve_data.get('result') == 'Yes'
                       for cve_data in cve_matches.values()):
                    filtered_results[filename] = result
        elif filter_type == 'vuln':
            for filename, result in results.items():
                if isinstance(result, dict) and result.get('vuln_severity') == 'yes':
                    filtered_results[filename] = result

        if search_term:
            def _searchable(result, key):
                # Saved results are stored as posted by the client, so a field
                # can be null or a non-string. Treat those as empty text
                # rather than failing the whole page.
                value = result.get(key) if isinstance(result, dict) else None
                return value.lower() if isinstance(value, str) else ''

            filtered_results = {
                filename: result
                for filename, result in filtered_results.items()
                if (search_term in _searchable(result, 'ai_response') or
                    search_term in str(filename).lower() or
                    search_term in _searchable(result, 'vulnerability_status'))
            }

        total_items = len(filtered_results)
        # Ceiling division; an empty result set still has one (empty) page.
        total_pages = max(1, (total_items + per_page - 1) // per_page)
        page = min(page, total_pages)

        start_idx = (page - 1) * per_page
        end_idx = start_idx + per_page

        if filtered_results:
            paginated_results = dict(list(filtered_results.items())[start_idx:end_idx])
        else:
            paginated_results = {}

        pagination = {
            'page': page,
            'per_page': per_page,
            'filter': filter_type,
            'search': search_term,
            'total_items': total_items,
            'total_pages': total_pages,
            'has_prev': page > 1,
            'has_next': page < total_pages,
            'prev_page': page - 1 if page > 1 else None,
            'next_page': page + 1 if page < total_pages else None,
            'start_item': start_idx + 1 if total_items > 0 else 0,
            'end_item': min(end_idx, total_items)
        }

        if status == 'completed' and results:
            return render_template("analysis.html",
                                 analysis=analysis,
                                 paginated_results=paginated_results,
                                 pagination=pagination,
                                 total_original_files=len(results),
                                 is_shared=True,
                                 analysis_id=analysis_id,
                                 status=status)
        else:
            # Unfinished or empty analyses are shown without a result list.
            return render_template("analysis.html",
                                 analysis=analysis,
                                 paginated_results={},
                                 pagination=None,
                                 total_original_files=0,
                                 is_shared=True,
                                 analysis_id=analysis_id,
                                 status=status)
    except Exception as e:
        app.logger.error(f"Error loading analysis {analysis_id}: {str(e)}")
        return render_template("error.html", message="Analysis not found"), 404

@app.route('/delete-analysis/<analysis_id>', methods=['POST'])
@limiter.limit("10 per minute")
@requires_basic_auth
def delete_analysis(analysis_id):
    """Delete the saved analysis ``<analysis_id>.json`` and return to the reports page.

    The outcome (invalid id, invalid path, not found, deleted, or error) is
    reported through a flash message; every path ends in a redirect to the
    ``reports`` endpoint.
    """
    if not validate_uuid(analysis_id):
        flash('Invalid analysis ID.', 'danger')
        return redirect(url_for('reports'))

    try:
        target = os.path.join(SAVED_ANALYSES_DIR, f"{analysis_id}.json")
        # The resolved path must still be located under the analyses directory.
        inside_store = os.path.realpath(target).startswith(os.path.realpath(SAVED_ANALYSES_DIR))

        if not inside_store:
            flash('Invalid analysis path.', 'danger')
        elif not os.path.exists(target):
            flash('Analysis not found.', 'danger')
        else:
            os.remove(target)
            flash('Analysis deleted successfully.', 'success')
            app.logger.info(f"Analysis {analysis_id} deleted")
    except Exception as e:
        app.logger.error(f"Error deleting analysis {analysis_id}: {str(e)}")
        flash('Error deleting analysis.', 'danger')

    return redirect(url_for('reports'))

@app.route('/delete-benchmark/<benchmark_id>', methods=['POST'])
@requires_basic_auth
def delete_benchmark(benchmark_id):
    """Delete the stored benchmark file ``benchmark_<benchmark_id>.json``.

    Returns:
        A JSON success message, or a JSON error with status 400 (invalid id
        or path), 404 (no such benchmark) or 500 (unexpected failure).
    """
    if not validate_uuid(benchmark_id):
        return jsonify({'error': 'Invalid benchmark ID'}), 400

    try:
        target = os.path.join(SAVED_ANALYSES_DIR, f"benchmark_{benchmark_id}.json")

        # The resolved path must still be located under the analyses directory.
        store_root = os.path.realpath(SAVED_ANALYSES_DIR)
        if not os.path.realpath(target).startswith(store_root):
            return jsonify({'error': 'Invalid benchmark path'}), 400

        if not os.path.exists(target):
            return jsonify({'error': 'Benchmark not found'}), 404

        os.remove(target)
        app.logger.info(f"Benchmark {benchmark_id} deleted")
        return jsonify({'success': True, 'message': 'Benchmark deleted successfully'})
    except Exception as e:
        app.logger.error(f"Error deleting benchmark {benchmark_id}: {str(e)}")
        return jsonify({'error': 'Error deleting benchmark'}), 500

@app.route('/benchmark-status/<benchmark_id>')
@requires_basic_auth
def benchmark_status(benchmark_id):
    """Report the progress recorded in ``benchmark_<benchmark_id>.json``.

    Returns:
        JSON with ``status``, ``progress``, ``current_test`` and ``error``
        taken from the stored file (with defaults for missing keys), or a
        JSON error with status 400 (invalid id), 404 (no such benchmark) or
        500 (the file could not be read or parsed).
    """
    if not validate_uuid(benchmark_id):
        return jsonify({'error': 'Invalid benchmark ID'}), 400

    # Reported fields and the value used when the stored file lacks them.
    field_defaults = (
        ('status', 'unknown'),
        ('progress', 0),
        ('current_test', ''),
        ('error', ''),
    )

    try:
        source = os.path.join(SAVED_ANALYSES_DIR, f"benchmark_{benchmark_id}.json")
        if not os.path.exists(source):
            return jsonify({'status': 'not_found', 'error': 'Benchmark not found'}), 404

        with open(source, 'r') as handle:
            stored = json.load(handle)

        return jsonify({name: stored.get(name, fallback) for name, fallback in field_defaults})
    except Exception as e:
        app.logger.error(f"Error getting benchmark status {benchmark_id}: {str(e)}")
        return jsonify({'error': 'Error getting benchmark status'}), 500

@app.route('/ai-settings', methods=['GET','POST'])
@limiter.limit("5 per minute")
@requires_basic_auth
def ai_settings():
    """Show the AI settings page, or save the submitted settings on POST.

    On POST the complete configuration is rebuilt from the form and written
    to AI_CONFIG_FILE: the service (falling back to ``ollama`` when it is not
    in VALID_AI_SERVICES), the per-service fields, the temperature (clamped
    to 0..2.0, default 1.0), ``num_ctx`` (clamped to 1024..32768, default
    8192) and both prompts. The result is reported through a flash message
    and the request is redirected back to this page.
    """
    if request.method != 'POST':
        return render_template("ai_settings.html", config=load_ai_config())

    form = request.form

    def field(name, limit):
        # Sanitised value of one form field.
        return validate_input(form.get(name), limit)

    ai_service = field('ai_service', 50)
    if ai_service not in VALID_AI_SERVICES:
        ai_service = 'ollama'

    # Unparseable numbers fall back to the defaults.
    try:
        temperature = max(0, min(2.0, float(form.get('temperature', 1.0))))
    except ValueError:
        temperature = 1.0

    try:
        num_ctx = max(1024, min(32768, int(form.get('num_ctx', 8192))))
    except ValueError:
        num_ctx = 8192

    config = {
        'service': ai_service,
        'ollama': {
            'url': field('ollama_url', 200),
            'model': field('ollama_model', 100)
        },
        'openai': {
            'key': field('openai_key', 200),
            'model': field('openai_model', 100),
            'base_url': field('openai_url', 200)
        },
        'deepseek': {
            'key': field('deepseek_key', 200),
            'model': field('deepseek_model', 100),
            'base_url': field('deepseek_url', 200)
        },
        'claude': {
            'key': field('claude_key', 200),
            'model': field('claude_model', 100),
            'base_url': field('claude_url', 200)
        },
        'parameters': {
            'temperature': temperature,
            'num_ctx': num_ctx
        },
        'prompts': {
            'main_analysis': validate_prompt(form.get('main_analysis_prompt'), 5000),
            'cve_analysis': validate_prompt(form.get('cve_analysis_prompt'), 5000)
        }
    }

    saved = save_json_safe(AI_CONFIG_FILE, config)
    if saved:
        flash('AI settings updated successfully', 'success')
    else:
        flash('Error saving AI settings', 'danger')

    return redirect(url_for('ai_settings'))

@app.route('/reset-prompts', methods=['POST'])
@limiter.limit("5 per minute")
@requires_basic_auth
def reset_prompts():
    """Replace the saved prompts with DEFAULT_PROMPTS, keeping all other settings.

    The current configuration is loaded, its ``prompts`` entry is replaced by
    a copy of DEFAULT_PROMPTS and the result is written to AI_CONFIG_FILE.
    The outcome is reported through a flash message and the request is
    redirected to the AI settings page.
    """
    try:
        config = load_ai_config()
        # Only the prompts are reset, so no other default needs to be built;
        # the service defaults used to be assembled here and then discarded.
        config['prompts'] = DEFAULT_PROMPTS.copy()

        if save_json_safe(AI_CONFIG_FILE, config):
            flash('Prompts reset to default values successfully', 'success')
        else:
            flash('Error resetting prompts', 'danger')
    except Exception as e:
        app.logger.error(f"Error resetting prompts: {str(e)}")
        flash('Error resetting prompts', 'danger')

    return redirect(url_for('ai_settings'))

def run_ai_benchmark(benchmark_id, benchmark_data):
    """Run every benchmark question against every AI configuration.

    The benchmark state is kept in ``benchmark_<benchmark_id>.json`` inside
    SAVED_ANALYSES_DIR. The file is written at the start, rewritten before
    each question with the current progress, and rewritten at the end with
    the per-question results and the metrics from
    ``calculate_benchmark_metrics``.

    Args:
        benchmark_id: Identifier used in the results file name.
        benchmark_data: Dict with ``questions`` (list of dicts with
            ``question``, optional ``variables`` and ``expected_answer``),
            ``ai_configs`` (name -> config dict) and optional ``judge_ai``
            (default ``'current'``).

    Returns:
        None. A failing question is recorded as an error result and the run
        continues. Any other failure marks the stored benchmark with status
        ``error``.
    """
    results_path = os.path.join(SAVED_ANALYSES_DIR, f"benchmark_{benchmark_id}.json")

    results = {
        'benchmark_id': benchmark_id,
        'status': 'running',
        'created_at': datetime.now().isoformat(),
        'config': benchmark_data,
        'results': {},
        'metrics': {}
    }

    def _persist():
        # Write the current benchmark state to the results file.
        with open(results_path, 'w') as f:
            json.dump(results, f, indent=2)

    try:
        _persist()

        questions = benchmark_data.get('questions', [])
        ai_configs = benchmark_data.get('ai_configs', {})
        judge_ai = benchmark_data.get('judge_ai', 'current')

        if not ai_configs:
            raise ValueError("No AI configurations provided")

        if not questions:
            raise ValueError("No benchmark questions provided")

        total_tests = len(ai_configs) * len(questions)
        current_test = 0

        for ai_name, ai_config in ai_configs.items():
            results['results'][ai_name] = []

            if not ai_config.get('service'):
                app.logger.warning(f"AI config {ai_name} missing service, skipping")
                # Count the skipped questions as done so the reported
                # progress keeps pace with total_tests.
                current_test += len(questions)
                continue

            for i, question_data in enumerate(questions):
                current_test += 1

                progress = (current_test / total_tests) * 100
                results['progress'] = round(progress, 1)
                results['current_test'] = f"Testing {ai_name} - Question {i+1}/{len(questions)}"

                _persist()

                try:
                    start_time = time.time()

                    # Fill the {name} placeholders of the question text.
                    question = question_data.get('question', '')
                    for var_name, var_value in question_data.get('variables', {}).items():
                        question = question.replace(f"{{{var_name}}}", str(var_value))

                    ai_response = get_ai_response(question, ai_config)
                    response_time = time.time() - start_time

                    expected_answer = question_data.get('expected_answer', '')

                    accuracy_score = evaluate_answer_accuracy(ai_response, expected_answer, judge_ai)

                    response_length = len(ai_response.split()) if ai_response else 0

                    question_result = {
                        'question_id': i,
                        'question': question,
                        'expected_answer': expected_answer,
                        'ai_response': ai_response,
                        'response_time': response_time,
                        'response_length': response_length,
                        'accuracy_score': accuracy_score,
                        'accuracy_binary': accuracy_score > 0.7,
                        'judge_ai': judge_ai
                    }

                    results['results'][ai_name].append(question_result)

                except Exception as e:
                    app.logger.error(f"Error processing question {i} for AI {ai_name}: {str(e)}")
                    error_result = {
                        'question_id': i,
                        'question': question_data.get('question', ''),
                        'expected_answer': question_data.get('expected_answer', ''),
                        'ai_response': f"Error: {str(e)}",
                        'response_time': 0.0,
                        'response_length': 0,
                        'accuracy_score': 0.0,
                        'accuracy_binary': False,
                        'judge_ai': judge_ai,
                        'error': str(e)
                    }
                    results['results'][ai_name].append(error_result)

        results['metrics'] = calculate_benchmark_metrics(results['results'])
        results['status'] = 'completed'
        results['progress'] = 100.0
        results['current_test'] = 'Benchmark completed successfully'

        _persist()

        app.logger.info(f"Benchmark {benchmark_id} completed successfully")

    except Exception as e:
        app.logger.error(f"Error running benchmark {benchmark_id}: {str(e)}")
        results['status'] = 'error'
        results['error'] = str(e)
        results['progress'] = 0.0
        results['current_test'] = f'Error: {str(e)}'
        try:
            _persist()
        except Exception as write_error:
            # Recording the failure is best effort, but it is logged rather
            # than silently dropped.
            app.logger.error(f"Could not record failure of benchmark {benchmark_id}: {str(write_error)}")

def get_ai_response(question, ai_config):
    """Send ``question`` to the configured AI service and return the reply text.

    Args:
        question: Prompt text forwarded verbatim to the model.
        ai_config: Mapping describing the target service. ``service`` selects
            the backend (defaults to ``'openai'``). ``model`` is required for
            every real backend, ``url`` for Ollama and ``key`` for OpenAI,
            DeepSeek and Claude. ``base_url``, ``temperature``, ``max_tokens``
            and ``demo_mode`` are optional. The mapping is only read; it is
            never modified.

    Returns:
        The model's answer as a string. Failures are reported in-band instead
        of being raised: ``"Error: <body>"`` for a non-OK HTTP response,
        ``"Error getting AI response: <exc>"`` for any exception, and
        ``"Unsupported AI service"`` for an unknown ``service`` value.
    """
    try:
        service = ai_config.get('service', 'openai')

        if ai_config.get('demo_mode', False):
            # Demo mode never touches the network.
            time.sleep(0.5)
            return f"Mock response for: {question[:50]}..."

        timeout = DEFAULT_TIMEOUT
        temperature = ai_config.get('temperature', DEFAULT_TEMPERATURE)
        max_tokens = ai_config.get('max_tokens', DEFAULT_MAX_TOKENS)

        if service == 'ollama':
            request_data = {
                'url': f"{ai_config['url']}/api/generate",
                'json': {
                    'model': ai_config['model'],
                    'prompt': question,
                    'stream': False,
                    'options': {
                        'temperature': temperature,
                        # NOTE(review): max_tokens is sent as Ollama's
                        # context-window option; confirm this is intended
                        # rather than a response-length limit.
                        'num_ctx': max_tokens
                    }
                }
            }

            def extract_text(payload):
                return payload.get('response', 'No response')

        elif service in ('openai', 'deepseek'):
            # Both services are called through the same chat-completions
            # request shape; only the default base URL differs.
            headers = {'Authorization': f"Bearer {ai_config['key']}"}
            if 'base_url' in ai_config:
                base_url = ai_config['base_url']
            else:
                # Resolve the default locally instead of writing it back into
                # the caller's configuration.
                base_url = AI_SERVICE_CONFIGS[service]['base_url']

            request_data = {
                'url': f"{base_url}/chat/completions",
                'headers': headers,
                'json': {
                    'model': ai_config['model'],
                    'messages': [{'role': 'user', 'content': question}],
                    'temperature': temperature,
                    'max_tokens': max_tokens
                }
            }

            def extract_text(payload):
                return payload['choices'][0]['message']['content']

        elif service == 'claude':
            headers = {
                'x-api-key': ai_config['key'],
                'anthropic-version': AI_SERVICE_CONFIGS['claude']['version']
            }
            if 'base_url' in ai_config:
                base_url = ai_config['base_url']
            else:
                base_url = AI_SERVICE_CONFIGS['claude']['base_url']

            request_data = {
                'url': f"{base_url}/messages",
                'headers': headers,
                'json': {
                    'model': ai_config['model'],
                    'max_tokens': max_tokens,
                    'temperature': temperature,
                    'messages': [{'role': 'user', 'content': question}]
                }
            }

            def extract_text(payload):
                return payload['content'][0]['text']

        else:
            return "Unsupported AI service"

        # Only pass a timeout when one is configured so that requests keeps
        # its own default otherwise.
        if timeout is not None:
            request_data['timeout'] = timeout

        response = requests.post(**request_data)
        if not response.ok:
            return f"Error: {response.text}"
        return extract_text(response.json())

    except Exception as e:
        return f"Error getting AI response: {str(e)}"

def evaluate_answer_accuracy(ai_response, expected_answer, judge_ai):
    """Score how well ``ai_response`` matches ``expected_answer``.

    Returns 0.0 for an empty response or one starting with ``"Error"``, and
    0.5 when there is no expected answer to compare against. Otherwise the
    score comes from the AI judge when ``judge_ai`` is set to something other
    than ``'current'``, and from the word-overlap heuristic in every other
    case. Any exception is logged and yields 0.5.
    """
    try:
        response_unusable = (not ai_response) or ai_response.startswith("Error")
        if response_unusable:
            return 0.0

        if not expected_answer:
            return 0.5

        # No judge, or the 'current' placeholder: score locally.
        if not judge_ai or judge_ai == 'current':
            return evaluate_with_heuristics(ai_response, expected_answer)

        return evaluate_with_ai_judge(ai_response, expected_answer, judge_ai)

    except Exception as e:
        app.logger.error(f"Error evaluating accuracy: {str(e)}")
        return 0.5

def evaluate_with_ai_judge(ai_response, expected_answer, judge_ai):
    """Ask a judge model to rate ``ai_response`` against ``expected_answer``.

    Args:
        ai_response: Text produced by the model under test.
        expected_answer: Reference answer the response is compared with.
        judge_ai: Name of the judge service, resolved through
            ``get_judge_ai_config`` against the loaded AI configuration.

    Returns:
        A float clamped to ``[0.0, 1.0]``. When the judge cannot be resolved,
        the judge call fails, or any exception occurs, the heuristic score
        from ``evaluate_with_heuristics`` is returned instead.
    """
    try:
        judge_prompt = f"""You are an expert evaluator. Rate how accurately the actual response matches the expected answer.

Expected Answer: {expected_answer}
Actual Response: {ai_response}

Consider:
- Semantic similarity (same meaning expressed differently)
- Factual correctness 
- Completeness of the answer
- Relevance to the question

Rate the accuracy from 0.0 to 1.0 (where 1.0 is perfect match):
- 1.0 = Perfect match or equivalent meaning
- 0.8-0.9 = Mostly correct with minor differences
- 0.6-0.7 = Partially correct but missing key elements
- 0.4-0.5 = Some relevance but significant errors
- 0.2-0.3 = Minimal relevance or mostly incorrect
- 0.0-0.1 = Completely wrong or irrelevant

Respond with only the numeric score (e.g., 0.85):"""

        config = load_ai_config()
        judge_config = get_judge_ai_config(judge_ai, config)

        if not judge_config:
            app.logger.warning(f"Invalid judge AI: {judge_ai}, falling back to heuristics")
            return evaluate_with_heuristics(ai_response, expected_answer)

        judgment = get_ai_response(judge_prompt, judge_config)

        # get_ai_response reports failures as plain text. Such text may contain
        # digits (status codes, ports, addresses) that would otherwise be
        # mistaken for a score, so treat it as "judge unavailable".
        judge_failed = (
            not isinstance(judgment, str)
            or judgment.startswith("Error")
            or judgment == "Unsupported AI service"
        )
        if judge_failed:
            app.logger.warning(f"Judge AI {judge_ai} returned no usable verdict, falling back to heuristics")
            return evaluate_with_heuristics(ai_response, expected_answer)

        score = extract_numeric_score(judgment)
        return min(max(score, 0.0), 1.0)

    except Exception as e:
        app.logger.error(f"Error with AI judge evaluation: {str(e)}")
        return evaluate_with_heuristics(ai_response, expected_answer)

def evaluate_with_heuristics(ai_response, expected_answer):
    """Estimate answer accuracy without a judge model.

    The score blends two signals: the fraction of distinct expected words
    (lower-cased, whitespace-split) that also occur in the response, weighted
    0.7, and the ratio of the shorter text length to the longer one, weighted
    0.3. Returns 0.5 when the expected answer has no words or when an
    exception occurs; otherwise a float clamped to ``[0.0, 1.0]``.
    """
    try:
        wanted_words = set(expected_answer.lower().split())
        given_words = set(ai_response.lower().split())

        if not wanted_words:
            return 0.5

        word_total = len(wanted_words)
        shared = len(wanted_words & given_words)
        coverage = shared / word_total if word_total > 0 else 0.5

        shorter, longer = sorted((len(ai_response), len(expected_answer)))
        if longer > 0:
            length_similarity = shorter / longer
        else:
            length_similarity = 0.5

        blended = (coverage * 0.7) + (length_similarity * 0.3)
        return min(max(blended, 0.0), 1.0)

    except Exception as e:
        app.logger.error(f"Error in heuristic evaluation: {str(e)}")
        return 0.5

def get_judge_ai_config(judge_ai, config):
    """Return a copy of the judge's service settings, tagged with its name.

    ``judge_ai`` is either ``'current'`` (use the service named by
    ``config['service']``, defaulting to ``DEFAULT_AI_SERVICE``) or one of
    ``'ollama'``, ``'openai'``, ``'deepseek'``, ``'claude'``. Returns ``None``
    when the name is not recognised or ``config`` has no entry for the
    selected service. The entry in ``config`` itself is left untouched.
    """
    if judge_ai == 'current':
        service_name = config.get('service', DEFAULT_AI_SERVICE)
    elif judge_ai in ('ollama', 'openai', 'deepseek', 'claude'):
        service_name = judge_ai
    else:
        return None

    if service_name not in config:
        return None

    resolved = config[service_name].copy()
    resolved['service'] = service_name
    return resolved

def extract_numeric_score(judgment):
    """Pull a 0.0-1.0 score out of a judge model's free-text verdict.

    Only standalone numbers are considered: a number glued to letters, or one
    component of a dotted sequence such as a version or IP address, is
    ignored, so the fractional part of ``2.75`` is never read as ``.75``.

    Candidates are tried in this order:
      1. the first number without a ``%`` sign that is at most 1 (used as is);
      2. the first percentage of at most 100 (divided by 100);
      3. the first remaining plain number, read as a mark out of 10 when it is
         at most 10, or out of 100 when it is at most 100.

    Args:
        judgment: Text returned by the judge model.

    Returns:
        The extracted score, or 0.5 when ``judgment`` is not a string, no
        usable number is found, or an exception occurs.
    """
    try:
        if not isinstance(judgment, str):
            return 0.5

        # Each match is (number, percent-suffix); the suffix is '' when absent.
        found = re.findall(
            r'(?<![\w.])(\d+(?:\.\d+)?|\.\d+)(?!\w|\.\d)([ \t]*%)?',
            judgment,
        )
        tokens = [(float(number), bool(percent)) for number, percent in found]
        plain_values = [value for value, is_percent in tokens if not is_percent]

        for value in plain_values:
            if value <= 1.0:
                return value

        for value, is_percent in tokens:
            if is_percent and value <= 100.0:
                return value / 100.0

        if plain_values:
            # Every plain value left is above 1, so it must be on a wider scale.
            first = plain_values[0]
            if first <= 10.0:
                return first / 10.0
            if first <= 100.0:
                return first / 100.0

        return 0.5

    except Exception as e:
        app.logger.error(f"Error extracting score from judgment: {str(e)}")
        return 0.5

def calculate_benchmark_metrics(results):
    """Aggregate per-question benchmark results into per-AI summary metrics.

    ``results`` maps an AI name to a list of question result dicts carrying
    ``response_time``, ``response_length``, ``accuracy_score`` and
    ``accuracy_binary``. AIs with no results are left out of the returned
    mapping. Averages and totals are rounded for display.
    """
    summary = {}

    for ai_name, answers in results.items():
        if not answers:
            continue

        question_count = len(answers)
        time_total = sum(answer['response_time'] for answer in answers)
        length_total = sum(answer['response_length'] for answer in answers)
        score_total = sum(answer['accuracy_score'] for answer in answers)
        correct_count = sum(1 for answer in answers if answer['accuracy_binary'])

        summary[ai_name] = {
            'total_questions': question_count,
            'avg_response_time': round(time_total / question_count, 2),
            'avg_response_length': round(length_total / question_count, 1),
            'avg_accuracy_score': round(score_total / question_count, 3),
            'accuracy_rate': round(correct_count / question_count, 3),
            'correct_answers': correct_count,
            'total_response_time': round(time_total, 2)
        }

    return summary

@app.route('/reports')
@limiter.limit("20 per minute")
def reports():
    """Render the reports page listing every saved analysis, newest first.

    Only ``<uuid>.json`` files that resolve to a location inside
    ``SAVED_ANALYSES_DIR`` and contain a dict with a ``meta`` entry are
    listed. A file that cannot be loaded is logged and skipped; a failure to
    list the directory is logged and the page is rendered with whatever was
    collected so far.
    """
    saved_analyses = []
    suffix = '.json'

    try:
        # A trailing separator keeps sibling directories that merely share
        # the prefix (e.g. "saved_analyses_old") from passing the check.
        allowed_prefix = os.path.join(os.path.realpath(SAVED_ANALYSES_DIR), '')

        for filename in os.listdir(SAVED_ANALYSES_DIR):
            if not filename.endswith(suffix):
                continue
            try:
                # Strip only the trailing extension, not every occurrence.
                analysis_id = filename[:-len(suffix)]
                if not validate_uuid(analysis_id):
                    continue

                analysis_path = os.path.join(SAVED_ANALYSES_DIR, filename)

                # Resolve symlinks so a link pointing outside the directory
                # is not followed.
                real_path = os.path.realpath(analysis_path)
                if not real_path.startswith(allowed_prefix):
                    continue

                with open(analysis_path, 'r') as f:
                    analysis = json.load(f)

                if not isinstance(analysis, dict) or 'meta' not in analysis:
                    continue

                analysis['id'] = analysis_id
                analysis['vuln_count'] = count_vulnerabilities(analysis.get('results', {}))
                analysis['status'] = analysis['meta'].get('status', 'running')
                saved_analyses.append(analysis)
            except Exception as e:
                app.logger.warning(f"Error loading analysis {filename}: {str(e)}")

        saved_analyses.sort(key=lambda x: x['meta'].get('created_at', ''), reverse=True)
    except Exception as e:
        app.logger.error(f"Error loading reports: {str(e)}")

    return render_template('reports.html', reports=saved_analyses)

@app.route('/manage-products', methods=['GET', 'POST'])
@limiter.limit("5 per day", methods=['POST'])
@limiter.limit("10 per minute", methods=['GET'])
def manage_products():
    """List the registered products (GET) or register a new one (POST).

    A POST needs a ``product_name`` and a ``repo_url`` form field. The request
    is rejected with an error message on the page when a field is missing or
    invalid, the product name is already taken, the repository URL is already
    registered under another product, or the product file cannot be saved.
    On success the user is redirected back to this page.
    """
    def render_error(message):
        return render_template("manage_products.html", error=message)

    if request.method != 'POST':
        products = load_json_safe(PRODUCTS_FILE, {})
        return render_template("manage_products.html", products=products)

    product_name = validate_input(request.form.get('product_name'), 100, r'^[a-zA-Z0-9._-]+$')
    repo_url = validate_input(request.form.get('repo_url'), 200)

    if not (product_name and repo_url):
        return render_error("Both fields are required")

    if not validate_url(repo_url):
        return render_error("Invalid repository URL")

    products = load_json_safe(PRODUCTS_FILE, {})

    if product_name in products:
        return render_error("Product already exists")

    # A repository may be registered under one product only.
    for existing_product, product_data in products.items():
        if product_data.get('repo_url') == repo_url:
            return render_error(f"Repository URL already exists in product '{existing_product}'")

    products[product_name] = {'repo_url': repo_url, 'versions': []}

    if not save_json_safe(PRODUCTS_FILE, products):
        return render_error("Failed to save product")

    flash('Product added successfully', 'success')
    return redirect(url_for('manage_products'))

@app.route('/delete-product/<product_name>')
@limiter.limit("10 per minute")
@requires_basic_auth
def delete_product(product_name):
    """Remove ``product_name`` from the product file and return to the list.

    The outcome (deleted, not found, invalid name, save failure) is reported
    through a flash message; the response is always a redirect to the
    product management page.
    """
    product_name = validate_input(product_name, 100, r'^[a-zA-Z0-9._-]+$')
    if not product_name:
        flash('Invalid product name', 'danger')
        return redirect(url_for('manage_products'))

    products = load_json_safe(PRODUCTS_FILE, {})

    if product_name not in products:
        flash('Product not found', 'danger')
        return redirect(url_for('manage_products'))

    products.pop(product_name)

    saved = save_json_safe(PRODUCTS_FILE, products)
    if saved:
        flash('Product deleted successfully', 'success')
        app.logger.info(f"Product {product_name} deleted")
    else:
        flash('Error deleting product', 'danger')

    return redirect(url_for('manage_products'))

@app.route('/get_versions/<product>')
@limiter.limit("20 per minute")
def get_versions(product):
    """Return, as JSON, the versions reported by ``get_github_versions`` for ``product``.

    An empty JSON list is returned when the product name is invalid, the
    product is unknown, its repository URL does not validate, or the lookup
    raises.
    """
    product = validate_input(product, 100, r'^[a-zA-Z0-9._-]+$')
    if not product:
        return jsonify([])

    try:
        catalog = load_json_safe(PRODUCTS_FILE, {})
        if product not in catalog:
            return jsonify([])

        repo_url = catalog[product].get('repo_url')
        if not validate_url(repo_url):
            return jsonify([])

        return jsonify(get_github_versions(repo_url))
    except Exception as e:
        app.logger.error(f"Error getting versions for product {product}: {str(e)}")
        return jsonify([])

def _download_product_version(product, repo_url, ver):
    """Download and unpack the tag archive for ``ver``; return its version record."""
    download_dir = os.path.join(PRODUCTS_DIR, f"{product}_downloads")
    os.makedirs(download_dir, exist_ok=True)
    zip_url = f"{repo_url}/archive/refs/tags/{ver}.zip"
    zip_path = os.path.join(download_dir, f"{ver}.zip")
    extract_path = os.path.join(download_dir, ver)

    try:
        # The context manager releases the streamed connection; the status
        # check keeps an HTML error page from being saved as the archive.
        with requests.get(zip_url, stream=True) as response:
            response.raise_for_status()
            with open(zip_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_path)
    finally:
        # Never leave a partial or corrupt archive behind.
        if os.path.exists(zip_path):
            os.remove(zip_path)

    # The first entry of the extraction directory is used as the source root.
    final_path = os.path.join(extract_path, os.listdir(extract_path)[0])
    return {
        'version': ver,
        'path': final_path,
        'timestamp': datetime.now().isoformat()
    }


def _diff_and_analyze_folders(old_path, new_path, ext_filter, special_keywords, enable_ai, cve_ids):
    """Diff two source trees, run keyword analysis and, if enabled, AI analysis."""
    # The keyword list is built separately for each call, as callers of the
    # original code did, so neither callee can affect the other's argument.
    compare_folders(old_path, new_path, ext_filter, special_keywords.split(',') if special_keywords else None)
    diffs = parse_diff_file("special.txt")
    analyzed = analyze_diffs_with_keywords(diffs, special_keywords.split(',') if special_keywords else None)

    if enable_ai == 'on' and analyzed:
        analyzed = process_ai_analysis(analyzed, diffs, cve_ids)
    return analyzed


def run_analysis_background(analysis_id, params, mode):
    """Run an analysis in a worker thread and store the outcome in its record.

    Args:
        analysis_id: Identifier of the analysis; its record lives at
            ``SAVED_ANALYSES_DIR/<analysis_id>.json`` and must already exist.
        params: Form parameters collected by the route that started the run.
            ``products`` mode reads ``product``, ``old_version``,
            ``new_version``; ``folder`` mode reads ``old_folder``,
            ``new_folder``; both read ``extension``, ``enable_ai``,
            ``special_keywords`` and ``cve_ids``.
        mode: ``'products'`` or ``'folder'``. Any other value stores an empty
            result set.

    Nothing is returned or raised. On success the record gets status
    ``completed`` plus the results (and the AI service/model when AI analysis
    was requested); on failure it gets status ``failed`` and the error text.
    """
    analysis_path = os.path.join(SAVED_ANALYSES_DIR, f"{analysis_id}.json")

    try:
        analyzed_results = {}

        if mode == 'products':
            product = params['product']
            old_ver = params['old_version']
            new_ver = params['new_version']
            ext_filter = params['extension']
            enable_ai = params['enable_ai']
            special_keywords = params['special_keywords']
            cve_ids = params['cve_ids']

            product_versions_file = os.path.join(PRODUCTS_DIR, f"{product}.json")
            versions = load_json_safe(product_versions_file, [])

            products_data = load_json_safe(PRODUCTS_FILE, {})
            repo_url = products_data[product]['repo_url']

            for ver in [old_ver, new_ver]:
                if any(v['version'] == ver for v in versions):
                    continue
                try:
                    versions.append(_download_product_version(product, repo_url, ver))
                except Exception as e:
                    # Keep going so the other version is still cached; the
                    # missing one is reported below.
                    app.logger.error(f"Failed to download {product} {ver}: {str(e)}")

            save_json_safe(product_versions_file, versions)

            old_path = next((v['path'] for v in versions if v['version'] == old_ver), None)
            new_path = next((v['path'] for v in versions if v['version'] == new_ver), None)

            missing = [
                ver for ver, path in ((old_ver, old_path), (new_ver, new_path))
                if path is None
            ]
            if missing:
                # Fail with a readable message instead of an empty StopIteration.
                raise RuntimeError(
                    f"Could not obtain version(s) {', '.join(missing)} of product '{product}'"
                )

            analyzed_results = _diff_and_analyze_folders(
                old_path, new_path, ext_filter, special_keywords, enable_ai, cve_ids
            )

        elif mode == 'folder':
            old_folder = params['old_folder']
            new_folder = params['new_folder']
            ext_filter = params['extension']
            enable_ai = params['enable_ai']
            special_keywords = params['special_keywords']
            cve_ids = params['cve_ids']

            analyzed_results = _diff_and_analyze_folders(
                old_folder, new_folder, ext_filter, special_keywords, enable_ai, cve_ids
            )

        with open(analysis_path, 'r') as f:
            analysis_data = json.load(f)
        analysis_data['meta']['status'] = 'completed'
        analysis_data['results'] = analyzed_results

        if params.get('enable_ai') == 'on':
            config = load_ai_config()
            ai_service = config.get('service', 'ollama')
            analysis_data['meta']['ai_service'] = ai_service
            if ai_service in config:
                analysis_data['meta']['ai_model'] = config[ai_service].get('model', 'Unknown')

        with open(analysis_path, 'w') as f:
            json.dump(analysis_data, f, indent=2)

    except Exception as e:
        app.logger.error(f"Analysis {analysis_id} failed: {str(e)}")
        try:
            with open(analysis_path, 'r') as f:
                analysis_data = json.load(f)
            analysis_data['meta']['status'] = 'failed'
            analysis_data['meta']['error'] = str(e)
            with open(analysis_path, 'w') as f:
                json.dump(analysis_data, f, indent=2)
        except Exception as record_error:
            # Best effort: the record itself may be missing or unreadable.
            app.logger.error(
                f"Could not record failure for analysis {analysis_id}: {str(record_error)}"
            )

@app.route('/products', methods=['GET', 'POST'])
@limiter.limit("10 per minute")
def products():
    """Show the product comparison form (GET) or start a comparison (POST).

    A valid POST creates an analysis record, starts ``run_analysis_background``
    in ``'products'`` mode on a new thread and redirects to the analysis view.
    Missing fields, an unknown product or a malformed version produce a flash
    message and a redirect back to the form.
    """
    products_data = load_json_safe(PRODUCTS_FILE, {})
    if isinstance(products_data, dict):
        products_list = list(products_data.keys())
    else:
        products_list = []

    if request.method != 'POST':
        return render_template("products.html", products=products_list, analyzed_results={}, product='', old_version='', new_version='', extension='', enable_ai='', special_keywords='', cve_ids='')

    def reject(message):
        flash(message, 'error')
        return redirect(url_for('products'))

    form = request.form
    product = validate_input(form.get('product'), 100)
    old_ver = validate_input(form.get('old_version'), 50)
    new_ver = validate_input(form.get('new_version'), 50)
    ext_filter = validate_input(form.get('extension'), 10)
    enable_ai = form.get('enable_ai')
    special_keywords = validate_input(form.get('special_keywords'), 500)
    cve_ids = validate_input(form.get('cve_ids'), 200)

    if not (product and old_ver and new_ver):
        return reject('Missing required fields')

    if product not in products_list:
        return reject('Invalid product selected')

    if not validate_version(old_ver) or not validate_version(new_ver):
        return reject('Invalid version format')

    params = {
        'product': product,
        'old_version': old_ver,
        'new_version': new_ver,
        'extension': ext_filter,
        'enable_ai': enable_ai,
        'special_keywords': special_keywords,
        'cve_ids': cve_ids
    }
    analysis_id = create_new_analysis_record(params, source='products', ai_enabled=(enable_ai == 'on'))

    # The comparison runs on its own thread; the user is sent straight to
    # the analysis page.
    worker = threading.Thread(target=run_analysis_background, args=(analysis_id, params, 'products'))
    worker.start()
    return redirect(url_for('view_analysis', analysis_id=analysis_id))

@app.route('/folder', methods=['GET', 'POST'])
@limiter.limit("5 per minute")
@conditional_auth(['POST'])
def folder():
    """Show the folder comparison form (GET) or start a comparison (POST).

    Both submitted folders must exist and resolve (after following symlinks)
    to the products directory itself or a location beneath it. A valid POST
    creates an analysis record, starts ``run_analysis_background`` in
    ``'folder'`` mode on a new thread and redirects to the analysis view;
    otherwise a flash message is set and the user is sent back to the form.
    """
    if request.method == 'POST':
        old_folder = validate_input(request.form.get('old_folder'), 500)
        new_folder = validate_input(request.form.get('new_folder'), 500)
        ext_filter = validate_input(request.form.get('extension'), 10)
        enable_ai = request.form.get('enable_ai')
        special_keywords = validate_input(request.form.get('special_keywords'), 500)
        cve_ids = validate_input(request.form.get('cve_ids'), 200)

        if not old_folder or not new_folder:
            flash('Both folder paths are required', 'error')
            return redirect(url_for('folder'))

        if not (os.path.exists(old_folder) and os.path.exists(new_folder)):
            flash('Invalid folder paths', 'error')
            return redirect(url_for('folder'))

        try:
            products_real = os.path.realpath(PRODUCTS_DIR)
            # Compare against the directory plus a trailing separator: a bare
            # prefix test would also accept siblings such as "<products>_evil".
            products_prefix = os.path.join(products_real, '')

            for candidate in (old_folder, new_folder):
                candidate_real = os.path.realpath(candidate)
                inside = (
                    candidate_real == products_real
                    or candidate_real.startswith(products_prefix)
                )
                if not inside:
                    flash('Folder paths must be within the products directory', 'error')
                    return redirect(url_for('folder'))
        except Exception:
            flash('Invalid folder paths', 'error')
            return redirect(url_for('folder'))

        params = {
            'old_folder': old_folder,
            'new_folder': new_folder,
            'extension': ext_filter,
            'enable_ai': enable_ai,
            'special_keywords': special_keywords,
            'cve_ids': cve_ids
        }
        analysis_id = create_new_analysis_record(params, source='folder', ai_enabled=(enable_ai == 'on'))

        threading.Thread(target=run_analysis_background, args=(analysis_id, params, 'folder')).start()
        return redirect(url_for('view_analysis', analysis_id=analysis_id))

    return render_template("folder.html", analyzed_results={}, old_folder='', new_folder='', extension='', enable_ai='', special_keywords='', cve_ids='')

@app.route('/library', methods=['GET', 'POST'])
@limiter.limit("10 per minute")
@conditional_auth(['POST'])
def library():
    """Show the repository library (GET) or add a repository to it (POST).

    A POST needs ``name`` and ``repo_url`` form fields (``ai_service`` is
    optional). The result reported by ``add_library_repo`` is shown as a
    flash message and the user is redirected back to the library page.
    """
    if request.method != 'POST':
        return render_template("library.html", library_repos=load_library())

    form = request.form
    name = validate_input(form.get('name'), 100)
    repo_url = validate_input(form.get('repo_url'), 200)
    ai_service = validate_input(form.get('ai_service'), 50)

    if not (name and repo_url):
        flash('Name and repository URL are required', 'danger')
    else:
        success, message = add_library_repo(name, repo_url, ai_service)
        category = 'success' if success else 'danger'
        flash(message, category)

    return redirect(url_for('library'))

@app.route('/library/delete/<repo_id>', methods=['POST'])
@limiter.limit("10 per minute")
@requires_basic_auth
def delete_library_repo(repo_id):
    """Remove the repository ``repo_id`` from the library and return to it.

    The outcome is reported through a flash message; the response is always
    a redirect to the library page.
    """
    if validate_uuid(repo_id):
        removed = remove_library_repo(repo_id)
        if removed:
            flash('Repository removed from library', 'success')
            app.logger.info(f"Library repo {repo_id} deleted")
        else:
            flash('Repository not found', 'danger')
    else:
        flash('Invalid repository ID', 'danger')

    return redirect(url_for('library'))

@app.route('/library/toggle/<repo_id>', methods=['POST'])
@limiter.limit("10 per minute")
@requires_basic_auth
def toggle_library_repo(repo_id):
    """Flip the ``auto_scan`` flag of the library repository ``repo_id``.

    A repository without the flag is treated as having it enabled, so the
    first toggle switches it off. The outcome is reported through a flash
    message; the response is always a redirect to the library page.
    """
    if not validate_uuid(repo_id):
        flash('Invalid repository ID', 'danger')
        return redirect(url_for('library'))

    repos = load_library()
    target = next((entry for entry in repos if entry.get('id') == repo_id), None)

    if target is None:
        flash('Repository not found', 'danger')
    else:
        target['auto_scan'] = not target.get('auto_scan', True)
        save_library(repos)
        flash('Auto-scan setting updated', 'success')

    return redirect(url_for('library'))

@app.route('/library/check-now', methods=['POST'])
@limiter.limit("3 per minute")
def check_versions_now():
    """Start ``check_for_new_versions`` on a new thread and return to the library page."""
    worker = threading.Thread(target=check_for_new_versions)
    worker.start()

    flash('Version check started in background', 'info')
    return redirect(url_for('library'))

@app.route('/ai-benchmark', methods=['GET', 'POST'])
def ai_benchmark():
    """List stored benchmark runs (GET) or start a new benchmark (POST).

    POST requires HTTP basic auth and a JSON body. It starts
    ``run_ai_benchmark`` on a new thread and answers with the new benchmark
    id. A missing, empty or non-JSON body yields a 400 response; any other
    failure yields a 500 response.

    GET renders every ``benchmark_<uuid>.json`` file that resolves to a
    location inside ``SAVED_ANALYSES_DIR``, newest first. Files that cannot
    be loaded are logged and skipped.
    """
    if request.method == 'POST':
        try:
            auth = request.authorization
            if not auth or not check_basic_auth(auth.username, auth.password):
                return ('Authentication required', 401, {'WWW-Authenticate': 'Basic realm="Login Required"'})
            # silent=True turns a missing or malformed JSON body into None so
            # the client gets the 400 below rather than a generic 500.
            benchmark_data = request.get_json(silent=True)
            if not benchmark_data:
                return jsonify({'error': 'No benchmark data provided'}), 400
            benchmark_id = str(uuid.uuid4())
            threading.Thread(target=run_ai_benchmark, args=(benchmark_id, benchmark_data)).start()
            return jsonify({'benchmark_id': benchmark_id, 'status': 'started'})
        except Exception as e:
            app.logger.error(f"Error starting benchmark: {str(e)}")
            return jsonify({'error': 'Failed to start benchmark'}), 500

    benchmark_results = []
    prefix = 'benchmark_'
    suffix = '.json'
    try:
        # A trailing separator keeps sibling directories that merely share
        # the prefix from passing the containment check.
        allowed_prefix = os.path.join(os.path.realpath(SAVED_ANALYSES_DIR), '')

        for filename in os.listdir(SAVED_ANALYSES_DIR):
            if not (filename.startswith(prefix) and filename.endswith(suffix)):
                continue
            try:
                # Strip exactly one leading prefix and one trailing extension.
                benchmark_id = filename[len(prefix):-len(suffix)]
                if not validate_uuid(benchmark_id):
                    continue
                benchmark_path = os.path.join(SAVED_ANALYSES_DIR, filename)
                real_path = os.path.realpath(benchmark_path)
                if not real_path.startswith(allowed_prefix):
                    continue
                with open(benchmark_path, 'r') as f:
                    benchmark = json.load(f)
                if not isinstance(benchmark, dict):
                    continue
                benchmark['id'] = benchmark_id
                benchmark_results.append(benchmark)
            except Exception as e:
                app.logger.warning(f"Error loading benchmark {filename}: {str(e)}")
        benchmark_results.sort(key=lambda x: x.get('created_at', ''), reverse=True)
    except Exception as e:
        app.logger.error(f"Error loading benchmark results: {str(e)}")
    return render_template("ai_benchmark.html", benchmark_results=benchmark_results)

@app.route('/benchmark-results/<benchmark_id>')
@requires_basic_auth
def benchmark_results(benchmark_id):
    """Render the result page for one benchmark run.

    Responds 404 for an id that is not a valid UUID. While no result file
    exists the page is rendered with status ``running``; a file that cannot
    be read or parsed renders the page with status ``error``.
    """
    if not validate_uuid(benchmark_id):
        abort(404)

    template = "benchmark_results.html"

    try:
        results_path = os.path.join(SAVED_ANALYSES_DIR, f"benchmark_{benchmark_id}.json")
        if not os.path.exists(results_path):
            return render_template(template, benchmark_id=benchmark_id, status="running")

        with open(results_path, 'r') as f:
            stored = json.load(f)

        # Files without an explicit status are treated as completed.
        current_status = stored.get('status', 'completed')
        return render_template(template,
                               benchmark_id=benchmark_id,
                               results=stored,
                               status=current_status)
    except Exception as e:
        app.logger.error(f"Error loading benchmark results {benchmark_id}: {str(e)}")
        return render_template(template, benchmark_id=benchmark_id, status="error")

@app.route('/', methods=['GET'])
@limiter.limit("50 per minute")
def index():
    """Serve the landing page."""
    landing_page = render_template("index.html")
    return landing_page

def generate_random_md5():
    """Return the MD5 hex digest (32 hex characters) of 16 bytes from ``os.urandom``."""
    digest = hashlib.md5()
    digest.update(os.urandom(16))
    return digest.hexdigest()

def find_free_port():
    """Return a TCP port number the operating system reports as free.

    A temporary socket is bound to port 0 so the OS picks the port; the
    socket is closed before returning, so the port is not reserved for the
    caller.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with probe:
        probe.bind(('', 0))
        probe.listen(1)
        _, chosen_port = probe.getsockname()
    return chosen_port

def print_banner(username, password, host, port):
    """Print the startup banner with the server URL and Basic Auth credentials.

    Each row is padded programmatically to a fixed inner width, so the
    right-hand border is always present and lines up for values that fit the
    frame. A value too long for the frame is printed in full and pushes that
    row's border outwards rather than being truncated.

    Args:
        username: Basic Auth user name to display.
        password: Basic Auth password to display.
        host: Host shown in the server URL.
        port: Port shown in the server URL.
    """
    inner_width = 64
    # The credentials box sits inside the frame with a two-space margin on
    # each side, plus one column for each of its own vertical borders.
    box_width = inner_width - 6

    def row(text=""):
        # Padding counts code points, which matches terminal columns for the
        # text used here.
        return f"║{text.ljust(inner_width)}║"

    def boxed(text):
        return row(f"  │ {text.ljust(box_width - 1)}│")

    rule = "═" * inner_width
    lines = [
        f"╔{rule}╗",
        row("PatchLeaks Started".center(inner_width)),
        f"╠{rule}╣",
        row(f"  Server URL:  http://{host}:{port}"),
        row(),
        row("  Basic Authentication Credentials:"),
        row(f"  ┌{'─' * box_width}┐"),
        boxed(f"Username: {username}"),
        boxed(f"Password: {password}"),
        row(f"  └{'─' * box_width}┘"),
        row(),
        row("  ⚠️  IMPORTANT: Save these credentials!"),
        row("  They are randomly generated each time the app starts."),
        f"╚{rule}╝",
    ]
    # Keep the blank line before and after the banner.
    print("\n" + "\n".join(lines) + "\n")

if __name__ == '__main__':
    # --- Command-line interface -------------------------------------------
    parser = argparse.ArgumentParser(
        description='PatchLeaks - Security Patch Analysis Tool',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s                    # Run with random free port
  %(prog)s --port 8080        # Run on custom port 8080
  %(prog)s -p 5000            # Run on port 5000 (short form)
  
Features:
  - Analyzes security patches between software versions
  - AI-powered vulnerability detection
  - Supports multiple AI services (Ollama, OpenAI, DeepSeek, Claude)
  - Automatic version monitoring for GitHub repositories
  - CVE matching and analysis
  
Authentication:
  Random MD5 credentials are generated on each startup for security.
  Username and password will be displayed in the console.
        """
    )
    parser.add_argument('-p', '--port', type=int, default=DEFAULT_PORT,
                        help='Port to run the server on (default: random free port)')
    parser.add_argument('--host', type=str, default=DEFAULT_HOST,
                        help=f'Host address to bind to (default: {DEFAULT_HOST})')
    args = parser.parse_args()

    # --- Port selection: fall back to an OS-assigned port when none given --
    port = args.port
    if port is None:
        port = find_free_port()
        print(f"🔍 No port specified, using random free port: {port}")

    # --- Fresh credentials for this run -------------------------------------
    # Plain assignment here already rebinds the module-level names, because
    # this block executes at module scope.
    BASIC_AUTH_USERNAME = generate_random_md5()
    BASIC_AUTH_PASSWORD = generate_random_md5()

    # --- On-disk layout -----------------------------------------------------
    os.makedirs(PRODUCTS_DIR, exist_ok=True)
    os.chmod(PRODUCTS_DIR, 0o755)

    # Seed an empty product list the first time the app is started.
    magento_seed = os.path.join(PRODUCTS_DIR, 'magento.json')
    if not os.path.exists(magento_seed):
        with open(magento_seed, 'w') as f:
            json.dump([], f)

    # --- Launch -------------------------------------------------------------
    print_banner(BASIC_AUTH_USERNAME, BASIC_AUTH_PASSWORD, args.host, port)

    app.run(host=args.host, port=port, debug=False)
