CodeCommunity committed
Commit da07baf · verified · 1 parent: 174a4e2

Create app/predictor.py

Files changed (1): app/predictor.py (+739 −0)
app/predictor.py ADDED
@@ -0,0 +1,739 @@
# Loads the CodeBERT model used for code classification.

import logging

import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

# Basic logging to the terminal.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class CodeClassifier:
    # Initializes, downloads, and caches the CodeBERT model; the first run
    # takes a while because it downloads roughly 500 MB of weights.
    def __init__(self):
        logger.info("⏳ Initializing AI Service...")

        # Detect hardware: prefer CUDA, use Apple MPS if present, otherwise CPU.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        if torch.backends.mps.is_available():
            self.device = "mps"

        logger.info(f"🚀 Running on device: {self.device}")

        # This downloads 'microsoft/codebert-base' from Hugging Face the
        # first time it runs. It is cached locally afterwards.
        try:
            logger.info(
                "📥 Loading CodeBERT Model (this may take a minute first time)..."
            )
            self.tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
            self.model = AutoModel.from_pretrained("microsoft/codebert-base").to(
                self.device
            )
            logger.info("✅ CodeBERT Loaded Successfully!")
        except Exception as e:
            logger.error(f"❌ Failed to load model: {e}")
            raise

        # Initialize semantic anchors for classification.
        self.labels = {
            "Frontend": "import react component from view styles css html dom window document state props effect",
            "Backend": "import express nest controller service entity repository database sql mongoose route api async await req res dto",
            "Security": "import auth passport jwt strategy bcrypt verify token secret guard password user login session middleware",
            "DevOps": "docker build image container kubernetes yaml env port host volume deploy pipeline stage steps runs-on",
            "Testing": "describe it expect test mock spy jest beforeall aftereach suite spec assert",
        }
        self.label_embeddings = self._precompute_label_embeddings()

    def _get_embedding(self, text: str):
        """
        Generates a 768-dim vector for the given text using CodeBERT.
        """
        # Truncate input to avoid model errors (CodeBERT's max is 512 tokens).
        inputs = self.tokenizer(
            text, return_tensors="pt", padding=True, truncation=True, max_length=512
        ).to(self.device)
        with torch.no_grad():
            outputs = self.model(**inputs)
        # Use the [CLS] token embedding (index 0) as the sentence representation.
        return outputs.last_hidden_state[:, 0, :]

    def _precompute_label_embeddings(self):
        """
        Computes embeddings for the category descriptions once at startup.
        """
        logger.info("🧠 Pre-computing semantic anchors for classification...")
        embeddings = {}
        for label, description in self.labels.items():
            embeddings[label] = self._get_embedding(description)
        return embeddings

    # Predicts a file's theme from its path (and optionally its content) and
    # classifies it into the most closely related category.
    def predict(self, file_path: str, content: str = None) -> dict:
        """
        Determines the 'Layer' of a file (Frontend, Backend, etc.)
        Returns: { "label": str, "confidence": float, "embedding": list[float] }
        """
        path = file_path.lower()

        # Helper to standardize the return format.
        def result(label, conf=1.0, emb=None):
            return {
                "label": label,
                "confidence": conf,
                "embedding": emb if emb is not None else [],
            }

        # 1. Fast path: rule-based checks (high precision).
        # We keep this because it's instant and correct for obvious cases.

        # Frontend indicators
        if any(
            x in path
            for x in [
                "/components/",
                "/pages/",
                "/views/",
                ".jsx",
                ".tsx",
                ".css",
                "tailwind",
            ]
        ):
            return result("Frontend")

        # Backend indicators
        if any(
            x in path
            for x in [
                "/controllers/",
                "/modules/",
                "/services/",
                ".controller.ts",
                ".service.ts",
                "dto",
            ]
        ):
            return result("Backend")

        # Security indicators
        if any(
            x in path
            for x in ["auth", "guard", "strategy", "jwt", "passport", "middleware"]
        ):
            return result("Security")

        # DevOps/config indicators
        if any(
            x in path
            for x in ["docker", "k8s", "github/workflows", "tsconfig", "package.json"]
        ):
            return result("DevOps")

        # Testing indicators
        if any(x in path for x in ["test", "spec", "e2e", "jest"]):
            return result("Testing")

        # 2. Slow path: AI-powered semantic classification (high recall).
        # If the rules are unsure, we ask the model.
        try:
            # Decide what to analyze: content (best) or path (fallback).
            text_to_analyze = content if content else file_path

            # If analyzing content, take the first 1000 chars (roughly enough
            # tokens) to capture imports and class definitions.
            if content:
                text_to_analyze = content[:1000]

            target_embedding_tensor = self._get_embedding(text_to_analyze)
            # Drop the batch dimension before converting, so callers get a flat
            # JSON-serializable list they can rebuild as a 1-D tensor.
            target_embedding_list = target_embedding_tensor.squeeze(0).tolist()

            best_label = "Generic"
            highest_score = -1.0

            for label, anchor_embedding in self.label_embeddings.items():
                # Cosine similarity: -1 to 1.
                score = F.cosine_similarity(
                    target_embedding_tensor, anchor_embedding
                ).item()
                if score > highest_score:
                    highest_score = score
                    best_label = label

            # Only accept the model's opinion if it's somewhat confident.
            if highest_score > 0.25:
                return result(best_label, highest_score, target_embedding_list)

            return result("Generic", highest_score, target_embedding_list)

        except Exception as e:
            logger.error(f"AI Classification failed for {file_path}: {e}")
            return result("Generic", 0.0)


class GuideGenerator:
    def __init__(self):
        self.tech_stacks = {
            "React": ["react", "jsx", "tsx", "next.config.js"],
            "Vue": ["vue", "nuxt.config.js"],
            "Angular": ["angular.json"],
            "Svelte": ["svelte.config.js"],
            "NestJS": ["nest-cli.json", ".module.ts"],
            "Express": ["express", "server.js", "app.js"],
            "FastAPI": ["fastapi", "main.py"],
            "Django": ["django", "manage.py"],
            "Flask": ["flask", "app.py"],
            "Spring Boot": ["pom.xml", "build.gradle", "src/main/java"],
            "Go": ["go.mod", "main.go"],
            "Rust": ["Cargo.toml", "src/main.rs"],
        }

        self.tools = {
            "Docker": ["Dockerfile", "docker-compose.yml"],
            "Kubernetes": ["k8s", "helm", "charts/"],
            "TypeScript": ["tsconfig.json", ".ts"],
            "Tailwind CSS": ["tailwind.config.js"],
            "Prisma": ["schema.prisma"],
            "GraphQL": [".graphql", "schema.gql"],
            "PostgreSQL": ["postgresql", "pg"],
            "MongoDB": ["mongoose", "mongodb"],
            "Redis": ["redis"],
        }

    def detect_stack(self, files: list[str]) -> dict:
        detected = {"languages": set(), "frameworks": set(), "tools": set()}

        for file in files:
            path = file.lower()

            # Languages
            if path.endswith(".ts") or path.endswith(".tsx"):
                detected["languages"].add("TypeScript")
            elif path.endswith(".js") or path.endswith(".jsx"):
                detected["languages"].add("JavaScript")
            elif path.endswith(".py"):
                detected["languages"].add("Python")
            elif path.endswith(".go"):
                detected["languages"].add("Go")
            elif path.endswith(".rs"):
                detected["languages"].add("Rust")
            elif path.endswith(".java"):
                detected["languages"].add("Java")

            # Frameworks. Compare case-insensitively: the path is lowercased,
            # but some indicators (e.g. "Cargo.toml") contain capitals.
            for framework, indicators in self.tech_stacks.items():
                if any(ind.lower() in path for ind in indicators):
                    detected["frameworks"].add(framework)

            # Tools
            for tool, indicators in self.tools.items():
                if any(ind.lower() in path for ind in indicators):
                    detected["tools"].add(tool)

        return detected

    def _generate_tree(self, files: list[str]) -> str:
        """
        Generates a clean ASCII tree of the project structure with
        architectural annotations.
        """
        tree = {}
        relevant_files = []

        # 1. Filter and normalize paths.
        for f in files:
            parts = f.split("/")

            # Skip noise.
            if any(
                p
                in [
                    "node_modules",
                    ".git",
                    "__pycache__",
                    "dist",
                    "build",
                    ".idea",
                    ".vscode",
                ]
                for p in parts
            ):
                continue
            if f.endswith(".DS_Store"):
                continue

            relevant_files.append(f)

        # 2. Build a nested dictionary.
        for path in relevant_files:
            parts = path.split("/")
            if len(parts) > 3:
                parts = parts[:3]

            current = tree
            for part in parts:
                current = current.setdefault(part, {})

        # 3. Define annotations.
        descriptions = {
            "src": "Core application source code",
            "app": "Main application logic",
            "components": "Reusable UI components",
            "pages": "Route/Page definitions",
            "api": "API endpoints and services",
            "utils": "Utility functions and helpers",
            "lib": "External libraries and configurations",
            "test": "Unit and integration tests",
            "tests": "Test suites",
            "docs": "Project documentation",
            "public": "Static assets (images, fonts)",
            "assets": "Static media files",
            "server": "Backend server code",
            "client": "Frontend client application",
            "config": "Configuration files",
            "scripts": "Build and maintenance scripts",
            "prisma": "Database schema and migrations",
            "graphql": "GraphQL definitions",
        }

        # 4. Render the tree.
        lines = []

        def render(node, prefix=""):
            keys = sorted(node.keys())

            # Priority sorting: put 'src', 'app', 'server', 'client' first.
            priority = ["src", "app", "client", "server", "public"]
            keys.sort(key=lambda k: (0 if k in priority else 1, k))

            if len(keys) > 12:
                keys = keys[:12] + ["..."]

            for i, key in enumerate(keys):
                is_last = i == len(keys) - 1
                connector = "└── " if is_last else "├── "

                # Add an annotation if available and it's a folder (has children).
                comment = ""
                if key in descriptions and isinstance(node.get(key), dict) and node[key]:
                    comment = f"  # {descriptions[key]}"

                lines.append(f"{prefix}{connector}{key}{comment}")

                if isinstance(node.get(key), dict) and node[key]:
                    extension = "    " if is_last else "│   "
                    render(node[key], prefix + extension)

        render(tree)
        return "\n".join(lines[:50])

    def generate_markdown(self, repo_name: str, files: list[str]) -> str:
        # 1. Perform AI analysis (the "DNA" of the repo).
        stats = {
            "Frontend": 0,
            "Backend": 0,
            "Security": 0,
            "DevOps": 0,
            "Testing": 0,
            "Generic": 0,
        }
        layer_map = {}

        low_confidence_files = []
        file_embeddings = {}  # Path -> Tensor

        for f in files:
            # We rely on CodeBERT's path embeddings here since we don't have
            # content for all files.
            prediction = classifier.predict(f)
            layer = prediction["label"]
            confidence = prediction["confidence"]

            stats[layer] += 1
            layer_map[f] = layer

            if confidence < 0.4 and layer != "Generic":
                low_confidence_files.append((f, confidence))

            # Store the embedding for coupling analysis (if available).
            if prediction["embedding"] and len(prediction["embedding"]) > 0:
                file_embeddings[f] = torch.tensor(prediction["embedding"])

        total_files = len(files) if files else 1
        primary_layer = max(stats, key=stats.get)

        # Calculate semantic couplings (top 5).
        couplings = []
        try:
            paths = list(file_embeddings.keys())
            # Limit to the first 50 files for safety and performance.
            sample_paths = paths[:50]

            for i in range(len(sample_paths)):
                for j in range(i + 1, len(sample_paths)):
                    p1, p2 = sample_paths[i], sample_paths[j]
                    # Skip pairs in the same folder.
                    if p1.rsplit("/", 1)[0] == p2.rsplit("/", 1)[0]:
                        continue

                    # Ensure tensors are on CPU for comparison.
                    t1 = file_embeddings[p1].cpu()
                    t2 = file_embeddings[p2].cpu()

                    score = F.cosine_similarity(t1.unsqueeze(0), t2.unsqueeze(0)).item()
                    if score > 0.88:
                        couplings.append((p1, p2, score))
        except Exception as e:
            logger.error(f"Failed to calculate couplings: {e}")

        couplings.sort(key=lambda x: x[2], reverse=True)
        top_couplings = couplings[:5]

        # Sort low-confidence files by lowest score.
        low_confidence_files.sort(key=lambda x: x[1])
        top_refactors = low_confidence_files[:5]

        # 2. Advanced stack & feature detection.
        stack = self.detect_stack(files)
        features = self._detect_features(files, stats)
        dev_tools = self._detect_dev_tools(files)

        install_cmd = "npm install"
        run_cmd = "npm run dev"
        test_cmd = "npm test"

        if "Python" in stack["languages"]:
            install_cmd = "pip install -r requirements.txt"
            run_cmd = "python main.py"
            test_cmd = "pytest"
        elif "Go" in stack["languages"]:
            install_cmd = "go mod download"
            run_cmd = "go run main.go"
            test_cmd = "go test ./..."
        elif "Rust" in stack["languages"]:
            install_cmd = "cargo build"
            run_cmd = "cargo run"
            test_cmd = "cargo test"

        if "Django" in stack["frameworks"]:
            run_cmd = "python manage.py runserver"
            test_cmd = "python manage.py test"
        elif "Spring Boot" in stack["frameworks"]:
            install_cmd = "./mvnw install"
            run_cmd = "./mvnw spring-boot:run"
        if "Docker" in stack["tools"]:
            run_cmd += "\n# Or using Docker\ndocker-compose up --build"

        # 3. Assemble the guide.
        md = f"# {repo_name} Developer Guide\n\n"

        md += "## AI Codebase Insights\n"
        md += "Analysis powered by **CodeBERT** semantic vectors.\n\n"
        md += f"**Project DNA:** {self._get_project_dna(stats, total_files)}\n\n"
        md += f"**Quality Check:** {self._get_testing_status(stats, total_files)}\n\n"

        if top_refactors:
            md += "### Code Health & Complexity\n"
            md += "The AI flagged the following files as **Non-Standard** or **Complex** (low confidence).\n"
            md += "These are good candidates for refactoring or documentation reviews:\n"
            for f, score in top_refactors:
                md += f"- `{f}` (Confidence: {int(score * 100)}%)\n"
            md += "\n"

        if top_couplings:
            md += "### Logical Couplings\n"
            md += "The AI detected strong semantic connections between these file pairs (they share logic but not folders):\n"
            for p1, p2, score in top_couplings:
                md += f"- `{p1}` <--> `{p2}` ({int(score * 100)}% match)\n"
            md += "\n"

        md += "### Layer Composition\n"
        md += "| Layer | Composition | Status |\n"
        md += "| :--- | :--- | :--- |\n"
        for layer, count in stats.items():
            if count > 0:
                percentage = (count / total_files) * 100
                status = "Primary" if layer == primary_layer else "Detected"
                md += f"| {layer} | {percentage:.1f}% | {status} |\n"
        md += "\n"

        md += "## Key Features\n"
        if features:
            md += "The following capabilities were inferred from the codebase structure:\n\n"
            for feature, description in features.items():
                md += f"- **{feature}**: {description}\n"
        else:
            md += "No specific high-level features (Auth, Database, etc.) were explicitly detected from the file structure.\n"
        md += "\n"

        md += "## Architecture & Technologies\n"
        md += "The project utilizes the following core technologies:\n\n"

        if stack["languages"]:
            md += "**Languages**: " + ", ".join(sorted(stack["languages"])) + "\n"
        if stack["frameworks"]:
            md += "**Frameworks**: " + ", ".join(sorted(stack["frameworks"])) + "\n"
        if stack["tools"]:
            md += "**Infrastructure**: " + ", ".join(sorted(stack["tools"])) + "\n"
        if dev_tools:
            md += "**Development Tools**: " + ", ".join(sorted(dev_tools)) + "\n"
        md += "\n"

        md += "## Getting Started\n\n"

        # Configuration section
        if any(f.endswith(".env") or f.endswith(".env.example") for f in files):
            md += "### Configuration\n"
            md += "This project relies on environment variables.\n"
            md += "1. Find the `.env.example` file in the root directory.\n"
            md += "2. Copy it to create a new `.env` file.\n"
            md += "   `cp .env.example .env`\n"
            md += "3. Fill in the required values (Database URL, API keys, etc.).\n\n"

        md += "### Prerequisites\n"
        md += "Ensure you have the following installed:\n"
        md += "- Git\n"
        if any(x in ["TypeScript", "JavaScript"] for x in stack["languages"]):
            md += "- Node.js (LTS)\n"
        if "Python" in stack["languages"]:
            md += "- Python 3.8+\n"
        if "Docker" in stack["tools"]:
            md += "- Docker Desktop\n"

        md += "\n### Installation\n"
        md += "1. Clone and enter the repository:\n"
        md += "   ```bash\n"
        md += f"   git clone https://github.com/OWNER/{repo_name}.git\n"
        md += f"   cd {repo_name}\n"
        md += "   ```\n\n"

        md += "2. Install project dependencies:\n"
        md += "   ```bash\n"
        md += f"   {install_cmd}\n"
        md += "   ```\n\n"

        md += "### Execution\n"
        md += "Start the development environment:\n"
        md += "```bash\n"
        md += f"{run_cmd}\n"
        md += "```\n\n"

        if stats["Testing"] > 0:
            md += "## Testing\n"
            md += "Automated tests were detected. Run them using:\n"
            md += f"```bash\n{test_cmd}\n```\n\n"

        md += "## Project Structure\n"
        md += "Detailed tree view with AI-predicted layer labels:\n\n"
        md += "```text\n"
        md += self._generate_tree_with_ai(files, layer_map)
        md += "\n```\n\n"

        md += "## Contribution Workflow\n\n"
        md += "We welcome contributions! Please follow this detailed workflow to ensure smooth collaboration:\n\n"
        md += "### 1. Find an Issue\n"
        md += "- Browse the **Issues** tab for tasks.\n"
        md += "- Look for labels like `good first issue` or `help wanted` if you are new.\n"
        md += "- Comment on the issue to assign it to yourself before starting work.\n\n"
        md += "### 2. Branching Strategy\n"
        md += "Create a new branch from `main` using one of the following prefixes:\n"
        md += "- `feat/`: For new features (e.g., `feat/user-auth`)\n"
        md += "- `fix/`: For bug fixes (e.g., `fix/login-error`)\n"
        md += "- `docs/`: For documentation changes (e.g., `docs/update-readme`)\n"
        md += "- `refactor/`: For code improvements without logic changes\n"
        md += "```bash\n"
        md += "git checkout -b feat/your-feature-name\n"
        md += "```\n\n"
        md += "### 3. Development Standards\n"
        if dev_tools:
            md += "Before committing, ensure your code meets the project standards:\n"
            if "ESLint" in dev_tools or "Prettier" in dev_tools:
                md += "- **Linting**: Run `npm run lint` to fix style issues.\n"
            if "Jest" in dev_tools or "pytest" in dev_tools:
                md += "- **Testing**: Run tests to ensure no regressions.\n"
        md += "- Keep pull requests small and focused on a single task.\n\n"
        md += "### 4. Commit Messages\n"
        md += "We follow the **Conventional Commits** specification:\n"
        md += "- `feat: add new search component`\n"
        md += "- `fix: handle null pointer exception`\n"
        md += "- `chore: update dependencies`\n"
        md += "```bash\n"
        md += "git commit -m 'feat: implement amazing feature'\n"
        md += "```\n\n"
        md += "### 5. Pull Request Process\n"
        md += "1. Push your branch: `git push origin feat/your-feature-name`\n"
        md += "2. Open a Pull Request against the `main` branch.\n"
        md += "3. Fill out the PR template with details about your changes.\n"
        md += "4. Wait for code review and address any feedback.\n"
        md += "5. Once approved, your changes will be merged!\n\n"

        md += "## About this Guide\n"
        md += "This documentation was automatically generated by **GitGud AI**.\n"
        md += "- **Engine:** CodeBERT (Transformer-based Language Model)\n"
        md += "- **Analysis:** Zero-shot semantic classification of file paths and content.\n"
        md += "- **Accuracy:** The 'Project DNA' and 'Layer Composition' metrics are derived from vector embeddings of your codebase, providing a mathematical approximation of your architecture.\n"

        return md

    def _get_project_dna(self, stats: dict, total: int) -> str:
        """
        Interprets the layer statistics to give a high-level project description.
        """
        backend_pct = (stats["Backend"] / total) * 100
        frontend_pct = (stats["Frontend"] / total) * 100
        ops_pct = (stats["DevOps"] / total) * 100

        if backend_pct > 50:
            return "This project is a **Backend-focused service**, likely an API or microservice. The majority of the codebase is dedicated to business logic and data handling."
        elif frontend_pct > 50:
            return "This project is a **Frontend-heavy application**, focusing on UI/UX and client-side logic."
        elif backend_pct > 30 and frontend_pct > 30:
            return "This is a balanced **Full-Stack Application**, containing significant logic for both client and server components."
        elif ops_pct > 40:
            return "This repository appears to be an **Infrastructure or Configuration** project (IaC), heavily focused on deployment and orchestration."
        else:
            return "This is a **General purpose repository**, possibly a library or a mix of various utilities."

    def _get_testing_status(self, stats: dict, total: int) -> str:
        test_pct = (stats["Testing"] / total) * 100
        if test_pct > 20:
            return "[Excellent] **Excellent Test Coverage**. A significant portion of the codebase is dedicated to testing."
        elif test_pct > 5:
            return "[Moderate] **Moderate Testing**. Tests are present but may not cover all modules."
        else:
            return "[Low] **Low Test Coverage**. Very few test files were detected. Consider adding more unit or integration tests."

    def _detect_features(self, files: list[str], stats: dict) -> dict:
        features = {}
        files_str = " ".join(files).lower()

        # Authentication - only if CodeBERT detected Security logic.
        if stats["Security"] > 0:
            auth_indicators = [
                "/auth/",
                "/login",
                "/register",
                "passport",
                "jwt",
                "session",
                "bcrypt",
                "strategy",
            ]
            if any(x in files_str for x in auth_indicators):
                features["Authentication"] = (
                    "Implements user authentication (Login/Signup/Session management)."
                )

        # Database - only if CodeBERT detected Backend logic.
        if stats["Backend"] > 0:
            db_indicators = [
                "schema.prisma",
                "models.py",
                "migration",
                "sequelize",
                "typeorm",
                "mongoose",
                "/db/",
                "/database/",
            ]
            if any(x in files_str for x in db_indicators):
                features["Database"] = (
                    "Includes database schema definitions or ORM models."
                )

        # API
        api_indicators = [
            "/api/",
            "controllers",
            "resolvers",
            "routes",
            "router",
            "endpoint",
        ]
        if any(x in files_str for x in api_indicators):
            features["API"] = "Exposes RESTful endpoints or GraphQL resolvers."

        # Real-time
        if any(x in files_str for x in ["socket", "websocket", "io.", "channel"]):
            features["Real-time"] = "Uses WebSockets or real-time event channels."

        # UI architecture
        if stats["Frontend"] > 0:
            ui_indicators = ["components/", "views/", ".tsx", ".jsx", ".vue", "/pages/"]
            if any(x in files_str for x in ui_indicators):
                features["UI Architecture"] = "Modular component-based user interface."

        return features

    def _detect_dev_tools(self, files: list[str]) -> set:
        tools = set()
        files_str = " ".join(files).lower()

        if "eslint" in files_str:
            tools.add("ESLint")
        if "prettier" in files_str:
            tools.add("Prettier")
        if "jest" in files_str:
            tools.add("Jest")
        if "cypress" in files_str:
            tools.add("Cypress")
        if "github/workflows" in files_str:
            tools.add("GitHub Actions")
        if "husky" in files_str:
            tools.add("Husky")
        if "tailwind" in files_str:
            tools.add("Tailwind CSS")
        if "vite" in files_str:
            tools.add("Vite")
        if "webpack" in files_str:
            tools.add("Webpack")

        return tools

    def _generate_tree_with_ai(self, files: list[str], layer_map: dict) -> str:
        tree = {}
        for f in files:
            parts = f.split("/")
            if any(p in ["node_modules", ".git", "__pycache__", "dist"] for p in parts):
                continue

            curr = tree
            for part in parts[:3]:  # Depth 3
                curr = curr.setdefault(part, {})

        lines = []

        def render(node, path_prefix="", tree_prefix=""):
            keys = sorted(node.keys())
            for i, key in enumerate(keys):
                is_last = i == len(keys) - 1
                full_path = f"{path_prefix}/{key}".strip("/")

                # Get the layer from the map, or predict it (e.g., for folders
                # that never went through the per-file pass).
                layer = layer_map.get(full_path)
                if layer is None:
                    layer = classifier.predict(full_path)["label"]
                label = f" [{layer}]" if layer != "Generic" else ""

                connector = "└── " if is_last else "├── "
                lines.append(f"{tree_prefix}{connector}{key}{label}")

                if node[key]:
                    render(
                        node[key],
                        full_path,
                        tree_prefix + ("    " if is_last else "│   "),
                    )

        render(tree)
        return "\n".join(lines[:60])


# Create the global instances (note: this loads the model at import time).
classifier = CodeClassifier()
guide_generator = GuideGenerator()
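
For reference, a minimal usage sketch (not part of this commit). It assumes the package layout makes `app.predictor` importable and that the CodeBERT weights can be downloaded on first import:

# Hypothetical example, for illustration only.
from app.predictor import classifier, guide_generator

sample_files = [
    "src/components/Button.tsx",
    "src/services/user.service.ts",
    "test/user.spec.ts",
    "Dockerfile",
]

# Classify one file; rule-based hits return confidence 1.0 and no embedding.
print(classifier.predict("src/services/user.service.ts"))

# Build the full markdown developer guide from a repository file listing.
print(guide_generator.generate_markdown("my-repo", sample_files))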