Create app/predictor.py
app/predictor.py (ADDED, +739 lines)

# Loads the CodeBERT model used for semantic file classification.

import logging

import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

# Simple terminal logging.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class CodeClassifier:
    # Initializes (downloads and caches) the CodeBERT model; the first run
    # takes a while because it downloads ~500 MB.
    def __init__(self):
        logger.info("⏳ Initializing AI Service...")

        # Detect hardware: use the GPU when available (CUDA, e.g. on the
        # Victus machine), otherwise fall back to Apple MPS or CPU.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        if torch.backends.mps.is_available():
            self.device = "mps"

        logger.info(f"🚀 Running on device: {self.device}")

        # This downloads 'microsoft/codebert-base' from Hugging Face the
        # first time it runs and caches it locally afterwards.
        try:
            logger.info(
                "📥 Loading CodeBERT Model (this may take a minute first time)..."
            )
            self.tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")
            self.model = AutoModel.from_pretrained("microsoft/codebert-base").to(
                self.device
            )
            logger.info("✅ CodeBERT Loaded Successfully!")
        except Exception as e:
            logger.error(f"❌ Failed to load model: {e}")
            raise

        # Initialize semantic anchors for classification.
        self.labels = {
            "Frontend": "import react component from view styles css html dom window document state props effect",
            "Backend": "import express nest controller service entity repository database sql mongoose route api async await req res dto",
            "Security": "import auth passport jwt strategy bcrypt verify token secret guard password user login session middleware",
            "DevOps": "docker build image container kubernetes yaml env port host volume deploy pipeline stage steps runs-on",
            "Testing": "describe it expect test mock spy jest beforeall aftereach suite spec assert",
        }
        self.label_embeddings = self._precompute_label_embeddings()

    def _get_embedding(self, text: str):
        """
        Generates a 768-dim vector for the given text using CodeBERT.
        """
        # Truncate text to avoid model errors (CodeBERT's max is 512 tokens).
        inputs = self.tokenizer(
            text, return_tensors="pt", padding=True, truncation=True, max_length=512
        ).to(self.device)
        with torch.no_grad():
            outputs = self.model(**inputs)
        # Use the [CLS] token embedding (index 0) as the sentence representation.
        return outputs.last_hidden_state[:, 0, :]

    def _precompute_label_embeddings(self):
        """
        Computes embeddings for the category descriptions once at startup.
        """
        logger.info("🧠 Pre-computing semantic anchors for classification...")
        embeddings = {}
        for label, description in self.labels.items():
            embeddings[label] = self._get_embedding(description)
        return embeddings

    # Predicts a file's theme from its path (and optionally its content) and
    # classifies it into the related category.
    def predict(self, file_path: str, content: str = None) -> dict:
        """
        Determines the 'Layer' of a file (Frontend, Backend, etc.).
        Returns: { "label": str, "confidence": float, "embedding": list[float] }
        """
        path = file_path.lower()

        # Helper to standardize the return format.
        def result(label, conf=1.0, emb=None):
            return {
                "label": label,
                "confidence": conf,
                "embedding": emb if emb is not None else [],
            }

        # 1. Fast path: rule-based checks (high precision).
        # Kept because it is instant and correct for the obvious cases.

        # Frontend indicators
        if any(
            x in path
            for x in [
                "/components/",
                "/pages/",
                "/views/",
                ".jsx",
                ".tsx",
                ".css",
                "tailwind",
            ]
        ):
            return result("Frontend")

        # Backend indicators
        if any(
            x in path
            for x in [
                "/controllers/",
                "/modules/",
                "/services/",
                ".controller.ts",
                ".service.ts",
                "dto",
            ]
        ):
            return result("Backend")

        # Security indicators
        if any(
            x in path
            for x in ["auth", "guard", "strategy", "jwt", "passport", "middleware"]
        ):
            return result("Security")

        # DevOps/config indicators
        if any(
            x in path
            for x in ["docker", "k8s", "github/workflows", "tsconfig", "package.json"]
        ):
            return result("DevOps")

        # Testing indicators
        if any(x in path for x in ["test", "spec", "e2e", "jest"]):
            return result("Testing")

        # 2. Slow path: AI-powered semantic classification (high recall).
        # If the rules are unsure, ask the model.
        try:
            # Analyze the content when available (best signal); otherwise fall
            # back to the path. The first 1000 chars are roughly enough tokens
            # to capture imports and class definitions.
            text_to_analyze = content[:1000] if content else file_path

            target_embedding_tensor = self._get_embedding(text_to_analyze)
            # Convert to a list for JSON serialization.
            target_embedding_list = target_embedding_tensor.tolist()

            best_label = "Generic"
            highest_score = -1.0

            for label, anchor_embedding in self.label_embeddings.items():
                # Cosine similarity ranges from -1 to 1.
                score = F.cosine_similarity(
                    target_embedding_tensor, anchor_embedding
                ).item()
                if score > highest_score:
                    highest_score = score
                    best_label = label

            # Only accept the model's opinion if it is somewhat confident.
            if highest_score > 0.25:
                return result(best_label, highest_score, target_embedding_list)

            return result("Generic", highest_score, target_embedding_list)

        except Exception as e:
            logger.error(f"AI Classification failed for {file_path}: {e}")
            return result("Generic", 0.0)


class GuideGenerator:
    def __init__(self):
        self.tech_stacks = {
            "React": ["react", "jsx", "tsx", "next.config.js"],
            "Vue": ["vue", "nuxt.config.js"],
            "Angular": ["angular.json"],
            "Svelte": ["svelte.config.js"],
            "NestJS": ["nest-cli.json", ".module.ts"],
            "Express": ["express", "server.js", "app.js"],
            "FastAPI": ["fastapi", "main.py"],
            "Django": ["django", "manage.py"],
            "Flask": ["flask", "app.py"],
            "Spring Boot": ["pom.xml", "build.gradle", "src/main/java"],
            "Go": ["go.mod", "main.go"],
            "Rust": ["Cargo.toml", "src/main.rs"],
        }

        self.tools = {
            "Docker": ["Dockerfile", "docker-compose.yml"],
            "Kubernetes": ["k8s", "helm", "charts/"],
            "TypeScript": ["tsconfig.json", ".ts"],
            "Tailwind CSS": ["tailwind.config.js"],
            "Prisma": ["schema.prisma"],
            "GraphQL": [".graphql", "schema.gql"],
            "PostgreSQL": ["postgresql", "pg"],
            "MongoDB": ["mongoose", "mongodb"],
            "Redis": ["redis"],
        }

    def detect_stack(self, files: list[str]) -> dict:
        detected = {"languages": set(), "frameworks": set(), "tools": set()}

        for file in files:
            path = file.lower()

            # Languages
            if path.endswith(".ts") or path.endswith(".tsx"):
                detected["languages"].add("TypeScript")
            elif path.endswith(".js") or path.endswith(".jsx"):
                detected["languages"].add("JavaScript")
            elif path.endswith(".py"):
                detected["languages"].add("Python")
            elif path.endswith(".go"):
                detected["languages"].add("Go")
            elif path.endswith(".rs"):
                detected["languages"].add("Rust")
            elif path.endswith(".java"):
                detected["languages"].add("Java")

            # Frameworks (lowercase the indicators so mixed-case entries such
            # as "Cargo.toml" can still match the lowercased path)
            for framework, indicators in self.tech_stacks.items():
                if any(ind.lower() in path for ind in indicators):
                    detected["frameworks"].add(framework)

            # Tools
            for tool, indicators in self.tools.items():
                if any(ind.lower() in path for ind in indicators):
                    detected["tools"].add(tool)

        return detected

    def _generate_tree(self, files: list[str]) -> str:
        """
        Generates a clean ASCII tree of the project structure with architectural annotations.
        """
        tree = {}
        relevant_files = []

        # 1. Filter and normalize paths
        for f in files:
            parts = f.split("/")

            # Skip noise
            if any(
                p
                in [
                    "node_modules",
                    ".git",
                    "__pycache__",
                    "dist",
                    "build",
                    ".idea",
                    ".vscode",
                ]
                for p in parts
            ):
                continue
            if f.endswith(".DS_Store"):
                continue

            relevant_files.append(f)

        # 2. Build a nested dictionary
        for path in relevant_files:
            parts = path.split("/")
            if len(parts) > 3:
                parts = parts[:3]

            current = tree
            for part in parts:
                current = current.setdefault(part, {})

        # 3. Define annotations
        descriptions = {
            "src": "Core application source code",
            "app": "Main application logic",
            "components": "Reusable UI components",
            "pages": "Route/Page definitions",
            "api": "API endpoints and services",
            "utils": "Utility functions and helpers",
            "lib": "External libraries and configurations",
            "test": "Unit and integration tests",
            "tests": "Test suites",
            "docs": "Project documentation",
            "public": "Static assets (images, fonts)",
            "assets": "Static media files",
            "server": "Backend server code",
            "client": "Frontend client application",
            "config": "Configuration files",
            "scripts": "Build and maintenance scripts",
            "prisma": "Database schema and migrations",
            "graphql": "GraphQL definitions",
        }

        # 4. Render the tree
        lines = []

        def render(node, prefix=""):
            keys = sorted(node.keys())

            # Priority sorting: put 'src', 'app', 'server', 'client' first
            priority = ["src", "app", "client", "server", "public"]
            keys.sort(key=lambda k: (0 if k in priority else 1, k))

            if len(keys) > 12:
                keys = keys[:12] + ["..."]

            for i, key in enumerate(keys):
                is_last = i == len(keys) - 1
                connector = "└── " if is_last else "├── "

                # Add an annotation if one is available and the entry is a
                # non-empty folder (has children)
                comment = ""
                if key in descriptions and isinstance(node.get(key), dict) and node[key]:
                    comment = f"  # {descriptions[key]}"

                lines.append(f"{prefix}{connector}{key}{comment}")

                if isinstance(node.get(key), dict) and node[key]:
                    extension = "    " if is_last else "│   "
                    render(node[key], prefix + extension)

        render(tree)
        return "\n".join(lines[:50])

    def generate_markdown(self, repo_name: str, files: list[str]) -> str:
        # 1. Perform the AI analysis (the "DNA" of the repo)
        stats = {
            "Frontend": 0,
            "Backend": 0,
            "Security": 0,
            "DevOps": 0,
            "Testing": 0,
            "Generic": 0,
        }
        layer_map = {}

        low_confidence_files = []
        file_embeddings = {}  # Path -> Tensor

        for f in files:
            # Use CodeBERT's path-embedding capability here, since content is
            # not available for every file.
            prediction = classifier.predict(f)
            layer = prediction["label"]
            confidence = prediction["confidence"]

            stats[layer] += 1
            layer_map[f] = layer

            if confidence < 0.4 and layer != "Generic":
                low_confidence_files.append((f, confidence))

            # Store the embedding for coupling analysis (if available).
            # Flatten [1, 768] -> [768] so the pairwise cosine below compares
            # 1-D vectors.
            if prediction["embedding"] and len(prediction["embedding"]) > 0:
                file_embeddings[f] = torch.tensor(prediction["embedding"]).flatten()

        total_files = len(files) if files else 1
        primary_layer = max(stats, key=stats.get)

        # Calculate semantic couplings (top 5)
        couplings = []
        try:
            paths = list(file_embeddings.keys())
            # Limit to the first 50 files for safety and performance
            sample_paths = paths[:50]

            for i in range(len(sample_paths)):
                for j in range(i + 1, len(sample_paths)):
                    p1, p2 = sample_paths[i], sample_paths[j]
                    # Skip files in the same folder
                    if p1.rsplit("/", 1)[0] == p2.rsplit("/", 1)[0]:
                        continue

                    # Ensure tensors are on the CPU for comparison
                    t1 = file_embeddings[p1].cpu()
                    t2 = file_embeddings[p2].cpu()

                    score = F.cosine_similarity(
                        t1.unsqueeze(0), t2.unsqueeze(0)
                    ).item()
                    if score > 0.88:
                        couplings.append((p1, p2, score))
        except Exception as e:
            logger.error(f"Failed to calculate couplings: {e}")

        couplings.sort(key=lambda x: x[2], reverse=True)
        top_couplings = couplings[:5]

        # Sort low-confidence files by lowest score
        low_confidence_files.sort(key=lambda x: x[1])
        top_refactors = low_confidence_files[:5]

        # 2. Advanced stack & feature detection
        stack = self.detect_stack(files)
        features = self._detect_features(files, stats)
        dev_tools = self._detect_dev_tools(files)

        install_cmd = "npm install"
        run_cmd = "npm run dev"
        test_cmd = "npm test"

        if "Python" in stack["languages"]:
            install_cmd = "pip install -r requirements.txt"
            run_cmd = "python main.py"
            test_cmd = "pytest"
        elif "Go" in stack["languages"]:
            install_cmd = "go mod download"
            run_cmd = "go run main.go"
            test_cmd = "go test ./..."
        elif "Rust" in stack["languages"]:
            install_cmd = "cargo build"
            run_cmd = "cargo run"
            test_cmd = "cargo test"

        if "Django" in stack["frameworks"]:
            run_cmd = "python manage.py runserver"
            test_cmd = "python manage.py test"
        elif "Spring Boot" in stack["frameworks"]:
            install_cmd = "./mvnw install"
            run_cmd = "./mvnw spring-boot:run"

        if "Docker" in stack["tools"]:
            run_cmd += "\n# Or using Docker\ndocker-compose up --build"

        # 3. Assemble the guide
        md = f"# {repo_name} Developer Guide\n\n"

        md += "## AI Codebase Insights\n"
        md += "Analysis powered by **CodeBERT** semantic vectors.\n\n"
        md += f"**Project DNA:** {self._get_project_dna(stats, total_files)}\n\n"
        md += f"**Quality Check:** {self._get_testing_status(stats, total_files)}\n\n"

        if top_refactors:
            md += "### Code Health & Complexity\n"
            md += "The AI flagged the following files as **Non-Standard** or **Complex** (Low Confidence).\n"
            md += "These are good candidates for refactoring or documentation reviews:\n"
            for f, score in top_refactors:
                md += f"- `{f}` (Confidence: {int(score * 100)}%)\n"
            md += "\n"

        if top_couplings:
            md += "### Logical Couplings\n"
            md += "The AI detected strong semantic connections between these file pairs (they share logic but not folders):\n"
            for p1, p2, score in top_couplings:
                md += f"- `{p1}` <--> `{p2}` ({int(score * 100)}% match)\n"
            md += "\n"

        md += "### Layer Composition\n"
        md += "| Layer | Composition | Status |\n"
        md += "| :--- | :--- | :--- |\n"
        for layer, count in stats.items():
            if count > 0:
                percentage = (count / total_files) * 100
                status = "Primary" if layer == primary_layer else "Detected"
                md += f"| {layer} | {percentage:.1f}% | {status} |\n"
        md += "\n"

        md += "## Key Features\n"
        if features:
            md += "The following capabilities were inferred from the codebase structure:\n\n"
            for feature, description in features.items():
                md += f"- **{feature}**: {description}\n"
        else:
            md += "No specific high-level features (Auth, Database, etc.) were explicitly detected from the file structure.\n"
        md += "\n"

        md += "## Architecture & Technologies\n"
        md += "The project utilizes the following core technologies:\n\n"

        if stack["languages"]:
            md += "**Languages**: " + ", ".join(sorted(stack["languages"])) + "\n"
        if stack["frameworks"]:
            md += "**Frameworks**: " + ", ".join(sorted(stack["frameworks"])) + "\n"
        if stack["tools"]:
            md += "**Infrastructure**: " + ", ".join(sorted(stack["tools"])) + "\n"
        if dev_tools:
            md += "**Development Tools**: " + ", ".join(sorted(dev_tools)) + "\n"
        md += "\n"

        md += "## Getting Started\n\n"

        # Configuration section
        if any(f.endswith(".env") or f.endswith(".env.example") for f in files):
            md += "### Configuration\n"
            md += "This project relies on environment variables.\n"
            md += "1. Find the `.env.example` file in the root directory.\n"
            md += "2. Copy it to create a new `.env` file.\n"
            md += "   `cp .env.example .env`\n"
            md += "3. Fill in the required values (Database URL, API Keys, etc.).\n\n"

        md += "### Prerequisites\n"
        md += "Ensure you have the following installed:\n"
        md += "- Git\n"
        if any(x in ["TypeScript", "JavaScript"] for x in stack["languages"]):
            md += "- Node.js (LTS)\n"
        if "Python" in stack["languages"]:
            md += "- Python 3.8+\n"
        if "Docker" in stack["tools"]:
            md += "- Docker Desktop\n"

        md += "\n### Installation\n"
        md += "1. Clone and enter the repository:\n"
        md += "   ```bash\n"
        md += f"   git clone https://github.com/OWNER/{repo_name}.git\n"
        md += f"   cd {repo_name}\n"
        md += "   ```\n\n"

        md += "2. Install project dependencies:\n"
        md += "   ```bash\n"
        md += f"   {install_cmd}\n"
        md += "   ```\n\n"

        md += "### Execution\n"
        md += "Start the development environment:\n"
        md += "```bash\n"
        md += f"{run_cmd}\n"
        md += "```\n\n"

        if stats["Testing"] > 0:
            md += "## Testing\n"
            md += "Automated tests were detected. Run them using:\n"
            md += f"```bash\n{test_cmd}\n```\n\n"

        md += "## Project Structure\n"
        md += "Detailed tree view with AI-predicted layer labels:\n\n"
        md += "```text\n"
        md += self._generate_tree_with_ai(files, layer_map)
        md += "\n```\n\n"

        # ... (rest of the markdown)
        md += "## Contribution Workflow\n\n"
        md += "We welcome contributions! Please follow this detailed workflow to ensure smooth collaboration:\n\n"
        md += "### 1. Find an Issue\n"
        md += "- Browse the **Issues** tab for tasks.\n"
        md += "- Look for labels like `good first issue` or `help wanted` if you are new.\n"
        md += "- Comment on the issue to assign it to yourself before starting work.\n\n"
        md += "### 2. Branching Strategy\n"
        md += "Create a new branch from `main` using one of the following prefixes:\n"
        md += "- `feat/`: For new features (e.g., `feat/user-auth`)\n"
        md += "- `fix/`: For bug fixes (e.g., `fix/login-error`)\n"
        md += "- `docs/`: For documentation changes (e.g., `docs/update-readme`)\n"
        md += "- `refactor/`: For code improvements without logic changes\n"
        md += "```bash\n"
        md += "git checkout -b feat/your-feature-name\n"
        md += "```\n\n"
        md += "### 3. Development Standards\n"
        if dev_tools:
            md += "Before committing, ensure your code meets the project standards:\n"
            if "ESLint" in dev_tools or "Prettier" in dev_tools:
                md += "- **Linting**: Run `npm run lint` to fix style issues.\n"
            if "Jest" in dev_tools or "pytest" in dev_tools:
                md += "- **Testing**: Run tests to ensure no regressions.\n"
        md += "- Keep pull requests small and focused on a single task.\n\n"
        md += "### 4. Commit Messages\n"
        md += "We follow the **Conventional Commits** specification:\n"
        md += "- `feat: add new search component`\n"
        md += "- `fix: handle null pointer exception`\n"
        md += "- `chore: update dependencies`\n"
        md += "```bash\n"
        md += "git commit -m 'feat: implement amazing feature'\n"
        md += "```\n\n"
        md += "### 5. Pull Request Process\n"
        md += "1. Push your branch: `git push origin feat/your-feature-name`\n"
        md += "2. Open a Pull Request against the `main` branch.\n"
        md += "3. Fill out the PR template with details about your changes.\n"
        md += "4. Wait for code review and address any feedback.\n"
        md += "5. Once approved, your changes will be merged!\n\n"

        md += "## About this Guide\n"
        md += "This documentation was automatically generated by **GitGud AI**.\n"
        md += "- **Engine:** CodeBERT (Transformer-based Language Model)\n"
        md += "- **Analysis:** Zero-shot semantic classification of file paths and content.\n"
        md += "- **Accuracy:** The 'Project DNA' and 'Layer Composition' metrics are derived from vector embeddings of your codebase, providing a mathematical approximation of your architecture.\n"

        return md

    def _get_project_dna(self, stats: dict, total: int) -> str:
        """
        Interprets the layer statistics to give a high-level project description.
        """
        backend_pct = (stats["Backend"] / total) * 100
        frontend_pct = (stats["Frontend"] / total) * 100
        ops_pct = (stats["DevOps"] / total) * 100

        if backend_pct > 50:
            return "This project is a **Backend-focused service**, likely an API or microservice. The majority of the codebase is dedicated to business logic and data handling."
        elif frontend_pct > 50:
            return "This project is a **Frontend-heavy application**, focusing on UI/UX and client-side logic."
        elif backend_pct > 30 and frontend_pct > 30:
            return "This is a balanced **Full-Stack Application**, containing significant logic for both client and server components."
        elif ops_pct > 40:
            return "This repository appears to be an **Infrastructure or Configuration** project (IaC), heavily focused on deployment and orchestration."
        else:
            return "This is a **General purpose repository**, possibly a library or a mix of various utilities."

    def _get_testing_status(self, stats: dict, total: int) -> str:
        test_pct = (stats["Testing"] / total) * 100
        if test_pct > 20:
            return "[Excellent] **Excellent Test Coverage**. A significant portion of the codebase is dedicated to testing."
        elif test_pct > 5:
            return "[Moderate] **Moderate Testing**. Tests are present but may not cover all modules."
        else:
            return "[Low] **Low Test Coverage**. Very few test files were detected. It is recommended to add more unit or integration tests."

    def _detect_features(self, files: list[str], stats: dict) -> dict:
        features = {}
        files_str = " ".join(files).lower()

        # Authentication - only if CodeBERT detected Security logic
        if stats["Security"] > 0:
            auth_indicators = [
                "/auth/",
                "/login",
                "/register",
                "passport",
                "jwt",
                "session",
                "bcrypt",
                "strategy",
            ]
            if any(x in files_str for x in auth_indicators):
                features["Authentication"] = (
                    "Implements user authentication (Login/Signup/Session management)."
                )

        # Database - only if CodeBERT detected Backend logic
        if stats["Backend"] > 0:
            db_indicators = [
                "schema.prisma",
                "models.py",
                "migration",
                "sequelize",
                "typeorm",
                "mongoose",
                "/db/",
                "/database/",
            ]
            if any(x in files_str for x in db_indicators):
                features["Database"] = (
                    "Includes database schema definitions or ORM models."
                )

        # API
        api_indicators = [
            "/api/",
            "controllers",
            "resolvers",
            "routes",
            "router",
            "endpoint",
        ]
        if any(x in files_str for x in api_indicators):
            features["API"] = "Exposes RESTful endpoints or GraphQL resolvers."

        # Real-time
        if any(x in files_str for x in ["socket", "websocket", "io.", "channel"]):
            features["Real-time"] = "Uses WebSockets or real-time event channels."

        # UI architecture
        if stats["Frontend"] > 0:
            ui_indicators = ["components/", "views/", ".tsx", ".jsx", ".vue", "/pages/"]
            if any(x in files_str for x in ui_indicators):
                features["UI Architecture"] = "Modular component-based user interface."

        return features

    def _detect_dev_tools(self, files: list[str]) -> set:
        tools = set()
        files_str = " ".join(files).lower()

        if "eslint" in files_str:
            tools.add("ESLint")
        if "prettier" in files_str:
            tools.add("Prettier")
        if "jest" in files_str:
            tools.add("Jest")
        if "pytest" in files_str or "conftest.py" in files_str:
            # Needed so the pytest check in generate_markdown can ever match.
            tools.add("pytest")
        if "cypress" in files_str:
            tools.add("Cypress")
        if "github/workflows" in files_str:
            tools.add("GitHub Actions")
        if "husky" in files_str:
            tools.add("Husky")
        if "tailwind" in files_str:
            tools.add("Tailwind CSS")
        if "vite" in files_str:
            tools.add("Vite")
        if "webpack" in files_str:
            tools.add("Webpack")

        return tools

    def _generate_tree_with_ai(self, files: list[str], layer_map: dict) -> str:
        tree = {}
        for f in files:
            parts = f.split("/")
            if any(p in ["node_modules", ".git", "__pycache__", "dist"] for p in parts):
                continue

            curr = tree
            for part in parts[:3]:  # Depth 3
                curr = curr.setdefault(part, {})

        lines = []

        def render(node, path_prefix="", tree_prefix=""):
            keys = sorted(node.keys())
            for i, key in enumerate(keys):
                is_last = i == len(keys) - 1
                full_path = f"{path_prefix}/{key}".strip("/")

                # Get the layer from the map, predicting only when it is
                # missing (e.g., for folders) to avoid redundant model calls.
                layer = layer_map.get(full_path)
                if layer is None:
                    layer = classifier.predict(full_path)["label"]
                label = f" [{layer}]" if layer != "Generic" else ""

                connector = "└── " if is_last else "├── "
                lines.append(f"{tree_prefix}{connector}{key}{label}")

                if node[key]:
                    render(
                        node[key],
                        full_path,
                        tree_prefix + ("    " if is_last else "│   "),
                    )

        render(tree)
        return "\n".join(lines[:60])


# Create the global instances
classifier = CodeClassifier()
guide_generator = GuideGenerator()
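
For context, a minimal usage sketch of the two module-level instances this file exports. The file list and repo name below are hypothetical, and the `app.predictor` import path assumes the Space is launched from the repository root; none of this is part of the commit itself:

# usage_sketch.py: hypothetical example, not part of this commit.
# Importing app.predictor instantiates CodeClassifier, so the first run
# downloads the ~500 MB CodeBERT model.
from app.predictor import classifier, guide_generator

files = [
    "src/components/Button.tsx",  # matches the rule-based Frontend fast path
    "src/auth/jwt.strategy.ts",   # matches the rule-based Security fast path
    "Dockerfile",                 # matches the rule-based DevOps fast path
]

# Per-file layer prediction; rule hits return confidence 1.0 and no embedding.
print(classifier.predict(files[0]))
# -> {'label': 'Frontend', 'confidence': 1.0, 'embedding': []}

# Paths that match no rule fall through to CodeBERT; passing content gives the
# model a better signal than the path alone.
print(classifier.predict("src/misc/helper.py", content="import express from 'express';"))

# Full Markdown developer guide generated from a bare file listing.
print(guide_generator.generate_markdown("my-repo", files))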