import gradio as gr
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import numpy as np
import os
from datetime import datetime
import socket
import nltk
import threading
import time
from huggingface_hub import HfApi, HfFolder

# For sentence tokenization
nltk.download('punkt')
nltk.download("punkt_tab")

###############################
# LOGGING SETUP
###############################
log_dir = "./logs"
os.makedirs(log_dir, exist_ok=True)
log_file_path = os.path.join(log_dir, "Second_Opinion_Logs.log")


def upload_logs_to_hf():
    """Upload the local log file to the Hugging Face repo."""
    try:
        api = HfApi()
        token = HfFolder.get_token() or os.getenv("HUGGINGFACE_HUB_TOKEN")
        if not token:
            print("No HF token found — skipping upload.")
            return
        api.upload_file(
            path_or_fileobj=log_file_path,
            path_in_repo="logs/Second_Opinion_Logs.log",
            repo_id="singhn9/privateSOWN",
            repo_type="space",
            token=token,
        )
        print("Logs uploaded to Hugging Face repo.")
    except Exception as e:
        print(f"Log upload failed: {e}")


def background_log_uploader(interval=300):
    """Push the log file to the Hub every `interval` seconds (runs in a daemon thread)."""
    while True:
        time.sleep(interval)
        if os.path.exists(log_file_path):
            upload_logs_to_hf()


threading.Thread(target=background_log_uploader, daemon=True).start()


def log_action(action, request=None):
    """Append a timestamped action entry, with a best-effort client IP, to the log file."""
    try:
        user_ip = "Unknown IP"
        if request and hasattr(request, 'client'):
            user_ip = request.client.host
        else:
            user_ip = socket.gethostbyname(socket.gethostname())
    except Exception:
        user_ip = "Unknown IP"
    timestamp = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
    log_entry = f"{timestamp} (GMT) - IP: {user_ip} - {action}\n"
    try:
        with open(log_file_path, 'a') as log_file:
            log_file.write(log_entry)
        print(f"Log entry added: {log_entry.strip()}")
    except Exception as e:
        print(f"Error logging action: {e}")


def log_input_text(resume_text, job_list, user_ip="Unknown IP"):
    """Log the raw resume and job-description inputs."""
    try:
        timestamp = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
        # Specific handling for list or string jobs
        jobs_str = "\n---\n".join(job_list) if isinstance(job_list, list) else str(job_list)
        log_entry = (
            f"{timestamp} (GMT) - IP: {user_ip}\n"
            f"--- Resume Input ---\n{resume_text}\n\n"
            f"--- Job Descriptions Input ---\n{jobs_str}\n"
            "---------------------------------------------\n"
        )
        with open(log_file_path, 'a') as log_file:
            log_file.write(log_entry)
        print(f"✅ Logged full user input at {timestamp} from {user_ip}")
    except Exception as e:
        print(f"Error logging input: {e}")


###############################
# LOAD MODELS
###############################
models = {
    "all-MiniLM-L6-v2": SentenceTransformer("all-MiniLM-L6-v2"),
    "paraphrase-MiniLM-L6-v2": SentenceTransformer("paraphrase-MiniLM-L6-v2"),
    "multi-qa-MiniLM-L6-cos-v1": SentenceTransformer("multi-qa-MiniLM-L6-cos-v1"),
    "all-mpnet-base-v2": SentenceTransformer("all-mpnet-base-v2"),
    "paraphrase-mpnet-base-v2": SentenceTransformer("paraphrase-mpnet-base-v2"),
    "all-distilroberta-v1": SentenceTransformer("all-distilroberta-v1"),
    "paraphrase-albert-small-v2": SentenceTransformer("paraphrase-albert-small-v2"),
    "multi-qa-distilbert-cos-v1": SentenceTransformer("multi-qa-distilbert-cos-v1"),
    "distiluse-base-multilingual-cased-v2": SentenceTransformer("distiluse-base-multilingual-cased-v2"),
    "all-MiniLM-L12-v2": SentenceTransformer("all-MiniLM-L12-v2"),
}


###############################
# MAIN SIMILARITY FUNCTION
###############################
def compute_similarity(resume_text, job_list):
    try:
        if not resume_text.strip():
            return "Error: Resume cannot be empty.", None

        # Filter out empty strings from the list
        job_list = [j for j in job_list if j.strip()]
        if len(job_list) == 0:
            return "Error: Please add at least one job description.", None

        results = {}
        for model_name, model in models.items():
            documents = [resume_text] + job_list
            embeddings = model.encode(documents)
            resume_embedding = embeddings[0]
            job_embeddings = embeddings[1:]
            similarities = cosine_similarity([resume_embedding], job_embeddings).flatten()
            results[model_name] = similarities

        # Rows = models, columns = jobs after the transpose.
        df = pd.DataFrame(results, index=[f"Job {i+1}" for i in range(len(job_list))]).T

        metrics = {
            "Average": df.mean(axis=0),
            "Variance": df.var(axis=0),
            "Median": df.median(axis=0),
            "Standard Deviation": df.std(axis=0),
            # 1 = the models agree most on this job (lowest variance relative to the
            # most-disputed job); 0 = they disagree most.
            "Certainty Score": 1 - (df.var(axis=0) / df.var(axis=0).max()),
        }
        for metric_name, values in metrics.items():
            df.loc[metric_name] = values

        # Colour only the per-model rows; append the five metric rows as plain HTML.
        model_rows = df.iloc[:-5]
        metrics_rows = df.iloc[-5:]
        styled_df = model_rows.style.background_gradient(cmap="Greens", axis=None).to_html()
        styled_df += metrics_rows.to_html(header=False)

        best_job = metrics["Average"].idxmax()
        reasoning = f"The best job match is {best_job} based on the highest average similarity score."

        # --- RESTORED FULL DESCRIPTION AND REFERENCES ---
        description = """
        <h3>Explanation of the Table:</h3>
        <p>Each row of the table is one sentence-embedding model and each column is one of the job
        descriptions you added. The cells are cosine-similarity scores between your resume and that
        job, and the bottom rows summarise them per job (Average, Variance, Median, Standard
        Deviation, and a Certainty Score, where higher means the models agree more).</p>
        <p>If you liked this application, feel free to send your feedback, suggestions, or adulations
        to 21f1006368@ds.study.iitm.ac.in.</p>
""" return reasoning, styled_df + description except Exception as e: return f"Error during computation: {str(e)}", None ############################### # APPROACH A EXPLANATION ############################### def explain_scores_by_sentences(model, resume_text, job_text, top_k=3): from nltk.tokenize import sent_tokenize resume_sents = sent_tokenize(resume_text) job_sents = sent_tokenize(job_text) if not resume_sents or not job_sents: return "No sentences found in resume or job description." resume_embeddings = model.encode(resume_sents) job_embeddings = model.encode(job_sents) sim_matrix = cosine_similarity(resume_embeddings, job_embeddings) flat_sim = sim_matrix.flatten() top_k_indices = np.argsort(flat_sim)[::-1][:top_k] explanation_html = "#{rank}:
Resume: {resume_sentence}
Job: {job_sentence}
Similarity Score: {score:.4f}
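

###############################
# UI HELPER FUNCTIONS
###############################
# NOTE: The helper functions below are minimal reconstructed sketches. Their original
# bodies are not present in this file; only their names and the way the Blocks
# interface wires them (inputs/outputs, see further down) are known. Treat the exact
# messages, HTML formatting, and return shapes here as assumptions.


def render_job_list(job_list):
    """Render the added job descriptions as a short HTML preview (assumed format)."""
    if not job_list:
        return "No jobs added yet..."
    return "".join(
        f"<p><b>Job {i + 1}:</b> {job[:150]}{'...' if len(job) > 150 else ''}</p>"
        for i, job in enumerate(job_list)
    )


def add_job_to_list(new_job, job_list):
    """Append one pasted job description to the state, refresh the display, clear the textbox."""
    if new_job and new_job.strip():
        job_list = job_list + [new_job.strip()]
    return job_list, render_job_list(job_list), ""


def clear_jobs():
    """Reset the job-list state, its display, and the single-job textbox."""
    return [], "No jobs added yet...", ""


def explain_model_scores(model_name, resume_text, job_list, job_index):
    """Run the sentence-level explanation for one model against one job (0-based index)."""
    try:
        job_index = int(job_index)
        if not resume_text.strip():
            return gr.update(value="Please paste a resume first.", visible=True)
        if not job_list or not (0 <= job_index < len(job_list)):
            return gr.update(value="Please add jobs and enter a valid job index.", visible=True)
        html = explain_scores_by_sentences(models[model_name], resume_text, job_list[job_index])
        return gr.update(value=html, visible=True)
    except Exception as e:
        return gr.update(value=f"Error during explanation: {str(e)}", visible=True)


def process_and_display(resume_text, job_list, request: gr.Request = None):
    """Log the inputs, run compute_similarity, and fill the four matching outputs."""
    user_ip = "Unknown IP"
    try:
        if request and hasattr(request, "client"):
            user_ip = request.client.host
        log_action("Match requested", request)
        log_input_text(resume_text, job_list, user_ip)

        reasoning, table = compute_similarity(resume_text, job_list)
        if table is not None:
            # Success: show the recommendation, stash the table (still hidden) and
            # reveal the "Papa Please Preach More" button that un-hides it.
            return (
                gr.update(value="", visible=False),
                reasoning,
                gr.update(value=table, visible=False),
                gr.update(visible=True),
            )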
        # Validation failure inside compute_similarity: surface its message only.
        return (
            gr.update(value="", visible=False),
            reasoning,
            "No results to display.",
", gr.update(visible=False), ) except Exception as e: log_action(f"Error during process for IP {user_ip}: {str(e)}") raise e def show_details(table): return gr.update(value=table, visible=True) INVITE_CODE = "INDIAMBA" access_granted = gr.State(False) ############################### # BUILD THE GRADIO INTERFACE ############################### with gr.Blocks(css=""" .job-display { border: 1px solid #ccc; padding: 10px; border-radius: 5px; background: #f9f9f9; min-height: 50px; } """) as app: # State to hold the list of jobs job_list_state = gr.State([]) gr.Markdown("# Second Opinion with Naval v1.2 – “Morning Daze”") gr.Markdown("Youtube Video: https://www.youtube.com/watch?v=khGulN2vAyY ") gr.Markdown("🔐 This app requires an invite code to continue. Ask Naval if you don't have one.") with gr.Row(): code_input = gr.Textbox(label="Enter Invite Code", type="password", placeholder="Ask Naval for access code") access_button = gr.Button("Submit") access_warning = gr.Markdown(value="Access denied. Please enter the correct invite code.", visible=False) main_ui = gr.Group(visible=False) with main_ui: gr.Markdown("### ✍️ Input Section") with gr.Row(): # Left Column: Resume with gr.Column(scale=1): resume_input = gr.Textbox(label="Paste Resume", lines=15, placeholder="Paste your resume here...") # Right Column: Dynamic Job Entry with gr.Column(scale=1): gr.Markdown("#### Add Job Descriptions") single_job_input = gr.Textbox(label="Paste ONE Job Description Here", lines=5, placeholder="Paste a single JD here and click 'Add Job' below.") with gr.Row(): add_job_btn = gr.Button("➕ Add Job", variant="secondary") clear_jobs_btn = gr.Button("🗑️ Clear All Jobs", variant="stop") gr.Markdown("#### Your Added Jobs List:") job_display_area = gr.HTML(value="No jobs added yet...", elem_classes="job-display") gr.Markdown("---") gr.Markdown("### 🔍 Matching Section") with gr.Row(): match_button = gr.Button("Match My Resume to Added Jobs", variant="primary") processing_output = gr.HTML(value="", visible=False) with gr.Row(): recommendation_output = gr.HTML(label="Recommendation", visible=True) with gr.Row(): table_output = gr.HTML(label="Similarity Table", visible=False) with gr.Row(): nerd_button = gr.Button("Papa Please Preach More", visible=False) gr.Markdown("---") gr.Markdown("### 📊 Explanation Section") explanation_output = gr.HTML(label="Model Explanation", visible=False) # Event: Add Job add_job_btn.click( add_job_to_list, inputs=[single_job_input, job_list_state], outputs=[job_list_state, job_display_area, single_job_input] # Update state, display, and clear input ) # Event: Clear Jobs clear_jobs_btn.click( clear_jobs, inputs=[], outputs=[job_list_state, job_display_area, single_job_input] ) # Event: Match match_button.click( process_and_display, inputs=[resume_input, job_list_state], outputs=[processing_output, recommendation_output, table_output, nerd_button] ) nerd_button.click(show_details, inputs=[table_output], outputs=[table_output]) with gr.Row(): job_index_to_explain = gr.Number(label="Job Index (1-based for humans, but logic uses 0-based)", value=0, precision=0) with gr.Row(): for m_name in models.keys(): btn = gr.Button(f"Explain {m_name}") btn.click( fn=lambda resume, jobs, idx, m=m_name: explain_model_scores(m, resume, jobs, idx), inputs=[resume_input, job_list_state, job_index_to_explain], outputs=[explanation_output], ) def check_invite(user_code): if user_code.strip() == INVITE_CODE: return True, gr.update(visible=False), gr.update(visible=True) else: return False, gr.update(visible=True), 
gr.update(visible=False) access_button.click(fn=check_invite, inputs=[code_input], outputs=[access_granted, access_warning, main_ui]) app.launch()
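
# A minimal sketch of the packages this script assumes are installed (for example via
# the Space's requirements.txt); exact versions are not implied here:
#   gradio, sentence-transformers, scikit-learn, pandas, numpy, nltk, huggingface_hub,
#   and matplotlib (required by pandas' Styler.background_gradient used above).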