# Spaces: Running
import html
import os
import socket
import threading
import time
from datetime import datetime, timezone

import gradio as gr
import nltk
import numpy as np
import pandas as pd
from huggingface_hub import HfApi, HfFolder
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# For sentence tokenization: fetch the tokenizer data at import time.
for _nltk_resource in ("punkt", "punkt_tab"):
    nltk.download(_nltk_resource)
###############################
# LOGGING SETUP
###############################
# Directory and file that the logging helpers in this module append to.
log_dir = "./logs"
log_file_path = os.path.join(log_dir, "Second_Opinion_Logs.log")
# exist_ok=True makes the call a no-op when the directory already exists.
os.makedirs(log_dir, exist_ok=True)
def upload_logs_to_hf():
    """Push the local log file to the Hugging Face Space repository.

    The token comes from ``HfFolder.get_token()`` and, failing that, from the
    ``HUGGINGFACE_HUB_TOKEN`` environment variable. A missing token or any
    upload error is printed and swallowed; nothing is raised to the caller.
    """
    try:
        client = HfApi()
        hf_token = HfFolder.get_token()
        if not hf_token:
            hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN")

        if hf_token:
            client.upload_file(
                path_or_fileobj=log_file_path,
                path_in_repo="logs/Second_Opinion_Logs.log",
                repo_id="singhn9/privateSOWN",
                repo_type="space",
                token=hf_token,
            )
            print("Logs uploaded to Hugging Face repo.")
        else:
            print(" No HF token found — skipping upload.")
    except Exception as err:
        print(f" Log upload failed: {err}")
def background_log_uploader(interval=300):
    """Upload the log file every ``interval`` seconds, forever.

    The loop never returns, so this is meant to be run on its own thread.
    """
    while True:
        time.sleep(interval)
        if not os.path.exists(log_file_path):
            # No log file on disk, so there is nothing to upload this round.
            continue
        upload_logs_to_hf()


# Daemon thread: it does not keep the interpreter alive at shutdown.
_log_uploader_thread = threading.Thread(
    target=background_log_uploader,
    daemon=True,
)
_log_uploader_thread.start()
def log_action(action, request=None):
    """Append one timestamped, IP-tagged action line to the log file.

    Args:
        action: Text describing what happened; written verbatim.
        request: Optional request object. When it exposes ``client.host`` that
            value is logged as the IP; otherwise the address resolved from the
            local hostname is used.

    IP lookup and file-write failures are caught: the IP falls back to
    ``"Unknown IP"`` and write errors are printed instead of raised.
    """
    try:
        if request and hasattr(request, "client"):
            user_ip = request.client.host
        else:
            user_ip = socket.gethostbyname(socket.gethostname())
    except Exception:
        # e.g. request.client is None, or the hostname cannot be resolved.
        user_ip = "Unknown IP"

    # datetime.utcnow() is deprecated; an aware UTC datetime formats identically.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
    log_entry = f"{timestamp} (GMT) - IP: {user_ip} - {action}\n"
    try:
        # Explicit UTF-8 so non-ASCII text is not rejected by the platform
        # default encoding.
        with open(log_file_path, "a", encoding="utf-8") as log_file:
            log_file.write(log_entry)
        print(f"Log entry added: {log_entry.strip()}")
    except Exception as e:
        print(f"Error logging action: {e}")
def log_input_text(resume_text, job_list, user_ip="Unknown IP"):
    """Append the full resume and job-description input to the log file.

    Args:
        resume_text: Resume text, written verbatim.
        job_list: A list of job descriptions (joined with ``---`` separator
            lines) or any other object (logged via ``str()``).
        user_ip: IP string recorded in the entry header.

    Any failure is printed and swallowed; nothing is raised to the caller.
    """
    try:
        # datetime.utcnow() is deprecated; an aware UTC datetime formats identically.
        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
        if isinstance(job_list, list):
            # str() each entry so one non-string item cannot abort the log write.
            jobs_str = "\n---\n".join(str(job) for job in job_list)
        else:
            jobs_str = str(job_list)
        log_entry = (
            f"{timestamp} (GMT) - IP: {user_ip}\n"
            f"--- Resume Input ---\n{resume_text}\n\n"
            f"--- Job Descriptions Input ---\n{jobs_str}\n"
            "---------------------------------------------\n"
        )
        # Explicit UTF-8: pasted resumes routinely contain non-ASCII characters
        # that the platform default encoding may reject.
        with open(log_file_path, "a", encoding="utf-8") as log_file:
            log_file.write(log_entry)
        print(f"✅ Logged full user input at {timestamp} from {user_ip}")
    except Exception as e:
        print(f"Error logging input: {e}")
###############################
# LOAD MODELS
###############################
# Sentence-transformer checkpoints loaded at import time, in the order they
# are stored in (and later iterated from) the ``models`` dict.
_MODEL_NAMES = (
    "all-MiniLM-L6-v2",
    "paraphrase-MiniLM-L6-v2",
    "multi-qa-MiniLM-L6-cos-v1",
    "all-mpnet-base-v2",
    "paraphrase-mpnet-base-v2",
    "all-distilroberta-v1",
    "paraphrase-albert-small-v2",
    "multi-qa-distilbert-cos-v1",
    "distiluse-base-multilingual-cased-v2",
    "all-MiniLM-L12-v2",
)
# Maps checkpoint name -> loaded SentenceTransformer instance.
models = {name: SentenceTransformer(name) for name in _MODEL_NAMES}
###############################
# MAIN SIMILARITY FUNCTION
###############################
# Static HTML appended below the results table: describes every model row
# and every metric row, plus a feedback contact.
_RESULTS_EXPLANATION_HTML = """
<p><b>Explanation of the Table:</b></p>
<ul>
<li><b>Models:</b> Each row corresponds to a pre-trained model used for computing similarity. Below are details about each model:</li>
<ul>
<li><b>all-MiniLM-L6-v2:</b> Trained on NLI and STS datasets. Developed by Hugging Face and Microsoft.
(<a href="https://arxiv.org/abs/2012.15832" target="_blank">Research Paper</a>,
<a href="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2" target="_blank">Model Card</a>).</li>
<li><b>paraphrase-MiniLM-L6-v2:</b> Optimized for paraphrase detection on datasets like Quora Questions and MSRPC.
(<a href="https://arxiv.org/abs/2012.15832" target="_blank">Research Paper</a>,
<a href="https://huggingface.co/sentence-transformers/paraphrase-MiniLM-L6-v2" target="_blank">Model Card</a>).</li>
<li><b>multi-qa-MiniLM-L6-cos-v1:</b> Fine-tuned for question-answering tasks using datasets like SQuAD and Natural Questions.
(<a href="https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1" target="_blank">Model Card</a>).</li>
<li><b>all-mpnet-base-v2:</b> Robust embeddings for high-contextualized tasks.
(<a href="https://arxiv.org/abs/2004.09297" target="_blank">Research Paper</a>,
<a href="https://huggingface.co/sentence-transformers/all-mpnet-base-v2" target="_blank">Model Card</a>).</li>
<li><b>paraphrase-mpnet-base-v2:</b> Reliable for paraphrase tasks, trained on diverse datasets.
(<a href="https://huggingface.co/sentence-transformers/paraphrase-mpnet-base-v2" target="_blank">Model Card</a>).</li>
<li><b>all-distilroberta-v1:</b> A lightweight RoBERTa-based model for sentence embeddings.
(<a href="https://arxiv.org/abs/1907.11692" target="_blank">Research Paper</a>,
<a href="https://huggingface.co/sentence-transformers/all-distilroberta-v1" target="_blank">Model Card</a>).</li>
<li><b>paraphrase-albert-small-v2:</b> Suitable for paraphrasing in resource-constrained environments.
(<a href="https://huggingface.co/sentence-transformers/paraphrase-albert-small-v2" target="_blank">Model Card</a>).</li>
<li><b>multi-qa-distilbert-cos-v1:</b> Optimized for multilingual question-answering tasks.
(<a href="https://huggingface.co/sentence-transformers/multi-qa-distilbert-cos-v1" target="_blank">Model Card</a>).</li>
<li><b>distiluse-base-multilingual-cased-v2:</b> Trained on multilingual datasets for cross-lingual embeddings.
(<a href="https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2" target="_blank">Model Card</a>).</li>
<li><b>all-MiniLM-L12-v2:</b> Deeper MiniLM variant for enhanced contextual understanding.
(<a href="https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2" target="_blank">Model Card</a>).</li>
</ul>
<li><b>Metrics:</b>
<ul>
<li><b>Average:</b> Mean similarity score for each job description.</li>
<li><b>Variance:</b> Variability in the similarity scores.</li>
<li><b>Median:</b> Middle value of the similarity scores.</li>
<li><b>Standard Deviation:</b> Spread of the similarity scores around the mean.</li>
<li><b>Certainty Score:</b> Indicates model agreement, with 1 being the highest consensus.</li>
</ul>
</li>
</ul>
<p>If you liked this application, feel free to send your feedback, suggestions, or adulations to
<b>21f1006368@ds.study.iitm.ac.in</b>.</p>
"""


def compute_similarity(resume_text, job_list):
    """Score a resume against each job description with every model in ``models``.

    Args:
        resume_text: Resume as plain text.
        job_list: List of job-description strings; blank entries are dropped.

    Returns:
        A ``(reasoning_html, table_html)`` tuple. ``table_html`` is the styled
        per-model table, the metric rows and the static explanation. On empty
        input or any internal error, ``reasoning_html`` carries the error
        message and ``table_html`` is ``None``.
    """
    try:
        if not resume_text.strip():
            return "<b>Error:</b> Resume cannot be empty.", None

        # Filter out empty strings from the list
        job_list = [j for j in job_list if j.strip()]
        if not job_list:
            return "<b>Error:</b> Please add at least one job description.", None

        # Same for every model, so build it once outside the loop.
        documents = [resume_text] + job_list
        results = {}
        for model_name, model in models.items():
            embeddings = model.encode(documents)
            # Row 0 is the resume; the remaining rows are the jobs, in order.
            results[model_name] = cosine_similarity(
                [embeddings[0]], embeddings[1:]
            ).flatten()

        job_labels = [f"Job {i}" for i in range(1, len(job_list) + 1)]
        # After the transpose: one row per model, one column per job.
        df = pd.DataFrame(results, index=job_labels).T

        variance = df.var(axis=0)
        max_variance = variance.max()
        if max_variance == 0:
            # All models agree exactly on every job; avoid 0/0 -> NaN.
            certainty = pd.Series(1.0, index=variance.index)
        else:
            certainty = 1 - (variance / max_variance)

        # All metrics are computed from the model rows before any metric row
        # is appended to the frame.
        metrics = {
            "Average": df.mean(axis=0),
            "Variance": variance,
            "Median": df.median(axis=0),
            "Standard Deviation": df.std(axis=0),
            "Certainty Score": certainty,
        }
        for metric_name, values in metrics.items():
            df.loc[metric_name] = values

        # Split on the actual number of metric rows rather than a literal 5.
        n_metrics = len(metrics)
        model_rows = df.iloc[:-n_metrics]
        metrics_rows = df.iloc[-n_metrics:]
        styled_df = model_rows.style.background_gradient(cmap="Greens", axis=None).to_html()
        styled_df += metrics_rows.to_html(header=False)

        best_job = metrics["Average"].idxmax()
        reasoning = f"<b>The best job match is {best_job} based on the highest average similarity score.</b>"
        return reasoning, styled_df + _RESULTS_EXPLANATION_HTML
    except Exception as e:
        return f"<b>Error during computation:</b> {str(e)}", None
###############################
# APPROACH A EXPLANATION
###############################
def explain_scores_by_sentences(model, resume_text, job_text, top_k=3):
    """Return HTML listing the most similar resume/job sentence pairs.

    Args:
        model: Object whose ``encode(list_of_sentences)`` returns one embedding
            row per sentence.
        resume_text: Resume text; split into sentences with nltk.
        job_text: Job-description text; split into sentences with nltk.
        top_k: Maximum number of sentence pairs to list.

    Returns:
        An HTML string with the highest-scoring pairs in descending order, or
        a short message when either text yields no sentences.
    """
    from nltk.tokenize import sent_tokenize

    resume_sents = sent_tokenize(resume_text)
    job_sents = sent_tokenize(job_text)
    if not resume_sents or not job_sents:
        return "<b>No sentences found in resume or job description.</b>"

    resume_embeddings = model.encode(resume_sents)
    job_embeddings = model.encode(job_sents)
    # sim_matrix[i, j] = similarity of resume sentence i and job sentence j.
    sim_matrix = cosine_similarity(resume_embeddings, job_embeddings)
    flat_sim = sim_matrix.flatten()
    top_k_indices = np.argsort(flat_sim)[::-1][:top_k]

    n_job_sents = sim_matrix.shape[1]
    parts = ["<h4>Top Similar Sentence Pairs</h4>"]
    for rank, idx in enumerate(top_k_indices, start=1):
        # Row-major flattening: flat index = row * n_job_sents + col.
        row, col = divmod(int(idx), n_job_sents)
        score = sim_matrix[row, col]
        # Sentences come straight from user input and the result is rendered
        # as HTML, so escape them to keep stray markup from being interpreted.
        resume_sentence = html.escape(resume_sents[row])
        job_sentence = html.escape(job_sents[col])
        parts.append(f"""
<p><b>#{rank}:</b><br>
<b>Resume:</b> {resume_sentence}<br>
<b>Job:</b> {job_sentence}<br>
<b>Similarity Score:</b> {score:.4f}</p>
""")
    return "".join(parts)
def explain_model_scores(model_name, resume, job_list, selected_job_idx, top_k=3):
    """Build the explanation panel update for one model and one job.

    Looks up ``model_name`` in ``models``, validates the job list, the
    zero-based ``selected_job_idx`` and the resume, then delegates to
    ``explain_scores_by_sentences``. Always returns a ``gr.update`` that makes
    the panel visible; validation problems and exceptions are shown as the
    panel's HTML content.
    """

    def _show(message_html):
        # Every outcome is rendered the same way: set the HTML and reveal it.
        return gr.update(value=message_html, visible=True)

    try:
        model = models[model_name]

        # Validate inputs
        if not job_list:
            return _show("<b>Error:</b> No jobs added to analyze.")

        selected_job_idx = int(selected_job_idx)
        job_count = len(job_list)
        if not 0 <= selected_job_idx < job_count:
            return _show(
                f"<b>Error:</b> Invalid job index {selected_job_idx}. You only have {job_count} jobs."
            )

        resume_text = resume.strip()
        job_text = job_list[selected_job_idx].strip()
        if not resume_text:
            return _show("<b>No resume text provided.</b>")

        return _show(explain_scores_by_sentences(model, resume_text, job_text, top_k))
    except Exception as exc:
        return _show(f"<b>Error in explanation:</b> {exc!s}")
###############################
# GRADIO APP LOGIC
###############################
# --- Job List Management ---
def _render_job_list(job_list):
    """Render the jobs as an HTML list of escaped snippets (max 100 chars each)."""
    if not job_list:
        return "<i>No jobs added yet...</i>"
    items = []
    for number, job in enumerate(job_list, start=1):
        snippet = job[:100] + "..." if len(job) > 100 else job
        # Job text is user input rendered as HTML, so escape it.
        items.append(f"<li><b>Job {number}:</b> {html.escape(snippet)}</li>")
    return "<ul>" + "".join(items) + "</ul>"


def add_job_to_list(current_job, job_list):
    """Append a job description to the list and re-render the display.

    Args:
        current_job: Text from the single-job input box.
        job_list: Current list of job descriptions; it is not mutated.

    Returns:
        ``(updated_list, display_html, "")``. The empty string clears the
        input box. When ``current_job`` is empty or whitespace the list is
        returned unchanged, and the display is still returned as HTML rather
        than the raw list.
    """
    if not current_job or not current_job.strip():
        return job_list, _render_job_list(job_list), ""

    updated_list = job_list + [current_job]
    return updated_list, _render_job_list(updated_list), ""
def clear_jobs():
    """Return the reset values: empty job list, placeholder HTML, blank input."""
    empty_jobs = []
    placeholder_html = "<i>No jobs added yet...</i>"
    blank_input = ""
    return empty_jobs, placeholder_html, blank_input
# --- Main Processing ---
def process_and_display(resume, job_list, request: gr.Request = None):
    """Log the submission, compute similarities, and stream UI updates.

    This is a generator. Each yielded 4-tuple holds the values for the
    processing banner, the recommendation HTML, the table HTML and the
    details button, in that order.

    Args:
        resume: Resume text from the UI.
        job_list: List of job-description strings.
        request: Injected by Gradio because of the ``gr.Request`` type hint.
            Without the hint Gradio passes nothing and the default ``None``
            is used, so the fallback server address would always be logged.

    Raises:
        Re-raises any exception from processing after logging it.
    """
    # Resolve the caller IP best-effort, the same way log_action does.
    try:
        if request and hasattr(request, "client"):
            user_ip = request.client.host
        else:
            user_ip = socket.gethostbyname(socket.gethostname())
    except Exception:
        user_ip = "Unknown IP"

    try:
        log_action(f"Process and display triggered for IP: {user_ip}", request)
        log_input_text(resume, job_list, user_ip=user_ip)

        # First update: show the banner and hide the details button.
        yield gr.update(value="<b>Processing...</b>", visible=True), None, None, gr.update(visible=False)

        reasoning, table = compute_similarity(resume, job_list)
        if table:
            log_action(f"Successfully processed and displayed results for IP: {user_ip}", request)
            yield (
                gr.update(value="", visible=False),
                reasoning,
                table,
                gr.update(value="Papa Please Preach More", visible=True),
            )
        else:
            # compute_similarity returns None for the table on any error;
            # `reasoning` then carries the error message.
            log_action(f"Error: No results to display for IP: {user_ip}", request)
            yield (
                gr.update(value="", visible=False),
                reasoning,
                "<p>No results to display.</p>",
                gr.update(visible=False),
            )
    except Exception as e:
        log_action(f"Error during process for IP {user_ip}: {str(e)}", request)
        # Bare raise keeps the original traceback intact.
        raise
def show_details(table):
    """Return a Gradio update that sets ``table`` as the value and makes it visible."""
    reveal_update = gr.update(value=table, visible=True)
    return reveal_update
# Access code that unlocks the main UI. It can be overridden with the
# INVITE_CODE environment variable so the code can be rotated without editing
# source; the default keeps the previous hard-coded value.
# NOTE(review): a credential committed in source is readable by anyone who can
# see the repository — consider removing the default.
INVITE_CODE = os.getenv("INVITE_CODE", "INDIAMBA")
# NOTE(review): this State is created outside the gr.Blocks context but is used
# as an event output inside it — confirm this works on the Gradio version in use.
access_granted = gr.State(False)
###############################
# BUILD THE GRADIO INTERFACE
###############################
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost — confirm the layout against the running app.
_APP_CSS = """
.job-display { border: 1px solid #ccc; padding: 10px; border-radius: 5px; background: #f9f9f9; min-height: 50px; }
"""


def _make_explainer(model_name):
    """Build the click handler that explains scores for one fixed model."""

    def _explain(resume, jobs, idx):
        return explain_model_scores(model_name, resume, jobs, idx)

    return _explain


with gr.Blocks(css=_APP_CSS) as app:
    # State to hold the list of jobs
    job_list_state = gr.State([])

    gr.Markdown("# Second Opinion with Naval v1.2 – “Morning Daze”")
    gr.Markdown("Youtube Video: https://www.youtube.com/watch?v=khGulN2vAyY ")
    gr.Markdown("🔐 This app requires an invite code to continue. Ask Naval if you don't have one.")

    # Invite-code gate: the main UI stays hidden until check_invite reveals it.
    with gr.Row():
        code_input = gr.Textbox(label="Enter Invite Code", type="password", placeholder="Ask Naval for access code")
        access_button = gr.Button("Submit")
    access_warning = gr.Markdown(value="Access denied. Please enter the correct invite code.", visible=False)

    main_ui = gr.Group(visible=False)
    with main_ui:
        gr.Markdown("### ✍️ Input Section")
        with gr.Row():
            # Left Column: Resume
            with gr.Column(scale=1):
                resume_input = gr.Textbox(label="Paste Resume", lines=15, placeholder="Paste your resume here...")
            # Right Column: Dynamic Job Entry
            with gr.Column(scale=1):
                gr.Markdown("#### Add Job Descriptions")
                single_job_input = gr.Textbox(label="Paste ONE Job Description Here", lines=5, placeholder="Paste a single JD here and click 'Add Job' below.")
                with gr.Row():
                    add_job_btn = gr.Button("➕ Add Job", variant="secondary")
                    clear_jobs_btn = gr.Button("🗑️ Clear All Jobs", variant="stop")
                gr.Markdown("#### Your Added Jobs List:")
                job_display_area = gr.HTML(value="<i>No jobs added yet...</i>", elem_classes="job-display")

        gr.Markdown("---")
        gr.Markdown("### 🔍 Matching Section")
        with gr.Row():
            match_button = gr.Button("Match My Resume to Added Jobs", variant="primary")
        processing_output = gr.HTML(value="", visible=False)
        with gr.Row():
            recommendation_output = gr.HTML(label="Recommendation", visible=True)
        with gr.Row():
            table_output = gr.HTML(label="Similarity Table", visible=False)
        with gr.Row():
            nerd_button = gr.Button("Papa Please Preach More", visible=False)

        gr.Markdown("---")
        gr.Markdown("### 📊 Explanation Section")
        explanation_output = gr.HTML(label="Model Explanation", visible=False)

        # Event: Add Job
        add_job_btn.click(
            add_job_to_list,
            inputs=[single_job_input, job_list_state],
            # Update state, display, and clear input
            outputs=[job_list_state, job_display_area, single_job_input],
        )
        # Event: Clear Jobs
        clear_jobs_btn.click(
            clear_jobs,
            inputs=[],
            outputs=[job_list_state, job_display_area, single_job_input],
        )
        # Event: Match
        match_button.click(
            process_and_display,
            inputs=[resume_input, job_list_state],
            outputs=[processing_output, recommendation_output, table_output, nerd_button],
        )
        nerd_button.click(show_details, inputs=[table_output], outputs=[table_output])

        with gr.Row():
            job_index_to_explain = gr.Number(label="Job Index (1-based for humans, but logic uses 0-based)", value=0, precision=0)
        with gr.Row():
            # One "Explain" button per loaded model, each bound to its own name.
            for m_name in models:
                btn = gr.Button(f"Explain {m_name}")
                btn.click(
                    fn=_make_explainer(m_name),
                    inputs=[resume_input, job_list_state, job_index_to_explain],
                    outputs=[explanation_output],
                )

    def check_invite(user_code):
        """Compare the stripped code with INVITE_CODE and toggle warning/main UI."""
        granted = user_code.strip() == INVITE_CODE
        # The warning is shown exactly when access is denied; the main UI is
        # shown exactly when it is granted.
        return granted, gr.update(visible=not granted), gr.update(visible=granted)

    access_button.click(fn=check_invite, inputs=[code_input], outputs=[access_granted, access_warning, main_ui])

app.launch()