privateSOWN / app.py
singhn9's picture
Update app.py
2db4220 verified
import html
import os
import socket
import threading
import time
from datetime import datetime, timezone

import gradio as gr
import nltk
import numpy as np
import pandas as pd
from huggingface_hub import HfApi, HfFolder
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Tokenizer data used for sentence splitting; fetched when the module loads.
nltk.download("punkt")
nltk.download("punkt_tab")

###############################
# LOGGING SETUP
###############################
# Every log entry is appended to one file inside ./logs.
log_dir = "./logs"
log_file_path = os.path.join(log_dir, "Second_Opinion_Logs.log")
os.makedirs(log_dir, exist_ok=True)
def upload_logs_to_hf():
    """Upload the local log file to the Hugging Face Space repo.

    Best-effort: when no token is available the upload is skipped, and any
    exception raised while uploading is printed rather than propagated.
    """
    try:
        # Prefer the locally stored token, then environment variables.
        # HUGGINGFACE_HUB_TOKEN is the name this app has always read; HF_TOKEN
        # and HUGGING_FACE_HUB_TOKEN are the names huggingface_hub documents.
        token = (
            HfFolder.get_token()
            or os.getenv("HUGGINGFACE_HUB_TOKEN")
            or os.getenv("HF_TOKEN")
            or os.getenv("HUGGING_FACE_HUB_TOKEN")
        )
        if not token:
            print(" No HF token found — skipping upload.")
            return

        # NOTE(review): each upload is a commit to a Space repo, which
        # presumably triggers a rebuild of the Space — confirm this is intended.
        HfApi().upload_file(
            path_or_fileobj=log_file_path,
            path_in_repo="logs/Second_Opinion_Logs.log",
            repo_id="singhn9/privateSOWN",
            repo_type="space",
            token=token,
        )
        print("Logs uploaded to Hugging Face repo.")
    except Exception as e:
        # Log shipping must never take the app down.
        print(f" Log upload failed: {e}")
def background_log_uploader(interval=300):
    """Upload the log file every *interval* seconds; never returns.

    Each cycle sleeps first, then uploads only if the log file exists.
    """
    while True:
        time.sleep(interval)
        if not os.path.exists(log_file_path):
            # No log file on disk, so there is nothing to upload this cycle.
            continue
        upload_logs_to_hf()


# Run the uploader on a daemon thread so it does not block interpreter exit.
threading.Thread(target=background_log_uploader, daemon=True).start()
def log_action(action, request=None):
    """Append a timestamped line describing *action* to the log file.

    Args:
        action: Text to record.
        request: Optional object exposing ``client.host``. When it is missing
            (or has no ``client`` attribute) the address that this machine's
            hostname resolves to is recorded instead.

    Never raises: a failed IP lookup is recorded as "Unknown IP" and a failed
    write is printed.
    """
    try:
        if request and hasattr(request, "client"):
            user_ip = request.client.host
        else:
            user_ip = socket.gethostbyname(socket.gethostname())
    except Exception:
        user_ip = "Unknown IP"

    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted text is the same.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
    log_entry = f"{timestamp} (GMT) - IP: {user_ip} - {action}\n"
    try:
        # Explicit UTF-8 so non-ASCII text cannot fail on the locale encoding.
        with open(log_file_path, "a", encoding="utf-8") as log_file:
            log_file.write(log_entry)
        print(f"Log entry added: {log_entry.strip()}")
    except Exception as e:
        print(f"Error logging action: {e}")
def log_input_text(resume_text, job_list, user_ip="Unknown IP"):
    """Append the full resume and job-description input to the log file.

    Args:
        resume_text: Resume text as entered by the user.
        job_list: List of job descriptions (joined with a ``---`` separator),
            or any other object, which is logged via ``str()``.
        user_ip: Address recorded alongside the entry.

    Never raises: any failure is printed.

    NOTE(review): this persists the complete resume text together with an IP
    address — confirm that this is acceptable for the deployment.
    """
    try:
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the formatted text is the same.
        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
        if isinstance(job_list, list):
            jobs_str = "\n---\n".join(job_list)
        else:
            jobs_str = str(job_list)
        log_entry = (
            f"{timestamp} (GMT) - IP: {user_ip}\n"
            f"--- Resume Input ---\n{resume_text}\n\n"
            f"--- Job Descriptions Input ---\n{jobs_str}\n"
            "---------------------------------------------\n"
        )
        # Explicit UTF-8: resumes routinely contain non-ASCII characters, and
        # with the locale default a failed encode would drop the entry.
        with open(log_file_path, "a", encoding="utf-8") as log_file:
            log_file.write(log_entry)
        print(f"✅ Logged full user input at {timestamp} from {user_ip}")
    except Exception as e:
        print(f"Error logging input: {e}")
###############################
# LOAD MODELS
###############################
# Checkpoint names passed to SentenceTransformer, in display order.
_MODEL_NAMES = (
    "all-MiniLM-L6-v2",
    "paraphrase-MiniLM-L6-v2",
    "multi-qa-MiniLM-L6-cos-v1",
    "all-mpnet-base-v2",
    "paraphrase-mpnet-base-v2",
    "all-distilroberta-v1",
    "paraphrase-albert-small-v2",
    "multi-qa-distilbert-cos-v1",
    "distiluse-base-multilingual-cased-v2",
    "all-MiniLM-L12-v2",
)

# Every checkpoint is instantiated when the module loads; keys are the names.
models = {name: SentenceTransformer(name) for name in _MODEL_NAMES}
###############################
# MAIN SIMILARITY FUNCTION
###############################
# Static HTML appended beneath the similarity table: explains the model rows
# and the metric rows.
_SIMILARITY_TABLE_DESCRIPTION = """
<p><b>Explanation of the Table:</b></p>
<ul>
<li><b>Models:</b> Each row corresponds to a pre-trained model used for computing similarity. Below are details about each model:</li>
<ul>
<li><b>all-MiniLM-L6-v2:</b> Trained on NLI and STS datasets. Developed by Hugging Face and Microsoft.
(<a href="https://arxiv.org/abs/2012.15832" target="_blank">Research Paper</a>,
<a href="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2" target="_blank">Model Card</a>).</li>
<li><b>paraphrase-MiniLM-L6-v2:</b> Optimized for paraphrase detection on datasets like Quora Questions and MSRPC.
(<a href="https://arxiv.org/abs/2012.15832" target="_blank">Research Paper</a>,
<a href="https://huggingface.co/sentence-transformers/paraphrase-MiniLM-L6-v2" target="_blank">Model Card</a>).</li>
<li><b>multi-qa-MiniLM-L6-cos-v1:</b> Fine-tuned for question-answering tasks using datasets like SQuAD and Natural Questions.
(<a href="https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1" target="_blank">Model Card</a>).</li>
<li><b>all-mpnet-base-v2:</b> Robust embeddings for high-contextualized tasks.
(<a href="https://arxiv.org/abs/2004.09297" target="_blank">Research Paper</a>,
<a href="https://huggingface.co/sentence-transformers/all-mpnet-base-v2" target="_blank">Model Card</a>).</li>
<li><b>paraphrase-mpnet-base-v2:</b> Reliable for paraphrase tasks, trained on diverse datasets.
(<a href="https://huggingface.co/sentence-transformers/paraphrase-mpnet-base-v2" target="_blank">Model Card</a>).</li>
<li><b>all-distilroberta-v1:</b> A lightweight RoBERTa-based model for sentence embeddings.
(<a href="https://arxiv.org/abs/1907.11692" target="_blank">Research Paper</a>,
<a href="https://huggingface.co/sentence-transformers/all-distilroberta-v1" target="_blank">Model Card</a>).</li>
<li><b>paraphrase-albert-small-v2:</b> Suitable for paraphrasing in resource-constrained environments.
(<a href="https://huggingface.co/sentence-transformers/paraphrase-albert-small-v2" target="_blank">Model Card</a>).</li>
<li><b>multi-qa-distilbert-cos-v1:</b> Optimized for multilingual question-answering tasks.
(<a href="https://huggingface.co/sentence-transformers/multi-qa-distilbert-cos-v1" target="_blank">Model Card</a>).</li>
<li><b>distiluse-base-multilingual-cased-v2:</b> Trained on multilingual datasets for cross-lingual embeddings.
(<a href="https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2" target="_blank">Model Card</a>).</li>
<li><b>all-MiniLM-L12-v2:</b> Deeper MiniLM variant for enhanced contextual understanding.
(<a href="https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2" target="_blank">Model Card</a>).</li>
</ul>
<li><b>Metrics:</b>
<ul>
<li><b>Average:</b> Mean similarity score for each job description.</li>
<li><b>Variance:</b> Variability in the similarity scores.</li>
<li><b>Median:</b> Middle value of the similarity scores.</li>
<li><b>Standard Deviation:</b> Spread of the similarity scores around the mean.</li>
<li><b>Certainty Score:</b> Indicates model agreement, with 1 being the highest consensus.</li>
</ul>
</li>
</ul>
<p>If you liked this application, feel free to send your feedback, suggestions, or adulations to
<b>21f1006368@ds.study.iitm.ac.in</b>.</p>
"""


def compute_similarity(resume_text, job_list):
    """Score a resume against each job description with every loaded model.

    Args:
        resume_text: Resume text; must contain non-whitespace characters.
        job_list: Iterable of job-description strings; blank entries are
            dropped before scoring.

    Returns:
        ``(reasoning_html, table_html)``. On invalid input or on any exception
        the first element is an error message and the second is ``None``.
        Otherwise the table has one row per model and one column per job,
        followed by metric rows computed per job across the models.
    """
    try:
        if not resume_text.strip():
            return "<b>Error:</b> Resume cannot be empty.", None

        # Filter out empty strings from the list
        job_list = [j for j in job_list if j.strip()]
        if not job_list:
            return "<b>Error:</b> Please add at least one job description.", None

        # The batch is identical for every model, so build it once.
        documents = [resume_text] + job_list
        results = {}
        for model_name, model in models.items():
            embeddings = model.encode(documents)
            # Row 0 is the resume; the remaining rows are the jobs.
            results[model_name] = cosine_similarity(
                [embeddings[0]], embeddings[1:]
            ).flatten()

        job_labels = [f"Job {i}" for i in range(1, len(job_list) + 1)]
        df = pd.DataFrame(results, index=job_labels).T  # rows: models, cols: jobs
        model_count = len(df)

        variance = df.var(axis=0)
        max_variance = variance.max()
        if max_variance == 0:
            # Every model agrees on every job; avoid the 0/0 -> NaN division
            # and report full consensus.
            certainty = pd.Series(1.0, index=variance.index)
        else:
            certainty = 1 - (variance / max_variance)

        metrics = {
            "Average": df.mean(axis=0),
            "Variance": variance,
            "Median": df.median(axis=0),
            "Standard Deviation": df.std(axis=0),
            "Certainty Score": certainty,
        }
        for metric_name, values in metrics.items():
            df.loc[metric_name] = values

        # Split by the number of model rows rather than a hard-coded metric
        # count, so the slices stay correct if metrics are added or removed.
        model_rows = df.iloc[:model_count]
        metrics_rows = df.iloc[model_count:]
        styled_df = model_rows.style.background_gradient(cmap="Greens", axis=None).to_html()
        styled_df += metrics_rows.to_html(header=False)

        best_job = metrics["Average"].idxmax()
        reasoning = f"<b>The best job match is {best_job} based on the highest average similarity score.</b>"
        return reasoning, styled_df + _SIMILARITY_TABLE_DESCRIPTION
    except Exception as e:
        return f"<b>Error during computation:</b> {str(e)}", None
###############################
# APPROACH A EXPLANATION
###############################
def explain_scores_by_sentences(model, resume_text, job_text, top_k=3):
    """Return HTML listing the most similar (resume sentence, job sentence) pairs.

    Args:
        model: Object whose ``encode(list_of_str)`` returns one embedding per
            sentence.
        resume_text: Resume text, split into sentences with NLTK.
        job_text: Job description text, split the same way.
        top_k: Maximum number of sentence pairs to report.

    Returns:
        An HTML fragment, or a short HTML message when either text yields no
        sentences.
    """
    from nltk.tokenize import sent_tokenize

    resume_sents = sent_tokenize(resume_text)
    job_sents = sent_tokenize(job_text)
    if not resume_sents or not job_sents:
        return "<b>No sentences found in resume or job description.</b>"

    # sim_matrix[i, j] is the similarity of resume sentence i and job sentence j.
    sim_matrix = cosine_similarity(model.encode(resume_sents), model.encode(job_sents))

    # argsort is ascending, so reverse it to take the highest scores first.
    top_flat_indices = np.argsort(sim_matrix.flatten())[::-1][:top_k]

    parts = ["<h4>Top Similar Sentence Pairs</h4>"]
    for rank, flat_idx in enumerate(top_flat_indices, start=1):
        row, col = np.unravel_index(flat_idx, sim_matrix.shape)
        score = sim_matrix[row, col]
        # Sentences are user-supplied text: escape them before embedding in HTML.
        resume_sentence = html.escape(resume_sents[row])
        job_sentence = html.escape(job_sents[col])
        parts.append(f"""
<p><b>#{rank}:</b><br>
<b>Resume:</b> {resume_sentence}<br>
<b>Job:</b> {job_sentence}<br>
<b>Similarity Score:</b> {score:.4f}</p>
""")
    return "".join(parts)
def explain_model_scores(model_name, resume, job_list, selected_job_idx, top_k=3):
    """Build the explanation-panel update for one model and one job.

    Looks up *model_name* in ``models``, validates the job list and the
    0-based *selected_job_idx*, then delegates to
    ``explain_scores_by_sentences``. Always returns a ``gr.update`` that makes
    the panel visible; problems are reported as HTML inside it.
    """

    def _panel(message_html):
        return gr.update(value=message_html, visible=True)

    try:
        model = models[model_name]

        # Validate inputs
        if not job_list:
            return _panel("<b>Error:</b> No jobs added to analyze.")

        selected_job_idx = int(selected_job_idx)
        if not 0 <= selected_job_idx < len(job_list):
            return _panel(
                f"<b>Error:</b> Invalid job index {selected_job_idx}. You only have {len(job_list)} jobs."
            )

        resume_text = resume.strip()
        job_text = job_list[selected_job_idx].strip()
        if not resume_text:
            return _panel("<b>No resume text provided.</b>")

        return _panel(explain_scores_by_sentences(model, resume_text, job_text, top_k))
    except Exception as e:
        return _panel(f"<b>Error in explanation:</b> {e}")
###############################
# GRADIO APP LOGIC
###############################
# --- Job List Management ---
def _render_job_list(jobs):
    """Return the HTML shown in the "added jobs" panel for *jobs*."""
    if not jobs:
        return "<i>No jobs added yet...</i>"
    items = []
    for number, job in enumerate(jobs, start=1):
        snippet = job[:100] + "..." if len(job) > 100 else job
        # Job text is user-supplied: escape it before embedding in HTML.
        items.append(f"<li><b>Job {number}:</b> {html.escape(snippet)}</li>")
    return "<ul>" + "".join(items) + "</ul>"


def add_job_to_list(current_job, job_list):
    """Append *current_job* to the job list and refresh the display.

    Args:
        current_job: Text from the single-job input box.
        job_list: Jobs added so far; not mutated.

    Returns:
        ``(updated_list, display_html, "")`` — the trailing empty string clears
        the input box. Blank input leaves the list unchanged, and the display
        is re-rendered from the existing list (previously the raw list object
        was returned in the HTML slot).
    """
    if not current_job.strip():
        return job_list, _render_job_list(job_list), ""

    updated_list = job_list + [current_job]
    return updated_list, _render_job_list(updated_list), ""
def clear_jobs():
    """Return a fresh empty job list, the placeholder HTML, and an empty input value."""
    emptied_jobs = []
    placeholder_html = "<i>No jobs added yet...</i>"
    cleared_input = ""
    return emptied_jobs, placeholder_html, cleared_input
# --- Main Processing ---
def process_and_display(resume, job_list, request=None):
    """Log the input, run the similarity computation and stream UI updates.

    Generator. First yields a "Processing..." state, then a final 4-tuple:
    processing-banner update, recommendation HTML, table HTML, and an update
    for the details button.

    Args:
        resume: Resume text.
        job_list: List of job-description strings.
        request: Optional object exposing ``client.host``; used only to pick
            the IP written to the log.

    Raises:
        Re-raises any exception from logging or computation after recording it.

    NOTE(review): Gradio documents request injection for parameters annotated
    with ``gr.Request``. This parameter is unannotated, so it is presumably
    always ``None`` when called from the click handler and the logged IP is
    the server's own — confirm.
    """
    user_ip = "Unknown IP"
    try:
        try:
            if request and hasattr(request, "client"):
                user_ip = request.client.host
            else:
                user_ip = socket.gethostbyname(socket.gethostname())
        except (OSError, AttributeError):
            # The IP is only used for logging; a failed lookup must not abort
            # the request (log_action treats the same lookup as best-effort).
            user_ip = "Unknown IP"

        log_action(f"Process and display triggered for IP: {user_ip}")
        log_input_text(resume, job_list, user_ip=user_ip)

        yield gr.update(value="<b>Processing...</b>", visible=True), None, None, gr.update(visible=False)

        reasoning, table = compute_similarity(resume, job_list)
        if table:
            log_action(f"Successfully processed and displayed results for IP: {user_ip}")
            yield (
                gr.update(value="", visible=False),
                reasoning,
                table,
                gr.update(value="Papa Please Preach More", visible=True),
            )
        else:
            log_action(f"Error: No results to display for IP: {user_ip}")
            yield (
                gr.update(value="", visible=False),
                reasoning,
                "<p>No results to display.</p>",
                gr.update(visible=False),
            )
    except Exception as e:
        log_action(f"Error during process for IP {user_ip}: {str(e)}")
        # Bare raise keeps the original traceback intact.
        raise
def show_details(table):
    """Return an update that makes the table component visible with *table* as its value."""
    reveal_table = gr.update(value=table, visible=True)
    return reveal_table
# Invite code that unlocks the main UI. It can be overridden through the
# INVITE_CODE environment variable so the value need not live in source
# control; the historical default is kept as the fallback.
INVITE_CODE = os.getenv("INVITE_CODE") or "INDIAMBA"

###############################
# BUILD THE GRADIO INTERFACE
###############################
with gr.Blocks(css="""
.job-display { border: 1px solid #ccc; padding: 10px; border-radius: 5px; background: #f9f9f9; min-height: 50px; }
""") as app:
    # State to hold the list of jobs
    job_list_state = gr.State([])
    # Created inside the Blocks context (it used to be created at module level)
    # so that it is registered with this app and can be an event output.
    # NOTE(review): this flag is written by check_invite but not read by any
    # handler in this file — the invite code only toggles visibility of the
    # main UI.
    access_granted = gr.State(False)

    gr.Markdown("# Second Opinion with Naval v1.2 – “Morning Daze”")
    gr.Markdown("Youtube Video: https://www.youtube.com/watch?v=khGulN2vAyY ")
    gr.Markdown("🔐 This app requires an invite code to continue. Ask Naval if you don't have one.")

    with gr.Row():
        code_input = gr.Textbox(label="Enter Invite Code", type="password", placeholder="Ask Naval for access code")
        access_button = gr.Button("Submit")
    access_warning = gr.Markdown(value="Access denied. Please enter the correct invite code.", visible=False)

    # Everything below stays hidden until the invite code is accepted.
    main_ui = gr.Group(visible=False)
    with main_ui:
        gr.Markdown("### ✍️ Input Section")
        with gr.Row():
            # Left Column: Resume
            with gr.Column(scale=1):
                resume_input = gr.Textbox(label="Paste Resume", lines=15, placeholder="Paste your resume here...")
            # Right Column: Dynamic Job Entry
            with gr.Column(scale=1):
                gr.Markdown("#### Add Job Descriptions")
                single_job_input = gr.Textbox(label="Paste ONE Job Description Here", lines=5, placeholder="Paste a single JD here and click 'Add Job' below.")
                with gr.Row():
                    add_job_btn = gr.Button("➕ Add Job", variant="secondary")
                    clear_jobs_btn = gr.Button("🗑️ Clear All Jobs", variant="stop")
                gr.Markdown("#### Your Added Jobs List:")
                job_display_area = gr.HTML(value="<i>No jobs added yet...</i>", elem_classes="job-display")

        gr.Markdown("---")
        gr.Markdown("### 🔍 Matching Section")
        with gr.Row():
            match_button = gr.Button("Match My Resume to Added Jobs", variant="primary")
        processing_output = gr.HTML(value="", visible=False)
        with gr.Row():
            recommendation_output = gr.HTML(label="Recommendation", visible=True)
        with gr.Row():
            table_output = gr.HTML(label="Similarity Table", visible=False)
        with gr.Row():
            nerd_button = gr.Button("Papa Please Preach More", visible=False)

        gr.Markdown("---")
        gr.Markdown("### 📊 Explanation Section")
        explanation_output = gr.HTML(label="Model Explanation", visible=False)

        # Event: Add Job
        add_job_btn.click(
            add_job_to_list,
            inputs=[single_job_input, job_list_state],
            outputs=[job_list_state, job_display_area, single_job_input],  # Update state, display, and clear input
        )
        # Event: Clear Jobs
        clear_jobs_btn.click(
            clear_jobs,
            inputs=[],
            outputs=[job_list_state, job_display_area, single_job_input],
        )
        # Event: Match
        match_button.click(
            process_and_display,
            inputs=[resume_input, job_list_state],
            outputs=[processing_output, recommendation_output, table_output, nerd_button],
        )
        # The table is rendered hidden; this button reveals it.
        nerd_button.click(show_details, inputs=[table_output], outputs=[table_output])

        with gr.Row():
            job_index_to_explain = gr.Number(label="Job Index (1-based for humans, but logic uses 0-based)", value=0, precision=0)
        with gr.Row():
            # One "Explain" button per model. m=m_name binds the current name
            # as a default so each handler keeps its own model.
            for m_name in models:
                btn = gr.Button(f"Explain {m_name}")
                btn.click(
                    fn=lambda resume, jobs, idx, m=m_name: explain_model_scores(m, resume, jobs, idx),
                    inputs=[resume_input, job_list_state, job_index_to_explain],
                    outputs=[explanation_output],
                )

    def check_invite(user_code):
        """Return (granted flag, warning update, main-UI update) for *user_code*."""
        granted = user_code.strip() == INVITE_CODE
        # Exactly one of the warning and the main UI is visible afterwards.
        return granted, gr.update(visible=not granted), gr.update(visible=granted)

    access_button.click(fn=check_invite, inputs=[code_input], outputs=[access_granted, access_warning, main_ui])

app.launch()