Spaces:

singhn9
/

privateSOWN

Running

App Files Files Community

privateSOWN / app.py

singhn9

Update app.py

2db4220 verified 22 days ago

raw

history blame contribute delete

18.5 kB

	import gradio as gr
	from sentence_transformers import SentenceTransformer
	from sklearn.metrics.pairwise import cosine_similarity
	import pandas as pd
	import numpy as np
	import os
	from datetime import datetime
	import socket
	import nltk
	import threading
	import time
	from huggingface_hub import HfApi, HfFolder

	# Sentence tokenization (nltk.tokenize.sent_tokenize) needs these NLTK
	# resources; they are requested once at import time, in this order.
	for _punkt_resource in ("punkt", "punkt_tab"):
	    nltk.download(_punkt_resource)

	###############################
	# LOGGING SETUP
	###############################
	# Location of the append-only log file; the directory is created up front
	# so later open(..., 'a') calls do not fail on a missing folder.
	log_dir = "./logs"
	log_file_path = os.path.join(log_dir, "Second_Opinion_Logs.log")
	os.makedirs(os.path.dirname(log_file_path), exist_ok=True)

	def upload_logs_to_hf():
	    """Push the local log file to the ``singhn9/privateSOWN`` Space repo.

	    The token comes from ``HfFolder.get_token()`` and, when that yields
	    nothing, from the ``HUGGINGFACE_HUB_TOKEN`` environment variable.
	    Without a token the upload is skipped. Any exception is reported on
	    stdout and swallowed, so a failed upload never interrupts the caller.
	    """
	    try:
	        hub_client = HfApi()
	        auth_token = HfFolder.get_token()
	        if not auth_token:
	            auth_token = os.getenv("HUGGINGFACE_HUB_TOKEN")

	        if auth_token:
	            hub_client.upload_file(
	                path_or_fileobj=log_file_path,
	                path_in_repo="logs/Second_Opinion_Logs.log",
	                repo_id="singhn9/privateSOWN",
	                repo_type="space",
	                token=auth_token,
	            )
	            print("Logs uploaded to Hugging Face repo.")
	        else:
	            print(" No HF token found — skipping upload.")
	    except Exception as upload_error:
	        print(f" Log upload failed: {upload_error}")

	def background_log_uploader(interval=300):
	    """Loop forever, uploading the log file every ``interval`` seconds.

	    Each cycle sleeps first, then uploads only if the log file exists.
	    The function never returns, so it is meant to run on its own thread.
	    """
	    while True:
	        time.sleep(interval)
	        if not os.path.exists(log_file_path):
	            # Nothing has been logged yet; wait for the next cycle.
	            continue
	        upload_logs_to_hf()

	# Run the periodic uploader on a daemon thread so it does not keep the
	# interpreter alive at shutdown.
	_log_upload_thread = threading.Thread(target=background_log_uploader, daemon=True)
	_log_upload_thread.start()

	def log_action(action, request=None):
	    """Append one timestamped line describing ``action`` to the log file.

	    Args:
	        action: Free-text description of what happened.
	        request: Optional request-like object. When it is truthy and has a
	            ``client`` attribute, ``request.client.host`` is logged as the
	            IP. Otherwise the address this machine's own hostname resolves
	            to is used.

	    Logging is best-effort: failures resolving the IP fall back to
	    ``"Unknown IP"`` and failures writing the file are printed, not raised.
	    """
	    # Local import: the file only imports ``datetime`` from the module.
	    from datetime import timezone

	    try:
	        if request and hasattr(request, 'client'):
	            user_ip = request.client.host
	        else:
	            user_ip = socket.gethostbyname(socket.gethostname())
	    except Exception:
	        user_ip = "Unknown IP"

	    # datetime.utcnow() is deprecated; an aware UTC "now" formats identically.
	    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
	    log_entry = f"{timestamp} (GMT) - IP: {user_ip} - {action}\n"

	    try:
	        # Explicit UTF-8 so non-ASCII action text cannot fail on a host
	        # whose default locale encoding is narrower.
	        with open(log_file_path, 'a', encoding="utf-8") as log_file:
	            log_file.write(log_entry)
	        print(f"Log entry added: {log_entry.strip()}")
	    except Exception as e:
	        print(f"Error logging action: {e}")

	def log_input_text(resume_text, job_list, user_ip="Unknown IP"):
	    """Append the full resume and job-description input to the log file.

	    Args:
	        resume_text: The resume text as submitted.
	        job_list: A list/tuple of job descriptions (joined with a ``---``
	            separator line) or any other value, which is logged via
	            ``str()``.
	        user_ip: IP string recorded in the entry header.

	    Logging is best-effort: any exception is printed and swallowed.
	    """
	    # Local import: the file only imports ``datetime`` from the module.
	    from datetime import timezone

	    try:
	        # datetime.utcnow() is deprecated; an aware UTC "now" formats identically.
	        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")

	        if isinstance(job_list, (list, tuple)):
	            # str() each item so one non-string entry cannot abort the log.
	            jobs_str = "\n---\n".join(str(job) for job in job_list)
	        else:
	            jobs_str = str(job_list)

	        log_entry = (
	            f"{timestamp} (GMT) - IP: {user_ip}\n"
	            f"--- Resume Input ---\n{resume_text}\n\n"
	            f"--- Job Descriptions Input ---\n{jobs_str}\n"
	            "---------------------------------------------\n"
	        )
	        # Resumes commonly contain bullets, accents, etc.; write UTF-8
	        # explicitly so the entry is not lost on a non-UTF-8 locale.
	        with open(log_file_path, 'a', encoding="utf-8") as log_file:
	            log_file.write(log_entry)
	        print(f"✅ Logged full user input at {timestamp} from {user_ip}")
	    except Exception as e:
	        print(f"Error logging input: {e}")

	###############################
	# LOAD MODELS
	###############################
	# Every SentenceTransformer checkpoint is loaded eagerly at import time and
	# keyed by its own name; insertion order defines the row order of the
	# similarity table and the order of the "Explain ..." buttons.
	models = {
	    checkpoint: SentenceTransformer(checkpoint)
	    for checkpoint in (
	        "all-MiniLM-L6-v2",
	        "paraphrase-MiniLM-L6-v2",
	        "multi-qa-MiniLM-L6-cos-v1",
	        "all-mpnet-base-v2",
	        "paraphrase-mpnet-base-v2",
	        "all-distilroberta-v1",
	        "paraphrase-albert-small-v2",
	        "multi-qa-distilbert-cos-v1",
	        "distiluse-base-multilingual-cased-v2",
	        "all-MiniLM-L12-v2",
	    )
	}

	###############################
	# MAIN SIMILARITY FUNCTION
	###############################
	def compute_similarity(resume_text, job_list):
	    """Score a resume against each job description with every loaded model.

	    Args:
	        resume_text: The resume as a single string.
	        job_list: List of job-description strings; blank entries are dropped.

	    Returns:
	        A ``(reasoning_html, table_html)`` tuple. ``table_html`` is the
	        styled per-model similarity table, the summary-metric rows and a
	        static explanation. On invalid input or any exception the first
	        element is an error message and the second is ``None``.
	    """
	    try:
	        if not resume_text.strip():
	            return "<b>Error:</b> Resume cannot be empty.", None

	        # Filter out empty strings from the list
	        job_list = [j for j in job_list if j.strip()]

	        if not job_list:
	            return "<b>Error:</b> Please add at least one job description.", None

	        # The document batch is the same for every model, so build it once.
	        documents = [resume_text] + job_list

	        results = {}
	        for model_name, model in models.items():
	            embeddings = model.encode(documents)
	            resume_embedding = embeddings[0]
	            job_embeddings = embeddings[1:]
	            results[model_name] = cosine_similarity([resume_embedding], job_embeddings).flatten()

	        # Rows = models, columns = jobs.
	        df = pd.DataFrame(results, index=[f"Job {i+1}" for i in range(len(job_list))]).T

	        variance = df.var(axis=0)
	        max_variance = variance.max()
	        if max_variance == 0:
	            # All models agree exactly on every job: treat as full consensus
	            # instead of producing 0/0 = NaN.
	            certainty = pd.Series(1.0, index=variance.index)
	        else:
	            certainty = 1 - (variance / max_variance)

	        # All metrics are computed from the model rows only, before any
	        # metric row is appended to the frame.
	        metrics = {
	            "Average": df.mean(axis=0),
	            "Variance": variance,
	            "Median": df.median(axis=0),
	            "Standard Deviation": df.std(axis=0),
	            "Certainty Score": certainty,
	        }
	        for metric_name, values in metrics.items():
	            df.loc[metric_name] = values

	        # Split on the actual number of metric rows rather than a magic 5.
	        metric_count = len(metrics)
	        model_rows = df.iloc[:-metric_count]
	        metrics_rows = df.iloc[-metric_count:]

	        styled_df = model_rows.style.background_gradient(cmap="Greens", axis=None).to_html()
	        styled_df += metrics_rows.to_html(header=False)

	        best_job = metrics["Average"].idxmax()
	        reasoning = f"<b>The best job match is {best_job} based on the highest average similarity score.</b>"

	        # --- RESTORED FULL DESCRIPTION AND REFERENCES ---
	        description = """
	<p><b>Explanation of the Table:</b></p>
	<ul>
	<li><b>Models:</b> Each row corresponds to a pre-trained model used for computing similarity. Below are details about each model:</li>
	<ul>
	<li><b>all-MiniLM-L6-v2:</b> Trained on NLI and STS datasets. Developed by Hugging Face and Microsoft.
	(<a href="https://arxiv.org/abs/2012.15832" target="_blank">Research Paper</a>,
	<a href="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2" target="_blank">Model Card</a>).</li>
	<li><b>paraphrase-MiniLM-L6-v2:</b> Optimized for paraphrase detection on datasets like Quora Questions and MSRPC.
	(<a href="https://arxiv.org/abs/2012.15832" target="_blank">Research Paper</a>,
	<a href="https://huggingface.co/sentence-transformers/paraphrase-MiniLM-L6-v2" target="_blank">Model Card</a>).</li>
	<li><b>multi-qa-MiniLM-L6-cos-v1:</b> Fine-tuned for question-answering tasks using datasets like SQuAD and Natural Questions.
	(<a href="https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1" target="_blank">Model Card</a>).</li>
	<li><b>all-mpnet-base-v2:</b> Robust embeddings for high-contextualized tasks.
	(<a href="https://arxiv.org/abs/2004.09297" target="_blank">Research Paper</a>,
	<a href="https://huggingface.co/sentence-transformers/all-mpnet-base-v2" target="_blank">Model Card</a>).</li>
	<li><b>paraphrase-mpnet-base-v2:</b> Reliable for paraphrase tasks, trained on diverse datasets.
	(<a href="https://huggingface.co/sentence-transformers/paraphrase-mpnet-base-v2" target="_blank">Model Card</a>).</li>
	<li><b>all-distilroberta-v1:</b> A lightweight RoBERTa-based model for sentence embeddings.
	(<a href="https://arxiv.org/abs/1907.11692" target="_blank">Research Paper</a>,
	<a href="https://huggingface.co/sentence-transformers/all-distilroberta-v1" target="_blank">Model Card</a>).</li>
	<li><b>paraphrase-albert-small-v2:</b> Suitable for paraphrasing in resource-constrained environments.
	(<a href="https://huggingface.co/sentence-transformers/paraphrase-albert-small-v2" target="_blank">Model Card</a>).</li>
	<li><b>multi-qa-distilbert-cos-v1:</b> Optimized for multilingual question-answering tasks.
	(<a href="https://huggingface.co/sentence-transformers/multi-qa-distilbert-cos-v1" target="_blank">Model Card</a>).</li>
	<li><b>distiluse-base-multilingual-cased-v2:</b> Trained on multilingual datasets for cross-lingual embeddings.
	(<a href="https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2" target="_blank">Model Card</a>).</li>
	<li><b>all-MiniLM-L12-v2:</b> Deeper MiniLM variant for enhanced contextual understanding.
	(<a href="https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2" target="_blank">Model Card</a>).</li>
	</ul>
	<li><b>Metrics:</b>
	<ul>
	<li><b>Average:</b> Mean similarity score for each job description.</li>
	<li><b>Variance:</b> Variability in the similarity scores.</li>
	<li><b>Median:</b> Middle value of the similarity scores.</li>
	<li><b>Standard Deviation:</b> Spread of the similarity scores around the mean.</li>
	<li><b>Certainty Score:</b> Indicates model agreement, with 1 being the highest consensus.</li>
	</ul>
	</li>
	</ul>
	<p>If you liked this application, feel free to send your feedback, suggestions, or adulations to
	<b>21f1006368@ds.study.iitm.ac.in</b>.</p>
	"""

	        return reasoning, styled_df + description

	    except Exception as e:
	        return f"<b>Error during computation:</b> {str(e)}", None

	###############################
	# APPROACH A EXPLANATION
	###############################
	def explain_scores_by_sentences(model, resume_text, job_text, top_k=3):
	    """Return HTML listing the most similar resume/job sentence pairs.

	    Both texts are split into sentences, each sentence is embedded with
	    ``model.encode``, and the ``top_k`` highest cosine similarities across
	    all (resume sentence, job sentence) pairs are rendered.

	    Args:
	        model: Object exposing ``encode(list_of_str)``.
	        resume_text: Resume text.
	        job_text: Job description text.
	        top_k: Number of sentence pairs to show.

	    Returns:
	        An HTML string, or a short HTML notice when either text yields no
	        sentences.
	    """
	    from html import escape
	    from nltk.tokenize import sent_tokenize

	    resume_sents = sent_tokenize(resume_text)
	    job_sents = sent_tokenize(job_text)

	    if not resume_sents or not job_sents:
	        return "<b>No sentences found in resume or job description.</b>"

	    resume_embeddings = model.encode(resume_sents)
	    job_embeddings = model.encode(job_sents)

	    # sim_matrix[r, c] = similarity of resume sentence r and job sentence c.
	    sim_matrix = cosine_similarity(resume_embeddings, job_embeddings)
	    flat_sim = sim_matrix.flatten()
	    top_k_indices = np.argsort(flat_sim)[::-1][:top_k]

	    # Row-major flattening: flat index = row * n_columns + col.
	    n_columns = sim_matrix.shape[1]

	    explanation_html = "<h4>Top Similar Sentence Pairs</h4>"
	    for rank, idx in enumerate(top_k_indices, start=1):
	        row, col = divmod(int(idx), n_columns)
	        score = sim_matrix[row, col]

	        # The sentences are user-supplied text going into HTML: escape them
	        # so characters such as '<' or '&' render literally.
	        resume_sentence = escape(resume_sents[row])
	        job_sentence = escape(job_sents[col])
	        explanation_html += f"""
	<p><b>#{rank}:</b><br>
	<b>Resume:</b> {resume_sentence}<br>
	<b>Job:</b> {job_sentence}<br>
	<b>Similarity Score:</b> {score:.4f}</p>
	"""
	    return explanation_html

	def explain_model_scores(model_name, resume, job_list, selected_job_idx, top_k=3):
	    """Build the sentence-level explanation for one model and one job.

	    Args:
	        model_name: Key into the module-level ``models`` dict.
	        resume: Resume text.
	        job_list: List of job-description strings.
	        selected_job_idx: Zero-based index into ``job_list``; coerced
	            with ``int()``.
	        top_k: Number of sentence pairs to show.

	    Returns:
	        A ``gr.update`` that makes the target component visible with either
	        the explanation HTML or an error message. Exceptions are reported
	        in the message rather than raised.
	    """
	    def _visible(message):
	        # Every outcome is delivered as a visible update with some HTML.
	        return gr.update(value=message, visible=True)

	    try:
	        chosen_model = models[model_name]

	        # Validate inputs
	        if not job_list:
	            return _visible("<b>Error:</b> No jobs added to analyze.")

	        selected_job_idx = int(selected_job_idx)
	        job_count = len(job_list)
	        if not 0 <= selected_job_idx < job_count:
	            return _visible(f"<b>Error:</b> Invalid job index {selected_job_idx}. You only have {job_count} jobs.")

	        resume_text = resume.strip()
	        job_text = job_list[selected_job_idx].strip()

	        if resume_text:
	            return _visible(explain_scores_by_sentences(chosen_model, resume_text, job_text, top_k))
	        return _visible("<b>No resume text provided.</b>")

	    except Exception as failure:
	        return _visible(f"<b>Error in explanation:</b> {str(failure)}")

	###############################
	# GRADIO APP LOGIC
	###############################

	# --- Job List Management ---
	def _render_job_list(jobs):
	    """Return the HTML for the 'added jobs' panel listing ``jobs``."""
	    from html import escape

	    if not jobs:
	        return "<i>No jobs added yet...</i>"

	    items = []
	    for position, job in enumerate(jobs, start=1):
	        # Preview only the first 100 characters of long descriptions.
	        snippet = job[:100] + "..." if len(job) > 100 else job
	        # Job text is user input: escape it so '<' / '&' render literally.
	        items.append(f"<li><b>Job {position}:</b> {escape(snippet)}</li>")
	    return "<ul>" + "".join(items) + "</ul>"


	def add_job_to_list(current_job, job_list):
	    """Append ``current_job`` to the job list and re-render the job panel.

	    Args:
	        current_job: Text of the job description to add.
	        job_list: Current list of job descriptions (not mutated).

	    Returns:
	        ``(jobs, panel_html, "")``: the resulting list, the HTML for the
	        job display panel, and an empty string that clears the input box.
	        When ``current_job`` is empty or whitespace the list is returned
	        unchanged, and the panel still receives HTML (previously it
	        received the raw list object).
	    """
	    if job_list is None:
	        job_list = []

	    if not current_job or not current_job.strip():
	        return job_list, _render_job_list(job_list), ""

	    # Build a new list instead of appending so the caller's list is untouched.
	    updated_list = job_list + [current_job]
	    return updated_list, _render_job_list(updated_list), ""

	def clear_jobs():
	    """Return the reset values for the job list, job panel and input box."""
	    emptied_jobs = []
	    placeholder_html = "<i>No jobs added yet...</i>"
	    blank_input = ""
	    return emptied_jobs, placeholder_html, blank_input

	# --- Main Processing ---
	def process_and_display(resume, job_list, request: gr.Request = None):
	    """Run the resume/job matching and stream UI updates (generator).

	    Args:
	        resume: Resume text.
	        job_list: List of job-description strings.
	        request: The incoming request. It is annotated as ``gr.Request`` so
	            that Gradio supplies it; without the annotation the parameter
	            stays ``None`` and the server's own address is logged.

	    Yields:
	        4-tuples of ``(processing update, recommendation HTML, table HTML,
	        details-button update)``: first a "Processing..." state, then the
	        final result (or a "no results" state when no table was produced).

	    Raises:
	        Re-raises any exception after logging it.
	    """
	    # Bound before the try so the except branch can always reference it.
	    user_ip = "Unknown IP"
	    try:
	        if request and hasattr(request, 'client'):
	            user_ip = request.client.host
	        else:
	            user_ip = socket.gethostbyname(socket.gethostname())

	        log_action(f"Process and display triggered for IP: {user_ip}", request=request)
	        log_input_text(resume, job_list, user_ip=user_ip)

	        yield gr.update(value="<b>Processing...</b>", visible=True), None, None, gr.update(visible=False)

	        reasoning, table = compute_similarity(resume, job_list)

	        if table:
	            log_action(f"Successfully processed and displayed results for IP: {user_ip}", request=request)
	            yield (
	                gr.update(value="", visible=False),
	                reasoning,
	                table,
	                gr.update(value="Papa Please Preach More", visible=True),
	            )
	        else:
	            # compute_similarity reports problems through `reasoning`.
	            log_action(f"Error: No results to display for IP: {user_ip}", request=request)
	            yield (
	                gr.update(value="", visible=False),
	                reasoning,
	                "<p>No results to display.</p>",
	                gr.update(visible=False),
	            )
	    except Exception as e:
	        log_action(f"Error during process for IP {user_ip}: {str(e)}", request=request)
	        # Bare raise keeps the original traceback intact.
	        raise

	def show_details(table):
	    """Return an update that shows a component with ``table`` as its value."""
	    reveal = gr.update(value=table, visible=True)
	    return reveal

	# Invite code that unlocks the main UI. It can be overridden with the
	# INVITE_CODE environment variable; the original value stays the default.
	INVITE_CODE = os.getenv("INVITE_CODE", "INDIAMBA")

	###############################
	# BUILD THE GRADIO INTERFACE
	###############################
	with gr.Blocks(css="""
	.job-display { border: 1px solid #ccc; padding: 10px; border-radius: 5px; background: #f9f9f9; min-height: 50px; }
	""") as app:

	    # Per-session flag written by check_invite. It is created inside the
	    # Blocks context so it belongs to this app like every other component
	    # used as an event output.
	    access_granted = gr.State(False)

	    # State to hold the list of jobs
	    job_list_state = gr.State([])

	    gr.Markdown("# Second Opinion with Naval v1.2 – “Morning Daze”")
	    gr.Markdown("Youtube Video: https://www.youtube.com/watch?v=khGulN2vAyY ")
	    gr.Markdown("🔐 This app requires an invite code to continue. Ask Naval if you don't have one.")

	    with gr.Row():
	        code_input = gr.Textbox(label="Enter Invite Code", type="password", placeholder="Ask Naval for access code")
	        access_button = gr.Button("Submit")

	    access_warning = gr.Markdown(value="Access denied. Please enter the correct invite code.", visible=False)

	    # Everything below stays hidden until the invite code is accepted.
	    # NOTE(review): this only toggles visibility; confirm whether the event
	    # handlers also need to check access_granted.
	    main_ui = gr.Group(visible=False)

	    with main_ui:
	        gr.Markdown("### ✍️ Input Section")

	        with gr.Row():
	            # Left Column: Resume
	            with gr.Column(scale=1):
	                resume_input = gr.Textbox(label="Paste Resume", lines=15, placeholder="Paste your resume here...")

	            # Right Column: Dynamic Job Entry
	            with gr.Column(scale=1):
	                gr.Markdown("#### Add Job Descriptions")
	                single_job_input = gr.Textbox(label="Paste ONE Job Description Here", lines=5, placeholder="Paste a single JD here and click 'Add Job' below.")

	                with gr.Row():
	                    add_job_btn = gr.Button("➕ Add Job", variant="secondary")
	                    clear_jobs_btn = gr.Button("🗑️ Clear All Jobs", variant="stop")

	                gr.Markdown("#### Your Added Jobs List:")
	                job_display_area = gr.HTML(value="<i>No jobs added yet...</i>", elem_classes="job-display")

	        gr.Markdown("---")

	        gr.Markdown("### 🔍 Matching Section")
	        with gr.Row():
	            match_button = gr.Button("Match My Resume to Added Jobs", variant="primary")
	            processing_output = gr.HTML(value="", visible=False)

	        with gr.Row():
	            recommendation_output = gr.HTML(label="Recommendation", visible=True)
	        with gr.Row():
	            table_output = gr.HTML(label="Similarity Table", visible=False)

	        with gr.Row():
	            nerd_button = gr.Button("Papa Please Preach More", visible=False)

	        gr.Markdown("---")

	        gr.Markdown("### 📊 Explanation Section")
	        explanation_output = gr.HTML(label="Model Explanation", visible=False)

	        # Event: Add Job
	        add_job_btn.click(
	            add_job_to_list,
	            inputs=[single_job_input, job_list_state],
	            outputs=[job_list_state, job_display_area, single_job_input]  # Update state, display, and clear input
	        )

	        # Event: Clear Jobs
	        clear_jobs_btn.click(
	            clear_jobs,
	            inputs=[],
	            outputs=[job_list_state, job_display_area, single_job_input]
	        )

	        # Event: Match
	        match_button.click(
	            process_and_display,
	            inputs=[resume_input, job_list_state],
	            outputs=[processing_output, recommendation_output, table_output, nerd_button]
	        )

	        # The details button re-emits the table's current value as visible.
	        nerd_button.click(show_details, inputs=[table_output], outputs=[table_output])

	        with gr.Row():
	            job_index_to_explain = gr.Number(label="Job Index (1-based for humans, but logic uses 0-based)", value=0, precision=0)

	        with gr.Row():
	            # One "Explain" button per loaded model.
	            for m_name in models:
	                btn = gr.Button(f"Explain {m_name}")
	                btn.click(
	                    # m=m_name binds the current name now; a plain closure
	                    # would see only the last model after the loop ends.
	                    fn=lambda resume, jobs, idx, m=m_name: explain_model_scores(m, resume, jobs, idx),
	                    inputs=[resume_input, job_list_state, job_index_to_explain],
	                    outputs=[explanation_output],
	                )

	    def check_invite(user_code):
	        """Return (granted flag, warning update, main-UI update) for a code."""
	        # `or ""` keeps a missing value from crashing on .strip().
	        if (user_code or "").strip() == INVITE_CODE:
	            return True, gr.update(visible=False), gr.update(visible=True)
	        return False, gr.update(visible=True), gr.update(visible=False)

	    access_button.click(fn=check_invite, inputs=[code_input], outputs=[access_granted, access_warning, main_ui])

	app.launch()