# app.py — multimodel text summarization demo
# Uploaded by Natwar ("Update app.py", commit fdebaf9, verified).
import os  # NOTE(review): `os` appears unused in this file — confirm before removing
import subprocess
import sys
import warnings
# Suppress all warnings so the install/startup log below stays readable.
warnings.filterwarnings("ignore")
def run_pip(*args):
    """Run `pip install --no-cache-dir <args...>`; raises CalledProcessError on failure."""
    command = [sys.executable, "-m", "pip", "install", "--no-cache-dir", *args]
    subprocess.check_call(command)
# ── Phase 1: Install packages ─────────────────────────────────────────────────
import importlib  # needed to refresh the import system after in-process installs

print("=== Installing gradio (if needed) ===")
try:
    import gradio  # noqa: F401
    print("gradio already installed.")
except ImportError:
    run_pip("gradio")
    # Packages installed after interpreter start may not be visible to the
    # import system until its finder caches are invalidated.
    importlib.invalidate_caches()

print("=== Installing torch (CPU-only, ~190 MB) ===")
try:
    import torch  # noqa: F401
    print("torch already installed.")
except ImportError:
    run_pip("torch", "--index-url", "https://download.pytorch.org/whl/cpu")
    importlib.invalidate_caches()

print("=== Installing transformers 4.46.3 ===")
# Pin to last v4 release — transformers 5.x removed the 'summarization' pipeline task.
try:
    import transformers as _tf
    if _tf.__version__ != "4.46.3":
        raise ImportError("wrong version")
    print("transformers 4.46.3 already installed.")
except (ImportError, AttributeError):
    run_pip("transformers==4.46.3")
    importlib.invalidate_caches()
    # Bug fix: if a wrong-version transformers was already imported above, its
    # stale module objects stay in sys.modules, so the later
    # `import transformers...` statements (and the get_session patch) would
    # silently target the old version.  Purge them so 4.46.3 actually loads.
    for _mod in [m for m in sys.modules if m == "transformers" or m.startswith("transformers.")]:
        del sys.modules[_mod]
# ── Phase 2: Fix the requests-vs-httpx incompatibility ───────────────────────
#
# Background:
# - transformers 4.46.3 pins huggingface-hub<1.0, so pip resolves 0.36.x.
# - That hub release's get_session() hands back an httpx.Client whenever
#   httpx is importable — and it is, because gradio depends on it.
# - transformers' own hub.py still calls the session with requests-style
#   kwargs (get_session().head(url, allow_redirects=False, proxies=...,
#   timeout=10)), every one of which httpx.Client rejects.
#
# Remedy:
# With transformers imported (its module object is in sys.modules), rebind
# the `get_session` name inside the transformers.utils.hub namespace to a
# factory that returns a plain requests.Session, which accepts all of the
# kwargs hub.py passes — so every existing call site works unchanged.
import transformers.utils.hub as _t_hub  # noqa: E402
import requests as _requests  # noqa: E402


def _fresh_requests_session():
    """Drop-in replacement for hf-hub's get_session() that avoids httpx."""
    return _requests.Session()


_t_hub.get_session = _fresh_requests_session
print("Patched transformers.utils.hub.get_session β†’ requests.Session()")
# ── Phase 3: Safe imports ─────────────────────────────────────────────────────
import gradio as gr # noqa: E402
import torch # noqa: E402
from transformers import pipeline # noqa: E402
# ── App setup ─────────────────────────────────────────────────────────────────
# Default checkpoint: a small distilled BART, chosen for speed on CPU.
DEFAULT_MODEL = "sshleifer/distilbart-cnn-6-6"
# Model id → short human-readable description shown next to the UI dropdown.
AVAILABLE_MODELS = {
    "sshleifer/distilbart-cnn-6-6": "Fast & light, good for general summarization",
    "facebook/bart-large-cnn": "Larger BART model, better detail retention",
    "google/pegasus-cnn_dailymail": "Pegasus model for high-quality summarization",
    "allenai/led-base-16384": "Handles longer scientific documents",
}
print(f"Loading default model: {DEFAULT_MODEL}")
# Eagerly build the default pipeline at startup (downloads the model on first
# run) so the first user request doesn't pay the load cost.
summarizer = pipeline("summarization", model=DEFAULT_MODEL, device=-1)  # device=-1 → CPU
# Canned demo inputs, keyed by the names offered in the "Load Example Text"
# dropdown (see paste_example below).
EXAMPLE_TEXTS = {
    "news_article": (
        "In a historic move, global leaders have agreed to phase out fossil fuels over the next two "
        "decades. This landmark decision came after weeks of intense negotiations during the international "
        "climate summit. Experts believe this will drastically cut carbon emissions and pave the way for "
        "sustainable energy sources worldwide. Countries will now be held accountable through annual "
        "environmental reviews."
    ),
    "scientific_abstract": (
        "The rise of antibiotic-resistant bacteria poses one of the most significant threats to global "
        "health in the 21st century. Recent studies have shown that the overuse and misuse of antibiotics "
        "in both human medicine and agriculture have accelerated the evolution of resistant strains. In "
        "this review, we summarize current research on bacterial resistance mechanisms, including horizontal "
        "gene transfer and biofilm formation. We also explore novel approaches to combating resistance, "
        "such as bacteriophage therapy, antimicrobial peptides, and CRISPR-based gene editing technologies. "
        "The paper further outlines a strategic framework for integrating surveillance, policy reforms, and "
        "public health initiatives to curb the spread of resistance. While scientific innovation holds "
        "promise, global cooperation and responsible antibiotic stewardship remain essential to preventing "
        "a post-antibiotic era where common infections could once again become deadly."
    ),
    "business_report": (
        "The company reported a 32% increase in quarterly revenue, largely driven by the success of its "
        "latest AI-powered product line. International markets, particularly in Asia and Europe, showed "
        "strong adoption rates. Leadership announced plans to reinvest earnings into R&D and global "
        "expansion, while shareholders reacted positively with a 15% spike in stock prices."
    ),
}
def summarize_text(text, model_name, summary_length, num_beams):
    """Summarize *text* with the selected model.

    Parameters
    ----------
    text : str
        Input document; whitespace-only input returns a prompt message.
    model_name : str
        Hugging Face model id to summarize with.
    summary_length : str
        One of "very_short", "short", "medium", "long"; unknown values fall
        back to the "medium" range (70-100 tokens).
    num_beams : int
        Beam-search width for generation.

    Returns
    -------
    str
        The generated summary, or a human-readable error message.
    """
    if not text.strip():
        return "Please provide some text to summarize."
    try:
        global summarizer
        # Fix: the original rebuilt the pipeline on *every* click, reloading
        # the model from disk each time even when the selection was unchanged.
        # Cache one pipeline per model id on the function object and reuse it.
        cache = summarize_text.__dict__.setdefault("_pipelines", {})
        if model_name not in cache:
            cache[model_name] = pipeline("summarization", model=model_name, device=-1)
        summarizer = cache[model_name]  # keep module-level name in sync
        length_mapping = {
            "very_short": (30, 50),
            "short": (50, 70),
            "medium": (70, 100),
            "long": (100, 130),
        }
        min_len, max_len = length_mapping.get(summary_length, (70, 100))
        result = summarizer(
            text,
            max_length=int(max_len),
            min_length=int(min_len),
            num_beams=int(num_beams),
            do_sample=False,
        )
        return result[0]["summary_text"]
    except Exception as exc:  # surfaced in the UI instead of crashing Gradio
        return f"Error: {exc}"
def count_words(text):
    """Return a '<n> words' label for the live word counter under the textbox."""
    word_total = len(text.split())
    return f"{word_total} words"
def paste_example(example_type):
    """Return the canned example text for *example_type*, or '' if unknown."""
    if example_type in EXAMPLE_TEXTS:
        return EXAMPLE_TEXTS[example_type]
    return ""
# ── Gradio UI ──────────────────────────────────────────────────────────────────
# Layout: one Row with two Columns — inputs/controls (left, scale=3) and the
# summary output (right, scale=2).  Event wiring follows the layout.
with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# πŸ“ Multimodel Text Summarization")
    gr.Markdown(
        "Summarize news, reports, or scientific content using various models like BART, Pegasus, or LED."
    )
    with gr.Row():
        with gr.Column(scale=3):
            # Main input textbox with a live word counter beneath it.
            text_input = gr.Textbox(
                lines=12,
                label="Text to Summarize",
                placeholder="Paste or type your text here...",
                elem_id="text_input",
            )
            word_counter = gr.Markdown("0 words")
            # Refresh the counter on every change to the textbox.
            text_input.change(count_words, inputs=[text_input], outputs=[word_counter])
            with gr.Row():
                # Canned sample texts (keys of EXAMPLE_TEXTS) loadable in one click.
                example_dropdown = gr.Dropdown(
                    choices=list(EXAMPLE_TEXTS.keys()),
                    value=None,
                    label="Load Example Text",
                )
                example_load_btn = gr.Button("Load Example")
            with gr.Row():
                # Model picker plus a description line kept in sync below.
                model_choice = gr.Dropdown(
                    choices=list(AVAILABLE_MODELS.keys()),
                    value=DEFAULT_MODEL,
                    label="Select Summarization Model",
                )
                model_info = gr.Markdown(f"**Model info:** {AVAILABLE_MODELS[DEFAULT_MODEL]}")
            with gr.Row():
                # Generation controls: target length bucket and beam width.
                summary_length = gr.Radio(
                    choices=["very_short", "short", "medium", "long"],
                    value="medium",
                    label="Summary Length",
                )
                num_beams = gr.Slider(minimum=1, maximum=8, value=4, step=1, label="Beam Size")
            summarize_button = gr.Button("Generate Summary", variant="primary", size="lg")
        with gr.Column(scale=2):
            gr.Markdown("### Summary Result")
            summary_output = gr.Textbox(
                label="Generated Summary",
                lines=12,
                placeholder="Your summary will appear here...",
            )
    # Show the description of whichever model the user selects.
    model_choice.change(
        fn=lambda x: f"**Model info:** {AVAILABLE_MODELS.get(x, 'Custom model')}",
        inputs=[model_choice],
        outputs=[model_info],
    )
    # Copy the chosen example text into the input textbox.
    example_load_btn.click(fn=paste_example, inputs=[example_dropdown], outputs=[text_input])
    # Main action: run summarize_text with all current control values.
    summarize_button.click(
        fn=summarize_text,
        inputs=[text_input, model_choice, summary_length, num_beams],
        outputs=[summary_output],
    )
    gr.Markdown("""
---
βœ… Choose from different summarization models
βœ… Works great for academic, news, or business content
βœ… Customize summary length and beam search for better quality
Built using Gradio and Hugging Face Transformers
""")
# Start the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()