Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import requests | |
| import re | |
| from duckduckgo_search import DDGS | |
| import anthropic | |
| import os | |
| import json | |
# Module-level Anthropic client, keyed from the ANTHROPIC_API_KEY environment
# variable (None when the variable is unset).
# NOTE(review): nothing in the visible code reads this object;
# extraction_workflow builds its own client from the key typed into the UI.
anthropic_client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
# === 1. Simplified Search Workflow ===
def _collect_articles(results, label):
    """Turn DuckDuckGo result dicts into display cards and AI-input snippets.

    Args:
        results: Iterable of result dicts; the 'title', 'href' and 'body'
            keys are read, each with a placeholder fallback.
        label: Tag inserted into every entry (e.g. "Recent", "Historical").

    Returns:
        A ``(cards, snippets)`` pair of lists: Markdown cards for the UI and
        plain-text article blocks for the AI prompt, in result order.
    """
    cards = []
    snippets = []
    for hit in results:
        title = hit.get('title', 'No Title')
        url = hit.get('href', '#')
        body = hit.get('body', 'No snippet available.')
        # Format for display
        cards.append(f"### ({label}) {title}\n**Source**: [{url}]({url})\n\n{body}\n")
        # Format for AI
        snippets.append(f"Article ({label}):\nTitle: {title}\nContent: {body}\n\n")
    return cards, snippets


def search_workflow(name: str, api_key: str, progress=gr.Progress()):
    """Search DuckDuckGo for founder-related articles about a company.

    Two queries are issued: one restricted to the past year (up to 4 results)
    and one unrestricted "history/origin" query (up to 4 results).

    Args:
        name: Company name; surrounding whitespace is ignored.
        api_key: Anthropic API key. It is not used by the search itself, but
            an empty key is rejected here so the user is prompted early.
        progress: Gradio progress tracker.

    Returns:
        A ``(markdown, raw_text)`` tuple. ``markdown`` is the report shown in
        the UI and ``raw_text`` is the concatenated article text handed to the
        extraction step. On invalid input, search failure, or no results, a
        message is returned as ``markdown`` and ``raw_text`` is ``""``.
    """
    if not name or not name.strip():
        return "β Please enter a company name.", ""
    if not api_key or not api_key.strip():
        return "β Please enter your Anthropic API key.", ""

    # Use the same stripped value that passed validation; stray whitespace
    # inside the quoted phrase would otherwise become part of the exact match.
    name = name.strip()

    progress(0, desc="Starting search...")

    # Define search queries
    recent_keywords = f'"{name}" founder news'
    historical_keywords = f'"{name}" founder history origin'

    all_articles_markdown = []
    ai_snippets = []

    try:
        with DDGS(timeout=20) as ddgs:
            # --- Fetch up to 4 recent articles (past year) ---
            progress(0.1, desc="Searching for recent articles...")
            # timelimit='y' restricts results to the past year.
            recent_results = ddgs.text(keywords=recent_keywords, max_results=4, timelimit='y') or []
            cards, snippets = _collect_articles(recent_results, "Recent")
            all_articles_markdown.extend(cards)
            ai_snippets.extend(snippets)

            # --- Fetch up to 4 historical articles (no time limit) ---
            progress(0.5, desc="Searching for historical articles...")
            historical_results = ddgs.text(keywords=historical_keywords, max_results=4) or []
            cards, snippets = _collect_articles(historical_results, "Historical")
            all_articles_markdown.extend(cards)
            ai_snippets.extend(snippets)
    except Exception as e:
        # UI boundary: report the failure in the output pane instead of raising.
        return f"β An error occurred during search: {e}", ""

    if not all_articles_markdown:
        return "[INFO] No articles found for that company.", ""

    progress(1.0, desc="Search complete!")
    final_markdown = f"## Found {len(all_articles_markdown)} Articles\n\n" + "\n---\n".join(all_articles_markdown)
    return final_markdown, "".join(ai_snippets)
# === 2. Simplified Extraction Workflow ===
def _first_text_block(message):
    """Return the text of the first content block that carries text, else None."""
    blocks = getattr(message, "content", None)
    if not isinstance(blocks, list):
        return None
    for block in blocks:
        text = getattr(block, "text", None)
        if isinstance(text, str):
            return text
    return None


def extraction_workflow(raw_text: str, company_name: str, api_key: str, progress=gr.Progress()):
    """Ask the AI model to extract founder names from article snippets.

    Args:
        raw_text: Article text produced by the search step; only the first
            20000 characters are sent to the model.
        company_name: Company name inserted into the prompt.
        api_key: Anthropic API key; surrounding whitespace is ignored.
        progress: Gradio progress tracker.

    Returns:
        A Markdown string: a fenced JSON block on success, otherwise a
        warning or error message. Exceptions are caught and reported as
        text rather than raised.
    """
    if not raw_text or not raw_text.strip():
        return "β Please run a search first to get text to analyze."
    if not api_key or not api_key.strip():
        return "β Please enter your Anthropic API key."

    progress(0, desc="Preparing prompt for AI...")
    prompt = f"""From the provided article snippets about "{company_name}", extract the names of individuals explicitly identified as a founder.
Return a single, valid JSON object with the structure: {{"founders": [{{"name": "Founder's Name", "evidence": "A brief quote or context."}}]}}
If no founders are mentioned, return an empty list: {{"founders": []}}.
Do not add any text outside the JSON object.
ARTICLES:
---
{raw_text[:20000]}
---
"""

    try:
        progress(0.5, desc="Sending request to AI model...")
        # Create client with the user's API key. Send the stripped value that
        # passed validation: a key pasted with a trailing newline or space
        # would otherwise fail authentication.
        client = anthropic.Anthropic(api_key=api_key.strip())
        message = client.messages.create(
            model="claude-sonnet-4-20250514",  # As requested
            max_tokens=1024,
            temperature=0.0,
            messages=[{"role": "user", "content": prompt}]
        )

        # Guard against an empty or text-less response before indexing into it.
        json_text = _first_text_block(message)
        if json_text is None:
            return "β **API Error**: The AI model returned an empty or invalid response."

        # Clean the response to find the JSON object: take everything from
        # the first '{' to the last '}' so surrounding text/fences are dropped.
        match = re.search(r'\{.*\}', json_text, re.DOTALL)
        if not match:
            return f"β οΈ **AI Warning**: The model did not return a JSON object.\n\n{json_text}"

        clean_json = match.group(0)
        try:
            parsed_json = json.loads(clean_json)
        except json.JSONDecodeError:
            return f"β οΈ **AI Warning**: The model returned malformed JSON.\n\n{clean_json}"

        # ensure_ascii=False keeps accented / non-Latin names readable in the
        # report instead of rendering them as \uXXXX escapes.
        formatted_json = json.dumps(parsed_json, indent=2, ensure_ascii=False)
        progress(1.0, desc="Extraction complete!")
        return f"```json\n{formatted_json}\n```"
    except Exception as e:
        # UI boundary: surface API/network failures as text in the report pane.
        return f"β **An unexpected error occurred during extraction**: {e}"
# === 3. Simplified Gradio UI ===
with gr.Blocks(theme=gr.themes.Soft(), title="Founder Name Extraction Tool") as demo:
    # Page header and usage instructions.
    gr.Markdown(value="# π Founder Name Extraction")
    gr.Markdown(value="A tool to find the names of company founders. **Step 1:** Enter your API key and company name. **Step 2:** Search for articles. **Step 3:** Extract founders' names from the results.")

    # Invisible per-session value: carries the article text produced by the
    # search step over to the extraction step.
    search_results_for_ai = gr.State(value="")

    # Input row: key, company name, and the search trigger side by side.
    with gr.Row():
        api_key_input = gr.Textbox(
            type="password",
            label="Anthropic API Key",
            placeholder="sk-ant-...",
            scale=2,
        )
        name_input = gr.Textbox(
            label="Company Name",
            placeholder="e.g., 'OpenAI', 'SpaceX'",
            scale=2,
        )
        search_btn = gr.Button(
            value="2. π Search for Articles (Uses DuckDuckGo)",
            variant="primary",
            scale=1,
        )

    with gr.Row():
        extract_btn = gr.Button(
            value="3. π Extract Founders from Search Results",
            variant="secondary",
        )

    # Both output panes are stacked on the page (no tabs).
    gr.Markdown(value="### Search Results")
    output_search = gr.Markdown()

    gr.Markdown(value="### Founder Intelligence Report")
    output_extract = gr.Markdown(value="*Waiting for extraction...*")

    # --- Event wiring ---
    # Search: fills the results pane and stores the raw text in the state.
    search_btn.click(
        search_workflow,
        [name_input, api_key_input],
        [output_search, search_results_for_ai],
        show_progress="full",
    )

    # Extract: reads the stored text plus the current name/key inputs and
    # writes the report pane.
    extract_btn.click(
        extraction_workflow,
        [search_results_for_ai, name_input, api_key_input],
        [output_extract],
        show_progress="full",
    )

# Enable request queuing before the app is served.
demo.queue()

if __name__ == "__main__":
    demo.launch(show_error=True)