Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,32 +14,28 @@ from Gradio_UI import GradioUI
|
|
| 14 |
|
| 15 |
search_tool = DuckDuckGoSearchTool()
|
| 16 |
|
| 17 |
-
news_sites = ["https://www.cnn.com/"]
|
| 18 |
-
|
| 19 |
-
site_config = {
|
| 20 |
-
"https://www.cnn.com/": {'tag': 'h2', 'class': 'headline'}
|
| 21 |
-
}
|
| 22 |
-
|
| 23 |
|
| 24 |
@tool
|
| 25 |
-
def get_latest_news(
|
| 26 |
"""
|
| 27 |
-
Tool returns news headlines from
|
| 28 |
-
|
| 29 |
-
Args:
|
| 30 |
-
news_sites (List[str]): A list of URLs of news sites to fetch headlines from.
|
| 31 |
|
| 32 |
Returns:
|
| 33 |
Dict[str, List[str]]: A dictionary where the keys are the news site URLs and the values are lists of headlines.
|
| 34 |
|
| 35 |
Notes:
|
| 36 |
-
The function uses a predefined `site_config` dictionary to determine the HTML tag and class
|
| 37 |
-
The `site_config` dictionary should have the following structure:
|
| 38 |
{
|
| 39 |
"site_url": {'tag': 'html_tag', 'class': 'css_class'}
|
| 40 |
}
|
| 41 |
If a site is not found in `site_config`, it defaults to {'tag': 'h2', 'class': 'headline'}.
|
| 42 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
headlines = {}
|
| 44 |
for site in news_sites:
|
| 45 |
try:
|
|
|
|
| 14 |
|
| 15 |
search_tool = DuckDuckGoSearchTool()
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
@tool
|
| 19 |
+
def get_latest_news() -> Dict[str, List[str]]:
|
| 20 |
"""
|
| 21 |
+
Tool returns news headlines from predefined news sites.
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
Returns:
|
| 24 |
Dict[str, List[str]]: A dictionary where the keys are the news site URLs and the values are lists of headlines.
|
| 25 |
|
| 26 |
Notes:
|
| 27 |
+
The function uses a predefined `news_sites` list and a `site_config` dictionary to determine the HTML tag and class
|
| 28 |
+
to extract headlines from each site. The `site_config` dictionary should have the following structure:
|
| 29 |
{
|
| 30 |
"site_url": {'tag': 'html_tag', 'class': 'css_class'}
|
| 31 |
}
|
| 32 |
If a site is not found in `site_config`, it defaults to {'tag': 'h2', 'class': 'headline'}.
|
| 33 |
"""
|
| 34 |
+
news_sites = ["https://www.cnn.com/"]
|
| 35 |
+
site_config = {
|
| 36 |
+
"https://www.cnn.com/": {'tag': 'h2', 'class': 'headline'}
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
headlines = {}
|
| 40 |
for site in news_sites:
|
| 41 |
try:
|