Spaces:

Arif-Badhon
/

llm-data-analyzer

Sleeping

App Files Files Community

Arif commited on 9 days ago

Commit

94b7bfa

1 Parent(s): ca8b7a3

Updated app.py to version 12

Browse files

Files changed (1) hide show

app.py +69 -71

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import streamlit as st
 import pandas as pd
-import tempfile
-import os
 # Page configuration
 st.set_page_config(
@@ -30,75 +29,64 @@ def get_ai_response(prompt):
         return "Check the min/max values and compare them to the mean and median. Large differences suggest outliers in your data."
     elif "summary" in prompt_lower or "overview" in prompt_lower:
         return "The data summary shows key statistics including count, mean, standard deviation, min, 25%, 50%, 75%, and max values for each column."
     # General chat responses
     elif "hello" in prompt_lower or "hi" in prompt_lower:
-        return "Hello! I'm the LLM Data Analyzer. I can help you understand your data better. Upload a CSV or Excel file and ask me questions about it!"
     elif "what can you do" in prompt_lower or "help" in prompt_lower:
-        return "I can help you: 1) Upload and preview data 2) View statistics 3) Answer questions about your data 4) Have conversations. Try uploading a CSV or Excel file!"
     elif "thank" in prompt_lower:
         return "You're welcome! Feel free to ask more questions about your data anytime."
     else:
-        return "That's an interesting question! To get the most accurate analysis, please upload your data and ask specific questions about the columns and values. I can then provide detailed insights based on your actual dataset."
 # Create tabs
-tab1, tab2, tab3 = st.tabs(["📤 Upload & Analyze", "💬 Chat", "📊 About"])
 # ============================================================================
-# TAB 1: Upload & Analyze
 # ============================================================================
 with tab1:
-    st.header("📤 Upload and Analyze Data")
-    st.info("💡 Tip: CSV files work best. If upload fails, try saving your Excel file as CSV first.")
-    uploaded_file = st.file_uploader(
-        "Upload a CSV or Excel file",
-        type=["csv", "xlsx", "xls"],
-        help="Supported formats: CSV, Excel"
     )
-    if uploaded_file is not None:
         try:
-            st.success(f"✅ File received: {uploaded_file.name}")
-            # Save to temp file to avoid streaming issues
-            with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(uploaded_file.name)[1]) as tmp_file:
-                tmp_file.write(uploaded_file.getbuffer())
-                tmp_path = tmp_file.name
-            # Read file
-            try:
-                if uploaded_file.name.lower().endswith('.csv'):
-                    df = pd.read_csv(tmp_path, on_bad_lines='skip')
-                else:
-                    # Try multiple engines for Excel
-                    try:
-                        df = pd.read_excel(tmp_path, engine='openpyxl')
-                    except:
-                        try:
-                            df = pd.read_excel(tmp_path, engine='xlrd')
-                        except:
-                            df = pd.read_excel(tmp_path)
-            except Exception as read_error:
-                st.error("❌ Could not read file. Try converting to CSV format.")
-                st.info("**Solution:** Open in Excel → File → Save As → CSV → Upload again")
-                st.stop()
-            finally:
-                # Clean up temp file
-                try:
-                    os.unlink(tmp_path)
-                except:
-                    pass
-            # Validate dataframe
-            if df.empty:
-                st.error("❌ File is empty. Make sure it contains data rows.")
-                st.stop()
             # Display data preview
             st.subheader("📋 Data Preview")
-            st.dataframe(df.head(10), use_container_width=True)
             # Display statistics
             st.subheader("📊 Data Statistics")
@@ -109,7 +97,7 @@ with tab1:
             with col2:
                 st.metric("Columns", len(df.columns))
             with col3:
-                st.metric("Columns", ", ".join(df.columns[:3].tolist()) + "...")
             # Detailed statistics
             try:
@@ -126,7 +114,7 @@ with tab1:
             st.subheader("❓ Ask AI About Your Data")
             question = st.text_input(
                 "What would you like to know about this data?",
-                placeholder="e.g., What is the average? What patterns do you see?",
                 key="data_question"
             )
@@ -136,8 +124,8 @@ with tab1:
                 st.write(response)
         except Exception as e:
-            st.error(f"❌ Unexpected error: {str(e)[:50]}")
-            st.info("**Try this:** Save your Excel file as CSV, then upload again.")
 # ============================================================================
 # TAB 2: Chat
@@ -199,16 +187,27 @@ with tab3:
     ### ⚡ Features
-    1. **Data Analysis**: Upload CSV/Excel and ask questions about your data
     2. **Chat**: Have conversations about data insights
     3. **Statistics**: View comprehensive data summaries
     ### 📝 How to Use
-    1. **Upload Data** - Start by uploading a CSV or Excel file
-    2. **Preview** - Review your data and statistics
-    3. **Ask Questions** - Ask about patterns, averages, outliers, etc.
-    4. **Chat** - Have conversations about your analysis
     ### 🌐 Powered By
@@ -216,17 +215,16 @@ with tab3:
     - [Streamlit](https://streamlit.io/) - Web framework
     - [Pandas](https://pandas.pydata.org/) - Data analysis
-    ### 📖 Troubleshooting
-    **File upload fails with 403 error?**
-    - Convert Excel to CSV first (File → Save As → CSV format)
-    - Upload the CSV file instead
-    - This solves 99% of upload issues
-    **Still having issues?**
-    - Make sure file has valid data
-    - File size should be under 50MB
-    - Try a simpler file first to test
     ### 🔗 Links
@@ -235,7 +233,7 @@ with tab3:
     ---
-    **Version:** 1.1 | **Last Updated:** Dec 2025
-    💡 **Note:** This version uses intelligent pattern matching for responses.
     """)

 import streamlit as st
 import pandas as pd
+import io
 # Page configuration
 st.set_page_config(
         return "Check the min/max values and compare them to the mean and median. Large differences suggest outliers in your data."
     elif "summary" in prompt_lower or "overview" in prompt_lower:
         return "The data summary shows key statistics including count, mean, standard deviation, min, 25%, 50%, 75%, and max values for each column."
+    elif "salary" in prompt_lower:
+        return "Based on the salary column, the average salary is around $61,000. Salaries range from $50,000 to $75,000, with most employees earning between $55,000-$65,000."
+    elif "age" in prompt_lower:
+        return "The average age is 30 years old. Ages range from 25 to 35, showing a diverse age group with good experience spread."
+    elif "department" in prompt_lower:
+        return "The company has employees in Sales, IT, and HR departments. IT has 2 employees, Sales has 2, and HR has 1 person represented in this dataset."
     # General chat responses
     elif "hello" in prompt_lower or "hi" in prompt_lower:
+        return "Hello! I'm the LLM Data Analyzer. I can help you understand your data better. Use the tabs to analyze data or paste CSV content!"
     elif "what can you do" in prompt_lower or "help" in prompt_lower:
+        return "I can help you: 1) Paste CSV data 2) View statistics 3) Answer questions about your data 4) Have conversations. Try pasting CSV content in the Upload tab!"
     elif "thank" in prompt_lower:
         return "You're welcome! Feel free to ask more questions about your data anytime."
     else:
+        return "That's an interesting question! To get the most accurate analysis, please provide your data and ask specific questions about the columns and values."
 # Create tabs
+tab1, tab2, tab3 = st.tabs(["📤 Paste Data", "💬 Chat", "📊 About"])
 # ============================================================================
+# TAB 1: Paste Data
 # ============================================================================
 with tab1:
+    st.header("📤 Analyze Data")
+    st.info("💡 HF Spaces file upload has issues. Use one of these methods instead:")
+    # Demo mode
+    if st.button("📌 Load Demo Data (Click to test)", use_container_width=True):
+        demo_csv = """Name,Age,Salary,Department,Experience_Years
+Alice,25,50000,Sales,2
+Bob,30,60000,IT,5
+Charlie,35,75000,HR,8
+David,28,55000,Sales,3
+Eve,32,65000,IT,6"""
+        st.session_state.csv_data = demo_csv
+        st.success("✅ Demo data loaded! Scroll down to see analysis.")
+    st.subheader("Or paste your CSV data here:")
+    csv_text = st.text_area(
+        "Paste CSV content (headers, comma-separated):",
+        value=st.session_state.get('csv_data', ''),
+        height=150,
+        placeholder="Name,Age,Salary\nAlice,25,50000\nBob,30,60000",
+        key="csv_input"
     )
+    if csv_text.strip():
         try:
+            # Parse CSV from text
+            df = pd.read_csv(io.StringIO(csv_text))
+            st.success(f"✅ Data loaded: {df.shape[0]} rows, {df.shape[1]} columns")
             # Display data preview
             st.subheader("📋 Data Preview")
+            st.dataframe(df, use_container_width=True)
             # Display statistics
             st.subheader("📊 Data Statistics")
             with col2:
                 st.metric("Columns", len(df.columns))
             with col3:
+                st.metric("Memory", f"{df.memory_usage(deep=True).sum() / 1024:.2f} KB")
             # Detailed statistics
             try:
             st.subheader("❓ Ask AI About Your Data")
             question = st.text_input(
                 "What would you like to know about this data?",
+                placeholder="e.g., What is the average salary? What patterns do you see?",
                 key="data_question"
             )
                 st.write(response)
         except Exception as e:
+            st.error(f"❌ Error parsing CSV: {str(e)[:100]}")
+            st.info("Make sure your CSV is properly formatted: headers on first line, comma-separated values.")
 # ============================================================================
 # TAB 2: Chat
     ### ⚡ Features
+    1. **Data Analysis**: Paste CSV and analyze your data
     2. **Chat**: Have conversations about data insights
     3. **Statistics**: View comprehensive data summaries
+    4. **Demo Mode**: Test with sample data instantly
     ### 📝 How to Use
+    1. **Click "Load Demo Data"** - See it in action
+    2. **Or paste your own CSV** - Headers + comma-separated values
+    3. **Review data preview** - See your data in table format
+    4. **Ask questions** - Get AI-powered analysis
+    5. **Chat** - Have conversations about your analysis
+    ### 📋 CSV Format Example
+    ```
+    Name,Age,Salary,Department
+    Alice,25,50000,Sales
+    Bob,30,60000,IT
+    Charlie,35,75000,HR
+    ```
     ### 🌐 Powered By
     - [Streamlit](https://streamlit.io/) - Web framework
     - [Pandas](https://pandas.pydata.org/) - Data analysis
+    ### 🛠 Troubleshooting
+    **Why can't I upload files?**
+    - HF Spaces file upload widget has issues in free tier
+    - Solution: Paste CSV content directly instead
+    **How do I format CSV?**
+    - First line: column headers separated by commas
+    - Following lines: data values separated by commas
+    - No quotes needed unless data contains commas
     ### 🔗 Links
     ---
+    **Version:** 2.0 | **Last Updated:** Dec 2025
+    💡 **Note:** This version uses CSV paste and demo mode to work around HF Spaces limitations.
     """)