Files changed (1)
  1. app.py +197 -343
app.py CHANGED
@@ -1,4 +1,3 @@
-# app.py — robusted version (only minimal safe changes added)
 import os
 import json
 from collections import Counter
@@ -8,8 +7,10 @@ import pandas as pd
 import plotly.express as px
 import plotly.io as pio
 import dotenv
+import threading
+import tempfile
+import shutil
 import numpy as np
-from datetime import datetime, timedelta

 dotenv.load_dotenv()

@@ -21,11 +22,9 @@ def clean_and_standardize(df):
     df = df.copy()
     df.columns = df.columns.str.replace('_x0020_', '_', regex=False)
     df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_', regex=False)
-    # drop grade if present; ignore otherwise
-    df = df.drop(columns=['grade'], errors='ignore')
     required_columns = [
         'state', 'district', 'market', 'commodity', 'variety',
-        'arrival_date', 'min_price', 'max_price', 'modal_price'
+        'grade', 'arrival_date', 'min_price', 'max_price', 'modal_price'
     ]
     existing_columns = [col for col in required_columns if col in df.columns]
     return df[existing_columns]
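
A note on the hunk above: the incoming field names evidently escape spaces as _x0020_ (hence the replace before lower-casing), and the required-column list now keeps grade instead of dropping it. A quick illustration of the normalization, using invented column names:

    df = pd.DataFrame(columns=['State', 'Min_x0020_Price', 'Grade'])
    print(clean_and_standardize(df).columns.tolist())
    # ['state', 'grade', 'min_price']  -- selected in required_columns order
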
@@ -71,145 +70,158 @@ def load_hierarchy_from_json(path='location_hierarchy.json'):
         print(f"CRITICAL ERROR: Could not load '{path}'. Error: {e}")
         return {}

-def get_last_year_date():
-    """Get date from one year ago in dd/MM/yyyy format"""
-    last_year = datetime.now() - timedelta(days=365)
-    return last_year.strftime("%d/%m/%Y")
-
-def fetch_market_data(state=None, district=None, market=None):
+def fetch_market_data(state=None, district=None,
+                      cache_path='agmarknet_cache.csv',
+                      use_cache=True, force_refresh=False,
+                      sleep_between=0.15, page_size=1000,
+                      synchronous=True):
     """
-    Fetcher to use new API endpoint with arrival date filter and market filtering.
+    Single-request fetcher (the API returns the full dataset in one response).
     Returns a cleaned DataFrame with duplicate columns consolidated and arrival_date normalized.
     """
     api_key = os.environ.get('DATA_GOV_API_KEY',
                              "579b464db66ec23bdd00000140925613394847c57ae13db180760f06")
     base_url = "https://api.data.gov.in/resource/35985678-0d79-46b4-9ed6-6f13308a1d24"

-    # Build params for API request with arrival date from last year
-    arrival_date = get_last_year_date()
+    # Use cache if present and not forcing refresh
+    if use_cache and not force_refresh and os.path.exists(cache_path):
+        try:
+            df_cache = pd.read_csv(cache_path)
+            print(f"[fetch_market_data] Loaded cache '{cache_path}' ({len(df_cache)} rows).")
+            dataframes_to_combine = [df_cache]
+            try:
+                df_csv = pd.read_csv("final_price_data.csv")
+                dataframes_to_combine.append(df_csv)
+            except FileNotFoundError:
+                pass
+            df_combined = pd.concat(dataframes_to_combine, ignore_index=True, sort=False)
+            # first, consolidate duplicate columns (if any)
+            df_combined = consolidate_duplicate_columns(df_combined)
+            cleaned = clean_and_standardize(df_combined)
+            if 'arrival_date' in cleaned.columns:
+                cleaned = cleaned.copy()
+                cleaned.loc[:, 'arrival_date'] = pd.to_datetime(
+                    cleaned['arrival_date'].astype(str).str.replace('\\/', '-', regex=True),
+                    dayfirst=True, errors='coerce'
+                )
+            return cleaned
+        except Exception as e:
+            print(f"[fetch_market_data] Failed reading cache: {e}. Will fetch live.")
+
+    # Background start support
+    if not synchronous:
+        t = threading.Thread(target=fetch_market_data, kwargs={
+            'state': state, 'district': district, 'cache_path': cache_path,
+            'use_cache': use_cache, 'force_refresh': force_refresh,
+            'sleep_between': sleep_between, 'page_size': page_size, 'synchronous': True
+        }, daemon=True)
+        t.start()
+        print("[fetch_market_data] Started background fetcher thread (single-request mode).")
+        return None
+
+    # Build params for single request
     params = {
         "api-key": api_key,
-        "format": "json",
-        "limit": 1000,
-        "filters[Arrival_Date]": arrival_date
+        "format": "json"
     }
-
     if state:
         params["filters[State]"] = state
     if district:
         params["filters[District]"] = district

+    temp_fd, temp_file = tempfile.mkstemp(suffix='.csv')
+    os.close(temp_fd)
     try:
-        print(f"[fetch_market_data] Sending request to API with arrival date: {arrival_date}. Params: {params}")
-        resp = requests.get(base_url, params=params, timeout=180)
-    except Exception as e:
-        print(f"[fetch_market_data] Network error on request: {e}")
-        # fallback to local CSV if present
         try:
-            if os.path.exists("final_price_data.csv") and os.path.getsize("final_price_data.csv") > 0:
-                df_csv = pd.read_csv("final_price_data.csv", encoding='utf-8', on_bad_lines='skip')
-                if not df_csv.empty:
-                    df_csv = consolidate_duplicate_columns(df_csv)
-                    cleaned = clean_and_standardize(df_csv)
-                    if market and 'market' in cleaned.columns:
-                        cleaned = cleaned[cleaned['market'].str.lower() == market.lower()]
-                    return cleaned
-        except Exception as csv_err:
-            print(f"[fetch_market_data] Could not load final_price_data.csv: {csv_err}")
-        return pd.DataFrame()
-
-    if resp.status_code != 200:
-        print(f"[fetch_market_data] API returned {resp.status_code}: {resp.text[:500]}")
+            print(f"[fetch_market_data] Sending single request to API (may be large). Params: { {k:v for k,v in params.items() if k!='api-key'} }")
+            resp = requests.get(base_url, params=params, timeout=180)
+        except Exception as e:
+            print(f"[fetch_market_data] Network error on single request: {e}")
+            # fallback to local CSV if present
+            try:
+                df_csv = pd.read_csv("final_price_data.csv")
+                df_csv = consolidate_duplicate_columns(df_csv)
+                return clean_and_standardize(df_csv)
+            except FileNotFoundError:
+                return pd.DataFrame()
+
+        if resp.status_code != 200:
+            print(f"[fetch_market_data] API returned {resp.status_code}: {resp.text[:500]}")
+            try:
+                df_csv = pd.read_csv("final_price_data.csv")
+                df_csv = consolidate_duplicate_columns(df_csv)
+                return clean_and_standardize(df_csv)
+            except FileNotFoundError:
+                return pd.DataFrame()
+
         try:
-            if os.path.exists("final_price_data.csv") and os.path.getsize("final_price_data.csv") > 0:
-                df_csv = pd.read_csv("final_price_data.csv", encoding='utf-8', on_bad_lines='skip')
-                if not df_csv.empty:
-                    df_csv = consolidate_duplicate_columns(df_csv)
-                    cleaned = clean_and_standardize(df_csv)
-                    if market and 'market' in cleaned.columns:
-                        cleaned = cleaned[cleaned['market'].str.lower() == market.lower()]
-                    return cleaned
-        except Exception as csv_err:
-            print(f"[fetch_market_data] Could not load final_price_data.csv: {csv_err}")
-        return pd.DataFrame()
+            data = resp.json()
+        except Exception as e:
+            print(f"[fetch_market_data] JSON decode error: {e}")
+            try:
+                df_csv = pd.read_csv("final_price_data.csv")
+                df_csv = consolidate_duplicate_columns(df_csv)
+                return clean_and_standardize(df_csv)
+            except FileNotFoundError:
+                return pd.DataFrame()

-    try:
-        data = resp.json()
-    except Exception as e:
-        print(f"[fetch_market_data] JSON decode error: {e}")
+        records = data.get("records", [])
+        if not records and isinstance(data, list):
+            records = data
+
+        if not records:
+            print("[fetch_market_data] No records returned by API in single response.")
+            try:
+                df_csv = pd.read_csv("final_price_data.csv")
+                df_csv = consolidate_duplicate_columns(df_csv)
+                return clean_and_standardize(df_csv)
+            except FileNotFoundError:
+                return pd.DataFrame()
+
+        df_api = pd.DataFrame.from_records(records)
+        # Consolidate duplicate columns immediately
+        df_api = consolidate_duplicate_columns(df_api)
+
+        # write cache atomically
         try:
-            if os.path.exists("final_price_data.csv") and os.path.getsize("final_price_data.csv") > 0:
-                df_csv = pd.read_csv("final_price_data.csv", encoding='utf-8', on_bad_lines='skip')
-                if not df_csv.empty:
-                    df_csv = consolidate_duplicate_columns(df_csv)
-                    cleaned = clean_and_standardize(df_csv)
-                    if market and 'market' in cleaned.columns:
-                        cleaned = cleaned[cleaned['market'].str.lower() == market.lower()]
-                    return cleaned
-        except Exception as csv_err:
-            print(f"[fetch_market_data] Could not load final_price_data.csv: {csv_err}")
-        return pd.DataFrame()
-
-    # Parse the new API response format
-    records = data.get("records", [])
-    if not records and isinstance(data, list):
-        records = data
-
-    if not records:
-        print("[fetch_market_data] No records returned by API in response.")
+            df_api.to_csv(temp_file, index=False)
+            shutil.move(temp_file, cache_path)
+            print(f"[fetch_market_data] Single-request cache updated at '{cache_path}' ({len(df_api)} rows).")
+        except Exception as e:
+            print(f"[fetch_market_data] Failed to write cache atomically: {e}")
+            try:
+                df_api.to_csv(cache_path, index=False)
+            except Exception as e2:
+                print(f"[fetch_market_data] Fallback write also failed: {e2}")
+
+        # Merge with final_price_data.csv if exists
+        dataframes_to_combine = [df_api]
         try:
-            if os.path.exists("final_price_data.csv") and os.path.getsize("final_price_data.csv") > 0:
-                df_csv = pd.read_csv("final_price_data.csv", encoding='utf-8', on_bad_lines='skip')
-                if not df_csv.empty:
-                    df_csv = consolidate_duplicate_columns(df_csv)
-                    cleaned = clean_and_standardize(df_csv)
-                    if market and 'market' in cleaned.columns:
-                        cleaned = cleaned[cleaned['market'].str.lower() == market.lower()]
-                    return cleaned
-        except Exception as csv_err:
-            print(f"[fetch_market_data] Could not load final_price_data.csv: {csv_err}")
-        return pd.DataFrame()
-
-    df_api = pd.DataFrame.from_records(records)
-
-    # Filter by market if specified (from the API response)
-    if market and 'Market' in df_api.columns:
-        df_api = df_api[df_api['Market'].str.lower() == market.lower()]
-
-    # Consolidate duplicate columns immediately
-    df_api = consolidate_duplicate_columns(df_api)
-
-    print(f"[fetch_market_data] Retrieved {len(df_api)} rows from API.")
-
-    # Merge with final_price_data.csv if exists
-    dataframes_to_combine = [df_api]
-    try:
-        if os.path.exists("final_price_data.csv") and os.path.getsize("final_price_data.csv") > 0:
-            df_csv = pd.read_csv("final_price_data.csv", encoding='utf-8', on_bad_lines='skip')
-            if not df_csv.empty:
-                df_csv = consolidate_duplicate_columns(df_csv)
-                dataframes_to_combine.append(df_csv)
-    except Exception as csv_err:
-        print(f"[fetch_market_data] Could not load final_price_data.csv for merging: {csv_err}")
-
-    df_combined = pd.concat(dataframes_to_combine, ignore_index=True, sort=False)
-    df_combined = consolidate_duplicate_columns(df_combined)
-    cleaned = clean_and_standardize(df_combined)
-    if 'arrival_date' in cleaned.columns:
-        cleaned = cleaned.copy()
-        cleaned.loc[:, 'arrival_date'] = pd.to_datetime(
-            cleaned['arrival_date'].astype(str).str.replace('\\/', '-', regex=True),
-            dayfirst=True, errors='coerce'
-        )
-
-    # Additional market filtering after standardization
-    if market and 'market' in cleaned.columns:
-        cleaned = cleaned[cleaned['market'].str.lower() == market.lower()]
-
-    return cleaned
+            df_csv = pd.read_csv("final_price_data.csv")
+            df_csv = consolidate_duplicate_columns(df_csv)
+            dataframes_to_combine.append(df_csv)
+        except FileNotFoundError:
+            pass
+
+        df_combined = pd.concat(dataframes_to_combine, ignore_index=True, sort=False)
+        df_combined = consolidate_duplicate_columns(df_combined)
+        cleaned = clean_and_standardize(df_combined)
+        if 'arrival_date' in cleaned.columns:
+            cleaned = cleaned.copy()
+            cleaned.loc[:, 'arrival_date'] = pd.to_datetime(
+                cleaned['arrival_date'].astype(str).str.replace('\\/', '-', regex=True),
+                dayfirst=True, errors='coerce'
+            )
+        return cleaned
+
+    finally:
+        if os.path.exists(temp_file):
+            try:
+                os.remove(temp_file)
+            except Exception:
+                pass

 # Utility to flatten/clean numeric-like columns safely
-# Utility to flatten/clean numeric-like columns safely (improved version)
 def flatten_column(df, col):
     """
     Ensure df[col] becomes a 1-D numeric Series:
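
The rewritten fetcher layers three behaviours: serve from agmarknet_cache.csv when allowed, optionally hand the work to a daemon thread, and only then hit the API once (sleep_between and page_size are accepted but unused in this single-request version). A minimal usage sketch, with made-up filter values:

    # Warm path: reads the cache and merges final_price_data.csv if present.
    df = fetch_market_data(state='Karnataka', district='Bangalore')

    # Cold path: bypass the cache and block on the live request.
    df = fetch_market_data(state='Karnataka', force_refresh=True)

    # Fire-and-forget: spawns a daemon thread and returns None immediately,
    # so callers must check for None before touching .empty.
    df = fetch_market_data(state='Karnataka', synchronous=False)
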
@@ -220,7 +232,6 @@ def flatten_column(df, col):
     """
     if df is None or col not in df.columns:
         return df
-
     df = df.copy()
     series = df[col]

@@ -236,236 +247,84 @@ def flatten_column(df, col):
     def _first_scalar(x):
         if x is None:
             return None
-
-        # Handle pandas NA/NaN values
-        try:
-            if pd.isna(x):
-                return None
-        except (TypeError, ValueError):
-            pass
-
-        # Handle numpy nan
+        # numpy nan
         try:
             if isinstance(x, float) and np.isnan(x):
                 return None
-        except (TypeError, ValueError):
+        except Exception:
             pass
-
-        # Direct numeric/string values
-        if isinstance(x, (int, float, str, np.integer, np.floating)):
-            # Clean string values that might contain currency symbols or extra whitespace
-            if isinstance(x, str):
-                # Remove common currency symbols and whitespace
-                cleaned = x.strip().replace('₹', '').replace(',', '').replace('$', '')
-                try:
-                    return float(cleaned) if cleaned else None
-                except ValueError:
-                    return None
+        if isinstance(x, (int, float, str, np.integer, np.floating, np.str_)):
             return x
-
-        # Handle numpy string types
-        if isinstance(x, np.str_):
-            cleaned = str(x).strip().replace('₹', '').replace(',', '').replace('$', '')
-            try:
-                return float(cleaned) if cleaned else None
-            except ValueError:
-                return None
-
-        # Handle lists, tuples, sets
         if isinstance(x, (list, tuple, set)):
             for item in x:
                 if item is None:
                     continue
-                try:
-                    if pd.isna(item):
-                        continue
-                except (TypeError, ValueError):
-                    pass
                 try:
                     if isinstance(item, float) and np.isnan(item):
                         continue
-                except (TypeError, ValueError):
+                except Exception:
                     pass
-
-                # Recursive handling for nested structures
                 if isinstance(item, (list, tuple, set)):
-                    nested_result = _first_scalar(item)
-                    if nested_result is not None:
-                        return nested_result
+                    for sub in item:
+                        if sub is not None:
+                            return sub
                     continue
-
                 if isinstance(item, dict):
                     # try to find a numeric-like key
-                    for k in ('value', 'price', 'modal_price', 'modalPrice', '0'):
-                        if k in item and item[k] is not None:
-                            return _first_scalar(item[k])
+                    for k in ('value', 'price', 'modal_price', '0'):
+                        if k in item:
+                            return item[k]
                     vals = list(item.values())
                     if vals:
-                        for val in vals:
-                            result = _first_scalar(val)
-                            if result is not None:
-                                return result
-                    continue
-
-                # Direct value handling
-                if isinstance(item, (int, float, np.integer, np.floating)):
-                    return item
-                if isinstance(item, (str, np.str_)):
-                    cleaned = str(item).strip().replace('₹', '').replace(',', '').replace('$', '')
-                    try:
-                        return float(cleaned) if cleaned else None
-                    except ValueError:
-                        continue
-
-                # If we can't process it, try to convert directly
-                try:
-                    return float(str(item)) if str(item).strip() else None
-                except (ValueError, TypeError):
+                        return vals[0]
                     continue
+                return item
             return None
-
-        # Handle dictionaries
         if isinstance(x, dict):
             for k in ('value', 'price', 'modal_price', 'modalPrice', '0'):
-                if k in x and x[k] is not None:
-                    return _first_scalar(x[k])
+                if k in x:
+                    return x[k]
             vals = list(x.values())
             if vals:
-                for val in vals:
-                    result = _first_scalar(val)
-                    if result is not None:
-                        return result
+                return vals[0]
             return None
-
-        # fallback: try to convert to string then float
+        # fallback to string
         try:
-            str_val = str(x).strip()
-            if str_val:
-                # Clean common non-numeric characters
-                cleaned = str_val.replace('₹', '').replace(',', '').replace('$', '')
-                return float(cleaned)
-            return None
-        except (ValueError, TypeError):
+            return str(x)
+        except Exception:
             return None

-    # Apply the flattening function
     series = series.apply(_first_scalar)
-
-    # Convert to numeric, coercing errors to NaN
    series = pd.to_numeric(series, errors='coerce')
-
     # assign back using .loc to avoid SettingWithCopyWarning
     df.loc[:, col] = series
     return df

-# AI insights (improved with better error handling and data validation)
+# AI insights (unchanged logic but using safer flatten)
 def get_ai_insights(market_data, state, district, language="English"):
     if not state or not district or market_data is None or market_data.empty:
         return ""
-
     api_key = os.environ.get('GEMINI_API_KEY')
     if not api_key:
         return "<p>AI insights unavailable.</p>"

-    # Make a copy to avoid modifying original data
-    market_data = market_data.copy()
-
-    # Ensure modal_price column exists
-    if 'modal_price' not in market_data.columns:
-        return "<p>AI insights unavailable - no price data.</p>"
-
-    # Flatten and convert to numeric more robustly
     market_data = flatten_column(market_data, 'modal_price')
-
-    # Additional validation and cleaning for modal_price
-    market_data['modal_price'] = pd.to_numeric(market_data['modal_price'], errors='coerce')
-
-    # Remove rows where modal_price is NaN or invalid
-    market_data = market_data.dropna(subset=['modal_price', 'commodity'])
-
-    # Check if we have any valid data left
-    if market_data.empty or len(market_data) == 0:
-        return "<p>AI insights unavailable - no valid price data after cleaning.</p>"
-
-    # Additional check to ensure modal_price is actually numeric
-    if not pd.api.types.is_numeric_dtype(market_data['modal_price']):
-        # Force conversion one more time
-        market_data['modal_price'] = pd.to_numeric(market_data['modal_price'], errors='coerce')
-        market_data = market_data.dropna(subset=['modal_price'])
-
-        if market_data.empty:
-            return "<p>AI insights unavailable - could not convert price data to numeric format.</p>"
-
-    try:
-        # Safe grouping and aggregation
-        # Ensure modal_price is numeric at the group-aggregation stage
-        commodity_prices = (
-            market_data
-            .groupby('commodity', dropna=True)['modal_price']
-            .apply(lambda s: pd.to_numeric(s, errors='coerce').mean())  # coerce per-group
-        )
-
-        # Force numeric dtype and drop groups that could not be converted
-        commodity_prices = pd.to_numeric(commodity_prices, errors='coerce').dropna()
-
-        # Guard if no numeric data remains
-        if commodity_prices.empty:
-            return "<p>AI insights unavailable - no numeric commodity price data.</p>"
-
-        n_commodities = min(5, len(commodity_prices))
-        top_commodities = commodity_prices.nlargest(n_commodities)
-
-        # Debugging info (safe to keep; helpful when issues arise)
-        print("modal_price dtype:", market_data['modal_price'].dtype)
-        print("modal_price sample values:", market_data['modal_price'].head(20).tolist())
-        print("modal_price value types:", market_data['modal_price'].apply(lambda x: type(x)).value_counts().to_dict())
-
-        # Format the commodities string
-        top_commodities_str = ", ".join([
-            f"{name} (Avg: ₹{price:.2f})"
-            for name, price in top_commodities.items()
-        ])
-
-        if not top_commodities_str:
-            return "<p>AI insights unavailable - no commodity price data.</p>"
-
-    except Exception as e:
-        print(f"Error processing commodity data: {e}")
-        return "<p>AI insights unavailable - error processing commodity data.</p>"
-
-    # Generate AI prompt
+    if 'modal_price' not in market_data.columns:
+        return "<p>AI insights unavailable.</p>"
+
+    # safe grouping even if some modal_price are NaN
+    top_commodities = market_data.groupby('commodity', dropna=True)['modal_price'].mean().nlargest(5)
+    top_commodities_str = ", ".join([f"{name} (Avg: ₹{price:.2f})" for name, price in top_commodities.items()])
     prompt = f'Analyze agricultural market data for {district}, {state}. Top commodities: {top_commodities_str}. Provide a JSON object with keys "crop_profitability", "market_analysis", "farmer_recommendations", each with an array of insights in {language}.'
-
     try:
-        api_url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent"
+        api_url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent"
         headers = {"Content-Type": "application/json"}
-        payload = {
-            "contents": [{"parts": [{"text": prompt}]}],
-            "generationConfig": {"responseMimeType": "application/json"}
-        }
-
+        payload = {"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"responseMimeType": "application/json"}}
         response = requests.post(f"{api_url}?key={api_key}", headers=headers, json=payload, timeout=25)
-
         if response.status_code == 200:
-            response_data = response.json()
-            if 'candidates' in response_data and len(response_data['candidates']) > 0:
-                content = response_data['candidates'][0].get('content', {})
-                parts = content.get('parts', [])
-                if parts and len(parts) > 0:
-                    insights_text = parts[0].get('text', '')
-                    if insights_text:
-                        insights_json = json.loads(insights_text)
-                        return format_ai_insights(insights_json)
-
-            return "<p>Error: Invalid response format from AI model.</p>"
-        else:
-            return f"<p>Error from AI model: {response.status_code}</p>"
-
-    except requests.exceptions.Timeout:
-        return "<p>AI insights request timed out. Please try again.</p>"
-    except json.JSONDecodeError as e:
-        print(f"JSON decode error in AI insights: {e}")
-        return "<p>Error parsing AI response.</p>"
+            insights_json = json.loads(response.json()['candidates'][0]['content']['parts'][0]['text'])
+            return format_ai_insights(insights_json)
+        return f"<p>Error from AI model: {response.status_code}</p>"
     except Exception as e:
         print(f"Error generating insights: {e}")
         return "<p>Error generating AI insights.</p>"
@@ -494,10 +353,6 @@ def generate_plots(df):
     for col in ['min_price', 'max_price', 'modal_price']:
         df = flatten_column(df, col)

-    # Ensure numeric modal_price for plotting
-    if 'modal_price' in df.columns:
-        df.loc[:, 'modal_price'] = pd.to_numeric(df['modal_price'], errors='coerce')
-
     df.dropna(subset=['modal_price', 'commodity'], inplace=True)
     if df.empty:
         return plots
@@ -520,6 +375,33 @@ LOCATION_HIERARCHY = load_hierarchy_from_json()
 print("Location hierarchy loaded.")

 # --- Flask Routes ---
+@app.route('/refresh_cache', methods=['POST'])
+def refresh_cache():
+    state = request.form.get('state')
+    district = request.form.get('district')
+
+    def _bg():
+        try:
+            fetch_market_data(state=state, district=district, cache_path='agmarknet_cache.csv',
+                              use_cache=False, force_refresh=True, page_size=1000, synchronous=True)
+            print("[refresh_cache] Background refresh finished.")
+        except Exception as e:
+            print(f"[refresh_cache] Background refresh failed: {e}")
+
+    t = threading.Thread(target=_bg, daemon=True)
+    t.start()
+    return jsonify({'success': True, 'message': 'Background cache refresh started.'})
+
+@app.route('/download_full_sync', methods=['POST'])
+def download_full_sync():
+    state = request.form.get('state')
+    district = request.form.get('district')
+    df = fetch_market_data(state=state, district=district, cache_path='agmarknet_cache.csv',
+                           use_cache=False, force_refresh=True, page_size=1000, synchronous=True)
+    if df is None or df.empty:
+        return jsonify({'success': False, 'message': 'Download produced no data.'})
+    return jsonify({'success': True, 'message': f'Download complete. Cached {len(df)} rows.'})
+
 @app.route('/')
 def index():
     states = sorted(list(LOCATION_HIERARCHY.keys()))
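
Both new routes are plain form-POST endpoints, so they can be exercised without the frontend. A sketch using requests (the 127.0.0.1:5000 address assumes Flask's development defaults; the state value is made up):

    import requests

    base = "http://127.0.0.1:5000"  # assumed dev server address
    # Non-blocking: kicks off the daemon-thread refresh and returns at once.
    r = requests.post(f"{base}/refresh_cache", data={"state": "Karnataka"})
    print(r.json())  # {'success': True, 'message': 'Background cache refresh started.'}

    # Blocking: waits for the single API request and reports the cached row count.
    r = requests.post(f"{base}/download_full_sync", data={"state": "Karnataka"})
    print(r.json())
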
@@ -557,20 +439,18 @@ def filter_data():
     if not state:
         return jsonify({'success': False, 'message': 'Please select a state.'})

-    # Pass market parameter to fetch_market_data for API filtering
-    df_combined = fetch_market_data(state, district, market)
+    df_combined = fetch_market_data(state, district)
     if df_combined is None or df_combined.empty:
         return jsonify({'success': False, 'message': 'No data found from API or local CSV.'})

     # Defensive copy
     df_filtered = df_combined.copy()

-    # Additional frontend filtering (in case not filtered by API)
     if state:
         df_filtered = df_filtered[df_filtered['state'].str.lower() == state.lower()]
     if district:
         df_filtered = df_filtered[df_filtered['district'].str.lower() == district.lower()]
-    if market and 'market' in df_filtered.columns:
+    if market:
         df_filtered = df_filtered[df_filtered['market'].str.lower() == market.lower()]
     if commodity:
         df_filtered = df_filtered[df_filtered['commodity'].str.lower() == commodity.lower()]
@@ -586,28 +466,10 @@ def filter_data():
     # Consolidate duplicate columns just in case (extra safety)
     df_final = consolidate_duplicate_columns(df_final)

-    # Ensure price columns are numeric (first pass)
+    # Ensure price columns are numeric
     for col in ['min_price', 'max_price', 'modal_price']:
         df_final = flatten_column(df_final, col)

-    # --- NEW: final coercion and safety for modal_price before computing stats/ordering ---
-    if 'modal_price' in df_final.columns:
-        # Coerce any remaining weird values to NaN, then drop them.
-        df_final.loc[:, 'modal_price'] = pd.to_numeric(df_final['modal_price'], errors='coerce')
-        print("After coercion modal_price dtype:", df_final['modal_price'].dtype)
-        print("modal_price sample values (post-coercion):", df_final['modal_price'].head(20).tolist())
-
-        # Drop rows that have no numeric modal_price (so nsmallest/nlargest won't fail)
-        df_final = df_final.dropna(subset=['modal_price'])
-        # Ensure float dtype
-        if not df_final.empty:
-            df_final.loc[:, 'modal_price'] = df_final['modal_price'].astype(float)
-        else:
-            return jsonify({'success': False, 'message': 'No valid price data after coercion.'})
-    else:
-        return jsonify({'success': False, 'message': 'No modal_price column present after cleaning.'})
-    # -------------------------------------------------------------------------------
-
     plots = generate_plots(df_final.copy())
     insights = get_ai_insights(df_final.copy(), state, district, language)

@@ -615,21 +477,13 @@ def filter_data():
     if df_final.empty or 'modal_price' not in df_final.columns or df_final['modal_price'].dropna().empty:
         return jsonify({'success': False, 'message': 'No valid price data after cleaning.'})

-    # Now these operations are safe because modal_price is numeric float dtype
-    try:
-        cheapest = df_final.nsmallest(5, 'modal_price')[['commodity', 'market', 'modal_price']]
-        costliest = df_final.nlargest(5, 'modal_price')[['commodity', 'market', 'modal_price']]
-    except Exception as e:
-        # fallback: compute via sort_values if something unexpected happens
-        print(f"Warning: nsmallest/nlargest failed: {e}. Falling back to sort_values.")
-        cheapest = df_final.sort_values('modal_price', ascending=True).head(5)[['commodity', 'market', 'modal_price']]
-        costliest = df_final.sort_values('modal_price', ascending=False).head(5)[['commodity', 'market', 'modal_price']]
-
+    cheapest = df_final.nsmallest(5, 'modal_price')[['commodity', 'market', 'modal_price']]
+    costliest = df_final.nlargest(5, 'modal_price')[['commodity', 'market', 'modal_price']]
     market_stats = {
-        'total_commodities': int(df_final['commodity'].nunique()) if 'commodity' in df_final.columns else 0,
+        'total_commodities': int(df_final['commodity'].nunique()),
         'avg_modal_price': f"₹{df_final['modal_price'].mean():.2f}",
         'price_range': f"₹{df_final['modal_price'].min():.2f} - ₹{df_final['modal_price'].max():.2f}",
-        'total_markets': int(df_final['market'].nunique()) if 'market' in df_final.columns else 0
+        'total_markets': int(df_final['market'].nunique())
     }

     return jsonify({
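
Dropping the try/except around nsmallest/nlargest is only safe because flatten_column has already forced modal_price to a numeric dtype; on an object-dtype column pandas refuses these methods outright. An illustrative failure mode (error text from recent pandas versions, approximately):

    pd.DataFrame({'modal_price': ['100', '50']}).nsmallest(2, 'modal_price')
    # TypeError: Column 'modal_price' has dtype object, cannot use method 'nsmallest' with this dtype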