sikeaditya committed on
Commit
f776967
·
verified ·
1 Parent(s): 0456cfe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +124 -164
app.py CHANGED
@@ -7,10 +7,8 @@ import pandas as pd
7
  import plotly.express as px
8
  import plotly.io as pio
9
  import dotenv
10
- import threading
11
- import tempfile
12
- import shutil
13
  import numpy as np
 
14
 
15
  dotenv.load_dotenv()
16
 
@@ -22,9 +20,10 @@ def clean_and_standardize(df):
22
  df = df.copy()
23
  df.columns = df.columns.str.replace('_x0020_', '_', regex=False)
24
  df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_', regex=False)
 
25
  required_columns = [
26
  'state', 'district', 'market', 'commodity', 'variety',
27
- 'grade', 'arrival_date', 'min_price', 'max_price', 'modal_price'
28
  ]
29
  existing_columns = [col for col in required_columns if col in df.columns]
30
  return df[existing_columns]
@@ -70,156 +69,142 @@ def load_hierarchy_from_json(path='location_hierarchy.json'):
70
  print(f"CRITICAL ERROR: Could not load '{path}'. Error: {e}")
71
  return {}
72
 
73
- def fetch_market_data(state=None, district=None,
74
- cache_path='agmarknet_cache.csv',
75
- use_cache=True, force_refresh=False,
76
- sleep_between=0.15, page_size=1000,
77
- synchronous=True):
 
78
  """
79
- Single-request fetcher (the API returns the full dataset in one response).
80
  Returns a cleaned DataFrame with duplicate columns consolidated and arrival_date normalized.
81
  """
82
  api_key = os.environ.get('DATA_GOV_API_KEY',
83
  "579b464db66ec23bdd00000140925613394847c57ae13db180760f06")
84
  base_url = "https://api.data.gov.in/resource/35985678-0d79-46b4-9ed6-6f13308a1d24"
85
 
86
- # Use cache if present and not forcing refresh
87
- if use_cache and not force_refresh and os.path.exists(cache_path):
88
- try:
89
- df_cache = pd.read_csv(cache_path)
90
- print(f"[fetch_market_data] Loaded cache '{cache_path}' ({len(df_cache)} rows).")
91
- dataframes_to_combine = [df_cache]
92
- try:
93
- df_csv = pd.read_csv("final_price_data.csv")
94
- dataframes_to_combine.append(df_csv)
95
- except FileNotFoundError:
96
- pass
97
- df_combined = pd.concat(dataframes_to_combine, ignore_index=True, sort=False)
98
- # first, consolidate duplicate columns (if any)
99
- df_combined = consolidate_duplicate_columns(df_combined)
100
- cleaned = clean_and_standardize(df_combined)
101
- if 'arrival_date' in cleaned.columns:
102
- cleaned = cleaned.copy()
103
- cleaned.loc[:, 'arrival_date'] = pd.to_datetime(
104
- cleaned['arrival_date'].astype(str).str.replace('\\/', '-', regex=True),
105
- dayfirst=True, errors='coerce'
106
- )
107
- return cleaned
108
- except Exception as e:
109
- print(f"[fetch_market_data] Failed reading cache: {e}. Will fetch live.")
110
-
111
- # Background start support
112
- if not synchronous:
113
- t = threading.Thread(target=fetch_market_data, kwargs={
114
- 'state': state, 'district': district, 'cache_path': cache_path,
115
- 'use_cache': use_cache, 'force_refresh': force_refresh,
116
- 'sleep_between': sleep_between, 'page_size': page_size, 'synchronous': True
117
- }, daemon=True)
118
- t.start()
119
- print("[fetch_market_data] Started background fetcher thread (single-request mode).")
120
- return None
121
-
122
- # Build params for single request
123
  params = {
124
  "api-key": api_key,
125
- "format": "json"
 
 
126
  }
 
127
  if state:
128
  params["filters[State]"] = state
129
  if district:
130
  params["filters[District]"] = district
131
 
132
- temp_fd, temp_file = tempfile.mkstemp(suffix='.csv')
133
- os.close(temp_fd)
134
  try:
 
 
 
 
 
135
  try:
136
- print(f"[fetch_market_data] Sending single request to API (may be large). Params: { {k:v for k,v in params.items() if k!='api-key'} }")
137
- resp = requests.get(base_url, params=params, timeout=180)
138
- except Exception as e:
139
- print(f"[fetch_market_data] Network error on single request: {e}")
140
- # fallback to local CSV if present
141
- try:
142
- df_csv = pd.read_csv("final_price_data.csv")
143
- df_csv = consolidate_duplicate_columns(df_csv)
144
- return clean_and_standardize(df_csv)
145
- except FileNotFoundError:
146
- return pd.DataFrame()
147
-
148
- if resp.status_code != 200:
149
- print(f"[fetch_market_data] API returned {resp.status_code}: {resp.text[:500]}")
150
- try:
151
- df_csv = pd.read_csv("final_price_data.csv")
152
- df_csv = consolidate_duplicate_columns(df_csv)
153
- return clean_and_standardize(df_csv)
154
- except FileNotFoundError:
155
- return pd.DataFrame()
156
-
157
  try:
158
- data = resp.json()
159
- except Exception as e:
160
- print(f"[fetch_market_data] JSON decode error: {e}")
161
- try:
162
- df_csv = pd.read_csv("final_price_data.csv")
163
- df_csv = consolidate_duplicate_columns(df_csv)
164
- return clean_and_standardize(df_csv)
165
- except FileNotFoundError:
166
- return pd.DataFrame()
167
-
168
- records = data.get("records", [])
169
- if not records and isinstance(data, list):
170
- records = data
171
-
172
- if not records:
173
- print("[fetch_market_data] No records returned by API in single response.")
174
- try:
175
- df_csv = pd.read_csv("final_price_data.csv")
176
- df_csv = consolidate_duplicate_columns(df_csv)
177
- return clean_and_standardize(df_csv)
178
- except FileNotFoundError:
179
- return pd.DataFrame()
180
-
181
- df_api = pd.DataFrame.from_records(records)
182
- # Consolidate duplicate columns immediately
183
- df_api = consolidate_duplicate_columns(df_api)
184
 
185
- # write cache atomically
 
 
 
186
  try:
187
- df_api.to_csv(temp_file, index=False)
188
- shutil.move(temp_file, cache_path)
189
- print(f"[fetch_market_data] Single-request cache updated at '{cache_path}' ({len(df_api)} rows).")
190
- except Exception as e:
191
- print(f"[fetch_market_data] Failed to write cache atomically: {e}")
192
- try:
193
- df_api.to_csv(cache_path, index=False)
194
- except Exception as e2:
195
- print(f"[fetch_market_data] Fallback write also failed: {e2}")
196
-
197
- # Merge with final_price_data.csv if exists
198
- dataframes_to_combine = [df_api]
 
 
 
 
 
 
 
199
  try:
200
- df_csv = pd.read_csv("final_price_data.csv")
201
- df_csv = consolidate_duplicate_columns(df_csv)
202
- dataframes_to_combine.append(df_csv)
203
- except FileNotFoundError:
204
- pass
205
-
206
- df_combined = pd.concat(dataframes_to_combine, ignore_index=True, sort=False)
207
- df_combined = consolidate_duplicate_columns(df_combined)
208
- cleaned = clean_and_standardize(df_combined)
209
- if 'arrival_date' in cleaned.columns:
210
- cleaned = cleaned.copy()
211
- cleaned.loc[:, 'arrival_date'] = pd.to_datetime(
212
- cleaned['arrival_date'].astype(str).str.replace('\\/', '-', regex=True),
213
- dayfirst=True, errors='coerce'
214
- )
215
- return cleaned
216
-
217
- finally:
218
- if os.path.exists(temp_file):
219
- try:
220
- os.remove(temp_file)
221
- except Exception:
222
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
  # Utility to flatten/clean numeric-like columns safely
225
  def flatten_column(df, col):
@@ -317,7 +302,7 @@ def get_ai_insights(market_data, state, district, language="English"):
317
  top_commodities_str = ", ".join([f"{name} (Avg: ₹{price:.2f})" for name, price in top_commodities.items()])
318
  prompt = f'Analyze agricultural market data for {district}, {state}. Top commodities: {top_commodities_str}. Provide a JSON object with keys "crop_profitability", "market_analysis", "farmer_recommendations", each with an array of insights in {language}.'
319
  try:
320
- api_url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent"
321
  headers = {"Content-Type": "application/json"}
322
  payload = {"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"responseMimeType": "application/json"}}
323
  response = requests.post(f"{api_url}?key={api_key}", headers=headers, json=payload, timeout=25)
@@ -375,33 +360,6 @@ LOCATION_HIERARCHY = load_hierarchy_from_json()
375
  print("Location hierarchy loaded.")
376
 
377
  # --- Flask Routes ---
378
- @app.route('/refresh_cache', methods=['POST'])
379
- def refresh_cache():
380
- state = request.form.get('state')
381
- district = request.form.get('district')
382
-
383
- def _bg():
384
- try:
385
- fetch_market_data(state=state, district=district, cache_path='agmarknet_cache.csv',
386
- use_cache=False, force_refresh=True, page_size=1000, synchronous=True)
387
- print("[refresh_cache] Background refresh finished.")
388
- except Exception as e:
389
- print(f"[refresh_cache] Background refresh failed: {e}")
390
-
391
- t = threading.Thread(target=_bg, daemon=True)
392
- t.start()
393
- return jsonify({'success': True, 'message': 'Background cache refresh started.'})
394
-
395
- @app.route('/download_full_sync', methods=['POST'])
396
- def download_full_sync():
397
- state = request.form.get('state')
398
- district = request.form.get('district')
399
- df = fetch_market_data(state=state, district=district, cache_path='agmarknet_cache.csv',
400
- use_cache=False, force_refresh=True, page_size=1000, synchronous=True)
401
- if df is None or df.empty:
402
- return jsonify({'success': False, 'message': 'Download produced no data.'})
403
- return jsonify({'success': True, 'message': f'Download complete. Cached {len(df)} rows.'})
404
-
405
  @app.route('/')
406
  def index():
407
  states = sorted(list(LOCATION_HIERARCHY.keys()))
@@ -439,18 +397,20 @@ def filter_data():
439
  if not state:
440
  return jsonify({'success': False, 'message': 'Please select a state.'})
441
 
442
- df_combined = fetch_market_data(state, district)
 
443
  if df_combined is None or df_combined.empty:
444
  return jsonify({'success': False, 'message': 'No data found from API or local CSV.'})
445
 
446
  # Defensive copy
447
  df_filtered = df_combined.copy()
448
 
 
449
  if state:
450
  df_filtered = df_filtered[df_filtered['state'].str.lower() == state.lower()]
451
  if district:
452
  df_filtered = df_filtered[df_filtered['district'].str.lower() == district.lower()]
453
- if market:
454
  df_filtered = df_filtered[df_filtered['market'].str.lower() == market.lower()]
455
  if commodity:
456
  df_filtered = df_filtered[df_filtered['commodity'].str.lower() == commodity.lower()]
@@ -498,4 +458,4 @@ def filter_data():
498
 
499
  if __name__ == '__main__':
500
  pio.templates.default = "plotly_white"
501
- app.run(debug=True, host='0.0.0.0', port=7860)
 
7
  import plotly.express as px
8
  import plotly.io as pio
9
  import dotenv
 
 
 
10
  import numpy as np
11
+ from datetime import datetime, timedelta
12
 
13
  dotenv.load_dotenv()
14
 
 
def clean_and_standardize(df):
    """Normalize column names and project onto the standard price columns.

    Column names have '_x0020_' (XML-escaped spaces) replaced with '_',
    are stripped, lower-cased, and remaining spaces become underscores.
    The 'grade' column is dropped when present, and only the standard
    location/price columns that actually exist in the frame are returned
    (missing ones are simply omitted).

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame from the API or a cached CSV; not mutated.

    Returns
    -------
    pandas.DataFrame
        Copy restricted to the standardized columns.
    """
    df = df.copy()
    df.columns = df.columns.str.replace('_x0020_', '_', regex=False)
    df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_', regex=False)
    # errors='ignore' prevents a KeyError when the source has no 'grade'
    # column (the unconditional drop crashed on such frames).
    df = df.drop(columns=['grade'], errors='ignore')
    required_columns = [
        'state', 'district', 'market', 'commodity', 'variety',
        'arrival_date', 'min_price', 'max_price', 'modal_price'
    ]
    existing_columns = [col for col in required_columns if col in df.columns]
    return df[existing_columns]
 
69
  print(f"CRITICAL ERROR: Could not load '{path}'. Error: {e}")
70
  return {}
71
 
72
def get_last_year_date():
    """Return the date exactly 365 days ago, formatted as dd/MM/yyyy."""
    return (datetime.now() - timedelta(days=365)).strftime("%d/%m/%Y")
76
+
77
def _load_local_csv_fallback(market=None):
    """Best-effort load of 'final_price_data.csv' as a fallback data source.

    Returns a consolidated, cleaned (and optionally market-filtered)
    DataFrame, or an empty DataFrame when the file is missing, empty, or
    unreadable.  Extracted because the identical fallback logic was
    previously duplicated at every API failure branch.
    """
    try:
        if os.path.exists("final_price_data.csv") and os.path.getsize("final_price_data.csv") > 0:
            df_csv = pd.read_csv("final_price_data.csv", encoding='utf-8', on_bad_lines='skip')
            if not df_csv.empty:
                df_csv = consolidate_duplicate_columns(df_csv)
                cleaned = clean_and_standardize(df_csv)
                if market and 'market' in cleaned.columns:
                    cleaned = cleaned[cleaned['market'].str.lower() == market.lower()]
                return cleaned
    except Exception as csv_err:
        print(f"[fetch_market_data] Could not load final_price_data.csv: {csv_err}")
    return pd.DataFrame()


def fetch_market_data(state=None, district=None, market=None):
    """Fetch agricultural market price data from the data.gov.in API.

    Requests records filtered to the arrival date one year ago (see
    get_last_year_date) and optionally by state/district, merges the API
    result with the local 'final_price_data.csv' when present, and
    returns a cleaned DataFrame with duplicate columns consolidated and
    arrival_date parsed to datetime.  On any network/API/parsing failure
    the local CSV alone is used as a fallback; an empty DataFrame is
    returned when no data is available at all.

    Parameters
    ----------
    state, district : str or None
        Optional API-side filters.
    market : str or None
        Optional market name; applied to the raw API rows and again
        after standardization (and to the CSV fallback).
    """
    api_key = os.environ.get('DATA_GOV_API_KEY',
                             "579b464db66ec23bdd00000140925613394847c57ae13db180760f06")
    base_url = "https://api.data.gov.in/resource/35985678-0d79-46b4-9ed6-6f13308a1d24"

    # Build params for API request with arrival date from last year
    arrival_date = get_last_year_date()
    params = {
        "api-key": api_key,
        "format": "json",
        "limit": 1000,
        "filters[Arrival_Date]": arrival_date
    }
    if state:
        params["filters[State]"] = state
    if district:
        params["filters[District]"] = district

    try:
        print(f"[fetch_market_data] Sending request to API with arrival date: {arrival_date}. Params: {params}")
        resp = requests.get(base_url, params=params, timeout=180)
    except Exception as e:
        print(f"[fetch_market_data] Network error on request: {e}")
        return _load_local_csv_fallback(market)

    if resp.status_code != 200:
        print(f"[fetch_market_data] API returned {resp.status_code}: {resp.text[:500]}")
        return _load_local_csv_fallback(market)

    try:
        data = resp.json()
    except Exception as e:
        print(f"[fetch_market_data] JSON decode error: {e}")
        return _load_local_csv_fallback(market)

    # Accept both response shapes: a dict with a "records" key, or a bare
    # list of records.  (Checking isinstance first avoids the AttributeError
    # the previous code hit when calling .get() on a list.)
    if isinstance(data, dict):
        records = data.get("records", [])
    elif isinstance(data, list):
        records = data
    else:
        records = []

    if not records:
        print("[fetch_market_data] No records returned by API in response.")
        return _load_local_csv_fallback(market)

    df_api = pd.DataFrame.from_records(records)

    # Filter by market if specified (raw API column name is 'Market')
    if market and 'Market' in df_api.columns:
        df_api = df_api[df_api['Market'].str.lower() == market.lower()]

    # Consolidate duplicate columns immediately
    df_api = consolidate_duplicate_columns(df_api)
    print(f"[fetch_market_data] Retrieved {len(df_api)} rows from API.")

    # Merge with final_price_data.csv if it exists
    dataframes_to_combine = [df_api]
    try:
        if os.path.exists("final_price_data.csv") and os.path.getsize("final_price_data.csv") > 0:
            df_csv = pd.read_csv("final_price_data.csv", encoding='utf-8', on_bad_lines='skip')
            if not df_csv.empty:
                dataframes_to_combine.append(consolidate_duplicate_columns(df_csv))
    except Exception as csv_err:
        print(f"[fetch_market_data] Could not load final_price_data.csv for merging: {csv_err}")

    df_combined = pd.concat(dataframes_to_combine, ignore_index=True, sort=False)
    df_combined = consolidate_duplicate_columns(df_combined)
    cleaned = clean_and_standardize(df_combined)
    if 'arrival_date' in cleaned.columns:
        cleaned = cleaned.copy()
        # Dates arrive as dd/MM/yyyy; normalize '/' to '-' then parse
        # day-first, coercing unparseable values to NaT.
        cleaned.loc[:, 'arrival_date'] = pd.to_datetime(
            cleaned['arrival_date'].astype(str).str.replace('\\/', '-', regex=True),
            dayfirst=True, errors='coerce'
        )

    # Additional market filtering after standardization
    if market and 'market' in cleaned.columns:
        cleaned = cleaned[cleaned['market'].str.lower() == market.lower()]

    return cleaned
 
209
  # Utility to flatten/clean numeric-like columns safely
210
  def flatten_column(df, col):
 
302
  top_commodities_str = ", ".join([f"{name} (Avg: ₹{price:.2f})" for name, price in top_commodities.items()])
303
  prompt = f'Analyze agricultural market data for {district}, {state}. Top commodities: {top_commodities_str}. Provide a JSON object with keys "crop_profitability", "market_analysis", "farmer_recommendations", each with an array of insights in {language}.'
304
  try:
305
+ api_url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent"
306
  headers = {"Content-Type": "application/json"}
307
  payload = {"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"responseMimeType": "application/json"}}
308
  response = requests.post(f"{api_url}?key={api_key}", headers=headers, json=payload, timeout=25)
 
360
  print("Location hierarchy loaded.")
361
 
362
  # --- Flask Routes ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
  @app.route('/')
364
  def index():
365
  states = sorted(list(LOCATION_HIERARCHY.keys()))
 
397
  if not state:
398
  return jsonify({'success': False, 'message': 'Please select a state.'})
399
 
400
+ # Pass market parameter to fetch_market_data for API filtering
401
+ df_combined = fetch_market_data(state, district, market)
402
  if df_combined is None or df_combined.empty:
403
  return jsonify({'success': False, 'message': 'No data found from API or local CSV.'})
404
 
405
  # Defensive copy
406
  df_filtered = df_combined.copy()
407
 
408
+ # Additional frontend filtering (in case not filtered by API)
409
  if state:
410
  df_filtered = df_filtered[df_filtered['state'].str.lower() == state.lower()]
411
  if district:
412
  df_filtered = df_filtered[df_filtered['district'].str.lower() == district.lower()]
413
+ if market and 'market' in df_filtered.columns:
414
  df_filtered = df_filtered[df_filtered['market'].str.lower() == market.lower()]
415
  if commodity:
416
  df_filtered = df_filtered[df_filtered['commodity'].str.lower() == commodity.lower()]
 
458
 
459
# Script entry point: run the Flask development server, listening on all
# interfaces at port 7860 (the default port for Hugging Face Spaces).
if __name__ == '__main__':
    # Use the clean white Plotly theme for all generated charts.
    pio.templates.default = "plotly_white"
    app.run(debug=True, host='0.0.0.0', port=7860)