Spaces:

RushiMane2003
/

market_trial

Sleeping

App Files Files Community

RushiMane2003 committed on Sep 24

Commit

dfda7e2

verified ·

1 Parent(s): 5f55587

Upload 6 files

Browse files

Files changed (6) hide show

Dockerfile +17 -0
agmarknet_cache.csv +11 -0
app.py +501 -0
final_price_data.csv +0 -0
location_hierarchy.json +0 -0
requirements.txt +8 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,17 @@

# Base image
FROM python:3.9-slim
# Set the working directory
WORKDIR /app
# Install dependencies first so this layer stays cached until requirements.txt changes
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt
# Copy application files
COPY . /app
# Expose the port your app runs on
EXPOSE 7860
# Command to run the application.
# --timeout is set above the 180 s upstream request timeout used in app.py so a
# slow data API call is not cut short by the worker timeout.
CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:7860", "--timeout", "200", "app:app"]

agmarknet_cache.csv ADDED Viewed

	@@ -0,0 +1,11 @@

+State,District,Market,Commodity,Variety,Grade,Arrival_Date,Min_Price,Max_Price,Modal_Price,Commodity_Code
+Goa,South Goa,Curchorem,Cashewnuts,Local(Raw),FAQ,15/05/2010,5400,5400,5400,36
+Goa,South Goa,Ponda,Cashewnuts,Kernel,FAQ,18/06/2010,5300,0,5300,36
+Goa,South Goa,Ponda,Cashewnuts,Local(Raw),FAQ,22/02/2010,5800,5800,5800,36
+Goa,South Goa,Ponda,Cashewnuts,Local(Raw),FAQ,11/03/2010,5650,5650,5650,36
+Goa,South Goa,Ponda,Cashewnuts,Local(Raw),FAQ,20/03/2010,5720,5720,5720,36
+Goa,South Goa,Ponda,Cashewnuts,Local(Raw),FAQ,22/03/2010,5800,5800,5800,36
+Goa,South Goa,Ponda,Cashewnuts,Local(Raw),FAQ,23/03/2010,5800,5800,5800,36
+Goa,South Goa,Ponda,Cashewnuts,Local(Raw),FAQ,24/03/2010,5800,5800,5800,36
+Goa,South Goa,Ponda,Cashewnuts,Local(Raw),FAQ,01/04/2010,5900,5900,5900,36
+Goa,South Goa,Ponda,Cashewnuts,Local(Raw),FAQ,09/04/2010,5950,5950,5950,36

app.py ADDED Viewed

	@@ -0,0 +1,501 @@

+import os
+import json
+from collections import Counter
+from flask import Flask, render_template, request, jsonify
+import requests
+import pandas as pd
+import plotly.express as px
+import plotly.io as pio
+import dotenv
+import threading
+import tempfile
+import shutil
+import numpy as np
+dotenv.load_dotenv()
+# --- Helper Functions ---
def clean_and_standardize(df):
    """Normalize column names and keep only the required columns that exist.

    Column names have ``_x0020_`` replaced by ``_``, are stripped and
    lower-cased, and have spaces replaced by ``_``. When that normalization
    makes several source columns share one required name (for example
    ``State`` and ``state``), they are merged into a single column holding
    the left-most non-null value of each row, so the result never carries
    duplicate labels for a required column.

    Args:
        df: Input DataFrame; it is not modified.

    Returns:
        A DataFrame with the subset of required columns that are present,
        in the fixed order of ``required_columns``.
    """
    # defensive copy
    df = df.copy()
    df.columns = df.columns.str.replace('_x0020_', '_', regex=False)
    df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_', regex=False)
    required_columns = [
        'state', 'district', 'market', 'commodity', 'variety',
        'grade', 'arrival_date', 'min_price', 'max_price', 'modal_price'
    ]
    if df.columns.has_duplicates:
        clashing = [col for col in required_columns if (df.columns == col).sum() > 1]
        # df[col] is a DataFrame for a duplicated label: back-fill across the
        # duplicates and keep the first column = left-most non-null per row.
        merged = {col: df[col].bfill(axis=1).iloc[:, 0] for col in clashing}
        df = df.loc[:, ~df.columns.duplicated()].copy()
        for col, values in merged.items():
            df[col] = values
    existing_columns = [col for col in required_columns if col in df.columns]
    return df[existing_columns]
def consolidate_duplicate_columns(df):
    """Merge columns that share a name into one column per name.

    For every duplicated column name the merged column holds, per row, the
    first non-null value across the duplicates (left-to-right). The merged
    column stays at the position of the first occurrence, so the relative
    column order of the input is preserved.

    Args:
        df: Input DataFrame, or ``None``.

    Returns:
        ``None`` when ``df`` is ``None``; otherwise a new DataFrame with
        unique column names. The input is never modified.
    """
    if df is None:
        return df
    df = df.copy()
    if not df.columns.has_duplicates:
        return df
    duplicated_names = df.columns[df.columns.duplicated()].unique()
    # Back-fill along the row, then take the first column: that is the
    # left-most non-null value among the same-named columns.
    merged = {
        name: df.loc[:, df.columns == name].bfill(axis=1).iloc[:, 0]
        for name in duplicated_names
    }
    # Keep only the first occurrence of each name, then overwrite it with the
    # merged values so the column does not move.
    df = df.loc[:, ~df.columns.duplicated()].copy()
    for name, values in merged.items():
        df[name] = values
    return df
def load_hierarchy_from_json(path='location_hierarchy.json'):
    """Load the location hierarchy mapping from a UTF-8 JSON file.

    Args:
        path: Path of the JSON file to read.

    Returns:
        The parsed JSON object as a dict. An empty dict is returned (and a
        message printed) when the file cannot be read, is not valid JSON, or
        its top-level value is not an object; callers use dict methods on the
        result.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            hierarchy = json.load(f)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError: invalid JSON or bad encoding.
        print(f"CRITICAL ERROR: Could not load '{path}'. Error: {e}")
        return {}
    if not isinstance(hierarchy, dict):
        print(f"CRITICAL ERROR: Could not load '{path}'. Error: top-level JSON value is not an object")
        return {}
    return hierarchy
def _read_local_price_data(path="final_price_data.csv"):
    """Return the local price CSV as a DataFrame, or None when the file is absent."""
    try:
        return pd.read_csv(path)
    except FileNotFoundError:
        return None


def _parse_arrival_dates(df):
    """Return ``df`` with ``arrival_date`` (when present) parsed as day-first datetimes."""
    if 'arrival_date' not in df.columns:
        return df
    df = df.copy()
    # Plain column assignment replaces the column, so it takes the datetime
    # dtype. NOTE(review): on recent pandas `df.loc[:, col] = ...` writes into
    # the existing column and can leave it as object dtype - confirm against
    # the deployed pandas version.
    df['arrival_date'] = pd.to_datetime(
        df['arrival_date'].astype(str).str.replace('\\/', '-', regex=True),
        dayfirst=True, errors='coerce'
    )
    return df


def _combine_price_frames(frames):
    """Normalize each frame's columns, stack the frames, and parse arrival dates."""
    # Normalizing before concatenation makes differently cased headers from
    # different sources (e.g. 'State' vs 'state') land in the same column.
    normalized = [
        consolidate_duplicate_columns(clean_and_standardize(frame))
        for frame in frames
    ]
    combined = pd.concat(normalized, ignore_index=True, sort=False)
    # Second pass only restores the canonical column order.
    return _parse_arrival_dates(clean_and_standardize(combined))


def _local_fallback():
    """Return cleaned local CSV data, or an empty DataFrame when no local file exists."""
    local = _read_local_price_data()
    if local is None:
        return pd.DataFrame()
    return _combine_price_frames([local])


def _write_cache_atomically(df, cache_path):
    """Write ``df`` to ``cache_path`` through a temp file in the same directory."""
    cache_dir = os.path.dirname(os.path.abspath(cache_path))
    # Same directory => same filesystem, so os.replace is a rename rather than a copy.
    temp_fd, temp_file = tempfile.mkstemp(suffix='.csv', dir=cache_dir)
    os.close(temp_fd)
    try:
        df.to_csv(temp_file, index=False)
        os.replace(temp_file, cache_path)
    finally:
        # The temp file only still exists when the write or the rename failed.
        if os.path.exists(temp_file):
            try:
                os.remove(temp_file)
            except OSError:
                pass


def fetch_market_data(state=None, district=None,
                      cache_path='agmarknet_cache.csv',
                      use_cache=True, force_refresh=False,
                      sleep_between=0.15, page_size=1000,
                      synchronous=True):
    """Return cleaned market price data from the cache, the API, or the local CSV.

    Resolution order:
      1. If ``use_cache`` is true, ``force_refresh`` is false and
         ``cache_path`` exists, the cache is read, combined with
         ``final_price_data.csv`` when that file exists, and returned.
         Any failure here is logged and falls through to a live fetch.
      2. If ``synchronous`` is false, the live fetch is started on a daemon
         thread and ``None`` is returned immediately.
      3. Otherwise one HTTP request is sent. On success the raw records are
         written to ``cache_path`` and returned combined with the local CSV.
         On a network error, non-200 status, undecodable body or empty record
         list, the local CSV alone is returned (empty DataFrame if absent).

    Every returned frame has normalized column names and, when present, an
    ``arrival_date`` column parsed day-first.

    Args:
        state: Optional value sent as ``filters[State]``.
        district: Optional value sent as ``filters[District]``.
        cache_path: CSV file used as the on-disk cache.
        use_cache: Allow reading the cache.
        force_refresh: Skip the cache even when it exists.
        sleep_between: Unused; kept for interface compatibility.
        page_size: Sent as the request's ``limit`` when truthy.
        synchronous: When false, fetch in a background thread and return None.

    Returns:
        A DataFrame, or ``None`` when a background fetch was started.
    """
    # NOTE(review): a key is embedded as the fallback value; prefer supplying
    # DATA_GOV_API_KEY through the environment.
    api_key = os.environ.get('DATA_GOV_API_KEY',
                "579b464db66ec23bdd00000140925613394847c57ae13db180760f06")
    base_url = "https://api.data.gov.in/resource/35985678-0d79-46b4-9ed6-6f13308a1d24"

    # Use cache if present and not forcing refresh.
    # NOTE(review): the cache is reused whatever state/district it was fetched
    # for; callers filter the returned rows themselves.
    if use_cache and not force_refresh and os.path.exists(cache_path):
        try:
            df_cache = pd.read_csv(cache_path)
            print(f"[fetch_market_data] Loaded cache '{cache_path}' ({len(df_cache)} rows).")
            frames = [df_cache]
            local = _read_local_price_data()
            if local is not None:
                frames.append(local)
            return _combine_price_frames(frames)
        except Exception as e:
            # Deliberate best-effort: an unusable cache must not block a live fetch.
            print(f"[fetch_market_data] Failed reading cache: {e}. Will fetch live.")

    # Background start support
    if not synchronous:
        t = threading.Thread(target=fetch_market_data, kwargs={
            'state': state, 'district': district, 'cache_path': cache_path,
            'use_cache': use_cache, 'force_refresh': force_refresh,
            'sleep_between': sleep_between, 'page_size': page_size, 'synchronous': True
        }, daemon=True)
        t.start()
        print("[fetch_market_data] Started background fetcher thread (single-request mode).")
        return None

    # Build params for single request
    params = {
        "api-key": api_key,
        "format": "json"
    }
    if page_size:
        # NOTE(review): the API appears to return only a small default page
        # when `limit` is omitted (the bundled cache sample holds 10 rows) -
        # confirm against the data.gov.in API documentation.
        params["limit"] = page_size
    if state:
        params["filters[State]"] = state
    if district:
        params["filters[District]"] = district

    safe_params = {k: v for k, v in params.items() if k != 'api-key'}
    try:
        print(f"[fetch_market_data] Sending single request to API (may be large). Params: {safe_params}")
        resp = requests.get(base_url, params=params, timeout=180)
    except requests.RequestException as e:
        # The exception text can embed the request URL, which carries the key.
        print(f"[fetch_market_data] Network error on single request: {str(e).replace(api_key, '***')}")
        return _local_fallback()

    if resp.status_code != 200:
        print(f"[fetch_market_data] API returned {resp.status_code}: {resp.text[:500]}")
        return _local_fallback()

    try:
        data = resp.json()
    except ValueError as e:
        print(f"[fetch_market_data] JSON decode error: {e}")
        return _local_fallback()

    # Accept either {"records": [...]} or a bare list of records.
    if isinstance(data, dict):
        records = data.get("records", [])
    elif isinstance(data, list):
        records = data
    else:
        records = []
    if not records:
        print("[fetch_market_data] No records returned by API in single response.")
        return _local_fallback()

    # Consolidate duplicate columns immediately
    df_api = consolidate_duplicate_columns(pd.DataFrame.from_records(records))

    # Cache writes are best-effort: a failure is logged and the data is still returned.
    try:
        _write_cache_atomically(df_api, cache_path)
        print(f"[fetch_market_data] Single-request cache updated at '{cache_path}' ({len(df_api)} rows).")
    except Exception as e:
        print(f"[fetch_market_data] Failed to write cache atomically: {e}")
        try:
            df_api.to_csv(cache_path, index=False)
        except Exception as e2:
            print(f"[fetch_market_data] Fallback write also failed: {e2}")

    # Merge with final_price_data.csv if exists
    frames = [df_api]
    local = _read_local_price_data()
    if local is not None:
        frames.append(local)
    return _combine_price_frames(frames)
+# Utility to flatten/clean numeric-like columns safely
# Keys tried, in order, when a cell holds a dict instead of a scalar.
_SCALAR_KEYS = ('value', 'price', 'modal_price', 'modalPrice', '0')


def _first_scalar(value):
    """Return the first usable scalar found in ``value``, or None.

    Scalars (numbers, strings) are returned unchanged, None/NaN become None,
    containers are searched depth-first in iteration order, and dicts are
    searched through ``_SCALAR_KEYS`` first and then through their values.
    Anything else is converted with ``str``.
    """
    if value is None:
        return None
    if isinstance(value, (float, np.floating)) and np.isnan(value):
        return None
    if isinstance(value, (int, float, str, np.integer, np.floating, np.str_)):
        return value
    if isinstance(value, dict):
        for key in _SCALAR_KEYS:
            if key in value:
                return _first_scalar(value[key])
        candidates = value.values()
    elif isinstance(value, (list, tuple, set)):
        candidates = value
    else:
        # fallback to string
        try:
            return str(value)
        except Exception:
            return None
    for candidate in candidates:
        found = _first_scalar(candidate)
        if found is not None:
            return found
    return None


def flatten_column(df, col):
    """Return a copy of ``df`` in which ``df[col]`` is a single numeric column.

    - If ``col`` labels several columns, they are merged (left-most non-null
      value per row) and only the first occurrence is kept.
    - Cells holding lists/tuples/sets/dicts are reduced to their first scalar.
    - Values are converted with ``pd.to_numeric(errors='coerce')``, so
      anything non-numeric becomes NaN.

    Args:
        df: Input DataFrame, or ``None``.
        col: Column label to flatten.

    Returns:
        ``df`` itself when it is ``None`` or has no ``col`` column; otherwise
        a modified copy.
    """
    if df is None or col not in df.columns:
        return df
    df = df.copy()
    series = df[col]
    # A duplicated label makes df[col] a DataFrame rather than a Series.
    if isinstance(series, pd.DataFrame):
        series = series.bfill(axis=1).iloc[:, 0]
        later_duplicates = (df.columns == col) & df.columns.duplicated()
        df = df.loc[:, ~later_duplicates].copy()
    # Numeric dtypes cannot hold containers, so the per-cell pass is skipped.
    if not pd.api.types.is_numeric_dtype(series):
        series = series.apply(_first_scalar)
    # Plain assignment replaces the column so it takes the numeric dtype.
    # NOTE(review): `df.loc[:, col] = ...` can keep an object dtype on recent
    # pandas, which breaks nsmallest/nlargest - confirm for the deployed version.
    df[col] = pd.to_numeric(series, errors='coerce')
    return df
+# AI insights (unchanged logic but using safer flatten)
def get_ai_insights(market_data, state, district, language="English"):
    """Ask the Gemini API for market insights and return them as HTML.

    The prompt lists the five commodities with the highest mean
    ``modal_price`` in ``market_data``.

    Args:
        market_data: DataFrame expected to have ``commodity`` and
            ``modal_price`` columns.
        state: State name used in the prompt.
        district: District name used in the prompt.
        language: Language requested for the insight text.

    Returns:
        ``""`` when state, district or data is missing; a short ``<p>``
        message when the API key or usable price data is unavailable or the
        request fails; otherwise the HTML built by ``format_ai_insights``.
    """
    if not state or not district or market_data is None or market_data.empty:
        return ""
    api_key = os.environ.get('GEMINI_API_KEY')
    if not api_key:
        return "<p>AI insights unavailable.</p>"
    # Both columns are needed for the grouping below.
    if 'modal_price' not in market_data.columns or 'commodity' not in market_data.columns:
        return "<p>AI insights unavailable.</p>"
    market_data = flatten_column(market_data, 'modal_price')
    # Drop commodities whose mean is NaN so "nan" never ends up in the prompt.
    averages = market_data.groupby('commodity', dropna=True)['modal_price'].mean().dropna()
    top_commodities = averages.nlargest(5)
    if top_commodities.empty:
        return "<p>AI insights unavailable.</p>"
    top_commodities_str = ", ".join([f"{name} (Avg: ₹{price:.2f})" for name, price in top_commodities.items()])
    prompt = f'Analyze agricultural market data for {district}, {state}. Top commodities: {top_commodities_str}. Provide a JSON object with keys "crop_profitability", "market_analysis", "farmer_recommendations", each with an array of insights in {language}.'
    try:
        api_url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent"
        # The key travels in a header rather than the URL, so it cannot show up
        # in the exception text printed below.
        headers = {"Content-Type": "application/json", "x-goog-api-key": api_key}
        payload = {"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"responseMimeType": "application/json"}}
        response = requests.post(api_url, headers=headers, json=payload, timeout=25)
        if response.status_code == 200:
            insights_json = json.loads(response.json()['candidates'][0]['content']['parts'][0]['text'])
            return format_ai_insights(insights_json)
        return f"<p>Error from AI model: {response.status_code}</p>"
    except Exception as e:
        # Boundary handler: network, JSON and response-shape errors all degrade
        # to the same user-facing message.
        print(f"Error generating insights: {e}")
        return "<p>Error generating AI insights.</p>"
def format_ai_insights(insights_json):
    """Render an insights mapping as HTML cards.

    Each key becomes a card title (a fixed title for the three known keys,
    otherwise the key with underscores replaced and title-cased) and each
    value becomes a bullet list. A non-list value is rendered as a single
    bullet. All titles and items are HTML-escaped because they originate
    from an external model response.

    Args:
        insights_json: Mapping of section key to a list of insight strings.

    Returns:
        The HTML string, or ``""`` when ``insights_json`` is not a dict.
    """
    # Local import: the module-level import block lies outside this function.
    from html import escape

    if not isinstance(insights_json, dict):
        return ""
    titles = {"crop_profitability": "Crop Profitability", "market_analysis": "Market Analysis", "farmer_recommendations": "Recommendations"}
    parts = ['<div class="insights-header"><h3>AI Market Insights</h3></div>']
    for key, items in insights_json.items():
        key = str(key)
        title = titles.get(key, key.replace('_', ' ').title())
        parts.append(f'<div class="insight-card"><h5>{escape(title)}</h5>')
        if items:
            # A bare string would otherwise be iterated character by character.
            if not isinstance(items, (list, tuple)):
                items = [items]
            bullets = ''.join(f'<li>{escape(str(item))}</li>' for item in items)
            parts.append(f'<ul class="insight-list">{bullets}</ul>')
        parts.append('</div>')
    return ''.join(parts)
def generate_plots(df):
    """Build the Plotly HTML fragments for a price DataFrame.

    Args:
        df: DataFrame expected to have ``commodity`` and ``modal_price``
            columns; it is not modified.

    Returns:
        A dict that may contain ``'bar'`` (mean modal price per commodity)
        and ``'box'`` (modal price distribution per commodity), each an HTML
        fragment. The dict is empty when the input is missing, empty, lacks
        either column, or has no row with both values. A figure that fails
        to render is logged and omitted.
    """
    plots = {}
    if df is None or df.empty:
        return plots
    if 'modal_price' not in df.columns or 'commodity' not in df.columns:
        return plots
    # flatten_column works on a copy; only modal_price is plotted.
    df = flatten_column(df, 'modal_price')
    df = df.dropna(subset=['modal_price', 'commodity'])
    if df.empty:
        return plots
    # build bar and box plots
    df_bar = df.groupby('commodity', dropna=True)['modal_price'].mean().reset_index()
    builders = (
        ('bar', lambda: px.bar(df_bar, x='commodity', y='modal_price', title="Average Price by Commodity")),
        ('box', lambda: px.box(df, x='commodity', y='modal_price', title="Price Distribution")),
    )
    for name, build in builders:
        # Each figure is rendered independently so one failure does not drop the other.
        try:
            plots[name] = pio.to_html(build(), full_html=False)
        except Exception as e:
            print(f"[generate_plots] Plotly error: {e}")
    return plots
+# --- Flask App Initialization ---
# Flask application object; the route decorators below register against it.
app = Flask(__name__)
print("Loading location hierarchy from JSON file...")
# Loaded once at import time and read by the lookup routes.
LOCATION_HIERARCHY = load_hierarchy_from_json()
if LOCATION_HIERARCHY:
    print("Location hierarchy loaded.")
else:
    # load_hierarchy_from_json returns an empty dict on failure; say so rather
    # than reporting success.
    print("WARNING: location hierarchy is empty; location lookups will return no results.")
+# --- Flask Routes ---
@app.route('/refresh_cache', methods=['POST'])
def refresh_cache():
    """Start a forced cache refresh on a daemon thread and respond immediately.

    Reads ``state`` and ``district`` from the posted form and passes them to
    ``fetch_market_data`` with the cache bypassed. The outcome of the refresh
    is only printed; the JSON response just confirms that it was started.
    """
    form = request.form
    target_state = form.get('state')
    target_district = form.get('district')

    def _refresh_worker():
        # Runs off the request thread: failures are logged, never raised.
        try:
            fetch_market_data(
                state=target_state,
                district=target_district,
                cache_path='agmarknet_cache.csv',
                use_cache=False,
                force_refresh=True,
                page_size=1000,
                synchronous=True,
            )
            print("[refresh_cache] Background refresh finished.")
        except Exception as exc:
            print(f"[refresh_cache] Background refresh failed: {exc}")

    threading.Thread(target=_refresh_worker, daemon=True).start()
    response_body = {'success': True, 'message': 'Background cache refresh started.'}
    return jsonify(response_body)
@app.route('/download_full_sync', methods=['POST'])
def download_full_sync():
    """Run a forced, blocking fetch and report how many rows came back.

    Reads ``state`` and ``district`` from the posted form. Responds with
    ``success: False`` when the fetch returns ``None`` or an empty frame.
    """
    form = request.form
    fetched = fetch_market_data(
        state=form.get('state'),
        district=form.get('district'),
        cache_path='agmarknet_cache.csv',
        use_cache=False,
        force_refresh=True,
        page_size=1000,
        synchronous=True,
    )
    has_rows = fetched is not None and not fetched.empty
    if has_rows:
        return jsonify({'success': True, 'message': f'Download complete. Cached {len(fetched)} rows.'})
    return jsonify({'success': False, 'message': 'Download produced no data.'})
@app.route('/')
def index():
    """Render ``index.html`` with the alphabetically sorted state names."""
    state_names = list(LOCATION_HIERARCHY.keys())
    state_names.sort()
    return render_template('index.html', states=state_names)
@app.route('/get_districts', methods=['POST'])
def get_districts():
    """Return the sorted district names of the posted ``state`` as JSON.

    An unknown or missing state yields an empty list.
    """
    selected_state = request.form.get('state')
    by_district = LOCATION_HIERARCHY.get(selected_state, {})
    return jsonify(sorted(by_district.keys()))
@app.route('/get_markets', methods=['POST'])
def get_markets():
    """Return the sorted market names of the posted ``state``/``district`` as JSON.

    An unknown state or district yields an empty list.
    """
    form = request.form
    by_district = LOCATION_HIERARCHY.get(form.get('state'), {})
    by_market = by_district.get(form.get('district'), {})
    return jsonify(sorted(by_market.keys()))
@app.route('/get_commodities', methods=['POST'])
def get_commodities():
    """Return the commodities stored for the posted state/district/market as JSON.

    The stored value is returned as-is (no sorting); an unknown location
    yields an empty list.
    """
    form = request.form
    by_district = LOCATION_HIERARCHY.get(form.get('state'), {})
    by_market = by_district.get(form.get('district'), {})
    return jsonify(by_market.get(form.get('market'), []))
@app.route('/filter_data', methods=['POST'])
def filter_data():
    """Filter market data by the posted location/commodity and return a JSON report.

    Form fields: ``state`` (required), ``district``, ``market``,
    ``commodity``, and ``language`` (default ``'English'``).

    The response always has ``success``. On failure it carries a
    ``message``; on success it carries ``plots``, ``insights``,
    ``market_html``, ``cheapest_html``, ``costliest_html`` and
    ``market_stats``.
    """
    state = request.form.get('state')
    district = request.form.get('district')
    market = request.form.get('market')
    commodity = request.form.get('commodity')
    language = request.form.get('language', 'English')
    if not state:
        return jsonify({'success': False, 'message': 'Please select a state.'})
    df_combined = fetch_market_data(state, district)
    if df_combined is None or df_combined.empty:
        return jsonify({'success': False, 'message': 'No data found from API or local CSV.'})

    # Collapse same-named columns first so every df[column] below is a Series.
    # consolidate_duplicate_columns returns a copy, so the fetched frame is untouched.
    df_filtered = consolidate_duplicate_columns(df_combined)

    # Case-insensitive equality filter for every field the user supplied.
    requested = {'state': state, 'district': district, 'market': market, 'commodity': commodity}
    for column, wanted in requested.items():
        if not wanted:
            continue
        if column not in df_filtered.columns:
            # A requested field that the data does not carry cannot match anything.
            return jsonify({'success': False, 'message': 'No records match the specific criteria.'})
        matches = df_filtered[column].astype(str).str.lower() == wanted.lower()
        df_filtered = df_filtered[matches]
    if df_filtered.empty:
        return jsonify({'success': False, 'message': 'No records match the specific criteria.'})

    deduplication_keys = ['state', 'district', 'market', 'commodity', 'arrival_date']
    present_keys = [key for key in deduplication_keys if key in df_filtered.columns]
    df_final = df_filtered.drop_duplicates(subset=present_keys, keep='first').copy()
    print(f"Final records to be processed: {len(df_final)}")

    # Ensure price columns are numeric; the explicit to_numeric guarantees a
    # numeric dtype for nsmallest/nlargest below.
    for col in ['min_price', 'max_price', 'modal_price']:
        df_final = flatten_column(df_final, col)
        if col in df_final.columns:
            df_final[col] = pd.to_numeric(df_final[col], errors='coerce')

    # Validate before the plot rendering and the remote AI call so neither is
    # spent on a request that is going to fail anyway.
    needed = ('commodity', 'market', 'modal_price')
    if (df_final.empty
            or any(col not in df_final.columns for col in needed)
            or df_final['modal_price'].dropna().empty):
        return jsonify({'success': False, 'message': 'No valid price data after cleaning.'})

    # Both helpers copy their input, so df_final stays as it is for the tables.
    plots = generate_plots(df_final)
    insights = get_ai_insights(df_final, state, district, language)

    cheapest = df_final.nsmallest(5, 'modal_price')[['commodity', 'market', 'modal_price']]
    costliest = df_final.nlargest(5, 'modal_price')[['commodity', 'market', 'modal_price']]
    market_stats = {
        'total_commodities': int(df_final['commodity'].nunique()),
        'avg_modal_price': f"₹{df_final['modal_price'].mean():.2f}",
        'price_range': f"₹{df_final['modal_price'].min():.2f} - ₹{df_final['modal_price'].max():.2f}",
        'total_markets': int(df_final['market'].nunique())
    }
    return jsonify({
        'success': True,
        'plots': plots,
        'insights': insights,
        'market_html': df_final.to_html(classes="table table-striped table-sm", index=False),
        'cheapest_html': cheapest.to_html(classes="table table-sm", index=False),
        'costliest_html': costliest.to_html(classes="table table-sm", index=False),
        'market_stats': market_stats
    })
# Set the default Plotly theme at import time so it also applies when the
# module is loaded by a WSGI server (e.g. `gunicorn app:app`), which never
# executes the `__main__` branch below.
pio.templates.default = "plotly_white"

if __name__ == '__main__':
    # The interactive debugger must not be enabled by default on a server
    # bound to 0.0.0.0; opt in for local development with FLASK_DEBUG=1.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, host='0.0.0.0', port=7860)

final_price_data.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

location_hierarchy.json ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+flask
+gunicorn
+requests
+pandas
+numpy
+plotly
+googletrans
+python-dotenv