RushiMane2003 committed (verified)
Commit dfda7e2
Parent: 5f55587

Upload 6 files
Dockerfile ADDED
@@ -0,0 +1,17 @@
+# Base image
+FROM python:3.9-slim
+
+# Set the working directory
+WORKDIR /app
+
+# Copy application files
+COPY . /app
+
+# Install dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Expose the port the app runs on
+EXPOSE 7860
+
+# Command to run the application
+CMD ["gunicorn", "-w", "4", "-b", "0.0.0.0:7860", "app:app"]
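The container binds gunicorn to 0.0.0.0:7860, matching the EXPOSE line above and the port Hugging Face Spaces routes to by default. A minimal smoke test, assuming the image has already been built and is running locally with port 7860 published (the image tag in the comment is only a placeholder):

    # smoke_test.py: assumes `docker run -p 7860:7860 <your-image-tag>` is already running
    import requests

    resp = requests.get("http://localhost:7860/", timeout=10)
    print(resp.status_code)                  # expect 200 once the Flask index route renders
    print(resp.headers.get("Content-Type"))  # expect text/html from render_template('index.html')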
agmarknet_cache.csv ADDED
@@ -0,0 +1,11 @@
+State,District,Market,Commodity,Variety,Grade,Arrival_Date,Min_Price,Max_Price,Modal_Price,Commodity_Code
+Goa,South Goa,Curchorem,Cashewnuts,Local(Raw),FAQ,15/05/2010,5400,5400,5400,36
+Goa,South Goa,Ponda,Cashewnuts,Kernel,FAQ,18/06/2010,5300,0,5300,36
+Goa,South Goa,Ponda,Cashewnuts,Local(Raw),FAQ,22/02/2010,5800,5800,5800,36
+Goa,South Goa,Ponda,Cashewnuts,Local(Raw),FAQ,11/03/2010,5650,5650,5650,36
+Goa,South Goa,Ponda,Cashewnuts,Local(Raw),FAQ,20/03/2010,5720,5720,5720,36
+Goa,South Goa,Ponda,Cashewnuts,Local(Raw),FAQ,22/03/2010,5800,5800,5800,36
+Goa,South Goa,Ponda,Cashewnuts,Local(Raw),FAQ,23/03/2010,5800,5800,5800,36
+Goa,South Goa,Ponda,Cashewnuts,Local(Raw),FAQ,24/03/2010,5800,5800,5800,36
+Goa,South Goa,Ponda,Cashewnuts,Local(Raw),FAQ,01/04/2010,5900,5900,5900,36
+Goa,South Goa,Ponda,Cashewnuts,Local(Raw),FAQ,09/04/2010,5950,5950,5950,36
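Arrival_Date in this cache is a day-first dd/mm/yyyy string, which is why app.py parses it with dayfirst=True before filtering. A quick sketch of that parsing against the cached file, assuming it is run from the repository root:

    import pandas as pd

    df = pd.read_csv("agmarknet_cache.csv")
    dates = pd.to_datetime(df["Arrival_Date"], dayfirst=True, errors="coerce")
    print(dates.min(), dates.max())  # all rows above fall in 2010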
app.py ADDED
@@ -0,0 +1,501 @@
+import os
+import json
+from collections import Counter
+from flask import Flask, render_template, request, jsonify
+import requests
+import pandas as pd
+import plotly.express as px
+import plotly.io as pio
+import dotenv
+import threading
+import tempfile
+import shutil
+import numpy as np
+
+dotenv.load_dotenv()
+
+# --- Helper Functions ---
+
+def clean_and_standardize(df):
+    """Normalize column names and keep only required columns (if present)."""
+    # defensive copy
+    df = df.copy()
+    df.columns = df.columns.str.replace('_x0020_', '_', regex=False)
+    df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_', regex=False)
+    required_columns = [
+        'state', 'district', 'market', 'commodity', 'variety',
+        'grade', 'arrival_date', 'min_price', 'max_price', 'modal_price'
+    ]
+    existing_columns = [col for col in required_columns if col in df.columns]
+    return df[existing_columns]
+
+def consolidate_duplicate_columns(df):
+    """
+    If the dataframe has duplicate column names (e.g. 'modal_price' twice),
+    consolidate them into a single column by taking the first non-null
+    value across the duplicates (left-to-right).
+    Returns a new DataFrame with unique column names.
+    """
+    if df is None:
+        return df
+    df = df.copy()
+    cols = list(df.columns)
+    counts = Counter(cols)
+    # iterate over names with count > 1
+    for name, cnt in list(counts.items()):
+        if cnt > 1:
+            # indices of columns with this name
+            idxs = [i for i, c in enumerate(cols) if c == name]
+            sub = df.iloc[:, idxs]
+            # First non-null per-row (left-most). bfill along columns and pick first col.
+            try:
+                first_non_null = sub.bfill(axis=1).iloc[:, 0]
+            except Exception:
+                # fallback: apply row-wise
+                first_non_null = sub.apply(lambda row: next((v for v in row if pd.notna(v)), None), axis=1)
+            # Drop all old duplicate columns
+            df = df.drop(columns=sub.columns)
+            # Assign consolidated column (use .loc to avoid SettingWithCopyWarning)
+            df.loc[:, name] = first_non_null
+            # refresh cols list & counts
+            cols = list(df.columns)
+            counts = Counter(cols)
+    return df
+
+def load_hierarchy_from_json(path='location_hierarchy.json'):
+    try:
+        with open(path, 'r', encoding='utf-8') as f:
+            return json.load(f)
+    except Exception as e:
+        print(f"CRITICAL ERROR: Could not load '{path}'. Error: {e}")
+        return {}
+
+def fetch_market_data(state=None, district=None,
+                      cache_path='agmarknet_cache.csv',
+                      use_cache=True, force_refresh=False,
+                      sleep_between=0.15, page_size=1000,
+                      synchronous=True):
+    """
+    Single-request fetcher (the API returns the full dataset in one response).
+    Returns a cleaned DataFrame with duplicate columns consolidated and arrival_date normalized.
+    """
+    api_key = os.environ.get('DATA_GOV_API_KEY',
+                             "579b464db66ec23bdd00000140925613394847c57ae13db180760f06")
+    base_url = "https://api.data.gov.in/resource/35985678-0d79-46b4-9ed6-6f13308a1d24"
+
+    # Use cache if present and not forcing refresh
+    if use_cache and not force_refresh and os.path.exists(cache_path):
+        try:
+            df_cache = pd.read_csv(cache_path)
+            print(f"[fetch_market_data] Loaded cache '{cache_path}' ({len(df_cache)} rows).")
+            dataframes_to_combine = [df_cache]
+            try:
+                df_csv = pd.read_csv("final_price_data.csv")
+                dataframes_to_combine.append(df_csv)
+            except FileNotFoundError:
+                pass
+            df_combined = pd.concat(dataframes_to_combine, ignore_index=True, sort=False)
+            # first, consolidate duplicate columns (if any)
+            df_combined = consolidate_duplicate_columns(df_combined)
+            cleaned = clean_and_standardize(df_combined)
+            if 'arrival_date' in cleaned.columns:
+                cleaned = cleaned.copy()
+                cleaned.loc[:, 'arrival_date'] = pd.to_datetime(
+                    cleaned['arrival_date'].astype(str).str.replace('\\/', '-', regex=True),
+                    dayfirst=True, errors='coerce'
+                )
+            return cleaned
+        except Exception as e:
+            print(f"[fetch_market_data] Failed reading cache: {e}. Will fetch live.")
+
+    # Background start support
+    if not synchronous:
+        t = threading.Thread(target=fetch_market_data, kwargs={
+            'state': state, 'district': district, 'cache_path': cache_path,
+            'use_cache': use_cache, 'force_refresh': force_refresh,
+            'sleep_between': sleep_between, 'page_size': page_size, 'synchronous': True
+        }, daemon=True)
+        t.start()
+        print("[fetch_market_data] Started background fetcher thread (single-request mode).")
+        return None
+
+    # Build params for single request
+    params = {
+        "api-key": api_key,
+        "format": "json"
+    }
+    if state:
+        params["filters[State]"] = state
+    if district:
+        params["filters[District]"] = district
+
+    temp_fd, temp_file = tempfile.mkstemp(suffix='.csv')
+    os.close(temp_fd)
+    try:
+        try:
+            print(f"[fetch_market_data] Sending single request to API (may be large). Params: { {k:v for k,v in params.items() if k!='api-key'} }")
+            resp = requests.get(base_url, params=params, timeout=180)
+        except Exception as e:
+            print(f"[fetch_market_data] Network error on single request: {e}")
+            # fallback to local CSV if present
+            try:
+                df_csv = pd.read_csv("final_price_data.csv")
+                df_csv = consolidate_duplicate_columns(df_csv)
+                return clean_and_standardize(df_csv)
+            except FileNotFoundError:
+                return pd.DataFrame()
+
+        if resp.status_code != 200:
+            print(f"[fetch_market_data] API returned {resp.status_code}: {resp.text[:500]}")
+            try:
+                df_csv = pd.read_csv("final_price_data.csv")
+                df_csv = consolidate_duplicate_columns(df_csv)
+                return clean_and_standardize(df_csv)
+            except FileNotFoundError:
+                return pd.DataFrame()
+
+        try:
+            data = resp.json()
+        except Exception as e:
+            print(f"[fetch_market_data] JSON decode error: {e}")
+            try:
+                df_csv = pd.read_csv("final_price_data.csv")
+                df_csv = consolidate_duplicate_columns(df_csv)
+                return clean_and_standardize(df_csv)
+            except FileNotFoundError:
+                return pd.DataFrame()
+
+        records = data.get("records", [])
+        if not records and isinstance(data, list):
+            records = data
+
+        if not records:
+            print("[fetch_market_data] No records returned by API in single response.")
+            try:
+                df_csv = pd.read_csv("final_price_data.csv")
+                df_csv = consolidate_duplicate_columns(df_csv)
+                return clean_and_standardize(df_csv)
+            except FileNotFoundError:
+                return pd.DataFrame()
+
+        df_api = pd.DataFrame.from_records(records)
+        # Consolidate duplicate columns immediately
+        df_api = consolidate_duplicate_columns(df_api)
+
+        # write cache atomically
+        try:
+            df_api.to_csv(temp_file, index=False)
+            shutil.move(temp_file, cache_path)
+            print(f"[fetch_market_data] Single-request cache updated at '{cache_path}' ({len(df_api)} rows).")
+        except Exception as e:
+            print(f"[fetch_market_data] Failed to write cache atomically: {e}")
+            try:
+                df_api.to_csv(cache_path, index=False)
+            except Exception as e2:
+                print(f"[fetch_market_data] Fallback write also failed: {e2}")
+
+        # Merge with final_price_data.csv if exists
+        dataframes_to_combine = [df_api]
+        try:
+            df_csv = pd.read_csv("final_price_data.csv")
+            df_csv = consolidate_duplicate_columns(df_csv)
+            dataframes_to_combine.append(df_csv)
+        except FileNotFoundError:
+            pass
+
+        df_combined = pd.concat(dataframes_to_combine, ignore_index=True, sort=False)
+        df_combined = consolidate_duplicate_columns(df_combined)
+        cleaned = clean_and_standardize(df_combined)
+        if 'arrival_date' in cleaned.columns:
+            cleaned = cleaned.copy()
+            cleaned.loc[:, 'arrival_date'] = pd.to_datetime(
+                cleaned['arrival_date'].astype(str).str.replace('\\/', '-', regex=True),
+                dayfirst=True, errors='coerce'
+            )
+        return cleaned
+
+    finally:
+        if os.path.exists(temp_file):
+            try:
+                os.remove(temp_file)
+            except Exception:
+                pass
+
+# Utility to flatten/clean numeric-like columns safely
+def flatten_column(df, col):
+    """
+    Ensure df[col] becomes a 1-D numeric Series:
+    - handles when df[col] is a DataFrame (duplicate names),
+    - handles lists/tuples inside cells,
+    - coerces to numeric with errors='coerce'.
+    Returns the modified DataFrame (a copy).
+    """
+    if df is None or col not in df.columns:
+        return df
+    df = df.copy()
+    series = df[col]
+
+    # If it's accidentally a DataFrame slice (multiple columns with same label),
+    if isinstance(series, pd.DataFrame):
+        # take left-most non-null per row across those columns
+        try:
+            series = series.bfill(axis=1).iloc[:, 0]
+        except Exception:
+            series = series.apply(lambda row: next((v for v in row if pd.notna(v)), None), axis=1)
+
+    # Flatten lists/tuples: take first non-null element if present
+    def _first_scalar(x):
+        if x is None:
+            return None
+        # numpy nan
+        try:
+            if isinstance(x, float) and np.isnan(x):
+                return None
+        except Exception:
+            pass
+        if isinstance(x, (int, float, str, np.integer, np.floating, np.str_)):
+            return x
+        if isinstance(x, (list, tuple, set)):
+            for item in x:
+                if item is None:
+                    continue
+                try:
+                    if isinstance(item, float) and np.isnan(item):
+                        continue
+                except Exception:
+                    pass
+                if isinstance(item, (list, tuple, set)):
+                    for sub in item:
+                        if sub is not None:
+                            return sub
+                    continue
+                if isinstance(item, dict):
+                    # try to find a numeric-like key
+                    for k in ('value', 'price', 'modal_price', '0'):
+                        if k in item:
+                            return item[k]
+                    vals = list(item.values())
+                    if vals:
+                        return vals[0]
+                    continue
+                return item
+            return None
+        if isinstance(x, dict):
+            for k in ('value', 'price', 'modal_price', 'modalPrice', '0'):
+                if k in x:
+                    return x[k]
+            vals = list(x.values())
+            if vals:
+                return vals[0]
+            return None
+        # fallback to string
+        try:
+            return str(x)
+        except Exception:
+            return None
+
+    series = series.apply(_first_scalar)
+    series = pd.to_numeric(series, errors='coerce')
+    # assign back using .loc to avoid SettingWithCopyWarning
+    df.loc[:, col] = series
+    return df
+
+# AI insights (unchanged logic but using safer flatten)
+def get_ai_insights(market_data, state, district, language="English"):
+    if not state or not district or market_data is None or market_data.empty:
+        return ""
+    api_key = os.environ.get('GEMINI_API_KEY')
+    if not api_key:
+        return "<p>AI insights unavailable.</p>"
+
+    market_data = flatten_column(market_data, 'modal_price')
+    if 'modal_price' not in market_data.columns:
+        return "<p>AI insights unavailable.</p>"
+
+    # safe grouping even if some modal_price are NaN
+    top_commodities = market_data.groupby('commodity', dropna=True)['modal_price'].mean().nlargest(5)
+    top_commodities_str = ", ".join([f"{name} (Avg: ₹{price:.2f})" for name, price in top_commodities.items()])
+    prompt = f'Analyze agricultural market data for {district}, {state}. Top commodities: {top_commodities_str}. Provide a JSON object with keys "crop_profitability", "market_analysis", "farmer_recommendations", each with an array of insights in {language}.'
+    try:
+        api_url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent"
+        headers = {"Content-Type": "application/json"}
+        payload = {"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"responseMimeType": "application/json"}}
+        response = requests.post(f"{api_url}?key={api_key}", headers=headers, json=payload, timeout=25)
+        if response.status_code == 200:
+            insights_json = json.loads(response.json()['candidates'][0]['content']['parts'][0]['text'])
+            return format_ai_insights(insights_json)
+        return f"<p>Error from AI model: {response.status_code}</p>"
+    except Exception as e:
+        print(f"Error generating insights: {e}")
+        return "<p>Error generating AI insights.</p>"
+
+def format_ai_insights(insights_json):
+    if not isinstance(insights_json, dict):
+        return ""
+    titles = {"crop_profitability": "Crop Profitability", "market_analysis": "Market Analysis", "farmer_recommendations": "Recommendations"}
+    html = '<div class="insights-header"><h3>AI Market Insights</h3></div>'
+    for key, items in insights_json.items():
+        title = titles.get(key, key.replace('_', ' ').title())
+        html += f'<div class="insight-card"><h5>{title}</h5>'
+        if items:
+            html += '<ul class="insight-list">' + ''.join(f'<li>{item}</li>' for item in items) + '</ul>'
+        html += '</div>'
+    return html
+
+def generate_plots(df):
+    plots = {}
+    if df is None or df.empty or 'modal_price' not in df.columns:
+        return plots
+
+    # Work on a copy
+    df = df.copy()
+
+    for col in ['min_price', 'max_price', 'modal_price']:
+        df = flatten_column(df, col)
+
+    df.dropna(subset=['modal_price', 'commodity'], inplace=True)
+    if df.empty:
+        return plots
+
+    # build bar and box plots
+    df_bar = df.groupby('commodity', dropna=True)['modal_price'].mean().reset_index()
+    try:
+        fig_bar = px.bar(df_bar, x='commodity', y='modal_price', title="Average Price by Commodity")
+        plots['bar'] = pio.to_html(fig_bar, full_html=False)
+        fig_box = px.box(df, x='commodity', y='modal_price', title="Price Distribution")
+        plots['box'] = pio.to_html(fig_box, full_html=False)
+    except Exception as e:
+        print(f"[generate_plots] Plotly error: {e}")
+    return plots
+
+# --- Flask App Initialization ---
+app = Flask(__name__)
+print("Loading location hierarchy from JSON file...")
+LOCATION_HIERARCHY = load_hierarchy_from_json()
+print("Location hierarchy loaded.")
+
+# --- Flask Routes ---
+@app.route('/refresh_cache', methods=['POST'])
+def refresh_cache():
+    state = request.form.get('state')
+    district = request.form.get('district')
+
+    def _bg():
+        try:
+            fetch_market_data(state=state, district=district, cache_path='agmarknet_cache.csv',
+                              use_cache=False, force_refresh=True, page_size=1000, synchronous=True)
+            print("[refresh_cache] Background refresh finished.")
+        except Exception as e:
+            print(f"[refresh_cache] Background refresh failed: {e}")
+
+    t = threading.Thread(target=_bg, daemon=True)
+    t.start()
+    return jsonify({'success': True, 'message': 'Background cache refresh started.'})
+
+@app.route('/download_full_sync', methods=['POST'])
+def download_full_sync():
+    state = request.form.get('state')
+    district = request.form.get('district')
+    df = fetch_market_data(state=state, district=district, cache_path='agmarknet_cache.csv',
+                           use_cache=False, force_refresh=True, page_size=1000, synchronous=True)
+    if df is None or df.empty:
+        return jsonify({'success': False, 'message': 'Download produced no data.'})
+    return jsonify({'success': True, 'message': f'Download complete. Cached {len(df)} rows.'})
+
+@app.route('/')
+def index():
+    states = sorted(list(LOCATION_HIERARCHY.keys()))
+    return render_template('index.html', states=states)
+
+@app.route('/get_districts', methods=['POST'])
+def get_districts():
+    state = request.form.get('state')
+    districts = sorted(list(LOCATION_HIERARCHY.get(state, {}).keys()))
+    return jsonify(districts)
+
+@app.route('/get_markets', methods=['POST'])
+def get_markets():
+    state = request.form.get('state')
+    district = request.form.get('district')
+    markets = sorted(list(LOCATION_HIERARCHY.get(state, {}).get(district, {}).keys()))
+    return jsonify(markets)
+
+@app.route('/get_commodities', methods=['POST'])
+def get_commodities():
+    state = request.form.get('state')
+    district = request.form.get('district')
+    market = request.form.get('market')
+    commodities = LOCATION_HIERARCHY.get(state, {}).get(district, {}).get(market, [])
+    return jsonify(commodities)
+
+@app.route('/filter_data', methods=['POST'])
+def filter_data():
+    state = request.form.get('state')
+    district = request.form.get('district')
+    market = request.form.get('market')
+    commodity = request.form.get('commodity')
+    language = request.form.get('language', 'English')
+
+    if not state:
+        return jsonify({'success': False, 'message': 'Please select a state.'})
+
+    df_combined = fetch_market_data(state, district)
+    if df_combined is None or df_combined.empty:
+        return jsonify({'success': False, 'message': 'No data found from API or local CSV.'})
+
+    # Defensive copy
+    df_filtered = df_combined.copy()
+
+    if state:
+        df_filtered = df_filtered[df_filtered['state'].str.lower() == state.lower()]
+    if district:
+        df_filtered = df_filtered[df_filtered['district'].str.lower() == district.lower()]
+    if market:
+        df_filtered = df_filtered[df_filtered['market'].str.lower() == market.lower()]
+    if commodity:
+        df_filtered = df_filtered[df_filtered['commodity'].str.lower() == commodity.lower()]
+
+    if df_filtered.empty:
+        return jsonify({'success': False, 'message': 'No records match the specific criteria.'})
+
+    deduplication_keys = ['state', 'district', 'market', 'commodity', 'arrival_date']
+    df_final = df_filtered.drop_duplicates(subset=deduplication_keys, keep='first').copy()
+
+    print(f"Final records to be processed: {len(df_final)}")
+
+    # Consolidate duplicate columns just in case (extra safety)
+    df_final = consolidate_duplicate_columns(df_final)
+
+    # Ensure price columns are numeric
+    for col in ['min_price', 'max_price', 'modal_price']:
+        df_final = flatten_column(df_final, col)
+
+    plots = generate_plots(df_final.copy())
+    insights = get_ai_insights(df_final.copy(), state, district, language)
+
+    # After cleaning, check for valid price column
+    if df_final.empty or 'modal_price' not in df_final.columns or df_final['modal_price'].dropna().empty:
+        return jsonify({'success': False, 'message': 'No valid price data after cleaning.'})

+    cheapest = df_final.nsmallest(5, 'modal_price')[['commodity', 'market', 'modal_price']]
+    costliest = df_final.nlargest(5, 'modal_price')[['commodity', 'market', 'modal_price']]
+    market_stats = {
+        'total_commodities': int(df_final['commodity'].nunique()),
+        'avg_modal_price': f"₹{df_final['modal_price'].mean():.2f}",
+        'price_range': f"₹{df_final['modal_price'].min():.2f} - ₹{df_final['modal_price'].max():.2f}",
+        'total_markets': int(df_final['market'].nunique())
+    }
+
+    return jsonify({
+        'success': True,
+        'plots': plots,
+        'insights': insights,
+        'market_html': df_final.to_html(classes="table table-striped table-sm", index=False),
+        'cheapest_html': cheapest.to_html(classes="table table-sm", index=False),
+        'costliest_html': costliest.to_html(classes="table table-sm", index=False),
+        'market_stats': market_stats
+    })
+
+if __name__ == '__main__':
+    pio.templates.default = "plotly_white"
+    app.run(debug=True, host='0.0.0.0', port=7860)
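The data path in app.py is cache-first: fetch_market_data loads agmarknet_cache.csv (plus final_price_data.csv when present), consolidates duplicate columns, and normalizes arrival_date, and it only calls the data.gov.in API when the cache is missing or a refresh is forced. A short sketch of exercising those helpers outside Flask, assuming it is run from the repository root so the cached CSVs and location_hierarchy.json are available:

    # Importing app runs its module-level setup (dotenv, Flask app, hierarchy load) but not app.run().
    from app import fetch_market_data, flatten_column, generate_plots

    df = fetch_market_data(state="Goa", district="South Goa")  # served from agmarknet_cache.csv, no API call
    print(df.shape, list(df.columns))

    df = flatten_column(df, "modal_price")  # coerce the price column to numeric
    plots = generate_plots(df)              # dict with 'bar' and 'box' HTML fragments when data is valid
    print(sorted(plots.keys()))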
final_price_data.csv ADDED
The diff for this file is too large to render. See raw diff
 
location_hierarchy.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,8 @@
+flask
+gunicorn
+requests
+pandas
+numpy
+plotly
+googletrans
+python-dotenv