Files changed (1)
  1. app.py +197 -343
app.py CHANGED
@@ -1,4 +1,3 @@
-# app.py — robusted version (only minimal safe changes added)
 import os
 import json
 from collections import Counter
@@ -8,8 +7,10 @@ import pandas as pd
 import plotly.express as px
 import plotly.io as pio
 import dotenv
+import threading
+import tempfile
+import shutil
 import numpy as np
-from datetime import datetime, timedelta

 dotenv.load_dotenv()

@@ -21,11 +22,9 @@ def clean_and_standardize(df):
     df = df.copy()
     df.columns = df.columns.str.replace('_x0020_', '_', regex=False)
     df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_', regex=False)
-    # drop grade if present; ignore otherwise
-    df = df.drop(columns=['grade'], errors='ignore')
     required_columns = [
         'state', 'district', 'market', 'commodity', 'variety',
-        'arrival_date', 'min_price', 'max_price', 'modal_price'
+        'grade', 'arrival_date', 'min_price', 'max_price', 'modal_price'
     ]
     existing_columns = [col for col in required_columns if col in df.columns]
     return df[existing_columns]
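
A note on the hunk above: the incoming field names evidently escape spaces as _x0020_ (hence the replace before lower-casing), and the required-column list now keeps grade instead of dropping it. A quick illustration of the normalization, using invented column names:

    df = pd.DataFrame(columns=['State', 'Min_x0020_Price', 'Grade'])
    print(clean_and_standardize(df).columns.tolist())
    # ['state', 'grade', 'min_price']  -- selected in required_columns order
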
@@ -71,145 +70,158 @@ def load_hierarchy_from_json(path='location_hierarchy.json'):
         print(f"CRITICAL ERROR: Could not load '{path}'. Error: {e}")
         return {}

-def get_last_year_date():
-    """Get date from one year ago in dd/MM/yyyy format"""
-    last_year = datetime.now() - timedelta(days=365)
-    return last_year.strftime("%d/%m/%Y")
-
-def fetch_market_data(state=None, district=None, market=None):
+def fetch_market_data(state=None, district=None,
+                      cache_path='agmarknet_cache.csv',
+                      use_cache=True, force_refresh=False,
+                      sleep_between=0.15, page_size=1000,
+                      synchronous=True):
     """
-    Fetcher to use new API endpoint with arrival date filter and market filtering.
+    Single-request fetcher (the API returns the full dataset in one response).
     Returns a cleaned DataFrame with duplicate columns consolidated and arrival_date normalized.
     """
     api_key = os.environ.get('DATA_GOV_API_KEY',
                              "579b464db66ec23bdd00000140925613394847c57ae13db180760f06")
     base_url = "https://api.data.gov.in/resource/35985678-0d79-46b4-9ed6-6f13308a1d24"

-    # Build params for API request with arrival date from last year
-    arrival_date = get_last_year_date()
+    # Use cache if present and not forcing refresh
+    if use_cache and not force_refresh and os.path.exists(cache_path):
+        try:
+            df_cache = pd.read_csv(cache_path)
+            print(f"[fetch_market_data] Loaded cache '{cache_path}' ({len(df_cache)} rows).")
+            dataframes_to_combine = [df_cache]
+            try:
+                df_csv = pd.read_csv("final_price_data.csv")
+                dataframes_to_combine.append(df_csv)
+            except FileNotFoundError:
+                pass
+            df_combined = pd.concat(dataframes_to_combine, ignore_index=True, sort=False)
+            # first, consolidate duplicate columns (if any)
+            df_combined = consolidate_duplicate_columns(df_combined)
+            cleaned = clean_and_standardize(df_combined)
+            if 'arrival_date' in cleaned.columns:
+                cleaned = cleaned.copy()
+                cleaned.loc[:, 'arrival_date'] = pd.to_datetime(
+                    cleaned['arrival_date'].astype(str).str.replace('\\/', '-', regex=True),
+                    dayfirst=True, errors='coerce'
+                )
+            return cleaned
+        except Exception as e:
+            print(f"[fetch_market_data] Failed reading cache: {e}. Will fetch live.")
+
+    # Background start support
+    if not synchronous:
+        t = threading.Thread(target=fetch_market_data, kwargs={
+            'state': state, 'district': district, 'cache_path': cache_path,
+            'use_cache': use_cache, 'force_refresh': force_refresh,
+            'sleep_between': sleep_between, 'page_size': page_size, 'synchronous': True
+        }, daemon=True)
+        t.start()
+        print("[fetch_market_data] Started background fetcher thread (single-request mode).")
+        return None
+
+    # Build params for single request
     params = {
         "api-key": api_key,
-        "format": "json",
-        "limit": 1000,
-        "filters[Arrival_Date]": arrival_date
+        "format": "json"
     }
-
     if state:
         params["filters[State]"] = state
     if district:
         params["filters[District]"] = district

+    temp_fd, temp_file = tempfile.mkstemp(suffix='.csv')
+    os.close(temp_fd)
     try:
-        print(f"[fetch_market_data] Sending request to API with arrival date: {arrival_date}. Params: {params}")
-        resp = requests.get(base_url, params=params, timeout=180)
-    except Exception as e:
-        print(f"[fetch_market_data] Network error on request: {e}")
-        # fallback to local CSV if present
         try:
-            if os.path.exists("final_price_data.csv") and os.path.getsize("final_price_data.csv") > 0:
-                df_csv = pd.read_csv("final_price_data.csv", encoding='utf-8', on_bad_lines='skip')
-                if not df_csv.empty:
-                    df_csv = consolidate_duplicate_columns(df_csv)
-                    cleaned = clean_and_standardize(df_csv)
-                    if market and 'market' in cleaned.columns:
-                        cleaned = cleaned[cleaned['market'].str.lower() == market.lower()]
-                    return cleaned
-        except Exception as csv_err:
-            print(f"[fetch_market_data] Could not load final_price_data.csv: {csv_err}")
-        return pd.DataFrame()
-
-    if resp.status_code != 200:
-        print(f"[fetch_market_data] API returned {resp.status_code}: {resp.text[:500]}")
+            print(f"[fetch_market_data] Sending single request to API (may be large). Params: { {k:v for k,v in params.items() if k!='api-key'} }")
+            resp = requests.get(base_url, params=params, timeout=180)
+        except Exception as e:
+            print(f"[fetch_market_data] Network error on single request: {e}")
+            # fallback to local CSV if present
+            try:
+                df_csv = pd.read_csv("final_price_data.csv")
+                df_csv = consolidate_duplicate_columns(df_csv)
+                return clean_and_standardize(df_csv)
+            except FileNotFoundError:
+                return pd.DataFrame()
+
+        if resp.status_code != 200:
+            print(f"[fetch_market_data] API returned {resp.status_code}: {resp.text[:500]}")
+            try:
+                df_csv = pd.read_csv("final_price_data.csv")
+                df_csv = consolidate_duplicate_columns(df_csv)
+                return clean_and_standardize(df_csv)
+            except FileNotFoundError:
+                return pd.DataFrame()
+
         try:
-            if os.path.exists("final_price_data.csv") and os.path.getsize("final_price_data.csv") > 0:
-                df_csv = pd.read_csv("final_price_data.csv", encoding='utf-8', on_bad_lines='skip')
-                if not df_csv.empty:
-                    df_csv = consolidate_duplicate_columns(df_csv)
-                    cleaned = clean_and_standardize(df_csv)
-                    if market and 'market' in cleaned.columns:
-                        cleaned = cleaned[cleaned['market'].str.lower() == market.lower()]
-                    return cleaned
-        except Exception as csv_err:
-            print(f"[fetch_market_data] Could not load final_price_data.csv: {csv_err}")
-        return pd.DataFrame()
+            data = resp.json()
+        except Exception as e:
+            print(f"[fetch_market_data] JSON decode error: {e}")
+            try:
+                df_csv = pd.read_csv("final_price_data.csv")
+                df_csv = consolidate_duplicate_columns(df_csv)
+                return clean_and_standardize(df_csv)
+            except FileNotFoundError:
+                return pd.DataFrame()

-    try:
-        data = resp.json()
-    except Exception as e:
-        print(f"[fetch_market_data] JSON decode error: {e}")
+        records = data.get("records", [])
+        if not records and isinstance(data, list):
+            records = data
+
+        if not records:
+            print("[fetch_market_data] No records returned by API in single response.")
+            try:
+                df_csv = pd.read_csv("final_price_data.csv")
+                df_csv = consolidate_duplicate_columns(df_csv)
+                return clean_and_standardize(df_csv)
+            except FileNotFoundError:
+                return pd.DataFrame()
+
+        df_api = pd.DataFrame.from_records(records)
+        # Consolidate duplicate columns immediately
+        df_api = consolidate_duplicate_columns(df_api)
+
+        # write cache atomically
         try:
-            if os.path.exists("final_price_data.csv") and os.path.getsize("final_price_data.csv") > 0:
-                df_csv = pd.read_csv("final_price_data.csv", encoding='utf-8', on_bad_lines='skip')
-                if not df_csv.empty:
-                    df_csv = consolidate_duplicate_columns(df_csv)
-                    cleaned = clean_and_standardize(df_csv)
-                    if market and 'market' in cleaned.columns:
-                        cleaned = cleaned[cleaned['market'].str.lower() == market.lower()]
-                    return cleaned
-        except Exception as csv_err:
-            print(f"[fetch_market_data] Could not load final_price_data.csv: {csv_err}")
-        return pd.DataFrame()
-
-    # Parse the new API response format
-    records = data.get("records", [])
-    if not records and isinstance(data, list):
-        records = data
-
-    if not records:
-        print("[fetch_market_data] No records returned by API in response.")
+            df_api.to_csv(temp_file, index=False)
+            shutil.move(temp_file, cache_path)
+            print(f"[fetch_market_data] Single-request cache updated at '{cache_path}' ({len(df_api)} rows).")
+        except Exception as e:
+            print(f"[fetch_market_data] Failed to write cache atomically: {e}")
+            try:
+                df_api.to_csv(cache_path, index=False)
+            except Exception as e2:
+                print(f"[fetch_market_data] Fallback write also failed: {e2}")
+
+        # Merge with final_price_data.csv if exists
+        dataframes_to_combine = [df_api]
         try:
-            if os.path.exists("final_price_data.csv") and os.path.getsize("final_price_data.csv") > 0:
-                df_csv = pd.read_csv("final_price_data.csv", encoding='utf-8', on_bad_lines='skip')
-                if not df_csv.empty:
-                    df_csv = consolidate_duplicate_columns(df_csv)
-                    cleaned = clean_and_standardize(df_csv)
-                    if market and 'market' in cleaned.columns:
-                        cleaned = cleaned[cleaned['market'].str.lower() == market.lower()]
-                    return cleaned
-        except Exception as csv_err:
-            print(f"[fetch_market_data] Could not load final_price_data.csv: {csv_err}")
-        return pd.DataFrame()
-
-    df_api = pd.DataFrame.from_records(records)
-
-    # Filter by market if specified (from the API response)
-    if market and 'Market' in df_api.columns:
-        df_api = df_api[df_api['Market'].str.lower() == market.lower()]
-
-    # Consolidate duplicate columns immediately
-    df_api = consolidate_duplicate_columns(df_api)
-
-    print(f"[fetch_market_data] Retrieved {len(df_api)} rows from API.")
-
-    # Merge with final_price_data.csv if exists
-    dataframes_to_combine = [df_api]
-    try:
-        if os.path.exists("final_price_data.csv") and os.path.getsize("final_price_data.csv") > 0:
-            df_csv = pd.read_csv("final_price_data.csv", encoding='utf-8', on_bad_lines='skip')
-            if not df_csv.empty:
-                df_csv = consolidate_duplicate_columns(df_csv)
-                dataframes_to_combine.append(df_csv)
-    except Exception as csv_err:
-        print(f"[fetch_market_data] Could not load final_price_data.csv for merging: {csv_err}")
-
-    df_combined = pd.concat(dataframes_to_combine, ignore_index=True, sort=False)
-    df_combined = consolidate_duplicate_columns(df_combined)
-    cleaned = clean_and_standardize(df_combined)
-    if 'arrival_date' in cleaned.columns:
-        cleaned = cleaned.copy()
-        cleaned.loc[:, 'arrival_date'] = pd.to_datetime(
-            cleaned['arrival_date'].astype(str).str.replace('\\/', '-', regex=True),
-            dayfirst=True, errors='coerce'
-        )
-
-    # Additional market filtering after standardization
-    if market and 'market' in cleaned.columns:
-        cleaned = cleaned[cleaned['market'].str.lower() == market.lower()]
-
-    return cleaned
+            df_csv = pd.read_csv("final_price_data.csv")
+            df_csv = consolidate_duplicate_columns(df_csv)
+            dataframes_to_combine.append(df_csv)
+        except FileNotFoundError:
+            pass
+
+        df_combined = pd.concat(dataframes_to_combine, ignore_index=True, sort=False)
+        df_combined = consolidate_duplicate_columns(df_combined)
+        cleaned = clean_and_standardize(df_combined)
+        if 'arrival_date' in cleaned.columns:
+            cleaned = cleaned.copy()
+            cleaned.loc[:, 'arrival_date'] = pd.to_datetime(
+                cleaned['arrival_date'].astype(str).str.replace('\\/', '-', regex=True),
+                dayfirst=True, errors='coerce'
+            )
+        return cleaned
+
+    finally:
+        if os.path.exists(temp_file):
+            try:
+                os.remove(temp_file)
+            except Exception:
+                pass

 # Utility to flatten/clean numeric-like columns safely
-# Utility to flatten/clean numeric-like columns safely (improved version)
 def flatten_column(df, col):
     """
     Ensure df[col] becomes a 1-D numeric Series:
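
The rewritten fetcher layers three behaviours: serve from agmarknet_cache.csv when allowed, optionally hand the work to a daemon thread, and only then hit the API once (sleep_between and page_size are accepted but unused in this single-request version). A minimal usage sketch, with made-up filter values:

    # Warm path: reads the cache and merges final_price_data.csv if present.
    df = fetch_market_data(state='Karnataka', district='Bangalore')

    # Cold path: bypass the cache and block on the live request.
    df = fetch_market_data(state='Karnataka', force_refresh=True)

    # Fire-and-forget: spawns a daemon thread and returns None immediately,
    # so callers must check for None before touching .empty.
    df = fetch_market_data(state='Karnataka', synchronous=False)
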
@@ -220,7 +232,6 @@ def flatten_column(df, col):
     """
     if df is None or col not in df.columns:
         return df
-
     df = df.copy()
     series = df[col]

@@ -236,236 +247,84 @@ def flatten_column(df, col):
     def _first_scalar(x):
         if x is None:
             return None
-
-        # Handle pandas NA/NaN values
-        try:
-            if pd.isna(x):
-                return None
-        except (TypeError, ValueError):
-            pass
-
-        # Handle numpy nan
+        # numpy nan
         try:
             if isinstance(x, float) and np.isnan(x):
                 return None
-        except (TypeError, ValueError):
+        except Exception:
             pass
-
-        # Direct numeric/string values
-        if isinstance(x, (int, float, str, np.integer, np.floating)):
-            # Clean string values that might contain currency symbols or extra whitespace
-            if isinstance(x, str):
-                # Remove common currency symbols and whitespace
-                cleaned = x.strip().replace('₹', '').replace(',', '').replace('$', '')
-                try:
-                    return float(cleaned) if cleaned else None
-                except ValueError:
-                    return None
+        if isinstance(x, (int, float, str, np.integer, np.floating, np.str_)):
             return x
-
-        # Handle numpy string types
-        if isinstance(x, np.str_):
-            cleaned = str(x).strip().replace('₹', '').replace(',', '').replace('$', '')
-            try:
-                return float(cleaned) if cleaned else None
-            except ValueError:
-                return None
-
-        # Handle lists, tuples, sets
         if isinstance(x, (list, tuple, set)):
             for item in x:
                 if item is None:
                     continue
-                try:
-                    if pd.isna(item):
-                        continue
-                except (TypeError, ValueError):
-                    pass
                 try:
                     if isinstance(item, float) and np.isnan(item):
                         continue
-                except (TypeError, ValueError):
+                except Exception:
                     pass
-
-                # Recursive handling for nested structures
                 if isinstance(item, (list, tuple, set)):
-                    nested_result = _first_scalar(item)
-                    if nested_result is not None:
-                        return nested_result
+                    for sub in item:
+                        if sub is not None:
+                            return sub
                     continue
-
                 if isinstance(item, dict):
                     # try to find a numeric-like key
-                    for k in ('value', 'price', 'modal_price', 'modalPrice', '0'):
-                        if k in item and item[k] is not None:
-                            return _first_scalar(item[k])
+                    for k in ('value', 'price', 'modal_price', '0'):
+                        if k in item:
+                            return item[k]
                     vals = list(item.values())
                     if vals:
-                        for val in vals:
-                            result = _first_scalar(val)
-                            if result is not None:
-                                return result
-                    continue
-
-                # Direct value handling
-                if isinstance(item, (int, float, np.integer, np.floating)):
-                    return item
-                if isinstance(item, (str, np.str_)):
-                    cleaned = str(item).strip().replace('₹', '').replace(',', '').replace('$', '')
-                    try:
-                        return float(cleaned) if cleaned else None
-                    except ValueError:
-                        continue
-
-                # If we can't process it, try to convert directly
-                try:
-                    return float(str(item)) if str(item).strip() else None
-                except (ValueError, TypeError):
+                        return vals[0]
                     continue
+                return item
             return None
-
-        # Handle dictionaries
         if isinstance(x, dict):
             for k in ('value', 'price', 'modal_price', 'modalPrice', '0'):
-                if k in x and x[k] is not None:
-                    return _first_scalar(x[k])
+                if k in x:
+                    return x[k]
             vals = list(x.values())
             if vals:
-                for val in vals:
-                    result = _first_scalar(val)
-                    if result is not None:
-                        return result
+                return vals[0]
             return None
-
-        # fallback: try to convert to string then float
+        # fallback to string
         try:
-            str_val = str(x).strip()
-            if str_val:
-                # Clean common non-numeric characters
-                cleaned = str_val.replace('₹', '').replace(',', '').replace('$', '')
-                return float(cleaned)
-            return None
-        except (ValueError, TypeError):
+            return str(x)
+        except Exception:
             return None

-    # Apply the flattening function
     series = series.apply(_first_scalar)
-
-    # Convert to numeric, coercing errors to NaN
    series = pd.to_numeric(series, errors='coerce')
-
     # assign back using .loc to avoid SettingWithCopyWarning
     df.loc[:, col] = series
     return df

-# AI insights (improved with better error handling and data validation)
+# AI insights (unchanged logic but using safer flatten)
 def get_ai_insights(market_data, state, district, language="English"):
     if not state or not district or market_data is None or market_data.empty:
         return ""
-
     api_key = os.environ.get('GEMINI_API_KEY')
     if not api_key:
         return "<p>AI insights unavailable.</p>"

-    # Make a copy to avoid modifying original data
-    market_data = market_data.copy()
-
-    # Ensure modal_price column exists
-    if 'modal_price' not in market_data.columns:
-        return "<p>AI insights unavailable - no price data.</p>"
-
-    # Flatten and convert to numeric more robustly
     market_data = flatten_column(market_data, 'modal_price')
-
-    # Additional validation and cleaning for modal_price
-    market_data['modal_price'] = pd.to_numeric(market_data['modal_price'], errors='coerce')
-
-    # Remove rows where modal_price is NaN or invalid
-    market_data = market_data.dropna(subset=['modal_price', 'commodity'])
-
-    # Check if we have any valid data left
-    if market_data.empty or len(market_data) == 0:
-        return "<p>AI insights unavailable - no valid price data after cleaning.</p>"
-
-    # Additional check to ensure modal_price is actually numeric
-    if not pd.api.types.is_numeric_dtype(market_data['modal_price']):
-        # Force conversion one more time
-        market_data['modal_price'] = pd.to_numeric(market_data['modal_price'], errors='coerce')
-        market_data = market_data.dropna(subset=['modal_price'])
-
-        if market_data.empty:
-            return "<p>AI insights unavailable - could not convert price data to numeric format.</p>"
-
-    try:
-        # Safe grouping and aggregation
-        # Ensure modal_price is numeric at the group-aggregation stage
-        commodity_prices = (
-            market_data
-            .groupby('commodity', dropna=True)['modal_price']
-            .apply(lambda s: pd.to_numeric(s, errors='coerce').mean())  # coerce per-group
-        )
-
-        # Force numeric dtype and drop groups that could not be converted
-        commodity_prices = pd.to_numeric(commodity_prices, errors='coerce').dropna()
-
-        # Guard if no numeric data remains
-        if commodity_prices.empty:
-            return "<p>AI insights unavailable - no numeric commodity price data.</p>"
-
-        n_commodities = min(5, len(commodity_prices))
-        top_commodities = commodity_prices.nlargest(n_commodities)
-
-        # Debugging info (safe to keep; helpful when issues arise)
-        print("modal_price dtype:", market_data['modal_price'].dtype)
-        print("modal_price sample values:", market_data['modal_price'].head(20).tolist())
-        print("modal_price value types:", market_data['modal_price'].apply(lambda x: type(x)).value_counts().to_dict())
-
-        # Format the commodities string
-        top_commodities_str = ", ".join([
-            f"{name} (Avg: ₹{price:.2f})"
-            for name, price in top_commodities.items()
-        ])
-
-        if not top_commodities_str:
-            return "<p>AI insights unavailable - no commodity price data.</p>"
-
-    except Exception as e:
-        print(f"Error processing commodity data: {e}")
-        return "<p>AI insights unavailable - error processing commodity data.</p>"
-
-    # Generate AI prompt
+    if 'modal_price' not in market_data.columns:
+        return "<p>AI insights unavailable.</p>"
+
+    # safe grouping even if some modal_price are NaN
+    top_commodities = market_data.groupby('commodity', dropna=True)['modal_price'].mean().nlargest(5)
+    top_commodities_str = ", ".join([f"{name} (Avg: ₹{price:.2f})" for name, price in top_commodities.items()])
     prompt = f'Analyze agricultural market data for {district}, {state}. Top commodities: {top_commodities_str}. Provide a JSON object with keys "crop_profitability", "market_analysis", "farmer_recommendations", each with an array of insights in {language}.'
-
     try:
-        api_url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent"
+        api_url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash:generateContent"
         headers = {"Content-Type": "application/json"}
-        payload = {
-            "contents": [{"parts": [{"text": prompt}]}],
-            "generationConfig": {"responseMimeType": "application/json"}
-        }
-
+        payload = {"contents": [{"parts": [{"text": prompt}]}], "generationConfig": {"responseMimeType": "application/json"}}
         response = requests.post(f"{api_url}?key={api_key}", headers=headers, json=payload, timeout=25)
-
         if response.status_code == 200:
-            response_data = response.json()
-            if 'candidates' in response_data and len(response_data['candidates']) > 0:
-                content = response_data['candidates'][0].get('content', {})
-                parts = content.get('parts', [])
-                if parts and len(parts) > 0:
-                    insights_text = parts[0].get('text', '')
-                    if insights_text:
-                        insights_json = json.loads(insights_text)
-                        return format_ai_insights(insights_json)
-
-            return "<p>Error: Invalid response format from AI model.</p>"
-        else:
-            return f"<p>Error from AI model: {response.status_code}</p>"
-
-    except requests.exceptions.Timeout:
-        return "<p>AI insights request timed out. Please try again.</p>"
-    except json.JSONDecodeError as e:
-        print(f"JSON decode error in AI insights: {e}")
-        return "<p>Error parsing AI response.</p>"
+            insights_json = json.loads(response.json()['candidates'][0]['content']['parts'][0]['text'])
+            return format_ai_insights(insights_json)
+        return f"<p>Error from AI model: {response.status_code}</p>"
     except Exception as e:
         print(f"Error generating insights: {e}")
         return "<p>Error generating AI insights.</p>"
@@ -494,10 +353,6 @@ def generate_plots(df):
     for col in ['min_price', 'max_price', 'modal_price']:
         df = flatten_column(df, col)

-    # Ensure numeric modal_price for plotting
-    if 'modal_price' in df.columns:
-        df.loc[:, 'modal_price'] = pd.to_numeric(df['modal_price'], errors='coerce')
-
     df.dropna(subset=['modal_price', 'commodity'], inplace=True)
     if df.empty:
         return plots
@@ -520,6 +375,33 @@ LOCATION_HIERARCHY = load_hierarchy_from_json()
 print("Location hierarchy loaded.")

 # --- Flask Routes ---
+@app.route('/refresh_cache', methods=['POST'])
+def refresh_cache():
+    state = request.form.get('state')
+    district = request.form.get('district')
+
+    def _bg():
+        try:
+            fetch_market_data(state=state, district=district, cache_path='agmarknet_cache.csv',
+                              use_cache=False, force_refresh=True, page_size=1000, synchronous=True)
+            print("[refresh_cache] Background refresh finished.")
+        except Exception as e:
+            print(f"[refresh_cache] Background refresh failed: {e}")
+
+    t = threading.Thread(target=_bg, daemon=True)
+    t.start()
+    return jsonify({'success': True, 'message': 'Background cache refresh started.'})
+
+@app.route('/download_full_sync', methods=['POST'])
+def download_full_sync():
+    state = request.form.get('state')
+    district = request.form.get('district')
+    df = fetch_market_data(state=state, district=district, cache_path='agmarknet_cache.csv',
+                           use_cache=False, force_refresh=True, page_size=1000, synchronous=True)
+    if df is None or df.empty:
+        return jsonify({'success': False, 'message': 'Download produced no data.'})
+    return jsonify({'success': True, 'message': f'Download complete. Cached {len(df)} rows.'})
+
 @app.route('/')
 def index():
     states = sorted(list(LOCATION_HIERARCHY.keys()))
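
Both new routes are plain form-POST endpoints, so they can be exercised without the frontend. A sketch using requests (the 127.0.0.1:5000 address assumes Flask's development defaults; the state value is made up):

    import requests

    base = "http://127.0.0.1:5000"  # assumed dev server address
    # Non-blocking: kicks off the daemon-thread refresh and returns at once.
    r = requests.post(f"{base}/refresh_cache", data={"state": "Karnataka"})
    print(r.json())  # {'success': True, 'message': 'Background cache refresh started.'}

    # Blocking: waits for the single API request and reports the cached row count.
    r = requests.post(f"{base}/download_full_sync", data={"state": "Karnataka"})
    print(r.json())
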
@@ -557,20 +439,18 @@ def filter_data():
     if not state:
         return jsonify({'success': False, 'message': 'Please select a state.'})

-    # Pass market parameter to fetch_market_data for API filtering
-    df_combined = fetch_market_data(state, district, market)
+    df_combined = fetch_market_data(state, district)
     if df_combined is None or df_combined.empty:
         return jsonify({'success': False, 'message': 'No data found from API or local CSV.'})

     # Defensive copy
     df_filtered = df_combined.copy()

-    # Additional frontend filtering (in case not filtered by API)
     if state:
         df_filtered = df_filtered[df_filtered['state'].str.lower() == state.lower()]
     if district:
         df_filtered = df_filtered[df_filtered['district'].str.lower() == district.lower()]
-    if market and 'market' in df_filtered.columns:
+    if market:
         df_filtered = df_filtered[df_filtered['market'].str.lower() == market.lower()]
     if commodity:
         df_filtered = df_filtered[df_filtered['commodity'].str.lower() == commodity.lower()]
@@ -586,28 +466,10 @@ def filter_data():
     # Consolidate duplicate columns just in case (extra safety)
     df_final = consolidate_duplicate_columns(df_final)

-    # Ensure price columns are numeric (first pass)
+    # Ensure price columns are numeric
     for col in ['min_price', 'max_price', 'modal_price']:
         df_final = flatten_column(df_final, col)

-    # --- NEW: final coercion and safety for modal_price before computing stats/ordering ---
-    if 'modal_price' in df_final.columns:
-        # Coerce any remaining weird values to NaN, then drop them.
-        df_final.loc[:, 'modal_price'] = pd.to_numeric(df_final['modal_price'], errors='coerce')
-        print("After coercion modal_price dtype:", df_final['modal_price'].dtype)
-        print("modal_price sample values (post-coercion):", df_final['modal_price'].head(20).tolist())
-
-        # Drop rows that have no numeric modal_price (so nsmallest/nlargest won't fail)
-        df_final = df_final.dropna(subset=['modal_price'])
-        # Ensure float dtype
-        if not df_final.empty:
-            df_final.loc[:, 'modal_price'] = df_final['modal_price'].astype(float)
-        else:
-            return jsonify({'success': False, 'message': 'No valid price data after coercion.'})
-    else:
-        return jsonify({'success': False, 'message': 'No modal_price column present after cleaning.'})
-    # -------------------------------------------------------------------------------
-
     plots = generate_plots(df_final.copy())
     insights = get_ai_insights(df_final.copy(), state, district, language)

@@ -615,21 +477,13 @@ def filter_data():
     if df_final.empty or 'modal_price' not in df_final.columns or df_final['modal_price'].dropna().empty:
         return jsonify({'success': False, 'message': 'No valid price data after cleaning.'})

-    # Now these operations are safe because modal_price is numeric float dtype
-    try:
-        cheapest = df_final.nsmallest(5, 'modal_price')[['commodity', 'market', 'modal_price']]
-        costliest = df_final.nlargest(5, 'modal_price')[['commodity', 'market', 'modal_price']]
-    except Exception as e:
-        # fallback: compute via sort_values if something unexpected happens
-        print(f"Warning: nsmallest/nlargest failed: {e}. Falling back to sort_values.")
-        cheapest = df_final.sort_values('modal_price', ascending=True).head(5)[['commodity', 'market', 'modal_price']]
-        costliest = df_final.sort_values('modal_price', ascending=False).head(5)[['commodity', 'market', 'modal_price']]
-
+    cheapest = df_final.nsmallest(5, 'modal_price')[['commodity', 'market', 'modal_price']]
+    costliest = df_final.nlargest(5, 'modal_price')[['commodity', 'market', 'modal_price']]
     market_stats = {
-        'total_commodities': int(df_final['commodity'].nunique()) if 'commodity' in df_final.columns else 0,
+        'total_commodities': int(df_final['commodity'].nunique()),
         'avg_modal_price': f"₹{df_final['modal_price'].mean():.2f}",
         'price_range': f"₹{df_final['modal_price'].min():.2f} - ₹{df_final['modal_price'].max():.2f}",
-        'total_markets': int(df_final['market'].nunique()) if 'market' in df_final.columns else 0
+        'total_markets': int(df_final['market'].nunique())
     }

     return jsonify({
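
Dropping the try/except around nsmallest/nlargest is only safe because flatten_column has already forced modal_price to a numeric dtype; on an object-dtype column pandas refuses these methods outright. An illustrative failure mode (error text from recent pandas versions, approximately):

    pd.DataFrame({'modal_price': ['100', '50']}).nsmallest(2, 'modal_price')
    # TypeError: Column 'modal_price' has dtype object, cannot use method 'nsmallest' with this dtype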