522H0134-NguyenNhatHuy committed on
Commit
f21dc20
·
verified ·
1 Parent(s): 238ad99

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +659 -659
app.py CHANGED
@@ -1,660 +1,660 @@
1
- import os
2
- import sys
3
- import torch
4
- import pickle
5
- import logging
6
- import tempfile
7
- import requests
8
- import re
9
- import asyncio
10
- import aiohttp
11
- from urllib.parse import quote_plus
12
- from pytube import Search
13
- from PIL import Image
14
- from torchvision import transforms
15
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline, AutoModelForCausalLM
16
- import gradio as gr
17
- import pandas as pd
18
- import plotly.express as px
19
- from reportlab.lib.pagesizes import letter
20
- from reportlab.lib import colors
21
- from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image as ReportLabImage
22
- from reportlab.lib.styles import getSampleStyleSheet
23
- from io import BytesIO
24
- from langchain_huggingface import HuggingFacePipeline
25
- from langchain_core.runnables.history import RunnableWithMessageHistory
26
- from langchain_core.chat_history import InMemoryChatMessageHistory
27
- from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
28
- from pydub import AudioSegment
29
- from pydub.utils import which
30
-
31
- # Local imports (assumed to be available)
32
- from args import get_parser
33
- from model import get_model
34
- from output_utils import prepare_output
35
-
36
- # ============== DEVICE CONFIG ==============
37
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
38
- map_loc = None if torch.cuda.is_available() else "cpu"
39
- logging.getLogger("pytube").setLevel(logging.ERROR)
40
-
41
- # ============== LOAD TRANSLATION MODELS ==============
42
- model_envit5_name = "VietAI/envit5-translation"
43
- try:
44
- tokenizer_envit5 = AutoTokenizer.from_pretrained(model_envit5_name)
45
- model_envit5 = AutoModelForSeq2SeqLM.from_pretrained(
46
- model_envit5_name,
47
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
48
- ).to(device)
49
- pipe_envit5 = pipeline(
50
- "text2text-generation",
51
- model=model_envit5,
52
- tokenizer=tokenizer_envit5,
53
- device=0 if torch.cuda.is_available() else -1,
54
- max_new_tokens=512,
55
- do_sample=False
56
- )
57
- except Exception as e:
58
- print(f"Error loading Vietnamese model: {e}")
59
- pipe_envit5 = None
60
-
61
- models = {
62
- "Japanese": {"model_name": "Helsinki-NLP/opus-mt-en-jap"},
63
- "Chinese": {"model_name": "Helsinki-NLP/opus-mt-en-zh"}
64
- }
65
-
66
- for lang in models:
67
- try:
68
- tokenizer = AutoTokenizer.from_pretrained(models[lang]["model_name"])
69
- model = AutoModelForSeq2SeqLM.from_pretrained(
70
- models[lang]["model_name"],
71
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
72
- ).to(device)
73
- models[lang]["pipe"] = pipeline(
74
- "translation",
75
- model=model,
76
- tokenizer=tokenizer,
77
- device=0 if torch.cuda.is_available() else -1,
78
- max_length=512,
79
- batch_size=4 if torch.cuda.is_available() else 1,
80
- truncation=True
81
- )
82
- except Exception as e:
83
- print(f"Error loading {lang} model: {e}")
84
- models[lang]["pipe"] = None
85
-
86
- # ============== LOAD CHATBOT MODEL ==============
87
- chatbot_tokenizer = AutoTokenizer.from_pretrained("bigscience/bloomz-560m")
88
- chatbot_model = AutoModelForCausalLM.from_pretrained(
89
- "bigscience/bloomz-560m",
90
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
91
- ).to(device)
92
-
93
- chatbot_pipeline = pipeline(
94
- "text-generation",
95
- model=chatbot_model,
96
- tokenizer=chatbot_tokenizer,
97
- device=0 if torch.cuda.is_available() else -1,
98
- max_new_tokens=100,
99
- do_sample=True,
100
- temperature=0.6,
101
- top_p=0.9,
102
- pad_token_id=chatbot_tokenizer.eos_token_id,
103
- batch_size=1
104
- )
105
- llm = HuggingFacePipeline(pipeline=chatbot_pipeline)
106
-
107
- # LangChain Chatbot Setup
108
- prompt = ChatPromptTemplate.from_template("""
109
- You are a professional culinary assistant. You will answer the user's question directly based on the provided recipe.
110
- Do not repeat the recipe or question in your answer. Be concise.
111
-
112
- Dish: {title}
113
- Ingredients: {ingredients}
114
- Instructions: {instructions}
115
-
116
- User Question: {question}
117
- Answer:
118
- """)
119
-
120
-
121
- chain = prompt | llm
122
- chat_histories = {}
123
-
124
def get_session_history(session_id):
    """Return the chat history for ``session_id``, creating it on first use.

    Histories are kept in the module-level ``chat_histories`` dict, keyed by
    session id, so repeated calls with the same id return the same object.
    """
    try:
        return chat_histories[session_id]
    except KeyError:
        history = chat_histories[session_id] = InMemoryChatMessageHistory()
        return history
128
-
129
- chatbot_chain = RunnableWithMessageHistory(
130
- chain,
131
- get_session_history,
132
- input_messages_key="question",
133
- history_messages_key="history"
134
- )
135
-
136
- # ============== GLOBAL STATE ==============
137
- current_recipe_context = {"context": "", "title": "", "ingredients": [], "instructions": [], "image": None}
138
-
139
- # ============== RECIPE FORMAT & TRANSLATE ==============
140
def format_recipe(title, ingredients, instructions, lang):
    """Render a recipe as a Markdown string with localized section headings.

    Args:
        title: Dish name, shown as a level-3 heading.
        ingredients: Iterable of ingredient strings, rendered as a bullet list.
        instructions: Iterable of step strings, rendered as a numbered list.
        lang: UI language name ("English (original)", "Japanese", "Chinese",
            "Vietnamese"). Unknown names fall back to the English headings.

    Returns:
        The Markdown text, lines joined with newlines.
    """
    emoji = {"title": "🍽️", "ingredients": "🧂", "instructions": "📖"}
    titles = {
        "en": {"ingredients": "Ingredients", "instructions": "Instructions"},
        "ja": {"ingredients": "Ingredients (材料)", "instructions": "Instructions (作り方)"},
        "zh": {"ingredients": "Ingredients (食材)", "instructions": "Instructions (步骤)"},
        "vi": {"ingredients": "Ingredients (Nguyên liệu)", "instructions": "Instructions (Cách làm)"},
    }
    code_mapping = {
        "English (original)": "en",
        "Japanese": "ja",
        "Chinese": "zh",
        "Vietnamese": "vi",
    }
    headings = titles[code_mapping.get(lang, "en")]

    result = [
        f"### {emoji['title']} {title}",
        f"**{emoji['ingredients']} {headings['ingredients']}:**",
    ]
    result.extend(f"- {item}" for item in ingredients)
    # The leading newline leaves a blank line between the two lists.
    result.append(f"\n**{emoji['instructions']} {headings['instructions']}:**")
    result.extend(f"{number}. {step}" for number, step in enumerate(instructions, 1))
    return "\n".join(result)
162
-
163
# Leading "vi:" tag(s) that the en-vi model may prepend to its output.
_VI_PREFIX_RE = re.compile(r"^(?:\s*vi:\s*)+", re.IGNORECASE)


def _split_into_chunks(text, max_chunk_length=400):
    """Split ``text`` on sentence boundaries into chunks of about ``max_chunk_length`` chars."""
    if len(text) <= max_chunk_length:
        return [text]

    sentences = text.split(". ")
    last_index = len(sentences) - 1
    chunks = []
    current = ""
    for index, sentence in enumerate(sentences):
        sentence = sentence.strip()
        if not sentence:
            continue
        # Restore the period consumed by the split; the final piece keeps
        # whatever punctuation the input already had.
        if index < last_index:
            sentence += "."
        if current and len(current) + len(sentence) + 1 > max_chunk_length:
            chunks.append(current)
            current = sentence
        else:
            current = f"{current} {sentence}" if current else sentence
    if current:
        chunks.append(current)
    return chunks


def translate_section(text, lang):
    """Translate one piece of recipe text from English into ``lang``.

    Args:
        text: English source text.
        lang: "English (original)" (returned unchanged), "Vietnamese"
            (uses ``pipe_envit5``) or a key of ``models`` such as
            "Japanese" or "Chinese".

    Returns:
        The translated text, a message starting with "❗" when the model for
        ``lang`` is unavailable, or the untranslated ``text`` when the
        translation pipeline raises.
    """
    if lang == "English (original)":
        return text
    # Nothing to translate; avoid sending an empty prompt to the model.
    if not text or not text.strip():
        return text

    if lang == "Vietnamese":
        if pipe_envit5 is None:
            return "❗ Vietnamese translation model not available"
        try:
            translated_chunks = []
            for chunk in _split_into_chunks(text):
                output = pipe_envit5(f"en-vi: {chunk}", max_new_tokens=512)[0]["generated_text"]
                # Strip only the leading language tag, not "vi: " inside the text.
                translated_chunks.append(_VI_PREFIX_RE.sub("", output).strip())
            return " ".join(translated_chunks)
        except Exception as e:
            # Best effort: show the English text rather than failing the UI.
            print(f"Vietnamese translation error: {e}")
            return text

    pipe = models.get(lang, {}).get("pipe")
    if pipe is None:
        return f"❗ Translation model for {lang} not available"

    try:
        translated_chunks = [
            pipe(chunk, max_length=512)[0]["translation_text"]
            for chunk in _split_into_chunks(text)
        ]
        return " ".join(translated_chunks)
    except Exception as e:
        # Best effort: show the English text rather than failing the UI.
        print(f"Translation error ({lang}): {e}")
        return text
228
-
229
def translate_recipe(lang):
    """Translate the current recipe into ``lang`` and format it as Markdown.

    Reads the title, ingredients and instructions from the module-level
    ``current_recipe_context``.

    Returns:
        The formatted translated recipe, or a message starting with "❗" when
        no recipe has been generated yet or the translation model is missing.
    """
    if not current_recipe_context["title"]:
        return "❗ Please generate a recipe from an image first."

    title = translate_section(current_recipe_context["title"], lang)
    # A missing model returns the same error for every section; report it once
    # instead of building a recipe made of repeated error lines.
    if title.startswith("❗"):
        return title

    ingrs = [translate_section(item, lang) for item in current_recipe_context["ingredients"]]
    instrs = [translate_section(step, lang) for step in current_recipe_context["instructions"]]
    return format_recipe(title, ingrs, instrs, lang)
236
-
237
- # ============== NUTRITION ANALYSIS ==============
238
def nutrition_analysis(ingredient_input):
    """Query the API Ninjas nutrition endpoint and chart the first three numeric metrics.

    Args:
        ingredient_input: Free text listing ingredients; any whitespace
            (including newlines) separates items.

    Returns:
        A 4-tuple ``(message, bar_figure, pie_figure, line_figure)``. The
        three figures are ``None`` whenever the analysis could not be done.
    """
    failure = (None, None, None)

    ingredients = " ".join((ingredient_input or "").split())
    if not ingredients:
        return ("⚠️ Please enter at least one ingredient.",) + failure

    # NOTE(review): the fallback key is committed in source control. Rotate it
    # and provide the replacement through the API_NINJAS_KEY environment variable.
    api_key = os.environ.get("API_NINJAS_KEY", "AHVy+tpkUoueBNdaFs9nCg==sFZTMRn8ikZVzx6E")
    try:
        response = requests.get(
            "https://api.api-ninjas.com/v1/nutrition",
            params={"query": ingredients},  # lets requests URL-encode the query
            headers={"X-Api-Key": api_key},
            timeout=15,
        )
    except requests.RequestException as e:
        print(f"Nutrition API request failed: {e}")
        return ("❌ API error or quota exceeded.",) + failure
    if response.status_code != 200:
        return ("❌ API error or quota exceeded.",) + failure

    df = pd.DataFrame(response.json())

    # Keep only columns that hold at least one real number after coercion.
    numeric_cols = []
    for col in df.columns:
        if col == "name":
            continue
        df[col] = pd.to_numeric(df[col], errors="coerce")
        if df[col].notna().sum() > 0:
            numeric_cols.append(col)

    if df.empty or "name" not in df.columns or len(numeric_cols) < 3:
        return ("⚠️ Insufficient numerical data for charts (need at least 3 metrics).",) + failure

    bar_col, pie_col, line_col = numeric_cols[:3]
    fig_bar = px.bar(df, x="name", y=bar_col, title=f"Bar Chart: {bar_col}", text_auto=True)

    pie_data = df[[pie_col, "name"]].dropna()
    if pie_data[pie_col].sum() > 0:
        fig_pie = px.pie(pie_data, names="name", values=pie_col, title=f"Pie Chart: {pie_col}")
    else:
        # Empty placeholder figure when there is nothing positive to slice.
        fig_pie = px.bar(title="⚠️ Insufficient data for pie chart")

    fig_line = px.line(df, x="name", y=line_col, markers=True, title=f"Line Chart: {line_col}")
    return "✅ Analysis successful!", fig_bar, fig_pie, fig_line
265
-
266
def load_recipe_ingredients():
    """Return the current recipe's ingredients, one per line.

    Reads ``current_recipe_context["ingredients"]``; when it is empty a
    warning message is returned instead.
    """
    ingredients = current_recipe_context["ingredients"]
    if not ingredients:
        return "⚠️ No ingredients available. Generate a recipe first."
    return "\n".join(ingredients)
270
-
271
- # ============== CHATBOT ==============
272
# Prompt fragments the model tends to echo back; compiled once at import time.
_DISH_ECHO_RE = re.compile(r"Dish:.*?(?:Ingredients:|Instructions:)", re.DOTALL)
_INGREDIENTS_ECHO_RE = re.compile(r"Ingredients:.*?Instructions:", re.DOTALL)
_INSTRUCTIONS_ECHO_RE = re.compile(r"Instructions:.*", re.DOTALL)
_QUESTION_ECHO_RE = re.compile(r"User Question:.*", re.DOTALL)


def clean_response(response):
    """Strip echoed prompt text from a raw model completion.

    Args:
        response: Raw text returned by the language model.

    Returns:
        The answer with the prompt scaffolding removed and surrounding
        whitespace stripped; an empty string for empty input.
    """
    if not response:
        return ""

    # Keep only what follows the last "Answer:" marker. This also discards any
    # echoed system prompt, which always precedes that marker.
    response = response.rpartition("Answer:")[2]

    # Remove echoed recipe fields, then everything from a repeated
    # "Instructions:" or "User Question:" label to the end.
    for pattern in (
        _DISH_ECHO_RE,
        _INGREDIENTS_ECHO_RE,
        _INSTRUCTIONS_ECHO_RE,
        _QUESTION_ECHO_RE,
    ):
        response = pattern.sub("", response)

    return response.strip()
290
-
291
-
292
# "<number> minute(s)" or "<number> min(s)", in any letter case.
_MINUTES_RE = re.compile(r"(\d+)\s*(?:minutes?|mins?)\b", re.IGNORECASE)


def validate_cooking_time(question, instructions):
    """Check a duration mentioned in ``question`` against the recipe's total time.

    The total is the sum of every minute value found in ``instructions``.

    Args:
        question: The user's message.
        instructions: Iterable of recipe step strings.

    Returns:
        A correction sentence when the question names a number of minutes
        that differs from the recipe total; ``None`` when the question names
        no duration, the durations agree, or the recipe states no time at all.
    """
    total_time = 0
    for step in instructions:
        total_time += sum(int(minutes) for minutes in _MINUTES_RE.findall(step))

    # Without any stated time there is nothing to correct against; avoids
    # claiming the recipe "takes about 0 minutes".
    if total_time == 0:
        return None

    asked = _MINUTES_RE.search(question)
    if asked is None:
        return None

    user_minutes = int(asked.group(1))
    if user_minutes == total_time:
        return None
    return f"The recipe takes about {total_time} minutes to cook, not {user_minutes} minutes."
308
-
309
def generate_chat_response(message, session_id="default"):
    """Answer a question about the current recipe using the chatbot chain.

    Args:
        message: The user's question.
        session_id: Key passed to the chain as the configurable session id.

    Returns:
        The cleaned model answer, prefixed with a cooking-time correction
        when ``validate_cooking_time`` produces one, or a prompt to generate
        a recipe first when none is loaded.
    """
    recipe = current_recipe_context
    if not recipe["title"]:
        return "Please generate a recipe from an image before asking about the dish."

    # Computed up front so it can be prepended to whatever the model says.
    correction = validate_cooking_time(message, recipe["instructions"])

    chain_inputs = {
        "title": recipe["title"],
        "ingredients": ", ".join(recipe["ingredients"]),
        "instructions": " ".join(recipe["instructions"]),
        "question": message,
    }
    raw_answer = chatbot_chain.invoke(
        chain_inputs,
        config={"configurable": {"session_id": session_id}},
    )

    answer = clean_response(raw_answer)
    if correction:
        answer = f"{correction} {answer}"
    return answer.strip()
331
-
332
-
333
def chat_with_bot(message, chat_history, session_id="default"):
    """Handle one chat turn for the Gradio chatbot.

    Args:
        message: Text typed by the user; ``None`` or blank text is ignored.
        chat_history: List of ``{"role", "content"}`` message dicts, or
            ``None`` for a fresh conversation.
        session_id: Session key forwarded to ``generate_chat_response``.

    Returns:
        ``("", chat_history)``: the empty string clears the input box and the
        history carries the new user and assistant messages, if any.
    """
    if chat_history is None:
        chat_history = []
    if not message or not message.strip():
        return "", chat_history

    response = generate_chat_response(message, session_id)
    chat_history.append({"role": "user", "content": message})
    chat_history.append({"role": "assistant", "content": response})
    return "", chat_history
340
-
341
- # ============== IMAGE TO RECIPE ==============
342
- with open("ingr_vocab.pkl", 'rb') as f:
343
- ingrs_vocab = pickle.load(f)
344
- with open("instr_vocab.pkl", 'rb') as f:
345
- vocab = pickle.load(f)
346
-
347
- args = get_parser()
348
- args.maxseqlen = 15
349
- args.ingrs_only = False
350
- model_ic = get_model(args, len(ingrs_vocab), len(vocab))
351
- model_ic.load_state_dict(torch.load("modelbest.ckpt", map_location=map_loc, weights_only=True))
352
- model_ic.to(device).eval()
353
-
354
- transform = transforms.Compose([
355
- transforms.Resize(256),
356
- transforms.CenterCrop(224),
357
- transforms.ToTensor(),
358
- transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
359
- ])
360
-
361
def generate_recipe(image):
    """Generate a recipe from a dish photo and store it as the current recipe.

    Args:
        image: The uploaded picture (the code calls ``image.convert("RGB")``
            on it), or ``None`` when nothing was uploaded.

    Returns:
        The recipe formatted as English Markdown, or an error message when no
        image was given or the model output is flagged invalid.
    """
    if image is None:
        return "❗ Please upload an image."

    batch = transform(image.convert("RGB")).unsqueeze(0).to(device)
    with torch.no_grad():
        outputs = model_ic.sample(batch, greedy=True, temperature=1.0, beam=-1, true_ingrs=None)

    ingr_ids = outputs['ingr_ids'].cpu().numpy()
    recipe_ids = outputs['recipe_ids'].cpu().numpy()
    outs, valid = prepare_output(recipe_ids[0], ingr_ids[0], ingrs_vocab, vocab)
    if not valid['is_valid']:
        return f"❌ Invalid recipe: {valid['reason']}"

    # Store the image together with its recipe, and only once the recipe is
    # valid, so a rejected upload never gets paired with the previous recipe.
    current_recipe_context.update({
        "title": outs['title'],
        "ingredients": outs['ingrs'],
        "instructions": outs['recipe'],
        "image": image,
    })
    return format_recipe(outs['title'], outs['ingrs'], outs['recipe'], "English (original)")
378
-
379
- # ============== GOOGLE TTS ==============
380
- languages_tts = {
381
- "English": "en",
382
- "Chinese": "zh-CN",
383
- "Japanese": "ja",
384
- "Vietnamese": "vi",
385
- }
386
-
387
async def fetch_tts_audio_async(session, chunk, lang_code):
    """Fetch synthesized speech for one text chunk from the Google Translate TTS endpoint.

    Args:
        session: An open ``aiohttp.ClientSession`` used for the request.
        chunk: Text to synthesize; it is URL-encoded into the query string.
        lang_code: Language code sent as the ``tl`` parameter.

    Returns:
        The response body as bytes, or ``None`` when the request fails or
        times out (the error is printed).
    """
    url = f"https://translate.google.com/translate_tts?ie=UTF-8&q={quote_plus(chunk)}&tl={lang_code}&client=tw-ob"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Referer": "https://translate.google.com/",
    }
    # aiohttp expects a ClientTimeout object; a bare number is legacy usage.
    timeout = aiohttp.ClientTimeout(total=10)
    try:
        async with session.get(url, headers=headers, timeout=timeout) as response:
            response.raise_for_status()
            return await response.read()
    except (aiohttp.ClientError, asyncio.TimeoutError) as e:
        print(f"TTS Error for chunk: {e}")
        return None
400
-
401
async def fetch_all_tts_audio(chunks, lang_code):
    """Fetch TTS audio for every chunk concurrently over one shared session.

    Returns the results of ``fetch_tts_audio_async`` in the same order as
    ``chunks``.
    """
    async with aiohttp.ClientSession() as session:
        pending = (fetch_tts_audio_async(session, piece, lang_code) for piece in chunks)
        return await asyncio.gather(*pending)
405
-
406
def _chunk_text_for_tts(text, max_chunk_length=200):
    """Split ``text`` into pieces of at most ``max_chunk_length`` chars, breaking at whitespace."""
    import textwrap

    # wrap() breaks between words and only cuts inside a word when that word
    # alone exceeds the width (e.g. text without spaces).
    return textwrap.wrap(text, width=max_chunk_length)


def _remove_files(paths):
    """Delete each temporary file in ``paths``."""
    for path in paths:
        os.unlink(path)


def google_tts(text, lang):
    """Turn recipe Markdown into speech via Google Translate TTS.

    Args:
        text: Markdown text to read aloud. Empty text, or text starting with
            the "❗" error marker, produces no audio.
        lang: Language name looked up in ``languages_tts``; unknown names
            fall back to English.

    Returns:
        ``(audio_path, update)``: the path of an MP3 file (or ``None``) and a
        ``gr.update`` that shows or hides the audio component.
    """
    hidden = (None, gr.update(visible=False))
    if not text or text.startswith("❗"):
        return hidden

    # Strip Markdown markers and heading emoji so they are not read aloud.
    clean_text = text.replace("**", "").replace("###", "").replace("- ", "")
    for emoji in ("🍽️", "🧂", "📖"):
        clean_text = clean_text.replace(emoji, "")

    # Google TTS accepts roughly 200 characters per request.
    chunks = _chunk_text_for_tts(clean_text, 200)
    if not chunks:
        return hidden

    lang_code = languages_tts.get(lang, "en")
    audio_contents = asyncio.run(fetch_all_tts_audio(chunks, lang_code))

    # Persist every chunk that downloaded successfully.
    audio_files = []
    for content in audio_contents:
        if content:
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                f.write(content)
            audio_files.append(f.name)

    if not audio_files:
        return hidden
    if len(audio_files) == 1:
        return audio_files[0], gr.update(visible=True)

    if which("ffmpeg"):
        try:
            combined = AudioSegment.empty()
            for path in audio_files:
                combined += AudioSegment.from_mp3(path)
            # Reserve an output path and close the handle before exporting.
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as out:
                output_file = out.name
            combined.export(output_file, format="mp3")
        except Exception as e:
            print(f"Error combining audio files: {e}")
        else:
            _remove_files(audio_files)
            return output_file, gr.update(visible=True)
    else:
        print("FFmpeg not found, returning first audio chunk.")

    # Fallback: keep only the first chunk and discard the rest.
    _remove_files(audio_files[1:])
    return audio_files[0], gr.update(visible=True)
461
-
462
- # ============== VIDEO SEARCH ==============
463
def search_top_3_videos(keyword):
    """Search YouTube for "How to make <keyword>" and return the top three hits.

    Args:
        keyword: Dish name typed by the user; ``None`` or blank text yields
            empty results.

    Returns:
        A flat list of nine strings: three embed HTML snippets, then three
        titles, then three watch URLs. Missing hits are padded, and any
        search error yields nine empty strings.
    """
    max_videos = 3
    empty_result = [""] * (max_videos * 3)

    if not keyword or not keyword.strip():
        return empty_result
    try:
        search = Search(f"How to make {keyword}")
        results = search.results[:max_videos]
        embeds, titles, urls = [], [], []
        for video in results:
            embed_html = f'''
            <iframe width="520" height="320"
            src="https://www.youtube.com/embed/{video.video_id}"
            frameborder="0" allowfullscreen></iframe>
            '''
            embeds.append(embed_html)
            titles.append(video.title)
            urls.append(f"https://www.youtube.com/watch?v={video.video_id}")
        # Pad so the UI always receives exactly three entries per column.
        while len(embeds) < max_videos:
            embeds.append("No video found")
            titles.append("")
            urls.append("")
        return embeds + titles + urls
    except Exception as e:
        # Best effort: a failed search shows empty slots, not a UI error.
        print(f"Video search error: {e}")
        return empty_result
487
-
488
- # ============== RESTAURANT SEARCH ==============
489
def get_google_maps_search_url(dish_name, city="Ho Chi Minh City"):
    """Build a Google Maps search URL for ``dish_name`` in ``city``.

    The query is percent-encoded (spaces become ``+``), so characters such as
    ``&``, ``/``, ``#`` or non-ASCII letters cannot break the URL.
    """
    query = f"{dish_name} in {city}"
    return f"https://www.google.com/maps/search/{quote_plus(query)}"
493
-
494
def search_and_show_link(dish):
    """Return the Maps search URL for ``dish`` and an update toggling the button.

    A blank dish name returns the default button label and hides the button;
    otherwise the URL is returned and the button is made visible.
    """
    has_query = bool(dish.strip())
    if has_query:
        return get_google_maps_search_url(dish), gr.update(visible=True)
    return "Go to Google Maps", gr.update(visible=False)
499
-
500
- # ============== PDF GENERATION ==============
501
def generate_pdf_recipe():
    """Write the current recipe, with its image when available, to ``recipe.pdf``.

    Returns:
        ``(path, message)``: the PDF path and a success message, or
        ``(None, message)`` when no recipe has been generated yet.
    """
    # Local import: Paragraph parses XML-like markup, so recipe text must be
    # escaped or a stray "&" or "<" breaks the build.
    from xml.sax.saxutils import escape

    if not current_recipe_context["title"]:
        return None, "❗ Please generate a recipe from an image first."

    output_file = "recipe.pdf"
    doc = SimpleDocTemplate(output_file, pagesize=letter)
    styles = getSampleStyleSheet()
    story = []

    image = current_recipe_context["image"]
    if image is not None:
        try:
            img_buffer = BytesIO()
            image.save(img_buffer, format="PNG")
            img_buffer.seek(0)
            story.append(ReportLabImage(img_buffer, width=200, height=200))
            story.append(Spacer(1, 12))
        except Exception as e:
            # The picture is optional; continue with a text-only PDF.
            print(f"Error adding image to PDF: {e}")

    story.append(Paragraph(escape(current_recipe_context["title"]), styles['Title']))
    story.append(Spacer(1, 12))
    story.append(Paragraph("Ingredients:", styles['Heading2']))
    for ingr in current_recipe_context["ingredients"]:
        story.append(Paragraph(f"- {escape(ingr)}", styles['Normal']))
    story.append(Spacer(1, 12))
    story.append(Paragraph("Instructions:", styles['Heading2']))
    for i, instr in enumerate(current_recipe_context["instructions"], 1):
        story.append(Paragraph(f"{i}. {escape(instr)}", styles['Normal']))

    doc.build(story)
    return output_file, "✅ Recipe saved as recipe.pdf"
533
-
534
- # ============== GRADIO UI ==============
535
- with gr.Blocks(theme=gr.themes.Soft(), title="AI Recipe Generator") as demo:
536
- gr.Markdown("""
537
- # 🍳 AI Recipe Generator & Multilingual Cooking Assistant
538
- Generate recipes from images, translate to multiple languages, get cooking videos, chat with a culinary assistant, analyze nutrition, and find restaurants!
539
- """)
540
-
541
- with gr.Tab("πŸ“· Generate Recipe"):
542
- with gr.Row():
543
- with gr.Column():
544
- image_input = gr.Image(type="pil", label="Upload Dish Image", height=300)
545
- gen_btn = gr.Button("Generate Recipe", variant="primary", elem_id="action-btn")
546
- save_pdf_btn = gr.Button("Save as PDF", variant="secondary", elem_id="action-btn")
547
- pdf_output = gr.File(label="Download Recipe PDF", interactive=False)
548
- recipe_output = gr.Markdown("### Your recipe will appear here", elem_classes="recipe-box")
549
- gen_btn.click(generate_recipe, inputs=image_input, outputs=recipe_output)
550
- save_pdf_btn.click(fn=generate_pdf_recipe, outputs=[pdf_output, recipe_output])
551
-
552
- with gr.Tab("🌍 Translate & TTS"):
553
- with gr.Row():
554
- with gr.Column():
555
- lang_dropdown = gr.Dropdown(
556
- choices=["English (original)", "Japanese", "Chinese", "Vietnamese"],
557
- value="Japanese",
558
- label="Select Language"
559
- )
560
- with gr.Row():
561
- trans_btn = gr.Button("Translate Recipe", variant="primary", elem_id="action-btn")
562
- tts_btn = gr.Button("πŸ”ˆ Listen to Recipe", variant="secondary", elem_id="action-btn")
563
- with gr.Column():
564
- translation_output = gr.Markdown("### Translated recipe will appear here", elem_classes="recipe-box")
565
- tts_audio = gr.Audio(interactive=False, label="Audio Output", visible=False)
566
- trans_btn.click(fn=translate_recipe, inputs=lang_dropdown, outputs=translation_output)
567
- tts_btn.click(fn=google_tts, inputs=[translation_output, lang_dropdown], outputs=[tts_audio, tts_audio])
568
-
569
- with gr.Tab("πŸŽ₯ Cooking Videos"):
570
- with gr.Row():
571
- with gr.Column():
572
- video_keyword = gr.Textbox(label="Search Cooking Videos", placeholder="e.g. beef pho")
573
- search_btn = gr.Button("Search Videos", variant="primary", elem_id="action-btn")
574
- with gr.Column():
575
- video_embeds, video_titles, video_urls = [], [], []
576
- for i in range(3):
577
- with gr.Column():
578
- video_embeds.append(gr.HTML(label=f"🎬 Video {i+1}"))
579
- video_titles.append(gr.Textbox(label=f"πŸ“Œ Title {i+1}", interactive=False))
580
- video_urls.append(gr.Textbox(label=f"πŸ”— URL {i+1}", interactive=False, visible=False))
581
- search_btn.click(fn=search_top_3_videos, inputs=video_keyword, outputs=video_embeds + video_titles + video_urls)
582
-
583
- with gr.Tab("πŸ’¬ Culinary Chatbot"):
584
- chatbot = gr.Chatbot(height=400, type="messages")
585
- with gr.Row():
586
- chat_input = gr.Textbox(placeholder="Ask about the dish...", scale=4)
587
- chat_btn = gr.Button("Send", variant="primary", scale=1, elem_id="action-btn")
588
- chat_btn.click(chat_with_bot, inputs=[chat_input, chatbot], outputs=[chat_input, chatbot])
589
- chat_input.submit(chat_with_bot, inputs=[chat_input, chatbot], outputs=[chat_input, chatbot])
590
-
591
- with gr.Tab("πŸ₯— Nutrition Analysis"):
592
- with gr.Row():
593
- with gr.Column():
594
- ingredient_input = gr.Textbox(
595
- label="🧾 Enter Ingredients (one per line or space-separated)",
596
- lines=10,
597
- placeholder="cheese\npepper\negg\n..."
598
- )
599
- with gr.Row():
600
- load_ingredients_btn = gr.Button("Load Recipe Ingredients", variant="secondary", elem_id="action-btn")
601
- analyze_btn = gr.Button("Analyze Nutrition", variant="primary", elem_id="action-btn")
602
- with gr.Column():
603
- nutrition_message = gr.Textbox(label="πŸ”” Message", interactive=False)
604
- bar_chart = gr.Plot(label="πŸ“Š Bar Chart")
605
- pie_chart = gr.Plot(label="πŸ₯§ Pie Chart")
606
- line_chart = gr.Plot(label="πŸ“ˆ Line Chart")
607
- load_ingredients_btn.click(fn=load_recipe_ingredients, outputs=ingredient_input)
608
- analyze_btn.click(
609
- fn=nutrition_analysis,
610
- inputs=ingredient_input,
611
- outputs=[nutrition_message, bar_chart, pie_chart, line_chart]
612
- )
613
-
614
- with gr.Tab("🍽️ Find Restaurants"):
615
- with gr.Row():
616
- with gr.Column():
617
- dish_input = gr.Textbox(label="Enter Dish Name", placeholder="e.g. beef pho", interactive=True)
618
- search_restaurant_btn = gr.Button("Find Restaurants", variant="primary", elem_id="action-btn")
619
- open_maps_btn = gr.Button("Go to Google Maps", visible=True, variant="secondary", elem_id="open-maps-btn")
620
- search_restaurant_btn.click(fn=search_and_show_link, inputs=dish_input, outputs=[open_maps_btn, open_maps_btn])
621
- open_maps_btn.click(
622
- fn=lambda url: url,
623
- inputs=open_maps_btn,
624
- outputs=None,
625
- js="(url) => { if(url) window.open(url, '_blank'); }"
626
- )
627
-
628
- demo.css = """
629
- .recipe-box {
630
- padding: 20px;
631
- border-radius: 10px;
632
- background: #f9f9f9;
633
- border: 1px solid #e0e0e0;
634
- }
635
- .dark .recipe-box {
636
- background: #2a2a2a;
637
- border-color: #444;
638
- }
639
- .gr-box {
640
- margin-bottom: 20px;
641
- }
642
- #action-btn {
643
- max-width: 220px;
644
- margin: 10px auto;
645
- font-weight: 600;
646
- font-size: 16px;
647
- border-radius: 8px;
648
- }
649
- #open-maps-btn {
650
- max-width: 220px;
651
- margin: 10px auto;
652
- font-weight: 600;
653
- font-size: 16px;
654
- border-radius: 8px;
655
- }
656
- """
657
-
658
- if __name__ == "__main__":
659
- demo.launch(pwa=True)
660
 
 
1
+ import os
2
+ import sys
3
+ import torch
4
+ import pickle
5
+ import logging
6
+ import tempfile
7
+ import requests
8
+ import re
9
+ import asyncio
10
+ import aiohttp
11
+ from urllib.parse import quote_plus
12
+ from pytube import Search
13
+ from PIL import Image
14
+ from torchvision import transforms
15
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline, AutoModelForCausalLM
16
+ import gradio as gr
17
+ import pandas as pd
18
+ import plotly.express as px
19
+ from reportlab.lib.pagesizes import letter
20
+ from reportlab.lib import colors
21
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image as ReportLabImage
22
+ from reportlab.lib.styles import getSampleStyleSheet
23
+ from io import BytesIO
24
+ from langchain_huggingface import HuggingFacePipeline
25
+ from langchain_core.runnables.history import RunnableWithMessageHistory
26
+ from langchain_core.chat_history import InMemoryChatMessageHistory
27
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
28
+ from pydub import AudioSegment
29
+ from pydub.utils import which
30
+
31
+ # Local imports (assumed to be available)
32
+ from args import get_parser
33
+ from model import get_model
34
+ from output_utils import prepare_output
35
+
36
+ # ============== DEVICE CONFIG ==============
37
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
38
+ map_loc = None if torch.cuda.is_available() else "cpu"
39
+ logging.getLogger("pytube").setLevel(logging.ERROR)
40
+
41
+ # ============== LOAD TRANSLATION MODELS ==============
42
+ model_envit5_name = "VietAI/envit5-translation"
43
+ try:
44
+ tokenizer_envit5 = AutoTokenizer.from_pretrained(model_envit5_name)
45
+ model_envit5 = AutoModelForSeq2SeqLM.from_pretrained(
46
+ model_envit5_name,
47
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
48
+ ).to(device)
49
+ pipe_envit5 = pipeline(
50
+ "text2text-generation",
51
+ model=model_envit5,
52
+ tokenizer=tokenizer_envit5,
53
+ device=0 if torch.cuda.is_available() else -1,
54
+ max_new_tokens=512,
55
+ do_sample=False
56
+ )
57
+ except Exception as e:
58
+ print(f"Error loading Vietnamese model: {e}")
59
+ pipe_envit5 = None
60
+
61
+ models = {
62
+ "Japanese": {"model_name": "Helsinki-NLP/opus-mt-en-jap"},
63
+ "Chinese": {"model_name": "Helsinki-NLP/opus-mt-en-zh"}
64
+ }
65
+
66
+ for lang in models:
67
+ try:
68
+ tokenizer = AutoTokenizer.from_pretrained(models[lang]["model_name"])
69
+ model = AutoModelForSeq2SeqLM.from_pretrained(
70
+ models[lang]["model_name"],
71
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
72
+ ).to(device)
73
+ models[lang]["pipe"] = pipeline(
74
+ "translation",
75
+ model=model,
76
+ tokenizer=tokenizer,
77
+ device=0 if torch.cuda.is_available() else -1,
78
+ max_length=512,
79
+ batch_size=4 if torch.cuda.is_available() else 1,
80
+ truncation=True
81
+ )
82
+ except Exception as e:
83
+ print(f"Error loading {lang} model: {e}")
84
+ models[lang]["pipe"] = None
85
+
86
+ # ============== LOAD CHATBOT MODEL ==============
87
+ chatbot_tokenizer = AutoTokenizer.from_pretrained("bigscience/bloomz-560m")
88
+ chatbot_model = AutoModelForCausalLM.from_pretrained(
89
+ "bigscience/bloomz-560m",
90
+ torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
91
+ ).to(device)
92
+
93
+ chatbot_pipeline = pipeline(
94
+ "text-generation",
95
+ model=chatbot_model,
96
+ tokenizer=chatbot_tokenizer,
97
+ device=0 if torch.cuda.is_available() else -1,
98
+ max_new_tokens=100,
99
+ do_sample=True,
100
+ temperature=0.6,
101
+ top_p=0.9,
102
+ pad_token_id=chatbot_tokenizer.eos_token_id,
103
+ batch_size=1
104
+ )
105
+ llm = HuggingFacePipeline(pipeline=chatbot_pipeline)
106
+
107
+ # LangChain Chatbot Setup
108
+ prompt = ChatPromptTemplate.from_template("""
109
+ You are a professional culinary assistant. You will answer the user's question directly based on the provided recipe.
110
+ Do not repeat the recipe or question in your answer. Be concise.
111
+
112
+ Dish: {title}
113
+ Ingredients: {ingredients}
114
+ Instructions: {instructions}
115
+
116
+ User Question: {question}
117
+ Answer:
118
+ """)
119
+
120
+
121
+ chain = prompt | llm
122
+ chat_histories = {}
123
+
124
def get_session_history(session_id):
    """Return the chat history for ``session_id``, creating it on first use.

    Histories are kept in the module-level ``chat_histories`` dict, keyed by
    session id, so repeated calls with the same id return the same object.
    """
    try:
        return chat_histories[session_id]
    except KeyError:
        history = chat_histories[session_id] = InMemoryChatMessageHistory()
        return history
128
+
129
+ chatbot_chain = RunnableWithMessageHistory(
130
+ chain,
131
+ get_session_history,
132
+ input_messages_key="question",
133
+ history_messages_key="history"
134
+ )
135
+
136
+ # ============== GLOBAL STATE ==============
137
+ current_recipe_context = {"context": "", "title": "", "ingredients": [], "instructions": [], "image": None}
138
+
139
+ # ============== RECIPE FORMAT & TRANSLATE ==============
140
def format_recipe(title, ingredients, instructions, lang):
    """Render a recipe as a Markdown string with localized section headings.

    Args:
        title: Dish name, shown as a level-3 heading.
        ingredients: Iterable of ingredient strings, rendered as a bullet list.
        instructions: Iterable of step strings, rendered as a numbered list.
        lang: UI language name ("English (original)", "Japanese", "Chinese",
            "Vietnamese"). Unknown names fall back to the English headings.

    Returns:
        The Markdown text, lines joined with newlines.
    """
    emoji = {"title": "🍽️", "ingredients": "🧂", "instructions": "📖"}
    titles = {
        "en": {"ingredients": "Ingredients", "instructions": "Instructions"},
        "ja": {"ingredients": "Ingredients (材料)", "instructions": "Instructions (作り方)"},
        "zh": {"ingredients": "Ingredients (食材)", "instructions": "Instructions (步骤)"},
        "vi": {"ingredients": "Ingredients (Nguyên liệu)", "instructions": "Instructions (Cách làm)"},
    }
    code_mapping = {
        "English (original)": "en",
        "Japanese": "ja",
        "Chinese": "zh",
        "Vietnamese": "vi",
    }
    headings = titles[code_mapping.get(lang, "en")]

    result = [
        f"### {emoji['title']} {title}",
        f"**{emoji['ingredients']} {headings['ingredients']}:**",
    ]
    result.extend(f"- {item}" for item in ingredients)
    # The leading newline leaves a blank line between the two lists.
    result.append(f"\n**{emoji['instructions']} {headings['instructions']}:**")
    result.extend(f"{number}. {step}" for number, step in enumerate(instructions, 1))
    return "\n".join(result)
162
+
163
def _split_into_chunks(text, max_chunk_length=400):
    """Split *text* into sentence-aligned chunks of about *max_chunk_length* chars.

    Text that already fits is returned as a single chunk. A single sentence
    longer than the limit is kept whole rather than cut mid-sentence.
    """
    if len(text) <= max_chunk_length:
        return [text]

    chunks = []
    current = ""
    for sentence in text.split(". "):
        sentence = sentence.strip()
        if not sentence:
            continue
        # split() removed the ". " separator; restore the period only when the
        # sentence does not already end with punctuation (avoids "Serve..").
        if not sentence.endswith((".", "!", "?")):
            sentence += "."
        if current and len(current) + len(sentence) + 1 > max_chunk_length:
            chunks.append(current)
            current = sentence
        else:
            current = f"{current} {sentence}" if current else sentence
    if current:
        chunks.append(current)
    return chunks


def translate_section(text, lang):
    """Translate one piece of recipe text from English into *lang*.

    Args:
        text: English source text.
        lang: UI language label. "English (original)" returns *text* as is;
            "Vietnamese" uses ``pipe_envit5``; any other label is looked up
            in ``models[lang]["pipe"]``.

    Returns:
        The translated text; a "not available" message when the required
        model is missing; or the untranslated *text* when translation fails
        (best-effort, the error is printed).
    """
    if lang == "English (original)":
        return text
    # Nothing to translate; avoid sending blank input to a model.
    if not text or not text.strip():
        return text

    if lang == "Vietnamese":
        if pipe_envit5 is None:
            return "❗ Vietnamese translation model not available"

        def translate_chunk(chunk):
            # The model expects an "en-vi: " task prefix and echoes a "vi: "
            # prefix in its output, which is stripped here.
            translated = pipe_envit5(f"en-vi: {chunk}", max_new_tokens=512)[0]["generated_text"]
            return translated.replace("vi: vi: ", "").replace("vi: Vi: ", "").replace("vi: ", "").strip()

        error_label = "Vietnamese translation error"
    else:
        pipe = models.get(lang, {}).get("pipe")
        if pipe is None:
            return f"❗ Translation model for {lang} not available"

        def translate_chunk(chunk):
            return pipe(chunk, max_length=512)[0]["translation_text"]

        error_label = f"Translation error ({lang})"

    try:
        return " ".join(translate_chunk(chunk) for chunk in _split_into_chunks(text))
    except Exception as e:
        # Deliberate best-effort: show the original text rather than crash the UI.
        print(f"{error_label}: {e}")
        return text
228
+
229
def translate_recipe(lang):
    """Translate the current recipe into *lang* and return it as Markdown.

    Reads the recipe from ``current_recipe_context``. Title, ingredients and
    instruction steps are each translated separately, then formatted.
    Returns a prompt to generate a recipe first when none is stored.
    """
    recipe = current_recipe_context
    if not recipe["title"]:
        return "❗ Please generate a recipe from an image first."

    translated_title = translate_section(recipe["title"], lang)

    translated_ingredients = []
    for ingredient in recipe["ingredients"]:
        translated_ingredients.append(translate_section(ingredient, lang))

    translated_steps = []
    for step in recipe["instructions"]:
        translated_steps.append(translate_section(step, lang))

    return format_recipe(translated_title, translated_ingredients, translated_steps, lang)
236
+
237
+ # ============== NUTRITION ANALYSIS ==============
238
def nutrition_analysis(ingredient_input):
    """Query the API Ninjas nutrition endpoint and build three charts.

    Args:
        ingredient_input: Free text listing ingredients, separated by any
            whitespace (spaces or newlines).

    Returns:
        A 4-tuple ``(message, bar_figure, pie_figure, line_figure)``. On any
        failure the message explains the problem and the figures are ``None``.
    """
    failure = (None, None, None)

    # Collapse all whitespace (including newlines) into single spaces.
    ingredients = " ".join((ingredient_input or "").split())
    if not ingredients:
        return ("⚠️ Please enter at least one ingredient.", *failure)

    # NOTE(security): an API key is hard-coded as the fallback below. Prefer
    # setting API_NINJAS_KEY in the environment and rotating the embedded key.
    api_key = os.environ.get("API_NINJAS_KEY", "AHVy+tpkUoueBNdaFs9nCg==sFZTMRn8ikZVzx6E")
    try:
        # `params` URL-encodes the query; the timeout keeps the UI from hanging.
        response = requests.get(
            "https://api.api-ninjas.com/v1/nutrition",
            params={"query": ingredients},
            headers={"X-Api-Key": api_key},
            timeout=15,
        )
    except requests.RequestException as e:
        print(f"Nutrition API request failed: {e}")
        return ("❌ API error or quota exceeded.", *failure)
    if response.status_code != 200:
        return ("❌ API error or quota exceeded.", *failure)

    try:
        data = response.json()
    except ValueError as e:
        print(f"Nutrition API returned invalid JSON: {e}")
        return ("❌ API error or quota exceeded.", *failure)
    if not isinstance(data, list):
        # Expected one record per ingredient; anything else cannot be charted.
        return ("❌ API error or quota exceeded.", *failure)

    df = pd.DataFrame(data)
    numeric_cols = []
    for col in df.columns:
        if col == "name":
            continue
        # Non-numeric cells become NaN; keep only columns with some numbers.
        df[col] = pd.to_numeric(df[col], errors="coerce")
        if df[col].notna().sum() > 0:
            numeric_cols.append(col)
    if df.empty or len(numeric_cols) < 3:
        return ("⚠️ Insufficient numerical data for charts (need at least 3 metrics).", *failure)

    # The first three usable metrics drive the bar, pie and line charts.
    draw_cols = numeric_cols[:3]
    fig_bar = px.bar(df, x="name", y=draw_cols[0], title=f"Bar Chart: {draw_cols[0]}", text_auto=True)
    pie_data = df[[draw_cols[1], "name"]].dropna()
    if pie_data[draw_cols[1]].sum() > 0:
        fig_pie = px.pie(pie_data, names="name", values=draw_cols[1], title=f"Pie Chart: {draw_cols[1]}")
    else:
        # A pie chart needs a positive total; show an empty titled figure instead.
        fig_pie = px.bar(title="⚠️ Insufficient data for pie chart")
    fig_line = px.line(df, x="name", y=draw_cols[2], markers=True, title=f"Line Chart: {draw_cols[2]}")
    return "βœ… Analysis successful!", fig_bar, fig_pie, fig_line
265
+
266
def load_recipe_ingredients():
    """Return the current recipe's ingredients, one per line.

    Returns a warning string instead when no recipe has been generated yet.
    """
    items = current_recipe_context["ingredients"]
    if items:
        return "\n".join(items)
    return "⚠️ No ingredients available. Generate a recipe first."
270
+
271
+ # ============== CHATBOT ==============
272
def clean_response(response):
    """Strip echoed prompt text from raw model output, leaving the answer.

    Keeps only what follows the last "Answer:" marker (the whole text when
    the marker is absent), then removes any echoed recipe fields, system
    instructions or user question, and trims surrounding whitespace.
    """
    # rpartition yields the full string as the tail when the marker is absent.
    text = response.rpartition("Answer:")[2]

    # Applied in order; each one deletes a fragment of the prompt that the
    # model may have repeated in its output.
    echo_patterns = (
        r"Dish:.*?(Ingredients:|Instructions:).*?",
        r"Ingredients:.*?(Instructions:).*?",
        r"Instructions:.*",
        r"You are a professional culinary assistant.*?Answer:",
        r"User Question:.*",
    )
    for pattern in echo_patterns:
        text = re.sub(pattern, "", text, flags=re.DOTALL)

    return text.strip()
290
+
291
+
292
def validate_cooking_time(question, instructions):
    """Return a correction when the user's stated time differs from the recipe's.

    Sums every "<number> minute(s)" mention across *instructions* and
    compares the total with the first such mention in *question*.

    Args:
        question: The user's chat message.
        instructions: Iterable of recipe step strings.

    Returns:
        A correction sentence when both sides state a time and they differ;
        otherwise ``None``. ``None`` is also returned when the recipe states
        no minute durations at all, since there is nothing to compare with.
    """
    # Case-insensitive so "10 Minutes" is recognized as well.
    time_pattern = re.compile(r"(\d+)\s*minutes?", re.IGNORECASE)

    total_time = 0
    for instr in instructions:
        for minutes in time_pattern.findall(instr):
            total_time += int(minutes)

    # With no durations in the recipe, a "takes about 0 minutes" correction
    # would be misleading.
    if total_time == 0:
        return None

    user_time = time_pattern.search(question)
    if user_time is None:
        return None

    user_minutes = int(user_time.group(1))
    if user_minutes == total_time:
        return None
    return f"The recipe takes about {total_time} minutes to cook, not {user_minutes} minutes."
308
+
309
def generate_chat_response(message, session_id="default"):
    """Answer *message* about the current recipe using the chatbot chain.

    Args:
        message: The user's question.
        session_id: Key selecting the chat history to use.

    Returns:
        The cleaned model answer, prefixed with a cooking-time correction
        when one applies; or a prompt to generate a recipe first.
    """
    recipe = current_recipe_context
    if not recipe["title"]:
        return "Please generate a recipe from an image before asking about the dish."

    # Computed before the model call so the correction can lead the reply.
    time_correction = validate_cooking_time(message, recipe["instructions"])

    prompt_inputs = {
        "title": recipe["title"],
        "ingredients": ", ".join(recipe["ingredients"]),
        "instructions": " ".join(recipe["instructions"]),
        "question": message
    }
    run_config = {"configurable": {"session_id": session_id}}
    answer = clean_response(chatbot_chain.invoke(prompt_inputs, config=run_config))

    if time_correction:
        answer = f"{time_correction} {answer}"
    return answer.strip()
331
+
332
+
333
def chat_with_bot(message, chat_history, session_id="default"):
    """Gradio handler: append one user/assistant exchange to *chat_history*.

    Blank messages are ignored. Returns ``("", chat_history)`` so the input
    box is cleared and the chat display refreshed; *chat_history* is mutated
    in place.
    """
    if message.strip():
        reply = generate_chat_response(message, session_id)
        chat_history.extend([
            {"role": "user", "content": message},
            {"role": "assistant", "content": reply},
        ])
    return "", chat_history
340
+
341
+ # ============== IMAGE TO RECIPE ==============
342
# Load the ingredient and instruction vocabularies. Their sizes are passed to
# get_model and both are handed to prepare_output when decoding a recipe.
# NOTE(review): pickle.load can execute arbitrary code from the file; this is
# only safe if these .pkl files are trusted - confirm their provenance.
with open("ingr_vocab.pkl", 'rb') as f:
    ingrs_vocab = pickle.load(f)
with open("instr_vocab.pkl", 'rb') as f:
    vocab = pickle.load(f)

# Build the image-to-recipe model from the project's parser defaults, with two
# overrides. NOTE(review): the exact meaning of maxseqlen / ingrs_only is
# defined in the project's args/model modules - confirm there.
args = get_parser()
args.maxseqlen = 15
args.ingrs_only = False
model_ic = get_model(args, len(ingrs_vocab), len(vocab))
# weights_only=True restricts torch.load to tensor data; map_location is "cpu"
# when CUDA is unavailable (see map_loc in the device config).
model_ic.load_state_dict(torch.load("modelbest.ckpt", map_location=map_loc, weights_only=True))
# Inference only: move to the selected device and switch to eval mode.
model_ic.to(device).eval()
353
+
354
# Preprocessing applied to each uploaded image before it is fed to model_ic:
# resize, center-crop to 224x224, convert to a tensor, then normalize each
# channel. The mean/std values match the commonly used ImageNet statistics.
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
360
+
361
def generate_recipe(image):
    """Generate a recipe from a dish photo and store it as the current recipe.

    Args:
        image: PIL image from the upload widget, or ``None``.

    Returns:
        The English recipe as Markdown, or an error message when no image
        was given or the model output was rejected by ``prepare_output``.
    """
    if image is None:
        return "❗ Please upload an image."

    batch = transform(image.convert("RGB")).unsqueeze(0).to(device)
    with torch.no_grad():
        outputs = model_ic.sample(batch, greedy=True, temperature=1.0, beam=-1, true_ingrs=None)

    ingr_ids = outputs['ingr_ids'].cpu().numpy()
    recipe_ids = outputs['recipe_ids'].cpu().numpy()
    outs, valid = prepare_output(recipe_ids[0], ingr_ids[0], ingrs_vocab, vocab)
    if not valid['is_valid']:
        return f"❌ Invalid recipe: {valid['reason']}"

    # Store the image together with the recipe, and only after validation, so
    # a rejected generation never pairs a new photo with the previous recipe
    # (for example in the PDF export).
    current_recipe_context.update({
        "title": outs['title'],
        "ingredients": outs['ingrs'],
        "instructions": outs['recipe'],
        "image": image,
    })
    return format_recipe(outs['title'], outs['ingrs'], outs['recipe'], "English (original)")
378
+
379
+ # ============== GOOGLE TTS ==============
380
# Maps a language label to the `tl` code sent to the Google Translate TTS
# endpoint. Labels missing here (e.g. "English (original)" from the language
# dropdown) fall back to "en" in google_tts.
languages_tts = {
    "English": "en",
    "Chinese": "zh-CN",
    "Japanese": "ja",
    "Vietnamese": "vi",
}
386
+
387
async def fetch_tts_audio_async(session, chunk, lang_code):
    """Fetch the audio for one text chunk from Google Translate TTS.

    Args:
        session: The client session used to issue the GET request.
        chunk: Text to synthesize.
        lang_code: Target language code for the `tl` query parameter.

    Returns:
        The raw response bytes, or ``None`` when the request fails (the
        error is printed).
    """
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Referer": "https://translate.google.com/",
    }
    tts_url = (
        "https://translate.google.com/translate_tts"
        f"?ie=UTF-8&q={quote_plus(chunk)}&tl={lang_code}&client=tw-ob"
    )
    try:
        async with session.get(tts_url, headers=request_headers, timeout=10) as resp:
            resp.raise_for_status()
            audio_bytes = await resp.read()
    except Exception as e:
        print(f"TTS Error for chunk: {e}")
        return None
    return audio_bytes
400
+
401
async def fetch_all_tts_audio(chunks, lang_code):
    """Fetch the audio for every chunk concurrently over one shared session.

    Returns the per-chunk results in the same order as *chunks*; entries are
    ``None`` for chunks whose request failed.
    """
    async with aiohttp.ClientSession() as session:
        pending = (fetch_tts_audio_async(session, piece, lang_code) for piece in chunks)
        results = await asyncio.gather(*pending)
    return results
405
+
406
def google_tts(text, lang):
    """Synthesize speech for *text* and return it as an MP3 file path.

    The text is stripped of Markdown markers and section emoji, split into
    chunks of at most 200 characters on word boundaries, fetched
    concurrently, and merged into one file when FFmpeg is available.

    Args:
        text: Recipe Markdown to read aloud.
        lang: Language label; labels missing from ``languages_tts`` use "en".

    Returns:
        ``(audio_path, visibility_update)``. ``audio_path`` is ``None`` (and
        the update hides the player) when there is nothing to play. When the
        chunks cannot be merged, only the first chunk's audio is returned.
    """
    import textwrap  # local import: only this function needs it

    def discard(paths):
        # Best-effort removal of temp files; a failed delete must not cost
        # the caller an otherwise valid audio result.
        for path in paths:
            try:
                os.unlink(path)
            except OSError as e:
                print(f"Could not remove temporary audio file {path}: {e}")

    if not text or text.startswith("❗"):
        return None, gr.update(visible=False)

    # Clean text for TTS
    clean_text = text.replace("**", "").replace("###", "").replace("- ", "")
    for emoji in ["🍽️", "πŸ§‚", "πŸ“–"]:
        clean_text = clean_text.replace(emoji, "")

    # Split into chunks (Google TTS max ~200 chars). wrap() breaks on
    # whitespace so words are not cut in half, and still hard-splits any
    # single run longer than the limit (e.g. text without spaces).
    max_chunk_length = 200
    chunks = textwrap.wrap(clean_text, width=max_chunk_length)
    if not chunks:
        return None, gr.update(visible=False)

    # Fetch audio chunks asynchronously
    lang_code = languages_tts.get(lang, "en")
    audio_contents = asyncio.run(fetch_all_tts_audio(chunks, lang_code))

    # Persist the chunks that downloaded successfully; failures are skipped.
    audio_files = []
    for content in audio_contents:
        if content:
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
                f.write(content)
            audio_files.append(f.name)

    if not audio_files:
        return None, gr.update(visible=False)
    if len(audio_files) == 1:
        return audio_files[0], gr.update(visible=True)

    # Combine audio if FFmpeg is available
    if which("ffmpeg"):
        output_file = None
        try:
            combined = AudioSegment.empty()
            for file in audio_files:
                combined += AudioSegment.from_mp3(file)
            # Reserve a path and close the handle before exporting to it.
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as out:
                output_file = out.name
            combined.export(output_file, format="mp3")
        except Exception as e:
            print(f"Error combining audio files: {e}")
            if output_file is not None:
                discard([output_file])
        else:
            discard(audio_files)
            return output_file, gr.update(visible=True)
    else:
        print("FFmpeg not found, returning first audio chunk.")

    # Fallback to first chunk
    discard(audio_files[1:])
    return audio_files[0], gr.update(visible=True)
461
+
462
+ # ============== VIDEO SEARCH ==============
463
def search_top_3_videos(keyword):
    """Search YouTube for "How to make <keyword>" and return the top 3 hits.

    Returns:
        A flat list of 9 strings: three embed HTML snippets, then three
        titles, then three watch URLs. Missing results are padded with
        "No video found" / empty strings. A blank keyword or any search
        error yields nine empty strings.
    """
    if not keyword.strip():
        return ["", "", ""] * 3
    try:
        rows = []
        for video in Search(f"How to make {keyword}").results[:3]:
            embed_html = f'''
<iframe width="520" height="320"
src="https://www.youtube.com/embed/{video.video_id}"
frameborder="0" allowfullscreen></iframe>
'''
            rows.append((embed_html, video.title, f"https://www.youtube.com/watch?v={video.video_id}"))

        # Pad to exactly three rows so the output always fills all slots.
        rows.extend([("No video found", "", "")] * (3 - len(rows)))

        embeds, titles, urls = zip(*rows)
        return list(embeds) + list(titles) + list(urls)
    except Exception as e:
        print(f"Video search error: {e}")
        return ["", "", ""] * 3
487
+
488
+ # ============== RESTAURANT SEARCH ==============
489
def get_google_maps_search_url(dish_name, city="Ho Chi Minh City"):
    """Build a Google Maps search URL for *dish_name* in *city*.

    The query is fully URL-encoded, so names containing ``&``, ``/``, ``#``,
    ``?`` or non-ASCII characters produce a valid link. Spaces still become
    ``+``, as before.
    """
    query = f"{dish_name} in {city}"
    return f"https://www.google.com/maps/search/{quote_plus(query)}"
493
+
494
def search_and_show_link(dish):
    """Gradio handler: produce the Maps button's new value and visibility.

    For a non-blank *dish* the button value becomes the Google Maps search
    URL and the button is shown; otherwise the value is the default label
    and the button is hidden.
    """
    has_dish = bool(dish.strip())
    button_value = get_google_maps_search_url(dish) if has_dish else "Go to Google Maps"
    return button_value, gr.update(visible=has_dish)
499
+
500
+ # ============== PDF GENERATION ==============
501
def generate_pdf_recipe():
    """Write the current recipe (and its image, if any) to ``recipe.pdf``.

    Returns:
        ``(pdf_path, message)`` on success, or ``(None, message)`` when no
        recipe has been generated yet.
    """
    # Paragraph parses XML-like inline markup, so literal "&", "<" and ">" in
    # recipe text must be escaped or they break or garble the document.
    from xml.sax.saxutils import escape

    recipe = current_recipe_context
    if not recipe["title"]:
        return None, "❗ Please generate a recipe from an image first."

    output_file = "recipe.pdf"
    doc = SimpleDocTemplate(output_file, pagesize=letter)
    styles = getSampleStyleSheet()
    story = []

    image = recipe["image"]
    if image is not None:
        try:
            img_buffer = BytesIO()
            image.save(img_buffer, format="PNG")
            img_buffer.seek(0)
            story.append(ReportLabImage(img_buffer, width=200, height=200))
            story.append(Spacer(1, 12))
        except Exception as e:
            # The image is optional; still produce the text-only PDF.
            print(f"Error adding image to PDF: {e}")

    story.append(Paragraph(escape(recipe["title"]), styles['Title']))
    story.append(Spacer(1, 12))
    story.append(Paragraph("Ingredients:", styles['Heading2']))
    for ingr in recipe["ingredients"]:
        story.append(Paragraph(f"- {escape(ingr)}", styles['Normal']))
    story.append(Spacer(1, 12))
    story.append(Paragraph("Instructions:", styles['Heading2']))
    for i, instr in enumerate(recipe["instructions"], 1):
        story.append(Paragraph(f"{i}. {escape(instr)}", styles['Normal']))

    doc.build(story)
    return output_file, "βœ… Recipe saved as recipe.pdf"
533
+
534
+ # ============== GRADIO UI ==============
535
# Builds the whole Gradio interface: one tab per feature, each wiring its
# widgets to the handler functions defined above.
# NOTE(review): the nesting of the layout containers below was inferred from
# the statement order; verify the row/column placement against the running app.
with gr.Blocks(theme=gr.themes.Soft(), title="AI Recipe Generator") as demo:
    gr.Markdown("""
# 🍳 AI Recipe Generator & Multilingual Cooking Assistant
Generate recipes from images, translate to multiple languages, get cooking videos, chat with a culinary assistant, analyze nutrition, and find restaurants!
""")

    # Tab 1: upload a dish photo, generate the recipe, optionally export a PDF.
    with gr.Tab("πŸ“· Generate Recipe"):
        with gr.Row():
            with gr.Column():
                image_input = gr.Image(type="pil", label="Upload Dish Image", height=300)
                gen_btn = gr.Button("Generate Recipe", variant="primary", elem_id="action-btn")
                save_pdf_btn = gr.Button("Save as PDF", variant="secondary", elem_id="action-btn")
                pdf_output = gr.File(label="Download Recipe PDF", interactive=False)
            recipe_output = gr.Markdown("### Your recipe will appear here", elem_classes="recipe-box")
        gen_btn.click(generate_recipe, inputs=image_input, outputs=recipe_output)
        # generate_pdf_recipe returns (file, message); the message is written
        # into recipe_output, replacing the recipe text shown there.
        save_pdf_btn.click(fn=generate_pdf_recipe, outputs=[pdf_output, recipe_output])

    # Tab 2: translate the stored recipe and read the translation aloud.
    with gr.Tab("🌍 Translate & TTS"):
        with gr.Row():
            with gr.Column():
                lang_dropdown = gr.Dropdown(
                    choices=["English (original)", "Japanese", "Chinese", "Vietnamese"],
                    value="Japanese",
                    label="Select Language"
                )
                with gr.Row():
                    trans_btn = gr.Button("Translate Recipe", variant="primary", elem_id="action-btn")
                    tts_btn = gr.Button("πŸ”ˆ Listen to Recipe", variant="secondary", elem_id="action-btn")
            with gr.Column():
                translation_output = gr.Markdown("### Translated recipe will appear here", elem_classes="recipe-box")
                tts_audio = gr.Audio(interactive=False, label="Audio Output", visible=False)
        trans_btn.click(fn=translate_recipe, inputs=lang_dropdown, outputs=translation_output)
        # google_tts returns (audio path, visibility update); both are routed
        # to the same audio component.
        tts_btn.click(fn=google_tts, inputs=[translation_output, lang_dropdown], outputs=[tts_audio, tts_audio])

    # Tab 3: show the top three YouTube results for a keyword.
    with gr.Tab("πŸŽ₯ Cooking Videos"):
        with gr.Row():
            with gr.Column():
                video_keyword = gr.Textbox(label="Search Cooking Videos", placeholder="e.g. beef pho")
                search_btn = gr.Button("Search Videos", variant="primary", elem_id="action-btn")
            with gr.Column():
                video_embeds, video_titles, video_urls = [], [], []
                for i in range(3):
                    with gr.Column():
                        video_embeds.append(gr.HTML(label=f"🎬 Video {i+1}"))
                        video_titles.append(gr.Textbox(label=f"πŸ“Œ Title {i+1}", interactive=False))
                        video_urls.append(gr.Textbox(label=f"πŸ”— URL {i+1}", interactive=False, visible=False))
        # Output order (embeds, titles, urls) must match the flat list
        # returned by search_top_3_videos.
        search_btn.click(fn=search_top_3_videos, inputs=video_keyword, outputs=video_embeds + video_titles + video_urls)

    # Tab 4: chat about the current recipe.
    with gr.Tab("πŸ’¬ Culinary Chatbot"):
        chatbot = gr.Chatbot(height=400, type="messages")
        with gr.Row():
            chat_input = gr.Textbox(placeholder="Ask about the dish...", scale=4)
            chat_btn = gr.Button("Send", variant="primary", scale=1, elem_id="action-btn")
        # Only the message and the history are passed in, so chat_with_bot
        # always runs with its default session_id ("default").
        chat_btn.click(chat_with_bot, inputs=[chat_input, chatbot], outputs=[chat_input, chatbot])
        chat_input.submit(chat_with_bot, inputs=[chat_input, chatbot], outputs=[chat_input, chatbot])

    # Tab 5: nutrition charts for typed or recipe-loaded ingredients.
    with gr.Tab("πŸ₯— Nutrition Analysis"):
        with gr.Row():
            with gr.Column():
                ingredient_input = gr.Textbox(
                    label="🧾 Enter Ingredients (one per line or space-separated)",
                    lines=10,
                    placeholder="cheese\npepper\negg\n..."
                )
                with gr.Row():
                    load_ingredients_btn = gr.Button("Load Recipe Ingredients", variant="secondary", elem_id="action-btn")
                    analyze_btn = gr.Button("Analyze Nutrition", variant="primary", elem_id="action-btn")
            with gr.Column():
                nutrition_message = gr.Textbox(label="πŸ”” Message", interactive=False)
                bar_chart = gr.Plot(label="πŸ“Š Bar Chart")
                pie_chart = gr.Plot(label="πŸ₯§ Pie Chart")
                line_chart = gr.Plot(label="πŸ“ˆ Line Chart")
        load_ingredients_btn.click(fn=load_recipe_ingredients, outputs=ingredient_input)
        analyze_btn.click(
            fn=nutrition_analysis,
            inputs=ingredient_input,
            outputs=[nutrition_message, bar_chart, pie_chart, line_chart]
        )

    # Tab 6: open a Google Maps search for a dish.
    with gr.Tab("🍽️ Find Restaurants"):
        with gr.Row():
            with gr.Column():
                dish_input = gr.Textbox(label="Enter Dish Name", placeholder="e.g. beef pho", interactive=True)
                search_restaurant_btn = gr.Button("Find Restaurants", variant="primary", elem_id="action-btn")
                open_maps_btn = gr.Button("Go to Google Maps", visible=True, variant="secondary", elem_id="open-maps-btn")
        # search_and_show_link stores the Maps URL as the button's value and
        # toggles its visibility; the click handler below reads that value
        # and opens it in a new browser tab via the JS snippet.
        search_restaurant_btn.click(fn=search_and_show_link, inputs=dish_input, outputs=[open_maps_btn, open_maps_btn])
        open_maps_btn.click(
            fn=lambda url: url,
            inputs=open_maps_btn,
            outputs=None,
            js="(url) => { if(url) window.open(url, '_blank'); }"
        )
627
+
628
# Custom stylesheet, assigned after the Blocks layout has been built.
# It styles the recipe boxes (light and dark theme) and the buttons that carry
# the "action-btn" / "open-maps-btn" element ids.
demo.css = """
.recipe-box {
padding: 20px;
border-radius: 10px;
background: #f9f9f9;
border: 1px solid #e0e0e0;
}
.dark .recipe-box {
background: #2a2a2a;
border-color: #444;
}
.gr-box {
margin-bottom: 20px;
}
#action-btn {
max-width: 220px;
margin: 10px auto;
font-weight: 600;
font-size: 16px;
border-radius: 8px;
}
#open-maps-btn {
max-width: 220px;
margin: 10px auto;
font-weight: 600;
font-size: 16px;
border-radius: 8px;
}
"""
657
+
658
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
660