Update app.py
app.py CHANGED
@@ -27,77 +27,87 @@ from langchain_core.chat_history import InMemoryChatMessageHistory
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 from pydub import AudioSegment
 from pydub.utils import which
-
-
-from args import get_parser
-from model import get_model
-from output_utils import prepare_output
+from functools import lru_cache
+import onnxruntime as ort

 # ============== DEVICE CONFIG ==============
-device = torch.device("
-map_loc =
+device = torch.device("cpu") # Force CPU usage
+map_loc = "cpu"
+torch.set_num_threads(1) # Reduce thread contention
 logging.getLogger("pytube").setLevel(logging.ERROR)

-# ============== LOAD TRANSLATION MODELS ==============
-
-
-
-
-
-        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
-    ).to(device)
-    pipe_envit5 = pipeline(
-        "text2text-generation",
-        model=model_envit5,
-        tokenizer=tokenizer_envit5,
-        device=0 if torch.cuda.is_available() else -1,
-        max_new_tokens=512,
-        do_sample=False
+# ============== LOAD TRANSLATION MODELS (OPTIMIZED) ==============
+def load_translation_model(model_name, task="translation"):
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForSeq2SeqLM.from_pretrained(
+        model_name,
+        torch_dtype=torch.float32,
     )
+
+    # Apply dynamic quantization
+    model = torch.quantization.quantize_dynamic(
+        model,
+        {torch.nn.Linear},
+        dtype=torch.qint8
+    )
+
+    model.eval()
+    model.to('cpu')
+
+    return pipeline(
+        task,
+        model=model,
+        tokenizer=tokenizer,
+        device=-1,
+        max_length=256,
+        batch_size=1,
+        truncation=True
+    )
+
+# Load models with optimizations
+try:
+    pipe_envit5 = load_translation_model("VietAI/envit5-translation", "text2text-generation")
 except Exception as e:
     print(f"Error loading Vietnamese model: {e}")
     pipe_envit5 = None

 models = {
-    "Japanese": {"model_name": "Helsinki-NLP/opus-mt-en-
+    "Japanese": {"model_name": "Helsinki-NLP/opus-mt-en-ja"}, # Smaller model
     "Chinese": {"model_name": "Helsinki-NLP/opus-mt-en-zh"}
 }

 for lang in models:
     try:
-
-        model = AutoModelForSeq2SeqLM.from_pretrained(
-            models[lang]["model_name"],
-            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
-        ).to(device)
-        models[lang]["pipe"] = pipeline(
-            "translation",
-            model=model,
-            tokenizer=tokenizer,
-            device=0 if torch.cuda.is_available() else -1,
-            max_length=512,
-            batch_size=4 if torch.cuda.is_available() else 1,
-            truncation=True
-        )
+        models[lang]["pipe"] = load_translation_model(models[lang]["model_name"])
     except Exception as e:
         print(f"Error loading {lang} model: {e}")
         models[lang]["pipe"] = None

-# ============== LOAD CHATBOT MODEL ==============
-chatbot_tokenizer = AutoTokenizer.from_pretrained("
+# ============== LOAD CHATBOT MODEL (OPTIMIZED) ==============
+chatbot_tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)
 chatbot_model = AutoModelForCausalLM.from_pretrained(
-    "
-    torch_dtype=torch.
-
+    "microsoft/phi-2",
+    torch_dtype=torch.float32,
+    trust_remote_code=True
+)
+
+# Apply quantization
+chatbot_model = torch.quantization.quantize_dynamic(
+    chatbot_model,
+    {torch.nn.Linear},
+    dtype=torch.qint8
+)
+
+chatbot_model.to('cpu').eval()

 chatbot_pipeline = pipeline(
     "text-generation",
     model=chatbot_model,
     tokenizer=chatbot_tokenizer,
-    device
-    max_new_tokens=
+    device=-1,
+    max_new_tokens=80,
     do_sample=True,
-    temperature=0.
+    temperature=0.7,
     top_p=0.9,
     pad_token_id=chatbot_tokenizer.eos_token_id,
     batch_size=1
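Reviewer note on the quantization pattern introduced above: `torch.quantization.quantize_dynamic` rewrites only the listed module types (here `torch.nn.Linear`) to int8 kernels at load time, so it needs no calibration data; weights are stored as int8 while activations stay in float. A minimal, self-contained sketch of the same call on a toy module (the `TinyNet` class, layer sizes, and tensor shapes are illustrative, not taken from app.py):

```python
import torch
import torch.nn as nn

class TinyNet(nn.Module):
    """Toy stand-in for the Linear-heavy translation/chatbot models."""
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(16, 32)
        self.fc2 = nn.Linear(32, 4)

    def forward(self, x):
        return self.fc2(torch.relu(self.fc1(x)))

model = TinyNet().eval()

# Same call pattern as the diff: only nn.Linear layers are swapped for int8 kernels.
qmodel = torch.quantization.quantize_dynamic(model, {nn.Linear}, dtype=torch.qint8)

with torch.no_grad():
    out = qmodel(torch.randn(1, 16))
print(out.shape)  # torch.Size([1, 4]); the Linear layers now carry int8 weights
```

The hunk above applies this same call to the seq2seq models and to phi-2; the usual effect is roughly a 4x reduction in weight memory for the Linear layers with unchanged call sites.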
@@ -107,7 +117,7 @@ llm = HuggingFacePipeline(pipeline=chatbot_pipeline)
 # LangChain Chatbot Setup
 prompt = ChatPromptTemplate.from_template("""
 You are a professional culinary assistant. You will answer the user's question directly based on the provided recipe.
-
+Be concise and helpful.

 Dish: {title}
 Ingredients: {ingredients}
@@ -117,7 +127,6 @@ User Question: {question}
 Answer:
 """)

-
 chain = prompt | llm
 chat_histories = {}

@@ -136,7 +145,65 @@ chatbot_chain = RunnableWithMessageHistory(
 # ============== GLOBAL STATE ==============
 current_recipe_context = {"context": "", "title": "", "ingredients": [], "instructions": [], "image": None}

-# ============== RECIPE
+# ============== RECIPE MODEL (OPTIMIZED) ==============
+with open("ingr_vocab.pkl", 'rb') as f:
+    ingrs_vocab = pickle.load(f)
+with open("instr_vocab.pkl", 'rb') as f:
+    vocab = pickle.load(f)
+
+# Optimized transform with smaller image size
+transform = transforms.Compose([
+    transforms.Resize(128),
+    transforms.CenterCrop(112),
+    transforms.ToTensor(),
+    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
+])
+
+# Load model with optimizations
+def get_model_optimized(args, ingr_vocab_size, instr_vocab_size):
+    from model import get_model  # Local import to avoid circular dependency
+    model = get_model(args, ingr_vocab_size, instr_vocab_size)
+    model.load_state_dict(torch.load("modelbest.ckpt", map_location="cpu"))
+
+    # Apply optimizations
+    model = torch.jit.script(model)  # TorchScript compilation
+    model = model.to('cpu').eval()
+
+    # Apply dynamic quantization
+    model = torch.quantization.quantize_dynamic(
+        model,
+        {torch.nn.Linear},
+        dtype=torch.qint8
+    )
+
+    return model
+
+# Initialize model
+args = type('', (), {})()  # Simple args object
+args.maxseqlen = 15
+args.ingrs_only = False
+model_ic = get_model_optimized(args, len(ingrs_vocab), len(vocab))
+
+# Convert to ONNX for faster inference
+def convert_to_onnx():
+    if not os.path.exists("modelbest.onnx"):
+        dummy_input = torch.randn(1, 3, 112, 112).to('cpu')
+        torch.onnx.export(
+            model_ic,
+            dummy_input,
+            "modelbest.onnx",
+            export_params=True,
+            opset_version=11,
+            do_constant_folding=True,
+            input_names=['input'],
+            output_names=['output'],
+            dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
+        )
+    return ort.InferenceSession("modelbest.onnx", providers=['CPUExecutionProvider'])
+
+ort_session = convert_to_onnx()
+
+# ============== RECIPE FUNCTIONS ==============
 def format_recipe(title, ingredients, instructions, lang):
     emoji = {"title": "π½οΈ", "ingredients": "π§", "instructions": "π"}
     titles = {
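On the ONNX path added in this hunk: `convert_to_onnx` exports once to `modelbest.onnx` and then serves inference through `onnxruntime` on CPU. The export/run round trip looks like this in isolation; the toy `ConvNet`, the file name `toy.onnx`, and the 1x3x112x112 input shape (matching the new transform) are illustrative assumptions, and this sketch exports a plain float model rather than the quantized/TorchScripted one built above:

```python
import torch
import torch.nn as nn
import onnxruntime as ort

class ConvNet(nn.Module):
    """Toy stand-in for model_ic; the real recipe model returns several outputs."""
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, kernel_size=3, stride=2)
        self.head = nn.Linear(8 * 55 * 55, 10)

    def forward(self, x):
        x = torch.relu(self.conv(x)).flatten(1)
        return self.head(x)

model = ConvNet().eval()
dummy = torch.randn(1, 3, 112, 112)

# Same export arguments as convert_to_onnx in the hunk above.
torch.onnx.export(
    model, dummy, "toy.onnx",
    export_params=True, opset_version=11, do_constant_folding=True,
    input_names=["input"], output_names=["output"],
    dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
)

session = ort.InferenceSession("toy.onnx", providers=["CPUExecutionProvider"])
(out,) = session.run(None, {"input": dummy.numpy()})  # feed NumPy in, get NumPy back
print(out.shape)  # (1, 10)
```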
@@ -160,7 +227,8 @@ def format_recipe(title, ingredients, instructions, lang):
     result.extend([f"{i+1}. {step}" for i, step in enumerate(instructions)])
     return "\n".join(result)

-
+@lru_cache(maxsize=32)
+def translate_section_cached(text, lang):
     if lang == "English (original)":
         return text

@@ -168,26 +236,13 @@ def translate_section(text, lang):
         if pipe_envit5 is None:
             return f"β Vietnamese translation model not available"
         try:
-            max_chunk_length = 400
-
-
-            chunks = []
-            current_chunk = ""
-            for sentence in sentences:
-                if len(current_chunk) + len(sentence) < max_chunk_length:
-                    current_chunk += sentence + ". "
-                else:
-                    chunks.append(current_chunk)
-                    current_chunk = sentence + ". "
-            if current_chunk:
-                chunks.append(current_chunk)
-            else:
-                chunks = [text]
-
+            max_chunk_length = 300 # Reduced from 400
+            chunks = [text[i:i+max_chunk_length] for i in range(0, len(text), max_chunk_length)]
+
             translated_chunks = []
             for chunk in chunks:
                 chunk = f"en-vi: {chunk}"
-                translated = pipe_envit5(chunk, max_new_tokens=
+                translated = pipe_envit5(chunk, max_new_tokens=256)[0]["generated_text"] # Reduced tokens
                 translated = translated.replace("vi: vi: ", "").replace("vi: Vi: ", "").replace("vi: ", "").strip()
                 translated_chunks.append(translated)

@@ -200,25 +255,12 @@ def translate_section(text, lang):
         return f"β Translation model for {lang} not available"

     try:
-        max_chunk_length = 400
-
-
-        chunks = []
-        current_chunk = ""
-        for sentence in sentences:
-            if len(current_chunk) + len(sentence) < max_chunk_length:
-                current_chunk += sentence + ". "
-            else:
-                chunks.append(current_chunk)
-                current_chunk = sentence + ". "
-        if current_chunk:
-            chunks.append(current_chunk)
-        else:
-            chunks = [text]
-
+        max_chunk_length = 300 # Reduced from 400
+        chunks = [text[i:i+max_chunk_length] for i in range(0, len(text), max_chunk_length)]
+
         translated_chunks = []
         for chunk in chunks:
-            translated = models[lang]["pipe"](chunk, max_length=
+            translated = models[lang]["pipe"](chunk, max_length=256)[0]["translation_text"] # Reduced length
             translated_chunks.append(translated)

         return " ".join(translated_chunks)
@@ -229,9 +271,9 @@ def translate_section(text, lang):
 def translate_recipe(lang):
     if not current_recipe_context["title"]:
         return "β Please generate a recipe from an image first."
-    title =
-    ingrs = [
-    instrs = [
+    title = translate_section_cached(current_recipe_context["title"], lang)
+    ingrs = [translate_section_cached(i, lang) for i in current_recipe_context["ingredients"]]
+    instrs = [translate_section_cached(s, lang) for s in current_recipe_context["instructions"]]
     return format_recipe(title, ingrs, instrs, lang)

 # ============== NUTRITION ANALYSIS ==============
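A note on the `@lru_cache(maxsize=32)` used above: the cache keys on the exact `(text, lang)` argument pair, which is why `translate_recipe` now calls `translate_section_cached` once per string (title, each ingredient, each step) instead of passing lists; list arguments would raise `TypeError` because they are unhashable. A small illustration of the behaviour, with a made-up `fake_translate` standing in for the real pipelines:

```python
import time
from functools import lru_cache

@lru_cache(maxsize=32)
def fake_translate(text: str, lang: str) -> str:
    time.sleep(0.5)              # stand-in for a slow translation model call
    return f"[{lang}] {text}"

start = time.perf_counter()
fake_translate("chop the onions", "Vietnamese")   # cache miss: ~0.5 s
fake_translate("chop the onions", "Vietnamese")   # cache hit: effectively instant
print(f"{time.perf_counter() - start:.2f}s elapsed,", fake_translate.cache_info())

# fake_translate(["a", "b"], "Vietnamese")  # would raise TypeError: unhashable type: 'list'
```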
@@ -239,58 +281,60 @@ def nutrition_analysis(ingredient_input):
     ingredients = " ".join(ingredient_input.strip().split())
     api_url = f'https://api.api-ninjas.com/v1/nutrition?query={ingredients}'
     headers = {'X-Api-Key': 'AHVy+tpkUoueBNdaFs9nCg==sFZTMRn8ikZVzx6E'}
-
-
-
-
-
-
-
-
-
-    df
-
-
-
-
-
-
-
-
-
-
-
-
-
+    try:
+        response = requests.get(api_url, headers=headers, timeout=10)
+        if response.status_code != 200:
+            return "β API error or quota exceeded.", None, None, None
+
+        data = response.json()
+        if not data:
+            return "β οΈ No nutrition data found.", None, None, None
+
+        df = pd.DataFrame(data)
+        numeric_cols = []
+        for col in df.columns:
+            if col == "name":
+                continue
+            df[col] = pd.to_numeric(df[col], errors="coerce")
+            if df[col].notna().sum() > 0:
+                numeric_cols.append(col)
+
+        if df.empty or len(numeric_cols) < 3:
+            return "β οΈ Insufficient numerical data for charts.", None, None, None
+
+        draw_cols = numeric_cols[:3]
+        fig_bar = px.bar(df, x="name", y=draw_cols[0], title=f"Bar Chart: {draw_cols[0]}", text_auto=True)
+
+        pie_data = df[[draw_cols[1], "name"]].dropna()
+        if pie_data[draw_cols[1]].sum() > 0:
+            fig_pie = px.pie(pie_data, names="name", values=draw_cols[1], title=f"Pie Chart: {draw_cols[1]}")
+        else:
+            fig_pie = px.bar(title="β οΈ Insufficient data for pie chart")
+
+        fig_line = px.line(df, x="name", y=draw_cols[2], markers=True, title=f"Line Chart: {draw_cols[2]}")
+        return "β Analysis successful!", fig_bar, fig_pie, fig_line
+
+    except Exception as e:
+        print(f"Nutrition analysis error: {e}")
+        return "β Error during nutrition analysis.", None, None, None

 def load_recipe_ingredients():
     if not current_recipe_context["ingredients"]:
         return "β οΈ No ingredients available. Generate a recipe first."
     return "\n".join(current_recipe_context["ingredients"])

-# ============== CHATBOT ==============
+# ============== CHATBOT FUNCTIONS ==============
 def clean_response(response):
-    # Remove everything before "Answer:" if present
     if "Answer:" in response:
         response = response.split("Answer:")[-1]
-
-    # Remove potential repetitions of Dish, Ingredients, Instructions
     response = re.sub(r"Dish:.*?(Ingredients:|Instructions:).*?", "", response, flags=re.DOTALL)
     response = re.sub(r"Ingredients:.*?(Instructions:).*?", "", response, flags=re.DOTALL)
     response = re.sub(r"Instructions:.*", "", response, flags=re.DOTALL)
-
-    # Remove redundant system info
     response = re.sub(r"You are a professional culinary assistant.*?Answer:", "", response, flags=re.DOTALL)
-
-    # Remove duplicate user question inside response (very common in these LLM outputs)
     response = re.sub(r"User Question:.*", "", response, flags=re.DOTALL)
-
-    # Final strip + cleanup
     return response.strip()

-
 def validate_cooking_time(question, instructions):
-    # Extract cooking times from instructions
     time_pattern = r"(\d+)\s*(minutes|minute)"
     total_time = 0
     for instr in instructions:
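The rewritten `nutrition_analysis` above decides which API fields are chartable by coercing each column with `pd.to_numeric(..., errors="coerce")`: values the API returns as strings become NaN, and a column is kept only if at least one numeric value survives. A compact sketch of just that filtering step on made-up rows (the field names only loosely mirror the api-ninjas response):

```python
import pandas as pd

# Made-up rows loosely shaped like the nutrition API response.
data = [
    {"name": "onion",  "fat_total_g": 0.1,  "sodium_mg": 4,   "calories": "n/a"},
    {"name": "butter", "fat_total_g": 81.0, "sodium_mg": 643, "calories": "n/a"},
]
df = pd.DataFrame(data)

numeric_cols = []
for col in df.columns:
    if col == "name":
        continue
    df[col] = pd.to_numeric(df[col], errors="coerce")  # non-numeric strings -> NaN
    if df[col].notna().sum() > 0:
        numeric_cols.append(col)

print(numeric_cols)  # ['fat_total_g', 'sodium_mg']; 'calories' is dropped entirely
```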
@@ -298,7 +342,6 @@ def validate_cooking_time(question, instructions):
         for match in matches:
             total_time += int(match[0])

-    # Check if user question contains a time
     user_time = re.search(time_pattern, question)
     if user_time:
         user_minutes = int(user_time.group(1))
@@ -310,7 +353,6 @@ def generate_chat_response(message, session_id="default"):
     if not current_recipe_context["title"]:
         return "Please generate a recipe from an image before asking about the dish."

-    # Validate cooking time if relevant
     correction = validate_cooking_time(message, current_recipe_context["instructions"])

     response = chatbot_chain.invoke(
@@ -329,7 +371,6 @@ def generate_chat_response(message, session_id="default"):

     return response.strip()

-
 def chat_with_bot(message, chat_history, session_id="default"):
     if not message.strip():
         return "", chat_history
@@ -338,45 +379,40 @@ def chat_with_bot(message, chat_history, session_id="default"):
     chat_history.append({"role": "assistant", "content": response})
     return "", chat_history

-# ============== IMAGE TO RECIPE ==============
-
-
-with open("instr_vocab.pkl", 'rb') as f:
-    vocab = pickle.load(f)
-
-args = get_parser()
-args.maxseqlen = 15
-args.ingrs_only = False
-model_ic = get_model(args, len(ingrs_vocab), len(vocab))
-model_ic.load_state_dict(torch.load("modelbest.ckpt", map_location=map_loc, weights_only=True))
-model_ic.to(device).eval()
-
-transform = transforms.Compose([
-    transforms.Resize(256),
-    transforms.CenterCrop(224),
-    transforms.ToTensor(),
-    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
-])
-
-def generate_recipe(image):
+# ============== IMAGE TO RECIPE (OPTIMIZED) ==============
+def generate_recipe_with_progress(image, progress=gr.Progress()):
+    progress(0.1, desc="Preprocessing image...")
     if image is None:
         return "β Please upload an image."
+
     current_recipe_context["image"] = image
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    image_tensor = transform(image.convert("RGB")).unsqueeze(0).numpy()
+
+    progress(0.3, desc="Running model inference...")
+    try:
+        inputs = {'input': image_tensor}
+        outputs = ort_session.run(None, inputs)
+
+        progress(0.7, desc="Processing results...")
+        ids = (outputs[0], outputs[1])  # Adjust based on actual ONNX output
+        outs, valid = prepare_output(ids[1][0], ids[0][0], ingrs_vocab, vocab)
+
+        if not valid['is_valid']:
+            return f"β Invalid recipe: {valid['reason']}"
+
+        current_recipe_context.update({
+            "title": outs['title'],
+            "ingredients": outs['ingrs'],
+            "instructions": outs['recipe']
+        })
+
+        progress(0.9, desc="Formatting output...")
+        return format_recipe(outs['title'], outs['ingrs'], outs['recipe'], "English (original)")
+    except Exception as e:
+        print(f"Recipe generation error: {e}")
+        return f"β Error generating recipe: {str(e)}"
+
+# ============== TTS FUNCTIONS ==============
 languages_tts = {
     "English": "en",
     "Chinese": "zh-CN",
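The new `generate_recipe_with_progress` above uses Gradio's progress tracking: declaring `progress=gr.Progress()` as a default parameter is enough for Gradio to inject a tracker when the function runs as an event handler, and calling `progress(fraction, desc=...)` updates the bar in the UI. A stripped-down sketch of the same pattern (the `slow_task` function and the two-widget UI are illustrative only):

```python
import time
import gradio as gr

def slow_task(name, progress=gr.Progress()):
    progress(0.0, desc="Starting...")
    for i in range(5):
        time.sleep(0.3)                              # stand-in for real work
        progress((i + 1) / 5, desc=f"Step {i + 1}/5")
    return f"Done: {name}"

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Result")
    gr.Button("Run").click(slow_task, inputs=inp, outputs=out)

if __name__ == "__main__":
    demo.launch()
```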
@@ -407,22 +443,18 @@ def google_tts(text, lang):
     if not text or text.startswith("β"):
         return None, gr.update(visible=False)

-    # Clean text for TTS
     clean_text = text.replace("**", "").replace("###", "").replace("- ", "")
     for emoji in ["π½οΈ", "π§", "π"]:
         clean_text = clean_text.replace(emoji, "")

-
-    max_chunk_length = 200
+    max_chunk_length = 150 # Reduced from 200
     chunks = [clean_text[i:i+max_chunk_length] for i in range(0, len(clean_text), max_chunk_length)]
     if not chunks:
         return None, gr.update(visible=False)

-    # Fetch audio chunks asynchronously
     lang_code = languages_tts.get(lang, "en")
     audio_contents = asyncio.run(fetch_all_tts_audio(chunks, lang_code))

-    # Filter out failed requests
     audio_files = []
     for i, content in enumerate(audio_contents):
         if content:
@@ -433,7 +465,6 @@ def google_tts(text, lang):
     if not audio_files:
         return None, gr.update(visible=False)

-    # Combine audio if FFmpeg is available
     if len(audio_files) == 1:
         return audio_files[0], gr.update(visible=True)

@@ -449,7 +480,6 @@ def google_tts(text, lang):
         return output_file, gr.update(visible=True)
     except Exception as e:
         print(f"Error combining audio files: {e}")
-        # Fallback to first chunk
         for i in range(1, len(audio_files)):
             os.unlink(audio_files[i])
         return audio_files[0], gr.update(visible=True)
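The audio-combining path kept in the hunks above relies on pydub, which in turn needs FFmpeg on the PATH for MP3 work (hence the `which` import at the top of the file). Joining per-chunk MP3s is plain `+` on `AudioSegment` objects; a minimal sketch, assuming the chunk files already exist on disk (the file names are illustrative):

```python
from pydub import AudioSegment
from pydub.utils import which

assert which("ffmpeg"), "FFmpeg must be installed for MP3 decoding/encoding"

chunk_paths = ["tts_chunk_0.mp3", "tts_chunk_1.mp3"]   # illustrative names
combined = AudioSegment.empty()
for path in chunk_paths:
    combined += AudioSegment.from_mp3(path)            # decode and append

combined.export("tts_combined.mp3", format="mp3")      # re-encode the joined audio
```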
@@ -546,7 +576,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="AI Recipe Generator") as demo:
         save_pdf_btn = gr.Button("Save as PDF", variant="secondary", elem_id="action-btn")
         pdf_output = gr.File(label="Download Recipe PDF", interactive=False)
         recipe_output = gr.Markdown("### Your recipe will appear here", elem_classes="recipe-box")
-        gen_btn.click(
+        gen_btn.click(generate_recipe_with_progress, inputs=image_input, outputs=recipe_output)
         save_pdf_btn.click(fn=generate_pdf_recipe, outputs=[pdf_output, recipe_output])

     with gr.Tab("π Translate & TTS"):
@@ -656,5 +686,4 @@ with gr.Blocks(theme=gr.themes.Soft(), title="AI Recipe Generator") as demo:
     """

 if __name__ == "__main__":
-    demo.launch()
-
+    demo.launch()