LPX55 committed on
Commit
dd8a1e0
·
verified ·
1 Parent(s): 09d4d06

review: local prompt inf. (#1)

Browse files

- review: local prompt inf. (f8c747170c82f9b807d1c4f08dc22a8c1b3ebaa2)

Files changed (1) hide show
  1. app_local.py +168 -123
app_local.py CHANGED
@@ -9,6 +9,7 @@ from diffusers.utils import is_xformers_available
9
  import os
10
  import re
11
  import gc
 
12
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
13
 
14
  #############################
@@ -78,24 +79,31 @@ Please provide the rewritten instruction in a clean `json` format as:
78
 
79
  def extract_json_response(model_output: str) -> str:
80
  """Extract rewritten instruction from potentially messy JSON output"""
 
 
 
81
  try:
82
  # Try to find the JSON portion in the output
83
  start_idx = model_output.find('{')
84
- end_idx = model_output.rfind('}') + 1
85
- if start_idx == -1 or end_idx == 0:
86
  return None
87
 
 
 
 
88
  json_str = model_output[start_idx:end_idx]
89
- # Clean up common formatting issues
90
- json_str = re.sub(r'(?<!")\b(\w+)\b(?=":)', r'"\1"', json_str) # Add quotes to keys
91
- json_str = re.sub(r':\s*([^"{\[]|true|false|null)', r': "\1"', json_str) # Add quotes to values
 
92
 
93
  # Parse JSON
94
  data = json.loads(json_str)
95
 
96
  # Extract rewritten prompt from possible key variations
97
  possible_keys = [
98
- "Rewritten", "rewritten", "Rewrited", "rewrited",
99
  "Output", "output", "Enhanced", "enhanced"
100
  ]
101
  for key in possible_keys:
@@ -105,16 +113,23 @@ def extract_json_response(model_output: str) -> str:
105
  # Try nested path
106
  if "Response" in data and "Rewritten" in data["Response"]:
107
  return data["Response"]["Rewritten"].strip()
 
 
 
 
 
 
108
 
109
- # Fallback to direct extraction
110
- for value in data.values():
111
- if isinstance(value, str) and 10 < len(value) < 500:
112
- return value.strip()
113
 
114
- except Exception:
115
- pass
116
- return None
117
 
 
 
118
  def polish_prompt(original_prompt: str) -> str:
119
  """Enhanced prompt rewriting using original system prompt with JSON handling"""
120
  load_rewriter()
@@ -136,11 +151,11 @@ def polish_prompt(original_prompt: str) -> str:
136
  with torch.no_grad():
137
  generated_ids = rewriter_model.generate(
138
  **model_inputs,
139
- max_new_tokens=256, # Maintain token count for good JSON generation
140
  do_sample=True,
141
- temperature=0.6,
142
  top_p=0.9,
143
- no_repeat_ngram_size=2,
144
  pad_token_id=rewriter_tokenizer.eos_token_id
145
  )
146
 
@@ -150,36 +165,50 @@ def polish_prompt(original_prompt: str) -> str:
150
  skip_special_tokens=True
151
  ).strip()
152
 
 
 
 
 
 
 
 
153
  # Try to extract JSON content
154
- rewritten_prompt = extract_json_response(enhanced)
155
 
156
  if rewritten_prompt:
157
- # Clean up substitutions from the JSON output
158
- rewritten_prompt = re.sub(r'(Replace|Change|Add) "([^"]*)"', r'\1 \2', rewritten_prompt)
159
- rewritten_prompt = rewritten_prompt.replace('\\"', '"')
160
  return rewritten_prompt
161
 
162
  # Fallback cleanup if JSON extraction fails
163
- print(f"⚠️ JSON extraction failed, using raw output: {enhanced}")
164
- fallback = re.sub(r'```.*?```', '', enhanced, flags=re.DOTALL) # Remove code blocks
165
- fallback = re.sub(r'[\{\}\[\]"]', '', fallback) # Remove JSON artifacts
166
- fallback = fallback.split('\n')[0] # Take first line
 
 
 
 
 
167
 
168
- # Try to extract before colon separator
169
- if ': ' in fallback:
170
- return fallback.split(': ')[1].strip()
 
 
171
 
172
- return fallback.strip()
173
 
174
  # Load main image editing pipeline
175
  pipe = QwenImageEditPipeline.from_pretrained(
176
- "Qwen/Qwen-Image-Edit",
177
  torch_dtype=dtype
178
  ).to(device)
179
 
180
  # Load LoRA weights for acceleration
181
  pipe.load_lora_weights(
182
- "lightx2v/Qwen-Image-Lightning",
183
  weight_name="Qwen-Image-Lightning-8steps-V1.1.safetensors"
184
  )
185
  pipe.fuse_lora()
@@ -189,7 +218,6 @@ if is_xformers_available():
189
  else:
190
  print("xformers not available")
191
 
192
-
193
  def unload_rewriter():
194
  """Clear enhancement model from memory"""
195
  global rewriter_tokenizer, rewriter_model
@@ -206,7 +234,7 @@ def infer(
206
  prompt,
207
  seed=42,
208
  randomize_seed=False,
209
- true_guidance_scale=4.0,
210
  num_inference_steps=8,
211
  rewrite_prompt=False,
212
  num_images_per_prompt=1,
@@ -220,19 +248,19 @@ def infer(
220
  try:
221
  enhanced_instruction = polish_prompt(original_prompt)
222
  prompt_info = (
223
- f"<div style='margin:10px; padding:10px; border-radius:8px; border-left:4px solid #4CAF50; background: #f5f9fe'>"
224
  f"<h4 style='margin-top: 0;'>🚀 Prompt Enhancement</h4>"
225
  f"<p><strong>Original:</strong> {original_prompt}</p>"
226
- f"<p><strong>Enhanced:</strong> {enhanced_instruction}</p>"
227
  f"</div>"
228
  )
229
  prompt = enhanced_instruction
230
  except Exception as e:
231
  gr.Warning(f"Prompt enhancement failed: {str(e)}")
232
  prompt_info = (
233
- f"<div style='margin:10px; padding:10px; border-radius:8px; border-left:4px solid #FF5252; background: #fef5f5'>"
234
  f"<h4 style='margin-top: 0;'>⚠️ Enhancement Not Applied</h4>"
235
- f"<p>Using original prompt. Error: {str(e)}</p>"
236
  f"</div>"
237
  )
238
  else:
@@ -247,9 +275,7 @@ def infer(
247
  unload_rewriter()
248
 
249
  # Set seed for reproducibility
250
- seed_val = seed
251
- if randomize_seed:
252
- seed_val = random.randint(0, 2**32 - 1)
253
  generator = torch.Generator(device=device).manual_seed(seed_val)
254
 
255
  try:
@@ -263,121 +289,140 @@ def infer(
263
  true_cfg_scale=true_guidance_scale,
264
  num_images_per_prompt=num_images_per_prompt
265
  ).images
 
 
266
  except Exception as e:
267
  gr.Error(f"Image generation failed: {str(e)}")
268
- prompt_info = (
269
- f"<div style='margin:10px; padding:10px; border-radius:8px; border-left:4px solid #dd2c00; background: #fef5f5'>"
270
- f"<h4 style='margin-top: 0;'><strong>⚠️ Error:</strong> {str(e)}</h4>"
 
271
  f"</div>"
272
  )
273
- return [], seed_val, prompt_info
274
-
275
- return edited_images, seed_val, prompt_info
276
 
277
  MAX_SEED = np.iinfo(np.int32).max
278
- examples = [
279
- "Replace the cat with a friendly golden retriever. Make it look happier, and add more background details.",
280
- "Add text 'Qwen - AI for image editing' in Chinese at the bottom center with a small shadow.",
281
- "Change the style to 1970s vintage, add old photo effect, restore any scratches on the wall or window.",
282
- "Remove the blue sky and replace it with a dark night cityscape.",
283
- """Replace "Qwen" with "通义" in the Image. Ensure Chinese font is used and position it at top left."""
284
- ]
285
 
286
- with gr.Blocks(title="Qwen Image Editor Fast") as demo:
287
  gr.Markdown("""
288
- <div style="text-align: center;">
289
- <h1>⚡️ Qwen-Image-Edit Lightning Fast 8-STEP</h1>
290
- <p>8-step image editing with lightx2v's LoRA and local prompt enhancement</p>
291
- <p>🚧 Work in progress, further improvements coming soon.</p>
292
  </div>
293
  """)
294
-
295
- with gr.Row():
296
  # Input Column
297
- with gr.Column():
298
- input_image = gr.Image(label="Input Image", type="pil")
299
- prompt = gr.Textbox(label="Edit Instruction", placeholder="e.g. Add a dog to the right side", lines=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
 
301
- with gr.Accordion("Advanced Settings", open=False):
302
- gr.Markdown("### Generation Parameters")
303
  with gr.Row():
304
- seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
305
- randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
 
 
 
 
 
 
 
 
 
306
  with gr.Row():
307
  true_guidance_scale = gr.Slider(
308
- label="Guidance Scale", minimum=1.0, maximum=5.0, step=0.1, value=4.0
 
 
 
 
309
  )
310
  num_inference_steps = gr.Slider(
311
- label="Inference Steps", minimum=4, maximum=16, step=1, value=8
312
- )
313
- num_images_per_prompt = gr.Slider(
314
- label="Output Images", minimum=1, maximum=4, step=1, value=2
 
315
  )
 
 
 
 
 
 
 
316
 
317
- rewrite_toggle = gr.Checkbox(
318
- label="Enable AI Prompt Enhancement",
319
- value=True
320
- )
321
-
322
- run_button = gr.Button("Generate Edits", variant="primary")
323
-
324
  # Output Column
325
- with gr.Column():
326
  result = gr.Gallery(
327
- label="Output Images",
328
- columns=lambda x: 2 if x > 1 else 1,
329
- object_fit="contain",
330
- height="auto"
 
331
  )
332
  prompt_info = gr.HTML(
333
- "<div style='margin-top:20px; padding:15px; border-radius:8px; background:#f8f9fa'>"
334
- "<p>Prompt details will appear here after generation</p></div>"
335
  )
336
 
337
- # gr.Examples(
338
- # examples=examples,
339
- # inputs=[prompt],
340
- # label="Try These Examples",
341
- # cache_examples=True
342
- # )
 
 
 
 
 
343
 
344
- # Main processing
345
- run_event = run_button.click(
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  fn=infer,
347
- inputs=[
348
- input_image,
349
- prompt,
350
- seed,
351
- randomize_seed,
352
- true_guidance_scale,
353
- num_inference_steps,
354
- rewrite_toggle,
355
- num_images_per_prompt
356
- ],
357
- outputs=[result, seed, prompt_info]
358
  )
359
 
360
  prompt.submit(
361
  fn=infer,
362
- inputs=[
363
- input_image,
364
- prompt,
365
- seed,
366
- randomize_seed,
367
- true_guidance_scale,
368
- num_inference_steps,
369
- rewrite_toggle,
370
- num_images_per_prompt
371
- ],
372
- outputs=[result, seed, prompt_info]
373
- )
374
-
375
- # Vectorize prompt info visibility
376
- run_event.then(
377
- fn=lambda: gr.update(visible=True),
378
- inputs=None,
379
- outputs=[prompt_info],
380
- queue=False
381
  )
382
 
383
  if __name__ == "__main__":
 
9
  import os
10
  import re
11
  import gc
12
+ import json # Added json import
13
  from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
14
 
15
  #############################
 
79
 
80
  def extract_json_response(model_output: str) -> str:
81
  """Extract rewritten instruction from potentially messy JSON output"""
82
+ # New: Remove code block markers first
83
+ model_output = re.sub(r'```(?:json)?\s*', '', model_output)
84
+
85
  try:
86
  # Try to find the JSON portion in the output
87
  start_idx = model_output.find('{')
88
+ end_idx = model_output.rfind('}')
89
+ if start_idx == -1 or end_idx == -1:
90
  return None
91
 
92
+ # Expand to the full object including outer braces
93
+ end_idx += 1 # Include the closing brace
94
+
95
  json_str = model_output[start_idx:end_idx]
96
+
97
+ # Improved quote handling for values
98
+ json_str = re.sub(r'(\w+)\s*:', r'"\1":', json_str) # Quote keys
99
+ json_str = re.sub(r':\s*([^"\s{[]+)', r': "\1"', json_str) # Quote unquoted string values
100
 
101
  # Parse JSON
102
  data = json.loads(json_str)
103
 
104
  # Extract rewritten prompt from possible key variations
105
  possible_keys = [
106
+ "Rewritten", "rewritten", "Rewrited", "rewrited",
107
  "Output", "output", "Enhanced", "enhanced"
108
  ]
109
  for key in possible_keys:
 
113
  # Try nested path
114
  if "Response" in data and "Rewritten" in data["Response"]:
115
  return data["Response"]["Rewritten"].strip()
116
+
117
+ # Handle nested JSON objects (additional protection)
118
+ if isinstance(data, dict):
119
+ for value in data.values():
120
+ if isinstance(value, dict) and "Rewritten" in value:
121
+ return value["Rewritten"].strip()
122
 
123
+ # Try to find any string value that looks like an instruction
124
+ str_values = [v for v in data.values() if isinstance(v, str) and 10 < len(v) < 500]
125
+ if str_values:
126
+ return str_values[0].strip()
127
 
128
+ except Exception as e:
129
+ print(f"JSON parse error: {str(e)}")
 
130
 
131
+ return None
132
+
133
  def polish_prompt(original_prompt: str) -> str:
134
  """Enhanced prompt rewriting using original system prompt with JSON handling"""
135
  load_rewriter()
 
151
  with torch.no_grad():
152
  generated_ids = rewriter_model.generate(
153
  **model_inputs,
154
+ max_new_tokens=150, # Reduced for better quality
155
  do_sample=True,
156
+ temperature=0.4, # Less creative but more focused
157
  top_p=0.9,
158
+ no_repeat_ngram_size=3,
159
  pad_token_id=rewriter_tokenizer.eos_token_id
160
  )
161
 
 
165
  skip_special_tokens=True
166
  ).strip()
167
 
168
+ # New: Last-resort JSON content extraction
169
+ json_str = enhanced
170
+ if '```' in enhanced:
171
+ parts = enhanced.split('```')
172
+ if len(parts) >= 3:
173
+ json_str = parts[1] # Take content between first set of ```
174
+
175
  # Try to extract JSON content
176
+ rewritten_prompt = extract_json_response(json_str if '```' in enhanced else enhanced)
177
 
178
  if rewritten_prompt:
179
+ # Clean up remaining artifacts
180
+ rewritten_prompt = re.sub(r'(Replace|Change|Add) "(.*?)"', r'\1 \2', rewritten_prompt)
181
+ rewritten_prompt = rewritten_prompt.replace('\\"', '"').replace('\\n', ' ')
182
  return rewritten_prompt
183
 
184
  # Fallback cleanup if JSON extraction fails
185
+ if '```' in enhanced:
186
+ # Extract content from code blocks
187
+ parts = enhanced.split('```')
188
+ if len(parts) >= 3:
189
+ rewritten_prompt = parts[1].strip()
190
+ else:
191
+ rewritten_prompt = enhanced
192
+ else:
193
+ rewritten_prompt = enhanced
194
 
195
+ # Improved cleaning of fallback output
196
+ rewritten_prompt = re.sub(r'.*{.*}.*', '', rewritten_prompt)
197
+ rewritten_prompt = re.sub(r'\s\s+', ' ', rewritten_prompt).strip()
198
+ if ': ' in rewritten_prompt:
199
+ rewritten_prompt = rewritten_prompt.split(': ', 1)[-1].strip()
200
 
201
+ return rewritten_prompt[:200] # Ensure reasonable length
202
 
203
  # Load main image editing pipeline
204
  pipe = QwenImageEditPipeline.from_pretrained(
205
+ "Qwen/Qwen-Image-Edit",
206
  torch_dtype=dtype
207
  ).to(device)
208
 
209
  # Load LoRA weights for acceleration
210
  pipe.load_lora_weights(
211
+ "lightx2v/Qwen-Image-Lightning",
212
  weight_name="Qwen-Image-Lightning-8steps-V1.1.safetensors"
213
  )
214
  pipe.fuse_lora()
 
218
  else:
219
  print("xformers not available")
220
 
 
221
  def unload_rewriter():
222
  """Clear enhancement model from memory"""
223
  global rewriter_tokenizer, rewriter_model
 
234
  prompt,
235
  seed=42,
236
  randomize_seed=False,
237
+ true_guidance_scale=1.0,
238
  num_inference_steps=8,
239
  rewrite_prompt=False,
240
  num_images_per_prompt=1,
 
248
  try:
249
  enhanced_instruction = polish_prompt(original_prompt)
250
  prompt_info = (
251
+ f"<div style='margin:10px; padding:15px; border-radius:8px; border-left:4px solid #4CAF50; background: #f5f9fe'>"
252
  f"<h4 style='margin-top: 0;'>🚀 Prompt Enhancement</h4>"
253
  f"<p><strong>Original:</strong> {original_prompt}</p>"
254
+ f"<p><strong style='color:#2E7D32;'>Enhanced:</strong> {enhanced_instruction}</p>"
255
  f"</div>"
256
  )
257
  prompt = enhanced_instruction
258
  except Exception as e:
259
  gr.Warning(f"Prompt enhancement failed: {str(e)}")
260
  prompt_info = (
261
+ f"<div style='margin:10px; padding:15px; border-radius:8px; border-left:4px solid #FF5252; background: #fef5f5'>"
262
  f"<h4 style='margin-top: 0;'>⚠️ Enhancement Not Applied</h4>"
263
+ f"<p>Using original prompt. Error: {str(e)[:100]}</p>"
264
  f"</div>"
265
  )
266
  else:
 
275
  unload_rewriter()
276
 
277
  # Set seed for reproducibility
278
+ seed_val = seed if not randomize_seed else random.randint(0, MAX_SEED)
 
 
279
  generator = torch.Generator(device=device).manual_seed(seed_val)
280
 
281
  try:
 
289
  true_cfg_scale=true_guidance_scale,
290
  num_images_per_prompt=num_images_per_prompt
291
  ).images
292
+ return edited_images, seed_val, prompt_info
293
+
294
  except Exception as e:
295
  gr.Error(f"Image generation failed: {str(e)}")
296
+ return [], seed_val, (
297
+ f"<div style='margin:10px; padding:15px; border-radius:8px; border-left:4px solid #dd2c00; background: #fef5f5'>"
298
+ f"<h4 style='margin-top: 0;'>⚠️ Processing Error</h4>"
299
+ f"<p>{str(e)[:200]}</p>"
300
  f"</div>"
301
  )
 
 
 
302
 
303
  MAX_SEED = np.iinfo(np.int32).max
 
 
 
 
 
 
 
304
 
305
+ with gr.Blocks(title="Qwen Image Editor Fast", css=".gr-gallery {min-height: 300px}") as demo:
306
  gr.Markdown("""
307
+ <div style="text-align: center; background: linear-gradient(to right, #3a7bd5, #00d2ff); color: white; padding: 20px; border-radius: 8px;">
308
+ <h1 style="margin-bottom: 5px;">⚡️ Qwen-Image-Edit Lightning</h1>
309
+ <p>8-step inferencing Local Prompt Enhancement H200 Optimized</p>
 
310
  </div>
311
  """)
312
+
313
+ with gr.Row(equal_height=True):
314
  # Input Column
315
+ with gr.Column(scale=1):
316
+ input_image = gr.Image(
317
+ label="Source Image",
318
+ type="pil",
319
+ height=300
320
+ )
321
+ prompt = gr.Textbox(
322
+ label="Edit Instructions",
323
+ placeholder="e.g. Replace the background with a beach sunset...",
324
+ lines=2,
325
+ max_lines=4
326
+ )
327
+
328
+ with gr.Row():
329
+ rewrite_toggle = gr.Checkbox(
330
+ label="Enable Prompt Enhancement",
331
+ value=True,
332
+ interactive=True
333
+ )
334
+ run_button = gr.Button(
335
+ "Generate Edits",
336
+ variant="primary",
337
+ min_width=120
338
+ )
339
 
340
+ with gr.Accordion("Advanced Parameters", open=False):
 
341
  with gr.Row():
342
+ seed = gr.Slider(
343
+ label="Seed",
344
+ min=0,
345
+ max=MAX_SEED,
346
+ step=1,
347
+ value=42
348
+ )
349
+ randomize_seed = gr.Checkbox(
350
+ label="Random Seed",
351
+ value=True
352
+ )
353
  with gr.Row():
354
  true_guidance_scale = gr.Slider(
355
+ label="Guidance Scale",
356
+ min=1.0,
357
+ max=5.0,
358
+ step=0.1,
359
+ value=1.0
360
  )
361
  num_inference_steps = gr.Slider(
362
+ label="Inference Steps",
363
+ min=4,
364
+ max=16,
365
+ step=1,
366
+ value=8
367
  )
368
+ num_images_per_prompt = gr.Slider(
369
+ label="Output Count",
370
+ min=1,
371
+ max=4,
372
+ step=1,
373
+ value=1
374
+ )
375
 
 
 
 
 
 
 
 
376
  # Output Column
377
+ with gr.Column(scale=1):
378
  result = gr.Gallery(
379
+ label="Edited Images",
380
+ columns=lambda x: min(x, 2),
381
+ height=500,
382
+ object_fit="cover",
383
+ preview=True
384
  )
385
  prompt_info = gr.HTML(
386
+ value="<div style='padding:15px; background:#f8f9fa; border-radius:8px; margin-top:15px'>"
387
+ "Prompt details will appear after generation</div>"
388
  )
389
 
390
+ # Examples
391
+ gr.Examples(
392
+ examples=[
393
+ "Change the background scene to a rooftop bar at night",
394
+ "Transform to pixel art style with 8-bit graphics",
395
+ "Replace all text with 'Qwen AI' in futuristic font"
396
+ ],
397
+ inputs=[prompt],
398
+ label="Sample Instructions",
399
+ cache_examples=True
400
+ )
401
 
402
+ # Set up processing
403
+ inputs = [
404
+ input_image,
405
+ prompt,
406
+ seed,
407
+ randomize_seed,
408
+ true_guidance_scale,
409
+ num_inference_steps,
410
+ rewrite_toggle,
411
+ num_images_per_prompt
412
+ ]
413
+
414
+ outputs = [result, seed, prompt_info]
415
+
416
+ run_button.click(
417
  fn=infer,
418
+ inputs=inputs,
419
+ outputs=outputs
 
 
 
 
 
 
 
 
 
420
  )
421
 
422
  prompt.submit(
423
  fn=infer,
424
+ inputs=inputs,
425
+ outputs=outputs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
426
  )
427
 
428
  if __name__ == "__main__":