# multimodalart's picture
# fix error with correct lora path (#1)
# ee7db05 verified
import gradio as gr
import numpy as np
import random
import torch
import spaces
from PIL import Image
from diffusers import FlowMatchEulerDiscreteScheduler
from optimization import optimize_pipeline_
from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
import math
# --- Model Loading ---
# Use bfloat16 weights; run on CUDA when available, otherwise on CPU.
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# Scheduler settings: dynamic shifting is enabled with an exponential
# time-shift, and base_shift / max_shift are both log(3).
scheduler_config = {
    "base_image_seq_len": 256,
    "base_shift": math.log(3),
    "invert_sigmas": False,
    "max_image_seq_len": 8192,
    "max_shift": math.log(3),
    "num_train_timesteps": 1000,
    "shift": 1.0,
    "shift_terminal": None,
    "stochastic_sampling": False,
    "time_shift_type": "exponential",
    "use_beta_sigmas": False,
    "use_dynamic_shifting": True,
    "use_exponential_sigmas": False,
    "use_karras_sigmas": False,
}
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)

# Load the base edit pipeline with the custom scheduler, then move it to the device.
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    scheduler=scheduler,
    torch_dtype=dtype,
)
pipe = pipe.to(device)

# Each LoRA is loaded and immediately fused at scale 1.0, in this order:
# the "Lightning-4steps" weights first, then the "Multiple-angles" weights.
# NOTE(review): no unload_lora_weights() call follows either fuse — confirm
# that is intended before changing the order or adding adapters.
for _lora_repo, _lora_file in (
    (
        "2vXpSwA7/iroiro-lora",
        "qwen_lora/Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16_dim1.safetensors",
    ),
    ("dx8152/Qwen-Edit-2509-Multiple-angles", "镜头转换.safetensors"),
):
    pipe.load_lora_weights(_lora_repo, weight_name=_lora_file)
    pipe.fuse_lora(lora_scale=1.0)

# Rebind the transformer to the project-local class and install the FA3
# attention processor (presumably required by optimize_pipeline_ — confirm).
pipe.transformer.__class__ = QwenImageTransformer2DModel
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())

# optimize_pipeline_ receives two blank 1024x1024 RGB images and a placeholder
# prompt (presumably example inputs for warm-up/compilation — see optimization.py).
optimize_pipeline_(
    pipe,
    image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))],
    prompt="prompt",
)
# --- Constants ---
# Upper bound for seeds: the largest signed 32-bit integer.
MAX_SEED: int = np.iinfo(np.int32).max

# Internal defaults (also used as the initial values of the advanced-settings controls).
DEFAULT_SEED: int = 0
DEFAULT_RANDOMIZE: bool = True
DEFAULT_TRUE_GUIDANCE_SCALE: float = 1.0
DEFAULT_NUM_INFERENCE_STEPS: int = 4
# Camera presets. The value submitted to the model is always the "cn" text;
# "ja" and "en" are display labels only. Note there is no "zh" key here —
# the Chinese text lives under "cn".
CAMERA_OPTIONS = [
    {"cn": "镜头方向左回转45度", "ja": "左に45度回転", "en": "Rotate camera 45° left"},
    {"cn": "镜头向右回转45度", "ja": "右に45度回転", "en": "Rotate camera 45° right"},
    {"cn": "镜头方向左回转90度", "ja": "左に90度回転", "en": "Rotate camera 90° left"},
    {"cn": "镜头向右回转90度", "ja": "右に90度回転", "en": "Rotate camera 90° right"},
    {"cn": "将镜头转为俯视", "ja": "上から見下ろす", "en": "Switch to top-down view"},
    {"cn": "将镜头转为仰视", "ja": "下から見上げる", "en": "Switch to low-angle view"},
    {"cn": "将镜头转为特写镜头", "ja": "クローズアップ", "en": "Switch to close-up lens"},
    {"cn": "将镜头转为中近景镜头", "ja": "ややクローズアップ", "en": "Switch to medium close-up lens"},
    {"cn": "将镜头转为拉远镜头", "ja": "ズームアウト", "en": "Switch to zoom out lens"},
]
# Free-text ("custom") dropdown entry: a sentinel value plus its per-language label.
CUSTOM_OPTION_VALUE = "__custom__"
CUSTOM_LABELS = {
    "en": "Custom (enter Chinese or English prompt)",
    "ja": "自由入力(中国語、英語で入力)",
    "zh": "自定义(中文或英文输入)",
}
# i18n dictionary: UI text key -> {language code -> text}.
# Only one language is displayed at a time.
I18N = {
    "title": {
        "en": "Camera Work",
        "ja": "カメラワーク",
        "zh": "镜头控制",
    },
    "notice": {
        "en": "Note: Please avoid uploading images created by others. There may be rights infringements.",
        "ja": "注意:他者が作成した画像のアップロードはご遠慮ください。権利侵害の可能性があります。",
        "zh": "注意:请勿上传他人创作的图片,可能涉及权利侵害。",
    },
    "input_image": {
        "en": "Input image",
        "ja": "入力画像",
        "zh": "输入图像",
    },
    "dropdown_label": {
        "en": "Camera work",
        "ja": "カメラワーク",
        "zh": "镜头操作",
    },
    "custom_cn_label": {
        "en": "Custom prompt(English and Chinese recommended)",
        "ja": "自由入力のプロンプト(英語、中国語がおすすめ)",
        "zh": "自定义提示词(推荐英文和中文)",
    },
    "custom_cn_ph": {
        "en": "e.g., 将镜头转为斜俯视 并 拉远镜头",
        "ja": "例: 将镜头转为斜俯视 并 拉远镜头",
        "zh": "例如:将镜头转为斜俯视 并 拉远镜头",
    },
    "extra_label": {
        "en": "Extra prompt (optional, appended at end)(English and Chinese recommended)",
        "ja": "追加プロンプト(任意・末尾に付加)(英語、中国語がおすすめ)",
        "zh": "附加提示词(可选,追加在末尾)(推荐英文和中文)",
    },
    "extra_ph": {
        "en": "e.g., Subject is a girl",
        "ja": "例: 被摄体是一名女孩子",
        "zh": "例如:被摄体是一名女孩子",
    },
    "accordion": {
        "en": "Advanced settings",
        "ja": "詳細設定",
        "zh": "高级设置",
    },
    "seed": {
        "en": "Seed",
        "ja": "Seed",
        "zh": "Seed",
    },
    "rand": {
        "en": "Randomize seed",
        "ja": "ランダムシード",
        "zh": "随机种子",
    },
    "tgs": {
        "en": "True guidance scale",
        "ja": "True guidance scale",
        "zh": "True guidance scale",
    },
    "steps": {
        "en": "Steps",
        "ja": "生成ステップ数",
        "zh": "生成步数",
    },
    "run": {
        "en": "Generate",
        "ja": "生成",
        "zh": "生成",
    },
    "output": {
        "en": "Output image",
        "ja": "出力画像",
        "zh": "输出图像",
    },
    "status": {
        "en": "Status",
        "ja": "ステータス",
        "zh": "状态",
    },
    "status_ok": {
        "en": "Generated 1 image (PNG).",
        "ja": "1枚生成しました(PNG)。",
        "zh": "已生成 1 张图片(PNG)。",
    },
    "err_no_img": {
        "en": "Error: Please upload an input image.",
        "ja": "エラー: 入力画像をアップロードしてください",
        "zh": "错误:请先上传输入图像。",
    },
    "err_no_custom": {
        "en": "Error: Please enter a custom prompt.",
        "ja": "エラー: 自由入力のプロンプトを入力してください",
        "zh": "错误:请输入自定义提示词。",
    },
    "lang_label": {
        "en": "UI Language",
        "ja": "UI言語",
        "zh": "界面语言",
    },
}
def t(key, lang):
    """Return the UI string for ``key`` in language ``lang``.

    Falls back to the English text when ``lang`` has no entry, mirroring the
    English fallback used by ``build_dropdown_choices``. An unknown ``key``
    still raises ``KeyError``.
    """
    translations = I18N[key]
    return translations.get(lang, translations["en"])
def build_dropdown_choices(lang):
    """Build the ``(label, value)`` pairs for the camera-work dropdown.

    Labels are shown in the selected UI language only; the value of every
    preset is its Chinese prompt. The final entry is the free-text option,
    whose value is ``CUSTOM_OPTION_VALUE``.

    Args:
        lang: UI language code ("en", "ja" or "zh"). Anything else is
            treated as "en".

    Returns:
        A list of ``(label, value)`` tuples.
    """
    if lang not in ("en", "ja", "zh"):
        lang = "en"
    # CAMERA_OPTIONS keeps its Chinese text under "cn", not "zh"; indexing
    # with the UI code directly raised KeyError when Chinese was selected.
    label_key = "cn" if lang == "zh" else lang
    choices = [(item[label_key], item["cn"]) for item in CAMERA_OPTIONS]
    choices.append((CUSTOM_LABELS[lang], CUSTOM_OPTION_VALUE))
    return choices
def _append_prompt(base: str, extra: str) -> str:
extra = (extra or "").strip()
return (base if not extra else f"{base} {extra}").strip()
def generate_single_view(input_images, prompt, seed, num_inference_steps, true_guidance_scale):
    """Run the edit pipeline once and return the first generated image.

    Args:
        input_images: List of input images; an empty or ``None`` value is
            passed to the pipeline as ``None``.
        prompt: Prompt text passed to the pipeline.
        seed: Seed for the random generator (coerced to ``int``).
        num_inference_steps: Number of inference steps (coerced to ``int``).
        true_guidance_scale: Passed to the pipeline as ``true_cfg_scale``.

    Returns:
        The first element of the pipeline result's ``images``.
    """
    # Slider values and API clients may deliver these numbers as floats;
    # torch.Generator.manual_seed() only accepts an int.
    generator = torch.Generator(device=device).manual_seed(int(seed))
    result = pipe(
        image=input_images if input_images else None,
        prompt=prompt,
        negative_prompt=" ",
        num_inference_steps=int(num_inference_steps),
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    ).images
    return result[0]
@spaces.GPU()
def generate_from_dropdown(
    image,
    dropdown_value_cn,
    custom_cn,
    extra_prompt="",
    seed=DEFAULT_SEED,
    randomize_seed=DEFAULT_RANDOMIZE,
    true_guidance_scale=DEFAULT_TRUE_GUIDANCE_SCALE,
    num_inference_steps=DEFAULT_NUM_INFERENCE_STEPS,
    lang="en",
    progress=gr.Progress(track_tqdm=True),
):
    """Validate the UI inputs, build the prompt and generate one image.

    Args:
        image: Input image, either a PIL image or something ``Image.open``
            accepts. ``None`` produces an error status.
        dropdown_value_cn: Selected dropdown value: a Chinese preset prompt,
            or ``CUSTOM_OPTION_VALUE`` to use ``custom_cn`` instead. A falsy
            value falls back to the first preset.
        custom_cn: Free-text prompt, used only for the custom option.
        extra_prompt: Optional text appended to the end of the prompt.
        seed: Seed to use when ``randomize_seed`` is false.
        randomize_seed: When true, a random seed in ``[0, MAX_SEED]`` replaces ``seed``.
        true_guidance_scale: Forwarded to ``generate_single_view``.
        num_inference_steps: Forwarded to ``generate_single_view``.
        lang: UI language code for status messages; unknown codes use "en".
        progress: Gradio progress tracker.

    Returns:
        ``(image, status_message)``; the image is ``None`` on a validation error.
    """
    # Normalize up front: with an unknown code the I18N lookup used to raise
    # KeyError only after the whole generation had already run.
    if lang not in ("en", "ja", "zh"):
        lang = "en"

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    if image is None:
        return None, t("err_no_img", lang)

    if isinstance(image, Image.Image):
        input_image = image.convert("RGB")
    else:
        # convert() returns a new in-memory image, so the opened file can be
        # closed right away instead of leaking its handle.
        with Image.open(image) as opened:
            input_image = opened.convert("RGB")

    if dropdown_value_cn == CUSTOM_OPTION_VALUE:
        base_cn = (custom_cn or "").strip()
        if not base_cn:
            return None, t("err_no_custom", lang)
    else:
        base_cn = dropdown_value_cn or CAMERA_OPTIONS[0]["cn"]

    final_prompt = _append_prompt(base_cn, extra_prompt)

    # Japanese and Chinese share the same "generating" text.
    progress(0.6, desc="Generating..." if lang == "en" else "生成中...")
    out = generate_single_view(
        [input_image], final_prompt, seed, num_inference_steps, true_guidance_scale
    )
    progress(1.0, desc={"en": "Done", "ja": "完了", "zh": "完成"}[lang])
    return out, t("status_ok", lang)
# --- UI ---
# Stylesheet passed to gr.Blocks(css=...). The visible layout code references
# #app-wrap (elem_id), .notice (inline HTML) and .card (elem_classes);
# .small and .preview are not referenced by the visible layout code.
css = """
#app-wrap {margin: 0 auto; max-width: 1200px;}
.notice {
background: #fff8e1;
border: 1px solid #facc15;
color: #713f12;
padding: 12px 14px;
border-radius: 12px;
font-weight: 600;
line-height: 1.5;
margin-bottom: 10px;
}
.card {
background: white;
border: 1px solid #e5e7eb;
border-radius: 14px;
padding: 14px;
box-shadow: 0 1px 2px rgba(0,0,0,0.04);
}
.small { font-size: 12px; color: #6b7280; }
.preview {
background: #f9fafb;
border: 1px dashed #cbd5e1;
border-radius: 10px;
padding: 8px 10px;
font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
white-space: pre-wrap;
}
"""
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    # NOTE(review): the nesting of the layout containers below follows the
    # conventional reading of this section (input left, controls right,
    # result underneath) — confirm against the deployed app.

    # Language selector (defaults to English).
    lang_selector = gr.Radio(
        label=I18N["lang_label"]["en"],
        choices=[("English", "en"), ("日本語", "ja"), ("中文", "zh")],
        value="en",
        interactive=True,
    )
    title_md = gr.Markdown(I18N["title"]["en"])

    with gr.Column(elem_id="app-wrap"):
        notice_html = gr.HTML(f"<div class='notice'>{I18N['notice']['en']}</div>")

        with gr.Row():
            with gr.Column(scale=1):
                input_image = gr.Image(label=I18N["input_image"]["en"], type="pil", height=420)

            with gr.Column(scale=1, elem_classes=["card"]):
                dropdown = gr.Dropdown(
                    label=I18N["dropdown_label"]["en"],
                    choices=build_dropdown_choices("en"),
                    # The value is the Chinese prompt; only the label is localized.
                    value=CAMERA_OPTIONS[0]["cn"],
                    allow_custom_value=False,
                    interactive=True,
                )
                custom_cn = gr.Textbox(
                    label=I18N["custom_cn_label"]["en"],
                    placeholder=I18N["custom_cn_ph"]["en"],
                    visible=False,
                    lines=2,
                )
                extra_prompt = gr.Textbox(
                    label=I18N["extra_label"]["en"],
                    placeholder=I18N["extra_ph"]["en"],
                    lines=2,
                )

                # Advanced settings accordion.
                with gr.Accordion(I18N["accordion"]["en"], open=False) as adv_acc:
                    seed = gr.Slider(label=I18N["seed"]["en"], minimum=0, maximum=MAX_SEED, step=1, value=DEFAULT_SEED)
                    randomize_seed = gr.Checkbox(label=I18N["rand"]["en"], value=DEFAULT_RANDOMIZE)
                    true_guidance_scale = gr.Slider(label=I18N["tgs"]["en"], minimum=1.0, maximum=10.0, step=0.1, value=DEFAULT_TRUE_GUIDANCE_SCALE)
                    num_inference_steps = gr.Slider(label=I18N["steps"]["en"], minimum=1, maximum=40, step=1, value=DEFAULT_NUM_INFERENCE_STEPS)

                run_button = gr.Button(I18N["run"]["en"], variant="primary")

        with gr.Row():
            with gr.Column(scale=1, elem_classes=["card"]):
                result_image = gr.Image(label=I18N["output"]["en"], type="pil", format="png", height=520, show_download_button=True)
                status_text = gr.Textbox(label=I18N["status"]["en"], interactive=False)

    def _toggle_custom(v_cn):
        """Show the free-text prompt box only while the custom option is selected."""
        return gr.update(visible=(v_cn == CUSTOM_OPTION_VALUE))

    # Visibility depends on the dropdown alone, so only the dropdown is wired.
    # The textboxes were previously wired too, which cost a server round-trip
    # on every keystroke without ever changing the result.
    dropdown.change(
        fn=_toggle_custom,
        inputs=[dropdown],
        outputs=[custom_cn],
    )

    def _switch_lang(lang, current_dropdown_value):
        """Return updates that relabel every localized component for ``lang``.

        The dropdown keeps its current value (falling back to the first
        preset when it is empty); only labels and choices are replaced.
        The returned tuple is ordered like the ``outputs`` list below.
        """
        return (
            gr.update(label=I18N["lang_label"][lang]),  # lang_selector
            I18N["title"][lang],  # title_md
            gr.update(value=f"<div class='notice'>{I18N['notice'][lang]}</div>"),  # notice_html
            gr.update(label=I18N["input_image"][lang]),  # input_image
            gr.update(
                label=I18N["dropdown_label"][lang],
                choices=build_dropdown_choices(lang),
                value=current_dropdown_value if current_dropdown_value else CAMERA_OPTIONS[0]["cn"],
            ),  # dropdown
            gr.update(label=I18N["custom_cn_label"][lang], placeholder=I18N["custom_cn_ph"][lang]),  # custom_cn
            gr.update(label=I18N["extra_label"][lang], placeholder=I18N["extra_ph"][lang]),  # extra_prompt
            # The accordion title used to stay in English after a language switch.
            gr.update(label=I18N["accordion"][lang]),  # adv_acc
            gr.update(label=I18N["seed"][lang]),  # seed
            gr.update(label=I18N["rand"][lang]),  # randomize_seed
            gr.update(label=I18N["tgs"][lang]),  # true_guidance_scale
            gr.update(label=I18N["steps"][lang]),  # num_inference_steps
            gr.update(value=I18N["run"][lang]),  # run_button
            gr.update(label=I18N["output"][lang]),  # result_image
            gr.update(label=I18N["status"][lang]),  # status_text
        )

    # Language switch: the whole UI is shown in the one selected language.
    lang_selector.change(
        fn=_switch_lang,
        inputs=[lang_selector, dropdown],
        outputs=[
            lang_selector,
            title_md,
            notice_html,
            input_image,
            dropdown,
            custom_cn,
            extra_prompt,
            adv_acc,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            run_button,
            result_image,
            status_text,
        ],
    )

    # Run: labels are localized, but the submitted dropdown value is the Chinese prompt.
    run_button.click(
        fn=generate_from_dropdown,
        inputs=[
            input_image,
            dropdown,
            custom_cn,
            extra_prompt,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            lang_selector,
        ],
        outputs=[result_image, status_text],
    )
# Launch the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()