"""Gradio app that applies camera-work prompts to an image.

A Qwen-Image-Edit-2509 pipeline is loaded once at import time, two LoRAs are
fused into it, and a small multilingual (en / ja / zh) UI lets the user pick a
camera move (or type a custom prompt) that is sent to the pipeline.  The value
sent to the model is always the Chinese prompt; only the labels are localized.
"""
import math
import random

import gradio as gr
import numpy as np
import torch
import spaces
from PIL import Image
from diffusers import FlowMatchEulerDiscreteScheduler

from optimization import optimize_pipeline_
from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3

# --- Model Loading ---
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

scheduler_config = {
    "base_image_seq_len": 256,
    "base_shift": math.log(3),
    "invert_sigmas": False,
    "max_image_seq_len": 8192,
    "max_shift": math.log(3),
    "num_train_timesteps": 1000,
    "shift": 1.0,
    "shift_terminal": None,
    "stochastic_sampling": False,
    "time_shift_type": "exponential",
    "use_beta_sigmas": False,
    "use_dynamic_shifting": True,
    "use_exponential_sigmas": False,
    "use_karras_sigmas": False,
}
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)

pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    scheduler=scheduler,
    torch_dtype=dtype,
).to(device)

# Two LoRAs are loaded and fused one after the other, each at full strength.
pipe.load_lora_weights(
    "2vXpSwA7/iroiro-lora",
    weight_name="qwen_lora/Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16_dim1.safetensors",
)
pipe.fuse_lora(lora_scale=1.0)
pipe.load_lora_weights(
    "dx8152/Qwen-Edit-2509-Multiple-angles",
    weight_name="镜头转换.safetensors",
)
pipe.fuse_lora(lora_scale=1.0)

# Swap in the project's transformer class and attention processor on the
# already-loaded transformer instance.
pipe.transformer.__class__ = QwenImageTransformer2DModel
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())

optimize_pipeline_(
    pipe,
    image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))],
    prompt="prompt",
)

# --- Constants ---
MAX_SEED = np.iinfo(np.int32).max

# Internal defaults (also used as the initial values of the accordion widgets).
DEFAULT_SEED = 0
DEFAULT_RANDOMIZE = True
DEFAULT_TRUE_GUIDANCE_SCALE = 1.0
DEFAULT_NUM_INFERENCE_STEPS = 4

# Languages the UI can be displayed in, and the one used as a fallback.
SUPPORTED_LANGS = ("en", "ja", "zh")
DEFAULT_LANG = "en"

# Camera options.  The value submitted to the model is always 'cn'; 'ja' and
# 'en' are display labels.  There is no separate 'zh' label: the Chinese UI
# shows the 'cn' prompt itself.
CAMERA_OPTIONS = [
    {"cn": "镜头方向左回转45度", "ja": "左に45度回転", "en": "Rotate camera 45° left"},
    {"cn": "镜头向右回转45度", "ja": "右に45度回転", "en": "Rotate camera 45° right"},
    {"cn": "镜头方向左回转90度", "ja": "左に90度回転", "en": "Rotate camera 90° left"},
    {"cn": "镜头向右回转90度", "ja": "右に90度回転", "en": "Rotate camera 90° right"},
    {"cn": "将镜头转为俯视", "ja": "上から見下ろす", "en": "Switch to top-down view"},
    {"cn": "将镜头转为仰视", "ja": "下から見上げる", "en": "Switch to low-angle view"},
    {"cn": "将镜头转为特写镜头", "ja": "クローズアップ", "en": "Switch to close-up lens"},
    {"cn": "将镜头转为中近景镜头", "ja": "ややクローズアップ", "en": "Switch to medium close-up lens"},
    {"cn": "将镜头转为拉远镜头", "ja": "ズームアウト", "en": "Switch to zoom out lens"},
]

# Free-form prompt option (label shown per language).
CUSTOM_OPTION_VALUE = "__custom__"
CUSTOM_LABELS = {
    "en": "Custom (enter Chinese or English prompt)",
    "ja": "自由入力(中国語、英語で入力)",
    "zh": "自定义(中文或英文输入)",
}

# i18n dictionary (the UI shows a single language at a time).
I18N = {
    "title": {
        "en": "Camera Work",
        "ja": "カメラワーク",
        "zh": "镜头控制",
    },
    "notice": {
        "en": "Note: Please avoid uploading images created by others. There may be rights infringements.",
        "ja": "注意:他者が作成した画像のアップロードはご遠慮ください。権利侵害の可能性があります。",
        "zh": "注意:请勿上传他人创作的图片,可能涉及权利侵害。",
    },
    "input_image": {"en": "Input image", "ja": "入力画像", "zh": "输入图像"},
    "dropdown_label": {
        "en": "Camera work",
        "ja": "カメラワーク",
        "zh": "镜头操作",
    },
    "custom_cn_label": {
        "en": "Custom prompt(English and Chinese recommended)",
        "ja": "自由入力のプロンプト(英語、中国語がおすすめ)",
        "zh": "自定义提示词(推荐英文和中文)",
    },
    "custom_cn_ph": {
        "en": "e.g., 将镜头转为斜俯视 并 拉远镜头",
        "ja": "例: 将镜头转为斜俯视 并 拉远镜头",
        "zh": "例如:将镜头转为斜俯视 并 拉远镜头",
    },
    "extra_label": {
        "en": "Extra prompt (optional, appended at end)(English and Chinese recommended)",
        "ja": "追加プロンプト(任意・末尾に付加)(英語、中国語がおすすめ)",
        "zh": "附加提示词(可选,追加在末尾)(推荐英文和中文)",
    },
    "extra_ph": {
        "en": "e.g., Subject is a girl",
        "ja": "例: 被摄体是一名女孩子",
        "zh": "例如:被摄体是一名女孩子",
    },
    "accordion": {"en": "Advanced settings", "ja": "詳細設定", "zh": "高级设置"},
    "seed": {"en": "Seed", "ja": "Seed", "zh": "Seed"},
    "rand": {"en": "Randomize seed", "ja": "ランダムシード", "zh": "随机种子"},
    "tgs": {"en": "True guidance scale", "ja": "True guidance scale", "zh": "True guidance scale"},
    "steps": {"en": "Steps", "ja": "生成ステップ数", "zh": "生成步数"},
    "run": {"en": "Generate", "ja": "生成", "zh": "生成"},
    "output": {"en": "Output image", "ja": "出力画像", "zh": "输出图像"},
    "status": {"en": "Status", "ja": "ステータス", "zh": "状态"},
    "status_ok": {
        "en": "Generated 1 image (PNG).",
        "ja": "1枚生成しました(PNG)。",
        "zh": "已生成 1 张图片(PNG)。",
    },
    "err_no_img": {
        "en": "Error: Please upload an input image.",
        "ja": "エラー: 入力画像をアップロードしてください",
        "zh": "错误:请先上传输入图像。",
    },
    "err_no_custom": {
        "en": "Error: Please enter a custom prompt.",
        "ja": "エラー: 自由入力のプロンプトを入力してください",
        "zh": "错误:请输入自定义提示词。",
    },
    "lang_label": {"en": "UI Language", "ja": "UI言語", "zh": "界面语言"},
    # Progress-bar descriptions shown while an image is being generated.
    "progress_run": {"en": "Generating...", "ja": "生成中...", "zh": "生成中..."},
    "progress_done": {"en": "Done", "ja": "完了", "zh": "完成"},
}


def _normalize_lang(lang):
    """Return *lang* if it is a supported UI language, else the default."""
    return lang if lang in SUPPORTED_LANGS else DEFAULT_LANG


def t(key, lang):
    """Look up the UI string *key* in language *lang*.

    Unsupported or missing languages fall back to English instead of raising.
    An unknown *key* still raises ``KeyError``.
    """
    return I18N[key][_normalize_lang(lang)]


def _camera_label(item, lang):
    """Return the display label of one ``CAMERA_OPTIONS`` entry."""
    # The entries carry no "zh" key, so indexing with the UI language code
    # would raise KeyError for Chinese; the Chinese prompt doubles as label.
    return item["cn"] if lang == "zh" else item[lang]


def build_dropdown_choices(lang):
    """Build the ``(label, value)`` pairs for the camera-work dropdown.

    Labels are shown in *lang* only (falling back to English when *lang* is
    not supported); the value of every camera option is its Chinese prompt.
    The free-form entry is appended last with ``CUSTOM_OPTION_VALUE``.
    """
    lang = _normalize_lang(lang)
    choices = [(_camera_label(item, lang), item["cn"]) for item in CAMERA_OPTIONS]
    choices.append((CUSTOM_LABELS[lang], CUSTOM_OPTION_VALUE))
    return choices


def _append_prompt(base: str, extra: str) -> str:
    """Join *base* and the optional *extra* prompt with a single space."""
    extra = (extra or "").strip()
    return (base if not extra else f"{base} {extra}").strip()


def _notice_html(lang):
    """Return the notice banner markup in *lang*."""
    # Wrapped in the `notice` class so the `.notice` rule in `css` applies.
    return f"<div class='notice'>{t('notice', lang)}</div>"


def generate_single_view(input_images, prompt, seed, num_inference_steps, true_guidance_scale):
    """Run the pipeline once and return the first generated image.

    Args:
        input_images: List of input images; an empty/None value is passed to
            the pipeline as ``None``.
        prompt: Final prompt text.
        seed: Seed for the torch generator.
        num_inference_steps: Number of denoising steps.
        true_guidance_scale: Forwarded as ``true_cfg_scale``.
    """
    # UI sliders may deliver floats; manual_seed needs an int, and the step
    # count is an integer quantity as well.
    generator = torch.Generator(device=device).manual_seed(int(seed))
    result = pipe(
        image=input_images if input_images else None,
        prompt=prompt,
        negative_prompt=" ",
        num_inference_steps=int(num_inference_steps),
        generator=generator,
        true_cfg_scale=true_guidance_scale,
        num_images_per_prompt=1,
    ).images
    return result[0]


@spaces.GPU()
def generate_from_dropdown(
    image,
    dropdown_value_cn,
    custom_cn,
    extra_prompt="",
    seed=DEFAULT_SEED,
    randomize_seed=DEFAULT_RANDOMIZE,
    true_guidance_scale=DEFAULT_TRUE_GUIDANCE_SCALE,
    num_inference_steps=DEFAULT_NUM_INFERENCE_STEPS,
    lang="en",
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one edited image from the UI inputs.

    Args:
        image: A PIL image or something ``PIL.Image.open`` accepts; ``None``
            yields an error status.
        dropdown_value_cn: Selected dropdown value (a Chinese prompt, or
            ``CUSTOM_OPTION_VALUE`` for free-form input).
        custom_cn: Free-form prompt, used only for the custom option.
        extra_prompt: Optional text appended to the base prompt.
        seed: Seed used when *randomize_seed* is false.
        randomize_seed: When true, a random seed in ``[0, MAX_SEED]`` is drawn.
        true_guidance_scale: Forwarded to the pipeline.
        num_inference_steps: Forwarded to the pipeline.
        lang: UI language code used for status/progress messages.
        progress: Gradio progress tracker.

    Returns:
        ``(image_or_None, status_message)``.
    """
    lang = _normalize_lang(lang)

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    if image is None:
        return None, t("err_no_img", lang)

    if isinstance(image, Image.Image):
        input_image = image.convert("RGB")
    else:
        # convert() returns an independent image, so the opened file can be
        # closed right away instead of lingering until garbage collection.
        with Image.open(image) as opened:
            input_image = opened.convert("RGB")
    pil_images = [input_image]

    if dropdown_value_cn == CUSTOM_OPTION_VALUE:
        base_cn = (custom_cn or "").strip()
        if not base_cn:
            return None, t("err_no_custom", lang)
    else:
        base_cn = dropdown_value_cn or CAMERA_OPTIONS[0]["cn"]

    final_prompt = _append_prompt(base_cn, extra_prompt)

    progress(0.6, desc=t("progress_run", lang))
    out = generate_single_view(
        pil_images, final_prompt, seed, num_inference_steps, true_guidance_scale
    )
    progress(1.0, desc=t("progress_done", lang))
    return out, t("status_ok", lang)


# --- UI ---
css = """
#app-wrap {margin: 0 auto; max-width: 1200px;}
.notice {
  background: #fff8e1;
  border: 1px solid #facc15;
  color: #713f12;
  padding: 12px 14px;
  border-radius: 12px;
  font-weight: 600;
  line-height: 1.5;
  margin-bottom: 10px;
}
.card {
  background: white;
  border: 1px solid #e5e7eb;
  border-radius: 14px;
  padding: 14px;
  box-shadow: 0 1px 2px rgba(0,0,0,0.04);
}
.small {
  font-size: 12px;
  color: #6b7280;
}
.preview {
  background: #f9fafb;
  border: 1px dashed #cbd5e1;
  border-radius: 10px;
  padding: 8px 10px;
  font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", monospace;
  white-space: pre-wrap;
}
"""

# NOTE(review): the nesting of the layout containers below was reconstructed
# from a flattened source - confirm it matches the intended layout.
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    # Language selector (English by default).
    lang_selector = gr.Radio(
        label=I18N["lang_label"]["en"],
        choices=[("English", "en"), ("日本語", "ja"), ("中文", "zh")],
        value="en",
        interactive=True,
    )

    title_md = gr.Markdown(I18N["title"]["en"])

    with gr.Column(elem_id="app-wrap"):
        notice_html = gr.HTML(_notice_html("en"))

        with gr.Row():
            with gr.Column(scale=1):
                input_image = gr.Image(label=I18N["input_image"]["en"], type="pil", height=420)

            with gr.Column(scale=1, elem_classes=["card"]):
                dropdown = gr.Dropdown(
                    label=I18N["dropdown_label"]["en"],
                    choices=build_dropdown_choices("en"),
                    # The value is the Chinese prompt; only the label is localized.
                    value=CAMERA_OPTIONS[0]["cn"],
                    allow_custom_value=False,
                    interactive=True,
                )
                custom_cn = gr.Textbox(
                    label=I18N["custom_cn_label"]["en"],
                    placeholder=I18N["custom_cn_ph"]["en"],
                    visible=False,
                    lines=2,
                )
                extra_prompt = gr.Textbox(
                    label=I18N["extra_label"]["en"],
                    placeholder=I18N["extra_ph"]["en"],
                    lines=2,
                )

                # Advanced-settings accordion.
                with gr.Accordion(I18N["accordion"]["en"], open=False) as adv_acc:
                    seed = gr.Slider(
                        label=I18N["seed"]["en"],
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=DEFAULT_SEED,
                    )
                    randomize_seed = gr.Checkbox(label=I18N["rand"]["en"], value=DEFAULT_RANDOMIZE)
                    true_guidance_scale = gr.Slider(
                        label=I18N["tgs"]["en"],
                        minimum=1.0,
                        maximum=10.0,
                        step=0.1,
                        value=DEFAULT_TRUE_GUIDANCE_SCALE,
                    )
                    num_inference_steps = gr.Slider(
                        label=I18N["steps"]["en"],
                        minimum=1,
                        maximum=40,
                        step=1,
                        value=DEFAULT_NUM_INFERENCE_STEPS,
                    )

                run_button = gr.Button(I18N["run"]["en"], variant="primary")

        with gr.Row():
            with gr.Column(scale=1, elem_classes=["card"]):
                result_image = gr.Image(
                    label=I18N["output"]["en"],
                    type="pil",
                    format="png",
                    height=520,
                    show_download_button=True,
                )
                status_text = gr.Textbox(label=I18N["status"]["en"], interactive=False)

    def _toggle_custom(v_cn):
        """Show the free-form prompt box only when the custom option is selected."""
        return gr.update(visible=(v_cn == CUSTOM_OPTION_VALUE))

    # Visibility depends on the dropdown alone, so only the dropdown triggers
    # the toggle (typing into the text boxes no longer round-trips).
    dropdown.change(fn=_toggle_custom, inputs=[dropdown], outputs=[custom_cn])

    def _switch_lang(lang, current_dropdown_value):
        """Re-label every widget in *lang*, keeping the current dropdown value.

        The returned tuple is positional and must stay in the same order as
        the ``outputs`` list of ``lang_selector.change`` below.
        """
        lang = _normalize_lang(lang)
        return (
            gr.update(label=t("lang_label", lang)),  # lang_selector
            t("title", lang),  # title_md
            gr.update(value=_notice_html(lang)),  # notice_html
            gr.update(label=t("input_image", lang)),  # input_image
            gr.update(
                label=t("dropdown_label", lang),
                choices=build_dropdown_choices(lang),
                value=current_dropdown_value if current_dropdown_value else CAMERA_OPTIONS[0]["cn"],
            ),  # dropdown
            gr.update(
                label=t("custom_cn_label", lang),
                placeholder=t("custom_cn_ph", lang),
            ),  # custom_cn
            gr.update(
                label=t("extra_label", lang),
                placeholder=t("extra_ph", lang),
            ),  # extra_prompt
            gr.update(label=t("accordion", lang)),  # adv_acc
            gr.update(label=t("seed", lang)),  # seed
            gr.update(label=t("rand", lang)),  # randomize_seed
            gr.update(label=t("tgs", lang)),  # true_guidance_scale
            gr.update(label=t("steps", lang)),  # num_inference_steps
            gr.update(value=t("run", lang)),  # run_button
            gr.update(label=t("output", lang)),  # result_image
            gr.update(label=t("status", lang)),  # status_text
        )

    # Language switch (the whole UI is shown in one language at a time).
    lang_selector.change(
        fn=_switch_lang,
        inputs=[lang_selector, dropdown],
        outputs=[
            lang_selector,
            title_md,
            notice_html,
            input_image,
            dropdown,
            custom_cn,
            extra_prompt,
            adv_acc,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            run_button,
            result_image,
            status_text,
        ],
    )

    # Run (labels are localized; the submitted dropdown value is the Chinese prompt).
    run_button.click(
        fn=generate_from_dropdown,
        inputs=[
            input_image,
            dropdown,
            custom_cn,
            extra_prompt,
            seed,
            randomize_seed,
            true_guidance_scale,
            num_inference_steps,
            lang_selector,
        ],
        outputs=[result_image, status_text],
    )

if __name__ == "__main__":
    demo.launch()