File size: 8,885 Bytes
71f5363
7d4ee71
 
 
 
 
6571814
 
 
 
 
ec6ec95
7d4ee71
 
 
 
 
 
b6713ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6eb3715
b6713ac
 
6eb3715
 
 
 
 
 
 
b6713ac
6eb3715
bf1b84a
6eb3715
cbc822a
6571814
 
c41048d
6571814
6eb3715
7d4ee71
218b488
d233177
6eb3715
 
 
218b488
b001fe7
f071260
1fc0b65
f071260
 
 
218b488
1fc0b65
218b488
 
 
 
6eb3715
218b488
 
 
 
 
 
 
a03b5b8
1fc0b65
a03b5b8
 
 
6eb3715
a03b5b8
 
6eb3715
 
 
a03b5b8
6eb3715
 
a03b5b8
 
 
 
 
 
35a6f3d
218b488
d12bd9a
e5d1f5f
7d4ee71
 
 
306e662
7d4ee71
 
1fc0b65
7d4ee71
 
d12bd9a
6acc20e
6eb3715
d12bd9a
 
 
6eb3715
 
d12bd9a
a03b5b8
f071260
 
 
 
 
 
6eb3715
f071260
6eb3715
 
f071260
a03b5b8
6eb3715
f071260
7d4ee71
6eb3715
f071260
6eb3715
1fc0b65
 
6eb3715
 
7d4ee71
6eb3715
 
60fbee5
 
 
 
 
 
 
 
 
 
 
7d4ee71
 
 
d233177
6eb3715
a03b5b8
7d4ee71
1fc0b65
60fbee5
 
 
 
 
 
 
 
6eb3715
60fbee5
 
f071260
 
 
 
 
 
6eb3715
218b488
6eb3715
7d4ee71
6eb3715
 
7d4ee71
6eb3715
 
a03b5b8
6eb3715
 
7d4ee71
218b488
6eb3715
218b488
6eb3715
 
7d4ee71
218b488
 
1fc0b65
a03b5b8
7d4ee71
 
 
1fc0b65
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
import gradio as gr
import numpy as np
import random
import torch
import spaces
from PIL import Image
from diffusers import FlowMatchEulerDiscreteScheduler
from optimization import optimize_pipeline_
from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
import math
import os

# --- Model Loading ---
# Everything in this section runs once at import time: the pipeline is built,
# a LoRA is fused into it, and the result is handed to optimize_pipeline_.
dtype = torch.bfloat16
# Use the GPU when one is visible to torch, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Scheduler configuration for Lightning
# base_shift and max_shift are both set to log(3), and dynamic shifting is
# enabled with the "exponential" time-shift type.
scheduler_config = {
    "base_image_seq_len": 256,
    "base_shift": math.log(3),
    "invert_sigmas": False,
    "max_image_seq_len": 8192,
    "max_shift": math.log(3),
    "num_train_timesteps": 1000,
    "shift": 1.0,
    "shift_terminal": None,
    "stochastic_sampling": False,
    "time_shift_type": "exponential",
    "use_beta_sigmas": False,
    "use_dynamic_shifting": True,
    "use_exponential_sigmas": False,
    "use_karras_sigmas": False,
}

# Initialize scheduler
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)

# Load model
# The custom scheduler above replaces whichever scheduler the checkpoint ships with.
pipe = QwenImageEditPlusPipeline.from_pretrained(
    "Qwen/Qwen-Image-Edit-2509",
    scheduler=scheduler,
    torch_dtype=dtype
).to(device)

# Load the 4-step Lightning LoRA weights and fuse them into the pipeline at
# scale 0.7.
pipe.load_lora_weights(
    "2vXpSwA7/iroiro-lora",
    weight_name="qwen_lora/Qwen-Image-Edit-2509-Lightning-4steps-V1.0-bf16_dim1.safetensors"
)
pipe.fuse_lora(lora_scale=0.7)
# Rebind the already-loaded transformer instance to the project-local
# QwenImageTransformer2DModel class, then install the FA3 attention processor.
# NOTE(review): presumably the class swap is needed so the local forward pass
# is used with the FA3 processor - confirm against the qwenimage package.
pipe.transformer.__class__ = QwenImageTransformer2DModel
pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
# Called with two blank 1024x1024 RGB images and a dummy prompt as example inputs.
# NOTE(review): optimize_pipeline_ lives in the local `optimization` module;
# what it does with these inputs is not visible here - confirm.
optimize_pipeline_(pipe, image=[Image.new("RGB", (1024, 1024)), Image.new("RGB", (1024, 1024))], prompt="prompt")

# --- Constants ---
# Largest seed offered to the user: the maximum signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max

# Edit instruction sent to the pipeline for each viewpoint, keyed by view name.
# Long prompts are split across adjacent literals; the joined text is what the
# model receives.
PROMPTS = dict(
    front=(
        "Move the camera to a front-facing position so the full body of the character is visible. "
        "The character stands with both arms extended slightly downward and close to the thighs, "
        "keeping the body evenly balanced on both sides. "
        "The legs are positioned symmetrically with a narrow stance. "
        "The background is plain white."
    ),
    back=(
        "Move the camera to a back-facing position so the full body of the character is visible. "
        "Background is plain white."
    ),
    left=(
        "Move the camera to a side view (profile) from the left so the full body of the character is visible. "
        "Background is plain white."
    ),
    right=(
        "Move the camera to a side view (profile) from the right so the full body of the character is visible. "
        "Background is plain white."
    ),
)

def _append_prompt(base: str, extra: str) -> str:
    """末尾にユーザー指定のプロンプトを追記(空なら変更なし)"""
    extra = (extra or "").strip()
    return (base if not extra else f"{base} {extra}").strip()

def generate_single_view(input_images, prompt, seed, num_inference_steps, true_guidance_scale):
    """Generate one image for a single viewpoint.

    Args:
        input_images: Reference images handed to the pipeline; an empty or
            falsy value is passed on as ``None``.
        prompt: Edit instruction for this view.
        seed: Seed for the torch random generator used by the pipeline.
        num_inference_steps: Number of denoising steps.
        true_guidance_scale: Value forwarded as ``true_cfg_scale``.

    Returns:
        The first (and only requested) image produced by the pipeline.
    """
    # Seed a dedicated generator on the active device so each call is
    # reproducible independently of global RNG state.
    rng = torch.Generator(device=device)
    rng.manual_seed(seed)

    pipe_kwargs = {
        "image": input_images or None,
        "prompt": prompt,
        "negative_prompt": " ",
        "num_inference_steps": num_inference_steps,
        "generator": rng,
        "true_cfg_scale": true_guidance_scale,
        "num_images_per_prompt": 1,
    }
    output = pipe(**pipe_kwargs)
    return output.images[0]

def concat_images_horizontally(images, bg_color=(255, 255, 255)):
    """Join images side by side, left to right, on a single canvas.

    ``None`` entries are skipped. Every remaining image is converted to RGB
    and, if shorter than the tallest one, resized to that height with its
    aspect ratio kept (width truncated to an integer).

    Args:
        images: Iterable of PIL images, possibly containing ``None``.
        bg_color: RGB fill colour of the canvas.

    Returns:
        The combined RGB image, or ``None`` when there is nothing to join.
    """
    rgb_frames = [frame.convert("RGB") for frame in images if frame is not None]
    if len(rgb_frames) == 0:
        return None

    target_height = max(frame.height for frame in rgb_frames)

    def _fit_height(frame):
        # Frames already at the target height are used untouched.
        if frame.height == target_height:
            return frame
        scaled_width = int(frame.width * (target_height / frame.height))
        return frame.resize((scaled_width, target_height), Image.LANCZOS)

    fitted = [_fit_height(frame) for frame in rgb_frames]
    total_width = sum(frame.width for frame in fitted)

    canvas = Image.new("RGB", (total_width, target_height), bg_color)
    offset = 0
    for frame in fitted:
        canvas.paste(frame, (offset, 0))
        offset += frame.width
    return canvas

@spaces.GPU()
def generate_turnaround(
    image,
    extra_prompt="",
    seed=42,
    randomize_seed=False,
    true_guidance_scale=1.0,
    num_inference_steps=4,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate four views plus a horizontally concatenated image.

    The front view is generated from the uploaded image; the back, left and
    right views are then generated from that front view, using ``seed + 1``,
    ``seed + 2`` and ``seed + 3`` respectively. The user's extra prompt is
    appended to every view prompt.

    Args:
        image: A PIL image, or anything ``PIL.Image.open`` accepts (a path or
            file object). ``None`` yields an error status without generating.
        extra_prompt: Text appended to each built-in view prompt.
        seed: Base seed; replaced by a random value in ``[0, MAX_SEED]`` when
            ``randomize_seed`` is true.
        randomize_seed: Whether to ignore ``seed`` and draw a random one.
        true_guidance_scale: Forwarded to each single-view generation.
        num_inference_steps: Forwarded to each single-view generation.
        progress: Gradio progress tracker.

    Returns:
        A 7-tuple ``(front, back, left, right, concat, seed, status)``. The
        five images are ``None`` when no input image was supplied. ``seed``
        is the base seed actually used.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    if image is None:
        return None, None, None, None, None, seed, "エラー: 入力画像をアップロードしてください"

    if isinstance(image, Image.Image):
        input_image = image.convert("RGB")
    else:
        # Image.open keeps the underlying file open; convert() returns an
        # independent in-memory copy, so the file can be closed right after.
        with Image.open(image) as opened:
            input_image = opened.convert("RGB")

    # Progress is reported as the fraction of views already finished, so the
    # bar reaches 100% only when the function returns, not while the last
    # view is still being generated.
    progress(0.0, desc="正面生成中...")
    front = generate_single_view(
        [input_image],
        _append_prompt(PROMPTS["front"], extra_prompt),
        seed,
        num_inference_steps,
        true_guidance_scale,
    )

    # The remaining views are all derived from the generated front view; each
    # one gets its own seed offset (1, 2, 3) in this order.
    derived_stages = (
        ("back", "背面生成中..."),
        ("left", "左側面生成中..."),
        ("right", "右側面生成中..."),
    )
    views = {}
    for offset, (view, desc) in enumerate(derived_stages, start=1):
        progress(offset / 4, desc=desc)
        views[view] = generate_single_view(
            [front],
            _append_prompt(PROMPTS[view], extra_prompt),
            seed + offset,
            num_inference_steps,
            true_guidance_scale,
        )

    back, left, right = views["back"], views["left"], views["right"]

    # Strip order is front -> right -> back -> left.
    concat = concat_images_horizontally([front, right, back, left])
    return front, back, left, right, concat, seed, "✅ PNG形式で4視点+連結画像を生成しました"

# --- UI ---
# Custom stylesheet passed to gr.Blocks: centres and caps the main column
# (#col-container), keeps images fully visible inside their containers, and
# styles the red-toned ".notice" box used for the upload warning.
css = """
#col-container {margin: 0 auto; max-width: 1400px;}
.image-container img {object-fit: contain !important; max-width: 100%; max-height: 100%;}
/* 追加: 注意ボックスのスタイル */
.notice {
  background: #fff5f5;
  border: 1px solid #fca5a5;
  color: #7f1d1d;
  padding: 12px 14px;
  border-radius: 10px;
  font-weight: 600;
  line-height: 1.5;
  margin-bottom: 10px;
}
"""

# Build the Gradio interface: title, upload notice, input image, extra-prompt
# box, run button, status box, four per-view outputs, the concatenated output,
# and a collapsed accordion with advanced settings.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# キャラクター4視点立ち絵自動生成")
    gr.Markdown("アップロードしたキャラクター画像から正面・背面・左右側面、さらに4枚連結のPNG画像を出力します。")

    with gr.Column(elem_id="col-container"):
        # Added: notice text (shown immediately before the upload field)
        gr.HTML(
            "<div class='notice'>"
            "注意:他者が作成した画像のアップロードはご遠慮ください。"
            "他人の著作物・肖像権を侵害する恐れがあります。"
            "当アプリ作成者は、アップロード内容による権利侵害について一切の責任を負いません。"
            "</div>"
        )

        # type="pil" means the callback receives a PIL image (or None when empty).
        input_image = gr.Image(label="入力画像", type="pil", height=500)

        # Extra-prompt field (appended to the end of every view prompt)
        extra_prompt = gr.Textbox(
            label="追加プロンプト(各視点プロンプトの末尾に追記)",
            placeholder="例: high detail, anime style, soft lighting, 4k, pastel colors",
            lines=2
        )

        run_button = gr.Button("🎨 生成開始", variant="primary")
        # Read-only box that shows the success or error message from the callback.
        status_text = gr.Textbox(label="ステータス", interactive=False)

        # Per-view results, two per row, all served as PNG.
        with gr.Row():
            result_front = gr.Image(label="正面", type="pil", format="png", height=400, show_download_button=True)
            result_back = gr.Image(label="背面", type="pil", format="png", height=400, show_download_button=True)
        with gr.Row():
            result_left = gr.Image(label="左側面", type="pil", format="png", height=400, show_download_button=True)
            result_right = gr.Image(label="右側面", type="pil", format="png", height=400, show_download_button=True)

        # Concatenated PNG output
        result_concat = gr.Image(label="連結画像(正面→右→背面→左)", type="pil", format="png", height=400, show_download_button=True)

        # Advanced settings, collapsed by default.
        with gr.Accordion("⚙️ 詳細設定", open=False):
            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
            randomize_seed = gr.Checkbox(label="ランダムシード", value=True)
            true_guidance_scale = gr.Slider(label="True guidance scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
            num_inference_steps = gr.Slider(label="生成ステップ数", minimum=1, maximum=40, step=1, value=4)

    # Wire the button to the generator. The input order must match the
    # positional parameters of generate_turnaround, and the output order must
    # match its 7-tuple return value. `seed` is also an output so the slider
    # shows the seed that was actually used.
    run_button.click(
        fn=generate_turnaround,
        inputs=[input_image, extra_prompt, seed, randomize_seed, true_guidance_scale, num_inference_steps],
        outputs=[result_front, result_back, result_left, result_right, result_concat, seed, status_text],
    )

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()