Spaces:
Running
on
Zero
Running
on
Zero
Style: Run Black formatter
Browse files
app.py
CHANGED
|
@@ -12,7 +12,7 @@ from PIL import Image, ImageChops
|
|
| 12 |
from huggingface_hub import hf_hub_download
|
| 13 |
|
| 14 |
# Setup ComfyUI if not already set up
|
| 15 |
-
#if not os.path.exists("ComfyUI"):
|
| 16 |
# print("Setting up ComfyUI...")
|
| 17 |
# subprocess.run(["bash", "setup_comfyui.sh"], check=True)
|
| 18 |
|
|
@@ -21,25 +21,63 @@ os.makedirs("output", exist_ok=True)
|
|
| 21 |
|
| 22 |
# Download models if not already present
|
| 23 |
print("Checking and downloading models...")
|
| 24 |
-
hf_hub_download(
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
hf_hub_download(
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
# Import required functions and setup ComfyUI path
|
| 35 |
import folder_paths
|
| 36 |
|
|
|
|
| 37 |
def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
|
| 38 |
try:
|
| 39 |
return obj[index]
|
| 40 |
except KeyError:
|
| 41 |
return obj["result"][index]
|
| 42 |
|
|
|
|
| 43 |
def find_path(name: str, path: str = None) -> str:
|
| 44 |
if path is None:
|
| 45 |
path = os.getcwd()
|
|
@@ -52,12 +90,14 @@ def find_path(name: str, path: str = None) -> str:
|
|
| 52 |
return None
|
| 53 |
return find_path(name, parent_directory)
|
| 54 |
|
|
|
|
| 55 |
def add_comfyui_directory_to_sys_path() -> None:
|
| 56 |
comfyui_path = find_path("ComfyUI")
|
| 57 |
if comfyui_path is not None and os.path.isdir(comfyui_path):
|
| 58 |
sys.path.append(comfyui_path)
|
| 59 |
print(f"'{comfyui_path}' added to sys.path")
|
| 60 |
|
|
|
|
| 61 |
def add_extra_model_paths() -> None:
|
| 62 |
try:
|
| 63 |
from main import load_extra_path_config
|
|
@@ -69,27 +109,30 @@ def add_extra_model_paths() -> None:
|
|
| 69 |
else:
|
| 70 |
print("Could not find the extra_model_paths config file.")
|
| 71 |
|
|
|
|
| 72 |
# Initialize paths
|
| 73 |
add_comfyui_directory_to_sys_path()
|
| 74 |
add_extra_model_paths()
|
| 75 |
|
|
|
|
| 76 |
def import_custom_nodes() -> None:
|
| 77 |
import asyncio
|
| 78 |
import execution
|
| 79 |
from nodes import init_extra_nodes
|
| 80 |
import server
|
| 81 |
-
|
| 82 |
# Create a new event loop if running in a new thread
|
| 83 |
try:
|
| 84 |
loop = asyncio.get_event_loop()
|
| 85 |
except RuntimeError:
|
| 86 |
loop = asyncio.new_event_loop()
|
| 87 |
asyncio.set_event_loop(loop)
|
| 88 |
-
|
| 89 |
server_instance = server.PromptServer(loop)
|
| 90 |
execution.PromptQueue(server_instance)
|
| 91 |
init_extra_nodes()
|
| 92 |
|
|
|
|
| 93 |
# Import all necessary nodes
|
| 94 |
print("Importing ComfyUI nodes...")
|
| 95 |
try:
|
|
@@ -108,7 +151,7 @@ try:
|
|
| 108 |
UNETLoader,
|
| 109 |
CLIPTextEncode,
|
| 110 |
)
|
| 111 |
-
|
| 112 |
# Initialize all constant nodes and models in global context
|
| 113 |
import_custom_nodes()
|
| 114 |
except Exception as e:
|
|
@@ -156,7 +199,9 @@ SAMPLER = ksamplerselect.get_sampler(sampler_name="euler")
|
|
| 156 |
|
| 157 |
# Initialize depth model
|
| 158 |
cr_clip_input_switch = NODE_CLASS_MAPPINGS["CR Clip Input Switch"]()
|
| 159 |
-
downloadandloaddepthanythingv2model = NODE_CLASS_MAPPINGS[
|
|
|
|
|
|
|
| 160 |
DEPTH_MODEL = downloadandloaddepthanythingv2model.loadmodel(
|
| 161 |
model="depth_anything_v2_vitl_fp32.safetensors"
|
| 162 |
)
|
|
@@ -177,7 +222,7 @@ clipvisionencode = CLIPVisionEncode()
|
|
| 177 |
stylemodelapplyadvanced = NODE_CLASS_MAPPINGS["StyleModelApplyAdvanced"]()
|
| 178 |
emptylatentimage = EmptyLatentImage()
|
| 179 |
basicguider = NODE_CLASS_MAPPINGS["BasicGuider"]()
|
| 180 |
-
basicscheduler = NODE_CLASS_MAPPINGS["BasicScheduler"]()
|
| 181 |
randomnoise = NODE_CLASS_MAPPINGS["RandomNoise"]()
|
| 182 |
samplercustomadvanced = NODE_CLASS_MAPPINGS["SamplerCustomAdvanced"]()
|
| 183 |
vaedecode = VAEDecode()
|
|
@@ -193,18 +238,31 @@ from comfy import model_management
|
|
| 193 |
model_loaders = [CLIP_MODEL, VAE_MODEL, UNET_MODEL, CLIP_VISION_MODEL]
|
| 194 |
|
| 195 |
print("Loading models to GPU...")
|
| 196 |
-
model_management.load_models_gpu(
|
| 197 |
-
|
| 198 |
-
])
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
print("Setup complete!")
|
| 201 |
|
|
|
|
| 202 |
@spaces.GPU
|
| 203 |
-
def generate_image(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
"""Main generation function that processes inputs and returns the path to the generated image."""
|
| 205 |
timestamp = random.randint(10000, 99999)
|
| 206 |
output_filename = f"flux_zen_{timestamp}.png"
|
| 207 |
-
|
| 208 |
with torch.inference_mode():
|
| 209 |
# Set up CLIP
|
| 210 |
clip_switch = cr_clip_input_switch.switch(
|
|
@@ -212,7 +270,7 @@ def generate_image(prompt, structure_image, style_image, depth_strength=15, cann
|
|
| 212 |
clip1=get_value_at_index(CLIP_MODEL, 0),
|
| 213 |
clip2=get_value_at_index(CLIP_MODEL, 0),
|
| 214 |
)
|
| 215 |
-
|
| 216 |
# Encode text
|
| 217 |
text_encoded = cliptextencode.encode(
|
| 218 |
text=prompt,
|
|
@@ -222,10 +280,10 @@ def generate_image(prompt, structure_image, style_image, depth_strength=15, cann
|
|
| 222 |
text="",
|
| 223 |
clip=get_value_at_index(clip_switch, 0),
|
| 224 |
)
|
| 225 |
-
|
| 226 |
# Process structure image
|
| 227 |
structure_img = loadimage.load_image(image=structure_image)
|
| 228 |
-
|
| 229 |
# Resize image
|
| 230 |
resized_img = imageresize.execute(
|
| 231 |
width=get_value_at_index(CONST_1024, 0),
|
|
@@ -236,59 +294,59 @@ def generate_image(prompt, structure_image, style_image, depth_strength=15, cann
|
|
| 236 |
multiple_of=16,
|
| 237 |
image=get_value_at_index(structure_img, 0),
|
| 238 |
)
|
| 239 |
-
|
| 240 |
# Get image size
|
| 241 |
size_info = getimagesizeandcount.getsize(
|
| 242 |
image=get_value_at_index(resized_img, 0)
|
| 243 |
)
|
| 244 |
-
|
| 245 |
# Encode VAE
|
| 246 |
vae_encoded = vaeencode.encode(
|
| 247 |
pixels=get_value_at_index(size_info, 0),
|
| 248 |
vae=get_value_at_index(VAE_MODEL, 0),
|
| 249 |
)
|
| 250 |
-
|
| 251 |
# Process canny
|
| 252 |
canny_processed = canny_prossessor.detect_edge(
|
| 253 |
-
image=get_value_at_index(size_info, 0),
|
| 254 |
-
low_threshold=0.4,
|
| 255 |
-
high_threshold=0.8
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
#Apply canny Advanced
|
| 259 |
canny_conditions = controlNetApplyAdvanced.apply_controlnet(
|
| 260 |
-
positive=get_value_at_index(text_encoded, 0),
|
| 261 |
-
negative=get_value_at_index(empty_text, 0),
|
| 262 |
-
control_net=get_value_at_index(CANNY_XLABS_MODEL, 0),
|
| 263 |
-
image=get_value_at_index(canny_processed, 0),
|
| 264 |
-
strength=canny_strength,
|
| 265 |
-
start_percent=0.0,
|
| 266 |
-
end_percent=0.5,
|
| 267 |
-
vae=get_value_at_index(VAE_MODEL, 0)
|
| 268 |
-
|
| 269 |
-
|
| 270 |
# Process depth
|
| 271 |
depth_processed = depthanything_v2.process(
|
| 272 |
da_model=get_value_at_index(DEPTH_MODEL, 0),
|
| 273 |
images=get_value_at_index(size_info, 0),
|
| 274 |
)
|
| 275 |
-
|
| 276 |
# Apply Flux guidance
|
| 277 |
flux_guided = fluxguidance.append(
|
| 278 |
guidance=depth_strength,
|
| 279 |
conditioning=get_value_at_index(canny_conditions, 0),
|
| 280 |
)
|
| 281 |
-
|
| 282 |
# Process style image
|
| 283 |
style_img = loadimage.load_image(image=style_image)
|
| 284 |
-
|
| 285 |
# Encode style with CLIP Vision
|
| 286 |
style_encoded = clipvisionencode.encode(
|
| 287 |
crop="center",
|
| 288 |
clip_vision=get_value_at_index(CLIP_VISION_MODEL, 0),
|
| 289 |
image=get_value_at_index(style_img, 0),
|
| 290 |
)
|
| 291 |
-
|
| 292 |
# Set up conditioning
|
| 293 |
conditioning = instructpixtopixconditioning.encode(
|
| 294 |
positive=get_value_at_index(flux_guided, 0),
|
|
@@ -296,7 +354,7 @@ def generate_image(prompt, structure_image, style_image, depth_strength=15, cann
|
|
| 296 |
vae=get_value_at_index(VAE_MODEL, 0),
|
| 297 |
pixels=get_value_at_index(depth_processed, 0),
|
| 298 |
)
|
| 299 |
-
|
| 300 |
# Apply style
|
| 301 |
style_applied = stylemodelapplyadvanced.apply_stylemodel(
|
| 302 |
strength=style_strength,
|
|
@@ -304,20 +362,20 @@ def generate_image(prompt, structure_image, style_image, depth_strength=15, cann
|
|
| 304 |
style_model=get_value_at_index(STYLE_MODEL, 0),
|
| 305 |
clip_vision_output=get_value_at_index(style_encoded, 0),
|
| 306 |
)
|
| 307 |
-
|
| 308 |
# Set up empty latent
|
| 309 |
empty_latent = emptylatentimage.generate(
|
| 310 |
width=get_value_at_index(resized_img, 1),
|
| 311 |
height=get_value_at_index(resized_img, 2),
|
| 312 |
batch_size=1,
|
| 313 |
)
|
| 314 |
-
|
| 315 |
# Set up guidance
|
| 316 |
guided = basicguider.get_guider(
|
| 317 |
model=get_value_at_index(UNET_MODEL, 0),
|
| 318 |
conditioning=get_value_at_index(style_applied, 0),
|
| 319 |
)
|
| 320 |
-
|
| 321 |
# Set up scheduler
|
| 322 |
schedule = basicscheduler.get_sigmas(
|
| 323 |
scheduler="simple",
|
|
@@ -325,10 +383,10 @@ def generate_image(prompt, structure_image, style_image, depth_strength=15, cann
|
|
| 325 |
denoise=1,
|
| 326 |
model=get_value_at_index(UNET_MODEL, 0),
|
| 327 |
)
|
| 328 |
-
|
| 329 |
# Generate random noise
|
| 330 |
noise = randomnoise.get_noise(noise_seed=random.randint(1, 2**64))
|
| 331 |
-
|
| 332 |
# Sample
|
| 333 |
sampled = samplercustomadvanced.sample(
|
| 334 |
noise=get_value_at_index(noise, 0),
|
|
@@ -337,25 +395,25 @@ def generate_image(prompt, structure_image, style_image, depth_strength=15, cann
|
|
| 337 |
sigmas=get_value_at_index(schedule, 0),
|
| 338 |
latent_image=get_value_at_index(empty_latent, 0),
|
| 339 |
)
|
| 340 |
-
|
| 341 |
# Decode VAE
|
| 342 |
decoded = vaedecode.decode(
|
| 343 |
samples=get_value_at_index(sampled, 0),
|
| 344 |
vae=get_value_at_index(VAE_MODEL, 0),
|
| 345 |
)
|
| 346 |
-
|
| 347 |
# Create text node for prefix
|
| 348 |
prefix = cr_text.text_multiline(text=f"flux_zen_{timestamp}")
|
| 349 |
-
|
| 350 |
# Use SaveImage node to save the image
|
| 351 |
saved_data = saveimage.save_images(
|
| 352 |
filename_prefix=get_value_at_index(prefix, 0),
|
| 353 |
images=get_value_at_index(decoded, 0),
|
| 354 |
)
|
| 355 |
-
|
| 356 |
try:
|
| 357 |
saved_path = f"output/{saved_data['ui']['images'][0]['filename']}"
|
| 358 |
-
|
| 359 |
return saved_path
|
| 360 |
except Exception as e:
|
| 361 |
print(f"Error getting saved image path: {e}")
|
|
@@ -365,59 +423,56 @@ def generate_image(prompt, structure_image, style_image, depth_strength=15, cann
|
|
| 365 |
|
| 366 |
with gr.Blocks(css="footer {visibility: hidden}") as app:
|
| 367 |
gr.Markdown("# 🎨 FLUX Zen Style Depth+Canny")
|
| 368 |
-
gr.Markdown(
|
| 369 |
-
|
|
|
|
|
|
|
| 370 |
with gr.Row():
|
| 371 |
with gr.Column(scale=1):
|
| 372 |
prompt_input = gr.Textbox(
|
| 373 |
-
label="Prompt",
|
| 374 |
placeholder="Enter your prompt here...",
|
| 375 |
-
info="Describe the image you want to generate"
|
| 376 |
)
|
| 377 |
with gr.Row():
|
| 378 |
with gr.Column(scale=1):
|
| 379 |
structure_image = gr.Image(
|
| 380 |
-
image_mode=
|
| 381 |
-
label="Structure Image",
|
| 382 |
-
type="filepath"
|
| 383 |
)
|
| 384 |
depth_strength = gr.Slider(
|
| 385 |
-
minimum=0,
|
| 386 |
-
maximum=50,
|
| 387 |
-
value=15,
|
| 388 |
label="Depth Strength",
|
| 389 |
-
info="Controls how much the depth map influences the result"
|
| 390 |
)
|
| 391 |
canny_strength = gr.Slider(
|
| 392 |
-
minimum=0,
|
| 393 |
-
maximum=1.0,
|
| 394 |
-
value=0.30,
|
| 395 |
label="Canny Strength",
|
| 396 |
-
info="Controls how much the edge detection influences the result"
|
| 397 |
)
|
| 398 |
steps = gr.Slider(
|
| 399 |
-
minimum=10,
|
| 400 |
-
maximum=50,
|
| 401 |
-
value=28,
|
| 402 |
label="Steps",
|
| 403 |
-
info="More steps = better quality but slower generation"
|
| 404 |
)
|
| 405 |
with gr.Column(scale=1):
|
| 406 |
-
style_image = gr.Image(
|
| 407 |
-
label="Style Image",
|
| 408 |
-
type="filepath"
|
| 409 |
-
)
|
| 410 |
style_strength = gr.Slider(
|
| 411 |
-
minimum=0,
|
| 412 |
-
maximum=1,
|
| 413 |
-
value=0.5,
|
| 414 |
label="Style Strength",
|
| 415 |
-
info="Controls how much the style image influences the result"
|
| 416 |
)
|
| 417 |
-
|
| 418 |
with gr.Row():
|
| 419 |
-
generate_btn = gr.Button("Generate",value=True, variant="primary")
|
| 420 |
-
|
| 421 |
with gr.Column(scale=1):
|
| 422 |
output_image = gr.Image(label="Generated Image")
|
| 423 |
|
|
@@ -441,11 +496,20 @@ with gr.Blocks(css="footer {visibility: hidden}") as app:
|
|
| 441 |
|
| 442 |
generate_btn.click(
|
| 443 |
fn=generate_image,
|
| 444 |
-
inputs=[
|
| 445 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 446 |
)
|
| 447 |
-
|
| 448 |
-
gr.Markdown(
|
|
|
|
| 449 |
## How to use
|
| 450 |
1. Enter a prompt describing the image you want to generate
|
| 451 |
2. Upload a structure image to provide the basic shape/composition
|
|
@@ -456,11 +520,12 @@ with gr.Blocks(css="footer {visibility: hidden}") as app:
|
|
| 456 |
## About
|
| 457 |
This demo uses FLUX.1-Redux-dev for style transfer, FLUX.1-Depth-dev for depth-guided generation,
|
| 458 |
and XLabs Canny for edge detection and structure preservation.
|
| 459 |
-
"""
|
|
|
|
| 460 |
|
| 461 |
if __name__ == "__main__":
|
| 462 |
# Create an examples directory if it doesn't exist , for now it is empty
|
| 463 |
os.makedirs("examples", exist_ok=True)
|
| 464 |
-
|
| 465 |
# Launch the app
|
| 466 |
-
app.launch(share=True)
|
|
|
|
| 12 |
from huggingface_hub import hf_hub_download
|
| 13 |
|
| 14 |
# Setup ComfyUI if not already set up
|
| 15 |
+
# if not os.path.exists("ComfyUI"):
|
| 16 |
# print("Setting up ComfyUI...")
|
| 17 |
# subprocess.run(["bash", "setup_comfyui.sh"], check=True)
|
| 18 |
|
|
|
|
| 21 |
|
| 22 |
# Download models if not already present
|
| 23 |
print("Checking and downloading models...")
|
| 24 |
+
# (repo_id, filename, local_dir) for each checkpoint fetched at start-up.
# Kept as data so that adding or swapping a model is a one-line change
# instead of another copy-pasted hf_hub_download(...) call.
_MODEL_FILES = (
    (
        "black-forest-labs/FLUX.1-Redux-dev",
        "flux1-redux-dev.safetensors",
        "models/style_models",
    ),
    (
        "black-forest-labs/FLUX.1-Depth-dev",
        "flux1-depth-dev.safetensors",
        "models/diffusion_models",
    ),
    (
        "black-forest-labs/FLUX.1-Canny-dev",
        "flux1-canny-dev.safetensors",
        "models/controlnet",
    ),
    (
        "XLabs-AI/flux-controlnet-collections",
        "flux-canny-controlnet-v3.safetensors",
        "models/controlnet",
    ),
    (
        "Comfy-Org/sigclip_vision_384",
        "sigclip_vision_patch14_384.safetensors",
        "models/clip_vision",
    ),
    (
        "Kijai/DepthAnythingV2-safetensors",
        "depth_anything_v2_vitl_fp32.safetensors",
        "models/depthanything",
    ),
    (
        "black-forest-labs/FLUX.1-dev",
        "ae.safetensors",
        "models/vae/FLUX1",
    ),
    (
        "comfyanonymous/flux_text_encoders",
        "clip_l.safetensors",
        "models/text_encoders",
    ),
)

# Same order as the original sequence of calls; the return values of these
# eight downloads were never used, so they are discarded here as well.
for _repo_id, _filename, _local_dir in _MODEL_FILES:
    hf_hub_download(repo_id=_repo_id, filename=_filename, local_dir=_local_dir)

# The T5 encoder is the only download whose returned path is kept in a
# module-level name, so it stays a separate, explicit call.
t5_path = hf_hub_download(
    repo_id="comfyanonymous/flux_text_encoders",
    filename="t5xxl_fp16.safetensors",
    local_dir="models/text_encoders/t5",
)
|
| 69 |
|
| 70 |
# Import required functions and setup ComfyUI path
|
| 71 |
import folder_paths
|
| 72 |
|
| 73 |
+
|
| 74 |
def get_value_at_index(obj: Union[Sequence, Mapping], index: int) -> Any:
    """Return the element of *obj* at *index*.

    ``obj[index]`` is tried first. If that lookup raises ``KeyError``
    (i.e. *obj* is a mapping that has no such key), the value is read from
    ``obj["result"][index]`` instead.

    Args:
        obj: A sequence, or a mapping that either contains *index* as a key
            or carries its values under the ``"result"`` key.
        index: Position (or key) of the wanted element.

    Returns:
        The element found at *index*.

    Raises:
        KeyError: If the fallback is taken and *obj* has no ``"result"`` key.
        IndexError: If *index* is out of range; only ``KeyError`` triggers
            the fallback, so this propagates unchanged.
    """
    try:
        return obj[index]
    except KeyError:
        # Mapping-style output: the values live under "result".
        return obj["result"][index]
|
| 79 |
|
| 80 |
+
|
| 81 |
def find_path(name: str, path: str = None) -> str:
|
| 82 |
if path is None:
|
| 83 |
path = os.getcwd()
|
|
|
|
| 90 |
return None
|
| 91 |
return find_path(name, parent_directory)
|
| 92 |
|
| 93 |
+
|
| 94 |
def add_comfyui_directory_to_sys_path() -> None:
    """Append the located ``ComfyUI`` directory to ``sys.path``.

    The directory is looked up with ``find_path("ComfyUI")``. If nothing is
    found, or the result is not a directory, ``sys.path`` is left untouched
    and nothing is printed.
    """
    comfyui_path = find_path("ComfyUI")
    if comfyui_path is not None and os.path.isdir(comfyui_path):
        sys.path.append(comfyui_path)
        print(f"'{comfyui_path}' added to sys.path")
|
| 99 |
|
| 100 |
+
|
| 101 |
def add_extra_model_paths() -> None:
|
| 102 |
try:
|
| 103 |
from main import load_extra_path_config
|
|
|
|
| 109 |
else:
|
| 110 |
print("Could not find the extra_model_paths config file.")
|
| 111 |
|
| 112 |
+
|
| 113 |
# Initialize paths
|
| 114 |
add_comfyui_directory_to_sys_path()
|
| 115 |
add_extra_model_paths()
|
| 116 |
|
| 117 |
+
|
| 118 |
def import_custom_nodes() -> None:
    """Create a prompt server and queue, then initialise the extra nodes.

    The imports are function-local; ``execution``, ``nodes`` and ``server``
    are presumably ComfyUI modules that only resolve once its directory is
    on ``sys.path`` - confirm against the start-up order of this script.
    """
    import asyncio
    import execution
    from nodes import init_extra_nodes
    import server

    # Create a new event loop if running in a new thread
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    # The queue is constructed for its side effects only; its return value
    # is intentionally not kept.
    server_instance = server.PromptServer(loop)
    execution.PromptQueue(server_instance)

    # NOTE(review): called synchronously here; if the pinned ComfyUI version
    # defines init_extra_nodes as a coroutine it would need to be awaited -
    # confirm against the installed release.
    init_extra_nodes()
|
| 134 |
|
| 135 |
+
|
| 136 |
# Import all necessary nodes
|
| 137 |
print("Importing ComfyUI nodes...")
|
| 138 |
try:
|
|
|
|
| 151 |
UNETLoader,
|
| 152 |
CLIPTextEncode,
|
| 153 |
)
|
| 154 |
+
|
| 155 |
# Initialize all constant nodes and models in global context
|
| 156 |
import_custom_nodes()
|
| 157 |
except Exception as e:
|
|
|
|
| 199 |
|
| 200 |
# Initialize depth model
|
| 201 |
cr_clip_input_switch = NODE_CLASS_MAPPINGS["CR Clip Input Switch"]()
|
| 202 |
+
downloadandloaddepthanythingv2model = NODE_CLASS_MAPPINGS[
|
| 203 |
+
"DownloadAndLoadDepthAnythingV2Model"
|
| 204 |
+
]()
|
| 205 |
DEPTH_MODEL = downloadandloaddepthanythingv2model.loadmodel(
|
| 206 |
model="depth_anything_v2_vitl_fp32.safetensors"
|
| 207 |
)
|
|
|
|
| 222 |
stylemodelapplyadvanced = NODE_CLASS_MAPPINGS["StyleModelApplyAdvanced"]()
|
| 223 |
emptylatentimage = EmptyLatentImage()
|
| 224 |
basicguider = NODE_CLASS_MAPPINGS["BasicGuider"]()
|
| 225 |
+
basicscheduler = NODE_CLASS_MAPPINGS["BasicScheduler"]()
|
| 226 |
randomnoise = NODE_CLASS_MAPPINGS["RandomNoise"]()
|
| 227 |
samplercustomadvanced = NODE_CLASS_MAPPINGS["SamplerCustomAdvanced"]()
|
| 228 |
vaedecode = VAEDecode()
|
|
|
|
| 238 |
model_loaders = [CLIP_MODEL, VAE_MODEL, UNET_MODEL, CLIP_VISION_MODEL]
|
| 239 |
|
| 240 |
print("Loading models to GPU...")
|
| 241 |
+
model_management.load_models_gpu(
|
| 242 |
+
[
|
| 243 |
+
loader[0].patcher if hasattr(loader[0], "patcher") else loader[0]
|
| 244 |
+
for loader in model_loaders
|
| 245 |
+
]
|
| 246 |
+
)
|
| 247 |
|
| 248 |
print("Setup complete!")
|
| 249 |
|
| 250 |
+
|
| 251 |
@spaces.GPU
|
| 252 |
+
def generate_image(
|
| 253 |
+
prompt,
|
| 254 |
+
structure_image,
|
| 255 |
+
style_image,
|
| 256 |
+
depth_strength=15,
|
| 257 |
+
canny_strength=30,
|
| 258 |
+
style_strength=0.5,
|
| 259 |
+
steps=28,
|
| 260 |
+
progress=gr.Progress(track_tqdm=True),
|
| 261 |
+
):
|
| 262 |
"""Main generation function that processes inputs and returns the path to the generated image."""
|
| 263 |
timestamp = random.randint(10000, 99999)
|
| 264 |
output_filename = f"flux_zen_{timestamp}.png"
|
| 265 |
+
|
| 266 |
with torch.inference_mode():
|
| 267 |
# Set up CLIP
|
| 268 |
clip_switch = cr_clip_input_switch.switch(
|
|
|
|
| 270 |
clip1=get_value_at_index(CLIP_MODEL, 0),
|
| 271 |
clip2=get_value_at_index(CLIP_MODEL, 0),
|
| 272 |
)
|
| 273 |
+
|
| 274 |
# Encode text
|
| 275 |
text_encoded = cliptextencode.encode(
|
| 276 |
text=prompt,
|
|
|
|
| 280 |
text="",
|
| 281 |
clip=get_value_at_index(clip_switch, 0),
|
| 282 |
)
|
| 283 |
+
|
| 284 |
# Process structure image
|
| 285 |
structure_img = loadimage.load_image(image=structure_image)
|
| 286 |
+
|
| 287 |
# Resize image
|
| 288 |
resized_img = imageresize.execute(
|
| 289 |
width=get_value_at_index(CONST_1024, 0),
|
|
|
|
| 294 |
multiple_of=16,
|
| 295 |
image=get_value_at_index(structure_img, 0),
|
| 296 |
)
|
| 297 |
+
|
| 298 |
# Get image size
|
| 299 |
size_info = getimagesizeandcount.getsize(
|
| 300 |
image=get_value_at_index(resized_img, 0)
|
| 301 |
)
|
| 302 |
+
|
| 303 |
# Encode VAE
|
| 304 |
vae_encoded = vaeencode.encode(
|
| 305 |
pixels=get_value_at_index(size_info, 0),
|
| 306 |
vae=get_value_at_index(VAE_MODEL, 0),
|
| 307 |
)
|
| 308 |
+
|
| 309 |
# Process canny
|
| 310 |
canny_processed = canny_prossessor.detect_edge(
|
| 311 |
+
image=get_value_at_index(size_info, 0),
|
| 312 |
+
low_threshold=0.4,
|
| 313 |
+
high_threshold=0.8,
|
| 314 |
+
)
|
| 315 |
+
|
| 316 |
+
# Apply canny Advanced
|
| 317 |
canny_conditions = controlNetApplyAdvanced.apply_controlnet(
|
| 318 |
+
positive=get_value_at_index(text_encoded, 0),
|
| 319 |
+
negative=get_value_at_index(empty_text, 0),
|
| 320 |
+
control_net=get_value_at_index(CANNY_XLABS_MODEL, 0),
|
| 321 |
+
image=get_value_at_index(canny_processed, 0),
|
| 322 |
+
strength=canny_strength,
|
| 323 |
+
start_percent=0.0,
|
| 324 |
+
end_percent=0.5,
|
| 325 |
+
vae=get_value_at_index(VAE_MODEL, 0),
|
| 326 |
+
)
|
| 327 |
+
|
| 328 |
# Process depth
|
| 329 |
depth_processed = depthanything_v2.process(
|
| 330 |
da_model=get_value_at_index(DEPTH_MODEL, 0),
|
| 331 |
images=get_value_at_index(size_info, 0),
|
| 332 |
)
|
| 333 |
+
|
| 334 |
# Apply Flux guidance
|
| 335 |
flux_guided = fluxguidance.append(
|
| 336 |
guidance=depth_strength,
|
| 337 |
conditioning=get_value_at_index(canny_conditions, 0),
|
| 338 |
)
|
| 339 |
+
|
| 340 |
# Process style image
|
| 341 |
style_img = loadimage.load_image(image=style_image)
|
| 342 |
+
|
| 343 |
# Encode style with CLIP Vision
|
| 344 |
style_encoded = clipvisionencode.encode(
|
| 345 |
crop="center",
|
| 346 |
clip_vision=get_value_at_index(CLIP_VISION_MODEL, 0),
|
| 347 |
image=get_value_at_index(style_img, 0),
|
| 348 |
)
|
| 349 |
+
|
| 350 |
# Set up conditioning
|
| 351 |
conditioning = instructpixtopixconditioning.encode(
|
| 352 |
positive=get_value_at_index(flux_guided, 0),
|
|
|
|
| 354 |
vae=get_value_at_index(VAE_MODEL, 0),
|
| 355 |
pixels=get_value_at_index(depth_processed, 0),
|
| 356 |
)
|
| 357 |
+
|
| 358 |
# Apply style
|
| 359 |
style_applied = stylemodelapplyadvanced.apply_stylemodel(
|
| 360 |
strength=style_strength,
|
|
|
|
| 362 |
style_model=get_value_at_index(STYLE_MODEL, 0),
|
| 363 |
clip_vision_output=get_value_at_index(style_encoded, 0),
|
| 364 |
)
|
| 365 |
+
|
| 366 |
# Set up empty latent
|
| 367 |
empty_latent = emptylatentimage.generate(
|
| 368 |
width=get_value_at_index(resized_img, 1),
|
| 369 |
height=get_value_at_index(resized_img, 2),
|
| 370 |
batch_size=1,
|
| 371 |
)
|
| 372 |
+
|
| 373 |
# Set up guidance
|
| 374 |
guided = basicguider.get_guider(
|
| 375 |
model=get_value_at_index(UNET_MODEL, 0),
|
| 376 |
conditioning=get_value_at_index(style_applied, 0),
|
| 377 |
)
|
| 378 |
+
|
| 379 |
# Set up scheduler
|
| 380 |
schedule = basicscheduler.get_sigmas(
|
| 381 |
scheduler="simple",
|
|
|
|
| 383 |
denoise=1,
|
| 384 |
model=get_value_at_index(UNET_MODEL, 0),
|
| 385 |
)
|
| 386 |
+
|
| 387 |
# Generate random noise
|
| 388 |
noise = randomnoise.get_noise(noise_seed=random.randint(1, 2**64))
|
| 389 |
+
|
| 390 |
# Sample
|
| 391 |
sampled = samplercustomadvanced.sample(
|
| 392 |
noise=get_value_at_index(noise, 0),
|
|
|
|
| 395 |
sigmas=get_value_at_index(schedule, 0),
|
| 396 |
latent_image=get_value_at_index(empty_latent, 0),
|
| 397 |
)
|
| 398 |
+
|
| 399 |
# Decode VAE
|
| 400 |
decoded = vaedecode.decode(
|
| 401 |
samples=get_value_at_index(sampled, 0),
|
| 402 |
vae=get_value_at_index(VAE_MODEL, 0),
|
| 403 |
)
|
| 404 |
+
|
| 405 |
# Create text node for prefix
|
| 406 |
prefix = cr_text.text_multiline(text=f"flux_zen_{timestamp}")
|
| 407 |
+
|
| 408 |
# Use SaveImage node to save the image
|
| 409 |
saved_data = saveimage.save_images(
|
| 410 |
filename_prefix=get_value_at_index(prefix, 0),
|
| 411 |
images=get_value_at_index(decoded, 0),
|
| 412 |
)
|
| 413 |
+
|
| 414 |
try:
|
| 415 |
saved_path = f"output/{saved_data['ui']['images'][0]['filename']}"
|
| 416 |
+
|
| 417 |
return saved_path
|
| 418 |
except Exception as e:
|
| 419 |
print(f"Error getting saved image path: {e}")
|
|
|
|
| 423 |
|
| 424 |
with gr.Blocks(css="footer {visibility: hidden}") as app:
|
| 425 |
gr.Markdown("# 🎨 FLUX Zen Style Depth+Canny")
|
| 426 |
+
gr.Markdown(
|
| 427 |
+
"Flux[dev] Redux + Flux[dev] Depth and XLabs Canny based on the space FLUX Style Shaping"
|
| 428 |
+
)
|
| 429 |
+
|
| 430 |
with gr.Row():
|
| 431 |
with gr.Column(scale=1):
|
| 432 |
prompt_input = gr.Textbox(
|
| 433 |
+
label="Prompt",
|
| 434 |
placeholder="Enter your prompt here...",
|
| 435 |
+
info="Describe the image you want to generate",
|
| 436 |
)
|
| 437 |
with gr.Row():
|
| 438 |
with gr.Column(scale=1):
|
| 439 |
structure_image = gr.Image(
|
| 440 |
+
image_mode="RGB", label="Structure Image", type="filepath"
|
|
|
|
|
|
|
| 441 |
)
|
| 442 |
depth_strength = gr.Slider(
|
| 443 |
+
minimum=0,
|
| 444 |
+
maximum=50,
|
| 445 |
+
value=15,
|
| 446 |
label="Depth Strength",
|
| 447 |
+
info="Controls how much the depth map influences the result",
|
| 448 |
)
|
| 449 |
canny_strength = gr.Slider(
|
| 450 |
+
minimum=0,
|
| 451 |
+
maximum=1.0,
|
| 452 |
+
value=0.30,
|
| 453 |
label="Canny Strength",
|
| 454 |
+
info="Controls how much the edge detection influences the result",
|
| 455 |
)
|
| 456 |
steps = gr.Slider(
|
| 457 |
+
minimum=10,
|
| 458 |
+
maximum=50,
|
| 459 |
+
value=28,
|
| 460 |
label="Steps",
|
| 461 |
+
info="More steps = better quality but slower generation",
|
| 462 |
)
|
| 463 |
with gr.Column(scale=1):
|
| 464 |
+
style_image = gr.Image(label="Style Image", type="filepath")
|
|
|
|
|
|
|
|
|
|
| 465 |
style_strength = gr.Slider(
|
| 466 |
+
minimum=0,
|
| 467 |
+
maximum=1,
|
| 468 |
+
value=0.5,
|
| 469 |
label="Style Strength",
|
| 470 |
+
info="Controls how much the style image influences the result",
|
| 471 |
)
|
| 472 |
+
|
| 473 |
with gr.Row():
|
| 474 |
+
generate_btn = gr.Button("Generate", value=True, variant="primary")
|
| 475 |
+
|
| 476 |
with gr.Column(scale=1):
|
| 477 |
output_image = gr.Image(label="Generated Image")
|
| 478 |
|
|
|
|
| 496 |
|
| 497 |
generate_btn.click(
|
| 498 |
fn=generate_image,
|
| 499 |
+
inputs=[
|
| 500 |
+
prompt_input,
|
| 501 |
+
structure_image,
|
| 502 |
+
style_image,
|
| 503 |
+
depth_strength,
|
| 504 |
+
canny_strength,
|
| 505 |
+
style_strength,
|
| 506 |
+
steps,
|
| 507 |
+
],
|
| 508 |
+
outputs=[output_image],
|
| 509 |
)
|
| 510 |
+
|
| 511 |
+
gr.Markdown(
|
| 512 |
+
"""
|
| 513 |
## How to use
|
| 514 |
1. Enter a prompt describing the image you want to generate
|
| 515 |
2. Upload a structure image to provide the basic shape/composition
|
|
|
|
| 520 |
## About
|
| 521 |
This demo uses FLUX.1-Redux-dev for style transfer, FLUX.1-Depth-dev for depth-guided generation,
|
| 522 |
and XLabs Canny for edge detection and structure preservation.
|
| 523 |
+
"""
|
| 524 |
+
)
|
| 525 |
|
| 526 |
if __name__ == "__main__":
    # Make sure the examples directory exists (it is empty for now).
    os.makedirs("examples", exist_ok=True)

    # Launch the app
    app.launch(share=True)
|