Qwen-Image-Edit-Rapid-AIO-Loras-Experimental

Running on Zero

App Files Files Community

Professional Noob committed on Feb 3

Commit

c549fb8

verified ·

1 Parent(s): 45b0d64

Update app.py

Browse files

Files changed (1) hide show

app.py +302 -292

app.py CHANGED Viewed

@@ -1,29 +1,14 @@
 # app.py
-# Complete, self-contained Gradio app with:
-#  1) Robust "ensure_pil_rgb" to avoid /tmp/gradio path issues across ZeroGPU workers
-#  2) Per-process GRADIO_TEMP_DIR to reduce temp collisions
-#  3) Qwen2.5-VL RoPE patch to avoid cublasSgemmStridedBatched failures (broadcast multiply instead)
-#  4) Extra debug logging around inputs, image routing, and prompt/token lengths
 import os
 import re
 import gc
 import traceback
 import random
-import time
 from typing import Iterable, Optional
-# -------------------------------
-# Temp-dir hardening (helps when multiple users hit the Space)
-# -------------------------------
-# IMPORTANT: This doesn't magically share files between runtimes, but it reduces collisions
-# and makes temp behavior more deterministic.
-if not os.environ.get("GRADIO_TEMP_DIR"):
-    _pid = os.getpid()
-    _tmp = f"/tmp/gradio_{_pid}"
-    os.makedirs(_tmp, exist_ok=True)
-    os.environ["GRADIO_TEMP_DIR"] = _tmp
 import gradio as gr
 import numpy as np
 import spaces
@@ -44,68 +29,132 @@ from safetensors.torch import load_file as safetensors_load_file
 from gradio.themes import Soft
 from gradio.themes.utils import colors, fonts, sizes
 # ============================================================
-# Qwen2.5-VL RoPE patch (avoid cublas batched GEMM)
 # ============================================================
-def patch_qwen25vl_rope_gemm_to_mul():
     """
-    The observed crash:
-      RuntimeError: CUDA error: CUBLAS_STATUS_INVALID_VALUE
-      ... modeling_qwen2_5_vl.py line ~521
-      freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(2, 3)
-    This patches Qwen2.5-VL rotary embedding to compute the outer product
-    via broadcast multiply instead of batched GEMM.
     """
     try:
-        import transformers.models.qwen2_5_vl.modeling_qwen2_5_vl as m
-        Rotary = getattr(m, "Qwen2_5_VLRotaryEmbedding", None)
-        if Rotary is None:
-            print("[patch] Qwen2_5_VLRotaryEmbedding not found; skipping RoPE patch.")
-            return
-        # Guard: only patch once
-        if getattr(Rotary, "_rope_mul_patch_applied", False):
-            print("[patch] RoPE patch already applied.")
-            return
-        def safe_forward(self, x, position_ids):
-            """
-            Return (cos, sin) with shapes compatible with original.
-            """
-            inv_freq = self.inv_freq.to(device=x.device)
-            # position_ids is typically (bs, seq) or (seq,)
-            if position_ids.dim() == 1:
-                position_ids_ = position_ids.unsqueeze(0)  # (1, seq)
-            else:
-                position_ids_ = position_ids  # (bs, seq)
-            # Outer product via broadcast:
-            # inv: (1,1,dim/2,1)
-            # pos: (bs,1,1,seq)
-            # freqs: (bs,1,seq,dim/2)
-            inv = inv_freq[None, None, :, None].float()
-            pos = position_ids_[:, None, None, :].float()
-            freqs = (inv * pos).transpose(2, 3)
-            # Original commonly duplicates for sin/cos on last dim
-            emb = torch.cat((freqs, freqs), dim=-1)  # (bs,1,seq,dim)
-            cos = emb.cos().to(dtype=x.dtype)
-            sin = emb.sin().to(dtype=x.dtype)
-            return cos, sin
-        Rotary.forward = safe_forward
-        Rotary._rope_mul_patch_applied = True
-        print("[patch] Patched Qwen2.5-VL RoPE to avoid cublas batched GEMM.")
     except Exception as e:
-        print("[patch] Failed to patch RoPE:", repr(e))
-patch_qwen25vl_rope_gemm_to_mul()
 # ============================================================
 # Theme
@@ -126,7 +175,6 @@ colors.orange_red = colors.Color(
     c950="#802200",
 )
 class OrangeRedTheme(Soft):
     def __init__(
         self,
@@ -182,17 +230,15 @@ class OrangeRedTheme(Soft):
             block_label_background_fill="*primary_200",
         )
 orange_red_theme = OrangeRedTheme()
 # ============================================================
-# Device
 # ============================================================
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print("===== Application Startup at", time.strftime("%Y-%m-%d %H:%M:%S"), "=====")
-print("GRADIO_TEMP_DIR =", os.environ.get("GRADIO_TEMP_DIR"))
 print("CUDA_VISIBLE_DEVICES=", os.environ.get("CUDA_VISIBLE_DEVICES"))
 print("torch.__version__ =", torch.__version__)
 print("torch.version.cuda =", torch.version.cuda)
@@ -203,6 +249,14 @@ if torch.cuda.is_available():
     print("device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
 print("Using device:", device)
 # ============================================================
 # AIO version (Space variable)
 # ============================================================
@@ -213,7 +267,6 @@ DEFAULT_AIO_VERSION = "v19"
 _VER_RE = re.compile(r"^v\d+$")
 _DIGITS_RE = re.compile(r"^\d+$")
 def _normalize_version(raw: str) -> Optional[str]:
     if raw is None:
         return None
@@ -226,7 +279,6 @@ def _normalize_version(raw: str) -> Optional[str]:
         return f"v{s}"
     return None
 _AIO_ENV_RAW = os.environ.get("AIO_VERSION", "")
 _AIO_ENV_NORM = _normalize_version(_AIO_ENV_RAW)
@@ -237,6 +289,7 @@ print(f"AIO_VERSION (env raw) = {_AIO_ENV_RAW!r}")
 print(f"AIO_VERSION (normalized) = {_AIO_ENV_NORM!r}")
 print(f"Using AIO_VERSION = {AIO_VERSION} ({AIO_VERSION_SOURCE})")
 # ============================================================
 # Pipeline
 # ============================================================
@@ -248,7 +301,6 @@ from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
 dtype = torch.bfloat16
 def _load_pipe_with_version(version: str) -> QwenImageEditPlusPipeline:
     sub = f"{version}/transformer"
     print(f"📦 Loading AIO transformer: {AIO_REPO_ID} / {sub}")
@@ -264,8 +316,6 @@ def _load_pipe_with_version(version: str) -> QwenImageEditPlusPipeline:
     ).to(device)
     return p
-# Forgiving load: try env/default version, fallback to v19 if it fails
 try:
     pipe = _load_pipe_with_version(AIO_VERSION)
 except Exception:
@@ -286,64 +336,9 @@ except Exception as e:
 MAX_SEED = np.iinfo(np.int32).max
-# ============================================================
-# Robust image coercion (prevents /tmp/gradio FileNotFound crashes)
-# ============================================================
-def ensure_pil_rgb(x, *, label: str = "") -> Optional[Image.Image]:
-    """
-    Accepts:
-      - PIL.Image
-      - numpy arrays
-      - Gallery tuples (img, caption)
-      - dict payloads with path/name
-      - file path strings (best-effort)
-    Returns PIL RGB or None.
-    """
-    if x is None:
-        return None
-    # Gallery often returns (img, caption)
-    if isinstance(x, tuple) and len(x) >= 1:
-        x = x[0]
-    # Sometimes dict payloads
-    if isinstance(x, dict):
-        path = x.get("path") or x.get("name")
-        if path:
-            x = path
-    if isinstance(x, Image.Image):
-        return x.convert("RGB")
-    if isinstance(x, np.ndarray):
-        try:
-            return Image.fromarray(x).convert("RGB")
-        except Exception:
-            return None
-    if isinstance(x, str):
-        # Path-based input: may fail on ZeroGPU worker if file isn't present.
-        exists = os.path.exists(x)
-        print(f"[DEBUG][ensure_pil_rgb] {label} got filepath: {x!r} exists={exists}")
-        if not exists:
-            # Return None so caller can show a meaningful error (instead of crashing).
-            return None
-        try:
-            return Image.open(x).convert("RGB")
-        except Exception as e:
-            print(f"[DEBUG][ensure_pil_rgb] {label} PIL open failed: {e!r}")
-            return None
-    # Last resort
-    try:
-        return Image.fromarray(np.array(x)).convert("RGB")
-    except Exception:
-        return None
 # ============================================================
-# Derived conditioning (Transformers): Pose + Depth
 # ============================================================
 POSE_MODEL_ID = "usyd-community/vitpose-base-simple"
@@ -363,11 +358,9 @@ COCO17_EDGES = [
     (12, 14), (14, 16),
 ]
 def _derived_device(use_gpu: bool) -> torch.device:
     return torch.device("cuda" if (use_gpu and torch.cuda.is_available()) else "cpu")
 def _load_pose_models(dev: torch.device):
     key = str(dev)
     if key in _POSE_CACHE:
@@ -385,7 +378,6 @@ def _load_pose_models(dev: torch.device):
     _POSE_CACHE[key] = (det_proc, det_model, pose_proc, pose_model)
     return _POSE_CACHE[key]
 def _load_depth_models(dev: torch.device):
     key = str(dev)
     if key in _DEPTH_CACHE:
@@ -398,7 +390,6 @@ def _load_depth_models(dev: torch.device):
     _DEPTH_CACHE[key] = (proc, model)
     return _DEPTH_CACHE[key]
 def _draw_skeleton_on_blank(
     size: tuple[int, int],
     persons_keypoints: list[np.ndarray],
@@ -425,15 +416,9 @@ def _draw_skeleton_on_blank(
             if sc[i] < kp_thresh:
                 continue
             x, y = float(kps[i, 0]), float(kps[i, 1])
-            draw.ellipse(
-                [(x - point_r, y - point_r), (x + point_r, y + point_r)],
-                fill=(255, 255, 255),
-                outline=None,
-            )
     return canvas
 def make_pose_map(
     img: Image.Image,
     *,
@@ -481,17 +466,14 @@ def make_pose_map(
     persons_kps, persons_sc = [], []
     for pr in pose_results:
-        kps = pr["keypoints"].detach().cpu().numpy()
-        sc = pr["scores"].detach().cpu().numpy()
-        persons_kps.append(kps)
-        persons_sc.append(sc)
     if not persons_kps:
         return Image.new("RGB", img.size, (0, 0, 0))
     return _draw_skeleton_on_blank(img.size, persons_kps, persons_sc)
 def make_depth_map(img: Image.Image, *, use_gpu: bool) -> Image.Image:
     img = img.convert("RGB")
     dev = _derived_device(use_gpu)
@@ -520,19 +502,86 @@ def make_depth_map(img: Image.Image, *, use_gpu: bool) -> Image.Image:
     return Image.fromarray(depth8, mode="L").convert("RGB")
 def _append_to_gallery(existing, new_img: Image.Image):
     items = []
     if existing:
         for it in existing:
-            pil = ensure_pil_rgb(it, label="gallery_item")
             if pil is not None:
                 items.append(pil)
     items.append(new_img)
     return items
 # ============================================================
-# LoRA adapters + presets
 # ============================================================
 NONE_LORA = "None"
@@ -689,8 +738,8 @@ LORA_PRESET_PROMPTS = {
     "Any2Real_2601": "change the picture 1 to realistic photograph",
     "Semirealistic-photo-detailer": "transform the image to semi-realistic image",
     "AnyPose": "Make the person in image 1 do the exact same pose of the person in image 2. Changing the style and background of the image of the person in image 1 is undesirable, so don't do it. The new pose should be pixel accurate to the pose we are trying to copy. The position of the arms and head and legs should be the same as the pose we are trying to copy. Change the field of view and angle to match exactly image 2. Head tilt and eye gaze pose should match the person in image 2.",
-    "Hyperrealistic-Portrait": "Transform the image into an ultra-realistic photorealistic portrait with strict identity preservation, facing straight to the camera.",
-    "Ultrarealistic-Portrait": "Transform the image into an ultra-realistic glamour portrait while strictly preserving the subject’s identity.",
     "Upscale2K": "Upscale this picture to 4K resolution.",
     "BFS-Best-FaceSwap": "head_swap: start with Picture 1 as the base image, keeping its lighting, environment, and background. remove the head from Picture 1 completely and replace it with the head from Picture 2, strictly preserving the hair, eye color, and nose structure of Picture 2. copy the eye direction, head rotation, and micro-expressions from Picture 1. high quality, sharp details, 4k",
     "BFS-Best-FaceSwap-merge": "head_swap: start with Picture 1 as the base image, keeping its lighting, environment, and background. remove the head from Picture 1 completely and replace it with the head from Picture 2, strictly preserving the hair, eye color, and nose structure of Picture 2. copy the eye direction, head rotation, and micro-expressions from Picture 1. high quality, sharp details, 4k",
@@ -698,14 +747,14 @@ LORA_PRESET_PROMPTS = {
 LOADED_ADAPTERS = set()
 # ============================================================
-# Helpers: resolution
 # ============================================================
 def _round_to_multiple(x: int, m: int) -> int:
     return max(m, (int(x) // m) * m)
 def compute_canvas_dimensions_from_area(
     image: Image.Image,
     target_area: int,
@@ -713,35 +762,29 @@ def compute_canvas_dimensions_from_area(
 ) -> tuple[int, int]:
     w, h = image.size
     aspect = w / h if h else 1.0
     from qwenimage.pipeline_qwenimage_edit_plus import calculate_dimensions
     width, height = calculate_dimensions(int(target_area), float(aspect))
     width = _round_to_multiple(int(width), int(multiple_of))
     height = _round_to_multiple(int(height), int(multiple_of))
     return width, height
 def get_target_area_for_lora(
     image: Image.Image,
     lora_adapter: str,
     user_target_megapixels: float,
 ) -> int:
     spec = ADAPTER_SPECS.get(lora_adapter, {})
     if "target_area" in spec:
         try:
             return int(spec["target_area"])
         except Exception:
             pass
     if "target_megapixels" in spec:
         try:
             mp = float(spec["target_megapixels"])
             return int(mp * 1024 * 1024)
         except Exception:
             pass
     if "target_long_edge" in spec:
         try:
             long_edge = int(spec["target_long_edge"])
@@ -755,47 +798,19 @@ def get_target_area_for_lora(
             return int(new_w * new_h)
         except Exception:
             pass
     return int(float(user_target_megapixels) * 1024 * 1024)
 # ============================================================
-# Helpers: multi-input routing
 # ============================================================
 def lora_requires_two_images(lora_adapter: str) -> bool:
     """Report whether the adapter's spec sets a truthy ``requires_two_images``.

     Adapters missing from ``ADAPTER_SPECS``, or whose spec lacks the key,
     yield ``False``.
     """
     spec = ADAPTER_SPECS.get(lora_adapter, {})
     needs_second_image = spec.get("requires_two_images", False)
     return bool(needs_second_image)
 def image2_label_for_lora(lora_adapter: str) -> str:
     """Return the ``image2_label`` from the adapter's spec, as a string.

     Falls back to a generic reference-image label when the adapter is not
     in ``ADAPTER_SPECS`` or its spec has no ``image2_label`` entry.
     """
     spec = ADAPTER_SPECS.get(lora_adapter, {})
     label = spec.get("image2_label", "Upload Reference (Image 2)")
     return str(label)
-def build_labeled_images(
-    img1: Image.Image,
-    img2: Optional[Image.Image],
-    extra_imgs: Optional[list[Image.Image]],
-) -> dict[str, Image.Image]:
-    labeled: dict[str, Image.Image] = {}
-    idx = 1
-    labeled[f"image_{idx}"] = img1
-    idx += 1
-    if img2 is not None:
-        labeled[f"image_{idx}"] = img2
-        idx += 1
-    if extra_imgs:
-        for im in extra_imgs:
-            if im is None:
-                continue
-            labeled[f"image_{idx}"] = im
-            idx += 1
-    return labeled
-# ============================================================
-# Helpers: BFS alpha key fix
-# ============================================================
 def _inject_missing_alpha_keys(state_dict: dict) -> dict:
     bases = {}
     for k, v in state_dict.items():
@@ -808,6 +823,7 @@ def _inject_missing_alpha_keys(state_dict: dict) -> dict:
     for base, rank in bases.items():
         alpha_tensor = torch.tensor(float(rank), dtype=torch.float32)
         full_alpha = f"{base}.alpha"
         if full_alpha not in state_dict:
             state_dict[full_alpha] = alpha_tensor
@@ -817,10 +833,8 @@ def _inject_missing_alpha_keys(state_dict: dict) -> dict:
             stripped_alpha = f"{stripped_base}.alpha"
             if stripped_alpha not in state_dict:
                 state_dict[stripped_alpha] = alpha_tensor
     return state_dict
 def _filter_to_diffusers_lora_keys(state_dict: dict) -> tuple[dict, dict]:
     keep_suffixes = (
         ".lora_up.weight",
@@ -863,7 +877,6 @@ def _filter_to_diffusers_lora_keys(state_dict: dict) -> tuple[dict, dict]:
     }
     return out, stats
 def _duplicate_stripped_prefix_keys(state_dict: dict, prefix: str = "diffusion_model.") -> dict:
     out = dict(state_dict)
     for k, v in list(state_dict.items()):
@@ -874,7 +887,6 @@ def _duplicate_stripped_prefix_keys(state_dict: dict, prefix: str = "diffusion_m
             out[stripped] = v
     return out
 def _load_lora_weights_with_fallback(repo: str, weight_name: str, adapter_name: str, needs_alpha_fix: bool = False):
     try:
         pipe.load_lora_weights(repo, weight_name=weight_name, adapter_name=adapter_name)
@@ -900,11 +912,9 @@ def _load_lora_weights_with_fallback(repo: str, weight_name: str, adapter_name:
             f"kept={stats['kept']} dropped_patch={stats['dropped_patch']} "
             f"dropped_other={stats['dropped_other']} normalized_alpha={stats['normalized_alpha']}"
         )
         pipe.load_lora_weights(sd, adapter_name=adapter_name)
         return
 def _ensure_loaded_and_get_active_adapters(selected_lora: str):
     spec = ADAPTER_SPECS.get(selected_lora)
     if not spec:
@@ -942,7 +952,6 @@ def _ensure_loaded_and_get_active_adapters(selected_lora: str):
             adapter_names.append(adapter_name)
             adapter_weights.append(strength)
     else:
         repo = spec["repo"]
         weights = spec["weights"]
@@ -998,41 +1007,34 @@ def on_lora_change_ui(selected_lora, current_prompt, current_extras_condition_on
     return prompt_update, img2_update, extras_update
 def set_output_as_image1(last):
     """Build a Gradio update that sets a component's value to ``last``.

     Raises:
         gr.Error: if ``last`` is ``None`` (nothing has been generated yet).
     """
     if last is not None:
         return gr.update(value=last)
     raise gr.Error("No output available yet.")
 def set_output_as_image2(last):
     """Build a Gradio update that sets a component's value to ``last``.

     Raises:
         gr.Error: if ``last`` is ``None`` (nothing has been generated yet).
     """
     if last is not None:
         return gr.update(value=last)
     raise gr.Error("No output available yet.")
 def set_output_as_extra(last, existing_extra):
     """Return the gallery items from ``existing_extra`` with ``last`` appended.

     The merge itself is delegated to ``_append_to_gallery``.

     Raises:
         gr.Error: if ``last`` is ``None`` (nothing has been generated yet).
     """
     if last is not None:
         return _append_to_gallery(existing_extra, last)
     raise gr.Error("No output available yet.")
-# ============================================================
-# Derived conditioning UI action
-# ============================================================
 @spaces.GPU
 def add_derived_ref(img1, existing_extra, derived_type, derived_use_gpu, derived_max_people):
-    img1_pil = ensure_pil_rgb(img1, label="image_1_for_derived")
-    if img1_pil is None:
-        raise gr.Error(
-            "Image 1 could not be read in the GPU worker. "
-            "If you uploaded via API or a prior session, re-upload the image."
-        )
     if derived_type == "None":
         return gr.update(value=existing_extra), gr.update(visible=False, value=None)
-    base = img1_pil.convert("RGB")
     if derived_type == "Pose (ViTPose, fast)":
         derived = make_pose_map(base, use_gpu=bool(derived_use_gpu), mode="fast")
@@ -1053,18 +1055,29 @@ def add_derived_ref(img1, existing_extra, derived_type, derived_use_gpu, derived
 # ============================================================
-# Inference
 # ============================================================
-def _debug_cuda_mem(prefix="[DEBUG][cuda]"):
     if not torch.cuda.is_available():
         return
     try:
         free, total = torch.cuda.mem_get_info()
-        print(f"{prefix} mem free={free/1e9:.2f}GB total={total/1e9:.2f}GB")
-    except Exception as e:
-        print(f"{prefix} mem_get_info failed: {e!r}")
 @spaces.GPU
 def infer(
@@ -1086,31 +1099,12 @@ def infer(
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
-    # -------------------- Robust image reads --------------------
-    img1 = ensure_pil_rgb(input_image_1, label="input_image_1")
-    img2 = ensure_pil_rgb(input_image_2, label="input_image_2") if input_image_2 is not None else None
-    # Debug what we actually received (helps track unexpected path payloads)
-    print("[DEBUG][infer] input types:",
-          type(input_image_1), type(input_image_2), type(input_images_extra))
-    print("[DEBUG][infer] img1_ok:", img1 is not None, "img2_ok:", (img2 is not None))
-    if img1 is None:
-        raise gr.Error(
-            "Could not read Image 1 inside the GPU worker. "
-            "This can happen if Gradio passed a temp file path that isn't present in the worker. "
-            "Please re-upload Image 1 and try again."
-        )
-    # Normalize extra images (Gallery) to PIL RGB
-    extra_imgs: list[Image.Image] = []
-    if input_images_extra:
-        for i, item in enumerate(input_images_extra):
-            pil = ensure_pil_rgb(item, label=f"gallery[{i}]")
-            if pil is not None:
-                extra_imgs.append(pil)
-    # -------------------- Adapter handling --------------------
     if lora_adapter == NONE_LORA:
         try:
             pipe.set_adapters([], adapter_weights=[])
@@ -1121,7 +1115,6 @@ def infer(
         adapter_names, adapter_weights = _ensure_loaded_and_get_active_adapters(lora_adapter)
         pipe.set_adapters(adapter_names, adapter_weights=adapter_weights)
-    # -------------------- Seed / prompts --------------------
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
@@ -1132,17 +1125,34 @@ def infer(
         "extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry"
     )
-    # If a LoRA requires image2, enforce
     if lora_requires_two_images(lora_adapter) and img2 is None:
         raise gr.Error("This LoRA needs two images. Please upload Image 2 as well.")
-    # Label images as image_1, image_2, image_3...
     labeled = build_labeled_images(img1, img2, extra_imgs)
     pipe_images = list(labeled.values())
     if len(pipe_images) == 1:
         pipe_images = pipe_images[0]
-    # Canvas sizing
     target_area = get_target_area_for_lora(img1, lora_adapter, float(target_megapixels))
     width, height = compute_canvas_dimensions_from_area(
         img1,
@@ -1150,42 +1160,29 @@ def infer(
         multiple_of=int(pipe.vae_scale_factor * 2),
     )
-    # VAE indices (conditioning-only extras)
     vae_image_indices = None
     if extras_condition_only:
         if isinstance(pipe_images, list) and len(pipe_images) > 2:
             vae_image_indices = [0, 1] if len(pipe_images) >= 2 else [0]
-    # -------------------- Extra debug logs --------------------
-    prompt_s = "" if prompt is None else str(prompt)
     print(
         "[DEBUG][infer] submitting request | "
-        f"lora_adapter={lora_adapter!r} seed={seed} "
-        f"prompt_len={len(prompt_s)} steps={steps} true_cfg_scale={guidance_scale} "
-        f"target_mp={target_megapixels} canvas=({width}x{height}) "
-        f"n_images={(len(pipe_images) if isinstance(pipe_images, list) else 1)} "
-        f"vae_image_indices={vae_image_indices} pad_to_canvas={bool(pad_to_canvas)}"
     )
-    print("[DEBUG][infer] image_1 size:", img1.size, "image_2 size:", (img2.size if img2 else None))
-    if extra_imgs:
-        print("[DEBUG][infer] extra_imgs:", [im.size for im in extra_imgs][:8], ("..." if len(extra_imgs) > 8 else ""))
-    # Optional: log token count (if tokenizer exists)
-    try:
-        tok = getattr(pipe, "tokenizer", None)
-        if tok is not None and prompt_s:
-            ids = tok(prompt_s, return_tensors="pt", truncation=True).input_ids
-            print("[DEBUG][infer] prompt token_count:", int(ids.shape[-1]))
-    except Exception as e:
-        print("[DEBUG][infer] token_count failed:", repr(e))
-    _debug_cuda_mem(prefix="[DEBUG][cuda][before]")
-    # -------------------- Run --------------------
     try:
         result = pipe(
             image=pipe_images,
-            prompt=prompt_s,
             negative_prompt=negative_prompt,
             height=height,
             width=width,
@@ -1196,13 +1193,15 @@ def infer(
             pad_to_canvas=bool(pad_to_canvas),
         ).images[0]
         return result, seed, result
-    except Exception:
         print("---- [ERROR][infer] exception ----")
         print(traceback.format_exc())
         print("---------------------------------")
         raise
     finally:
-        _debug_cuda_mem(prefix="[DEBUG][cuda][after]")
         gc.collect()
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
@@ -1210,24 +1209,17 @@ def infer(
 @spaces.GPU
 def infer_example(input_image, prompt, lora_adapter):
-    img = ensure_pil_rgb(input_image, label="example_image")
-    if img is None:
         return None, 0, None
     guidance_scale = 1.0
     steps = 4
     result, seed, last = infer(
-        img,
-        None,
-        None,
-        prompt,
-        lora_adapter,
-        0,
-        True,
-        guidance_scale,
-        steps,
-        1.0,
-        True,
-        True,
     )
     return result, seed, last
@@ -1253,9 +1245,9 @@ with gr.Blocks() as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown("# **Qwen-Image-Edit-2511-LoRAs-Fast**", elem_id="main-title")
         gr.Markdown(
-            "Perform diverse image edits using specialized LoRA adapters for "
-            "Qwen-Image-Edit-2511. Includes safeguards for ZeroGPU file-path uploads "
-            "and a RoPE patch to avoid certain CUDA cublas failures."
         )
         gr.Markdown(aio_status_line)
@@ -1283,6 +1275,7 @@ with gr.Blocks() as demo:
             with gr.Column():
                 output_image = gr.Image(label="Output Image", interactive=False, format="png", height=353)
                 last_output = gr.State(value=None)
                 with gr.Row():
@@ -1348,7 +1341,6 @@ with gr.Blocks() as demo:
                         value=True,
                     )
-        # On LoRA selection: preset prompt + toggle Image 2
         lora_adapter.change(
             fn=on_lora_change_ui,
             inputs=[lora_adapter, prompt, extras_condition_only],
@@ -1360,6 +1352,26 @@ with gr.Blocks() as demo:
                 ["examples/5.jpg", "Remove shadows and relight the image using soft lighting.", "Light-Restoration"],
                 ["examples/4.jpg", "Use a subtle golden-hour filter with smooth light diffusion.", "Relight"],
                 ["examples/2.jpeg", "Rotate the camera 45 degrees to the left.", "Multiple-Angles"],
                 ["examples/11.jpg", "Upscale this picture to 4K resolution.", "Upscale2K"],
             ],
             inputs=[input_image_1, prompt, lora_adapter],
@@ -1388,12 +1400,10 @@ with gr.Blocks() as demo:
         outputs=[output_image, seed, last_output],
     )
-    # Output routing buttons
     btn_out_to_img1.click(fn=set_output_as_image1, inputs=[last_output], outputs=[input_image_1])
     btn_out_to_img2.click(fn=set_output_as_image2, inputs=[last_output], outputs=[input_image_2])
     btn_out_to_extra.click(fn=set_output_as_extra, inputs=[last_output, input_images_extra], outputs=[input_images_extra])
-    # Derived conditioning button
     add_derived_btn.click(
         fn=add_derived_ref,
         inputs=[input_image_1, input_images_extra, derived_type, derived_use_gpu, derived_max_people],

 # app.py
 import os
 import re
 import gc
+import uuid
+import time
+import math
 import traceback
 import random
 from typing import Iterable, Optional
 import gradio as gr
 import numpy as np
 import spaces
 from gradio.themes import Soft
 from gradio.themes.utils import colors, fonts, sizes
+# ============================================================
+# Process-unique temp dir (helps avoid /tmp collisions)
+# ============================================================
+def _ensure_unique_gradio_tmp():
+    """
+    Point Gradio at a per-process temp directory unless one is already configured.
+
+    If GRADIO_TEMP_DIR is already set, its value is printed and left untouched.
+    Otherwise a directory named /tmp/gradio_<pid>_<8 hex chars> is created and
+    exported through GRADIO_TEMP_DIR. The variable is exported only after the
+    directory exists, so a failed mkdir never leaves the environment pointing at
+    a missing path.
+    """
+    existing = os.environ.get("GRADIO_TEMP_DIR")
+    if existing:
+        print(f"GRADIO_TEMP_DIR = {existing}")
+        return
+    tmp = f"/tmp/gradio_{os.getpid()}_{uuid.uuid4().hex[:8]}"
+    try:
+        os.makedirs(tmp, exist_ok=True)
+    except OSError as e:
+        # Best effort: report the failure and leave GRADIO_TEMP_DIR unset.
+        print(f"[WARN] could not create GRADIO_TEMP_DIR {tmp}: {e}")
+        return
+    os.environ["GRADIO_TEMP_DIR"] = tmp
+    print(f"GRADIO_TEMP_DIR = {tmp}")
+_ensure_unique_gradio_tmp()
 # ============================================================
+# Patch: Qwen2.5-VL RoPE (avoid cublas batched GEMM; preserve shapes)
 # ============================================================
+def patch_qwen25vl_rope_no_gemm():
     """
+    Patch Qwen2.5-VL rotary embedding to avoid the matmul that can trigger:
+      CUBLAS_STATUS_INVALID_VALUE (cublasSgemmStridedBatched)
+    on some ZeroGPU/H200 MIG configurations.
+    CRITICAL: Preserve exact output shapes used by apply_multimodal_rotary_pos_emb,
+    otherwise you get split_with_sizes mismatches.
+
+    The replaced expression is an outer product between inv_freq and the position
+    ids, so it is computed as a broadcast multiply. Every leading axis of
+    position_ids (including the batch axis) is kept, which keeps the output shape
+    identical to the matmul version for any batch size.
+
+    Environment switches:
+      DISABLE_ROPE_PATCH=1  skip patching entirely
+      DEBUG_ROPE=1          print rotary dims on every forward call
+
+    Returns None. The patch is applied by replacing forward on the rotary class
+    found in transformers' qwen2_5_vl modeling module; if the module or class is
+    missing, a message is printed and nothing is changed.
     """
+    if os.environ.get("DISABLE_ROPE_PATCH", "").strip() == "1":
+        print("[patch][rope] DISABLE_ROPE_PATCH=1 -> skipping patch.")
+        return
     try:
+        from transformers.models.qwen2_5_vl import modeling_qwen2_5_vl as qvl
     except Exception as e:
+        print(f"[patch][rope] could not import qwen2_5_vl modeling: {e}")
+        return
+    Rotary = None
+    for name in ["Qwen2_5_VLRotaryEmbedding", "Qwen2_5RotaryEmbedding", "RotaryEmbedding"]:
+        Rotary = getattr(qvl, name, None)
+        if Rotary is not None:
+            break
+    if Rotary is None:
+        print("[patch][rope] rotary embedding class not found; no patch applied.")
+        return
+    orig_forward = Rotary.forward
+    def forward_no_gemm(self, x, position_ids):
+        # Fallback to original if structure differs
+        if not hasattr(self, "inv_freq") or position_ids is None:
+            return orig_forward(self, x, position_ids)
+        # Only the two layouts handled below are rewritten; any other rank is
+        # left to the original implementation rather than guessed at.
+        if position_ids.ndim not in (2, 3):
+            return orig_forward(self, x, position_ids)
+        # Determine rotary dim from module config (NOT x.shape[-1])
+        if hasattr(self, "dim") and isinstance(self.dim, int):
+            rope_dim = int(self.dim)
+        else:
+            rope_dim = int(self.inv_freq.numel() * 2)
+        # Compute on the same device as inv_freq
+        dev = self.inv_freq.device
+        pos = position_ids.to(device=dev)
+        if pos.ndim == 3:
+            # (sections, bs, seq) -> (sections, bs, 1, seq). The batch axis stays
+            # separate; flattening it into seq would change the output shape for bs > 1.
+            # NOTE(review): upstream Qwen2.5-VL is expected to pass (3, bs, seq) — confirm
+            # against the installed transformers version.
+            posf = pos[:, :, None, :].float()
+        else:
+            # (bs, seq) -> (bs, 1, 1, seq)
+            posf = pos[:, None, None, :].float()
+        # Broadcast multiply instead of matmul:
+        # inv: (1,1,dim/2,1) * posf: (..., 1, seq) -> (..., dim/2, seq)
+        inv = self.inv_freq[None, None, :, None].float()
+        freqs = (inv * posf).transpose(2, 3)  # (..., seq, dim/2)
+        # Double to full rotary dim
+        emb = torch.cat((freqs, freqs), dim=-1)  # (..., seq, dim)
+        # Enforce exact expected rotary dim
+        if emb.shape[-1] != rope_dim:
+            emb = emb[..., :rope_dim]
+        cos = emb.cos()
+        sin = emb.sin()
+        # Respect attention scaling if present
+        attn_scale = getattr(self, "attention_scaling", None)
+        if attn_scale is not None:
+            cos = cos * attn_scale
+            sin = sin * attn_scale
+        # Match dtype expectations (upstream typically returns same dtype as x)
+        cos = cos.to(dtype=x.dtype)
+        sin = sin.to(dtype=x.dtype)
+        # Optional debug (enable by env)
+        if os.environ.get("DEBUG_ROPE", "").strip() == "1":
+            ms = getattr(self, "mrope_section", None)
+            if ms is not None:
+                try:
+                    ms_list = list(ms)
+                    print(f"[DEBUG][rope] rope_dim={rope_dim} cos_last={cos.shape[-1]} mrope_sum={sum(ms_list)} mrope={ms_list}")
+                except Exception:
+                    print(f"[DEBUG][rope] rope_dim={rope_dim} cos_last={cos.shape[-1]} (mrope_section unreadable)")
+            else:
+                print(f"[DEBUG][rope] rope_dim={rope_dim} cos_last={cos.shape[-1]} (no mrope_section attr)")
+        return cos, sin
+    Rotary.forward = forward_no_gemm
+    print("[patch] Patched Qwen2.5-VL RoPE matmul -> broadcast multiply (shape-preserving).")
+patch_qwen25vl_rope_no_gemm()
 # ============================================================
 # Theme
     c950="#802200",
 )
 class OrangeRedTheme(Soft):
     def __init__(
         self,
             block_label_background_fill="*primary_200",
         )
 orange_red_theme = OrangeRedTheme()
 # ============================================================
+# Device / Env debug
 # ============================================================
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print("CUDA_VISIBLE_DEVICES=", os.environ.get("CUDA_VISIBLE_DEVICES"))
 print("torch.__version__ =", torch.__version__)
 print("torch.version.cuda =", torch.version.cuda)
     print("device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
 print("Using device:", device)
+# Optional: allow TF32 for CUDA matmul and cuDNN (faster on GPUs that support it, at slightly reduced float32 precision)
+try:
+    torch.backends.cuda.matmul.allow_tf32 = True
+    torch.backends.cudnn.allow_tf32 = True
+except Exception:
+    pass
 # ============================================================
 # AIO version (Space variable)
 # ============================================================
 _VER_RE = re.compile(r"^v\d+$")
 _DIGITS_RE = re.compile(r"^\d+$")
 def _normalize_version(raw: str) -> Optional[str]:
     if raw is None:
         return None
         return f"v{s}"
     return None
 _AIO_ENV_RAW = os.environ.get("AIO_VERSION", "")
 _AIO_ENV_NORM = _normalize_version(_AIO_ENV_RAW)
 print(f"AIO_VERSION (normalized) = {_AIO_ENV_NORM!r}")
 print(f"Using AIO_VERSION = {AIO_VERSION} ({AIO_VERSION_SOURCE})")
 # ============================================================
 # Pipeline
 # ============================================================
 dtype = torch.bfloat16
 def _load_pipe_with_version(version: str) -> QwenImageEditPlusPipeline:
     sub = f"{version}/transformer"
     print(f"📦 Loading AIO transformer: {AIO_REPO_ID} / {sub}")
     ).to(device)
     return p
 try:
     pipe = _load_pipe_with_version(AIO_VERSION)
 except Exception:
 MAX_SEED = np.iinfo(np.int32).max
 # ============================================================
+# Derived conditioning (Transformers): Pose + Depth  (v1-style)
 # ============================================================
 POSE_MODEL_ID = "usyd-community/vitpose-base-simple"
     (12, 14), (14, 16),
 ]
 def _derived_device(use_gpu: bool) -> torch.device:
+    """Return the CUDA device when use_gpu is truthy and CUDA is available, else the CPU device."""
     return torch.device("cuda" if (use_gpu and torch.cuda.is_available()) else "cpu")
 def _load_pose_models(dev: torch.device):
     key = str(dev)
     if key in _POSE_CACHE:
     _POSE_CACHE[key] = (det_proc, det_model, pose_proc, pose_model)
     return _POSE_CACHE[key]
 def _load_depth_models(dev: torch.device):
     key = str(dev)
     if key in _DEPTH_CACHE:
     _DEPTH_CACHE[key] = (proc, model)
     return _DEPTH_CACHE[key]
 def _draw_skeleton_on_blank(
     size: tuple[int, int],
     persons_keypoints: list[np.ndarray],
             if sc[i] < kp_thresh:
                 continue
             x, y = float(kps[i, 0]), float(kps[i, 1])
+            draw.ellipse([(x - point_r, y - point_r), (x + point_r, y + point_r)], fill=(255, 255, 255))
     return canvas
 def make_pose_map(
     img: Image.Image,
     *,
     persons_kps, persons_sc = [], []
     for pr in pose_results:
+        persons_kps.append(pr["keypoints"].detach().cpu().numpy())
+        persons_sc.append(pr["scores"].detach().cpu().numpy())
     if not persons_kps:
         return Image.new("RGB", img.size, (0, 0, 0))
     return _draw_skeleton_on_blank(img.size, persons_kps, persons_sc)
 def make_depth_map(img: Image.Image, *, use_gpu: bool) -> Image.Image:
     img = img.convert("RGB")
     dev = _derived_device(use_gpu)
     return Image.fromarray(depth8, mode="L").convert("RGB")
+# ============================================================
+# Helpers: gallery normalization + debug-friendly PIL conversion
+# ============================================================
+def _to_pil_rgb(x) -> Optional[Image.Image]:
+    """
+    Convert a Gradio-style image value to a PIL RGB image, or return None.
+
+    Accepts PIL images, numpy arrays, file paths (str / os.PathLike), dicts that
+    carry a path under 'name' or 'path', and (image, caption) tuples wrapping any
+    of those. Missing or unreadable paths are logged and yield None instead of
+    raising; any other value that cannot be converted also yields None.
+    """
+    if x is None:
+        return None
+    # Gallery often returns (image, caption)
+    if isinstance(x, tuple) and len(x) >= 1:
+        x = x[0]
+        if x is None:
+            return None
+    # Some gradio versions can return dict with a temp file path
+    if isinstance(x, dict):
+        # common keys: 'name' or 'path'
+        p = x.get("name") or x.get("path")
+        if isinstance(p, (str, os.PathLike)):
+            x = p
+    # Plain file paths (e.g. from components configured to return file paths)
+    if isinstance(x, (str, os.PathLike)):
+        if not os.path.exists(x):
+            print(f"[WARN] extra image path missing, skipping: {x}")
+            return None
+        try:
+            # The context manager closes the file handle once the pixels are converted.
+            with Image.open(x) as im:
+                return im.convert("RGB")
+        except Exception as e:
+            print(f"[WARN] failed to open extra image path {x}: {e}")
+            return None
+    if isinstance(x, Image.Image):
+        return x.convert("RGB")
+    if isinstance(x, np.ndarray):
+        try:
+            return Image.fromarray(x).convert("RGB")
+        except Exception:
+            return None
+    # last resort: anything numpy can turn into an image-shaped array
+    try:
+        return Image.fromarray(np.array(x)).convert("RGB")
+    except Exception:
+        return None
 def _append_to_gallery(existing, new_img: Image.Image):
+    """
+    Return a new list holding the convertible items of existing followed by new_img.
+
+    Each existing item is passed through _to_pil_rgb; items that convert to None
+    are dropped. new_img is appended as given, without conversion.
+    """
     items = []
     if existing:
         for it in existing:
+            pil = _to_pil_rgb(it)
             if pil is not None:
                 items.append(pil)
     items.append(new_img)
     return items
+def build_labeled_images(
+    img1: Image.Image,
+    img2: Optional[Image.Image],
+    extra_imgs: Optional[list[Image.Image]],
+) -> dict[str, Image.Image]:
+    """
+    Number the input images in order and key them as "image_1", "image_2", ...
+
+    img1 always takes the first slot. img2 and each entry of extra_imgs follow in
+    order, with None entries skipped so the numbering has no gaps.
+    """
+    optional_inputs = [img2]
+    if extra_imgs:
+        optional_inputs.extend(extra_imgs)
+    ordered = [img1] + [im for im in optional_inputs if im is not None]
+    return {f"image_{pos}": im for pos, im in enumerate(ordered, start=1)}
 # ============================================================
+# LoRA adapters + presets (your v1 config)
 # ============================================================
 NONE_LORA = "None"
     "Any2Real_2601": "change the picture 1 to realistic photograph",
     "Semirealistic-photo-detailer": "transform the image to semi-realistic image",
     "AnyPose": "Make the person in image 1 do the exact same pose of the person in image 2. Changing the style and background of the image of the person in image 1 is undesirable, so don't do it. The new pose should be pixel accurate to the pose we are trying to copy. The position of the arms and head and legs should be the same as the pose we are trying to copy. Change the field of view and angle to match exactly image 2. Head tilt and eye gaze pose should match the person in image 2.",
+    "Hyperrealistic-Portrait": "Transform the image into an ultra-realistic photorealistic portrait with strict identity preservation, facing straight to the camera. Enhance pore-level skin textures, realistic moisture effects, and natural wet hair clumping against the skin. Apply cool-toned soft-box lighting with subtle highlights and shadows, maintain realistic green-hazel eye catchlights without synthetic gloss, and preserve soft natural lip texture. Use shallow depth of field with a clean bokeh background, an 85mm macro photographic look, and raw photo grading without retouching to maintain realism and original details.",
+    "Ultrarealistic-Portrait": "Transform the image into an ultra-realistic glamour portrait while strictly preserving the subject’s identity. Apply a close-up composition with a slight head tilt and a hand near the face, enhance cinematic directional lighting with dramatic fashion-style highlights, and refine makeup details including glowing skin, glossy lips, luminous highlighter, and defined eyes. Increase skin realism with detailed epidermal textures such as micropores, microhairs, subtle oil sheen, natural highlights, soft wrinkles, and subsurface scattering. Maintain a luxury fashion-magazine look in a 9:16 aspect ratio, preserving realism, facial structure, and original details without over-smoothing or retouching.",
     "Upscale2K": "Upscale this picture to 4K resolution.",
     "BFS-Best-FaceSwap": "head_swap: start with Picture 1 as the base image, keeping its lighting, environment, and background. remove the head from Picture 1 completely and replace it with the head from Picture 2, strictly preserving the hair, eye color, and nose structure of Picture 2. copy the eye direction, head rotation, and micro-expressions from Picture 1. high quality, sharp details, 4k",
     "BFS-Best-FaceSwap-merge": "head_swap: start with Picture 1 as the base image, keeping its lighting, environment, and background. remove the head from Picture 1 completely and replace it with the head from Picture 2, strictly preserving the hair, eye color, and nose structure of Picture 2. copy the eye direction, head rotation, and micro-expressions from Picture 1. high quality, sharp details, 4k",
 LOADED_ADAPTERS = set()
 # ============================================================
+# Helpers: resolution (area-based sizing)
 # ============================================================
 def _round_to_multiple(x: int, m: int) -> int:
+    """Round x down to a multiple of m, but never return less than m itself."""
     return max(m, (int(x) // m) * m)
 def compute_canvas_dimensions_from_area(
     image: Image.Image,
     target_area: int,
 ) -> tuple[int, int]:
     w, h = image.size
     aspect = w / h if h else 1.0
     from qwenimage.pipeline_qwenimage_edit_plus import calculate_dimensions
     width, height = calculate_dimensions(int(target_area), float(aspect))
     width = _round_to_multiple(int(width), int(multiple_of))
     height = _round_to_multiple(int(height), int(multiple_of))
     return width, height
 def get_target_area_for_lora(
     image: Image.Image,
     lora_adapter: str,
     user_target_megapixels: float,
 ) -> int:
     spec = ADAPTER_SPECS.get(lora_adapter, {})
     if "target_area" in spec:
         try:
             return int(spec["target_area"])
         except Exception:
             pass
     if "target_megapixels" in spec:
         try:
             mp = float(spec["target_megapixels"])
             return int(mp * 1024 * 1024)
         except Exception:
             pass
     if "target_long_edge" in spec:
         try:
             long_edge = int(spec["target_long_edge"])
             return int(new_w * new_h)
         except Exception:
             pass
     return int(float(user_target_megapixels) * 1024 * 1024)
 # ============================================================
+# Helpers: LoRA routing + BFS alpha fixes (your v1 logic)
 # ============================================================
 def lora_requires_two_images(lora_adapter: str) -> bool:
+    """Return the adapter's "requires_two_images" flag from ADAPTER_SPECS (False if the adapter or key is absent)."""
     return bool(ADAPTER_SPECS.get(lora_adapter, {}).get("requires_two_images", False))
 def image2_label_for_lora(lora_adapter: str) -> str:
+    """Return the adapter's "image2_label" from ADAPTER_SPECS, or the generic upload label if the adapter or key is absent."""
     return str(ADAPTER_SPECS.get(lora_adapter, {}).get("image2_label", "Upload Reference (Image 2)"))
 def _inject_missing_alpha_keys(state_dict: dict) -> dict:
     bases = {}
     for k, v in state_dict.items():
     for base, rank in bases.items():
         alpha_tensor = torch.tensor(float(rank), dtype=torch.float32)
         full_alpha = f"{base}.alpha"
         if full_alpha not in state_dict:
             state_dict[full_alpha] = alpha_tensor
             stripped_alpha = f"{stripped_base}.alpha"
             if stripped_alpha not in state_dict:
                 state_dict[stripped_alpha] = alpha_tensor
     return state_dict
 def _filter_to_diffusers_lora_keys(state_dict: dict) -> tuple[dict, dict]:
     keep_suffixes = (
         ".lora_up.weight",
     }
     return out, stats
 def _duplicate_stripped_prefix_keys(state_dict: dict, prefix: str = "diffusion_model.") -> dict:
     out = dict(state_dict)
     for k, v in list(state_dict.items()):
             out[stripped] = v
     return out
 def _load_lora_weights_with_fallback(repo: str, weight_name: str, adapter_name: str, needs_alpha_fix: bool = False):
     try:
         pipe.load_lora_weights(repo, weight_name=weight_name, adapter_name=adapter_name)
             f"kept={stats['kept']} dropped_patch={stats['dropped_patch']} "
             f"dropped_other={stats['dropped_other']} normalized_alpha={stats['normalized_alpha']}"
         )
         pipe.load_lora_weights(sd, adapter_name=adapter_name)
         return
 def _ensure_loaded_and_get_active_adapters(selected_lora: str):
     spec = ADAPTER_SPECS.get(selected_lora)
     if not spec:
             adapter_names.append(adapter_name)
             adapter_weights.append(strength)
     else:
         repo = spec["repo"]
         weights = spec["weights"]
     return prompt_update, img2_update, extras_update
+# ============================================================
+# UI helpers: output routing + derived conditioning
+# ============================================================
 def set_output_as_image1(last):
+    """Return a Gradio update carrying last as the new value; raise gr.Error if there is no output yet."""
     if last is None:
         raise gr.Error("No output available yet.")
     return gr.update(value=last)
 def set_output_as_image2(last):
+    """Return a Gradio update carrying last as the new value; raise gr.Error if there is no output yet."""
     if last is None:
         raise gr.Error("No output available yet.")
     return gr.update(value=last)
 def set_output_as_extra(last, existing_extra):
+    """Return existing_extra with last appended via _append_to_gallery; raise gr.Error if there is no output yet."""
     if last is None:
         raise gr.Error("No output available yet.")
     return _append_to_gallery(existing_extra, last)
 @spaces.GPU
 def add_derived_ref(img1, existing_extra, derived_type, derived_use_gpu, derived_max_people):
+    if img1 is None:
+        raise gr.Error("Please upload Image 1 first.")
     if derived_type == "None":
         return gr.update(value=existing_extra), gr.update(visible=False, value=None)
+    base = img1.convert("RGB")
     if derived_type == "Pose (ViTPose, fast)":
         derived = make_pose_map(base, use_gpu=bool(derived_use_gpu), mode="fast")
 # ============================================================
+# Debug helpers (CUDA mem + token count)
 # ============================================================
+def _cuda_mem(prefix: str):
+    """
+    Print free/total CUDA memory in GB, tagged with prefix.
+
+    Does nothing when CUDA is unavailable. A failing memory query is reported
+    on stdout instead of being raised, so this debug helper cannot interrupt
+    the caller.
+    """
     if not torch.cuda.is_available():
         return
     try:
         free, total = torch.cuda.mem_get_info()
+        print(f"[DEBUG][cuda][{prefix}] mem free={free/1e9:.2f}GB total={total/1e9:.2f}GB")
+    except Exception as e:
+        # Debug-only path: surface the reason but never propagate it.
+        print(f"[DEBUG][cuda][{prefix}] mem query failed: {e}")
+def _approx_token_count(text: str) -> int:
+    """
+    Estimate the token count of text for debug output, without a tokenizer.
+
+    Uses the rule of thumb of roughly 4 characters per token, rounded up.
+    Empty or missing text counts as 0 tokens.
+    """
+    n_chars = len(text) if text else 0
+    # Ceiling division by 4: any non-empty text yields at least 1.
+    return -(-n_chars // 4)
+# ============================================================
+# Inference
+# ============================================================
 @spaces.GPU
 def infer(
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
+    print("[DEBUG][infer] input types:", type(input_image_1), type(input_image_2), type(input_images_extra))
+    if input_image_1 is None:
+        raise gr.Error("Please upload Image 1.")
+    # Handle "None"
     if lora_adapter == NONE_LORA:
         try:
             pipe.set_adapters([], adapter_weights=[])
         adapter_names, adapter_weights = _ensure_loaded_and_get_active_adapters(lora_adapter)
         pipe.set_adapters(adapter_names, adapter_weights=adapter_weights)
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
         "extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry"
     )
+    img1 = input_image_1.convert("RGB") if isinstance(input_image_1, Image.Image) else _to_pil_rgb(input_image_1)
+    if img1 is None:
+        raise gr.Error("Image 1 could not be read (unexpected input type/path).")
+    img2 = None
+    if input_image_2 is not None:
+        img2 = input_image_2.convert("RGB") if isinstance(input_image_2, Image.Image) else _to_pil_rgb(input_image_2)
+        if img2 is None:
+            raise gr.Error("Image 2 could not be read (unexpected input type/path).")
+    # Normalize extra images (Gallery)
+    extra_imgs: list[Image.Image] = []
+    if input_images_extra:
+        for item in input_images_extra:
+            pil = _to_pil_rgb(item)
+            if pil is not None:
+                extra_imgs.append(pil)
+    # Enforce 2-image LoRA behavior
     if lora_requires_two_images(lora_adapter) and img2 is None:
         raise gr.Error("This LoRA needs two images. Please upload Image 2 as well.")
     labeled = build_labeled_images(img1, img2, extra_imgs)
     pipe_images = list(labeled.values())
     if len(pipe_images) == 1:
         pipe_images = pipe_images[0]
     target_area = get_target_area_for_lora(img1, lora_adapter, float(target_megapixels))
     width, height = compute_canvas_dimensions_from_area(
         img1,
         multiple_of=int(pipe.vae_scale_factor * 2),
     )
     vae_image_indices = None
     if extras_condition_only:
         if isinstance(pipe_images, list) and len(pipe_images) > 2:
             vae_image_indices = [0, 1] if len(pipe_images) >= 2 else [0]
+    # Debug summary
+    n_images = len(pipe_images) if isinstance(pipe_images, list) else 1
+    tok_est = _approx_token_count(prompt or "")
     print(
         "[DEBUG][infer] submitting request | "
+        f"lora_adapter={lora_adapter!r} seed={seed} prompt_len={len(prompt or '')} "
+        f"steps={steps} true_cfg_scale={guidance_scale} target_mp={target_megapixels} "
+        f"canvas=({width}x{height}) n_images={n_images} vae_image_indices={vae_image_indices} "
+        f"pad_to_canvas={bool(pad_to_canvas)}"
     )
+    print(f"[DEBUG][infer] image_1 size: {img1.size} image_2 size: {img2.size if img2 else None}")
+    print(f"[DEBUG][infer] prompt token_estimate: {tok_est}")
+    _cuda_mem("before")
     try:
         result = pipe(
             image=pipe_images,
+            prompt=prompt,
             negative_prompt=negative_prompt,
             height=height,
             width=width,
             pad_to_canvas=bool(pad_to_canvas),
         ).images[0]
         return result, seed, result
+    except Exception as e:
         print("---- [ERROR][infer] exception ----")
         print(traceback.format_exc())
         print("---------------------------------")
         raise
     finally:
+        _cuda_mem("after")
         gc.collect()
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
 @spaces.GPU
 def infer_example(input_image, prompt, lora_adapter):
+    """
+    Run one example row through infer() with fixed settings.
+
+    Returns (None, 0, None) when no image is supplied; otherwise returns the
+    (result, seed, last) triple produced by infer().
+    """
+    if input_image is None:
         return None, 0, None
+    # Assumes input_image is a PIL image (it must expose .convert) — TODO confirm
+    # against the type delivered by the examples component.
+    input_pil = input_image.convert("RGB")
     guidance_scale = 1.0
     steps = 4
+    # Positional call: the argument order must match infer()'s parameter order.
     result, seed, last = infer(
+        input_pil, None, None,
+        prompt, lora_adapter,
+        0, True,
+        guidance_scale, steps, 1.0,
+        True, True
     )
     return result, seed, last
     with gr.Column(elem_id="col-container"):
         gr.Markdown("# **Qwen-Image-Edit-2511-LoRAs-Fast**", elem_id="main-title")
         gr.Markdown(
+            "Perform diverse image edits using specialized "
+            "[LoRA](https://huggingface.co/models?other=base_model:adapter:Qwen/Qwen-Image-Edit-2511) adapters for the "
+            "[Qwen-Image-Edit](https://huggingface.co/Qwen/Qwen-Image-Edit-2511) model. Uses a Diffusers compatible extraction of the transformers from Phr00t's Rapid AIO merge."
         )
         gr.Markdown(aio_status_line)
             with gr.Column():
                 output_image = gr.Image(label="Output Image", interactive=False, format="png", height=353)
                 last_output = gr.State(value=None)
                 with gr.Row():
                         value=True,
                     )
         lora_adapter.change(
             fn=on_lora_change_ui,
             inputs=[lora_adapter, prompt, extras_condition_only],
                 ["examples/5.jpg", "Remove shadows and relight the image using soft lighting.", "Light-Restoration"],
                 ["examples/4.jpg", "Use a subtle golden-hour filter with smooth light diffusion.", "Relight"],
                 ["examples/2.jpeg", "Rotate the camera 45 degrees to the left.", "Multiple-Angles"],
+                [
+                    "examples/12.jpg",
+                    "flatcolor Desaturate the image and lower the contrast to create a flat, ungraded look similar to a camera log profile. Preserve details in the highlights and shadows.",
+                    "Flat-Log",
+                ],
+                ["examples/7.jpg", "Light source from the Right Rear", "Multi-Angle-Lighting"],
+                ["examples/10.jpeg", "Upscale the image.", "Upscale-Image"],
+                ["examples/7.jpg", "Light source from the Below", "Multi-Angle-Lighting"],
+                ["examples/2.jpeg", "Switch the camera to a top-down right corner view.", "Multiple-Angles"],
+                [
+                    "examples/9.jpg",
+                    "The camera moves slightly forward as sunlight breaks through the clouds, casting a soft glow around the character's silhouette in the mist. Realistic cinematic style, atmospheric depth.",
+                    "Next-Scene",
+                ],
+                ["examples/8.jpg", "Make the subjects skin details more prominent and natural.", "Edit-Skin"],
+                ["examples/6.jpg", "Switch the camera to a bottom-up view.", "Multiple-Angles"],
+                ["examples/6.jpg", "Rotate the camera 180 degrees upside down.", "Multiple-Angles"],
+                ["examples/4.jpg", "Rotate the camera 45 degrees to the right.", "Multiple-Angles"],
+                ["examples/4.jpg", "Switch the camera to a top-down view.", "Multiple-Angles"],
+                ["examples/4.jpg", "Switch the camera to a wide-angle lens.", "Multiple-Angles"],
                 ["examples/11.jpg", "Upscale this picture to 4K resolution.", "Upscale2K"],
             ],
             inputs=[input_image_1, prompt, lora_adapter],
         outputs=[output_image, seed, last_output],
     )
     btn_out_to_img1.click(fn=set_output_as_image1, inputs=[last_output], outputs=[input_image_1])
     btn_out_to_img2.click(fn=set_output_as_image2, inputs=[last_output], outputs=[input_image_2])
     btn_out_to_extra.click(fn=set_output_as_extra, inputs=[last_output, input_images_extra], outputs=[input_images_extra])
     add_derived_btn.click(
         fn=add_derived_ref,
         inputs=[input_image_1, input_images_extra, derived_type, derived_use_gpu, derived_max_people],