Qwen-Image-Edit-Rapid-AIO-Loras-Experimental

Running on Zero

App Files Files Community

Professional Noob committed on Jan 24

Commit

bfef88e

verified ·

1 Parent(s): d5b8c31

Update app.py

Browse files

Files changed (1) hide show

app.py +193 -139

app.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import os
 import re
 import gc
-import threading
 import random
 from typing import Iterable, Optional
 import gradio as gr
@@ -112,7 +113,7 @@ if torch.cuda.is_available():
 print("Using device:", device)
 # ============================================================
-# Pipeline + AIO versioning
 # ============================================================
 from diffusers import FlowMatchEulerDiscreteScheduler  # noqa: F401
@@ -122,60 +123,97 @@ from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
 dtype = torch.bfloat16
 AIO_REPO_ID = "Pr0f3ssi0n4ln00b/Phr00t-Qwen-Rapid-AIO"
 DEFAULT_AIO_VERSION = "v19"
-_AIO_VERSION_RE = re.compile(r"^(v\d+)/transformer/")
-AIO_SWITCH_LOCK = threading.Lock()
 CURRENT_AIO_VERSION = DEFAULT_AIO_VERSION
-def _discover_aio_versions(repo_id: str) -> list[str]:
     """
-    Lists versions by scanning repo files for paths like:
-      v19/transformer/...
-      v21/transformer/...
-    Returns sorted: v1, v2, v10, ...
     """
     try:
         api = HfApi()
-        files = api.list_repo_files(repo_id=repo_id)
         versions = set()
         for f in files:
-            m = _AIO_VERSION_RE.match(f)
-            if m:
-                versions.add(m.group(1))
         if not versions:
             return [DEFAULT_AIO_VERSION]
-        # numeric sort on the digits after 'v'
-        out = sorted(list(versions), key=lambda s: int(s[1:]) if s[1:].isdigit() else 10**9)
-        return out
     except Exception as e:
-        print(f"⚠️ Could not discover AIO versions from repo: {e}")
         return [DEFAULT_AIO_VERSION]
-AIO_VERSIONS = _discover_aio_versions(AIO_REPO_ID)
-if DEFAULT_AIO_VERSION not in AIO_VERSIONS and AIO_VERSIONS:
-    CURRENT_AIO_VERSION = AIO_VERSIONS[0]
-else:
-    CURRENT_AIO_VERSION = DEFAULT_AIO_VERSION
-def _load_aio_transformer(version: str) -> QwenImageTransformer2DModel:
     """
-    IMPORTANT: we do NOT pass device_map="cuda" here.
-    Loading with device_map can trigger diffusers' CUDA caching-allocator warmup path,
-    which is where your NVML/PyTorch allocator assert is happening on MIG/ZeroGPU.
     """
-    subfolder = f"{version}/transformer"
-    print(f"📦 Loading AIO transformer: {AIO_REPO_ID} / {subfolder} (CPU -> then move to {device})")
-    t = QwenImageTransformer2DModel.from_pretrained(
-        AIO_REPO_ID,
-        subfolder=subfolder,
-        torch_dtype=dtype,
-    )
-    return t
 def _apply_fa3_if_possible():
@@ -186,33 +224,18 @@ def _apply_fa3_if_possible():
         print(f"Warning: Could not set FA3 processor: {e}")
-def _hard_cuda_cleanup():
-    gc.collect()
-    if torch.cuda.is_available():
-        try:
-            torch.cuda.synchronize()
-        except Exception:
-            pass
-        torch.cuda.empty_cache()
-        try:
-            torch.cuda.ipc_collect()
-        except Exception:
-            pass
-# Build pipeline once. We only swap pipe.transformer at runtime.
 pipe = QwenImageEditPlusPipeline.from_pretrained(
     "Qwen/Qwen-Image-Edit-2511",
-    transformer=_load_aio_transformer(CURRENT_AIO_VERSION),
     torch_dtype=dtype,
 ).to(device)
-# move transformer to device (pipeline .to() might not fully move nested module in some custom pipelines)
-try:
-    pipe.transformer.to(device)
-except Exception:
-    pass
 _apply_fa3_if_possible()
 MAX_SEED = np.iinfo(np.int32).max
@@ -566,12 +589,11 @@ def _ensure_loaded_and_get_active_adapters(selected_lora: str):
 # ============================================================
-# AIO version switching (robust for MIG/ZeroGPU)
 # ============================================================
 def _unload_all_loras():
-    # When swapping transformer versions, previously loaded LoRAs are no longer safe to keep around.
     global LOADED_ADAPTERS
     try:
         pipe.set_adapters([], adapter_weights=[])
@@ -584,39 +606,39 @@ def _unload_all_loras():
     LOADED_ADAPTERS.clear()
-def switch_aio_version(target_version: str):
     """
-    Swap only the transformer module inside the pipeline.
-    Key safeguards vs your crash:
-      - detach & move old transformer off GPU before loading a new one
-      - load new transformer without device_map (CPU), then move to GPU
-      - clear LoRAs because module graph changes across transformers
     """
     global CURRENT_AIO_VERSION
-    if not target_version:
-        return
-    if target_version == CURRENT_AIO_VERSION:
         return
-    with AIO_SWITCH_LOCK:
-        if target_version == CURRENT_AIO_VERSION:
             return
-        print(f"🔁 Switching AIO transformer to: {AIO_REPO_ID} / {target_version}/transformer")
-        # Make sure no adapters are active and free adapter memory references
         _unload_all_loras()
-        # Detach old transformer as aggressively as possible
         old_t = getattr(pipe, "transformer", None)
         try:
-            # Register a tiny placeholder so the pipeline drops references to the huge module
-            pipe.register_modules(transformer=torch.nn.Identity())
         except Exception:
-            # Fallback: direct attribute overwrite
-            pipe.transformer = torch.nn.Identity()
         if old_t is not None:
             try:
@@ -627,25 +649,22 @@ def switch_aio_version(target_version: str):
         _hard_cuda_cleanup()
-        # Load on CPU (no device_map) then move to GPU
-        new_t = _load_aio_transformer(target_version)
-        try:
-            new_t.to(device)
-        except Exception as e:
-            # Ensure we don't leave partially loaded modules around
-            del new_t
-            _hard_cuda_cleanup()
-            raise gr.Error(f"Failed to move transformer {target_version} to {device}: {e}")
-        # Swap in
         try:
-            pipe.register_modules(transformer=new_t)
         except Exception:
             pipe.transformer = new_t
         _apply_fa3_if_possible()
-        CURRENT_AIO_VERSION = target_version
         _hard_cuda_cleanup()
@@ -655,6 +674,7 @@ def switch_aio_version(target_version: str):
 def on_lora_change_ui(selected_lora, current_prompt):
     if selected_lora != NONE_LORA:
         preset = LORA_PRESET_PROMPTS.get(selected_lora, "")
         if preset and (current_prompt is None or str(current_prompt).strip() == ""):
@@ -664,6 +684,7 @@ def on_lora_change_ui(selected_lora, current_prompt):
     else:
         prompt_update = gr.update(value=current_prompt)
     if lora_requires_two_images(selected_lora):
         img2_update = gr.update(visible=True, label=image2_label_for_lora(selected_lora))
     else:
@@ -673,14 +694,19 @@ def on_lora_change_ui(selected_lora, current_prompt):
 def refresh_aio_versions_ui(current_value: str):
-    global AIO_VERSIONS
-    AIO_VERSIONS = _discover_aio_versions(AIO_REPO_ID)
-    if current_value in AIO_VERSIONS:
         new_value = current_value
     else:
-        new_value = DEFAULT_AIO_VERSION if DEFAULT_AIO_VERSION in AIO_VERSIONS else (AIO_VERSIONS[0] if AIO_VERSIONS else DEFAULT_AIO_VERSION)
-    return gr.update(choices=AIO_VERSIONS, value=new_value), f"Found {len(AIO_VERSIONS)} version(s): {', '.join(AIO_VERSIONS)}"
 # ============================================================
@@ -702,58 +728,62 @@ def infer(
     steps,
     progress=gr.Progress(track_tqdm=True),
 ):
-    _hard_cuda_cleanup()
-    if input_image_1 is None:
-        raise gr.Error("Please upload Image 1.")
-    # Ensure selected AIO version is loaded
-    if aio_version and aio_version != CURRENT_AIO_VERSION:
-        switch_aio_version(aio_version)
-    # Handle "None"
-    if lora_adapter == NONE_LORA:
-        try:
-            pipe.set_adapters([], adapter_weights=[])
-        except Exception:
-            if LOADED_ADAPTERS:
-                pipe.set_adapters(list(LOADED_ADAPTERS), adapter_weights=[0.0] * len(LOADED_ADAPTERS))
-    else:
-        adapter_names, adapter_weights = _ensure_loaded_and_get_active_adapters(lora_adapter)
-        pipe.set_adapters(adapter_names, adapter_weights=adapter_weights)
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    generator = torch.Generator(device=device).manual_seed(seed)
-    negative_prompt = (
-        "worst quality, low quality, bad anatomy, bad hands, text, error, missing fingers, "
-        "extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry"
-    )
-    img1 = input_image_1.convert("RGB")
-    img2 = input_image_2.convert("RGB") if input_image_2 is not None else None
-    extra_imgs: list[Image.Image] = []
-    if input_images_extra:
-        for item in input_images_extra:
-            pil = _to_pil_rgb(item)
-            if pil is not None:
-                extra_imgs.append(pil)
-    if lora_requires_two_images(lora_adapter) and img2 is None:
-        raise gr.Error("This LoRA needs two images. Please upload Image 2 as well.")
-    labeled = build_labeled_images(img1, img2, extra_imgs)
-    pipe_images = list(labeled.values())
-    if len(pipe_images) == 1:
-        pipe_images = pipe_images[0]
-    target_long_edge = get_target_long_edge_for_lora(lora_adapter)
-    width, height = compute_dimensions(img1, target_long_edge)
-    try:
         result = pipe(
             image=pipe_images,
             prompt=prompt,
@@ -764,7 +794,11 @@ def infer(
             generator=generator,
             true_cfg_scale=guidance_scale,
         ).images[0]
         return result, seed
     finally:
         _hard_cuda_cleanup()
@@ -776,7 +810,7 @@ def infer_example(input_image, prompt, lora_adapter):
     input_pil = input_image.convert("RGB")
     guidance_scale = 1.0
     steps = 4
-    # Examples always run on current loaded AIO version
     result, seed = infer(CURRENT_AIO_VERSION, input_pil, None, None, prompt, lora_adapter, 0, True, guidance_scale, steps)
     return result, seed
@@ -805,11 +839,24 @@ with gr.Blocks() as demo:
         with gr.Row():
             aio_version = gr.Dropdown(
                 label="Phr00t Rapid AIO Version",
-                choices=AIO_VERSIONS,
-                value=CURRENT_AIO_VERSION if CURRENT_AIO_VERSION in AIO_VERSIONS else (AIO_VERSIONS[0] if AIO_VERSIONS else DEFAULT_AIO_VERSION),
             )
             refresh_versions = gr.Button("Refresh", variant="secondary")
-        aio_status = gr.Markdown(f"Found {len(AIO_VERSIONS)} version(s): {', '.join(AIO_VERSIONS)}")
         refresh_versions.click(
             fn=refresh_aio_versions_ui,
@@ -897,7 +944,14 @@ with gr.Blocks() as demo:
             label="Examples",
         )
     run_button.click(
         fn=infer,
         inputs=[
             aio_version,

 import os
 import re
 import gc
 import random
+import threading
+import traceback
 from typing import Iterable, Optional
 import gradio as gr
 print("Using device:", device)
 # ============================================================
+# Pipeline
 # ============================================================
 from diffusers import FlowMatchEulerDiscreteScheduler  # noqa: F401
 dtype = torch.bfloat16
+# ============================================================
+# AIO version discovery + caching (CPU) + switching (GPU)
+# ============================================================
 AIO_REPO_ID = "Pr0f3ssi0n4ln00b/Phr00t-Qwen-Rapid-AIO"
 DEFAULT_AIO_VERSION = "v19"
+_AIO_VER_RE = re.compile(r"^(v\d+)$")
+_AIO_SWITCH_LOCK = threading.Lock()
 CURRENT_AIO_VERSION = DEFAULT_AIO_VERSION
+def _hard_cuda_cleanup():
+    """
+    Best-effort release of Python garbage and cached CUDA memory.
+
+    Always runs the garbage collector. When CUDA is available it then
+    synchronizes, empties the CUDA cache and collects IPC handles.
+    Exceptions from synchronize() and ipc_collect() are ignored;
+    empty_cache() is called unguarded.
+    """
+    gc.collect()
+    if not torch.cuda.is_available():
+        # Nothing GPU-side to release.
+        return
+    try:
+        torch.cuda.synchronize()
+    except Exception:
+        pass
+    torch.cuda.empty_cache()
+    try:
+        torch.cuda.ipc_collect()
+    except Exception:
+        pass
+def discover_aio_versions(repo_id: str) -> list[str]:
     """
+    Finds versions by scanning repo file paths with the naming convention:
+      vNN/transformer/...
+
+    Returns the distinct version folder names sorted ascending by their
+    numeric part. Falls back to [DEFAULT_AIO_VERSION] when no matching path
+    exists or when anything raises (the error is only printed).
     """
     try:
         api = HfApi()
+        files = api.list_repo_files(repo_id=repo_id, repo_type="model")
         versions = set()
         for f in files:
+            if "/transformer/" not in f:
+                continue
+            # Everything before the first "/transformer/" must fully match _AIO_VER_RE.
+            head = f.split("/transformer/", 1)[0]
+            if _AIO_VER_RE.fullmatch(head):
+                versions.add(head)
         if not versions:
             return [DEFAULT_AIO_VERSION]
+        # Numeric sort so that e.g. v9 comes before v10.
+        return sorted(versions, key=lambda s: int(s[1:]))
     except Exception as e:
+        print(f"⚠️ AIO version discovery failed: {e}")
         return [DEFAULT_AIO_VERSION]
+# Discover the available versions once, at import time.
+AVAILABLE_AIO_VERSIONS = discover_aio_versions(AIO_REPO_ID)
+# If the configured default was not discovered, fall back to the first discovered entry.
+if DEFAULT_AIO_VERSION not in AVAILABLE_AIO_VERSIONS and AVAILABLE_AIO_VERSIONS:
+    DEFAULT_AIO_VERSION = AVAILABLE_AIO_VERSIONS[0]
+# Re-sync: DEFAULT_AIO_VERSION may just have been reassigned above.
+CURRENT_AIO_VERSION = DEFAULT_AIO_VERSION
+def ensure_aio_cached(version: str) -> None:
     """
+    CPU-only: fetch every file under <version>/transformer/ into the HF cache.
+
+    An empty/None version means DEFAULT_AIO_VERSION. The author's stated
+    intent is to avoid long Hub downloads during GPU tasks (which often
+    cause "GPU task aborted" on ZeroGPU).
+
+    Raises gr.Error when the repo lists no file under that folder.
     """
+    if not version:
+        version = DEFAULT_AIO_VERSION
+    sub = f"{version}/transformer"
+    prefix = sub + "/"
+    repo_files = HfApi().list_repo_files(repo_id=AIO_REPO_ID, repo_type="model")
+    wanted = [path for path in repo_files if path.startswith(prefix)]
+    if len(wanted) == 0:
+        raise gr.Error(f"No files found under {sub}/ in {AIO_REPO_ID}")
+    # One download call per file.
+    for path in wanted:
+        hf_hub_download(repo_id=AIO_REPO_ID, filename=path, repo_type="model")
+def ensure_aio_cached_ui(version: str):
+    """
+    Gradio handler (CPU): cache selected version.
+
+    An empty/None version means DEFAULT_AIO_VERSION.
+
+    Returns a pair of gr.update objects: the status markdown text
+    ("✅ Cached <version> (ready)") and interactive=True for the run button.
+
+    Raises gr.Error when caching fails; the full traceback is printed first
+    and the original exception is chained as the cause.
+    """
+    try:
+        version = version or DEFAULT_AIO_VERSION
+        print(f"⬇️  Caching AIO version on CPU: {version}")
+        ensure_aio_cached(version)
+        return gr.update(value=f"✅ Cached {version} (ready)"), gr.update(interactive=True)
+    except Exception as e:
+        print("❌ Cache step failed:\n", traceback.format_exc())
+        # Chain explicitly so the root cause stays attached to the UI-facing error.
+        raise gr.Error(f"Cache failed for {version}: {e}") from e
 def _apply_fa3_if_possible():
         print(f"Warning: Could not set FA3 processor: {e}")
+# Build pipeline once (default version at startup)
+# NOTE(review): the previous revision's loader docstring warned that loading with
+# device_map could trigger a CUDA allocator assert on MIG/ZeroGPU, and it loaded on
+# CPU before moving to the device. Confirm that passing device_map="cuda" here is intended.
 pipe = QwenImageEditPlusPipeline.from_pretrained(
     "Qwen/Qwen-Image-Edit-2511",
+    transformer=QwenImageTransformer2DModel.from_pretrained(
+        AIO_REPO_ID,
+        subfolder=f"{DEFAULT_AIO_VERSION}/transformer",
+        torch_dtype=dtype,
+        device_map="cuda",  # keep your existing setup
+    ),
     torch_dtype=dtype,
 ).to(device)
 _apply_fa3_if_possible()
 MAX_SEED = np.iinfo(np.int32).max
 # ============================================================
+# AIO switch (GPU, local cache only; called inside infer)
 # ============================================================
 def _unload_all_loras():
     global LOADED_ADAPTERS
     try:
         pipe.set_adapters([], adapter_weights=[])
     LOADED_ADAPTERS.clear()
+def _switch_aio_version_local_only(version: str):
     """
+    Must be called while already inside a GPU task.
+    Uses local_files_only=True (assumes ensure_aio_cached ran on CPU first).
     """
     global CURRENT_AIO_VERSION
+    version = version or DEFAULT_AIO_VERSION
+    if version == CURRENT_AIO_VERSION:
         return
+    with _AIO_SWITCH_LOCK:
+        if version == CURRENT_AIO_VERSION:
             return
+        print(f"🔁 Switching AIO transformer to: {AIO_REPO_ID} / {version}/transformer (local-only)")
+        # LoRAs are transformer-graph dependent
         _unload_all_loras()
+        # Detach old transformer strongly
         old_t = getattr(pipe, "transformer", None)
+        try:
+            if hasattr(pipe, "_modules") and "transformer" in pipe._modules:
+                pipe._modules.pop("transformer", None)
+        except Exception:
+            pass
         try:
+            pipe.transformer = None
         except Exception:
+            pass
         if old_t is not None:
             try:
         _hard_cuda_cleanup()
+        # Load from local cache only (no downloads inside GPU task)
+        new_t = QwenImageTransformer2DModel.from_pretrained(
+            AIO_REPO_ID,
+            subfolder=f"{version}/transformer",
+            torch_dtype=dtype,
+            local_files_only=True,
+        ).to(device)
         try:
+            pipe.add_module("transformer", new_t)
         except Exception:
             pipe.transformer = new_t
         _apply_fa3_if_possible()
+        CURRENT_AIO_VERSION = version
         _hard_cuda_cleanup()
 def on_lora_change_ui(selected_lora, current_prompt):
+    # Preset prompt (fill only if empty)
     if selected_lora != NONE_LORA:
         preset = LORA_PRESET_PROMPTS.get(selected_lora, "")
         if preset and (current_prompt is None or str(current_prompt).strip() == ""):
     else:
         prompt_update = gr.update(value=current_prompt)
+    # Image2 visibility/label
     if lora_requires_two_images(selected_lora):
         img2_update = gr.update(visible=True, label=image2_label_for_lora(selected_lora))
     else:
 def refresh_aio_versions_ui(current_value: str):
+    """
+    Gradio handler: re-run version discovery and refresh the version dropdown.
+
+    Rebinds the module globals AVAILABLE_AIO_VERSIONS and, when the current
+    default is no longer listed, DEFAULT_AIO_VERSION.
+
+    Returns a pair of gr.update objects: new choices/value for the dropdown
+    and the "Found N version(s)" status text.
+    """
+    global AVAILABLE_AIO_VERSIONS, DEFAULT_AIO_VERSION
+    AVAILABLE_AIO_VERSIONS = discover_aio_versions(AIO_REPO_ID)
+    # Keep the default pointing at a version that was actually discovered.
+    if DEFAULT_AIO_VERSION not in AVAILABLE_AIO_VERSIONS and AVAILABLE_AIO_VERSIONS:
+        DEFAULT_AIO_VERSION = AVAILABLE_AIO_VERSIONS[0]
+    # Preserve the user's selection when it is still available.
+    if current_value in AVAILABLE_AIO_VERSIONS:
         new_value = current_value
     else:
+        new_value = DEFAULT_AIO_VERSION
+    status = f"Found {len(AVAILABLE_AIO_VERSIONS)} version(s): {', '.join(AVAILABLE_AIO_VERSIONS)}"
+    return gr.update(choices=AVAILABLE_AIO_VERSIONS, value=new_value), gr.update(value=status)
 # ============================================================
     steps,
     progress=gr.Progress(track_tqdm=True),
 ):
+    try:
+        _hard_cuda_cleanup()
+        if input_image_1 is None:
+            raise gr.Error("Please upload Image 1.")
+        # Switch AIO version quickly (local cache only). No downloads here.
+        if aio_version and aio_version != CURRENT_AIO_VERSION:
+            _switch_aio_version_local_only(aio_version)
+        # Handle "None"
+        if lora_adapter == NONE_LORA:
+            try:
+                pipe.set_adapters([], adapter_weights=[])
+            except Exception:
+                if LOADED_ADAPTERS:
+                    pipe.set_adapters(list(LOADED_ADAPTERS), adapter_weights=[0.0] * len(LOADED_ADAPTERS))
+        else:
+            adapter_names, adapter_weights = _ensure_loaded_and_get_active_adapters(lora_adapter)
+            pipe.set_adapters(adapter_names, adapter_weights=adapter_weights)
+        if randomize_seed:
+            seed = random.randint(0, MAX_SEED)
+        generator = torch.Generator(device=device).manual_seed(seed)
+        negative_prompt = (
+            "worst quality, low quality, bad anatomy, bad hands, text, error, missing fingers, "
+            "extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry"
+        )
+        img1 = input_image_1.convert("RGB")
+        img2 = input_image_2.convert("RGB") if input_image_2 is not None else None
+        # Normalize extra images (Gallery) to PIL RGB
+        extra_imgs: list[Image.Image] = []
+        if input_images_extra:
+            for item in input_images_extra:
+                pil = _to_pil_rgb(item)
+                if pil is not None:
+                    extra_imgs.append(pil)
+        # Enforce existing 2-image LoRA behavior
+        if lora_requires_two_images(lora_adapter) and img2 is None:
+            raise gr.Error("This LoRA needs two images. Please upload Image 2 as well.")
+        # Label images as image_1, image_2, image_3...
+        labeled = build_labeled_images(img1, img2, extra_imgs)
+        pipe_images = list(labeled.values())
+        if len(pipe_images) == 1:
+            pipe_images = pipe_images[0]
+        # Resolution derived from Image 1
+        target_long_edge = get_target_long_edge_for_lora(lora_adapter)
+        width, height = compute_dimensions(img1, target_long_edge)
         result = pipe(
             image=pipe_images,
             prompt=prompt,
             generator=generator,
             true_cfg_scale=guidance_scale,
         ).images[0]
         return result, seed
+    except Exception as e:
+        print("❌ Infer failed:\n", traceback.format_exc())
+        raise
     finally:
         _hard_cuda_cleanup()
     input_pil = input_image.convert("RGB")
     guidance_scale = 1.0
     steps = 4
+    # Examples use current loaded AIO version, no swapping.
     result, seed = infer(CURRENT_AIO_VERSION, input_pil, None, None, prompt, lora_adapter, 0, True, guidance_scale, steps)
     return result, seed
         with gr.Row():
             aio_version = gr.Dropdown(
                 label="Phr00t Rapid AIO Version",
+                choices=AVAILABLE_AIO_VERSIONS,
+                value=DEFAULT_AIO_VERSION,
+                interactive=True,
             )
             refresh_versions = gr.Button("Refresh", variant="secondary")
+        aio_status = gr.Markdown(
+            f"Found {len(AVAILABLE_AIO_VERSIONS)} version(s): {', '.join(AVAILABLE_AIO_VERSIONS)}"
+        )
+        # When user changes version: CPU-cache it (fast if already cached)
+        # Also keep Run enabled (interactive=True)
+        run_button_placeholder = gr.Button("Edit Image", variant="primary", visible=False)
+        aio_version.change(
+            fn=ensure_aio_cached_ui,
+            inputs=[aio_version],
+            outputs=[aio_status, run_button_placeholder],
+        )
         refresh_versions.click(
             fn=refresh_aio_versions_ui,
             label="Examples",
         )
+    # Run pipeline:
+    #   1) CPU cache selected version (fast if already cached)
+    #   2) GPU infer (will switch using local_files_only=True if needed)
     run_button.click(
+        fn=ensure_aio_cached_ui,
+        inputs=[aio_version],
+        outputs=[aio_status, run_button],
+    ).then(
         fn=infer,
         inputs=[
             aio_version,