Qwen-Image-Edit-Rapid-AIO-Loras-Experimental

Running on Zero

App Files Files Community

Professional Noob committed on Jan 24

Commit

63f2ef7

verified ·

1 Parent(s): d92200a

Update app.py

Browse files

Files changed (1) hide show

app.py +136 -104

app.py CHANGED Viewed

@@ -1,6 +1,8 @@
 import os
 import re
 import gc
 import random
 import threading
 import traceback
@@ -114,7 +116,7 @@ if torch.cuda.is_available():
 print("Using device:", device)
 # ============================================================
-# Pipeline imports (KEEP your existing transformer class)
 # ============================================================
 from diffusers import FlowMatchEulerDiscreteScheduler  # noqa: F401
@@ -125,41 +127,34 @@ from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
 dtype = torch.bfloat16
 # ============================================================
-# AIO versioning
 # ============================================================
 AIO_REPO_ID = "Pr0f3ssi0n4ln00b/Phr00t-Qwen-Rapid-AIO"
 DEFAULT_AIO_VERSION = "v19"
 _AIO_VER_RE = re.compile(r"^(v\d+)$")
-# Cache control (prevents double-download when dropdown+run are both triggered)
-_CACHED_AIO_VERSIONS: set[str] = set()
-_CACHE_LOCKS: dict[str, threading.Lock] = {}
-_CACHE_LOCKS_GUARD = threading.Lock()
-# GPU switch lock (prevents concurrent swaps)
-_AIO_SWITCH_LOCK = threading.Lock()
-def _hard_cuda_cleanup():
-    gc.collect()
-    if torch.cuda.is_available():
-        try:
-            torch.cuda.synchronize()
-        except Exception:
-            pass
-        torch.cuda.empty_cache()
-        try:
-            torch.cuda.ipc_collect()
-        except Exception:
-            pass
-def _get_cache_lock(version: str) -> threading.Lock:
-    with _CACHE_LOCKS_GUARD:
-        if version not in _CACHE_LOCKS:
-            _CACHE_LOCKS[version] = threading.Lock()
-        return _CACHE_LOCKS[version]
 def discover_aio_versions(repo_id: str) -> list[str]:
@@ -188,14 +183,50 @@ def discover_aio_versions(repo_id: str) -> list[str]:
 AVAILABLE_AIO_VERSIONS = discover_aio_versions(AIO_REPO_ID)
-if DEFAULT_AIO_VERSION not in AVAILABLE_AIO_VERSIONS and AVAILABLE_AIO_VERSIONS:
-    DEFAULT_AIO_VERSION = AVAILABLE_AIO_VERSIONS[0]
 def ensure_aio_cached(version: str) -> None:
     """
     CPU-only: download all files under vXX/transformer/ into HF cache.
     Idempotent + locked per version to avoid duplicate concurrent downloads.
     """
     version = version or DEFAULT_AIO_VERSION
@@ -214,7 +245,6 @@ def ensure_aio_cached(version: str) -> None:
         if not needed:
             raise gr.Error(f"No files found under {sub}/ in {AIO_REPO_ID}")
-        # Download into cache (fast/no-op if already present)
         for f in needed:
             hf_hub_download(repo_id=AIO_REPO_ID, filename=f, repo_type="model")
@@ -228,28 +258,21 @@ def ensure_aio_cached_ui(version: str):
     try:
         version = version or DEFAULT_AIO_VERSION
         if version in _CACHED_AIO_VERSIONS:
-            return gr.update(value=f"✅ Cached **{version}** (on disk)")
         print(f"⬇️  Caching AIO version on CPU: {version}")
         ensure_aio_cached(version)
-        return gr.update(value=f"✅ Cached **{version}** (on disk)")
     except Exception as e:
         print("❌ Cache step failed:\n", traceback.format_exc())
         raise gr.Error(f"Cache failed for {version}: {e}")
 def refresh_aio_versions_ui(current_value: str):
-    global AVAILABLE_AIO_VERSIONS, DEFAULT_AIO_VERSION
     AVAILABLE_AIO_VERSIONS = discover_aio_versions(AIO_REPO_ID)
-    if DEFAULT_AIO_VERSION not in AVAILABLE_AIO_VERSIONS and AVAILABLE_AIO_VERSIONS:
-        DEFAULT_AIO_VERSION = AVAILABLE_AIO_VERSIONS[0]
-    if current_value in AVAILABLE_AIO_VERSIONS:
-        new_value = current_value
-    else:
-        new_value = DEFAULT_AIO_VERSION
     status = f"Found {len(AVAILABLE_AIO_VERSIONS)} version(s): {', '.join(AVAILABLE_AIO_VERSIONS)}"
     return gr.update(choices=AVAILABLE_AIO_VERSIONS, value=new_value), gr.update(value=status)
@@ -262,24 +285,45 @@ def _apply_fa3_if_possible():
         print(f"Warning: Could not set FA3 processor: {e}")
 # ============================================================
-# Build pipeline once (default version at startup)
-#   IMPORTANT: do NOT use device_map="cuda" here.
-#   Load transformer on CPU, then move whole pipeline to GPU normally.
 # ============================================================
-print(f"📦 Loading AIO transformer: {AIO_REPO_ID} / {DEFAULT_AIO_VERSION}/transformer (CPU -> then move to cuda)")
-_default_transformer = QwenImageTransformer2DModel.from_pretrained(
-    AIO_REPO_ID,
-    subfolder=f"{DEFAULT_AIO_VERSION}/transformer",
-    torch_dtype=dtype,
-    # NO device_map here
-)
 pipe = QwenImageEditPlusPipeline.from_pretrained(
     "Qwen/Qwen-Image-Edit-2511",
-    transformer=_default_transformer,
     torch_dtype=dtype,
 ).to(device)
@@ -636,40 +680,10 @@ def _unload_all_loras():
 # ============================================================
-# AIO switch (GPU, local cache only) — SAFER SWAP
 # ============================================================
-def _remove_transformer_from_pipe():
-    """Best-effort remove transformer references so GC can free VRAM."""
-    try:
-        if hasattr(pipe, "_modules") and "transformer" in pipe._modules:
-            pipe._modules.pop("transformer", None)
-    except Exception:
-        pass
-    try:
-        pipe.transformer = None
-    except Exception:
-        pass
-def _register_transformer_in_pipe(new_t):
-    """Register transformer the diffusers way when possible."""
-    try:
-        if hasattr(pipe, "register_modules"):
-            pipe.register_modules(transformer=new_t)
-            return
-    except Exception:
-        pass
-    # fallback
-    pipe.transformer = new_t
-    try:
-        if hasattr(pipe, "_modules"):
-            pipe._modules["transformer"] = new_t
-    except Exception:
-        pass
 def _switch_aio_version_local_only(target_version: str, current_loaded: str) -> str:
     """
     Must be called while already inside a GPU task.
@@ -686,30 +700,42 @@ def _switch_aio_version_local_only(target_version: str, current_loaded: str) ->
         print(f"🔁 Switching AIO transformer to: {AIO_REPO_ID} / {target_version}/transformer (local-only)")
-        # Detach LoRAs first (keeps swap cleaner)
         _unload_all_loras()
-        # Drop old transformer refs and free VRAM WITHOUT copying back to CPU
         old_t = getattr(pipe, "transformer", None)
-        _remove_transformer_from_pipe()
         if old_t is not None:
             try:
-                del old_t
             except Exception:
                 pass
         _hard_cuda_cleanup()
-        # Load new transformer on CPU (from local cache), then move to GPU normally
         new_t = QwenImageTransformer2DModel.from_pretrained(
             AIO_REPO_ID,
             subfolder=f"{target_version}/transformer",
             torch_dtype=dtype,
             local_files_only=True,
-        )
-        new_t = new_t.to(device)
-        _register_transformer_in_pipe(new_t)
         _apply_fa3_if_possible()
         _hard_cuda_cleanup()
@@ -810,17 +836,16 @@ def infer(
         target_long_edge = get_target_long_edge_for_lora(lora_adapter)
         width, height = compute_dimensions(img1, target_long_edge)
-        with torch.inference_mode():
-            result = pipe(
-                image=pipe_images,
-                prompt=prompt,
-                negative_prompt=negative_prompt,
-                height=height,
-                width=width,
-                num_inference_steps=steps,
-                generator=generator,
-                true_cfg_scale=guidance_scale,
-            ).images[0]
         status = f"✅ Loaded: **{new_loaded}** | Selected: **{aio_version}**"
         return result, seed, new_loaded, gr.update(value=status)
@@ -838,7 +863,6 @@ def infer_example(input_image, prompt, lora_adapter, loaded_version_state):
     input_pil = input_image.convert("RGB")
     guidance_scale = 1.0
     steps = 4
-    # Examples: run with currently loaded transformer (no switch)
     result, seed, new_loaded, _ = infer(
         loaded_version_state,
         loaded_version_state,
@@ -886,6 +910,7 @@ with gr.Blocks() as demo:
                 interactive=True,
             )
             refresh_versions = gr.Button("Refresh", variant="secondary")
         aio_status = gr.Markdown(
             f"✅ Loaded: **{DEFAULT_AIO_VERSION}** | Found {len(AVAILABLE_AIO_VERSIONS)} version(s): {', '.join(AVAILABLE_AIO_VERSIONS)}"
@@ -949,6 +974,13 @@ with gr.Blocks() as demo:
             outputs=[aio_version, aio_status],
         )
         gr.Examples(
             examples=[
                 ["examples/1.jpg", "Transform into anime.", "Photo-to-Anime"],

 import os
 import re
 import gc
+import sys
+import time
 import random
 import threading
 import traceback
 print("Using device:", device)
 # ============================================================
+# Pipeline imports (keep your existing transformer class)
 # ============================================================
 from diffusers import FlowMatchEulerDiscreteScheduler  # noqa: F401
 dtype = torch.bfloat16
 # ============================================================
+# AIO versioning + "boot version" persistence
 # ============================================================
 AIO_REPO_ID = "Pr0f3ssi0n4ln00b/Phr00t-Qwen-Rapid-AIO"
 DEFAULT_AIO_VERSION = "v19"
 _AIO_VER_RE = re.compile(r"^(v\d+)$")
+# Preferred boot version sources:
+#   1) Space Variable: DEFAULT_AIO_VERSION
+#   2) Local preference file at ~/.cache/aio_default_version.txt (best-effort; not guaranteed across cold rebuilds)
+_PREF_PATH = os.path.join(os.path.expanduser("~"), ".cache", "aio_default_version.txt")
+def _read_pref_file() -> Optional[str]:
+    try:
+        if os.path.isfile(_PREF_PATH):
+            with open(_PREF_PATH, "r", encoding="utf-8") as f:
+                v = f.read().strip()
+            return v or None
+    except Exception:
+        return None
+    return None
+def _write_pref_file(v: str) -> None:
+    os.makedirs(os.path.dirname(_PREF_PATH), exist_ok=True)
+    with open(_PREF_PATH, "w", encoding="utf-8") as f:
+        f.write(v)
 def discover_aio_versions(repo_id: str) -> list[str]:
 AVAILABLE_AIO_VERSIONS = discover_aio_versions(AIO_REPO_ID)
+# pick boot version (env > file > fallback)
+_env_boot = (os.environ.get("DEFAULT_AIO_VERSION") or "").strip()
+_file_boot = (_read_pref_file() or "").strip()
+BOOT_AIO_VERSION = _env_boot or _file_boot or DEFAULT_AIO_VERSION
+if BOOT_AIO_VERSION not in AVAILABLE_AIO_VERSIONS and AVAILABLE_AIO_VERSIONS:
+    BOOT_AIO_VERSION = AVAILABLE_AIO_VERSIONS[0]
+DEFAULT_AIO_VERSION = BOOT_AIO_VERSION  # use boot version as the UI + pipeline default
+# Cache control (prevents double-download when dropdown+run are both triggered)
+_CACHED_AIO_VERSIONS: set[str] = set()
+_CACHE_LOCKS: dict[str, threading.Lock] = {}
+_CACHE_LOCKS_GUARD = threading.Lock()
+# GPU switch lock (prevents concurrent swaps)
+_AIO_SWITCH_LOCK = threading.Lock()
+def _hard_cuda_cleanup():
+    gc.collect()
+    if torch.cuda.is_available():
+        try:
+            torch.cuda.synchronize()
+        except Exception:
+            pass
+        torch.cuda.empty_cache()
+        try:
+            torch.cuda.ipc_collect()
+        except Exception:
+            pass
+def _get_cache_lock(version: str) -> threading.Lock:
+    with _CACHE_LOCKS_GUARD:
+        if version not in _CACHE_LOCKS:
+            _CACHE_LOCKS[version] = threading.Lock()
+        return _CACHE_LOCKS[version]
 def ensure_aio_cached(version: str) -> None:
     """
     CPU-only: download all files under vXX/transformer/ into HF cache.
     Idempotent + locked per version to avoid duplicate concurrent downloads.
     """
     version = version or DEFAULT_AIO_VERSION
         if not needed:
             raise gr.Error(f"No files found under {sub}/ in {AIO_REPO_ID}")
         for f in needed:
             hf_hub_download(repo_id=AIO_REPO_ID, filename=f, repo_type="model")
     try:
         version = version or DEFAULT_AIO_VERSION
         if version in _CACHED_AIO_VERSIONS:
+            return gr.update(value=f"✅ Cached {version} (ready)")
         print(f"⬇️  Caching AIO version on CPU: {version}")
         ensure_aio_cached(version)
+        return gr.update(value=f"✅ Cached {version} (ready)")
     except Exception as e:
         print("❌ Cache step failed:\n", traceback.format_exc())
         raise gr.Error(f"Cache failed for {version}: {e}")
 def refresh_aio_versions_ui(current_value: str):
+    global AVAILABLE_AIO_VERSIONS
     AVAILABLE_AIO_VERSIONS = discover_aio_versions(AIO_REPO_ID)
+    new_value = current_value if current_value in AVAILABLE_AIO_VERSIONS else (AVAILABLE_AIO_VERSIONS[0] if AVAILABLE_AIO_VERSIONS else DEFAULT_AIO_VERSION)
     status = f"Found {len(AVAILABLE_AIO_VERSIONS)} version(s): {', '.join(AVAILABLE_AIO_VERSIONS)}"
     return gr.update(choices=AVAILABLE_AIO_VERSIONS, value=new_value), gr.update(value=status)
         print(f"Warning: Could not set FA3 processor: {e}")
+def set_default_and_restart_ui(version: str):
+    """
+    Best-effort: store desired boot version and force a restart so it loads at startup
+    (avoids transformer swapping during inference when possible).
+    """
+    version = version or DEFAULT_AIO_VERSION
+    if version not in AVAILABLE_AIO_VERSIONS:
+        raise gr.Error(f"Unknown version: {version}")
+    try:
+        _write_pref_file(version)
+    except Exception as e:
+        print(f"⚠️ Could not write preference file: {e}")
+    # Trigger restart a moment after returning UI update
+    def _restart_soon():
+        time.sleep(1.0)
+        # Let the supervisor restart the process
+        os._exit(0)
+    threading.Thread(target=_restart_soon, daemon=True).start()
+    return gr.update(value=f"✅ Saved startup version: **{version}**. Restarting Space now…")
 # ============================================================
+# Build pipeline once (boot version at startup)
 # ============================================================
+print(f"📦 Boot AIO version: {DEFAULT_AIO_VERSION} (env={_env_boot or '—'}, file={_file_boot or '—'})")
+print(f"📦 Loading AIO transformer: {AIO_REPO_ID} / {DEFAULT_AIO_VERSION}/transformer (startup)")
 pipe = QwenImageEditPlusPipeline.from_pretrained(
     "Qwen/Qwen-Image-Edit-2511",
+    transformer=QwenImageTransformer2DModel.from_pretrained(
+        AIO_REPO_ID,
+        subfolder=f"{DEFAULT_AIO_VERSION}/transformer",
+        torch_dtype=dtype,
+        device_map="cuda",  # keep your existing setup
+    ),
     torch_dtype=dtype,
 ).to(device)
 # ============================================================
+# AIO switch (GPU, local cache only)
 # ============================================================
 def _switch_aio_version_local_only(target_version: str, current_loaded: str) -> str:
     """
     Must be called while already inside a GPU task.
         print(f"🔁 Switching AIO transformer to: {AIO_REPO_ID} / {target_version}/transformer (local-only)")
         _unload_all_loras()
         old_t = getattr(pipe, "transformer", None)
+        # Drop module registry refs so old transformer can be freed
+        try:
+            if hasattr(pipe, "_modules") and "transformer" in pipe._modules:
+                pipe._modules.pop("transformer", None)
+        except Exception:
+            pass
+        try:
+            pipe.transformer = None
+        except Exception:
+            pass
         if old_t is not None:
             try:
+                old_t.to("cpu")
             except Exception:
                 pass
+            del old_t
         _hard_cuda_cleanup()
         new_t = QwenImageTransformer2DModel.from_pretrained(
             AIO_REPO_ID,
             subfolder=f"{target_version}/transformer",
             torch_dtype=dtype,
             local_files_only=True,
+        ).to(device)
+        try:
+            pipe.add_module("transformer", new_t)
+        except Exception:
+            pipe.transformer = new_t
         _apply_fa3_if_possible()
         _hard_cuda_cleanup()
         target_long_edge = get_target_long_edge_for_lora(lora_adapter)
         width, height = compute_dimensions(img1, target_long_edge)
+        result = pipe(
+            image=pipe_images,
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            height=height,
+            width=width,
+            num_inference_steps=steps,
+            generator=generator,
+            true_cfg_scale=guidance_scale,
+        ).images[0]
         status = f"✅ Loaded: **{new_loaded}** | Selected: **{aio_version}**"
         return result, seed, new_loaded, gr.update(value=status)
     input_pil = input_image.convert("RGB")
     guidance_scale = 1.0
     steps = 4
     result, seed, new_loaded, _ = infer(
         loaded_version_state,
         loaded_version_state,
                 interactive=True,
             )
             refresh_versions = gr.Button("Refresh", variant="secondary")
+            set_default_restart = gr.Button("Set as startup version & restart", variant="secondary")
         aio_status = gr.Markdown(
             f"✅ Loaded: **{DEFAULT_AIO_VERSION}** | Found {len(AVAILABLE_AIO_VERSIONS)} version(s): {', '.join(AVAILABLE_AIO_VERSIONS)}"
             outputs=[aio_version, aio_status],
         )
+        # Save boot version + restart
+        set_default_restart.click(
+            fn=set_default_and_restart_ui,
+            inputs=[aio_version],
+            outputs=[aio_status],
+        )
         gr.Examples(
             examples=[
                 ["examples/1.jpg", "Transform into anime.", "Photo-to-Anime"],