Qwen-Image-Edit-Rapid-AIO-Loras-Experimental

Running on Zero

App Files Files Community

Professional Noob committed on Jan 24

Commit

4947c41

verified ·

1 Parent(s): 63f2ef7

Update app.py

Browse files

Files changed (1) hide show

app.py +184 -408

app.py CHANGED Viewed

@@ -1,20 +1,16 @@
 import os
 import re
 import gc
-import sys
-import time
-import random
-import threading
 import traceback
-from typing import Iterable, Optional
 import gradio as gr
 import numpy as np
 import spaces
 import torch
 from PIL import Image
-from huggingface_hub import HfApi, hf_hub_download
 from safetensors.torch import load_file as safetensors_load_file
 from gradio.themes import Soft
@@ -104,7 +100,6 @@ orange_red_theme = OrangeRedTheme()
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print("===== Application Startup =====")
 print("CUDA_VISIBLE_DEVICES=", os.environ.get("CUDA_VISIBLE_DEVICES"))
 print("torch.__version__ =", torch.__version__)
 print("torch.version.cuda =", torch.version.cuda)
@@ -116,221 +111,86 @@ if torch.cuda.is_available():
 print("Using device:", device)
 # ============================================================
-# Pipeline imports (keep your existing transformer class)
-# ============================================================
-from diffusers import FlowMatchEulerDiscreteScheduler  # noqa: F401
-from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
-from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
-from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
-dtype = torch.bfloat16
-# ============================================================
-# AIO versioning + "boot version" persistence
 # ============================================================
 AIO_REPO_ID = "Pr0f3ssi0n4ln00b/Phr00t-Qwen-Rapid-AIO"
 DEFAULT_AIO_VERSION = "v19"
-_AIO_VER_RE = re.compile(r"^(v\d+)$")
-# Preferred boot version sources:
-#   1) Space Variable: DEFAULT_AIO_VERSION
-#   2) Local preference file in HF cache dir (best-effort; not guaranteed across cold rebuilds)
-_PREF_PATH = os.path.join(os.path.expanduser("~"), ".cache", "aio_default_version.txt")
-def _read_pref_file() -> Optional[str]:
-    try:
-        if os.path.isfile(_PREF_PATH):
-            with open(_PREF_PATH, "r", encoding="utf-8") as f:
-                v = f.read().strip()
-            return v or None
-    except Exception:
         return None
     return None
-def _write_pref_file(v: str) -> None:
-    os.makedirs(os.path.dirname(_PREF_PATH), exist_ok=True)
-    with open(_PREF_PATH, "w", encoding="utf-8") as f:
-        f.write(v)
-def discover_aio_versions(repo_id: str) -> list[str]:
-    """
-    Finds versions by scanning repo file paths with the naming convention:
-      vNN/transformer/...
-    """
-    try:
-        api = HfApi()
-        files = api.list_repo_files(repo_id=repo_id, repo_type="model")
-        versions = set()
-        for f in files:
-            if "/transformer/" not in f:
-                continue
-            head = f.split("/transformer/", 1)[0]
-            if _AIO_VER_RE.fullmatch(head):
-                versions.add(head)
-        if not versions:
-            return [DEFAULT_AIO_VERSION]
-        return sorted(versions, key=lambda s: int(s[1:]))
-    except Exception as e:
-        print(f"⚠️ AIO version discovery failed: {e}")
-        return [DEFAULT_AIO_VERSION]
-AVAILABLE_AIO_VERSIONS = discover_aio_versions(AIO_REPO_ID)
-# pick boot version (env > file > fallback)
-_env_boot = (os.environ.get("DEFAULT_AIO_VERSION") or "").strip()
-_file_boot = (_read_pref_file() or "").strip()
-BOOT_AIO_VERSION = _env_boot or _file_boot or DEFAULT_AIO_VERSION
-if BOOT_AIO_VERSION not in AVAILABLE_AIO_VERSIONS and AVAILABLE_AIO_VERSIONS:
-    BOOT_AIO_VERSION = AVAILABLE_AIO_VERSIONS[0]
-DEFAULT_AIO_VERSION = BOOT_AIO_VERSION  # use boot version as the UI + pipeline default
-# Cache control (prevents double-download when dropdown+run are both triggered)
-_CACHED_AIO_VERSIONS: set[str] = set()
-_CACHE_LOCKS: dict[str, threading.Lock] = {}
-_CACHE_LOCKS_GUARD = threading.Lock()
-# GPU switch lock (prevents concurrent swaps)
-_AIO_SWITCH_LOCK = threading.Lock()
-def _hard_cuda_cleanup():
-    gc.collect()
-    if torch.cuda.is_available():
-        try:
-            torch.cuda.synchronize()
-        except Exception:
-            pass
-        torch.cuda.empty_cache()
-        try:
-            torch.cuda.ipc_collect()
-        except Exception:
-            pass
-def _get_cache_lock(version: str) -> threading.Lock:
-    with _CACHE_LOCKS_GUARD:
-        if version not in _CACHE_LOCKS:
-            _CACHE_LOCKS[version] = threading.Lock()
-        return _CACHE_LOCKS[version]
-def ensure_aio_cached(version: str) -> None:
-    """
-    CPU-only: download all files under vXX/transformer/ into HF cache.
-    Idempotent + locked per version to avoid duplicate concurrent downloads.
-    """
-    version = version or DEFAULT_AIO_VERSION
-    if version in _CACHED_AIO_VERSIONS:
-        return
-    lock = _get_cache_lock(version)
-    with lock:
-        if version in _CACHED_AIO_VERSIONS:
-            return
-        sub = f"{version}/transformer"
-        api = HfApi()
-        files = api.list_repo_files(repo_id=AIO_REPO_ID, repo_type="model")
-        needed = [f for f in files if f.startswith(sub + "/")]
-        if not needed:
-            raise gr.Error(f"No files found under {sub}/ in {AIO_REPO_ID}")
-        for f in needed:
-            hf_hub_download(repo_id=AIO_REPO_ID, filename=f, repo_type="model")
-        _CACHED_AIO_VERSIONS.add(version)
-def ensure_aio_cached_ui(version: str):
-    """
-    Gradio handler (CPU): cache selected version.
-    """
-    try:
-        version = version or DEFAULT_AIO_VERSION
-        if version in _CACHED_AIO_VERSIONS:
-            return gr.update(value=f"✅ Cached {version} (ready)")
-        print(f"⬇️  Caching AIO version on CPU: {version}")
-        ensure_aio_cached(version)
-        return gr.update(value=f"✅ Cached {version} (ready)")
-    except Exception as e:
-        print("❌ Cache step failed:\n", traceback.format_exc())
-        raise gr.Error(f"Cache failed for {version}: {e}")
-def refresh_aio_versions_ui(current_value: str):
-    global AVAILABLE_AIO_VERSIONS
-    AVAILABLE_AIO_VERSIONS = discover_aio_versions(AIO_REPO_ID)
-    new_value = current_value if current_value in AVAILABLE_AIO_VERSIONS else (AVAILABLE_AIO_VERSIONS[0] if AVAILABLE_AIO_VERSIONS else DEFAULT_AIO_VERSION)
-    status = f"Found {len(AVAILABLE_AIO_VERSIONS)} version(s): {', '.join(AVAILABLE_AIO_VERSIONS)}"
-    return gr.update(choices=AVAILABLE_AIO_VERSIONS, value=new_value), gr.update(value=status)
-def _apply_fa3_if_possible():
-    try:
-        pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
-        print("Flash Attention 3 Processor set successfully.")
-    except Exception as e:
-        print(f"Warning: Could not set FA3 processor: {e}")
-def set_default_and_restart_ui(version: str):
-    """
-    Best-effort: store desired boot version and force a restart so it loads at startup
-    (avoids transformer swapping during inference when possible).
-    """
-    version = version or DEFAULT_AIO_VERSION
-    if version not in AVAILABLE_AIO_VERSIONS:
-        raise gr.Error(f"Unknown version: {version}")
-    try:
-        _write_pref_file(version)
-    except Exception as e:
-        print(f"⚠️ Could not write preference file: {e}")
-    # Trigger restart a moment after returning UI update
-    def _restart_soon():
-        time.sleep(1.0)
-        # Let the supervisor restart the process
-        os._exit(0)
-    threading.Thread(target=_restart_soon, daemon=True).start()
-    return gr.update(value=f"✅ Saved startup version: **{version}**. Restarting Space now…")
 # ============================================================
-# Build pipeline once (boot version at startup)
 # ============================================================
-print(f"📦 Boot AIO version: {DEFAULT_AIO_VERSION} (env={_env_boot or '—'}, file={_file_boot or '—'})")
-print(f"📦 Loading AIO transformer: {AIO_REPO_ID} / {DEFAULT_AIO_VERSION}/transformer (startup)")
-pipe = QwenImageEditPlusPipeline.from_pretrained(
-    "Qwen/Qwen-Image-Edit-2511",
-    transformer=QwenImageTransformer2DModel.from_pretrained(
-        AIO_REPO_ID,
-        subfolder=f"{DEFAULT_AIO_VERSION}/transformer",
-        torch_dtype=dtype,
-        device_map="cuda",  # keep your existing setup
-    ),
-    torch_dtype=dtype,
-).to(device)
-_apply_fa3_if_possible()
-# mark default as cached (it’s loaded anyway)
-_CACHED_AIO_VERSIONS.add(DEFAULT_AIO_VERSION)
 MAX_SEED = np.iinfo(np.int32).max
@@ -396,7 +256,7 @@ ADAPTER_SPECS = {
         "weights": "bfs_head_v5_2511_original.safetensors",
         "adapter_name": "BFS-Best-Faceswap",
         "strength": 1.0,
-        "needs_alpha_fix": True,
     },
     "Multiple-Angles": {
         "type": "single",
@@ -473,6 +333,7 @@ LORA_PRESET_PROMPTS = {
     "BFS-Best-FaceSwap": "head_swap: start with Picture 1 as the base image, keeping its lighting, environment, and background. remove the head from Picture 1 completely and replace it with the head from Picture 2, strictly preserving the hair, eye color, and nose structure of Picture 2. copy the eye direction, head rotation, and micro-expressions from Picture 1. high quality, sharp details, 4k",
 }
 LOADED_ADAPTERS = set()
 # ============================================================
@@ -514,9 +375,14 @@ def image2_label_for_lora(lora_adapter: str) -> str:
 def _to_pil_rgb(x) -> Optional[Image.Image]:
     if x is None:
         return None
     if isinstance(x, tuple) and len(x) >= 1:
         x = x[0]
         if x is None:
@@ -528,6 +394,7 @@ def _to_pil_rgb(x) -> Optional[Image.Image]:
     if isinstance(x, np.ndarray):
         return Image.fromarray(x).convert("RGB")
     try:
         return Image.fromarray(np.array(x)).convert("RGB")
     except Exception:
@@ -539,8 +406,16 @@ def build_labeled_images(
     img2: Optional[Image.Image],
     extra_imgs: Optional[list[Image.Image]],
 ) -> dict[str, Image.Image]:
     labeled: dict[str, Image.Image] = {}
     idx = 1
     labeled[f"image_{idx}"] = img1
     idx += 1
@@ -564,7 +439,17 @@ def build_labeled_images(
 def _inject_missing_alpha_keys(state_dict: dict) -> dict:
     bases = {}
     for k, v in state_dict.items():
         if not isinstance(v, torch.Tensor):
             continue
@@ -590,6 +475,10 @@ def _inject_missing_alpha_keys(state_dict: dict) -> dict:
 def _load_lora_weights_with_fallback(repo: str, weight_name: str, adapter_name: str, needs_alpha_fix: bool = False):
     try:
         pipe.load_lora_weights(repo, weight_name=weight_name, adapter_name=adapter_name)
         return
@@ -601,6 +490,7 @@ def _load_lora_weights_with_fallback(repo: str, weight_name: str, adapter_name:
         local_path = hf_hub_download(repo_id=repo, filename=weight_name)
         sd = safetensors_load_file(local_path)
         sd = _inject_missing_alpha_keys(sd)
         pipe.load_lora_weights(sd, adapter_name=adapter_name)
         return
@@ -632,13 +522,18 @@ def _ensure_loaded_and_get_active_adapters(selected_lora: str):
             if adapter_name not in LOADED_ADAPTERS:
                 print(f"--- Downloading and Loading Adapter Part: {selected_lora} / {adapter_name} ---")
-                _load_lora_weights_with_fallback(
-                    repo=repo,
-                    weight_name=weights,
-                    adapter_name=adapter_name,
-                    needs_alpha_fix=needs_alpha_fix,
-                )
-                LOADED_ADAPTERS.add(adapter_name)
             adapter_names.append(adapter_name)
             adapter_weights.append(strength)
@@ -652,13 +547,18 @@ def _ensure_loaded_and_get_active_adapters(selected_lora: str):
         if adapter_name not in LOADED_ADAPTERS:
             print(f"--- Downloading and Loading Adapter: {selected_lora} ---")
-            _load_lora_weights_with_fallback(
-                repo=repo,
-                weight_name=weights,
-                adapter_name=adapter_name,
-                needs_alpha_fix=needs_alpha_fix,
-            )
-            LOADED_ADAPTERS.add(adapter_name)
         adapter_names = [adapter_name]
         adapter_weights = [strength]
@@ -666,89 +566,13 @@ def _ensure_loaded_and_get_active_adapters(selected_lora: str):
     return adapter_names, adapter_weights
-def _unload_all_loras():
-    global LOADED_ADAPTERS
-    try:
-        pipe.set_adapters([], adapter_weights=[])
-    except Exception:
-        pass
-    try:
-        pipe.unload_lora_weights()
-    except Exception:
-        pass
-    LOADED_ADAPTERS.clear()
-# ============================================================
-# AIO switch (GPU, local cache only)
-# ============================================================
-def _switch_aio_version_local_only(target_version: str, current_loaded: str) -> str:
-    """
-    Must be called while already inside a GPU task.
-    Uses local_files_only=True (assumes ensure_aio_cached ran on CPU first).
-    Returns the new loaded version (or unchanged).
-    """
-    target_version = target_version or DEFAULT_AIO_VERSION
-    if target_version == current_loaded:
-        return current_loaded
-    with _AIO_SWITCH_LOCK:
-        if target_version == current_loaded:
-            return current_loaded
-        print(f"🔁 Switching AIO transformer to: {AIO_REPO_ID} / {target_version}/transformer (local-only)")
-        _unload_all_loras()
-        old_t = getattr(pipe, "transformer", None)
-        # Drop module registry refs so old transformer can be freed
-        try:
-            if hasattr(pipe, "_modules") and "transformer" in pipe._modules:
-                pipe._modules.pop("transformer", None)
-        except Exception:
-            pass
-        try:
-            pipe.transformer = None
-        except Exception:
-            pass
-        if old_t is not None:
-            try:
-                old_t.to("cpu")
-            except Exception:
-                pass
-            del old_t
-        _hard_cuda_cleanup()
-        new_t = QwenImageTransformer2DModel.from_pretrained(
-            AIO_REPO_ID,
-            subfolder=f"{target_version}/transformer",
-            torch_dtype=dtype,
-            local_files_only=True,
-        ).to(device)
-        try:
-            pipe.add_module("transformer", new_t)
-        except Exception:
-            pipe.transformer = new_t
-        _apply_fa3_if_possible()
-        _hard_cuda_cleanup()
-        return target_version
 # ============================================================
 # UI handlers
 # ============================================================
 def on_lora_change_ui(selected_lora, current_prompt):
     if selected_lora != NONE_LORA:
         preset = LORA_PRESET_PROMPTS.get(selected_lora, "")
         if preset and (current_prompt is None or str(current_prompt).strip() == ""):
@@ -758,6 +582,7 @@ def on_lora_change_ui(selected_lora, current_prompt):
     else:
         prompt_update = gr.update(value=current_prompt)
     if lora_requires_two_images(selected_lora):
         img2_update = gr.update(visible=True, label=image2_label_for_lora(selected_lora))
     else:
@@ -773,11 +598,9 @@ def on_lora_change_ui(selected_lora, current_prompt):
 @spaces.GPU
 def infer(
-    aio_version,
-    loaded_version_state,
     input_image_1,
     input_image_2,
-    input_images_extra,
     prompt,
     lora_adapter,
     seed,
@@ -786,56 +609,61 @@ def infer(
     steps,
     progress=gr.Progress(track_tqdm=True),
 ):
-    try:
-        _hard_cuda_cleanup()
-        if input_image_1 is None:
-            raise gr.Error("Please upload Image 1.")
-        # Switch (local cache only)
-        new_loaded = _switch_aio_version_local_only(aio_version, loaded_version_state)
-        # LoRA handling
-        if lora_adapter == NONE_LORA:
-            try:
-                pipe.set_adapters([], adapter_weights=[])
-            except Exception:
-                if LOADED_ADAPTERS:
-                    pipe.set_adapters(list(LOADED_ADAPTERS), adapter_weights=[0.0] * len(LOADED_ADAPTERS))
-        else:
-            adapter_names, adapter_weights = _ensure_loaded_and_get_active_adapters(lora_adapter)
-            pipe.set_adapters(adapter_names, adapter_weights=adapter_weights)
-        if randomize_seed:
-            seed = random.randint(0, MAX_SEED)
-        generator = torch.Generator(device=device).manual_seed(seed)
-        negative_prompt = (
-            "worst quality, low quality, bad anatomy, bad hands, text, error, missing fingers, "
-            "extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry"
-        )
-        img1 = input_image_1.convert("RGB")
-        img2 = input_image_2.convert("RGB") if input_image_2 is not None else None
-        extra_imgs: list[Image.Image] = []
-        if input_images_extra:
-            for item in input_images_extra:
-                pil = _to_pil_rgb(item)
-                if pil is not None:
-                    extra_imgs.append(pil)
-        if lora_requires_two_images(lora_adapter) and img2 is None:
-            raise gr.Error("This LoRA needs two images. Please upload Image 2 as well.")
-        labeled = build_labeled_images(img1, img2, extra_imgs)
-        pipe_images = list(labeled.values())
-        if len(pipe_images) == 1:
-            pipe_images = pipe_images[0]
-        target_long_edge = get_target_long_edge_for_lora(lora_adapter)
-        width, height = compute_dimensions(img1, target_long_edge)
         result = pipe(
             image=pipe_images,
             prompt=prompt,
@@ -846,37 +674,23 @@ def infer(
             generator=generator,
             true_cfg_scale=guidance_scale,
         ).images[0]
-        status = f"✅ Loaded: **{new_loaded}** | Selected: **{aio_version}**"
-        return result, seed, new_loaded, gr.update(value=status)
-    except Exception:
-        print("❌ Infer failed:\n", traceback.format_exc())
-        raise
     finally:
-        _hard_cuda_cleanup()
 @spaces.GPU
-def infer_example(input_image, prompt, lora_adapter, loaded_version_state):
     if input_image is None:
-        return None, 0, loaded_version_state
     input_pil = input_image.convert("RGB")
     guidance_scale = 1.0
     steps = 4
-    result, seed, new_loaded, _ = infer(
-        loaded_version_state,
-        loaded_version_state,
-        input_pil,
-        None,
-        None,
-        prompt,
-        lora_adapter,
-        0,
-        True,
-        guidance_scale,
-        steps,
-    )
-    return result, seed, new_loaded
 # ============================================================
@@ -891,9 +705,12 @@ css = """
 #main-title h1 {font-size: 2.1em !important;}
 """
-with gr.Blocks() as demo:
-    loaded_version_state = gr.State(DEFAULT_AIO_VERSION)
     with gr.Column(elem_id="col-container"):
         gr.Markdown("# **Qwen-Image-Edit-2511-LoRAs-Fast**", elem_id="main-title")
         gr.Markdown(
@@ -901,20 +718,7 @@ with gr.Blocks() as demo:
             "[LoRA](https://huggingface.co/models?other=base_model:adapter:Qwen/Qwen-Image-Edit-2511) adapters for the "
             "[Qwen-Image-Edit](https://huggingface.co/Qwen/Qwen-Image-Edit-2511) model."
         )
-        with gr.Row():
-            aio_version = gr.Dropdown(
-                label="Phr00t Rapid AIO Version",
-                choices=AVAILABLE_AIO_VERSIONS,
-                value=DEFAULT_AIO_VERSION,
-                interactive=True,
-            )
-            refresh_versions = gr.Button("Refresh", variant="secondary")
-            set_default_restart = gr.Button("Set as startup version & restart", variant="secondary")
-        aio_status = gr.Markdown(
-            f"✅ Loaded: **{DEFAULT_AIO_VERSION}** | Found {len(AVAILABLE_AIO_VERSIONS)} version(s): {', '.join(AVAILABLE_AIO_VERSIONS)}"
-        )
         with gr.Row(equal_height=True):
             with gr.Column():
@@ -955,32 +759,13 @@ with gr.Blocks() as demo:
                     guidance_scale = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
                     steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, step=1, value=4)
         lora_adapter.change(
             fn=on_lora_change_ui,
             inputs=[lora_adapter, prompt],
             outputs=[prompt, input_image_2],
         )
-        # Dropdown change: CPU cache (idempotent + locked)
-        aio_version.change(
-            fn=ensure_aio_cached_ui,
-            inputs=[aio_version],
-            outputs=[aio_status],
-        )
-        refresh_versions.click(
-            fn=refresh_aio_versions_ui,
-            inputs=[aio_version],
-            outputs=[aio_version, aio_status],
-        )
-        # Save boot version + restart
-        set_default_restart.click(
-            fn=set_default_and_restart_ui,
-            inputs=[aio_version],
-            outputs=[aio_status],
-        )
         gr.Examples(
             examples=[
                 ["examples/1.jpg", "Transform into anime.", "Photo-to-Anime"],
@@ -1009,25 +794,16 @@ with gr.Blocks() as demo:
                 ["examples/4.jpg", "Switch the camera to a wide-angle lens.", "Multiple-Angles"],
                 ["examples/11.jpg", "Upscale this picture to 4K resolution.", "Upscale2K"],
             ],
-            inputs=[input_image_1, prompt, lora_adapter, loaded_version_state],
-            outputs=[output_image, seed, loaded_version_state],
             fn=infer_example,
             cache_examples=False,
             label="Examples",
         )
-    # Run:
-    #   1) CPU cache selected version (idempotent/locked)
-    #   2) GPU infer (switch local-only if needed)
     run_button.click(
-        fn=ensure_aio_cached_ui,
-        inputs=[aio_version],
-        outputs=[aio_status],
-    ).then(
         fn=infer,
         inputs=[
-            aio_version,
-            loaded_version_state,
             input_image_1,
             input_image_2,
             input_images_extra,
@@ -1038,7 +814,7 @@ with gr.Blocks() as demo:
             guidance_scale,
             steps,
         ],
-        outputs=[output_image, seed, loaded_version_state, aio_status],
     )
 if __name__ == "__main__":

 import os
 import re
 import gc
 import traceback
 import gradio as gr
 import numpy as np
 import spaces
 import torch
+import random
 from PIL import Image
+from typing import Iterable, Optional
+from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file as safetensors_load_file
 from gradio.themes import Soft
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print("CUDA_VISIBLE_DEVICES=", os.environ.get("CUDA_VISIBLE_DEVICES"))
 print("torch.__version__ =", torch.__version__)
 print("torch.version.cuda =", torch.version.cuda)
 print("Using device:", device)
 # ============================================================
+# AIO version (Space variable)
 # ============================================================
 AIO_REPO_ID = "Pr0f3ssi0n4ln00b/Phr00t-Qwen-Rapid-AIO"
 DEFAULT_AIO_VERSION = "v19"
+_VER_RE = re.compile(r"^v\d+$")
+_DIGITS_RE = re.compile(r"^\d+$")
+def _normalize_version(raw: str) -> Optional[str]:
+    if raw is None:
         return None
+    s = str(raw).strip()
+    if not s:
+        return None
+    if _VER_RE.fullmatch(s):
+        return s
+    # forgiving: allow "21" -> "v21"
+    if _DIGITS_RE.fullmatch(s):
+        return f"v{s}"
     return None
+_AIO_ENV_RAW = os.environ.get("AIO_VERSION", "")
+_AIO_ENV_NORM = _normalize_version(_AIO_ENV_RAW)
+AIO_VERSION = _AIO_ENV_NORM or DEFAULT_AIO_VERSION
+AIO_VERSION_SOURCE = "env" if _AIO_ENV_NORM else "default(v19)"
+print(f"AIO_VERSION (env raw) = {_AIO_ENV_RAW!r}")
+print(f"AIO_VERSION (normalized) = {_AIO_ENV_NORM!r}")
+print(f"Using AIO_VERSION = {AIO_VERSION} ({AIO_VERSION_SOURCE})")
 # ============================================================
+# Pipeline
 # ============================================================
+from diffusers import FlowMatchEulerDiscreteScheduler  # noqa: F401
+from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
+from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
+from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
+dtype = torch.bfloat16
+def _load_pipe_with_version(version: str) -> QwenImageEditPlusPipeline:
+    sub = f"{version}/transformer"
+    print(f"📦 Loading AIO transformer: {AIO_REPO_ID} / {sub}")
+    p = QwenImageEditPlusPipeline.from_pretrained(
+        "Qwen/Qwen-Image-Edit-2511",
+        transformer=QwenImageTransformer2DModel.from_pretrained(
+            AIO_REPO_ID,
+            subfolder=sub,
+            torch_dtype=dtype,
+            device_map="cuda",
+        ),
+        torch_dtype=dtype,
+    ).to(device)
+    return p
+# Forgiving load: try env/default version, fallback to v19 if it fails
+try:
+    pipe = _load_pipe_with_version(AIO_VERSION)
+except Exception as e:
+    print("❌ Failed to load requested AIO_VERSION. Falling back to v19.")
+    print("---- exception ----")
+    print(traceback.format_exc())
+    print("-------------------")
+    AIO_VERSION = DEFAULT_AIO_VERSION
+    AIO_VERSION_SOURCE = "fallback_to_v19"
+    pipe = _load_pipe_with_version(AIO_VERSION)
+# Apply FA3 Optimization
+try:
+    pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
+    print("Flash Attention 3 Processor set successfully.")
+except Exception as e:
+    print(f"Warning: Could not set FA3 processor: {e}")
 MAX_SEED = np.iinfo(np.int32).max
         "weights": "bfs_head_v5_2511_original.safetensors",
         "adapter_name": "BFS-Best-Faceswap",
         "strength": 1.0,
+        "needs_alpha_fix": True,  # <-- fixes KeyError 'img_in.alpha'
     },
     "Multiple-Angles": {
         "type": "single",
     "BFS-Best-FaceSwap": "head_swap: start with Picture 1 as the base image, keeping its lighting, environment, and background. remove the head from Picture 1 completely and replace it with the head from Picture 2, strictly preserving the hair, eye color, and nose structure of Picture 2. copy the eye direction, head rotation, and micro-expressions from Picture 1. high quality, sharp details, 4k",
 }
+# Track what is currently loaded in memory (adapter_name values)
 LOADED_ADAPTERS = set()
 # ============================================================
 def _to_pil_rgb(x) -> Optional[Image.Image]:
+    """
+    Accepts PIL / numpy / (image, caption) tuples from gr.Gallery and returns PIL RGB.
+    Gradio Gallery commonly yields tuples like (image, caption).
+    """
     if x is None:
         return None
+    # Gallery often returns (image, caption)
     if isinstance(x, tuple) and len(x) >= 1:
         x = x[0]
         if x is None:
     if isinstance(x, np.ndarray):
         return Image.fromarray(x).convert("RGB")
+    # Best-effort fallback
     try:
         return Image.fromarray(np.array(x)).convert("RGB")
     except Exception:
     img2: Optional[Image.Image],
     extra_imgs: Optional[list[Image.Image]],
 ) -> dict[str, Image.Image]:
+    """
+    Creates labels image_1, image_2, image_3... based on what is actually uploaded:
+      - img1 is always image_1
+      - img2 becomes image_2 only if present
+      - extras start immediately after the last present base box
+    The pipeline receives images in this exact order.
+    """
     labeled: dict[str, Image.Image] = {}
     idx = 1
     labeled[f"image_{idx}"] = img1
     idx += 1
 def _inject_missing_alpha_keys(state_dict: dict) -> dict:
+    """
+    Diffusers' Qwen LoRA converter expects '<module>.alpha' keys.
+    BFS safetensors omits them. We inject alpha = rank (neutral scaling).
+    IMPORTANT: diffusers may strip 'diffusion_model.' before lookup, so we
+    inject BOTH:
+      - diffusion_model.xxx.alpha
+      - xxx.alpha
+    """
     bases = {}
     for k, v in state_dict.items():
         if not isinstance(v, torch.Tensor):
             continue
 def _load_lora_weights_with_fallback(repo: str, weight_name: str, adapter_name: str, needs_alpha_fix: bool = False):
+    """
+    Normal path: pipe.load_lora_weights(repo, weight_name=..., adapter_name=...)
+    BFS fallback: download safetensors, inject missing alpha keys, then load from dict.
+    """
     try:
         pipe.load_lora_weights(repo, weight_name=weight_name, adapter_name=adapter_name)
         return
         local_path = hf_hub_download(repo_id=repo, filename=weight_name)
         sd = safetensors_load_file(local_path)
         sd = _inject_missing_alpha_keys(sd)
         pipe.load_lora_weights(sd, adapter_name=adapter_name)
         return
             if adapter_name not in LOADED_ADAPTERS:
                 print(f"--- Downloading and Loading Adapter Part: {selected_lora} / {adapter_name} ---")
+                try:
+                    _load_lora_weights_with_fallback(
+                        repo=repo,
+                        weight_name=weights,
+                        adapter_name=adapter_name,
+                        needs_alpha_fix=needs_alpha_fix,
+                    )
+                    LOADED_ADAPTERS.add(adapter_name)
+                except Exception as e:
+                    raise gr.Error(f"Failed to load adapter part {selected_lora}/{adapter_name}: {e}")
+            else:
+                print(f"--- Adapter part already loaded: {selected_lora} / {adapter_name} ---")
             adapter_names.append(adapter_name)
             adapter_weights.append(strength)
         if adapter_name not in LOADED_ADAPTERS:
             print(f"--- Downloading and Loading Adapter: {selected_lora} ---")
+            try:
+                _load_lora_weights_with_fallback(
+                    repo=repo,
+                    weight_name=weights,
+                    adapter_name=adapter_name,
+                    needs_alpha_fix=needs_alpha_fix,
+                )
+                LOADED_ADAPTERS.add(adapter_name)
+            except Exception as e:
+                raise gr.Error(f"Failed to load adapter {selected_lora}: {e}")
+        else:
+            print(f"--- Adapter {selected_lora} is already loaded. ---")
         adapter_names = [adapter_name]
         adapter_weights = [strength]
     return adapter_names, adapter_weights
 # ============================================================
 # UI handlers
 # ============================================================
 def on_lora_change_ui(selected_lora, current_prompt):
+    # Preset prompt (fill only if empty)
     if selected_lora != NONE_LORA:
         preset = LORA_PRESET_PROMPTS.get(selected_lora, "")
         if preset and (current_prompt is None or str(current_prompt).strip() == ""):
     else:
         prompt_update = gr.update(value=current_prompt)
+    # Image2 visibility/label
     if lora_requires_two_images(selected_lora):
         img2_update = gr.update(visible=True, label=image2_label_for_lora(selected_lora))
     else:
 @spaces.GPU
 def infer(
     input_image_1,
     input_image_2,
+    input_images_extra,  # gallery multi-image box
     prompt,
     lora_adapter,
     seed,
     steps,
     progress=gr.Progress(track_tqdm=True),
 ):
+    gc.collect()
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+    if input_image_1 is None:
+        raise gr.Error("Please upload Image 1.")
+    # Handle "None"
+    if lora_adapter == NONE_LORA:
+        try:
+            pipe.set_adapters([], adapter_weights=[])
+        except Exception:
+            if LOADED_ADAPTERS:
+                pipe.set_adapters(list(LOADED_ADAPTERS), adapter_weights=[0.0] * len(LOADED_ADAPTERS))
+    else:
+        adapter_names, adapter_weights = _ensure_loaded_and_get_active_adapters(lora_adapter)
+        pipe.set_adapters(adapter_names, adapter_weights=adapter_weights)
+    if randomize_seed:
+        seed = random.randint(0, MAX_SEED)
+    generator = torch.Generator(device=device).manual_seed(seed)
+    negative_prompt = (
+        "worst quality, low quality, bad anatomy, bad hands, text, error, missing fingers, "
+        "extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry"
+    )
+    img1 = input_image_1.convert("RGB")
+    img2 = input_image_2.convert("RGB") if input_image_2 is not None else None
+    # Normalize extra images (Gallery) to PIL RGB (handles tuples from Gallery)
+    extra_imgs: list[Image.Image] = []
+    if input_images_extra:
+        for item in input_images_extra:
+            pil = _to_pil_rgb(item)
+            if pil is not None:
+                extra_imgs.append(pil)
+    # Enforce existing 2-image LoRA behavior (image_1 + image_2 required)
+    if lora_requires_two_images(lora_adapter) and img2 is None:
+        raise gr.Error("This LoRA needs two images. Please upload Image 2 as well.")
+    # Label images as image_1, image_2, image_3...
+    labeled = build_labeled_images(img1, img2, extra_imgs)
+    # Pass to pipeline in labeled order. Keep single-image call when only one is present.
+    pipe_images = list(labeled.values())
+    if len(pipe_images) == 1:
+        pipe_images = pipe_images[0]
+    # Resolution derived from Image 1 (base/body/target)
+    target_long_edge = get_target_long_edge_for_lora(lora_adapter)
+    width, height = compute_dimensions(img1, target_long_edge)
+    try:
         result = pipe(
             image=pipe_images,
             prompt=prompt,
             generator=generator,
             true_cfg_scale=guidance_scale,
         ).images[0]
+        return result, seed
     finally:
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
 @spaces.GPU
+def infer_example(input_image, prompt, lora_adapter):
+    """Run one example image through ``infer`` with fixed settings.
+
+    Args:
+        input_image: Example image. ``.convert("RGB")`` is called on it, so a
+            PIL image is presumably expected (confirm against the Examples input).
+        prompt: Forwarded unchanged to ``infer``.
+        lora_adapter: Forwarded unchanged to ``infer``.
+
+    Returns:
+        The ``(result, seed)`` pair returned by ``infer``, or ``(None, 0)``
+        when ``input_image`` is ``None``.
+    """
     if input_image is None:
+        return None, 0
     input_pil = input_image.convert("RGB")
     guidance_scale = 1.0
     steps = 4
+    # Examples don't supply Image 2 or extra images; and example list doesn't include AnyPose/BFS.
+    # NOTE(review): the positional ``0, True`` are presumably ``seed`` and
+    # ``randomize_seed`` -- confirm against the full signature of ``infer``.
+    result, seed = infer(input_pil, None, None, prompt, lora_adapter, 0, True, guidance_scale, steps)
+    return result, seed
 # ============================================================
 #main-title h1 {font-size: 2.1em !important;}
 """
+# Markdown text (passed to gr.Markdown inside the Blocks layout below) that
+# shows AIO_VERSION, AIO_VERSION_SOURCE, and the repr of _AIO_ENV_RAW, the
+# latter labelled in the output as the `AIO_VERSION` env value.
+aio_status_line = (
+    f"**AIO transformer version:** `{AIO_VERSION}`  "
+    f"({AIO_VERSION_SOURCE}; env `AIO_VERSION`={_AIO_ENV_RAW!r})"
+)
+with gr.Blocks() as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown("# **Qwen-Image-Edit-2511-LoRAs-Fast**", elem_id="main-title")
         gr.Markdown(
             "[LoRA](https://huggingface.co/models?other=base_model:adapter:Qwen/Qwen-Image-Edit-2511) adapters for the "
             "[Qwen-Image-Edit](https://huggingface.co/Qwen/Qwen-Image-Edit-2511) model."
         )
+        gr.Markdown(aio_status_line)
         with gr.Row(equal_height=True):
             with gr.Column():
                     guidance_scale = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
                     steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, step=1, value=4)
+        # On LoRA selection: preset prompt + toggle Image 2
         lora_adapter.change(
             fn=on_lora_change_ui,
             inputs=[lora_adapter, prompt],
             outputs=[prompt, input_image_2],
         )
         gr.Examples(
             examples=[
                 ["examples/1.jpg", "Transform into anime.", "Photo-to-Anime"],
                 ["examples/4.jpg", "Switch the camera to a wide-angle lens.", "Multiple-Angles"],
                 ["examples/11.jpg", "Upscale this picture to 4K resolution.", "Upscale2K"],
             ],
+            inputs=[input_image_1, prompt, lora_adapter],
+            outputs=[output_image, seed],
             fn=infer_example,
             cache_examples=False,
             label="Examples",
         )
     run_button.click(
         fn=infer,
         inputs=[
             input_image_1,
             input_image_2,
             input_images_extra,
             guidance_scale,
             steps,
         ],
+        outputs=[output_image, seed],
     )
 if __name__ == "__main__":