Qwen-Image-Edit-Rapid-AIO-Loras-Experimental

Running on Zero

App Files Files Community

Professional Noob committed on Feb 10

Commit

1d96c5d

verified ·

1 Parent(s): c08afc1

Update app.py

Browse files

Files changed (1) hide show

app.py +535 -828

app.py CHANGED Viewed

@@ -1,24 +1,32 @@
 import os
 import re
 import gc
 import traceback
-import base64
-import io
 import gradio as gr
 import numpy as np
 import spaces
 import torch
-import random
 from PIL import Image
-from typing import Iterable, Optional, Tuple
-from transformers import (
-    AutoImageProcessor,
-    AutoModelForDepthEstimation,
-)
 from huggingface_hub import hf_hub_download
-from huggingface_hub import InferenceClient
 from safetensors.torch import load_file as safetensors_load_file
 from gradio.themes import Soft
@@ -43,7 +51,6 @@ colors.orange_red = colors.Color(
     c950="#802200",
 )
 class OrangeRedTheme(Soft):
     def __init__(
         self,
@@ -99,7 +106,6 @@ class OrangeRedTheme(Soft):
             block_label_background_fill="*primary_200",
         )
 orange_red_theme = OrangeRedTheme()
 # ============================================================
@@ -107,7 +113,6 @@ orange_red_theme = OrangeRedTheme()
 # ============================================================
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print("CUDA_VISIBLE_DEVICES=", os.environ.get("CUDA_VISIBLE_DEVICES"))
 print("torch.__version__ =", torch.__version__)
 print("torch.version.cuda =", torch.version.cuda)
@@ -118,17 +123,18 @@ if torch.cuda.is_available():
     print("device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
 print("Using device:", device)
 # ============================================================
 # AIO version (Space variable)
 # ============================================================
 AIO_REPO_ID = "Pr0f3ssi0n4ln00b/Phr00t-Qwen-Rapid-AIO"
 DEFAULT_AIO_VERSION = "v19"
 _VER_RE = re.compile(r"^v\d+$")
 _DIGITS_RE = re.compile(r"^\d+$")
 def _normalize_version(raw: str) -> Optional[str]:
     if raw is None:
         return None
@@ -141,13 +147,10 @@ def _normalize_version(raw: str) -> Optional[str]:
         return f"v{s}"
     return None
 _AIO_ENV_RAW = os.environ.get("AIO_VERSION", "")
 _AIO_ENV_NORM = _normalize_version(_AIO_ENV_RAW)
 AIO_VERSION = _AIO_ENV_NORM or DEFAULT_AIO_VERSION
 AIO_VERSION_SOURCE = "env" if _AIO_ENV_NORM else "default(v19)"
 print(f"AIO_VERSION (env raw) = {_AIO_ENV_RAW!r}")
 print(f"AIO_VERSION (normalized) = {_AIO_ENV_NORM!r}")
 print(f"Using AIO_VERSION = {AIO_VERSION} ({AIO_VERSION_SOURCE})")
@@ -161,12 +164,9 @@ from qwenimage.pipeline_qwenimage_edit_plus import QwenImageEditPlusPipeline
 from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
 from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
-dtype = torch.bfloat16
 def _load_pipe_with_version(version: str) -> QwenImageEditPlusPipeline:
     sub = f"{version}/transformer"
-    print(f"📦 Loading AIO transformer: {AIO_REPO_ID} / {sub}")
     p = QwenImageEditPlusPipeline.from_pretrained(
         "Qwen/Qwen-Image-Edit-2511",
         transformer=QwenImageTransformer2DModel.from_pretrained(
@@ -179,12 +179,13 @@ def _load_pipe_with_version(version: str) -> QwenImageEditPlusPipeline:
     ).to(device)
     return p
 try:
     pipe = _load_pipe_with_version(AIO_VERSION)
 except Exception:
     print("❌ Failed to load requested AIO_VERSION. Falling back to v19.")
     print(traceback.format_exc())
     AIO_VERSION = DEFAULT_AIO_VERSION
     AIO_VERSION_SOURCE = "fallback_to_v19"
     pipe = _load_pipe_with_version(AIO_VERSION)
@@ -195,33 +196,57 @@ try:
 except Exception as e:
     print(f"Warning: Could not set FA3 processor: {e}")
-MAX_SEED = np.iinfo(np.int32).max
 # ============================================================
-# Derived conditioning (Depth Anything) ONLY — ViTPose removed
 # ============================================================
 DEPTH_MODEL_ID = "depth-anything/Depth-Anything-V2-Small-hf"
 _DEPTH_CACHE = {}
 def _derived_device(use_gpu: bool) -> torch.device:
     return torch.device("cuda" if (use_gpu and torch.cuda.is_available()) else "cpu")
 def _load_depth_models(dev: torch.device):
     """Return the (processor, model) pair for the depth estimator on *dev*.

     Results are memoized in the module-level ``_DEPTH_CACHE`` under
     ``str(dev)``, so the processor and model are loaded from
     ``DEPTH_MODEL_ID`` at most once per distinct device string.
     """
     cache_key = str(dev)
     try:
         # Fast path: this device string has already been populated.
         return _DEPTH_CACHE[cache_key]
     except KeyError:
         pass

     processor = AutoImageProcessor.from_pretrained(DEPTH_MODEL_ID)
     depth_model = AutoModelForDepthEstimation.from_pretrained(DEPTH_MODEL_ID).to(dev)
     # Switch the freshly loaded model to evaluation mode before caching it.
     depth_model.eval()

     entry = (processor, depth_model)
     _DEPTH_CACHE[cache_key] = entry
     return entry
 def make_depth_map(img: Image.Image, *, use_gpu: bool) -> Image.Image:
     img = img.convert("RGB")
     dev = _derived_device(use_gpu)
@@ -233,7 +258,7 @@ def make_depth_map(img: Image.Image, *, use_gpu: bool) -> Image.Image:
     with torch.no_grad():
         out = model(**inputs)
-    pred = out.predicted_depth
     pred = torch.nn.functional.interpolate(
         pred.unsqueeze(1),
         size=(img.height, img.width),
@@ -249,30 +274,6 @@ def make_depth_map(img: Image.Image, *, use_gpu: bool) -> Image.Image:
     depth8 = (arr * 255.0).clip(0, 255).astype(np.uint8)
     return Image.fromarray(depth8, mode="L").convert("RGB")
-def _to_pil_rgb(item):
-    if item is None:
-        return None
-    if isinstance(item, (tuple, list)) and len(item) >= 1:
-        item = item[0]
-    if isinstance(item, Image.Image):
-        return item.convert("RGB")
-    if isinstance(item, np.ndarray):
-        return Image.fromarray(item).convert("RGB")
-    return None
-def _append_to_gallery(existing, new_img: Image.Image):
-    items = []
-    if existing:
-        for it in existing:
-            pil = _to_pil_rgb(it)
-            if pil is not None:
-                items.append(pil)
-    items.append(new_img)
-    return items
 # ============================================================
 # LoRA adapters + presets
 # ============================================================
@@ -297,7 +298,7 @@ ADAPTER_SPECS = {
     "AnyPose": {
         "type": "package",
         "requires_two_images": True,
-        "image2_label": "Upload Pose Reference (Image 2)",
         "parts": [
             {
                 "repo": "lilylilith/AnyPose",
@@ -337,7 +338,7 @@ ADAPTER_SPECS = {
     "BFS-Best-FaceSwap": {
         "type": "single",
         "requires_two_images": True,
-        "image2_label": "Upload Head/Face Donor (Image 2)",
         "repo": "Alissonerdx/BFS-Best-Face-Swap",
         "weights": "bfs_head_v5_2511_original.safetensors",
         "adapter_name": "BFS-Best-Faceswap",
@@ -347,7 +348,7 @@ ADAPTER_SPECS = {
     "BFS-Best-FaceSwap-merge": {
         "type": "single",
         "requires_two_images": True,
-        "image2_label": "Upload Head/Face Donor (Image 2)",
         "repo": "Alissonerdx/BFS-Best-Face-Swap",
         "weights": "bfs_head_v5_2511_merged_version_rank_32_fp32.safetensors",
         "adapter_name": "BFS-Best-Faceswap-merge",
@@ -430,12 +431,30 @@ ADAPTER_SPECS = {
 LORA_PRESET_PROMPTS = {
     "Any2Real_2601": "change the picture 1 to realistic photograph",
     "Semirealistic-photo-detailer": "transform the image to semi-realistic image",
-    "AnyPose": "Make the person in image 1 do the exact same pose of the person in image 2. Changing the style and background of the image of the person in image 1 is undesirable, so don't do it. The new pose should be pixel accurate to the pose we are trying to copy. The position of the arms and head and legs should be the same as the pose we are trying to copy. Change the field of view and angle to match exactly image 2. Head tilt and eye gaze pose should match the person in image 2.",
-    "Hyperrealistic-Portrait": "Transform the image into an ultra-realistic photorealistic portrait with strict identity preservation, facing straight to the camera. Enhance pore-level skin textures, realistic moisture effects, and natural wet hair clumping against the skin. Apply cool-toned soft-box lighting with subtle highlights and shadows, maintain realistic green-hazel eye catchlights without synthetic gloss, and preserve soft natural lip texture. Use shallow depth of field with a clean background, an 85mm macro photographic look, and raw photo grading without retouching to maintain realism and original details.",
-    "Ultrarealistic-Portrait": "Transform the image into an ultra-realistic glamour portrait while strictly preserving the subject’s identity. Apply a close-up composition with a slight head tilt and a hand near the face, enhance cinematic directional lighting with dramatic fashion-style highlights, and refine makeup details including glowing skin, glossy lips, luminous highlighter, and defined eyes. Increase skin realism with detailed epidermal textures such as micropores, microhairs, subtle oil sheen, natural highlights, soft wrinkles, and subsurface scattering. Maintain a luxury fashion-magazine look in a 9:16 aspect ratio, preserving realism, facial structure, and original details without over-smoothing or retouching.",
     "Upscale2K": "Upscale this picture to 4K resolution.",
-    "BFS-Best-FaceSwap": "head_swap: start with Picture 1 as the base image, keeping its lighting, environment, and background. remove the head from Picture 1 completely and replace it with the head from Picture 2, strictly preserving the hair, eye color, and nose structure of Picture 2. copy the eye direction, head rotation, and micro-expressions from Picture 1. high quality, sharp details, 4k",
-    "BFS-Best-FaceSwap-merge": "head_swap: start with Picture 1 as the base image, keeping its lighting, environment, and background. remove the head from Picture 1 completely and replace it with the head from Picture 2, strictly preserving the hair, eye color, and nose structure of Picture 2. copy the eye direction, head rotation, and micro-expressions from Picture 1. high quality, sharp details, 4k",
 }
 LOADED_ADAPTERS = set()
@@ -444,856 +463,544 @@ LOADED_ADAPTERS = set()
 # Helpers: resolution
 # ============================================================
 def _round_to_multiple(x: int, m: int) -> int:
     return max(m, (int(x) // m) * m)
-def compute_canvas_dimensions_from_area(
-    image: Image.Image,
-    target_area: int,
-    multiple_of: int = 64,
-) -> Tuple[int, int]:
-    w0, h0 = image.size
-    if w0 <= 0 or h0 <= 0:
-        return 512, 512
-    aspect = w0 / h0
-    w = int((target_area * aspect) ** 0.5)
-    h = int(w / aspect) if aspect != 0 else int((target_area) ** 0.5)
-    w = _round_to_multiple(w, multiple_of)
-    h = _round_to_multiple(h, multiple_of)
-    w = max(multiple_of, w)
-    h = max(multiple_of, h)
-    return w, h
-def get_target_area_for_lora(image: Image.Image, lora_adapter: str, target_megapixels: float) -> int:
     spec = ADAPTER_SPECS.get(lora_adapter, {})
-    long_edge = spec.get("target_long_edge", None)
-    if long_edge:
-        w0, h0 = image.size
-        if w0 <= 0 or h0 <= 0:
-            return int(1.0 * 1024 * 1024)
-        scale = float(long_edge) / float(max(w0, h0))
-        w = int(w0 * scale)
-        h = int(h0 * scale)
-        return max(64 * 64, w * h)
-    mp = float(target_megapixels)
-    return max(64 * 64, int(mp * 1_000_000))
 # ============================================================
-# Helpers: LoRA loading + alpha fix
 # ============================================================
-def _download_from_hf(repo_id: str, filename: str) -> str:
-    return hf_hub_download(repo_id=repo_id, filename=filename)
-def _maybe_apply_alpha_fix(state_dict: dict) -> dict:
-    if "img_in.alpha" not in state_dict:
-        for k in list(state_dict.keys()):
-            if k.endswith("img_in.weight") or k.endswith("img_in.bias"):
-                t = state_dict[k]
-                if hasattr(t, "new_zeros"):
-                    state_dict["img_in.alpha"] = t.new_zeros(())
-                break
-    return state_dict
-def _load_single_lora(spec: dict):
-    local_path = _download_from_hf(spec["repo"], spec["weights"])
-    sd = safetensors_load_file(local_path)
-    if spec.get("needs_alpha_fix", False):
-        sd = _maybe_apply_alpha_fix(sd)
-    pipe.load_lora_weights(sd, adapter_name=spec["adapter_name"])
-    LOADED_ADAPTERS.add(spec["adapter_name"])
-def _ensure_loaded_and_get_active_adapters(lora_adapter: str):
-    spec = ADAPTER_SPECS.get(lora_adapter, None)
-    if spec is None:
-        return [], []
-    if spec["type"] == "single":
-        if spec["adapter_name"] not in LOADED_ADAPTERS:
-            _load_single_lora(spec)
-        return [spec["adapter_name"]], [spec.get("strength", 1.0)]
-    adapter_names = []
-    weights = []
-    for part in spec["parts"]:
-        if part["adapter_name"] not in LOADED_ADAPTERS:
-            _load_single_lora(part)
-        adapter_names.append(part["adapter_name"])
-        weights.append(part.get("strength", 1.0))
-    return adapter_names, weights
 def lora_requires_two_images(lora_adapter: str) -> bool:
-    spec = ADAPTER_SPECS.get(lora_adapter, {})
-    return bool(spec.get("requires_two_images", False))
-def get_image2_label_for_lora(lora_adapter: str) -> str:
-    spec = ADAPTER_SPECS.get(lora_adapter, {})
-    return spec.get("image2_label", "Upload Reference (Image 2)")
-def build_labeled_images(img1: Image.Image, img2: Optional[Image.Image], extras: list[Image.Image]):
-    labeled = {"image_1": img1}
-    if img2 is not None:
-        labeled["image_2"] = img2
-    for ex in extras:
-        labeled[f"image_{len(labeled) + 1}"] = ex
-    return labeled
 # ============================================================
-# UI: lora change handler
 # ============================================================
-def on_lora_change_ui(lora_adapter, current_prompt, current_extras_condition_only):
-    preset = LORA_PRESET_PROMPTS.get(lora_adapter, None)
-    prompt_update = gr.update(value=preset) if preset else gr.update(value=current_prompt)
-    needs_two = lora_requires_two_images(lora_adapter)
-    img2_update = gr.update(visible=needs_two, label=get_image2_label_for_lora(lora_adapter))
-    extras_update = gr.update(value=True) if needs_two else gr.update(value=current_extras_condition_only)
-    return prompt_update, img2_update, extras_update
-# ============================================================
-# Output routing + derived conditioning
-# ============================================================
-def set_output_as_image1(last):
-    if last is None:
-        raise gr.Error("No output available yet.")
-    return gr.update(value=last)
-def set_output_as_image2(last):
-    if last is None:
-        raise gr.Error("No output available yet.")
-    return gr.update(value=last)
-def set_output_as_extra(last, existing_extra):
-    if last is None:
-        raise gr.Error("No output available yet.")
-    return _append_to_gallery(existing_extra, last)
-@spaces.GPU
-def add_derived_ref(img1, existing_extra, derived_type, derived_use_gpu):
-    if img1 is None:
-        raise gr.Error("Please upload Image 1 first.")
-    if derived_type == "None":
-        return gr.update(value=existing_extra), gr.update(visible=False, value=None)
-    base = img1.convert("RGB")
-    if derived_type == "Depth (Depth Anything V2 Small)":
-        derived = make_depth_map(base, use_gpu=bool(derived_use_gpu))
     else:
-        raise gr.Error(f"Unknown derived type: {derived_type}")
-    new_gallery = _append_to_gallery(existing_extra, derived)
-    return gr.update(value=new_gallery), gr.update(visible=True, value=derived)
 # ============================================================
-# Prompt Helper (outsourced VLM calls, UI stays clean)
 # ============================================================
-# Configuration via env vars (no UI clutter)
-HF_TOKEN = os.environ.get("HF_TOKEN", "").strip() or os.environ.get("HUGGINGFACEHUB_API_TOKEN", "").strip()
-HF_PROVIDER = os.environ.get("HF_PROVIDER", "nebius").strip()
-HF_VLM_MODEL = os.environ.get("HF_VLM_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct").strip()
-_client_cache = {}
-def _get_client() -> InferenceClient:
-    key = (HF_PROVIDER, bool(HF_TOKEN))
-    if key in _client_cache:
-        return _client_cache[key]
-    if not HF_TOKEN:
-        raise gr.Error("Captioning is not configured (missing HF_TOKEN).")
-    client = InferenceClient(provider=HF_PROVIDER, api_key=HF_TOKEN)
-    _client_cache[key] = client
-    return client
-def _encode_image_data_url(img: Image.Image, max_side: int = 1536, fmt: str = "PNG") -> str:
-    """
-    Converts PIL to data URL (base64). Downscales to keep payload reasonable.
-    """
-    img = img.convert("RGB")
     w, h = img.size
-    scale = min(1.0, float(max_side) / float(max(w, h))) if max(w, h) > 0 else 1.0
-    if scale < 1.0:
-        img = img.resize((max(1, int(w * scale)), max(1, int(h * scale))), Image.LANCZOS)
-    buf = io.BytesIO()
-    img.save(buf, format=fmt)
-    b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
-    mime = "image/png" if fmt.upper() == "PNG" else "image/jpeg"
-    return f"data:{mime};base64,{b64}"
-def _chat_with_image(
-    system_prompt: str,
-    user_text: str,
-    image: Image.Image,
-    *,
-    max_tokens: int,
-    temperature: float,
-) -> str:
-    client = _get_client()
-    data_url = _encode_image_data_url(image)
-    messages = [
-        {"role": "system", "content": system_prompt},
-        {
-            "role": "user",
-            "content": [
-                {"type": "text", "text": user_text},
-                {"type": "image_url", "image_url": {"url": data_url}},
-            ],
-        },
-    ]
-    # Hugging Face chat.completions interface
-    resp = client.chat.completions.create(
-        model=HF_VLM_MODEL,
-        messages=messages,
-        max_tokens=int(max_tokens),
-        temperature=float(temperature),
-    )
-    return (resp.choices[0].message.content or "").strip()
-def _chat_text_only(
-    system_prompt: str,
-    user_text: str,
-    *,
-    max_tokens: int,
-    temperature: float,
-) -> str:
-    client = _get_client()
-    messages = [
-        {"role": "system", "content": system_prompt},
-        {"role": "user", "content": [{"type": "text", "text": user_text}]},
-    ]
-    resp = client.chat.completions.create(
-        model=HF_VLM_MODEL,
-        messages=messages,
-        max_tokens=int(max_tokens),
-        temperature=float(temperature),
-    )
-    return (resp.choices[0].message.content or "").strip()
-def _has_header(text: str, header: str) -> bool:
-    return header in (text or "")
-def _enforce_once_retry_image(system_prompt: str, user_text: str, image: Image.Image, header: str, max_tokens: int, temperature: float) -> str:
-    out = _chat_with_image(system_prompt, user_text, image, max_tokens=max_tokens, temperature=temperature)
-    if _has_header(out, header):
-        return out
-    # one strict retry
-    retry_user = (
-        user_text
-        + "\n\nIMPORTANT: You did not follow the required output format. "
-        + f"Return EXACTLY the block starting with {header} and fill each line. No extra text."
-    )
-    out2 = _chat_with_image(system_prompt, retry_user, image, max_tokens=max_tokens, temperature=temperature)
-    return out2
-def _enforce_once_retry_text(system_prompt: str, user_text: str, header: str, max_tokens: int, temperature: float) -> str:
-    out = _chat_text_only(system_prompt, user_text, max_tokens=max_tokens, temperature=temperature)
-    if _has_header(out, header):
-        return out
-    retry_user = (
-        user_text
-        + "\n\nIMPORTANT: You did not follow the required output format. "
-        + f"Return EXACTLY the sections starting with {header}. No extra text."
-    )
-    return _chat_text_only(system_prompt, retry_user, max_tokens=max_tokens, temperature=temperature)
-# --------- BASE (Pic1) extraction prompt (no identity) ----------
-BFS_BASE_SYSTEM = """You are extracting non-identity facial and contextual signals from Picture 1 (BASE) for a head/face swap.
-CRITICAL: DO NOT describe identity/likeness traits. That means:
-- No age, ethnicity/race/nationality guesses, attractiveness judgments, “looks like X”
-- No skin tone, facial structure descriptions, “round face”, “strong jaw”, etc.
-- No hair color/style as identity markers (only mention hair if it occludes the face, e.g. “hair covering left eye”)
-Focus ONLY on:
-- Head pose (yaw/pitch/roll, tilt, chin/jaw position)
-- Gaze and eyelids (direction, openness)
-- Micro-expressions / muscle cues (brow knit/raise, squint, lip tension, mouth corners, cheek tension, jaw set)
-- Mouth details (open/closed, teeth, tongue if visible)
-- Mood inference (max 2 labels) with visible evidence cues
-- Occlusions and interactions (hands, objects, glasses, shadows) relevant to face recreation
-- Visibility notes (unclear/occluded/shadowed)
-Output format (return exactly this block, nothing else):
-[BASE_SIGNALS_PIC1]
-Head pose:
-Gaze & eyelids:
-Expression (muscle cues):
-Mouth details:
-Mood (max 2 labels):
-Evidence for mood (visible cues only):
-Occlusions & interactions:
-Visibility notes (unclear/occluded/shadowed areas):
-"""
-BFS_BASE_USER = """Analyze the single provided image as Picture 1 (BASE).
-Fill every line with either an observation or the word "unclear". Keep it concise."""
-# --------- DONOR (Pic2) extraction prompt (identity only) ----------
-BFS_DONOR_SYSTEM = """You are extracting inherent identity/likeness traits from Picture 2 (DONOR) for a head/face swap.
-CRITICAL: DO NOT describe expression, mood, gaze direction, head pose/rotation, body pose, or actions.
-Focus ONLY on visible physical traits:
-- Face shape & proportions (jawline, cheekbones, chin shape)
-- Skin tone/undertone + texture (freckles/moles only if visible)
-- Eyes (color, shape), brows (shape/thickness)
-- Nose structure (bridge, tip, nostrils)
-- Lips/mouth shape (fullness, cupid’s bow)
-- Chin/jaw details
-- Hair (color, style, hairline)
-- Distinctive traits (scars/moles/freckles if visible)
-- Visibility notes (unclear/occluded/shadowed)
-Output format (return exactly this block, nothing else):
-[DONOR_TRAITS_PIC2]
-Face shape & proportions:
-Skin tone & texture:
-Eyes & brows:
-Nose structure:
-Lips & mouth shape:
-Chin/jaw details:
-Hair (color, style, hairline):
-Distinctive traits (scars/moles/freckles if visible):
-Visibility notes (unclear/occluded/shadowed areas):
-"""
-BFS_DONOR_USER = """Analyze the single provided image as Picture 2 (DONOR).
-Fill every line with either an observation or the word "unclear". Keep it concise."""
-# --------- Text-only prompt builder ----------
-BFS_BUILDER_SYSTEM = """You are a prompt editor for BFS-BestFaceSwap.
-Input you may receive:
-- A core prompt (already includes head_swap instructions)
-- BASE_SIGNALS_PIC1 text (pose/expression/mood/occlusions; non-identity)
-- Optional DONOR_TRAITS_PIC2 text (identity-only traits)
-Your job:
-- Produce a compact addendum that improves expressiveness transfer and reduces ambiguity.
-- Do NOT add any identity traits from the base signals.
-- Do NOT add any pose/expression/mood from donor traits.
-- Prefer concrete, visible cues over vague adjectives.
-- Keep it short (ideally 6–14 lines total).
-- If donor traits are missing or mostly "unclear", omit donor section entirely.
-Output EXACTLY two sections (donor section may be omitted if not provided/usable):
-[ADDENDUM_BASE]
-(bullets or short lines; use the best cues from BASE_SIGNALS)
-[ADDENDUM_DONOR]
-(optional; only if donor traits contain useful visible info; no pose/expression)
-"""
-def scrub_placeholder(text: str, enabled: bool) -> str:
-    # Placeholder for future strict scrubber pass (no-op).
-    return text
 @spaces.GPU
-def caption_base_pic1(
-    img1,
-    max_new_tokens: int,
-    temperature: float,
-    strict_scrubber: bool,
-    show_debug: bool,
 ):
     if img1 is None:
-        raise gr.Error("Please upload Image 1 (base) first.")
-    raw = _enforce_once_retry_image(
-        BFS_BASE_SYSTEM,
-        BFS_BASE_USER,
-        img1,
-        header="[BASE_SIGNALS_PIC1]",
-        max_tokens=int(max_new_tokens),
-        temperature=float(temperature),
-    )
-    out = scrub_placeholder(raw, enabled=bool(strict_scrubber))
-    debug = raw if bool(show_debug) else ""
-    return out, debug
-@spaces.GPU
-def caption_donor_pic2(
-    img2,
-    max_new_tokens: int,
-    temperature: float,
-    strict_scrubber: bool,
-    show_debug: bool,
-):
-    if img2 is None:
-        raise gr.Error("Please upload Image 2 (donor) first.")
-    raw = _enforce_once_retry_image(
-        BFS_DONOR_SYSTEM,
-        BFS_DONOR_USER,
-        img2,
-        header="[DONOR_TRAITS_PIC2]",
-        max_tokens=int(max_new_tokens),
-        temperature=float(temperature),
-    )
-    out = scrub_placeholder(raw, enabled=bool(strict_scrubber))
-    debug = raw if bool(show_debug) else ""
-    return out, debug
-def _compose_final_prompt(core_prompt: str, addendum_text: str, mode: str) -> str:
-    core = (core_prompt or "").strip()
-    addendum = (addendum_text or "").strip()
-    if not addendum:
-        return core
-    if (mode or "").lower().startswith("inject"):
-        injected = core
-        if "{BFS_ADDENDUM}" in injected:
-            injected = injected.replace("{BFS_ADDENDUM}", addendum + "\n")
-            return injected.strip()
-    return (core + "\n\n" + addendum).strip()
-@spaces.GPU
-def build_bfs_addendum_and_final_prompt(
-    core_prompt: str,
-    base_caption: str,
-    donor_caption: str,
-    integration_mode: str,
-    max_new_tokens: int,
-    temperature: float,
-    show_debug: bool,
-):
-    base = (base_caption or "").strip()
-    donor = (donor_caption or "").strip()
-    core = (core_prompt or "").strip()
-    if not base:
-        raise gr.Error("Generate BASE signals (Pic1) first (or paste them) before building an addendum.")
-    user_text = (
-        "CORE PROMPT:\n"
-        f"{core}\n\n"
-        "BASE_SIGNALS_PIC1:\n"
-        f"{base}\n\n"
-        "DONOR_TRAITS_PIC2:\n"
-        f"{donor if donor else '(none)'}\n\n"
-        "Produce the addendum now."
-    )
-    raw = _enforce_once_retry_text(
-        BFS_BUILDER_SYSTEM,
-        user_text,
-        header="[ADDENDUM_BASE]",
-        max_tokens=int(max_new_tokens),
-        temperature=float(temperature),
     )
-    final_prompt = _compose_final_prompt(core, raw, integration_mode)
-    debug = raw if bool(show_debug) else ""
-    return raw, final_prompt, debug
-# ============================================================
-# Inference
-# ============================================================
-@spaces.GPU
-def infer(
-    input_image_1,
-    input_image_2,
-    input_images_extra,
-    prompt,
-    lora_adapter,
-    seed,
-    randomize_seed,
-    guidance_scale,
-    steps,
-    target_megapixels,
-    extras_condition_only,
-    pad_to_canvas,
-    progress=gr.Progress(track_tqdm=True),
-):
     gc.collect()
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
-    if input_image_1 is None:
-        raise gr.Error("Please upload Image 1.")
-    if lora_adapter == NONE_LORA:
-        try:
-            pipe.set_adapters([], adapter_weights=[])
-        except Exception:
-            if LOADED_ADAPTERS:
-                pipe.set_adapters(list(LOADED_ADAPTERS), adapter_weights=[0.0] * len(LOADED_ADAPTERS))
-    else:
-        adapter_names, adapter_weights = _ensure_loaded_and_get_active_adapters(lora_adapter)
-        pipe.set_adapters(adapter_names, adapter_weights=adapter_weights)
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    generator = torch.Generator(device=device).manual_seed(seed)
-    negative_prompt = (
-        "worst quality, low quality, bad anatomy, bad hands, text, error, missing fingers, "
-        "extra digit, fewer digits, cropped, jpeg artifacts, signature, watermark, username, blurry"
-    )
-    img1 = input_image_1.convert("RGB")
-    img2 = input_image_2.convert("RGB") if input_image_2 is not None else None
-    extra_imgs: list[Image.Image] = []
-    if input_images_extra:
-        for item in input_images_extra:
-            pil = _to_pil_rgb(item)
-            if pil is not None:
-                extra_imgs.append(pil)
-    if lora_requires_two_images(lora_adapter) and img2 is None:
-        raise gr.Error("This LoRA needs two images. Please upload Image 2 as well.")
-    labeled = build_labeled_images(img1, img2, extra_imgs)
-    pipe_images = list(labeled.values())
-    if len(pipe_images) == 1:
-        pipe_images = pipe_images[0]
-    target_area = get_target_area_for_lora(img1, lora_adapter, float(target_megapixels))
-    width, height = compute_canvas_dimensions_from_area(
-        img1,
-        target_area=target_area,
-        multiple_of=int(pipe.vae_scale_factor * 2),
-    )
-    vae_image_indices = None
-    if extras_condition_only:
-        if isinstance(pipe_images, list) and len(pipe_images) > 2:
-            vae_image_indices = [0, 1] if len(pipe_images) >= 2 else [0]
-    try:
-        result = pipe(
-            image=pipe_images,
-            prompt=prompt,
-            negative_prompt=negative_prompt,
-            height=height,
-            width=width,
-            num_inference_steps=steps,
-            generator=generator,
-            true_cfg_scale=guidance_scale,
-            vae_image_indices=vae_image_indices,
-            pad_to_canvas=bool(pad_to_canvas),
-        ).images[0]
-        return result, seed, result
-    finally:
-        gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-@spaces.GPU
-def infer_example(input_image, prompt, lora_adapter):
-    if input_image is None:
-        return None, 0, None
-    input_pil = input_image.convert("RGB")
-    guidance_scale = 1.0
-    steps = 4
-    result, seed, last = infer(
-        input_pil,
-        None,
-        None,
-        prompt,
-        lora_adapter,
-        0,
-        True,
-        guidance_scale,
-        steps,
-        1.0,
-        True,
-        True,
-    )
-    return result, seed, last
-# ============================================================
-# UI
-# ============================================================
-css = """
-#col-container { margin: 0 auto; max-width: 960px; }
-#main-title h1 { font-size: 2.1em !important; }
 """
-aio_status_line = (
-    f"**AIO transformer version:** `{AIO_VERSION}`  "
-    f"({AIO_VERSION_SOURCE}; env `AIO_VERSION`={_AIO_ENV_RAW!r})"
-)
-with gr.Blocks() as demo:
-    with gr.Column(elem_id="col-container"):
-        gr.Markdown("# **Qwen-Image-Edit-2511-LoRAs-Fast**", elem_id="main-title")
-        gr.Markdown(
-            "Perform diverse image edits using specialized "
-            "[LoRA](https://huggingface.co/models?other=base_model:adapter:Qwen/Qwen-Image-Edit-2511) adapters for the "
-            "[Qwen-Image-Edit](https://huggingface.co/Qwen/Qwen-Image-Edit-2511) model."
-        )
-        gr.Markdown(aio_status_line)
-        with gr.Row(equal_height=True):
-            with gr.Column():
-                input_image_1 = gr.Image(label="Upload Image 1 (Base / Target)", type="pil", height=290)
-                input_image_2 = gr.Image(label="Upload Reference (Image 2)", type="pil", height=290, visible=False)
-                input_images_extra = gr.Gallery(
-                    label="Upload Additional Images (auto-indexed after Image 1/2)",
-                    type="pil",
-                    height=290,
-                    columns=4,
-                    rows=2,
-                    interactive=True,
                 )
-                prompt = gr.Text(
-                    label="Edit Prompt",
-                    show_label=True,
-                    placeholder="e.g., transform into photo..",
                 )
-                with gr.Accordion("BFS Prompt Helper", open=False):
-                    with gr.Row():
-                        helper_max_tokens = gr.Slider(label="Max new tokens", minimum=64, maximum=1024, step=16, value=384)
-                        helper_temperature = gr.Slider(label="Temperature (0 = deterministic)", minimum=0.0, maximum=1.2, step=0.05, value=0.2)
-                    with gr.Row():
-                        strict_scrubber = gr.Checkbox(label="Strict scrubber (placeholder, no-op)", value=False)
-                        show_debug = gr.Checkbox(label="Show debug outputs", value=False)
-                    with gr.Row():
-                        btn_cap_base = gr.Button("Generate BASE signals (Pic1)", variant="secondary")
-                        btn_cap_donor = gr.Button("Generate DONOR traits (Pic2) (optional)", variant="secondary")
-                    with gr.Row():
-                        caption_pic1 = gr.Textbox(label="BASE signals (from Image 1)", lines=12, value="")
-                        caption_pic2 = gr.Textbox(label="DONOR traits (from Image 2) (optional)", lines=12, value="")
-                    with gr.Row():
-                        debug_base = gr.Textbox(label="Debug: raw BASE output", lines=8, visible=False)
-                        debug_donor = gr.Textbox(label="Debug: raw DONOR output", lines=8, visible=False)
-                    integration_mode = gr.Radio(
-                        label="How to apply addendum to the core prompt",
-                        choices=["Concatenate", "Inject (placeholder {BFS_ADDENDUM})"],
-                        value="Concatenate",
-                    )
-                    with gr.Row():
-                        btn_build_addendum = gr.Button("Build addendum + final prompt", variant="primary")
-                        btn_apply_final = gr.Button("Apply final prompt → Edit Prompt", variant="secondary")
-                    bfs_addendum = gr.Textbox(label="Built addendum (editable)", lines=10, value="")
-                    bfs_final_prompt = gr.Textbox(label="Final prompt preview (editable)", lines=10, value="")
-                    debug_builder = gr.Textbox(label="Debug: raw builder output", lines=8, visible=False)
-                run_button = gr.Button("Edit Image", variant="primary")
-            with gr.Column():
-                output_image = gr.Image(label="Output Image", interactive=False, format="png", height=353)
-                last_output = gr.State(value=None)
-                with gr.Row():
-                    btn_out_to_img1 = gr.Button("⬅️ Output → Image 1", variant="secondary")
-                    btn_out_to_img2 = gr.Button("⬅️ Output → Image 2", variant="secondary")
-                    btn_out_to_extra = gr.Button("➕ Output → Extra Ref", variant="secondary")
-                derived_preview = gr.Image(
-                    label="Derived Conditioning Preview",
-                    interactive=False,
-                    format="png",
-                    height=200,
-                    visible=False,
                 )
-                with gr.Row():
-                    lora_choices = [NONE_LORA] + list(ADAPTER_SPECS.keys())
-                    lora_adapter = gr.Dropdown(
-                        label="Choose Editing Style",
-                        choices=lora_choices,
-                        value=NONE_LORA,
-                    )
-                with gr.Accordion("Advanced Settings", open=False, visible=True):
-                    with gr.Accordion("Derived Conditioning (Depth)", open=False):
-                        derived_type = gr.Dropdown(
-                            label="Derived Type (from Image 1)",
-                            choices=["None", "Depth (Depth Anything V2 Small)"],
-                            value="None",
-                        )
-                        derived_use_gpu = gr.Checkbox(label="Use GPU for derived model", value=False)
-                        add_derived_btn = gr.Button("➕ Add derived ref to Extras (conditioning-only recommended)")
-                    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
-                    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
-                    guidance_scale = gr.Slider(label="Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=1.0)
-                    steps = gr.Slider(label="Inference Steps", minimum=1, maximum=50, step=1, value=4)
-                    target_megapixels = gr.Slider(
-                        label="Target Megapixels (canvas)",
-                        minimum=0.5,
-                        maximum=6.0,
-                        step=0.1,
-                        value=1.0,
-                    )
-                    extras_condition_only = gr.Checkbox(
-                        label="Extra references are conditioning-only (exclude from VAE)",
-                        value=True,
-                    )
-                    pad_to_canvas = gr.Checkbox(
-                        label="Pad images to canvas aspect (avoid warping)",
-                        value=True,
-                    )
-        # LoRA selection: preset prompt + toggle Image 2
-        lora_adapter.change(
-            fn=on_lora_change_ui,
-            inputs=[lora_adapter, prompt, extras_condition_only],
-            outputs=[prompt, input_image_2, extras_condition_only],
-        )
-        # Debug visibility toggles
-        show_debug.change(
-            fn=lambda x: (
-                gr.update(visible=bool(x)),
-                gr.update(visible=bool(x)),
-                gr.update(visible=bool(x)),
-            ),
-            inputs=[show_debug],
-            outputs=[debug_base, debug_donor, debug_builder],
-        )
-        # Caption buttons (single-image)
-        btn_cap_base.click(
-            fn=caption_base_pic1,
-            inputs=[input_image_1, helper_max_tokens, helper_temperature, strict_scrubber, show_debug],
-            outputs=[caption_pic1, debug_base],
-        )
-        btn_cap_donor.click(
-            fn=caption_donor_pic2,
-            inputs=[input_image_2, helper_max_tokens, helper_temperature, strict_scrubber, show_debug],
-            outputs=[caption_pic2, debug_donor],
-        )
-        # Builder (text-only)
-        btn_build_addendum.click(
-            fn=build_bfs_addendum_and_final_prompt,
-            inputs=[
-                prompt,
-                caption_pic1,
-                caption_pic2,
-                integration_mode,
-                helper_max_tokens,
-                helper_temperature,
-                show_debug,
-            ],
-            outputs=[bfs_addendum, bfs_final_prompt, debug_builder],
-        )
-        # Apply final prompt to the Edit Prompt box
-        btn_apply_final.click(
-            fn=lambda x: gr.update(value=x),
-            inputs=[bfs_final_prompt],
-            outputs=[prompt],
-        )
-        gr.Examples(
-            examples=[
-                ["examples/5.jpg", "Remove shadows and relight the image using soft lighting.", "Light-Restoration"],
-                ["examples/4.jpg", "Use a subtle golden-hour filter with smooth light diffusion.", "Relight"],
-                ["examples/2.jpeg", "Rotate the camera 45 degrees to the left.", "Multiple-Angles"],
-                ["examples/11.jpg", "Upscale this picture to 4K resolution.", "Upscale2K"],
-            ],
-            inputs=[input_image_1, prompt, lora_adapter],
-            outputs=[output_image, seed, last_output],
-            fn=infer_example,
-            cache_examples=False,
-            label="Examples",
-        )
-    run_button.click(
-        fn=infer,
         inputs=[
-            input_image_1,
-            input_image_2,
-            input_images_extra,
             prompt,
             lora_adapter,
             seed,
             randomize_seed,
-            guidance_scale,
             steps,
             target_megapixels,
             extras_condition_only,
-            pad_to_canvas,
         ],
-        outputs=[output_image, seed, last_output],
     )
-    # Output routing
-    btn_out_to_img1.click(fn=set_output_as_image1, inputs=[last_output], outputs=[input_image_1])
-    btn_out_to_img2.click(fn=set_output_as_image2, inputs=[last_output], outputs=[input_image_2])
-    btn_out_to_extra.click(fn=set_output_as_extra, inputs=[last_output, input_images_extra], outputs=[input_images_extra])
-    # Derived conditioning: append depth map
-    add_derived_btn.click(
-        fn=add_derived_ref,
-        inputs=[input_image_1, input_images_extra, derived_type, derived_use_gpu],
-        outputs=[input_images_extra, derived_preview],
-    )
-if __name__ == "__main__":
-    demo.queue(max_size=30).launch(
-        css=css,
-        theme=orange_red_theme,
-        mcp_server=True,
-        ssr_mode=False,
-        show_error=True,
-    )

+---
+## 2) `app.py`
+> Replace your existing `app.py` with this.
+>
+> Notes:
+> - **ViTPose removed** (no imports, no model loading)
+> - Depth conditioning is the only derived conditioning mode
+> - **Picture 1 / Picture 2** labels
+> - Output routing buttons included
+> - LCD step dropdown (32/56/112) controls both canvas snapping and pipeline snapping
+```python
 import os
 import re
 import gc
 import traceback
+import random
+from typing import Iterable, Optional
 import gradio as gr
 import numpy as np
 import spaces
 import torch
 from PIL import Image
+from transformers import AutoImageProcessor, AutoModelForDepthEstimation
 from huggingface_hub import hf_hub_download
 from safetensors.torch import load_file as safetensors_load_file
 from gradio.themes import Soft
     c950="#802200",
 )
 class OrangeRedTheme(Soft):
     def __init__(
         self,
             block_label_background_fill="*primary_200",
         )
 orange_red_theme = OrangeRedTheme()
 # ============================================================
 # ============================================================
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 print("CUDA_VISIBLE_DEVICES=", os.environ.get("CUDA_VISIBLE_DEVICES"))
 print("torch.__version__ =", torch.__version__)
 print("torch.version.cuda =", torch.version.cuda)
     print("device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
 print("Using device:", device)
+dtype = torch.bfloat16
+MAX_SEED = np.iinfo(np.int32).max
 # ============================================================
 # AIO version (Space variable)
 # ============================================================
 AIO_REPO_ID = "Pr0f3ssi0n4ln00b/Phr00t-Qwen-Rapid-AIO"
 DEFAULT_AIO_VERSION = "v19"
 _VER_RE = re.compile(r"^v\d+$")
 _DIGITS_RE = re.compile(r"^\d+$")
 def _normalize_version(raw: str) -> Optional[str]:
     if raw is None:
         return None
         return f"v{s}"
     return None
 _AIO_ENV_RAW = os.environ.get("AIO_VERSION", "")
 _AIO_ENV_NORM = _normalize_version(_AIO_ENV_RAW)
 AIO_VERSION = _AIO_ENV_NORM or DEFAULT_AIO_VERSION
 AIO_VERSION_SOURCE = "env" if _AIO_ENV_NORM else "default(v19)"
 print(f"AIO_VERSION (env raw) = {_AIO_ENV_RAW!r}")
 print(f"AIO_VERSION (normalized) = {_AIO_ENV_NORM!r}")
 print(f"Using AIO_VERSION = {AIO_VERSION} ({AIO_VERSION_SOURCE})")
 from qwenimage.transformer_qwenimage import QwenImageTransformer2DModel
 from qwenimage.qwen_fa3_processor import QwenDoubleStreamAttnProcessorFA3
 def _load_pipe_with_version(version: str) -> QwenImageEditPlusPipeline:
     sub = f"{version}/transformer"
+    print(f"Loading AIO transformer: {AIO_REPO_ID} / {sub}")
     p = QwenImageEditPlusPipeline.from_pretrained(
         "Qwen/Qwen-Image-Edit-2511",
         transformer=QwenImageTransformer2DModel.from_pretrained(
     ).to(device)
     return p
 try:
     pipe = _load_pipe_with_version(AIO_VERSION)
 except Exception:
     print("❌ Failed to load requested AIO_VERSION. Falling back to v19.")
+    print("---- exception ----")
     print(traceback.format_exc())
+    print("-------------------")
     AIO_VERSION = DEFAULT_AIO_VERSION
     AIO_VERSION_SOURCE = "fallback_to_v19"
     pipe = _load_pipe_with_version(AIO_VERSION)
 except Exception as e:
     print(f"Warning: Could not set FA3 processor: {e}")
+# ============================================================
+# VAE tiling toggle (UI-controlled; OFF by default)
+# ============================================================
def _apply_vae_tiling(enabled: bool):
    """
    Switch VAE tiling on or off for the module-level ``pipe``.

    The pipeline-level method is tried first; when it is absent the method on
    ``pipe.vae`` is used instead. If neither exists a warning is printed and
    the current state is left alone. Any exception raised while toggling is
    caught and printed, so a failed toggle never aborts the caller.
    No restart is needed: the setting is applied to the live pipeline object.
    """
    try:
        if not enabled:
            if hasattr(pipe, "disable_vae_tiling"):
                pipe.disable_vae_tiling()
                print("VAE tiling DISABLED (per UI).")
                return
            if hasattr(pipe, "vae") and hasattr(pipe.vae, "disable_tiling"):
                pipe.vae.disable_tiling()
                print("VAE tiling DISABLED via pipe.vae.disable_tiling() (per UI).")
                return
            print("⚠️ No disable_vae_tiling()/vae.disable_tiling() found; leaving current state unchanged.")
            return

        if hasattr(pipe, "enable_vae_tiling"):
            pipe.enable_vae_tiling()
            print("✅ VAE tiling ENABLED (per UI).")
            return
        if hasattr(pipe, "vae") and hasattr(pipe.vae, "enable_tiling"):
            pipe.vae.enable_tiling()
            print("✅ VAE tiling ENABLED via pipe.vae.enable_tiling() (per UI).")
            return
        print("⚠️ No enable_vae_tiling()/vae.enable_tiling() found; cannot enable.")
    except Exception as problem:
        print(f"⚠️ VAE tiling toggle failed: {problem}")
 # ============================================================
+# Derived conditioning (Depth only) — ViTPose REMOVED
 # ============================================================
 DEPTH_MODEL_ID = "depth-anything/Depth-Anything-V2-Small-hf"
 _DEPTH_CACHE = {}
 def _derived_device(use_gpu: bool) -> torch.device:
     return torch.device("cuda" if (use_gpu and torch.cuda.is_available()) else "cpu")
 def _load_depth_models(dev: torch.device):
     key = str(dev)
     if key in _DEPTH_CACHE:
         return _DEPTH_CACHE[key]
     proc = AutoImageProcessor.from_pretrained(DEPTH_MODEL_ID)
     model = AutoModelForDepthEstimation.from_pretrained(DEPTH_MODEL_ID).to(dev)
     model.eval()
     _DEPTH_CACHE[key] = (proc, model)
     return _DEPTH_CACHE[key]
 def make_depth_map(img: Image.Image, *, use_gpu: bool) -> Image.Image:
     img = img.convert("RGB")
     dev = _derived_device(use_gpu)
     with torch.no_grad():
         out = model(**inputs)
+    pred = out.predicted_depth  # (B,H,W)
     pred = torch.nn.functional.interpolate(
         pred.unsqueeze(1),
         size=(img.height, img.width),
     depth8 = (arr * 255.0).clip(0, 255).astype(np.uint8)
     return Image.fromarray(depth8, mode="L").convert("RGB")
 # ============================================================
 # LoRA adapters + presets
 # ============================================================
     "AnyPose": {
         "type": "package",
         "requires_two_images": True,
+        "image2_label": "Picture 2 (Pose Reference)",
         "parts": [
             {
                 "repo": "lilylilith/AnyPose",
     "BFS-Best-FaceSwap": {
         "type": "single",
         "requires_two_images": True,
+        "image2_label": "Picture 2 (Head/Face Donor)",
         "repo": "Alissonerdx/BFS-Best-Face-Swap",
         "weights": "bfs_head_v5_2511_original.safetensors",
         "adapter_name": "BFS-Best-Faceswap",
     "BFS-Best-FaceSwap-merge": {
         "type": "single",
         "requires_two_images": True,
+        "image2_label": "Picture 2 (Head/Face Donor)",
         "repo": "Alissonerdx/BFS-Best-Face-Swap",
         "weights": "bfs_head_v5_2511_merged_version_rank_32_fp32.safetensors",
         "adapter_name": "BFS-Best-Faceswap-merge",
 LORA_PRESET_PROMPTS = {
     "Any2Real_2601": "change the picture 1 to realistic photograph",
     "Semirealistic-photo-detailer": "transform the image to semi-realistic image",
+    "AnyPose": (
+        "Make the person in image 1 do the exact same pose of the person in image 2. "
+        "Changing the style and background of the image of the person in image 1 is undesirable, so don't do it. "
+        "The new pose should be pixel accurate to the pose we are trying to copy. "
+        "Change the field of view and angle to match exactly image 2."
+    ),
+    "Hyperrealistic-Portrait": (
+        "Transform the image into an ultra-realistic photorealistic portrait with strict identity preservation, "
+        "facing straight to the camera. Enhance pore-level skin textures, realistic moisture effects, and natural wet hair clumping. "
+        "Use shallow depth of field with a clean background."
+    ),
+    "Ultrarealistic-Portrait": (
+        "Transform the image into an ultra-realistic glamour portrait while strictly preserving the subject’s identity. "
+        "Enhance cinematic directional lighting and keep realism without over-smoothing."
+    ),
     "Upscale2K": "Upscale this picture to 4K resolution.",
+    "BFS-Best-FaceSwap": (
+        "head_swap: start with Picture 1 as the base image. replace the head with Picture 2, preserving identity of Picture 2. "
+        "copy eye direction and micro-expressions from Picture 1. high quality, sharp details, 4k"
+    ),
+    "BFS-Best-FaceSwap-merge": (
+        "head_swap: start with Picture 1 as the base image. replace the head with Picture 2, preserving identity of Picture 2. "
+        "copy eye direction and micro-expressions from Picture 1. high quality, sharp details, 4k"
+    ),
 }
 LOADED_ADAPTERS = set()
 # Helpers: resolution
 # ============================================================
 def _round_to_multiple(x: int, m: int) -> int:
+    m = max(1, int(m))
     return max(m, (int(x) // m) * m)
def compute_canvas_dimensions_from_area(image: Image.Image, target_area: int, multiple_of: int) -> tuple[int, int]:
    """
    Compute a ``(width, height)`` canvas for ``image`` at roughly ``target_area`` pixels.

    The aspect ratio is taken from ``image`` (1.0 when its height is zero).
    Sizing is delegated to the pipeline's ``calculate_dimensions``; both sides
    are then floored to ``multiple_of`` via ``_round_to_multiple``.
    """
    src_w, src_h = image.size
    if src_h:
        aspect = src_w / src_h
    else:
        aspect = 1.0

    # Imported lazily, as in the original, so the dependency is only resolved on first use.
    from qwenimage.pipeline_qwenimage_edit_plus import calculate_dimensions

    step = int(multiple_of)
    raw_w, raw_h = calculate_dimensions(int(target_area), float(aspect), multiple=step)
    return _round_to_multiple(int(raw_w), step), _round_to_multiple(int(raw_h), step)
def get_target_area_for_lora(image: Image.Image, lora_adapter: str, user_target_megapixels: float) -> int:
    """
    Resolve the target canvas area (in pixels) for the selected LoRA.

    The adapter's entry in ``ADAPTER_SPECS`` may override the user setting.
    Overrides are checked in this order and the first usable one wins:

    1. ``target_area``       - used directly as a pixel count.
    2. ``target_megapixels`` - multiplied by 1024 * 1024.
    3. ``target_long_edge``  - the long side of ``image`` is scaled to this
       value, the short side follows the image's aspect ratio, and the
       product of the two is returned.

    An override that cannot be evaluated (bad type, zero-sized image, ...) is
    reported on stdout and skipped instead of being ignored silently. When no
    override applies, ``user_target_megapixels * 1024 * 1024`` is returned.
    """
    spec = ADAPTER_SPECS.get(lora_adapter, {})

    def _area_from_long_edge(value) -> int:
        long_edge = int(value)
        w, h = image.size
        if w >= h:
            new_w = long_edge
            new_h = int(round(long_edge * (h / w)))
        else:
            new_h = long_edge
            new_w = int(round(long_edge * (w / h)))
        return int(new_w * new_h)

    resolvers = (
        ("target_area", int),
        ("target_megapixels", lambda value: int(float(value) * 1024 * 1024)),
        ("target_long_edge", _area_from_long_edge),
    )

    for key, resolve in resolvers:
        if key not in spec:
            continue
        try:
            return resolve(spec[key])
        except Exception as e:
            # Best effort: keep falling through, but make the bad spec visible in the logs.
            print(f"⚠️ Ignoring invalid {key!r} for LoRA {lora_adapter!r}: {type(e).__name__}: {e}")

    return int(float(user_target_megapixels) * 1024 * 1024)
 # ============================================================
+# Helpers: gallery normalization
 # ============================================================
def _to_pil_rgb(x) -> Optional[Image.Image]:
    """
    Best-effort conversion of an image-like value to an RGB PIL image.

    Accepted inputs:
      * a PIL image,
      * a NumPy array,
      * a filesystem path (``str`` or ``os.PathLike``) - this is what a
        ``gr.Gallery`` delivers when it is not created with ``type="pil"``,
      * anything else ``np.array`` can turn into image data,
      * a tuple whose first element is one of the above (e.g. an
        ``(image, caption)`` gallery item).

    Returns None for a None input, for an unreadable path, or when the generic
    fallback conversion fails.
    """
    # Gallery items arrive as (image, caption) tuples; only the image matters here.
    if isinstance(x, tuple) and len(x) >= 1:
        x = x[0]
    if x is None:
        return None
    if isinstance(x, Image.Image):
        return x.convert("RGB")
    if isinstance(x, (str, os.PathLike)):
        # Without this branch a path string reaches the np.array fallback,
        # fails there, and the reference image is silently dropped.
        try:
            with Image.open(x) as opened:
                return opened.convert("RGB")
        except (OSError, ValueError):
            return None
    if isinstance(x, np.ndarray):
        return Image.fromarray(x).convert("RGB")
    try:
        return Image.fromarray(np.array(x)).convert("RGB")
    except Exception:
        return None
def _append_to_gallery(existing, new_img: Image.Image):
    """
    Return a new list holding the convertible items of ``existing`` followed by ``new_img``.

    Each existing item goes through ``_to_pil_rgb``; items it cannot convert
    are left out. ``new_img`` is appended unchanged.
    """
    converted = (_to_pil_rgb(entry) for entry in (existing or ()))
    gallery = [pil for pil in converted if pil is not None]
    gallery.append(new_img)
    return gallery
 def lora_requires_two_images(lora_adapter: str) -> bool:
+    return bool(ADAPTER_SPECS.get(lora_adapter, {}).get("requires_two_images", False))
def image2_label_for_lora(lora_adapter: str) -> str:
    """Return the adapter's ``image2_label`` from its spec, or "Picture 2" when none is configured."""
    spec = ADAPTER_SPECS.get(lora_adapter, {})
    label = spec.get("image2_label", "Picture 2")
    return str(label)
 # ============================================================
+# Helpers: BFS alpha key fix / strict filtering for merged safetensors
 # ============================================================
+def _inject_missing_alpha_keys(state_dict: dict) -> dict:
+    bases = {}
+    for k, v in state_dict.items():
+        if not isinstance(v, torch.Tensor):
+            continue
+        if k.endswith(".lora_down.weight") and v.ndim >= 1:
+            base = k[: -len(".lora_down.weight")]
+            rank = int(v.shape[0])
+            bases[base] = rank
+    for base, rank in bases.items():
+        alpha_tensor = torch.tensor(float(rank), dtype=torch.float32)
+        full_alpha = f"{base}.alpha"
+        if full_alpha not in state_dict:
+            state_dict[full_alpha] = alpha_tensor
+        if base.startswith("diffusion_model."):
+            stripped_base = base[len("diffusion_model.") :]
+            stripped_alpha = f"{stripped_base}.alpha"
+            if stripped_alpha not in state_dict:
+                state_dict[stripped_alpha] = alpha_tensor
+    return state_dict
+def _filter_to_diffusers_lora_keys(state_dict: dict) -> tuple[dict, dict]:
+    keep_suffixes = (
+        ".lora_up.weight",
+        ".lora_down.weight",
+        ".lora_mid.weight",
+        ".alpha",
+        ".lora_alpha",
+    )
+    dropped_patch = 0
+    dropped_other = 0
+    kept = 0
+    normalized_alpha = 0
+    out = {}
+    for k, v in state_dict.items():
+        if not isinstance(v, torch.Tensor):
+            dropped_other += 1
+            continue
+        if k.endswith(".diff") or k.endswith(".diff_b"):
+            dropped_patch += 1
+            continue
+        if not k.endswith(keep_suffixes):
+            dropped_other += 1
+            continue
+        if k.endswith(".lora_alpha"):
+            base = k[: -len(".lora_alpha")]
+            k2 = f"{base}.alpha"
+            out[k2] = v.float() if v.dtype != torch.float32 else v
+            normalized_alpha += 1
+            kept += 1
+            continue
+        out[k] = v
+        kept += 1
+    stats = {
+        "kept": kept,
+        "dropped_patch": dropped_patch,
+        "dropped_other": dropped_other,
+        "normalized_alpha": normalized_alpha,
+    }
+    return out, stats
+def _duplicate_stripped_prefix_keys(state_dict: dict, prefix: str = "diffusion_model.") -> dict:
+    out = dict(state_dict)
+    for k, v in list(state_dict.items()):
+        if not k.startswith(prefix):
+            continue
+        stripped = k[len(prefix) :]
+        if stripped not in out:
+            out[stripped] = v
+    return out
def _load_lora_weights_with_fallback(repo: str, weight_name: str, adapter_name: str, needs_alpha_fix: bool = False):
    """
    Load a LoRA into the global ``pipe``, retrying from a cleaned state dict if allowed.

    The normal ``pipe.load_lora_weights(repo, ...)`` path is tried first. If it
    raises ``KeyError`` or ``ValueError`` and ``needs_alpha_fix`` is set, the
    safetensors file is downloaded, missing alpha keys are injected, non-LoRA
    keys are filtered out, ``diffusion_model.``-stripped aliases are added,
    and the resulting dictionary is loaded instead. Without
    ``needs_alpha_fix`` the original exception propagates.
    """
    try:
        pipe.load_lora_weights(repo, weight_name=weight_name, adapter_name=adapter_name)
    except (KeyError, ValueError) as err:
        if not needs_alpha_fix:
            raise

        notice = (
            "⚠️ LoRA load failed (will try safe dict fallback). "
            f"Adapter={adapter_name!r} file={weight_name!r} error={type(err).__name__}: {err}"
        )
        print(notice)

        checkpoint_path = hf_hub_download(repo_id=repo, filename=weight_name)
        tensors = safetensors_load_file(checkpoint_path)
        # Order matters: alphas are derived before the filter drops/renames keys.
        tensors, stats = _filter_to_diffusers_lora_keys(_inject_missing_alpha_keys(tensors))
        tensors = _duplicate_stripped_prefix_keys(tensors)
        print("LoRA dict stats:", stats)
        pipe.load_lora_weights(tensors, adapter_name=adapter_name)
def _ensure_loaded_and_get_active_adapters(selected_lora: str):
    """
    Make sure every adapter behind ``selected_lora`` is loaded and return how to activate it.

    A spec of type ``"package"`` contributes one adapter per entry in its
    ``parts`` list; any other spec contributes itself as a single adapter.
    Adapters not yet recorded in ``LOADED_ADAPTERS`` are loaded (with the
    alpha-fix fallback when the entry sets ``needs_alpha_fix``) and recorded.

    Returns ``(adapter_names, adapter_weights)``, where each weight is the
    entry's ``strength`` (default 1.0).

    Raises ``gr.Error`` when the spec is missing or a package has no parts.
    """
    spec = ADAPTER_SPECS.get(selected_lora)
    if not spec:
        raise gr.Error(f"Configuration not found for: {selected_lora}")

    if spec.get("type") == "package":
        entries = spec.get("parts", [])
        if not entries:
            raise gr.Error(f"Package spec has no parts: {selected_lora}")
    else:
        # A single-file adapter carries the same fields as a package part.
        entries = [spec]

    adapter_names = []
    adapter_weights = []
    for entry in entries:
        repo = entry["repo"]
        weights = entry["weights"]
        name = entry["adapter_name"]
        strength = float(entry.get("strength", 1.0))
        needs_alpha_fix = bool(entry.get("needs_alpha_fix", False))

        if name not in LOADED_ADAPTERS:
            _load_lora_weights_with_fallback(repo, weights, name, needs_alpha_fix=needs_alpha_fix)
            LOADED_ADAPTERS.add(name)

        adapter_names.append(name)
        adapter_weights.append(strength)

    return adapter_names, adapter_weights
 # ============================================================
+# UI helpers
 # ============================================================
def _fmt_img_info(img: Optional[Image.Image]) -> str:
    """
    Describe an image as a Markdown snippet: size, megapixels and aspect ratio.

    Returns an em dash when no image is given. Megapixels use 1024 * 1024
    pixels per MP; the aspect ratio is reported as 0 for a zero-height image.
    """
    if img is None:
        return "—"

    width, height = img.size
    megapixels = (width * height) / (1024 * 1024)
    if height:
        aspect = width / height
    else:
        aspect = 0
    return f"**{width}×{height}** • **{megapixels:.2f} MP** • **AR {aspect:.3f}**"
def _bfs_tooltip(selected_lora: str) -> dict:
    """
    Build the update for the hint shown under the image inputs.

    The BFS face-swap adapters and AnyPose get a visible hint explaining the
    roles of Picture 1 and Picture 2; every other selection hides the hint and
    clears its text.

    The return annotation is ``dict`` rather than ``gr.Update``: annotations
    are evaluated when the function is defined, so referencing an attribute
    Gradio does not export would fail at import time.
    NOTE(review): assumes ``gr.update()`` returns a plain dict - confirm against
    the installed Gradio version.
    """
    hints = {
        "BFS-Best-FaceSwap": "ℹ️ **BFS FaceSwap:** Picture 1 = **Base** (scene), Picture 2 = **Donor** (head/face).",
        "BFS-Best-FaceSwap-merge": "ℹ️ **BFS FaceSwap:** Picture 1 = **Base** (scene), Picture 2 = **Donor** (head/face).",
        "AnyPose": "ℹ️ **AnyPose:** Picture 1 = **Subject**, Picture 2 = **Pose reference**.",
    }
    hint = hints.get(selected_lora)
    if hint is None:
        return gr.update(visible=False, value="")
    return gr.update(visible=True, value=hint)
+# ============================================================
+# Inference
+# ============================================================
+def _seed_everything(seed: int):
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+    if torch.cuda.is_available():
+        torch.cuda.manual_seed_all(seed)
 @spaces.GPU
+def infer(
+    img1: Image.Image,
+    img2: Optional[Image.Image],
+    extra_gallery,
+    prompt: str,
+    lora_adapter: str,
+    seed: int,
+    randomize_seed: bool,
+    guidance_scale: float,
+    steps: int,
+    target_megapixels: float,
+    use_input_area: bool,
+    keep_2x_output: bool,
+    vae_tiling: bool,
+    extras_condition_only: bool,
+    resolution_multiple: int,
+    vae_ref_megapixels: float,
+    use_depth: bool,
+    derived_on_gpu: bool,
 ):
     if img1 is None:
+        raise gr.Error("Picture 1 is required.")
+    img1 = img1.convert("RGB")
+    img2 = img2.convert("RGB") if img2 is not None else None
+    # Seed
+    if randomize_seed:
+        seed = random.randint(0, MAX_SEED)
+    seed = int(seed) % MAX_SEED
+    _seed_everything(seed)
+    # VAE tiling toggle
+    _apply_vae_tiling(bool(vae_tiling))
+    # Load / activate LoRA
+    if lora_adapter != NONE_LORA:
+        adapter_names, adapter_weights = _ensure_loaded_and_get_active_adapters(lora_adapter)
+        pipe.set_adapters(adapter_names, adapter_weights)
+    else:
+        try:
+            pipe.set_adapters([])
+        except Exception:
+            pass
+    # Images list: Picture1, Picture2 (optional), extras..., derived (optional)
+    images = [img1]
+    base_count = 1
+    if lora_requires_two_images(lora_adapter):
+        if img2 is None:
+            raise gr.Error(f"{lora_adapter} requires Picture 2.")
+        images.append(img2)
+        base_count = 2
+    else:
+        img2 = None  # ignore if not needed
+    extras = []
+    if extra_gallery:
+        for it in extra_gallery:
+            p = _to_pil_rgb(it)
+            if p is not None:
+                extras.append(p)
+    images.extend(extras)
+    derived_preview = None
+    derived_index = None
+    if use_depth:
+        derived_preview = make_depth_map(img1, use_gpu=bool(derived_on_gpu))
+        images.append(derived_preview)
+        derived_index = len(images) - 1
+    # Canvas sizing
+    res_mult = int(resolution_multiple)
+    if use_input_area or float(target_megapixels) <= 0.0:
+        target_area = int(img1.width * img1.height)
+    else:
+        target_area = int(get_target_area_for_lora(img1, lora_adapter, float(target_megapixels)))
+    base_w, base_h = compute_canvas_dimensions_from_area(img1, target_area, res_mult)
+    # Generate at 2x, then downsample unless keep_2x_output
+    gen_w, gen_h = int(base_w * 2), int(base_h * 2)
+    # Extra refs routing (VAE vs conditioning-only)
+    if extras_condition_only:
+        vae_indices = list(range(base_count))
+    else:
+        vae_indices = list(range(len(images)))
+    # Derived depth should ALWAYS be conditioning-only
+    if derived_index is not None and derived_index in vae_indices:
+        vae_indices = [i for i in vae_indices if i != derived_index]
+    # VAE ref size override for extras only
+    vae_ref_area = None
+    if float(vae_ref_megapixels) > 0.0:
+        vae_ref_area = int(float(vae_ref_megapixels) * 1024 * 1024)
+    # Run
+    out = pipe(
+        image=images,
+        prompt=prompt,
+        true_cfg_scale=float(guidance_scale),
+        num_inference_steps=int(steps),
+        width=int(gen_w),
+        height=int(gen_h),
+        pad_to_canvas=True,
+        vae_image_indices=vae_indices,
+        resolution_multiple=int(res_mult),
+        vae_ref_area=vae_ref_area,
+        vae_ref_start_index=int(base_count),
+        generator=torch.Generator(device=device).manual_seed(seed),
     )
+    result = out.images[0] if hasattr(out, "images") else out[0][0]
+    if isinstance(result, np.ndarray):
+        result = Image.fromarray(result)
+    result = result.convert("RGB")
+    if not keep_2x_output:
+        result = result.resize((base_w, base_h), Image.Resampling.LANCZOS)
+    # Cleanup
     gc.collect()
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
+    return result, seed, derived_preview
+# ============================================================
+# UI
+# ============================================================
def _on_lora_change(selected_lora: str):
    """
    Compute the UI updates triggered by a new LoRA selection.

    Returns three updates, in order:
      1. prompt - set to the adapter's preset from ``LORA_PRESET_PROMPTS``, or
         left unchanged when there is no preset;
      2. Picture 2 - always kept visible; the label comes from the adapter's
         spec when it requires two images, otherwise it is "Picture 2";
      3. hint - whatever ``_bfs_tooltip`` returns for the selection.
    """
    preset = LORA_PRESET_PROMPTS.get(selected_lora, "")
    if preset:
        prompt_update = gr.update(value=preset)
    else:
        prompt_update = gr.update()

    if lora_requires_two_images(selected_lora):
        label = image2_label_for_lora(selected_lora)
    else:
        # Picture 2 stays on screen but is optional for this adapter.
        label = "Picture 2"
    img2_update = gr.update(visible=True, label=label)

    return prompt_update, img2_update, _bfs_tooltip(selected_lora)
def _out_to_pic1(out_img):
    """Return a Gradio update that sets the bound output component's value to ``out_img``."""
    update = gr.update(value=out_img)
    return update
def _out_to_pic2(out_img):
    """Return a Gradio update that sets the bound output component's value to ``out_img``."""
    update = gr.update(value=out_img)
    return update
def _out_to_extras(existing, out_img):
    """
    Return an update that appends ``out_img`` to the gallery items in ``existing``.

    When there is no output image, an empty update is returned so the gallery
    is left as it is.
    """
    if out_img is not None:
        combined = _append_to_gallery(existing, out_img)
        return gr.update(value=combined)
    return gr.update()
+with gr.Blocks(theme=orange_red_theme) as demo:  # Build the whole UI: header text, two-column layout, and event wiring
+    gr.Markdown(
+        f"""
+# Qwen Image Edit — Rapid AIO LoRAs (Merged)
+This experimental space for **QIE-2511** uses an extracted Rapid AIO transformer with LoRA support and extra routing features.
+**Enabled features**
+- Optional conditioning-only routing for extra reference latents
+- Uncapped canvas sizing (MP-based) + **2× generation with optional downsample**
+- Optional **VAE tiling** (for high resolutions)
+- Optional **Depth mapping** for conditioning
+- Optional output routing back to inputs
+**Active AIO version:** `{AIO_VERSION}` *(source: {AIO_VERSION_SOURCE})*
 """
+    )
+    with gr.Row():  # Main layout row holding two equal-scale columns
+        with gr.Column(scale=1):  # Left column: input images, extra references, depth options
+            img1 = gr.Image(label="Picture 1", type="pil")
+            img1_info = gr.Markdown("—")
+            img2 = gr.Image(label="Picture 2", type="pil")
+            img2_info = gr.Markdown("—")
+            bfs_tip = gr.Markdown(visible=False)  # Hidden initially; receives the third output of _on_lora_change
+            extra_gallery = gr.Gallery(
+                label="Extra references (optional)",
+                columns=4,
+                height=180,
+            )
+            with gr.Row():
+                use_depth = gr.Checkbox(label="Use Depth conditioning (adds a derived reference)", value=False)
+                derived_on_gpu = gr.Checkbox(label="Run depth on GPU (if available)", value=True)
+            derived_preview = gr.Image(label="Derived conditioning preview", interactive=False, format="png")
+        with gr.Column(scale=1):  # Right column: LoRA choice, prompt, generation settings, output
+            lora_adapter = gr.Dropdown(
+                label="LoRA",
+                choices=[NONE_LORA] + sorted(list(ADAPTER_SPECS.keys())),
+                value=NONE_LORA,
+            )
+            prompt = gr.Textbox(label="Prompt", lines=4, placeholder="Describe the edit…")
+            with gr.Row():
+                steps = gr.Slider(1, 80, value=40, step=1, label="Steps")
+                guidance = gr.Slider(1.0, 10.0, value=4.0, step=0.1, label="CFG (true_cfg_scale)")
+            with gr.Row():
+                resolution_multiple = gr.Dropdown(
+                    label="Resolution step (LCD lattice)",
+                    choices=[32, 56, 112],
+                    value=32,
                 )
+                vae_ref_megapixels = gr.Slider(
+                    0.0, 4.0, value=0.0, step=0.1,
+                    label="VAE ref MP override (extras only, 0 = off)"
                 )
+            with gr.Row():
+                target_megapixels = gr.Slider(
+                    0.0, 12.0, value=1.0, step=0.1,
+                    label="Canvas megapixels (0 = same as Picture 1)"
                 )
+                use_input_area = gr.Checkbox(label="Use Picture 1 pixel area", value=False)
+            with gr.Row():
+                keep_2x_output = gr.Checkbox(label="Keep 2× output (otherwise downsample)", value=False)
+                extras_condition_only = gr.Checkbox(label="Route extras as conditioning-only (no VAE)", value=True)
+            with gr.Row():
+                vae_tiling = gr.Checkbox(label="VAE tiling", value=False)
+                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+            seed = gr.Number(label="Seed", value=0, precision=0)  # Also a run_btn output: infer's second return value is written back here
+            run_btn = gr.Button("Run", variant="primary")
+            out_img = gr.Image(label="Output", type="pil")
+            with gr.Row():
+                to_pic1 = gr.Button("Output → Picture 1")
+                to_pic2 = gr.Button("Output → Picture 2")
+                to_extras = gr.Button("Output → Extras (append)")
+    # Live info updates: re-render each info Markdown via _fmt_img_info whenever its image changes
+    img1.change(lambda x: _fmt_img_info(x), inputs=[img1], outputs=[img1_info])
+    img2.change(lambda x: _fmt_img_info(x), inputs=[img2], outputs=[img2_info])
+    # LoRA change: _on_lora_change updates the prompt, Picture 2, and the bfs_tip Markdown
+    lora_adapter.change(_on_lora_change, inputs=[lora_adapter], outputs=[prompt, img2, bfs_tip])
+    # Run: inputs are handed to infer positionally, so this list order presumably mirrors infer's parameter order (signature not visible here; verify before reordering)
+    run_btn.click(
+        infer,
         inputs=[
+            img1,
+            img2,
+            extra_gallery,
             prompt,
             lora_adapter,
             seed,
             randomize_seed,
+            guidance,
             steps,
             target_megapixels,
+            use_input_area,
+            keep_2x_output,
+            vae_tiling,
             extras_condition_only,
+            resolution_multiple,
+            vae_ref_megapixels,
+            use_depth,
+            derived_on_gpu,
         ],
+        outputs=[out_img, seed, derived_preview],
     )
+    # Output routing buttons: copy the current output into Picture 1 / Picture 2, or append it to the extras gallery
+    to_pic1.click(_out_to_pic1, inputs=[out_img], outputs=[img1])
+    to_pic2.click(_out_to_pic2, inputs=[out_img], outputs=[img2])
+    to_extras.click(_out_to_extras, inputs=[extra_gallery, out_img], outputs=[extra_gallery])
+demo.queue(max_size=32).launch()  # Enable the event queue (max_size=32) and start serving the app