Professional Noob committed on
Commit
e0e7a89
·
verified ·
1 Parent(s): 5b5f568

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +144 -198
app.py CHANGED
@@ -7,7 +7,8 @@ import numpy as np
7
  import spaces
8
  import torch
9
  import random
10
- from PIL import Image
 
11
  from typing import Iterable, Optional
12
 
13
  from transformers import (
@@ -136,7 +137,6 @@ def _normalize_version(raw: str) -> Optional[str]:
136
  return None
137
  if _VER_RE.fullmatch(s):
138
  return s
139
- # forgiving: allow "21" -> "v21"
140
  if _DIGITS_RE.fullmatch(s):
141
  return f"v{s}"
142
  return None
@@ -180,10 +180,9 @@ def _load_pipe_with_version(version: str) -> QwenImageEditPlusPipeline:
180
  return p
181
 
182
 
183
- # Forgiving load: try env/default version, fallback to v19 if it fails
184
  try:
185
  pipe = _load_pipe_with_version(AIO_VERSION)
186
- except Exception as e:
187
  print("❌ Failed to load requested AIO_VERSION. Falling back to v19.")
188
  print("---- exception ----")
189
  print(traceback.format_exc())
@@ -192,7 +191,6 @@ except Exception as e:
192
  AIO_VERSION_SOURCE = "fallback_to_v19"
193
  pipe = _load_pipe_with_version(AIO_VERSION)
194
 
195
- # Apply FA3 Optimization
196
  try:
197
  pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
198
  print("Flash Attention 3 Processor set successfully.")
@@ -202,47 +200,36 @@ except Exception as e:
202
  MAX_SEED = np.iinfo(np.int32).max
203
 
204
  # ============================================================
205
- # Derived conditioning (Pose + Depth)
206
- # - Pose: DWPose via rtmlib (ONNX). Includes face/hands in wholebody.
207
- # - Depth: Depth Anything V2 Small (Transformers-compatible)
208
  # ============================================================
209
 
210
- # Depth (Transformers)
211
  DEPTH_MODEL_ID = "depth-anything/Depth-Anything-V2-Small-hf"
212
 
213
- # Pose (rtmlib)
214
- _DWPOSE_CACHE = {} # key: "cpu" / "cuda" -> Wholebody instance
215
- _DEPTH_CACHE = {} # key: "cpu" / "cuda" -> (processor, model)
216
 
217
 
218
  def _derived_device(use_gpu: bool) -> torch.device:
219
  return torch.device("cuda" if (use_gpu and torch.cuda.is_available()) else "cpu")
220
 
221
 
222
- def _to_cv2_bgr(pil: Image.Image):
223
- # Avoid importing cv2 at module import time if not installed yet
224
- import cv2 # noqa: F401
225
- arr = np.array(pil.convert("RGB"))
226
- # RGB -> BGR
227
- return arr[:, :, ::-1].copy()
228
-
229
-
230
- def _bgr_to_pil_rgb(bgr: np.ndarray) -> Image.Image:
231
- rgb = bgr[:, :, ::-1]
232
- return Image.fromarray(rgb.astype(np.uint8), mode="RGB")
233
 
234
 
235
- def _load_dwpose_model(use_gpu: bool):
236
  """
237
- DWPose Wholebody via rtmlib.
238
-
239
- Notes:
240
- - This path avoids easy-dwpose (which hard-pins an old huggingface_hub).
241
- - Uses ONNXRuntime backend by default.
242
- - If user selects GPU, we try device='cuda'. If onnxruntime-gpu is not installed,
243
- rtmlib may raise; we catch and fall back to CPU.
244
  """
245
- key = "cuda" if (use_gpu and torch.cuda.is_available()) else "cpu"
246
  if key in _DWPOSE_CACHE:
247
  return _DWPOSE_CACHE[key]
248
 
@@ -250,29 +237,25 @@ def _load_dwpose_model(use_gpu: bool):
250
  from rtmlib import Wholebody
251
  except Exception as e:
252
  raise gr.Error(
253
- "Missing dependency for DWPose: rtmlib. "
254
- "Add `rtmlib`, `onnxruntime`, and OpenCV (headless recommended) to requirements.txt.\n"
255
  f"Import error: {e}"
256
  )
257
 
258
- backend = "onnxruntime"
259
- dev = "cuda" if key == "cuda" else "cpu"
260
-
261
  try:
262
- # to_openpose=True => OpenPose-style keypoint layout + drawing (incl face/hands for wholebody)
263
  model = Wholebody(
264
- to_openpose=True,
265
- mode="balanced", # 'performance'/'lightweight'/'balanced'
266
- backend=backend,
267
- device=dev,
268
  )
269
  except Exception as e:
270
- if key == "cuda":
271
- print(f"⚠️ rtmlib cuda init failed ({e}); falling back to CPU.")
272
  model = Wholebody(
273
- to_openpose=True,
274
- mode="balanced",
275
- backend=backend,
276
  device="cpu",
277
  )
278
  else:
@@ -282,72 +265,99 @@ def _load_dwpose_model(use_gpu: bool):
282
  return model
283
 
284
 
285
- def _load_depth_models(dev: torch.device):
286
- key = str(dev)
287
- if key in _DEPTH_CACHE:
288
- return _DEPTH_CACHE[key]
289
-
290
- proc = AutoImageProcessor.from_pretrained(DEPTH_MODEL_ID)
291
- model = AutoModelForDepthEstimation.from_pretrained(DEPTH_MODEL_ID).to(dev)
292
- model.eval()
293
-
294
- _DEPTH_CACHE[key] = (proc, model)
295
- return _DEPTH_CACHE[key]
296
-
297
-
298
- def make_dwpose_map(
299
  img: Image.Image,
300
  *,
301
  use_gpu: bool,
302
- max_people: int = 4,
303
  kp_thresh: float = 0.20,
 
 
304
  ) -> Image.Image:
305
  """
306
- Returns an OpenPose-style pose map (RGB) using rtmlib Wholebody (DWPose 133 kpts).
307
- Includes face + hands.
308
 
309
- We draw on a black canvas (like ControlNet pose maps).
 
 
 
310
  """
311
  img = img.convert("RGB")
312
- wb = _load_dwpose_model(use_gpu=bool(use_gpu))
 
 
 
313
 
314
- # rtmlib expects cv2 BGR image
315
- bgr = _to_cv2_bgr(img)
 
 
 
 
 
 
 
 
 
 
316
 
317
  try:
318
- keypoints, scores = wb(bgr)
 
 
319
  except Exception as e:
320
- # If anything goes wrong, fail gracefully
321
- print("⚠️ DWPose inference failed:", e)
322
- return Image.new("RGB", img.size, (0, 0, 0))
323
-
324
- # keypoints: (N, K, 2), scores: (N, K)
325
- if keypoints is None or len(keypoints) == 0:
326
- return Image.new("RGB", img.size, (0, 0, 0))
327
 
328
- # Limit people
329
  try:
330
- n = int(max_people)
331
- if n > 0 and keypoints.shape[0] > n:
332
- keypoints = keypoints[:n]
333
- scores = scores[:n]
334
- except Exception:
335
- pass
 
 
 
 
 
336
 
337
- # Draw on black canvas
338
  try:
339
  from rtmlib import draw_skeleton
340
  except Exception as e:
341
  raise gr.Error(f"rtmlib draw_skeleton import failed: {e}")
342
 
343
  canvas = np.zeros_like(bgr, dtype=np.uint8)
344
- # rtmlib uses kpt_thr threshold
345
- canvas = draw_skeleton(canvas, keypoints, scores, kpt_thr=float(kp_thresh))
346
- out = _bgr_to_pil_rgb(canvas)
347
- # Ensure same size as input
348
- if out.size != img.size:
349
- out = out.resize(img.size, Image.BILINEAR)
350
- return out
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351
 
352
 
353
  def make_depth_map(img: Image.Image, *, use_gpu: bool) -> Image.Image:
@@ -362,10 +372,7 @@ def make_depth_map(img: Image.Image, *, use_gpu: bool) -> Image.Image:
362
  with torch.no_grad():
363
  out = model(**inputs)
364
 
365
- # predicted_depth: (B, H, W)
366
  pred = out.predicted_depth
367
-
368
- # Upsample to original image size
369
  pred = torch.nn.functional.interpolate(
370
  pred.unsqueeze(1),
371
  size=(img.height, img.width),
@@ -463,7 +470,7 @@ ADAPTER_SPECS = {
463
  "weights": "bfs_head_v5_2511_original.safetensors",
464
  "adapter_name": "BFS-Best-Faceswap",
465
  "strength": 1.0,
466
- "needs_alpha_fix": True, # <-- fixes KeyError 'img_in.alpha'
467
  },
468
  "BFS-Best-FaceSwap-merge": {
469
  "type": "single",
@@ -473,7 +480,7 @@ ADAPTER_SPECS = {
473
  "weights": "bfs_head_v5_2511_merged_version_rank_32_fp32.safetensors",
474
  "adapter_name": "BFS-Best-Faceswap-merge",
475
  "strength": 1.1,
476
- "needs_alpha_fix": True, # <-- fixes KeyError 'img_in.alpha'
477
  },
478
  "F2P": {
479
  "type": "single",
@@ -559,16 +566,12 @@ LORA_PRESET_PROMPTS = {
559
  "BFS-Best-FaceSwap-merge": "head_swap: start with Picture 1 as the base image, keeping its lighting, environment, and background. remove the head from Picture 1 completely and replace it with the head from Picture 2, strictly preserving the hair, eye color, and nose structure of Picture 2. copy the eye direction, head rotation, and micro-expressions from Picture 1. high quality, sharp details, 4k",
560
  }
561
 
562
- # Track what is currently loaded in memory (adapter_name values)
563
  LOADED_ADAPTERS = set()
564
 
565
  # ============================================================
566
  # Helpers: resolution
567
  # ============================================================
568
 
569
- # We prefer *area-based* sizing (≈ megapixels) over long-edge sizing.
570
- # This aligns better with Qwen-Image-Edit's internal assumptions and reduces FOV drift.
571
-
572
  def _round_to_multiple(x: int, m: int) -> int:
573
  return max(m, (int(x) // m) * m)
574
 
@@ -577,14 +580,9 @@ def compute_canvas_dimensions_from_area(
577
  target_area: int,
578
  multiple_of: int,
579
  ) -> tuple[int, int]:
580
- """Compute (width, height) that matches image aspect ratio and approximates target_area.
581
-
582
- The result is floored to be divisible by multiple_of (typically vae_scale_factor*2).
583
- """
584
  w, h = image.size
585
  aspect = w / h if h else 1.0
586
 
587
- # Use the pipeline's own area->(w,h) helper for consistency.
588
  from qwenimage.pipeline_qwenimage_edit_plus import calculate_dimensions
589
 
590
  width, height = calculate_dimensions(int(target_area), float(aspect))
@@ -597,13 +595,6 @@ def get_target_area_for_lora(
597
  lora_adapter: str,
598
  user_target_megapixels: float,
599
  ) -> int:
600
- """Return target pixel area for the canvas.
601
-
602
- Priority:
603
- 1) Adapter spec: target_area (pixels) or target_megapixels
604
- 2) Adapter spec: target_long_edge (legacy) -> converted to area using image aspect
605
- 3) User slider target megapixels
606
- """
607
  spec = ADAPTER_SPECS.get(lora_adapter, {})
608
 
609
  if "target_area" in spec:
@@ -619,7 +610,6 @@ def get_target_area_for_lora(
619
  except Exception:
620
  pass
621
 
622
- # Legacy support (e.g. Upscale2K)
623
  if "target_long_edge" in spec:
624
  try:
625
  long_edge = int(spec["target_long_edge"])
@@ -634,7 +624,6 @@ def get_target_area_for_lora(
634
  except Exception:
635
  pass
636
 
637
- # User default
638
  return int(float(user_target_megapixels) * 1024 * 1024)
639
 
640
  # ============================================================
@@ -644,67 +633,43 @@ def get_target_area_for_lora(
644
  def lora_requires_two_images(lora_adapter: str) -> bool:
645
  return bool(ADAPTER_SPECS.get(lora_adapter, {}).get("requires_two_images", False))
646
 
647
-
648
  def image2_label_for_lora(lora_adapter: str) -> str:
649
  return str(ADAPTER_SPECS.get(lora_adapter, {}).get("image2_label", "Upload Reference (Image 2)"))
650
 
651
-
652
  def _to_pil_rgb(x) -> Optional[Image.Image]:
653
- """
654
- Accepts PIL / numpy / (image, caption) tuples from gr.Gallery and returns PIL RGB.
655
- Gradio Gallery commonly yields tuples like (image, caption).
656
- """
657
  if x is None:
658
  return None
659
-
660
- # Gallery often returns (image, caption)
661
  if isinstance(x, tuple) and len(x) >= 1:
662
  x = x[0]
663
  if x is None:
664
  return None
665
-
666
  if isinstance(x, Image.Image):
667
  return x.convert("RGB")
668
-
669
  if isinstance(x, np.ndarray):
670
  return Image.fromarray(x).convert("RGB")
671
-
672
- # Best-effort fallback
673
  try:
674
  return Image.fromarray(np.array(x)).convert("RGB")
675
  except Exception:
676
  return None
677
 
678
-
679
  def build_labeled_images(
680
  img1: Image.Image,
681
  img2: Optional[Image.Image],
682
  extra_imgs: Optional[list[Image.Image]],
683
  ) -> dict[str, Image.Image]:
684
- """
685
- Creates labels image_1, image_2, image_3... based on what is actually uploaded:
686
- - img1 is always image_1
687
- - img2 becomes image_2 only if present
688
- - extras start immediately after the last present base box
689
- The pipeline receives images in this exact order.
690
- """
691
  labeled: dict[str, Image.Image] = {}
692
  idx = 1
693
-
694
  labeled[f"image_{idx}"] = img1
695
  idx += 1
696
-
697
  if img2 is not None:
698
  labeled[f"image_{idx}"] = img2
699
  idx += 1
700
-
701
  if extra_imgs:
702
  for im in extra_imgs:
703
  if im is None:
704
  continue
705
  labeled[f"image_{idx}"] = im
706
  idx += 1
707
-
708
  return labeled
709
 
710
  # ============================================================
@@ -712,17 +677,7 @@ def build_labeled_images(
712
  # ============================================================
713
 
714
  def _inject_missing_alpha_keys(state_dict: dict) -> dict:
715
- """
716
- Diffusers' Qwen LoRA converter expects '<module>.alpha' keys.
717
- BFS safetensors omits them. We inject alpha = rank (neutral scaling).
718
-
719
- IMPORTANT: diffusers may strip 'diffusion_model.' before lookup, so we
720
- inject BOTH:
721
- - diffusion_model.xxx.alpha
722
- - xxx.alpha
723
- """
724
  bases = {}
725
-
726
  for k, v in state_dict.items():
727
  if not isinstance(v, torch.Tensor):
728
  continue
@@ -733,22 +688,17 @@ def _inject_missing_alpha_keys(state_dict: dict) -> dict:
733
 
734
  for base, rank in bases.items():
735
  alpha_tensor = torch.tensor(float(rank), dtype=torch.float32)
736
-
737
  full_alpha = f"{base}.alpha"
738
  if full_alpha not in state_dict:
739
  state_dict[full_alpha] = alpha_tensor
740
-
741
  if base.startswith("diffusion_model."):
742
  stripped_base = base[len("diffusion_model.") :]
743
  stripped_alpha = f"{stripped_base}.alpha"
744
  if stripped_alpha not in state_dict:
745
  state_dict[stripped_alpha] = alpha_tensor
746
-
747
  return state_dict
748
 
749
-
750
  def _filter_to_diffusers_lora_keys(state_dict: dict) -> tuple[dict, dict]:
751
- """Return (filtered_state_dict, stats)."""
752
  keep_suffixes = (
753
  ".lora_up.weight",
754
  ".lora_down.weight",
@@ -756,7 +706,6 @@ def _filter_to_diffusers_lora_keys(state_dict: dict) -> tuple[dict, dict]:
756
  ".alpha",
757
  ".lora_alpha",
758
  )
759
-
760
  dropped_patch = 0
761
  dropped_other = 0
762
  kept = 0
@@ -767,15 +716,12 @@ def _filter_to_diffusers_lora_keys(state_dict: dict) -> tuple[dict, dict]:
767
  if not isinstance(v, torch.Tensor):
768
  dropped_other += 1
769
  continue
770
-
771
  if k.endswith(".diff") or k.endswith(".diff_b"):
772
  dropped_patch += 1
773
  continue
774
-
775
  if not k.endswith(keep_suffixes):
776
  dropped_other += 1
777
  continue
778
-
779
  if k.endswith(".lora_alpha"):
780
  base = k[: -len(".lora_alpha")]
781
  k2 = f"{base}.alpha"
@@ -783,7 +729,6 @@ def _filter_to_diffusers_lora_keys(state_dict: dict) -> tuple[dict, dict]:
783
  normalized_alpha += 1
784
  kept += 1
785
  continue
786
-
787
  out[k] = v
788
  kept += 1
789
 
@@ -795,7 +740,6 @@ def _filter_to_diffusers_lora_keys(state_dict: dict) -> tuple[dict, dict]:
795
  }
796
  return out, stats
797
 
798
-
799
  def _duplicate_stripped_prefix_keys(state_dict: dict, prefix: str = "diffusion_model.") -> dict:
800
  out = dict(state_dict)
801
  for k, v in list(state_dict.items()):
@@ -806,7 +750,6 @@ def _duplicate_stripped_prefix_keys(state_dict: dict, prefix: str = "diffusion_m
806
  out[stripped] = v
807
  return out
808
 
809
-
810
  def _load_lora_weights_with_fallback(repo: str, weight_name: str, adapter_name: str, needs_alpha_fix: bool = False):
811
  try:
812
  pipe.load_lora_weights(repo, weight_name=weight_name, adapter_name=adapter_name)
@@ -814,12 +757,10 @@ def _load_lora_weights_with_fallback(repo: str, weight_name: str, adapter_name:
814
  except (KeyError, ValueError) as e:
815
  if not needs_alpha_fix:
816
  raise
817
-
818
  print(
819
  "⚠️ LoRA load failed (will try safe dict fallback). "
820
  f"Adapter={adapter_name!r} file={weight_name!r} error={type(e).__name__}: {e}"
821
  )
822
-
823
  local_path = hf_hub_download(repo_id=repo, filename=weight_name)
824
  sd = safetensors_load_file(local_path)
825
 
@@ -832,11 +773,9 @@ def _load_lora_weights_with_fallback(repo: str, weight_name: str, adapter_name:
832
  f"kept={stats['kept']} dropped_patch={stats['dropped_patch']} "
833
  f"dropped_other={stats['dropped_other']} normalized_alpha={stats['normalized_alpha']}"
834
  )
835
-
836
  pipe.load_lora_weights(sd, adapter_name=adapter_name)
837
  return
838
 
839
-
840
  def _ensure_loaded_and_get_active_adapters(selected_lora: str):
841
  spec = ADAPTER_SPECS.get(selected_lora)
842
  if not spec:
@@ -849,7 +788,6 @@ def _ensure_loaded_and_get_active_adapters(selected_lora: str):
849
  parts = spec.get("parts", [])
850
  if not parts:
851
  raise gr.Error(f"Package spec has no parts: {selected_lora}")
852
-
853
  for part in parts:
854
  repo = part["repo"]
855
  weights = part["weights"]
@@ -871,10 +809,8 @@ def _ensure_loaded_and_get_active_adapters(selected_lora: str):
871
  raise gr.Error(f"Failed to load adapter part {selected_lora}/{adapter_name}: {e}")
872
  else:
873
  print(f"--- Adapter part already loaded: {selected_lora} / {adapter_name} ---")
874
-
875
  adapter_names.append(adapter_name)
876
  adapter_weights.append(strength)
877
-
878
  else:
879
  repo = spec["repo"]
880
  weights = spec["weights"]
@@ -902,13 +838,11 @@ def _ensure_loaded_and_get_active_adapters(selected_lora: str):
902
 
903
  return adapter_names, adapter_weights
904
 
905
-
906
  # ============================================================
907
  # UI handlers
908
  # ============================================================
909
 
910
  def on_lora_change_ui(selected_lora, current_prompt, current_extras_condition_only):
911
- # Preset prompt (fill only if empty)
912
  if selected_lora != NONE_LORA:
913
  preset = LORA_PRESET_PROMPTS.get(selected_lora, "")
914
  if preset and (current_prompt is None or str(current_prompt).strip() == ""):
@@ -918,13 +852,11 @@ def on_lora_change_ui(selected_lora, current_prompt, current_extras_condition_on
918
  else:
919
  prompt_update = gr.update(value=current_prompt)
920
 
921
- # Image2 visibility/label
922
  if lora_requires_two_images(selected_lora):
923
  img2_update = gr.update(visible=True, label=image2_label_for_lora(selected_lora))
924
  else:
925
  img2_update = gr.update(visible=False, value=None, label="Upload Reference (Image 2)")
926
 
927
- # Extra references routing default:
928
  if selected_lora in ("BFS-Best-FaceSwap", "BFS-Best-FaceSwap-merge", "AnyPose"):
929
  extras_update = gr.update(value=True)
930
  else:
@@ -941,21 +873,26 @@ def set_output_as_image1(last):
941
  raise gr.Error("No output available yet.")
942
  return gr.update(value=last)
943
 
944
-
945
  def set_output_as_image2(last):
946
  if last is None:
947
  raise gr.Error("No output available yet.")
948
  return gr.update(value=last)
949
 
950
-
951
  def set_output_as_extra(last, existing_extra):
952
  if last is None:
953
  raise gr.Error("No output available yet.")
954
  return _append_to_gallery(existing_extra, last)
955
 
956
-
957
  @spaces.GPU
958
- def add_derived_ref(img1, existing_extra, derived_type, derived_use_gpu, derived_max_people):
 
 
 
 
 
 
 
 
959
  if img1 is None:
960
  raise gr.Error("Please upload Image 1 first.")
961
 
@@ -964,12 +901,13 @@ def add_derived_ref(img1, existing_extra, derived_type, derived_use_gpu, derived
964
 
965
  base = img1.convert("RGB")
966
 
967
- if derived_type == "Pose (DWPose / rtmlib)":
968
- derived = make_dwpose_map(
969
  base,
970
  use_gpu=bool(derived_use_gpu),
971
- max_people=int(derived_max_people),
972
  kp_thresh=0.20,
 
 
973
  )
974
  elif derived_type == "Depth (Depth Anything V2 Small)":
975
  derived = make_depth_map(base, use_gpu=bool(derived_use_gpu))
@@ -979,7 +917,6 @@ def add_derived_ref(img1, existing_extra, derived_type, derived_use_gpu, derived
979
  new_gallery = _append_to_gallery(existing_extra, derived)
980
  return gr.update(value=new_gallery), gr.update(visible=True, value=derived)
981
 
982
-
983
  # ============================================================
984
  # Inference
985
  # ============================================================
@@ -988,7 +925,7 @@ def add_derived_ref(img1, existing_extra, derived_type, derived_use_gpu, derived
988
  def infer(
989
  input_image_1,
990
  input_image_2,
991
- input_images_extra, # gallery multi-image box
992
  prompt,
993
  lora_adapter,
994
  seed,
@@ -1007,7 +944,6 @@ def infer(
1007
  if input_image_1 is None:
1008
  raise gr.Error("Please upload Image 1.")
1009
 
1010
- # Handle "None"
1011
  if lora_adapter == NONE_LORA:
1012
  try:
1013
  pipe.set_adapters([], adapter_weights=[])
@@ -1030,7 +966,6 @@ def infer(
1030
  img1 = input_image_1.convert("RGB")
1031
  img2 = input_image_2.convert("RGB") if input_image_2 is not None else None
1032
 
1033
- # Normalize extra images (Gallery) to PIL RGB (handles tuples from Gallery)
1034
  extra_imgs: list[Image.Image] = []
1035
  if input_images_extra:
1036
  for item in input_images_extra:
@@ -1038,19 +973,15 @@ def infer(
1038
  if pil is not None:
1039
  extra_imgs.append(pil)
1040
 
1041
- # Enforce existing 2-image LoRA behavior (image_1 + image_2 required)
1042
  if lora_requires_two_images(lora_adapter) and img2 is None:
1043
  raise gr.Error("This LoRA needs two images. Please upload Image 2 as well.")
1044
 
1045
- # Label images as image_1, image_2, image_3...
1046
  labeled = build_labeled_images(img1, img2, extra_imgs)
1047
 
1048
- # Pass to pipeline in labeled order. Keep single-image call when only one is present.
1049
  pipe_images = list(labeled.values())
1050
  if len(pipe_images) == 1:
1051
  pipe_images = pipe_images[0]
1052
 
1053
- # Resolution derived from Image 1 (base/body/target)
1054
  target_area = get_target_area_for_lora(img1, lora_adapter, float(target_megapixels))
1055
  width, height = compute_canvas_dimensions_from_area(
1056
  img1,
@@ -1058,7 +989,6 @@ def infer(
1058
  multiple_of=int(pipe.vae_scale_factor * 2),
1059
  )
1060
 
1061
- # Decide which images participate in the VAE latent stream.
1062
  vae_image_indices = None
1063
  if extras_condition_only:
1064
  if isinstance(pipe_images, list) and len(pipe_images) > 2:
@@ -1066,8 +996,8 @@ def infer(
1066
 
1067
  try:
1068
  print(
1069
- "[DEBUG][infer] submitting request | "
1070
- f"lora_adapter={lora_adapter!r} seed={seed} prompt={prompt!r}"
1071
  )
1072
 
1073
  result = pipe(
@@ -1088,7 +1018,6 @@ def infer(
1088
  if torch.cuda.is_available():
1089
  torch.cuda.empty_cache()
1090
 
1091
-
1092
  @spaces.GPU
1093
  def infer_example(input_image, prompt, lora_adapter):
1094
  if input_image is None:
@@ -1099,7 +1028,6 @@ def infer_example(input_image, prompt, lora_adapter):
1099
  result, seed, last = infer(input_pil, None, None, prompt, lora_adapter, 0, True, guidance_scale, steps, 1.0, True, True)
1100
  return result, seed, last
1101
 
1102
-
1103
  # ============================================================
1104
  # UI
1105
  # ============================================================
@@ -1181,20 +1109,33 @@ with gr.Blocks() as demo:
1181
  label="Derived Type (from Image 1)",
1182
  choices=[
1183
  "None",
1184
- "Pose (DWPose / rtmlib)",
1185
  "Depth (Depth Anything V2 Small)",
1186
  ],
1187
  value="None",
1188
  )
1189
  derived_use_gpu = gr.Checkbox(label="Use GPU for derived model", value=False)
 
 
1190
  derived_max_people = gr.Slider(
1191
- label="Max people (pose)",
1192
  minimum=1,
1193
  maximum=10,
1194
  step=1,
1195
  value=4,
1196
  )
1197
- add_derived_btn = gr.Button("➕ Add derived ref to Extras (conditioning-only recommended)")
 
 
 
 
 
 
 
 
 
 
 
1198
 
1199
  seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
1200
  randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
@@ -1216,7 +1157,6 @@ with gr.Blocks() as demo:
1216
  value=True,
1217
  )
1218
 
1219
- # On LoRA selection: preset prompt + toggle Image 2
1220
  lora_adapter.change(
1221
  fn=on_lora_change_ui,
1222
  inputs=[lora_adapter, prompt, extras_condition_only],
@@ -1276,15 +1216,21 @@ with gr.Blocks() as demo:
1276
  outputs=[output_image, seed, last_output],
1277
  )
1278
 
1279
- # Output routing buttons
1280
  btn_out_to_img1.click(fn=set_output_as_image1, inputs=[last_output], outputs=[input_image_1])
1281
  btn_out_to_img2.click(fn=set_output_as_image2, inputs=[last_output], outputs=[input_image_2])
1282
  btn_out_to_extra.click(fn=set_output_as_extra, inputs=[last_output, input_images_extra], outputs=[input_images_extra])
1283
 
1284
- # Derived conditioning: append pose/depth map as extra ref (UI shows preview)
1285
  add_derived_btn.click(
1286
  fn=add_derived_ref,
1287
- inputs=[input_image_1, input_images_extra, derived_type, derived_use_gpu, derived_max_people],
 
 
 
 
 
 
 
 
1288
  outputs=[input_images_extra, derived_preview],
1289
  )
1290
 
 
7
  import spaces
8
  import torch
9
  import random
10
+ import cv2
11
+ from PIL import Image, ImageDraw
12
  from typing import Iterable, Optional
13
 
14
  from transformers import (
 
137
  return None
138
  if _VER_RE.fullmatch(s):
139
  return s
 
140
  if _DIGITS_RE.fullmatch(s):
141
  return f"v{s}"
142
  return None
 
180
  return p
181
 
182
 
 
183
  try:
184
  pipe = _load_pipe_with_version(AIO_VERSION)
185
+ except Exception:
186
  print("❌ Failed to load requested AIO_VERSION. Falling back to v19.")
187
  print("---- exception ----")
188
  print(traceback.format_exc())
 
191
  AIO_VERSION_SOURCE = "fallback_to_v19"
192
  pipe = _load_pipe_with_version(AIO_VERSION)
193
 
 
194
  try:
195
  pipe.transformer.set_attn_processor(QwenDoubleStreamAttnProcessorFA3())
196
  print("Flash Attention 3 Processor set successfully.")
 
200
  MAX_SEED = np.iinfo(np.int32).max
201
 
202
  # ============================================================
203
+ # Derived conditioning: DWPose + Depth
 
 
204
  # ============================================================
205
 
 
206
  DEPTH_MODEL_ID = "depth-anything/Depth-Anything-V2-Small-hf"
207
 
208
+ # Lazy caches keyed by device string ("cpu" / "cuda")
209
+ _DWPOSE_CACHE = {}
210
+ _DEPTH_CACHE = {}
211
 
212
 
213
  def _derived_device(use_gpu: bool) -> torch.device:
214
  return torch.device("cuda" if (use_gpu and torch.cuda.is_available()) else "cpu")
215
 
216
 
217
def _load_depth_models(dev: torch.device):
    """Return the ``(processor, model)`` pair for ``dev``, loading it on first use.

    Results are memoised in ``_DEPTH_CACHE`` under ``str(dev)``, so each device
    string triggers at most one ``from_pretrained`` round for ``DEPTH_MODEL_ID``.
    The model is moved to ``dev`` and switched to eval mode before being cached.
    """
    cache_key = str(dev)
    try:
        return _DEPTH_CACHE[cache_key]
    except KeyError:
        pass  # first request for this device: load below

    processor = AutoImageProcessor.from_pretrained(DEPTH_MODEL_ID)
    depth_model = AutoModelForDepthEstimation.from_pretrained(DEPTH_MODEL_ID)
    depth_model = depth_model.to(dev)
    depth_model.eval()

    entry = (processor, depth_model)
    _DEPTH_CACHE[cache_key] = entry
    return entry
 
 
226
 
227
 
228
+ def _load_dwpose(use_gpu: bool, *, to_openpose: bool = True, mode: str = "balanced", backend: str = "onnxruntime"):
229
  """
230
+ DWPose-ish wholebody via rtmlib Wholebody (RTMW-DW by default in rtmlib downloads).
 
 
 
 
 
 
231
  """
232
+ key = ("cuda" if (use_gpu and torch.cuda.is_available()) else "cpu", bool(to_openpose), str(mode), str(backend))
233
  if key in _DWPOSE_CACHE:
234
  return _DWPOSE_CACHE[key]
235
 
 
237
  from rtmlib import Wholebody
238
  except Exception as e:
239
  raise gr.Error(
240
+ "rtmlib not available. Add `rtmlib` to requirements.txt.\n"
 
241
  f"Import error: {e}"
242
  )
243
 
244
+ device_str = "cuda" if (use_gpu and torch.cuda.is_available()) else "cpu"
 
 
245
  try:
 
246
  model = Wholebody(
247
+ to_openpose=bool(to_openpose),
248
+ mode=str(mode),
249
+ backend=str(backend),
250
+ device=device_str,
251
  )
252
  except Exception as e:
253
+ if device_str == "cuda":
254
+ print(f"⚠️ rtmlib Wholebody CUDA init failed: {e} -> falling back to CPU")
255
  model = Wholebody(
256
+ to_openpose=bool(to_openpose),
257
+ mode=str(mode),
258
+ backend=str(backend),
259
  device="cpu",
260
  )
261
  else:
 
265
  return model
266
 
267
 
268
def make_dwpose_map_debug(
    img: Image.Image,
    *,
    use_gpu: bool,
    kp_thresh: float = 0.20,
    to_openpose: bool = True,
    openpose_skeleton: Optional[bool] = None,
) -> Image.Image:
    """
    Run rtmlib Wholebody on ``img`` and draw the result with rtmlib.draw_skeleton,
    printing verbose shape diagnostics so the keypoint layout (K) can be inspected.

    Args:
        img: Input image; converted to RGB before inference.
        use_gpu: Forwarded to ``_load_dwpose`` to select the device.
        kp_thresh: Passed to ``draw_skeleton`` as ``kpt_thr``.
        to_openpose: Forwarded to ``_load_dwpose``; also the default for
            ``openpose_skeleton`` when that is left as ``None``.
        openpose_skeleton: Explicit override for the ``draw_skeleton`` flag of
            the same name. ``None`` means "use ``to_openpose``".

    Returns:
        An RGB image with the skeleton drawn on a black canvas. When the model
        reports no keypoints, a plain black image of ``img``'s size is returned.

    Raises:
        gr.Error: If ``draw_skeleton`` cannot be imported from rtmlib.

    IMPORTANT:
    - If to_openpose=True, outputs commonly have K=134 (openpose wholebody).
    - If to_openpose=False, outputs commonly have K=133 (coco wholebody).
    - rtmlib.draw_skeleton needs correct openpose_skeleton flag AND correct shape.
    """
    img = img.convert("RGB")
    wb = _load_dwpose(use_gpu=bool(use_gpu), to_openpose=bool(to_openpose))
    bgr = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

    keypoints, scores = wb(bgr)

    # Convert once; the same arrays feed both the diagnostics and the drawing.
    kps = np.asarray(keypoints)
    sc = np.asarray(scores)

    # -------------------- DEBUGGER --------------------
    print("[DWPose debug] keypoints type:", type(keypoints), "scores type:", type(scores))
    print("[DWPose debug] kps.shape:", kps.shape, "dtype:", kps.dtype)
    print("[DWPose debug] sc.shape :", sc.shape, "dtype:", sc.dtype)

    if isinstance(keypoints, list):
        print("[DWPose debug] keypoints list len:", len(keypoints))
    if isinstance(scores, list):
        print("[DWPose debug] scores list len:", len(scores))

    if kps.ndim >= 2:
        # shape[-2] is K for both (K, 2) and (N, K, 2) layouts.
        print("[DWPose debug] inferred K (num keypoints):", int(kps.shape[-2]))

    try:
        if kps.ndim == 3:
            print("[DWPose debug] first person first 3 kpts:", kps[0, :3, :])
            if sc.ndim >= 2:
                print("[DWPose debug] first person first 3 scores:", sc[0, :3])
        elif kps.ndim == 2:
            print("[DWPose debug] first 3 kpts:", kps[:3, :])
            if sc.ndim >= 1:
                print("[DWPose debug] first 3 scores:", sc[:3])
    except Exception as e:
        # Sampling is diagnostic only; an empty result must not abort the run.
        print("[DWPose debug] sample print failed:", e)
    # ------------------ END DEBUGGER ------------------

    try:
        from rtmlib import draw_skeleton
    except Exception as e:
        raise gr.Error(f"rtmlib draw_skeleton import failed: {e}") from e

    # Nothing detected: return a black pose map instead of handing an empty or
    # None result to draw_skeleton, which reads the keypoint count from shape[1].
    if keypoints is None or kps.size == 0:
        print("[DWPose debug] no keypoints detected; returning empty canvas")
        return Image.new("RGB", img.size, (0, 0, 0))

    canvas = np.zeros_like(bgr, dtype=np.uint8)

    # Without an explicit override, assume the skeleton layout matches the
    # layout the model was asked to produce.
    if openpose_skeleton is None:
        openpose_skeleton = bool(to_openpose)

    # A single instance may come back as (K, 2) / (K,); add the leading
    # instance axis so shape[1] is K rather than 2.
    if kps.ndim == 2 and kps.shape[-1] == 2:
        kps = kps[None, :, :]
    if sc.ndim == 1:
        sc = sc[None, :]

    out = draw_skeleton(
        canvas,
        kps,
        sc,
        openpose_skeleton=bool(openpose_skeleton),
        kpt_thr=float(kp_thresh),
    )

    out_rgb = cv2.cvtColor(out, cv2.COLOR_BGR2RGB)
    return Image.fromarray(out_rgb)
361
 
362
 
363
  def make_depth_map(img: Image.Image, *, use_gpu: bool) -> Image.Image:
 
372
  with torch.no_grad():
373
  out = model(**inputs)
374
 
 
375
  pred = out.predicted_depth
 
 
376
  pred = torch.nn.functional.interpolate(
377
  pred.unsqueeze(1),
378
  size=(img.height, img.width),
 
470
  "weights": "bfs_head_v5_2511_original.safetensors",
471
  "adapter_name": "BFS-Best-Faceswap",
472
  "strength": 1.0,
473
+ "needs_alpha_fix": True,
474
  },
475
  "BFS-Best-FaceSwap-merge": {
476
  "type": "single",
 
480
  "weights": "bfs_head_v5_2511_merged_version_rank_32_fp32.safetensors",
481
  "adapter_name": "BFS-Best-Faceswap-merge",
482
  "strength": 1.1,
483
+ "needs_alpha_fix": True,
484
  },
485
  "F2P": {
486
  "type": "single",
 
566
  "BFS-Best-FaceSwap-merge": "head_swap: start with Picture 1 as the base image, keeping its lighting, environment, and background. remove the head from Picture 1 completely and replace it with the head from Picture 2, strictly preserving the hair, eye color, and nose structure of Picture 2. copy the eye direction, head rotation, and micro-expressions from Picture 1. high quality, sharp details, 4k",
567
  }
568
 
 
569
  LOADED_ADAPTERS = set()
570
 
571
  # ============================================================
572
  # Helpers: resolution
573
  # ============================================================
574
 
 
 
 
575
  def _round_to_multiple(x: int, m: int) -> int:
576
  return max(m, (int(x) // m) * m)
577
 
 
580
  target_area: int,
581
  multiple_of: int,
582
  ) -> tuple[int, int]:
 
 
 
 
583
  w, h = image.size
584
  aspect = w / h if h else 1.0
585
 
 
586
  from qwenimage.pipeline_qwenimage_edit_plus import calculate_dimensions
587
 
588
  width, height = calculate_dimensions(int(target_area), float(aspect))
 
595
  lora_adapter: str,
596
  user_target_megapixels: float,
597
  ) -> int:
 
 
 
 
 
 
 
598
  spec = ADAPTER_SPECS.get(lora_adapter, {})
599
 
600
  if "target_area" in spec:
 
610
  except Exception:
611
  pass
612
 
 
613
  if "target_long_edge" in spec:
614
  try:
615
  long_edge = int(spec["target_long_edge"])
 
624
  except Exception:
625
  pass
626
 
 
627
  return int(float(user_target_megapixels) * 1024 * 1024)
628
 
629
  # ============================================================
 
633
  def lora_requires_two_images(lora_adapter: str) -> bool:
634
  return bool(ADAPTER_SPECS.get(lora_adapter, {}).get("requires_two_images", False))
635
 
 
636
  def image2_label_for_lora(lora_adapter: str) -> str:
637
  return str(ADAPTER_SPECS.get(lora_adapter, {}).get("image2_label", "Upload Reference (Image 2)"))
638
 
 
639
  def _to_pil_rgb(x) -> Optional[Image.Image]:
 
 
 
 
640
  if x is None:
641
  return None
 
 
642
  if isinstance(x, tuple) and len(x) >= 1:
643
  x = x[0]
644
  if x is None:
645
  return None
 
646
  if isinstance(x, Image.Image):
647
  return x.convert("RGB")
 
648
  if isinstance(x, np.ndarray):
649
  return Image.fromarray(x).convert("RGB")
 
 
650
  try:
651
  return Image.fromarray(np.array(x)).convert("RGB")
652
  except Exception:
653
  return None
654
 
 
655
  def build_labeled_images(
656
  img1: Image.Image,
657
  img2: Optional[Image.Image],
658
  extra_imgs: Optional[list[Image.Image]],
659
  ) -> dict[str, Image.Image]:
 
 
 
 
 
 
 
660
  labeled: dict[str, Image.Image] = {}
661
  idx = 1
 
662
  labeled[f"image_{idx}"] = img1
663
  idx += 1
 
664
  if img2 is not None:
665
  labeled[f"image_{idx}"] = img2
666
  idx += 1
 
667
  if extra_imgs:
668
  for im in extra_imgs:
669
  if im is None:
670
  continue
671
  labeled[f"image_{idx}"] = im
672
  idx += 1
 
673
  return labeled
674
 
675
  # ============================================================
 
677
  # ============================================================
678
 
679
  def _inject_missing_alpha_keys(state_dict: dict) -> dict:
 
 
 
 
 
 
 
 
 
680
  bases = {}
 
681
  for k, v in state_dict.items():
682
  if not isinstance(v, torch.Tensor):
683
  continue
 
688
 
689
  for base, rank in bases.items():
690
  alpha_tensor = torch.tensor(float(rank), dtype=torch.float32)
 
691
  full_alpha = f"{base}.alpha"
692
  if full_alpha not in state_dict:
693
  state_dict[full_alpha] = alpha_tensor
 
694
  if base.startswith("diffusion_model."):
695
  stripped_base = base[len("diffusion_model.") :]
696
  stripped_alpha = f"{stripped_base}.alpha"
697
  if stripped_alpha not in state_dict:
698
  state_dict[stripped_alpha] = alpha_tensor
 
699
  return state_dict
700
 
 
701
  def _filter_to_diffusers_lora_keys(state_dict: dict) -> tuple[dict, dict]:
 
702
  keep_suffixes = (
703
  ".lora_up.weight",
704
  ".lora_down.weight",
 
706
  ".alpha",
707
  ".lora_alpha",
708
  )
 
709
  dropped_patch = 0
710
  dropped_other = 0
711
  kept = 0
 
716
  if not isinstance(v, torch.Tensor):
717
  dropped_other += 1
718
  continue
 
719
  if k.endswith(".diff") or k.endswith(".diff_b"):
720
  dropped_patch += 1
721
  continue
 
722
  if not k.endswith(keep_suffixes):
723
  dropped_other += 1
724
  continue
 
725
  if k.endswith(".lora_alpha"):
726
  base = k[: -len(".lora_alpha")]
727
  k2 = f"{base}.alpha"
 
729
  normalized_alpha += 1
730
  kept += 1
731
  continue
 
732
  out[k] = v
733
  kept += 1
734
 
 
740
  }
741
  return out, stats
742
 
 
743
  def _duplicate_stripped_prefix_keys(state_dict: dict, prefix: str = "diffusion_model.") -> dict:
744
  out = dict(state_dict)
745
  for k, v in list(state_dict.items()):
 
750
  out[stripped] = v
751
  return out
752
 
 
753
  def _load_lora_weights_with_fallback(repo: str, weight_name: str, adapter_name: str, needs_alpha_fix: bool = False):
754
  try:
755
  pipe.load_lora_weights(repo, weight_name=weight_name, adapter_name=adapter_name)
 
757
  except (KeyError, ValueError) as e:
758
  if not needs_alpha_fix:
759
  raise
 
760
  print(
761
  "⚠️ LoRA load failed (will try safe dict fallback). "
762
  f"Adapter={adapter_name!r} file={weight_name!r} error={type(e).__name__}: {e}"
763
  )
 
764
  local_path = hf_hub_download(repo_id=repo, filename=weight_name)
765
  sd = safetensors_load_file(local_path)
766
 
 
773
  f"kept={stats['kept']} dropped_patch={stats['dropped_patch']} "
774
  f"dropped_other={stats['dropped_other']} normalized_alpha={stats['normalized_alpha']}"
775
  )
 
776
  pipe.load_lora_weights(sd, adapter_name=adapter_name)
777
  return
778
 
 
779
  def _ensure_loaded_and_get_active_adapters(selected_lora: str):
780
  spec = ADAPTER_SPECS.get(selected_lora)
781
  if not spec:
 
788
  parts = spec.get("parts", [])
789
  if not parts:
790
  raise gr.Error(f"Package spec has no parts: {selected_lora}")
 
791
  for part in parts:
792
  repo = part["repo"]
793
  weights = part["weights"]
 
809
  raise gr.Error(f"Failed to load adapter part {selected_lora}/{adapter_name}: {e}")
810
  else:
811
  print(f"--- Adapter part already loaded: {selected_lora} / {adapter_name} ---")
 
812
  adapter_names.append(adapter_name)
813
  adapter_weights.append(strength)
 
814
  else:
815
  repo = spec["repo"]
816
  weights = spec["weights"]
 
838
 
839
  return adapter_names, adapter_weights
840
 
 
841
  # ============================================================
842
  # UI handlers
843
  # ============================================================
844
 
845
  def on_lora_change_ui(selected_lora, current_prompt, current_extras_condition_only):
 
846
  if selected_lora != NONE_LORA:
847
  preset = LORA_PRESET_PROMPTS.get(selected_lora, "")
848
  if preset and (current_prompt is None or str(current_prompt).strip() == ""):
 
852
  else:
853
  prompt_update = gr.update(value=current_prompt)
854
 
 
855
  if lora_requires_two_images(selected_lora):
856
  img2_update = gr.update(visible=True, label=image2_label_for_lora(selected_lora))
857
  else:
858
  img2_update = gr.update(visible=False, value=None, label="Upload Reference (Image 2)")
859
 
 
860
  if selected_lora in ("BFS-Best-FaceSwap", "BFS-Best-FaceSwap-merge", "AnyPose"):
861
  extras_update = gr.update(value=True)
862
  else:
 
873
  raise gr.Error("No output available yet.")
874
  return gr.update(value=last)
875
 
 
876
  def set_output_as_image2(last):
877
  if last is None:
878
  raise gr.Error("No output available yet.")
879
  return gr.update(value=last)
880
 
 
881
  def set_output_as_extra(last, existing_extra):
882
  if last is None:
883
  raise gr.Error("No output available yet.")
884
  return _append_to_gallery(existing_extra, last)
885
 
 
886
  @spaces.GPU
887
+ def add_derived_ref(
888
+ img1,
889
+ existing_extra,
890
+ derived_type,
891
+ derived_use_gpu,
892
+ derived_max_people, # kept for UI compatibility; not used by dwpose here
893
+ derived_dwpose_to_openpose,
894
+ derived_dwpose_openpose_flag,
895
+ ):
896
  if img1 is None:
897
  raise gr.Error("Please upload Image 1 first.")
898
 
 
901
 
902
  base = img1.convert("RGB")
903
 
904
+ if derived_type == "Pose (DWPose / rtmlib) [DEBUG]":
905
+ derived = make_dwpose_map_debug(
906
  base,
907
  use_gpu=bool(derived_use_gpu),
 
908
  kp_thresh=0.20,
909
+ to_openpose=bool(derived_dwpose_to_openpose),
910
+ openpose_skeleton=(None if derived_dwpose_openpose_flag == "Auto" else (derived_dwpose_openpose_flag == "True")),
911
  )
912
  elif derived_type == "Depth (Depth Anything V2 Small)":
913
  derived = make_depth_map(base, use_gpu=bool(derived_use_gpu))
 
917
  new_gallery = _append_to_gallery(existing_extra, derived)
918
  return gr.update(value=new_gallery), gr.update(visible=True, value=derived)
919
 
 
920
  # ============================================================
921
  # Inference
922
  # ============================================================
 
925
  def infer(
926
  input_image_1,
927
  input_image_2,
928
+ input_images_extra,
929
  prompt,
930
  lora_adapter,
931
  seed,
 
944
  if input_image_1 is None:
945
  raise gr.Error("Please upload Image 1.")
946
 
 
947
  if lora_adapter == NONE_LORA:
948
  try:
949
  pipe.set_adapters([], adapter_weights=[])
 
966
  img1 = input_image_1.convert("RGB")
967
  img2 = input_image_2.convert("RGB") if input_image_2 is not None else None
968
 
 
969
  extra_imgs: list[Image.Image] = []
970
  if input_images_extra:
971
  for item in input_images_extra:
 
973
  if pil is not None:
974
  extra_imgs.append(pil)
975
 
 
976
  if lora_requires_two_images(lora_adapter) and img2 is None:
977
  raise gr.Error("This LoRA needs two images. Please upload Image 2 as well.")
978
 
 
979
  labeled = build_labeled_images(img1, img2, extra_imgs)
980
 
 
981
  pipe_images = list(labeled.values())
982
  if len(pipe_images) == 1:
983
  pipe_images = pipe_images[0]
984
 
 
985
  target_area = get_target_area_for_lora(img1, lora_adapter, float(target_megapixels))
986
  width, height = compute_canvas_dimensions_from_area(
987
  img1,
 
989
  multiple_of=int(pipe.vae_scale_factor * 2),
990
  )
991
 
 
992
  vae_image_indices = None
993
  if extras_condition_only:
994
  if isinstance(pipe_images, list) and len(pipe_images) > 2:
 
996
 
997
  try:
998
  print(
999
+ "[DEBUG][infer] submitting request | "
1000
+ f"lora_adapter={lora_adapter!r} seed={seed} prompt={prompt!r}"
1001
  )
1002
 
1003
  result = pipe(
 
1018
  if torch.cuda.is_available():
1019
  torch.cuda.empty_cache()
1020
 
 
1021
  @spaces.GPU
1022
  def infer_example(input_image, prompt, lora_adapter):
1023
  if input_image is None:
 
1028
  result, seed, last = infer(input_pil, None, None, prompt, lora_adapter, 0, True, guidance_scale, steps, 1.0, True, True)
1029
  return result, seed, last
1030
 
 
1031
  # ============================================================
1032
  # UI
1033
  # ============================================================
 
1109
  label="Derived Type (from Image 1)",
1110
  choices=[
1111
  "None",
1112
+ "Pose (DWPose / rtmlib) [DEBUG]",
1113
  "Depth (Depth Anything V2 Small)",
1114
  ],
1115
  value="None",
1116
  )
1117
  derived_use_gpu = gr.Checkbox(label="Use GPU for derived model", value=False)
1118
+
1119
+ # kept for UI compatibility (not used by dwpose here)
1120
  derived_max_people = gr.Slider(
1121
+ label="Max people (unused for dwpose)",
1122
  minimum=1,
1123
  maximum=10,
1124
  step=1,
1125
  value=4,
1126
  )
1127
+
1128
+ derived_dwpose_to_openpose = gr.Checkbox(
1129
+ label="DWPose output: to_openpose=True (likely K=134)",
1130
+ value=True,
1131
+ )
1132
+ derived_dwpose_openpose_flag = gr.Dropdown(
1133
+ label="draw_skeleton openpose_skeleton flag",
1134
+ choices=["Auto", "True", "False"],
1135
+ value="Auto",
1136
+ )
1137
+
1138
+ add_derived_btn = gr.Button("➕ Add derived ref to Extras (and print debug to logs)")
1139
 
1140
  seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
1141
  randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
 
1157
  value=True,
1158
  )
1159
 
 
1160
  lora_adapter.change(
1161
  fn=on_lora_change_ui,
1162
  inputs=[lora_adapter, prompt, extras_condition_only],
 
1216
  outputs=[output_image, seed, last_output],
1217
  )
1218
 
 
1219
  btn_out_to_img1.click(fn=set_output_as_image1, inputs=[last_output], outputs=[input_image_1])
1220
  btn_out_to_img2.click(fn=set_output_as_image2, inputs=[last_output], outputs=[input_image_2])
1221
  btn_out_to_extra.click(fn=set_output_as_extra, inputs=[last_output, input_images_extra], outputs=[input_images_extra])
1222
 
 
1223
  add_derived_btn.click(
1224
  fn=add_derived_ref,
1225
+ inputs=[
1226
+ input_image_1,
1227
+ input_images_extra,
1228
+ derived_type,
1229
+ derived_use_gpu,
1230
+ derived_max_people,
1231
+ derived_dwpose_to_openpose,
1232
+ derived_dwpose_openpose_flag,
1233
+ ],
1234
  outputs=[input_images_extra, derived_preview],
1235
  )
1236