{
  "zen_model": "zen-omni",
  "base_model": "Qwen3-Omni",
  "size": "7B",
  "type": "multimodal",
  "modalities": ["text", "vision", "audio"],
  "architecture": "Qwen3OmniForConditionalGeneration",
  "description": "Multimodal model based on Qwen3-Omni supporting text, image, and audio inputs"
}