""" UI-TARS API Client (Optimized) ⚡ ================================== عميل Python محسّن للتواصل مع UI-TARS API الاستخدام: from ui_tars_client import UITarsClient client = UITarsClient("https://your-space.hf.space") result = client.click_on("Search button", "screenshot.png") """ import base64 import time from typing import Optional, Dict, Any, List, Tuple from pathlib import Path try: import requests except ImportError: raise ImportError("Please install requests: pip install requests") try: from PIL import Image HAS_PIL = True except ImportError: HAS_PIL = False class UITarsClient: """ عميل محسّن للتفاعل مع UI-TARS API مثال: >>> client = UITarsClient("https://my-space.hf.space") >>> result = client.click_on("login button", "screenshot.png") >>> print(f"Action: {result['action']}") """ def __init__( self, base_url: str, api_key: Optional[str] = None, timeout: int = 60, max_retries: int = 3 ): """ تهيئة العميل Args: base_url: رابط API (مثال: https://your-space.hf.space) api_key: مفتاح API (اختياري) timeout: وقت الانتظار بالثواني max_retries: عدد محاولات إعادة الطلب """ self.base_url = base_url.rstrip('/') self.timeout = timeout self.max_retries = max_retries self.headers = {"Content-Type": "application/json"} if api_key: self.headers["Authorization"] = f"Bearer {api_key}" self._check_api() def _check_api(self): """التحقق من توفر API""" try: response = requests.get( f"{self.base_url}/health", headers=self.headers, timeout=10 ) if response.status_code == 200: data = response.json() if not data.get("api_available"): print("⚠️ Model is loading, please wait...") except Exception as e: print(f"⚠️ Warning: Could not connect to API: {e}") def _image_to_base64(self, image_path: str) -> str: """تحويل صورة إلى base64""" if isinstance(image_path, str): with open(image_path, "rb") as f: return base64.b64encode(f.read()).decode('utf-8') elif isinstance(image_path, bytes): return base64.b64encode(image_path).decode('utf-8') else: raise ValueError("image_path must be a file path or bytes") def _optimize_image(self, image_path: str, max_size: Tuple[int, int] = (1280, 720)) -> bytes: """تحسين حجم الصورة للسرعة الأفضل""" if not HAS_PIL: # إذا لم يكن PIL متاحاً، استخدم الصورة كما هي with open(image_path, "rb") as f: return f.read() img = Image.open(image_path) # إذا كانت الصورة أكبر من max_size، قلل حجمها if img.width > max_size[0] or img.height > max_size[1]: img.thumbnail(max_size, Image.Resampling.LANCZOS) from io import BytesIO buffer = BytesIO() img.save(buffer, format='PNG', optimize=True) return buffer.getvalue() def _make_request( self, method: str, endpoint: str, **kwargs ) -> Dict[str, Any]: """إرسال طلب مع إعادة المحاولة التلقائية""" url = f"{self.base_url}{endpoint}" for attempt in range(self.max_retries): try: if method == "GET": response = requests.get(url, headers=self.headers, timeout=self.timeout, **kwargs) elif method == "POST": response = requests.post(url, headers=self.headers, timeout=self.timeout, **kwargs) else: raise ValueError(f"Unsupported method: {method}") # إذا كان النموذج يحمّل، انتظر وأعد المحاولة if response.status_code == 503 or "loading" in response.text.lower(): if attempt < self.max_retries - 1: wait_time = 5 * (attempt + 1) print(f"⏳ Model loading... waiting {wait_time}s (attempt {attempt + 1}/{self.max_retries})") time.sleep(wait_time) continue response.raise_for_status() return response.json() except requests.exceptions.Timeout: if attempt < self.max_retries - 1: print(f"⏳ Timeout... retrying (attempt {attempt + 1}/{self.max_retries})") time.sleep(2) continue else: raise except requests.exceptions.RequestException as e: if attempt < self.max_retries - 1: print(f"⏳ Error... retrying (attempt {attempt + 1}/{self.max_retries})") time.sleep(2) continue else: raise raise Exception("Max retries exceeded") # ========== Helper Methods (طرق مساعدة سهلة) ========== def click_on( self, element: str, screenshot_path: str, optimize_image: bool = True ) -> Dict[str, Any]: """ انقر على عنصر في الشاشة Args: element: وصف العنصر (مثال: "login button", "search icon") screenshot_path: مسار صورة الشاشة optimize_image: تحسين حجم الصورة للسرعة Returns: نتيجة تحتوي على action و coordinates مثال: >>> result = client.click_on("submit button", "screen.png") >>> print(result['coordinates']) # {'x': 500, 'y': 300} """ if optimize_image: image_bytes = self._optimize_image(screenshot_path) image_b64 = base64.b64encode(image_bytes).decode('utf-8') else: image_b64 = self._image_to_base64(screenshot_path) return self.inference( instruction=f"Click on the {element}", image=image_b64, system_prompt_type="computer" ) def type_text( self, text: str, field_description: str, screenshot_path: str ) -> Dict[str, Any]: """ اكتب نصاً في حقل معين Args: text: النص المراد كتابته field_description: وصف الحقل (مثال: "username field", "search box") screenshot_path: مسار صورة الشاشة Returns: نتيجة الإجراء مثال: >>> result = client.type_text("john@example.com", "email field", "screen.png") """ image_b64 = self._image_to_base64(screenshot_path) return self.inference( instruction=f"Click on the {field_description} and type '{text}'", image=image_b64, system_prompt_type="computer" ) def find_element( self, element_description: str, screenshot_path: str, screen_width: int = 1920, screen_height: int = 1080 ) -> Optional[Dict[str, int]]: """ ابحث عن إحداثيات عنصر Args: element_description: وصف العنصر screenshot_path: مسار صورة الشاشة screen_width: عرض الشاشة screen_height: ارتفاع الشاشة Returns: إحداثيات العنصر أو None مثال: >>> coords = client.find_element("logout button", "screen.png") >>> print(f"Found at: {coords}") # {'x': 1800, 'y': 50} """ try: with open(screenshot_path, "rb") as f: files = {"image": (Path(screenshot_path).name, f, "image/png")} data = { "instruction": element_description, "image_width": screen_width, "image_height": screen_height } # إزالة Content-Type header للملفات headers = {k: v for k, v in self.headers.items() if k != "Content-Type"} response = requests.post( f"{self.base_url}/v1/grounding", files=files, data=data, headers=headers, timeout=self.timeout ) response.raise_for_status() result = response.json() return result.get("absolute_coordinates") except Exception as e: print(f"❌ Error finding element: {e}") return None # ========== Core API Methods ========== def health(self) -> Dict[str, Any]: """فحص صحة API""" return self._make_request("GET", "/health") def model_info(self) -> Dict[str, Any]: """الحصول على معلومات النموذج""" return self._make_request("GET", "/model/info") def inference( self, instruction: str, image: Optional[str] = None, system_prompt_type: str = "computer", temperature: float = 0.7, max_tokens: int = 2048 ) -> Dict[str, Any]: """ تنفيذ استدلال Args: instruction: التعليمات image: صورة بصيغة base64 (اختياري) system_prompt_type: نوع النظام (computer, mobile, grounding) temperature: درجة الحرارة max_tokens: أقصى عدد tokens Returns: نتيجة تحتوي على thought, action, coordinates """ payload = { "instruction": instruction, "system_prompt_type": system_prompt_type, "temperature": temperature, "max_tokens": max_tokens } if image: payload["image"] = image return self._make_request("POST", "/v1/inference", json=payload) def chat_completion( self, messages: List[Dict[str, Any]], temperature: float = 0.7, max_tokens: int = 2048 ) -> Dict[str, Any]: """ استدعاء متوافق مع OpenAI Args: messages: قائمة الرسائل temperature: درجة الحرارة max_tokens: أقصى عدد tokens Returns: استجابة بتنسيق OpenAI """ payload = { "model": "ui-tars-1.5-7b", "messages": messages, "temperature": temperature, "max_tokens": max_tokens } return self._make_request("POST", "/v1/chat/completions", json=payload) def batch_inference( self, requests: List[Dict[str, Any]] ) -> Dict[str, Any]: """ معالجة دفعة من الطلبات Args: requests: قائمة الطلبات Returns: نتائج جميع الطلبات """ payload = {"requests": requests} return self._make_request("POST", "/v1/batch/inference", json=payload) # ========== مثال على الاستخدام ========== if __name__ == "__main__": # استبدل بـ URL Space الخاص بك client = UITarsClient("http://localhost:7860") print("="*60) print("🚀 UI-TARS Client Demo") print("="*60) # 1. فحص الصحة print("\n1️⃣ Health Check:") health = client.health() print(f" Status: {health.get('status')}") print(f" API Available: {health.get('api_available')}") # 2. معلومات النموذج print("\n2️⃣ Model Info:") info = client.model_info() print(f" Model: {info.get('model_name')}") print(f" Type: {info.get('api_type')}") # 3. استدلال بسيط print("\n3️⃣ Simple Inference:") result = client.inference( instruction="Click on the start menu", system_prompt_type="computer" ) print(f" Action: {result.get('action')}") # 4. مثال مع صورة (إذا كان لديك صورة) # print("\n4️⃣ Click on element:") # result = client.click_on("login button", "screenshot.png") # print(f" Coordinates: {result.get('coordinates')}") print("\n" + "="*60) print("✅ Demo completed!") print("="*60)