"""
ContractorNegotiationEnv — OpenEnv 0.2.1-compatible environment.

Real-world negotiation environment where the agent negotiates with multiple
contractors to achieve the lowest price. Demonstrates transfer of Diplomacy
skills (coalition pressure, multi-party bluffing) to real-world negotiation.

Swap guide:
    To change the use case, subclass or copy this file and modify:
    - _get_state_text()  → change the scenario description  
    - _compute_reward()  → change what counts as a good outcome
    - reset()            → change the "parties" and their attributes
    Everything else (obs space, action space, training loop) stays identical.
"""

import random
import numpy as np

from openenv.env import Env
from sentence_transformers import SentenceTransformer


class ContractorNegotiationEnv(Env):
    """Text-action environment for negotiating a price with several contractors.

    Each episode creates ``n_contractors`` simulated contractors, each with a
    random asking price, a hidden price floor and an urgency level. Every
    ``step`` lowers the asking price of all active contractors; certain
    keywords in the agent's message make the drop larger. The episode ends
    when the agent's message contains "accept" or after ``max_rounds`` steps.

    Observations are sentence embeddings of a textual state summary; actions
    are free-form strings.

    NOTE: keyword detection is plain substring matching on the lower-cased
    action, so e.g. "another" matches "other" and "unacceptable" matches
    "accept".
    """

    # Substrings that speed up price drops when the agent cites competition.
    _COMPETITION_KEYWORDS = ("competing", "other", "beat", "match", "offer")
    # Substrings that speed up price drops when the agent applies price pressure.
    _PRESSURE_KEYWORDS = ("pressure", "budget", "lower", "reduce")
    # Substrings that earn the leverage bonus in the reward (narrower than
    # _COMPETITION_KEYWORDS: requires "other offer", not just "other"/"offer").
    _LEVERAGE_REWARD_KEYWORDS = ("competing", "other offer", "beat", "match")
    # (minimum savings fraction, reward) pairs, checked from highest to lowest;
    # only the first tier whose threshold is strictly exceeded is awarded.
    _SAVINGS_TIERS = ((0.20, 2.0), (0.15, 1.5), (0.10, 1.0), (0.05, 0.5))

    def __init__(self, n_contractors=5, budget=10000, seed=None):
        """Create the environment and load the sentence encoder.

        Args:
            n_contractors: Number of contractors generated on each ``reset``.
            budget: Reference budget; asks, floors and savings are all
                expressed relative to it. Must be positive.
            seed: Optional seed applied to the global ``random`` and
                ``numpy.random`` generators. ``None`` leaves them untouched.

        Raises:
            ValueError: If ``budget`` is not positive.
        """
        # Fail early: a non-positive budget would otherwise surface later as
        # an empty randint range in reset() or a division by zero in step().
        if budget <= 0:
            raise ValueError(f"budget must be positive, got {budget!r}")

        self.n_contractors = n_contractors
        self.budget = budget
        self.encoder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
        # `is not None` rather than truthiness so that seed=0 is honoured.
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)
        self.contractors = {}
        self.round = 0
        self.max_rounds = 10
        self.accepted = False

    def reset(self):
        """Start a new episode with freshly sampled contractors.

        Each contractor gets a floor in [50%, 75%] of the budget, an initial
        ask in [80%, 110%] of the budget and a uniform urgency in [0, 1).

        Returns:
            A tuple ``(observation, info)`` where ``info`` holds the round
            number (0) and the current best (lowest) ask.
        """
        self.contractors = {}
        for i in range(self.n_contractors):
            # Sampling order (floor, ask, urgency) is kept fixed so seeded
            # runs stay reproducible.
            floor = random.randint(int(self.budget * 0.5), int(self.budget * 0.75))
            ask = random.randint(int(self.budget * 0.8), int(self.budget * 1.1))
            urgency = random.random()
            self.contractors[f"Contractor_{i}"] = {
                "ask": ask,
                "floor": floor,
                "urgency": urgency,
                "rounds": 0,
                "active": True,
                "at_floor": False,
            }
        self.round = 0
        self.accepted = False
        obs = self._get_observation()
        return obs, {"round": 0, "best_offer": self._best_offer()}

    def step(self, action: str):
        """Apply one negotiation message and advance the simulation a round.

        Args:
            action: Natural-language negotiation move from the agent.

        Returns:
            A tuple ``(observation, reward, done, info)``. ``done`` is True
            once the agent has accepted or ``max_rounds`` has been reached.
        """
        self.round += 1
        action_lower = action.lower()

        # The keyword checks depend only on the action, so evaluate them once
        # instead of once per contractor.
        cites_competition = any(w in action_lower for w in self._COMPETITION_KEYWORDS)
        applies_pressure = any(w in action_lower for w in self._PRESSURE_KEYWORDS)

        for c in self.contractors.values():
            if not c["active"]:
                continue
            c["rounds"] += 1

            # Base concession of 5-15% per round, scaled by urgency.
            drop_rate = 0.05 + (0.1 * c["urgency"])
            if cites_competition:
                drop_rate *= 1.5
            if applies_pressure:
                drop_rate *= 1.2

            # A contractor never goes below its floor.
            c["ask"] = max(c["floor"], int(c["ask"] * (1 - drop_rate)))

            # Within 2% of the floor counts as having bottomed out.
            if c["ask"] <= c["floor"] * 1.02:
                c["at_floor"] = True

        if "accept" in action_lower:
            self.accepted = True

        reward = self._compute_reward(action_lower)
        obs = self._get_observation()
        done = self.accepted or self.round >= self.max_rounds

        best = self._best_offer()
        savings = self.budget - best
        info = {
            "round": self.round,
            "best_offer": best,
            "savings": savings,
            "savings_pct": savings / self.budget * 100,
            "action_logged": action,
        }
        return obs, reward, done, info

    def _compute_reward(self, action_lower):
        """Score the current state after an action.

        Components:
            * tiered bonus for the savings fraction of the best offer,
            * +0.5 when the action cites competing offers,
            * +1.0 when the agent has accepted with more than 10% savings,
            * -1.0 when the round limit is reached without acceptance.

        Args:
            action_lower: The agent's action, already lower-cased.

        Returns:
            The reward as a float.
        """
        savings_pct = (self.budget - self._best_offer()) / self.budget
        reward = 0.0

        for threshold, bonus in self._SAVINGS_TIERS:
            if savings_pct > threshold:
                reward += bonus
                break

        if any(w in action_lower for w in self._LEVERAGE_REWARD_KEYWORDS):
            reward += 0.5

        if self.accepted and savings_pct > 0.10:
            reward += 1.0

        if self.round >= self.max_rounds and not self.accepted:
            reward -= 1.0

        return float(reward)

    def _best_offer(self):
        """Return the lowest ask among active contractors.

        Falls back to the budget when there are no active contractors (for
        example before the first ``reset``).
        """
        return min(
            (c["ask"] for c in self.contractors.values() if c["active"]),
            default=self.budget,
        )

    def _get_state_text(self):
        """Build the human-readable state summary that gets embedded."""
        best = self._best_offer()
        lines = [
            "CONTRACTOR NEGOTIATION STATE",
            f"Round: {self.round}/{self.max_rounds}",
            f"Budget: ${self.budget:,}",
            f"Best current offer: ${best:,}",
            f"Potential savings: ${self.budget - best:,}",
            "",
            "Active contractors:",
        ]
        for name, c in self.contractors.items():
            if not c["active"]:
                continue
            # Urgency is exposed only as a coarse label; floors stay hidden.
            urgency = "HIGH" if c["urgency"] > 0.6 else "LOW"
            lines.append(
                f"  {name}: asking ${c['ask']:,} | urgency: {urgency} | rounds negotiated: {c['rounds']}"
            )
        lines += [
            "",
            f"Rounds remaining: {self.max_rounds - self.round}",
            "What is your next negotiation move?",
        ]
        return "\n".join(lines)

    def _get_observation(self):
        """Encode the state summary into a float32 embedding vector."""
        text = self._get_state_text()
        emb = self.encoder.encode(text, convert_to_numpy=True)
        return emb.astype(np.float32)

    def render(self):
        """Print the textual state summary and return it."""
        text = self._get_state_text()
        print(text)
        return text

    def close(self):
        """Announce that the environment has been closed."""
        print("ContractorNegotiationEnv closed.")

    @property
    def observation_space(self):
        """Describe the observation: a 384-dimensional float32 vector.

        384 is the documented embedding size of all-MiniLM-L6-v2; update it
        if the encoder model is swapped.
        """
        return {"type": "continuous", "shape": (384,), "dtype": "float32"}

    @property
    def action_space(self):
        """Describe the action: a free-form natural-language string."""
        return {"type": "text", "description": "Natural language negotiation move"}