# -*- coding: utf-8 -*-
"""
tools.step032_translation_llm

OpenAI-compatible client wrapper for the Boson Hackathon endpoint (Qwen models).

Public API (kept stable):
    init_llm_model(model_name: str | None = None) -> None
    llm_response(messages: list[dict], device: str = "auto") -> str

Back-compat alias:
    openai_response(messages: list[dict]) -> str
"""
from __future__ import annotations

import os
import random
import time
from typing import Any, Dict, List, Optional, Tuple

from dotenv import load_dotenv
from loguru import logger
from openai import APIError, OpenAI

load_dotenv()

MODEL_NAME: str = os.getenv("QWEN_TRANSLATION_MODEL", "Qwen3-32B-thinking-Hackathon")
BOSON_API_KEY: str = os.getenv("BOSON_API_KEY", "")
BOSON_BASE_URL: str = os.getenv("BOSON_BASE_URL", "https://hackathon.boson.ai/v1")

DEFAULT_TIMEOUT = int(os.getenv("QWEN_TIMEOUT", "240"))
MAX_RETRIES = int(os.getenv("QWEN_MAX_RETRIES", "4"))
INITIAL_BACKOFF = float(os.getenv("QWEN_INITIAL_BACKOFF", "0.7"))

if not BOSON_API_KEY:
    logger.warning("BOSON_API_KEY is not set; calls will fail until provided.")

_client: Optional[OpenAI] = None
_model_logged: Optional[str] = None


def _get_client() -> OpenAI:
    """Lazily build and cache a single OpenAI-compatible client."""
    global _client
    if _client is None:
        _client = OpenAI(api_key=BOSON_API_KEY, base_url=BOSON_BASE_URL)
    return _client


def init_llm_model(model_name: Optional[str] = None) -> None:
    """Record the chosen hosted model; logs only when the choice changes."""
    global _model_logged
    chosen = model_name or MODEL_NAME
    if _model_logged != chosen:
        _model_logged = chosen
        logger.info(f"[LLM init] Using hosted model: {chosen} @ {BOSON_BASE_URL}")


def _sanitize_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Keep only well-formed {role, content} dicts with non-empty string content.

    Falls back to the original list if everything was filtered out.
    """
    cleaned: List[Dict[str, Any]] = []
    for m in messages:
        if not isinstance(m, dict):
            continue
        role = m.get("role")
        content = m.get("content")
        if role and isinstance(content, str) and content.strip():
            cleaned.append({"role": role, "content": content})
    return cleaned or messages


def _should_retry(e: Exception) -> Tuple[bool, str]:
    """Classify an exception as retryable (rate limits, timeouts, 5xx) or not."""
    if isinstance(e, APIError):
        sc = getattr(e, "status_code", None)
        if sc in (429, 408) or (sc is not None and sc >= 500):
            return True, f"HTTP {sc}"
        return False, f"HTTP {sc}"
    etxt = str(e).lower()
    transient = any(k in etxt for k in [
        "temporarily", "timeout", "timed out", "connection reset",
        "connection aborted", "server disconnected", "remote end closed",
        "read error", "write error", "unreachable", "rate limit",
    ])
    return (True, "transient") if transient else (False, "non-transient")


def _backoff_sleep(attempt: int) -> None:
    """Sleep with exponential backoff plus a small random jitter."""
    delay = INITIAL_BACKOFF * (2 ** attempt) + random.random() * 0.25
    time.sleep(delay)


def _chat_completion(
    messages: List[Dict[str, Any]],
    model: Optional[str] = None,
    timeout: Optional[int] = DEFAULT_TIMEOUT,
    extra_body: Optional[Dict[str, Any]] = None,
) -> str:
    if not BOSON_API_KEY:
        raise RuntimeError("BOSON_API_KEY is missing. Set it in your .env")
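    # An explicit `model` argument wins over the QWEN_TRANSLATION_MODEL env
    # default; sampling settings travel in `extra_body`, which the OpenAI SDK
    # forwards verbatim to the compatible endpoint.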
    chosen_model = model or MODEL_NAME
    init_llm_model(chosen_model)
    msgs = _sanitize_messages(messages)

    kwargs: Dict[str, Any] = {
        "model": chosen_model,
        "messages": msgs,
        "timeout": timeout,
        # Deterministic decoding defaults for translation; callers may
        # override any of these via `extra_body`.
        "extra_body": {
            "temperature": 0,
            "top_p": 1,
            "seed": 0,
            "max_tokens": int(os.getenv("QWEN_MAX_TOKENS", "256")),
        },
    }
    if extra_body:
        kwargs["extra_body"].update(extra_body)

    client = _get_client()
    for attempt in range(MAX_RETRIES + 1):
        try:
            resp = client.chat.completions.create(**kwargs)
            content = getattr(resp.choices[0].message, "content", "") or ""
            # Some OpenAI-compatible servers return content as a list of
            # segments; flatten it to a single string.
            if isinstance(content, list):
                content = " ".join(
                    seg.get("text", "") if isinstance(seg, dict) else str(seg)
                    for seg in content
                )
            return str(content).strip()
        except Exception as e:
            do_retry, reason = _should_retry(e)
            if attempt < MAX_RETRIES and do_retry:
                logger.debug(f"[LLM] retry {attempt+1}/{MAX_RETRIES} due to {reason}: {e}")
                _backoff_sleep(attempt)
                continue
            logger.warning(f"[LLM] final failure after {attempt+1} attempt(s): {e}")
            break
    # Contract: return an empty string on failure rather than raising.
    return ""


def llm_response(messages: List[Dict[str, Any]], device: str = "auto") -> str:
    """Translate via the hosted model; `device` is ignored (kept for API parity)."""
    return _chat_completion(messages)


def openai_response(messages: List[Dict[str, Any]]) -> str:
    """Back-compat alias for llm_response."""
    return _chat_completion(messages)


if __name__ == "__main__":
    test = [{"role": "user", "content": "Reply with a single word: ok"}]
    logger.info(f"Using model: {MODEL_NAME} @ {BOSON_BASE_URL}")
    print(llm_response(test))
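    # A hedged sketch of a per-call override: the temperature/max_tokens
    # values here are illustrative assumptions, not requirements of the Boson
    # endpoint; the private helper merges them into `extra_body`.
    print(_chat_completion(test, extra_body={"temperature": 0.7, "max_tokens": 64}))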