import os
import gradio as gr
from fastapi import FastAPI, HTTPException
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import uvicorn

# ✅ Load Model Configuration
MODEL_NAME = "hpyapali/tinyllama-workout"
HF_TOKEN = os.getenv("HF_TOKEN", "your_huggingface_api_key")  # Replace with your actual Hugging Face API key

app = FastAPI()

try:
    print("🔄 Loading Model...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, token=HF_TOKEN)
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
    print("✅ Model Loaded Successfully!")
except Exception as e:
    print(f"❌ Error loading model: {e}")
    pipe = None


# ✅ AI Function - Generates Structured Workout Recommendations
def recommend_next_workout(last_workouts: str):
    """
    Analyzes and ranks workouts based on intensity and heart rate drop.
    Provides a recommendation for the next workout.
    """
    if pipe is None:
        return "❌ AI model not loaded."

    instruction = (
        "You are a fitness AI assistant specializing in analyzing workout effectiveness. "
        "Based on the last 7 workouts, rank them from the most to least effective based on:\n"
        "- Heart rate drop after workout (faster drop = better recovery)\n"
        "- Workout intensity (higher effort = more impact)\n"
        "- Duration (longer workouts generally contribute more)\n"
        "- Calories burned (higher calories = higher impact)\n"
        "- Variability (mixing workout types is important)\n\n"
        "### Last 7 Workouts:\n"
    )

    full_prompt = instruction + last_workouts + "\n\n### Ranking (Best to Least Effective):\n"

    try:
        print(f"🧐 AI Processing: {full_prompt}")
        result = pipe(
            full_prompt,
            max_new_tokens=150,  # 🔼 Increased token limit for full ranking
            do_sample=True,      # 🔼 Enabled sampling for variability
            temperature=0.7,     # 🔼 Slight randomness for better insights
            top_p=0.9            # 🔼 Limits unlikely outputs while keeping diversity
        )
        print(f"🔍 Raw AI Output: {result}")

        if not result or not result[0]["generated_text"].strip():
            return "❌ AI did not generate any output."

        response_text = result[0]["generated_text"].strip()

        # ✅ Remove repeated prompt if AI echoes it
        if full_prompt in response_text:
            response_text = response_text.replace(full_prompt, "").strip()

        print(f"✅ AI Recommendation: {response_text}")
        return response_text

    except Exception as e:
        print(f"❌ AI Processing Error: {e}")
        return "❌ Error generating workout recommendation."


# ✅ FastAPI Route - Returns AI Response Directly
@app.post("/gradio_api/call/predict")
async def predict(data: dict):
    try:
        last_workouts = data.get("data", [""])[0]
        if not last_workouts:
            raise HTTPException(status_code=400, detail="Invalid input")

        ai_response = recommend_next_workout(last_workouts)
        return {"data": [ai_response]}  # ✅ Directly returning structured response
    except HTTPException:
        raise  # ✅ Let FastAPI return the 400 instead of swallowing it below
    except Exception as e:
        return {"error": str(e)}


# ✅ Gradio UI (Optional for Testing)
iface = gr.Interface(
    fn=recommend_next_workout,
    inputs="text",
    outputs="text",
    title="TinyLlama Workout Recommendations",
    description="Enter workout data to receive AI-powered recommendations."
)

# ✅ Mount Gradio on the FastAPI app so both share one server and port.
# Calling iface.launch() here would block (or collide with uvicorn on 7860),
# so the server below would never start. Routes registered above, such as
# /gradio_api/call/predict, take precedence over the mounted Gradio app.
app = gr.mount_gradio_app(app, iface, path="/")

# ✅ FastAPI Server Execution
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
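
# --- Example client call (a minimal sketch, not part of the server) ---
# Illustrates how a client might hit the /gradio_api/call/predict route
# defined above. It assumes the server is running locally on port 7860;
# the sample workout string is hypothetical and only shows the expected
# input shape ({"data": ["<workout summary text>"]}).
#
#     import requests
#
#     payload = {"data": ["Mon: Run 30min, 320kcal, avg HR 152; Tue: Yoga 45min, ..."]}
#     resp = requests.post(
#         "http://localhost:7860/gradio_api/call/predict",
#         json=payload,
#         timeout=120,  # generation can be slow on CPU
#     )
#     print(resp.json())  # -> {"data": ["<ranked workouts and recommendation>"]}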