import streamlit as st
import json
import os
import time
from io import BytesIO
from PIL import Image
from pathlib import Path

from geo_bot import GeoBot, AGENT_PROMPT_TEMPLATE
from benchmark import MapGuesserBenchmark
from config import MODELS_CONFIG, get_data_paths, SUCCESS_THRESHOLD_KM
from langchain_openai import ChatOpenAI
from langchain_anthropic import ChatAnthropic
from langchain_google_genai import ChatGoogleGenerativeAI
from hf_chat import HuggingFaceChat

# Simple API key setup
if "OPENAI_API_KEY" in st.secrets:
    os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]
if "ANTHROPIC_API_KEY" in st.secrets:
    os.environ["ANTHROPIC_API_KEY"] = st.secrets["ANTHROPIC_API_KEY"]
if "GOOGLE_API_KEY" in st.secrets:
    os.environ["GOOGLE_API_KEY"] = st.secrets["GOOGLE_API_KEY"]
if "HF_TOKEN" in st.secrets:
    os.environ["HF_TOKEN"] = st.secrets["HF_TOKEN"]


def get_available_datasets():
    datasets_dir = Path("datasets")
    if not datasets_dir.exists():
        return ["default"]
    datasets = []
    for dataset_dir in datasets_dir.iterdir():
        if dataset_dir.is_dir():
            data_paths = get_data_paths(dataset_dir.name)
            if os.path.exists(data_paths["golden_labels"]):
                datasets.append(dataset_dir.name)
    return datasets if datasets else ["default"]


def get_model_class(class_name):
    if class_name == "ChatOpenAI":
        return ChatOpenAI
    elif class_name == "ChatAnthropic":
        return ChatAnthropic
    elif class_name == "ChatGoogleGenerativeAI":
        return ChatGoogleGenerativeAI
    elif class_name == "HuggingFaceChat":
        return HuggingFaceChat
    else:
        raise ValueError(f"Unknown model class: {class_name}")


# UI Setup
st.set_page_config(page_title="MapCrunch AI Agent", layout="wide")
st.title("🗺️ MapCrunch AI Agent")

# Sidebar
with st.sidebar:
    st.header("Configuration")

    dataset_choice = st.selectbox("Dataset", get_available_datasets())
    model_choice = st.selectbox("Model", list(MODELS_CONFIG.keys()))
    steps_per_sample = st.slider("Max Steps", 3, 20, 10)

    # Load dataset
    data_paths = get_data_paths(dataset_choice)
    with open(data_paths["golden_labels"], "r") as f:
        golden_labels = json.load(f).get("samples", [])

    st.info(f"Dataset has {len(golden_labels)} samples")
    num_samples = st.slider(
        "Samples to Test", 1, len(golden_labels), min(3, len(golden_labels))
    )

    start_button = st.button("🚀 Start", type="primary")

# Main Logic
if start_button:
    test_samples = golden_labels[:num_samples]
    config = MODELS_CONFIG[model_choice]
    model_class = get_model_class(config["class"])

    benchmark_helper = MapGuesserBenchmark(dataset_name=dataset_choice)
    all_results = []

    progress_bar = st.progress(0)

    with GeoBot(
        model=model_class, model_name=config["model_name"], headless=True
    ) as bot:
        for i, sample in enumerate(test_samples):
            st.divider()
            st.header(f"Sample {i + 1}/{num_samples} - ID: {sample.get('id', 'N/A')}")

            bot.controller.load_location_from_data(sample)
            bot.controller.setup_clean_environment()

            # Create scrollable container for this sample
            sample_container = st.container()

            with sample_container:
                # Initialize step tracking
                history = []
                final_guess = None

                for step in range(steps_per_sample):
                    step_num = step + 1

                    # Create step container
                    with st.container():
                        st.subheader(f"Step {step_num}/{steps_per_sample}")

                        # Take screenshot and show
                        bot.controller.label_arrows_on_screen()
                        screenshot_bytes = bot.controller.take_street_view_screenshot()

                        col1, col2 = st.columns([1, 2])

                        with col1:
                            st.image(
                                screenshot_bytes,
                                caption=f"What AI sees",
                                use_column_width=True,
                            )

                        with col2:
                            # Build history for AI
                            current_step = {
                                "image_b64": bot.pil_to_base64(
                                    Image.open(BytesIO(screenshot_bytes))
                                ),
                                "action": "N/A",
                            }
                            history.append(current_step)

                            available_actions = bot.controller.get_available_actions()
                            history_text = "\n".join(
                                [
                                    f"Step {j + 1}: {h['action']}"
                                    for j, h in enumerate(history[:-1])
                                ]
                            )
                            if not history_text:
                                history_text = "First step."

                            prompt = AGENT_PROMPT_TEMPLATE.format(
                                remaining_steps=steps_per_sample - step,
                                history_text=history_text,
                                available_actions=json.dumps(available_actions),
                            )

                            # Show AI context
                            st.write("**Available Actions:**")
                            st.code(json.dumps(available_actions, indent=2))

                            st.write("**AI Context:**")
                            st.text_area(
                                "History",
                                history_text,
                                height=100,
                                disabled=True,
                                key=f"history_{i}_{step}",
                            )

                            # Force guess on last step or get AI decision
                            if step_num == steps_per_sample:
                                action = "GUESS"
                                st.warning("Max steps reached. Forcing GUESS.")
                            else:
                                # Get AI response
                                message = bot._create_message_with_history(
                                    prompt, [h["image_b64"] for h in history]
                                )
                                response = bot.model.invoke(message)
                                decision = bot._parse_agent_response(response)

                                if decision is None:
                                    raise ValueError(
                                        f"Failed to parse AI response: {response.content}"
                                    )

                                action = decision["action_details"]["action"]
                                history[-1]["action"] = action

                                # Show AI decision
                                st.write("**AI Reasoning:**")
                                st.info(decision.get("reasoning", "N/A"))

                                st.write("**AI Action:**")
                                st.success(f"`{action}`")

                                # Show raw response
                                with st.expander("Raw AI Response"):
                                    st.text(response.content)

                        # Execute action
                        if action == "GUESS":
                            if step_num == steps_per_sample:
                                # Forced guess - use fallback coordinates
                                lat, lon = 0.0, 0.0
                                st.error("Forced guess with fallback coordinates")
                            else:
                                lat = decision.get("action_details", {}).get("lat")
                                lon = decision.get("action_details", {}).get("lon")

                            if lat is not None and lon is not None:
                                final_guess = (lat, lon)
                                st.success(f"Final Guess: {lat:.4f}, {lon:.4f}")
                            break
                        elif action == "MOVE_FORWARD":
                            bot.controller.move("forward")
                        elif action == "MOVE_BACKWARD":
                            bot.controller.move("backward")
                        elif action == "PAN_LEFT":
                            bot.controller.pan_view("left")
                        elif action == "PAN_RIGHT":
                            bot.controller.pan_view("right")

                        # Auto scroll to bottom
                        st.empty()  # Force refresh to show latest content
                        time.sleep(1)

                # Sample Results
                st.subheader("Sample Result")
                true_coords = {"lat": sample.get("lat"), "lng": sample.get("lng")}
                distance_km = None
                is_success = False

                if final_guess:
                    distance_km = benchmark_helper.calculate_distance(
                        true_coords, final_guess
                    )
                    if distance_km is not None:
                        is_success = distance_km <= SUCCESS_THRESHOLD_KM

                    col1, col2, col3 = st.columns(3)
                    col1.metric(
                        "Final Guess", f"{final_guess[0]:.3f}, {final_guess[1]:.3f}"
                    )
                    col2.metric(
                        "Ground Truth",
                        f"{true_coords['lat']:.3f}, {true_coords['lng']:.3f}",
                    )
                    col3.metric(
                        "Distance",
                        f"{distance_km:.1f} km",
                        delta="Success" if is_success else "Failed",
                    )
                else:
                    st.error("No final guess made")

                all_results.append(
                    {
                        "sample_id": sample.get("id"),
                        "model": model_choice,
                        "true_coordinates": true_coords,
                        "predicted_coordinates": final_guess,
                        "distance_km": distance_km,
                        "success": is_success,
                    }
                )

            progress_bar.progress((i + 1) / num_samples)

    # Final Summary
    st.divider()
    st.header("🏁 Final Results")

    summary = benchmark_helper.generate_summary(all_results)
    if summary and model_choice in summary:
        stats = summary[model_choice]

        # Overall metrics
        col1, col2, col3 = st.columns(3)
        col1.metric("Success Rate", f"{stats.get('success_rate', 0) * 100:.1f}%")
        col2.metric("Average Distance", f"{stats.get('average_distance_km', 0):.1f} km")
        col3.metric("Total Samples", len(all_results))

        # Detailed results table
        st.subheader("Detailed Results")
        st.dataframe(all_results, use_container_width=True)

        # Success breakdown
        successes = [r for r in all_results if r["success"]]
        failures = [r for r in all_results if not r["success"]]

        if successes:
            st.subheader("Successful Samples")
            st.dataframe(successes, use_container_width=True)

        if failures:
            st.subheader("Failed Samples")
            st.dataframe(failures, use_container_width=True)
    else:
        st.error("Could not generate summary")
        st.dataframe(all_results, use_container_width=True)