import os
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
import joblib  # To save and load the trained model
from groq import Groq
from huggingface_hub import login
from dotenv import load_dotenv

# Pull settings from the .env file into the process environment
load_dotenv()

# Hugging Face authentication: the token is mandatory, so abort start-up
# with a ValueError when it is absent; otherwise log in with it.
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")

if not HUGGINGFACE_TOKEN:
    raise ValueError("Hugging Face token not found in environment variables")
login(token=HUGGINGFACE_TOKEN)

# Groq client built from the GROQ_API_KEY environment variable
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)

# Function to train the RandomForestRegressor model and save it to disk
def train_model(df):
    """Train a random-forest yield model on ``df`` and persist it with joblib.

    Args:
        df: DataFrame holding a ``Yield`` target column and a ``Crop``
            column; every column other than ``Yield`` is used as a feature.
            ``Crop`` may contain the labels ``'Wheat'``/``'Rice'`` or
            already-encoded numeric codes. The caller's frame is not
            modified.

    Returns:
        The path of the saved model file.

    Raises:
        ValueError: If ``Crop`` contains a label other than Wheat/Rice, or
            a numeric ``Crop`` column contains missing values.
    """
    crop_codes = {'Wheat': 1, 'Rice': 2}

    # Work on a copy so the encoding below never leaks back to the caller.
    data = df.copy()
    crop = data['Crop']

    if not pd.api.types.is_numeric_dtype(crop):
        # Series.map yields NaN for labels missing from the mapping, so any
        # NaN in the result marks an unsupported crop.
        encoded = crop.map(crop_codes)
        unknown = sorted({str(value) for value in crop[encoded.isna()]})
        if unknown:
            raise ValueError(
                f"Unsupported crop value(s): {', '.join(unknown)}. "
                "Supported crops are: Wheat, Rice."
            )
        data['Crop'] = encoded.astype(int)
    elif crop.isna().any():
        # An already-numeric column is left as is (re-mapping it would turn
        # every code into NaN), but gaps still have to be rejected.
        raise ValueError("Crop column contains missing or unrecognised values.")

    # Split into features and target
    X = data.drop(columns=["Yield"])
    y = data["Yield"]

    # Train the Random Forest model (fixed seed for reproducible fits)
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X, y)

    # Save the model to disk
    model_filename = '/content/crop_yield_model.pkl'
    joblib.dump(model, model_filename)

    return model_filename

# Load the trained model
def load_model():
    """Load the model stored at ``/content/crop_yield_model.pkl``.

    Returns:
        The object that joblib deserialises from the model file.

    Raises:
        FileNotFoundError: If the model file does not exist yet.
    """
    model_filename = '/content/crop_yield_model.pkl'
    if not os.path.exists(model_filename):
        # FileNotFoundError is still an Exception, so handlers that catch
        # Exception keep working while callers can now be more specific.
        raise FileNotFoundError("Model not found. Please upload a valid dataset and train the model.")
    # NOTE: joblib.load unpickles the file, which can execute arbitrary
    # code; only load model files written by this application.
    return joblib.load(model_filename)

# Helper that turns user-supplied crop input into the numeric training code
def _encode_crop(crop):
    """Return 1 for Wheat, 2 for Rice, or ``None`` if ``crop`` is neither.

    Accepts the crop name (case-insensitive, surrounding whitespace
    ignored) or the code itself as a number or numeric string.
    """
    names = {"wheat": 1, "rice": 2}
    if isinstance(crop, str):
        crop = crop.strip()
        if crop.lower() in names:
            return names[crop.lower()]
    try:
        number = float(crop)
    except (TypeError, ValueError):
        return None
    return int(number) if number in (1.0, 2.0) else None


# Function to predict crop yield based on input features
def predict_yield(N, P, K, temperature, humidity, pH, rainfall, crop):
    """Predict the crop yield for one set of soil and weather readings.

    Args:
        N, P, K: Nitrogen, phosphorus and potassium values.
        temperature, humidity, pH, rainfall: Remaining numeric features.
        crop: Crop selector; ``1``/``"1"``/``"Wheat"`` or
            ``2``/``"2"``/``"Rice"``.

    Returns:
        ``"Predicted Yield: <value> kg/ha"`` on success, or a message
        starting with ``"Error:"`` when an input is missing or the crop is
        not recognised.

    Raises:
        Exception: Whatever ``load_model`` raises when no trained model is
            available.
    """
    features = {
        "Nitrogen": N,
        "Phosphorus": P,
        "Potassium": K,
        "Temperature": temperature,
        "Humidity": humidity,
        "pH_Value": pH,
        "Rainfall": rainfall,
    }

    # A blank numeric field reaches this function as None.
    missing = [name for name, value in features.items() if value is None]
    if missing:
        return f"Error: Missing value(s) for: {', '.join(missing)}."

    # The crop comes from a free-text box, so normalise it to the numeric
    # code before it reaches the model.
    crop_code = _encode_crop(crop)
    if crop_code is None:
        return "Error: Crop must be 1 (Wheat) or 2 (Rice)."
    features["Crop"] = crop_code

    model = load_model()  # Load the trained model

    # Prepare input data as a single-row DataFrame
    input_df = pd.DataFrame([features])
    yield_prediction = model.predict(input_df)

    return f"Predicted Yield: {yield_prediction[0]:.2f} kg/ha"

# Gradio Interface function to upload file and train the model
def upload_file(file):
    """Validate an uploaded CSV dataset and train the model on it.

    Args:
        file: The uploaded file, given either as a path string or as an
            object whose ``name`` attribute holds the path.

    Returns:
        A status message: a success text naming the saved model file, or a
        message starting with ``"Error:"`` describing why the dataset was
        rejected or why training failed.
    """
    if file is None:
        return "Error: No file uploaded. Please upload a CSV dataset."

    numeric_columns = ["Nitrogen", "Phosphorus", "Potassium", "Temperature", "Humidity", "pH_Value", "Rainfall", "Yield"]
    required_columns = ["Nitrogen", "Phosphorus", "Potassium", "Temperature", "Humidity", "pH_Value", "Rainfall", "Crop", "Yield"]
    supported_crops = ("Wheat", "Rice")

    try:
        # Accept both a plain path and a file wrapper exposing ``.name``.
        df = pd.read_csv(getattr(file, "name", file))

        # Check if required columns are present
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            return f"Error: Missing columns: {', '.join(missing_columns)}. Please upload a file with the required columns."

        if df.empty:
            return "Error: Dataset contains no rows. Please upload a file with data."

        # Check for missing values in the required columns
        if df[required_columns].isnull().sum().sum() > 0:
            return "Error: Dataset contains missing values. Please clean the dataset before uploading."

        # Check data types of the columns (they should be numeric except for the 'Crop' column)
        for col in numeric_columns:
            if not pd.api.types.is_numeric_dtype(df[col]):
                return f"Error: Column '{col}' contains non-numeric values. Please ensure all feature columns are numeric."

        # Reject crop labels the encoder does not know, instead of letting
        # them silently become NaN during encoding.
        unknown_crops = sorted(set(df["Crop"].astype(str)) - set(supported_crops))
        if unknown_crops:
            return f"Error: Unsupported crop value(s): {', '.join(unknown_crops)}. Supported crops are: Wheat, Rice."

        # train_model performs the crop encoding itself, so hand over the
        # labels untouched. Only the required columns are passed, in this
        # fixed order, so the trained feature set matches what
        # predict_yield sends to the model.
        model_filename = train_model(df[required_columns].copy())
        return f"Model trained successfully and saved as {model_filename}. You can now make predictions."

    except Exception as e:
        # UI boundary: report any failure as a status message.
        return f"Error: {str(e)}"

# Gradio Interface function for prediction
def interactive_interface(N, P, K, temperature, humidity, pH, rainfall, crop):
    """Forward the form values to ``predict_yield`` and return its result."""
    return predict_yield(
        N=N,
        P=P,
        K=K,
        temperature=temperature,
        humidity=humidity,
        pH=pH,
        rainfall=rainfall,
        crop=crop,
    )

# Additional Groq API test function
def test_groq_api():
    """Send one fixed prompt through the Groq client and return the reply text.

    Uses the module-level ``client`` and returns the message content of the
    first choice in the chat-completion response.
    """
    # NOTE(review): confirm that this model id is still served by Groq.
    prompt = {
        "role": "user",
        "content": "Explain the importance of fast language models",
    }
    completion = client.chat.completions.create(
        messages=[prompt],
        model="llama3-8b-8192",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# Prediction UI: seven numeric fields plus a text field for the crop, in the
# same order as the parameters of interactive_interface
interface = gr.Interface(
    fn=interactive_interface,
    inputs=[
        *(
            gr.Number(label=caption)
            for caption in (
                "Nitrogen (N)",
                "Phosphorus (P)",
                "Potassium (K)",
                "Temperature (°C)",
                "Humidity (%)",
                "pH Value",
                "Rainfall (mm)",
            )
        ),
        gr.Textbox(label="Crop (1 = Wheat, 2 = Rice)"),
    ],
    outputs=[gr.Textbox(label="Predicted Yield")],
    title="Optimized Crop Yield Prediction",
    description="Input soil and weather parameters to predict crop yield.",
)

# Upload UI: a single CSV file goes in, the status text from upload_file
# comes out
upload_interface = gr.Interface(
    title="Upload and Train Model",
    description="Upload a CSV file with crop yield data to train the prediction model.",
    fn=upload_file,
    inputs=gr.File(label="Upload your crop yield dataset (CSV)"),
    outputs=[gr.Textbox(label="Status")],
)

# Test the Groq API, print the response, then serve both Gradio apps
if __name__ == "__main__":
    print("Groq API Test Response:", test_groq_api())
    # When run as a script, launch() blocks the main thread while its server
    # is running, so the second launch would not start until the first app
    # stops. Start the upload app without taking the thread lock and let the
    # prediction app's blocking launch keep the process (and both servers)
    # alive.
    upload_interface.launch(prevent_thread_lock=True)
    interface.launch()