import collections
import datetime
import os
import random
import time

import plotly.figure_factory as ff
import json

import pandas as pd
import ray
from PIL import Image
from compiled_jss.CPEnv import CompiledJssEnvCP

from stable_baselines3.common.vec_env import VecEnvWrapper
from torch.distributions import Categorical

import torch
import numpy as np

from MyVecEnv import WrapperRay

import gradio as gr
import docplex.cp.utils_visu as visu
import matplotlib.pyplot as plt


class VecPyTorch(VecEnvWrapper):
    """Pass-through vectorized-environment wrapper that also remembers a torch device.

    Every call is forwarded unchanged to the wrapped ``venv``; the only state
    added on top of ``VecEnvWrapper`` is the ``device`` attribute.
    """

    def __init__(self, venv, device):
        """Wrap ``venv`` and store ``device`` on the instance."""
        super().__init__(venv)
        self.device = device

    def step_async(self, actions):
        """Forward ``actions`` to the wrapped environment's ``step_async``."""
        self.venv.step_async(actions)

    def step_wait(self):
        """Return whatever the wrapped environment's ``step_wait`` returns."""
        return self.venv.step_wait()

    def reset(self):
        """Return whatever the wrapped environment's ``reset`` returns."""
        return self.venv.reset()


def make_env(seed, instance):
    """Return a zero-argument factory that builds a ``CompiledJssEnvCP`` for ``instance``.

    ``seed`` is accepted for interface compatibility but is not used here.
    The environment is only constructed when the returned callable is invoked.
    """
    def thunk():
        # Deferred construction: nothing is instantiated until the caller asks for it.
        return CompiledJssEnvCP(instance)

    return thunk


def _parse_instance(file):
    """Read a job-shop instance from an open file object.

    The first non-blank line holds ``<jobs_count> <machines_count>``; every
    following non-blank line describes one job as alternating
    ``machine duration`` integers. Blank lines are skipped so they cannot
    shift job indices.

    Returns:
        ``(jobs_count, machines_count, jobs_data)`` where ``jobs_data[j]`` is
        the list of ``(machine, duration)`` tuples of job ``j``.

    Raises:
        ValueError: if a job line has an odd number of tokens.
    """
    file.seek(0)
    jobs_count = 0
    machines_count = 0
    jobs_data = []
    header_seen = False
    while True:
        line = file.readline()
        if not line:
            break
        tokens = line.split()
        if not tokens:
            continue
        if not header_seen:
            jobs_count, machines_count = int(tokens[0]), int(tokens[1])
            header_seen = True
            continue
        if len(tokens) % 2:
            raise ValueError(
                f"Job line must contain (machine, duration) pairs, got: {line!r}"
            )
        jobs_data.append(
            [(int(tokens[k]), int(tokens[k + 1])) for k in range(0, len(tokens), 2)]
        )
    return jobs_count, machines_count, jobs_data


def _build_gantt_rows(jobs_count, jobs_data, solution):
    """Build one Gantt row (a dict) per scheduled task.

    ``solution[j][t]`` is the integer start time of task ``t`` of job ``j``;
    the matching ``(machine, duration)`` comes from ``jobs_data[j][t]``.
    Returns an empty list when ``solution`` is empty.
    """
    rows = []
    if not solution:
        return rows
    for job_id in range(jobs_count):
        for task_id, start_sec in enumerate(solution[job_id]):
            machine, duration = jobs_data[job_id][task_id]
            rows.append({
                "Task": f"Job {job_id}",
                # Start times are mapped onto timestamps to get datetime values for the chart.
                "Start": datetime.datetime.fromtimestamp(start_sec),
                "Finish": datetime.datetime.fromtimestamp(start_sec + duration),
                "Resource": f"Machine {machine}",
            })
    return rows


def solve(file):
    """Solve the job-shop instance in ``file`` with the TorchScript actor and chart the result.

    Args:
        file: Open file object for the instance. It must expose ``name`` (the
            path handed to the environment) as well as ``seek``/``readline``
            (used to re-read the instance when building the chart).

    Returns:
        A 3-tuple ``(pretty_output, fig, elapsed)``: one text line per job
        listing its start times, the plotly Gantt figure (``None`` when there
        is nothing to plot), and the elapsed time as ``"<seconds> seconds"``.
    """
    # Seed every RNG used in this process.
    random.seed(0)
    np.random.seed(0)
    torch.manual_seed(0)
    num_workers = 1  # only one CPU available
    with torch.inference_mode():
        device = torch.device('cpu')
        actor = torch.jit.load('actor.pt', map_location=device)
        actor.eval()
        start_time = time.time()
        fn_env = [make_env(0, file.name) for _ in range(num_workers)]
        ray_wrapper_env = WrapperRay(lambda n: fn_env[n](),
                                     num_workers, 1, device)
        # NOTE(review): the ray-backed environments are never closed after solving;
        # confirm whether WrapperRay exposes close() and call it here if so.
        envs = VecPyTorch(ray_wrapper_env, device)

        # Per-worker temperature; it only depends on num_workers, so build it once.
        if num_workers >= 4:
            temperature = torch.arange(0.5, 2.0, step=(1.5 / num_workers), device=device)
        else:
            temperature = torch.ones(num_workers, device=device)
        temperature = temperature[:, None]

        current_solution_cost = float('inf')
        current_solution = ''
        obs = envs.reset()
        total_episode = 0
        while total_episode < envs.num_envs:
            logits = actor(obs['interval_rep'], obs['attention_interval_mask'], obs['job_resource_mask'],
                           obs['action_mask'], obs['index_interval'], obs['start_end_tokens'])
            probs = Categorical(logits=logits / temperature).probs
            # Draw as many samples per row as there are actions (torch.multinomial
            # samples without replacement by default).
            actions = torch.multinomial(probs, probs.shape[1]).cpu().numpy()
            obs, reward, done, infos = envs.step(actions)
            total_episode += done.sum()
            # Keep the best (lowest makespan) solution reported so far.
            for info in infos:
                if 'makespan' not in info:
                    continue
                makespan = int(info['makespan'])
                if makespan < current_solution_cost:
                    current_solution_cost = makespan
                    current_solution = json.loads(info['solution'])
        total_time = time.time() - start_time

        pretty_output = "".join(
            f"Job {job_id}: {starts}\n"
            for job_id, starts in enumerate(current_solution)
        )

        jobs_count, machines_count, jobs_data = _parse_instance(file)
        # convert to integer the current_solution
        current_solution = [[int(x) for x in y] for y in current_solution]
        df = _build_gantt_rows(jobs_count, jobs_data, current_solution)

        fig = None
        colors = [
            tuple(random.random() for _ in range(3)) for _ in range(machines_count)
        ]
        if df:
            df = pd.DataFrame(df)
            fig = ff.create_gantt(
                df,
                index_col="Resource",
                colors=colors,
                show_colorbar=True,
                group_tasks=True,
            )
            fig.update_yaxes(
                autorange=True
            )
        return pretty_output, fig, f"{total_time} seconds"

# Start (or reuse) the local Ray runtime used by the environment workers.
ray.init(log_to_driver=False,
         ignore_reinit_error=True,
         include_dashboard=False)

title = "Job-Shop Scheduling CP RL"
# HTML shown under the title. The "For large instance ..." sentence appears once only.
description = (
    "A Job-Shop Scheduling Reinforcement Learning based solver, using an underlying CP model as an "
    "environment. <br>"
    "However, due to resource limitations on the HuggingFace platform (a single vCPU available, no GPU), "
    "the results you obtain here don't represent the full potential of the approach. <br>"
    "For large instance, we recommend to run this locally outside the interface as it causes a lot of "
    "overhead. <br>"
    "For fast inference, check out the cached examples below."
)

article = "<p style='text-align: center'>Article Under Review</p>"
# list all non-hidden files in the 'instances' directory
examples = ['instances/' + f for f in os.listdir('instances') if not f.startswith('.')]
iface = gr.Interface(
    fn=solve,
    inputs=gr.File(label="Instance File"),
    outputs=[
        gr.Text(label="Solution"),
        gr.Plot(label="Solution's Gantt Chart"),
        gr.Text(label="Elapsed Time"),
    ],
    title=title,
    description=description,
    article=article,
    examples=examples,
)
iface.launch(enable_queue=True)