"""RALI5 - Reading arXiv Like I'm 5.

Download an arXiv PDF, extract its text, ask an LLM for a concept
adjacency list at a chosen expertise level, and render the result as an
interactive pyvis graph inside a Gradio UI.
"""
import json
import os

import gradio as gr
import networkx as nx
import pymupdf  # PyMuPDF
import requests
from bs4 import BeautifulSoup  # noqa: F401 -- kept; may be used elsewhere in the project
from dotenv import load_dotenv
from openai import OpenAI
from pyvis.network import Network

# Example paper: https://arxiv.org/pdf/2410.06401


def download_and_extract(arxiv_url):
    """Download an arXiv PDF and extract its plain text.

    Args:
        arxiv_url: direct link to the PDF, e.g. https://arxiv.org/pdf/2410.06401

    Returns:
        (pdf_path, text_preview, text_path) on success.
        (None, error_message, None) on download failure -- always a
        3-tuple so callers can unpack safely.
    """
    paper_id = arxiv_url.rstrip("/").split("/")[-1]
    os.makedirs("downloads", exist_ok=True)
    pdf_path = f"downloads/{paper_id}.pdf"

    response = requests.get(arxiv_url, timeout=60)
    if response.status_code != 200:
        # BUG FIX: the original returned a 2-tuple here while the success
        # path returned 3 values, crashing the caller on unpack.
        return None, "Failed to download PDF.", None
    with open(pdf_path, "wb") as f:
        f.write(response.content)

    # Extract text page-by-page and persist it for the LLM step.
    text_path = f"downloads/{paper_id}_markdown.md"
    with pymupdf.open(pdf_path) as doc:  # context manager closes the document
        text = "".join(page.get_text() for page in doc)
    with open(text_path, "w", encoding="utf-8") as f:
        f.write(text)

    # File + 3000-char text preview for the UI.
    return pdf_path, text[:3000], text_path


def get_adj_list(text_path, level):
    """Ask the LLM for a JSON adjacency list of the paper's key concepts.

    Args:
        text_path: path to the extracted text of the paper.
        level: target audience level ("Beginner", "Intermediate", "Expert").

    Returns:
        The JSON string found between the ```json fences of the model
        reply (best effort if the fences are missing).
    """
    with open(text_path, "r", encoding="utf-8") as f:
        text = f.read()

    client = OpenAI(
        base_url="https://api.studio.nebius.com/v1/",
        api_key=os.environ.get("NEBIUS_API_KEY"),
    )
    response = client.chat.completions.create(
        model="Qwen/Qwen3-235B-A22B",
        max_tokens=10000,
        temperature=0.6,
        top_p=0.95,
        messages=[
            {
                "role": "system",
                "content": f"The markdown version of an arXiv is here: {text}",
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"""Given the markdown version of an arXiv paper, provide an adjacency list of concepts and key terms within the field of the paper. Make it helpful for a student at an {level} level in the relevant field. 
Format it in json, encased in '```json' and '```'.""",
                    }
                ],
            },
        ],
    )

    # Idiom: read the message content directly instead of round-tripping
    # the whole response through json.loads(response.to_json()).
    output = response.choices[0].message.content

    # Strip any leading reasoning: keep only the fenced ```json ... ``` body.
    # Guarded so a missing fence no longer slices from a bogus offset
    # (the original did find(...) + 7 even when find returned -1).
    start = output.find("```json")
    if start != -1:
        output = output[start + len("```json"):]
    end = output.find("```")
    if end != -1:
        output = output[:end]
    return output


def gen_concept_graph(adj_list):
    """Render a JSON adjacency list as an embedded pyvis network graph.

    Args:
        adj_list: JSON string mapping each concept to a list of related concepts.

    Returns:
        An HTML iframe snippet (srcdoc-embedded) suitable for gr.HTML.
    """
    adj_list_json = json.loads(adj_list)

    # Build a directed graph from the {node: [neighbors]} mapping.
    G = nx.DiGraph()
    for u in adj_list_json:
        for v in adj_list_json[u]:
            G.add_edge(u, v)

    net = Network(notebook=True, cdn_resources="remote")
    net.from_nx(G)
    html = net.generate_html()
    # srcdoc is single-quoted below, so swap the document's quotes to
    # double quotes to avoid closing the attribute early.
    html = html.replace("'", "\"")
    # Embedding technique:
    # https://mahimairaja.medium.com/build-knowledge-graph-from-textdata-using-langchain-under-2min-ce0d0d0e44e8
    # BUG FIX: the original returned an empty f-string (f""""""), so the
    # graph never appeared in the UI.
    return f"""<iframe style="width: 100%; height: 620px; margin: 0 auto" name="result" sandbox="allow-scripts allow-same-origin" frameborder="0" srcdoc='{html}'></iframe>"""


def submit(arxiv_url, level):
    """Gradio callback: run the full pipeline for one arXiv URL.

    Returns values for (File, Textbox, Textbox, HTML) outputs.
    """
    pdf_path, text, text_path = download_and_extract(arxiv_url)
    if pdf_path is None:
        # Download failed; `text` carries the error message. Return
        # placeholders instead of crashing on the downstream steps.
        return None, text, "", ""
    adj_list = get_adj_list(text_path, level)
    concept_graph_html = gen_concept_graph(adj_list)
    return pdf_path, text, adj_list, concept_graph_html


# ==============
load_dotenv()

demo = gr.Interface(
    fn=submit,
    inputs=[
        gr.Textbox(
            "https://arxiv.org/pdf/2410.06401",
            label="Enter arXiv Link",
            placeholder="https://arxiv.org/pdf/2410.06401",
        ),
        gr.Dropdown(["Beginner", "Intermediate", "Expert"], label="Explanation Level"),
    ],
    outputs=[
        gr.File(label="Download PDF"),
        gr.Textbox(label="Preview Extracted Text"),
        gr.Textbox(label="Concept Graph Adj List at Selected Level"),
        gr.HTML(label="Concept Graph at Selected Level"),
    ],
    title="RALI5 - Reading arXiv Like I'm 5",
)

if __name__ == "__main__":
    demo.launch(mcp_server=True)