import json
import random

import gradio as gr
import pymupdf


def parse_pdf(file_path: str):
    """Parse a trip report PDF into rows and write them to a JSON file.

    Walks the text blocks of every page with a small state machine. A block
    starting with "Report Run Date Time" supplies the report metadata; a
    block containing "Fidelis Agent" marks the table header, after which one
    block is skipped and the following blocks are consumed, in order, as the
    cells of each table row.

    Args:
        file_path: Path of the PDF, passed straight to ``pymupdf.open``.

    Returns:
        A ``(metadata, output_path)`` tuple: a human-readable summary string
        and the path of the JSON file that was written (a relative path
        named ``output_<random number>.json``).

    Raises:
        ValueError: If the trip-details block does not have 3 or 4 lines, or
            if the block expected to be the agent cell starts left of x=700.
            Other layout surprises (wrong number of space-separated fields,
            non-numeric counts, a short header block) propagate as whatever
            exception the failing operation raises.
    """
    report_run_date_time = None
    transportation_provider_name = None

    rows = []
    row = {}
    # Open through the context manager so the document is closed even when
    # parsing raises part-way through.
    with pymupdf.open(file_path) as doc:
        for page in doc:
            text_page = page.get_textpage()

            # The state is reset on every page: rows are only read after the
            # page's own "Fidelis Agent" header block has been seen.
            state = None
            for block in text_page.extractBLOCKS():
                # Only the horizontal extent and the text of a block are used.
                x0, _, x1, _, text = block[:5]
                text = text.strip()

                if text.startswith("Report Run Date Time"):
                    lines = text.split("\n")
                    report_run_date_time = lines[1]
                    transportation_provider_name = (
                        lines[2].replace("Transportation Provider Name:", "").strip()
                    )
                elif "Fidelis Agent" in text:
                    state = "page_no"
                elif state == "page_no":
                    # The block right after the header is skipped unread
                    # (presumably the page number, going by the state name).
                    state = "row_1"
                elif state == "row_1":
                    lines = text.split("\n")
                    if len(lines) == 1:
                        type_, case_no, plan_id, member_id = lines[0].split(" ")
                    else:
                        # The type sits on its own line above the three IDs.
                        type_ = lines[0]
                        case_no, plan_id, member_id = lines[1].split(" ")
                    row["type"] = type_
                    row["case_no"] = case_no
                    row["plan_id"] = plan_id
                    row["member_id"] = member_id
                    state = "row_2"
                elif state == "row_2":
                    row["pickup_info"] = text
                    state = "row_3"
                elif state == "row_3":
                    # Drop the "Pickup:" label and flatten to a single line.
                    row["pickup_time"] = (
                        text.replace("Pickup:", "").strip().replace("\n", " ")
                    )
                    state = "row_4"
                elif state == "row_4":
                    lines = text.split("\n")
                    # Explicit check instead of `assert`, which is stripped
                    # when Python runs with -O.
                    if len(lines) not in (3, 4):
                        raise ValueError(
                            "expected 3 or 4 lines in the trip details block, "
                            f"got {len(lines)}: {text!r}"
                        )
                    row["num_of_one_way_trips"] = int(lines[0])
                    row["vehicle_type"] = lines[1]
                    row["num_of_riders"] = int(lines[2])
                    # The fourth line (authorization number) is optional.
                    row["auth_number"] = lines[3] if len(lines) == 4 else None
                    state = "row_5"
                elif state == "row_5":
                    row["dest_info"] = text
                    state = "row_6"
                elif state == "row_6":
                    # NOTE(review): the 700 / 719 x-thresholds look like the
                    # position of the agent column; confirm against the
                    # report layout.
                    if x0 >= 700:
                        # Block starts at/after x=700: it is the agent cell
                        # and the row has no comments.
                        row["special_needs_and_comments"] = None
                        row["fidelis_agent"] = text
                        rows.append(row)
                        row = {}
                        state = "row_1"
                    elif x1 > 719:
                        # Block starts left of 700 but extends past 719:
                        # comments and agent share one block, with the agent
                        # on the last line.
                        lines = text.split("\n")
                        row["special_needs_and_comments"] = "\n".join(lines[:-1])
                        row["fidelis_agent"] = lines[-1]
                        rows.append(row)
                        row = {}
                        state = "row_1"
                    else:
                        row["special_needs_and_comments"] = text
                        state = "row_7"
                elif state == "row_7":
                    if x0 >= 700:
                        row["fidelis_agent"] = text
                        rows.append(row)
                        row = {}
                        state = "row_1"
                    else:
                        # A second comments block is appended directly, with
                        # no separator, as before.
                        row["special_needs_and_comments"] += text
                        state = "row_8"
                elif state == "row_8":
                    # After two comment blocks the next one must be the agent.
                    if x0 < 700:
                        raise ValueError(
                            "expected the agent block (x0 >= 700) after two "
                            f"comment blocks, got x0={x0}: {text!r}"
                        )
                    row["fidelis_agent"] = text
                    rows.append(row)
                    row = {}
                    state = "row_1"

    metadata = f"""\
Report Run Date Time: {report_run_date_time}
Transportation Provider Name: {transportation_provider_name}
Number of Items: {len(rows)}
    """

    json_data = {
        "report_run_date_time": report_run_date_time,
        "transportation_provider_name": transportation_provider_name,
        # NOTE(review): "itmes" looks like a typo for "items"; kept as-is
        # because consumers of the JSON may already rely on this key.
        "itmes": rows,
    }

    output_path = f"output_{random.randint(0, 1000000):08d}.json"

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(json_data, f, indent=4)

    return metadata, output_path


def main():
    """Build the Gradio interface around ``parse_pdf`` and launch it.

    The interface takes a single uploaded file and shows the two values
    returned by ``parse_pdf``: the metadata summary in a textbox and the
    generated JSON file behind a download button.
    """
    app = gr.Interface(
        fn=parse_pdf,
        inputs=gr.File(label="PDF File"),
        outputs=[
            gr.Textbox(label="Metadata", lines=7),
            gr.DownloadButton(label="Download JSON"),
        ],
        # Gradio expects a string mode here; the boolean form is deprecated.
        # "never" is the string equivalent of the former `False`.
        allow_flagging="never",
    )
    app.launch()


# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()