File size: 1,423 Bytes
97579c6
 
 
 
 
5017c6b
 
 
 
97579c6
 
5017c6b
 
 
 
 
97579c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import json
import streamlit as st
from PyPDF2 import PdfReader
from transformers import pipeline

# Specify the model name
model_name = "Canstralian/RabbitRedux"


@st.cache_resource
def _load_pipeline(name):
    """Build the text2text-generation pipeline for *name*.

    Streamlit re-executes this script on user interaction; caching the
    pipeline as a resource avoids constructing it again on every rerun.

    Args:
        name: Model identifier passed to ``transformers.pipeline``.

    Returns:
        The pipeline object returned by ``transformers.pipeline``.
    """
    return pipeline("text2text-generation", model=name)


# Initialize the pipeline (served from Streamlit's resource cache after the
# first call).
nlp_pipeline = _load_pipeline(model_name)

# Example usage
# NOTE(review): this demo call runs a full generation and prints to stdout
# each time the script executes; it looks like leftover sample code --
# confirm whether it can be removed.
input_text = "Provide an example of secure Python coding practices."
output = nlp_pipeline(input_text)
print(output)

def process_pdf(file):
    """Extract the text of every page in a PDF and return it as one string.

    Args:
        file: A path or binary file-like object that ``PdfReader`` accepts
            (the script passes the Streamlit upload here).

    Returns:
        The per-page text concatenated in page order, with no separator
        inserted between pages. Returns an empty string when no page
        yields any text.
    """
    reader = PdfReader(file)
    # Guard with `or ""` so a page that yields no text (None or empty)
    # cannot raise TypeError during concatenation; join once instead of
    # growing the string with repeated `+=`.
    return "".join(page.extract_text() or "" for page in reader.pages)

def convert_to_json(text):
    """Feed *text* to the module-level pipeline and return its generated text.

    Args:
        text: The input string handed to ``nlp_pipeline``.

    Returns:
        The ``'generated_text'`` value of the first item the pipeline returns.
    """
    generations = nlp_pipeline(text)
    first_generation = generations[0]
    return first_generation['generated_text']

st.title("PDF to JSON Converter")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    st.write("Processing your file...")

    # Extract text from the PDF
    pdf_text = process_pdf(uploaded_file)

    # Convert the extracted text to JSON using the Hugging Face model
    json_output = convert_to_json(pdf_text)

    # The model returns free-form text, so it is not guaranteed to be valid
    # JSON. Parse it defensively instead of letting the app crash.
    try:
        parsed_output = json.loads(json_output)
    except json.JSONDecodeError as exc:
        st.error(f"The model output is not valid JSON: {exc}")
        # Show the raw text so the user can still inspect what was generated.
        st.text(json_output)
    else:
        # Display the JSON output
        st.write("Converted JSON:")
        st.json(parsed_output)

        # Provide a download link for the JSON file. Swap only the final
        # extension: str.replace would rewrite every ".pdf" occurrence in
        # the name and would miss an upper-case ".PDF" suffix.
        json_filename = uploaded_file.name.rsplit(".", 1)[0] + ".json"
        st.download_button(
            label="Download JSON",
            data=json_output,
            file_name=json_filename,
            mime="application/json"
        )