# Dependencies:
# gradio==3.3.1
# transformers==4.27.1
# torch==2.0.1
# pymupdf==1.21.1
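#
# A minimal install sketch (the version pins above come from the original header
# and are assumptions rather than tested constraints):
#   pip install gradio==3.3.1 transformers==4.27.1 torch==2.0.1 pymupdf==1.21.1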

import gradio as gr
from transformers import pipeline
import fitz  # PyMuPDF

# Load a summarization model from Hugging Face
summarizer = pipeline("summarization")
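# With no model argument, transformers falls back to its default summarization
# checkpoint (sshleifer/distilbart-cnn-12-6 at the time of writing); pinning it
# explicitly is an optional, assumed hardening step:
#   summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")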

def extract_text_from_pdf(pdf_file):
    text = ""
    try:
        # Gradio's File component (type="file") passes a temp-file object whose
        # handle may already be closed, so open the PDF by path via .name
        # instead of reading the stream directly.
        document = fitz.open(pdf_file.name)
        for page in document:
            text += page.get_text()
        document.close()
    except Exception as e:
        return f"Error reading PDF: {e}"
    return text
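
# The same extraction can be sanity-checked outside Gradio; the filename below is
# purely hypothetical:
#   with fitz.open("sample_rubric.pdf") as doc:
#       print("".join(page.get_text() for page in doc)[:500])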

def evaluate_text_against_rubric(rubric_text, text):
    # Split the rubric into one criterion per non-empty line
    criteria = [criterion.strip() for criterion in rubric_text.split('\n') if criterion.strip()]

    if not criteria:
        return "No valid criteria found in the rubric."

    if not text:
        return "No text provided for evaluation."

    # The summary does not depend on the criterion, so run the model once rather
    # than once per rubric line; truncation guards against inputs longer than the
    # model's context window.
    try:
        summary = summarizer(text, max_length=50, min_length=25,
                             do_sample=False, truncation=True)[0]['summary_text']
    except Exception as e:
        return f"Error during summarization: {e}"

    evaluations = {}
    for i, criterion in enumerate(criteria):
        evaluations[f'Criteria {i+1}'] = {
            "Criterion": criterion,
            "Score": 3,  # Placeholder score; real scoring is not implemented yet
            "Comment": f"Evaluation based on criterion: {criterion}",
            "Example": summary
        }

    return evaluations
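
# Shape of the returned mapping for a one-line rubric (values illustrative):
#   {"Criteria 1": {"Criterion": "...", "Score": 3,
#                   "Comment": "Evaluation based on criterion: ...",
#                   "Example": "<model summary>"}}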

def evaluate(rubric_pdf, rubric_text, text):
    rubric = ""
    if rubric_pdf is not None:
        rubric = extract_text_from_pdf(rubric_pdf)
    elif rubric_text:
        rubric = rubric_text
    
    if not rubric:
        return "No rubric provided."
    
    if not text:
        return "No text provided for evaluation."
    
    evaluation = evaluate_text_against_rubric(rubric, text)
    
    if isinstance(evaluation, str):  # If it's an error message
        return evaluation
    
    evaluation_text = ""
    for criterion, details in evaluation.items():
        evaluation_text += f"{criterion}:\n"
        evaluation_text += f"  Criterion: {details['Criterion']}\n"
        evaluation_text += f"  Score: {details['Score']}\n"
        evaluation_text += f"  Comment: {details['Comment']}\n"
        evaluation_text += f"  Example: {details['Example']}\n\n"
    
    return evaluation_text

# Create Gradio interface
with gr.Blocks() as interface:
    gr.Markdown("# PDF Text Evaluator")
    gr.Markdown("Upload a rubric as a PDF or paste the rubric text, then paste text for evaluation.")
    
    rubric_pdf_input = gr.File(label="Upload Rubric PDF (optional)", type="file")
    rubric_text_input = gr.Textbox(lines=10, placeholder="Or enter your rubric text here...", label="Rubric Text (optional)")
    text_input = gr.Textbox(lines=10, placeholder="Paste the text to be evaluated here...", label="Text to Evaluate")
    
    evaluate_button = gr.Button("Evaluate")
    
    output = gr.Textbox(label="Evaluation Results")
    
    evaluate_button.click(evaluate, inputs=[rubric_pdf_input, rubric_text_input, text_input], outputs=output)

# Launch the interface
interface.launch()
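
# Launch options are standard Gradio parameters; for example, to reach the app from
# other machines or to get a temporary public URL (both optional):
#   interface.launch(server_name="0.0.0.0", server_port=7860)
#   interface.launch(share=True)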