File size: 4,694 Bytes
8628f17
 
 
e333fa4
 
8628f17
 
 
 
 
d3e0b87
8628f17
d3e0b87
5a1a3ab
d3e0b87
8628f17
 
 
 
 
 
d3e0b87
 
 
8628f17
 
5a1a3ab
8628f17
d3e0b87
8628f17
d3e0b87
5a1a3ab
d3e0b87
8628f17
5a1a3ab
8628f17
 
 
 
 
 
 
 
 
 
 
 
5a1a3ab
8628f17
 
 
5a1a3ab
8628f17
e984be7
8628f17
 
 
 
 
 
 
 
ebeb9b4
5a1a3ab
 
 
d3e0b87
ebeb9b4
 
 
d3e0b87
ebeb9b4
 
 
 
 
5a1a3ab
 
ebeb9b4
 
 
5a1a3ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8628f17
 
 
 
d3e0b87
5a1a3ab
8628f17
 
 
 
d3e0b87
8628f17
 
5a1a3ab
 
8628f17
 
5a1a3ab
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import gradio as gr
from root import RootSignals

# Module-level state shared by the UI callbacks below.
# NOTE(review): being module globals, these are presumably shared by every
# browser session served by this process — confirm whether that is intended.

# Root Signals API client; stays None until create_judge() builds one.
client = None
# Evaluator most recently created by create_judge(); None until then.
custom_judge = None

def initialize_client(api_key: str) -> "RootSignals":
    """Build a Root Signals client for the given API key.

    The client is only returned; the module-level ``client`` is not touched
    here. Callers that want to share it must store the return value
    themselves.

    Args:
        api_key: Root Signals API key passed straight to ``RootSignals``.

    Returns:
        A new ``RootSignals`` instance.
    """
    return RootSignals(api_key=api_key)

def create_judge(api_key, judge_name, judge_prompt):
    """Create a custom LLM judge and remember it for later evaluations.

    On success the module-level ``client`` and ``custom_judge`` are replaced
    with the newly built client and evaluator.

    Args:
        api_key: Root Signals API key entered by the user.
        judge_name: Name given to the evaluator.
        judge_prompt: Instruction text; the evaluated text is appended to it
            through a ``{{response}}`` placeholder.

    Returns:
        A status message for display: either a prompt to fill in a missing
        field or a success confirmation.
    """
    global client, custom_judge
    if not api_key:
        return "🔑 Please enter your Root Signals API key first!"
    # Reject blank fields up front instead of sending an unusable evaluator
    # definition to the API.
    if not judge_name or not judge_name.strip():
        return "✏️ Please enter a name for your custom judge!"
    if not judge_prompt or not judge_prompt.strip():
        return "📝 Please enter a prompt for your custom judge!"

    # Build a fresh client on every call. Reusing a cached client would keep
    # the first API key ever submitted, so correcting a wrong key in the UI
    # would have no effect.
    client = initialize_client(api_key)

    # The quadruple braces in the f-string emit a literal "{{response}}".
    custom_judge = client.evaluators.create(
        name=judge_name,
        predicate=f'{judge_prompt}\n\nTEXT: {{{{response}}}}',
        intent=f"Intent: {judge_name}",
        model="gemini-2.0-flash",
    )

    return f"Your custom LLM-Judge '{judge_name}' is created successfully!"

def evaluate_response(api_key, llm_response):
    """Run the stored custom judge on ``llm_response``.

    Reads the module-level ``client`` and ``custom_judge``.

    Args:
        api_key: API key from the UI; only checked for being non-empty.
        llm_response: Text handed to the judge as ``response``.

    Returns:
        A 2-tuple. Normally ``(score, justification)`` taken from the judge's
        result; when a precondition fails, ``(message, "")``.
    """
    if not api_key:
        return "🔑 Please enter your Root Signals API key first!", ""

    # Both the client and a judge must exist before anything can be run.
    judge_ready = client and custom_judge
    if not judge_ready:
        return "Please create a judge first", ""

    outcome = custom_judge.run(response=llm_response)
    return outcome.score, outcome.justification

# Build the Gradio UI: header, API-key field, a "create judge" section and an
# "execute" section, then wire the two buttons to the callbacks defined above.
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
    # Visitor-counter badge image, linked to the badge service URL.
    gr.HTML("""<a href="https://api.visitorbadge.io/api/visitors?path=https://huggingface.co/spaces/root-signals/RootEvaluatorsDemo">
               <img src="https://api.visitorbadge.io/api/visitors?path=https://huggingface.co/spaces/root-signals/RootEvaluatorsDemo" />
               </a>""")
    
    # Logo row.
    with gr.Row():
        gr.Image(value="https://app.rootsignals.ai/images/root-signals-color.svg", height=70)
        gr.Markdown("<div>&nbsp;</div>")  # Add some space below the image
    
    gr.Markdown("# Custom Judge Demo by Root Signals")
    gr.Markdown("[Sign-up](https://app.rootsignals.ai/register) to create your API key or [create a temporary one](https://app.rootsignals.ai/demo-user)!")
    
    # API key input; passed to both callbacks as their first argument.
    api_key = gr.Textbox(
        label="🔑 Root Signals API Key",
        placeholder="Enter your Root Signals API key...",
        type="password",
        show_label=True,
    )
    
    gr.Markdown("---")  # Divider
    
    # Accordion for creating the judge; the fields are pre-filled with an example.
    with gr.Accordion("Create Custom Judge", open=True):
        judge_name = gr.Textbox(label="👨‍⚖️ Judge Name", value="Medical Jargon Judge", placeholder="Enter a name for your custom judge...", interactive=True)
        judge_prompt = gr.Textbox(
            label="📝 Custom Judge Prompt",
            placeholder="Enter the custom judge prompt...",
            value="Evaluate the medical jargon use of a text. Higher scores mean the text include a lot of technical jargon such as drug names and very specific medical terminology.",
            interactive=True,
            lines=5,
            max_lines=10
        )
        create_judge_btn = gr.Button("✨ CREATE JUDGE", variant="primary")
        # Markdown component that receives the status string returned by
        # create_judge (it is the click handler's output below).
        info_message = gr.Markdown()
    
    gr.Markdown("---")  # Divider
    
    # Accordion for running the judge: input text on the left, results on the right.
    with gr.Accordion("Execute", open=True):
        with gr.Row():
            with gr.Column():
                llm_response = gr.Textbox(
                    label="🤖 LLM Response", 
                    placeholder="Enter the LLM response to be evaluated...",
                    value="This CCR5 co-receptor is used by almost all primary HIV-1 isolates regardless of viral genetic subtype.",
                    interactive=True,
                    lines=5,
                    max_lines=10
                )
                evaluate_btn = gr.Button("🧐 EVALUATE", variant="primary", visible=True)
            with gr.Column():
                # Read-only fields filled by evaluate_response's two return values.
                score = gr.Textbox(label="📊 Score (between 0 and 1)", interactive=False)
                justification = gr.TextArea(label="💬 Justification", interactive=False)
    
    # Button click events
    create_judge_btn.click(
        fn=create_judge,
        inputs=[api_key, judge_name, judge_prompt],
        outputs=[info_message]  # Register the Markdown component as output
    )
    
    # evaluate_response returns a pair, mapped in order onto score and justification.
    evaluate_btn.click(
        fn=evaluate_response,
        inputs=[api_key, llm_response],
        outputs=[score, justification]
    )
    
    # Footer links.
    gr.Markdown("[🌐 Homepage](https://www.rootsignals.ai/) | [🤖 Github Repo](https://github.com/root-signals/rs-python-sdk) | [🐍 Python SDK Docs](https://sdk.rootsignals.ai/en/latest/) | [💬 Discord](https://discord.gg/EhazTQsFnj)")

# Launch the Gradio app only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    demo.launch()