Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import streamlit as st
|
3 |
+
from together import Together
|
4 |
+
|
5 |
+
# Page chrome: wide layout, custom tab title and icon, main heading.
st.set_page_config(page_title="AI Prompt Evaluator", page_icon="🧠", layout="wide")
st.title("AI Prompt Evaluator")
8 |
+
|
9 |
+
def evaluate_prompt(prompt):
    """Score a prompt against five prompt-engineering heuristics.

    Args:
        prompt: Raw prompt text entered by the user.

    Returns:
        A tuple ``(score, suggestions, criteria)``:
        - ``score``: int on a 0-10 scale (2 points per criterion met);
        - ``suggestions``: list of tips, one per failed criterion, in
          criterion order;
        - ``criteria``: dict mapping each criterion name (``clarity``,
          ``specificity``, ``context``, ``output_format``, ``constraints``)
          to a bool indicating whether it passed.
    """
    # Lowercase once instead of once per regex (original recomputed it 4x).
    lowered = prompt.lower()

    # Each criterion pairs its pass/fail check with the tip shown on failure,
    # so the criteria keys and the suggestion texts stay in one place.
    checks = {
        "clarity": (
            bool(re.search(r"\b(who|what|where|when|why|how)\b", lowered)),
            "Make the prompt clearer by specifying exactly what you need.",
        ),
        "specificity": (
            len(prompt.split()) > 5,
            "Make the prompt more detailed and specific.",
        ),
        "context": (
            bool(re.search(r"\b(for example|such as|like|including)\b", lowered)),
            "Add some background information to improve relevance.",
        ),
        "output_format": (
            bool(re.search(r"\b(list|table|bullets|code|summary)\b", lowered)),
            "Specify how you want the output to be structured (e.g., list, code).",
        ),
        "constraints": (
            bool(re.search(r"\b(limit|max|min|exactly|within)\b", lowered)),
            "Define any limits or conditions (e.g., word count, time frame).",
        ),
    }

    criteria = {name: passed for name, (passed, _tip) in checks.items()}
    score = sum(criteria.values()) * 2  # 5 criteria x 2 points = 0-10 scale
    suggestions = [tip for passed, tip in checks.values() if not passed]

    return score, suggestions, criteria
|
35 |
+
|
36 |
+
def get_ai_response(prompt, model):
    """Send *prompt* to the given Together chat model and return the reply text.

    Any failure (auth, network, malformed response) is caught and returned as
    an ``"Error: ..."`` string rather than raised, so the UI can render it.
    """
    try:
        api_client = Together(api_key=st.secrets["together_api_key"])
        completion = api_client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=500,
        )
        return completion.choices[0].message.content
    except Exception as exc:
        return f"Error: {str(exc)}"
|
48 |
+
|
49 |
+
# Sidebar for configuration: pick which Together-hosted model answers.
st.sidebar.header("Configuration")
# Display name -> Together model identifier.
model_options = {
    "Mistral 7B": "mistralai/Mistral-7B-Instruct-v0.3",
    "Llama 3 8B": "meta-llama/Llama-3-8B-Instruct",
    "Qwen 72B": "Qwen/Qwen-72B-Chat",
}
# A dict iterates over its keys directly; `.keys()` was redundant.
selected_model = st.sidebar.selectbox("Select AI Model", list(model_options))
|
57 |
+
|
58 |
+
# Main input area
prompt_input = st.text_area("Enter your prompt:", height=150)

col1, col2 = st.columns(2)
with col1:
    if st.button("Evaluate Prompt", type="primary"):
        if not prompt_input.strip():
            st.error("Please enter a prompt to evaluate.")
        else:
            score, suggestions, criteria = evaluate_prompt(prompt_input)

            # Display evaluation results
            st.subheader("Prompt Evaluation Results")

            # Score with a colour-coded banner.
            if score >= 8:
                st.success(f"Score: {score}/10 - Excellent!")
            elif score >= 6:
                st.info(f"Score: {score}/10 - Good")
            else:
                st.warning(f"Score: {score}/10 - Needs Improvement")

            # Criteria checklist
            st.markdown("#### Criteria Checklist")
            for criterion, passed in criteria.items():
                icon = "✅" if passed else "❌"
                st.markdown(f"{icon} **{criterion.capitalize()}**")

            # Improvement suggestions
            if suggestions:
                st.markdown("#### Suggestions to improve your prompt:")
                for tip in suggestions:
                    st.markdown(f"- {tip}")
            else:
                st.success("Your prompt is well-structured!")

            # Get AI response only if the score is decent.
            if score >= 4:
                # SECURITY FIX: the original hard-coded a Together API key
                # here and tried to assign it into st.secrets. Secrets are
                # read-only at runtime, and credentials must never live in
                # source control — configure them in .streamlit/secrets.toml
                # (or the Spaces secrets UI). The previously committed key
                # should be revoked.
                if "together_api_key" not in st.secrets:
                    st.error(
                        "Together API key not configured. Add "
                        "'together_api_key' to .streamlit/secrets.toml "
                        "to enable AI response previews."
                    )
                else:
                    with st.spinner("Getting AI response..."):
                        ai_response = get_ai_response(
                            prompt_input, model_options[selected_model]
                        )
                    st.subheader("AI Response Preview")
                    st.markdown(ai_response)
            else:
                st.info("Improve your prompt score to see an AI response preview.")
|
107 |
+
|
108 |
+
with col2:
    st.subheader("Prompt Engineering Tips")
    # Static guidance panel shown alongside the evaluator at all times.
    tips_markdown = """
    ### How to craft effective prompts:

    1. **Be clear and specific** - Clearly state what you want.
    2. **Provide context** - Give background information.
    3. **Specify output format** - Ask for lists, code, or summaries.
    4. **Set constraints** - Define limits like word count.
    5. **Use examples** - Show examples of desired output.

    ### Examples of good prompts:

    - "Create a 5-item bulleted list of healthy breakfast ideas including nutritional benefits."
    - "Write a Python function that sorts a list of integers using the bubble sort algorithm."
    - "Summarize the key benefits of exercise in exactly 100 words for a health newsletter."
    """
    st.markdown(tips_markdown)
|
125 |
+
|
126 |
+
# Collapsible explanation of how each evaluation criterion is detected.
st.markdown("---")
with st.expander("Understanding the Evaluation Criteria"):
    criteria_markdown = """
    - **Clarity**: Does the prompt include clear question words (who, what, where, when, why, how)?
    - **Specificity**: Is the prompt detailed enough (more than 5 words)?
    - **Context**: Does the prompt provide examples or background information?
    - **Output Format**: Does the prompt specify the desired format (list, table, code, etc.)?
    - **Constraints**: Does the prompt include specific limitations or requirements?
    """
    st.markdown(criteria_markdown)
|
136 |
+
|
137 |
+
# Footer / about section.
st.markdown("---")
st.markdown("#### About")
st.markdown(
    "This tool helps you create better prompts for AI systems by evaluating "
    "them against best practices in prompt engineering."
)
|