Improved documentation using expanders and Streamlit text formatting features.
app.py CHANGED

```diff
@@ -7,33 +7,46 @@ client = OpenAI(
     base_url="https://integrate.api.nvidia.com/v1",
     api_key=os.environ.get("NVIDIA_API_KEY")
 )
-""
-… 25 removed lines of the old plain-text parameter notes (content lost in extraction; only fragments such as "- 0." survive) …
-""
+st.markdown("## 🛠️ Response Specification Features")
+st.markdown("**The expanders below are parameters that you can adjust to customize the AI response.**")
+with st.expander("🔍 **Model Selection**"):
+    st.write("Choose the AI model to generate responses.")
+
+with st.expander("🎨 **Temperature (Creativity Control)**"):
+    st.write("""
+    - **0.0**: Always the same response (deterministic).
+    - **0.1 - 0.3**: Mostly factual and repetitive.
+    - **0.4 - 0.7**: Balanced between coherence and creativity.
+    - **0.8 - 1.0**: Highly creative but less predictable.
+    """)
+
+with st.expander("📏 **Max Tokens (Response Length)**"):
+    st.write("Defines the maximum number of words/subwords in the response.")
+
+with st.expander("🎯 **Top-p (Nucleus Sampling)**"):
+    st.write("""
+    Controls word diversity by sampling from top-probability tokens:
+    - **High `top_p` + Low `temperature`** → More factual, structured responses.
+    - **High `top_p` + High `temperature`** → More diverse, unexpected responses.
+    """)
+
+with st.expander("🔢 **Number of Responses**"):
+    st.write("Specifies how many response variations the AI should generate.")
+
+with st.expander("✅ **Fact-Checking**"):
+    st.write("""
+    - If **enabled**, AI prioritizes factual accuracy.
+    - If **disabled**, AI prioritizes creativity.
+    """)
+
+st.markdown("""
+### 📌 **Summary**
+- `temperature` → Adjusts **creativity vs accuracy**.
+- `max_tokens` → Defines **response length**.
+- `top_p` → Fine-tunes **word diversity**.
+- `fact_check` → Ensures **factual correctness** (but may reduce fluency).
+- `num_responses` → Generates **different variations** of the same prompt.
+""")
 
 def query_ai_model(prompt, model="meta/llama-3.1-405b-instruct", temperature=0.7, max_tokens=512, top_p=0.9, fact_check=False, num_responses=1):
     responses = []
```
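The hunk documents the tunable parameters but does not show the widgets that collect them. Here is a minimal sketch of how they might be wired up in the Streamlit sidebar; the widget labels and ranges are hypothetical, chosen to match the documented defaults, and are not part of this commit:

```python
import streamlit as st

# Hypothetical sidebar controls matching the parameters the expanders document.
# Labels, option lists, and ranges are assumptions, not taken from this diff.
model = st.sidebar.selectbox("Model", ["meta/llama-3.1-405b-instruct"])
temperature = st.sidebar.slider("Temperature", 0.0, 1.0, 0.7)   # creativity control
max_tokens = st.sidebar.slider("Max tokens", 64, 1024, 512)     # response length
top_p = st.sidebar.slider("Top-p", 0.0, 1.0, 0.9)               # nucleus sampling
num_responses = st.sidebar.number_input("Number of responses", 1, 5, 1)
fact_check = st.sidebar.checkbox("Fact-checking", value=False)
```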
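The diff also leaves the body of `query_ai_model` out of view beyond `responses = []`. A hedged sketch of how the documented parameters could feed the `client` configured above, assuming a simple loop for `num_responses` and a system message toggled by `fact_check` (both assumptions, not shown in this commit):

```python
import os
from openai import OpenAI

# Client setup as shown in the diff's context lines.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=os.environ.get("NVIDIA_API_KEY"),
)

def query_ai_model(prompt, model="meta/llama-3.1-405b-instruct", temperature=0.7,
                   max_tokens=512, top_p=0.9, fact_check=False, num_responses=1):
    responses = []
    # Assumed fact-check behavior: steer the model via the system message.
    system_msg = (
        "Prioritize factual accuracy over creativity."
        if fact_check
        else "Be helpful and creative."
    )
    # One API call per requested variation.
    for _ in range(num_responses):
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_msg},
                {"role": "user", "content": prompt},
            ],
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens,
        )
        responses.append(completion.choices[0].message.content)
    return responses
```

Looping per response keeps the sketch portable in case the endpoint ignores the `n` parameter; that trade-off is an assumption, not something this diff confirms.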