Update app.py
Browse files
app.py
CHANGED
@@ -1,66 +1,350 @@
|
|
1 |
-
# We'll generate a sample Streamlit app for LLM fine-tuning and deployment simulation.
|
2 |
-
# Since we can't actually fine-tune large models in this script due to constraints,
|
3 |
-
# we'll simulate the UI and interaction as if the model was already fine-tuned.
|
4 |
-
|
5 |
-
import os
|
6 |
-
|
7 |
-
# Create a simple streamlit app template
|
8 |
-
streamlit_app_code = """
|
9 |
import streamlit as st
|
10 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
-
# Custom CSS styling
|
15 |
-
st.markdown(
|
16 |
<style>
|
17 |
-
.
|
18 |
-
background-color: #
|
19 |
-
}
|
20 |
-
.stTextInput>div>div>input {
|
21 |
-
border-radius: 10px;
|
22 |
}
|
23 |
.stButton>button {
|
24 |
background-color: #4CAF50;
|
25 |
color: white;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
border-radius: 10px;
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
}
|
30 |
</style>
|
31 |
-
|
32 |
|
33 |
-
|
34 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
-
#
|
37 |
-
st.sidebar
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
-
#
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
-
#
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
with st.spinner("Generating response..."):
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
-
#
|
62 |
-
|
63 |
-
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
+
from streamlit_option_menu import option_menu
|
3 |
+
import pandas as pd
|
4 |
+
import numpy as np
|
5 |
+
import time
|
6 |
+
from PIL import Image
|
7 |
+
import os
|
8 |
+
import json
|
9 |
+
import tempfile
|
10 |
+
from huggingface_hub import notebook_login, HfApi, Repository
|
11 |
|
12 |
+
# Page-level settings (browser tab title/icon, layout and sidebar state).
# NOTE(review): Streamlit's documentation asks for set_page_config to be
# called before other Streamlit commands, so keep this at the top of the
# script - confirm against the Streamlit version in use.
_PAGE_SETTINGS = {
    "page_title": "LLM Fine-Tuning & Deployment",
    "page_icon": ":robot:",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
|
19 |
|
20 |
+
# Global stylesheet injected once per run. It restyles the app background,
# buttons, text inputs and select boxes, and defines the `.header` and
# `.subheader` classes that the pages below reference from raw HTML.
_CUSTOM_CSS = """
<style>
    .stApp {
        background-color: #f5f5f5;
    }
    .stButton>button {
        background-color: #4CAF50;
        color: white;
        border-radius: 5px;
        padding: 0.5rem 1rem;
        border: none;
    }
    .stTextInput>div>div>input {
        border-radius: 5px;
        padding: 0.5rem;
    }
    .stSelectbox>div>div>select {
        border-radius: 5px;
        padding: 0.5rem;
    }
    .css-1aumxhk {
        background-color: #ffffff;
        border-radius: 10px;
        padding: 2rem;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    }
    .header {
        color: #4CAF50;
        font-size: 2.5rem;
        font-weight: bold;
        margin-bottom: 1rem;
    }
    .subheader {
        color: #333333;
        font-size: 1.5rem;
        margin-bottom: 1rem;
    }
</style>
"""
# unsafe_allow_html is required, otherwise the <style> tag is escaped and
# shown as text instead of being applied.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
|
60 |
|
61 |
+
# Banner shown on every page: a narrow logo column next to a wider title column
# (1:3 width ratio).
_LOGO_URL = "https://huggingface.co/front/assets/huggingface_logo-noborder.svg"
logo_column, title_column = st.columns([1, 3])

with logo_column:
    st.image(_LOGO_URL, width=100)

with title_column:
    # The title uses the `.header` CSS class, hence the raw HTML.
    st.markdown('<div class="header">LLM Fine-Tuning & Deployment</div>', unsafe_allow_html=True)
    st.markdown("Fine-tune and deploy your large language models with ease")
|
68 |
|
69 |
+
# Sidebar navigation. Each entry pairs a page label with its menu icon name so
# the two lists passed to option_menu cannot drift out of step.
_MENU_ENTRIES = [
    ("Home", "house"),
    ("Data Preparation", "file-earmark-text"),
    ("Model Selection", "cpu"),
    ("Fine-Tuning", "gear"),
    ("Evaluation", "graph-up"),
    ("Deployment", "cloud-upload"),
    ("About", "info-circle"),
]

with st.sidebar:
    # `selected` holds the label of the active page; the page routing below
    # compares against these exact strings.
    selected = option_menu(
        menu_title="Main Menu",
        options=[label for label, _ in _MENU_ENTRIES],
        icons=[icon for _, icon in _MENU_ENTRIES],
        menu_icon="cast",
        default_index=0,
    )
|
78 |
|
79 |
+
# ---------------------------------------------------------------------------
# Page routing
#
# Renders the page whose label was chosen in the sidebar (`selected`).
# Workflow state is shared between pages through st.session_state:
#   data_dir       - per-session temp directory holding the uploaded dataset
#   data_path      - full path of the saved dataset file
#   data_type      - MIME type reported by the browser (informational)
#   data_format    - "csv" / "json" / "text", derived from the file extension
#   data_signature - identity of the upload that is currently saved
#   selected_model - Hugging Face model ID picked on the Model Selection page
#   fine_tuned     - set once the (simulated) fine-tuning run has finished
#   model_path     - where the fine-tuned model would be stored
#   deployed       - set once the (simulated) deployment has finished
#
# Training, evaluation and deployment are simulated with sleeps and canned
# values; no model is actually loaded, trained or uploaded.
# ---------------------------------------------------------------------------


def _detect_data_format(filename):
    """Return "csv", "json" or "text" based on the extension of *filename*.

    The extension is used rather than the browser-reported MIME type because
    that type is not consistent across browsers and platforms (a CSV file can
    be reported as e.g. ``application/vnd.ms-excel``), which would silently
    route the file to the plain-text preview.
    """
    extension = os.path.splitext(filename)[1].lower()
    return {".csv": "csv", ".json": "json"}.get(extension, "text")


def _persist_upload(uploaded):
    """Save *uploaded* to the session's temp directory and record it.

    Streamlit re-runs the whole script on every interaction, so this is
    called repeatedly for the same upload. The file is only written when the
    upload actually changed, and a single temp directory is reused for the
    whole session instead of creating a new one on each rerun.
    """
    signature = (uploaded.name, uploaded.size, getattr(uploaded, "file_id", None))
    saved_path = st.session_state.get('data_path')
    if (
        st.session_state.get('data_signature') == signature
        and saved_path
        and os.path.isfile(saved_path)
    ):
        return

    upload_dir = st.session_state.get('data_dir')
    if not upload_dir or not os.path.isdir(upload_dir):
        upload_dir = tempfile.mkdtemp()
        st.session_state['data_dir'] = upload_dir

    # The file name comes from the client: keep only its final component so
    # it can never point outside the temp directory.
    safe_name = os.path.basename(uploaded.name.replace("\\", "/")) or "uploaded_data"
    path = os.path.join(upload_dir, safe_name)

    # Drop the previously saved dataset so files do not pile up.
    if saved_path and saved_path != path and os.path.isfile(saved_path):
        try:
            os.remove(saved_path)
        except OSError:
            pass  # best effort: a stale temp file is harmless

    with open(path, "wb") as f:
        f.write(uploaded.getbuffer())

    st.session_state['data_path'] = path
    st.session_state['data_type'] = uploaded.type
    st.session_state['data_format'] = _detect_data_format(safe_name)
    st.session_state['data_signature'] = signature


def _current_data_format():
    """Return the format of the saved dataset ("csv", "json" or "text")."""
    return (
        st.session_state.get('data_format')
        or _detect_data_format(st.session_state['data_path'])
    )


def _read_csv_or_report(path):
    """Load the CSV at *path*; on failure show an error and return None."""
    try:
        return pd.read_csv(path)
    except (pd.errors.ParserError, pd.errors.EmptyDataError, UnicodeDecodeError, OSError) as exc:
        st.error(f"Could not read the CSV file: {exc}")
        return None


def _simulate_training():
    """Show a spinner and a 1-100% progress bar; no real training happens."""
    with st.spinner("Setting up fine-tuning environment..."):
        time.sleep(2)

    progress_bar = st.progress(0)
    status_text = st.empty()

    for percent in range(1, 101):
        progress_bar.progress(percent)
        status_text.text(f"Training progress: {percent}%")
        time.sleep(0.05)


# Home Page
if selected == "Home":
    st.markdown('<div class="subheader">Welcome to LLM Fine-Tuning & Deployment</div>', unsafe_allow_html=True)
    st.markdown("""
    This application guides you through the process of fine-tuning large language models (LLMs)
    and deploying them to Hugging Face Hub.

    **Key Features:**
    - Prepare your dataset for fine-tuning
    - Select from popular base models
    - Configure fine-tuning parameters
    - Evaluate model performance
    - Deploy to Hugging Face Hub

    Get started by selecting a step from the sidebar menu.
    """)

    # NOTE(review): newer Streamlit releases deprecate `use_column_width` in
    # favour of `use_container_width` - confirm against the pinned version.
    st.image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/hf-libraries.png",
             caption="Hugging Face Ecosystem", use_column_width=True)

# Data Preparation Page
elif selected == "Data Preparation":
    st.markdown('<div class="subheader">Data Preparation</div>', unsafe_allow_html=True)

    tab1, tab2, tab3 = st.tabs(["Upload Data", "Preview Data", "Data Statistics"])

    with tab1:
        st.markdown("### Upload Your Dataset")
        data_file = st.file_uploader("Choose a file (CSV, JSON, or TXT)", type=["csv", "json", "txt"])

        if data_file is not None:
            file_details = {"FileName": data_file.name, "FileType": data_file.type, "FileSize": data_file.size}
            # Save first so the success message is only shown once the file
            # is really available to the other tabs and pages.
            _persist_upload(data_file)
            st.success("File uploaded successfully!")
            st.json(file_details)

    with tab2:
        if 'data_path' in st.session_state:
            st.markdown("### Data Preview")
            data_path = st.session_state['data_path']
            data_format = _current_data_format()

            if data_format == "csv":
                df = _read_csv_or_report(data_path)
                if df is not None:
                    st.dataframe(df.head())
            elif data_format == "json":
                try:
                    with open(data_path, encoding="utf-8") as f:
                        data = json.load(f)
                except (json.JSONDecodeError, UnicodeDecodeError, OSError) as exc:
                    st.error(f"Could not parse the JSON file: {exc}")
                else:
                    st.json(data)
            else:
                # errors="replace" keeps the preview usable for text that is
                # not valid UTF-8 instead of raising.
                with open(data_path, encoding="utf-8", errors="replace") as f:
                    data = f.read()
                st.text_area("Text Content", data, height=200)
        else:
            st.warning("Please upload a file first.")

    with tab3:
        if 'data_path' in st.session_state:
            st.markdown("### Data Statistics")

            if _current_data_format() == "csv":
                df = _read_csv_or_report(st.session_state['data_path'])

                if df is not None:
                    col1, col2, col3 = st.columns(3)
                    col1.metric("Total Samples", len(df))
                    col2.metric("Columns", len(df.columns))
                    # Cast the NumPy integer to a plain int for st.metric.
                    col3.metric("Missing Values", int(df.isnull().sum().sum()))

                    st.markdown("**Column Types**")
                    # dtype objects are rendered as strings so the table
                    # holds plain text only.
                    st.table(pd.DataFrame({
                        "Column": list(df.columns),
                        "Type": [str(dtype) for dtype in df.dtypes],
                    }))
            else:
                st.info("Detailed statistics available for CSV files only.")
        else:
            st.warning("Please upload a file first.")

# Model Selection Page
elif selected == "Model Selection":
    st.markdown('<div class="subheader">Model Selection</div>', unsafe_allow_html=True)

    model_options = {
        "GPT-like": ["gpt2", "gpt2-medium", "gpt2-large", "gpt2-xl"],
        "BERT-like": ["bert-base-uncased", "bert-large-uncased"],
        "RoBERTa": ["roberta-base", "roberta-large"],
        "T5": ["t5-small", "t5-base", "t5-large"],
        "Custom": ["Enter custom model name"]
    }

    model_family = st.selectbox("Select Model Family", list(model_options.keys()))

    if model_family == "Custom":
        # Strip stray whitespace so it does not end up in URLs and paths.
        model_name = st.text_input("Enter Hugging Face Model ID").strip()
    else:
        model_name = st.selectbox("Select Model", model_options[model_family])

    st.markdown("### Model Information")

    if model_name:
        st.info(f"You've selected: **{model_name}**")

        # Display model card
        st.markdown(f"View model card on [Hugging Face Hub](https://huggingface.co/{model_name})")

        # Show estimated resource requirements (a name-based heuristic only)
        st.markdown("**Estimated Resource Requirements**")

        if "gpt2" in model_name or "large" in model_name:
            st.warning("This model requires significant GPU memory (8GB+ recommended)")
        else:
            st.success("This model can run on modest hardware (4GB GPU memory sufficient for fine-tuning)")

        # Only a non-empty name is remembered; an empty custom field leaves
        # any earlier selection in place.
        st.session_state['selected_model'] = model_name

# Fine-Tuning Page
elif selected == "Fine-Tuning":
    st.markdown('<div class="subheader">Fine-Tuning Configuration</div>', unsafe_allow_html=True)

    # An if/else guard is used instead of st.stop() so the footer rendered
    # after this chain is still shown when the prerequisite is missing.
    if 'selected_model' not in st.session_state:
        st.warning("Please select a model first from the Model Selection page.")
    else:
        st.info(f"Fine-tuning model: **{st.session_state['selected_model']}**")

        col1, col2 = st.columns(2)

        # The hyperparameters below are collected for the UI only; the
        # simulated run does not consume them.
        with col1:
            st.markdown("### Training Parameters")
            epochs = st.slider("Number of Epochs", 1, 20, 3)
            batch_size = st.selectbox("Batch Size", [4, 8, 16, 32, 64], index=2)
            learning_rate = st.selectbox("Learning Rate", [1e-5, 3e-5, 5e-5, 1e-4], index=1)

        with col2:
            st.markdown("### Advanced Options")
            warmup_steps = st.number_input("Warmup Steps", 0, 1000, 100)
            weight_decay = st.slider("Weight Decay", 0.0, 0.1, 0.01)
            fp16 = st.checkbox("Use Mixed Precision (FP16)", value=True)

        st.markdown("### Start Fine-Tuning")

        if st.button("Begin Fine-Tuning Process"):
            if 'data_path' not in st.session_state:
                st.error("Please upload your dataset first.")
            else:
                _simulate_training()

                st.success("Fine-tuning completed successfully!")
                st.balloons()

                st.session_state['fine_tuned'] = True
                st.session_state['model_path'] = f"./models/{st.session_state['selected_model']}-fine-tuned"

# Evaluation Page
elif selected == "Evaluation":
    st.markdown('<div class="subheader">Model Evaluation</div>', unsafe_allow_html=True)

    if 'fine_tuned' not in st.session_state:
        st.warning("Please complete the fine-tuning process first.")
    else:
        st.success(f"Evaluating fine-tuned model: **{st.session_state['selected_model']}**")

        st.markdown("### Evaluation Metrics")

        # Simulated metrics
        col1, col2, col3 = st.columns(3)
        col1.metric("Training Loss", "0.456", "-0.124 from baseline")
        col2.metric("Validation Loss", "0.512", "-0.098 from baseline")
        col3.metric("Accuracy", "0.872", "+0.15 from baseline")

        st.markdown("### Sample Predictions")

        sample_text = st.text_area("Enter text to test the model", "The movie was...")

        if st.button("Generate Prediction"):
            with st.spinner("Generating response..."):
                time.sleep(2)

            # Simulate different responses: one canned text is picked at
            # random; the entered sample text is not used.
            responses = {
                "positive": "The movie was absolutely fantastic! The acting was superb and the storyline kept me engaged throughout.",
                "negative": "The movie was terrible. Poor acting and a predictable plot made it a complete waste of time.",
                "neutral": "The movie was okay. It had some good moments but nothing particularly memorable."
            }

            selected_response = np.random.choice(list(responses.values()))

            st.markdown("**Model Output:**")
            st.info(selected_response)

# Deployment Page
elif selected == "Deployment":
    st.markdown('<div class="subheader">Model Deployment</div>', unsafe_allow_html=True)

    if 'fine_tuned' not in st.session_state:
        st.warning("Please complete the fine-tuning process first.")
    else:
        st.info(f"Ready to deploy: **{st.session_state['selected_model']}-fine-tuned**")

        st.markdown("### Hugging Face Hub Deployment")

        hf_token = st.text_input("Hugging Face Access Token", type="password")
        repo_name = st.text_input("Repository Name", "my-fine-tuned-model").strip()
        privacy = st.radio("Repository Visibility", ["Public", "Private"])

        if st.button("Deploy to Hugging Face Hub"):
            if not hf_token:
                st.error("Please provide your Hugging Face access token")
            elif not repo_name:
                # Without a name the success link below would point at the
                # Hub front page.
                st.error("Please provide a repository name")
            else:
                with st.spinner("Uploading model to Hugging Face Hub..."):
                    time.sleep(3)

                    # In a real app, you would use:
                    # api = HfApi()
                    # api.create_repo(repo_name, private=(privacy == "Private"), token=hf_token)
                    # api.upload_folder(folder_path=st.session_state['model_path'], repo_id=repo_name)

                st.success("Model successfully deployed to Hugging Face Hub!")
                st.markdown(f"Your model is available at: [https://huggingface.co/{repo_name}](https://huggingface.co/{repo_name})")

                st.session_state['deployed'] = True

# About Page
elif selected == "About":
    st.markdown('<div class="subheader">About This App</div>', unsafe_allow_html=True)

    st.markdown("""
    **LLM Fine-Tuning & Deployment App**

    This application provides an intuitive interface for fine-tuning large language models
    and deploying them to Hugging Face Hub.

    **Features:**
    - Streamlined workflow for LLM fine-tuning
    - Support for various model architectures
    - Easy deployment to Hugging Face Hub
    - Beautiful and responsive UI

    **Technologies Used:**
    - Streamlit for the web interface
    - Hugging Face Transformers for model handling
    - Hugging Face Hub for model deployment

    Developed with ❤️ for the AI community.
    """)

    st.markdown("---")
    st.markdown("""
    **Disclaimer:** This is a demo application. For production use,
    please ensure you have proper hardware resources and follow best practices
    for model training and deployment.
    """)
|
343 |
|
344 |
+
# Footer shown beneath whichever page was rendered: a horizontal rule followed
# by a centred, muted attribution line (raw HTML, hence unsafe_allow_html).
_FOOTER_HTML = """
<div style="text-align: center; color: #666666; font-size: 0.9rem;">
    LLM Fine-Tuning & Deployment App | Powered by Streamlit and Hugging Face
</div>
"""
st.markdown("---")
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)
|