Update src/streamlit_app.py

src/streamlit_app.py CHANGED (+56 -0)

@@ -13,9 +13,13 @@ import numpy as np
 import re
 import plotly.express as px
 import plotly.graph_objects as go
+import torch
 from typing import Optional, Tuple, List, Dict
 from run3 import estimate_training_time_and_cost,get_gpu_teraflops,get_gpu_cost_per_tflop_hour
 from utils import get_all_models_from_database
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
+
+
 
 # ADD THIS BLOCK HERE (Line 16)
 # Language configuration
@@ -116,6 +120,58 @@ TRANSLATIONS = {
 }
 }
 
+@st.cache_resource
+def load_llama3_pipeline():
+    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
+    model = AutoModelForCausalLM.from_pretrained(
+        "meta-llama/Llama-3.1-8B-Instruct",
+        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+        device_map="auto" if torch.cuda.is_available() else None
+    )
+    return tokenizer, model
+
+tokenizer, model = load_llama3_pipeline()
+
+st.title("🧠 Chat with Llama 3.1 8B (Instruct)")
+
+if 'chat_history' not in st.session_state:
+    st.session_state.chat_history = [
+        {"role": "system", "content": "You are a helpful, concise assistant."}
+    ]
+
+user_input = st.text_input("You:", key="user_input")
+
+if user_input:
+    st.session_state.chat_history.append({"role": "user", "content": user_input})
+
+    # Format messages into prompt
+    messages = st.session_state.chat_history
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+    with st.spinner("Llama 3 is thinking..."):
+        output = model.generate(
+            **inputs,
+            max_new_tokens=512,
+            temperature=0.7,
+            do_sample=True,
+            top_p=0.9,
+            pad_token_id=tokenizer.eos_token_id
+        )
+
+    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
+    response = decoded.split(prompt)[-1].strip()
+
+    st.session_state.chat_history.append({"role": "assistant", "content": response})
+
+# Display conversation
+for msg in st.session_state.chat_history:
+    if msg["role"] == "user":
+        st.markdown(f"**You:** {msg['content']}")
+    elif msg["role"] == "assistant":
+        st.markdown(f"**AI:** {msg['content']}")
+
 def get_text(key, lang='en'):
     """Get translated text for given key and language"""
     return TRANSLATIONS.get(lang, TRANSLATIONS['en']).get(key, key)
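
The two new imports make torch and transformers runtime dependencies of the Space, so both would need to be declared in its requirements file (not part of this commit). meta-llama/Llama-3.1-8B-Instruct is also a gated checkpoint on the Hugging Face Hub, so from_pretrained only succeeds with an authenticated token. A minimal sketch, assuming the Space exposes an HF_TOKEN secret (the secret name is an assumption, not shown in this commit):

import os
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hypothetical: read a token from an HF_TOKEN secret and pass it to from_pretrained.
hf_token = os.environ.get("HF_TOKEN")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct", token=hf_token)
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B-Instruct", token=hf_token)

At float16 the 8B parameters alone occupy roughly 8e9 × 2 bytes ≈ 16 GB, so the device_map="auto" branch assumes a GPU with at least that much memory; the float32 CPU fallback roughly doubles the footprint.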
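
One behavioral note on the decoding step: prompt comes from apply_chat_template with its special tokens intact, while decoded is produced with skip_special_tokens=True, so decoded generally does not contain prompt verbatim and decoded.split(prompt)[-1] falls back to the whole decoded text, system and user turns included. A sketch of a more robust alternative, assuming the same inputs, output, and tokenizer as in the diff, is to slice off the prompt tokens before decoding:

# Decode only the tokens generated after the prompt.
prompt_len = inputs["input_ids"].shape[-1]
new_tokens = output[0][prompt_len:]
response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()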
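
TextStreamer is imported but never used, and it writes tokens to stdout rather than to the Streamlit page. If streaming into the UI is the goal, the usual pattern is TextIteratorStreamer plus a background thread, with st.write_stream (available in recent Streamlit releases) rendering the chunks. A sketch under those assumptions, reusing tokenizer, model, inputs, and the already-imported st from the app:

from threading import Thread
from transformers import TextIteratorStreamer

# Stream only the newly generated text, skipping the prompt and special tokens.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
Thread(target=model.generate, kwargs=dict(**inputs, streamer=streamer, max_new_tokens=512)).start()
response = st.write_stream(streamer)  # renders chunks as they arrive and returns the full string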