import streamlit as st
import pandas as pd
from io import StringIO
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
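# torch is not referenced directly below, but transformers needs the PyTorch
# backend installed to load and run this model.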
# Predefined example CSV content
EXAMPLE_CSV_CONTENT = """
"Loss","Date","Score","Opponent","Record","Attendance"
"Hampton (14β12)","September 25","8β7","Padres","67β84","31,193"
"Speier (5β3)","September 26","3β1","Padres","67β85","30,711"
"Elarton (4β9)","September 22","3β1","@ Expos","65β83","9,707"
"Lundquist (0β1)","September 24","15β11","Padres","67β83","30,774"
"Hampton (13β11)","September 6","9β5","Dodgers","61β78","31,407"
"""
# Load the model and tokenizer
@st.cache_resource
def load_model_and_tokenizer():
    model_name = "tablegpt/TableGPT2-7B"
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype="auto", device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer
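# st.cache_resource caches the returned objects across Streamlit reruns, so
# the 7B checkpoint is downloaded and loaded only once per process rather
# than on every user interaction.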
model, tokenizer = load_model_and_tokenizer()
# Application UI
st.title("Table Question Answering App")
st.write(
    """
This app uses a language model to answer questions about tabular data.
You can upload your own CSV file or use a predefined example to test it.
"""
)
# Sidebar for input options
st.sidebar.header("Input Options")
data_source = st.sidebar.radio("Choose a data source:", ("Example CSV", "Upload CSV"))
if data_source == "Example CSV":
    st.subheader("Using Example CSV Data")
    csv_file = StringIO(EXAMPLE_CSV_CONTENT)
    df = pd.read_csv(csv_file)
else:
    st.subheader("Upload Your CSV File")
    uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
    if uploaded_file is not None:
        df = pd.read_csv(uploaded_file)
    else:
        st.warning("Please upload a CSV file to proceed.")
        st.stop()
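# Past this point a dataframe is guaranteed to exist: st.stop() above halts
# the script run until a file has been uploaded.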
# Display the loaded dataframe
st.write("### Data Preview")
st.dataframe(df)
# Question Input
st.write("### Ask a Question")
question = st.text_input("Enter your question:", "Which games had a record of 40 wins and 40 losses?")
# Generate response if question is provided
if question:
    # Prompt format following the TableGPT2 model card: the first five rows
    # of the dataframe are shown to the model inside a /* ... */ block.
    example_prompt_template = """Given access to several pandas dataframes, write the Python code to answer the user's question.
/*
"{var_name}.head(5).to_string(index=False)" as follows:
{df_info}
*/
Question: {user_question}
"""
    prompt = example_prompt_template.format(
        var_name="df",
        df_info=df.head(5).to_string(index=False),
        user_question=question,
    )
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    with st.spinner("Generating response..."):
        generated_ids = model.generate(**model_inputs, max_new_tokens=512)
        # model.generate() returns prompt + completion; slice off the prompt
        # tokens so that only the newly generated tokens are decoded.
        generated_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # Display response
    st.write("### Model Response")
    st.text_area("Response", response, height=200)
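# Note: the prompt asks the model to *write Python code*, so `response`
# usually contains a code snippet rather than a direct answer. A minimal,
# hypothetical sketch for running that snippet against the dataframe
# (code-block extraction and sandboxing are omitted; exec() on untrusted
# model output is unsafe):
#
#     namespace = {"df": df, "pd": pd}
#     exec(generated_code, namespace)  # generated_code: code parsed out of `response`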
# Footer
st.sidebar.info(
    """
This app demonstrates the use of a language model for tabular data understanding.
Powered by [Hugging Face Transformers](https://huggingface.co/).
"""
)
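# Assumed runtime dependencies (not pinned anywhere in this file): streamlit,
# pandas, torch, transformers, and accelerate (required by device_map="auto").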