import streamlit as st
import pandas as pd
from io import StringIO
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Predefined example CSV content
EXAMPLE_CSV_CONTENT = """
"Loss","Date","Score","Opponent","Record","Attendance"
"Hampton (14–12)","September 25","8–7","Padres","67–84","31,193"
"Speier (5–3)","September 26","3–1","Padres","67–85","30,711"
"Elarton (4–9)","September 22","3–1","@ Expos","65–83","9,707"
"Lundquist (0–1)","September 24","15–11","Padres","67–83","30,774"
"Hampton (13–11)","September 6","9–5","Dodgers","61–78","31,407"
"""

# Load the model and tokenizer
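# @st.cache_resource keeps the loaded weights alive across Streamlit reruns,
# so the 7B model is downloaded and initialized only once per process.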
@st.cache_resource
def load_model_and_tokenizer():
    model_name = "tablegpt/TableGPT2-7B"
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype="auto", device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer

model, tokenizer = load_model_and_tokenizer()

# Application UI
st.title("Table Question Answering App")
st.write(
    """
    This app uses the TableGPT2-7B language model to answer questions about
    tabular data: given your question, the model writes the pandas code that
    would compute the answer. You can upload your own CSV file or use a
    predefined example to try it out.
    """
)

# Sidebar for input options
st.sidebar.header("Input Options")
data_source = st.sidebar.radio("Choose a data source:", ("Example CSV", "Upload CSV"))

if data_source == "Example CSV":
    st.subheader("Using Example CSV Data")
    csv_file = StringIO(EXAMPLE_CSV_CONTENT)
    df = pd.read_csv(csv_file)
else:
    st.subheader("Upload Your CSV File")
    uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
    if uploaded_file is not None:
        df = pd.read_csv(uploaded_file)
    else:
        st.warning("Please upload a CSV file to proceed.")
        st.stop()

# Display the loaded dataframe
st.write("### Data Preview")
st.dataframe(df)

# Question Input
st.write("### Ask a Question")
question = st.text_input("Enter your question:", "Which games had a record of 40 wins and 40 losses?")

# Generate response if question is provided
if question:
    example_prompt_template = """Given access to several pandas dataframes, write the Python code to answer the user's question.

    /*
    "{var_name}.head(5).to_string(index=False)" as follows:
    {df_info}
    */

    Question: {user_question}
    """
    prompt = example_prompt_template.format(
        var_name="df",
        df_info=df.head(5).to_string(index=False),
        user_question=question,
    )

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
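    # Render the chat messages with the model's chat template; add_generation_prompt
    # appends the assistant cue so generation starts in the right place.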
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    with st.spinner("Generating response..."):
        generated_ids = model.generate(**model_inputs, max_new_tokens=512)
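        # model.generate returns prompt + completion; slice off the prompt tokens
        # so only the newly generated text is decoded below.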
        generated_ids = [
            output_ids[len(input_ids) :]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Display response
    st.write("### Model Response")
    st.text_area("Response", response, height=200)

# Footer
st.sidebar.info(
    """
    This app demonstrates the use of a language model for tabular data understanding.
    Powered by [Hugging Face Transformers](https://huggingface.co/).
    """
)
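
# A minimal way to run this app locally (assuming the file is saved as app.py and
# streamlit, transformers, torch, and accelerate are installed):
#
#   streamlit run app.py
#
# Note: TableGPT2-7B is a 7B-parameter model, so expect it to need roughly 16 GB
# of GPU memory in half precision; device_map="auto" lets accelerate decide where
# to place the weights (GPU, CPU, or a mix).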