# appurv-gupta / app.py
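# Streamlit app: builds a FAISS vector store from an uploaded transcript and uses an
# OpenAI chat model to generate content similar to a user query.
# To run locally (assuming streamlit, langchain, openai, and faiss-cpu are installed):
#   streamlit run app.py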
import streamlit as st
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
import os
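# Note: the langchain.* import paths above assume an older LangChain release; newer
# versions move these classes into langchain_community / langchain_openai.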
# Set up OpenAI API key (read from the environment instead of hardcoding a secret in source)
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
if not OPENAI_API_KEY:
    st.warning("OPENAI_API_KEY is not set; OpenAI calls will fail.")
def train_model_with_transcript(transcript):
    """Build a FAISS vector store from the transcript (embeds chunks; no model weights are trained)."""
    # Split transcript into smaller overlapping chunks (sizes are in characters)
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    docs = splitter.split_text(transcript)
    # Create embeddings and vector store
    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_texts(docs, embeddings)
    return vectorstore
def generate_similar_content(query, vectorstore):
    """Generate content similar to the input query, grounded in retrieved transcript chunks."""
    llm = ChatOpenAI(model_name="gpt-3.5-turbo")
    retriever = vectorstore.as_retriever()
    prompt_template = """
    Use the context below to generate content similar to the provided input:
    Context: {context}
    Input Query: {query}
    Similar Content:
    """
    prompt = PromptTemplate(input_variables=["context", "query"], template=prompt_template)
    chain = LLMChain(llm=llm, prompt=prompt)
    # Retrieve the transcript chunks most relevant to the query and flatten them into one string
    context = retriever.get_relevant_documents(query)
    context_text = " ".join([doc.page_content for doc in context])
    result = chain.run({"context": context_text, "query": query})
    return result
# Streamlit app UI
st.title("Text-based Content Generator")
st.markdown("Upload a transcription file, train the model, and generate similar content.")
uploaded_file = st.file_uploader("Upload Transcription File (TXT):", type=["txt"])
if uploaded_file:
    with st.spinner("Reading transcription file..."):
        transcription = uploaded_file.read().decode("utf-8")
    st.success("Transcription file loaded successfully!")
    if st.button("Train Model"):
        with st.spinner("Training model..."):
            # Persist the vector store in session state so it survives Streamlit reruns
            st.session_state["vectorstore"] = train_model_with_transcript(transcription)
        st.success("Model trained successfully!")
query = st.text_input("Enter your query to generate similar content:")
if st.button("Generate Content"):
    if "vectorstore" in st.session_state:
        with st.spinner("Generating content..."):
            result = generate_similar_content(query, st.session_state["vectorstore"])
        st.success("Content generated successfully!")
        st.text_area("Generated Content", value=result, height=200)
    else:
        st.error("Please train the model first by uploading a transcription file.")