# Spaces: Sleeping
import streamlit as st | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain.embeddings.openai import OpenAIEmbeddings | |
from langchain.vectorstores import FAISS | |
from langchain.chains import LLMChain | |
from langchain.prompts import PromptTemplate | |
from langchain.chat_models import ChatOpenAI | |
import os | |
# Set up OpenAI API key
# SECURITY: never hard-code the key in source. It is read from the process
# environment (set it as a deployment secret or export it in the shell before
# launching the app). Any key that was ever committed to this file must be
# treated as leaked and revoked.
# OPENAI_API_KEY is an empty string when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
def train_model_with_transcript(transcript):
    """Build a FAISS vector store from a transcript.

    Despite the name, no model is trained here: the transcript is split into
    chunks, the chunks are embedded with ``OpenAIEmbeddings`` and indexed with
    ``FAISS.from_texts``.

    Args:
        transcript: Full transcript text.

    Returns:
        The FAISS vector store built from the transcript chunks.

    Raises:
        ValueError: If the transcript is empty, whitespace-only, or produces
            no chunks.
    """
    # Fail early with a clear message instead of an obscure error from the
    # embedding / indexing step when there is nothing to index.
    if not transcript or not transcript.strip():
        raise ValueError("Transcript is empty; nothing to index.")

    # Split transcript into smaller, overlapping chunks.
    # NOTE(review): CharacterTextSplitter splits only on its separator, so a
    # transcript without that separator may yield chunks larger than
    # chunk_size -- confirm this suits the expected transcript format.
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    docs = splitter.split_text(transcript)
    if not docs:
        raise ValueError("Transcript produced no text chunks to index.")

    # Create embeddings and vector store.
    embeddings = OpenAIEmbeddings()
    return FAISS.from_texts(docs, embeddings)
def generate_similar_content(query, vectorstore):
    """Generate content similar to ``query`` using retrieved transcript context.

    The documents most relevant to the query are fetched from ``vectorstore``,
    their text is joined into a single context string, and a chat model is
    prompted with that context plus the query.

    Args:
        query: The user's input text to imitate.
        vectorstore: Vector store returned by ``train_model_with_transcript``;
            must provide ``as_retriever()``.

    Returns:
        The text produced by the LLM chain.

    Raises:
        ValueError: If the query is empty or whitespace-only.
    """
    # An empty query cannot be meaningfully embedded or answered; reject it
    # before spending any API calls.
    if not query or not query.strip():
        raise ValueError("Query is empty; enter some text to generate content.")

    # Prompt text is kept exactly as before; it is part of runtime behaviour.
    prompt_template = (
        "\n"
        "Use the context below to generate content similar to the provided input:\n"
        "Context: {context}\n"
        "Input Query: {query}\n"
        "Similar Content:\n"
    )
    prompt = PromptTemplate(
        input_variables=["context", "query"],
        template=prompt_template,
    )
    llm = ChatOpenAI(model_name="gpt-3.5-turbo")
    chain = LLMChain(llm=llm, prompt=prompt)

    # Retrieve the chunks most relevant to the query and flatten them into
    # one context string for the prompt.
    retriever = vectorstore.as_retriever()
    context = retriever.get_relevant_documents(query)
    context_text = " ".join(doc.page_content for doc in context)

    return chain.run({"context": context_text, "query": query})
# Streamlit app UI
#
# Streamlit re-executes this whole script on every widget interaction, and
# st.button() is True only during the run triggered by its own click. A plain
# variable assigned in the "Train Model" run therefore no longer exists in the
# "Generate Content" run, so the vector store is kept in st.session_state.
st.title("Text-based Content Generator")
st.markdown("Upload a transcription file, train the model, and generate similar content.")

# Without an API key every embedding / chat call would fail later with a less
# obvious error, so report it up front and halt this run.
if not os.environ.get("OPENAI_API_KEY"):
    st.error("OPENAI_API_KEY is not set. Configure it as an environment variable or app secret.")
    st.stop()

uploaded_file = st.file_uploader("Upload Transcription File (TXT):", type=["txt"])

if uploaded_file:
    transcription = None
    with st.spinner("Reading transcription file..."):
        try:
            # getvalue() returns the full contents regardless of the buffer's
            # current read position, which read() does not guarantee on reruns.
            transcription = uploaded_file.getvalue().decode("utf-8")
        except UnicodeDecodeError:
            st.error("Could not decode the uploaded file as UTF-8 text.")

    if transcription is not None:
        if not transcription.strip():
            st.error("The uploaded transcription file is empty.")
        else:
            st.success("Transcription file loaded successfully!")
            if st.button("Train Model"):
                with st.spinner("Training model..."):
                    st.session_state["vectorstore"] = train_model_with_transcript(transcription)
                st.success("Model trained successfully!")

query = st.text_input("Enter your query to generate similar content:")

if st.button("Generate Content"):
    if "vectorstore" not in st.session_state:
        st.error("Please train the model first by uploading a transcription file.")
    elif not query.strip():
        st.error("Please enter a query before generating content.")
    else:
        with st.spinner("Generating content..."):
            result = generate_similar_content(query, st.session_state["vectorstore"])
        st.success("Content generated successfully!")
        st.text_area("Generated Content", value=result, height=200)