shivam12323 committed on
Commit bdb5b58 · verified · 1 Parent(s): 6a34434

Create app.py

Files changed (1)
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
+ import streamlit as st
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.vectorstores import FAISS
+ from langchain.chains import LLMChain
+ from langchain.prompts import PromptTemplate
+ from langchain.chat_models import ChatOpenAI
+ import os
+
+ # Set up OpenAI API key
+ OPENAI_API_KEY = "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"
+ os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
+
+ def train_model_with_transcript(transcript):
+     """Embed the transcript and build a FAISS vector store for retrieval."""
+     # Split transcript into smaller chunks
+     splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+     docs = splitter.split_text(transcript)
+
+     # Create embeddings and vector store
+     embeddings = OpenAIEmbeddings()
+     vectorstore = FAISS.from_texts(docs, embeddings)
+     return vectorstore
+
+ def generate_similar_content(query, vectorstore):
+     """Generate content similar to the input query using the indexed transcript."""
+     llm = ChatOpenAI(model_name="gpt-3.5-turbo")
+     retriever = vectorstore.as_retriever()
+     prompt_template = """
+     Use the context below to generate content similar to the provided input:
+
+     Context: {context}
+     Input Query: {query}
+
+     Similar Content:
+     """
+     prompt = PromptTemplate(input_variables=["context", "query"], template=prompt_template)
+     chain = LLMChain(llm=llm, prompt=prompt)
+
+     context = retriever.get_relevant_documents(query)  # fetch the most relevant chunks
+     context_text = " ".join([doc.page_content for doc in context])
+
+     result = chain.run({"context": context_text, "query": query})
+     return result
+
+ # Streamlit app UI
+ st.title("Text-based Content Generator")
+ st.markdown("Upload a transcription file, train the model, and generate similar content.")
+
+ uploaded_file = st.file_uploader("Upload Transcription File (TXT):", type=["txt"])
+
+ if uploaded_file:
+     with st.spinner("Reading transcription file..."):
+         transcription = uploaded_file.read().decode("utf-8")
+     st.success("Transcription file loaded successfully!")
+
+     if st.button("Train Model"):
+         with st.spinner("Training model..."):
+             st.session_state["vectorstore"] = train_model_with_transcript(transcription)  # persists across reruns
+         st.success("Model trained successfully!")
+
+     query = st.text_input("Enter your query to generate similar content:")
+
+     if st.button("Generate Content"):
+         if "vectorstore" in st.session_state:
+             with st.spinner("Generating content..."):
+                 result = generate_similar_content(query, st.session_state["vectorstore"])
+             st.success("Content generated successfully!")
+             st.text_area("Generated Content", value=result, height=200)
+         else:
+             st.error("Please train the model first by uploading a transcription file.")