ginipick committed (verified)
Commit a7dd379 · 1 parent: dfb57d9

Update app.py

Files changed (1):
1. app.py +38 -227

app.py CHANGED
@@ -1,232 +1,43 @@
 import gradio as gr
 import os
-from huggingface_hub import InferenceClient
-
-import pandas as pd
-import openpyxl
-from openpyxl.utils.dataframe import dataframe_to_rows
-from datetime import datetime
-from io import BytesIO
-
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import Chroma
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain.memory import ConversationBufferMemory
-from langchain.chains import create_retrieval_chain, create_history_aware_retriever
-
-from pathlib import Path
-import chromadb
-from unidecode import unidecode
-
-import re
-from langchain.schema import Document
-from langchain.prompts import PromptTemplate
-
-# Initialize the API client
-llm_client = InferenceClient("CohereForAI/c4ai-command-r-plus", token=os.getenv("HF_TOKEN"))
-
-# Functions for producing the long-text result
-def process_file(file):
-    df = read_excel_data(file)
-    if df is None:
-        raise ValueError("An error occurred while reading the Excel data.")
-    longest_reviews = extract_longest_reviews(df)
-    if longest_reviews is None:
-        raise ValueError("An error occurred while extracting the longest reviews.")
-    result_file = save_to_excel(longest_reviews)
-    if result_file is None:
-        raise ValueError("An error occurred while saving the Excel file.")
-    return result_file
-
-def analyze_and_initialize_db(file, chunk_size, chunk_overlap, progress=gr.Progress()):
-    try:
-        print("Starting file processing")
-        result_file = process_file(file)
-        list_file_path = [result_file]
-        print("Starting database initialization")
-        vector_db, collection_name, db_status = initialize_database(list_file_path, chunk_size, chunk_overlap, progress)
-        print("Database initialization complete")
-        return vector_db, collection_name, db_status, list_file_path, result_file
-    except Exception as e:
-        print(f"Database initialization error: {e}")
-        return None, None, "Failed", None, None
-
-def long_text_result(file):
-    try:
-        print("Starting long-text result analysis")
-        progress = gr.Progress()
-        vector_db, collection_name, db_status, list_file_path, result_file = analyze_and_initialize_db(file, 600, 40, progress)
-        print(f"DB status: {db_status}")
-        if db_status == "Complete!":
-            analysis = "Analysis complete"
-            # Set up the vector DB and its retrieval chain
-            global chat_chain
-            memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
-            prompt_template = PromptTemplate(template="Based on the following document, please provide a relevant response:", input_variables=["context"])
-            retriever = create_history_aware_retriever(
-                retriever=vector_db.as_retriever(),
-                llm=llm_client,
-                prompt=prompt_template
-            )
-            chat_chain = create_retrieval_chain(
-                retriever=retriever,
-                llm=llm_client,
-                memory=memory
-            )
-        else:
-            analysis = "Analysis failed"
-        return analysis
-    except Exception as e:
-        print(f"Long text result error: {e}")
-        return "Analysis failed"
-
-def chatbot_response(input_text):
-    try:
-        response = chat_chain.run(input_text)
-        return response
-    except Exception as e:
-        print(f"Chatbot response error: {e}")
-        return "Failed to generate a chatbot response"
-
-# Read the Excel data
-def read_excel_data(file):
-    try:
-        print("Starting to read Excel data")
-        df = pd.read_excel(BytesIO(file), usecols="B, C, D, E", skiprows=1, names=["Review Date", "Option", "Review", "ReviewScore"])
-        df['Review Date'] = pd.to_datetime(df['Review Date']).dt.tz_localize(None).dt.date
-        df['Year-Month'] = df['Review Date'].astype(str).str.slice(0, 7)
-        df['Year'] = df['Review Date'].astype(str).str.slice(0, 4)
-        df['Month'] = df['Review Date'].astype(str).str.slice(5, 7)
-        df['Day'] = df['Review Date'].astype(str).str.slice(8, 10)
-        df['Option1'] = df['Option'].str.split(" / ").str[0]  # Extract primary option
-        df['Review Length'] = df['Review'].str.len()  # Calculate review length
-        return df
-    except Exception as e:
-        print(f"Error reading Excel data: {e}")
-        return None
-
-def extract_longest_reviews(df):
-    try:
-        print("Starting to extract the longest reviews")
-        longest_reviews = df.groupby('ReviewScore', group_keys=False).apply(lambda x: x.nlargest(100, 'Review Length', keep='all')).reset_index(drop=True)
-        return longest_reviews.drop(columns=['Review Length', 'Year-Month', 'Year', 'Month', 'Day', 'Option1'])  # Drop unnecessary columns
-    except Exception as e:
-        print(f"Error extracting longest reviews: {e}")
-        return None
-
-def save_to_excel(longest_reviews):
-    try:
-        print("Starting to save to Excel")
-        wb = openpyxl.Workbook()
-        ws = wb.active
-        ws.title = "Longest reviews"
-
-        for r in dataframe_to_rows(longest_reviews, index=False, header=True):
-            ws.append(r)
-        ws.sheet_properties.tabColor = "00FF00"  # Green color
-
-        file_path = "review_analysis_long_reviews_download.xlsx"
-        wb.save(file_path)
-        return file_path
-    except Exception as e:
-        print(f"Error saving to Excel: {e}")
-        return None
-
-def create_collection_name(filepath):
-    try:
-        collection_name = Path(filepath).stem
-        collection_name = collection_name.replace(" ", "-")
-        collection_name = unidecode(collection_name)
-        collection_name = re.sub('[^A-Za-z0-9]+', '-', collection_name)
-        collection_name = collection_name[:50]
-        if len(collection_name) < 3:
-            collection_name = collection_name + 'xyz'
-        if not collection_name[0].isalnum():
-            collection_name = 'A' + collection_name[1:]
-        if not collection_name[-1].isalnum():
-            collection_name = collection_name[:-1] + 'Z'
-        return collection_name
-    except Exception as e:
-        print(f"Error creating collection name: {e}")
-        return "default-collection"
-
-def load_doc(list_file_path, chunk_size, chunk_overlap):
-    try:
-        print("Starting to load documents")
-        pages = []
-        for file_path in list_file_path:
-            if file_path.endswith('.xlsx'):
-                df = pd.read_excel(file_path)
-                for _, row in df.iterrows():
-                    pages.append(Document(page_content=row.to_string()))
-
-        text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
-        doc_splits = text_splitter.split_documents(pages)
-        return doc_splits
-    except Exception as e:
-        print(f"Error loading documents: {e}")
-        return []
-
-def create_db(splits, collection_name):
-    try:
-        print("Starting DB creation")
-        embedding = HuggingFaceEmbeddings()
-        new_client = chromadb.EphemeralClient()
-        vectordb = Chroma.from_documents(
-            documents=splits,
-            embedding=embedding,
-            client=new_client,
-            collection_name=collection_name,
-        )
-        return vectordb
-    except Exception as e:
-        print(f"Error creating database: {e}")
-        return None
-
-def initialize_database(list_file_path, chunk_size, chunk_overlap, progress=gr.Progress()):
-    try:
-        print("Starting database initialization")
-        collection_name = create_collection_name(list_file_path[0])
-        doc_splits = load_doc(list_file_path, chunk_size, chunk_overlap)
-        vector_db = create_db(doc_splits, collection_name)
-        if vector_db:
-            print("Database initialization succeeded")
-            return vector_db, collection_name, "Complete!"
-        else:
-            print("Database creation failed")
-            return None, collection_name, "Failed"
-    except Exception as e:
-        print(f"Error initializing database: {e}")
-        return None, None, "Failed"
-
-# Lay out the long-text result on the left and the chatbot interface on the right
-iface = gr.Blocks()
-
-with iface:
-    with gr.Row():
-        with gr.Column():
-            uploaded_file = gr.File(file_count="single", type="binary", label="Upload an Excel file")
-            analysis_status = gr.Textbox(label="Analysis status [depending on the data, this may take 3 minutes or longer]", value="", lines=1, interactive=False)
-            long_text_output = gr.Textbox(label="We analyze 10 pros and cons of the product.", lines=27, interactive=False)
-            uploaded_file.upload(long_text_result, inputs=uploaded_file, outputs=analysis_status)
-        with gr.Column():
-            chatbot_input = gr.Textbox(label="Chatbot input", placeholder="Ask the chatbot for further detailed analysis of this product.")
-            chatbot_examples = gr.Dropdown(
-                ["Analyze 20 satisfied and 20 dissatisfied points regarding functionality.",
-                 "Analyze 20 satisfied and 20 dissatisfied points regarding design.",
-                 "Analyze 20 satisfied and 20 dissatisfied points regarding emotional appeal.",
-                 "Please do 20 more."],
-                label="Choose a chatbot example prompt"
-            )
-            chatbot_output = gr.Textbox(label="Chatbot response", lines=20)  # Make the response box tall
-
-    with gr.Row():
-        chatbot_button = gr.Button("Ask the chatbot")
-        clear_button = gr.Button("Clear all")
-
-    chatbot_button.click(chatbot_response, inputs=chatbot_input, outputs=chatbot_output)
-    clear_button.click(fn=lambda: "", inputs=None, outputs=chatbot_output)  # Reset the response box when "Clear all" is clicked
-    chatbot_examples.change(fn=lambda x: x, inputs=chatbot_examples, outputs=chatbot_input)
-
-if __name__ == "__main__":
-    iface.launch()
 import gradio as gr
+from PIL import Image
+import numpy as np
+import cv2
 import os
+
+def image_to_video(image):
+    # Convert the image to a NumPy array
+    image_array = np.array(image)
+
+    # Set the output path and file name for the video
+    output_path = '/mnt/data/output_video.mp4'
+
+    # Configure the video writer
+    height, width, layers = image_array.shape
+    size = (width, height)
+    video = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), 1, size)
+
+    # Generate a 5-second video from the image (5 frames, 1 second each)
+    for _ in range(5):  # generate 5 frames
+        video.write(image_array)
+
+    # Finish writing the video
+    video.release()
+
+    return output_path
+
+def setup_interface():
+    # Build the Gradio interface
+    with gr.Blocks() as demo:
+        gr.Markdown("### Upload an image to generate a 5-second video.")
+
+        with gr.Row():
+            image_input = gr.Image(type="pil")
+            video_output = gr.Video(label="Generated video")
+
+        image_input.change(image_to_video, inputs=image_input, outputs=video_output)
+
+    return demo
+
+# Run the interface
+demo = setup_interface()
+demo.launch()
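
As committed, image_to_video hands cv2.VideoWriter the PIL image's RGB array, while OpenCV expects BGR frames (colors come out swapped), assumes a 3-channel input, and writes to a hardcoded /mnt/data/ path that may not be writable in a Space. A minimal sketch of a more defensive variant follows; the name image_to_video_safe, the tempfile output path, and the seconds/fps parameters are editorial assumptions, not part of this commit:

import tempfile

import cv2
import numpy as np
from PIL import Image

def image_to_video_safe(image: Image.Image, seconds: int = 5, fps: int = 1) -> str:
    # Flatten any alpha channel, then convert PIL's RGB order to OpenCV's BGR.
    frame = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)

    # Write to a temporary file rather than a hardcoded /mnt/data/ path.
    output_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name

    height, width, _ = frame.shape
    video = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
    for _ in range(seconds * fps):  # at the defaults: 5 frames at 1 fps, about 5 seconds
        video.write(frame)
    video.release()

    return output_path

With the defaults this mirrors the committed behavior (five 1-fps frames), and the returned temp-file path is one that gr.Video can serve regardless of whether /mnt/data exists.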