Compare-RAG-CHAT

Sleeping

App Files Files Community

openfree committed 7 days ago

Commit

a03145d

verified ·

1 Parent(s): ea4e802

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -36

app.py CHANGED Viewed

@@ -28,7 +28,7 @@ except Exception as e:
 ###############################
 json_path = "/home/user/magic-pdf.json"
 if os.path.exists(json_path):
-    # 기존에 파일이 있으면 로드
     with open(json_path, 'r', encoding='utf-8') as file:
         data = json.load(file)
 else:
@@ -42,17 +42,15 @@ else:
             }
         }
     }
-    # 파일 생성 (필요 없으면 생략)
     with open(json_path, 'w', encoding='utf-8') as file:
         json.dump(data, file, indent=4)
-# 이후 device-mode나 llm-aided-config 필요한 경우 수정
-data['device-mode'] = "cuda"  # 원하는 디바이스로 세팅
 if os.getenv('apikey'):
     data['llm-aided-config']['title_aided']['api_key'] = os.getenv('apikey')
     data['llm-aided-config']['title_aided']['enable'] = True
-# 변경사항 다시 저장
 with open(json_path, 'w', encoding='utf-8') as file:
     json.dump(data, file, indent=4)
@@ -60,14 +58,14 @@ with open(json_path, 'w', encoding='utf-8') as file:
 os.system('cp -r paddleocr /home/user/.paddleocr')
 ###############################
-# 그 외 라이브러리
 ###############################
 import gradio as gr
 from loguru import logger
 from gradio_pdf import PDF
 ###############################
-# magic_pdf 관련 모듈
 ###############################
 from magic_pdf.data.data_reader_writer import FileBasedDataReader
 from magic_pdf.libs.hash_utils import compute_sha256
@@ -174,8 +172,8 @@ def replace_image_with_base64(markdown_text, image_dir_path):
 def to_pdf(file_path):
     """
-    이미지(JPG/PNG 등)를 PDF로 컨버팅.
-    TXT, CSV 파일인 경우 변환 없이 원본 경로를 그대로 반환.
     """
     ext = Path(file_path).suffix.lower()
     if ext in ['.txt', '.csv']:
@@ -193,8 +191,7 @@ def to_pdf(file_path):
 def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language, progress=gr.Progress(track_tqdm=False)):
     """
-    업로드된 PDF/이미지 또는 TXT/CSV -> 마크다운 변환
-    (프로그레스 바 표시용)
     """
     ext = Path(file_path).suffix.lower()
     if ext in ['.txt', '.csv']:
@@ -238,9 +235,7 @@ def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table
 def to_markdown_comparison(file_a, file_b, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language, progress=gr.Progress(track_tqdm=False)):
     """
-    두 개의 파일을 변환하여 A/B 비교용 마크다운 생성.
-    각 파일은 "문서 A", "문서 B" 헤더로 구분되며,
-    두 파일 모두 업로드된 경우 추가로 비교 분석 지시사항을 포함한다.
     """
     combined_md = ""
     if file_a is not None:
@@ -307,6 +302,7 @@ def format_chat_history(messages: list) -> list:
     """
     formatted_history = []
     for message in messages:
         if not (message.role == "assistant" and hasattr(message, "metadata")):
             formatted_history.append({
                 "role": "user" if message.role == "user" else "assistant",
@@ -326,7 +322,7 @@ def convert_chat_messages_to_gradio_format(messages):
                 gradio_chat.append((user_text or "", assistant_text or ""))
             user_text = msg.content
             assistant_text = None
-        else:
             if user_text is None:
                 user_text = ""
             if assistant_text is None:
@@ -337,20 +333,22 @@ def convert_chat_messages_to_gradio_format(messages):
         gradio_chat.append((user_text or "", assistant_text or ""))
     return gradio_chat
-def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
     """
-    Gemini 응답을 스트리밍 형태로 출력 (user_message 공백 시 임시 문구 사용)
     """
-    if not user_message.strip():
-        user_message = "...(No content from user)..."
     try:
-        print(f"\n=== [Gemini] New Request ===\nUser message: '{user_message}'")
         chat_history = format_chat_history(messages)
         chat = model.start_chat(history=chat_history)
-        response = chat.send_message(user_message, stream=True)
         thought_buffer = ""
         response_buffer = ""
         thinking_complete = False
         # "Thinking" 역할 추가
         messages.append(
             ChatMessage(
@@ -360,10 +358,13 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
             )
         )
         yield convert_chat_messages_to_gradio_format(messages)
         for chunk in response:
             parts = chunk.candidates[0].content.parts
             current_chunk = parts[0].text
             if len(parts) == 2 and not thinking_complete:
                 thought_buffer += current_chunk
                 messages[-1] = ChatMessage(
                     role="assistant",
@@ -371,13 +372,17 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
                     metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
                 )
                 yield convert_chat_messages_to_gradio_format(messages)
                 response_buffer = parts[1].text
                 messages.append(ChatMessage(role="assistant", content=response_buffer))
                 thinking_complete = True
             elif thinking_complete:
                 response_buffer += current_chunk
                 messages[-1] = ChatMessage(role="assistant", content=response_buffer)
             else:
                 thought_buffer += current_chunk
                 messages[-1] = ChatMessage(
                     role="assistant",
@@ -385,26 +390,42 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
                     metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
                 )
             yield convert_chat_messages_to_gradio_format(messages)
         print(f"\n=== [Gemini] Final Response ===\n{response_buffer}")
     except Exception as e:
         print(f"\n=== [Gemini] Error ===\n{str(e)}")
         messages.append(ChatMessage(role="assistant", content=f"오류가 발생했습니다: {str(e)}"))
         yield convert_chat_messages_to_gradio_format(messages)
-def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
     """
-    Combine the query with the converted document text (a string) and append it to history.
     """
     if doc_text.strip():
         user_query = f"다음 문서를 참고하여 답변:\n\n{doc_text}\n\n질문: {msg}"
     else:
         user_query = msg
     history.append(ChatMessage(role="user", content=user_query))
-    return "", history
 def reset_states(file_a, file_b):
     """
-    Reset values for a new file upload: an empty list and two empty strings (intended for chat_history, md_state, chatbot).
     """
     return [], "", ""
@@ -421,13 +442,12 @@ if __name__ == "__main__":
                 한 파일만 업로드하면 해당 파일로 분석합니다.</p>
             </div>
             """)
-            # 변환 결과를 보여줄 visible Markdown 컴포넌트
             conversion_md = gr.Markdown(label="변환 결과", visible=True)
-            md_state = gr.State("")      # 내부 상태 (문서 변환 결과 저장)
             chat_history = gr.State([])  # ChatMessage 리스트
-            # visible Chatbot 컴포넌트
             chatbot = gr.Chatbot(visible=True)
             with gr.Row():
@@ -474,24 +494,25 @@ if __name__ == "__main__":
                 chat_input = gr.Textbox(lines=1, placeholder="질문을 입력하세요...")
                 clear_btn = gr.Button("대화 초기화")
             chat_input.submit(
                 fn=user_message,
                 inputs=[chat_input, chat_history, conversion_md],
-                outputs=[chat_input, chat_history]
             ).then(
                 fn=stream_gemini_response,
-                inputs=[chat_input, chat_history],
                 outputs=chatbot
             )
-            def clear_all():
-                return [], "", ""
             clear_btn.click(
                 fn=clear_all,
                 inputs=[],
                 outputs=[chat_history, md_state, chatbot]
             )
-    # 로컬 서버 실행
-    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, ssr_mode=True)

 ###############################
 json_path = "/home/user/magic-pdf.json"
 if os.path.exists(json_path):
+    # 기존 파일 로드
     with open(json_path, 'r', encoding='utf-8') as file:
         data = json.load(file)
 else:
             }
         }
     }
     with open(json_path, 'w', encoding='utf-8') as file:
         json.dump(data, file, indent=4)
+# 필요 시 업데이트
+data['device-mode'] = "cuda"  # "cpu" 등으로 수정 가능
 if os.getenv('apikey'):
     data['llm-aided-config']['title_aided']['api_key'] = os.getenv('apikey')
     data['llm-aided-config']['title_aided']['enable'] = True
 with open(json_path, 'w', encoding='utf-8') as file:
     json.dump(data, file, indent=4)
 os.system('cp -r paddleocr /home/user/.paddleocr')
 ###############################
+# 기타 라이브러리
 ###############################
 import gradio as gr
 from loguru import logger
 from gradio_pdf import PDF
 ###############################
+# magic_pdf 관련
 ###############################
 from magic_pdf.data.data_reader_writer import FileBasedDataReader
 from magic_pdf.libs.hash_utils import compute_sha256
 def to_pdf(file_path):
     """
+    이미지(JPG/PNG 등)를 PDF로 변환.
+    TXT, CSV 파일이면 그대로 경로 반환.
     """
     ext = Path(file_path).suffix.lower()
     if ext in ['.txt', '.csv']:
 def to_markdown(file_path, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language, progress=gr.Progress(track_tqdm=False)):
     """
+    업로드된 PDF/이미지/TXT/CSV -> 마크다운 변환
     """
     ext = Path(file_path).suffix.lower()
     if ext in ['.txt', '.csv']:
 def to_markdown_comparison(file_a, file_b, end_pages, is_ocr, layout_mode, formula_enable, table_enable, language, progress=gr.Progress(track_tqdm=False)):
     """
+    두 파일을 변환 -> A/B 비교용 마크다운
     """
     combined_md = ""
     if file_a is not None:
     """
     formatted_history = []
     for message in messages:
+        # Thinking 역할(assistant+metadata)은 제외
         if not (message.role == "assistant" and hasattr(message, "metadata")):
             formatted_history.append({
                 "role": "user" if message.role == "user" else "assistant",
                 gradio_chat.append((user_text or "", assistant_text or ""))
             user_text = msg.content
             assistant_text = None
+        else:  # assistant
             if user_text is None:
                 user_text = ""
             if assistant_text is None:
         gradio_chat.append((user_text or "", assistant_text or ""))
     return gradio_chat
+def stream_gemini_response(user_query: str, messages: list) -> Iterator[list]:
     """
+    Gemini 응답 스트리밍
     """
+    if not user_query.strip():
+        user_query = "...(No content from user)..."
     try:
+        print(f"\n=== [Gemini] New Request ===\nUser message: '{user_query}'")
         chat_history = format_chat_history(messages)
         chat = model.start_chat(history=chat_history)
+        response = chat.send_message(user_query, stream=True)
         thought_buffer = ""
         response_buffer = ""
         thinking_complete = False
         # "Thinking" 역할 추가
         messages.append(
             ChatMessage(
             )
         )
         yield convert_chat_messages_to_gradio_format(messages)
         for chunk in response:
             parts = chunk.candidates[0].content.parts
             current_chunk = parts[0].text
             if len(parts) == 2 and not thinking_complete:
+                # 첫 번째 파트 = 'Thinking'
                 thought_buffer += current_chunk
                 messages[-1] = ChatMessage(
                     role="assistant",
                     metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
                 )
                 yield convert_chat_messages_to_gradio_format(messages)
+                # 두 번째 파트 = 최종 답변
                 response_buffer = parts[1].text
                 messages.append(ChatMessage(role="assistant", content=response_buffer))
                 thinking_complete = True
             elif thinking_complete:
+                # 이미 최종 답변 들어간 상태
                 response_buffer += current_chunk
                 messages[-1] = ChatMessage(role="assistant", content=response_buffer)
             else:
+                # 아직 'Thinking' 중
                 thought_buffer += current_chunk
                 messages[-1] = ChatMessage(
                     role="assistant",
                     metadata={"title": "⚙️ Thinking: *The thoughts produced by the model are experimental"}
                 )
             yield convert_chat_messages_to_gradio_format(messages)
         print(f"\n=== [Gemini] Final Response ===\n{response_buffer}")
     except Exception as e:
         print(f"\n=== [Gemini] Error ===\n{str(e)}")
         messages.append(ChatMessage(role="assistant", content=f"오류가 발생했습니다: {str(e)}"))
         yield convert_chat_messages_to_gradio_format(messages)
+def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list, str]:
     """
+    - msg: text the user typed into the input box
+    - doc_text: converted document text (conversion_md)
+    - history: list of ChatMessage objects
+    return:
+      (1) the value that clears the UI input box (""),
+      (2) the updated history,
+      (3) the user_query actually sent to the LLM
     """
     if doc_text.strip():
         user_query = f"다음 문서를 참고하여 답변:\n\n{doc_text}\n\n질문: {msg}"
     else:
         user_query = msg
     history.append(ChatMessage(role="user", content=user_query))
+    return "", history, user_query
 def reset_states(file_a, file_b):
     """
+    Reset the conversation when a new file is uploaded
+    """
+    return [], "", ""
+def clear_all():
+    """
+    Reset the entire conversation: returns empty values for chat_history, md_state and chatbot
     """
     return [], "", ""
                 한 파일만 업로드하면 해당 파일로 분석합니다.</p>
             </div>
             """)
             conversion_md = gr.Markdown(label="변환 결과", visible=True)
+            md_state = gr.State("")      # 문서 변환 결과
             chat_history = gr.State([])  # ChatMessage 리스트
+            user_query_holder = gr.State("")  # (수정) user_query 임시 저장
             chatbot = gr.Chatbot(visible=True)
             with gr.Row():
                 chat_input = gr.Textbox(lines=1, placeholder="질문을 입력하세요...")
                 clear_btn = gr.Button("대화 초기화")
+            # (수정) user_message -> (chat_input, chat_history, conversion_md)
+            # => outputs=[chat_input, chat_history, user_query_holder]
+            # 이 중 user_query_holder(실제 질의)는 stream_gemini_response로 전달
             chat_input.submit(
                 fn=user_message,
                 inputs=[chat_input, chat_history, conversion_md],
+                outputs=[chat_input, chat_history, user_query_holder]
             ).then(
                 fn=stream_gemini_response,
+                inputs=[user_query_holder, chat_history],
                 outputs=chatbot
             )
             clear_btn.click(
                 fn=clear_all,
                 inputs=[],
                 outputs=[chat_history, md_state, chatbot]
             )
+    # demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, ssr_mode=True)
+    # 공유 링크를 원할 경우 share=True 설정
+    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, ssr_mode=True, share=False)