Compare-RAG-CHAT

Sleeping

App Files Files Community

openfree committed 6 days ago

Commit

ea4e802

verified ·

1 Parent(s): 275364c

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -12

app.py CHANGED Viewed

@@ -11,22 +11,52 @@ import pymupdf
 ###############################
 # 환경 설정
 ###############################
 os.system('pip uninstall -y magic-pdf')
 os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
 os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
-os.system('python download_models_hf.py')
-with open('/home/user/magic-pdf.json', 'r') as file:
-    data = json.load(file)
-data['device-mode'] = "cuda"
 if os.getenv('apikey'):
     data['llm-aided-config']['title_aided']['api_key'] = os.getenv('apikey')
     data['llm-aided-config']['title_aided']['enable'] = True
-with open('/home/user/magic-pdf.json', 'w') as file:
     json.dump(data, file, indent=4)
 os.system('cp -r paddleocr /home/user/.paddleocr')
 ###############################
@@ -145,7 +175,7 @@ def replace_image_with_base64(markdown_text, image_dir_path):
 def to_pdf(file_path):
     """
     이미지(JPG/PNG 등)를 PDF로 컨버팅.
-    TXT, CSV 파일인 경우 변환 없이 원본 경로를 반환한다.
     """
     ext = Path(file_path).suffix.lower()
     if ext in ['.txt', '.csv']:
@@ -286,7 +316,7 @@ def format_chat_history(messages: list) -> list:
 def convert_chat_messages_to_gradio_format(messages):
     """
-    ChatMessage list -> [ (유저발화, 봇응답), (...), ... ]
     """
     gradio_chat = []
     user_text, assistant_text = None, None
@@ -309,7 +339,7 @@ def convert_chat_messages_to_gradio_format(messages):
 def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
     """
-    Gemini 응답 스트리밍 (user_message가 공백이면 기본 문구로 대체)
     """
     if not user_message.strip():
         user_message = "...(No content from user)..."
@@ -358,12 +388,12 @@ def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
         print(f"\n=== [Gemini] Final Response ===\n{response_buffer}")
     except Exception as e:
         print(f"\n=== [Gemini] Error ===\n{str(e)}")
-        messages.append(ChatMessage(role="assistant", content=f"I encountered an error: {str(e)}"))
         yield convert_chat_messages_to_gradio_format(messages)
 def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
     """
-    입력 프롬프트와 변환된 문서를 활용해 질문을 구성
     """
     if doc_text.strip():
         user_query = f"다음 문서를 참고하여 답변:\n\n{doc_text}\n\n질문: {msg}"
@@ -374,7 +404,7 @@ def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
 def reset_states(file_a, file_b):
     """
-    새 파일 업로드 시 chat_history와 md_state를 초기화
     """
     return [], "", ""
@@ -394,7 +424,7 @@ if __name__ == "__main__":
             # 변환 결과를 보여줄 visible Markdown 컴포넌트
             conversion_md = gr.Markdown(label="변환 결과", visible=True)
-            md_state = gr.State("")      # 내부 상태 (문서 변환 결과)
             chat_history = gr.State([])  # ChatMessage 리스트
             # visible Chatbot 컴포넌트
@@ -463,4 +493,5 @@ if __name__ == "__main__":
                 outputs=[chat_history, md_state, chatbot]
             )
     demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, ssr_mode=True)

 ###############################
 # 환경 설정
 ###############################
+# 필요 없다면 주석 처리 가능
 os.system('pip uninstall -y magic-pdf')
 os.system('pip install git+https://github.com/opendatalab/MinerU.git@dev')
 os.system('wget https://github.com/opendatalab/MinerU/raw/dev/scripts/download_models_hf.py -O download_models_hf.py')
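+# Model download. NOTE: os.system() returns the command's exit status instead of raising, so a failed download must be detected from that return value rather than with try/except; in an environment without network access, comment this step out and place the models manually.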
+try:
+    os.system('python download_models_hf.py')
+except Exception as e:
+    print("모델 다운로드 중 에러가 발생했습니다. 네트워크 연결을 확인하거나, 수동으로 모델을 배치하세요.")
+    print("에러 메시지:", e)
+###############################
+# magic-pdf.json 처리
+###############################
+json_path = "/home/user/magic-pdf.json"
+if os.path.exists(json_path):
+    # 기존에 파일이 있으면 로드
+    with open(json_path, 'r', encoding='utf-8') as file:
+        data = json.load(file)
+else:
+    # 없으면 기본값 생성
+    data = {
+        "device-mode": "cuda",  # CPU만 쓰려면 "cpu"
+        "llm-aided-config": {
+            "title_aided": {
+                "api_key": os.getenv('apikey', ""),
+                "enable": bool(os.getenv('apikey'))
+            }
+        }
+    }
+    # 파일 생성 (필요 없으면 생략)
+    with open(json_path, 'w', encoding='utf-8') as file:
+        json.dump(data, file, indent=4)
+# 이후 device-mode나 llm-aided-config 필요한 경우 수정
+data['device-mode'] = "cuda"  # 원하는 디바이스로 세팅
 if os.getenv('apikey'):
     data['llm-aided-config']['title_aided']['api_key'] = os.getenv('apikey')
     data['llm-aided-config']['title_aided']['enable'] = True
+# 변경사항 다시 저장
+with open(json_path, 'w', encoding='utf-8') as file:
     json.dump(data, file, indent=4)
+# paddleocr 복사
 os.system('cp -r paddleocr /home/user/.paddleocr')
 ###############################
 def to_pdf(file_path):
     """
     이미지(JPG/PNG 등)를 PDF로 컨버팅.
+    TXT, CSV 파일인 경우 변환 없이 원본 경로를 그대로 반환.
     """
     ext = Path(file_path).suffix.lower()
     if ext in ['.txt', '.csv']:
 def convert_chat_messages_to_gradio_format(messages):
     """
+    ChatMessage list -> [(유저발화, 봇응답), ...] 형태로 변환
     """
     gradio_chat = []
     user_text, assistant_text = None, None
 def stream_gemini_response(user_message: str, messages: list) -> Iterator[list]:
     """
+    Gemini 응답을 스트리밍 형태로 출력 (user_message 공백 시 임시 문구 사용)
     """
     if not user_message.strip():
         user_message = "...(No content from user)..."
         print(f"\n=== [Gemini] Final Response ===\n{response_buffer}")
     except Exception as e:
         print(f"\n=== [Gemini] Error ===\n{str(e)}")
+        messages.append(ChatMessage(role="assistant", content=f"오류가 발생했습니다: {str(e)}"))
         yield convert_chat_messages_to_gradio_format(messages)
 def user_message(msg: str, history: list, doc_text: str) -> tuple[str, list]:
     """
+    문서 변환 결과(문자열)와 함께 질의를 결합하여 history에 추가
     """
     if doc_text.strip():
         user_query = f"다음 문서를 참고하여 답변:\n\n{doc_text}\n\n질문: {msg}"
 def reset_states(file_a, file_b):
     """
+    새 파일 업로드 시 chat_history, md_state, chatbot을 초기화
     """
     return [], "", ""
             # 변환 결과를 보여줄 visible Markdown 컴포넌트
             conversion_md = gr.Markdown(label="변환 결과", visible=True)
+            md_state = gr.State("")      # 내부 상태 (문서 변환 결과 저장)
             chat_history = gr.State([])  # ChatMessage 리스트
             # visible Chatbot 컴포넌트
                 outputs=[chat_history, md_state, chatbot]
             )
+    # 로컬 서버 실행
     demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, ssr_mode=True)