Robo-Beam

Running

App Files Files Community

seawolf2357 committed on Mar 16

Commit

8fa1eef

1 Parent(s): 039ce92

Update app.py

Browse files

Files changed (1) hide show

app.py +126 -63

app.py CHANGED Viewed

@@ -14,8 +14,13 @@ from loguru import logger
 from PIL import Image
 from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
-# [PDF] PyPDF2 추가
-import PyPDF2
 model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
 processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
@@ -26,6 +31,51 @@ model = Gemma3ForConditionalGeneration.from_pretrained(
 MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
 def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
     image_count = 0
     video_count = 0
@@ -52,15 +102,20 @@ def count_files_in_history(history: list[dict]) -> tuple[int, int]:
 def validate_media_constraints(message: dict, history: list[dict]) -> bool:
     """
-    이미지/비디오 개수와 혼합 여부 등을 검사하는 함수.
-    PDF는 검사 로직에서 제외하여 업로드만 허용.
     """
-    # [PDF] PDF 파일 제외 처리
-    pdf_files = [f for f in message["files"] if f.endswith(".pdf")]
-    non_pdf_files = [f for f in message["files"] if not f.endswith(".pdf")]
-    # 기존 로직은 non_pdf_files(= 이미지/비디오)에 대해서만 체크
-    new_image_count, new_video_count = count_files_in_new_message(non_pdf_files)
     history_image_count, history_video_count = count_files_in_history(history)
     image_count = history_image_count + new_image_count
     video_count = history_video_count + new_video_count
@@ -75,25 +130,19 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
         if "<image>" in message["text"]:
             gr.Warning("Using <image> tags with video files is not supported.")
             return False
-        # TODO: Add frame count validation for videos similar to image count limits  # noqa: FIX002, TD002, TD003
     if video_count == 0 and image_count > MAX_NUM_IMAGES:
         gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
         return False
-    # [PDF] PDF 갯수 제한(필요하다면)도 추가 가능
-    # 일단 제한은 두지 않고 바로 True 반환
-    # <image> 태그가 있을 경우, 이미지 개수와 매칭 검사
-    if "<image>" in message["text"]:
-        # new_image_count는 pdf 제외된 이미지 수
-        if message["text"].count("<image>") != new_image_count:
-            gr.Warning("The number of <image> tags in the text does not match the number of images.")
-            return False
     return True
 def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
     vidcap = cv2.VideoCapture(video_path)
     fps = vidcap.get(cv2.CAP_PROP_FPS)
@@ -128,6 +177,9 @@ def process_video(video_path: str) -> list[dict]:
     return content
 def process_interleaved_images(message: dict) -> list[dict]:
     logger.debug(f"{message['files']=}")
     parts = re.split(r"(<image>)", message["text"])
@@ -149,52 +201,40 @@ def process_interleaved_images(message: dict) -> list[dict]:
     return content
-# [PDF] Added a PDF -> Markdown conversion function
-def pdf_to_markdown(pdf_path: str) -> str:
-    """
-    Extract the text of a PDF file and return it as simple Markdown.
-
-    Each page with non-empty extracted text becomes a "## Page N" section
-    (N counted from 1); pages with no text are left out. Sections are
-    joined with newlines, so a PDF with no extractable text gives "".
-    """
-    text_chunks = []
-    with open(pdf_path, "rb") as f:
-        reader = PyPDF2.PdfReader(f)
-        for page_num, page in enumerate(reader.pages, start=1):
-            page_text = page.extract_text()
-            # Treat a falsy extract_text() result as an empty string.
-            page_text = page_text.strip() if page_text else ""
-            if page_text:
-                # Combine a short per-page header and the page body as Markdown
-                text_chunks.append(f"## Page {page_num}\n\n{page_text}\n")
-    return "\n".join(text_chunks)
 def process_new_user_message(message: dict) -> list[dict]:
     """
-    새 user message에서 text, 파일(이미지/비디오/PDF)을 처리.
     """
     if not message["files"]:
         return [{"type": "text", "text": message["text"]}]
-    # [PDF] PDF 파일 목록
-    pdf_files = [f for f in message["files"] if f.endswith(".pdf")]
-    # 이미지·비디오 목록
-    other_files = [f for f in message["files"] if not f.endswith(".pdf")]
-    # 일단 사용자의 text를 가장 먼저 넣는다
     content_list = [{"type": "text", "text": message["text"]}]
-    # PDF 변환 후 추가
-    for pdf_path in pdf_files:
-        pdf_markdown = pdf_to_markdown(pdf_path)
-        if pdf_markdown.strip():
-            content_list.append({"type": "text", "text": pdf_markdown})
-        else:
-            content_list.append({"type": "text", "text": "(PDF에서 텍스트 추출 실패)"})
-    # 영상이 있는지 확인
-    video_files = [f for f in other_files if f.endswith(".mp4")]
     if video_files:
-        # 비디오는 한 개만 처리한다는 전제 (validate_media_constraints에서 이미 검사)
-        # 여러 개일 경우 첫 번째 것만 처리하거나, 경고 처리
         content_list += process_video(video_files[0])
         return content_list
@@ -203,13 +243,16 @@ def process_new_user_message(message: dict) -> list[dict]:
         return process_interleaved_images(message)
     # 일반 이미지(여러 장)
-    image_files = [f for f in other_files if not f.endswith(".mp4")]
     if image_files:
-        content_list += [{"type": "image", "url": path} for path in image_files]
     return content_list
 def process_history(history: list[dict]) -> list[dict]:
     messages = []
     current_user_content: list[dict] = []
@@ -228,6 +271,9 @@ def process_history(history: list[dict]) -> list[dict]:
     return messages
 @spaces.GPU(duration=120)
 def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
     if not validate_media_constraints(message, history):
@@ -263,6 +309,9 @@ def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tok
         yield output
 examples = [
     [
         {
@@ -386,21 +435,35 @@ examples = [
 ]
-# [PDF] .pdf 허용
 demo = gr.ChatInterface(
     fn=run,
     type="messages",
     chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
     textbox=gr.MultimodalTextbox(
-        file_types=["image", ".mp4", ".pdf"],  # [PDF] 허용
         file_count="multiple",
         autofocus=True
     ),
     multimodal=True,
     additional_inputs=[
-        gr.Textbox(label="System Prompt", value="ou are a deeply thoughtful AI. Consider problems thoroughly and derive correct solutions through systematic reasoning. Please answer in korean."),
-        gr.Slider(label="Max New Tokens", minimum=100, maximum=8000, step=50, value=2000),
     ],
     stop_btn=False,
     title="Gemma 3 27B IT",

 from PIL import Image
 from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
+# [CSV/TXT 분석용]
+import pandas as pd
+##################################################
+# 전체 전문을 넘기되, 너무 클 경우 잘라내기 위한 상수
+##################################################
+MAX_CONTENT_CHARS = 8000  # 예: 8000자 초과 시 잘라냄
 model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
 processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
 MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
+##################################################
+# CSV/TXT 전문 처리 함수
+##################################################
+def analyze_csv_file(path: str) -> str:
+    """Render a CSV file as text for inclusion in the model prompt.
+
+    The file is loaded with ``pd.read_csv`` and rendered with
+    ``DataFrame.to_string()``. Output longer than ``MAX_CONTENT_CHARS``
+    characters is cut at that length and a truncation marker is appended.
+
+    Args:
+        path: Path of the CSV file to read.
+
+    Returns:
+        A bold ``[CSV File: <basename>]`` header, a blank line, then the
+        table text; or a ``Failed to read CSV (...)`` message if any
+        exception is raised while reading or rendering.
+    """
+    try:
+        table_text = pd.read_csv(path).to_string()
+        too_long = len(table_text) > MAX_CONTENT_CHARS
+        if too_long:
+            table_text = table_text[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
+        return f"**[CSV File: {os.path.basename(path)}]**\n\n{table_text}"
+    except Exception as e:
+        # Best effort: report the failure as text instead of raising.
+        return f"Failed to read CSV ({os.path.basename(path)}): {e!s}"
+def analyze_txt_file(path: str) -> str:
+    """Return the contents of a UTF-8 text file, formatted for the model prompt.
+
+    At most ``MAX_CONTENT_CHARS`` characters of the file are kept; longer
+    files are cut at that length and a truncation marker is appended.
+
+    Args:
+        path: Path of the text file to read.
+
+    Returns:
+        A bold ``[TXT File: <basename>]`` header, a blank line, then the
+        text; or a ``Failed to read TXT (...)`` message if any exception is
+        raised while opening, decoding or reading the file.
+    """
+    try:
+        with open(path, "r", encoding="utf-8") as f:
+            # Read one character past the limit: enough to detect an
+            # over-long file without loading all of it into memory.
+            text = f.read(MAX_CONTENT_CHARS + 1)
+        if len(text) > MAX_CONTENT_CHARS:
+            text = text[:MAX_CONTENT_CHARS] + "\n...(truncated)..."
+        return (
+            f"**[TXT File: {os.path.basename(path)}]**\n\n"
+            f"{text}"
+        )
+    except Exception as e:
+        # Best effort: report the failure as text instead of raising.
+        return f"Failed to read TXT ({os.path.basename(path)}): {e}"
+##################################################
+# 기존 미디어 파일 검사 로직 (이미지/비디오)
+##################################################
 def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
     image_count = 0
     video_count = 0
 def validate_media_constraints(message: dict, history: list[dict]) -> bool:
     """
+    - 비디오 1개 초과 불가
+    - 비디오/이미지 혼합 불가
+    - 이미지 개수 MAX_NUM_IMAGES 초과 불가
+    - <image> 태그가 있으면 태그 수와 이미지 수 일치
+    CSV, TXT, PDF 등은 여기서 제한하지 않음.
     """
+    media_files = []
+    for f in message["files"]:
+        # mp4나 대표 이미지 확장자만 검사
+        # (파일명에 .png / .jpg / .gif / .webp 등 있을 때)
+        if f.endswith(".mp4") or re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE):
+            media_files.append(f)
+    new_image_count, new_video_count = count_files_in_new_message(media_files)
     history_image_count, history_video_count = count_files_in_history(history)
     image_count = history_image_count + new_image_count
     video_count = history_video_count + new_video_count
         if "<image>" in message["text"]:
             gr.Warning("Using <image> tags with video files is not supported.")
             return False
     if video_count == 0 and image_count > MAX_NUM_IMAGES:
         gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
         return False
+    if "<image>" in message["text"] and message["text"].count("<image>") != new_image_count:
+        gr.Warning("The number of <image> tags in the text does not match the number of images.")
+        return False
     return True
+##################################################
+# 비디오 처리
+##################################################
 def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
     vidcap = cv2.VideoCapture(video_path)
     fps = vidcap.get(cv2.CAP_PROP_FPS)
     return content
+##################################################
+# interleaved <image> 태그 처리
+##################################################
 def process_interleaved_images(message: dict) -> list[dict]:
     logger.debug(f"{message['files']=}")
     parts = re.split(r"(<image>)", message["text"])
     return content
+##################################################
+# CSV, TXT 파일도 전문을 LLM에 넘기도록
+##################################################
 def process_new_user_message(message: dict) -> list[dict]:
     """
+    - mp4 -> 비디오 처리
+    - 이미지 -> interleaved or multiple
+    - CSV -> 전체 df.to_string() (너무 길면 잘라냄)
+    - TXT -> 전체 text (너무 길면 잘라냄)
     """
     if not message["files"]:
         return [{"type": "text", "text": message["text"]}]
+    # 확장자별 분류
+    video_files = [f for f in message["files"] if f.endswith(".mp4")]
+    image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
+    csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
+    txt_files = [f for f in message["files"] if f.lower().endswith(".txt")]
+    # 사용자 텍스트
     content_list = [{"type": "text", "text": message["text"]}]
+    # CSV 전문
+    for csv_path in csv_files:
+        csv_analysis = analyze_csv_file(csv_path)
+        content_list.append({"type": "text", "text": csv_analysis})
+    # TXT 전문
+    for txt_path in txt_files:
+        txt_analysis = analyze_txt_file(txt_path)
+        content_list.append({"type": "text", "text": txt_analysis})
+    # 비디오
     if video_files:
         content_list += process_video(video_files[0])
         return content_list
         return process_interleaved_images(message)
     # 일반 이미지(여러 장)
     if image_files:
+        for img_path in image_files:
+            content_list.append({"type": "image", "url": img_path})
     return content_list
+##################################################
+# history -> LLM 메시지 변환
+##################################################
 def process_history(history: list[dict]) -> list[dict]:
     messages = []
     current_user_content: list[dict] = []
     return messages
+##################################################
+# 메인 추론 함수
+##################################################
 @spaces.GPU(duration=120)
 def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
     if not validate_media_constraints(message, history):
         yield output
+##################################################
+# 예시 목록 (기존)
+##################################################
 examples = [
     [
         {
 ]
+##################################################
+# Gradio ChatInterface
+##################################################
 demo = gr.ChatInterface(
     fn=run,
     type="messages",
     chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
+    # 여기서 WEBP를 포함한 모든 이미지, mp4, csv, txt, pdf 허용
     textbox=gr.MultimodalTextbox(
+        file_types=["image/*", ".mp4", ".csv", ".txt", ".pdf"],
         file_count="multiple",
         autofocus=True
     ),
     multimodal=True,
     additional_inputs=[
+        gr.Textbox(
+            label="System Prompt",
+            value=(
+                "You are a deeply thoughtful AI. Consider problems thoroughly and derive "
+                "correct solutions through systematic reasoning. Please answer in korean."
+            )
+        ),
+        gr.Slider(
+            label="Max New Tokens",
+            minimum=100,
+            maximum=8000,
+            step=50,
+            value=2000
+        ),
     ],
     stop_btn=False,
     title="Gemma 3 27B IT",