Robo-Beam

Running

App Files Community

seawolf2357 committed on Mar 16

Commit

77f7fca

1 Parent(s): c80880c

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -57

app.py CHANGED Viewed

@@ -16,13 +16,15 @@ from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIter
 # CSV/TXT 분석
 import pandas as pd
-# PDF 텍스트 추출
 import PyPDF2
-MAX_CONTENT_CHARS = 8000  # 너무 큰 파일을 막기 위해 최대 표시 8000자
 model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
 processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
 model = Gemma3ForConditionalGeneration.from_pretrained(
     model_id,
@@ -35,12 +37,10 @@ MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5"))
 ##################################################
-# CSV, TXT, PDF 분석 함수
 ##################################################
 def analyze_csv_file(path: str) -> str:
-    """
-    CSV 파일을 전체 문자열로 변환. 너무 길 경우 일부만 표시.
-    """
     try:
         df = pd.read_csv(path)
         df_str = df.to_string()
@@ -52,9 +52,7 @@ def analyze_csv_file(path: str) -> str:
 def analyze_txt_file(path: str) -> str:
-    """
-    TXT 파일 전문 읽기. 너무 길면 일부만 표시.
-    """
     try:
         with open(path, "r", encoding="utf-8") as f:
             text = f.read()
@@ -66,9 +64,7 @@ def analyze_txt_file(path: str) -> str:
 def pdf_to_markdown(pdf_path: str) -> str:
-    """
-    PDF → Markdown. 페이지별로 간단히 텍스트 추출.
-    """
     text_chunks = []
     try:
         with open(pdf_path, "rb") as f:
@@ -89,7 +85,7 @@ def pdf_to_markdown(pdf_path: str) -> str:
 ##################################################
-# 이미지/비디오 업로드 제한 검사
 ##################################################
 def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
     image_count = 0
@@ -106,8 +102,10 @@ def count_files_in_history(history: list[dict]) -> tuple[int, int]:
     image_count = 0
     video_count = 0
     for item in history:
         if item["role"] != "user" or isinstance(item["content"], str):
             continue
         if item["content"][0].endswith(".mp4"):
             video_count += 1
         else:
@@ -117,17 +115,13 @@ def count_files_in_history(history: list[dict]) -> tuple[int, int]:
 def validate_media_constraints(message: dict, history: list[dict]) -> bool:
     """
-    - 비디오 1개 초과 불가
-    - 비디오와 이미지 혼합 불가
-    - 이미지 개수 MAX_NUM_IMAGES 초과 불가
-    - <image> 태그가 있으면 태그 수와 실제 이미지 수 일치
-    - CSV, TXT, PDF 등은 여기서 제한하지 않음
     """
     media_files = []
     for f in message["files"]:
-        # 이미지: png/jpg/jpeg/gif/webp
-        # 비디오: mp4
-        # cf) PDF, CSV, TXT 등은 제외
         if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4"):
             media_files.append(f)
@@ -136,9 +130,11 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
     image_count = history_image_count + new_image_count
     video_count = history_video_count + new_video_count
     if video_count > 1:
         gr.Warning("Only one video is supported.")
         return False
     if video_count == 1:
         if image_count > 0:
             gr.Warning("Mixing images and videos is not allowed.")
@@ -146,9 +142,11 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
         if "<image>" in message["text"]:
             gr.Warning("Using <image> tags with video files is not supported.")
             return False
     if video_count == 0 and image_count > MAX_NUM_IMAGES:
         gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
         return False
     if "<image>" in message["text"] and message["text"].count("<image>") != new_image_count:
         gr.Warning("The number of <image> tags in the text does not match the number of images.")
         return False
@@ -157,16 +155,16 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool:
 ##################################################
-# 비디오 처리
 ##################################################
 def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
     vidcap = cv2.VideoCapture(video_path)
     fps = vidcap.get(cv2.CAP_PROP_FPS)
     total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
     frame_interval = int(fps / 3)
-    frames = []
     for i in range(0, total_frames, frame_interval):
         vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
         success, image = vidcap.read()
@@ -175,7 +173,6 @@ def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
             pil_image = Image.fromarray(image)
             timestamp = round(i / fps, 2)
             frames.append((pil_image, timestamp))
     vidcap.release()
     return frames
@@ -183,8 +180,7 @@ def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
 def process_video(video_path: str) -> list[dict]:
     content = []
     frames = downsample_video(video_path)
-    for frame in frames:
-        pil_image, timestamp = frame
         with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
             pil_image.save(temp_file.name)
             content.append({"type": "text", "text": f"Frame {timestamp}:"})
@@ -194,7 +190,7 @@ def process_video(video_path: str) -> list[dict]:
 ##################################################
-# interleaved <image> 처리
 ##################################################
 def process_interleaved_images(message: dict) -> list[dict]:
     parts = re.split(r"(<image>)", message["text"])
@@ -207,55 +203,56 @@ def process_interleaved_images(message: dict) -> list[dict]:
         elif part.strip():
             content.append({"type": "text", "text": part.strip()})
         else:
-            # 공백이거나 \n 같은 경우
             if isinstance(part, str) and part != "<image>":
                 content.append({"type": "text", "text": part})
     return content
 ##################################################
-# PDF + CSV + TXT + 이미지/비디오
 ##################################################
 def process_new_user_message(message: dict) -> list[dict]:
     if not message["files"]:
         return [{"type": "text", "text": message["text"]}]
-    # 1) 파일 분류
     video_files = [f for f in message["files"] if f.endswith(".mp4")]
     image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
     csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
     txt_files = [f for f in message["files"] if f.lower().endswith(".txt")]
     pdf_files = [f for f in message["files"] if f.lower().endswith(".pdf")]
-    # 2) 사용자 원본 text 추가
     content_list = [{"type": "text", "text": message["text"]}]
-    # 3) CSV
     for csv_path in csv_files:
         csv_analysis = analyze_csv_file(csv_path)
         content_list.append({"type": "text", "text": csv_analysis})
-    # 4) TXT
     for txt_path in txt_files:
         txt_analysis = analyze_txt_file(txt_path)
         content_list.append({"type": "text", "text": txt_analysis})
-    # 5) PDF
     for pdf_path in pdf_files:
         pdf_markdown = pdf_to_markdown(pdf_path)
         content_list.append({"type": "text", "text": pdf_markdown})
-    # 6) 비디오 (한 개만 허용)
     if video_files:
         content_list += process_video(video_files[0])
         return content_list
-    # 7) 이미지 처리
     if "<image>" in message["text"]:
         # interleaved
         return process_interleaved_images(message)
     else:
-        # 일반 여러 장
         for img_path in image_files:
             content_list.append({"type": "image", "url": img_path})
@@ -263,45 +260,45 @@ def process_new_user_message(message: dict) -> list[dict]:
 ##################################################
-# history -> LLM 메시지 변환
 ##################################################
 def process_history(history: list[dict]) -> list[dict]:
     messages = []
     current_user_content: list[dict] = []
     for item in history:
         if item["role"] == "assistant":
-            # user_content가 쌓여있다면 user 메시지로 저장
             if current_user_content:
                 messages.append({"role": "user", "content": current_user_content})
                 current_user_content = []
-            # 그 뒤 item은 assistant
             messages.append({"role": "assistant", "content": [{"type": "text", "text": item["content"]}]})
         else:
-            # user
             content = item["content"]
             if isinstance(content, str):
                 current_user_content.append({"type": "text", "text": content})
             else:
-                # 이미지나 기타
                 current_user_content.append({"type": "image", "url": content[0]})
     return messages
 ##################################################
-# 메인 추론 함수
 ##################################################
 @spaces.GPU(duration=120)
 def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
     if not validate_media_constraints(message, history):
         yield ""
         return
     messages = []
     if system_prompt:
         messages.append({"role": "system", "content": [{"type": "text", "text": system_prompt}]})
     messages.extend(process_history(history))
     messages.append({"role": "user", "content": process_new_user_message(message)})
     inputs = processor.apply_chat_template(
         messages,
         add_generation_prompt=True,
@@ -325,9 +322,6 @@ def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tok
         yield output
-##################################################
-# 예시들 (기존)
-##################################################
 ##################################################
 # 예시들 (한글화 버전)
 ##################################################
@@ -462,14 +456,18 @@ examples = [
 demo = gr.ChatInterface(
     fn=run,
     type="messages",
     chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
-    # .webp, .png, .jpg, .jpeg, .gif, .mp4, .csv, .txt, .pdf 모두 허용
     textbox=gr.MultimodalTextbox(
         file_types=[
-            ".webp", ".png", ".jpg", ".jpeg", ".gif",
             ".mp4", ".csv", ".txt", ".pdf"
         ],
         file_count="multiple",
@@ -479,15 +477,18 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Textbox(
             label="System Prompt",
-            value=(
-                "You are a deeply thoughtful AI. Consider problems thoroughly and derive "
-                "correct solutions through systematic reasoning. Please answer in korean."
-            )
         ),
-        gr.Slider(label="Max New Tokens", minimum=100, maximum=8000, step=50, value=2000),
     ],
     stop_btn=False,
-    title="Vidraft-Gemma-3-27B",
     examples=examples,
     run_examples_on_click=False,
     cache_examples=False,
@@ -497,3 +498,6 @@ demo = gr.ChatInterface(
 if __name__ == "__main__":
     demo.launch()

 # CSV/TXT 분석
 import pandas as pd
+# PDF 텍스트 추출용
 import PyPDF2
+##################################################
+# 상수 및 모델 로딩
+##################################################
+MAX_CONTENT_CHARS = 8000  # 너무 큰 파일 내용은 이 정도까지만 표시
 model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
 processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
 model = Gemma3ForConditionalGeneration.from_pretrained(
     model_id,
 ##################################################
+# 1) CSV, TXT, PDF 분석 함수
 ##################################################
 def analyze_csv_file(path: str) -> str:
+    """CSV 파일을 읽어 문자열화. 너무 길면 일부만 출력."""
     try:
         df = pd.read_csv(path)
         df_str = df.to_string()
 def analyze_txt_file(path: str) -> str:
+    """TXT 파일 전체를 읽어 문자열 반환. 너무 길면 잘라냄."""
     try:
         with open(path, "r", encoding="utf-8") as f:
             text = f.read()
 def pdf_to_markdown(pdf_path: str) -> str:
+    """PDF -> 텍스트 추출 -> Markdown 형식으로 변환. 너무 길면 자름."""
     text_chunks = []
     try:
         with open(pdf_path, "rb") as f:
 ##################################################
+# 2) 이미지/비디오 개수 제한 검사
 ##################################################
 def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
     image_count = 0
     image_count = 0
     video_count = 0
     for item in history:
+        # assistant 메시지이거나 content가 str이면 제외
         if item["role"] != "user" or isinstance(item["content"], str):
             continue
+        # 이미지/비디오 경로로만 카운트
         if item["content"][0].endswith(".mp4"):
             video_count += 1
         else:
 def validate_media_constraints(message: dict, history: list[dict]) -> bool:
     """
+    - 이미지/비디오만 대상으로 개수·혼합 제한
+    - CSV, PDF, TXT 등은 대상 제외
+    - <image> 태그와 실제 이미지 수가 일치하는지 등
     """
     media_files = []
     for f in message["files"]:
+        # 이미지 확장자 또는 .mp4
         if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4"):
             media_files.append(f)
     image_count = history_image_count + new_image_count
     video_count = history_video_count + new_video_count
+    # 비디오 1개 초과 불가
     if video_count > 1:
         gr.Warning("Only one video is supported.")
         return False
+    # 비디오 + 이미지 혼합 불가
     if video_count == 1:
         if image_count > 0:
             gr.Warning("Mixing images and videos is not allowed.")
         if "<image>" in message["text"]:
             gr.Warning("Using <image> tags with video files is not supported.")
             return False
+    # 이미지 개수 제한
     if video_count == 0 and image_count > MAX_NUM_IMAGES:
         gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
         return False
+    # <image> 태그와 실제 이미지 수가 일치?
     if "<image>" in message["text"] and message["text"].count("<image>") != new_image_count:
         gr.Warning("The number of <image> tags in the text does not match the number of images.")
         return False
 ##################################################
+# 3) 비디오 처리
 ##################################################
 def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
+    """영상에서 일정 간격으로 프레임을 추출, PIL 이미지와 timestamp 반환."""
     vidcap = cv2.VideoCapture(video_path)
     fps = vidcap.get(cv2.CAP_PROP_FPS)
     total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
     frame_interval = int(fps / 3)
+    frames = []
     for i in range(0, total_frames, frame_interval):
         vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
         success, image = vidcap.read()
             pil_image = Image.fromarray(image)
             timestamp = round(i / fps, 2)
             frames.append((pil_image, timestamp))
     vidcap.release()
     return frames
 def process_video(video_path: str) -> list[dict]:
     content = []
     frames = downsample_video(video_path)
+    for pil_image, timestamp in frames:
         with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
             pil_image.save(temp_file.name)
             content.append({"type": "text", "text": f"Frame {timestamp}:"})
 ##################################################
+# 4) interleaved <image> 처리
 ##################################################
 def process_interleaved_images(message: dict) -> list[dict]:
     parts = re.split(r"(<image>)", message["text"])
         elif part.strip():
             content.append({"type": "text", "text": part.strip()})
         else:
+            # 공백만 있는 경우
             if isinstance(part, str) and part != "<image>":
                 content.append({"type": "text", "text": part})
     return content
 ##################################################
+# 5) CSV/PDF/TXT는 텍스트로만, 이미지/비디오는 경로로
 ##################################################
 def process_new_user_message(message: dict) -> list[dict]:
     if not message["files"]:
         return [{"type": "text", "text": message["text"]}]
+    # 확장자별 분류
     video_files = [f for f in message["files"] if f.endswith(".mp4")]
     image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
     csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
     txt_files = [f for f in message["files"] if f.lower().endswith(".txt")]
     pdf_files = [f for f in message["files"] if f.lower().endswith(".pdf")]
+    # user 텍스트 먼저 추가
     content_list = [{"type": "text", "text": message["text"]}]
+    # CSV
     for csv_path in csv_files:
         csv_analysis = analyze_csv_file(csv_path)
+        # 분석 내용만 넣음 (파일 경로를 히스토리에 추가하지 않음)
         content_list.append({"type": "text", "text": csv_analysis})
+    # TXT
     for txt_path in txt_files:
         txt_analysis = analyze_txt_file(txt_path)
         content_list.append({"type": "text", "text": txt_analysis})
+    # PDF
     for pdf_path in pdf_files:
         pdf_markdown = pdf_to_markdown(pdf_path)
         content_list.append({"type": "text", "text": pdf_markdown})
+    # 비디오
     if video_files:
         content_list += process_video(video_files[0])
         return content_list
+    # 이미지
     if "<image>" in message["text"]:
         # interleaved
         return process_interleaved_images(message)
     else:
+        # 여러 장 이미지
         for img_path in image_files:
             content_list.append({"type": "image", "url": img_path})
 ##################################################
+# 6) history -> LLM 메시지 변환
 ##################################################
 def process_history(history: list[dict]) -> list[dict]:
     messages = []
     current_user_content: list[dict] = []
     for item in history:
         if item["role"] == "assistant":
             if current_user_content:
                 messages.append({"role": "user", "content": current_user_content})
                 current_user_content = []
             messages.append({"role": "assistant", "content": [{"type": "text", "text": item["content"]}]})
         else:
             content = item["content"]
             if isinstance(content, str):
                 current_user_content.append({"type": "text", "text": content})
             else:
+                # 이미지 or 기타 파일 url
                 current_user_content.append({"type": "image", "url": content[0]})
     return messages
 ##################################################
+# 7) 메인 추론 함수
 ##################################################
 @spaces.GPU(duration=120)
 def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
+    # a) 이미지/비디오 제한 검사
     if not validate_media_constraints(message, history):
         yield ""
         return
+    # b) 대화 기록 + 이번 메시지
     messages = []
     if system_prompt:
         messages.append({"role": "system", "content": [{"type": "text", "text": system_prompt}]})
     messages.extend(process_history(history))
     messages.append({"role": "user", "content": process_new_user_message(message)})
+    # c) 모델 추론
     inputs = processor.apply_chat_template(
         messages,
         add_generation_prompt=True,
         yield output
 ##################################################
 # 예시들 (한글화 버전)
 ##################################################
+##################################################
+# 9) Gradio ChatInterface
+##################################################
 demo = gr.ChatInterface(
     fn=run,
     type="messages",
     chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
+    # 이미지/동영상 + CSV/TXT/PDF 허용 (이미지: webp 포함)
     textbox=gr.MultimodalTextbox(
         file_types=[
+            ".png", ".jpg", ".jpeg", ".gif", ".webp",
             ".mp4", ".csv", ".txt", ".pdf"
         ],
         file_count="multiple",
     additional_inputs=[
         gr.Textbox(
             label="System Prompt",
+            value="You are a deeply thoughtful AI. Consider problems thoroughly and derive correct solutions through systematic reasoning. Please answer in korean."
+        ),
+        gr.Slider(
+            label="Max New Tokens",
+            minimum=100,
+            maximum=8000,
+            step=50,
+            value=2000
         ),
     ],
     stop_btn=False,
+    title="Gemma 3 27B IT",
     examples=examples,
     run_examples_on_click=False,
     cache_examples=False,
 if __name__ == "__main__":
     demo.launch()