Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	Commit 
							
							·
						
						77f7fca
	
1
								Parent(s):
							
							c80880c
								
Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -16,13 +16,15 @@ from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIter | |
| 16 |  | 
| 17 | 
             
            # CSV/TXT 분석
         | 
| 18 | 
             
            import pandas as pd
         | 
| 19 | 
            -
             | 
| 20 | 
            -
            # PDF 텍스트 추출
         | 
| 21 | 
             
            import PyPDF2
         | 
| 22 |  | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
|  | |
|  | |
| 25 | 
             
            model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
         | 
|  | |
| 26 | 
             
            processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
         | 
| 27 | 
             
            model = Gemma3ForConditionalGeneration.from_pretrained(
         | 
| 28 | 
             
                model_id,
         | 
| @@ -35,12 +37,10 @@ MAX_NUM_IMAGES = int(os.getenv("MAX_NUM_IMAGES", "5")) | |
| 35 |  | 
| 36 |  | 
| 37 | 
             
            ##################################################
         | 
| 38 | 
            -
            # CSV, TXT, PDF 분석 함수
         | 
| 39 | 
             
            ##################################################
         | 
| 40 | 
             
            def analyze_csv_file(path: str) -> str:
         | 
| 41 | 
            -
                """
         | 
| 42 | 
            -
                CSV ํ์ผ์ ์ ์ฒด ๋ฌธ์์ด๋ก ๋ณํ. ๋๋ฌด ๊ธธ ๊ฒฝ์ฐ ์ผ๋ถ๋ง ํ์.
         | 
| 43 | 
            -
                """
         | 
| 44 | 
             
                try:
         | 
| 45 | 
             
                    df = pd.read_csv(path)
         | 
| 46 | 
             
                    df_str = df.to_string()
         | 
| @@ -52,9 +52,7 @@ def analyze_csv_file(path: str) -> str: | |
| 52 |  | 
| 53 |  | 
| 54 | 
             
            def analyze_txt_file(path: str) -> str:
         | 
| 55 | 
            -
                """
         | 
| 56 | 
            -
                TXT ํ์ผ ์ ๋ฌธ ์ฝ๊ธฐ. ๋๋ฌด ๊ธธ๋ฉด ์ผ๋ถ๋ง ํ์.
         | 
| 57 | 
            -
                """
         | 
| 58 | 
             
                try:
         | 
| 59 | 
             
                    with open(path, "r", encoding="utf-8") as f:
         | 
| 60 | 
             
                        text = f.read()
         | 
| @@ -66,9 +64,7 @@ def analyze_txt_file(path: str) -> str: | |
| 66 |  | 
| 67 |  | 
| 68 | 
             
            def pdf_to_markdown(pdf_path: str) -> str:
         | 
| 69 | 
            -
                """
         | 
| 70 | 
            -
                PDF โ Markdown. ํ์ด์ง๋ณ๋ก ๊ฐ๋จํ ํ
์คํธ ์ถ์ถ.
         | 
| 71 | 
            -
                """
         | 
| 72 | 
             
                text_chunks = []
         | 
| 73 | 
             
                try:
         | 
| 74 | 
             
                    with open(pdf_path, "rb") as f:
         | 
| @@ -89,7 +85,7 @@ def pdf_to_markdown(pdf_path: str) -> str: | |
| 89 |  | 
| 90 |  | 
| 91 | 
             
            ##################################################
         | 
| 92 | 
            -
            # ์ด๋ฏธ์ง/๋น๋์ค  | 
| 93 | 
             
            ##################################################
         | 
| 94 | 
             
            def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
         | 
| 95 | 
             
                image_count = 0
         | 
| @@ -106,8 +102,10 @@ def count_files_in_history(history: list[dict]) -> tuple[int, int]: | |
| 106 | 
             
                image_count = 0
         | 
| 107 | 
             
                video_count = 0
         | 
| 108 | 
             
                for item in history:
         | 
|  | |
| 109 | 
             
                    if item["role"] != "user" or isinstance(item["content"], str):
         | 
| 110 | 
             
                        continue
         | 
|  | |
| 111 | 
             
                    if item["content"][0].endswith(".mp4"):
         | 
| 112 | 
             
                        video_count += 1
         | 
| 113 | 
             
                    else:
         | 
| @@ -117,17 +115,13 @@ def count_files_in_history(history: list[dict]) -> tuple[int, int]: | |
| 117 |  | 
| 118 | 
             
            def validate_media_constraints(message: dict, history: list[dict]) -> bool:
         | 
| 119 | 
             
                """
         | 
| 120 | 
            -
                -  | 
| 121 | 
            -
                -  | 
| 122 | 
            -
                - ์ด๋ฏธ์ง  | 
| 123 | 
            -
                - <image> ํ๊ทธ๊ฐ ์์ผ๋ฉด ํ๊ทธ ์์ ์ค์  ์ด๋ฏธ์ง ์ ์ผ์น
         | 
| 124 | 
            -
                - CSV, TXT, PDF ๋ฑ์ ์ฌ๊ธฐ์ ์ ํํ์ง ์์
         | 
| 125 | 
             
                """
         | 
| 126 | 
             
                media_files = []
         | 
| 127 | 
             
                for f in message["files"]:
         | 
| 128 | 
            -
                    #  | 
| 129 | 
            -
                    # ๋น๋์ค: mp4
         | 
| 130 | 
            -
                    # cf) PDF, CSV, TXT ๋ฑ์ ์ ์ธ
         | 
| 131 | 
             
                    if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4"):
         | 
| 132 | 
             
                        media_files.append(f)
         | 
| 133 |  | 
| @@ -136,9 +130,11 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool: | |
| 136 | 
             
                image_count = history_image_count + new_image_count
         | 
| 137 | 
             
                video_count = history_video_count + new_video_count
         | 
| 138 |  | 
|  | |
| 139 | 
             
                if video_count > 1:
         | 
| 140 | 
             
                    gr.Warning("Only one video is supported.")
         | 
| 141 | 
             
                    return False
         | 
|  | |
| 142 | 
             
                if video_count == 1:
         | 
| 143 | 
             
                    if image_count > 0:
         | 
| 144 | 
             
                        gr.Warning("Mixing images and videos is not allowed.")
         | 
| @@ -146,9 +142,11 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool: | |
| 146 | 
             
                    if "<image>" in message["text"]:
         | 
| 147 | 
             
                        gr.Warning("Using <image> tags with video files is not supported.")
         | 
| 148 | 
             
                        return False
         | 
|  | |
| 149 | 
             
                if video_count == 0 and image_count > MAX_NUM_IMAGES:
         | 
| 150 | 
             
                    gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
         | 
| 151 | 
             
                    return False
         | 
|  | |
| 152 | 
             
                if "<image>" in message["text"] and message["text"].count("<image>") != new_image_count:
         | 
| 153 | 
             
                    gr.Warning("The number of <image> tags in the text does not match the number of images.")
         | 
| 154 | 
             
                    return False
         | 
| @@ -157,16 +155,16 @@ def validate_media_constraints(message: dict, history: list[dict]) -> bool: | |
| 157 |  | 
| 158 |  | 
| 159 | 
             
            ##################################################
         | 
| 160 | 
            -
            # 비디오 처리
         | 
| 161 | 
             
            ##################################################
         | 
| 162 | 
             
            def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
         | 
|  | |
| 163 | 
             
                vidcap = cv2.VideoCapture(video_path)
         | 
| 164 | 
             
                fps = vidcap.get(cv2.CAP_PROP_FPS)
         | 
| 165 | 
             
                total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
         | 
| 166 | 
            -
             | 
| 167 | 
             
                frame_interval = int(fps / 3)
         | 
| 168 | 
            -
                frames = []
         | 
| 169 |  | 
|  | |
| 170 | 
             
                for i in range(0, total_frames, frame_interval):
         | 
| 171 | 
             
                    vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
         | 
| 172 | 
             
                    success, image = vidcap.read()
         | 
| @@ -175,7 +173,6 @@ def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]: | |
| 175 | 
             
                        pil_image = Image.fromarray(image)
         | 
| 176 | 
             
                        timestamp = round(i / fps, 2)
         | 
| 177 | 
             
                        frames.append((pil_image, timestamp))
         | 
| 178 | 
            -
             | 
| 179 | 
             
                vidcap.release()
         | 
| 180 | 
             
                return frames
         | 
| 181 |  | 
| @@ -183,8 +180,7 @@ def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]: | |
| 183 | 
             
            def process_video(video_path: str) -> list[dict]:
         | 
| 184 | 
             
                content = []
         | 
| 185 | 
             
                frames = downsample_video(video_path)
         | 
| 186 | 
            -
                for  | 
| 187 | 
            -
                    pil_image, timestamp = frame
         | 
| 188 | 
             
                    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
         | 
| 189 | 
             
                        pil_image.save(temp_file.name)
         | 
| 190 | 
             
                        content.append({"type": "text", "text": f"Frame {timestamp}:"})
         | 
| @@ -194,7 +190,7 @@ def process_video(video_path: str) -> list[dict]: | |
| 194 |  | 
| 195 |  | 
| 196 | 
             
            ##################################################
         | 
| 197 | 
            -
            # interleaved <image> 처리
         | 
| 198 | 
             
            ##################################################
         | 
| 199 | 
             
            def process_interleaved_images(message: dict) -> list[dict]:
         | 
| 200 | 
             
                parts = re.split(r"(<image>)", message["text"])
         | 
| @@ -207,55 +203,56 @@ def process_interleaved_images(message: dict) -> list[dict]: | |
| 207 | 
             
                    elif part.strip():
         | 
| 208 | 
             
                        content.append({"type": "text", "text": part.strip()})
         | 
| 209 | 
             
                    else:
         | 
| 210 | 
            -
                        #  | 
| 211 | 
             
                        if isinstance(part, str) and part != "<image>":
         | 
| 212 | 
             
                            content.append({"type": "text", "text": part})
         | 
| 213 | 
             
                return content
         | 
| 214 |  | 
| 215 |  | 
| 216 | 
             
            ##################################################
         | 
| 217 | 
            -
            #  | 
| 218 | 
             
            ##################################################
         | 
| 219 | 
             
            def process_new_user_message(message: dict) -> list[dict]:
         | 
| 220 | 
             
                if not message["files"]:
         | 
| 221 | 
             
                    return [{"type": "text", "text": message["text"]}]
         | 
| 222 |  | 
| 223 | 
            -
                #  | 
| 224 | 
             
                video_files = [f for f in message["files"] if f.endswith(".mp4")]
         | 
| 225 | 
             
                image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
         | 
| 226 | 
             
                csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
         | 
| 227 | 
             
                txt_files = [f for f in message["files"] if f.lower().endswith(".txt")]
         | 
| 228 | 
             
                pdf_files = [f for f in message["files"] if f.lower().endswith(".pdf")]
         | 
| 229 |  | 
| 230 | 
            -
                #  | 
| 231 | 
             
                content_list = [{"type": "text", "text": message["text"]}]
         | 
| 232 |  | 
| 233 | 
            -
                #  | 
| 234 | 
             
                for csv_path in csv_files:
         | 
| 235 | 
             
                    csv_analysis = analyze_csv_file(csv_path)
         | 
|  | |
| 236 | 
             
                    content_list.append({"type": "text", "text": csv_analysis})
         | 
| 237 |  | 
| 238 | 
            -
                #  | 
| 239 | 
             
                for txt_path in txt_files:
         | 
| 240 | 
             
                    txt_analysis = analyze_txt_file(txt_path)
         | 
| 241 | 
             
                    content_list.append({"type": "text", "text": txt_analysis})
         | 
| 242 |  | 
| 243 | 
            -
                #  | 
| 244 | 
             
                for pdf_path in pdf_files:
         | 
| 245 | 
             
                    pdf_markdown = pdf_to_markdown(pdf_path)
         | 
| 246 | 
             
                    content_list.append({"type": "text", "text": pdf_markdown})
         | 
| 247 |  | 
| 248 | 
            -
                #  | 
| 249 | 
             
                if video_files:
         | 
| 250 | 
             
                    content_list += process_video(video_files[0])
         | 
| 251 | 
             
                    return content_list
         | 
| 252 |  | 
| 253 | 
            -
                #  | 
| 254 | 
             
                if "<image>" in message["text"]:
         | 
| 255 | 
             
                    # interleaved
         | 
| 256 | 
             
                    return process_interleaved_images(message)
         | 
| 257 | 
             
                else:
         | 
| 258 | 
            -
                    #  | 
| 259 | 
             
                    for img_path in image_files:
         | 
| 260 | 
             
                        content_list.append({"type": "image", "url": img_path})
         | 
| 261 |  | 
| @@ -263,45 +260,45 @@ def process_new_user_message(message: dict) -> list[dict]: | |
| 263 |  | 
| 264 |  | 
| 265 | 
             
            ##################################################
         | 
| 266 | 
            -
            # history -> LLM 메시지 변환
         | 
| 267 | 
             
            ##################################################
         | 
| 268 | 
             
            def process_history(history: list[dict]) -> list[dict]:
         | 
| 269 | 
             
                messages = []
         | 
| 270 | 
             
                current_user_content: list[dict] = []
         | 
| 271 | 
             
                for item in history:
         | 
| 272 | 
             
                    if item["role"] == "assistant":
         | 
| 273 | 
            -
                        # user_content๊ฐ ์์ฌ์๋ค๋ฉด user ๋ฉ์์ง๋ก ์ ์ฅ
         | 
| 274 | 
             
                        if current_user_content:
         | 
| 275 | 
             
                            messages.append({"role": "user", "content": current_user_content})
         | 
| 276 | 
             
                            current_user_content = []
         | 
| 277 | 
            -
                        # ๊ทธ ๋ค item์ assistant
         | 
| 278 | 
             
                        messages.append({"role": "assistant", "content": [{"type": "text", "text": item["content"]}]})
         | 
| 279 | 
             
                    else:
         | 
| 280 | 
            -
                        # user
         | 
| 281 | 
             
                        content = item["content"]
         | 
| 282 | 
             
                        if isinstance(content, str):
         | 
| 283 | 
             
                            current_user_content.append({"type": "text", "text": content})
         | 
| 284 | 
             
                        else:
         | 
| 285 | 
            -
                            #  | 
| 286 | 
             
                            current_user_content.append({"type": "image", "url": content[0]})
         | 
| 287 | 
             
                return messages
         | 
| 288 |  | 
| 289 |  | 
| 290 | 
             
            ##################################################
         | 
| 291 | 
            -
            # 메인 추론 함수
         | 
| 292 | 
             
            ##################################################
         | 
| 293 | 
             
            @spaces.GPU(duration=120)
         | 
| 294 | 
             
            def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
         | 
|  | |
| 295 | 
             
                if not validate_media_constraints(message, history):
         | 
| 296 | 
             
                    yield ""
         | 
| 297 | 
             
                    return
         | 
| 298 |  | 
|  | |
| 299 | 
             
                messages = []
         | 
| 300 | 
             
                if system_prompt:
         | 
| 301 | 
             
                    messages.append({"role": "system", "content": [{"type": "text", "text": system_prompt}]})
         | 
| 302 | 
             
                messages.extend(process_history(history))
         | 
| 303 | 
             
                messages.append({"role": "user", "content": process_new_user_message(message)})
         | 
| 304 |  | 
|  | |
| 305 | 
             
                inputs = processor.apply_chat_template(
         | 
| 306 | 
             
                    messages,
         | 
| 307 | 
             
                    add_generation_prompt=True,
         | 
| @@ -325,9 +322,6 @@ def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tok | |
| 325 | 
             
                    yield output
         | 
| 326 |  | 
| 327 |  | 
| 328 | 
            -
            ##################################################
         | 
| 329 | 
            -
            # 예시들 (기존)
         | 
| 330 | 
            -
            ##################################################
         | 
| 331 | 
             
            ##################################################
         | 
| 332 | 
             
            # 예시들 (한글화 버전)
         | 
| 333 | 
             
            ##################################################
         | 
| @@ -462,14 +456,18 @@ examples = [ | |
| 462 |  | 
| 463 |  | 
| 464 |  | 
|  | |
|  | |
|  | |
|  | |
| 465 | 
             
            demo = gr.ChatInterface(
         | 
| 466 | 
             
                fn=run,
         | 
| 467 | 
             
                type="messages",
         | 
| 468 | 
             
                chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
         | 
| 469 | 
            -
                #  | 
| 470 | 
             
                textbox=gr.MultimodalTextbox(
         | 
| 471 | 
             
                    file_types=[
         | 
| 472 | 
            -
                        ". | 
| 473 | 
             
                        ".mp4", ".csv", ".txt", ".pdf"
         | 
| 474 | 
             
                    ],
         | 
| 475 | 
             
                    file_count="multiple",
         | 
| @@ -479,15 +477,18 @@ demo = gr.ChatInterface( | |
| 479 | 
             
                additional_inputs=[
         | 
| 480 | 
             
                    gr.Textbox(
         | 
| 481 | 
             
                        label="System Prompt",
         | 
| 482 | 
            -
                        value= | 
| 483 | 
            -
             | 
| 484 | 
            -
             | 
| 485 | 
            -
                         | 
|  | |
|  | |
|  | |
|  | |
| 486 | 
             
                    ),
         | 
| 487 | 
            -
                    gr.Slider(label="Max New Tokens", minimum=100, maximum=8000, step=50, value=2000),
         | 
| 488 | 
             
                ],
         | 
| 489 | 
             
                stop_btn=False,
         | 
| 490 | 
            -
                title=" | 
| 491 | 
             
                examples=examples,
         | 
| 492 | 
             
                run_examples_on_click=False,
         | 
| 493 | 
             
                cache_examples=False,
         | 
| @@ -497,3 +498,6 @@ demo = gr.ChatInterface( | |
| 497 |  | 
| 498 | 
             
            if __name__ == "__main__":
         | 
| 499 | 
             
                demo.launch()
         | 
|  | |
|  | |
|  | 
|  | |
| 16 |  | 
| 17 | 
             
            # CSV/TXT 분석
         | 
| 18 | 
             
            import pandas as pd
         | 
| 19 | 
            +
            # PDF 텍스트 추출용
         | 
|  | |
| 20 | 
             
            import PyPDF2
         | 
| 21 |  | 
| 22 | 
            +
            ##################################################
         | 
| 23 | 
            +
            # 상수 및 모델 로딩
         | 
| 24 | 
            +
            ##################################################
         | 
| 25 | 
            +
            MAX_CONTENT_CHARS = 8000  # ๋๋ฌด ํฐ ํ์ผ ๋ด์ฉ์ ์ด ์ ๋๊น์ง๋ง ํ์
         | 
| 26 | 
             
            model_id = os.getenv("MODEL_ID", "google/gemma-3-27b-it")
         | 
| 27 | 
            +
             | 
| 28 | 
             
            processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
         | 
| 29 | 
             
            model = Gemma3ForConditionalGeneration.from_pretrained(
         | 
| 30 | 
             
                model_id,
         | 
|  | |
| 37 |  | 
| 38 |  | 
| 39 | 
             
            ##################################################
         | 
| 40 | 
            +
            # 1) CSV, TXT, PDF 분석 함수
         | 
| 41 | 
             
            ##################################################
         | 
| 42 | 
             
            def analyze_csv_file(path: str) -> str:
         | 
| 43 | 
            +
                """CSV ํ์ผ์ ์ฝ์ด ๋ฌธ์์ดํ. ๋๋ฌด ๊ธธ๋ฉด ์ผ๋ถ๋ง ์ถ๋ ฅ."""
         | 
|  | |
|  | |
| 44 | 
             
                try:
         | 
| 45 | 
             
                    df = pd.read_csv(path)
         | 
| 46 | 
             
                    df_str = df.to_string()
         | 
|  | |
| 52 |  | 
| 53 |  | 
| 54 | 
             
            def analyze_txt_file(path: str) -> str:
         | 
| 55 | 
            +
                """TXT ํ์ผ ์ ์ฒด๋ฅผ ์ฝ์ด ๋ฌธ์์ด ๋ฐํ. ๋๋ฌด ๊ธธ๋ฉด ์๋ผ๋."""
         | 
|  | |
|  | |
| 56 | 
             
                try:
         | 
| 57 | 
             
                    with open(path, "r", encoding="utf-8") as f:
         | 
| 58 | 
             
                        text = f.read()
         | 
|  | |
| 64 |  | 
| 65 |  | 
| 66 | 
             
            def pdf_to_markdown(pdf_path: str) -> str:
         | 
| 67 | 
            +
                """PDF -> ํ
์คํธ ์ถ์ถ -> Markdown ํ์์ผ๋ก ๋ณํ. ๋๋ฌด ๊ธธ๋ฉด ์๋ฆ."""
         | 
|  | |
|  | |
| 68 | 
             
                text_chunks = []
         | 
| 69 | 
             
                try:
         | 
| 70 | 
             
                    with open(pdf_path, "rb") as f:
         | 
|  | |
| 85 |  | 
| 86 |  | 
| 87 | 
             
            ##################################################
         | 
| 88 | 
            +
            # 2) 이미지/비디오 개수 제한 검사
         | 
| 89 | 
             
            ##################################################
         | 
| 90 | 
             
            def count_files_in_new_message(paths: list[str]) -> tuple[int, int]:
         | 
| 91 | 
             
                image_count = 0
         | 
|  | |
| 102 | 
             
                image_count = 0
         | 
| 103 | 
             
                video_count = 0
         | 
| 104 | 
             
                for item in history:
         | 
| 105 | 
            +
                    # assistant ๋ฉ์์ง์ด๊ฑฐ๋ content๊ฐ str์ด๋ฉด ์ ์ธ
         | 
| 106 | 
             
                    if item["role"] != "user" or isinstance(item["content"], str):
         | 
| 107 | 
             
                        continue
         | 
| 108 | 
            +
                    # ์ด๋ฏธ์ง/๋น๋์ค ๊ฒฝ๋ก๋ก๋ง ์นด์ดํธ
         | 
| 109 | 
             
                    if item["content"][0].endswith(".mp4"):
         | 
| 110 | 
             
                        video_count += 1
         | 
| 111 | 
             
                    else:
         | 
|  | |
| 115 |  | 
| 116 | 
             
            def validate_media_constraints(message: dict, history: list[dict]) -> bool:
         | 
| 117 | 
             
                """
         | 
| 118 | 
            +
                - ์ด๋ฏธ์ง/๋น๋์ค๋ง ๋์์ผ๋ก ๊ฐ์ยทํผํฉ ์ ํ
         | 
| 119 | 
            +
                - CSV, PDF, TXT ๋ฑ์ ๋์ ์ ์ธ
         | 
| 120 | 
            +
                - <image> ํ๊ทธ์ ์ค์  ์ด๋ฏธ์ง ์๊ฐ ์ผ์นํ๋์ง ๋ฑ
         | 
|  | |
|  | |
| 121 | 
             
                """
         | 
| 122 | 
             
                media_files = []
         | 
| 123 | 
             
                for f in message["files"]:
         | 
| 124 | 
            +
                    # ์ด๋ฏธ์ง ํ์ฅ์ ๋๋ .mp4
         | 
|  | |
|  | |
| 125 | 
             
                    if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE) or f.endswith(".mp4"):
         | 
| 126 | 
             
                        media_files.append(f)
         | 
| 127 |  | 
|  | |
| 130 | 
             
                image_count = history_image_count + new_image_count
         | 
| 131 | 
             
                video_count = history_video_count + new_video_count
         | 
| 132 |  | 
| 133 | 
            +
                # ๋น๋์ค 1๊ฐ ์ด๊ณผ ๋ถ๊ฐ
         | 
| 134 | 
             
                if video_count > 1:
         | 
| 135 | 
             
                    gr.Warning("Only one video is supported.")
         | 
| 136 | 
             
                    return False
         | 
| 137 | 
            +
                # ๋น๋์ค + ์ด๋ฏธ์ง ํผํฉ ๋ถ๊ฐ
         | 
| 138 | 
             
                if video_count == 1:
         | 
| 139 | 
             
                    if image_count > 0:
         | 
| 140 | 
             
                        gr.Warning("Mixing images and videos is not allowed.")
         | 
|  | |
| 142 | 
             
                    if "<image>" in message["text"]:
         | 
| 143 | 
             
                        gr.Warning("Using <image> tags with video files is not supported.")
         | 
| 144 | 
             
                        return False
         | 
| 145 | 
            +
                # ์ด๋ฏธ์ง ๊ฐ์ ์ ํ
         | 
| 146 | 
             
                if video_count == 0 and image_count > MAX_NUM_IMAGES:
         | 
| 147 | 
             
                    gr.Warning(f"You can upload up to {MAX_NUM_IMAGES} images.")
         | 
| 148 | 
             
                    return False
         | 
| 149 | 
            +
                # <image> ํ๊ทธ์ ์ค์  ์ด๋ฏธ์ง ์๊ฐ ์ผ์น?
         | 
| 150 | 
             
                if "<image>" in message["text"] and message["text"].count("<image>") != new_image_count:
         | 
| 151 | 
             
                    gr.Warning("The number of <image> tags in the text does not match the number of images.")
         | 
| 152 | 
             
                    return False
         | 
|  | |
| 155 |  | 
| 156 |  | 
| 157 | 
             
            ##################################################
         | 
| 158 | 
            +
            # 3) 비디오 처리
         | 
| 159 | 
             
            ##################################################
         | 
| 160 | 
             
            def downsample_video(video_path: str) -> list[tuple[Image.Image, float]]:
         | 
| 161 | 
            +
                """์์์์ ์ผ์  ๊ฐ๊ฒฉ์ผ๋ก ํ๋ ์์ ์ถ์ถ, PIL ์ด๋ฏธ์ง์ timestamp ๋ฐํ."""
         | 
| 162 | 
             
                vidcap = cv2.VideoCapture(video_path)
         | 
| 163 | 
             
                fps = vidcap.get(cv2.CAP_PROP_FPS)
         | 
| 164 | 
             
                total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
         | 
|  | |
| 165 | 
             
                frame_interval = int(fps / 3)
         | 
|  | |
| 166 |  | 
| 167 | 
            +
                frames = []
         | 
| 168 | 
             
                for i in range(0, total_frames, frame_interval):
         | 
| 169 | 
             
                    vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
         | 
| 170 | 
             
                    success, image = vidcap.read()
         | 
|  | |
| 173 | 
             
                        pil_image = Image.fromarray(image)
         | 
| 174 | 
             
                        timestamp = round(i / fps, 2)
         | 
| 175 | 
             
                        frames.append((pil_image, timestamp))
         | 
|  | |
| 176 | 
             
                vidcap.release()
         | 
| 177 | 
             
                return frames
         | 
| 178 |  | 
|  | |
| 180 | 
             
            def process_video(video_path: str) -> list[dict]:
         | 
| 181 | 
             
                content = []
         | 
| 182 | 
             
                frames = downsample_video(video_path)
         | 
| 183 | 
            +
                for pil_image, timestamp in frames:
         | 
|  | |
| 184 | 
             
                    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
         | 
| 185 | 
             
                        pil_image.save(temp_file.name)
         | 
| 186 | 
             
                        content.append({"type": "text", "text": f"Frame {timestamp}:"})
         | 
|  | |
| 190 |  | 
| 191 |  | 
| 192 | 
             
            ##################################################
         | 
| 193 | 
            +
            # 4) interleaved <image> 처리
         | 
| 194 | 
             
            ##################################################
         | 
| 195 | 
             
            def process_interleaved_images(message: dict) -> list[dict]:
         | 
| 196 | 
             
                parts = re.split(r"(<image>)", message["text"])
         | 
|  | |
| 203 | 
             
                    elif part.strip():
         | 
| 204 | 
             
                        content.append({"type": "text", "text": part.strip()})
         | 
| 205 | 
             
                    else:
         | 
| 206 | 
            +
                        # ๊ณต๋ฐฑ๋ง ์๋ ๊ฒฝ์ฐ
         | 
| 207 | 
             
                        if isinstance(part, str) and part != "<image>":
         | 
| 208 | 
             
                            content.append({"type": "text", "text": part})
         | 
| 209 | 
             
                return content
         | 
| 210 |  | 
| 211 |  | 
| 212 | 
             
            ##################################################
         | 
| 213 | 
            +
            # 5) CSV/PDF/TXT๋ ํ
์คํธ๋ก๋ง, ์ด๋ฏธ์ง/๋น๋์ค๋ ๊ฒฝ๋ก๋ก
         | 
| 214 | 
             
            ##################################################
         | 
| 215 | 
             
            def process_new_user_message(message: dict) -> list[dict]:
         | 
| 216 | 
             
                if not message["files"]:
         | 
| 217 | 
             
                    return [{"type": "text", "text": message["text"]}]
         | 
| 218 |  | 
| 219 | 
            +
                # 확장자별 분류
         | 
| 220 | 
             
                video_files = [f for f in message["files"] if f.endswith(".mp4")]
         | 
| 221 | 
             
                image_files = [f for f in message["files"] if re.search(r"\.(png|jpg|jpeg|gif|webp)$", f, re.IGNORECASE)]
         | 
| 222 | 
             
                csv_files = [f for f in message["files"] if f.lower().endswith(".csv")]
         | 
| 223 | 
             
                txt_files = [f for f in message["files"] if f.lower().endswith(".txt")]
         | 
| 224 | 
             
                pdf_files = [f for f in message["files"] if f.lower().endswith(".pdf")]
         | 
| 225 |  | 
| 226 | 
            +
                # user 텍스트 먼저 추가
         | 
| 227 | 
             
                content_list = [{"type": "text", "text": message["text"]}]
         | 
| 228 |  | 
| 229 | 
            +
                # CSV
         | 
| 230 | 
             
                for csv_path in csv_files:
         | 
| 231 | 
             
                    csv_analysis = analyze_csv_file(csv_path)
         | 
| 232 | 
            +
                    # 분석 내용만 넣음 (파일 경로를 히스토리에 추가하지 않음)
         | 
| 233 | 
             
                    content_list.append({"type": "text", "text": csv_analysis})
         | 
| 234 |  | 
| 235 | 
            +
                # TXT
         | 
| 236 | 
             
                for txt_path in txt_files:
         | 
| 237 | 
             
                    txt_analysis = analyze_txt_file(txt_path)
         | 
| 238 | 
             
                    content_list.append({"type": "text", "text": txt_analysis})
         | 
| 239 |  | 
| 240 | 
            +
                # PDF
         | 
| 241 | 
             
                for pdf_path in pdf_files:
         | 
| 242 | 
             
                    pdf_markdown = pdf_to_markdown(pdf_path)
         | 
| 243 | 
             
                    content_list.append({"type": "text", "text": pdf_markdown})
         | 
| 244 |  | 
| 245 | 
            +
                # 비디오
         | 
| 246 | 
             
                if video_files:
         | 
| 247 | 
             
                    content_list += process_video(video_files[0])
         | 
| 248 | 
             
                    return content_list
         | 
| 249 |  | 
| 250 | 
            +
                # 이미지
         | 
| 251 | 
             
                if "<image>" in message["text"]:
         | 
| 252 | 
             
                    # interleaved
         | 
| 253 | 
             
                    return process_interleaved_images(message)
         | 
| 254 | 
             
                else:
         | 
| 255 | 
            +
                    # 여러 장 이미지
         | 
| 256 | 
             
                    for img_path in image_files:
         | 
| 257 | 
             
                        content_list.append({"type": "image", "url": img_path})
         | 
| 258 |  | 
|  | |
| 260 |  | 
| 261 |  | 
| 262 | 
             
            ##################################################
         | 
| 263 | 
            +
            # 6) history -> LLM 메시지 변환
         | 
| 264 | 
             
            ##################################################
         | 
| 265 | 
             
            def process_history(history: list[dict]) -> list[dict]:
                """Convert chat history items into chat-template messages.

                Each history item is a dict with a ``"role"`` and a ``"content"`` key.
                Consecutive non-assistant items are accumulated into one user turn,
                which is emitted when the next assistant item is reached. Items left
                over after the last assistant item are not emitted.

                String content becomes a text entry. Non-string content is treated as
                a file reference whose first element is the path/URL
                (presumably the tuple Gradio stores for uploads -- confirm against the
                Gradio version in use). CSV/TXT/PDF uploads are accepted by the
                textbox and are handled as text (never as images) for the current
                message, so here they are represented by a short text note instead of
                an image entry; every other file keeps the original image entry.

                Args:
                    history: Prior conversation items, oldest first.

                Returns:
                    A list of ``{"role": ..., "content": [...]}`` messages.
                """
                messages: list[dict] = []
                current_user_content: list[dict] = []
                for item in history:
                    if item["role"] == "assistant":
                        # Flush the pending user turn before the assistant reply.
                        if current_user_content:
                            messages.append({"role": "user", "content": current_user_content})
                            current_user_content = []
                        messages.append({"role": "assistant", "content": [{"type": "text", "text": item["content"]}]})
                        continue

                    content = item["content"]
                    if isinstance(content, str):
                        current_user_content.append({"type": "text", "text": content})
                        continue

                    # Image or other file URL.
                    attachment = content[0]
                    is_document = isinstance(attachment, str) and attachment.lower().endswith(
                        (".csv", ".txt", ".pdf")
                    )
                    if is_document:
                        # A document path must not be handed over as an image; keep a
                        # text marker so the user turn is still present in the history.
                        note = f"[Attached file: {os.path.basename(attachment)}]"
                        current_user_content.append({"type": "text", "text": note})
                    else:
                        current_user_content.append({"type": "image", "url": attachment})
                return messages
         | 
| 282 |  | 
| 283 |  | 
| 284 | 
             
            ##################################################
         | 
| 285 | 
            +
            # 7) 메인 추론 함수
         | 
| 286 | 
             
            ##################################################
         | 
| 287 | 
             
            @spaces.GPU(duration=120)
         | 
| 288 | 
             
            def run(message: dict, history: list[dict], system_prompt: str = "", max_new_tokens: int = 512) -> Iterator[str]:
         | 
| 289 | 
            +
                # a) 이미지/비디오 제한 검사
         | 
| 290 | 
             
                if not validate_media_constraints(message, history):
         | 
| 291 | 
             
                    yield ""
         | 
| 292 | 
             
                    return
         | 
| 293 |  | 
| 294 | 
            +
                # b) 대화 기록 + 이번 메시지
         | 
| 295 | 
             
                messages = []
         | 
| 296 | 
             
                if system_prompt:
         | 
| 297 | 
             
                    messages.append({"role": "system", "content": [{"type": "text", "text": system_prompt}]})
         | 
| 298 | 
             
                messages.extend(process_history(history))
         | 
| 299 | 
             
                messages.append({"role": "user", "content": process_new_user_message(message)})
         | 
| 300 |  | 
| 301 | 
            +
                # c) 모델 추론
         | 
| 302 | 
             
                inputs = processor.apply_chat_template(
         | 
| 303 | 
             
                    messages,
         | 
| 304 | 
             
                    add_generation_prompt=True,
         | 
|  | |
| 322 | 
             
                    yield output
         | 
| 323 |  | 
| 324 |  | 
|  | |
|  | |
|  | |
| 325 | 
             
            ##################################################
         | 
| 326 | 
             
            # 예시들 (한글화 버전)
         | 
| 327 | 
             
            ##################################################
         | 
|  | |
| 456 |  | 
| 457 |  | 
| 458 |  | 
| 459 | 
            +
             | 
| 460 | 
            +
            ##################################################
         | 
| 461 | 
            +
            # 9) Gradio ChatInterface
         | 
| 462 | 
            +
            ##################################################
         | 
| 463 | 
             
            demo = gr.ChatInterface(
         | 
| 464 | 
             
                fn=run,
         | 
| 465 | 
             
                type="messages",
         | 
| 466 | 
             
                chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
         | 
| 467 | 
            +
                # 이미지/동영상 + CSV/TXT/PDF 허용 (이미지: webp 포함)
         | 
| 468 | 
             
                textbox=gr.MultimodalTextbox(
         | 
| 469 | 
             
                    file_types=[
         | 
| 470 | 
            +
                        ".png", ".jpg", ".jpeg", ".gif", ".webp",
         | 
| 471 | 
             
                        ".mp4", ".csv", ".txt", ".pdf"
         | 
| 472 | 
             
                    ],
         | 
| 473 | 
             
                    file_count="multiple",
         | 
|  | |
| 477 | 
             
                additional_inputs=[
         | 
| 478 | 
             
                    gr.Textbox(
         | 
| 479 | 
             
                        label="System Prompt",
         | 
| 480 | 
            +
                        value="You are a deeply thoughtful AI. Consider problems thoroughly and derive correct solutions through systematic reasoning. Please answer in korean."
         | 
| 481 | 
            +
                    ),
         | 
| 482 | 
            +
                    gr.Slider(
         | 
| 483 | 
            +
                        label="Max New Tokens",
         | 
| 484 | 
            +
                        minimum=100,
         | 
| 485 | 
            +
                        maximum=8000,
         | 
| 486 | 
            +
                        step=50,
         | 
| 487 | 
            +
                        value=2000
         | 
| 488 | 
             
                    ),
         | 
|  | |
| 489 | 
             
                ],
         | 
| 490 | 
             
                stop_btn=False,
         | 
| 491 | 
            +
                title="Gemma 3 27B IT",
         | 
| 492 | 
             
                examples=examples,
         | 
| 493 | 
             
                run_examples_on_click=False,
         | 
| 494 | 
             
                cache_examples=False,
         | 
|  | |
| 498 |  | 
| 499 | 
             
            if __name__ == "__main__":
         | 
| 500 | 
             
                demo.launch()
         | 
| 501 | 
            +
             | 
| 502 | 
            +
             | 
| 503 | 
            +
             | 
 
			
