remove fallback model completely, and uncomment
app.py CHANGED
@@ -78,21 +78,9 @@ MODEL_CONFIG = {
         "description": "Lightweight T5 model optimized for instruction following",
         "dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
         "is_t5": True
-    },
-    "Fallback Model": {
-        "name": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
-        "description": "Model sangat ringan untuk fallback",
-        "dtype": torch.float16 if torch.cuda.is_available() else torch.float32
     }
 }
 
-# Tambahkan model fallback ke MODEL_CONFIG
-# MODEL_CONFIG["Fallback Model"] = {
-#     "name": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
-#     "description": "Model sangat ringan untuk fallback",
-#     "dtype": torch.float16 if torch.cuda.is_available() else torch.float32
-# }
-
 def initialize_model_once(model_key):
     with MODEL_CACHE["init_lock"]:
         current_model = MODEL_CACHE["model_name"]
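
With the "Fallback Model" entry and its commented-out duplicate gone, MODEL_CONFIG holds only the regular model entries, so any leftover lookup of that key would now raise a bare KeyError. A minimal sketch of a guarded lookup (illustrative only; get_model_info is not a function in app.py):

# Illustrative sketch, not part of this commit: fail fast on model keys that are
# no longer present in MODEL_CONFIG (such as the removed "Fallback Model").
def get_model_info(model_key, config=None):
    config = MODEL_CONFIG if config is None else config
    if model_key not in config:
        raise KeyError(f"Unknown model '{model_key}'; available: {', '.join(config)}")
    return config[model_key]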
@@ -160,35 +148,21 @@ def initialize_model_once(model_key):
 
         # Handle standard HF models
         else:
+            quantization_config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_compute_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                bnb_4bit_quant_type="nf4",
+                bnb_4bit_use_double_quant=True
+            )
             MODEL_CACHE["tokenizer"] = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-            [8 removed lines not captured in the diff rendering]
-            )
-
-            MODEL_CACHE["model"] = AutoModelForCausalLM.from_pretrained(
-                model_name,
-                quantization_config=quantization_config,
-                torch_dtype=model_info["dtype"],
-                device_map="auto",
-                low_cpu_mem_usage=True,
-                trust_remote_code=True
-            )
-            else:
-                # For CPU-only environments, load without quantization
-                MODEL_CACHE["model"] = AutoModelForCausalLM.from_pretrained(
-                    model_name,
-                    torch_dtype=torch.float32,  # Use float32 for CPU
-                    device_map=None,
-                    low_cpu_mem_usage=True,
-                    trust_remote_code=True
-                )
-
+            MODEL_CACHE["model"] = AutoModelForCausalLM.from_pretrained(
+                model_name,
+                quantization_config=quantization_config,
+                torch_dtype=model_info["dtype"],
+                device_map="auto" if torch.cuda.is_available() else None,
+                low_cpu_mem_usage=True,
+                trust_remote_code=True
+            )
             MODEL_CACHE["is_gguf"] = False
 
             print(f"Model {model_name} loaded successfully")
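
The rewritten else-branch now always builds a 4-bit BitsAndBytesConfig and passes it to AutoModelForCausalLM.from_pretrained. bitsandbytes 4-bit loading generally requires a CUDA GPU, so on a CPU-only Space a guarded load is the safer pattern; a minimal sketch under that assumption (load_causal_lm is a hypothetical helper, not code from app.py):

# Sketch only: skip bitsandbytes 4-bit quantization when no GPU is available.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

def load_causal_lm(model_name: str):
    if torch.cuda.is_available():
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
        )
        return AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=quantization_config,
            device_map="auto",
            low_cpu_mem_usage=True,
            trust_remote_code=True,
        )
    # CPU-only path: plain float32 load without quantization
    return AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float32,
        low_cpu_mem_usage=True,
        trust_remote_code=True,
    )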
@@ -206,9 +180,6 @@ def create_llm_pipeline(model_key):
         print(f"Creating pipeline for model: {model_key}")
         tokenizer, model, is_gguf = initialize_model_once(model_key)
 
-        # Get the model info for reference
-        model_info = MODEL_CONFIG[model_key]
-
         if model is None:
             raise ValueError(f"Model is None for {model_key}")
 
@@ -258,85 +229,22 @@ def create_llm_pipeline(model_key):
         import traceback
         print(f"Error creating pipeline: {str(e)}")
         print(traceback.format_exc())
-
-        # Fallback ke model sederhana jika yang utama gagal
-        if model_key != "Fallback Model":
-            print(f"Trying fallback model")
-            try:
-                return create_fallback_pipeline()
-            except:
-                raise RuntimeError(f"Failed to create pipeline: {str(e)}")
-        else:
-            raise RuntimeError(f"Failed to create pipeline: {str(e)}")
-
-def create_fallback_pipeline():
-    """Create a fallback pipeline with a very small model"""
-    model_key = "Fallback Model"
-    print(f"Creating minimal fallback pipeline with {MODEL_CONFIG[model_key]['name']}")
-
-    # Avoid using bitsandbytes for quantization when CUDA is not available
-    try:
-        tokenizer = AutoTokenizer.from_pretrained(MODEL_CONFIG[model_key]["name"])
-
-        # Load model in 8-bit or without quantization for CPU
-        if torch.cuda.is_available():
-            model = AutoModelForCausalLM.from_pretrained(
-                MODEL_CONFIG[model_key]["name"],
-                torch_dtype=MODEL_CONFIG[model_key]["dtype"],
-                device_map="auto",
-                low_cpu_mem_usage=True
-            )
-        else:
-            # For CPU-only environments, avoid quantization
-            model = AutoModelForCausalLM.from_pretrained(
-                MODEL_CONFIG[model_key]["name"],
-                torch_dtype=torch.float32,  # Use float32 for CPU
-                low_cpu_mem_usage=True
-            )
-
-        pipe = pipeline(
-            "text-generation",
-            model=model,
-            tokenizer=tokenizer,
-            max_new_tokens=64,  # Reduced for CPU performance
-            temperature=0.3,
-            return_full_text=False,
-        )
-
-        return HuggingFacePipeline(pipeline=pipe)
-    except Exception as e:
-        print(f"Error creating minimal fallback pipeline: {str(e)}")
-        raise
 
 def handle_model_loading_error(model_key, session_id):
-    """Handle model loading errors
-
+    """Handle model loading errors by providing alternative model suggestions"""
+    suggested_models = [
         "DeepSeek Coder Instruct", # 1.3B model
-        "Phi-4",
-        "TinyLlama
-        "Flan
+        "Phi-4 Mini Instruct", # Light model
+        "TinyLlama Chat", # 1.1B model
+        "Flan T5 Small" # Lightweight T5
     ]
 
-    #
-    if model_key
-
+    # Remove the current model from suggestions if it's in the list
+    if model_key in suggested_models:
+        suggested_models.remove(model_key)
 
-
-
-        current_index = fallback_hierarchy.index(model_key)
-    except ValueError:
-        current_index = -1
-
-    # Coba model berikutnya dalam hirarki
-    for fallback_model in fallback_hierarchy[current_index+1:]:
-        try:
-            print(f"Trying fallback model: {fallback_model}")
-            chatbot = ChatBot(session_id, fallback_model)
-            return chatbot, f"Model {model_key} tidak tersedia. Menggunakan {fallback_model} sebagai alternatif."
-        except Exception as e:
-            print(f"Fallback model {fallback_model} also failed: {str(e)}")
-
-    return None, "Semua model gagal dimuat. Harap coba lagi nanti."
+    suggestions = ", ".join(suggested_models[:3]) # Only show top 3 suggestions
+    return None, f"Tidak dapat memuat model {model_key}. Silakan coba model lain seperti: {suggestions}"
 
 def create_conversational_chain(db, file_path, model_key):
     llm = create_llm_pipeline(model_key)
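
handle_model_loading_error no longer constructs a fallback ChatBot; it always returns (None, message), where the message lists up to three alternative model names, and leaves any retry to the caller. A usage sketch (try_create_chatbot is hypothetical; ChatBot and handle_model_loading_error are the functions defined in app.py):

# Usage sketch, not part of this commit: surface the suggestion message to the UI
# instead of silently switching to another model.
def try_create_chatbot(model_key, session_id):
    try:
        return ChatBot(session_id, model_key), None
    except Exception as err:
        print(f"Failed to load {model_key}: {err}")
        chatbot, message = handle_model_loading_error(model_key, session_id)
        return chatbot, message  # chatbot is always None here; message names alternatives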
@@ -703,18 +611,6 @@ def create_gradio_interface():
                 import traceback
                 print(f"Error processing file with {model_key}: {str(e)}")
                 print(traceback.format_exc())
-
-                # Coba dengan model fallback
-                try:
-                    chatbot, message = handle_model_loading_error(model_key, sess_id)
-                    if chatbot is not None:
-                        result = chatbot.process_file(file)
-                        return chatbot, True, [(None, message), (None, result)]
-                    else:
-                        return None, False, [(None, message)]
-                except Exception as fb_err:
-                    error_msg = f"Error dengan model {model_key}: {str(e)}\n\nFallback juga gagal: {str(fb_err)}"
-                    return None, False, [(None, error_msg)]
 
         process_button.click(
             fn=handle_process_file,
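
With the automatic fallback retry removed from handle_process_file, the except branch is left to report the failure directly. The committed replacement body is not visible in this diff; a simplified error path might look like the sketch below, reusing the (chatbot, processed, chat_history) return shape of the removed code (report_processing_error is hypothetical):

# Assumed simplified error path; the actual except body after this commit is not shown here.
def report_processing_error(model_key, err):
    error_msg = f"Error processing file with {model_key}: {err}"
    return None, False, [(None, error_msg)]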
@@ -737,21 +633,6 @@ def create_gradio_interface():
             outputs=[chatbot_state, model_selected, chatbot_interface, model_dropdown]
         )
 
-        # Change model handler
-        # def handle_model_change(model_key, chatbot, sess_id):
-        #     if chatbot is None:
-        #         chatbot = ChatBot(sess_id, model_key)
-        #         return chatbot, [(None, f"Model diatur ke {model_key}. Silakan upload file CSV.")]
-
-        #     result = chatbot.change_model(model_key)
-        #     return chatbot, chatbot.chat_history + [(None, result)]
-
-        # change_model_button.click(
-        #     fn=handle_model_change,
-        #     inputs=[model_dropdown, chatbot_state, session_id],
-        #     outputs=[chatbot_state, chatbot_interface]
-        # )
-
         # Chat handlers
         def user_message_submitted(message, history, chatbot, sess_id):
             history = history + [(message, None)]