seawolf2357 committed (verified)
Commit e6bcdb0 · 1 Parent(s): b49c3a3

Update app.py

Files changed (1):
  1. app.py +222 -79
app.py CHANGED
@@ -3,6 +3,7 @@
 Complete Integration - Single File
 
 L40S GPU + Persistent Storage (SQLite + ChromaDB)
+Base Model: IBM Granite 4.0 H 350M
 VIDraft AI Research Lab
 """
 
@@ -23,6 +24,7 @@ from typing import Dict, List, Any, Tuple, Optional
 import chromadb
 from chromadb.config import Settings
 from einops import rearrange, repeat
+from transformers import AutoModel, AutoTokenizer, AutoConfig
 
 # =====================================================
 # Global settings
@@ -32,6 +34,7 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 STORAGE_PATH = "/data"  # HF Spaces persistent storage
 DB_PATH = f"{STORAGE_PATH}/phoenix_experiments.db"
 VECTOR_DB_PATH = f"{STORAGE_PATH}/vector_store"
+DEFAULT_MODEL = "ibm-granite/granite-4.0-h-350m"
 
 # Create directories
 Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
@@ -39,6 +42,7 @@ Path(VECTOR_DB_PATH).mkdir(parents=True, exist_ok=True)
 
 print(f"🚀 PHOENIX Platform initialized on {DEVICE}")
 print(f"💾 Storage: {STORAGE_PATH}")
+print(f"🎯 Default Base Model: {DEFAULT_MODEL}")
 
 # =====================================================
 # Database management class
@@ -61,6 +65,7 @@ class ExperimentDatabase:
             CREATE TABLE IF NOT EXISTS experiments (
                 id INTEGER PRIMARY KEY AUTOINCREMENT,
                 model_type TEXT NOT NULL,
+                base_model_url TEXT,
                 sequence_length INTEGER,
                 power_mode TEXT,
                 compression_level REAL,
@@ -87,6 +92,11 @@ class ExperimentDatabase:
             ON experiments(timestamp DESC)
         """)
 
+        cursor.execute("""
+            CREATE INDEX IF NOT EXISTS idx_base_model
+            ON experiments(base_model_url)
+        """)
+
         conn.commit()
         print("✅ Database initialized")
 
@@ -97,13 +107,14 @@ class ExperimentDatabase:
 
         cursor.execute("""
             INSERT INTO experiments (
-                model_type, sequence_length, power_mode,
+                model_type, base_model_url, sequence_length, power_mode,
                 compression_level, use_hierarchical, elapsed_time,
                 memory_mb, throughput, avg_retention, compression_ratio,
                 config_json, metrics_json
-            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
         """, (
            config.get('model_type'),
+            config.get('base_model_url'),
            config.get('sequence_length'),
            config.get('power_mode'),
            config.get('compression_level'),
@@ -160,9 +171,18 @@ class ExperimentDatabase:
         """)
         by_model = dict(cursor.fetchall())
 
+        cursor.execute("""
+            SELECT base_model_url, COUNT(*) as count
+            FROM experiments
+            WHERE base_model_url IS NOT NULL
+            GROUP BY base_model_url
+        """)
+        by_base_model = dict(cursor.fetchall())
+
         return {
             'total_experiments': total,
-            'by_model': by_model
+            'by_model': by_model,
+            'by_base_model': by_base_model
         }
 
 class RetentionVectorStore:
@@ -182,7 +202,6 @@ class RetentionVectorStore:
 
     def add_retention_state(self, experiment_id: int, states: Dict, metadata: Dict):
         """Store a retention state"""
-        # Convert the state to a vector
         state_vector = self._states_to_vector(states)
 
         self.collection.add(
@@ -223,7 +242,6 @@ class RetentionVectorStore:
                 vectors.append(value.mean().item())
                 vectors.append(value.std().item())
 
-        # Pad/truncate to a fixed size
         target_size = 128
         if len(vectors) < target_size:
             vectors.extend([0.0] * (target_size - len(vectors)))
@@ -234,7 +252,6 @@ class RetentionVectorStore:
 
     def _text_to_vector(self, text: str) -> np.ndarray:
         """Convert text to a vector (simple hash-based)"""
-        # sentence-transformers is recommended in practice
         hash_val = hash(text) % (2**31)
         np.random.seed(hash_val)
         return np.random.randn(128)
@@ -346,24 +363,60 @@ class DynamicPowerRetention(nn.Module):
 class PHOENIXRetention(nn.Module):
     """PHOENIX Retention unified model"""
 
-    def __init__(self, d_model=512, d_state=256, num_layers=12, device='cuda'):
+    def __init__(self, d_model=512, d_state=256, num_layers=12, device='cuda', base_model_url=None):
         super().__init__()
         self.d_model = d_model
         self.d_state = d_state
         self.num_layers = num_layers
         self.device = device
+        self.base_model_url = base_model_url
+
+        # Load the base model (optional)
+        self.base_model = None
+        if base_model_url:
+            try:
+                print(f"📥 Loading base model: {base_model_url}")
+                self.base_model = AutoModel.from_pretrained(
+                    base_model_url,
+                    trust_remote_code=True
+                ).to(device)
+
+                # Take the base model's hidden size
+                if hasattr(self.base_model.config, 'hidden_size'):
+                    self.d_model = self.base_model.config.hidden_size
+
+                print(f"✅ Base model loaded: {base_model_url}")
+                print(f"📏 Model dimension: {self.d_model}")
+            except Exception as e:
+                print(f"⚠️ Base model loading failed: {e}")
+                print(f"   Continuing with default architecture...")
 
         # Core components
-        self.hierarchical = HierarchicalRetention(d_model, d_state)
+        self.hierarchical = HierarchicalRetention(self.d_model, d_state)
         self.compressor = AdaptiveCompression(d_state)
-        self.power_adapter = DynamicPowerRetention(d_model)
+        self.power_adapter = DynamicPowerRetention(self.d_model)
 
         # Layer norm
-        self.norm = nn.LayerNorm(d_model)
+        self.norm = nn.LayerNorm(self.d_model)
+
+        # Projection (bridges the base model)
+        if self.base_model:
+            self.base_projection = nn.Linear(self.d_model, self.d_model)
 
         self.to(device)
 
     def forward(self, x, return_states=True):
+        # Run the base model first (if present)
+        if self.base_model is not None:
+            with torch.no_grad():
+                base_output = self.base_model(
+                    inputs_embeds=x,
+                    output_hidden_states=True
+                )
+                # Use the last hidden state
+                x = base_output.hidden_states[-1]
+            x = self.base_projection(x)
+
         # Hierarchical retention
         h_out, states = self.hierarchical(x)
 
@@ -383,64 +436,93 @@ class PHOENIXRetention(nn.Module):
                 'medium_state': states['medium_state'],
                 'long_state': states['long_state'],
                 'compression_ratio': compression_ratio,
-                'dynamic_power': power
+                'dynamic_power': power,
+                'base_model_used': self.base_model is not None
             }
         return output
 
-class BrumbyRetention(nn.Module):
-    """Brumby baseline"""
+class TransformerBaseline(nn.Module):
+    """Transformer baseline"""
 
-    def __init__(self, d_model=512, d_state=256, power=2, device='cuda'):
+    def __init__(self, d_model=512, d_state=256, device='cuda', base_model_url=None):
         super().__init__()
         self.d_model = d_model
         self.d_state = d_state
-        self.power = power
         self.device = device
-
-        self.proj_q = nn.Linear(d_model, d_state)
-        self.proj_k = nn.Linear(d_model, d_state)
-        self.proj_v = nn.Linear(d_model, d_state)
-        self.proj_out = nn.Linear(d_state, d_model)
+        self.base_model_url = base_model_url
+
+        # Load the base model
+        self.base_model = None
+        if base_model_url:
+            try:
+                self.base_model = AutoModel.from_pretrained(
+                    base_model_url,
+                    trust_remote_code=True
+                ).to(device)
+
+                if hasattr(self.base_model.config, 'hidden_size'):
+                    self.d_model = self.base_model.config.hidden_size
+
+                print(f"✅ Transformer baseline loaded: {base_model_url}")
+            except Exception as e:
+                print(f"⚠️ Transformer baseline loading failed: {e}")
 
         self.to(device)
 
     def forward(self, x, return_states=True):
-        batch_size, seq_len, _ = x.shape
-
-        Q = self.proj_q(x)
-        K = self.proj_k(x)
-        V = self.proj_v(x)
-
-        # Simple retention (simplified)
-        state = torch.zeros(batch_size, self.d_state).to(x.device)
-        outputs = []
-
-        for t in range(seq_len):
-            state = 0.9 * state + V[:, t, :] @ K[:, t, :].T
-            output_t = state @ Q[:, t, :].unsqueeze(-1)
-            outputs.append(output_t.squeeze(-1))
-
-        outputs = torch.stack(outputs, dim=1)
-        outputs = self.proj_out(outputs)
-
-        if return_states:
-            return outputs, {
-                'state': state,
-                'power': self.power
-            }
-        return outputs
+        if self.base_model is not None:
+            output = self.base_model(
+                inputs_embeds=x,
+                output_hidden_states=True
+            )
+            last_hidden = output.hidden_states[-1]
+
+            if return_states:
+                return last_hidden, {
+                    'state': last_hidden[:, -1, :],
+                    'base_model_used': True
+                }
+            return last_hidden
+        else:
+            # Fallback: simple identity
+            if return_states:
+                return x, {'state': x[:, -1, :], 'base_model_used': False}
+            return x
 
 # =====================================================
 # Utility functions
 # =====================================================
 
+def load_custom_model(model_url: str, model_type: str = "phoenix"):
+    """Load a user-specified model"""
+    try:
+        if model_type == "phoenix":
+            model = PHOENIXRetention(
+                d_model=512,
+                d_state=256,
+                num_layers=12,
+                device=DEVICE,
+                base_model_url=model_url if model_url.strip() else None
+            )
+        else:  # transformer
+            model = TransformerBaseline(
+                d_model=512,
+                d_state=256,
+                device=DEVICE,
+                base_model_url=model_url if model_url.strip() else None
+            )
+
+        return model, None
+    except Exception as e:
+        return None, str(e)
+
 def calculate_metrics(output, states):
     """Compute metrics"""
     metrics = {}
 
     # Memory usage (approximate)
     total_params = sum(p.numel() for p in [output] if isinstance(p, torch.Tensor))
-    metrics['memory_mb'] = (total_params * 4) / (1024 * 1024)  # float32 = 4 bytes
+    metrics['memory_mb'] = (total_params * 4) / (1024 * 1024)
 
     # Retention ratios
     if 'short_state' in states:
@@ -509,8 +591,8 @@ def plot_memory_usage(metrics):
         x=['Memory (MB)', 'State Size', 'Compression Ratio'],
         y=[
             metrics.get('memory_mb', 0),
-            metrics.get('state_size', 0) / 10,  # Scale down
-            metrics.get('compression_ratio', 0) * 100  # Percentage
+            metrics.get('state_size', 0) / 10,
+            metrics.get('compression_ratio', 0) * 100
         ],
         marker_color=['lightblue', 'lightgreen', 'lightyellow']
     ))
@@ -527,7 +609,6 @@ def plot_performance_comparison(df):
     """Performance comparison visualization"""
     fig = go.Figure()
 
-    # Speed comparison
     fig.add_trace(go.Bar(
         name='Execution Time (s)',
         x=df['model'],
@@ -535,7 +616,6 @@ def plot_performance_comparison(df):
         marker_color='indianred'
     ))
 
-    # Throughput comparison
     fig.add_trace(go.Bar(
         name='Throughput (tokens/s)',
         x=df['model'],
@@ -563,33 +643,38 @@ def plot_performance_comparison(df):
 # Model initialization
 # =====================================================
 
-def initialize_models():
-    """Initialize the models"""
+def initialize_default_models():
+    """Initialize the default models"""
     models = {}
 
     try:
-        models['phoenix_small'] = PHOENIXRetention(
+        # PHOENIX with Granite
+        models['phoenix_granite'] = PHOENIXRetention(
             d_model=512,
             d_state=256,
             num_layers=12,
-            device=DEVICE
+            device=DEVICE,
+            base_model_url=DEFAULT_MODEL
         )
 
-        models['phoenix_medium'] = PHOENIXRetention(
-            d_model=1024,
-            d_state=512,
-            num_layers=24,
-            device=DEVICE
+        # PHOENIX without a base model
+        models['phoenix_standalone'] = PHOENIXRetention(
+            d_model=512,
+            d_state=256,
+            num_layers=12,
+            device=DEVICE,
+            base_model_url=None
         )
 
-        models['brumby_baseline'] = BrumbyRetention(
+        # Transformer baseline
+        models['transformer_granite'] = TransformerBaseline(
             d_model=512,
             d_state=256,
-            power=2,
-            device=DEVICE
+            device=DEVICE,
+            base_model_url=DEFAULT_MODEL
         )
 
-        print("✅ Models initialized successfully")
+        print("✅ Default models initialized")
         return models
 
     except Exception as e:
@@ -599,28 +684,36 @@ def initialize_models():
 # Initialize database and models
 db = ExperimentDatabase(DB_PATH)
 vector_store = RetentionVectorStore(VECTOR_DB_PATH)
-MODELS = initialize_models()
+MODELS = initialize_default_models()
 
 # =====================================================
 # Gradio interface functions
 # =====================================================
 
 def run_retention_experiment(
-    model_type, input_text, sequence_length,
+    model_type, custom_model_url, input_text, sequence_length,
     power_mode, compression_level, use_hierarchical
 ):
     """Run a PHOENIX Retention experiment"""
     try:
         start_time = time.time()
 
-        if model_type not in MODELS:
-            return "❌ Model not found.", None, None
-
-        model = MODELS[model_type]
+        # Load the custom model if a URL is given
+        if custom_model_url and custom_model_url.strip():
+            model, error = load_custom_model(custom_model_url, "phoenix")
+            if error:
+                return f"❌ Model loading failed: {error}", None, None
+            model_name = f"phoenix_custom_{custom_model_url.split('/')[-1]}"
+        else:
+            if model_type not in MODELS:
+                return "❌ Model not found.", None, None
+            model = MODELS[model_type]
+            model_name = model_type
 
         # Experiment configuration
         config = {
-            'model_type': model_type,
+            'model_type': model_name,
+            'base_model_url': custom_model_url if custom_model_url else model.base_model_url,
            'sequence_length': sequence_length,
            'power_mode': power_mode,
            'compression_level': compression_level,
@@ -649,15 +742,18 @@ def run_retention_experiment(
         vector_store.add_retention_state(experiment_id, states, config)
 
         # Result text
+        base_model_info = f"**Base Model**: {config['base_model_url']}\n" if config.get('base_model_url') else ""
+
         result_text = f"""
 ## 🎯 Experiment Results (ID: {experiment_id})
 
 ### ⚙️ Settings
-- **Model**: {model_type}
-- **Sequence length**: {sequence_length} tokens
+- **Model**: {model_name}
+{base_model_info}- **Sequence length**: {sequence_length} tokens
 - **Power mode**: {power_mode}
 - **Compression level**: {compression_level}
 - **Hierarchical retention**: {"✅" if use_hierarchical else "❌"}
+- **Base model used**: {"✅" if states.get('base_model_used') else "❌"}
 
 ### 📊 Performance Metrics
 - **Execution time**: {elapsed_time:.3f}s
@@ -682,11 +778,12 @@ def run_retention_experiment(
     except Exception as e:
         return f"❌ Experiment failed: {str(e)}", None, None
 
-def compare_retention_methods(input_text, sequence_length, benchmark_tasks):
+def compare_retention_methods(custom_model_url, input_text, sequence_length, benchmark_tasks):
     """Compare models"""
     try:
         results = []
 
+        # Test the default models
         for model_name, model in MODELS.items():
             start_time = time.time()
 
@@ -705,6 +802,26 @@ def compare_retention_methods(input_text, sequence_length, benchmark_tasks):
                 'throughput': sequence_length / elapsed_time
             })
 
+        # Test the custom model
+        if custom_model_url and custom_model_url.strip():
+            custom_model, error = load_custom_model(custom_model_url, "phoenix")
+            if not error:
+                start_time = time.time()
+                x = torch.randn(1, sequence_length, custom_model.d_model).to(DEVICE)
+
+                with torch.no_grad():
+                    output, states = custom_model(x, return_states=True)
+
+                elapsed_time = time.time() - start_time
+                metrics = calculate_metrics(output, states)
+
+                results.append({
+                    'model': f"custom_{custom_model_url.split('/')[-1]}",
+                    'time': elapsed_time,
+                    'memory': metrics.get('memory_mb', 0),
+                    'throughput': sequence_length / elapsed_time
+                })
+
         df = pd.DataFrame(results)
         fig = plot_performance_comparison(df)
 
@@ -744,6 +861,7 @@ def search_experiments(query, top_k=10):
             search_text += f"""
 ### {i}. Experiment #{exp_id} (similarity: {score:.3f})
 - **Model**: {metadata.get('model_type', 'N/A')}
+- **Base Model**: {metadata.get('base_model_url', 'N/A')}
 - **Sequence length**: {metadata.get('sequence_length', 'N/A')}
 - **Time**: {metadata.get('timestamp', 'N/A')}
 ---
@@ -764,7 +882,6 @@ def view_experiment_history(limit=20):
 
         df = pd.DataFrame(experiments)
 
-        # Performance trend over time
         fig = px.line(
             df,
             x='timestamp',
@@ -776,7 +893,7 @@ def view_experiment_history(limit=20):
         history_text = f"""
 ## 📊 Experiment History ({len(df)} entries)
 
-{df[['id', 'model_type', 'sequence_length', 'elapsed_time', 'throughput', 'timestamp']].to_markdown(index=False)}
+{df[['id', 'model_type', 'base_model_url', 'sequence_length', 'elapsed_time', 'throughput', 'timestamp']].to_markdown(index=False)}
 """
 
         return history_text, fig
@@ -800,6 +917,10 @@ def get_database_statistics():
         for model, count in stats['by_model'].items():
             stats_text += f"- **{model}**: {count}\n"
 
+        stats_text += "\n### Experiments per base model\n"
+        for base_model, count in stats['by_base_model'].items():
+            stats_text += f"- **{base_model}**: {count}\n"
+
         return stats_text
 
     except Exception as e:
@@ -819,7 +940,8 @@ with gr.Blocks(
 
     **Post-Hierarchical Optimized Efficient Neural Infinite-conteXt**
 
-    A next-generation attention-free architecture research platform that surpasses Brumby
+    A next-generation attention-free architecture research platform
+    Base Model: **IBM Granite 4.0 H 350M** (or a user-specified model)
 
     ---
     """)
@@ -832,8 +954,15 @@ with gr.Blocks(
         with gr.Column(scale=1):
             model_select = gr.Dropdown(
                 choices=list(MODELS.keys()),
-                value='phoenix_small',
-                label="Select model"
+                value='phoenix_granite',
+                label="Select a default model"
+            )
+
+            custom_model_url = gr.Textbox(
+                label="🔗 Custom base model URL (optional)",
+                placeholder="e.g. ibm-granite/granite-4.0-h-350m or meta-llama/Llama-3.2-1B",
+                value="",
+                info="Enter a Hugging Face model URL to use that model as the base"
            )
 
            input_text = gr.Textbox(
@@ -875,7 +1004,7 @@ with gr.Blocks(
 
     run_btn.click(
         fn=run_retention_experiment,
-        inputs=[model_select, input_text, sequence_length,
+        inputs=[model_select, custom_model_url, input_text, sequence_length,
                 power_mode, compression_level, use_hierarchical],
         outputs=[result_output, states_plot, memory_plot]
     )
@@ -884,6 +1013,12 @@ with gr.Blocks(
 
    with gr.Tab("⚔️ Model Comparison"):
        with gr.Row():
            with gr.Column(scale=1):
+                compare_custom_url = gr.Textbox(
+                    label="🔗 Additional comparison model URL (optional)",
+                    placeholder="e.g. microsoft/phi-2",
+                    value=""
+                )
+
                compare_text = gr.Textbox(
                    label="Comparison text",
                    lines=5,
@@ -909,7 +1044,7 @@ with gr.Blocks(
 
    compare_btn.click(
        fn=compare_retention_methods,
-        inputs=[compare_text, compare_length, benchmark_tasks],
+        inputs=[compare_custom_url, compare_text, compare_length, benchmark_tasks],
        outputs=[compare_result, compare_plot]
    )
 
@@ -967,6 +1102,14 @@ with gr.Blocks(
    2. **Adaptive compression** - importance-based dynamic compression
    3. **Dynamic power** - automatic optimization based on the input
    4. **Parallel paths** - multiple strategies run concurrently
+    5. **Custom base** - any HF model supported
+
+    ### 📚 Recommended Base Models
+    - `ibm-granite/granite-4.0-h-350m` (default)
+    - `meta-llama/Llama-3.2-1B`
+    - `microsoft/phi-2`
+    - `Qwen/Qwen2.5-0.5B`
+    - `google/gemma-2-2b`
 
    **VIDraft AI Research Lab** | L40S GPU + Persistent Storage
    """)
 
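A note on the schema change above: `CREATE TABLE IF NOT EXISTS` only runs when the table is absent, so an `experiments` table created on the persistent `/data` volume before this commit will not pick up the new `base_model_url` column, and the widened 13-value `INSERT` would then fail. A minimal migration sketch (the helper name and placement are hypothetical, not part of this commit):

```python
import sqlite3

def migrate_add_base_model_url(db_path: str = "/data/phoenix_experiments.db") -> None:
    """Add base_model_url to an experiments table created before this commit."""
    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    # PRAGMA table_info rows are (cid, name, type, notnull, dflt_value, pk)
    columns = [row[1] for row in cursor.execute("PRAGMA table_info(experiments)")]
    if "base_model_url" not in columns:
        cursor.execute("ALTER TABLE experiments ADD COLUMN base_model_url TEXT")
        cursor.execute(
            "CREATE INDEX IF NOT EXISTS idx_base_model ON experiments(base_model_url)"
        )
        conn.commit()
    conn.close()
```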
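For reference, the new custom-base-model path can be exercised in isolation roughly as follows; a sketch assuming the definitions above are importable as a module (`from app import ...`), mirroring what `compare_retention_methods` does internally:

```python
import torch

from app import DEVICE, load_custom_model  # assumes app.py imports cleanly as a module

model, error = load_custom_model("ibm-granite/granite-4.0-h-350m", "phoenix")
if error:
    raise RuntimeError(f"model load failed: {error}")

# model.d_model is overwritten with the base model's hidden_size at load time,
# so this random tensor has the width that inputs_embeds expects downstream.
x = torch.randn(1, 128, model.d_model).to(DEVICE)
with torch.no_grad():
    output, states = model(x, return_states=True)

print(states["base_model_used"])  # True only when the HF base model actually loaded
```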