Ahmedik95316 committed on
Commit
22ef2c1
·
1 Parent(s): cc36ad4

Create path_config.py

Browse files
Files changed (1) hide show
  1. path_config.py +246 -0
path_config.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ from pathlib import Path
4
+ from typing import Dict, Optional
5
+ import logging
6
+
7
# Module-scoped logger; handlers and levels are left to the importing application.
logger = logging.getLogger(__name__)
8
+
9
class EnvironmentPathManager:
    """Dynamic path management for different deployment environments.

    On construction the manager detects the deployment environment, builds
    the matching directory layout and tries to create every directory in it.

    Attributes:
        environment: One of ``'huggingface_spaces'``, ``'docker'``,
            ``'container'`` or ``'local'``.
        base_paths: Mapping with the keys ``'base'``, ``'data'``, ``'model'``,
            ``'logs'``, ``'cache'`` and ``'temp'``, each a directory ``Path``.
    """

    # Root directory shared by every containerised layout.
    _CONTAINER_ROOT = Path('/app')
    # File whose presence marks the project root during local development.
    _PROJECT_MARKER = 'requirements.txt'
    # Directories that may be relocated to the user's home when not writable.
    _FALLBACK_KEYS = ('cache', 'temp')

    def __init__(self):
        self.environment = self._detect_environment()
        self.base_paths = self._configure_paths()
        self._ensure_directories()

    # ------------------------------------------------------------------
    # Environment detection and layout
    # ------------------------------------------------------------------
    @staticmethod
    def _is_within(path: Path, root: Path) -> bool:
        """Return True when *path* is *root* itself or located below it.

        The comparison is made on path components, so ``/application`` is
        not considered to be inside ``/app``.
        """
        return path == root or root in path.parents

    @staticmethod
    def _find_project_root(start: Path, marker: str = 'requirements.txt') -> Path:
        """Return the nearest directory at or above *start* containing *marker*.

        When no ancestor contains *marker*, *start* itself is returned so
        that project directories are never placed at the filesystem root.
        """
        for candidate in (start, *start.parents):
            if (candidate / marker).exists():
                return candidate
        return start

    @staticmethod
    def _layout(root: Path, scratch: Optional[Path] = None) -> Dict[str, Path]:
        """Build the directory mapping rooted at *root*.

        Args:
            root: Directory holding ``data``, ``model`` and ``logs``.
            scratch: Directory holding ``cache`` and ``temp``; defaults to
                *root* when omitted.
        """
        scratch_root = root if scratch is None else scratch
        return {
            'base': root,
            'data': root / 'data',
            'model': root / 'model',
            'logs': root / 'logs',
            'cache': scratch_root / 'cache',
            'temp': scratch_root / 'temp',
        }

    def _detect_environment(self) -> str:
        """Detect the current deployment environment.

        Checks, in order: HuggingFace Spaces (``SPACE_ID`` set or an app
        entry point under ``/app``), Docker (``/.dockerenv`` present or
        ``DOCKER_CONTAINER`` set), a working directory inside ``/app``,
        and finally falls back to ``'local'``.
        """
        hf_entry_points = ('/app/app.py', '/app/streamlit_app.py')
        if os.environ.get('SPACE_ID') or any(os.path.exists(p) for p in hf_entry_points):
            return 'huggingface_spaces'

        if os.path.exists('/.dockerenv') or os.environ.get('DOCKER_CONTAINER'):
            return 'docker'

        # Component-wise check: a plain string prefix test would also match
        # unrelated directories such as /application.
        if self._is_within(Path.cwd(), self._CONTAINER_ROOT):
            return 'container'

        return 'local'

    def _configure_paths(self) -> Dict[str, Path]:
        """Configure paths based on ``self.environment``."""
        if self.environment == 'huggingface_spaces':
            # HuggingFace Spaces: everything lives under /app.
            return self._layout(self._CONTAINER_ROOT)

        if self.environment in ('docker', 'container'):
            # Docker/Container: /app structure with /tmp for cache and temp.
            return self._layout(self._CONTAINER_ROOT, scratch=Path('/tmp'))

        # Local development: walk up from this file to the project root.
        here = Path(__file__).resolve().parent
        return self._layout(self._find_project_root(here, self._PROJECT_MARKER))

    def _use_home_fallback(self, path_name: str) -> None:
        """Point ``base_paths[path_name]`` at ``~/.fake_news_detector/<name>``.

        The entry is left untouched when the fallback cannot be created.
        """
        try:
            fallback_path = Path.home() / f'.fake_news_detector/{path_name}'
            fallback_path.mkdir(parents=True, exist_ok=True)
        except (OSError, RuntimeError) as e:
            # Path.home() raises RuntimeError when no home can be resolved.
            logger.error(f"Failed to create fallback directory for {path_name}: {e}")
            return
        self.base_paths[path_name] = fallback_path

    def _ensure_directories(self):
        """Ensure all necessary directories exist (best effort, never raises
        for filesystem errors)."""
        # Iterate over a snapshot because fallbacks rewrite base_paths entries.
        for path_name, path in list(self.base_paths.items()):
            try:
                path.mkdir(parents=True, exist_ok=True)
            except PermissionError:
                if path_name in self._FALLBACK_KEYS:
                    logger.warning(f"Cannot create directory {path}, using fallback")
                    self._use_home_fallback(path_name)
                else:
                    # No fallback exists for base/data/model/logs.
                    logger.warning(f"Cannot create directory {path}: permission denied")
            except OSError as e:
                logger.error(f"Failed to create directory {path}: {e}")
            else:
                logger.debug(f"Ensured directory exists: {path}")

    # ------------------------------------------------------------------
    # Directory accessors
    # ------------------------------------------------------------------
    def _resolve(self, key: str, filename: str = '') -> Path:
        """Return ``base_paths[key]``, joined with *filename* when non-empty."""
        directory = self.base_paths[key]
        return directory / filename if filename else directory

    def get_data_path(self, filename: str = '') -> Path:
        """Get data directory path, or *filename* inside it."""
        return self._resolve('data', filename)

    def get_model_path(self, filename: str = '') -> Path:
        """Get model directory path, or *filename* inside it."""
        return self._resolve('model', filename)

    def get_logs_path(self, filename: str = '') -> Path:
        """Get logs directory path, or *filename* inside it."""
        return self._resolve('logs', filename)

    def get_cache_path(self, filename: str = '') -> Path:
        """Get cache directory path, or *filename* inside it."""
        return self._resolve('cache', filename)

    def get_temp_path(self, filename: str = '') -> Path:
        """Get temporary directory path, or *filename* inside it."""
        return self._resolve('temp', filename)

    # ------------------------------------------------------------------
    # Well-known files
    # ------------------------------------------------------------------
    def get_activity_log_path(self) -> Path:
        """Get activity log file path"""
        return self.get_logs_path('activity_log.json')

    def get_metadata_path(self) -> Path:
        """Get model metadata file path"""
        return self.get_model_path('metadata.json')

    def get_combined_dataset_path(self) -> Path:
        """Get combined dataset path"""
        return self.get_data_path('combined_dataset.csv')

    def get_scraped_data_path(self) -> Path:
        """Get scraped data path"""
        return self.get_data_path('scraped_real.csv')

    def get_generated_data_path(self) -> Path:
        """Get generated fake data path"""
        return self.get_data_path('generated_fake.csv')

    def get_model_file_path(self) -> Path:
        """Get main model file path"""
        return self.get_model_path('model.pkl')

    def get_vectorizer_path(self) -> Path:
        """Get vectorizer file path"""
        return self.get_model_path('vectorizer.pkl')

    def get_pipeline_path(self) -> Path:
        """Get pipeline file path"""
        return self.get_model_path('pipeline.pkl')

    def get_candidate_model_path(self) -> Path:
        """Get candidate model file path"""
        return self.get_model_path('model_candidate.pkl')

    def get_candidate_vectorizer_path(self) -> Path:
        """Get candidate vectorizer file path"""
        return self.get_model_path('vectorizer_candidate.pkl')

    def get_candidate_pipeline_path(self) -> Path:
        """Get candidate pipeline file path"""
        return self.get_model_path('pipeline_candidate.pkl')

    # ------------------------------------------------------------------
    # Inventory and diagnostics
    # ------------------------------------------------------------------
    def list_available_datasets(self) -> Dict[str, bool]:
        """List known dataset files and whether each exists on disk."""
        kaggle_dir = self.get_data_path() / 'kaggle'
        return {
            'combined_dataset.csv': self.get_combined_dataset_path().exists(),
            'scraped_real.csv': self.get_scraped_data_path().exists(),
            'generated_fake.csv': self.get_generated_data_path().exists(),
            'kaggle/Fake.csv': (kaggle_dir / 'Fake.csv').exists(),
            'kaggle/True.csv': (kaggle_dir / 'True.csv').exists(),
        }

    def list_available_models(self) -> Dict[str, bool]:
        """List known model artifacts and whether each exists on disk."""
        return {
            'model.pkl': self.get_model_file_path().exists(),
            'vectorizer.pkl': self.get_vectorizer_path().exists(),
            'pipeline.pkl': self.get_pipeline_path().exists(),
            'model_candidate.pkl': self.get_candidate_model_path().exists(),
            'vectorizer_candidate.pkl': self.get_candidate_vectorizer_path().exists(),
            'pipeline_candidate.pkl': self.get_candidate_pipeline_path().exists(),
            'metadata.json': self.get_metadata_path().exists(),
        }

    def get_environment_info(self) -> Dict:
        """Get comprehensive environment information.

        Returns:
            A dict with the detected environment, the base/data/model/logs
            directories as strings, dataset and model availability maps, the
            current working directory, the first ``sys.path`` entry (empty
            string when ``sys.path`` is empty), the ``SPACE_ID`` value and
            whether ``/.dockerenv`` exists.
        """
        return {
            'environment': self.environment,
            'base_dir': str(self.base_paths['base']),
            'data_dir': str(self.base_paths['data']),
            'model_dir': str(self.base_paths['model']),
            'logs_dir': str(self.base_paths['logs']),
            'available_datasets': self.list_available_datasets(),
            'available_models': self.list_available_models(),
            'current_working_directory': str(Path.cwd()),
            'python_path': sys.path[0] if sys.path else '',
            'space_id': os.environ.get('SPACE_ID', 'Not HF Spaces'),
            'docker_env': os.path.exists('/.dockerenv'),
        }

    @staticmethod
    def _log_availability(heading: str, entries: Dict[str, bool]) -> None:
        """Log a ``found/total`` summary followed by one line per entry."""
        logger.info(f"{heading}: {sum(entries.values())}/{len(entries)}")
        for name, exists in entries.items():
            status = "✅" if exists else "❌"
            logger.info(f" {status} {name}")

    def log_environment_info(self):
        """Log detailed environment information at INFO level."""
        info = self.get_environment_info()
        logger.info(f"🌍 Environment: {info['environment']}")
        logger.info(f"📁 Base directory: {info['base_dir']}")
        logger.info(f"📊 Data directory: {info['data_dir']}")
        logger.info(f"🤖 Model directory: {info['model_dir']}")
        logger.info(f"📝 Logs directory: {info['logs_dir']}")

        # Log available files
        self._log_availability("📈 Available datasets", info['available_datasets'])
        self._log_availability("🎯 Available models", info['available_models'])
223
+
224
# Shared module-level manager; constructing it runs environment detection and
# directory creation once, at import time.
path_manager: EnvironmentPathManager = EnvironmentPathManager()
226
+
227
+ # Convenience functions for backward compatibility
228
def get_data_path(filename: str = '') -> Path:
    """Return the shared manager's data directory, or *filename* inside it.

    Thin module-level wrapper around ``path_manager.get_data_path``.
    """
    resolve = path_manager.get_data_path
    return resolve(filename)
230
+
231
def get_model_path(filename: str = '') -> Path:
    """Return the shared manager's model directory, or *filename* inside it.

    Thin module-level wrapper around ``path_manager.get_model_path``.
    """
    resolve = path_manager.get_model_path
    return resolve(filename)
233
+
234
def get_logs_path(filename: str = '') -> Path:
    """Return the shared manager's logs directory, or *filename* inside it.

    Thin module-level wrapper around ``path_manager.get_logs_path``.
    """
    resolve = path_manager.get_logs_path
    return resolve(filename)
236
+
237
def get_environment_info() -> Dict:
    """Return the environment report produced by the shared ``path_manager``.

    Thin module-level wrapper around ``path_manager.get_environment_info``.
    """
    report = path_manager.get_environment_info()
    return report
239
+
240
def log_environment_info():
    """Log the shared ``path_manager``'s environment report.

    Thin module-level wrapper around ``path_manager.log_environment_info``;
    returns ``None``.
    """
    emit_report = path_manager.log_environment_info
    emit_report()
242
+
243
# Debug entry point: running this module directly prints the environment
# report through a basic INFO-level logging configuration.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    log_environment_info()