Ahmedik95316 committed on
Commit
e9f59fc
·
1 Parent(s): 50673ca

Update model/retrain.py

Browse files

Fixed the file paths to point to the `/tmp` folder, because the original paths are read-only

Files changed (1) hide show
  1. model/retrain.py +34 -9
model/retrain.py CHANGED
@@ -10,22 +10,47 @@ import hashlib
10
  import datetime
11
  import shutil
12
 
13
- # Paths
14
- BASE_DIR = Path(__file__).resolve().parent
15
- DATA_DIR = BASE_DIR.parent / "data"
16
- LOGS_DIR = BASE_DIR.parent / "logs"
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  COMBINED = DATA_DIR / "combined_dataset.csv"
19
  SCRAPED = DATA_DIR / "scraped_real.csv"
20
  GENERATED = DATA_DIR / "generated_fake.csv"
21
 
22
- PROD_MODEL = BASE_DIR / "model.pkl"
23
- PROD_VECTORIZER = BASE_DIR / "vectorizer.pkl"
24
 
25
- CANDIDATE_MODEL = BASE_DIR / "model_candidate.pkl"
26
- CANDIDATE_VECTORIZER = BASE_DIR / "vectorizer_candidate.pkl"
27
 
28
- METADATA_PATH = BASE_DIR / "metadata.json"
29
 
30
  def hash_file(path: Path):
31
  return hashlib.md5(path.read_bytes()).hexdigest()
 
10
  import datetime
11
  import shutil
12
 
13
+ # # Paths
14
+ # BASE_DIR = Path(__file__).resolve().parent
15
+ # DATA_DIR = BASE_DIR.parent / "data"
16
+ # LOGS_DIR = BASE_DIR.parent / "logs"
17
 
18
+ # COMBINED = DATA_DIR / "combined_dataset.csv"
19
+ # SCRAPED = DATA_DIR / "scraped_real.csv"
20
+ # GENERATED = DATA_DIR / "generated_fake.csv"
21
+
22
+ # PROD_MODEL = BASE_DIR / "model.pkl"
23
+ # PROD_VECTORIZER = BASE_DIR / "vectorizer.pkl"
24
+
25
+ # CANDIDATE_MODEL = BASE_DIR / "model_candidate.pkl"
26
+ # CANDIDATE_VECTORIZER = BASE_DIR / "vectorizer_candidate.pkl"
27
+
28
+ # METADATA_PATH = BASE_DIR / "metadata.json"
29
+
30
+ # Use /tmp as the writable directory in Docker/Hugging Face
31
+ BASE_DIR = Path("/tmp")
32
+
33
+ # Create writable subdirectories if they don’t exist
34
+ DATA_DIR = BASE_DIR / "data"
35
+ LOGS_DIR = BASE_DIR / "logs"
36
+ MODEL_DIR = BASE_DIR / "model"
37
+
38
+ DATA_DIR.mkdir(parents=True, exist_ok=True)
39
+ LOGS_DIR.mkdir(parents=True, exist_ok=True)
40
+ MODEL_DIR.mkdir(parents=True, exist_ok=True)
41
+
42
+ # File paths
43
  COMBINED = DATA_DIR / "combined_dataset.csv"
44
  SCRAPED = DATA_DIR / "scraped_real.csv"
45
  GENERATED = DATA_DIR / "generated_fake.csv"
46
 
47
+ PROD_MODEL = MODEL_DIR / "model.pkl"
48
+ PROD_VECTORIZER = MODEL_DIR / "vectorizer.pkl"
49
 
50
+ CANDIDATE_MODEL = MODEL_DIR / "model_candidate.pkl"
51
+ CANDIDATE_VECTORIZER = MODEL_DIR / "vectorizer_candidate.pkl"
52
 
53
+ METADATA_PATH = MODEL_DIR / "metadata.json"
54
 
55
  def hash_file(path: Path):
56
  return hashlib.md5(path.read_bytes()).hexdigest()