Syluh27 committed on
Commit
03e0d76
·
1 Parent(s): 897e091
Files changed (1) hide show
  1. model.py +25 -29
model.py CHANGED
@@ -7,37 +7,32 @@ from huggingface_hub import hf_hub_download
7
  import os
8
  import shutil
9
 
10
- # Configuración inicial
11
  HF_TOKEN = os.getenv("HF_TOKEN")
12
  MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
13
 
14
- # Validar variables de entorno
15
- if not HF_TOKEN or not MISTRAL_API_KEY:
16
- raise ValueError("Faltan variables de entorno requeridas")
17
 
18
- # Rutas críticas
19
- CHROMA_DIR = "/home/user/app/chroma_db" # Directorio exclusivo para Chroma
20
- CACHE_PATH = "/home/user/.cache/huggingface/hub/datasets--VictorCarr02--Conversational-Agent-LawsEC"
21
 
 
 
 
 
 
 
22
 
23
- # Limpieza inicial agresiva
24
- def clean_environment():
25
- # Eliminar caché de dataset
26
- if os.path.exists(CACHE_PATH):
27
- shutil.rmtree(CACHE_PATH, ignore_errors=True)
28
-
29
- # Eliminar directorio Chroma existente
30
- if os.path.exists(CHROMA_DIR):
31
- shutil.rmtree(CHROMA_DIR, ignore_errors=True)
32
-
33
- # Crear directorio Chroma vacío
34
  os.makedirs(CHROMA_DIR, exist_ok=True)
35
 
36
 
37
- clean_environment()
38
 
39
- # Descargar SOLO el archivo chroma.sqlite3
40
- chroma_sqlite_path = hf_hub_download(
41
  repo_id="VictorCarr02/Conversational-Agent-LawsEC",
42
  repo_type="dataset",
43
  filename="chroma.sqlite3",
@@ -45,36 +40,37 @@ chroma_sqlite_path = hf_hub_download(
45
  force_download=True
46
  )
47
 
48
- # Mover el archivo a la ubicación controlada
49
- os.rename(chroma_sqlite_path, os.path.join(CHROMA_DIR, "chroma.sqlite3"))
50
 
51
- # Configurar ChromaDB
52
  chroma_client = chromadb.PersistentClient(path=CHROMA_DIR)
 
53
 
54
- # Cargar embeddings (sin archivo local)
55
  embeddings = HuggingFaceEmbeddings(
56
  model_name="sentence-transformers/all-mpnet-base-v2",
57
  model_kwargs={"device": "cpu"}
58
  )
59
 
60
- # Inicializar vector store
61
  vector_store = Chroma(
62
  client=chroma_client,
63
  collection_name="legal_docs",
64
  embedding_function=embeddings
65
  )
66
 
67
- # Configurar LLM
68
  llm = ChatMistralAI(
69
  api_key=MISTRAL_API_KEY,
70
  model="mistral-large-latest",
71
  temperature=0.1
72
  )
73
 
74
- # Crear cadena RAG
75
  rag_chain = RetrievalQA.from_chain_type(
76
  llm=llm,
77
- retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
78
  chain_type="stuff",
79
  return_source_documents=True
80
  )
 
7
  import os
8
  import shutil
9
 
10
+ # Configuración esencial para Spaces
11
  HF_TOKEN = os.getenv("HF_TOKEN")
12
  MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
13
 
14
+ # 1. Configurar rutas específicas para Spaces
15
+ CHROMA_DIR = "/home/user/app/chroma_db" # Ruta dentro del espacio persistente
16
+ os.makedirs(CHROMA_DIR, exist_ok=True)
17
 
 
 
 
18
 
19
+ # 2. Limpieza inicial de conflictos
20
+ def clean_space():
21
+ paths_to_clean = [
22
+ "/home/user/.cache/huggingface/hub/datasets--VictorCarr02--Conversational-Agent-LawsEC",
23
+ CHROMA_DIR
24
+ ]
25
 
26
+ for path in paths_to_clean:
27
+ if os.path.exists(path):
28
+ shutil.rmtree(path, ignore_errors=True)
 
 
 
 
 
 
 
 
29
  os.makedirs(CHROMA_DIR, exist_ok=True)
30
 
31
 
32
+ clean_space()
33
 
34
+ # 3. Descargar y mover chroma.sqlite3
35
+ chroma_source = hf_hub_download(
36
  repo_id="VictorCarr02/Conversational-Agent-LawsEC",
37
  repo_type="dataset",
38
  filename="chroma.sqlite3",
 
40
  force_download=True
41
  )
42
 
43
+ # Mover al directorio controlado
44
+ shutil.move(chroma_source, os.path.join(CHROMA_DIR, "chroma.sqlite3"))
45
 
46
+ # 4. Inicializar ChromaDB
47
  chroma_client = chromadb.PersistentClient(path=CHROMA_DIR)
48
+ collection = chroma_client.get_or_create_collection("legal_docs")
49
 
50
+ # 5. Configurar embeddings (optimizado para Spaces)
51
  embeddings = HuggingFaceEmbeddings(
52
  model_name="sentence-transformers/all-mpnet-base-v2",
53
  model_kwargs={"device": "cpu"}
54
  )
55
 
56
+ # 6. Crear vector store
57
  vector_store = Chroma(
58
  client=chroma_client,
59
  collection_name="legal_docs",
60
  embedding_function=embeddings
61
  )
62
 
63
+ # 7. Configurar Mistral
64
  llm = ChatMistralAI(
65
  api_key=MISTRAL_API_KEY,
66
  model="mistral-large-latest",
67
  temperature=0.1
68
  )
69
 
70
+ # 8. Cadena RAG final
71
  rag_chain = RetrievalQA.from_chain_type(
72
  llm=llm,
73
+ retriever=vector_store.as_retriever(search_kwargs={"k": 2}),
74
  chain_type="stuff",
75
  return_source_documents=True
76
  )