tomas.helmfridsson committed on
Commit
407ce33
·
1 Parent(s): 3d5c39a
Files changed (3) hide show
  1. .gitignore +25 -0
  2. app.py +8 -7
  3. requirements.txt +1 -0
.gitignore ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyo
5
+ *.pyd
6
+
7
+ # OS-filer
8
+ .DS_Store
9
+ Thumbs.db
10
+
11
+ # Hugging Face cache eller stora modeller
12
+ *.bin
13
+ *.pt
14
+ *.onnx
15
+ *.safetensors
16
+
17
+ # Loggar och tillfälliga filer
18
+ *.log
19
+ *.zip
20
+ *.tmp
21
+ *.cache/
22
+
23
+ # Lokala miljöinställningar
24
+ .env
25
+ .venv
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  from langchain_community.document_loaders import PyPDFLoader
3
  from langchain_community.vectorstores import FAISS
4
- from langchain_community.embeddings import HuggingFaceEmbeddings
5
  from langchain_community.llms import HuggingFacePipeline
6
  from langchain.chains import RetrievalQA
7
  from transformers import pipeline
@@ -10,9 +10,9 @@ import os
10
  # 1. Ladda och indexera alla PDF:er i mappen "dokument/"
11
  def load_vectorstore():
12
  all_docs = []
13
- for filename in os.listdir("document"):
14
  if filename.endswith(".pdf"):
15
- path = os.path.join("document", filename)
16
  loader = PyPDFLoader(path)
17
  docs = loader.load_and_split()
18
  all_docs.extend(docs)
@@ -21,12 +21,13 @@ def load_vectorstore():
21
 
22
  vectorstore = load_vectorstore()
23
 
24
- # 2. Initiera Zephyr-modellen
25
- def load_zephyr():
26
- pipe = pipeline("text-generation", model="HuggingFaceH4/zephyr-7b-beta")
 
27
  return HuggingFacePipeline(pipeline=pipe, model_kwargs={"temperature": 0.3, "max_new_tokens": 512})
28
 
29
- llm = load_zephyr()
30
 
31
  # 3. Bygg QA-kedjan
32
  qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vectorstore.as_retriever())
 
1
  import gradio as gr
2
  from langchain_community.document_loaders import PyPDFLoader
3
  from langchain_community.vectorstores import FAISS
4
+ from langchain_huggingface.embeddings import HuggingFaceEmbeddings
5
  from langchain_community.llms import HuggingFacePipeline
6
  from langchain.chains import RetrievalQA
7
  from transformers import pipeline
 
10
  # 1. Ladda och indexera alla PDF:er i mappen "dokument/"
11
  def load_vectorstore():
12
  all_docs = []
13
+ for filename in os.listdir("dokument"):
14
  if filename.endswith(".pdf"):
15
+ path = os.path.join("dokument", filename)
16
  loader = PyPDFLoader(path)
17
  docs = loader.load_and_split()
18
  all_docs.extend(docs)
 
21
 
22
  vectorstore = load_vectorstore()
23
 
24
+ # 2. Initiera en mindre modell
25
+
26
+ def load_model():
27
+ pipe = pipeline("text-generation", model="google/gemma-2b-it")
28
  return HuggingFacePipeline(pipeline=pipe, model_kwargs={"temperature": 0.3, "max_new_tokens": 512})
29
 
30
+ llm = load_model()
31
 
32
  # 3. Bygg QA-kedjan
33
  qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vectorstore.as_retriever())
requirements.txt CHANGED
@@ -2,6 +2,7 @@ huggingface_hub==0.25.2
2
  gradio
3
  langchain[all]>=0.1.14
4
  langchain-community>=0.0.19
 
5
  transformers
6
  sentence-transformers
7
  faiss-cpu
 
2
  gradio
3
  langchain[all]>=0.1.14
4
  langchain-community>=0.0.19
5
+ langchain-huggingface>=0.0.6
6
  transformers
7
  sentence-transformers
8
  faiss-cpu