Spaces:
Runtime error
Runtime error
Médéric Hurier (Fmind)
committed on
Commit
·
5c68cc7
1
Parent(s):
beb32ed
Fix configs
Browse files
- app.py +5 -3
- database.py +3 -2
- invoke.yaml +1 -0
- lib.py +10 -5
- packages.txt +0 -1
- tasks/convert.py +1 -0
app.py
CHANGED
|
@@ -18,15 +18,17 @@ logging.basicConfig(
|
|
| 18 |
|
| 19 |
# %% CONFIGS
|
| 20 |
|
| 21 |
-
TITLE = "Fmind AI Assistant"
|
| 22 |
THEME = "glass"
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
| 25 |
FUNCTION = lib.get_embedding_function()
|
| 26 |
COLLECTION = CLIENT.get_collection(
|
| 27 |
name=lib.DATABASE_COLLECTION,
|
| 28 |
embedding_function=FUNCTION,
|
| 29 |
)
|
|
|
|
| 30 |
EXAMPLES = [
|
| 31 |
"Who is Médéric Hurier (Fmind)?",
|
| 32 |
"Is Fmind open to new opportunities?",
|
|
|
|
| 18 |
|
| 19 |
# %% CONFIGS
|
| 20 |
|
|
|
|
| 21 |
THEME = "glass"
|
| 22 |
+
TITLE = "Fmind Chatbot"
|
| 23 |
+
|
| 24 |
+
CLIENT = lib.get_database_client(path=lib.DATABASE_PATH)
|
| 25 |
+
ENCODING = tiktoken.get_encoding(encoding_name=lib.EMBEDDING_TOKENIZER)
|
| 26 |
FUNCTION = lib.get_embedding_function()
|
| 27 |
COLLECTION = CLIENT.get_collection(
|
| 28 |
name=lib.DATABASE_COLLECTION,
|
| 29 |
embedding_function=FUNCTION,
|
| 30 |
)
|
| 31 |
+
|
| 32 |
EXAMPLES = [
|
| 33 |
"Who is Médéric Hurier (Fmind)?",
|
| 34 |
"Is Fmind open to new opportunities?",
|
database.py
CHANGED
|
@@ -21,8 +21,9 @@ logging.basicConfig(
|
|
| 21 |
# %% PARSING
|
| 22 |
|
| 23 |
PARSER = argparse.ArgumentParser(description=__doc__)
|
| 24 |
-
PARSER.add_argument("--database", type=str, required=True)
|
| 25 |
PARSER.add_argument("files", type=argparse.FileType("r"), nargs="+")
|
|
|
|
|
|
|
| 26 |
|
| 27 |
# %% FUNCTIONS
|
| 28 |
|
|
@@ -67,7 +68,7 @@ def main(args: list[str] | None = None) -> int:
|
|
| 67 |
embedding_function = lib.get_embedding_function()
|
| 68 |
logging.info("Embedding function: %s", embedding_function)
|
| 69 |
# collection
|
| 70 |
-
database_collection =
|
| 71 |
logging.info("Database collection: %s", database_collection)
|
| 72 |
collection = client.create_collection(
|
| 73 |
name=database_collection, embedding_function=embedding_function
|
|
|
|
| 21 |
# %% PARSING
|
| 22 |
|
| 23 |
PARSER = argparse.ArgumentParser(description=__doc__)
|
|
|
|
| 24 |
PARSER.add_argument("files", type=argparse.FileType("r"), nargs="+")
|
| 25 |
+
PARSER.add_argument("--database", type=str, default=lib.DATABASE_PATH)
|
| 26 |
+
PARSER.add_argument("--collection", type=str, default=lib.DATABASE_COLLECTION)
|
| 27 |
|
| 28 |
# %% FUNCTIONS
|
| 29 |
|
|
|
|
| 68 |
embedding_function = lib.get_embedding_function()
|
| 69 |
logging.info("Embedding function: %s", embedding_function)
|
| 70 |
# collection
|
| 71 |
+
database_collection = opts.collection
|
| 72 |
logging.info("Database collection: %s", database_collection)
|
| 73 |
collection = client.create_collection(
|
| 74 |
name=database_collection, embedding_function=embedding_function
|
invoke.yaml
CHANGED
|
@@ -5,6 +5,7 @@ run:
|
|
| 5 |
app:
|
| 6 |
path: "app.py"
|
| 7 |
database:
|
|
|
|
| 8 |
path: "database"
|
| 9 |
linkedin:
|
| 10 |
html: "files/linkedin.html"
|
|
|
|
| 5 |
app:
|
| 6 |
path: "app.py"
|
| 7 |
database:
|
| 8 |
+
collection: "resume"
|
| 9 |
path: "database"
|
| 10 |
linkedin:
|
| 11 |
html: "files/linkedin.html"
|
lib.py
CHANGED
|
@@ -14,15 +14,20 @@ sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
|
|
| 14 |
import chromadb
|
| 15 |
from chromadb.utils import embedding_functions
|
| 16 |
|
| 17 |
-
# %% TYPINGS
|
| 18 |
-
|
| 19 |
-
Collection = chromadb.Collection
|
| 20 |
-
|
| 21 |
# %% CONFIGS
|
| 22 |
|
| 23 |
DATABASE_COLLECTION = "resume"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
# %% FUNCTIONS
|
| 27 |
|
| 28 |
|
|
@@ -36,7 +41,7 @@ def get_database_client(path: str) -> chromadb.API:
|
|
| 36 |
|
| 37 |
|
| 38 |
def get_embedding_function(
|
| 39 |
-
model_name: str =
|
| 40 |
) -> embedding_functions.EmbeddingFunction:
|
| 41 |
"""Get the embedding function for Chroma DB collections."""
|
| 42 |
return embedding_functions.OpenAIEmbeddingFunction(
|
|
|
|
| 14 |
import chromadb
|
| 15 |
from chromadb.utils import embedding_functions
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
# %% CONFIGS
|
| 18 |
|
| 19 |
DATABASE_COLLECTION = "resume"
|
| 20 |
+
DATABASE_PATH = "database"
|
| 21 |
+
|
| 22 |
+
EMBEDDING_MODEL = "text-embedding-ada-002"
|
| 23 |
+
EMBEDDING_TOKENIZER = "cl100k_base"
|
| 24 |
+
|
| 25 |
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
|
| 26 |
|
| 27 |
+
# %% TYPINGS
|
| 28 |
+
|
| 29 |
+
Collection = chromadb.Collection
|
| 30 |
+
|
| 31 |
# %% FUNCTIONS
|
| 32 |
|
| 33 |
|
|
|
|
| 41 |
|
| 42 |
|
| 43 |
def get_embedding_function(
|
| 44 |
+
model_name: str = EMBEDDING_MODEL, api_key: str = OPENAI_API_KEY
|
| 45 |
) -> embedding_functions.EmbeddingFunction:
|
| 46 |
"""Get the embedding function for Chroma DB collections."""
|
| 47 |
return embedding_functions.OpenAIEmbeddingFunction(
|
packages.txt
CHANGED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
# https://huggingface.co/docs/hub/spaces-dependencies
|
|
|
|
|
|
tasks/convert.py
CHANGED
|
@@ -26,6 +26,7 @@ def database(ctx: Context) -> None:
|
|
| 26 |
ctx.run(
|
| 27 |
f"""{ctx.venv.python} database.py \
|
| 28 |
--database={ctx.database.path} \
|
|
|
|
| 29 |
{ctx.linkedin.markdown}
|
| 30 |
"""
|
| 31 |
)
|
|
|
|
| 26 |
ctx.run(
|
| 27 |
f"""{ctx.venv.python} database.py \
|
| 28 |
--database={ctx.database.path} \
|
| 29 |
+
--collection={ctx.database.collection} \
|
| 30 |
{ctx.linkedin.markdown}
|
| 31 |
"""
|
| 32 |
)
|