jxtan committed on
Commit
89860e6
·
verified ·
1 Parent(s): cb176cd

Upload 6 files

Browse files
Files changed (6) hide show
  1. README.md +13 -10
  2. app.py +20 -0
  3. config.py +6 -0
  4. logger.py +4 -0
  5. requirements.txt +4 -0
  6. sentence_embeddings.py +84 -0
README.md CHANGED
@@ -1,10 +1,13 @@
1
- ---
2
- title: Fastapi Ai Endpoints
3
- emoji: 🐢
4
- colorFrom: green
5
- colorTo: red
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
1
+ https://huggingface.co/blog/HemanthSai7/deploy-applications-on-huggingface-spaces
2
+
3
+ Objective: Convert any Huggingface repository into an API endpoint
4
+
5
+ Users should be able to call a task endpoint and get the result back in a standard format
6
+
7
+ /sentence-embeddings
8
+
9
+ {
10
+ "model": "BAAI/bge-base-en-v1.5",
11
+ "inputs": ["This is one text", "This is second text"],
12
+ "parameters": {}
13
+ }
app.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi.middleware.cors import CORSMiddleware
2
+ from fastapi import FastAPI
3
+ import sentence_embeddings
4
+
5
# ASGI application object; middleware and routers below attach to it.
app = FastAPI()

# CORS Support: https://stackoverflow.com/a/66460861
# NOTE(review): every origin, method and header is allowed while credentials
# are enabled — confirm that such a permissive policy is intended.
origins = ["*"]
_cors_settings = {
    "allow_origins": origins,
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)

# Expose the routes declared in sentence_embeddings.py.
app.include_router(sentence_embeddings.router)

if __name__ == "__main__":
    # Imported lazily: uvicorn is only needed when this file is run directly.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
config.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import os
2
+ import dotenv
3
+
4
# Let python-dotenv populate the process environment before it is read below.
dotenv.load_dotenv()

# Test-mode switch, read once at import time. Defaults to off when the
# variable is unset. The comparison ignores case and surrounding whitespace so
# that "True", "true" and " TRUE " all enable it; any other value disables it.
TEST_MODE = os.getenv('TEST_MODE', 'False').strip().lower() == "true"
logger.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from datetime import datetime
2
+
3
def log(data: dict) -> None:
    """Print ``data`` on one line, prefixed with the current timestamp.

    The emitted line has the form ``<ISO-8601 timestamp>: <data>``, where the
    timestamp comes from ``datetime.now()`` (naive, no timezone attached).

    Args:
        data: The record to log; it is rendered with its ``str()`` form.
    """
    timestamp = datetime.now().isoformat()
    print(f"{timestamp}: {data}")
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ fastapi
4
+ uvicorn
sentence_embeddings.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional
2
+ from fastapi import APIRouter
3
+ from pydantic import BaseModel
4
+ from transformers import AutoTokenizer, AutoModel
5
+ import torch
6
+ from datetime import datetime
7
+ from logger import log
8
+ from hf_to_api.config import TEST_MODE
9
+
10
+ router = APIRouter()
11
+
12
class SentenceEmbeddingsInput(BaseModel):
    """Request body of the ``/sentence-embeddings`` endpoint."""

    # Texts to embed.
    inputs: list[str]
    # Model name; the endpoint looks it up in sentence_embeddings_mapping.
    model: str
    # Extra options, passed to the model's embedding function as-is.
    parameters: dict
16
+
17
class SentenceEmbeddingsOutput(BaseModel):
    """Response body of the ``/sentence-embeddings`` endpoint.

    Both fields default to ``None``; the endpoint fills in ``embeddings`` on
    success and ``error`` on failure.
    """

    # One vector (list of floats) per input text.
    embeddings: Optional[list[list[float]]] = None
    # Human-readable failure description.
    error: Optional[str] = None
20
+
21
@router.post('/sentence-embeddings')
def sentence_embeddings(inputs: SentenceEmbeddingsInput):
    """Compute embeddings for the requested texts with the requested model.

    The model name is looked up in ``sentence_embeddings_mapping``. Problems
    are never raised to the caller: an unknown model or a failure inside the
    embedding function is reported through the ``error`` field of the
    response instead.

    Args:
        inputs: Parsed request body (texts, model name, extra parameters).

    Returns:
        A ``SentenceEmbeddingsOutput`` with ``embeddings`` set on success or
        ``error`` set on failure.
    """
    start_time = datetime.now()
    fn = sentence_embeddings_mapping.get(inputs.model)
    if fn is None:
        return SentenceEmbeddingsOutput(
            error=f'No sentence embeddings model found for {inputs.model}'
        )

    # Only the model call is guarded, so that a failure is attributed to it.
    try:
        embeddings = fn(inputs.inputs, inputs.parameters)
    except Exception as e:
        # API boundary: convert any failure into an error response, but also
        # log it so the failure is visible on the server side.
        log({
            "task": "sentence_embeddings",
            "model": inputs.model,
            "start_time": start_time.isoformat(),
            "time_taken": (datetime.now() - start_time).total_seconds(),
            "inputs": inputs.inputs,
            "error": str(e),
            "parameters": inputs.parameters,
        })
        return SentenceEmbeddingsOutput(
            error=str(e)
        )

    log({
        "task": "sentence_embeddings",
        "model": inputs.model,
        "start_time": start_time.isoformat(),
        "time_taken": (datetime.now() - start_time).total_seconds(),
        "inputs": inputs.inputs,
        "outputs": embeddings,
        "parameters": inputs.parameters,
    })
    # Remember when this model last served a request successfully.
    loaded_models_last_updated[inputs.model] = datetime.now()
    return SentenceEmbeddingsOutput(
        embeddings=embeddings
    )
50
+
51
def generic_sentence_embeddings(model_name: str):
    """Build an embedding function bound to the model called ``model_name``.

    Nothing is loaded when this factory runs. The returned function loads the
    tokenizer and model on its first real call and stores them in the
    module-level ``loaded_models`` cache for later calls.

    Args:
        model_name: Identifier passed to ``AutoTokenizer.from_pretrained`` and
            ``AutoModel.from_pretrained``; also the cache key.

    Returns:
        A function ``process_texts(texts, parameters)`` that returns one
        embedding (list of floats) per input text.
    """

    def process_texts(texts: list[str], parameters: dict):
        """Return L2-normalized embeddings for ``texts``.

        ``parameters`` is accepted for interface uniformity and is currently
        unused.
        """
        if not texts:
            # Nothing to embed; avoid handing an empty batch to the tokenizer.
            return []

        if TEST_MODE:
            # Fixed dummy vectors, produced without loading any model.
            return [[0.1, 0.2] for _ in texts]

        cached = loaded_models.get(model_name)
        if cached is None:
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModel.from_pretrained(model_name)
            # Key by the model *name*: keying by the model object (as before)
            # made every lookup miss, so the model was reloaded per request.
            loaded_models[model_name] = (tokenizer, model)
        else:
            tokenizer, model = cached

        # Tokenize sentences
        encoded_input = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
        with torch.no_grad():
            model_output = model(**encoded_input)
            # Index 0 along the second axis of the first output: presumably
            # the first-token (CLS) vector per text — confirm per model.
            first_token_embeddings = model_output[0][:, 0]

        # normalize embeddings
        normalized = torch.nn.functional.normalize(first_token_embeddings, p=2, dim=1)
        return normalized.tolist()

    return process_texts
76
+
77
# NOTE(review): the original comment here read "Polling every X minutes to"
# and was left unfinished; presumably a periodic job was planned that uses
# loaded_models_last_updated — confirm with the author.

# (tokenizer, model) pairs, filled lazily by the embedding functions.
loaded_models = {}
# Time of the most recent successful request, per model name.
loaded_models_last_updated = {}

# Model names accepted by the endpoint, each mapped to its embedding function.
sentence_embeddings_mapping = {
    name: generic_sentence_embeddings(name)
    for name in (
        'BAAI/bge-base-en-v1.5',
        'BAAI/bge-large-en-v1.5',
    )
}