yusufs committed on
Commit
7935381
·
1 Parent(s): 6479dc6

feat(refactor): move the files to root

Browse files
README.md CHANGED
@@ -6,9 +6,39 @@ colorTo: blue
6
  sdk: docker
7
  pinned: false
8
  ---
 
 
9
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
10
 
11
 
12
  ```shell
13
  poetry export -f requirements.txt --output requirements.txt --without-hashes
14
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  sdk: docker
7
  pinned: false
8
  ---
9
+
10
+
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
12
 
13
 
14
  ```shell
15
  poetry export -f requirements.txt --output requirements.txt --without-hashes
16
  ```
17
+
18
+
19
+ ## VLLM OpenAI Compatible API Server
20
+
21
+ > References: https://huggingface.co/spaces/sofianhw/ai/tree/c6527a750644a849b6705bb6fe2fcea4e54a8196
22
+
23
+ This `api_server.py` file is an exact copy of https://github.com/vllm-project/vllm/blob/v0.6.4.post1/vllm/entrypoints/openai/api_server.py
24
+
25
+ * The `HUGGING_FACE_HUB_TOKEN` must exist during runtime.
26
+
27
+ ## Documentation about config
28
+
29
+ * https://github.com/vllm-project/vllm/blob/v0.6.4.post1/vllm/utils.py#L1207-L1221
30
+
31
+ ```shell
32
+ "serve,chat,complete",
33
+ "facebook/opt-12B",
34
+ '--config', 'config.yaml',
35
+ '-tp', '2'
36
+ ```
37
+
38
+ The YAML is equivalent to the argument flag params. Consider passing flag params as defined here for better documentation:
39
+ https://github.com/vllm-project/vllm/blob/v0.6.4.post1/vllm/entrypoints/openai/cli_args.py#L77-L237
40
+
41
+ Other arguments are the same as the LLM class, such as `--max-model-len`, `--dtype`, or `--otlp-traces-endpoint`
42
+ * https://github.com/vllm-project/vllm/blob/v0.6.4/vllm/config.py#L1061-L1086
43
+ * https://github.com/vllm-project/vllm/blob/v0.6.4.post1/vllm/engine/arg_utils.py#L221-L913
44
+
openai/README.md DELETED
@@ -1,26 +0,0 @@
1
- # VLLM OpenAI Compatible API Server
2
-
3
- > References: https://huggingface.co/spaces/sofianhw/ai/tree/c6527a750644a849b6705bb6fe2fcea4e54a8196
4
-
5
- This `api_server.py` file is exact copy version from https://github.com/vllm-project/vllm/blob/v0.6.4.post1/vllm/entrypoints/openai/api_server.py
6
-
7
- * The `HUGGING_FACE_HUB_TOKEN` must exist during runtime.
8
-
9
- ## Documentation about config
10
-
11
- * https://github.com/vllm-project/vllm/blob/v0.6.4.post1/vllm/utils.py#L1207-L1221
12
-
13
- ```shell
14
- "serve,chat,complete",
15
- "facebook/opt-12B",
16
- '--config', 'config.yaml',
17
- '-tp', '2'
18
- ```
19
-
20
- The yaml is equivalent with argument flag params. Consider passing using flag params that defined here for better documentation:
21
- https://github.com/vllm-project/vllm/blob/v0.6.4.post1/vllm/entrypoints/openai/cli_args.py#L77-L237
22
-
23
- Other arguments is the same as LLM class such as `--max-model-len`, `--dtype`, or `--otlp-traces-endpoint`
24
- * https://github.com/vllm-project/vllm/blob/v0.6.4/vllm/config.py#L1061-L1086
25
- * https://github.com/vllm-project/vllm/blob/v0.6.4.post1/vllm/engine/arg_utils.py#L221-L913
26
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
openai/__init__.py DELETED
File without changes
openai/api_server.py → openai_compatible_api_server.py RENAMED
File without changes
poetry.lock CHANGED
@@ -4117,4 +4117,4 @@ type = ["pytest-mypy"]
4117
  [metadata]
4118
  lock-version = "2.0"
4119
  python-versions = ">=3.12,<3.13"
4120
- content-hash = "3dbf211555b75838ffe36b519ebe3192776ef7f2eb4fb48f33cff9791b9f91fb"
 
4117
  [metadata]
4118
  lock-version = "2.0"
4119
  python-versions = ">=3.12,<3.13"
4120
+ content-hash = "cb3970f2566497f77454d834fb9d3dfe2dfe25be5a327e21bae997924b5c0619"
pyproject.toml CHANGED
@@ -13,6 +13,7 @@ fastapi = "^0.115.5"
13
  pydantic = "^2.10.2"
14
  uvicorn = "^0.32.1"
15
  torch = "^2.5.1"
 
16
 
17
 
18
  [build-system]
 
13
  pydantic = "^2.10.2"
14
  uvicorn = "^0.32.1"
15
  torch = "^2.5.1"
16
+ openai = "^1.55.1"
17
 
18
 
19
  [build-system]
run.sh CHANGED
@@ -3,7 +3,7 @@
3
 
4
  printf "Running vLLM OpenAI compatible API Server at port %s\n" "7860"
5
 
6
- python -u /app/openai/api_server.py \
7
  --model meta-llama/Llama-3.2-3B-Instruct \
8
  --revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 \
9
  --host 0.0.0.0 \
 
3
 
4
  printf "Running vLLM OpenAI compatible API Server at port %s\n" "7860"
5
 
6
+ python -u /app/openai_compatible_api_server.py \
7
  --model meta-llama/Llama-3.2-3B-Instruct \
8
  --revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 \
9
  --host 0.0.0.0 \