Spaces:
Running
Running
Tobias Bergmann
committed on
Commit
·
b2df681
1
Parent(s):
95c291e
row2byrow pure C implementation
Browse files- app.py +3 -1
- llama-server +2 -2
app.py
CHANGED
@@ -15,10 +15,12 @@ today_date = datetime.today().strftime("%B %-d, %Y") # noqa: DTZ002
|
|
15 |
|
16 |
SYS_PROMPT = f"""Today's Date: {today_date}.
|
17 |
You are Gemma, developed by Google. You are a helpful AI assistant"""
|
18 |
-
TITLE = "Gemma3 1b instruct IQ4_NL from local GGUF server"
|
19 |
DESCRIPTION = """
|
20 |
<p>Gemma3 1b instruct is an open-source LLM supporting a 128k context window. This demo uses only 2K context.
|
21 |
</p>
|
|
|
|
|
22 |
"""
|
23 |
LLAMA_CPP_SERVER = "http://127.0.0.1:8081"
|
24 |
MAX_NEW_TOKENS = 1024
|
|
|
15 |
|
16 |
SYS_PROMPT = f"""Today's Date: {today_date}.
|
17 |
You are Gemma, developed by Google. You are a helpful AI assistant"""
|
18 |
+
TITLE = "Gemma3 1b instruct IQ4_NL from local GGUF server using BPP library"
|
19 |
DESCRIPTION = """
|
20 |
<p>Gemma3 1b instruct is an open-source LLM supporting a 128k context window. This demo uses only 2K context.
|
21 |
</p>
|
22 |
+
<p> The BPP library implements matrix multiplication with far less multiplications.
|
23 |
+
</p>
|
24 |
"""
|
25 |
LLAMA_CPP_SERVER = "http://127.0.0.1:8081"
|
26 |
MAX_NEW_TOKENS = 1024
|
llama-server
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ead85d6cb439c1f7abfcd987bd19e125471e0350e9035f2edbc2ec25af1014d
|
3 |
+
size 6362744
|