Spaces: Running on CPU Upgrade
fix: improve tasks performance
Browse files
chart/env/prod.yaml
CHANGED
|
@@ -164,7 +164,6 @@ envVars:
|
|
| 164 |
},
|
| 165 |
{
|
| 166 |
"name": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
|
| 167 |
-
"tokenizer": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
|
| 168 |
"modelUrl": "https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
|
| 169 |
"websiteUrl": "https://www.nvidia.com/",
|
| 170 |
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nvidia-logo.png",
|
|
@@ -297,7 +296,6 @@ envVars:
|
|
| 297 |
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nous-logo.png",
|
| 298 |
"websiteUrl": "https://nousresearch.com/",
|
| 299 |
"modelUrl": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B",
|
| 300 |
-
"tokenizer": "NousResearch/Hermes-3-Llama-3.1-8B",
|
| 301 |
"promptExamples": [
|
| 302 |
{
|
| 303 |
"title": "Write an email from bullet list",
|
|
@@ -321,7 +319,6 @@ envVars:
|
|
| 321 |
},
|
| 322 |
{
|
| 323 |
"name": "mistralai/Mistral-Nemo-Instruct-2407",
|
| 324 |
-
"tokenizer": "mistralai/Mistral-Nemo-Instruct-2407",
|
| 325 |
"displayName": "mistralai/Mistral-Nemo-Instruct-2407",
|
| 326 |
"description": "A small model with good capabilities in language understanding and commonsense reasoning.",
|
| 327 |
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png",
|
|
@@ -351,7 +348,6 @@ envVars:
|
|
| 351 |
},
|
| 352 |
{
|
| 353 |
"name": "microsoft/Phi-3.5-mini-instruct",
|
| 354 |
-
"tokenizer": "microsoft/Phi-3.5-mini-instruct",
|
| 355 |
"description": "One of the best small models (3.8B parameters), super fast for simple tasks.",
|
| 356 |
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/microsoft-logo.png",
|
| 357 |
"modelUrl": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct",
|
|
@@ -382,12 +378,11 @@ envVars:
|
|
| 382 |
"name": "meta-llama/Llama-3.1-8B-Instruct",
|
| 383 |
"tools": true,
|
| 384 |
"unlisted": true,
|
| 385 |
-
"
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
]
|
| 391 |
}
|
| 392 |
]
|
| 393 |
|
|
|
|
| 164 |
},
|
| 165 |
{
|
| 166 |
"name": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
|
|
|
|
| 167 |
"modelUrl": "https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
|
| 168 |
"websiteUrl": "https://www.nvidia.com/",
|
| 169 |
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nvidia-logo.png",
|
|
|
|
| 296 |
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nous-logo.png",
|
| 297 |
"websiteUrl": "https://nousresearch.com/",
|
| 298 |
"modelUrl": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B",
|
|
|
|
| 299 |
"promptExamples": [
|
| 300 |
{
|
| 301 |
"title": "Write an email from bullet list",
|
|
|
|
| 319 |
},
|
| 320 |
{
|
| 321 |
"name": "mistralai/Mistral-Nemo-Instruct-2407",
|
|
|
|
| 322 |
"displayName": "mistralai/Mistral-Nemo-Instruct-2407",
|
| 323 |
"description": "A small model with good capabilities in language understanding and commonsense reasoning.",
|
| 324 |
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png",
|
|
|
|
| 348 |
},
|
| 349 |
{
|
| 350 |
"name": "microsoft/Phi-3.5-mini-instruct",
|
|
|
|
| 351 |
"description": "One of the best small models (3.8B parameters), super fast for simple tasks.",
|
| 352 |
"logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/microsoft-logo.png",
|
| 353 |
"modelUrl": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct",
|
|
|
|
| 378 |
"name": "meta-llama/Llama-3.1-8B-Instruct",
|
| 379 |
"tools": true,
|
| 380 |
"unlisted": true,
|
| 381 |
+
"parameters": {
|
| 382 |
+
"stop": ["<|eot_id|>", "<|im_end|>"],
|
| 383 |
+
"temperature": 0.1,
|
| 384 |
+
"max_new_tokens": 256
|
| 385 |
+
}
|
|
|
|
| 386 |
}
|
| 387 |
]
|
| 388 |
|
src/lib/server/textGeneration/reasoning.ts
CHANGED
|
@@ -15,8 +15,7 @@ export async function generateSummaryOfReasoning(buffer: string): Promise<string
|
|
| 15 |
];
|
| 16 |
|
| 17 |
const preprompt = `You are tasked with submitting a summary of the latest reasoning steps into a tool. Never describe results of the reasoning, only the process. Remain vague in your summary.
|
| 18 |
-
The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points.
|
| 19 |
-
Example: "Thinking about life...", "Summarizing the results...", "Processing the input...". `;
|
| 20 |
|
| 21 |
if (smallModel.tools) {
|
| 22 |
const summaryTool = {
|
|
|
|
| 15 |
];
|
| 16 |
|
| 17 |
const preprompt = `You are tasked with submitting a summary of the latest reasoning steps into a tool. Never describe results of the reasoning, only the process. Remain vague in your summary.
|
| 18 |
+
The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points. The reasoning follows: \n`;
|
|
|
|
| 19 |
|
| 20 |
if (smallModel.tools) {
|
| 21 |
const summaryTool = {
|
src/lib/server/textGeneration/title.ts
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import { env } from "$env/dynamic/private";
|
| 2 |
import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint";
|
| 3 |
-
import type { EndpointMessage } from "../endpoints/endpoints";
|
| 4 |
import { logger } from "$lib/server/logger";
|
| 5 |
import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
|
| 6 |
import type { Conversation } from "$lib/types/Conversation";
|
|
@@ -58,7 +57,7 @@ export async function generateTitle(prompt: string) {
|
|
| 58 |
},
|
| 59 |
],
|
| 60 |
preprompt:
|
| 61 |
-
"You are a summarization AI. You'll never answer a user's question directly, but instead summarize the user's request into a single short sentence.",
|
| 62 |
tool: titleTool,
|
| 63 |
endpoint,
|
| 64 |
});
|
|
@@ -71,34 +70,9 @@ export async function generateTitle(prompt: string) {
|
|
| 71 |
}
|
| 72 |
}
|
| 73 |
|
| 74 |
-
const messages: Array<EndpointMessage> = [
|
| 75 |
-
{
|
| 76 |
-
from: "system",
|
| 77 |
-
content:
|
| 78 |
-
"You are a summarization AI. You'll never answer a user's question directly, but instead summarize the user's request into a single short sentence of four words or less. Always start your answer with an emoji relevant to the summary",
|
| 79 |
-
},
|
| 80 |
-
{ from: "user", content: "Who is the president of Gabon?" },
|
| 81 |
-
{ from: "assistant", content: "🇬🇦 President of Gabon" },
|
| 82 |
-
{ from: "user", content: "Who is Julien Chaumond?" },
|
| 83 |
-
{ from: "assistant", content: "🧑 Julien Chaumond" },
|
| 84 |
-
{ from: "user", content: "what is 1 + 1?" },
|
| 85 |
-
{ from: "assistant", content: "🔢 Simple math operation" },
|
| 86 |
-
{ from: "user", content: "What are the latest news?" },
|
| 87 |
-
{ from: "assistant", content: "📰 Latest news" },
|
| 88 |
-
{ from: "user", content: "How to make a great cheesecake?" },
|
| 89 |
-
{ from: "assistant", content: "🍰 Cheesecake recipe" },
|
| 90 |
-
{ from: "user", content: "what is your favorite movie? do a short answer." },
|
| 91 |
-
{ from: "assistant", content: "🎥 Favorite movie" },
|
| 92 |
-
{ from: "user", content: "Explain the concept of artificial intelligence in one sentence" },
|
| 93 |
-
{ from: "assistant", content: "🤖 AI definition" },
|
| 94 |
-
{ from: "user", content: "Draw a cute cat" },
|
| 95 |
-
{ from: "assistant", content: "🐱 Cute cat drawing" },
|
| 96 |
-
{ from: "user", content: prompt },
|
| 97 |
-
];
|
| 98 |
-
|
| 99 |
return await getReturnFromGenerator(
|
| 100 |
generateFromDefaultEndpoint({
|
| 101 |
-
messages,
|
| 102 |
preprompt:
|
| 103 |
"You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
|
| 104 |
generateSettings: {
|
|
|
|
| 1 |
import { env } from "$env/dynamic/private";
|
| 2 |
import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint";
|
|
|
|
| 3 |
import { logger } from "$lib/server/logger";
|
| 4 |
import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
|
| 5 |
import type { Conversation } from "$lib/types/Conversation";
|
|
|
|
| 57 |
},
|
| 58 |
],
|
| 59 |
preprompt:
|
| 60 |
+
"You are a summarization AI. You'll never answer a user's question directly, but instead summarize the user's request into a single short sentence. Here is the user message: \n",
|
| 61 |
tool: titleTool,
|
| 62 |
endpoint,
|
| 63 |
});
|
|
|
|
| 70 |
}
|
| 71 |
}
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
return await getReturnFromGenerator(
|
| 74 |
generateFromDefaultEndpoint({
|
| 75 |
+
messages: [{ from: "user", content: prompt }],
|
| 76 |
preprompt:
|
| 77 |
"You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
|
| 78 |
generateSettings: {
|
src/lib/server/tools/getToolOutput.ts
CHANGED
|
@@ -42,6 +42,10 @@ export async function getToolOutput<T = string>({
|
|
| 42 |
);
|
| 43 |
calls.push(...extractedCalls);
|
| 44 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
}
|
| 46 |
|
| 47 |
if (calls.length > 0) {
|
|
@@ -52,7 +56,9 @@ export async function getToolOutput<T = string>({
|
|
| 52 |
if (toolCall?.parameters) {
|
| 53 |
// Get the first parameter value since most tools have a single main parameter
|
| 54 |
const firstParamValue = Object.values(toolCall.parameters)[0];
|
| 55 |
-
|
|
|
|
|
|
|
| 56 |
}
|
| 57 |
}
|
| 58 |
|
|
|
|
| 42 |
);
|
| 43 |
calls.push(...extractedCalls);
|
| 44 |
}
|
| 45 |
+
|
| 46 |
+
if (calls.length > 0) {
|
| 47 |
+
break;
|
| 48 |
+
}
|
| 49 |
}
|
| 50 |
|
| 51 |
if (calls.length > 0) {
|
|
|
|
| 56 |
if (toolCall?.parameters) {
|
| 57 |
// Get the first parameter value since most tools have a single main parameter
|
| 58 |
const firstParamValue = Object.values(toolCall.parameters)[0];
|
| 59 |
+
if (typeof firstParamValue === "string") {
|
| 60 |
+
return firstParamValue as T;
|
| 61 |
+
}
|
| 62 |
}
|
| 63 |
}
|
| 64 |
|
src/lib/server/websearch/search/generateQuery.ts
CHANGED
|
@@ -59,7 +59,7 @@ export async function generateQuery(messages: Message[]) {
|
|
| 59 |
const webQuery = await getReturnFromGenerator(
|
| 60 |
generateFromDefaultEndpoint({
|
| 61 |
messages: convQuery,
|
| 62 |
-
preprompt: `The user wants you to search the web for information. Give a relevant google search query to answer the question. Answer with only the query. Today is ${currentDate}`,
|
| 63 |
generateSettings: {
|
| 64 |
max_new_tokens: 30,
|
| 65 |
},
|
|
|
|
| 59 |
const webQuery = await getReturnFromGenerator(
|
| 60 |
generateFromDefaultEndpoint({
|
| 61 |
messages: convQuery,
|
| 62 |
+
preprompt: `The user wants you to search the web for information. Give a relevant google search query to answer the question. Answer with only the query. Today is ${currentDate}. The conversation follows: \n`,
|
| 63 |
generateSettings: {
|
| 64 |
max_new_tokens: 30,
|
| 65 |
},
|