Commit e90c5a9 · Parent: cc9779d

updated the AI news bot
Files changed:
- .env (+1, -1)
- src/config.mts (+2, -0)
- src/createZephyrPrompt.mts (+26, -0)
- src/getNews.mts (+17, -13)
- src/main.mts (+62, -10)
- src/sampleDataFreePlan.mts (+0, -0)
- src/sleep.mts (+6, -0)
- src/summarizeWithZephyr.mts (+94, -0)
.env
CHANGED
@@ -1,4 +1,4 @@
 
 AI_NEWS_HUGGINGFACE_API_KEY=""
-
+AI_NEWS_HUGGINGFACE_INFERENCE_API_MODEL="HuggingFaceH4/zephyr-7b-beta"
 AI_NEWS_NEWSDATA_API_KEY=""
src/config.mts
CHANGED
@@ -19,4 +19,6 @@ try {
 }
 
 export const huggingfaceApiKey = `${process.env.AI_NEWS_HUGGINGFACE_API_KEY || ""}`
+export const hfInferenceApiModel = `${process.env.AI_NEWS_HUGGINGFACE_INFERENCE_API_MODEL || ""}`
+
 export const newsdataApiKey = `${process.env.AI_NEWS_NEWSDATA_API_KEY || ""}`
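Note that both constants fall back to an empty string when the corresponding environment variable is unset, so a missing key only shows up later as a failed API call. A minimal sketch of an optional startup guard (hypothetical; not part of this commit) could look like this:

// Hypothetical startup check: fail fast when a required variable is missing.
import { hfInferenceApiModel, huggingfaceApiKey, newsdataApiKey } from "./config.mts"

export function assertConfig(): void {
  const missing = Object.entries({
    AI_NEWS_HUGGINGFACE_API_KEY: huggingfaceApiKey,
    AI_NEWS_HUGGINGFACE_INFERENCE_API_MODEL: hfInferenceApiModel,
    AI_NEWS_NEWSDATA_API_KEY: newsdataApiKey,
  })
    .filter(([, value]) => !value)
    .map(([name]) => name)

  if (missing.length > 0) {
    throw new Error(`Missing environment variables: ${missing.join(", ")}`)
  }
}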
src/createZephyrPrompt.mts
ADDED
@@ -0,0 +1,26 @@
 
interface Message {
  role: "system" | "user" | "assistant";
  content: string;
}

/**
 * Formats the messages for a chat with the LLM model using the Zephyr prompt template.
 * @param messages - Array of message objects with role and content.
 * @returns The formatted chat prompt.
 */
export function createZephyrPrompt(messages: Message[]): string {
  let prompt = ``;

  // Iterate over messages and generate corresponding chat entries.
  messages.forEach(message => {
    prompt += `<|${message.role}|>\n${message.content.trim()}</s>`;
  });

  if (messages.at(-1)?.role === "user") {
    // Append the assistant's tag for the next response but without a closing tag.
    prompt += `<|assistant|>`;
  }

  return prompt;
}
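To illustrate the template this helper produces, here is a usage example (the message contents are invented for illustration):

import { createZephyrPrompt } from "./createZephyrPrompt.mts"

const prompt = createZephyrPrompt([
  { role: "system", content: "You are a concise news editor." },
  { role: "user", content: "Summarize this article." }
])

// Each message becomes "<|role|>\ncontent</s>", and because the last message is from
// the user, an open "<|assistant|>" tag is appended so the model continues from there:
// "<|system|>\nYou are a concise news editor.</s><|user|>\nSummarize this article.</s><|assistant|>"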
src/getNews.mts
CHANGED
@@ -4,22 +4,26 @@ import { ApiResponse } from "./types.mts"
 export async function getNews(): Promise<ApiResponse> {
 
   const q = [
-    "ai",
-    "artificial intelligence",
+    // "ai",
+    // "artificial intelligence",
     "openai",
     "huggingface",
-    "hugging face",
-    "stable diffusion",
-    "ai ethics",
-    "tesla optimus",
-    "Optimus Gen2",
+    // "hugging face",
+    // "stable diffusion",
+    // "ai ethics",
+    // "tesla optimus",
+    // "Optimus Gen2",
+    "LLM",
+    "chatgpt",
+    "runwayml",
+    "pikalabs",
     "sdxl",
-    "gaussian splatting",
-    "latent space",
-    "gpu",
-    "nvidia",
-    "spatial computing",
-    "apple vision pro"
+    // "gaussian splatting",
+    // "latent space",
+    // "gpu",
+    // "nvidia",
+    // "spatial computing",
+    // "apple vision pro"
   ].join(" OR ")
 
   const country = [
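With this change the active keywords shrink to a single OR query. The sketch below shows what the query string evaluates to; the request URL is an assumption about how getNews() calls NewsData.io and is not shown in this diff:

import { newsdataApiKey } from "./config.mts"

const q = [
  "openai",
  "huggingface",
  "LLM",
  "chatgpt",
  "runwayml",
  "pikalabs",
  "sdxl"
].join(" OR ")
// -> "openai OR huggingface OR LLM OR chatgpt OR runwayml OR pikalabs OR sdxl"

// Assumed request shape (hypothetical endpoint usage):
const url = `https://newsdata.io/api/1/news?apikey=${newsdataApiKey}&q=${encodeURIComponent(q)}&language=en`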
src/main.mts
CHANGED
@@ -2,23 +2,29 @@ import YAML from "yaml"
 
 import { sampleDataFreePlan } from "./sampleDataFreePlan.mts"
 import { getNews } from "./getNews.mts"
+import { summarizeWithZephyr } from "./summarizeWithZephyr.mts"
+import { sleep } from "./sleep.mts"
 
 export const main = async () => {
+  // console.log(JSON.stringify(await getNews(), null, 2))
+
+  let delayInSeconds = 15 * 60
 
-  let delayInSeconds = 15 * 60 // let's check every 5 minutes
   console.log(`-------------- ${delayInSeconds} sec have elapsed --------------`)
   console.log("- checking NewsData.io API..")
 
 
-  let newsItems: any[] = []
+  let news: any[] = []
 
+  let nbMaxNews = 5
+
   // const data = await getNews()
   const data = sampleDataFreePlan
 
   const interestingNews = data.results.filter(news =>
     news.language === "english" &&
     news.country.includes("united states of america")
-  )
+  ).slice(0, nbMaxNews)
 
   interestingNews.forEach(({
     article_id, // string
@@ -40,9 +46,7 @@ export const main = async () => {
     sentiment, // string
     sentiment_stats // SentimentStats
   }) => {
-
-    console.log("language:", language)
-    newsItems.push({
+    news.push({
      title,
      data: pubDate,
      countries: country,
@@ -51,11 +55,59 @@
    })
  })
 
-  const …
-
-
+  const summaries: string[] = []
+  for (const item of news) {
+    console.log("- generating summary..")
+
+    const summary = await summarizeWithZephyr({
+      news: YAML.stringify(item)
+    })
+    console.log(" -> summary: " + summary)
+    if (summary) {
+      summaries.push(summary)
+      await sleep(1000)
+    } else {
+      console.log("(!) got no data!")
+      await sleep(5000)
+    }
+  }
+
+  const today = new Date().toLocaleDateString('en-us', {
+    weekday: "long",
+    year: "numeric",
+    month: "short",
+    day: "numeric"
+  })
+  // "Friday, Jul 2, 2021"
+
+  const markdown = `
+# Title
+
+News Report ${today} 🔥
+
+# Description
+
+A summary of what happened today in the world of tech and AI
+
+# Prompt
+
+You are a TV news channel agent.
+Your mission is to summarize news (which will be given to you as YAML) into a compact, dense news report suitable for a news anchor.
+Please limit yourself to about 20 paragraphs.
+
+Here is the summary of today's news:
+
+article:
+${summaries.join("\n\narticle:")}
+`
+
+  console.log("final markdown:", markdown)
+
+
+
+
+  // TODO: generate a markdown file and upload it to Hugging Face
 
-  // loop every hour
   setTimeout(() => {
     main()
   }, delayInSeconds * 1000)
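The TODO at the end of main() is not implemented in this commit. Below is a hedged sketch of one way to do it, writing the report to disk and pushing it to the Hub with @huggingface/hub; the uploadFile parameter shape and the target repo name are assumptions, not taken from this commit:

import { writeFile } from "node:fs/promises"
import { uploadFile } from "@huggingface/hub"

// Hypothetical helper: persist the generated markdown, then upload it.
async function publishReport(markdown: string, accessToken: string) {
  await writeFile("report.md", markdown, "utf8")

  await uploadFile({
    repo: { type: "dataset", name: "my-user/ai-news-reports" }, // hypothetical repo
    credentials: { accessToken },
    file: {
      path: "report.md",
      content: new Blob([markdown], { type: "text/markdown" })
    }
  })
}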
src/sampleDataFreePlan.mts
CHANGED
The diff for this file is too large to render.
src/sleep.mts
ADDED
@@ -0,0 +1,6 @@
export const sleep = async (durationInMs: number) =>
  new Promise((resolve) => {
    setTimeout(() => {
      resolve(true)
    }, durationInMs)
  })
src/summarizeWithZephyr.mts
ADDED
@@ -0,0 +1,94 @@
import { HfInference } from "@huggingface/inference"

import { createZephyrPrompt } from "./createZephyrPrompt.mts"
import { hfInferenceApiModel, huggingfaceApiKey } from "./config.mts"


const hf = new HfInference(huggingfaceApiKey)

export async function summarizeWithZephyr({
  news,
  neverThrow
}: {
  news: string
  neverThrow?: boolean
}): Promise<string> {
  try {
    const inputs = createZephyrPrompt([
      {
        role: "system",
        content: `You must summarize the content into 2 or 3 sentences. DO NOT write more than that. Keep it dense, simple, and short.`,
      },
      {
        role: "user",
        content: news,
      }
    ]) //+ "\n["

    const nbMaxNewTokens = 250

    let rawBufferString = ""
    try {
      for await (const output of hf.textGenerationStream({
        model: hfInferenceApiModel,
        inputs,
        parameters: {
          do_sample: true,
          max_new_tokens: nbMaxNewTokens,
          return_full_text: false,
        }
      })) {
        rawBufferString += output.token.text
        // process.stdout.write(output.token.text)
        if (
          rawBufferString.includes("</s>") ||
          rawBufferString.includes("<s>") ||
          rawBufferString.includes("/s>") ||
          rawBufferString.includes("[INST]") ||
          rawBufferString.includes("[/INST]") ||
          rawBufferString.includes("<SYS>") ||
          rawBufferString.includes("<<SYS>>") ||
          rawBufferString.includes("</SYS>") ||
          rawBufferString.includes("<</SYS>>") ||
          rawBufferString.includes("<|user|>") ||
          rawBufferString.includes("<|end|>") ||
          rawBufferString.includes("<|system|>") ||
          rawBufferString.includes("<|assistant|>")
        ) {
          break
        }
      }
    } catch (err) {
      // console.error(`error during generation: ${err}`)

      if (`${err}` === "Error: Model is overloaded") {
        rawBufferString = ``
      }
    }

    const tmpResult =
      rawBufferString.replaceAll("</s>", "")
        .replaceAll("<s>", "")
        .replaceAll("/s>", "")
        .replaceAll("[INST]", "")
        .replaceAll("[/INST]", "")
        .replaceAll("<SYS>", "")
        .replaceAll("<<SYS>>", "")
        .replaceAll("</SYS>", "")
        .replaceAll("<</SYS>>", "")
        .replaceAll("<|user|>", "")
        .replaceAll("<|end|>", "")
        .replaceAll("<|system|>", "")
        .replaceAll("<|assistant|>", "")


    return tmpResult
  } catch (err) {
    if (neverThrow) {
      console.error(`summarizeWithZephyr():`, err)
      return ""
    } else {
      throw err
    }
  }
}
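A minimal usage example (the article text is invented for illustration):

import { summarizeWithZephyr } from "./summarizeWithZephyr.mts"

// With neverThrow enabled, a failed call logs the error and resolves to an empty
// string instead of throwing.
const summary = await summarizeWithZephyr({
  news: "title: Example AI announcement\ndescription: a short example article",
  neverThrow: true
})

console.log(summary || "(no summary generated)")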