import { convert as convertHtmlToPlainText } from "html-to-text";
import { strip as stripEmojis } from "node-emoji";
import { SearxngSearchResult, SearxngService } from "searxng";
import axios from "axios";
// Module-level SearXNG client used by the search helper in this file.
// It targets an instance on the loopback address (127.0.0.1), port 8080.
const searxng = new SearxngService({
baseURL: "http://127.0.0.1:8080",
// NOTE(review): presumably these are merged into every search request unless
// a call overrides them — confirm against the `searxng` package documentation.
defaultSearchParams: {
lang: "auto",
// NOTE(review): 1 is presumably the "moderate" safe-search level — confirm.
safesearch: 1,
format: "json",
categories: ["web"],
},
});
/**
 * Searches the SearXNG instance for `query` and splits the response into
 * text results and image/video results.
 *
 * - Results whose category is "images" or "videos" are treated as graphical.
 *   Their thumbnails are downloaded and inlined as base64 `data:` URLs.
 *   Entries whose thumbnail URL is not a valid URL, or whose download fails,
 *   are dropped. The remaining entries are de-duplicated by source URL.
 * - All other results are treated as textual. Only the first result per
 *   hostname that has a non-empty title and snippet is kept; results whose
 *   URL cannot be parsed are skipped.
 *
 * @param query - Search query forwarded to SearXNG.
 * @param limit - Optional cap. When greater than zero it is applied to the
 *   graphical results before any thumbnail is fetched, and to the text
 *   results after de-duplication. Otherwise no cap is applied.
 * @returns `textResults` as `[title, content, url]` tuples and `imageResults`
 *   as `[title, url, thumbnailUrl, sourceUrl]` tuples. If the search fails,
 *   the error is logged and both lists are returned empty.
 */
export async function fetchSearXNG(query: string, limit?: number) {
  try {
    const resultsResponse = await searxng.search(query);

    let graphicalSearchResults: SearxngSearchResult[] = [];
    const textualSearchResults: SearxngSearchResult[] = [];

    const isVideosOrImagesCategory = (category: string): boolean => {
      return category === "images" || category === "videos";
    };

    for (const result of resultsResponse.results) {
      if (isVideosOrImagesCategory(result.category)) {
        graphicalSearchResults.push(result);
      } else {
        textualSearchResults.push(result);
      }
    }

    const hasLimit = limit !== undefined && limit > 0;

    // Cap the graphical results up front so that no more than `limit`
    // thumbnails are downloaded.
    if (hasLimit) {
      graphicalSearchResults = graphicalSearchResults.slice(0, limit);
    }

    let textResults: [title: string, content: string, url: string][] = [];
    const imageResults: [
      title: string,
      url: string,
      thumbnailUrl: string,
      sourceUrl: string,
    ][] = [];

    const uniqueHostnames = new Set<string>();
    const uniqueSourceUrls = new Set<string>();

    // Converts an HTML snippet to plain text and strips emojis from it.
    const processSnippet = (snippet: string): string => {
      const processedSnippet = stripEmojis(
        convertHtmlToPlainText(snippet, { wordwrap: false }).trim(),
        { preserveSpaces: true },
      );

      // A snippet that starts with an inlined image reference carries no
      // useful text, so it is treated as empty.
      if (processedSnippet.startsWith("[data:image")) return "";

      return processedSnippet;
    };

    // Download every thumbnail in parallel; each failure resolves to `null`
    // so that one bad thumbnail does not reject the whole batch.
    const imagePromises = graphicalSearchResults.map(async (result) => {
      const thumbnailSource =
        result.category === "videos" ? result.thumbnail : result.thumbnail_src;

      try {
        new URL(thumbnailSource);
      } catch {
        // Missing or malformed thumbnail URL: nothing to download.
        return null;
      }

      try {
        const axiosResponse = await axios.get(thumbnailSource, {
          responseType: "arraybuffer",
        });

        const contentType = axiosResponse.headers["content-type"];
        const base64 = Buffer.from(axiosResponse.data).toString("base64");
        const thumbnailUrl = `data:${contentType};base64,${base64}`;

        // Fall back to the result page URL when the dedicated source URL is
        // absent, so the fourth tuple slot is always populated.
        const sourceUrl =
          result.category === "videos"
            ? result.iframe_src || result.url
            : result.img_src || result.url;

        return [result.title, result.url, thumbnailUrl, sourceUrl];
      } catch {
        // Best effort: a thumbnail that cannot be fetched drops the entry.
        return null;
      }
    });

    const resolvedImageResults = (await Promise.all(imagePromises)).filter(
      (result): result is [string, string, string, string] => result !== null,
    );

    // De-duplicate in a single pass. Checking and recording the source URL
    // must happen together per element; a filter-then-map chain would run
    // the whole filter against a still-empty set.
    for (const imageResult of resolvedImageResults) {
      const sourceUrl = imageResult[3];
      if (uniqueSourceUrls.has(sourceUrl)) continue;
      uniqueSourceUrls.add(sourceUrl);
      imageResults.push(imageResult);
    }

    for (const result of textualSearchResults) {
      let hostname: string;
      try {
        ({ hostname } = new URL(result.url));
      } catch {
        // Skip a result with an unparsable URL instead of failing the
        // whole search.
        continue;
      }

      if (uniqueHostnames.has(hostname) || !result.content) continue;

      const title = convertHtmlToPlainText(result.title, {
        wordwrap: false,
      }).trim();
      const snippet = processSnippet(result.content);

      // The hostname is only marked as seen once a usable result is kept,
      // so a later result from the same host can still fill the slot.
      if (title && snippet) {
        textResults.push([title, snippet, result.url]);
        uniqueHostnames.add(hostname);
      }
    }

    if (hasLimit) {
      textResults = textResults.slice(0, limit);
    }

    return { textResults, imageResults };
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    console.error(`Error fetching search results: ${errorMessage}`);
    return { textResults: [], imageResults: [] };
  }
}