-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathapp.ts
148 lines (121 loc) · 3.73 KB
/
app.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import { load as cheerioLoad } from "cheerio";
import { env } from "process";
/**
 * Exported description text for this module.
 *
 * The date is interpolated once, when the module is evaluated, so the string
 * keeps that timestamp for the lifetime of the process rather than being
 * refreshed on each call.
 * NOTE(review): presumably consumed by the host as the tool/agent prompt — confirm.
 */
export const description = `You are an expert web research AI, designed to generate a response based on provided search results. Keep in mind today is ${new Date().toISOString()}.`;
/**
 * Input accepted by `handler`.
 */
export type Argument = {
/**
* The search query. It is passed to the Google search request as the query text.
*/
input: string;
};
// Google Custom Search credentials, read once from the environment at module load.
const GOOGLE_API_KEY = env.GOOGLE_API_KEY
const GOOGLE_CSE_ID = env.GOOGLE_CSE_ID
// Report the configuration state without echoing the API key itself:
// the key is a secret and must not end up in log output.
console.log(`GOOGLE_API_KEY=${GOOGLE_API_KEY ? "[set]" : "[missing]"}, GOOGLE_CSE_ID=${GOOGLE_CSE_ID}`)
/**
 * Run a web search for `args.input` and read each hit's page.
 *
 * Every hit that has a link is fetched concurrently and reduced to
 * `{ link, title, content }`. If reading a page throws, the hit falls back to
 * the title and snippet reported by the search. Entries that end up with an
 * empty title are dropped.
 *
 * @param args - The search request.
 * @returns On success, a JSON string encoding the array of results. If the
 *   search itself fails, the error is logged and an `{ error }` object is
 *   returned instead of a string.
 */
export async function handler(args: Argument) {
  try {
    console.log(args)
    const hits = await googleSearch(args.input);
    const linkedHits = hits.filter((hit) => hit.link)

    const pages = await Promise.all(
      linkedHits.map(async (hit) => {
        const { link, title, snippet } = hit
        console.log(`Reading link [${title}]: ${link}`)
        try {
          const html = await fetchWebPage(link as string)
          const page = extractHtml(html, snippet as string)
          console.log(`\t->[${page.title}] ${page.content.slice(0, 100)}`)
          // Prefer the extracted text; use the search snippet when the page gave none.
          return { link, title: page.title, content: page.content || snippet }
        } catch (ex) {
          console.error('\t[EE] Error reading link', ex)
          return { link, title, content: snippet }
        }
      })
    )

    // Only entries with a non-empty title are reported.
    const titled = pages.filter((page) => page.title)
    console.log("fetch result", titled.length)
    return JSON.stringify(titled)
  } catch (err) {
    console.error(err)
    return { error: err }
  }
}
/**
 * Download a page and return its body as text.
 *
 * Best-effort: a non-OK HTTP status, a network failure, or an unparsable URL
 * all resolve to an empty string instead of rejecting. The reason is logged
 * so that failed reads are visible rather than silently discarded.
 *
 * @param url - URL of the page to fetch.
 * @returns The response body, or `""` when the page could not be retrieved.
 */
async function fetchWebPage(url: string): Promise<string> {
  try {
    const res = await fetch(url);
    if (!res.ok) {
      console.error(`\t[EE] Got ${res.status} ${res.statusText} fetching ${url}`)
      return ""
    }
    return await res.text()
  } catch (error) {
    console.error(`\t[EE] Failed to fetch ${url}`, error)
    return ""
  }
}
/**
 * Extract the title and main readable text from an HTML document.
 *
 * Text is taken from the `<p>` elements inside the first `main`, `article` or
 * `.content` element (or `body` when none exists); if that yields nothing, the
 * whole body text is used, and if that is empty too, `defaultContent` is used.
 * Whitespace is collapsed and the content is capped at 2400 characters.
 *
 * A document whose `<meta charset>` declares anything other than UTF-8 yields
 * an empty title and empty content.
 *
 * @param html - Raw HTML of the page.
 * @param defaultContent - Text to use when the page contains no text at all.
 * @returns The cleaned page title and the extracted content.
 */
function extractHtml(html: string, defaultContent: string) {
  const $ = cheerioLoad(html);

  // Charset labels are case-insensitive and `UTF-8` is the common spelling,
  // so normalise before comparing; otherwise most UTF-8 pages are rejected.
  const charset = $('meta[charset]').attr('charset')?.trim().toLowerCase()
  if (charset && charset !== 'utf-8' && charset !== 'utf8') {
    return { title: "", content: "" }
  }

  // remove unwanted elements
  $("script, style, nav, footer, header").remove();

  // extract title
  const title = cleanText($("title").text() || "");

  // try to find main content, falling back to the whole body
  let mainContent = $("main, article, .content").first();
  if (!mainContent.length) {
    mainContent = $("body");
  }

  // extract text from paragraphs
  let text = mainContent
    .find("p")
    .map((_i, p) => $(p).text().trim())
    .get()
    .join(" ");

  // if no paragraphs found, get all text
  if (!text) {
    text = $("body").text().trim();
  }

  // clean up whitespace; an empty result falls back to the caller's default
  text = text.replace(/\s+/g, " ").trim() || defaultContent;

  return {
    title,
    content: text.slice(0, 2400), // limit to first 2400 characters
  };
}
/**
 * Query the Google Custom Search JSON API and return its hits.
 *
 * @param input - The search query text.
 * @returns One entry per returned item, reduced to `title`, `link` and
 *   `snippet`; an empty array when the response carries no items.
 * @throws Error when `GOOGLE_API_KEY` or `GOOGLE_CSE_ID` is not set, or when
 *   the API answers with a non-OK HTTP status.
 */
async function googleSearch(input: string): Promise<searchResult[]> {
  // Fail fast with a clear message instead of sending "key=undefined" to the API.
  if (!GOOGLE_API_KEY || !GOOGLE_CSE_ID) {
    throw new Error(
      "Google custom search is not configured: set GOOGLE_API_KEY and GOOGLE_CSE_ID"
    );
  }

  // Encode every query parameter, not just the user input, so reserved
  // characters in the credentials cannot corrupt the query string.
  const query = [
    `key=${encodeURIComponent(GOOGLE_API_KEY)}`,
    `cx=${encodeURIComponent(GOOGLE_CSE_ID)}`,
    `q=${encodeURIComponent(input)}`,
  ].join("&")
  const url = `https://www.googleapis.com/customsearch/v1?${query}`

  const res = await fetch(url);
  if (!res.ok) {
    throw new Error(
      `Got ${res.status} error from Google custom search: ${res.statusText}`
    );
  }

  const json = await res.json();
  // `items` is absent when there are no hits; treat anything that is not an array the same way.
  const items: searchResult[] = Array.isArray(json?.items) ? json.items : [];
  return items.map((item) => ({
    title: item.title,
    link: item.link,
    snippet: item.snippet,
  }));
}
/**
 * A single search hit, reduced to the fields this module reads.
 * Every field is optional; consumers must handle missing values.
 */
type searchResult = {
// Title of the hit as reported by the search response.
title?: string;
// URL of the hit; hits without a link are skipped by `handler`.
link?: string;
// Short text excerpt from the search response, used as fallback content.
snippet?: string;
}
/**
 * Normalize the whitespace in a piece of text.
 *
 * Every run of whitespace (spaces, tabs, line breaks, ...) becomes a single
 * space, and leading/trailing whitespace is removed.
 *
 * @param title - The raw text to clean.
 * @returns The text with whitespace collapsed and trimmed.
 */
function cleanText(title: string): string {
  // Splitting on whitespace runs and discarding the empty edge pieces
  // collapses and trims in a single pass.
  const words = title.split(/\s+/).filter((word) => word.length > 0);
  return words.join(' ');
}