-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathmultimodal.js
34 lines (28 loc) · 1.04 KB
/
multimodal.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
const { GoogleGenerativeAI } = require("@google/generative-ai");
const fs = require("fs");
const dotenv = require('dotenv').config()
// Access your API key as an environment variable (see "Set up your API key" above)
const genAI = new GoogleGenerativeAI(process.env.API_KEY);
// Converts local file information to a GoogleGenerativeAI.Part object.
function fileToGenerativePart(path, mimeType) {
return {
inlineData: {
data: Buffer.from(fs.readFileSync(path)).toString("base64"),
mimeType
},
};
}
async function run() {
// For text-and-image input (multimodal), use the gemini-pro-vision model
const model = genAI.getGenerativeModel({ model: "gemini-pro-vision" });
const prompt = "describe the provided images";
const imageParts = [
fileToGenerativePart("image1.jpg", "image/jpeg"),
fileToGenerativePart("image2.jpeg", "image/jpeg"),
];
const result = await model.generateContent([prompt, ...imageParts]);
const response = await result.response;
const text = response.text();
console.log(text);
}
run();