ai-app-ckg/services/gemini.ts
import { GoogleGenAI, Modality } from "@google/genai";
import { AppModule, ImageConfig, VeoConfig } from '../types';
// Models
const MODEL_CHAT_PRO = 'gemini-3-pro-preview';
const MODEL_RESEARCH = 'gemini-3-flash-preview';
const MODEL_IMAGE = 'gemini-3-pro-image-preview';
const MODEL_VIDEO = 'veo-3.1-fast-generate-preview';
const MODEL_AUDIO_TTS = 'gemini-2.5-flash-preview-tts';
const MODEL_AUDIO_TRANS = 'gemini-3-flash-preview';
export class GeminiService {
  private ai: GoogleGenAI | null = null;
  private apiKey: string;

  constructor(apiKey: string) {
    this.apiKey = apiKey;
    if (apiKey) {
      this.ai = new GoogleGenAI({ apiKey });
    }
  }

  // Swap the stored key and rebuild the client so subsequent calls use it.
  updateKey(apiKey: string) {
    this.apiKey = apiKey;
    this.ai = new GoogleGenAI({ apiKey });
  }

  private getClient() {
    if (!this.ai) throw new Error("API Key not set");
    return this.ai;
  }
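  // Usage sketch (hypothetical caller code): a single service instance can
  // outlive key changes, e.g. when the user edits the key in a settings panel.
  //
  //   const service = new GeminiService(storedKey ?? '');
  //   service.updateKey(newKeyFromSettings); // rebuilds the underlying client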
  async generateText(
    prompt: string,
    module: AppModule,
    history: { role: string, parts: any[] }[],
    media?: { data: string, mimeType: string }[]
  ) {
    const ai = this.getClient();
    let model = MODEL_CHAT_PRO;
    let config: any = {};
    switch (module) {
      case AppModule.TUTOR:
        // Guidance: Pro for complex tasks, Flash/Flash-Lite for fast ones.
        // Tutor defaults to Pro for teaching quality; Flash could be swapped
        // in for latency-sensitive queries.
        model = MODEL_CHAT_PRO;
        break;
      case AppModule.THINKER:
        model = MODEL_CHAT_PRO;
        config.thinkingConfig = { thinkingBudget: 32768 };
        // Leave maxOutputTokens unset: the recommendation is to avoid capping
        // output alongside a large thinking budget unless strictly required.
        break;
      case AppModule.RESEARCH:
        model = MODEL_RESEARCH;
        config.tools = [{ googleSearch: {} }];
        break;
      case AppModule.VISION:
        model = MODEL_CHAT_PRO; // Image analysis
        break;
      case AppModule.STUDIO:
        model = MODEL_CHAT_PRO; // Image analysis/editing context
        break;
      case AppModule.AUDIO:
        model = MODEL_AUDIO_TRANS; // Transcription/analysis
        break;
    }
    // Multi-turn context: ai.chats.create accepts prior turns as history and
    // sendMessage* handles the current turn, so we seed the chat with the
    // caller-supplied history in the SDK's { role, parts } format.
    const sdkHistory = history.map(h => ({
      role: h.role,
      parts: h.parts
    }));
    const chat = ai.chats.create({
      model: model,
      config: config,
      history: sdkHistory
    });

    // Current turn: any media parts precede the text prompt. sendMessageStream
    // takes the parts directly (a string or Part[]), not a full Content object.
    const parts: any[] = media && media.length > 0
      ? [
          ...media.map(m => ({ inlineData: { mimeType: m.mimeType, data: m.data } })),
          { text: prompt }
        ]
      : [{ text: prompt }];

    // Return the stream iterator; callers consume it with for-await.
    return await chat.sendMessageStream({ message: parts });
  }
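  // Usage sketch (hypothetical caller code, assuming `service` is an instance
  // of this class and that SDK stream chunks expose a `.text` accessor):
  //
  //   const stream = await service.generateText("Explain closures", AppModule.TUTOR, []);
  //   for await (const chunk of stream) {
  //     ui.append(chunk.text ?? ""); // `ui` stands in for the app's render target
  //   }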
  async generateImage(prompt: string, config: ImageConfig) {
    const ai = this.getClient();
    // Image generation goes through generateContent on the Gemini image model
    // ("Nano Banana" series, gemini-3-pro-image-preview).
    const response = await ai.models.generateContent({
      model: MODEL_IMAGE,
      contents: {
        parts: [{ text: prompt }]
      },
      config: {
        imageConfig: {
          aspectRatio: config.aspectRatio,
          imageSize: config.size
        }
      }
    });
    // Extract the first inline image part and return it as a data URL.
    for (const part of response.candidates?.[0]?.content?.parts || []) {
      if (part.inlineData) {
        return `data:${part.inlineData.mimeType};base64,${part.inlineData.data}`;
      }
    }
    throw new Error("No image generated");
  }
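  // Usage sketch (hypothetical caller code; the config values shown are
  // illustrative, not the only ones ImageConfig accepts): the returned data
  // URL can be assigned straight to an <img> element.
  //
  //   const url = await service.generateImage("a lighthouse at dusk", { aspectRatio: "1:1", size: "1K" });
  //   imgEl.src = url;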
  async generateVideo(prompt: string, config: VeoConfig) {
    const ai = this.getClient();
    // In the AI Studio browser environment, prompt the user to select a paid
    // key for Veo if one hasn't been chosen yet.
    if (typeof window !== 'undefined' && (window as any).aistudio) {
      try {
        const hasKey = await (window as any).aistudio.hasSelectedApiKey();
        if (!hasKey) {
          await (window as any).aistudio.openSelectKey();
        }
      } catch (e) {
        console.warn("Veo key selection check failed, proceeding with env key", e);
      }
    }
    // Note: a key chosen via the selection dialog is not reflected in
    // this.apiKey, so we keep using this.ai and assume the user-entered key
    // is the paid key Veo requires.
    let operation = await ai.models.generateVideos({
      model: MODEL_VIDEO,
      prompt: prompt,
      config: {
        numberOfVideos: 1,
        resolution: config.resolution,
        aspectRatio: config.aspectRatio
      }
    });
    // Video generation is a long-running operation: poll every 5s until done.
    while (!operation.done) {
      await new Promise(resolve => setTimeout(resolve, 5000));
      operation = await ai.operations.getVideosOperation({ operation: operation });
    }
    const videoUri = operation.response?.generatedVideos?.[0]?.video?.uri;
    if (!videoUri) throw new Error("No video URI returned");
    // Downloading the result requires the API key appended to the returned URI.
    const vidResponse = await fetch(`${videoUri}&key=${this.apiKey}`);
    const blob = await vidResponse.blob();
    return URL.createObjectURL(blob);
  }
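  // Usage sketch (hypothetical caller code): the returned object URL plugs
  // into a <video> element; revoke it when the element is torn down so the
  // blob can be garbage-collected.
  //
  //   const url = await service.generateVideo("drone shot over a fjord", veoConfig);
  //   videoEl.src = url;
  //   // later: URL.revokeObjectURL(url);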
  async transcribeAudio(base64Audio: string, mimeType: string) {
    const ai = this.getClient();
    const response = await ai.models.generateContent({
      model: MODEL_AUDIO_TRANS,
      contents: {
        parts: [
          { inlineData: { mimeType: mimeType, data: base64Audio } },
          { text: "Transcribe this audio exactly." }
        ]
      }
    });
    return response.text;
  }
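  // Usage sketch (hypothetical helper): browsers record audio as a Blob, so a
  // FileReader pass is needed to get the base64 payload this method expects.
  //
  //   const base64 = await new Promise<string>((resolve, reject) => {
  //     const reader = new FileReader();
  //     reader.onload = () => resolve((reader.result as string).split(',')[1]);
  //     reader.onerror = () => reject(reader.error);
  //     reader.readAsDataURL(audioBlob);
  //   });
  //   const text = await service.transcribeAudio(base64, audioBlob.type);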
  async generateSpeech(text: string, voice: string = 'Kore') {
    const ai = this.getClient();
    const response = await ai.models.generateContent({
      model: MODEL_AUDIO_TTS,
      contents: [{ parts: [{ text }] }],
      config: {
        responseModalities: [Modality.AUDIO],
        speechConfig: {
          voiceConfig: {
            prebuiltVoiceConfig: { voiceName: voice },
          },
        },
      },
    });
    // The TTS model returns raw audio bytes as base64 inline data.
    const base64Audio = response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
    if (!base64Audio) throw new Error("No audio generated");
    return base64Audio;
  }
}
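
// The base64 returned by generateSpeech is raw PCM, not a playable container.
// Below is a minimal sketch of a WAV wrapper, assuming 16-bit little-endian
// mono PCM at 24 kHz (the format the Gemini TTS preview models have returned);
// check the mimeType on the inlineData part before relying on these defaults.
export function pcmBase64ToWavBlob(
  base64Pcm: string,
  sampleRate = 24000,
  numChannels = 1,
  bitsPerSample = 16
): Blob {
  // Decode base64 into raw PCM bytes.
  const binary = atob(base64Pcm);
  const pcm = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) pcm[i] = binary.charCodeAt(i);

  // Build the 44-byte RIFF/WAVE header in front of the PCM data.
  const blockAlign = numChannels * (bitsPerSample / 8);
  const byteRate = sampleRate * blockAlign;
  const header = new ArrayBuffer(44);
  const view = new DataView(header);
  const writeString = (offset: number, s: string) => {
    for (let i = 0; i < s.length; i++) view.setUint8(offset + i, s.charCodeAt(i));
  };
  writeString(0, 'RIFF');
  view.setUint32(4, 36 + pcm.length, true);  // total file size minus 8 bytes
  writeString(8, 'WAVE');
  writeString(12, 'fmt ');
  view.setUint32(16, 16, true);              // fmt chunk size
  view.setUint16(20, 1, true);               // audio format 1 = PCM
  view.setUint16(22, numChannels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, byteRate, true);
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, bitsPerSample, true);
  writeString(36, 'data');
  view.setUint32(40, pcm.length, true);

  return new Blob([header, pcm], { type: 'audio/wav' });
}

// Example: play TTS output in the browser.
//
//   const b64 = await service.generateSpeech("Hello there");
//   new Audio(URL.createObjectURL(pcmBase64ToWavBlob(b64))).play();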