import { GoogleGenAI, Modality } from "@google/genai";

import { AppModule, ImageConfig, VeoConfig } from '../types';

// Models
const MODEL_CHAT_PRO = 'gemini-3-pro-preview';
const MODEL_RESEARCH = 'gemini-3-flash-preview';
const MODEL_IMAGE = 'gemini-3-pro-image-preview';
const MODEL_VIDEO = 'veo-3.1-fast-generate-preview';
const MODEL_AUDIO_TTS = 'gemini-2.5-flash-preview-tts';
const MODEL_AUDIO_TRANS = 'gemini-3-flash-preview';

export class GeminiService {
  private ai: GoogleGenAI | null = null;
  private apiKey: string;

  constructor(apiKey: string) {
    this.apiKey = apiKey;
    if (apiKey) {
      this.ai = new GoogleGenAI({ apiKey });
    }
  }

  updateKey(apiKey: string) {
    this.apiKey = apiKey;
    this.ai = new GoogleGenAI({ apiKey });
  }

  private getClient() {
    if (!this.ai) throw new Error("API key not set");
    return this.ai;
  }

  async generateText(
    prompt: string,
    module: AppModule,
    history: { role: string, parts: any[] }[],
    media?: { data: string, mimeType: string }[]
  ) {
    const ai = this.getClient();
    let model = MODEL_CHAT_PRO;
    let config: any = {};

    switch (module) {
      case AppModule.TUTOR:
        // Default to Pro here: tutoring implies multi-step explanations,
        // where quality matters more than latency. Swap in the Flash model
        // if fast, simple answers become the priority.
        model = MODEL_CHAT_PRO;
        break;
      case AppModule.THINKER:
        model = MODEL_CHAT_PRO;
        config.thinkingConfig = { thinkingBudget: 32768 };
        // maxOutputTokens is deliberately left unset: with a large thinking
        // budget, a low output cap can truncate the final answer.
        break;
      case AppModule.RESEARCH:
        model = MODEL_RESEARCH;
        config.tools = [{ googleSearch: {} }];
        break;
      case AppModule.VISION:
        model = MODEL_CHAT_PRO; // For analysis
        break;
      case AppModule.STUDIO:
        model = MODEL_CHAT_PRO; // For analysis
        break;
      case AppModule.AUDIO:
        model = MODEL_AUDIO_TRANS; // For transcription/analysis
        break;
    }

    // Use `chats.create` for multi-turn conversations: it manages prior turns
    // natively, and `sendMessageStream` then handles the current turn.
    const sdkHistory = history.map(h => ({
      role: h.role,
      parts: h.parts
    }));

    const chat = ai.chats.create({
      model: model,
      config: config,
      history: sdkHistory
    });

    // Prepare the current turn. The SDK expects the message as a string or an
    // array of parts (not a full Content object), so any media goes in as
    // inlineData parts ahead of the text prompt.
    let messageParts: any[] = [{ text: prompt }];
    if (media && media.length > 0) {
      messageParts = [
        ...media.map(m => ({ inlineData: { mimeType: m.mimeType, data: m.data } })),
        { text: prompt }
      ];
    }

    // Return the stream iterator so the caller can render tokens as they arrive.
    return chat.sendMessageStream({ message: messageParts });
  }
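
  // Consuming the stream (sketch; `service` and `appendToTranscript` are
  // hypothetical caller-side names, each chunk exposes its text via `.text`):
  //
  //   const stream = await service.generateText('Explain recursion', AppModule.TUTOR, []);
  //   for await (const chunk of stream) {
  //     appendToTranscript(chunk.text ?? '');
  //   }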

  async generateImage(prompt: string, config: ImageConfig) {
    const ai = this.getClient();
    // The Gemini image preview models (the "Nano Banana" series) generate
    // images through the regular generateContent call rather than a separate
    // image endpoint.
    const response = await ai.models.generateContent({
      model: MODEL_IMAGE,
      contents: {
        parts: [{ text: prompt }]
      },
      config: {
        imageConfig: {
          aspectRatio: config.aspectRatio,
          imageSize: config.size
        }
      }
    });

    // Extract the first inline image part and return it as a data URL.
    for (const part of response.candidates?.[0]?.content?.parts || []) {
      if (part.inlineData) {
        return `data:${part.inlineData.mimeType};base64,${part.inlineData.data}`;
      }
    }
    throw new Error("No image generated");
  }
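
  // Example usage (sketch; the ImageConfig values shown are assumptions,
  // the real options live in '../types'):
  //
  //   const dataUrl = await service.generateImage('A watercolor fox', {
  //     aspectRatio: '1:1',
  //     size: '1K',
  //   } as ImageConfig);
  //   imgElement.src = dataUrl;  // imgElement: a hypothetical <img> node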

  async generateVideo(prompt: string, config: VeoConfig) {
    const ai = this.getClient();
    // Veo requires a paid key. In the AI Studio browser environment, prompt
    // the user to select one if none is set yet.
    if (typeof window !== 'undefined' && (window as any).aistudio) {
      try {
        const hasKey = await (window as any).aistudio.hasSelectedApiKey();
        if (!hasKey) {
          await (window as any).aistudio.openSelectKey();
        }
      } catch (e) {
        console.warn("Veo key selection check failed, proceeding with env key", e);
      }
    }

    // We reuse `this.ai` (built from the user-entered key) rather than
    // constructing a fresh client per call: a key picked through the AI
    // Studio dialog never reaches `this.apiKey`, so the stored key is assumed
    // to be the paid key Veo requires.
    let operation = await ai.models.generateVideos({
      model: MODEL_VIDEO,
      prompt: prompt,
      config: {
        numberOfVideos: 1,
        resolution: config.resolution,
        aspectRatio: config.aspectRatio
      }
    });

    // Video generation is a long-running operation; poll until it finishes.
    while (!operation.done) {
      await new Promise(resolve => setTimeout(resolve, 5000));
      operation = await ai.operations.getVideosOperation({ operation: operation });
    }

    const videoUri = operation.response?.generatedVideos?.[0]?.video?.uri;
    if (!videoUri) throw new Error("No video URI returned");

    // Fetch the video bytes; the download URI expects the API key as an
    // additional query parameter.
    const vidResponse = await fetch(`${videoUri}&key=${this.apiKey}`);
    const blob = await vidResponse.blob();
    return URL.createObjectURL(blob);
  }
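
  // Example usage (sketch; the VeoConfig values shown are assumptions):
  //
  //   const objectUrl = await service.generateVideo('A drone shot of a coastline', {
  //     resolution: '720p',
  //     aspectRatio: '16:9',
  //   } as VeoConfig);
  //   videoElement.src = objectUrl;  // videoElement: a hypothetical <video> node
  //   // Call URL.revokeObjectURL(objectUrl) when finished to release the blob.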

  async transcribeAudio(base64Audio: string, mimeType: string) {
    const ai = this.getClient();
    const response = await ai.models.generateContent({
      model: MODEL_AUDIO_TRANS,
      contents: {
        parts: [
          { inlineData: { mimeType: mimeType, data: base64Audio } },
          { text: "Transcribe this audio exactly." }
        ]
      }
    });
    return response.text;
  }
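
  // Producing the base64 input from a user-selected File (sketch; FileReader
  // yields a data URL, so the "data:...;base64," prefix must be stripped):
  //
  //   const dataUrl: string = await new Promise((resolve, reject) => {
  //     const reader = new FileReader();
  //     reader.onload = () => resolve(reader.result as string);
  //     reader.onerror = reject;
  //     reader.readAsDataURL(file);
  //   });
  //   const text = await service.transcribeAudio(dataUrl.split(',')[1], file.type);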

  async generateSpeech(text: string, voice: string = 'Kore') {
    const ai = this.getClient();
    const response = await ai.models.generateContent({
      model: MODEL_AUDIO_TTS,
      contents: [{ parts: [{ text }] }],
      config: {
        responseModalities: [Modality.AUDIO],
        speechConfig: {
          voiceConfig: {
            prebuiltVoiceConfig: { voiceName: voice },
          },
        },
      },
    });

    const base64Audio = response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
    if (!base64Audio) throw new Error("No audio generated");
    return base64Audio;
  }
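
  // Note: the TTS preview models return raw PCM samples (16-bit little-endian
  // at 24 kHz per the Gemini docs), not a containerized format such as WAV or
  // MP3. Callers typically prepend a WAV header or decode the samples through
  // the Web Audio API before playback.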
}
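
// Minimal wiring sketch (hypothetical: assumes the bundler exposes the key via
// an environment variable; AppModule comes from '../types'):
//
//   const service = new GeminiService(process.env.GEMINI_API_KEY ?? '');
//   // Later, if the user pastes a key into settings:
//   service.updateKey(userEnteredKey);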