import { GoogleGenAI, Modality } from "@google/genai";
import { AppModule, ImageConfig, VeoConfig } from '../types';

// Models
const MODEL_CHAT_PRO = 'gemini-3-pro-preview';
const MODEL_RESEARCH = 'gemini-3-flash-preview';
const MODEL_IMAGE = 'gemini-3-pro-image-preview';
const MODEL_VIDEO = 'veo-3.1-fast-generate-preview';
const MODEL_AUDIO_TTS = 'gemini-2.5-flash-preview-tts';
const MODEL_AUDIO_TRANS = 'gemini-3-flash-preview';

export class GeminiService {
  private ai: GoogleGenAI | null = null;
  private apiKey: string;

  constructor(apiKey: string) {
    this.apiKey = apiKey;
    if (apiKey) {
      this.ai = new GoogleGenAI({ apiKey });
    }
  }

  updateKey(apiKey: string) {
    this.apiKey = apiKey;
    this.ai = new GoogleGenAI({ apiKey });
  }

  private getClient() {
    if (!this.ai) throw new Error("API Key not set");
    return this.ai;
  }

  async generateText(
    prompt: string,
    module: AppModule,
    history: { role: string, parts: any[] }[],
    media?: { data: string, mimeType: string }[]
  ) {
    const ai = this.getClient();
    let model = MODEL_CHAT_PRO;
    const config: any = {};

    switch (module) {
      case AppModule.TUTOR:
        // Guideline: Pro for complex tasks, Flash/Flash-Lite when speed matters.
        // Tutoring implies teaching-quality answers, so we default to Pro.
        model = MODEL_CHAT_PRO;
        break;
      case AppModule.THINKER:
        model = MODEL_CHAT_PRO;
        // Large thinking budget. Per the guidance, avoid setting maxOutputTokens
        // alongside a maxed-out thinking budget unless it is actually required.
        config.thinkingConfig = { thinkingBudget: 32768 };
        break;
      case AppModule.RESEARCH:
        model = MODEL_RESEARCH;
        config.tools = [{ googleSearch: {} }];
        break;
      case AppModule.VISION:
        model = MODEL_CHAT_PRO; // Image analysis
        break;
      case AppModule.STUDIO:
        model = MODEL_CHAT_PRO; // Asset analysis
        break;
      case AppModule.AUDIO:
        model = MODEL_AUDIO_TRANS; // Transcription / audio analysis
        break;
    }

    // Build the conversation. For multi-turn history, `chats.create` is a better
    // fit than hand-assembling `generateContent` calls: we seed the chat with the
    // prior turns, and `sendMessage` handles the current one.
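    // Illustrative history shape the SDK expects (roles alternate 'user'/'model'):
    //   [{ role: 'user', parts: [{ text: 'Hi' }] },
    //    { role: 'model', parts: [{ text: 'Hello!' }] }]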
    const sdkHistory = history.map(h => ({ role: h.role, parts: h.parts }));
    const chat = ai.chats.create({ model: model, config: config, history: sdkHistory });

    // Prepare the current turn: media parts (if any) precede the text prompt.
    // Note: `sendMessage` takes the turn's parts, not a full Content object with a role.
    let messageParts: any[] = [{ text: prompt }];
    if (media && media.length > 0) {
      messageParts = [
        ...media.map(m => ({ inlineData: { mimeType: m.mimeType, data: m.data } })),
        { text: prompt }
      ];
    }

    // Return the stream iterator so callers can render tokens as they arrive.
    const resultStream = await chat.sendMessageStream({ message: messageParts });
    return resultStream;
  }

  async generateImage(prompt: string, config: ImageConfig) {
    const ai = this.getClient();
    // The nano banana series (gemini-3-pro-image-preview) returns images as
    // inline-data parts from `generateContent`.
    const response = await ai.models.generateContent({
      model: MODEL_IMAGE,
      contents: { parts: [{ text: prompt }] },
      config: {
        imageConfig: {
          aspectRatio: config.aspectRatio,
          imageSize: config.size
        }
      }
    });

    // Extract the first inline image part as a data URL.
    for (const part of response.candidates?.[0]?.content?.parts || []) {
      if (part.inlineData) {
        return `data:${part.inlineData.mimeType};base64,${part.inlineData.data}`;
      }
    }
    throw new Error("No image generated");
  }

  async generateVideo(prompt: string, config: VeoConfig) {
    const ai = this.getClient();

    // In the AI Studio browser environment, prompt for key selection if none is set.
    if (typeof window !== 'undefined' && (window as any).aistudio) {
      try {
        const hasKey = await (window as any).aistudio.hasSelectedApiKey();
        if (!hasKey) {
          await (window as any).aistudio.openSelectKey();
        }
      } catch (e) {
        console.warn("Veo key selection check failed, proceeding with env key", e);
      }
    }

    // The guidance suggests creating a new GoogleGenAI instance right before each
    // call when relying on the key-selection dialog, but a dialog-selected key never
    // lands in `this.apiKey`. We assume the user-entered key is the paid key Veo
    // requires, so we keep using `this.ai`.
    let operation = await ai.models.generateVideos({
      model: MODEL_VIDEO,
      prompt: prompt,
      config: {
        numberOfVideos: 1,
        resolution: config.resolution,
        aspectRatio: config.aspectRatio
      }
    });

    // Poll the long-running operation until the video is ready.
    while (!operation.done) {
      await new Promise(resolve => setTimeout(resolve, 5000));
      operation = await ai.operations.getVideosOperation({ operation: operation });
    }

    const videoUri = operation.response?.generatedVideos?.[0]?.video?.uri;
    if (!videoUri) throw new Error("No video URI returned");

    // Fetch the video bytes; the download URI requires the API key as a query param.
    const vidResponse = await fetch(`${videoUri}&key=${this.apiKey}`);
    const blob = await vidResponse.blob();
    return URL.createObjectURL(blob);
  }

  async transcribeAudio(base64Audio: string, mimeType: string) {
    const ai = this.getClient();
    const response = await ai.models.generateContent({
      model: MODEL_AUDIO_TRANS,
      contents: {
        parts: [
          { inlineData: { mimeType: mimeType, data: base64Audio } },
          { text: "Transcribe this audio exactly." }
        ]
      }
    });
    return response.text;
  }

  async generateSpeech(text: string, voice: string = 'Kore') {
    const ai = this.getClient();
    const response = await ai.models.generateContent({
      model: MODEL_AUDIO_TTS,
      contents: [{ parts: [{ text }] }],
      config: {
        responseModalities: [Modality.AUDIO],
        speechConfig: {
          voiceConfig: {
            prebuiltVoiceConfig: { voiceName: voice },
          },
        },
      },
    });

    const base64Audio = response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
    if (!base64Audio) throw new Error("No audio generated");
    return base64Audio;
  }
}
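
// Usage sketch (illustrative only; not part of the service). Assumes the key
// comes from an `API_KEY` env var and that AppModule is the enum from ../types;
// adjust to your app's wiring.
//
//   const service = new GeminiService(process.env.API_KEY ?? '');
//   const stream = await service.generateText('Explain closures', AppModule.TUTOR, []);
//   for await (const chunk of stream) {
//     console.log(chunk.text);
//   }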