import { GoogleGenAI, Modality } from "@google/genai";
import { ChatMode, Message, ChatScenario } from "../types";

// Helper to get client
const getClient = () => {
  const apiKey = process.env.API_KEY;
  if (!apiKey) throw new Error("API Key is missing. Please ensure process.env.API_KEY is set.");
  return new GoogleGenAI({ apiKey });
};

// --- Models ---
const MODEL_CHAT_STANDARD = "gemini-3-flash-preview";
const MODEL_CHAT_DEEP = "gemini-3-pro-preview";
const MODEL_CHAT_FAST = "gemini-flash-lite-latest";
const MODEL_IMAGE_GEN = "gemini-3-pro-image-preview";
const MODEL_VIDEO_GEN = "veo-3.1-fast-generate-preview";
const MODEL_TTS = "gemini-2.5-flash-preview-tts";

// --- Chat ---
export const streamChatResponse = async (
  history: Message[],
  currentMessage: string,
  mode: ChatMode,
  language: string,
  scenario: ChatScenario = ChatScenario.GENERAL,
  attachments: { mimeType: string; data: string }[] = [],
  onChunk: (text: string, grounding?: any) => void
) => {
  const ai = getClient();
  let model = MODEL_CHAT_STANDARD;

  // Construct system instruction based on scenario
  let baseInstruction = "";
  switch (scenario) {
    case ChatScenario.READING:
      baseInstruction = `You are a distinguished Sociology Professor specializing in Classical Sociological Theory.
Focus on the works of Marx, Weber, Durkheim, Simmel, and other foundational figures.
When answering:
1. Contextualize the text historically.
2. Explain key arguments precisely.
3. Discuss the critical reception and legacy.
4. Use academic yet accessible language.`;
      break;
    case ChatScenario.CONCEPT:
      baseInstruction = `You are an expert Sociological Concept Analyst. Your goal is to provide deep, multi-dimensional definitions of sociological terms.
When defining a concept:
1. Provide a clear, concise definition.
2. Explain its etymology or theoretical origin.
3. Contrast it with related or opposing concepts.
4. Provide concrete examples of the concept in action.`;
      break;
    case ChatScenario.RESEARCH:
      baseInstruction = `You are a Senior Research Methodology Consultant. You help students and researchers design their studies.
Focus on:
1. Refining research questions.
2. Suggesting appropriate methods (Qualitative, Quantitative, Mixed).
3. Discussing sampling, operationalization, and ethics.
4. Suggesting theoretical frameworks suitable for the topic.`;
      break;
    case ChatScenario.GENERAL:
    default:
      baseInstruction = `You are a helpful and knowledgeable Sociology Learning Assistant. Answer questions clearly using sociological perspectives.
Encourage critical thinking and connect daily life examples to sociological theories.`;
      break;
  }

  const config: any = {
    systemInstruction: `${baseInstruction}

Always reply in the user's preferred language: ${language}.`,
  };

  // Configure model and tools based on mode
  if (mode === ChatMode.STANDARD) {
    model = MODEL_CHAT_STANDARD;
    config.tools = [{ googleSearch: {} }];
  } else if (mode === ChatMode.DEEP) {
    model = MODEL_CHAT_DEEP;
    config.thinkingConfig = { thinkingBudget: 32768 }; // Max for pro
  } else if (mode === ChatMode.FAST) {
    model = MODEL_CHAT_FAST;
  }

  const chat = ai.chats.create({
    model,
    config,
    // Exclude the last entry: it is the message being sent now
    history: history.slice(0, -1).map(m => ({
      role: m.role,
      parts: [
        { text: m.content },
        ...(m.attachments || []).map(a => ({
          inlineData: { mimeType: a.mimeType, data: a.data }
        }))
      ]
    }))
  });

  const parts: any[] = [{ text: currentMessage }];
  attachments.forEach(att => {
    parts.push({ inlineData: { mimeType: att.mimeType, data: att.data } });
  });

  try {
    const result = await chat.sendMessageStream({ message: { parts } });
    for await (const chunk of result) {
      const text = chunk.text;
      const grounding = chunk.candidates?.[0]?.groundingMetadata;
      if (text || grounding) {
        onChunk(text || '', grounding);
      }
    }
  } catch (e) {
    console.error("Chat error", e);
    throw e;
  }
};
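
// Usage sketch (illustrative only, not used elsewhere in this module):
// how a caller might stream a reply into UI state. `setStreamingText` and
// `setGrounding` are hypothetical state setters, not part of this codebase.
/*
await streamChatResponse(
  messages,                        // full history; last entry = message being sent
  "What does Durkheim mean by anomie?",
  ChatMode.STANDARD,
  "en",
  ChatScenario.CONCEPT,
  [],                              // no attachments
  (text, grounding) => {
    setStreamingText(prev => prev + text);    // append each streamed chunk
    if (grounding) setGrounding(grounding);   // surface search citations, if any
  }
);
*/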

// --- Image Generation ---
export const generateImage = async (
  prompt: string,
  size: "1K" | "2K" | "4K"
): Promise<string[]> => {
  const ai = getClient();

  // Using gemini-3-pro-image-preview
  const response = await ai.models.generateContent({
    model: MODEL_IMAGE_GEN,
    contents: { parts: [{ text: prompt }] },
    config: {
      imageConfig: {
        imageSize: size,
        count: 1, // Usually only 1 allowed for this model in preview
      }
    }
  });

  const images: string[] = [];
  if (response.candidates?.[0]?.content?.parts) {
    for (const part of response.candidates[0].content.parts) {
      if (part.inlineData && part.inlineData.data) {
        images.push(`data:${part.inlineData.mimeType};base64,${part.inlineData.data}`);
      }
    }
  }
  return images;
};

// --- Video Generation ---
export const generateVideo = async (
  prompt: string,
  aspectRatio: "16:9" | "9:16"
): Promise<string> => {
  const ai = getClient();

  let operation = await ai.models.generateVideos({
    model: MODEL_VIDEO_GEN,
    prompt: prompt,
    config: {
      numberOfVideos: 1,
      aspectRatio: aspectRatio,
      resolution: '720p', // fast-generate-preview often defaults to this
    }
  });

  // Poll until the long-running operation completes
  while (!operation.done) {
    await new Promise(resolve => setTimeout(resolve, 5000));
    operation = await ai.operations.getVideosOperation({ operation: operation });
  }

  const uri = operation.response?.generatedVideos?.[0]?.video?.uri;
  if (!uri) throw new Error("No video URI returned");

  // Fetch the actual bytes using the API key
  const fetchResponse = await fetch(`${uri}&key=${process.env.API_KEY}`);
  const blob = await fetchResponse.blob();
  return URL.createObjectURL(blob);
};

// --- Transcription ---
export const transcribeAudio = async (
  audioBase64: string,
  mimeType: string
): Promise<string> => {
  const ai = getClient();

  const response = await ai.models.generateContent({
    model: MODEL_CHAT_STANDARD, // 3-flash handles audio well
    contents: {
      parts: [
        { inlineData: { mimeType, data: audioBase64 } },
        { text: "Please transcribe this audio exactly as spoken." }
      ]
    }
  });

  return response.text || "";
};
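
// Sketch of a caller-side helper, assuming audio is captured as a Blob
// (e.g. via MediaRecorder). Converts the Blob to the base64 string that
// transcribeAudio expects. The name `blobToBase64` is illustrative; nothing
// in this module depends on it.
export const blobToBase64 = (blob: Blob): Promise<string> =>
  new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onloadend = () => {
      // reader.result is a data URL ("data:audio/webm;base64,..."); keep only the payload
      const dataUrl = reader.result as string;
      resolve(dataUrl.split(',')[1]);
    };
    reader.onerror = reject;
    reader.readAsDataURL(blob);
  });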

// --- TTS ---
export const generateSpeech = async (
  text: string
): Promise<AudioBuffer> => {
  const ai = getClient();

  const response = await ai.models.generateContent({
    model: MODEL_TTS,
    contents: { parts: [{ text }] },
    config: {
      responseModalities: [Modality.AUDIO],
      speechConfig: {
        voiceConfig: {
          prebuiltVoiceConfig: { voiceName: 'Kore' },
        },
      },
    },
  });

  const base64Audio = response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
  if (!base64Audio) throw new Error("No audio generated");

  const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
  // The TTS model returns raw 16-bit PCM at 24kHz, mono
  const audioBuffer = await decodeAudioData(
    decode(base64Audio),
    audioContext,
    24000,
    1
  );
  return audioBuffer;
};

// Helper utils for audio

// Decode a base64 string into raw bytes
function decode(base64: string) {
  const binaryString = atob(base64);
  const len = binaryString.length;
  const bytes = new Uint8Array(len);
  for (let i = 0; i < len; i++) {
    bytes[i] = binaryString.charCodeAt(i);
  }
  return bytes;
}

// Convert raw 16-bit PCM bytes into a playable AudioBuffer
async function decodeAudioData(
  data: Uint8Array,
  ctx: AudioContext,
  sampleRate: number,
  numChannels: number,
): Promise<AudioBuffer> {
  const dataInt16 = new Int16Array(data.buffer);
  const frameCount = dataInt16.length / numChannels;
  const buffer = ctx.createBuffer(numChannels, frameCount, sampleRate);

  for (let channel = 0; channel < numChannels; channel++) {
    const channelData = buffer.getChannelData(channel);
    for (let i = 0; i < frameCount; i++) {
      // Normalize signed 16-bit samples to the [-1, 1) float range
      channelData[i] = dataInt16[i * numChannels + channel] / 32768.0;
    }
  }
  return buffer;
}
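
// Usage sketch (illustrative only): playing the decoded TTS buffer through
// an AudioBufferSourceNode. A real app would likely reuse a single
// AudioContext rather than creating one per call.
/*
const buffer = await generateSpeech("The sociological imagination...");
const ctx = new AudioContext();
const source = ctx.createBufferSource();
source.buffer = buffer;
source.connect(ctx.destination);
source.start();
*/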