// ai-app-skg/services/geminiService.ts
import { GoogleGenAI, Modality } from "@google/genai";
import { ChatMode, Message, ChatScenario } from "../types";
// Helper to get a configured client
const getClient = () => {
  const apiKey = process.env.API_KEY;
  if (!apiKey) throw new Error("API Key is missing. Please ensure process.env.API_KEY is set.");
  return new GoogleGenAI({ apiKey });
};
// --- Models ---
const MODEL_CHAT_STANDARD = "gemini-3-flash-preview";
const MODEL_CHAT_DEEP = "gemini-3-pro-preview";
const MODEL_CHAT_FAST = "gemini-flash-lite-latest";
const MODEL_IMAGE_GEN = "gemini-3-pro-image-preview";
const MODEL_VIDEO_GEN = "veo-3.1-fast-generate-preview";
const MODEL_TTS = "gemini-2.5-flash-preview-tts";
// --- Chat ---
export const streamChatResponse = async (
  history: Message[],
  currentMessage: string,
  mode: ChatMode,
  language: string,
  scenario: ChatScenario = ChatScenario.GENERAL,
  attachments: { mimeType: string; data: string }[] = [],
  onChunk: (text: string, grounding?: any) => void
) => {
  const ai = getClient();
  let model = MODEL_CHAT_STANDARD;
  // Construct the system instruction based on the scenario
  let baseInstruction = "";
  switch (scenario) {
    case ChatScenario.READING:
      baseInstruction = `You are a distinguished Sociology Professor specializing in Classical Sociological Theory.
Focus on the works of Marx, Weber, Durkheim, Simmel, and other foundational figures.
When answering:
1. Contextualize the text historically.
2. Explain key arguments precisely.
3. Discuss the critical reception and legacy.
4. Use academic yet accessible language.`;
      break;
    case ChatScenario.CONCEPT:
      baseInstruction = `You are an expert Sociological Concept Analyst.
Your goal is to provide deep, multi-dimensional definitions of sociological terms.
When defining a concept:
1. Provide a clear, concise definition.
2. Explain its etymology or theoretical origin.
3. Contrast it with related or opposing concepts.
4. Provide concrete examples of the concept in action.`;
      break;
    case ChatScenario.RESEARCH:
      baseInstruction = `You are a Senior Research Methodology Consultant.
You help students and researchers design their studies.
Focus on:
1. Refining research questions.
2. Suggesting appropriate methods (Qualitative, Quantitative, Mixed).
3. Discussing sampling, operationalization, and ethics.
4. Suggesting theoretical frameworks suitable for the topic.`;
      break;
    case ChatScenario.GENERAL:
    default:
      baseInstruction = `You are a helpful and knowledgeable Sociology Learning Assistant.
Answer questions clearly using sociological perspectives.
Encourage critical thinking and connect daily life examples to sociological theories.`;
      break;
  }
  const config: any = {
    systemInstruction: `${baseInstruction} Always reply in the user's preferred language: ${language}.`,
  };
  // Configure model and tools based on mode
  if (mode === ChatMode.STANDARD) {
    model = MODEL_CHAT_STANDARD;
    config.tools = [{ googleSearch: {} }]; // Enable Google Search grounding
  } else if (mode === ChatMode.DEEP) {
    model = MODEL_CHAT_DEEP;
    config.thinkingConfig = { thinkingBudget: 32768 }; // Max for pro
  } else if (mode === ChatMode.FAST) {
    model = MODEL_CHAT_FAST;
  }
  // Seed the chat with prior turns; the final history entry is assumed to
  // duplicate currentMessage (sent separately below), hence slice(0, -1).
  const chat = ai.chats.create({
    model,
    config,
    history: history.slice(0, -1).map(m => ({
      role: m.role,
      parts: [
        { text: m.content },
        ...(m.attachments || []).map(a => ({
          inlineData: { mimeType: a.mimeType, data: a.data }
        }))
      ]
    }))
  });
  const parts: any[] = [{ text: currentMessage }];
  attachments.forEach(att => {
    parts.push({ inlineData: { mimeType: att.mimeType, data: att.data } });
  });
  try {
    const result = await chat.sendMessageStream({
      message: parts // sendMessageStream takes a Part array directly, not a Content wrapper
    });
    for await (const chunk of result) {
      const text = chunk.text;
      const grounding = chunk.candidates?.[0]?.groundingMetadata;
      if (text || grounding) {
        onChunk(text || '', grounding);
      }
    }
  } catch (e) {
    console.error("Chat error", e);
    throw e;
  }
};
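/*
 * Example call (a sketch; `messages` and `appendChunk` are hypothetical
 * caller-side names, not part of this module). Note that `history` is
 * expected to already contain the latest user turn, which the implementation
 * drops with slice(0, -1) before seeding the chat:
 *
 *   await streamChatResponse(
 *     messages,
 *     "How does Weber distinguish class from status?",
 *     ChatMode.STANDARD,
 *     "en",
 *     ChatScenario.READING,
 *     [],
 *     (text, grounding) => appendChunk(text, grounding)
 *   );
 */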
// --- Image Generation ---
export const generateImage = async (
  prompt: string,
  size: "1K" | "2K" | "4K"
): Promise<string[]> => {
  const ai = getClient();
  // Uses gemini-3-pro-image-preview via generateContent
  const response = await ai.models.generateContent({
    model: MODEL_IMAGE_GEN,
    contents: { parts: [{ text: prompt }] },
    config: {
      imageConfig: {
        imageSize: size,
        count: 1, // Only 1 image is usually allowed for this model in preview
      }
    }
  });
  // Collect any inline image parts as base64 data URLs
  const images: string[] = [];
  if (response.candidates?.[0]?.content?.parts) {
    for (const part of response.candidates[0].content.parts) {
      if (part.inlineData && part.inlineData.data) {
        images.push(`data:${part.inlineData.mimeType};base64,${part.inlineData.data}`);
      }
    }
  }
  return images;
};
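/*
 * Example call (a sketch; the prompt text is illustrative). The returned
 * strings are base64 data URLs, so they can be assigned straight to an
 * <img> src; `imgElement` is a hypothetical HTMLImageElement:
 *
 *   const [url] = await generateImage("A 19th-century factory floor, engraving style", "2K");
 *   if (url) imgElement.src = url;
 */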
// --- Video Generation ---
export const generateVideo = async (
  prompt: string,
  aspectRatio: "16:9" | "9:16"
): Promise<string> => {
  const ai = getClient();
  let operation = await ai.models.generateVideos({
    model: MODEL_VIDEO_GEN,
    prompt: prompt,
    config: {
      numberOfVideos: 1,
      aspectRatio: aspectRatio,
      resolution: '720p', // fast-generate-preview often defaults to this
    }
  });
  // Video generation is long-running: poll the operation until it completes
  while (!operation.done) {
    await new Promise(resolve => setTimeout(resolve, 5000));
    operation = await ai.operations.getVideosOperation({ operation: operation });
  }
  const uri = operation.response?.generatedVideos?.[0]?.video?.uri;
  if (!uri) throw new Error("No video URI returned");
  // The returned URI needs the API key appended to fetch the actual bytes
  const fetchResponse = await fetch(`${uri}&key=${process.env.API_KEY}`);
  const blob = await fetchResponse.blob();
  return URL.createObjectURL(blob);
};
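/*
 * Example call (a sketch; `videoElement` is a hypothetical HTMLVideoElement).
 * generateVideo resolves with a blob object URL, which should be released via
 * URL.revokeObjectURL once the player no longer needs it:
 *
 *   const src = await generateVideo("Commuters crossing a busy intersection, time-lapse", "16:9");
 *   videoElement.src = src;
 *   // later, when done: URL.revokeObjectURL(src);
 */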
// --- Transcription ---
export const transcribeAudio = async (
  audioBase64: string,
  mimeType: string
): Promise<string> => {
  const ai = getClient();
  const response = await ai.models.generateContent({
    model: MODEL_CHAT_STANDARD, // 3-flash is good for audio
    contents: {
      parts: [
        { inlineData: { mimeType, data: audioBase64 } },
        { text: "Please transcribe this audio exactly as spoken." }
      ]
    }
  });
  return response.text || "";
};
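/*
 * Example call (a sketch, assuming the audio was captured as a Blob, e.g. from
 * MediaRecorder; `blobToBase64` is a hypothetical helper that reads the blob
 * with FileReader and strips the "data:...;base64," prefix):
 *
 *   const base64 = await blobToBase64(recordingBlob);
 *   const transcript = await transcribeAudio(base64, "audio/webm");
 */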
// --- TTS ---
export const generateSpeech = async (
  text: string
): Promise<AudioBuffer> => {
  const ai = getClient();
  const response = await ai.models.generateContent({
    model: MODEL_TTS,
    contents: { parts: [{ text }] },
    config: {
      responseModalities: [Modality.AUDIO],
      speechConfig: {
        voiceConfig: {
          prebuiltVoiceConfig: { voiceName: 'Kore' },
        },
      },
    },
  });
  const base64Audio = response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
  if (!base64Audio) throw new Error("No audio generated");
  const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
  // The TTS model returns raw 16-bit PCM at 24 kHz, mono
  const audioBuffer = await decodeAudioData(
    decode(base64Audio),
    audioContext,
    24000,
    1
  );
  return audioBuffer;
};
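/*
 * Example playback (a sketch). The returned AudioBuffer can be played through
 * an AudioContext-backed buffer source:
 *
 *   const buffer = await generateSpeech("Anomie describes a breakdown of social norms.");
 *   const ctx = new AudioContext();
 *   const source = ctx.createBufferSource();
 *   source.buffer = buffer;
 *   source.connect(ctx.destination);
 *   source.start();
 */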
// --- Audio Helpers ---
// Decode a base64 string into raw bytes
function decode(base64: string) {
  const binaryString = atob(base64);
  const len = binaryString.length;
  const bytes = new Uint8Array(len);
  for (let i = 0; i < len; i++) {
    bytes[i] = binaryString.charCodeAt(i);
  }
  return bytes;
}
// Convert raw 16-bit PCM (no container header) into a Web Audio AudioBuffer.
// ctx.decodeAudioData cannot be used here because the payload has no WAV/MP3 header.
async function decodeAudioData(
  data: Uint8Array,
  ctx: AudioContext,
  sampleRate: number,
  numChannels: number,
): Promise<AudioBuffer> {
  const dataInt16 = new Int16Array(data.buffer);
  const frameCount = dataInt16.length / numChannels;
  const buffer = ctx.createBuffer(numChannels, frameCount, sampleRate);
  // De-interleave samples and normalize signed 16-bit values to floats in [-1, 1)
  for (let channel = 0; channel < numChannels; channel++) {
    const channelData = buffer.getChannelData(channel);
    for (let i = 0; i < frameCount; i++) {
      channelData[i] = dataInt16[i * numChannels + channel] / 32768.0;
    }
  }
  return buffer;
}