// Gemini API service: chat streaming, image/video generation, audio transcription, and TTS helpers.
import { GoogleGenAI, Modality, Type } from "@google/genai";
|
|
import { ChatMode, Message, ChatScenario } from "../types";
|
|
|
|
// Helper to get client
|
|
const getClient = () => {
|
|
const apiKey = process.env.API_KEY;
|
|
if (!apiKey) throw new Error("API Key is missing. Please ensure process.env.API_KEY is set.");
|
|
return new GoogleGenAI({ apiKey });
|
|
};
|
|
|
|
// --- Models ---
// Default chat model: balanced speed/quality (also used for audio transcription below).
const MODEL_CHAT_STANDARD = "gemini-3-flash-preview";
// "Deep" mode: pro model driven with a large thinking budget.
const MODEL_CHAT_DEEP = "gemini-3-pro-preview";
// "Fast" mode: lightest/cheapest chat model.
const MODEL_CHAT_FAST = "gemini-flash-lite-latest";
// Image generation via generateContent + imageConfig.
const MODEL_IMAGE_GEN = "gemini-3-pro-image-preview";
// Video generation via generateVideos (long-running operation, polled below).
const MODEL_VIDEO_GEN = "veo-3.1-fast-generate-preview";
// Text-to-speech model; returns raw PCM audio (decoded in generateSpeech).
const MODEL_TTS = "gemini-2.5-flash-preview-tts";
|
|
|
|
// --- Chat ---
|
|
|
|
export const streamChatResponse = async (
|
|
history: Message[],
|
|
currentMessage: string,
|
|
mode: ChatMode,
|
|
language: string,
|
|
scenario: ChatScenario = ChatScenario.GENERAL,
|
|
attachments: { mimeType: string; data: string }[] = [],
|
|
onChunk: (text: string, grounding?: any) => void
|
|
) => {
|
|
const ai = getClient();
|
|
let model = MODEL_CHAT_STANDARD;
|
|
|
|
// Construct System Instruction based on Scenario
|
|
let baseInstruction = "";
|
|
switch (scenario) {
|
|
case ChatScenario.READING:
|
|
baseInstruction = `You are a distinguished Sociology Professor specializing in Classical Sociological Theory.
|
|
Focus on the works of Marx, Weber, Durkheim, Simmel, and other foundational figures.
|
|
When answering:
|
|
1. Contextualize the text historically.
|
|
2. Explain key arguments precisely.
|
|
3. Discuss the critical reception and legacy.
|
|
4. Use academic yet accessible language.`;
|
|
break;
|
|
case ChatScenario.CONCEPT:
|
|
baseInstruction = `You are an expert Sociological Concept Analyst.
|
|
Your goal is to provide deep, multi-dimensional definitions of sociological terms.
|
|
When defining a concept:
|
|
1. Provide a clear, concise definition.
|
|
2. Explain its etymology or theoretical origin.
|
|
3. Contrast it with related or opposing concepts.
|
|
4. Provide concrete examples of the concept in action.`;
|
|
break;
|
|
case ChatScenario.RESEARCH:
|
|
baseInstruction = `You are a Senior Research Methodology Consultant.
|
|
You help students and researchers design their studies.
|
|
Focus on:
|
|
1. Refining research questions.
|
|
2. Suggesting appropriate methods (Qualitative, Quantitative, Mixed).
|
|
3. Discussing sampling, operationalization, and ethics.
|
|
4. Suggesting theoretical frameworks suitable for the topic.`;
|
|
break;
|
|
case ChatScenario.GENERAL:
|
|
default:
|
|
baseInstruction = `You are a helpful and knowledgeable Sociology Learning Assistant.
|
|
Answer questions clearly using sociological perspectives.
|
|
Encourage critical thinking and connect daily life examples to sociological theories.`;
|
|
break;
|
|
}
|
|
|
|
let config: any = {
|
|
systemInstruction: `${baseInstruction} Always reply in the user's preferred language: ${language}.`,
|
|
};
|
|
|
|
// Configure based on mode
|
|
if (mode === ChatMode.STANDARD) {
|
|
model = MODEL_CHAT_STANDARD;
|
|
config.tools = [{ googleSearch: {} }];
|
|
} else if (mode === ChatMode.DEEP) {
|
|
model = MODEL_CHAT_DEEP;
|
|
config.thinkingConfig = { thinkingBudget: 32768 }; // Max for pro
|
|
} else if (mode === ChatMode.FAST) {
|
|
model = MODEL_CHAT_FAST;
|
|
}
|
|
|
|
const chat = ai.chats.create({
|
|
model,
|
|
config,
|
|
history: history.slice(0, -1).map(m => ({
|
|
role: m.role,
|
|
parts: [
|
|
{ text: m.content },
|
|
...(m.attachments || []).map(a => ({
|
|
inlineData: { mimeType: a.mimeType, data: a.data }
|
|
}))
|
|
]
|
|
}))
|
|
});
|
|
|
|
const parts: any[] = [{ text: currentMessage }];
|
|
attachments.forEach(att => {
|
|
parts.push({ inlineData: { mimeType: att.mimeType, data: att.data } });
|
|
});
|
|
|
|
try {
|
|
const result = await chat.sendMessageStream({
|
|
message: { parts }
|
|
});
|
|
|
|
for await (const chunk of result) {
|
|
const text = chunk.text;
|
|
const grounding = chunk.candidates?.[0]?.groundingMetadata;
|
|
if (text || grounding) {
|
|
onChunk(text || '', grounding);
|
|
}
|
|
}
|
|
} catch (e) {
|
|
console.error("Chat error", e);
|
|
throw e;
|
|
}
|
|
};
|
|
|
|
// --- Image Generation ---
|
|
export const generateImage = async (
|
|
prompt: string,
|
|
size: "1K" | "2K" | "4K"
|
|
): Promise<string[]> => {
|
|
const ai = getClient();
|
|
|
|
// Using gemini-3-pro-image-preview
|
|
const response = await ai.models.generateContent({
|
|
model: MODEL_IMAGE_GEN,
|
|
contents: { parts: [{ text: prompt }] },
|
|
config: {
|
|
imageConfig: {
|
|
imageSize: size,
|
|
count: 1, // Only 1 allowed usually for this model in preview
|
|
}
|
|
}
|
|
});
|
|
|
|
const images: string[] = [];
|
|
if (response.candidates?.[0]?.content?.parts) {
|
|
for (const part of response.candidates[0].content.parts) {
|
|
if (part.inlineData && part.inlineData.data) {
|
|
images.push(`data:${part.inlineData.mimeType};base64,${part.inlineData.data}`);
|
|
}
|
|
}
|
|
}
|
|
return images;
|
|
};
|
|
|
|
// --- Video Generation ---
|
|
export const generateVideo = async (
|
|
prompt: string,
|
|
aspectRatio: "16:9" | "9:16"
|
|
): Promise<string> => {
|
|
const ai = getClient();
|
|
|
|
let operation = await ai.models.generateVideos({
|
|
model: MODEL_VIDEO_GEN,
|
|
prompt: prompt,
|
|
config: {
|
|
numberOfVideos: 1,
|
|
aspectRatio: aspectRatio,
|
|
resolution: '720p', // fast-generate-preview often defaults to this
|
|
}
|
|
});
|
|
|
|
// Poll for completion
|
|
while (!operation.done) {
|
|
await new Promise(resolve => setTimeout(resolve, 5000));
|
|
operation = await ai.operations.getVideosOperation({ operation: operation });
|
|
}
|
|
|
|
const uri = operation.response?.generatedVideos?.[0]?.video?.uri;
|
|
if (!uri) throw new Error("No video URI returned");
|
|
|
|
// Fetch the actual bytes using the key
|
|
const fetchResponse = await fetch(`${uri}&key=${process.env.API_KEY}`);
|
|
const blob = await fetchResponse.blob();
|
|
return URL.createObjectURL(blob);
|
|
};
|
|
|
|
// --- Transcription ---
|
|
export const transcribeAudio = async (
|
|
audioBase64: string,
|
|
mimeType: string
|
|
): Promise<string> => {
|
|
const ai = getClient();
|
|
const response = await ai.models.generateContent({
|
|
model: MODEL_CHAT_STANDARD, // 3-flash is good for audio
|
|
contents: {
|
|
parts: [
|
|
{ inlineData: { mimeType, data: audioBase64 } },
|
|
{ text: "Please transcribe this audio exactly as spoken." }
|
|
]
|
|
}
|
|
});
|
|
return response.text || "";
|
|
};
|
|
|
|
// --- TTS ---
|
|
export const generateSpeech = async (
|
|
text: string
|
|
): Promise<AudioBuffer> => {
|
|
const ai = getClient();
|
|
const response = await ai.models.generateContent({
|
|
model: MODEL_TTS,
|
|
contents: { parts: [{ text }] },
|
|
config: {
|
|
responseModalities: [Modality.AUDIO],
|
|
speechConfig: {
|
|
voiceConfig: {
|
|
prebuiltVoiceConfig: { voiceName: 'Kore' },
|
|
},
|
|
},
|
|
},
|
|
});
|
|
|
|
const base64Audio = response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
|
|
if (!base64Audio) throw new Error("No audio generated");
|
|
|
|
const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)();
|
|
const audioBuffer = await decodeAudioData(
|
|
decode(base64Audio),
|
|
audioContext,
|
|
24000,
|
|
1
|
|
);
|
|
return audioBuffer;
|
|
};
|
|
|
|
// Helper utils for audio
|
|
function decode(base64: string) {
|
|
const binaryString = atob(base64);
|
|
const len = binaryString.length;
|
|
const bytes = new Uint8Array(len);
|
|
for (let i = 0; i < len; i++) {
|
|
bytes[i] = binaryString.charCodeAt(i);
|
|
}
|
|
return bytes;
|
|
}
|
|
|
|
async function decodeAudioData(
|
|
data: Uint8Array,
|
|
ctx: AudioContext,
|
|
sampleRate: number,
|
|
numChannels: number,
|
|
): Promise<AudioBuffer> {
|
|
const dataInt16 = new Int16Array(data.buffer);
|
|
const frameCount = dataInt16.length / numChannels;
|
|
const buffer = ctx.createBuffer(numChannels, frameCount, sampleRate);
|
|
|
|
for (let channel = 0; channel < numChannels; channel++) {
|
|
const channelData = buffer.getChannelData(channel);
|
|
for (let i = 0; i < frameCount; i++) {
|
|
channelData[i] = dataInt16[i * numChannels + channel] / 32768.0;
|
|
}
|
|
}
|
|
return buffer;
|
|
} |