Files
ai-app-ckg/services/gemini.ts
2025-12-26 16:06:34 +08:00

268 lines
9.6 KiB
TypeScript

import { GoogleGenAI, Modality } from "@google/genai";
import { AppModule, ImageConfig, VeoConfig } from '../types';
// Model identifiers used by GeminiService, one per capability.

/** Default chat model for generateText; also used by toolsSql. */
const MODEL_CHAT_PRO = 'gemini-3-pro-preview';

/** Model selected by generateText for AppModule.RESEARCH (paired with the googleSearch tool). */
const MODEL_RESEARCH = 'gemini-3-flash-preview';

/** Model used by generateImage. */
const MODEL_IMAGE = 'gemini-3-pro-image-preview';

/** Model used by generateVideo. */
const MODEL_VIDEO = 'veo-3.1-fast-generate-preview';

/** Model used by generateSpeech (audio response modality). */
const MODEL_AUDIO_TTS = 'gemini-2.5-flash-preview-tts';

/** Model used by transcribeAudio and by generateText for AppModule.AUDIO. */
const MODEL_AUDIO_TRANS = 'gemini-3-flash-preview';
/**
 * Wrapper around the `@google/genai` client that exposes the app's text,
 * image, video, audio and SQL-helper operations.
 *
 * The underlying client only exists while a non-empty API key is set; every
 * operation throws `Error("API Key not set")` otherwise.
 */
export class GeminiService {
  private ai: GoogleGenAI | null = null;
  private apiKey: string;

  /**
   * @param apiKey API key for the service. An empty string leaves the service
   *   without a client until `updateKey` is called with a real key.
   */
  constructor(apiKey: string) {
    this.apiKey = apiKey;
    if (apiKey) {
      this.ai = new GoogleGenAI({ apiKey });
    }
  }

  /**
   * Replaces the API key and rebuilds the client.
   *
   * An empty key clears the client (same rule as the constructor) so that
   * later calls fail fast with "API Key not set" instead of issuing requests
   * with a blank key.
   */
  updateKey(apiKey: string) {
    this.apiKey = apiKey;
    this.ai = apiKey ? new GoogleGenAI({ apiKey }) : null;
  }

  /**
   * Returns the live client.
   * @throws Error when no API key has been configured.
   */
  private getClient() {
    if (!this.ai) throw new Error("API Key not set");
    return this.ai;
  }

  /**
   * Starts a streaming chat turn for the given module.
   *
   * @param prompt User text for this turn.
   * @param module Selects the model, tools and the domain named in the system instruction.
   * @param history Earlier turns; only `role` and `parts` of each entry are forwarded.
   * @param language UI language code ('en', 'zh-CN', 'zh-TW', 'ja'); unknown codes fall back to English.
   * @param media Optional inline attachments, sent before the text part.
   * @param enableThinking Adds a thinking budget for modules handled by the default branch
   *   and adds a thinking hint to the system instruction.
   * @param responseLangMode 'system' answers in `language`; 'input' mirrors the user's language.
   * @returns The stream returned by `chat.sendMessageStream`.
   * @throws Error when no API key has been configured.
   */
  async generateText(
    prompt: string,
    module: AppModule,
    history: {role: string, parts: any[]}[],
    language: string,
    media?: { data: string, mimeType: string }[],
    enableThinking: boolean = false,
    responseLangMode: 'system' | 'input' = 'system'
  ) {
    const ai = this.getClient();

    // Loosely typed on purpose: fields are attached conditionally below.
    const config: any = {};
    let model = MODEL_CHAT_PRO;
    switch (module) {
      case AppModule.RESEARCH:
        model = MODEL_RESEARCH;
        config.tools = [{ googleSearch: {} }];
        break;
      case AppModule.AUDIO:
        model = MODEL_AUDIO_TRANS; // For transcription/analysis
        break;
      case AppModule.VISION:
      case AppModule.STUDIO:
        // Analysis on the Pro model; no thinking budget is set for these modules.
        break;
      default:
        // Math, Principles, SoftEng, Graphics, Network, AI_LAB: Pro model,
        // with an optional thinking budget.
        if (enableThinking) {
          config.thinkingConfig = { thinkingBudget: 32768 };
        }
        break;
    }

    // Language part of the system instruction.
    const langMap: Record<string, string> = {
      'en': 'English',
      'zh-CN': 'Simplified Chinese',
      'zh-TW': 'Traditional Chinese',
      'ja': 'Japanese'
    };
    let languageInstruction: string;
    if (responseLangMode === 'input') {
      languageInstruction = `- Detect the language of the user's input and reply exclusively in that language.`;
    } else {
      const targetLang = langMap[language] || 'English';
      languageInstruction = [
        `- Please provide your response in ${targetLang}.`,
        `- However, if the user explicitly asks a question in a different language, you should adapt and reply in the language of the question to ensure effective communication.`
      ].join('\n');
    }

    // Domain description per module.
    const contextMap: Record<string, string> = {
      [AppModule.MATH]: 'Discrete Mathematics, Calculus, Linear Algebra, and Logic for Computer Science.',
      [AppModule.THEORY]: 'Theory of Computation, Automata, Complexity Theory, and Computability.',
      [AppModule.PRINCIPLES]: 'Computer Architecture, Organization, Digital Logic, and Assembly.',
      [AppModule.SOFT_ENG]: 'Software Engineering, Design Patterns, Architecture, and DevOps.',
      [AppModule.GRAPHICS]: 'Computer Graphics, Rendering, WebGL, and Linear Algebra for Graphics.',
      [AppModule.NETWORK]: 'Computer Networks, Protocols (TCP/IP), Security, and Distributed Systems.',
      [AppModule.AI_LAB]: 'Artificial Intelligence, Machine Learning, Deep Learning, and Neural Networks.',
      [AppModule.RESEARCH]: 'Academic Research, Paper Search, and Citations.',
      [AppModule.VISION]: 'Computer Vision, Image Analysis, and Generation.',
      [AppModule.STUDIO]: 'Video Generation and Multimedia Processing.',
      [AppModule.AUDIO]: 'Audio Processing, Speech Synthesis, and Transcription.',
      [AppModule.SQL]: 'SQL Database Administration and Query Optimization.'
    };
    const domain = contextMap[module] || 'Computer Science';

    // Assembled line by line so the prompt text does not depend on source
    // indentation; the trailing '' keeps the final newline.
    config.systemInstruction = [
      `You are BitSage, an expert tutor and companion specializing in ${domain}.`,
      'Primary Goal: Help the user learn, understand, and explore concepts in this domain.',
      'Language Preference:',
      languageInstruction,
      'Style:',
      '- Be precise, educational, and helpful.',
      '- Use code blocks for code snippets.',
      '- Use Markdown for formatting.',
      enableThinking ? '- Thinking process is enabled. Use it to break down complex problems.' : '',
      ''
    ].join('\n');

    // Forward only the fields the SDK needs from each history entry.
    const sdkHistory = history.map(h => ({
      role: h.role,
      parts: h.parts
    }));
    const chat = ai.chats.create({
      model: model,
      config: config,
      history: sdkHistory
    });

    // Attachments (if any) go first, followed by the text prompt.
    const mediaParts = (media ?? []).map(m => ({
      inlineData: { mimeType: m.mimeType, data: m.data }
    }));
    const messageContent: any = {
      role: 'user',
      parts: [...mediaParts, { text: prompt }]
    };

    // The caller iterates the returned stream.
    return chat.sendMessageStream({ message: messageContent });
  }

  /**
   * Generates one image and returns it as a `data:` URL.
   *
   * @param prompt Text description of the image.
   * @param config Aspect ratio and size forwarded as `imageConfig`.
   * @throws Error("No image generated") when no response part carries image data.
   * @throws Error when no API key has been configured.
   */
  async generateImage(prompt: string, config: ImageConfig) {
    const ai = this.getClient();
    const response = await ai.models.generateContent({
      model: MODEL_IMAGE,
      contents: {
        parts: [{ text: prompt }]
      },
      config: {
        imageConfig: {
          aspectRatio: config.aspectRatio,
          imageSize: config.size
        }
      }
    });

    // Return the first part that actually carries inline bytes; parts whose
    // inlineData has no data are skipped rather than yielding "...,undefined".
    for (const part of response.candidates?.[0]?.content?.parts || []) {
      if (part.inlineData?.data) {
        return `data:${part.inlineData.mimeType};base64,${part.inlineData.data}`;
      }
    }
    throw new Error("No image generated");
  }

  /**
   * Generates one video, waits for the operation to finish, downloads the
   * result and returns an object URL for it.
   *
   * @param prompt Text description of the video.
   * @param config Resolution and aspect ratio for the generation request.
   * @returns A URL created with `URL.createObjectURL` for the downloaded blob.
   * @throws Error when the operation reports an error, returns no video URI,
   *   the download response is not OK, or no API key has been configured.
   */
  async generateVideo(prompt: string, config: VeoConfig) {
    const ai = this.getClient();

    // Veo check for key selection (browser env only). Best effort: a failure
    // here is logged and generation proceeds with the configured key.
    const studio = typeof window !== 'undefined' ? (window as any).aistudio : undefined;
    if (studio) {
      try {
        const hasKey = await studio.hasSelectedApiKey();
        if (!hasKey) {
          await studio.openSelectKey();
        }
      } catch (e) {
        console.warn("Veo key selection check failed, proceeding with env key", e);
      }
    }

    let operation = await ai.models.generateVideos({
      model: MODEL_VIDEO,
      prompt: prompt,
      config: {
        numberOfVideos: 1,
        resolution: config.resolution,
        aspectRatio: config.aspectRatio
      }
    });

    // Poll every 5 seconds until the operation is marked done.
    const pollIntervalMs = 5000;
    while (!operation.done) {
      await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
      operation = await ai.operations.getVideosOperation({ operation: operation });
    }

    // A finished operation may carry an error instead of a response; report it
    // rather than falling through to the generic "No video URI" message.
    if (operation.error) {
      const reported = operation.error['message'];
      const detail = typeof reported === 'string' ? reported : JSON.stringify(operation.error);
      throw new Error(`Video generation failed: ${detail}`);
    }

    const videoUri = operation.response?.generatedVideos?.[0]?.video?.uri;
    if (!videoUri) throw new Error("No video URI returned");

    // Fetch the video bytes. The key is appended with '&' when the URI already
    // has a query string and with '?' otherwise.
    const separator = videoUri.includes('?') ? '&' : '?';
    const vidResponse = await fetch(`${videoUri}${separator}key=${encodeURIComponent(this.apiKey)}`);
    if (!vidResponse.ok) {
      // Without this check an HTTP error body would be wrapped as a "video".
      throw new Error(`Video download failed: ${vidResponse.status} ${vidResponse.statusText}`);
    }
    const blob = await vidResponse.blob();
    return URL.createObjectURL(blob);
  }

  /**
   * Asks the model to transcribe a base64-encoded audio clip.
   *
   * @param base64Audio Audio bytes, base64-encoded.
   * @param mimeType MIME type sent with the audio.
   * @returns `response.text` as provided by the SDK (typed as possibly undefined).
   * @throws Error when no API key has been configured.
   */
  async transcribeAudio(base64Audio: string, mimeType: string) {
    const ai = this.getClient();
    const response = await ai.models.generateContent({
      model: MODEL_AUDIO_TRANS,
      contents: {
        parts: [
          { inlineData: { mimeType: mimeType, data: base64Audio } },
          { text: "Transcribe this audio exactly." }
        ]
      }
    });
    return response.text;
  }

  /**
   * Synthesizes speech for `text` and returns the base64 audio payload.
   *
   * @param text Text to speak.
   * @param voice Prebuilt voice name; defaults to 'Kore'.
   * @throws Error("No audio generated") when no response part carries audio data.
   * @throws Error when no API key has been configured.
   */
  async generateSpeech(text: string, voice: string = 'Kore') {
    const ai = this.getClient();
    const response = await ai.models.generateContent({
      model: MODEL_AUDIO_TTS,
      contents: [{ parts: [{ text }] }],
      config: {
        responseModalities: [Modality.AUDIO],
        speechConfig: {
          voiceConfig: {
            prebuiltVoiceConfig: { voiceName: voice },
          },
        },
      },
    });

    // Scan every part (as generateImage does) instead of assuming the audio
    // is always the first one.
    for (const part of response.candidates?.[0]?.content?.parts || []) {
      if (part.inlineData?.data) {
        return part.inlineData.data;
      }
    }
    throw new Error("No audio generated");
  }

  /**
   * SQL helper: formats, converts or applies a free-form instruction to SQL.
   *
   * @param text SQL input. For 'custom', blank text switches to generation mode.
   * @param type 'format', 'convert' (needs `target`) or 'custom' (needs `instruction`).
   * @param target Dialect to convert to; required for 'convert'.
   * @param instruction Request text; required for 'custom'.
   * @returns The trimmed model response, or "" when the response has no text.
   * @throws Error when a required `target`/`instruction` is missing or blank,
   *   or when no API key has been configured.
   */
  async toolsSql(text: string, type: 'format' | 'convert' | 'custom', target?: string, instruction?: string) {
    const ai = this.getClient();
    let prompt = "";
    switch (type) {
      case 'custom':
        // Without this guard the literal word "undefined" ends up in the prompt.
        if (!instruction?.trim()) {
          throw new Error("An instruction is required for custom SQL requests");
        }
        if (!text.trim()) {
          // Generation mode
          prompt = `You are a SQL expert. The user wants you to generate SQL code based on this request: "${instruction}".\n\nReturn ONLY the generated SQL code. Do not wrap in markdown backticks unless asked for explanation.`;
        } else {
          // Manipulation mode
          prompt = `You are a SQL expert. The user wants you to perform the following action on the SQL code provided: "${instruction}".\n\nReturn ONLY the processed SQL code (or the answer if it's an analysis). Do not wrap in markdown backticks unless asked for explanation.\n\nSQL Code:\n${text}`;
        }
        break;
      case 'format':
        prompt = `You are a SQL formatter. Format the following SQL code to be readable, with proper indentation and uppercased keywords. Return ONLY the formatted SQL code, no markdown backticks.\n\n${text}`;
        break;
      case 'convert':
        // Same reasoning: avoid asking for the "undefined dialect".
        if (!target?.trim()) {
          throw new Error("A target dialect is required for SQL conversion");
        }
        prompt = `You are a SQL converter. Convert the following SQL code to ${target} dialect. Return ONLY the converted SQL code, no markdown backticks.\n\n${text}`;
        break;
    }
    const response = await ai.models.generateContent({
      model: MODEL_CHAT_PRO,
      contents: [{ parts: [{ text: prompt }] }]
    });
    return response.text?.trim() || "";
  }
}