import { GoogleGenAI, Modality } from "@google/genai";
import { AppModule, ImageConfig, VeoConfig } from '../types';

// Models
const MODEL_CHAT_PRO = 'gemini-3-pro-preview';
const MODEL_RESEARCH = 'gemini-3-flash-preview';
const MODEL_IMAGE = 'gemini-3-pro-image-preview';
const MODEL_VIDEO = 'veo-3.1-fast-generate-preview';
const MODEL_AUDIO_TTS = 'gemini-2.5-flash-preview-tts';
const MODEL_AUDIO_TRANS = 'gemini-3-flash-preview';
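
/**
 * Thin wrapper around the @google/genai SDK. Routes each app module
 * (chat, research, vision, studio, audio, SQL tools) to an appropriate
 * model and centralizes API-key handling for the BitSage app.
 */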
export class GeminiService {
  private ai: GoogleGenAI | null = null;
  private apiKey: string;

  constructor(apiKey: string) {
    this.apiKey = apiKey;
    if (apiKey) {
      this.ai = new GoogleGenAI({ apiKey });
    }
  }

  updateKey(apiKey: string) {
    this.apiKey = apiKey;
    this.ai = new GoogleGenAI({ apiKey });
  }

  private getClient() {
    if (!this.ai) throw new Error("API Key not set");
    return this.ai;
  }
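
  /**
   * Streams a chat response for the given module. Picks a model per module
   * (search-grounded Flash for Research, Flash for Audio, Pro elsewhere),
   * builds the BitSage system instruction from the module's domain and the
   * language preference, and returns the SDK's stream iterator.
   */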
  async generateText(
    prompt: string,
    module: AppModule,
    history: { role: string, parts: any[] }[],
    language: string,
    media?: { data: string, mimeType: string }[],
    enableThinking: boolean = false,
    responseLangMode: 'system' | 'input' = 'system'
  ) {
    const ai = this.getClient();
    let model = MODEL_CHAT_PRO;
    let config: any = {};

    switch (module) {
      case AppModule.RESEARCH:
        model = MODEL_RESEARCH;
        config.tools = [{ googleSearch: {} }];
        break;
      case AppModule.VISION:
        model = MODEL_CHAT_PRO; // For analysis
        break;
      case AppModule.STUDIO:
        model = MODEL_CHAT_PRO; // For analysis
        break;
      case AppModule.AUDIO:
        model = MODEL_AUDIO_TRANS; // For transcription/analysis
        break;
      default:
        // Math, Principles, SoftEng, Graphics, Network, AI_LAB
        // Use Pro for these complex domains
        model = MODEL_CHAT_PRO;
        if (enableThinking) {
          config.thinkingConfig = { thinkingBudget: 32768 };
        }
        break;
    }

    // Construct System Instruction based on module and language
    const langMap: Record<string, string> = {
      'en': 'English',
      'zh-CN': 'Simplified Chinese',
      'zh-TW': 'Traditional Chinese',
      'ja': 'Japanese'
    };

    let languageInstruction = '';
    if (responseLangMode === 'input') {
      languageInstruction = `- Detect the language of the user's input and reply exclusively in that language.`;
    } else {
      const targetLang = langMap[language] || 'English';
      languageInstruction = `- Please provide your response in ${targetLang}.
- However, if the user explicitly asks a question in a different language, you should adapt and reply in the language of the question to ensure effective communication.`;
    }

    const contextMap: Record<string, string> = {
      [AppModule.MATH]: 'Discrete Mathematics, Calculus, Linear Algebra, and Logic for Computer Science.',
      [AppModule.THEORY]: 'Theory of Computation, Automata, Complexity Theory, and Computability.',
      [AppModule.PRINCIPLES]: 'Computer Architecture, Organization, Digital Logic, and Assembly.',
      [AppModule.SOFT_ENG]: 'Software Engineering, Design Patterns, Architecture, and DevOps.',
      [AppModule.GRAPHICS]: 'Computer Graphics, Rendering, WebGL, and Linear Algebra for Graphics.',
      [AppModule.NETWORK]: 'Computer Networks, Protocols (TCP/IP), Security, and Distributed Systems.',
      [AppModule.AI_LAB]: 'Artificial Intelligence, Machine Learning, Deep Learning, and Neural Networks.',
      [AppModule.RESEARCH]: 'Academic Research, Paper Search, and Citations.',
      [AppModule.VISION]: 'Computer Vision, Image Analysis, and Generation.',
      [AppModule.STUDIO]: 'Video Generation and Multimedia Processing.',
      [AppModule.AUDIO]: 'Audio Processing, Speech Synthesis, and Transcription.',
      [AppModule.SQL]: 'SQL Database Administration and Query Optimization.'
    };

    const domain = contextMap[module] || 'Computer Science';

    const systemInstruction = `You are BitSage, an expert tutor and companion specializing in ${domain}.

Primary Goal: Help the user learn, understand, and explore concepts in this domain.

Language Preference:
${languageInstruction}

Style:
- Be precise, educational, and helpful.
- Use code blocks for code snippets.
- Use Markdown for formatting.
${enableThinking ? '- Thinking process is enabled. Use it to break down complex problems.' : ''}
`;

    config.systemInstruction = systemInstruction;

    // Convert generic history to SDK format
    const sdkHistory = history.map(h => ({
      role: h.role,
      parts: h.parts
    }));

    const chat = ai.chats.create({
      model: model,
      config: config,
      history: sdkHistory
    });

    // Prepare the user message parts: inline media first, then the text prompt
    let messageParts: any[] = [{ text: prompt }];
    if (media && media.length > 0) {
      messageParts = [
        ...media.map(m => ({ inlineData: { mimeType: m.mimeType, data: m.data } })),
        { text: prompt }
      ];
    }

    // Streaming: return the stream iterator so callers can render chunks as they arrive
    const resultStream = await chat.sendMessageStream({ message: messageParts });
    return resultStream;
  }
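
  /**
   * Generates a single image with the image-preview model and returns it as a
   * base64 data URL, honoring the requested aspect ratio and size.
   */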
  async generateImage(prompt: string, config: ImageConfig) {
    const ai = this.getClient();
    // Using generateContent for nano banana series (gemini-3-pro-image-preview)
    const response = await ai.models.generateContent({
      model: MODEL_IMAGE,
      contents: {
        parts: [{ text: prompt }]
      },
      config: {
        imageConfig: {
          aspectRatio: config.aspectRatio,
          imageSize: config.size
        }
      }
    });

    // Extract image
    for (const part of response.candidates?.[0]?.content?.parts || []) {
      if (part.inlineData) {
        return `data:${part.inlineData.mimeType};base64,${part.inlineData.data}`;
      }
    }
    throw new Error("No image generated");
  }
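
  /**
   * Generates a short video with Veo. In the AI Studio browser environment it
   * first prompts the user to select an API key, then polls the long-running
   * operation every 5 seconds and resolves to an object URL for the video bytes.
   */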
  async generateVideo(prompt: string, config: VeoConfig) {
    const ai = this.getClient();
    // Veo check for key selection (browser env only)
    if (typeof window !== 'undefined' && (window as any).aistudio) {
      try {
        const hasKey = await (window as any).aistudio.hasSelectedApiKey();
        if (!hasKey) {
          await (window as any).aistudio.openSelectKey();
        }
      } catch (e) {
        console.warn("Veo key selection check failed, proceeding with env key", e);
      }
    }

    let operation = await ai.models.generateVideos({
      model: MODEL_VIDEO,
      prompt: prompt,
      config: {
        numberOfVideos: 1,
        resolution: config.resolution,
        aspectRatio: config.aspectRatio
      }
    });

    while (!operation.done) {
      await new Promise(resolve => setTimeout(resolve, 5000));
      operation = await ai.operations.getVideosOperation({ operation: operation });
    }

    const videoUri = operation.response?.generatedVideos?.[0]?.video?.uri;
    if (!videoUri) throw new Error("No video URI returned");

    // Fetch the video bytes
    const vidResponse = await fetch(`${videoUri}&key=${this.apiKey}`);
    const blob = await vidResponse.blob();
    return URL.createObjectURL(blob);
  }
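
  /**
   * Transcribes a base64-encoded audio clip by sending it inline to the Flash
   * model with a plain transcription prompt; returns the raw text.
   */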
  async transcribeAudio(base64Audio: string, mimeType: string) {
    const ai = this.getClient();
    const response = await ai.models.generateContent({
      model: MODEL_AUDIO_TRANS,
      contents: {
        parts: [
          { inlineData: { mimeType: mimeType, data: base64Audio } },
          { text: "Transcribe this audio exactly." }
        ]
      }
    });
    return response.text;
  }
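
  /**
   * Synthesizes speech for the given text with the TTS preview model and the
   * selected prebuilt voice. Returns the base64 audio payload as-is; note that
   * TTS preview models typically return raw PCM rather than a WAV/MP3 container,
   * so the caller is expected to wrap or decode it before playback.
   */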
  async generateSpeech(text: string, voice: string = 'Kore') {
    const ai = this.getClient();
    const response = await ai.models.generateContent({
      model: MODEL_AUDIO_TTS,
      contents: [{ parts: [{ text }] }],
      config: {
        responseModalities: [Modality.AUDIO],
        speechConfig: {
          voiceConfig: {
            prebuiltVoiceConfig: { voiceName: voice },
          },
        },
      },
    });

    const base64Audio = response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
    if (!base64Audio) throw new Error("No audio generated");
    return base64Audio;
  }
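
  /**
   * SQL helper: formats SQL, converts it to a target dialect, or runs a custom
   * instruction (generation when no SQL is provided, transformation/analysis
   * otherwise). Returns the model's trimmed text output.
   */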
  async toolsSql(text: string, type: 'format' | 'convert' | 'custom', target?: string, instruction?: string) {
    const ai = this.getClient();
    let prompt = "";

    if (type === 'custom') {
      if (!text.trim()) {
        // Generation mode
        prompt = `You are a SQL expert. The user wants you to generate SQL code based on this request: "${instruction}".\n\nReturn ONLY the generated SQL code. Do not wrap in markdown backticks unless asked for explanation.`;
      } else {
        // Manipulation mode
        prompt = `You are a SQL expert. The user wants you to perform the following action on the SQL code provided: "${instruction}".\n\nReturn ONLY the processed SQL code (or the answer if it's an analysis). Do not wrap in markdown backticks unless asked for explanation.\n\nSQL Code:\n${text}`;
      }
    } else if (type === 'format') {
      prompt = `You are a SQL formatter. Format the following SQL code to be readable, with proper indentation and uppercased keywords. Return ONLY the formatted SQL code, no markdown backticks.\n\n${text}`;
    } else if (type === 'convert') {
      prompt = `You are a SQL converter. Convert the following SQL code to ${target} dialect. Return ONLY the converted SQL code, no markdown backticks.\n\n${text}`;
    }

    const response = await ai.models.generateContent({
      model: MODEL_CHAT_PRO,
      contents: [{ parts: [{ text: prompt }] }]
    });
    return response.text?.trim() || "";
  }
}
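
// Usage sketch (illustrative only): assumes the app exposes the key via an env
// variable such as VITE_GEMINI_API_KEY; AppModule comes from '../types' as imported above.
//
//   const gemini = new GeminiService(import.meta.env.VITE_GEMINI_API_KEY);
//   const stream = await gemini.generateText('Explain TCP slow start', AppModule.NETWORK, [], 'en');
//   for await (const chunk of stream) {
//     console.log(chunk.text);
//   }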