// ai-app-ckg/services/gemini.ts

import { GoogleGenAI, Modality } from "@google/genai";
import { AppModule, ImageConfig, VeoConfig } from '../types';

// Model identifiers passed as the `model` field of the Gemini SDK calls made by GeminiService.

// Default text model: used by generateText for every module except RESEARCH and AUDIO, and by toolsSql.
const MODEL_CHAT_PRO = 'gemini-3-pro-preview';
// Used by generateText for the RESEARCH module (the one that enables the Google Search tool).
const MODEL_RESEARCH = 'gemini-3-flash-preview';
// Used by generateImage.
const MODEL_IMAGE = 'gemini-3-pro-image-preview';
// Used by generateVideo.
const MODEL_VIDEO = 'veo-3.1-fast-generate-preview';
// Used by generateSpeech (text-to-speech).
const MODEL_AUDIO_TTS = 'gemini-2.5-flash-preview-tts';
// Used by transcribeAudio and by generateText for the AUDIO module.
// NOTE: currently the same model string as MODEL_RESEARCH.
const MODEL_AUDIO_TRANS = 'gemini-3-flash-preview';

/**
 * Wrapper around the `@google/genai` client that exposes the text, image,
 * video, audio and SQL-helper requests used by the app.
 *
 * The underlying client only exists while a non-empty API key is set; every
 * request method throws `Error("API Key not set")` otherwise.
 */
export class GeminiService {
  private ai: GoogleGenAI | null = null;
  private apiKey: string;

  /**
   * @param apiKey Gemini API key. An empty string is accepted and leaves the
   *   service without a client until {@link updateKey} supplies a real key.
   */
  constructor(apiKey: string) {
    this.apiKey = apiKey;
    this.ai = GeminiService.createClient(apiKey);
  }

  /**
   * Replaces the API key and rebuilds the client.
   *
   * @param apiKey New key. An empty string clears the client, so later
   *   requests fail with "API Key not set" (same rule as the constructor).
   */
  updateKey(apiKey: string) {
    this.apiKey = apiKey;
    this.ai = GeminiService.createClient(apiKey);
  }

  /** Builds a client for a non-empty key; returns `null` for an empty one. */
  private static createClient(apiKey: string): GoogleGenAI | null {
    return apiKey ? new GoogleGenAI({ apiKey }) : null;
  }

  /**
   * @returns The active client.
   * @throws Error if no API key has been set.
   */
  private getClient() {
    if (!this.ai) throw new Error("API Key not set");
    return this.ai;
  }

  /**
   * Sends one user turn to a chat session and returns the streaming response.
   *
   * Model and options depend on `module`:
   * - RESEARCH uses the research model with the Google Search tool enabled.
   * - AUDIO uses the transcription model.
   * - VISION and STUDIO use the pro chat model without a thinking budget.
   * - Every other module uses the pro chat model, with a thinking budget of
   *   32768 when `enableThinking` is true.
   *
   * @param prompt User text for this turn.
   * @param module App module; selects the model and the tutor's subject domain.
   * @param history Previous turns, forwarded to the chat session as-is.
   * @param language UI language code ('en', 'zh-CN', 'zh-TW', 'ja'); unknown codes mean English.
   * @param media Optional inline attachments, sent before the text part.
   * @param enableThinking Requests the thinking budget (default-branch modules only)
   *   and adds a style hint to the system instruction.
   * @param responseLangMode 'system' answers in `language`; 'input' answers in the
   *   language detected from the user's input.
   * @returns The stream returned by the SDK's `sendMessageStream`.
   * @throws Error if no API key has been set.
   */
  async generateText(
    prompt: string,
    module: AppModule,
    history: {role: string, parts: any[]}[],
    language: string,
    media?: { data: string, mimeType: string }[],
    enableThinking: boolean = false,
    responseLangMode: 'system' | 'input' = 'system'
  ) {
    const ai = this.getClient();

    let model = MODEL_CHAT_PRO;
    let useSearch = false;
    let useThinkingBudget = false;

    switch (module) {
      case AppModule.RESEARCH:
        model = MODEL_RESEARCH;
        useSearch = true;
        break;
      case AppModule.AUDIO:
        model = MODEL_AUDIO_TRANS; // For transcription/analysis
        break;
      case AppModule.VISION:
      case AppModule.STUDIO:
        // Analysis runs on the pro chat model; no thinking budget is applied here.
        break;
      default:
        // Math, Principles, SoftEng, Graphics, Network, AI_LAB, ...
        // Complex domains: pro model, optionally with a thinking budget.
        useThinkingBudget = enableThinking;
        break;
    }

    // Built as one literal so the SDK's config type is checked without `any`.
    const config = {
      systemInstruction: this.buildSystemInstruction(module, language, enableThinking, responseLangMode),
      ...(useSearch ? { tools: [{ googleSearch: {} }] } : {}),
      ...(useThinkingBudget ? { thinkingConfig: { thinkingBudget: 32768 } } : {})
    };

    // Convert generic history to SDK format
    const sdkHistory = history.map(h => ({
      role: h.role,
      parts: h.parts
    }));

    const chat = ai.chats.create({
      model,
      config,
      history: sdkHistory
    });

    // `message` takes a list of parts; the SDK wraps it into a user turn.
    // Attachments go first, followed by the text prompt.
    const parts = [
      ...(media ?? []).map(m => ({ inlineData: { mimeType: m.mimeType, data: m.data } })),
      { text: prompt }
    ];

    return await chat.sendMessageStream({ message: parts });
  }

  /** Builds the BitSage system prompt for the given module and language settings. */
  private buildSystemInstruction(
    module: AppModule,
    language: string,
    enableThinking: boolean,
    responseLangMode: 'system' | 'input'
  ): string {
    const langMap: Record<string, string> = {
      'en': 'English',
      'zh-CN': 'Simplified Chinese',
      'zh-TW': 'Traditional Chinese',
      'ja': 'Japanese'
    };

    let languageInstruction = '';
    if (responseLangMode === 'input') {
      languageInstruction = `- Detect the language of the user's input and reply exclusively in that language.`;
    } else {
      const targetLang = langMap[language] ?? 'English';
      languageInstruction = `- Please provide your response in ${targetLang}.
    - However, if the user explicitly asks a question in a different language, you should adapt and reply in the language of the question to ensure effective communication.`;
    }

    const contextMap: Record<string, string> = {
      [AppModule.MATH]: 'Discrete Mathematics, Calculus, Linear Algebra, and Logic for Computer Science.',
      [AppModule.THEORY]: 'Theory of Computation, Automata, Complexity Theory, and Computability.',
      [AppModule.PRINCIPLES]: 'Computer Architecture, Organization, Digital Logic, and Assembly.',
      [AppModule.SOFT_ENG]: 'Software Engineering, Design Patterns, Architecture, and DevOps.',
      [AppModule.GRAPHICS]: 'Computer Graphics, Rendering, WebGL, and Linear Algebra for Graphics.',
      [AppModule.NETWORK]: 'Computer Networks, Protocols (TCP/IP), Security, and Distributed Systems.',
      [AppModule.AI_LAB]: 'Artificial Intelligence, Machine Learning, Deep Learning, and Neural Networks.',
      [AppModule.RESEARCH]: 'Academic Research, Paper Search, and Citations.',
      [AppModule.VISION]: 'Computer Vision, Image Analysis, and Generation.',
      [AppModule.STUDIO]: 'Video Generation and Multimedia Processing.',
      [AppModule.AUDIO]: 'Audio Processing, Speech Synthesis, and Transcription.',
      [AppModule.SQL]: 'SQL Database Administration and Query Optimization.'
    };

    const domain = contextMap[module] ?? 'Computer Science';

    // The indentation inside this template literal is part of the prompt text.
    return `You are BitSage, an expert tutor and companion specializing in ${domain}.

    Primary Goal: Help the user learn, understand, and explore concepts in this domain.

    Language Preference:
    ${languageInstruction}

    Style:
    - Be precise, educational, and helpful.
    - Use code blocks for code snippets.
    - Use Markdown for formatting.
    ${enableThinking ? '- Thinking process is enabled. Use it to break down complex problems.' : ''}
    `;
  }

  /**
   * Generates one image and returns it as a `data:` URL.
   *
   * @param prompt Text description of the image.
   * @param config Aspect ratio and size forwarded to the model's `imageConfig`.
   * @returns `data:<mime>;base64,<data>` for the first response part that carries inline data.
   * @throws Error("No image generated") if no part carries inline data.
   * @throws Error if no API key has been set.
   */
  async generateImage(prompt: string, config: ImageConfig) {
    const ai = this.getClient();
    // Using generateContent for nano banana series (gemini-3-pro-image-preview)
    const response = await ai.models.generateContent({
      model: MODEL_IMAGE,
      contents: {
        parts: [{ text: prompt }]
      },
      config: {
        imageConfig: {
          aspectRatio: config.aspectRatio,
          imageSize: config.size
        }
      }
    });

    // The response may mix text and image parts; take the first part with image bytes.
    for (const part of response.candidates?.[0]?.content?.parts ?? []) {
      const inline = part.inlineData;
      if (inline?.data) {
        // Fall back to PNG if the API omits the MIME type, so the URL stays well-formed.
        return `data:${inline.mimeType ?? 'image/png'};base64,${inline.data}`;
      }
    }
    throw new Error("No image generated");
  }

  /**
   * Generates one video, waits for the long-running operation to finish, and
   * downloads the result.
   *
   * The operation is polled every 5 seconds with no upper bound on the number
   * of polls, so this promise settles only when the service reports completion
   * or a poll request fails.
   *
   * @param prompt Text description of the video.
   * @param config Resolution and aspect ratio forwarded to the model.
   * @returns An object URL (`URL.createObjectURL`) for the downloaded video blob;
   *   the caller is responsible for revoking it.
   * @throws Error if no API key has been set, the operation reports an error,
   *   no video URI is returned, or the download responds with a non-2xx status.
   */
  async generateVideo(prompt: string, config: VeoConfig) {
    const ai = this.getClient();
    // Veo check for key selection (browser env only)
    if (typeof window !== 'undefined' && (window as any).aistudio) {
        try {
            const hasKey = await (window as any).aistudio.hasSelectedApiKey();
            if (!hasKey) {
                 await (window as any).aistudio.openSelectKey();
            }
        } catch (e) {
            // Best effort: the request below still runs with the configured key.
            console.warn("Veo key selection check failed, proceeding with env key", e);
        }
    }

    let operation = await ai.models.generateVideos({
      model: MODEL_VIDEO,
      prompt: prompt,
      config: {
        numberOfVideos: 1,
        resolution: config.resolution,
        aspectRatio: config.aspectRatio
      }
    });

    while (!operation.done) {
      await new Promise(resolve => setTimeout(resolve, 5000));
      operation = await ai.operations.getVideosOperation({ operation: operation });
    }

    // A finished operation can carry an error instead of a response.
    if (operation.error) {
      throw new Error(`Video generation failed: ${JSON.stringify(operation.error)}`);
    }

    const videoUri = operation.response?.generatedVideos?.[0]?.video?.uri;
    if (!videoUri) throw new Error("No video URI returned");

    // The download needs the API key as a query parameter. Pick the separator
    // from the URI itself rather than assuming it already has a query string.
    const separator = videoUri.includes('?') ? '&' : '?';
    const vidResponse = await fetch(`${videoUri}${separator}key=${encodeURIComponent(this.apiKey)}`);
    if (!vidResponse.ok) {
      // Without this check an HTTP error body would be returned as a "video".
      throw new Error(`Failed to download video: ${vidResponse.status} ${vidResponse.statusText}`);
    }
    const blob = await vidResponse.blob();
    return URL.createObjectURL(blob);
  }

  /**
   * Asks the transcription model to transcribe an audio clip.
   *
   * @param base64Audio Audio bytes, sent as inline data.
   * @param mimeType MIME type of the audio.
   * @returns The response's `text` (may be `undefined` if the model returned no text).
   * @throws Error if no API key has been set.
   */
  async transcribeAudio(base64Audio: string, mimeType: string) {
    const ai = this.getClient();
    const response = await ai.models.generateContent({
        model: MODEL_AUDIO_TRANS,
        contents: {
            parts: [
                { inlineData: { mimeType: mimeType, data: base64Audio } },
                { text: "Transcribe this audio exactly." }
            ]
        }
    });
    return response.text;
  }

  /**
   * Synthesizes speech for `text` with a prebuilt voice.
   *
   * @param text Text to speak.
   * @param voice Prebuilt voice name (defaults to 'Kore').
   * @returns The inline data payload of the first response part that carries audio.
   * @throws Error("No audio generated") if no part carries inline data.
   * @throws Error if no API key has been set.
   */
  async generateSpeech(text: string, voice: string = 'Kore') {
    const ai = this.getClient();
    const response = await ai.models.generateContent({
      model: MODEL_AUDIO_TTS,
      contents: [{ parts: [{ text }] }],
      config: {
        responseModalities: [Modality.AUDIO],
        speechConfig: {
          voiceConfig: {
            prebuiltVoiceConfig: { voiceName: voice },
          },
        },
      },
    });

    // Scan all parts (as generateImage does) instead of assuming the audio is first.
    const parts = response.candidates?.[0]?.content?.parts ?? [];
    const base64Audio = parts.find(part => part.inlineData?.data)?.inlineData?.data;
    if (!base64Audio) throw new Error("No audio generated");
    return base64Audio;
  }

  /**
   * Runs a SQL helper task through the pro chat model.
   *
   * - 'format': pretty-prints `text`.
   * - 'convert': rewrites `text` in the `target` dialect.
   * - 'custom': applies `instruction` to `text`, or generates SQL from
   *   `instruction` alone when `text` is blank.
   *
   * @param text SQL input (may be blank for 'custom' generation).
   * @param type Task to perform.
   * @param target Target dialect; required for 'convert'.
   * @param instruction Free-form request; required for 'custom'.
   * @returns The model's trimmed text, or "" if it returned none.
   * @throws Error if no API key has been set, if a required `target` /
   *   `instruction` is missing or blank, or if `type` is not a supported task.
   */
  async toolsSql(text: string, type: 'format' | 'convert' | 'custom', target?: string, instruction?: string) {
    const ai = this.getClient();
    let prompt: string;

    switch (type) {
      case 'custom':
        // Guard against interpolating "undefined" into the prompt.
        if (!instruction?.trim()) {
          throw new Error("An instruction is required for custom SQL tasks");
        }
        if (!text.trim()) {
          // Generation mode
          prompt = `You are a SQL expert. The user wants you to generate SQL code based on this request: "${instruction}".\n\nReturn ONLY the generated SQL code. Do not wrap in markdown backticks unless asked for explanation.`;
        } else {
          // Manipulation mode
          prompt = `You are a SQL expert. The user wants you to perform the following action on the SQL code provided: "${instruction}".\n\nReturn ONLY the processed SQL code (or the answer if it's an analysis). Do not wrap in markdown backticks unless asked for explanation.\n\nSQL Code:\n${text}`;
        }
        break;
      case 'format':
        prompt = `You are a SQL formatter. Format the following SQL code to be readable, with proper indentation and uppercased keywords. Return ONLY the formatted SQL code, no markdown backticks.\n\n${text}`;
        break;
      case 'convert':
        if (!target?.trim()) {
          throw new Error("A target dialect is required for SQL conversion");
        }
        prompt = `You are a SQL converter. Convert the following SQL code to ${target} dialect. Return ONLY the converted SQL code, no markdown backticks.\n\n${text}`;
        break;
      default: {
        // Unreachable for typed callers; protects untyped callers from sending an empty prompt.
        const unsupported: never = type;
        throw new Error(`Unsupported SQL tool type: ${String(unsupported)}`);
      }
    }

    const response = await ai.models.generateContent({
        model: MODEL_CHAT_PRO,
        contents: [{ parts: [{ text: prompt }] }]
    });
    return response.text?.trim() || "";
  }
}