ai-app-ckg/services/gemini.ts
import { GoogleGenAI, Modality } from "@google/genai";
import { AppModule, ImageConfig, VeoConfig } from '../types';
// Models
const MODEL_CHAT_PRO = 'gemini-3-pro-preview';
const MODEL_RESEARCH = 'gemini-3-flash-preview';
const MODEL_IMAGE = 'gemini-3-pro-image-preview';
const MODEL_VIDEO = 'veo-3.1-fast-generate-preview';
const MODEL_AUDIO_TTS = 'gemini-2.5-flash-preview-tts';
const MODEL_AUDIO_TRANS = 'gemini-3-flash-preview';
export class GeminiService {
  private ai: GoogleGenAI | null = null;
  private apiKey: string;

  constructor(apiKey: string) {
    this.apiKey = apiKey;
    if (apiKey) {
      this.ai = new GoogleGenAI({ apiKey });
    }
  }

  // Swap the stored key and rebuild the client so subsequent calls use it.
  updateKey(apiKey: string) {
    this.apiKey = apiKey;
    this.ai = new GoogleGenAI({ apiKey });
  }

  private getClient() {
    if (!this.ai) throw new Error("API Key not set");
    return this.ai;
  }
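  // Usage sketch (hypothetical caller code): a single service instance can
  // outlive key changes, e.g. when the user edits the key in a settings panel.
  //
  //   const service = new GeminiService(storedKey ?? '');
  //   service.updateKey(newKeyFromSettings); // rebuilds the underlying client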
  async generateText(
    prompt: string,
    module: AppModule,
    history: { role: string, parts: any[] }[],
    media?: { data: string, mimeType: string }[]
  ) {
    const ai = this.getClient();
    let model = MODEL_CHAT_PRO;
    let config: any = {};
    switch (module) {
      case AppModule.TUTOR:
        // Guidance: Pro for complex tasks, Flash/Flash-Lite for fast ones.
        // Tutor defaults to Pro for teaching quality; Flash could be swapped
        // in for latency-sensitive queries.
        model = MODEL_CHAT_PRO;
        break;
      case AppModule.THINKER:
        model = MODEL_CHAT_PRO;
        config.thinkingConfig = { thinkingBudget: 32768 };
        // Leave maxOutputTokens unset: the recommendation is to avoid capping
        // output alongside a large thinking budget unless strictly required.
        break;
      case AppModule.RESEARCH:
        model = MODEL_RESEARCH;
        config.tools = [{ googleSearch: {} }];
        break;
      case AppModule.VISION:
        model = MODEL_CHAT_PRO; // Image analysis
        break;
      case AppModule.STUDIO:
        model = MODEL_CHAT_PRO; // Image analysis/editing context
        break;
      case AppModule.AUDIO:
        model = MODEL_AUDIO_TRANS; // Transcription/analysis
        break;
    }
    // Multi-turn context: ai.chats.create accepts prior turns as history and
    // sendMessage* handles the current turn, so we seed the chat with the
    // caller-supplied history in the SDK's { role, parts } format.
    const sdkHistory = history.map(h => ({
      role: h.role,
      parts: h.parts
    }));
    const chat = ai.chats.create({
      model: model,
      config: config,
      history: sdkHistory
    });

    // Current turn: any media parts precede the text prompt. sendMessageStream
    // takes the parts directly (a string or Part[]), not a full Content object.
    const parts: any[] = media && media.length > 0
      ? [
          ...media.map(m => ({ inlineData: { mimeType: m.mimeType, data: m.data } })),
          { text: prompt }
        ]
      : [{ text: prompt }];

    // Return the stream iterator; callers consume it with for-await.
    return await chat.sendMessageStream({ message: parts });
  }
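  // Usage sketch (hypothetical caller code, assuming `service` is an instance
  // of this class and that SDK stream chunks expose a `.text` accessor):
  //
  //   const stream = await service.generateText("Explain closures", AppModule.TUTOR, []);
  //   for await (const chunk of stream) {
  //     ui.append(chunk.text ?? ""); // `ui` stands in for the app's render target
  //   }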
  async generateImage(prompt: string, config: ImageConfig) {
    const ai = this.getClient();
    // Image generation goes through generateContent on the Gemini image model
    // ("Nano Banana" series, gemini-3-pro-image-preview).
    const response = await ai.models.generateContent({
      model: MODEL_IMAGE,
      contents: {
        parts: [{ text: prompt }]
      },
      config: {
        imageConfig: {
          aspectRatio: config.aspectRatio,
          imageSize: config.size
        }
      }
    });
    // Extract the first inline image part and return it as a data URL.
    for (const part of response.candidates?.[0]?.content?.parts || []) {
      if (part.inlineData) {
        return `data:${part.inlineData.mimeType};base64,${part.inlineData.data}`;
      }
    }
    throw new Error("No image generated");
  }
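  // Usage sketch (hypothetical caller code; the config values shown are
  // illustrative, not the only ones ImageConfig accepts): the returned data
  // URL can be assigned straight to an <img> element.
  //
  //   const url = await service.generateImage("a lighthouse at dusk", { aspectRatio: "1:1", size: "1K" });
  //   imgEl.src = url;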
  async generateVideo(prompt: string, config: VeoConfig) {
    const ai = this.getClient();
    // In the AI Studio browser environment, prompt the user to select a paid
    // key for Veo if one hasn't been chosen yet.
    if (typeof window !== 'undefined' && (window as any).aistudio) {
      try {
        const hasKey = await (window as any).aistudio.hasSelectedApiKey();
        if (!hasKey) {
          await (window as any).aistudio.openSelectKey();
        }
      } catch (e) {
        console.warn("Veo key selection check failed, proceeding with env key", e);
      }
    }
    // Note: a key chosen via the selection dialog is not reflected in
    // this.apiKey, so we keep using this.ai and assume the user-entered key
    // is the paid key Veo requires.
    let operation = await ai.models.generateVideos({
      model: MODEL_VIDEO,
      prompt: prompt,
      config: {
        numberOfVideos: 1,
        resolution: config.resolution,
        aspectRatio: config.aspectRatio
      }
    });
    // Video generation is a long-running operation: poll every 5s until done.
    while (!operation.done) {
      await new Promise(resolve => setTimeout(resolve, 5000));
      operation = await ai.operations.getVideosOperation({ operation: operation });
    }
    const videoUri = operation.response?.generatedVideos?.[0]?.video?.uri;
    if (!videoUri) throw new Error("No video URI returned");
    // Downloading the result requires the API key appended to the returned URI.
    const vidResponse = await fetch(`${videoUri}&key=${this.apiKey}`);
    const blob = await vidResponse.blob();
    return URL.createObjectURL(blob);
  }
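  // Usage sketch (hypothetical caller code): the returned object URL plugs
  // into a <video> element; revoke it when the element is torn down so the
  // blob can be garbage-collected.
  //
  //   const url = await service.generateVideo("drone shot over a fjord", veoConfig);
  //   videoEl.src = url;
  //   // later: URL.revokeObjectURL(url);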
  async transcribeAudio(base64Audio: string, mimeType: string) {
    const ai = this.getClient();
    const response = await ai.models.generateContent({
      model: MODEL_AUDIO_TRANS,
      contents: {
        parts: [
          { inlineData: { mimeType: mimeType, data: base64Audio } },
          { text: "Transcribe this audio exactly." }
        ]
      }
    });
    return response.text;
  }
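  // Usage sketch (hypothetical helper): browsers record audio as a Blob, so a
  // FileReader pass is needed to get the base64 payload this method expects.
  //
  //   const base64 = await new Promise<string>((resolve, reject) => {
  //     const reader = new FileReader();
  //     reader.onload = () => resolve((reader.result as string).split(',')[1]);
  //     reader.onerror = () => reject(reader.error);
  //     reader.readAsDataURL(audioBlob);
  //   });
  //   const text = await service.transcribeAudio(base64, audioBlob.type);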
  async generateSpeech(text: string, voice: string = 'Kore') {
    const ai = this.getClient();
    const response = await ai.models.generateContent({
      model: MODEL_AUDIO_TTS,
      contents: [{ parts: [{ text }] }],
      config: {
        responseModalities: [Modality.AUDIO],
        speechConfig: {
          voiceConfig: {
            prebuiltVoiceConfig: { voiceName: voice },
          },
        },
      },
    });
    // The TTS model returns raw audio bytes as base64 inline data.
    const base64Audio = response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
    if (!base64Audio) throw new Error("No audio generated");
    return base64Audio;
  }
}
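
// The base64 returned by generateSpeech is raw PCM, not a playable container.
// Below is a minimal sketch of a WAV wrapper, assuming 16-bit little-endian
// mono PCM at 24 kHz (the format the Gemini TTS preview models have returned);
// check the mimeType on the inlineData part before relying on these defaults.
export function pcmBase64ToWavBlob(
  base64Pcm: string,
  sampleRate = 24000,
  numChannels = 1,
  bitsPerSample = 16
): Blob {
  // Decode base64 into raw PCM bytes.
  const binary = atob(base64Pcm);
  const pcm = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) pcm[i] = binary.charCodeAt(i);

  // Build the 44-byte RIFF/WAVE header in front of the PCM data.
  const blockAlign = numChannels * (bitsPerSample / 8);
  const byteRate = sampleRate * blockAlign;
  const header = new ArrayBuffer(44);
  const view = new DataView(header);
  const writeString = (offset: number, s: string) => {
    for (let i = 0; i < s.length; i++) view.setUint8(offset + i, s.charCodeAt(i));
  };
  writeString(0, 'RIFF');
  view.setUint32(4, 36 + pcm.length, true);  // total file size minus 8 bytes
  writeString(8, 'WAVE');
  writeString(12, 'fmt ');
  view.setUint32(16, 16, true);              // fmt chunk size
  view.setUint16(20, 1, true);               // audio format 1 = PCM
  view.setUint16(22, numChannels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, byteRate, true);
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, bitsPerSample, true);
  writeString(36, 'data');
  view.setUint32(40, pcm.length, true);

  return new Blob([header, pcm], { type: 'audio/wav' });
}

// Example: play TTS output in the browser.
//
//   const b64 = await service.generateSpeech("Hello there");
//   new Audio(URL.createObjectURL(pcmBase64ToWavBlob(b64))).play();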