Initialize project
services/geminiService.ts (new file, 543 lines)
@@ -0,0 +1,543 @@
import { GoogleGenAI, Modality, Type } from "@google/genai";
import { PronunciationFeedback, Language, ReadingLesson, ReadingDifficulty, OCRAnalysis, ListeningLesson } from "../types";
import { base64ToUint8Array, uint8ArrayToBase64 } from "../utils/audioUtils";

export const USER_API_KEY_STORAGE = 'sakura_user_api_key';
export const USER_BASE_URL_STORAGE = 'sakura_user_base_url';

// Helper to decode audio for playback.
// Supports raw PCM (typically returned by Gemini TTS), which the browser cannot decode automatically.
export const decodeAudioData = async (
  base64Data: string,
  audioContext: AudioContext
): Promise<AudioBuffer> => {
  const binaryString = atob(base64Data);
  const len = binaryString.length;
  const bytes = new Uint8Array(len);
  for (let i = 0; i < len; i++) {
    bytes[i] = binaryString.charCodeAt(i);
  }

  try {
    // Try standard decoding first (WAV/MP3 containers).
    // We clone the buffer because decodeAudioData detaches it.
    return await audioContext.decodeAudioData(bytes.buffer.slice(0));
  } catch (e) {
    // Fallback: treat the data as raw PCM, assuming 16-bit mono little-endian
    // at 24 kHz, the typical raw output of Gemini TTS.
    // Truncate to a whole number of 16-bit samples so the view cannot throw on odd byte lengths.
    const pcmData = new Int16Array(bytes.buffer, 0, Math.floor(bytes.byteLength / 2));
    const float32Data = new Float32Array(pcmData.length);
    for (let i = 0; i < pcmData.length; i++) {
      // Convert int16 to float32 (-1.0 to 1.0)
      float32Data[i] = pcmData[i] / 32768.0;
    }

    // Create buffer: 1 channel, 24000 Hz sample rate
    const audioBuffer = audioContext.createBuffer(1, float32Data.length, 24000);
    audioBuffer.getChannelData(0).set(float32Data);
    return audioBuffer;
  }
};
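
// Usage sketch (illustrative; `playBase64Audio` is not part of the original
// module): decode a base64 clip returned by the service and play it. Note that
// many browsers require the AudioContext to be created or resumed inside a
// user gesture handler.
export const playBase64Audio = async (base64Data: string): Promise<void> => {
  const ctx = new AudioContext();
  const buffer = await decodeAudioData(base64Data, ctx);
  const source = ctx.createBufferSource();
  source.buffer = buffer;
  source.connect(ctx.destination);
  source.start();
};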

// Helper to check/request a Veo key
export const ensureVeoKey = async (): Promise<void> => {
  // @ts-ignore
  if (window.aistudio) {
    // @ts-ignore
    const hasKey = await window.aistudio.hasSelectedApiKey();
    if (!hasKey) {
      // @ts-ignore
      await window.aistudio.openSelectKey();
    }
  }
};
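
// Usage note (illustrative): call `await ensureVeoKey()` at the start of any
// video-generation handler so the AI Studio key selector opens when no key is
// set; generateVideo() below already does this. Outside AI Studio,
// window.aistudio is undefined and the call is a no-op.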

const LANGUAGE_MAP = {
  en: "English",
  ja: "Japanese",
  zh: "Chinese (Simplified)"
};

class GeminiService {
  private getAi() {
    const userKey = localStorage.getItem(USER_API_KEY_STORAGE);
    const userBaseUrl = localStorage.getItem(USER_BASE_URL_STORAGE);
    const envKey = process.env.API_KEY;
    const keyToUse = (userKey && userKey.trim().length > 0) ? userKey : envKey;

    if (!keyToUse) {
      console.error("API_KEY is missing.");
      throw new Error("API Key is missing");
    }

    const config: any = { apiKey: keyToUse };
    if (userBaseUrl && userBaseUrl.trim().length > 0) {
      config.baseUrl = userBaseUrl.trim();
    }

    return new GoogleGenAI(config);
  }

  private async getApiKey(): Promise<string> {
    const userKey = localStorage.getItem(USER_API_KEY_STORAGE);
    const envKey = process.env.API_KEY;
    const key = (userKey && userKey.trim().length > 0) ? userKey : envKey;
    if (!key) throw new Error("No API Key available");
    return key;
  }

  private async retryOperation<T>(operation: () => Promise<T>, retries = 3, delay = 1000): Promise<T> {
    try {
      return await operation();
    } catch (error: any) {
      const isOverloaded =
        error?.status === 503 ||
        error?.response?.status === 503 ||
        error?.message?.includes('503') ||
        error?.message?.includes('overloaded');

      if (isOverloaded && retries > 0) {
        console.warn(`Model overloaded (503). Retrying in ${delay}ms...`);
        await new Promise(resolve => setTimeout(resolve, delay));
        return this.retryOperation(operation, retries - 1, delay * 2);
      }
      throw error;
    }
  }
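
  // Backoff sketch with the defaults above (retries = 3, delay = 1000): the
  // initial attempt plus up to three retries, waiting 1000 ms, 2000 ms, then
  // 4000 ms between attempts, so a persistent 503 surfaces after roughly 7 s.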

  // 1. Text Chat Response - returns { text, model }
  async generateTextResponse(
    prompt: string,
    imageBase64?: string,
    useThinking: boolean = false,
    language: Language = 'en',
    modelOverride?: string,
    aiSpeakingLanguage: 'ja' | 'native' = 'native'
  ): Promise<{ text: string, model: string }> {
    const ai = this.getAi();

    const modelName = useThinking
      ? 'gemini-3-pro-preview'
      : (imageBase64 ? 'gemini-3-pro-preview' : (modelOverride || 'gemini-2.5-flash'));

    const targetLangName = LANGUAGE_MAP[language];
    const parts: any[] = [];

    if (imageBase64) {
      parts.push({
        inlineData: {
          mimeType: 'image/jpeg',
          data: imageBase64
        }
      });
      parts.push({ text: `Analyze this image in the context of learning Japanese. Explain in ${targetLangName}: ` + prompt });
    } else {
      parts.push({ text: prompt });
    }

    let instruction = "";
    if (aiSpeakingLanguage === 'ja') {
      instruction = `You are Sakura, a Japanese language tutor.
IMPORTANT:
- Respond primarily in Japanese (日本語) to help the user practice immersion.
- Only use ${targetLangName} for complex grammar explanations or if the user asks specifically for a translation.
- Keep the tone encouraging and natural.`;
    } else {
      instruction = `You are Sakura, a friendly, encouraging, and highly skilled Japanese language tutor. You help users learn vocabulary, grammar, listening, and speaking. You provide clear explanations, examples, and translations.
IMPORTANT:
- You are teaching Japanese.
- However, the user speaks ${targetLangName}.
- Provide your explanations, translations, and feedback in ${targetLangName}.`;
    }

    const config: any = {
      systemInstruction: instruction,
    };

    if (useThinking) {
      config.thinkingConfig = { thinkingBudget: 32768 };
    }

    return this.retryOperation(async () => {
      const response = await ai.models.generateContent({
        model: modelName,
        contents: { parts },
        config: config
      });
      return {
        text: response.text || "I apologize, I couldn't generate a response.",
        model: modelName
      };
    });
  }
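
  // Call sketch (illustrative values): `await geminiService.generateTextResponse(
  // "Explain the difference between は and が", undefined, true)` routes to
  // 'gemini-3-pro-preview' with a 32768-token thinking budget; plain text chat
  // defaults to 'gemini-2.5-flash' unless modelOverride is given.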

  // Internal helper for a single TTS chunk.
  // Note: the try/catch sits outside retryOperation so failures actually
  // propagate and trigger retries; only the final failure returns null.
  private async _generateSpeechChunk(text: string): Promise<string | null> {
    const ai = this.getAi();
    try {
      return await this.retryOperation(async () => {
        const response = await ai.models.generateContent({
          model: 'gemini-2.5-flash-preview-tts',
          contents: [{ parts: [{ text }] }],
          config: {
            responseModalities: [Modality.AUDIO],
            speechConfig: {
              voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Kore' } },
            },
          },
        });
        return response.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data || null;
      });
    } catch (e) {
      console.error("TTS Chunk Error", e);
      return null;
    }
  }

  async generateSpeech(text: string): Promise<string | null> {
    if (!text || !text.trim()) return null;

    const MAX_CHUNK_LENGTH = 250; // Safe limit to prevent network timeouts on long generations

    // If the text is short, process it directly
    if (text.length <= MAX_CHUNK_LENGTH) {
      return this._generateSpeechChunk(text);
    }

    // Split the text into chunks at sentence boundaries to avoid breaking words
    const regex = /[^。!?.!?\n]+[。!?.!?\n]*|[^。!?.!?\n]+$/g;
    const sentences = text.match(regex) || [text];
    const chunks: string[] = [];
    let currentChunk = '';

    for (const sentence of sentences) {
      if ((currentChunk + sentence).length > MAX_CHUNK_LENGTH) {
        if (currentChunk) chunks.push(currentChunk);
        currentChunk = sentence;
        // Force-split if a single sentence exceeds the max length
        while (currentChunk.length > MAX_CHUNK_LENGTH) {
          chunks.push(currentChunk.slice(0, MAX_CHUNK_LENGTH));
          currentChunk = currentChunk.slice(MAX_CHUNK_LENGTH);
        }
      } else {
        currentChunk += sentence;
      }
    }
    if (currentChunk) chunks.push(currentChunk);

    try {
      // Generate chunks in parallel to reduce total latency.
      // Promise.all preserves input order, so the segments concatenate correctly.
      const results = await Promise.all(chunks.map(chunk => this._generateSpeechChunk(chunk)));

      // If any chunk failed, the combined audio would be incomplete
      if (results.some(r => r === null)) return null;

      // Convert Base64 -> Uint8Array
      const audioSegments = results.map(r => base64ToUint8Array(r!));

      // Concatenate the raw PCM data
      const totalLength = audioSegments.reduce((acc, cur) => acc + cur.length, 0);
      const combined = new Uint8Array(totalLength);
      let offset = 0;
      for (const seg of audioSegments) {
        combined.set(seg, offset);
        offset += seg.length;
      }

      // Convert back to Base64 for playback/storage
      return uint8ArrayToBase64(combined);
    } catch (e) {
      console.error("TTS Assembly Error", e);
      return null;
    }
  }
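
  // Chunking sketch: with MAX_CHUNK_LENGTH = 250, five ~120-character sentences
  // group as [s1+s2, s3+s4, s5], each chunk filled until the next sentence would
  // overflow; a 600-character run with no sentence punctuation is hard-cut into
  // 250 + 250 + 100.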

  async transcribeAudio(audioBase64: string): Promise<string> {
    const ai = this.getAi();
    return this.retryOperation(async () => {
      const response = await ai.models.generateContent({
        model: 'gemini-2.5-flash',
        contents: {
          parts: [
            { inlineData: { mimeType: 'audio/wav', data: audioBase64 } },
            { text: "Transcribe accurately." },
          ],
        },
      });
      return response.text || "";
    });
  }

  async generateImage(prompt: string): Promise<string | null> {
    const ai = this.getAi();
    // The try/catch wraps retryOperation (not the inner call) so a failure is
    // retried before falling back to null
    try {
      return await this.retryOperation(async () => {
        const response = await ai.models.generateImages({
          model: 'imagen-4.0-generate-001',
          prompt: prompt + ", in the style of a Japanese textbook illustration",
          config: { numberOfImages: 1, outputMimeType: 'image/jpeg', aspectRatio: '1:1' },
        });
        const bytes = response.generatedImages?.[0]?.image?.imageBytes;
        return bytes ? `data:image/jpeg;base64,${bytes}` : null;
      });
    } catch (e) {
      console.error("Image Gen Error", e);
      return null;
    }
  }

  async editImage(base64Original: string, prompt: string): Promise<string | null> {
    const ai = this.getAi();
    try {
      const cleanBase64 = base64Original.replace(/^data:image\/(png|jpeg|jpg|webp|heic|heif);base64,/i, "");
      return await this.retryOperation(async () => {
        const response = await ai.models.generateContent({
          model: 'gemini-2.5-flash-image',
          contents: {
            parts: [
              { inlineData: { data: cleanBase64, mimeType: 'image/jpeg' } },
              { text: prompt }
            ]
          },
          config: { responseModalities: [Modality.IMAGE] }
        });
        for (const part of response.candidates?.[0]?.content?.parts || []) {
          if (part.inlineData) return `data:image/png;base64,${part.inlineData.data}`;
        }
        return null;
      });
    } catch (e) {
      console.error("Image Edit Error", e);
      return null;
    }
  }

  async generateVideo(prompt: string, onStatusUpdate: (status: string) => void): Promise<string | null> {
    await ensureVeoKey();
    const ai = this.getAi();
    try {
      onStatusUpdate("Initializing Veo...");
      let operation = await ai.models.generateVideos({
        model: 'veo-3.1-fast-generate-preview',
        prompt: prompt,
        config: { numberOfVideos: 1, resolution: '720p', aspectRatio: '16:9' }
      });
      onStatusUpdate("Dreaming up video...");
      // Poll the long-running operation until it completes
      while (!operation.done) {
        await new Promise(resolve => setTimeout(resolve, 5000));
        operation = await ai.operations.getVideosOperation({ operation: operation });
      }
      const videoUri = operation.response?.generatedVideos?.[0]?.video?.uri;
      if (!videoUri) return null;
      const apiKey = await this.getApiKey();
      const videoRes = await fetch(`${videoUri}&key=${apiKey}`);
      const blob = await videoRes.blob();
      return URL.createObjectURL(blob);
    } catch (e) {
      console.error("Veo Error", e);
      return null;
    }
  }
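
  // Consumption sketch (illustrative): the resolved value is a blob object URL,
  // so callers can assign it directly (videoEl.src = url) and should release it
  // with URL.revokeObjectURL(url) once playback is done.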

  async analyzeSpeakingPerformance(audioBase64: string, scenarioContext: string, historyContext: string, language: Language = 'en'): Promise<PronunciationFeedback | null> {
    const ai = this.getAi();
    const targetLangName = LANGUAGE_MAP[language];
    const prompt = `Roleplay: ${scenarioContext}. History: ${historyContext}. Listen, Transcribe, Reply, Evaluate (JSON). Translation/Advice in ${targetLangName}.`;

    return this.retryOperation(async () => {
      const response = await ai.models.generateContent({
        model: 'gemini-2.5-flash',
        contents: {
          parts: [{ inlineData: { mimeType: 'audio/wav', data: audioBase64 } }, { text: prompt }]
        },
        config: {
          responseMimeType: "application/json",
          responseSchema: {
            type: Type.OBJECT,
            properties: {
              transcription: { type: Type.STRING },
              response: { type: Type.STRING },
              translation: { type: Type.STRING },
              score: { type: Type.INTEGER },
              pronunciationIssues: { type: Type.ARRAY, items: { type: Type.STRING } },
              advice: { type: Type.STRING }
            },
            required: ["transcription", "response", "translation", "score", "pronunciationIssues", "advice"]
          }
        }
      });
      return response.text ? JSON.parse(response.text) : null;
    });
  }

  async generateReadingLesson(topic: string, difficulty: ReadingDifficulty, language: Language): Promise<ReadingLesson | null> {
    const ai = this.getAi();
    const targetLangName = LANGUAGE_MAP[language];
    const prompt = `Create a complete Japanese reading lesson on "${topic}", level ${difficulty}.
The 'japaneseContent' MUST be a complete article or story (at least 300 characters).
Output JSON with title, japaneseContent, translation (${targetLangName}), vocabulary, and grammarPoints (list of key grammar used in the text with explanations).`;

    return this.retryOperation(async () => {
      const response = await ai.models.generateContent({
        model: 'gemini-2.5-flash',
        contents: { parts: [{ text: prompt }] },
        config: {
          responseMimeType: "application/json",
          responseSchema: {
            type: Type.OBJECT,
            properties: {
              title: { type: Type.STRING },
              japaneseContent: { type: Type.STRING },
              translation: { type: Type.STRING },
              vocabulary: { type: Type.ARRAY, items: { type: Type.OBJECT, properties: { word: { type: Type.STRING }, reading: { type: Type.STRING }, meaning: { type: Type.STRING } } } },
              grammarPoints: { type: Type.ARRAY, items: { type: Type.OBJECT, properties: { point: { type: Type.STRING }, explanation: { type: Type.STRING } } } }
            },
            required: ["title", "japaneseContent", "translation", "vocabulary", "grammarPoints"]
          }
        }
      });
      return response.text ? JSON.parse(response.text) : null;
    });
  }

  async generateListeningLesson(topic: string, difficulty: ReadingDifficulty, language: Language): Promise<ListeningLesson | null> {
    const ai = this.getAi();
    const targetLangName = LANGUAGE_MAP[language];
    // The prompt asks for a conversation or monologue suitable for listening practice
    const prompt = `Create a Japanese listening practice script on "${topic}", level ${difficulty}. It should be a conversation or monologue.
Output JSON with:
- title
- script (The full Japanese text of the conversation/monologue)
- translation (The full text in ${targetLangName})
- vocabulary (Key words)
- questions (3 multiple choice comprehension questions in ${targetLangName})
- Each question needs: question, options (array of 3 strings), correctIndex (0-2), explanation.
`;

    return this.retryOperation(async () => {
      const response = await ai.models.generateContent({
        model: 'gemini-2.5-flash',
        contents: { parts: [{ text: prompt }] },
        config: {
          responseMimeType: "application/json",
          responseSchema: {
            type: Type.OBJECT,
            properties: {
              title: { type: Type.STRING },
              script: { type: Type.STRING },
              translation: { type: Type.STRING },
              vocabulary: { type: Type.ARRAY, items: { type: Type.OBJECT, properties: { word: { type: Type.STRING }, reading: { type: Type.STRING }, meaning: { type: Type.STRING } } } },
              questions: {
                type: Type.ARRAY,
                items: {
                  type: Type.OBJECT,
                  properties: {
                    id: { type: Type.STRING },
                    question: { type: Type.STRING },
                    options: { type: Type.ARRAY, items: { type: Type.STRING } },
                    correctIndex: { type: Type.INTEGER },
                    explanation: { type: Type.STRING }
                  },
                  required: ["question", "options", "correctIndex", "explanation"]
                }
              }
            },
            required: ["title", "script", "translation", "vocabulary", "questions"]
          }
        }
      });
      return response.text ? JSON.parse(response.text) : null;
    });
  }

  async generateReadingTutorResponse(question: string, lesson: ReadingLesson | ListeningLesson, history: string, language: Language): Promise<string> {
    const ai = this.getAi();
    // Handle both ReadingLesson (japaneseContent) and ListeningLesson (script)
    const content = 'japaneseContent' in lesson ? lesson.japaneseContent : lesson.script;
    // Include the passage itself so the tutor can actually reference it
    const prompt = `Tutor for the text "${lesson.title}":
${content}
Question: "${question}". History: ${history}. Explain in ${LANGUAGE_MAP[language]}.`;
    return this.retryOperation(async () => {
      const res = await ai.models.generateContent({
        model: 'gemini-2.5-flash',
        contents: { parts: [{ text: prompt }] }
      });
      return res.text || "";
    });
  }

  async translateText(text: string, target: string, source: string = "Auto"): Promise<string> {
    const ai = this.getAi();
    return this.retryOperation(async () => {
      const res = await ai.models.generateContent({
        model: 'gemini-2.5-flash',
        contents: { parts: [{ text: `Translate the following text from ${source} to ${target}.` }, { text: text }] },
        config: {
          responseMimeType: "application/json",
          responseSchema: {
            type: Type.OBJECT,
            properties: { translation: { type: Type.STRING } },
            required: ["translation"]
          }
        }
      });
      return (res.text ? JSON.parse(res.text).translation : "") || "";
    });
  }

  async translateImage(base64: string, target: string, source: string = "Auto"): Promise<{ original: string; translated: string } | null> {
    const ai = this.getAi();
    const cleanBase64 = base64.replace(/^data:image\/(png|jpeg|jpg|webp|heic|heif);base64,/i, "");
    return this.retryOperation(async () => {
      const res = await ai.models.generateContent({
        model: 'gemini-2.5-flash',
        contents: {
          parts: [{ inlineData: { mimeType: 'image/jpeg', data: cleanBase64 } }, { text: `Extract text (Language: ${source}) and translate to ${target}. JSON output: original, translated.` }]
        },
        config: {
          responseMimeType: "application/json",
          responseSchema: {
            type: Type.OBJECT,
            properties: { original: { type: Type.STRING }, translated: { type: Type.STRING } },
            required: ["original", "translated"]
          }
        }
      });
      return res.text ? JSON.parse(res.text) : null;
    });
  }

  async extractAndAnalyzeText(base64: string, language: Language): Promise<OCRAnalysis | null> {
    const ai = this.getAi();
    const cleanBase64 = base64.replace(/^data:image\/(png|jpeg|jpg|webp|heic|heif);base64,/i, "");
    const targetLang = LANGUAGE_MAP[language];
    const prompt = `OCR and analyze text. Explain in ${targetLang}. JSON: extractedText, detectedLanguage, summary, vocabulary, grammarPoints.`;

    return this.retryOperation(async () => {
      const res = await ai.models.generateContent({
        model: 'gemini-2.5-flash',
        contents: {
          parts: [{ inlineData: { mimeType: 'image/jpeg', data: cleanBase64 } }, { text: prompt }]
        },
        config: {
          responseMimeType: "application/json",
          responseSchema: {
            type: Type.OBJECT,
            properties: {
              extractedText: { type: Type.STRING },
              detectedLanguage: { type: Type.STRING },
              summary: { type: Type.STRING },
              vocabulary: { type: Type.ARRAY, items: { type: Type.OBJECT, properties: { word: { type: Type.STRING }, reading: { type: Type.STRING }, meaning: { type: Type.STRING } } } },
              grammarPoints: { type: Type.ARRAY, items: { type: Type.OBJECT, properties: { point: { type: Type.STRING }, explanation: { type: Type.STRING } } } }
            },
            required: ["extractedText", "detectedLanguage", "summary", "vocabulary", "grammarPoints"]
          }
        }
      });
      return res.text ? JSON.parse(res.text) : null;
    });
  }
}

export const geminiService = new GeminiService();
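
// End-to-end usage sketch (illustrative; `demoSpeakReply` and the
// `playBase64Audio` helper above are hypothetical, not part of the original
// module): generate a tutor reply, synthesize it, and play it.
export async function demoSpeakReply(): Promise<void> {
  const { text } = await geminiService.generateTextResponse("こんにちは!");
  const audioBase64 = await geminiService.generateSpeech(text);
  if (audioBase64) await playBase64Audio(audioBase64);
}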