feat(coach): switch Lyra to Gemini 2.5 Flash Lite (Groq+OpenRouter quotas dead)
- Primary: gemini-2.5-flash-lite (~789ms TTFR, ~10x cheaper than Haiku, no reasoning overhead)
- Fallback 1: gemini-2.5-flash (smarter when Lite overloaded)
- Fallback 2: gpt-4o-mini (anchor on different provider)
- message.post.ts: candidates chain replaced
- sos-stream.get.ts: gemini-flash-lite default + auto-fallback to gpt-4o-mini if key missing
- nitro.config.ts: geminiApiKey runtimeConfig
- start-staging.sh: GEMINI_API_KEY export + NITRO_GEMINI_API_KEY

OpenRouter credits = 0, Groq TPD exhausted - users currently get 503.
This commit is contained in:
parent
487af4ede1
commit
3a4e1ecfba
@ -40,6 +40,7 @@ export default defineNitroConfig({
|
|||||||
openaiApiKey: process.env.OPENAI_API_KEY ?? process.env.NUXT_OPENAI_API_KEY ?? "",
|
openaiApiKey: process.env.OPENAI_API_KEY ?? process.env.NUXT_OPENAI_API_KEY ?? "",
|
||||||
groqApiKey: process.env.GROQ_API_KEY ?? process.env.NUXT_GROQ_API_KEY ?? "",
|
groqApiKey: process.env.GROQ_API_KEY ?? process.env.NUXT_GROQ_API_KEY ?? "",
|
||||||
googleAiApiKey: process.env.GOOGLE_AI_API_KEY ?? "",
|
googleAiApiKey: process.env.GOOGLE_AI_API_KEY ?? "",
|
||||||
|
geminiApiKey: process.env.GEMINI_API_KEY ?? "",
|
||||||
|
|
||||||
// ─── TTS-Provider ────────────────────────────────────────────────────
|
// ─── TTS-Provider ────────────────────────────────────────────────────
|
||||||
googleApiKey: process.env.GOOGLE_API_KEY ?? process.env.NUXT_GOOGLE_API_KEY ?? "",
|
googleApiKey: process.env.GOOGLE_API_KEY ?? process.env.NUXT_GOOGLE_API_KEY ?? "",
|
||||||
|
|||||||
@ -361,6 +361,14 @@ const PROVIDER_CONFIG = {
|
|||||||
url: "https://openrouter.ai/api/v1/chat/completions",
|
url: "https://openrouter.ai/api/v1/chat/completions",
|
||||||
keyName: "openrouterApiKey" as const,
|
keyName: "openrouterApiKey" as const,
|
||||||
},
|
},
|
||||||
|
gemini: {
|
||||||
|
url: "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
|
||||||
|
keyName: "geminiApiKey" as const,
|
||||||
|
},
|
||||||
|
openai: {
|
||||||
|
url: "https://api.openai.com/v1/chat/completions",
|
||||||
|
keyName: "openaiApiKey" as const,
|
||||||
|
},
|
||||||
} as const;
|
} as const;
|
||||||
|
|
||||||
const FEEDBACK_DETECTION_PROMPT = `Du analysierst eine Nutzer-Nachricht aus einer Gambling-Recovery-App.
|
const FEEDBACK_DETECTION_PROMPT = `Du analysierst eine Nutzer-Nachricht aus einer Gambling-Recovery-App.
|
||||||
@ -595,28 +603,21 @@ export default defineEventHandler(async (event) => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ─── Tier-basiertes LLM-Routing (analog sos-stream.get.ts) ─────────────────
|
// ─── Tier-basiertes LLM-Routing (analog sos-stream.get.ts) ─────────────────
|
||||||
// Free / Pro → Groq Llama 3.3 70B (schnell, sachlich)
|
// Default-Chain: Gemini Flash Lite (schnell+billig+warm) → Gemini 2.5 Flash
|
||||||
// Legend → OpenRouter Haiku 4.5 (warm, premium)
|
// (smarter Fallback) → OpenAI gpt-4o-mini (Last-Resort, anderer Provider).
|
||||||
// Kein sosMode-Override mehr — Coach-Page hat eigenes Routing.
|
// OpenRouter + Groq sind aktuell ohne Quota/Credits — entfernt aus Chain.
|
||||||
const planRaw = (profile?.plan ?? "free").toLowerCase();
|
const planRaw = (profile?.plan ?? "free").toLowerCase();
|
||||||
const plan = planRaw === "premium" ? "legend" : planRaw === "standard" ? "pro" : planRaw;
|
const plan = planRaw === "premium" ? "legend" : planRaw === "standard" ? "pro" : planRaw;
|
||||||
const llmProvider = plan === "legend" ? "openrouter-haiku" : "groq-llama";
|
const llmProvider = "gemini-flash-lite";
|
||||||
|
|
||||||
type Candidate = { provider: "groq" | "openrouter"; model: string };
|
type Candidate = { provider: "groq" | "openrouter" | "gemini" | "openai"; model: string };
|
||||||
const candidates: Candidate[] =
|
const candidates: Candidate[] = [
|
||||||
llmProvider === "openrouter-haiku"
|
{ provider: "gemini", model: "gemini-2.5-flash-lite" },
|
||||||
? [
|
{ provider: "gemini", model: "gemini-2.5-flash" },
|
||||||
{ provider: "openrouter", model: "anthropic/claude-haiku-4.5" },
|
{ provider: "openai", model: "gpt-4o-mini" },
|
||||||
{ provider: "openrouter", model: "anthropic/claude-3.5-haiku" },
|
];
|
||||||
{ provider: "groq", model: "llama-3.3-70b-versatile" },
|
|
||||||
]
|
|
||||||
: [
|
|
||||||
{ provider: "groq", model: "llama-3.3-70b-versatile" },
|
|
||||||
{ provider: "groq", model: "llama-3.1-8b-instant" },
|
|
||||||
{ provider: "openrouter", model: "meta-llama/llama-3.3-70b-instruct" },
|
|
||||||
];
|
|
||||||
|
|
||||||
async function tryModel(providerName: "groq" | "openrouter", model: string) {
|
async function tryModel(providerName: "groq" | "openrouter" | "gemini" | "openai", model: string) {
|
||||||
const p = PROVIDER_CONFIG[providerName];
|
const p = PROVIDER_CONFIG[providerName];
|
||||||
const key = config[p.keyName];
|
const key = config[p.keyName];
|
||||||
if (!key) return null;
|
if (!key) return null;
|
||||||
|
|||||||
@ -219,17 +219,20 @@ export default defineEventHandler(async (event) => {
|
|||||||
if (userToggle && userToggle !== "auto") {
|
if (userToggle && userToggle !== "auto") {
|
||||||
llmProvider = userToggle;
|
llmProvider = userToggle;
|
||||||
} else {
|
} else {
|
||||||
const planRaw = (profile?.plan ?? "free").toLowerCase();
|
// Default chain: Gemini Flash Lite (schnell+billig+warm) für alle Pläne.
|
||||||
// legacy "premium"/"standard" → legend/pro
|
// OpenRouter (Haiku/Sonnet) + Groq haben aktuell keine Quota/Credits.
|
||||||
const plan = planRaw === "premium" ? "legend" : planRaw === "standard" ? "pro" : planRaw;
|
llmProvider = "gemini-flash-lite";
|
||||||
llmProvider = plan === "legend" ? "openrouter-haiku" : "groq-llama";
|
|
||||||
}
|
}
|
||||||
let upstreamUrl: string;
|
let upstreamUrl: string;
|
||||||
let upstreamKey: string | undefined;
|
let upstreamKey: string | undefined;
|
||||||
let upstreamModel: string;
|
let upstreamModel: string;
|
||||||
const upstreamHeaders: Record<string, string> = { "Content-Type": "application/json" };
|
const upstreamHeaders: Record<string, string> = { "Content-Type": "application/json" };
|
||||||
let upstreamProviderField: { sort: string } | undefined;
|
let upstreamProviderField: { sort: string } | undefined;
|
||||||
if (llmProvider === "groq-llama") {
|
if (llmProvider === "gemini-flash-lite" || llmProvider === "gemini-flash") {
|
||||||
|
upstreamUrl = "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions";
|
||||||
|
upstreamKey = config.geminiApiKey as string | undefined;
|
||||||
|
upstreamModel = llmProvider === "gemini-flash" ? "gemini-2.5-flash" : "gemini-2.5-flash-lite";
|
||||||
|
} else if (llmProvider === "groq-llama") {
|
||||||
upstreamUrl = "https://api.groq.com/openai/v1/chat/completions";
|
upstreamUrl = "https://api.groq.com/openai/v1/chat/completions";
|
||||||
upstreamKey = config.groqApiKey as string | undefined;
|
upstreamKey = config.groqApiKey as string | undefined;
|
||||||
upstreamModel = "llama-3.3-70b-versatile";
|
upstreamModel = "llama-3.3-70b-versatile";
|
||||||
@ -243,6 +246,15 @@ export default defineEventHandler(async (event) => {
|
|||||||
upstreamHeaders["X-Title"] = "ReBreak SOS";
|
upstreamHeaders["X-Title"] = "ReBreak SOS";
|
||||||
upstreamProviderField = { sort: "latency" };
|
upstreamProviderField = { sort: "latency" };
|
||||||
}
|
}
|
||||||
|
// Fallback: wenn gewählter Provider keinen Key hat → OpenAI gpt-4o-mini als Anker
|
||||||
|
if (!upstreamKey && config.openaiApiKey) {
|
||||||
|
console.warn(`[coach/sos-stream] ${llmProvider} key missing → fallback openai/gpt-4o-mini`);
|
||||||
|
llmProvider = "openai-mini";
|
||||||
|
upstreamUrl = "https://api.openai.com/v1/chat/completions";
|
||||||
|
upstreamKey = config.openaiApiKey as string;
|
||||||
|
upstreamModel = "gpt-4o-mini";
|
||||||
|
upstreamProviderField = undefined;
|
||||||
|
}
|
||||||
if (!upstreamKey) {
|
if (!upstreamKey) {
|
||||||
throw createError({ statusCode: 503, message: `API key for ${llmProvider} fehlt` });
|
throw createError({ statusCode: 503, message: `API key for ${llmProvider} fehlt` });
|
||||||
}
|
}
|
||||||
|
|||||||
@ -44,6 +44,7 @@ exec infisical run \
|
|||||||
export OPENROUTER_API_KEY="${OPENROUTER_API_KEY:-${NUXT_OPENROUTER_API_KEY:-}}"
|
export OPENROUTER_API_KEY="${OPENROUTER_API_KEY:-${NUXT_OPENROUTER_API_KEY:-}}"
|
||||||
export GROQ_API_KEY="${GROQ_API_KEY:-${NUXT_GROQ_API_KEY:-}}"
|
export GROQ_API_KEY="${GROQ_API_KEY:-${NUXT_GROQ_API_KEY:-}}"
|
||||||
export GOOGLE_API_KEY="${GOOGLE_API_KEY:-${NUXT_GOOGLE_API_KEY:-}}"
|
export GOOGLE_API_KEY="${GOOGLE_API_KEY:-${NUXT_GOOGLE_API_KEY:-}}"
|
||||||
|
export GEMINI_API_KEY="${GEMINI_API_KEY:-}"
|
||||||
export DEEPGRAM_API_KEY="${DEEPGRAM_API_KEY:-${NUXT_DEEPGRAM_API_KEY:-}}"
|
export DEEPGRAM_API_KEY="${DEEPGRAM_API_KEY:-${NUXT_DEEPGRAM_API_KEY:-}}"
|
||||||
export DATABASE_URL="${DATABASE_URL:-${NUXT_DATABASE_URL:-}}"
|
export DATABASE_URL="${DATABASE_URL:-${NUXT_DATABASE_URL:-}}"
|
||||||
export LYRA_BOT_USER_ID="${LYRA_BOT_USER_ID:-${NUXT_LYRA_BOT_USER_ID:-}}"
|
export LYRA_BOT_USER_ID="${LYRA_BOT_USER_ID:-${NUXT_LYRA_BOT_USER_ID:-}}"
|
||||||
@ -59,6 +60,7 @@ exec infisical run \
|
|||||||
[[ -n "${GROQ_API_KEY:-}" ]] && export NITRO_GROQ_API_KEY="$GROQ_API_KEY"
|
[[ -n "${GROQ_API_KEY:-}" ]] && export NITRO_GROQ_API_KEY="$GROQ_API_KEY"
|
||||||
[[ -n "${GOOGLE_AI_API_KEY:-}" ]] && export NITRO_GOOGLE_AI_API_KEY="$GOOGLE_AI_API_KEY"
|
[[ -n "${GOOGLE_AI_API_KEY:-}" ]] && export NITRO_GOOGLE_AI_API_KEY="$GOOGLE_AI_API_KEY"
|
||||||
[[ -n "${GOOGLE_API_KEY:-}" ]] && export NITRO_GOOGLE_API_KEY="$GOOGLE_API_KEY"
|
[[ -n "${GOOGLE_API_KEY:-}" ]] && export NITRO_GOOGLE_API_KEY="$GOOGLE_API_KEY"
|
||||||
|
[[ -n "${GEMINI_API_KEY:-}" ]] && export NITRO_GEMINI_API_KEY="$GEMINI_API_KEY"
|
||||||
[[ -n "${DEEPGRAM_API_KEY:-}" ]] && export NITRO_DEEPGRAM_API_KEY="$DEEPGRAM_API_KEY"
|
[[ -n "${DEEPGRAM_API_KEY:-}" ]] && export NITRO_DEEPGRAM_API_KEY="$DEEPGRAM_API_KEY"
|
||||||
[[ -n "${CARTESIA_API_KEY:-}" ]] && export NITRO_CARTESIA_API_KEY="$CARTESIA_API_KEY"
|
[[ -n "${CARTESIA_API_KEY:-}" ]] && export NITRO_CARTESIA_API_KEY="$CARTESIA_API_KEY"
|
||||||
[[ -n "${ELEVENLABS_API_KEY:-}" ]] && export NITRO_ELEVENLABS_API_KEY="$ELEVENLABS_API_KEY"
|
[[ -n "${ELEVENLABS_API_KEY:-}" ]] && export NITRO_ELEVENLABS_API_KEY="$ELEVENLABS_API_KEY"
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user