feat: 平台 AI 预算、成本熔断与全局限流 (API-AI-071)
All checks were successful
Deploy API Server / build-and-deploy (push) Successful in 45s
All checks were successful
Deploy API Server / build-and-deploy (push) Successful in 45s
- PlatformBudgetService: checkPlatformBudget / recordSuccess / recordFailure
- 熔断器: closed → open (连续失败N次) → half_open (限制任务数) → closed
- 平台日 token 预算 + 成本预算检查
- Admin 接口: transitionToHalfOpen / closeCircuit / getBudgetState
- PlatformAiBudgetDaily 已存在于 Prisma schema

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
cc2ccbad59
commit
00ac32a103
@ -7,11 +7,12 @@ import { CredentialEncryptionService } from './credential-encryption.service';
|
||||
import { RuntimeInternalController } from './internal/runtime-internal.controller';
|
||||
import { RuntimeInternalService } from './internal/runtime-internal.service';
|
||||
import { UserAiQuotaService } from './user-ai-quota.service';
|
||||
import { PlatformBudgetService } from './platform-budget.service';
|
||||
|
||||
/**
 * NestJS module for the AI runtime feature.
 *
 * Registers the user-facing and internal runtime controllers, and provides and
 * re-exports the AI services (including PlatformBudgetService) so that other
 * modules importing AiRuntimeModule can inject them.
 */
@Module({
  imports: [ConfigModule, PrismaModule],
  controllers: [UserAiController, RuntimeInternalController],
  // Each key appears exactly once: the superseded `providers` / `exports`
  // entries (without PlatformBudgetService) were duplicate object keys.
  providers: [
    UserAiService,
    CredentialEncryptionService,
    RuntimeInternalService,
    UserAiQuotaService,
    PlatformBudgetService,
  ],
  exports: [
    UserAiService,
    CredentialEncryptionService,
    RuntimeInternalService,
    UserAiQuotaService,
    PlatformBudgetService,
  ],
})
export class AiRuntimeModule {}
|
||||
|
||||
182
src/modules/ai-runtime/platform-budget.service.ts
Normal file
182
src/modules/ai-runtime/platform-budget.service.ts
Normal file
@ -0,0 +1,182 @@
|
||||
import { Injectable, BadRequestException } from '@nestjs/common';
|
||||
import { PrismaService } from '../../infrastructure/database/prisma.service';
|
||||
|
||||
/** Tunable limits for the platform-key budget checks and circuit breaker. */
interface BudgetConfig {
  /** Daily total-token ceiling; a new job is rejected once the day's total reaches it. */
  maxDailyTokens: number;
  /** Daily cost ceiling, expressed in cents. */
  maxDailyCostCents: number;
  /** Failure count at which the circuit breaker is switched to `open`. */
  consecutiveFailureThreshold: number;
  /** Maximum number of in-flight platform-key jobs admitted while the breaker is `half_open`. */
  halfOpenMaxJobs: number;
}
|
||||
|
||||
/**
 * Built-in limits used by PlatformBudgetService.
 *
 * These are compile-time constants; nothing in this file reads overrides from
 * configuration or the database.
 */
const DEFAULT_CONFIG: BudgetConfig = {
  maxDailyTokens: 10_000_000,
  maxDailyCostCents: 50_000, // $500
  consecutiveFailureThreshold: 10,
  halfOpenMaxJobs: 2,
};
|
||||
|
||||
/**
 * Platform-wide AI budget guard for jobs that run on the platform's own API key.
 *
 * State is kept in one `platformAiBudgetDaily` row per (localDate, provider, model).
 * That row accumulates token and cost usage and holds the circuit-breaker status:
 *
 *   closed -> open (failure threshold reached) -> half_open (admin) -> closed
 *
 * Limits come from DEFAULT_CONFIG.
 */
@Injectable()
export class PlatformBudgetService {
  constructor(private readonly prisma: PrismaService) {}

  /**
   * Returns the key date for today's budget row.
   *
   * The year/month/day are read with the server's local-time getters and then
   * encoded as a UTC-midnight instant.
   * NOTE(review): presumably intentional so a date-only `localDate` column stores
   * the local calendar day - confirm against the Prisma schema.
   */
  private today(): Date {
    const now = new Date();
    return new Date(Date.UTC(now.getFullYear(), now.getMonth(), now.getDate()));
  }

  /**
   * Fetches today's budget row for the provider/model, creating it if missing.
   *
   * Implemented as a single upsert with an empty `update` rather than
   * find-then-create, so two concurrent first callers of the day do not both
   * attempt the insert and fail on the unique key.
   */
  private async getOrCreateDaily(provider: string, model: string) {
    const localDate = this.today();
    return this.prisma.platformAiBudgetDaily.upsert({
      where: { localDate_provider_model: { localDate, provider, model } },
      create: { localDate, provider, model },
      update: {},
    });
  }

  /**
   * Check if a platform_key job can be created.
   *
   * @throws BadRequestException with errorCode `PLATFORM_CIRCUIT_OPEN`,
   *   `PLATFORM_CIRCUIT_HALF_OPEN`, `PLATFORM_TOKEN_BUDGET_EXCEEDED` or
   *   `PLATFORM_COST_BUDGET_EXCEEDED` when the job must be rejected.
   */
  async checkPlatformBudget(provider: string, model: string): Promise<void> {
    const budget = await this.getOrCreateDaily(provider, model);

    if (budget.circuitBreakerStatus === 'open') {
      throw new BadRequestException({
        errorCode: 'PLATFORM_CIRCUIT_OPEN',
        message: `Platform circuit breaker is open. Reason: ${budget.circuitBreakerReason ?? 'failure threshold reached'}`,
      });
    }

    if (budget.circuitBreakerStatus === 'half_open') {
      // Allow only a limited number of in-flight jobs while half-open.
      // NOTE(review): this count covers every platform_key job, not just this
      // provider/model - confirm whether it should be scoped.
      const inFlightJobs = await this.prisma.aiRuntimeJob.count({
        where: {
          status: { in: ['pending', 'locked', 'running'] },
          apiKeyMode: 'platform_key',
        },
      });
      if (inFlightJobs >= DEFAULT_CONFIG.halfOpenMaxJobs) {
        throw new BadRequestException({
          errorCode: 'PLATFORM_CIRCUIT_HALF_OPEN',
          message: 'Platform circuit breaker is half-open; limited capacity reached',
        });
      }
    }

    if (budget.totalTokens >= DEFAULT_CONFIG.maxDailyTokens) {
      throw new BadRequestException({
        errorCode: 'PLATFORM_TOKEN_BUDGET_EXCEEDED',
        message: `Daily platform token budget (${DEFAULT_CONFIG.maxDailyTokens}) exceeded`,
      });
    }

    if (budget.costEstimate >= DEFAULT_CONFIG.maxDailyCostCents) {
      throw new BadRequestException({
        errorCode: 'PLATFORM_COST_BUDGET_EXCEEDED',
        message: `Daily platform cost budget (${DEFAULT_CONFIG.maxDailyCostCents} cents) exceeded`,
      });
    }
  }

  /**
   * Record successful token usage after a platform_key invocation.
   *
   * Adds the usage to today's row, resets the failure counter, and closes the
   * breaker only if it is currently `half_open`. An `open` breaker is left
   * untouched so that a late success from a job started before the breaker
   * tripped cannot silently re-enable traffic.
   *
   * @param costEstimate Cost of this invocation; it is compared against
   *   `maxDailyCostCents`, so it is expected to be in cents.
   */
  async recordSuccess(
    provider: string,
    model: string,
    inputTokens: number,
    outputTokens: number,
    totalTokens: number,
    costEstimate: number,
  ): Promise<void> {
    const localDate = this.today();

    await this.prisma.platformAiBudgetDaily.upsert({
      where: { localDate_provider_model: { localDate, provider, model } },
      create: {
        localDate,
        provider,
        model,
        inputTokens,
        outputTokens,
        totalTokens,
        costEstimate,
        jobCount: 1,
      },
      update: {
        inputTokens: { increment: inputTokens },
        outputTokens: { increment: outputTokens },
        totalTokens: { increment: totalTokens },
        costEstimate: { increment: costEstimate },
        jobCount: { increment: 1 },
        // Success resets the consecutive failure count.
        failedCount: 0,
      },
    });

    // Transition half_open -> closed on success. The status filter makes this
    // a no-op for rows that are already closed or still open.
    await this.prisma.platformAiBudgetDaily.updateMany({
      where: { localDate, provider, model, circuitBreakerStatus: 'half_open' },
      data: { circuitBreakerStatus: 'closed', circuitBreakerReason: null },
    });
  }

  /**
   * Record a failed platform_key invocation, potentially triggering the circuit breaker.
   *
   * The failure counter is incremented atomically in the database rather than
   * written back from an earlier read, so concurrent failures are not lost.
   * The threshold is then evaluated against the row returned by the upsert,
   * which also covers a row that was just created.
   *
   * @param errorCode Error identifier of the failure; included in the breaker reason.
   */
  async recordFailure(
    provider: string,
    model: string,
    errorCode: string,
  ): Promise<void> {
    const localDate = this.today();
    const where = { localDate_provider_model: { localDate, provider, model } };

    const budget = await this.prisma.platformAiBudgetDaily.upsert({
      where,
      create: {
        localDate,
        provider,
        model,
        failedCount: 1,
        jobCount: 1,
        circuitBreakerStatus: 'closed',
      },
      update: {
        failedCount: { increment: 1 },
        jobCount: { increment: 1 },
      },
    });

    if (budget.failedCount >= DEFAULT_CONFIG.consecutiveFailureThreshold) {
      await this.prisma.platformAiBudgetDaily.update({
        where,
        data: {
          circuitBreakerStatus: 'open',
          circuitBreakerReason: `Consecutive failures reached ${budget.failedCount}/${DEFAULT_CONFIG.consecutiveFailureThreshold}. Last error: ${errorCode}`,
        },
      });
    }
  }

  /**
   * Admin: manually transition the circuit breaker to half_open for testing.
   *
   * The failure counter is not reset here, so a failure while half-open can
   * trip the breaker again immediately if the counter is still at the threshold.
   */
  async transitionToHalfOpen(provider: string, model: string): Promise<void> {
    const localDate = this.today();
    const reason = 'Manually set to half_open by admin';
    await this.prisma.platformAiBudgetDaily.upsert({
      where: { localDate_provider_model: { localDate, provider, model } },
      // Record the reason on the create path too, so a freshly created row
      // carries the same audit text as an updated one.
      create: {
        localDate,
        provider,
        model,
        circuitBreakerStatus: 'half_open',
        circuitBreakerReason: reason,
      },
      update: { circuitBreakerStatus: 'half_open', circuitBreakerReason: reason },
    });
  }

  /** Admin: manually close the circuit breaker and clear its reason and failure counter. */
  async closeCircuit(provider: string, model: string): Promise<void> {
    const localDate = this.today();
    await this.prisma.platformAiBudgetDaily.upsert({
      where: { localDate_provider_model: { localDate, provider, model } },
      create: { localDate, provider, model, circuitBreakerStatus: 'closed' },
      update: { circuitBreakerStatus: 'closed', circuitBreakerReason: null, failedCount: 0 },
    });
  }

  /**
   * Get the current platform budget state for a provider/model.
   *
   * Note that this creates today's row if it does not exist yet.
   *
   * @returns Today's usage counters and breaker status, plus the configured limits.
   */
  async getBudgetState(provider: string, model: string) {
    const budget = await this.getOrCreateDaily(provider, model);
    return {
      provider: budget.provider,
      model: budget.model,
      localDate: budget.localDate.toISOString(),
      totalTokens: budget.totalTokens,
      costEstimateCents: budget.costEstimate,
      jobCount: budget.jobCount,
      failedCount: budget.failedCount,
      circuitBreakerStatus: budget.circuitBreakerStatus,
      circuitBreakerReason: budget.circuitBreakerReason,
      limits: {
        maxDailyTokens: DEFAULT_CONFIG.maxDailyTokens,
        maxDailyCostCents: DEFAULT_CONFIG.maxDailyCostCents,
        consecutiveFailureThreshold: DEFAULT_CONFIG.consecutiveFailureThreshold,
      },
    };
  }
}
|
||||
Loading…
x
Reference in New Issue
Block a user