feat: 平台 AI 预算、成本熔断与全局限流 (API-AI-071)
All checks were successful
Deploy API Server / build-and-deploy (push) Successful in 45s
All checks were successful
Deploy API Server / build-and-deploy (push) Successful in 45s
- PlatformBudgetService: checkPlatformBudget / recordSuccess / recordFailure
- 熔断器: closed → open (连续失败N次) → half_open (限制任务数) → closed
- 平台日 token 预算 + 成本预算检查
- Admin 接口: transitionToHalfOpen / closeCircuit / getBudgetState
- PlatformAiBudgetDaily 已存在于 Prisma schema

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
cc2ccbad59
commit
00ac32a103
@ -7,11 +7,12 @@ import { CredentialEncryptionService } from './credential-encryption.service';
|
|||||||
import { RuntimeInternalController } from './internal/runtime-internal.controller';
|
import { RuntimeInternalController } from './internal/runtime-internal.controller';
|
||||||
import { RuntimeInternalService } from './internal/runtime-internal.service';
|
import { RuntimeInternalService } from './internal/runtime-internal.service';
|
||||||
import { UserAiQuotaService } from './user-ai-quota.service';
|
import { UserAiQuotaService } from './user-ai-quota.service';
|
||||||
|
import { PlatformBudgetService } from './platform-budget.service';
|
||||||
|
|
||||||
/**
 * NestJS module wiring the AI runtime: user-facing and internal controllers
 * plus the services they depend on.
 *
 * Every provider is also exported so that other modules importing
 * `AiRuntimeModule` can inject them, including `PlatformBudgetService`
 * (platform budget checks and circuit breaker).
 */
@Module({
  imports: [ConfigModule, PrismaModule],
  controllers: [UserAiController, RuntimeInternalController],
  providers: [
    UserAiService,
    CredentialEncryptionService,
    RuntimeInternalService,
    UserAiQuotaService,
    PlatformBudgetService,
  ],
  exports: [
    UserAiService,
    CredentialEncryptionService,
    RuntimeInternalService,
    UserAiQuotaService,
    PlatformBudgetService,
  ],
})
export class AiRuntimeModule {}
|
||||||
|
|||||||
182
src/modules/ai-runtime/platform-budget.service.ts
Normal file
182
src/modules/ai-runtime/platform-budget.service.ts
Normal file
@ -0,0 +1,182 @@
|
|||||||
|
import { Injectable, BadRequestException } from '@nestjs/common';
|
||||||
|
import { PrismaService } from '../../infrastructure/database/prisma.service';
|
||||||
|
|
||||||
|
/** Limits that drive the platform budget checks and the circuit breaker. */
interface BudgetConfig {
  /** Daily total-token ceiling for one (provider, model) pair. */
  maxDailyTokens: number;
  /** Daily cost ceiling, expressed in cents. */
  maxDailyCostCents: number;
  /** Failure count at which the circuit breaker is switched to `open`. */
  consecutiveFailureThreshold: number;
  /** Maximum number of in-flight platform-key jobs tolerated while `half_open`. */
  halfOpenMaxJobs: number;
}

/**
 * Built-in limits used by the service.
 *
 * Typed as `Readonly` because this object is shared module-wide: an accidental
 * write anywhere would silently change the limits for every caller.
 */
const DEFAULT_CONFIG: Readonly<BudgetConfig> = {
  maxDailyTokens: 10_000_000,
  maxDailyCostCents: 50_000, // $500
  consecutiveFailureThreshold: 10,
  halfOpenMaxJobs: 2,
};
|
||||||
|
|
||||||
|
/**
 * Platform-level AI budget guard and circuit breaker.
 *
 * All state is kept in `platformAiBudgetDaily`, one row per
 * (localDate, provider, model). The row accumulates token / cost / job
 * counters and carries the circuit breaker status (`closed`, `open`,
 * `half_open`) together with a human-readable reason.
 */
@Injectable()
export class PlatformBudgetService {
  constructor(private readonly prisma: PrismaService) {}

  /**
   * Returns the date used as the daily row key: the calendar date read with
   * the local-time getters, re-encoded as a UTC-midnight instant.
   *
   * NOTE(review): combining local getters with `Date.UTC` looks deliberate
   * given the `localDate` column name (local calendar day stored as a pure
   * date) — confirm against the server timezone expectations.
   */
  private today(): Date {
    const d = new Date();
    return new Date(Date.UTC(d.getFullYear(), d.getMonth(), d.getDate()));
  }

  /** Builds the compound-unique selector shared by every query on the daily row. */
  private dailyKey(localDate: Date, provider: string, model: string) {
    return { localDate_provider_model: { localDate, provider, model } };
  }

  /**
   * Fetches today's row for the pair, creating it with column defaults when
   * it does not exist yet.
   *
   * A single upsert with an empty `update` is used instead of a separate
   * lookup followed by a create, so two concurrent callers cannot both miss
   * the row and then collide on the unique key.
   */
  private async getOrCreateDaily(provider: string, model: string) {
    const localDate = this.today();
    return this.prisma.platformAiBudgetDaily.upsert({
      where: this.dailyKey(localDate, provider, model),
      create: { localDate, provider, model },
      update: {},
    });
  }

  /**
   * Checks whether a platform_key job may be created for the pair.
   *
   * Checks run in this order: breaker `open`, breaker `half_open` capacity,
   * daily token budget, daily cost budget.
   *
   * @throws BadRequestException with `errorCode` `PLATFORM_CIRCUIT_OPEN`,
   *   `PLATFORM_CIRCUIT_HALF_OPEN`, `PLATFORM_TOKEN_BUDGET_EXCEEDED` or
   *   `PLATFORM_COST_BUDGET_EXCEEDED` when the job must be rejected.
   */
  async checkPlatformBudget(provider: string, model: string): Promise<void> {
    const budget = await this.getOrCreateDaily(provider, model);

    if (budget.circuitBreakerStatus === 'open') {
      throw new BadRequestException({
        errorCode: 'PLATFORM_CIRCUIT_OPEN',
        message: `Platform circuit breaker is open. Reason: ${budget.circuitBreakerReason ?? 'failure threshold reached'}`,
      });
    }

    if (budget.circuitBreakerStatus === 'half_open') {
      // Only a limited number of in-flight jobs is allowed while probing.
      // NOTE(review): this count covers every platform_key job, not just the
      // ones for this provider/model — confirm that is intended.
      const halfOpenJobs = await this.prisma.aiRuntimeJob.count({
        where: {
          status: { in: ['pending', 'locked', 'running'] },
          apiKeyMode: 'platform_key',
        },
      });
      if (halfOpenJobs >= DEFAULT_CONFIG.halfOpenMaxJobs) {
        throw new BadRequestException({
          errorCode: 'PLATFORM_CIRCUIT_HALF_OPEN',
          message: 'Platform circuit breaker is half-open; limited capacity reached',
        });
      }
    }

    if (budget.totalTokens >= DEFAULT_CONFIG.maxDailyTokens) {
      throw new BadRequestException({
        errorCode: 'PLATFORM_TOKEN_BUDGET_EXCEEDED',
        message: `Daily platform token budget (${DEFAULT_CONFIG.maxDailyTokens}) exceeded`,
      });
    }

    if (budget.costEstimate >= DEFAULT_CONFIG.maxDailyCostCents) {
      throw new BadRequestException({
        errorCode: 'PLATFORM_COST_BUDGET_EXCEEDED',
        message: `Daily platform cost budget (${DEFAULT_CONFIG.maxDailyCostCents} cents) exceeded`,
      });
    }
  }

  /**
   * Records token usage and cost after a successful platform_key invocation.
   *
   * On an existing row the counters are incremented atomically, the failure
   * counter is reset to 0 and the breaker is set to `closed`.
   *
   * NOTE(review): the breaker is closed regardless of its current status, so
   * a late success from an in-flight job also closes an `open` breaker —
   * confirm whether only half_open → closed was intended.
   *
   * @param costEstimate Amount added to the row's `costEstimate`; it is
   *   compared against a limit in cents by `checkPlatformBudget`.
   */
  async recordSuccess(
    provider: string, model: string,
    inputTokens: number, outputTokens: number, totalTokens: number,
    costEstimate: number,
  ): Promise<void> {
    const localDate = this.today();
    await this.prisma.platformAiBudgetDaily.upsert({
      where: this.dailyKey(localDate, provider, model),
      create: {
        localDate, provider, model,
        inputTokens, outputTokens, totalTokens,
        costEstimate, jobCount: 1,
      },
      update: {
        inputTokens: { increment: inputTokens },
        outputTokens: { increment: outputTokens },
        totalTokens: { increment: totalTokens },
        costEstimate: { increment: costEstimate },
        jobCount: { increment: 1 },
        // A success resets the consecutive failure count.
        failedCount: 0,
        circuitBreakerStatus: 'closed',
        circuitBreakerReason: null,
      },
    });
  }

  /**
   * Records a failed platform_key invocation and opens the circuit breaker
   * once the failure count reaches the configured threshold.
   *
   * The failure counter is bumped with an atomic increment rather than a
   * read-then-write of an absolute value, so concurrent failures are not
   * lost, and the date key is computed once so the write cannot land on a
   * different day than the one that was counted.
   *
   * @param errorCode Code of the failure; embedded in the breaker reason.
   */
  async recordFailure(
    provider: string, model: string,
    errorCode: string,
  ): Promise<void> {
    const localDate = this.today();
    const where = this.dailyKey(localDate, provider, model);

    const budget = await this.prisma.platformAiBudgetDaily.upsert({
      where,
      create: {
        localDate, provider, model,
        failedCount: 1,
        jobCount: 1,
        circuitBreakerStatus: 'closed',
      },
      update: {
        failedCount: { increment: 1 },
        jobCount: { increment: 1 },
      },
    });

    // Evaluated on the row returned by the upsert, so a freshly created row
    // is also checked against the threshold.
    const newFailed = budget.failedCount;
    if (newFailed >= DEFAULT_CONFIG.consecutiveFailureThreshold) {
      await this.prisma.platformAiBudgetDaily.update({
        where,
        data: {
          circuitBreakerStatus: 'open',
          circuitBreakerReason: `Consecutive failures reached ${newFailed}/${DEFAULT_CONFIG.consecutiveFailureThreshold}. Last error: ${errorCode}`,
        },
      });
    }
  }

  /** Admin: manually switch today's breaker for the pair to `half_open`. */
  async transitionToHalfOpen(provider: string, model: string): Promise<void> {
    const localDate = this.today();
    await this.prisma.platformAiBudgetDaily.upsert({
      where: this.dailyKey(localDate, provider, model),
      create: { localDate, provider, model, circuitBreakerStatus: 'half_open' },
      update: { circuitBreakerStatus: 'half_open', circuitBreakerReason: 'Manually set to half_open by admin' },
    });
  }

  /** Admin: manually close today's breaker for the pair and reset its failure count. */
  async closeCircuit(provider: string, model: string): Promise<void> {
    const localDate = this.today();
    await this.prisma.platformAiBudgetDaily.upsert({
      where: this.dailyKey(localDate, provider, model),
      create: { localDate, provider, model, circuitBreakerStatus: 'closed' },
      update: { circuitBreakerStatus: 'closed', circuitBreakerReason: null, failedCount: 0 },
    });
  }

  /**
   * Returns a snapshot of today's budget row for the pair together with the
   * limits it is checked against. The row is created if it does not exist.
   */
  async getBudgetState(provider: string, model: string) {
    const budget = await this.getOrCreateDaily(provider, model);
    return {
      provider: budget.provider,
      model: budget.model,
      localDate: budget.localDate.toISOString(),
      totalTokens: budget.totalTokens,
      costEstimateCents: budget.costEstimate,
      jobCount: budget.jobCount,
      failedCount: budget.failedCount,
      circuitBreakerStatus: budget.circuitBreakerStatus,
      circuitBreakerReason: budget.circuitBreakerReason,
      limits: {
        maxDailyTokens: DEFAULT_CONFIG.maxDailyTokens,
        maxDailyCostCents: DEFAULT_CONFIG.maxDailyCostCents,
        consecutiveFailureThreshold: DEFAULT_CONFIG.consecutiveFailureThreshold,
      },
    };
  }
}
|
||||||
Loading…
x
Reference in New Issue
Block a user