L2 ↔ L5 Contract: Orchestration ↔ Evaluation
Canonical specification for communication between Orchestration Layer (L2) and Evaluation Layer (L5).
Overview
Purpose: Enable Orchestration (L2) to submit completion results to Evaluation (L5) for quality assessment, benchmarking, and feedback collection.
Direction: Bidirectional
- L2 → L5: Task completion results from Orchestration to Evaluation
- L5 → L2: Quality scores, benchmarks, and feedback from Evaluation to Orchestration
Transport: HTTP REST API with JSON payloads
Request Contract: L2 → L5
evaluation_request.v0
json
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://mnemoverse.dev/schemas/evaluation_request.v0.json",
"title": "evaluation_request.v0",
"type": "object",
"required": ["request_id", "task_id", "intent", "completion", "metrics", "deadline_ms"],
"properties": {
"request_id": { "type": "string" },
"task_id": { "type": "string" },
"intent": { "type": "string", "minLength": 1 },
"completion": {
"type": "object",
"required": ["fragments", "total_tokens", "latency_ms"],
"properties": {
"fragments": {
"type": "array",
"items": {
"type": "object",
"required": ["id", "text", "lod", "cost_tokens"],
"properties": {
"id": { "type": "string" },
"text": { "type": "string" },
"lod": { "type": "string", "enum": ["macro", "micro", "atomic"] },
"entities": { "type": "array", "items": { "type": "string" } },
"cost_tokens": { "type": "integer", "minimum": 0 },
"source": { "type": "string", "enum": ["L1", "L4", "cached"] }
},
"additionalProperties": false
}
},
"total_tokens": { "type": "integer", "minimum": 0 },
"latency_ms": { "type": "integer", "minimum": 0 }
},
"additionalProperties": false
},
"metrics": {
"type": "object",
"required": ["coverage_entities", "quality_threshold"],
"properties": {
"coverage_entities": { "type": "number", "minimum": 0.0, "maximum": 1.0 },
"quality_threshold": { "type": "number", "minimum": 0.0, "maximum": 1.0 },
"provider_latencies": {
"type": "object",
"properties": {
"L1_ms": { "type": "integer", "minimum": 0 },
"L4_ms": { "type": "integer", "minimum": 0 }
},
"additionalProperties": false
}
},
"additionalProperties": false
},
"deadline_ms": { "type": "integer", "minimum": 100 },
"evaluation_type": {
"type": "string",
"enum": ["quality_score", "benchmark", "feedback", "all"],
"default": "all"
},
"context": {
"type": "object",
"properties": {
"user_id": { "type": "string" },
"session_id": { "type": "string" },
"domain": { "type": "string", "enum": ["code", "documentation", "research", "general"] },
"budget_used": {
"type": "object",
"properties": {
"tokens_max": { "type": "integer", "minimum": 0 },
"time_ms": { "type": "integer", "minimum": 0 }
},
"additionalProperties": false
}
},
"additionalProperties": false
}
},
"additionalProperties": false
}
Request Examples
Quality Score Evaluation:
json
{
"request_id": "req_eval_001",
"task_id": "task_550e8400-e29b-41d4-a716-446655440000",
"intent": "Find authentication issues in React app and suggest debugging steps",
"completion": {
"fragments": [
{
"id": "frag_auth_issue_001",
"text": "Authentication middleware in Express.js verifies JWT tokens using jsonwebtoken library...",
"lod": "macro",
"entities": ["jwt", "middleware", "authentication", "express"],
"cost_tokens": 150,
"source": "L1"
},
{
"id": "frag_debug_steps_002",
"text": "To debug JWT issues: 1. Check token expiration using jwt.decode(), 2. Verify secret key matches...",
"lod": "micro",
"entities": ["debugging", "jwt", "troubleshooting"],
"cost_tokens": 200,
"source": "L4"
}
],
"total_tokens": 350,
"latency_ms": 1250
},
"metrics": {
"coverage_entities": 0.85,
"quality_threshold": 0.8,
"provider_latencies": {
"L1_ms": 800,
"L4_ms": 450
}
},
"deadline_ms": 3000,
"evaluation_type": "quality_score",
"context": {
"user_id": "user_123",
"session_id": "session_456",
"domain": "code",
"budget_used": {
"tokens_max": 2000,
"time_ms": 2500
}
}
}
Benchmark Evaluation:
json
{
"request_id": "req_eval_002",
"task_id": "task_benchmark_001",
"intent": "Explain transformer architecture components",
"completion": {
"fragments": [
{
"id": "frag_transformer_001",
"text": "Transformer architecture consists of encoder-decoder blocks with multi-head attention mechanisms...",
"lod": "macro",
"entities": ["transformer", "attention", "encoder", "decoder"],
"cost_tokens": 300,
"source": "L1"
}
],
"total_tokens": 300,
"latency_ms": 900
},
"metrics": {
"coverage_entities": 0.92,
"quality_threshold": 0.85
},
"deadline_ms": 2000,
"evaluation_type": "benchmark",
"context": {
"domain": "research"
}
}
Response Contract: L5 → L2
evaluation_response.v0
json
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "https://mnemoverse.dev/schemas/evaluation_response.v0.json",
"title": "evaluation_response.v0",
"type": "object",
"required": ["request_id", "task_id", "results", "metadata"],
"properties": {
"request_id": { "type": "string" },
"task_id": { "type": "string" },
"results": {
"type": "object",
"properties": {
"quality_score": {
"type": "object",
"properties": {
"overall": { "type": "number", "minimum": 0.0, "maximum": 1.0 },
"dimensions": {
"type": "object",
"properties": {
"relevance": { "type": "number", "minimum": 0.0, "maximum": 1.0 },
"completeness": { "type": "number", "minimum": 0.0, "maximum": 1.0 },
"accuracy": { "type": "number", "minimum": 0.0, "maximum": 1.0 },
"coherence": { "type": "number", "minimum": 0.0, "maximum": 1.0 }
},
"additionalProperties": false
},
"confidence": { "type": "number", "minimum": 0.0, "maximum": 1.0 }
},
"additionalProperties": false
},
"benchmark": {
"type": "object",
"properties": {
"dataset": { "type": "string" },
"score": { "type": "number", "minimum": 0.0, "maximum": 1.0 },
"percentile": { "type": "number", "minimum": 0.0, "maximum": 100.0 },
"comparison": {
"type": "object",
"properties": {
"baseline": { "type": "number" },
"improvement": { "type": "number" }
},
"additionalProperties": false
}
},
"additionalProperties": false
},
"feedback": {
"type": "object",
"properties": {
"strengths": { "type": "array", "items": { "type": "string" } },
"improvements": { "type": "array", "items": { "type": "string" } },
"recommendations": {
"type": "array",
"items": {
"type": "object",
"required": ["action", "priority"],
"properties": {
"action": { "type": "string" },
"priority": { "type": "string", "enum": ["low", "medium", "high"] },
"rationale": { "type": "string" }
},
"additionalProperties": false
}
}
},
"additionalProperties": false
}
},
"additionalProperties": false
},
"metadata": {
"type": "object",
"required": ["evaluation_latency_ms", "evaluator_version"],
"properties": {
"evaluation_latency_ms": { "type": "integer", "minimum": 0 },
"evaluator_version": { "type": "string" },
"cost_breakdown": {
"type": "object",
"properties": {
"evaluation_tokens": { "type": "integer", "minimum": 0 },
"benchmark_queries": { "type": "integer", "minimum": 0 },
"total_cost_cents": { "type": "number", "minimum": 0 }
},
"additionalProperties": false
},
"cache_status": { "type": "string", "enum": ["hit", "miss", "partial"] }
},
"additionalProperties": false
},
"warnings": {
"type": "array",
"items": {
"type": "object",
"required": ["code", "message"],
"properties": {
"code": { "type": "string", "enum": ["LOW_CONFIDENCE", "PARTIAL_EVALUATION", "TIMEOUT_RISK", "BENCHMARK_UNAVAILABLE"] },
"message": { "type": "string" },
"impact": { "type": "string", "enum": ["low", "medium", "high"] }
},
"additionalProperties": false
}
},
"error": {
"type": "object",
"required": ["code", "message", "retriable"],
"properties": {
"code": { "type": "string", "enum": ["INVALID_COMPLETION", "TIMEOUT", "SYSTEM_ERROR", "CAPACITY_EXCEEDED"] },
"message": { "type": "string" },
"retriable": { "type": "boolean" },
"retry_after_ms": { "type": "integer", "minimum": 0 }
},
"additionalProperties": false
}
},
"additionalProperties": false
}
Response Examples
Quality Score Response:
json
{
"request_id": "req_eval_001",
"task_id": "task_550e8400-e29b-41d4-a716-446655440000",
"results": {
"quality_score": {
"overall": 0.87,
"dimensions": {
"relevance": 0.92,
"completeness": 0.84,
"accuracy": 0.89,
"coherence": 0.83
},
"confidence": 0.91
},
"feedback": {
"strengths": [
"Comprehensive coverage of JWT authentication concepts",
"Practical debugging steps with specific code examples",
"Good balance of theoretical and implementation details"
],
"improvements": [
"Could include more error handling edge cases",
"Missing discussion of JWT security best practices"
],
"recommendations": [
{
"action": "Add section on JWT expiration handling",
"priority": "medium",
"rationale": "Common source of authentication bugs"
},
{
"action": "Include CSRF protection considerations",
"priority": "high",
"rationale": "Critical security aspect often overlooked"
}
]
}
},
"metadata": {
"evaluation_latency_ms": 1500,
"evaluator_version": "v0.1.2",
"cost_breakdown": {
"evaluation_tokens": 450,
"benchmark_queries": 0,
"total_cost_cents": 2.25
},
"cache_status": "miss"
}
}
Benchmark Response:
json
{
"request_id": "req_eval_002",
"task_id": "task_benchmark_001",
"results": {
"benchmark": {
"dataset": "transformer_explanations_v2",
"score": 0.76,
"percentile": 82.5,
"comparison": {
"baseline": 0.68,
"improvement": 0.08
}
},
"quality_score": {
"overall": 0.79,
"dimensions": {
"relevance": 0.85,
"completeness": 0.72,
"accuracy": 0.81,
"coherence": 0.78
},
"confidence": 0.88
}
},
"metadata": {
"evaluation_latency_ms": 2100,
"evaluator_version": "v0.1.2",
"cost_breakdown": {
"evaluation_tokens": 320,
"benchmark_queries": 5,
"total_cost_cents": 3.80
},
"cache_status": "hit"
}
}
Error Response:
json
{
"request_id": "req_eval_003",
"task_id": "task_error_001",
"results": {},
"metadata": {
"evaluation_latency_ms": 3000,
"evaluator_version": "v0.1.2"
},
"error": {
"code": "TIMEOUT",
"message": "Evaluation operation exceeded deadline of 3000ms",
"retriable": true,
"retry_after_ms": 1000
}
}
HTTP API Specification
Base URL
https://evaluation.mnemoverse.dev/api/v0
Endpoints
POST /evaluate
Submit task completion for quality evaluation.
Request:
- Method: POST
- Path: /evaluate
- Headers:
  - Content-Type: application/json
  - Authorization: Bearer {api_key}
- Body: evaluation_request.v0
Response:
- Status: 200 OK | 400 Bad Request | 500 Internal Server Error | 503 Service Unavailable | 504 Gateway Timeout
- Headers:
  - Content-Type: application/json
  - X-Request-ID: {request_id}
- Body: evaluation_response.v0
GET /benchmarks
List available benchmark datasets.
Response:
json
{
"benchmarks": [
{
"id": "transformer_explanations_v2",
"name": "Transformer Architecture Explanations",
"domain": "research",
"version": "2.1",
"size": 1247
},
{
"id": "code_debugging_v1",
"name": "Code Debugging Tasks",
"domain": "code",
"version": "1.3",
"size": 892
}
],
"timestamp": "2025-09-06T10:00:00Z"
}
GET /health
Health check endpoint.
Response:
json
{
"status": "healthy",
"components": {
"quality_evaluator": "healthy",
"benchmark_engine": "healthy",
"feedback_generator": "degraded",
"cache": "healthy"
},
"timestamp": "2025-09-06T10:00:00Z"
}
Error Handling
Standard Error Codes:
- INVALID_COMPLETION: Malformed completion data or missing required fields
- TIMEOUT: Evaluation operation exceeded deadline
- SYSTEM_ERROR: Internal evaluation system failure
- CAPACITY_EXCEEDED: Evaluation system at capacity, retry later
HTTP Status Mappings:
- INVALID_COMPLETION → 400 Bad Request
- TIMEOUT → 504 Gateway Timeout
- SYSTEM_ERROR → 500 Internal Server Error
- CAPACITY_EXCEEDED → 503 Service Unavailable
Performance Characteristics
SLA Targets
- Availability: 99.9% uptime
- Latency: P95 < 5 seconds for quality scores, P95 < 15 seconds for benchmarks
- Throughput: 50 requests/second per instance
- Accuracy: Quality score correlation > 0.85 with human evaluators
Rate Limiting
- Default: 500 requests/hour per API key
- Burst: 5 requests/second
- Headers: X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Reset
Integration Examples
Orchestration → Evaluation Quality Check
typescript
// L2 (Orchestration) calling L5 (Evaluation)
/**
 * HTTP client used by Orchestration (L2) to submit a completed task to
 * Evaluation (L5) through `POST {baseUrl}/evaluate`.
 */
class EvaluationClient {
  /**
   * @param baseUrl - Root of the Evaluation API without a trailing slash.
   *   Defaults to the base URL published in this contract.
   * @param apiKey - Token sent as `Authorization: Bearer {apiKey}`. The empty
   *   default only keeps zero-argument construction working; callers are
   *   expected to supply a real key.
   */
  constructor(
    private readonly baseUrl: string = 'https://evaluation.mnemoverse.dev/api/v0',
    private readonly apiKey: string = ''
  ) {}

  /**
   * Builds an evaluation request from a finished task and posts it to L5.
   *
   * The request always asks for every evaluation type (`"all"`), with a fixed
   * quality threshold of 0.8 and a fixed deadline of 5000 ms.
   *
   * @param taskResult - Completed task whose fragments, token/latency totals,
   *   entity coverage and context are forwarded to the evaluator.
   * @returns The parsed JSON body of the response. The body is returned as-is
   *   and is not validated against the response schema.
   * @throws Error when the HTTP status is not 2xx. The message starts with
   *   `Evaluation failed: ` and carries the status code, plus the error code
   *   and message from the response body when one is present.
   */
  async evaluate(taskResult: TaskCompletion): Promise<EvaluationResults> {
    const request: EvaluationRequest = {
      request_id: uuidv4(),
      task_id: taskResult.task_id,
      intent: taskResult.intent,
      completion: {
        fragments: taskResult.fragments,
        total_tokens: taskResult.total_tokens,
        latency_ms: taskResult.latency_ms
      },
      metrics: {
        coverage_entities: taskResult.coverage_entities,
        quality_threshold: 0.8,
        provider_latencies: taskResult.provider_latencies
      },
      deadline_ms: 5000,
      evaluation_type: "all",
      context: {
        user_id: taskResult.context.user_id,
        domain: taskResult.context.domain,
        budget_used: taskResult.budget_used
      }
    };

    const response = await fetch(`${this.baseUrl}/evaluate`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${this.apiKey}`
      },
      body: JSON.stringify(request)
    });

    if (!response.ok) {
      throw new Error(`Evaluation failed: ${await this.describeFailure(response)}`);
    }
    return response.json();
  }

  /** Summarises a non-2xx response as `<status> [<code>] <message>`. */
  private async describeFailure(response: Response): Promise<string> {
    // statusText is empty over HTTP/2, so the numeric status always leads.
    const status = response.statusText
      ? `${response.status} ${response.statusText}`
      : `${response.status}`;

    try {
      const body: unknown = await response.json();
      // Shape check only: the body is untrusted until both fields are strings.
      const error = (body as { error?: { code?: unknown; message?: unknown } } | null)?.error;
      if (error && typeof error.code === 'string' && typeof error.message === 'string') {
        return `${status} [${error.code}] ${error.message}`;
      }
    } catch {
      // Body was missing or not JSON; the status line is all we can report.
    }
    return status;
  }
}
Status: Canonical specification ready for implementation
Schema Location: /architecture/contracts/schemas/evaluation_*.v0.json (to be created)