Evaluation Schemas
Canonical JSON Schemas for evaluation fixtures and events. Use the schema_version field to version records as these schemas evolve.
gold.jsonl (per-query)
json
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://mnemoverse.dev/schemas/eval/gold.schema.json",
  "title": "EvalGoldItem",
  "type": "object",
  "properties": {
    "schema_version": {
      "type": "string",
      "const": "0.1"
    },
    "id": {
      "type": "string"
    },
    "query": {
      "type": "string"
    },
    "expected_ids": {
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "entities": {
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "layers": {
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "tags": {
      "type": "array",
      "items": {
        "type": "string"
      }
    }
  },
  "required": [
    "id",
    "query",
    "expected_ids",
    "layers",
    "schema_version"
  ]
}
results.jsonl (per-query result)
json
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://mnemoverse.dev/schemas/eval/result.schema.json",
  "title": "EvalResultItem",
  "description": "One record of results.jsonl: the evaluation result for a single query.",
  "type": "object",
  "required": ["id", "request_id", "metrics", "schema_version"],
  "properties": {
    "schema_version": {
      "type": "string",
      "const": "0.1",
      "description": "Schema revision this record conforms to."
    },
    "id": { "type": "string" },
    "request_id": { "type": "string" },
    "agent_id": { "type": "string" },
    "user_id": { "type": "string" },
    "layer": { "type": "string" },
    "model_id": { "type": "string" },
    "tool_id": { "type": "string" },
    "ts": { "type": "string", "format": "date-time" },
    "retrieved_ids": { "type": "array", "items": { "type": "string" } },
    "metrics": {
      "type": "object",
      "description": "Named metric values. Metric names not listed here are also accepted.",
      "$comment": "Millisecond durations and the error count are bounded at 0 because negative values are never meaningful. The score metrics are left unbounded here; tighten them once their scale is confirmed.",
      "properties": {
        "p_at_5": { "type": "number" },
        "p_at_10": { "type": "number" },
        "ndcg_10": { "type": "number" },
        "mrr_10": { "type": "number" },
        "cov_entities": { "type": "number" },
        "p50_ms": { "type": "number", "minimum": 0 },
        "p95_ms": { "type": "number", "minimum": 0 },
        "errors": { "type": "integer", "minimum": 0 },
        "privacy_leak": { "type": "boolean" }
      },
      "additionalProperties": true
    },
    "verdicts": {
      "type": "array",
      "description": "Per-metric verdicts. Every entry must carry metric, value, threshold and pass.",
      "items": {
        "type": "object",
        "required": ["metric", "value", "threshold", "pass"],
        "properties": {
          "metric": { "type": "string" },
          "value": { "type": "number" },
          "threshold": { "type": "number" },
          "pass": { "type": "boolean" }
        }
      }
    }
  }
}
Events (evaluation_observation / evaluation_result / feedback_action)
json
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://mnemoverse.dev/schemas/eval/events.schema.json",
  "title": "EvalEvents",
  "description": "An evaluation event. Exactly one variant in oneOf must match; the required `type` property carries a distinct constant per variant.",
  "type": "object",
  "properties": {
    "schema_version": {
      "type": "string",
      "const": "0.1",
      "description": "Schema revision this event conforms to.",
      "$comment": "Declared once here so it applies to every variant. Deliberately not required, so events that omit it still validate."
    }
  },
  "oneOf": [
    {
      "title": "evaluation_observation",
      "type": "object",
      "required": ["type", "request_id", "ts"],
      "properties": {
        "type": { "const": "evaluation_observation" },
        "request_id": { "type": "string" },
        "agent_id": { "type": "string" },
        "user_id": { "type": "string" },
        "layer": { "type": "string" },
        "model_id": { "type": "string" },
        "tool_id": { "type": "string" },
        "ts": { "type": "string", "format": "date-time" },
        "latency_ms": { "type": "number", "minimum": 0 },
        "tokens": { "type": "integer", "minimum": 0 },
        "retrieved_ids": { "type": "array", "items": { "type": "string" } },
        "outcome": { "type": "string" }
      }
    },
    {
      "title": "evaluation_result",
      "type": "object",
      "required": ["type", "request_id", "metric", "value", "ts"],
      "properties": {
        "type": { "const": "evaluation_result" },
        "request_id": { "type": "string" },
        "metric": { "type": "string" },
        "value": { "type": "number" },
        "threshold": { "type": "number" },
        "pass": { "type": "boolean" },
        "observation_ids": { "type": "array", "items": { "type": "string" } },
        "ts": { "type": "string", "format": "date-time" }
      }
    },
    {
      "title": "feedback_action",
      "type": "object",
      "required": ["type", "request_id", "action", "scope", "ts"],
      "properties": {
        "type": { "const": "feedback_action" },
        "request_id": { "type": "string" },
        "action": {
          "type": "string",
          "enum": ["rerank_tweak", "source_blacklist", "tool_blacklist", "retry_policy_tune"]
        },
        "scope": {
          "type": "object",
          "description": "Free-form object; any properties are accepted.",
          "additionalProperties": true
        },
        "ttl_ms": { "type": "number", "minimum": 0 },
        "reason": { "type": "string" },
        "ts": { "type": "string", "format": "date-time" }
      }
    }
  ]
}