Skip to content

Evaluation Schemas ​

Canonical JSON Schemas for evaluation fixtures and events. Use the schema_version field to version records as the schemas evolve.

gold.jsonl (per-query gold item)

json
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://mnemoverse.dev/schemas/eval/gold.schema.json",
  "title": "EvalGoldItem",
  "type": "object",
  "properties": {
    "schema_version": {
      "type": "string",
      "const": "0.1"
    },
    "id": {
      "type": "string"
    },
    "query": {
      "type": "string"
    },
    "expected_ids": {
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "entities": {
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "layers": {
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "tags": {
      "type": "array",
      "items": {
        "type": "string"
      }
    }
  },
  "required": [
    "id",
    "query",
    "expected_ids",
    "layers",
    "schema_version"
  ]
}

results.jsonl (per-query result) ​

json
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://mnemoverse.dev/schemas/eval/result.schema.json",
  "title": "EvalResultItem",
  "type": "object",
  "properties": {
    "schema_version": {
      "type": "string",
      "const": "0.1"
    },
    "id": {
      "type": "string"
    },
    "request_id": {
      "type": "string"
    },
    "agent_id": {
      "type": "string"
    },
    "user_id": {
      "type": "string"
    },
    "layer": {
      "type": "string"
    },
    "model_id": {
      "type": "string"
    },
    "tool_id": {
      "type": "string"
    },
    "ts": {
      "type": "string",
      "format": "date-time"
    },
    "retrieved_ids": {
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "metrics": {
      "type": "object",
      "properties": {
        "p_at_5": {
          "type": "number"
        },
        "p_at_10": {
          "type": "number"
        },
        "ndcg_10": {
          "type": "number"
        },
        "mrr_10": {
          "type": "number"
        },
        "cov_entities": {
          "type": "number"
        },
        "p50_ms": {
          "type": "number"
        },
        "p95_ms": {
          "type": "number"
        },
        "errors": {
          "type": "integer"
        },
        "privacy_leak": {
          "type": "boolean"
        }
      },
      "additionalProperties": true
    },
    "verdicts": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "metric": {
            "type": "string"
          },
          "value": {
            "type": "number"
          },
          "threshold": {
            "type": "number"
          },
          "pass": {
            "type": "boolean"
          }
        },
        "required": [
          "metric",
          "value",
          "threshold",
          "pass"
        ]
      }
    }
  },
  "required": [
    "id",
    "request_id",
    "metrics",
    "schema_version"
  ]
}

Events (evaluation_observation / evaluation_result / feedback_action) ​

json
{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://mnemoverse.dev/schemas/eval/events.schema.json",
  "title": "EvalEvents",
  "description": "A single evaluation event. An instance must match exactly one oneOf branch; each branch requires a distinct constant in the type property.",
  "type": "object",
  "properties": {
    "schema_version": {
      "description": "Optional version tag of the event schema the record was written against. Not required, so events that omit it remain valid.",
      "type": "string"
    }
  },
  "oneOf": [
    {
      "title": "evaluation_observation",
      "description": "Event variant whose type is the constant evaluation_observation.",
      "type": "object",
      "required": ["type", "request_id", "ts"],
      "properties": {
        "type": { "const": "evaluation_observation" },
        "request_id": { "type": "string" },
        "agent_id": { "type": "string" },
        "user_id": { "type": "string" },
        "layer": { "type": "string" },
        "model_id": { "type": "string" },
        "tool_id": { "type": "string" },
        "ts": { "type": "string", "format": "date-time" },
        "latency_ms": { "type": "number" },
        "tokens": { "type": "integer" },
        "retrieved_ids": { "type": "array", "items": { "type": "string" } },
        "outcome": { "type": "string" }
      }
    },
    {
      "title": "evaluation_result",
      "description": "Event variant whose type is the constant evaluation_result.",
      "type": "object",
      "required": ["type", "request_id", "metric", "value", "ts"],
      "properties": {
        "type": { "const": "evaluation_result" },
        "request_id": { "type": "string" },
        "metric": { "type": "string" },
        "value": { "type": "number" },
        "threshold": { "type": "number" },
        "pass": { "type": "boolean" },
        "observation_ids": { "type": "array", "items": { "type": "string" } },
        "ts": { "type": "string", "format": "date-time" }
      }
    },
    {
      "title": "feedback_action",
      "description": "Event variant whose type is the constant feedback_action; action is restricted to the enumerated values.",
      "type": "object",
      "required": ["type", "request_id", "action", "scope", "ts"],
      "properties": {
        "type": { "const": "feedback_action" },
        "request_id": { "type": "string" },
        "action": { "type": "string", "enum": ["rerank_tweak", "source_blacklist", "tool_blacklist", "retry_policy_tune"] },
        "scope": { "type": "object", "additionalProperties": true },
        "ttl_ms": { "type": "number" },
        "reason": { "type": "string" },
        "ts": { "type": "string", "format": "date-time" }
      }
    }
  ]
}