Skip to content

L4 Data Models ​

ExperienceRecord schema and minimal indices for querying.

Core Data Structures ​

ExperienceEvent Storage Schema ​

sql
-- Primary events table with partitioning by date
CREATE TABLE experience_events (
    id UUID PRIMARY KEY,
    request_id VARCHAR(255) NOT NULL,
    ts_ms BIGINT NOT NULL,
    
    -- Actor information
    actor_type VARCHAR(20) NOT NULL CHECK (actor_type IN ('user', 'agent')),
    actor_id VARCHAR(255) NOT NULL,
    
    -- Context
    channel VARCHAR(20) NOT NULL CHECK (channel IN ('tool', 'chat', 'code', 'api')),
    intent TEXT NOT NULL,
    session_id VARCHAR(255),
    project_id VARCHAR(255),
    
    -- Content (JSON fields for flexibility)
    input_data JSONB,
    output_data JSONB,
    
    -- Outcome tracking
    outcome_status VARCHAR(20) NOT NULL CHECK (outcome_status IN ('success', 'fail', 'partial')),
    outcome_error_code VARCHAR(100),
    
    -- Entity and reference linkage
    entities TEXT[], -- Array of L1 entity IDs
    refs TEXT[], -- Array of L1 document/resource IDs
    
    -- Privacy controls
    privacy_mode VARCHAR(10) NOT NULL CHECK (privacy_mode IN ('allow', 'redact', 'block')),
    pii_detected BOOLEAN DEFAULT FALSE,
    redaction_applied BOOLEAN DEFAULT FALSE,
    
    -- Operational metadata
    kv_policy_hint VARCHAR(20) CHECK (kv_policy_hint IN ('pin', 'compress', 'evict')),
    ttl_ms BIGINT,
    
    -- Feedback and quality
    feedback_rating INTEGER CHECK (feedback_rating BETWEEN 1 AND 5),
    feedback_tags TEXT[],
    feedback_note TEXT,
    
    -- Indexing and search optimization
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
) PARTITION BY RANGE (ts_ms);

-- Create monthly partitions for performance
CREATE TABLE experience_events_y2025m09 PARTITION OF experience_events
    FOR VALUES FROM (1725148800000) TO (1727827200000); -- Sept 2025

-- Optimized indices for common query patterns
CREATE INDEX idx_experience_events_request_id ON experience_events (request_id);
CREATE INDEX idx_experience_events_actor ON experience_events (actor_type, actor_id);
CREATE INDEX idx_experience_events_session ON experience_events (session_id) WHERE session_id IS NOT NULL;
CREATE INDEX idx_experience_events_entities ON experience_events USING GIN (entities);
CREATE INDEX idx_experience_events_privacy ON experience_events (privacy_mode, pii_detected);
CREATE INDEX idx_experience_events_outcome ON experience_events (outcome_status, ts_ms);

ExperienceUnit Storage Schema ​

sql
-- Summarized experience units for fast retrieval
CREATE TABLE experience_units (
    id UUID PRIMARY KEY,
    version VARCHAR(10) NOT NULL DEFAULT 'v0',
    
    -- Source traceability
    from_event_ids UUID[] NOT NULL,
    session_id VARCHAR(255),
    project_id VARCHAR(255),
    
    -- Core content
    summary TEXT NOT NULL,
    summary_embedding VECTOR(384), -- For semantic search
    
    -- Entity and reference preservation
    entities TEXT[],
    refs TEXT[],
    
    -- Temporal information
    recency_ts_ms BIGINT NOT NULL,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
    
    -- Quality and relevance scoring
    quality_score DECIMAL(3,2) CHECK (quality_score >= 0 AND quality_score <= 1),
    
    -- Privacy preservation
    privacy_mode VARCHAR(10) NOT NULL CHECK (privacy_mode IN ('allow', 'redact', 'block')),
    original_privacy_modes TEXT[], -- Track source event privacy modes
    
    -- Storage optimization
    storage_size_bytes INTEGER,
    compression_ratio DECIMAL(4,3),
    
    -- Operational metadata
    pinned BOOLEAN DEFAULT FALSE,
    access_count INTEGER DEFAULT 0,
    last_accessed_at TIMESTAMP WITH TIME ZONE,
    
    -- Embedding reference for external vector store
    embedding_ref VARCHAR(255)
);

-- Performance indices for retrieval patterns
CREATE INDEX idx_experience_units_recency ON experience_units (recency_ts_ms DESC);
CREATE INDEX idx_experience_units_quality ON experience_units (quality_score DESC);
CREATE INDEX idx_experience_units_entities ON experience_units USING GIN (entities);
CREATE INDEX idx_experience_units_privacy ON experience_units (privacy_mode);
CREATE INDEX idx_experience_units_session ON experience_units (session_id) WHERE session_id IS NOT NULL;

-- Vector similarity search (if using PostgreSQL with pgvector)
CREATE INDEX idx_experience_units_embedding ON experience_units 
    USING ivfflat (summary_embedding vector_cosine_ops) WITH (lists = 1000);

Production Implementation Framework ​

Database Performance Configuration ​

yaml
postgresql_optimization:
  shared_buffers: 4GB
  effective_cache_size: 12GB
  random_page_cost: 1.1  # Optimized for SSD
  seq_page_cost: 1.0
  
  # Connection pooling
  max_connections: 200
  connection_pooler: pgbouncer
  pool_size: 50
  
  # Query optimization
  work_mem: 256MB
  maintenance_work_mem: 1GB
  checkpoint_completion_target: 0.9
  
  # Monitoring
  log_statement_stats: on
  log_duration: on
  log_min_duration_statement: 100ms

Data Model Testing Framework ​

typescript
describe('L4 Data Model Tests', () => {
  describe('Experience Event Storage', () => {
    test('stores and retrieves events correctly', async () => {
      const test_event = {
        id: '550e8400-e29b-41d4-a716-446655440000',
        request_id: 'req_test_001',
        ts_ms: Date.now(),
        actor_type: 'user',
        actor_id: 'user_123',
        channel: 'tool',
        intent: 'implement authentication middleware',
        input_data: { text: 'Need JWT validation for API endpoints' },
        output_data: { text: 'Created middleware with token validation' },
        outcome_status: 'success',
        entities: ['ent.jwt', 'ent.middleware'],
        privacy_mode: 'allow',
        feedback_rating: 5
      };
      
      const stored_id = await dataLayer.storeExperienceEvent(test_event);
      const retrieved = await dataLayer.getExperienceEvent(stored_id);
      
      expect(retrieved.id).toBe(test_event.id);
      expect(retrieved.intent).toBe(test_event.intent);
      expect(retrieved.entities).toEqual(test_event.entities);
      expect(retrieved.privacy_mode).toBe(test_event.privacy_mode);
    });
    
    test('handles privacy modes correctly', async () => {
      const redacted_event = {
        input_data: { text: 'User email: john.doe@company.com' },
        privacy_mode: 'redact',
        pii_detected: true,
        redaction_applied: true
      };
      
      const stored = await dataLayer.storeExperienceEvent(redacted_event);
      const retrieved = await dataLayer.getExperienceEvent(stored.id);
      
      expect(retrieved.privacy_mode).toBe('redact');
      expect(retrieved.pii_detected).toBe(true);
      expect(retrieved.redaction_applied).toBe(true);
      expect(retrieved.input_data.text).not.toContain('john.doe@company.com');
    });
  });
  
  describe('Experience Unit Storage', () => {
    test('creates units from event clusters', async () => {
      const event_ids = [
        '550e8400-e29b-41d4-a716-446655440001',
        '550e8400-e29b-41d4-a716-446655440002'
      ];
      
      const experience_unit = {
        from_event_ids: event_ids,
        summary: 'Successfully implemented JWT authentication with middleware',
        entities: ['ent.jwt', 'ent.middleware'],
        quality_score: 0.92,
        privacy_mode: 'allow'
      };
      
      const stored_unit = await dataLayer.storeExperienceUnit(experience_unit);
      const retrieved_unit = await dataLayer.getExperienceUnit(stored_unit.id);
      
      expect(retrieved_unit.from_event_ids).toEqual(event_ids);
      expect(retrieved_unit.quality_score).toBe(0.92);
      expect(retrieved_unit.summary).toContain('JWT authentication');
    });
    
    test('vector similarity search works correctly', async () => {
      // Create test units with known relationships
      const units = [
        { summary: 'JWT authentication implementation', entities: ['ent.jwt'] },
        { summary: 'Database connection pooling setup', entities: ['ent.database'] },
        { summary: 'API rate limiting middleware', entities: ['ent.api'] }
      ];
      
      for (const unit of units) {
        await dataLayer.storeExperienceUnit(unit);
      }
      
      // Search for JWT-related content
      const results = await dataLayer.searchSimilarUnits({
        query_embedding: await generateEmbedding('JWT authentication'),
        similarity_threshold: 0.7,
        limit: 3
      });
      
      expect(results.length).toBeGreaterThan(0);
      expect(results[0].summary).toContain('JWT');
      expect(results[0].similarity_score).toBeGreaterThan(0.7);
    });
  });
  
  describe('Query Performance', () => {
    test('retrieval queries meet performance SLA', async () => {
      // Create test data
      await seedTestData(1000); // 1000 experience units
      
      const performance_queries = [
        { entities: ['ent.authentication'], expected_results: 50 },
        { privacy_mode: 'allow', quality_min: 0.8, expected_results: 200 },
        { recency_days: 7, expected_results: 100 }
      ];
      
      for (const query of performance_queries) {
        const start = Date.now();
        const results = await dataLayer.queryExperienceUnits(query);
        const latency = Date.now() - start;
        
        expect(latency).toBeLessThan(5); // < 5ms for data layer queries
        expect(results.length).toBeLessThanOrEqual(query.expected_results);
      }
    });
  });
});

Data Lifecycle Management ​

typescript
interface DataLifecycleManager {
  // Automated data retention and cleanup
  scheduleDataCleanup(): Promise<void>;
  
  // Privacy-compliant data archival
  archiveOldEvents(cutoff_date: Date): Promise<ArchivalResult>;
  
  // Performance optimization through data partitioning
  createMonthlyPartition(year: number, month: number): Promise<void>;
  
  // Quality-based data curation
  promoteHighQualityUnits(): Promise<CurationResult>;
}

class ProductionDataLifecycleManager implements DataLifecycleManager {
  async scheduleDataCleanup(): Promise<void> {
    const cleanup_schedule = {
      experience_events: {
        retention_days: 90,
        partition_cleanup: 'monthly',
        privacy_compliance: 'automatic_erasure'
      },
      experience_units: {
        retention_days: 365,
        quality_threshold: 0.6, // Archive low-quality units
        access_based_retention: true
      }
    };
    
    // Schedule cleanup jobs
    await this.scheduleJob('cleanup-events', cleanup_schedule.experience_events);
    await this.scheduleJob('cleanup-units', cleanup_schedule.experience_units);
  }
  
  async archiveOldEvents(cutoff_date: Date): Promise<ArchivalResult> {
    const archive_query = `
      INSERT INTO experience_events_archive 
      SELECT * FROM experience_events 
      WHERE created_at < $1 AND privacy_mode != 'block'
    `;
    
    const archived_count = await this.db.query(archive_query, [cutoff_date]);
    
    // Remove archived events from main table
    await this.db.query(`
      DELETE FROM experience_events 
      WHERE created_at < $1
    `, [cutoff_date]);
    
    return { archived_events: archived_count, cutoff_date };
  }
}

Monitoring & Observability ​

yaml
data_layer_monitoring:
  metrics:
    # Database performance
    - name: l4_database_query_duration_seconds
      type: histogram
      buckets: [0.001, 0.005, 0.01, 0.05, 0.1]
      labels: [query_type]
      
    - name: l4_database_connections_active
      type: gauge
      description: "Active database connections"
      
    # Data quality
    - name: l4_data_quality_score
      type: histogram
      description: "Distribution of data quality scores"
      
    - name: l4_storage_utilization_bytes
      type: gauge
      labels: [table_name]
      
  alerts:
    - name: L4DatabaseSlowQueries
      condition: l4_database_query_duration_seconds{quantile="0.95"} > 0.01
      severity: warning
      
    - name: L4DatabaseConnectionsHigh
      condition: l4_database_connections_active > 150
      severity: warning