L4 Data Models β
ExperienceRecord schema and minimal indices for querying.
Core Data Structures β
ExperienceEvent Storage Schema β
sql
-- Primary events table with partitioning by date
CREATE TABLE experience_events (
id UUID PRIMARY KEY,
request_id VARCHAR(255) NOT NULL,
ts_ms BIGINT NOT NULL,
-- Actor information
actor_type VARCHAR(20) NOT NULL CHECK (actor_type IN ('user', 'agent')),
actor_id VARCHAR(255) NOT NULL,
-- Context
channel VARCHAR(20) NOT NULL CHECK (channel IN ('tool', 'chat', 'code', 'api')),
intent TEXT NOT NULL,
session_id VARCHAR(255),
project_id VARCHAR(255),
-- Content (JSON fields for flexibility)
input_data JSONB,
output_data JSONB,
-- Outcome tracking
outcome_status VARCHAR(20) NOT NULL CHECK (outcome_status IN ('success', 'fail', 'partial')),
outcome_error_code VARCHAR(100),
-- Entity and reference linkage
entities TEXT[], -- Array of L1 entity IDs
refs TEXT[], -- Array of L1 document/resource IDs
-- Privacy controls
privacy_mode VARCHAR(10) NOT NULL CHECK (privacy_mode IN ('allow', 'redact', 'block')),
pii_detected BOOLEAN DEFAULT FALSE,
redaction_applied BOOLEAN DEFAULT FALSE,
-- Operational metadata
kv_policy_hint VARCHAR(20) CHECK (kv_policy_hint IN ('pin', 'compress', 'evict')),
ttl_ms BIGINT,
-- Feedback and quality
feedback_rating INTEGER CHECK (feedback_rating BETWEEN 1 AND 5),
feedback_tags TEXT[],
feedback_note TEXT,
-- Indexing and search optimization
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
) PARTITION BY RANGE (ts_ms);
-- Create monthly partitions for performance
CREATE TABLE experience_events_y2025m09 PARTITION OF experience_events
FOR VALUES FROM (1725148800000) TO (1727827200000); -- Sept 2025
-- Optimized indices for common query patterns
CREATE INDEX idx_experience_events_request_id ON experience_events (request_id);
CREATE INDEX idx_experience_events_actor ON experience_events (actor_type, actor_id);
CREATE INDEX idx_experience_events_session ON experience_events (session_id) WHERE session_id IS NOT NULL;
CREATE INDEX idx_experience_events_entities ON experience_events USING GIN (entities);
CREATE INDEX idx_experience_events_privacy ON experience_events (privacy_mode, pii_detected);
CREATE INDEX idx_experience_events_outcome ON experience_events (outcome_status, ts_ms);
ExperienceUnit Storage Schema β
sql
-- Summarized experience units for fast retrieval
CREATE TABLE experience_units (
id UUID PRIMARY KEY,
version VARCHAR(10) NOT NULL DEFAULT 'v0',
-- Source traceability
from_event_ids UUID[] NOT NULL,
session_id VARCHAR(255),
project_id VARCHAR(255),
-- Core content
summary TEXT NOT NULL,
summary_embedding VECTOR(384), -- For semantic search
-- Entity and reference preservation
entities TEXT[],
refs TEXT[],
-- Temporal information
recency_ts_ms BIGINT NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
-- Quality and relevance scoring
quality_score DECIMAL(3,2) CHECK (quality_score >= 0 AND quality_score <= 1),
-- Privacy preservation
privacy_mode VARCHAR(10) NOT NULL CHECK (privacy_mode IN ('allow', 'redact', 'block')),
original_privacy_modes TEXT[], -- Track source event privacy modes
-- Storage optimization
storage_size_bytes INTEGER,
compression_ratio DECIMAL(4,3),
-- Operational metadata
pinned BOOLEAN DEFAULT FALSE,
access_count INTEGER DEFAULT 0,
last_accessed_at TIMESTAMP WITH TIME ZONE,
-- Embedding reference for external vector store
embedding_ref VARCHAR(255)
);
-- Performance indices for retrieval patterns
CREATE INDEX idx_experience_units_recency ON experience_units (recency_ts_ms DESC);
CREATE INDEX idx_experience_units_quality ON experience_units (quality_score DESC);
CREATE INDEX idx_experience_units_entities ON experience_units USING GIN (entities);
CREATE INDEX idx_experience_units_privacy ON experience_units (privacy_mode);
CREATE INDEX idx_experience_units_session ON experience_units (session_id) WHERE session_id IS NOT NULL;
-- Vector similarity search (if using PostgreSQL with pgvector)
CREATE INDEX idx_experience_units_embedding ON experience_units
USING ivfflat (summary_embedding vector_cosine_ops) WITH (lists = 1000);
Production Implementation Framework β
Database Performance Configuration β
yaml
postgresql_optimization:
shared_buffers: 4GB
effective_cache_size: 12GB
random_page_cost: 1.1 # Optimized for SSD
seq_page_cost: 1.0
# Connection pooling
max_connections: 200
connection_pooler: pgbouncer
pool_size: 50
# Query optimization
work_mem: 256MB
maintenance_work_mem: 1GB
checkpoint_completion_target: 0.9
# Monitoring
log_statement_stats: on
log_duration: on
log_min_duration_statement: 100ms
Data Model Testing Framework β
typescript
describe('L4 Data Model Tests', () => {
describe('Experience Event Storage', () => {
test('stores and retrieves events correctly', async () => {
const test_event = {
id: '550e8400-e29b-41d4-a716-446655440000',
request_id: 'req_test_001',
ts_ms: Date.now(),
actor_type: 'user',
actor_id: 'user_123',
channel: 'tool',
intent: 'implement authentication middleware',
input_data: { text: 'Need JWT validation for API endpoints' },
output_data: { text: 'Created middleware with token validation' },
outcome_status: 'success',
entities: ['ent.jwt', 'ent.middleware'],
privacy_mode: 'allow',
feedback_rating: 5
};
const stored_id = await dataLayer.storeExperienceEvent(test_event);
const retrieved = await dataLayer.getExperienceEvent(stored_id);
expect(retrieved.id).toBe(test_event.id);
expect(retrieved.intent).toBe(test_event.intent);
expect(retrieved.entities).toEqual(test_event.entities);
expect(retrieved.privacy_mode).toBe(test_event.privacy_mode);
});
test('handles privacy modes correctly', async () => {
const redacted_event = {
input_data: { text: 'User email: john.doe@company.com' },
privacy_mode: 'redact',
pii_detected: true,
redaction_applied: true
};
const stored = await dataLayer.storeExperienceEvent(redacted_event);
const retrieved = await dataLayer.getExperienceEvent(stored.id);
expect(retrieved.privacy_mode).toBe('redact');
expect(retrieved.pii_detected).toBe(true);
expect(retrieved.redaction_applied).toBe(true);
expect(retrieved.input_data.text).not.toContain('john.doe@company.com');
});
});
describe('Experience Unit Storage', () => {
test('creates units from event clusters', async () => {
const event_ids = [
'550e8400-e29b-41d4-a716-446655440001',
'550e8400-e29b-41d4-a716-446655440002'
];
const experience_unit = {
from_event_ids: event_ids,
summary: 'Successfully implemented JWT authentication with middleware',
entities: ['ent.jwt', 'ent.middleware'],
quality_score: 0.92,
privacy_mode: 'allow'
};
const stored_unit = await dataLayer.storeExperienceUnit(experience_unit);
const retrieved_unit = await dataLayer.getExperienceUnit(stored_unit.id);
expect(retrieved_unit.from_event_ids).toEqual(event_ids);
expect(retrieved_unit.quality_score).toBe(0.92);
expect(retrieved_unit.summary).toContain('JWT authentication');
});
test('vector similarity search works correctly', async () => {
// Create test units with known relationships
const units = [
{ summary: 'JWT authentication implementation', entities: ['ent.jwt'] },
{ summary: 'Database connection pooling setup', entities: ['ent.database'] },
{ summary: 'API rate limiting middleware', entities: ['ent.api'] }
];
for (const unit of units) {
await dataLayer.storeExperienceUnit(unit);
}
// Search for JWT-related content
const results = await dataLayer.searchSimilarUnits({
query_embedding: await generateEmbedding('JWT authentication'),
similarity_threshold: 0.7,
limit: 3
});
expect(results.length).toBeGreaterThan(0);
expect(results[0].summary).toContain('JWT');
expect(results[0].similarity_score).toBeGreaterThan(0.7);
});
});
describe('Query Performance', () => {
test('retrieval queries meet performance SLA', async () => {
// Create test data
await seedTestData(1000); // 1000 experience units
const performance_queries = [
{ entities: ['ent.authentication'], expected_results: 50 },
{ privacy_mode: 'allow', quality_min: 0.8, expected_results: 200 },
{ recency_days: 7, expected_results: 100 }
];
for (const query of performance_queries) {
const start = Date.now();
const results = await dataLayer.queryExperienceUnits(query);
const latency = Date.now() - start;
expect(latency).toBeLessThan(5); // < 5ms for data layer queries
expect(results.length).toBeLessThanOrEqual(query.expected_results);
}
});
});
});
Data Lifecycle Management β
typescript
interface DataLifecycleManager {
// Automated data retention and cleanup
scheduleDataCleanup(): Promise<void>;
// Privacy-compliant data archival
archiveOldEvents(cutoff_date: Date): Promise<ArchivalResult>;
// Performance optimization through data partitioning
createMonthlyPartition(year: number, month: number): Promise<void>;
// Quality-based data curation
promoteHighQualityUnits(): Promise<CurationResult>;
}
class ProductionDataLifecycleManager implements DataLifecycleManager {
async scheduleDataCleanup(): Promise<void> {
const cleanup_schedule = {
experience_events: {
retention_days: 90,
partition_cleanup: 'monthly',
privacy_compliance: 'automatic_erasure'
},
experience_units: {
retention_days: 365,
quality_threshold: 0.6, // Archive low-quality units
access_based_retention: true
}
};
// Schedule cleanup jobs
await this.scheduleJob('cleanup-events', cleanup_schedule.experience_events);
await this.scheduleJob('cleanup-units', cleanup_schedule.experience_units);
}
async archiveOldEvents(cutoff_date: Date): Promise<ArchivalResult> {
const archive_query = `
INSERT INTO experience_events_archive
SELECT * FROM experience_events
WHERE created_at < $1 AND privacy_mode != 'block'
`;
const archived_count = await this.db.query(archive_query, [cutoff_date]);
// Remove archived events from main table
await this.db.query(`
DELETE FROM experience_events
WHERE created_at < $1
`, [cutoff_date]);
return { archived_events: archived_count, cutoff_date };
}
}
Monitoring & Observability β
yaml
data_layer_monitoring:
metrics:
# Database performance
- name: l4_database_query_duration_seconds
type: histogram
buckets: [0.001, 0.005, 0.01, 0.05, 0.1]
labels: [query_type]
- name: l4_database_connections_active
type: gauge
description: "Active database connections"
# Data quality
- name: l4_data_quality_score
type: histogram
description: "Distribution of data quality scores"
- name: l4_storage_utilization_bytes
type: gauge
labels: [table_name]
alerts:
- name: L4DatabaseSlowQueries
condition: l4_database_query_duration_seconds{quantile="0.95"} > 0.01
severity: warning
- name: L4DatabaseConnectionsHigh
condition: l4_database_connections_active > 150
severity: warning