Row Key: {document_id}#{timestamp_reverse}#{revision_id}
Column Families:-content: Document content and metadata
-operations: Individual operations that created this revision
-user: User information and attribution
-diff: Differences from previous revision
Example Row:
doc_12345#9223372036854775807-1642248000000#rev_67890content:full_content:<compressed document content>word_count:1500character_count:8500content_hash:"sha256_hash"operations:operation_count:15operations_json:<compressed operations array>operation_types:"insert,delete,format"user:user_id:"user_uuid"display_name:"John Doe"user_agent:"Chrome/96.0"ip_address_hash:"hashed_ip"diff:diff_size:150diff_json:<compressed diff data>change_summary:"Added paragraph, fixed typos"
Operation Log Table
Row Key: {document_id}#{timestamp}#{operation_id}
Column Families:-op: Operation details
-transform: Transformation information
-client: Client context
Example Row:
doc_12345#1642248000000#op_abc123op:type:"insert"position:150content:"Hello World"length:11transform:base_version:1233transformed_version:1234concurrent_ops:2transform_time_ms:5client:user_id:"user_uuid"client_id:"client_uuid"session_id:"session_uuid"timestamp:1642248000000
Document Snapshots Table
Row Key: {document_id}#{snapshot_type}#{timestamp}
Column Families:-snapshot: Full document snapshot
-metadata: Snapshot metadata
Snapshot Types:-daily: Daily snapshots for quick recovery
-major: Major version snapshots (every 100 versions)
-manual: User-created snapshots
Example Row:
doc_12345#daily#1642204800000snapshot:content:<compressed full document>version:1200size_bytes:25600compression_ratio:0.3metadata:word_count:1500collaborator_count:5comment_count:12created_reason:"daily_snapshot"
Database Partitioning Strategy
Spanner Partitioning
-- Documents partitioned by owner_id for locality-- Interleaved tables for related data co-location-- Partition by user for better localityCREATE TABLE documents (
owner_id STRING(36) NOT NULL,
document_id STRING(36) NOT NULL,
...
) PRIMARY KEY (owner_id, document_id);
-- Interleave permissions with documentsCREATE TABLE document_permissions (
owner_id STRING(36) NOT NULL,
document_id STRING(36) NOT NULL,
permission_id STRING(36) NOT NULL,
...
) PRIMARY KEY (owner_id, document_id, permission_id),
INTERLEAVE IN PARENT documents (owner_id, document_id);
-- Archive old revisions (keep last 100 per document)CREATEOR REPLACE PROCEDURE archive_old_revisions()
BEGINDECLARE document_cursor CURSORFORSELECT document_id FROM documents WHERE status ='active';
FOR document_record IN document_cursor DO
-- Keep only last 100 revisions per documentDELETEFROM document_revisions
WHERE document_id = document_record.document_id
AND revision_id NOTIN (
SELECT revision_id
FROM document_revisions
WHERE document_id = document_record.document_id
ORDERBY created_at DESC
LIMIT 100
);
ENDFOR;
END;
Bigtable TTL Configuration
// Configure TTL for different data typesconst bigtableConfig = {
revisions: {
maxAge: '365d', // 1 year retentionmaxVersions: 100// Keep last 100 versions
},
operations: {
maxAge: '90d', // 3 months retentionmaxVersions: 1000// Keep last 1000 operations
},
snapshots: {
maxAge: '2555d', // 7 years retentionmaxVersions: 50// Keep last 50 snapshots
}
};
Performance Optimization
Indexing Strategy
-- Composite indexes for common query patternsCREATE INDEX idx_documents_owner_updated
ON documents (owner_id, updated_at DESC);
CREATE INDEX idx_documents_collaborator_access
ON document_collaborators (user_id, last_accessed_at DESC);
CREATE INDEX idx_comments_document_status
ON comments (document_id, status, created_at DESC);
-- Covering indexes to avoid table lookupsCREATE INDEX idx_documents_list_view
ON documents (owner_id, status, updated_at DESC)
STORING (title, sharing_mode, word_count);
Query Optimization
-- Optimized query for user's recent documentsSELECT d.document_id, d.title, d.updated_at, d.word_count
FROM documents d
WHERE d.owner_id =@user_idAND d.status ='active'ORDERBY d.updated_at DESC
LIMIT 50;
-- Optimized query for document collaboratorsSELECT dc.user_id, dc.display_name, dc.role, dc.is_active
FROM document_collaborators dc
WHERE dc.document_id =@document_idAND dc.last_accessed_at > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL30DAY)
ORDERBY dc.is_active DESC, dc.last_accessed_at DESC;