CRDT Data Structures
Date: 2025-01-05
Superseded: This document describes a custom CRDT implementation approach. After evaluation, we decided to use Automerge instead, which provides battle-tested CRDT primitives. See 504-automerge-migration for the comparison and 505-automerge-migration-plan for the implementation plan.
This document details how Minuta’s data structures would transform to support CRDT (Conflict-free Replicated Data Types) for automatic multi-device sync with deterministic conflict resolution.
Current Data Structures
Tag
public struct Tag: Identifiable, Codable, Hashable, Sendable {
public let id: UUID
public var name: String
public var color: String // Hex color "#RRGGBB"
public let createdAt: Date
public var isArchived: Bool
} TimeRecord
public struct TimeRecord: Identifiable, Codable, Hashable, Sendable {
public let id: UUID
public var startTime: Date
public var endTime: Date? // nil = running timer
public var tagId: UUID? // nil = untagged
public var comment: String?
public var images: [String] // filenames of attached images
} Current Storage Format
~/Documents/Minuta/
├── tags.json # All tags in one file
└── records/
└── 2024/
└── 01/
└── 2024-01-15_093000.json # One file per record tags.json:
{
"tags": [
{
"id": "550e8400-e29b-41d4-a716-446655440000",
"name": "Work",
"color": "#4A90D9",
"createdAt": "2024-01-01T00:00:00Z",
"isArchived": false
}
]
} record.json:
{
"id": "123e4567-e89b-12d3-a456-426614174000",
"startTime": "2024-01-15T09:30:00Z",
"endTime": "2024-01-15T11:45:00Z",
"tagId": "550e8400-e29b-41d4-a716-446655440000",
"comment": "Working on feature X",
"images": ["123e4567_0.jpg", "123e4567_1.jpg"]
} CRDT Primitive Types
Hybrid Logical Clock (HLC)
Combines wall clock with logical counter to handle clock skew across devices:
/// Hybrid Logical Clock for ordering events across devices
public struct HLC: Codable, Comparable, Hashable, Sendable {
public var timestamp: UInt64 // Milliseconds since epoch
public var counter: UInt32 // Logical counter for same-ms events
public var deviceId: String // Unique device identifier
public init(timestamp: UInt64 = 0, counter: UInt32 = 0, deviceId: String) {
self.timestamp = timestamp
self.counter = counter
self.deviceId = deviceId
}
/// Create a new HLC for current time
public static func now(deviceId: String) -> HLC {
HLC(
timestamp: UInt64(Date().timeIntervalSince1970 * 1000),
counter: 0,
deviceId: deviceId
)
}
/// Tick the clock for a local event
public mutating func tick() {
let now = UInt64(Date().timeIntervalSince1970 * 1000)
if now > timestamp {
timestamp = now
counter = 0
} else {
counter += 1
}
}
/// Update clock after receiving remote event
public mutating func receive(_ remote: HLC) {
let now = UInt64(Date().timeIntervalSince1970 * 1000)
let maxTs = max(now, max(timestamp, remote.timestamp))
if maxTs == timestamp && maxTs == remote.timestamp {
counter = max(counter, remote.counter) + 1
} else if maxTs == timestamp {
counter += 1
} else if maxTs == remote.timestamp {
counter = remote.counter + 1
} else {
counter = 0
}
timestamp = maxTs
}
public static func < (lhs: HLC, rhs: HLC) -> Bool {
if lhs.timestamp != rhs.timestamp {
return lhs.timestamp < rhs.timestamp
}
if lhs.counter != rhs.counter {
return lhs.counter < rhs.counter
}
return lhs.deviceId < rhs.deviceId // Deterministic tiebreaker
}
} LWW-Register (Last-Writer-Wins)
A register where the value with the highest clock wins:
/// A register where the value with the highest clock wins
public struct LWWRegister<T: Codable & Sendable>: Codable, Sendable {
public var value: T
public var clock: HLC
public init(value: T, clock: HLC) {
self.value = value
self.clock = clock
}
/// Merge with another register - highest clock wins
public func merged(with other: LWWRegister<T>) -> LWWRegister<T> {
other.clock > self.clock ? other : self
}
/// Update value with new clock
public mutating func set(_ newValue: T, clock: HLC) {
if clock > self.clock {
self.value = newValue
self.clock = clock
}
}
}
extension LWWRegister: Equatable where T: Equatable {
public static func == (lhs: LWWRegister<T>, rhs: LWWRegister<T>) -> Bool {
lhs.value == rhs.value && lhs.clock == rhs.clock
}
} OR-Set (Observed-Remove Set)
A set where adds and removes can happen concurrently:
/// A set where adds and removes can happen concurrently
/// Each element has a unique tag; remove only affects known tags
public struct ORSet<T: Codable & Hashable & Sendable>: Codable, Sendable {
/// Each element is stored with unique add-tags
public var elements: [T: Set<ElementTag>]
public struct ElementTag: Codable, Hashable, Sendable {
let clock: HLC
var isDeleted: Bool
}
public init() {
self.elements = [:]
}
/// Current values (excluding deleted)
public var values: Set<T> {
Set(elements.compactMap { element, tags in
tags.contains { !$0.isDeleted } ? element : nil
})
}
/// Add an element
public mutating func add(_ element: T, clock: HLC) {
let tag = ElementTag(clock: clock, isDeleted: false)
if elements[element] != nil {
elements[element]?.insert(tag)
} else {
elements[element] = [tag]
}
}
/// Remove an element (marks all current tags as deleted)
public mutating func remove(_ element: T, clock: HLC) {
guard var tags = elements[element] else { return }
tags = Set(tags.map { tag in
var newTag = tag
if clock > tag.clock {
newTag.isDeleted = true
}
return newTag
})
elements[element] = tags
}
/// Merge with another OR-Set
public func merged(with other: ORSet<T>) -> ORSet<T> {
var result = ORSet<T>()
let allKeys = Set(elements.keys).union(other.elements.keys)
for key in allKeys {
let selfTags = elements[key] ?? []
let otherTags = other.elements[key] ?? []
result.elements[key] = selfTags.union(otherTags)
}
return result
}
} Tombstone
Tracks whether an entity has been deleted:
/// Tracks whether an entity has been deleted
public struct Tombstone: Codable, Sendable {
public var isDeleted: Bool
public var deletedAt: HLC?
public init(isDeleted: Bool = false, deletedAt: HLC? = nil) {
self.isDeleted = isDeleted
self.deletedAt = deletedAt
}
public mutating func markDeleted(clock: HLC) {
if !isDeleted || (deletedAt != nil && clock > deletedAt!) {
isDeleted = true
deletedAt = clock
}
}
public func merged(with other: Tombstone) -> Tombstone {
if isDeleted && other.isDeleted {
if let selfClock = deletedAt, let otherClock = other.deletedAt {
return selfClock > otherClock ? self : other
}
}
if isDeleted { return self }
if other.isDeleted { return other }
return Tombstone()
}
} CRDT-Enabled Models
TagCRDT
/// CRDT-enabled Tag
public struct TagCRDT: Identifiable, Codable, Sendable {
// Identity (immutable)
public let id: UUID
// CRDT fields
public var name: LWWRegister<String>
public var color: LWWRegister<String>
public var createdAt: LWWRegister<Date>
public var isArchived: LWWRegister<Bool>
// Deletion tracking
public var tombstone: Tombstone
// Metadata
public var lastModified: HLC
public init(
id: UUID = UUID(),
name: String,
color: String,
createdAt: Date = Date(),
isArchived: Bool = false,
clock: HLC
) {
self.id = id
self.name = LWWRegister(value: name, clock: clock)
self.color = LWWRegister(value: color, clock: clock)
self.createdAt = LWWRegister(value: createdAt, clock: clock)
self.isArchived = LWWRegister(value: isArchived, clock: clock)
self.tombstone = Tombstone()
self.lastModified = clock
}
/// Merge with another version of the same tag
public func merged(with other: TagCRDT) -> TagCRDT {
precondition(id == other.id, "Cannot merge tags with different IDs")
var result = TagCRDT(
id: id,
name: "",
color: "",
clock: HLC(deviceId: "")
)
result.name = name.merged(with: other.name)
result.color = color.merged(with: other.color)
result.createdAt = createdAt.merged(with: other.createdAt)
result.isArchived = isArchived.merged(with: other.isArchived)
result.tombstone = tombstone.merged(with: other.tombstone)
result.lastModified = max(lastModified, other.lastModified)
return result
}
/// Convert to plain Tag for UI consumption
public var asTag: Tag? {
guard !tombstone.isDeleted else { return nil }
return Tag(
id: id,
name: name.value,
color: color.value,
createdAt: createdAt.value,
isArchived: isArchived.value
)
}
} TimeRecordCRDT
/// CRDT-enabled TimeRecord
public struct TimeRecordCRDT: Identifiable, Codable, Sendable {
// Identity (immutable)
public let id: UUID
// CRDT fields - scalar values use LWW
public var startTime: LWWRegister<Date>
public var endTime: LWWRegister<Date?>
public var tagId: LWWRegister<UUID?>
public var comment: LWWRegister<String?>
// Images use OR-Set (can add/remove concurrently)
public var images: ORSet<String>
// Deletion tracking
public var tombstone: Tombstone
// Metadata
public var lastModified: HLC
public init(
id: UUID = UUID(),
startTime: Date = Date(),
endTime: Date? = nil,
tagId: UUID? = nil,
comment: String? = nil,
images: [String] = [],
clock: HLC
) {
self.id = id
self.startTime = LWWRegister(value: startTime, clock: clock)
self.endTime = LWWRegister(value: endTime, clock: clock)
self.tagId = LWWRegister(value: tagId, clock: clock)
self.comment = LWWRegister(value: comment, clock: clock)
self.images = ORSet<String>()
for image in images {
self.images.add(image, clock: clock)
}
self.tombstone = Tombstone()
self.lastModified = clock
}
/// Merge with another version of the same record
public func merged(with other: TimeRecordCRDT) -> TimeRecordCRDT {
precondition(id == other.id, "Cannot merge records with different IDs")
var result = TimeRecordCRDT(
id: id,
startTime: Date(),
clock: HLC(deviceId: "")
)
result.startTime = startTime.merged(with: other.startTime)
result.endTime = endTime.merged(with: other.endTime)
result.tagId = tagId.merged(with: other.tagId)
result.comment = comment.merged(with: other.comment)
result.images = images.merged(with: other.images)
result.tombstone = tombstone.merged(with: other.tombstone)
result.lastModified = max(lastModified, other.lastModified)
return result
}
/// Convert to plain TimeRecord for UI consumption
public var asTimeRecord: TimeRecord? {
guard !tombstone.isDeleted else { return nil }
return TimeRecord(
id: id,
startTime: startTime.value,
endTime: endTime.value,
tagId: tagId.value,
comment: comment.value,
images: Array(images.values)
)
}
} CRDT Storage Format
New File Structure
~/Documents/Minuta/
├── device.json # Device identity
├── clock.json # Current HLC state
├── tags/
│ └── {tag-id}.json # One file per tag
├── records/
│ └── 2024/
│ └── 01/
│ └── {record-id}.json # CRDT content
└── sync/
├── pending/ # Events waiting to sync
│ └── {event-id}.json
└── state/ # Per-provider sync state
└── {provider-id}.json device.json
{
"deviceId": "macbook-pro-a1b2c3",
"deviceName": "MacBook Pro",
"createdAt": "2024-01-01T00:00:00Z"
} clock.json
{
"timestamp": 1704067200000,
"counter": 42,
"deviceId": "macbook-pro-a1b2c3"
} Tag File (CRDT)
Before (tags.json - all tags in one file):
{
"tags": [
{ "id": "...", "name": "Work", "color": "#4A90D9", ... }
]
} After (tags/{id}.json - one file per tag):
{
"id": "550e8400-e29b-41d4-a716-446655440000",
"name": {
"value": "Work",
"clock": { "timestamp": 1704067200000, "counter": 0, "deviceId": "macbook-pro" }
},
"color": {
"value": "#4A90D9",
"clock": { "timestamp": 1704067200000, "counter": 0, "deviceId": "macbook-pro" }
},
"createdAt": {
"value": "2024-01-01T00:00:00Z",
"clock": { "timestamp": 1704067200000, "counter": 0, "deviceId": "macbook-pro" }
},
"isArchived": {
"value": false,
"clock": { "timestamp": 1704067200000, "counter": 0, "deviceId": "macbook-pro" }
},
"tombstone": {
"isDeleted": false,
"deletedAt": null
},
"lastModified": { "timestamp": 1704067200000, "counter": 0, "deviceId": "macbook-pro" }
} Record File (CRDT)
Before:
{
"id": "123e4567-e89b-12d3-a456-426614174000",
"startTime": "2024-01-15T09:30:00Z",
"endTime": "2024-01-15T11:45:00Z",
"tagId": "550e8400-e29b-41d4-a716-446655440000",
"comment": "Working on feature X",
"images": ["123e4567_0.jpg"]
} After:
{
"id": "123e4567-e89b-12d3-a456-426614174000",
"startTime": {
"value": "2024-01-15T09:30:00Z",
"clock": { "timestamp": 1705311000000, "counter": 0, "deviceId": "macbook-pro" }
},
"endTime": {
"value": "2024-01-15T11:45:00Z",
"clock": { "timestamp": 1705319100000, "counter": 0, "deviceId": "macbook-pro" }
},
"tagId": {
"value": "550e8400-e29b-41d4-a716-446655440000",
"clock": { "timestamp": 1705311000000, "counter": 0, "deviceId": "macbook-pro" }
},
"comment": {
"value": "Working on feature X",
"clock": { "timestamp": 1705315000000, "counter": 0, "deviceId": "iphone" }
},
"images": {
"elements": {
"123e4567_0.jpg": [
{ "clock": { "timestamp": 1705316000000, "counter": 0, "deviceId": "iphone" }, "isDeleted": false }
],
"123e4567_1.jpg": [
{ "clock": { "timestamp": 1705317000000, "counter": 0, "deviceId": "macbook-pro" }, "isDeleted": false },
{ "clock": { "timestamp": 1705318000000, "counter": 0, "deviceId": "iphone" }, "isDeleted": true }
]
}
},
"tombstone": {
"isDeleted": false,
"deletedAt": null
},
"lastModified": { "timestamp": 1705319100000, "counter": 0, "deviceId": "macbook-pro" }
} Merge Scenarios
Scenario 1: Concurrent Field Edits
Device A (clock: 100): Update comment = "Task A"
Device B (clock: 101): Update comment = "Task B"
Merge result: comment = "Task B" (clock 101 > 100) Scenario 2: Concurrent Image Add/Remove
Device A: Add "photo1.jpg" at clock 100
Device B: Add "photo2.jpg" at clock 101
Device A: Remove "photo2.jpg" at clock 102 (doesn't know about B's add yet)
After merge:
- "photo1.jpg": exists (added by A)
- "photo2.jpg": exists (B's add at 101 has different tag than A's remove at 102)
OR-Set semantics: Add wins over concurrent remove (add-wins) Scenario 3: Delete vs Edit Conflict
Device A (clock: 100): Delete record X
Device B (clock: 101): Edit record X.comment = "Updated"
After merge:
- tombstone.isDeleted = true (clock 100)
- comment.value = "Updated" (clock 101)
Policy decision: Check tombstone first. If deleted, ignore edits.
Or: "Edit wins" - undelete if edit is newer. Storage Size Comparison
| Field Type | Current | CRDT | Overhead |
|---|---|---|---|
String | ~20 bytes | ~70 bytes | 3.5x |
Date | ~24 bytes | ~74 bytes | 3x |
UUID? | ~36 bytes | ~86 bytes | 2.4x |
Bool | 1 byte | ~51 bytes | 51x |
[String] (5 items) | ~100 bytes | ~400 bytes | 4x |
Typical TimeRecord:
- Current: ~250 bytes
- CRDT: ~800 bytes
- Overhead: ~3x
For 1000 records: 250KB -> 800KB (still very manageable)
Migration Strategy
Phase 1: Read Both Formats
/// Reads either legacy or CRDT format
func loadRecord(at url: URL) async throws -> TimeRecord {
let data = try Data(contentsOf: url)
// Try CRDT format first
if let crdt = try? decoder.decode(TimeRecordCRDT.self, from: data) {
return crdt.asTimeRecord!
}
// Fall back to legacy format
let legacy = try decoder.decode(TimeRecord.self, from: data)
return legacy
} Phase 2: Write CRDT Format
/// Always write CRDT format
func saveRecord(_ record: TimeRecord, clock: HLC) async throws {
let crdt = TimeRecordCRDT(
id: record.id,
startTime: record.startTime,
endTime: record.endTime,
tagId: record.tagId,
comment: record.comment,
images: record.images,
clock: clock
)
let data = try encoder.encode(crdt)
try data.write(to: recordURL(for: record))
} Phase 3: Background Migration
/// Migrate all legacy files to CRDT format
func migrateToCRDT() async throws {
let clock = await deviceClock.current()
// Migrate tags
if let legacyTags = try? loadLegacyTags() {
for tag in legacyTags {
let crdt = TagCRDT(
id: tag.id,
name: tag.name,
color: tag.color,
createdAt: tag.createdAt,
isArchived: tag.isArchived,
clock: clock
)
try await saveCRDTTag(crdt)
}
try? FileManager.default.removeItem(at: legacyTagsURL)
}
// Migrate records (can be done lazily on read)
} Architecture Diagram
┌─────────────────────────────────────────────────────────────────────┐
│ Application Layer │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────────┐ │
│ │ AppState │ │ Views │ │ Tag/TimeRecord │ │
│ │ (unchanged)│<-->│ (unchanged)│<-->│ (plain structs) │ │
│ └──────┬──────┘ └─────────────┘ └─────────────────────────┘ │
│ │ ^ │
│ │ uses │ converts │
│ v │ │
│ ┌──────────────────────────────────────────────────┴─────────────┐│
│ │ CRDT Storage Service ││
│ │ ┌────────────┐ ┌────────────┐ ┌────────────┐ ││
│ │ │ DeviceClock│ │ CRDTStore │ │ SyncEngine │ ││
│ │ │ (HLC) │ │ │ │ │ ││
│ │ └─────┬──────┘ └─────┬──────┘ └─────┬──────┘ ││
│ │ │ │ │ ││
│ │ └───────────────┴───────────────┘ ││
│ └────────────────────────┬────────────────────────────────────────┘│
│ │ │
└───────────────────────────┼──────────────────────────────────────────┘
│ read/write
v
┌───────────────────────────────────────────────────────────────────┐
│ File System │
│ ┌─────────┐ ┌─────────────────┐ ┌─────────────────────────────┐│
│ │device/ │ │ tags/ │ │ records/ ││
│ │clock.json│ │ {id}.json (CRDT)│ │ YYYY/MM/{id}.json (CRDT) ││
│ └─────────┘ └─────────────────┘ └─────────────────────────────┘│
└───────────────────────────────────────────────────────────────────┘ Summary
| Aspect | Current | CRDT |
|---|---|---|
| Tag storage | Single tags.json | Individual tags/{id}.json |
| Record storage | {date}.json | {id}.json (by ID, not date) |
| Field types | Plain values | LWW-Register per field |
| Arrays | Plain [String] | OR-Set |
| Deletion | File deletion | Tombstone marker |
| Ordering | None | Hybrid Logical Clock |
| Merge | Manual/overwrite | Automatic (deterministic) |
| Storage size | 1x | ~3x |
| Complexity | Low | Medium |
The key insight: CRDT wraps each mutable field with metadata (clock + device) to enable automatic, deterministic merging. The UI layer sees the same Tag and TimeRecord types - the CRDT layer is an implementation detail of storage.
Related
- 502-multi-cloud-sync - Multi-cloud sync architecture
- 101-overview - System architecture
- 301-file-storage - File storage service
- 201-tag - Tag model
- 202-time-record - TimeRecord model