CRDT Data Structures

Date: 2025-01-05

Superseded: This document describes a custom CRDT implementation approach. After evaluation, we decided to use Automerge instead, which provides battle-tested CRDT primitives. See 504-automerge-migration for the comparison and 505-automerge-migration-plan for the implementation plan.

This document details how Minuta’s data structures would transform to support CRDT (Conflict-free Replicated Data Types) for automatic multi-device sync with deterministic conflict resolution.

Current Data Structures

Tag

public struct Tag: Identifiable, Codable, Hashable, Sendable {
    public let id: UUID
    public var name: String
    public var color: String  // Hex color "#RRGGBB"
    public let createdAt: Date
    public var isArchived: Bool
}

TimeRecord

public struct TimeRecord: Identifiable, Codable, Hashable, Sendable {
    public let id: UUID
    public var startTime: Date
    public var endTime: Date?      // nil = running timer
    public var tagId: UUID?        // nil = untagged
    public var comment: String?
    public var images: [String]    // filenames of attached images
}

Current Storage Format

~/Documents/Minuta/
├── tags.json                           # All tags in one file
└── records/
    └── 2024/
        └── 01/
            └── 2024-01-15_093000.json  # One file per record

tags.json:

{
  "tags": [
    {
      "id": "550e8400-e29b-41d4-a716-446655440000",
      "name": "Work",
      "color": "#4A90D9",
      "createdAt": "2024-01-01T00:00:00Z",
      "isArchived": false
    }
  ]
}

record.json:

{
  "id": "123e4567-e89b-12d3-a456-426614174000",
  "startTime": "2024-01-15T09:30:00Z",
  "endTime": "2024-01-15T11:45:00Z",
  "tagId": "550e8400-e29b-41d4-a716-446655440000",
  "comment": "Working on feature X",
  "images": ["123e4567_0.jpg", "123e4567_1.jpg"]
}

CRDT Primitive Types

Hybrid Logical Clock (HLC)

Combines wall clock with logical counter to handle clock skew across devices:

/// Hybrid Logical Clock for ordering events across devices
public struct HLC: Codable, Comparable, Hashable, Sendable {
    public var timestamp: UInt64   // Milliseconds since epoch
    public var counter: UInt32     // Logical counter for same-ms events
    public var deviceId: String    // Unique device identifier

    public init(timestamp: UInt64 = 0, counter: UInt32 = 0, deviceId: String) {
        self.timestamp = timestamp
        self.counter = counter
        self.deviceId = deviceId
    }

    /// Create a new HLC for current time
    public static func now(deviceId: String) -> HLC {
        HLC(
            timestamp: UInt64(Date().timeIntervalSince1970 * 1000),
            counter: 0,
            deviceId: deviceId
        )
    }

    /// Tick the clock for a local event
    public mutating func tick() {
        let now = UInt64(Date().timeIntervalSince1970 * 1000)
        if now > timestamp {
            timestamp = now
            counter = 0
        } else {
            counter += 1
        }
    }

    /// Update clock after receiving remote event
    public mutating func receive(_ remote: HLC) {
        let now = UInt64(Date().timeIntervalSince1970 * 1000)
        let maxTs = max(now, max(timestamp, remote.timestamp))

        if maxTs == timestamp && maxTs == remote.timestamp {
            counter = max(counter, remote.counter) + 1
        } else if maxTs == timestamp {
            counter += 1
        } else if maxTs == remote.timestamp {
            counter = remote.counter + 1
        } else {
            counter = 0
        }
        timestamp = maxTs
    }

    public static func < (lhs: HLC, rhs: HLC) -> Bool {
        if lhs.timestamp != rhs.timestamp {
            return lhs.timestamp < rhs.timestamp
        }
        if lhs.counter != rhs.counter {
            return lhs.counter < rhs.counter
        }
        return lhs.deviceId < rhs.deviceId  // Deterministic tiebreaker
    }
}

LWW-Register (Last-Writer-Wins)

A register where the value with the highest clock wins:

/// A register where the value with the highest clock wins
public struct LWWRegister<T: Codable & Sendable>: Codable, Sendable {
    public var value: T
    public var clock: HLC

    public init(value: T, clock: HLC) {
        self.value = value
        self.clock = clock
    }

    /// Merge with another register - highest clock wins
    public func merged(with other: LWWRegister<T>) -> LWWRegister<T> {
        other.clock > self.clock ? other : self
    }

    /// Update value with new clock
    public mutating func set(_ newValue: T, clock: HLC) {
        if clock > self.clock {
            self.value = newValue
            self.clock = clock
        }
    }
}

extension LWWRegister: Equatable where T: Equatable {
    public static func == (lhs: LWWRegister<T>, rhs: LWWRegister<T>) -> Bool {
        lhs.value == rhs.value && lhs.clock == rhs.clock
    }
}

OR-Set (Observed-Remove Set)

A set where adds and removes can happen concurrently:

/// A set where adds and removes can happen concurrently
/// Each element has a unique tag; remove only affects known tags
public struct ORSet<T: Codable & Hashable & Sendable>: Codable, Sendable {
    /// Each element is stored with unique add-tags
    public var elements: [T: Set<ElementTag>]

    public struct ElementTag: Codable, Hashable, Sendable {
        let clock: HLC
        var isDeleted: Bool
    }

    public init() {
        self.elements = [:]
    }

    /// Current values (excluding deleted)
    public var values: Set<T> {
        Set(elements.compactMap { element, tags in
            tags.contains { !$0.isDeleted } ? element : nil
        })
    }

    /// Add an element
    public mutating func add(_ element: T, clock: HLC) {
        let tag = ElementTag(clock: clock, isDeleted: false)
        if elements[element] != nil {
            elements[element]?.insert(tag)
        } else {
            elements[element] = [tag]
        }
    }

    /// Remove an element (marks all current tags as deleted)
    public mutating func remove(_ element: T, clock: HLC) {
        guard var tags = elements[element] else { return }
        tags = Set(tags.map { tag in
            var newTag = tag
            if clock > tag.clock {
                newTag.isDeleted = true
            }
            return newTag
        })
        elements[element] = tags
    }

    /// Merge with another OR-Set
    public func merged(with other: ORSet<T>) -> ORSet<T> {
        var result = ORSet<T>()

        let allKeys = Set(elements.keys).union(other.elements.keys)
        for key in allKeys {
            let selfTags = elements[key] ?? []
            let otherTags = other.elements[key] ?? []
            result.elements[key] = selfTags.union(otherTags)
        }

        return result
    }
}

Tombstone

Tracks whether an entity has been deleted:

/// Tracks whether an entity has been deleted
public struct Tombstone: Codable, Sendable {
    public var isDeleted: Bool
    public var deletedAt: HLC?

    public init(isDeleted: Bool = false, deletedAt: HLC? = nil) {
        self.isDeleted = isDeleted
        self.deletedAt = deletedAt
    }

    public mutating func markDeleted(clock: HLC) {
        if !isDeleted || (deletedAt != nil && clock > deletedAt!) {
            isDeleted = true
            deletedAt = clock
        }
    }

    public func merged(with other: Tombstone) -> Tombstone {
        if isDeleted && other.isDeleted {
            if let selfClock = deletedAt, let otherClock = other.deletedAt {
                return selfClock > otherClock ? self : other
            }
        }
        if isDeleted { return self }
        if other.isDeleted { return other }
        return Tombstone()
    }
}

CRDT-Enabled Models

TagCRDT

/// CRDT-enabled Tag
public struct TagCRDT: Identifiable, Codable, Sendable {
    // Identity (immutable)
    public let id: UUID

    // CRDT fields
    public var name: LWWRegister<String>
    public var color: LWWRegister<String>
    public var createdAt: LWWRegister<Date>
    public var isArchived: LWWRegister<Bool>

    // Deletion tracking
    public var tombstone: Tombstone

    // Metadata
    public var lastModified: HLC

    public init(
        id: UUID = UUID(),
        name: String,
        color: String,
        createdAt: Date = Date(),
        isArchived: Bool = false,
        clock: HLC
    ) {
        self.id = id
        self.name = LWWRegister(value: name, clock: clock)
        self.color = LWWRegister(value: color, clock: clock)
        self.createdAt = LWWRegister(value: createdAt, clock: clock)
        self.isArchived = LWWRegister(value: isArchived, clock: clock)
        self.tombstone = Tombstone()
        self.lastModified = clock
    }

    /// Merge with another version of the same tag
    public func merged(with other: TagCRDT) -> TagCRDT {
        precondition(id == other.id, "Cannot merge tags with different IDs")

        var result = TagCRDT(
            id: id,
            name: "",
            color: "",
            clock: HLC(deviceId: "")
        )

        result.name = name.merged(with: other.name)
        result.color = color.merged(with: other.color)
        result.createdAt = createdAt.merged(with: other.createdAt)
        result.isArchived = isArchived.merged(with: other.isArchived)
        result.tombstone = tombstone.merged(with: other.tombstone)
        result.lastModified = max(lastModified, other.lastModified)

        return result
    }

    /// Convert to plain Tag for UI consumption
    public var asTag: Tag? {
        guard !tombstone.isDeleted else { return nil }
        return Tag(
            id: id,
            name: name.value,
            color: color.value,
            createdAt: createdAt.value,
            isArchived: isArchived.value
        )
    }
}

TimeRecordCRDT

/// CRDT-enabled TimeRecord
public struct TimeRecordCRDT: Identifiable, Codable, Sendable {
    // Identity (immutable)
    public let id: UUID

    // CRDT fields - scalar values use LWW
    public var startTime: LWWRegister<Date>
    public var endTime: LWWRegister<Date?>
    public var tagId: LWWRegister<UUID?>
    public var comment: LWWRegister<String?>

    // Images use OR-Set (can add/remove concurrently)
    public var images: ORSet<String>

    // Deletion tracking
    public var tombstone: Tombstone

    // Metadata
    public var lastModified: HLC

    public init(
        id: UUID = UUID(),
        startTime: Date = Date(),
        endTime: Date? = nil,
        tagId: UUID? = nil,
        comment: String? = nil,
        images: [String] = [],
        clock: HLC
    ) {
        self.id = id
        self.startTime = LWWRegister(value: startTime, clock: clock)
        self.endTime = LWWRegister(value: endTime, clock: clock)
        self.tagId = LWWRegister(value: tagId, clock: clock)
        self.comment = LWWRegister(value: comment, clock: clock)

        self.images = ORSet<String>()
        for image in images {
            self.images.add(image, clock: clock)
        }

        self.tombstone = Tombstone()
        self.lastModified = clock
    }

    /// Merge with another version of the same record
    public func merged(with other: TimeRecordCRDT) -> TimeRecordCRDT {
        precondition(id == other.id, "Cannot merge records with different IDs")

        var result = TimeRecordCRDT(
            id: id,
            startTime: Date(),
            clock: HLC(deviceId: "")
        )

        result.startTime = startTime.merged(with: other.startTime)
        result.endTime = endTime.merged(with: other.endTime)
        result.tagId = tagId.merged(with: other.tagId)
        result.comment = comment.merged(with: other.comment)
        result.images = images.merged(with: other.images)
        result.tombstone = tombstone.merged(with: other.tombstone)
        result.lastModified = max(lastModified, other.lastModified)

        return result
    }

    /// Convert to plain TimeRecord for UI consumption
    public var asTimeRecord: TimeRecord? {
        guard !tombstone.isDeleted else { return nil }
        return TimeRecord(
            id: id,
            startTime: startTime.value,
            endTime: endTime.value,
            tagId: tagId.value,
            comment: comment.value,
            images: Array(images.values)
        )
    }
}

CRDT Storage Format

New File Structure

~/Documents/Minuta/
├── device.json                         # Device identity
├── clock.json                          # Current HLC state
├── tags/
│   └── {tag-id}.json                   # One file per tag
├── records/
│   └── 2024/
│       └── 01/
│           └── {record-id}.json        # CRDT content
└── sync/
    ├── pending/                        # Events waiting to sync
    │   └── {event-id}.json
    └── state/                          # Per-provider sync state
        └── {provider-id}.json

device.json

{
  "deviceId": "macbook-pro-a1b2c3",
  "deviceName": "MacBook Pro",
  "createdAt": "2024-01-01T00:00:00Z"
}

clock.json

{
  "timestamp": 1704067200000,
  "counter": 42,
  "deviceId": "macbook-pro-a1b2c3"
}

Tag File (CRDT)

Before (tags.json - all tags in one file):

{
  "tags": [
    { "id": "...", "name": "Work", "color": "#4A90D9", ... }
  ]
}

After (tags/{id}.json - one file per tag):

{
  "id": "550e8400-e29b-41d4-a716-446655440000",
  "name": {
    "value": "Work",
    "clock": { "timestamp": 1704067200000, "counter": 0, "deviceId": "macbook-pro" }
  },
  "color": {
    "value": "#4A90D9",
    "clock": { "timestamp": 1704067200000, "counter": 0, "deviceId": "macbook-pro" }
  },
  "createdAt": {
    "value": "2024-01-01T00:00:00Z",
    "clock": { "timestamp": 1704067200000, "counter": 0, "deviceId": "macbook-pro" }
  },
  "isArchived": {
    "value": false,
    "clock": { "timestamp": 1704067200000, "counter": 0, "deviceId": "macbook-pro" }
  },
  "tombstone": {
    "isDeleted": false,
    "deletedAt": null
  },
  "lastModified": { "timestamp": 1704067200000, "counter": 0, "deviceId": "macbook-pro" }
}

Record File (CRDT)

Before:

{
  "id": "123e4567-e89b-12d3-a456-426614174000",
  "startTime": "2024-01-15T09:30:00Z",
  "endTime": "2024-01-15T11:45:00Z",
  "tagId": "550e8400-e29b-41d4-a716-446655440000",
  "comment": "Working on feature X",
  "images": ["123e4567_0.jpg"]
}

After:

{
  "id": "123e4567-e89b-12d3-a456-426614174000",
  "startTime": {
    "value": "2024-01-15T09:30:00Z",
    "clock": { "timestamp": 1705311000000, "counter": 0, "deviceId": "macbook-pro" }
  },
  "endTime": {
    "value": "2024-01-15T11:45:00Z",
    "clock": { "timestamp": 1705319100000, "counter": 0, "deviceId": "macbook-pro" }
  },
  "tagId": {
    "value": "550e8400-e29b-41d4-a716-446655440000",
    "clock": { "timestamp": 1705311000000, "counter": 0, "deviceId": "macbook-pro" }
  },
  "comment": {
    "value": "Working on feature X",
    "clock": { "timestamp": 1705315000000, "counter": 0, "deviceId": "iphone" }
  },
  "images": {
    "elements": {
      "123e4567_0.jpg": [
        { "clock": { "timestamp": 1705316000000, "counter": 0, "deviceId": "iphone" }, "isDeleted": false }
      ],
      "123e4567_1.jpg": [
        { "clock": { "timestamp": 1705317000000, "counter": 0, "deviceId": "macbook-pro" }, "isDeleted": false },
        { "clock": { "timestamp": 1705318000000, "counter": 0, "deviceId": "iphone" }, "isDeleted": true }
      ]
    }
  },
  "tombstone": {
    "isDeleted": false,
    "deletedAt": null
  },
  "lastModified": { "timestamp": 1705319100000, "counter": 0, "deviceId": "macbook-pro" }
}

Merge Scenarios

Scenario 1: Concurrent Field Edits

Device A (clock: 100): Update comment = "Task A"
Device B (clock: 101): Update comment = "Task B"

Merge result: comment = "Task B" (clock 101 > 100)

Scenario 2: Concurrent Image Add/Remove

Device A: Add "photo1.jpg" at clock 100
Device B: Add "photo2.jpg" at clock 101
Device A: Remove "photo2.jpg" at clock 102 (doesn't know about B's add yet)

After merge:
- "photo1.jpg": exists (added by A)
- "photo2.jpg": exists (B's add at 101 has different tag than A's remove at 102)

OR-Set semantics: Add wins over concurrent remove (add-wins)

Scenario 3: Delete vs Edit Conflict

Device A (clock: 100): Delete record X
Device B (clock: 101): Edit record X.comment = "Updated"

After merge:
- tombstone.isDeleted = true (clock 100)
- comment.value = "Updated" (clock 101)

Policy decision: Check tombstone first. If deleted, ignore edits.
Or: "Edit wins" - undelete if edit is newer.

Storage Size Comparison

Field TypeCurrentCRDTOverhead
String~20 bytes~70 bytes3.5x
Date~24 bytes~74 bytes3x
UUID?~36 bytes~86 bytes2.4x
Bool1 byte~51 bytes51x
[String] (5 items)~100 bytes~400 bytes4x

Typical TimeRecord:

  • Current: ~250 bytes
  • CRDT: ~800 bytes
  • Overhead: ~3x

For 1000 records: 250KB -> 800KB (still very manageable)


Migration Strategy

Phase 1: Read Both Formats

/// Reads either legacy or CRDT format
func loadRecord(at url: URL) async throws -> TimeRecord {
    let data = try Data(contentsOf: url)

    // Try CRDT format first
    if let crdt = try? decoder.decode(TimeRecordCRDT.self, from: data) {
        return crdt.asTimeRecord!
    }

    // Fall back to legacy format
    let legacy = try decoder.decode(TimeRecord.self, from: data)
    return legacy
}

Phase 2: Write CRDT Format

/// Always write CRDT format
func saveRecord(_ record: TimeRecord, clock: HLC) async throws {
    let crdt = TimeRecordCRDT(
        id: record.id,
        startTime: record.startTime,
        endTime: record.endTime,
        tagId: record.tagId,
        comment: record.comment,
        images: record.images,
        clock: clock
    )

    let data = try encoder.encode(crdt)
    try data.write(to: recordURL(for: record))
}

Phase 3: Background Migration

/// Migrate all legacy files to CRDT format
func migrateToCRDT() async throws {
    let clock = await deviceClock.current()

    // Migrate tags
    if let legacyTags = try? loadLegacyTags() {
        for tag in legacyTags {
            let crdt = TagCRDT(
                id: tag.id,
                name: tag.name,
                color: tag.color,
                createdAt: tag.createdAt,
                isArchived: tag.isArchived,
                clock: clock
            )
            try await saveCRDTTag(crdt)
        }
        try? FileManager.default.removeItem(at: legacyTagsURL)
    }

    // Migrate records (can be done lazily on read)
}

Architecture Diagram

┌─────────────────────────────────────────────────────────────────────┐
│                        Application Layer                             │
│  ┌─────────────┐    ┌─────────────┐    ┌─────────────────────────┐ │
│  │   AppState  │    │    Views    │    │    Tag/TimeRecord       │ │
│  │  (unchanged)│<-->│  (unchanged)│<-->│    (plain structs)      │ │
│  └──────┬──────┘    └─────────────┘    └─────────────────────────┘ │
│         │                                           ^               │
│         │ uses                                      │ converts      │
│         v                                           │               │
│  ┌──────────────────────────────────────────────────┴─────────────┐│
│  │                    CRDT Storage Service                         ││
│  │  ┌────────────┐  ┌────────────┐  ┌────────────┐                ││
│  │  │ DeviceClock│  │ CRDTStore  │  │ SyncEngine │                ││
│  │  │   (HLC)    │  │            │  │            │                ││
│  │  └─────┬──────┘  └─────┬──────┘  └─────┬──────┘                ││
│  │        │               │               │                        ││
│  │        └───────────────┴───────────────┘                        ││
│  └────────────────────────┬────────────────────────────────────────┘│
│                           │                                          │
└───────────────────────────┼──────────────────────────────────────────┘
                            │ read/write
                            v
┌───────────────────────────────────────────────────────────────────┐
│                       File System                                  │
│  ┌─────────┐  ┌─────────────────┐  ┌─────────────────────────────┐│
│  │device/  │  │     tags/       │  │         records/            ││
│  │clock.json│ │ {id}.json (CRDT)│  │ YYYY/MM/{id}.json (CRDT)    ││
│  └─────────┘  └─────────────────┘  └─────────────────────────────┘│
└───────────────────────────────────────────────────────────────────┘

Summary

AspectCurrentCRDT
Tag storageSingle tags.jsonIndividual tags/{id}.json
Record storage{date}.json{id}.json (by ID, not date)
Field typesPlain valuesLWW-Register per field
ArraysPlain [String]OR-Set
DeletionFile deletionTombstone marker
OrderingNoneHybrid Logical Clock
MergeManual/overwriteAutomatic (deterministic)
Storage size1x~3x
ComplexityLowMedium

The key insight: CRDT wraps each mutable field with metadata (clock + device) to enable automatic, deterministic merging. The UI layer sees the same Tag and TimeRecord types - the CRDT layer is an implementation detail of storage.

Related