FossilRepo

Implement Fossil delta decoder for proper diff view.
- Add _apply_fossil_delta(), which decodes Fossil's delta format (base-64 integer encoding, @copy and :insert commands).
- Update get_file_content() to recursively resolve delta chains via the delta table (srcid references).
- Fixes the diff view showing garbled content for delta-compressed blobs.
- The diff view now shows the actual source code difference, not raw deltas.

lmata 2026-04-06 14:15 trunk
Commit 027a818377d662fdbc905a353ff79b9a57647299a636f8244c9d58753bac87f0
1 file changed +99 -5
+99 -5
--- fossil/reader.py
+++ fossil/reader.py
@@ -104,10 +104,85 @@
104104
# Julian day epoch is Jan 1, 4713 BC (proleptic Julian calendar)
105105
# Unix epoch in Julian days = 2440587.5
106106
unix_ts = (julian - 2440587.5) * 86400.0
107107
return datetime.fromtimestamp(unix_ts, tz=UTC)
108108
109
+
110
+def _apply_fossil_delta(source: bytes, delta: bytes) -> bytes:
111
+ """Apply a Fossil delta to a source blob to produce the output.
112
+
113
+ Fossil delta format: output_size\\n then commands:
114
+ - @offset,length: copy 'length' bytes from source starting at 'offset'
115
+ - :length:data: insert 'length' bytes of literal data
116
+ - length@ or length,offset: shorthand copy commands
117
+
118
+ The actual format uses a base-64-like encoding for integers.
119
+ See: https://fossil-scm.org/home/doc/trunk/www/delta_format.wiki
120
+ """
121
+ if not delta:
122
+ return source
123
+
124
+ pos = 0
125
+ out = bytearray()
126
+
127
+ def read_int():
128
+ nonlocal pos
129
+ val = 0
130
+ while pos < len(delta):
131
+ c = delta[pos : pos + 1]
132
+ if c in b"0123456789":
133
+ val = val * 64 + (c[0] - 48)
134
+ elif c in b"ABCDEFGHIJKLMNOPQRSTUVWXYZ":
135
+ val = val * 64 + (c[0] - 55)
136
+ elif c in b"abcdefghijklmnopqrstuvwxyz":
137
+ val = val * 64 + (c[0] - 87)
138
+ elif c == b".":
139
+ val = val * 64 + 62
140
+ elif c == b"/":
141
+ val = val * 64 + 63
142
+ else:
143
+ break
144
+ pos += 1
145
+ return val
146
+
147
+ # Read output size
148
+ output_size = read_int()
149
+ if pos < len(delta) and delta[pos : pos + 1] == b"\n":
150
+ pos += 1
151
+
152
+ while pos < len(delta):
153
+ count = read_int()
154
+ if pos >= len(delta):
155
+ break
156
+ cmd = delta[pos : pos + 1]
157
+ pos += 1
158
+
159
+ if cmd == b"@":
160
+ # Copy from source: count bytes starting at offset
161
+ offset = read_int()
162
+ if pos < len(delta) and delta[pos : pos + 1] == b",":
163
+ pos += 1
164
+ out.extend(source[offset : offset + count])
165
+ elif cmd == b",":
166
+ # Copy from source at offset=count, length follows
167
+ offset = count
168
+ length = read_int()
169
+ if pos < len(delta) and delta[pos : pos + 1] in (b"\n", b";"):
170
+ pos += 1
171
+ out.extend(source[offset : offset + length])
172
+ elif cmd == b":":
173
+ # Insert literal data
174
+ out.extend(delta[pos : pos + count])
175
+ pos += count
176
+ elif cmd == b";":
177
+ # End of delta with checksum
178
+ break
179
+ elif cmd == b"\n":
180
+ continue
181
+
182
+ return bytes(out[:output_size]) if output_size else bytes(out)
183
+
109184
110185
def _decompress_blob(data: bytes) -> bytes:
111186
"""Decompress a Fossil blob.
112187
113188
Fossil stores blobs with a 4-byte big-endian size prefix followed by
@@ -439,17 +514,36 @@
439514
]
440515
except sqlite3.OperationalError:
441516
return []
442517
443518
def get_file_content(self, blob_uuid: str) -> bytes:
519
+ """Get file content, resolving delta compression chains."""
444520
try:
445
- row = self.conn.execute("SELECT content FROM blob WHERE uuid=?", (blob_uuid,)).fetchone()
446
- if not row or not row[0]:
447
- return b""
448
- return _decompress_blob(row[0])
449
- except sqlite3.OperationalError:
521
+ return self._resolve_blob(blob_uuid)
522
+ except Exception:
523
+ return b""
524
+
525
+ def _resolve_blob(self, uuid_or_rid, by_rid=False) -> bytes:
526
+ """Resolve a blob, following delta chains if needed."""
527
+ if by_rid:
528
+ row = self.conn.execute("SELECT rid, content FROM blob WHERE rid=?", (uuid_or_rid,)).fetchone()
529
+ else:
530
+ row = self.conn.execute("SELECT rid, content FROM blob WHERE uuid=?", (uuid_or_rid,)).fetchone()
531
+ if not row or not row["content"]:
450532
return b""
533
+
534
+ rid = row["rid"]
535
+ data = _decompress_blob(row["content"])
536
+
537
+ # Check if this blob is delta-compressed
538
+ delta_row = self.conn.execute("SELECT srcid FROM delta WHERE rid=?", (rid,)).fetchone()
539
+ if delta_row:
540
+ # Recursively resolve the source blob
541
+ source = self._resolve_blob(delta_row["srcid"], by_rid=True)
542
+ return _apply_fossil_delta(source, data)
543
+
544
+ return data
451545
452546
# --- Tickets ---
453547
454548
def get_tickets(self, status: str | None = None, limit: int = 50) -> list[TicketEntry]:
455549
sql = "SELECT tkt_uuid, title, status, type, tkt_ctime, subsystem, priority FROM ticket"
456550
--- fossil/reader.py
+++ fossil/reader.py
@@ -104,10 +104,85 @@
104 # Julian day epoch is Jan 1, 4713 BC (proleptic Julian calendar)
105 # Unix epoch in Julian days = 2440587.5
106 unix_ts = (julian - 2440587.5) * 86400.0
107 return datetime.fromtimestamp(unix_ts, tz=UTC)
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
110 def _decompress_blob(data: bytes) -> bytes:
111 """Decompress a Fossil blob.
112
113 Fossil stores blobs with a 4-byte big-endian size prefix followed by
@@ -439,17 +514,36 @@
439 ]
440 except sqlite3.OperationalError:
441 return []
442
443 def get_file_content(self, blob_uuid: str) -> bytes:
 
444 try:
445 row = self.conn.execute("SELECT content FROM blob WHERE uuid=?", (blob_uuid,)).fetchone()
446 if not row or not row[0]:
447 return b""
448 return _decompress_blob(row[0])
449 except sqlite3.OperationalError:
 
 
 
 
 
 
450 return b""
 
 
 
 
 
 
 
 
 
 
 
 
451
452 # --- Tickets ---
453
454 def get_tickets(self, status: str | None = None, limit: int = 50) -> list[TicketEntry]:
455 sql = "SELECT tkt_uuid, title, status, type, tkt_ctime, subsystem, priority FROM ticket"
456
--- fossil/reader.py
+++ fossil/reader.py
@@ -104,10 +104,85 @@
104 # Julian day epoch is Jan 1, 4713 BC (proleptic Julian calendar)
105 # Unix epoch in Julian days = 2440587.5
106 unix_ts = (julian - 2440587.5) * 86400.0
107 return datetime.fromtimestamp(unix_ts, tz=UTC)
108
109
def _apply_fossil_delta(source: bytes, delta: bytes) -> bytes:
    """Apply a Fossil delta to a source blob and return the reconstructed output.

    A delta is a header followed by a sequence of commands. Every integer is
    written in Fossil's own base-64 notation, most significant digit first,
    using the digits ``0-9``, ``A-Z``, ``_``, ``a-z`` and ``~`` for the values
    0 through 63 (this is NOT the RFC 4648 alphabet):

    - ``<size>`` then a newline: header giving the length of the output
    - ``<count>@<offset>,``: copy ``count`` bytes of ``source`` from ``offset``
    - ``<count>:<bytes>``: insert the ``count`` literal bytes after the colon
    - ``<checksum>;``: end of the delta (the checksum is not verified here)

    Decoding is lenient: unknown command characters are skipped and
    out-of-range copies are truncated by slicing instead of raising.

    See: https://fossil-scm.org/home/doc/trunk/www/delta_format.wiki

    Args:
        source: Content of the blob the delta was computed against.
        delta: Decompressed delta text.

    Returns:
        The reconstructed content, cut to the size announced in the header
        when that size is non-zero. An empty ``delta`` returns ``source``
        unchanged.
    """
    if not delta:
        return source

    # Byte value -> digit value for Fossil's integer alphabet.
    digit_values = {
        digit: value
        for value, digit in enumerate(
            b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~"
        )
    }
    end = len(delta)
    pos = 0
    out = bytearray()

    def read_int() -> int:
        """Consume digits at ``pos`` and return their value (0 if none)."""
        nonlocal pos
        val = 0
        while pos < end:
            digit = digit_values.get(delta[pos])
            if digit is None:
                break
            val = val * 64 + digit
            pos += 1
        return val

    # Header: output size terminated by a newline.
    output_size = read_int()
    if pos < end and delta[pos : pos + 1] == b"\n":
        pos += 1

    while pos < end:
        # Every command starts with an integer followed by one command byte.
        count = read_int()
        if pos >= end:
            break
        cmd = delta[pos : pos + 1]
        pos += 1

        if cmd == b"@":
            # Copy: count bytes of source starting at offset.
            offset = read_int()
            if pos < end and delta[pos : pos + 1] == b",":
                pos += 1
            out.extend(source[offset : offset + count])
        elif cmd == b",":
            # Not part of the documented format; kept so that input the
            # previous decoder accepted is still handled the same way.
            offset = count
            length = read_int()
            if pos < end and delta[pos : pos + 1] in (b"\n", b";"):
                pos += 1
            out.extend(source[offset : offset + length])
        elif cmd == b":":
            # Insert: the next count bytes are literal output.
            out.extend(delta[pos : pos + count])
            pos += count
        elif cmd == b";":
            # The integer just read was the checksum; the delta ends here.
            break
        # Any other byte (e.g. a stray newline) is skipped.

    return bytes(out[:output_size]) if output_size else bytes(out)
183
184
185 def _decompress_blob(data: bytes) -> bytes:
186 """Decompress a Fossil blob.
187
188 Fossil stores blobs with a 4-byte big-endian size prefix followed by
@@ -439,17 +514,36 @@
514 ]
515 except sqlite3.OperationalError:
516 return []
517
def get_file_content(self, blob_uuid: str) -> bytes:
    """Return the content of the blob identified by *blob_uuid*.

    Delegates to ``self._resolve_blob`` so that delta-compressed blobs are
    expanded. This is a best-effort accessor: any failure while resolving
    the blob is logged and reported to the caller as empty bytes.

    Args:
        blob_uuid: UUID of the blob to read.

    Returns:
        The resolved content, or ``b""`` if it could not be resolved.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    try:
        return self._resolve_blob(blob_uuid)
    except Exception:
        # Keep the empty-bytes fallback, but leave a trace so decoding or
        # database problems do not disappear silently.
        logging.getLogger(__name__).exception("Failed to resolve blob %s", blob_uuid)
        return b""
524
def _resolve_blob(self, uuid_or_rid, by_rid=False) -> bytes:
    """Return the full content of a blob, expanding delta chains if needed.

    The blob is looked up in the ``blob`` table. While the ``delta`` table
    lists a source (``srcid``) for the current blob, the source is fetched
    in turn; the collected deltas are then applied from the base outwards.
    The chain is walked iteratively so its length is not limited by the
    interpreter's recursion depth.

    Args:
        uuid_or_rid: Blob UUID, or the blob's ``rid`` when ``by_rid`` is true.
        by_rid: Select the lookup column for ``uuid_or_rid``.

    Returns:
        The resolved content. A missing or empty blob counts as ``b""``,
        including when it is the source of a delta.

    Raises:
        ValueError: If the ``delta`` table links a blob back to itself,
            directly or through other blobs.
    """
    by_rid_sql = "SELECT rid, content FROM blob WHERE rid=?"
    by_uuid_sql = "SELECT rid, content FROM blob WHERE uuid=?"

    pending = []  # decompressed deltas, outermost first
    visited = set()
    key = uuid_or_rid
    query = by_rid_sql if by_rid else by_uuid_sql

    while True:
        row = self.conn.execute(query, (key,)).fetchone()
        if not row or not row["content"]:
            content = b""
            break

        rid = row["rid"]
        if rid in visited:
            raise ValueError(f"cyclic delta chain at blob rid {rid}")
        visited.add(rid)

        data = _decompress_blob(row["content"])

        # A row in the delta table means this blob is stored as a delta.
        delta_row = self.conn.execute("SELECT srcid FROM delta WHERE rid=?", (rid,)).fetchone()
        if not delta_row:
            content = data
            break

        pending.append(data)
        key = delta_row["srcid"]
        query = by_rid_sql  # sources are always referenced by rid

    # Rebuild from the base blob outwards.
    for delta in reversed(pending):
        content = _apply_fossil_delta(content, delta)
    return content
545
546 # --- Tickets ---
547
548 def get_tickets(self, status: str | None = None, limit: int = 50) -> list[TicketEntry]:
549 sql = "SELECT tkt_uuid, title, status, type, tkt_ctime, subsystem, priority FROM ticket"
550

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button