FossilRepo
Implement Fossil delta decoder for proper diff view. Add _apply_fossil_delta(), which decodes Fossil's delta format (base-64 integer encoding, "@" copy and ":" insert commands). Update get_file_content() to recursively resolve delta chains via the delta table (srcid references). This fixes the diff view showing garbled content for delta-compressed blobs: it now shows the actual source code difference rather than raw deltas.
Commit
027a818377d662fdbc905a353ff79b9a57647299a636f8244c9d58753bac87f0
Parent
0e0b43fd9b312cf…
1 file changed
+99
-5
+99
-5
| --- fossil/reader.py | ||
| +++ fossil/reader.py | ||
| @@ -104,10 +104,85 @@ | ||
| 104 | 104 | # Julian day epoch is Jan 1, 4713 BC (proleptic Julian calendar) |
| 105 | 105 | # Unix epoch in Julian days = 2440587.5 |
| 106 | 106 | unix_ts = (julian - 2440587.5) * 86400.0 |
| 107 | 107 | return datetime.fromtimestamp(unix_ts, tz=UTC) |
| 108 | 108 | |
| 109 | + | |
| 110 | +def _apply_fossil_delta(source: bytes, delta: bytes) -> bytes: | |
| 111 | + """Apply a Fossil delta to a source blob to produce the output. | |
| 112 | + | |
| 113 | + Fossil delta format: output_size\\n then commands: | |
| 114 | + - @offset,length: copy 'length' bytes from source starting at 'offset' | |
| 115 | + - :length:data: insert 'length' bytes of literal data | |
| 116 | + - length@ or length,offset: shorthand copy commands | |
| 117 | + | |
| 118 | + The actual format uses a base-64-like encoding for integers. | |
| 119 | + See: https://fossil-scm.org/home/doc/trunk/www/delta_format.wiki | |
| 120 | + """ | |
| 121 | + if not delta: | |
| 122 | + return source | |
| 123 | + | |
| 124 | + pos = 0 | |
| 125 | + out = bytearray() | |
| 126 | + | |
| 127 | + def read_int(): | |
| 128 | + nonlocal pos | |
| 129 | + val = 0 | |
| 130 | + while pos < len(delta): | |
| 131 | + c = delta[pos : pos + 1] | |
| 132 | + if c in b"0123456789": | |
| 133 | + val = val * 64 + (c[0] - 48) | |
| 134 | + elif c in b"ABCDEFGHIJKLMNOPQRSTUVWXYZ": | |
| 135 | + val = val * 64 + (c[0] - 55) | |
| 136 | + elif c in b"abcdefghijklmnopqrstuvwxyz": | |
| 137 | + val = val * 64 + (c[0] - 87) | |
| 138 | + elif c == b".": | |
| 139 | + val = val * 64 + 62 | |
| 140 | + elif c == b"/": | |
| 141 | + val = val * 64 + 63 | |
| 142 | + else: | |
| 143 | + break | |
| 144 | + pos += 1 | |
| 145 | + return val | |
| 146 | + | |
| 147 | + # Read output size | |
| 148 | + output_size = read_int() | |
| 149 | + if pos < len(delta) and delta[pos : pos + 1] == b"\n": | |
| 150 | + pos += 1 | |
| 151 | + | |
| 152 | + while pos < len(delta): | |
| 153 | + count = read_int() | |
| 154 | + if pos >= len(delta): | |
| 155 | + break | |
| 156 | + cmd = delta[pos : pos + 1] | |
| 157 | + pos += 1 | |
| 158 | + | |
| 159 | + if cmd == b"@": | |
| 160 | + # Copy from source: count bytes starting at offset | |
| 161 | + offset = read_int() | |
| 162 | + if pos < len(delta) and delta[pos : pos + 1] == b",": | |
| 163 | + pos += 1 | |
| 164 | + out.extend(source[offset : offset + count]) | |
| 165 | + elif cmd == b",": | |
| 166 | + # Copy from source at offset=count, length follows | |
| 167 | + offset = count | |
| 168 | + length = read_int() | |
| 169 | + if pos < len(delta) and delta[pos : pos + 1] in (b"\n", b";"): | |
| 170 | + pos += 1 | |
| 171 | + out.extend(source[offset : offset + length]) | |
| 172 | + elif cmd == b":": | |
| 173 | + # Insert literal data | |
| 174 | + out.extend(delta[pos : pos + count]) | |
| 175 | + pos += count | |
| 176 | + elif cmd == b";": | |
| 177 | + # End of delta with checksum | |
| 178 | + break | |
| 179 | + elif cmd == b"\n": | |
| 180 | + continue | |
| 181 | + | |
| 182 | + return bytes(out[:output_size]) if output_size else bytes(out) | |
| 183 | + | |
| 109 | 184 | |
| 110 | 185 | def _decompress_blob(data: bytes) -> bytes: |
| 111 | 186 | """Decompress a Fossil blob. |
| 112 | 187 | |
| 113 | 188 | Fossil stores blobs with a 4-byte big-endian size prefix followed by |
| @@ -439,17 +514,36 @@ | ||
| 439 | 514 | ] |
| 440 | 515 | except sqlite3.OperationalError: |
| 441 | 516 | return [] |
| 442 | 517 | |
| 443 | 518 | def get_file_content(self, blob_uuid: str) -> bytes: |
| 519 | + """Get file content, resolving delta compression chains.""" | |
| 444 | 520 | try: |
| 445 | - row = self.conn.execute("SELECT content FROM blob WHERE uuid=?", (blob_uuid,)).fetchone() | |
| 446 | - if not row or not row[0]: | |
| 447 | - return b"" | |
| 448 | - return _decompress_blob(row[0]) | |
| 449 | - except sqlite3.OperationalError: | |
| 521 | + return self._resolve_blob(blob_uuid) | |
| 522 | + except Exception: | |
| 523 | + return b"" | |
| 524 | + | |
| 525 | + def _resolve_blob(self, uuid_or_rid, by_rid=False) -> bytes: | |
| 526 | + """Resolve a blob, following delta chains if needed.""" | |
| 527 | + if by_rid: | |
| 528 | + row = self.conn.execute("SELECT rid, content FROM blob WHERE rid=?", (uuid_or_rid,)).fetchone() | |
| 529 | + else: | |
| 530 | + row = self.conn.execute("SELECT rid, content FROM blob WHERE uuid=?", (uuid_or_rid,)).fetchone() | |
| 531 | + if not row or not row["content"]: | |
| 450 | 532 | return b"" |
| 533 | + | |
| 534 | + rid = row["rid"] | |
| 535 | + data = _decompress_blob(row["content"]) | |
| 536 | + | |
| 537 | + # Check if this blob is delta-compressed | |
| 538 | + delta_row = self.conn.execute("SELECT srcid FROM delta WHERE rid=?", (rid,)).fetchone() | |
| 539 | + if delta_row: | |
| 540 | + # Recursively resolve the source blob | |
| 541 | + source = self._resolve_blob(delta_row["srcid"], by_rid=True) | |
| 542 | + return _apply_fossil_delta(source, data) | |
| 543 | + | |
| 544 | + return data | |
| 451 | 545 | |
| 452 | 546 | # --- Tickets --- |
| 453 | 547 | |
| 454 | 548 | def get_tickets(self, status: str | None = None, limit: int = 50) -> list[TicketEntry]: |
| 455 | 549 | sql = "SELECT tkt_uuid, title, status, type, tkt_ctime, subsystem, priority FROM ticket" |
| 456 | 550 |
| --- fossil/reader.py | |
| +++ fossil/reader.py | |
| @@ -104,10 +104,85 @@ | |
| 104 | # Julian day epoch is Jan 1, 4713 BC (proleptic Julian calendar) |
| 105 | # Unix epoch in Julian days = 2440587.5 |
| 106 | unix_ts = (julian - 2440587.5) * 86400.0 |
| 107 | return datetime.fromtimestamp(unix_ts, tz=UTC) |
| 108 | |
| 109 | |
| 110 | def _decompress_blob(data: bytes) -> bytes: |
| 111 | """Decompress a Fossil blob. |
| 112 | |
| 113 | Fossil stores blobs with a 4-byte big-endian size prefix followed by |
| @@ -439,17 +514,36 @@ | |
| 439 | ] |
| 440 | except sqlite3.OperationalError: |
| 441 | return [] |
| 442 | |
| 443 | def get_file_content(self, blob_uuid: str) -> bytes: |
| 444 | try: |
| 445 | row = self.conn.execute("SELECT content FROM blob WHERE uuid=?", (blob_uuid,)).fetchone() |
| 446 | if not row or not row[0]: |
| 447 | return b"" |
| 448 | return _decompress_blob(row[0]) |
| 449 | except sqlite3.OperationalError: |
| 450 | return b"" |
| 451 | |
| 452 | # --- Tickets --- |
| 453 | |
| 454 | def get_tickets(self, status: str | None = None, limit: int = 50) -> list[TicketEntry]: |
| 455 | sql = "SELECT tkt_uuid, title, status, type, tkt_ctime, subsystem, priority FROM ticket" |
| 456 |
| --- fossil/reader.py | |
| +++ fossil/reader.py | |
| @@ -104,10 +104,85 @@ | |
| 104 | # Julian day epoch is Jan 1, 4713 BC (proleptic Julian calendar) |
| 105 | # Unix epoch in Julian days = 2440587.5 |
| 106 | unix_ts = (julian - 2440587.5) * 86400.0 |
| 107 | return datetime.fromtimestamp(unix_ts, tz=UTC) |
| 108 | |
| 109 | |
| 110 | def _apply_fossil_delta(source: bytes, delta: bytes) -> bytes: |
| 111 | """Apply a Fossil delta to a source blob to produce the output. |
| 112 | |
| 113 | Fossil delta format: output_size\\n then commands: |
| 114 | - @offset,length: copy 'length' bytes from source starting at 'offset' |
| 115 | - :length:data: insert 'length' bytes of literal data |
| 116 | - length@ or length,offset: shorthand copy commands |
| 117 | |
| 118 | The actual format uses a base-64-like encoding for integers. |
| 119 | See: https://fossil-scm.org/home/doc/trunk/www/delta_format.wiki |
| 120 | """ |
| 121 | if not delta: |
| 122 | return source |
| 123 | |
| 124 | pos = 0 |
| 125 | out = bytearray() |
| 126 | |
| 127 | def read_int(): |
| 128 | nonlocal pos |
| 129 | val = 0 |
| 130 | while pos < len(delta): |
| 131 | c = delta[pos : pos + 1] |
| 132 | if c in b"0123456789": |
| 133 | val = val * 64 + (c[0] - 48) |
| 134 | elif c in b"ABCDEFGHIJKLMNOPQRSTUVWXYZ": |
| 135 | val = val * 64 + (c[0] - 55) |
| 136 | elif c in b"abcdefghijklmnopqrstuvwxyz": |
| 137 | val = val * 64 + (c[0] - 87) |
| 138 | elif c == b".": |
| 139 | val = val * 64 + 62 |
| 140 | elif c == b"/": |
| 141 | val = val * 64 + 63 |
| 142 | else: |
| 143 | break |
| 144 | pos += 1 |
| 145 | return val |
| 146 | |
| 147 | # Read output size |
| 148 | output_size = read_int() |
| 149 | if pos < len(delta) and delta[pos : pos + 1] == b"\n": |
| 150 | pos += 1 |
| 151 | |
| 152 | while pos < len(delta): |
| 153 | count = read_int() |
| 154 | if pos >= len(delta): |
| 155 | break |
| 156 | cmd = delta[pos : pos + 1] |
| 157 | pos += 1 |
| 158 | |
| 159 | if cmd == b"@": |
| 160 | # Copy from source: count bytes starting at offset |
| 161 | offset = read_int() |
| 162 | if pos < len(delta) and delta[pos : pos + 1] == b",": |
| 163 | pos += 1 |
| 164 | out.extend(source[offset : offset + count]) |
| 165 | elif cmd == b",": |
| 166 | # Copy from source at offset=count, length follows |
| 167 | offset = count |
| 168 | length = read_int() |
| 169 | if pos < len(delta) and delta[pos : pos + 1] in (b"\n", b";"): |
| 170 | pos += 1 |
| 171 | out.extend(source[offset : offset + length]) |
| 172 | elif cmd == b":": |
| 173 | # Insert literal data |
| 174 | out.extend(delta[pos : pos + count]) |
| 175 | pos += count |
| 176 | elif cmd == b";": |
| 177 | # End of delta with checksum |
| 178 | break |
| 179 | elif cmd == b"\n": |
| 180 | continue |
| 181 | |
| 182 | return bytes(out[:output_size]) if output_size else bytes(out) |
| 183 | |
| 184 | |
| 185 | def _decompress_blob(data: bytes) -> bytes: |
| 186 | """Decompress a Fossil blob. |
| 187 | |
| 188 | Fossil stores blobs with a 4-byte big-endian size prefix followed by |
| @@ -439,17 +514,36 @@ | |
| 514 | ] |
| 515 | except sqlite3.OperationalError: |
| 516 | return [] |
| 517 | |
| 518 | def get_file_content(self, blob_uuid: str) -> bytes: |
| 519 | """Get file content, resolving delta compression chains.""" |
| 520 | try: |
| 521 | return self._resolve_blob(blob_uuid) |
| 522 | except Exception: |
| 523 | return b"" |
| 524 | |
| 525 | def _resolve_blob(self, uuid_or_rid, by_rid=False) -> bytes: |
| 526 | """Resolve a blob, following delta chains if needed.""" |
| 527 | if by_rid: |
| 528 | row = self.conn.execute("SELECT rid, content FROM blob WHERE rid=?", (uuid_or_rid,)).fetchone() |
| 529 | else: |
| 530 | row = self.conn.execute("SELECT rid, content FROM blob WHERE uuid=?", (uuid_or_rid,)).fetchone() |
| 531 | if not row or not row["content"]: |
| 532 | return b"" |
| 533 | |
| 534 | rid = row["rid"] |
| 535 | data = _decompress_blob(row["content"]) |
| 536 | |
| 537 | # Check if this blob is delta-compressed |
| 538 | delta_row = self.conn.execute("SELECT srcid FROM delta WHERE rid=?", (rid,)).fetchone() |
| 539 | if delta_row: |
| 540 | # Recursively resolve the source blob |
| 541 | source = self._resolve_blob(delta_row["srcid"], by_rid=True) |
| 542 | return _apply_fossil_delta(source, data) |
| 543 | |
| 544 | return data |
| 545 | |
| 546 | # --- Tickets --- |
| 547 | |
| 548 | def get_tickets(self, status: str | None = None, limit: int = 50) -> list[TicketEntry]: |
| 549 | sql = "SELECT tkt_uuid, title, status, type, tkt_ctime, subsystem, priority FROM ticket" |
| 550 |