Fossil SCM

fossil-scm / compat / zlib / contrib / minizip / skipset.h
Source Blame History 366 lines
6ea30fb… florian 1 /* skipset.h -- set operations using a skiplist
6ea30fb… florian 2 // Copyright (C) 2024-2026 Mark Adler
6ea30fb… florian 3 // See MiniZip_info.txt for the license.
6ea30fb… florian 4
6ea30fb… florian 5 // This implements a skiplist set, i.e. just keys, no data, with ~O(log n) time
6ea30fb… florian 6 // insert and search operations. The application defines the type of a key, and
6ea30fb… florian 7 // provides a function to compare two keys.
6ea30fb… florian 8
6ea30fb… florian 9 // This header is not definitions of functions found in another source file --
6ea30fb… florian 10 // it creates the set functions, with the application's key type, right where
6ea30fb… florian 11 // the #include is. Before this header is #included, these must be defined:
6ea30fb… florian 12 //
6ea30fb… florian 13 // 1. A macro or typedef for set_key_t, the type of a key.
6ea30fb… florian 14 // 2. A macro or function set_cmp(a, b) to compare two keys. The return values
6ea30fb… florian 15 // are < 0 for a < b, 0 for a == b, and > 0 for a > b.
6ea30fb… florian 16 // 3. A macro or function set_drop(s, k) to release the key k's resources, if
6ea30fb… florian 17 // any, when doing a set_end() or set_clear(). s is a pointer to the set
6ea30fb… florian 18 // that key is in, for use with set_free() if desired.
6ea30fb… florian 19 //
6ea30fb… florian 20 // Example usage:
6ea30fb… florian 21 //
6ea30fb… florian 22 // typedef int set_key_t;
6ea30fb… florian 23 // #define set_cmp(a, b) ((a) < (b) ? -1 : (a) == (b) ? 0 : 1)
6ea30fb… florian 24 // #define set_drop(s, k)
6ea30fb… florian 25 // #include "skipset.h"
6ea30fb… florian 26 //
6ea30fb… florian 27 // int test(void) { // return 0: good, 1: bad, -1: out of memory
6ea30fb… florian 28 // set_t set;
6ea30fb… florian 29 // if (setjmp(set.env))
6ea30fb… florian 30 // return -1;
6ea30fb… florian 31 // set_start(&set);
6ea30fb… florian 32 // set_insert(&set, 2);
6ea30fb… florian 33 // set_insert(&set, 1);
6ea30fb… florian 34 // set_insert(&set, 7);
6ea30fb… florian 35 // int bad = !set_found(&set, 2);
6ea30fb… florian 36 // bad = bad || set_found(&set, 5);
6ea30fb… florian 37 // set_end(&set);
6ea30fb… florian 38 // return bad;
6ea30fb… florian 39 // }
6ea30fb… florian 40 //
6ea30fb… florian 41 // Interface summary (see more details below):
6ea30fb… florian 42 // - set_t is the type of the set being operated on (a set_t pointer is passed)
6ea30fb… florian 43 // - set_start() initializes a new, empty set (initialize set.env first)
6ea30fb… florian 44 // - set_insert() inserts a new key into the set, or not if it's already there
6ea30fb… florian 45 // - set_found() determines whether or not a key is in the set
6ea30fb… florian 46 // - set_end() ends the use of the set, freeing all memory
6ea30fb… florian 47 // - set_clear() empties the set, equivalent to set_end() and then set_start()
6ea30fb… florian 48 // - set_ok() checks if set appears to be usable, i.e. started and not ended
6ea30fb… florian 49 //
6ea30fb… florian 50 // Auxiliary functions available to the application:
6ea30fb… florian 51 // - set_alloc() allocates memory with optional tracking (#define SET_TRACK)
6ea30fb… florian 52 // - set_free() deallocates memory allocated by set_alloc()
6ea30fb… florian 53 // - set_rand() returns 32 random bits (seeded by set_start()) */
6ea30fb… florian 54
6ea30fb… florian 55 #ifndef SKIPSET_H
6ea30fb… florian 56 #define SKIPSET_H
6ea30fb… florian 57
6ea30fb… florian 58 #include <stdlib.h> /* realloc(), free(), NULL, size_t */
6ea30fb… florian 59 #include <stddef.h> /* ptrdiff_t */
6ea30fb… florian 60 #include <setjmp.h> /* jmp_buf, longjmp() */
6ea30fb… florian 61 #include <errno.h> /* ENOMEM */
6ea30fb… florian 62 #include <time.h> /* time(), clock() */
6ea30fb… florian 63 #include <assert.h> /* assert.h */
6ea30fb… florian 64 #include "ints.h" /* i16_t, ui32_t, ui64_t */
6ea30fb… florian 65
6ea30fb… florian 66 /* Structures and functions below noted as "--private--" should not be used by
6ea30fb… florian 67 // the application. set_t is partially private and partially public -- see the
6ea30fb… florian 68 // comments there.
6ea30fb… florian 69
6ea30fb… florian 70 // There is no POSIX random() in MSVC, and rand() is awful. For portability, we
6ea30fb… florian 71 // cannot rely on a library function for random numbers. Instead we use the
6ea30fb… florian 72 // fast and effective algorithm below, invented by Melissa O'Neill.
6ea30fb… florian 73
6ea30fb… florian 74 // *Really* minimal PCG32 code / (c) 2014 M.E. O'Neill / www.pcg-random.org
6ea30fb… florian 75 // Licensed under Apache License 2.0 (NO WARRANTY, etc. see website)
6ea30fb… florian 76 // --private-- Random number generator state. */
6ea30fb… florian 77 typedef struct {
6ea30fb… florian 78 ui64_t state; /* 64-bit generator state */
6ea30fb… florian 79 ui64_t inc; /* 63-bit sequence id */
6ea30fb… florian 80 } set_rand_t;
6ea30fb… florian 81 /* --private-- Initialize the state *gen using seed and seq. seed seeds the
6ea30fb… florian 82 // advancing 64-bit state. seq is a sequence selection constant. */
6ea30fb… florian 83 void set_seed(set_rand_t *gen, ui64_t seed, ui64_t seq) {
6ea30fb… florian 84 gen->inc = (seq << 1) | 1;
6ea30fb… florian 85 gen->state = (seed + gen->inc) * 6364136223846793005ULL + gen->inc;
6ea30fb… florian 86 }
6ea30fb… florian 87 /* Start a unique random number sequence using bits from noise sources. */
6ea30fb… florian 88 void set_uniq(set_rand_t *gen, const void *ptr) {
6ea30fb… florian 89 set_seed(gen, ((ui64_t)(ptrdiff_t)ptr << 32) ^
6ea30fb… florian 90 ((ui64_t)time(NULL) << 12) ^ clock(), 0);
6ea30fb… florian 91 }
6ea30fb… florian 92 /* Return 32 random bits, advancing the state *gen. */
6ea30fb… florian 93 ui32_t set_rand(set_rand_t *gen) {
6ea30fb… florian 94 ui64_t state = gen->state;
6ea30fb… florian 95 gen->state = state * 6364136223846793005ULL + gen->inc;
6ea30fb… florian 96 ui32_t mix = (ui32_t)(((state >> 18) ^ state) >> 27);
6ea30fb… florian 97 int rot = state >> 59;
6ea30fb… florian 98 return (mix >> rot) | (mix << ((-rot) & 31));
6ea30fb… florian 99 }
6ea30fb… florian 100 /* End of PCG32 code. */
6ea30fb… florian 101
6ea30fb… florian 102 /* --private-- Linked-list node. */
6ea30fb… florian 103 typedef struct set_node_s set_node_t;
6ea30fb… florian 104 struct set_node_s {
6ea30fb… florian 105 set_key_t key; /* the key (not used for head or path) */
6ea30fb… florian 106 i16_t size; /* number of allocated pointers in right[] */
6ea30fb… florian 107 i16_t fill; /* number of pointers in right[] filled in */
6ea30fb… florian 108 set_node_t **right; /* pointer for each level, each to the right */
6ea30fb… florian 109 };
6ea30fb… florian 110
6ea30fb… florian 111 /* A set. The application sets env, may use gen with set_rand(), and may read
6ea30fb… florian 112 // allocs and memory. The remaining variables are --private-- . */
6ea30fb… florian 113 typedef struct set_s {
6ea30fb… florian 114 set_node_t *head; /* skiplist head -- no key, just links */
6ea30fb… florian 115 set_node_t *path; /* right[] is path to key from set_found() */
6ea30fb… florian 116 set_node_t *node; /* node under construction, in case of longjmp() */
6ea30fb… florian 117 i16_t depth; /* maximum depth of the skiplist */
6ea30fb… florian 118 ui64_t ran; /* a precious trove of random bits */
6ea30fb… florian 119 set_rand_t gen; /* random number generator state */
6ea30fb… florian 120 jmp_buf env; /* setjmp() environment for allocation errors */
6ea30fb… florian 121 #ifdef SET_TRACK
6ea30fb… florian 122 size_t allocs; /* number of allocations */
6ea30fb… florian 123 size_t memory; /* total amount of allocated memory (>= requests) */
6ea30fb… florian 124 #endif
6ea30fb… florian 125 } set_t;
6ea30fb… florian 126
6ea30fb… florian 127 /* Memory allocation and deallocation. set_alloc(set, ptr, size) returns a
6ea30fb… florian 128 // pointer to an allocation of size bytes if ptr is NULL, or the previous
6ea30fb… florian 129 // allocation ptr resized to size bytes. set_alloc() will never return NULL.
6ea30fb… florian 130 // set_free(set, ptr) frees an allocation created by set_alloc(). These may be
6ea30fb… florian 131 // used by the application. e.g. if allocation tracking is desired. */
6ea30fb… florian 132 #ifdef SET_TRACK
6ea30fb… florian 133 /* Track the number of allocations and the total backing memory size. */
6ea30fb… florian 134 # if defined(_WIN32)
6ea30fb… florian 135 # include <malloc.h>
6ea30fb… florian 136 # define SET_ALLOC_SIZE(ptr) _msize(ptr)
6ea30fb… florian 137 # elif defined(__MACH__)
6ea30fb… florian 138 # include <malloc/malloc.h>
6ea30fb… florian 139 # define SET_ALLOC_SIZE(ptr) malloc_size(ptr)
6ea30fb… florian 140 # elif defined(__linux__)
6ea30fb… florian 141 # include <malloc.h>
6ea30fb… florian 142 # define SET_ALLOC_SIZE(ptr) malloc_usable_size(ptr)
6ea30fb… florian 143 # elif defined(__FreeBSD__)
6ea30fb… florian 144 # include <malloc_np.h>
6ea30fb… florian 145 # define SET_ALLOC_SIZE(ptr) malloc_usable_size(ptr)
6ea30fb… florian 146 # elif defined(__NetBSD__)
6ea30fb… florian 147 # include <jemalloc/jemalloc.h>
6ea30fb… florian 148 # define SET_ALLOC_SIZE(ptr) malloc_usable_size(ptr)
6ea30fb… florian 149 # else // e.g. OpenBSD
6ea30fb… florian 150 # define SET_ALLOC_SIZE(ptr) 0
6ea30fb… florian 151 # endif
6ea30fb… florian 152 // With tracking.
6ea30fb… florian 153 void *set_alloc(set_t *set, void *ptr, size_t size) {
6ea30fb… florian 154 size_t had = ptr == NULL ? 0 : SET_ALLOC_SIZE(ptr);
6ea30fb… florian 155 void *mem = realloc(ptr, size);
6ea30fb… florian 156 if (mem == NULL)
6ea30fb… florian 157 longjmp(set->env, ENOMEM);
6ea30fb… florian 158 set->allocs += ptr == NULL;
6ea30fb… florian 159 set->memory += SET_ALLOC_SIZE(mem) - had;
6ea30fb… florian 160 return mem;
6ea30fb… florian 161 }
6ea30fb… florian 162 void set_free(set_t *set, void *ptr) {
6ea30fb… florian 163 if (ptr != NULL) {
6ea30fb… florian 164 set->allocs--;
6ea30fb… florian 165 set->memory -= SET_ALLOC_SIZE(ptr);
6ea30fb… florian 166 free(ptr);
6ea30fb… florian 167 }
6ea30fb… florian 168 }
6ea30fb… florian 169 #else
6ea30fb… florian 170 /* Without tracking. */
6ea30fb… florian 171 void *set_alloc(set_t *set, void *ptr, size_t size) {
6ea30fb… florian 172 void *mem = realloc(ptr, size);
6ea30fb… florian 173 if (mem == NULL)
6ea30fb… florian 174 longjmp(set->env, ENOMEM);
6ea30fb… florian 175 return mem;
6ea30fb… florian 176 }
6ea30fb… florian 177 void set_free(set_t *set, void *ptr) {
6ea30fb… florian 178 (void)set;
6ea30fb… florian 179 free(ptr);
6ea30fb… florian 180 }
6ea30fb… florian 181 #endif
6ea30fb… florian 182
6ea30fb… florian 183 /* --private-- Grow node's array right[] as needed to be able to hold at least
6ea30fb… florian 184 // want links. If fill is true, assure that the first want links are filled in,
6ea30fb… florian 185 // setting them to set->head if not previously filled in. Otherwise it is
6ea30fb… florian 186 // assumed that the first want links are about to be filled in. */
6ea30fb… florian 187 void set_grow(set_t *set, set_node_t *node, int want, int fill) {
6ea30fb… florian 188 if (node->size < want) {
6ea30fb… florian 189 int more = node->size ? node->size : 1;
6ea30fb… florian 190 while (more < want)
6ea30fb… florian 191 more <<= 1;
6ea30fb… florian 192 node->right = set_alloc(set, node->right,
6ea30fb… florian 193 (size_t)more * sizeof(set_node_t *));
6ea30fb… florian 194 node->size = (i16_t)more;
6ea30fb… florian 195 }
6ea30fb… florian 196 int i;
6ea30fb… florian 197 if (fill)
6ea30fb… florian 198 for (i = node->fill; i < want; i++)
6ea30fb… florian 199 node->right[i] = set->head;
6ea30fb… florian 200 node->fill = (i16_t)want;
6ea30fb… florian 201 }
6ea30fb… florian 202
6ea30fb… florian 203 /* --private-- Return a new node. key is left uninitialized. */
6ea30fb… florian 204 set_node_t *set_node(set_t *set) {
6ea30fb… florian 205 set_node_t *node = set_alloc(set, NULL, sizeof(set_node_t));
6ea30fb… florian 206 node->size = 0;
6ea30fb… florian 207 node->fill = 0;
6ea30fb… florian 208 node->right = NULL;
6ea30fb… florian 209 return node;
6ea30fb… florian 210 }
6ea30fb… florian 211
6ea30fb… florian 212 /* --private-- Free the list linked from head, along with the keys. */
6ea30fb… florian 213 void set_sweep(set_t *set) {
6ea30fb… florian 214 set_node_t *step = set->head->right[0];
6ea30fb… florian 215 while (step != set->head) {
6ea30fb… florian 216 set_node_t *next = step->right[0]; /* save link to next node */
6ea30fb… florian 217 set_drop(set, step->key);
6ea30fb… florian 218 set_free(set, step->right);
6ea30fb… florian 219 set_free(set, step);
6ea30fb… florian 220 step = next;
6ea30fb… florian 221 }
6ea30fb… florian 222 }
6ea30fb… florian 223
6ea30fb… florian 224 /* Initialize a new set. set->env must be initialized using setjmp() before
6ea30fb… florian 225 // set_start() is called. A longjmp(set->env, ENOMEM) will be used to handle a
6ea30fb… florian 226 // memory allocation failure during any of the operations. (See setjmp.h and
6ea30fb… florian 227 // errno.h.) The set can still be used if this happens, assuming that it didn't
6ea30fb… florian 228 // happen during set_start(). Whether set_start() completed or not, set_end()
6ea30fb… florian 229 // can be used to free the set's memory after a longjmp(). */
6ea30fb… florian 230 void set_start(set_t *set) {
6ea30fb… florian 231 #ifdef SET_TRACK
6ea30fb… florian 232 set->allocs = 0;
6ea30fb… florian 233 set->memory = 0;
6ea30fb… florian 234 #endif
6ea30fb… florian 235 set->head = set->path = set->node = NULL; /* in case set_node() fails */
6ea30fb… florian 236 set->path = set_node(set);
6ea30fb… florian 237 set->head = set_node(set);
6ea30fb… florian 238 set_grow(set, set->head, 1, 1); /* one link back to head for an empty set */
6ea30fb… florian 239 *(unsigned char *)&set->head->key = 137; /* set id */
6ea30fb… florian 240 set->depth = 0;
6ea30fb… florian 241 set_uniq(&set->gen, set);
6ea30fb… florian 242 set->ran = 1;
6ea30fb… florian 243 }
6ea30fb… florian 244
6ea30fb… florian 245 /* Return true if *set appears to be in a usable state. If *set has been zeroed
6ea30fb… florian 246 // out, then set_ok(set) will be false and set_end(set) will be safe. */
6ea30fb… florian 247 int set_ok(set_t *set) {
6ea30fb… florian 248 return set->head != NULL &&
6ea30fb… florian 249 set->head->right != NULL &&
6ea30fb… florian 250 *(unsigned char *)&set->head->key == 137;
6ea30fb… florian 251 }
6ea30fb… florian 252
6ea30fb… florian 253 /* Empty the set. This frees the memory used for the previous set contents.
6ea30fb… florian 254 // After set_clear(), *set is ready for use, as if after a set_start(). */
6ea30fb… florian 255 void set_clear(set_t *set) {
6ea30fb… florian 256 assert(set_ok(set) && "improper use");
6ea30fb… florian 257
6ea30fb… florian 258 /* Free all the keys and their nodes. */
6ea30fb… florian 259 set_sweep(set);
6ea30fb… florian 260
6ea30fb… florian 261 /* Leave the head and path allocations as is. Clear their contents, with
6ea30fb… florian 262 // head pointing to itself and setting depth to zero, for an empty set. */
6ea30fb… florian 263 set->head->right[0] = set->head;
6ea30fb… florian 264 set->head->fill = 1;
6ea30fb… florian 265 set->path->fill = 0;
6ea30fb… florian 266 set->depth = 0;
6ea30fb… florian 267 }
6ea30fb… florian 268
6ea30fb… florian 269 /* Done using the set -- free all allocations. The only operation on *set
6ea30fb… florian 270 // permitted after this is set_start(). Though another set_end() would do no
6ea30fb… florian 271 // harm. This can be done at any time after a set_start(), or after a longjmp()
6ea30fb… florian 272 // on any allocation failure, including during a set_start(). */
6ea30fb… florian 273 void set_end(set_t *set) {
6ea30fb… florian 274 if (set->head != NULL) {
6ea30fb… florian 275 /* Empty the set and free the head node. */
6ea30fb… florian 276 if (set->head->right != NULL) {
6ea30fb… florian 277 set_sweep(set);
6ea30fb… florian 278 set_free(set, set->head->right);
6ea30fb… florian 279 }
6ea30fb… florian 280 set_free(set, set->head);
6ea30fb… florian 281 set->head = NULL;
6ea30fb… florian 282 }
6ea30fb… florian 283 if (set->path != NULL) {
6ea30fb… florian 284 /* Free the path work area. */
6ea30fb… florian 285 set_free(set, set->path->right);
6ea30fb… florian 286 set_free(set, set->path);
6ea30fb… florian 287 set->path = NULL;
6ea30fb… florian 288 }
6ea30fb… florian 289 if (set->node != NULL) {
6ea30fb… florian 290 /* Free the node that was under construction when longjmp() hit. */
6ea30fb… florian 291 set_drop(set, set->node->key);
6ea30fb… florian 292 set_free(set, set->node->right);
6ea30fb… florian 293 set_free(set, set->node);
6ea30fb… florian 294 set->node = NULL;
6ea30fb… florian 295 }
6ea30fb… florian 296 }
6ea30fb… florian 297
6ea30fb… florian 298 /* Look for key. Return 1 if found or 0 if not. This also puts the path to get
6ea30fb… florian 299 // there in set->path, for use by set_insert(). */
6ea30fb… florian 300 int set_found(set_t *set, set_key_t key) {
6ea30fb… florian 301 assert(set_ok(set) && "improper use");
6ea30fb… florian 302
6ea30fb… florian 303 /* Start at depth and work down and right as determined by key comparisons. */
6ea30fb… florian 304 set_node_t *head = set->head, *here = head;
6ea30fb… florian 305 int i = set->depth;
6ea30fb… florian 306 set_grow(set, set->path, i + 1, 0);
6ea30fb… florian 307 do {
6ea30fb… florian 308 while (here->right[i] != head &&
6ea30fb… florian 309 set_cmp(here->right[i]->key, key) < 0)
6ea30fb… florian 310 here = here->right[i];
6ea30fb… florian 311 set->path->right[i] = here;
6ea30fb… florian 312 } while (i--);
6ea30fb… florian 313
6ea30fb… florian 314 /* See if the key matches. */
6ea30fb… florian 315 here = here->right[0];
6ea30fb… florian 316 return here != head && set_cmp(here->key, key) == 0;
6ea30fb… florian 317 }
6ea30fb… florian 318
6ea30fb… florian 319 /* Insert the key key. Return 0 on success, or 1 if key is already in the set. */
6ea30fb… florian 320 int set_insert(set_t *set, set_key_t key) {
6ea30fb… florian 321 assert(set_ok(set) && "improper use");
6ea30fb… florian 322
6ea30fb… florian 323 if (set_found(set, key))
6ea30fb… florian 324 /* That key is already in the set. */
6ea30fb… florian 325 return 1;
6ea30fb… florian 326
6ea30fb… florian 327 /* Randomly generate a new level-- level 0 with probability 1/2, 1 with
6ea30fb… florian 328 // probability 1/4, 2 with probability 1/8, etc. */
6ea30fb… florian 329 int level = 0;
6ea30fb… florian 330 for (;;) {
6ea30fb… florian 331 if (set->ran == 1)
6ea30fb… florian 332 /* Ran out. Get another 32 random bits. */
6ea30fb… florian 333 set->ran = set_rand(&set->gen) | (1ULL << 32);
6ea30fb… florian 334 int bit = set->ran & 1;
6ea30fb… florian 335 set->ran >>= 1;
6ea30fb… florian 336 if (bit)
6ea30fb… florian 337 break;
6ea30fb… florian 338 assert(level < 32767 &&
6ea30fb… florian 339 "Overhead, without any fuss, the stars were going out.");
6ea30fb… florian 340 level++;
6ea30fb… florian 341 }
6ea30fb… florian 342 if (level > set->depth) {
6ea30fb… florian 343 /* The maximum depth is now deeper. Update the structures. */
6ea30fb… florian 344 set_grow(set, set->path, level + 1, 1);
6ea30fb… florian 345 set_grow(set, set->head, level + 1, 1);
6ea30fb… florian 346 set->depth = (i16_t)level;
6ea30fb… florian 347 }
6ea30fb… florian 348
6ea30fb… florian 349 /* Make a new node for the provided key, and insert it in the lists up to
6ea30fb… florian 350 // and including level. */
6ea30fb… florian 351 set->node = set_node(set);
6ea30fb… florian 352 set->node->key = key;
6ea30fb… florian 353 set_grow(set, set->node, level + 1, 0);
6ea30fb… florian 354 int i;
6ea30fb… florian 355 for (i = 0; i <= level; i++) {
6ea30fb… florian 356 set->node->right[i] = set->path->right[i]->right[i];
6ea30fb… florian 357 set->path->right[i]->right[i] = set->node;
6ea30fb… florian 358 }
6ea30fb… florian 359 set->node = NULL;
6ea30fb… florian 360 return 0;
6ea30fb… florian 361 }
6ea30fb… florian 362
6ea30fb… florian 363 #else
6ea30fb… florian 364 #error ** another skiplist set already created here
6ea30fb… florian 365 /* Would need to implement a prefix in order to support multiple sets. */
6ea30fb… florian 366 #endif

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button