Fossil SCM

fossil-scm / compat / zlib / contrib / minizip / skipset.h
Blame History Raw 367 lines
1
/* skipset.h -- set operations using a skiplist
2
// Copyright (C) 2024-2026 Mark Adler
3
// See MiniZip_info.txt for the license.
4
5
// This implements a skiplist set, i.e. just keys, no data, with ~O(log n) time
6
// insert and search operations. The application defines the type of a key, and
7
// provides a function to compare two keys.
8
9
// This header is not definitions of functions found in another source file --
10
// it creates the set functions, with the application's key type, right where
11
// the #include is. Before this header is #included, these must be defined:
12
//
13
// 1. A macro or typedef for set_key_t, the type of a key.
14
// 2. A macro or function set_cmp(a, b) to compare two keys. The return values
15
// are < 0 for a < b, 0 for a == b, and > 0 for a > b.
16
// 3. A macro or function set_drop(s, k) to release the key k's resources, if
17
// any, when doing a set_end() or set_clear(). s is a pointer to the set
18
// that key is in, for use with set_free() if desired.
19
//
20
// Example usage:
21
//
22
// typedef int set_key_t;
23
// #define set_cmp(a, b) ((a) < (b) ? -1 : (a) == (b) ? 0 : 1)
24
// #define set_drop(s, k)
25
// #include "skipset.h"
26
//
27
// int test(void) { // return 0: good, 1: bad, -1: out of memory
28
// set_t set;
29
// if (setjmp(set.env))
30
// return -1;
31
// set_start(&set);
32
// set_insert(&set, 2);
33
// set_insert(&set, 1);
34
// set_insert(&set, 7);
35
// int bad = !set_found(&set, 2);
36
// bad = bad || set_found(&set, 5);
37
// set_end(&set);
38
// return bad;
39
// }
40
//
41
// Interface summary (see more details below):
42
// - set_t is the type of the set being operated on (a set_t pointer is passed)
43
// - set_start() initializes a new, empty set (initialize set.env first)
44
// - set_insert() inserts a new key into the set, or not if it's already there
45
// - set_found() determines whether or not a key is in the set
46
// - set_end() ends the use of the set, freeing all memory
47
// - set_clear() empties the set, equivalent to set_end() and then set_start()
48
// - set_ok() checks if set appears to be usable, i.e. started and not ended
49
//
50
// Auxiliary functions available to the application:
51
// - set_alloc() allocates memory with optional tracking (#define SET_TRACK)
52
// - set_free() deallocates memory allocated by set_alloc()
53
// - set_rand() returns 32 random bits (seeded by set_start()) */
54
55
#ifndef SKIPSET_H
56
#define SKIPSET_H
57
58
#include <stdlib.h> /* realloc(), free(), NULL, size_t */
59
#include <stddef.h> /* ptrdiff_t */
60
#include <setjmp.h> /* jmp_buf, longjmp() */
61
#include <errno.h> /* ENOMEM */
62
#include <time.h> /* time(), clock() */
63
#include <assert.h> /* assert.h */
64
#include "ints.h" /* i16_t, ui32_t, ui64_t */
65
66
/* Structures and functions below noted as "--private--" should not be used by
67
// the application. set_t is partially private and partially public -- see the
68
// comments there.
69
70
// There is no POSIX random() in MSVC, and rand() is awful. For portability, we
71
// cannot rely on a library function for random numbers. Instead we use the
72
// fast and effective algorithm below, invented by Melissa O'Neill.
73
74
// *Really* minimal PCG32 code / (c) 2014 M.E. O'Neill / www.pcg-random.org
75
// Licensed under Apache License 2.0 (NO WARRANTY, etc. see website)
76
// --private-- Random number generator state. */
77
typedef struct {
78
ui64_t state; /* 64-bit generator state */
79
ui64_t inc; /* 63-bit sequence id */
80
} set_rand_t;
81
/* --private-- Initialize the state *gen using seed and seq. seed seeds the
82
// advancing 64-bit state. seq is a sequence selection constant. */
83
void set_seed(set_rand_t *gen, ui64_t seed, ui64_t seq) {
84
gen->inc = (seq << 1) | 1;
85
gen->state = (seed + gen->inc) * 6364136223846793005ULL + gen->inc;
86
}
87
/* Start a unique random number sequence using bits from noise sources. */
88
void set_uniq(set_rand_t *gen, const void *ptr) {
89
set_seed(gen, ((ui64_t)(ptrdiff_t)ptr << 32) ^
90
((ui64_t)time(NULL) << 12) ^ clock(), 0);
91
}
92
/* Return 32 random bits, advancing the state *gen. */
93
ui32_t set_rand(set_rand_t *gen) {
94
ui64_t state = gen->state;
95
gen->state = state * 6364136223846793005ULL + gen->inc;
96
ui32_t mix = (ui32_t)(((state >> 18) ^ state) >> 27);
97
int rot = state >> 59;
98
return (mix >> rot) | (mix << ((-rot) & 31));
99
}
100
/* End of PCG32 code. */
101
102
/* --private-- Linked-list node. */
103
typedef struct set_node_s set_node_t;
104
struct set_node_s {
105
set_key_t key; /* the key (not used for head or path) */
106
i16_t size; /* number of allocated pointers in right[] */
107
i16_t fill; /* number of pointers in right[] filled in */
108
set_node_t **right; /* pointer for each level, each to the right */
109
};
110
111
/* A set. The application sets env, may use gen with set_rand(), and may read
112
// allocs and memory. The remaining variables are --private-- . */
113
typedef struct set_s {
114
set_node_t *head; /* skiplist head -- no key, just links */
115
set_node_t *path; /* right[] is path to key from set_found() */
116
set_node_t *node; /* node under construction, in case of longjmp() */
117
i16_t depth; /* maximum depth of the skiplist */
118
ui64_t ran; /* a precious trove of random bits */
119
set_rand_t gen; /* random number generator state */
120
jmp_buf env; /* setjmp() environment for allocation errors */
121
#ifdef SET_TRACK
122
size_t allocs; /* number of allocations */
123
size_t memory; /* total amount of allocated memory (>= requests) */
124
#endif
125
} set_t;
126
127
/* Memory allocation and deallocation. set_alloc(set, ptr, size) returns a
128
// pointer to an allocation of size bytes if ptr is NULL, or the previous
129
// allocation ptr resized to size bytes. set_alloc() will never return NULL.
130
// set_free(set, ptr) frees an allocation created by set_alloc(). These may be
131
// used by the application. e.g. if allocation tracking is desired. */
132
#ifdef SET_TRACK
133
/* Track the number of allocations and the total backing memory size. */
134
# if defined(_WIN32)
135
# include <malloc.h>
136
# define SET_ALLOC_SIZE(ptr) _msize(ptr)
137
# elif defined(__MACH__)
138
# include <malloc/malloc.h>
139
# define SET_ALLOC_SIZE(ptr) malloc_size(ptr)
140
# elif defined(__linux__)
141
# include <malloc.h>
142
# define SET_ALLOC_SIZE(ptr) malloc_usable_size(ptr)
143
# elif defined(__FreeBSD__)
144
# include <malloc_np.h>
145
# define SET_ALLOC_SIZE(ptr) malloc_usable_size(ptr)
146
# elif defined(__NetBSD__)
147
# include <jemalloc/jemalloc.h>
148
# define SET_ALLOC_SIZE(ptr) malloc_usable_size(ptr)
149
# else // e.g. OpenBSD
150
# define SET_ALLOC_SIZE(ptr) 0
151
# endif
152
// With tracking.
153
void *set_alloc(set_t *set, void *ptr, size_t size) {
154
size_t had = ptr == NULL ? 0 : SET_ALLOC_SIZE(ptr);
155
void *mem = realloc(ptr, size);
156
if (mem == NULL)
157
longjmp(set->env, ENOMEM);
158
set->allocs += ptr == NULL;
159
set->memory += SET_ALLOC_SIZE(mem) - had;
160
return mem;
161
}
162
void set_free(set_t *set, void *ptr) {
163
if (ptr != NULL) {
164
set->allocs--;
165
set->memory -= SET_ALLOC_SIZE(ptr);
166
free(ptr);
167
}
168
}
169
#else
170
/* Without tracking. */
171
void *set_alloc(set_t *set, void *ptr, size_t size) {
172
void *mem = realloc(ptr, size);
173
if (mem == NULL)
174
longjmp(set->env, ENOMEM);
175
return mem;
176
}
177
void set_free(set_t *set, void *ptr) {
178
(void)set;
179
free(ptr);
180
}
181
#endif
182
183
/* --private-- Grow node's array right[] as needed to be able to hold at least
184
// want links. If fill is true, assure that the first want links are filled in,
185
// setting them to set->head if not previously filled in. Otherwise it is
186
// assumed that the first want links are about to be filled in. */
187
void set_grow(set_t *set, set_node_t *node, int want, int fill) {
188
if (node->size < want) {
189
int more = node->size ? node->size : 1;
190
while (more < want)
191
more <<= 1;
192
node->right = set_alloc(set, node->right,
193
(size_t)more * sizeof(set_node_t *));
194
node->size = (i16_t)more;
195
}
196
int i;
197
if (fill)
198
for (i = node->fill; i < want; i++)
199
node->right[i] = set->head;
200
node->fill = (i16_t)want;
201
}
202
203
/* --private-- Return a new node. key is left uninitialized. */
204
set_node_t *set_node(set_t *set) {
205
set_node_t *node = set_alloc(set, NULL, sizeof(set_node_t));
206
node->size = 0;
207
node->fill = 0;
208
node->right = NULL;
209
return node;
210
}
211
212
/* --private-- Free the list linked from head, along with the keys. */
213
void set_sweep(set_t *set) {
214
set_node_t *step = set->head->right[0];
215
while (step != set->head) {
216
set_node_t *next = step->right[0]; /* save link to next node */
217
set_drop(set, step->key);
218
set_free(set, step->right);
219
set_free(set, step);
220
step = next;
221
}
222
}
223
224
/* Initialize a new set. set->env must be initialized using setjmp() before
225
// set_start() is called. A longjmp(set->env, ENOMEM) will be used to handle a
226
// memory allocation failure during any of the operations. (See setjmp.h and
227
// errno.h.) The set can still be used if this happens, assuming that it didn't
228
// happen during set_start(). Whether set_start() completed or not, set_end()
229
// can be used to free the set's memory after a longjmp(). */
230
void set_start(set_t *set) {
231
#ifdef SET_TRACK
232
set->allocs = 0;
233
set->memory = 0;
234
#endif
235
set->head = set->path = set->node = NULL; /* in case set_node() fails */
236
set->path = set_node(set);
237
set->head = set_node(set);
238
set_grow(set, set->head, 1, 1); /* one link back to head for an empty set */
239
*(unsigned char *)&set->head->key = 137; /* set id */
240
set->depth = 0;
241
set_uniq(&set->gen, set);
242
set->ran = 1;
243
}
244
245
/* Return true if *set appears to be in a usable state. If *set has been zeroed
246
// out, then set_ok(set) will be false and set_end(set) will be safe. */
247
int set_ok(set_t *set) {
248
return set->head != NULL &&
249
set->head->right != NULL &&
250
*(unsigned char *)&set->head->key == 137;
251
}
252
253
/* Empty the set. This frees the memory used for the previous set contents.
254
// After set_clear(), *set is ready for use, as if after a set_start(). */
255
void set_clear(set_t *set) {
256
assert(set_ok(set) && "improper use");
257
258
/* Free all the keys and their nodes. */
259
set_sweep(set);
260
261
/* Leave the head and path allocations as is. Clear their contents, with
262
// head pointing to itself and setting depth to zero, for an empty set. */
263
set->head->right[0] = set->head;
264
set->head->fill = 1;
265
set->path->fill = 0;
266
set->depth = 0;
267
}
268
269
/* Done using the set -- free all allocations. The only operation on *set
270
// permitted after this is set_start(). Though another set_end() would do no
271
// harm. This can be done at any time after a set_start(), or after a longjmp()
272
// on any allocation failure, including during a set_start(). */
273
void set_end(set_t *set) {
274
if (set->head != NULL) {
275
/* Empty the set and free the head node. */
276
if (set->head->right != NULL) {
277
set_sweep(set);
278
set_free(set, set->head->right);
279
}
280
set_free(set, set->head);
281
set->head = NULL;
282
}
283
if (set->path != NULL) {
284
/* Free the path work area. */
285
set_free(set, set->path->right);
286
set_free(set, set->path);
287
set->path = NULL;
288
}
289
if (set->node != NULL) {
290
/* Free the node that was under construction when longjmp() hit. */
291
set_drop(set, set->node->key);
292
set_free(set, set->node->right);
293
set_free(set, set->node);
294
set->node = NULL;
295
}
296
}
297
298
/* Look for key. Return 1 if found or 0 if not. This also puts the path to get
299
// there in set->path, for use by set_insert(). */
300
int set_found(set_t *set, set_key_t key) {
301
assert(set_ok(set) && "improper use");
302
303
/* Start at depth and work down and right as determined by key comparisons. */
304
set_node_t *head = set->head, *here = head;
305
int i = set->depth;
306
set_grow(set, set->path, i + 1, 0);
307
do {
308
while (here->right[i] != head &&
309
set_cmp(here->right[i]->key, key) < 0)
310
here = here->right[i];
311
set->path->right[i] = here;
312
} while (i--);
313
314
/* See if the key matches. */
315
here = here->right[0];
316
return here != head && set_cmp(here->key, key) == 0;
317
}
318
319
/* Insert the key key. Return 0 on success, or 1 if key is already in the set. */
320
int set_insert(set_t *set, set_key_t key) {
321
assert(set_ok(set) && "improper use");
322
323
if (set_found(set, key))
324
/* That key is already in the set. */
325
return 1;
326
327
/* Randomly generate a new level-- level 0 with probability 1/2, 1 with
328
// probability 1/4, 2 with probability 1/8, etc. */
329
int level = 0;
330
for (;;) {
331
if (set->ran == 1)
332
/* Ran out. Get another 32 random bits. */
333
set->ran = set_rand(&set->gen) | (1ULL << 32);
334
int bit = set->ran & 1;
335
set->ran >>= 1;
336
if (bit)
337
break;
338
assert(level < 32767 &&
339
"Overhead, without any fuss, the stars were going out.");
340
level++;
341
}
342
if (level > set->depth) {
343
/* The maximum depth is now deeper. Update the structures. */
344
set_grow(set, set->path, level + 1, 1);
345
set_grow(set, set->head, level + 1, 1);
346
set->depth = (i16_t)level;
347
}
348
349
/* Make a new node for the provided key, and insert it in the lists up to
350
// and including level. */
351
set->node = set_node(set);
352
set->node->key = key;
353
set_grow(set, set->node, level + 1, 0);
354
int i;
355
for (i = 0; i <= level; i++) {
356
set->node->right[i] = set->path->right[i]->right[i];
357
set->path->right[i]->right[i] = set->node;
358
}
359
set->node = NULL;
360
return 0;
361
}
362
363
#else
364
#error ** another skiplist set already created here
365
/* Would need to implement a prefix in order to support multiple sets. */
366
#endif
367

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button