|
1
|
/* |
|
2
|
** Copyright (c) 2007 D. Richard Hipp |
|
3
|
** |
|
4
|
** This program is free software; you can redistribute it and/or |
|
5
|
** modify it under the terms of the Simplified BSD License (also |
|
6
|
** known as the "2-Clause License" or "FreeBSD License".) |
|
7
|
|
|
8
|
** This program is distributed in the hope that it will be useful, |
|
9
|
** but without any warranty; without even the implied warranty of |
|
10
|
** merchantability or fitness for a particular purpose. |
|
11
|
** |
|
12
|
** Author contact information: |
|
13
|
** [email protected] |
|
14
|
** http://www.hwaci.com/drh/ |
|
15
|
** |
|
16
|
******************************************************************************* |
|
17
|
** |
|
18
|
** This file contains code used to implement a "bag" of integers. |
|
19
|
** A bag is an unordered collection without duplicates. In this |
|
20
|
** implementation, all elements must be positive integers. |
|
21
|
*/ |
|
22
|
#include "config.h" |
|
23
|
#include "bag.h" |
|
24
|
#include <assert.h> |
|
25
|
|
|
26
|
|
|
27
|
#if INTERFACE |
|
28
|
/* |
|
29
|
** An integer can appear in the bag at most once. |
|
30
|
** Integers must be positive. |
|
31
|
** |
|
32
|
** On a hash collision, search continues to the next slot in the array, |
|
33
|
** looping back to the beginning of the array when we reach the end. |
|
34
|
** The search stops when a match is found or upon encountering a 0 entry. |
|
35
|
** |
|
36
|
** When an entry is deleted, its value is changed to -1. |
|
37
|
** |
|
38
|
** Bag.cnt is the number of live entries in the table. Bag.used is |
|
39
|
** the number of live entries plus the number of deleted entries. So |
|
40
|
** Bag.used>=Bag.cnt. We want to keep Bag.used-Bag.cnt as small as |
|
41
|
** possible. |
|
42
|
** |
|
43
|
** The length of a search increases as the hash table fills up. So |
|
44
|
** the table is enlarged whenever Bag.used reaches half of Bag.sz. That |
|
45
|
** way, the expected collision length never exceeds 2. |
|
46
|
*/ |
|
47
|
struct Bag { |
|
48
|
int cnt; /* Number of integers in the bag */ |
|
49
|
int sz; /* Number of slots in a[] */ |
|
50
|
int used; /* Number of used slots in a[] */ |
|
51
|
int *a; /* Hash table of integers that are in the bag */ |
|
52
|
}; |
|
53
|
/* |
|
54
|
** An expression for statically initializing a Bag instance, to be |
|
55
|
** assigned to Bag instances at their declaration point. It has |
|
56
|
** the same effect as passing the Bag to bag_init(). |
|
57
|
*/ |
|
58
|
#define Bag_INIT {0,0,0,0} |
|
59
|
#endif |
|
60
|
|
|
61
|
/* |
|
62
|
** Initialize a Bag structure |
|
63
|
*/ |
|
64
|
void bag_init(Bag *p){ |
|
65
|
memset(p, 0, sizeof(*p)); |
|
66
|
} |
|
67
|
|
|
68
|
/* |
|
69
|
** Destroy a Bag. Delete all of its content. |
|
70
|
*/ |
|
71
|
void bag_clear(Bag *p){ |
|
72
|
free(p->a); |
|
73
|
bag_init(p); |
|
74
|
} |
|
75
|
|
|
76
|
/* |
|
77
|
** The hash function |
|
78
|
*/ |
|
79
|
#define bag_hash(i) (((u64)(i))*101) |
|
80
|
|
|
81
|
/* |
|
82
|
** Change the size of the hash table on a bag so that |
|
83
|
** it contains N slots |
|
84
|
** |
|
85
|
** Completely reconstruct the hash table from scratch. Deleted |
|
86
|
** entries (indicated by a -1) are removed. When finished, it |
|
87
|
** should be the case that p->cnt==p->used. |
|
88
|
*/ |
|
89
|
static void bag_resize(Bag *p, int newSize){ |
|
90
|
int i; |
|
91
|
Bag old; |
|
92
|
int nDel = 0; /* Number of deleted entries */ |
|
93
|
int nLive = 0; /* Number of live entries */ |
|
94
|
|
|
95
|
old = *p; |
|
96
|
assert( newSize>old.cnt ); |
|
97
|
p->a = fossil_malloc( sizeof(p->a[0])*newSize ); |
|
98
|
p->sz = newSize; |
|
99
|
memset(p->a, 0, sizeof(p->a[0])*newSize ); |
|
100
|
for(i=0; i<old.sz; i++){ |
|
101
|
int e = old.a[i]; |
|
102
|
if( e>0 ){ |
|
103
|
unsigned h = bag_hash(e)%newSize; |
|
104
|
while( p->a[h] ){ |
|
105
|
h++; |
|
106
|
if( (int)h==newSize ) h = 0; |
|
107
|
} |
|
108
|
p->a[h] = e; |
|
109
|
nLive++; |
|
110
|
}else if( e<0 ){ |
|
111
|
nDel++; |
|
112
|
} |
|
113
|
} |
|
114
|
assert( p->cnt == nLive ); |
|
115
|
assert( p->used == nLive+nDel ); |
|
116
|
p->used = p->cnt; |
|
117
|
bag_clear(&old); |
|
118
|
} |
|
119
|
|
|
120
|
/* |
|
121
|
** Insert element e into the bag if it is not there already. |
|
122
|
** Return TRUE if the insert actually occurred. Return FALSE |
|
123
|
** if the element was already in the bag. |
|
124
|
*/ |
|
125
|
int bag_insert(Bag *p, int e){ |
|
126
|
unsigned h; |
|
127
|
int rc = 0; |
|
128
|
assert( e>0 ); |
|
129
|
if( p->used+1 >= p->sz/2 ){ |
|
130
|
int n = p->sz*2; |
|
131
|
bag_resize(p, n + 20 ); |
|
132
|
} |
|
133
|
h = bag_hash(e)%p->sz; |
|
134
|
while( p->a[h]>0 && p->a[h]!=e ){ |
|
135
|
h++; |
|
136
|
if( (int)h>=p->sz ) h = 0; |
|
137
|
} |
|
138
|
if( p->a[h]<=0 ){ |
|
139
|
if( p->a[h]==0 ) p->used++; |
|
140
|
p->a[h] = e; |
|
141
|
p->cnt++; |
|
142
|
rc = 1; |
|
143
|
} |
|
144
|
return rc; |
|
145
|
} |
|
146
|
|
|
147
|
/* |
|
148
|
** Return true if e in the bag. Return false if it is no. |
|
149
|
*/ |
|
150
|
int bag_find(Bag *p, int e){ |
|
151
|
unsigned h; |
|
152
|
assert( e>0 ); |
|
153
|
if( p->sz==0 ){ |
|
154
|
return 0; |
|
155
|
} |
|
156
|
h = bag_hash(e)%p->sz; |
|
157
|
while( p->a[h] && p->a[h]!=e ){ |
|
158
|
h++; |
|
159
|
if( (int)h>=p->sz ) h = 0; |
|
160
|
} |
|
161
|
return p->a[h]==e; |
|
162
|
} |
|
163
|
|
|
164
|
/* |
|
165
|
** Remove element e from the bag if it exists in the bag. |
|
166
|
** If e is not in the bag, this is a no-op. |
|
167
|
*/ |
|
168
|
void bag_remove(Bag *p, int e){ |
|
169
|
unsigned h; |
|
170
|
assert( e>0 ); |
|
171
|
if( p->sz==0 ) return; |
|
172
|
h = bag_hash(e)%p->sz; |
|
173
|
while( p->a[h] && p->a[h]!=e ){ |
|
174
|
h++; |
|
175
|
if( (int)h>=p->sz ) h = 0; |
|
176
|
} |
|
177
|
if( p->a[h] ){ |
|
178
|
int nx = h+1; |
|
179
|
if( nx>=p->sz ) nx = 0; |
|
180
|
if( p->a[nx]==0 ){ |
|
181
|
p->a[h] = 0; |
|
182
|
p->used--; |
|
183
|
}else{ |
|
184
|
p->a[h] = -1; |
|
185
|
} |
|
186
|
p->cnt--; |
|
187
|
if( p->cnt==0 ){ |
|
188
|
memset(p->a, 0, p->sz*sizeof(p->a[0])); |
|
189
|
p->used = 0; |
|
190
|
}else if( p->sz>40 && p->cnt<p->sz/8 ){ |
|
191
|
bag_resize(p, p->sz/2); |
|
192
|
} |
|
193
|
} |
|
194
|
} |
|
195
|
|
|
196
|
/* |
|
197
|
** Return the first element in the bag. Return 0 if the bag |
|
198
|
** is empty. |
|
199
|
*/ |
|
200
|
int bag_first(Bag *p){ |
|
201
|
int i; |
|
202
|
for(i=0; i<p->sz && p->a[i]<=0; i++){} |
|
203
|
if( i<p->sz ){ |
|
204
|
return p->a[i]; |
|
205
|
}else{ |
|
206
|
return 0; |
|
207
|
} |
|
208
|
} |
|
209
|
|
|
210
|
/* |
|
211
|
** Return the next element in the bag after e. Return 0 if |
|
212
|
** is the last element in the bag. Any insert or removal from |
|
213
|
** the bag might reorder the bag. |
|
214
|
*/ |
|
215
|
int bag_next(Bag *p, int e){ |
|
216
|
unsigned h; |
|
217
|
assert( p->sz>0 ); |
|
218
|
assert( e>0 ); |
|
219
|
h = bag_hash(e)%p->sz; |
|
220
|
while( p->a[h] && p->a[h]!=e ){ |
|
221
|
h++; |
|
222
|
if( (int)h>=p->sz ) h = 0; |
|
223
|
} |
|
224
|
assert( p->a[h] ); |
|
225
|
h++; |
|
226
|
while( (int)h<p->sz && p->a[h]<=0 ){ |
|
227
|
h++; |
|
228
|
} |
|
229
|
return (int)h<p->sz ? p->a[h] : 0; |
|
230
|
} |
|
231
|
|
|
232
|
/* |
|
233
|
** Return the number of elements in the bag. |
|
234
|
*/ |
|
235
|
int bag_count(Bag *p){ |
|
236
|
return p->cnt; |
|
237
|
} |
|
238
|
|