Fossil SCM

Performance improvements to the "bag" object.

drh 2009-08-27 14:04 trunk
Commit 1409fbe38c764e93ca606a195b572dfd70aec1b4
1 file changed +43 -6
+43 -6
--- src/bag.c
+++ src/bag.c
@@ -32,10 +32,25 @@
3232
3333
#if INTERFACE
3434
/*
3535
** An integer can appear in the bag at most once.
3636
** Integers must be positive.
37
+**
38
+** On a hash collision, search continues to the next slot in the array,
39
+** looping back to the beginning of the array when we reach the end.
40
+** The search stops when a match is found or upon encountering a 0 entry.
41
+**
42
+** When an entry is deleted, its value is changed to -1.
43
+**
44
+** Bag.cnt is the number of live entries in the table. Bag.used is
45
+** the number of live entries plus the number of deleted entries. So
46
+** Bag.used>=Bag.cnt. We want to keep Bag.used-Bag.cnt as small as
47
+** possible.
48
+**
49
+** The length of a search increases as the hash table fills up. So
50
+** the table is enlarged whenever Bag.used reaches half of Bag.sz. That
51
+** way, the expected collision length never exceeds 2.
3752
*/
3853
struct Bag {
3954
int cnt; /* Number of integers in the bag */
4055
int sz; /* Number of slots in a[] */
4156
int used; /* Number of used slots in a[] */
@@ -64,14 +79,20 @@
6479
#define bag_hash(i) (i*101)
6580
6681
/*
6782
** Change the size of the hash table on a bag so that
6883
** it contains N slots
84
+**
85
+** Completely reconstruct the hash table from scratch. Deleted
86
+** entries (indicated by a -1) are removed. When finished, it
87
+** should be the case that p->cnt==p->used.
6988
*/
7089
static void bag_resize(Bag *p, int newSize){
7190
int i;
7291
Bag old;
92
+ int nDel = 0; /* Number of deleted entries */
93
+ int nLive = 0; /* Number of live entries */
7394
7495
old = *p;
7596
assert( newSize>old.cnt );
7697
p->a = malloc( sizeof(p->a[0])*newSize );
7798
p->sz = newSize;
@@ -83,12 +104,17 @@
83104
while( p->a[h] ){
84105
h++;
85106
if( h==newSize ) h = 0;
86107
}
87108
p->a[h] = e;
109
+ nLive++;
110
+ }else if( e<0 ){
111
+ nDel++;
88112
}
89113
}
114
+ assert( p->cnt == nLive );
115
+ assert( p->used == nLive+nDel );
90116
p->used = p->cnt;
91117
bag_clear(&old);
92118
}
93119
94120
/*
@@ -99,20 +125,21 @@
99125
int bag_insert(Bag *p, int e){
100126
unsigned h;
101127
int rc = 0;
102128
assert( e>0 );
103129
if( p->used+1 >= p->sz/2 ){
104
- bag_resize(p, p->cnt*2 + 20 );
130
+ int n = p->sz*2;
131
+ bag_resize(p, n + 20 );
105132
}
106133
h = bag_hash(e)%p->sz;
107
- while( p->a[h] && p->a[h]!=e ){
134
+ while( p->a[h]>0 && p->a[h]!=e ){
108135
h++;
109136
if( h>=p->sz ) h = 0;
110137
}
111
- if( p->a[h]==0 ){
138
+ if( p->a[h]<=0 ){
139
+ if( p->a[h]==0 ) p->used++;
112140
p->a[h] = e;
113
- p->used++;
114141
p->cnt++;
115142
rc = 1;
116143
}
117144
return rc;
118145
}
@@ -146,13 +173,23 @@
146173
while( p->a[h] && p->a[h]!=e ){
147174
h++;
148175
if( h>=p->sz ) h = 0;
149176
}
150177
if( p->a[h] ){
151
- p->a[h] = -1;
178
+ int nx = h+1;
179
+ if( nx>=p->sz ) nx = 0;
180
+ if( p->a[nx]==0 ){
181
+ p->a[h] = 0;
182
+ p->used--;
183
+ }else{
184
+ p->a[h] = -1;
185
+ }
152186
p->cnt--;
153
- if( p->sz>20 && p->cnt<p->sz/8 ){
187
+ if( p->cnt==0 ){
188
+ memset(p->a, 0, p->sz*sizeof(p->a[0]));
189
+ p->used = 0;
190
+ }else if( p->sz>40 && p->cnt<p->sz/8 ){
154191
bag_resize(p, p->sz/2);
155192
}
156193
}
157194
}
158195
159196
--- src/bag.c
+++ src/bag.c
@@ -32,10 +32,25 @@
32
33 #if INTERFACE
34 /*
35 ** An integer can appear in the bag at most once.
36 ** Integers must be positive.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37 */
38 struct Bag {
39 int cnt; /* Number of integers in the bag */
40 int sz; /* Number of slots in a[] */
41 int used; /* Number of used slots in a[] */
@@ -64,14 +79,20 @@
64 #define bag_hash(i) (i*101)
65
66 /*
67 ** Change the size of the hash table on a bag so that
68 ** it contains N slots
 
 
 
 
69 */
70 static void bag_resize(Bag *p, int newSize){
71 int i;
72 Bag old;
 
 
73
74 old = *p;
75 assert( newSize>old.cnt );
76 p->a = malloc( sizeof(p->a[0])*newSize );
77 p->sz = newSize;
@@ -83,12 +104,17 @@
83 while( p->a[h] ){
84 h++;
85 if( h==newSize ) h = 0;
86 }
87 p->a[h] = e;
 
 
 
88 }
89 }
 
 
90 p->used = p->cnt;
91 bag_clear(&old);
92 }
93
94 /*
@@ -99,20 +125,21 @@
99 int bag_insert(Bag *p, int e){
100 unsigned h;
101 int rc = 0;
102 assert( e>0 );
103 if( p->used+1 >= p->sz/2 ){
104 bag_resize(p, p->cnt*2 + 20 );
 
105 }
106 h = bag_hash(e)%p->sz;
107 while( p->a[h] && p->a[h]!=e ){
108 h++;
109 if( h>=p->sz ) h = 0;
110 }
111 if( p->a[h]==0 ){
 
112 p->a[h] = e;
113 p->used++;
114 p->cnt++;
115 rc = 1;
116 }
117 return rc;
118 }
@@ -146,13 +173,23 @@
146 while( p->a[h] && p->a[h]!=e ){
147 h++;
148 if( h>=p->sz ) h = 0;
149 }
150 if( p->a[h] ){
151 p->a[h] = -1;
 
 
 
 
 
 
 
152 p->cnt--;
153 if( p->sz>20 && p->cnt<p->sz/8 ){
 
 
 
154 bag_resize(p, p->sz/2);
155 }
156 }
157 }
158
159
--- src/bag.c
+++ src/bag.c
@@ -32,10 +32,25 @@
32
33 #if INTERFACE
34 /*
35 ** An integer can appear in the bag at most once.
36 ** Integers must be positive.
37 **
38 ** On a hash collision, search continues to the next slot in the array,
39 ** looping back to the beginning of the array when we reach the end.
40 ** The search stops when a match is found or upon encountering a 0 entry.
41 **
42 ** When an entry is deleted, its value is changed to -1.
43 **
44 ** Bag.cnt is the number of live entries in the table. Bag.used is
45 ** the number of live entries plus the number of deleted entries. So
46 ** Bag.used>=Bag.cnt. We want to keep Bag.used-Bag.cnt as small as
47 ** possible.
48 **
49 ** The length of a search increases as the hash table fills up. So
50 ** the table is enlarged whenever Bag.used reaches half of Bag.sz. That
51 ** way, the expected collision length never exceeds 2.
52 */
53 struct Bag {
54 int cnt; /* Number of integers in the bag */
55 int sz; /* Number of slots in a[] */
56 int used; /* Number of used slots in a[] */
@@ -64,14 +79,20 @@
79 #define bag_hash(i) (i*101)
80
81 /*
82 ** Change the size of the hash table on a bag so that
83 ** it contains N slots
84 **
85 ** Completely reconstruct the hash table from scratch. Deleted
86 ** entries (indicated by a -1) are removed. When finished, it
87 ** should be the case that p->cnt==p->used.
88 */
89 static void bag_resize(Bag *p, int newSize){
90 int i;
91 Bag old;
92 int nDel = 0; /* Number of deleted entries */
93 int nLive = 0; /* Number of live entries */
94
95 old = *p;
96 assert( newSize>old.cnt );
97 p->a = malloc( sizeof(p->a[0])*newSize );
98 p->sz = newSize;
@@ -83,12 +104,17 @@
104 while( p->a[h] ){
105 h++;
106 if( h==newSize ) h = 0;
107 }
108 p->a[h] = e;
109 nLive++;
110 }else if( e<0 ){
111 nDel++;
112 }
113 }
114 assert( p->cnt == nLive );
115 assert( p->used == nLive+nDel );
116 p->used = p->cnt;
117 bag_clear(&old);
118 }
119
120 /*
@@ -99,20 +125,21 @@
125 int bag_insert(Bag *p, int e){
126 unsigned h;
127 int rc = 0;
128 assert( e>0 );
129 if( p->used+1 >= p->sz/2 ){
130 int n = p->sz*2;
131 bag_resize(p, n + 20 );
132 }
133 h = bag_hash(e)%p->sz;
134 while( p->a[h]>0 && p->a[h]!=e ){
135 h++;
136 if( h>=p->sz ) h = 0;
137 }
138 if( p->a[h]<=0 ){
139 if( p->a[h]==0 ) p->used++;
140 p->a[h] = e;
 
141 p->cnt++;
142 rc = 1;
143 }
144 return rc;
145 }
@@ -146,13 +173,23 @@
173 while( p->a[h] && p->a[h]!=e ){
174 h++;
175 if( h>=p->sz ) h = 0;
176 }
177 if( p->a[h] ){
178 int nx = h+1;
179 if( nx>=p->sz ) nx = 0;
180 if( p->a[nx]==0 ){
181 p->a[h] = 0;
182 p->used--;
183 }else{
184 p->a[h] = -1;
185 }
186 p->cnt--;
187 if( p->cnt==0 ){
188 memset(p->a, 0, p->sz*sizeof(p->a[0]));
189 p->used = 0;
190 }else if( p->sz>40 && p->cnt<p->sz/8 ){
191 bag_resize(p, p->sz/2);
192 }
193 }
194 }
195
196

Keyboard Shortcuts

Open search /
Next entry (timeline) j
Previous entry (timeline) k
Open focused entry Enter
Show this help ?
Toggle theme Top nav button