Fossil SCM
Performance improvements to the "bag" object.
Commit
1409fbe38c764e93ca606a195b572dfd70aec1b4
Parent
50ab5c33e77781f…
1 file changed
+43
-6
+43
-6
| --- src/bag.c | ||
| +++ src/bag.c | ||
| @@ -32,10 +32,25 @@ | ||
| 32 | 32 | |
| 33 | 33 | #if INTERFACE |
| 34 | 34 | /* |
| 35 | 35 | ** An integer can appear in the bag at most once. |
| 36 | 36 | ** Integers must be positive. |
| 37 | +** | |
| 38 | +** On a hash collision, search continues to the next slot in the array, | |
| 39 | +** looping back to the beginning of the array when we reach the end. | |
| 40 | +** The search stops when a match is found or upon encountering a 0 entry. | |
| 41 | +** | |
| 42 | +** When an entry is deleted, its value is changed to -1. | |
| 43 | +** | |
| 44 | +** Bag.cnt is the number of live entries in the table. Bag.used is | |
| 45 | +** the number of live entries plus the number of deleted entries. So | |
| 46 | +** Bag.used>=Bag.cnt. We want to keep Bag.used-Bag.cnt as small as | |
| 47 | +** possible. | |
| 48 | +** | |
| 49 | +** The length of a search increases as the hash table fills up. So | |
| 50 | +** the table is enlarged whenever Bag.used reaches half of Bag.sz. That | |
| 51 | +** way, the expected collision length never exceeds 2. | |
| 37 | 52 | */ |
| 38 | 53 | struct Bag { |
| 39 | 54 | int cnt; /* Number of integers in the bag */ |
| 40 | 55 | int sz; /* Number of slots in a[] */ |
| 41 | 56 | int used; /* Number of used slots in a[] */ |
| @@ -64,14 +79,20 @@ | ||
| 64 | 79 | #define bag_hash(i) (i*101) |
| 65 | 80 | |
| 66 | 81 | /* |
| 67 | 82 | ** Change the size of the hash table on a bag so that |
| 68 | 83 | ** it contains N slots |
| 84 | +** | |
| 85 | +** Completely reconstruct the hash table from scratch. Deleted | |
| 86 | +** entries (indicated by a -1) are removed. When finished, it | |
| 87 | +** should be the case that p->cnt==p->used. | |
| 69 | 88 | */ |
| 70 | 89 | static void bag_resize(Bag *p, int newSize){ |
| 71 | 90 | int i; |
| 72 | 91 | Bag old; |
| 92 | + int nDel = 0; /* Number of deleted entries */ | |
| 93 | + int nLive = 0; /* Number of live entries */ | |
| 73 | 94 | |
| 74 | 95 | old = *p; |
| 75 | 96 | assert( newSize>old.cnt ); |
| 76 | 97 | p->a = malloc( sizeof(p->a[0])*newSize ); |
| 77 | 98 | p->sz = newSize; |
| @@ -83,12 +104,17 @@ | ||
| 83 | 104 | while( p->a[h] ){ |
| 84 | 105 | h++; |
| 85 | 106 | if( h==newSize ) h = 0; |
| 86 | 107 | } |
| 87 | 108 | p->a[h] = e; |
| 109 | + nLive++; | |
| 110 | + }else if( e<0 ){ | |
| 111 | + nDel++; | |
| 88 | 112 | } |
| 89 | 113 | } |
| 114 | + assert( p->cnt == nLive ); | |
| 115 | + assert( p->used == nLive+nDel ); | |
| 90 | 116 | p->used = p->cnt; |
| 91 | 117 | bag_clear(&old); |
| 92 | 118 | } |
| 93 | 119 | |
| 94 | 120 | /* |
| @@ -99,20 +125,21 @@ | ||
| 99 | 125 | int bag_insert(Bag *p, int e){ |
| 100 | 126 | unsigned h; |
| 101 | 127 | int rc = 0; |
| 102 | 128 | assert( e>0 ); |
| 103 | 129 | if( p->used+1 >= p->sz/2 ){ |
| 104 | - bag_resize(p, p->cnt*2 + 20 ); | |
| 130 | + int n = p->sz*2; | |
| 131 | + bag_resize(p, n + 20 ); | |
| 105 | 132 | } |
| 106 | 133 | h = bag_hash(e)%p->sz; |
| 107 | - while( p->a[h] && p->a[h]!=e ){ | |
| 134 | + while( p->a[h]>0 && p->a[h]!=e ){ | |
| 108 | 135 | h++; |
| 109 | 136 | if( h>=p->sz ) h = 0; |
| 110 | 137 | } |
| 111 | - if( p->a[h]==0 ){ | |
| 138 | + if( p->a[h]<=0 ){ | |
| 139 | + if( p->a[h]==0 ) p->used++; | |
| 112 | 140 | p->a[h] = e; |
| 113 | - p->used++; | |
| 114 | 141 | p->cnt++; |
| 115 | 142 | rc = 1; |
| 116 | 143 | } |
| 117 | 144 | return rc; |
| 118 | 145 | } |
| @@ -146,13 +173,23 @@ | ||
| 146 | 173 | while( p->a[h] && p->a[h]!=e ){ |
| 147 | 174 | h++; |
| 148 | 175 | if( h>=p->sz ) h = 0; |
| 149 | 176 | } |
| 150 | 177 | if( p->a[h] ){ |
| 151 | - p->a[h] = -1; | |
| 178 | + int nx = h+1; | |
| 179 | + if( nx>=p->sz ) nx = 0; | |
| 180 | + if( p->a[nx]==0 ){ | |
| 181 | + p->a[h] = 0; | |
| 182 | + p->used--; | |
| 183 | + }else{ | |
| 184 | + p->a[h] = -1; | |
| 185 | + } | |
| 152 | 186 | p->cnt--; |
| 153 | - if( p->sz>20 && p->cnt<p->sz/8 ){ | |
| 187 | + if( p->cnt==0 ){ | |
| 188 | + memset(p->a, 0, p->sz*sizeof(p->a[0])); | |
| 189 | + p->used = 0; | |
| 190 | + }else if( p->sz>40 && p->cnt<p->sz/8 ){ | |
| 154 | 191 | bag_resize(p, p->sz/2); |
| 155 | 192 | } |
| 156 | 193 | } |
| 157 | 194 | } |
| 158 | 195 | |
| 159 | 196 |
| --- src/bag.c | |
| +++ src/bag.c | |
| @@ -32,10 +32,25 @@ | |
| 32 | |
| 33 | #if INTERFACE |
| 34 | /* |
| 35 | ** An integer can appear in the bag at most once. |
| 36 | ** Integers must be positive. |
| 37 | */ |
| 38 | struct Bag { |
| 39 | int cnt; /* Number of integers in the bag */ |
| 40 | int sz; /* Number of slots in a[] */ |
| 41 | int used; /* Number of used slots in a[] */ |
| @@ -64,14 +79,20 @@ | |
| 64 | #define bag_hash(i) (i*101) |
| 65 | |
| 66 | /* |
| 67 | ** Change the size of the hash table on a bag so that |
| 68 | ** it contains N slots |
| 69 | */ |
| 70 | static void bag_resize(Bag *p, int newSize){ |
| 71 | int i; |
| 72 | Bag old; |
| 73 | |
| 74 | old = *p; |
| 75 | assert( newSize>old.cnt ); |
| 76 | p->a = malloc( sizeof(p->a[0])*newSize ); |
| 77 | p->sz = newSize; |
| @@ -83,12 +104,17 @@ | |
| 83 | while( p->a[h] ){ |
| 84 | h++; |
| 85 | if( h==newSize ) h = 0; |
| 86 | } |
| 87 | p->a[h] = e; |
| 88 | } |
| 89 | } |
| 90 | p->used = p->cnt; |
| 91 | bag_clear(&old); |
| 92 | } |
| 93 | |
| 94 | /* |
| @@ -99,20 +125,21 @@ | |
| 99 | int bag_insert(Bag *p, int e){ |
| 100 | unsigned h; |
| 101 | int rc = 0; |
| 102 | assert( e>0 ); |
| 103 | if( p->used+1 >= p->sz/2 ){ |
| 104 | bag_resize(p, p->cnt*2 + 20 ); |
| 105 | } |
| 106 | h = bag_hash(e)%p->sz; |
| 107 | while( p->a[h] && p->a[h]!=e ){ |
| 108 | h++; |
| 109 | if( h>=p->sz ) h = 0; |
| 110 | } |
| 111 | if( p->a[h]==0 ){ |
| 112 | p->a[h] = e; |
| 113 | p->used++; |
| 114 | p->cnt++; |
| 115 | rc = 1; |
| 116 | } |
| 117 | return rc; |
| 118 | } |
| @@ -146,13 +173,23 @@ | |
| 146 | while( p->a[h] && p->a[h]!=e ){ |
| 147 | h++; |
| 148 | if( h>=p->sz ) h = 0; |
| 149 | } |
| 150 | if( p->a[h] ){ |
| 151 | p->a[h] = -1; |
| 152 | p->cnt--; |
| 153 | if( p->sz>20 && p->cnt<p->sz/8 ){ |
| 154 | bag_resize(p, p->sz/2); |
| 155 | } |
| 156 | } |
| 157 | } |
| 158 | |
| 159 |
| --- src/bag.c | |
| +++ src/bag.c | |
| @@ -32,10 +32,25 @@ | |
| 32 | |
| 33 | #if INTERFACE |
| 34 | /* |
| 35 | ** An integer can appear in the bag at most once. |
| 36 | ** Integers must be positive. |
| 37 | ** |
| 38 | ** On a hash collision, search continues to the next slot in the array, |
| 39 | ** looping back to the beginning of the array when we reach the end. |
| 40 | ** The search stops when a match is found or upon encountering a 0 entry. |
| 41 | ** |
| 42 | ** When an entry is deleted, its value is changed to -1. |
| 43 | ** |
| 44 | ** Bag.cnt is the number of live entries in the table. Bag.used is |
| 45 | ** the number of live entries plus the number of deleted entries. So |
| 46 | ** Bag.used>=Bag.cnt. We want to keep Bag.used-Bag.cnt as small as |
| 47 | ** possible. |
| 48 | ** |
| 49 | ** The length of a search increases as the hash table fills up. So |
| 50 | ** the table is enlarged whenever Bag.used reaches half of Bag.sz. That |
| 51 | ** way, the expected collision length never exceeds 2. |
| 52 | */ |
| 53 | struct Bag { |
| 54 | int cnt; /* Number of integers in the bag */ |
| 55 | int sz; /* Number of slots in a[] */ |
| 56 | int used; /* Number of used slots in a[] */ |
| @@ -64,14 +79,20 @@ | |
| 79 | #define bag_hash(i) (i*101) |
| 80 | |
| 81 | /* |
| 82 | ** Change the size of the hash table on a bag so that |
| 83 | ** it contains N slots |
| 84 | ** |
| 85 | ** Completely reconstruct the hash table from scratch. Deleted |
| 86 | ** entries (indicated by a -1) are removed. When finished, it |
| 87 | ** should be the case that p->cnt==p->used. |
| 88 | */ |
| 89 | static void bag_resize(Bag *p, int newSize){ |
| 90 | int i; |
| 91 | Bag old; |
| 92 | int nDel = 0; /* Number of deleted entries */ |
| 93 | int nLive = 0; /* Number of live entries */ |
| 94 | |
| 95 | old = *p; |
| 96 | assert( newSize>old.cnt ); |
| 97 | p->a = malloc( sizeof(p->a[0])*newSize ); |
| 98 | p->sz = newSize; |
| @@ -83,12 +104,17 @@ | |
| 104 | while( p->a[h] ){ |
| 105 | h++; |
| 106 | if( h==newSize ) h = 0; |
| 107 | } |
| 108 | p->a[h] = e; |
| 109 | nLive++; |
| 110 | }else if( e<0 ){ |
| 111 | nDel++; |
| 112 | } |
| 113 | } |
| 114 | assert( p->cnt == nLive ); |
| 115 | assert( p->used == nLive+nDel ); |
| 116 | p->used = p->cnt; |
| 117 | bag_clear(&old); |
| 118 | } |
| 119 | |
| 120 | /* |
| @@ -99,20 +125,21 @@ | |
| 125 | int bag_insert(Bag *p, int e){ |
| 126 | unsigned h; |
| 127 | int rc = 0; |
| 128 | assert( e>0 ); |
| 129 | if( p->used+1 >= p->sz/2 ){ |
| 130 | int n = p->sz*2; |
| 131 | bag_resize(p, n + 20 ); |
| 132 | } |
| 133 | h = bag_hash(e)%p->sz; |
| 134 | while( p->a[h]>0 && p->a[h]!=e ){ |
| 135 | h++; |
| 136 | if( h>=p->sz ) h = 0; |
| 137 | } |
| 138 | if( p->a[h]<=0 ){ |
| 139 | if( p->a[h]==0 ) p->used++; |
| 140 | p->a[h] = e; |
| 141 | p->cnt++; |
| 142 | rc = 1; |
| 143 | } |
| 144 | return rc; |
| 145 | } |
| @@ -146,13 +173,23 @@ | |
| 173 | while( p->a[h] && p->a[h]!=e ){ |
| 174 | h++; |
| 175 | if( h>=p->sz ) h = 0; |
| 176 | } |
| 177 | if( p->a[h] ){ |
| 178 | int nx = h+1; |
| 179 | if( nx>=p->sz ) nx = 0; |
| 180 | if( p->a[nx]==0 ){ |
| 181 | p->a[h] = 0; |
| 182 | p->used--; |
| 183 | }else{ |
| 184 | p->a[h] = -1; |
| 185 | } |
| 186 | p->cnt--; |
| 187 | if( p->cnt==0 ){ |
| 188 | memset(p->a, 0, p->sz*sizeof(p->a[0])); |
| 189 | p->used = 0; |
| 190 | }else if( p->sz>40 && p->cnt<p->sz/8 ){ |
| 191 | bag_resize(p, p->sz/2); |
| 192 | } |
| 193 | } |
| 194 | } |
| 195 | |
| 196 |