/*
 * Resizable, Scalable, Concurrent Hash Table
 *
 * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
 * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
 *
 * Code partially derived from nft_hash
 * Rewritten with rehash code from br_multicast plus single list
 * pointer as suggested by Josh Triplett
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/rhashtable.h>
#include <linux/err.h>

#define HASH_DEFAULT_SIZE	64UL
#define HASH_MIN_SIZE		4U
#define BUCKET_LOCKS_PER_CPU	32UL

static u32 head_hashfn(struct rhashtable *ht,
		       const struct bucket_table *tbl,
		       const struct rhash_head *he)
{
	return rht_head_hashfn(ht, tbl, he, ht->p);
}

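/*
 * Allocate the array of per-bucket spinlocks.  The lock count scales with
 * the number of possible CPUs (capped at 64) times locks_mul, rounded up to
 * a power of two, but never exceeds half the number of buckets.
 */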
static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl,
			      gfp_t gfp)
{
	unsigned int i, size;
	unsigned int nr_pcpus = num_possible_cpus();

	nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
	size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul);

	/* Never allocate more than 0.5 locks per bucket */
	size = min_t(unsigned int, size, tbl->size >> 1);

	if (sizeof(spinlock_t) != 0) {
		if (gfp != GFP_KERNEL)
			gfp |= __GFP_NOWARN | __GFP_NORETRY;

		tbl->locks = kmalloc_array(size, sizeof(spinlock_t), gfp);
		if (!tbl->locks)
			return -ENOMEM;
		for (i = 0; i < size; i++)
			spin_lock_init(&tbl->locks[i]);
	}
	tbl->locks_mask = size - 1;

	return 0;
}

static void bucket_table_free(struct bucket_table *tbl)
{
	if (tbl)
		kvfree(tbl->locks);

	kvfree(tbl);
}

static void bucket_table_free_rcu(struct rcu_head *head)
{
	bucket_table_free(container_of(head, struct bucket_table, rcu));
}

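/*
 * Allocate a bucket table plus its lock array.  Small tables and all
 * non-GFP_KERNEL requests use kzalloc(); large GFP_KERNEL requests fall
 * back to vzalloc().  Every bucket starts out as its nulls marker.
 */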
static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
					       size_t nbuckets,
					       gfp_t gfp)
{
	struct bucket_table *tbl = NULL;
	size_t size;
	int i;

	size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]);
	if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) ||
	    gfp != GFP_KERNEL)
		tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY);
	if (tbl == NULL && gfp == GFP_KERNEL)
		tbl = vzalloc(size);
	if (tbl == NULL)
		return NULL;

	tbl->size = nbuckets;

	if (alloc_bucket_locks(ht, tbl, gfp) < 0) {
		bucket_table_free(tbl);
		return NULL;
	}

	INIT_LIST_HEAD(&tbl->walkers);

	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));

	for (i = 0; i < nbuckets; i++)
		INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);

	return tbl;
}

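/* Follow the future_tbl chain and return the newest table. */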
static struct bucket_table *rhashtable_last_table(struct rhashtable *ht,
						  struct bucket_table *tbl)
{
	struct bucket_table *new_tbl;

	do {
		new_tbl = tbl;
		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
	} while (tbl);

	return new_tbl;
}

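/*
 * Move the last entry of the given old-table bucket chain into its bucket
 * in the newest table.  Returns -ENOENT once the old chain is empty.
 */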
static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl = rhashtable_last_table(ht,
		rht_dereference_rcu(old_tbl->future_tbl, ht));
	struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash];
	int err = -ENOENT;
	struct rhash_head *head, *next, *entry;
	spinlock_t *new_bucket_lock;
	unsigned int new_hash;

	rht_for_each(entry, old_tbl, old_hash) {
		err = 0;
		next = rht_dereference_bucket(entry->next, old_tbl, old_hash);

		if (rht_is_a_nulls(next))
			break;

		pprev = &entry->next;
	}

	if (err)
		goto out;

	new_hash = head_hashfn(ht, new_tbl, entry);

	new_bucket_lock = rht_bucket_lock(new_tbl, new_hash);

	spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING);
	head = rht_dereference_bucket(new_tbl->buckets[new_hash],
				      new_tbl, new_hash);

	RCU_INIT_POINTER(entry->next, head);

	rcu_assign_pointer(new_tbl->buckets[new_hash], entry);
	spin_unlock(new_bucket_lock);

	rcu_assign_pointer(*pprev, next);

out:
	return err;
}

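/* Drain one old-table bucket, moving every entry into the new table. */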
static void rhashtable_rehash_chain(struct rhashtable *ht,
				    unsigned int old_hash)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	spinlock_t *old_bucket_lock;

	old_bucket_lock = rht_bucket_lock(old_tbl, old_hash);

	spin_lock_bh(old_bucket_lock);
	while (!rhashtable_rehash_one(ht, old_hash))
		;
	old_tbl->rehash++;
	spin_unlock_bh(old_bucket_lock);
}

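/*
 * Link new_tbl as old_tbl's successor so that new insertions land in the
 * new table.  Returns -EEXIST if another rehash already attached a table.
 */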
static int rhashtable_rehash_attach(struct rhashtable *ht,
				    struct bucket_table *old_tbl,
				    struct bucket_table *new_tbl)
{
	/* Protect future_tbl using the first bucket lock. */
	spin_lock_bh(old_tbl->locks);

	/* Did somebody beat us to it? */
	if (rcu_access_pointer(old_tbl->future_tbl)) {
		spin_unlock_bh(old_tbl->locks);
		return -EEXIST;
	}

	/* Make insertions go into the new, empty table right away. Deletions
	 * and lookups will be attempted in both tables until we synchronize.
	 */
	rcu_assign_pointer(old_tbl->future_tbl, new_tbl);

	spin_unlock_bh(old_tbl->locks);

	return 0;
}

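/*
 * Complete a pending rehash: move every old bucket, publish the new table,
 * detach any walkers still referencing the old table and free it after an
 * RCU grace period.  Returns -EAGAIN if yet another table is already queued
 * behind the one just installed.
 */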
static int rhashtable_rehash_table(struct rhashtable *ht)
{
	struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
	struct bucket_table *new_tbl;
	struct rhashtable_walker *walker;
	unsigned int old_hash;

	new_tbl = rht_dereference(old_tbl->future_tbl, ht);
	if (!new_tbl)
		return 0;

	for (old_hash = 0; old_hash < old_tbl->size; old_hash++)
		rhashtable_rehash_chain(ht, old_hash);

	/* Publish the new table pointer. */
	rcu_assign_pointer(ht->tbl, new_tbl);

	spin_lock(&ht->lock);
	list_for_each_entry(walker, &old_tbl->walkers, list)
		walker->tbl = NULL;
	spin_unlock(&ht->lock);

	/* Wait for readers. All new readers will see the new
	 * table, and thus no references to the old table will
	 * remain.
	 */
	call_rcu(&old_tbl->rcu, bucket_table_free_rcu);

	return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0;
}

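/*
 * Attach a table of twice the current size; the entries are moved over
 * later by rhashtable_rehash_table().
 */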
static int rhashtable_expand(struct rhashtable *ht)
{
	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
	int err;

	old_tbl = rhashtable_last_table(ht, old_tbl);

	new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL);
	if (new_tbl == NULL)
		return -ENOMEM;

	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
	if (err)
		bucket_table_free(new_tbl);

	return err;
}

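/*
 * Shrink the table to roughly 3/2 of the current element count, but never
 * below min_size.  Only called when automatic_shrinking is enabled.
 */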
static int rhashtable_shrink(struct rhashtable *ht)
{
	struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht);
	unsigned int nelems = atomic_read(&ht->nelems);
	unsigned int size = 0;
	int err;

	if (nelems)
		size = roundup_pow_of_two(nelems * 3 / 2);
	if (size < ht->p.min_size)
		size = ht->p.min_size;

	if (old_tbl->size <= size)
		return 0;

	if (rht_dereference(old_tbl->future_tbl, ht))
		return -EEXIST;

	new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
	if (new_tbl == NULL)
		return -ENOMEM;

	err = rhashtable_rehash_attach(ht, old_tbl, new_tbl);
	if (err)
		bucket_table_free(new_tbl);

	return err;
}

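/*
 * Deferred work: expand above 75% utilisation, shrink below 30% when
 * automatic shrinking is enabled, then carry out any pending rehash.
 * Reschedules itself while more rehashing remains.
 */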
static void rht_deferred_worker(struct work_struct *work)
{
	struct rhashtable *ht;
	struct bucket_table *tbl;
	int err = 0;

	ht = container_of(work, struct rhashtable, run_work);
	mutex_lock(&ht->mutex);

	tbl = rht_dereference(ht->tbl, ht);
	tbl = rhashtable_last_table(ht, tbl);

	if (rht_grow_above_75(ht, tbl))
		rhashtable_expand(ht);
	else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl))
		rhashtable_shrink(ht);

	err = rhashtable_rehash_table(ht);

	mutex_unlock(&ht->mutex);

	if (err)
		schedule_work(&ht->run_work);
}

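/*
 * Return true if the chain at the given bucket already holds at least
 * ht->elasticity entries (see the comment in rhashtable_init()).
 */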
static bool rhashtable_check_elasticity(struct rhashtable *ht,
					struct bucket_table *tbl,
					unsigned int hash)
{
	unsigned int elasticity = ht->elasticity;
	struct rhash_head *head;

	rht_for_each(head, tbl, hash)
		if (!--elasticity)
			return true;

	return false;
}

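/*
 * Insert slow path for when a chain got too long or the table too full:
 * allocate a replacement table with GFP_ATOMIC (doubling the size if we
 * are above 75% utilisation), attach it and kick the deferred worker.
 * The insert is not failed if somebody else already attached a new table.
 */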
int rhashtable_insert_rehash(struct rhashtable *ht,
			     struct bucket_table *tbl)
{
	struct bucket_table *old_tbl;
	struct bucket_table *new_tbl;
	unsigned int size;
	int err;

	old_tbl = rht_dereference_rcu(ht->tbl, ht);

	size = tbl->size;

	err = -EBUSY;

	if (rht_grow_above_75(ht, tbl))
		size *= 2;
	/* Do not schedule more than one rehash */
	else if (old_tbl != tbl)
		goto fail;

	err = -ENOMEM;

	new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC);
	if (new_tbl == NULL)
		goto fail;

	err = rhashtable_rehash_attach(ht, tbl, new_tbl);
	if (err) {
		bucket_table_free(new_tbl);
		if (err == -EEXIST)
			err = 0;
	} else
		schedule_work(&ht->run_work);

	return err;

fail:
	/* Do not fail the insert if someone else did a rehash. */
	if (likely(rcu_dereference_raw(tbl->future_tbl)))
		return 0;

	/* Schedule async rehash to retry allocation in process context. */
	if (err == -ENOMEM)
		schedule_work(&ht->run_work);

	return err;
}

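/*
 * Slow-path insert: under the bucket lock of the newest table, re-check for
 * a duplicate key, the maximum element count, chain elasticity and the 100%
 * growth threshold, then link the object at the head of its chain.  Returns
 * NULL on success, the table to retry against when a rehash is needed, or
 * an ERR_PTR() on error.
 */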
struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
					    const void *key,
					    struct rhash_head *obj,
					    struct bucket_table *tbl)
{
	struct rhash_head *head;
	unsigned int hash;
	int err;

	tbl = rhashtable_last_table(ht, tbl);
	hash = head_hashfn(ht, tbl, obj);
	spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);

	err = -EEXIST;
	if (key && rhashtable_lookup_fast(ht, key, ht->p))
		goto exit;

	err = -E2BIG;
	if (unlikely(rht_grow_above_max(ht, tbl)))
		goto exit;

	err = -EAGAIN;
	if (rhashtable_check_elasticity(ht, tbl, hash) ||
	    rht_grow_above_100(ht, tbl))
		goto exit;

	err = 0;

	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);

	RCU_INIT_POINTER(obj->next, head);

	rcu_assign_pointer(tbl->buckets[hash], obj);

	atomic_inc(&ht->nelems);

exit:
	spin_unlock(rht_bucket_lock(tbl, hash));

	if (err == 0)
		return NULL;
	else if (err == -EAGAIN)
		return tbl;
	else
		return ERR_PTR(err);
}

static size_t rounded_hashtable_size(const struct rhashtable_params *params)
{
	return max(roundup_pow_of_two(params->nelem_hint * 4 / 3),
		   (unsigned long)params->min_size);
}

static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
{
	return jhash2(key, length, seed);
}

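/*
 * Initialise a hash table from the caller-supplied parameters: validate the
 * hash/compare callbacks, round the size limits to powers of two, pick a
 * default hash function (jhash, or jhash2 for u32-aligned keys) and allocate
 * the initial bucket table.
 */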
int rhashtable_init(struct rhashtable *ht,
		    const struct rhashtable_params *params)
{
	struct bucket_table *tbl;
	size_t size;

	size = HASH_DEFAULT_SIZE;

	if ((!params->key_len && !params->obj_hashfn) ||
	    (params->obj_hashfn && !params->obj_cmpfn))
		return -EINVAL;

	if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT))
		return -EINVAL;

	memset(ht, 0, sizeof(*ht));
	mutex_init(&ht->mutex);
	spin_lock_init(&ht->lock);
	memcpy(&ht->p, params, sizeof(*params));

	if (params->min_size)
		ht->p.min_size = roundup_pow_of_two(params->min_size);

	if (params->max_size)
		ht->p.max_size = rounddown_pow_of_two(params->max_size);

	if (params->insecure_max_entries)
		ht->p.insecure_max_entries =
			rounddown_pow_of_two(params->insecure_max_entries);
	else
		ht->p.insecure_max_entries = ht->p.max_size * 2;

	ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE);

	if (params->nelem_hint)
		size = rounded_hashtable_size(&ht->p);

	/* The maximum (not average) chain length grows with the
	 * size of the hash table, at a rate of (log N)/(log log N).
	 * The value of 16 is selected so that even if the hash
	 * table grew to 2^32 you would not expect the maximum
	 * chain length to exceed it unless we are under attack
	 * (or extremely unlucky).
	 *
	 * As this limit is only to detect attacks, we don't need
	 * to set it to a lower value as you'd need the chain
	 * length to vastly exceed 16 to have any real effect
	 * on the system.
	 */
	if (!params->insecure_elasticity)
		ht->elasticity = 16;

	if (params->locks_mul)
		ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
	else
		ht->p.locks_mul = BUCKET_LOCKS_PER_CPU;

	ht->key_len = ht->p.key_len;
	if (!params->hashfn) {
		ht->p.hashfn = jhash;

		if (!(ht->key_len & (sizeof(u32) - 1))) {
			ht->key_len /= sizeof(u32);
			ht->p.hashfn = rhashtable_jhash2;
		}
	}

	tbl = bucket_table_alloc(ht, size, GFP_KERNEL);
	if (tbl == NULL)
		return -ENOMEM;

	atomic_set(&ht->nelems, 0);

	RCU_INIT_POINTER(ht->tbl, tbl);

	INIT_WORK(&ht->run_work, rht_deferred_worker);

	return 0;
}

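/*
 * Cancel any deferred resize work and free the bucket table.  The elements
 * themselves are not freed; the caller must ensure no concurrent access.
 */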
void rhashtable_destroy(struct rhashtable *ht)
{
	struct bucket_table *tbl;

	cancel_work_sync(&ht->run_work);

	mutex_lock(&ht->mutex);
	tbl = rht_dereference(ht->tbl, ht);
	bucket_table_free(tbl);
	mutex_unlock(&ht->mutex);
}
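
/*
 * Minimal usage sketch (illustrative comment only, not compiled).  It
 * assumes the inline fast-path helpers from linux/rhashtable.h, mirroring
 * the upstream kernel API; "struct foo" and its fields are made up for the
 * example.
 *
 *	struct foo {
 *		u32			key;
 *		struct rhash_head	node;
 *	};
 *
 *	static const struct rhashtable_params foo_params = {
 *		.head_offset	= offsetof(struct foo, node),
 *		.key_offset	= offsetof(struct foo, key),
 *		.key_len	= sizeof(u32),
 *	};
 *
 *	struct rhashtable ht;
 *	struct foo *obj = kzalloc(sizeof(*obj), GFP_KERNEL);
 *	u32 key = 42;
 *
 *	obj->key = key;
 *	rhashtable_init(&ht, &foo_params);
 *	rhashtable_insert_fast(&ht, &obj->node, foo_params);
 *	obj = rhashtable_lookup_fast(&ht, &key, foo_params);
 *	rhashtable_remove_fast(&ht, &obj->node, foo_params);
 *	rhashtable_destroy(&ht);
 */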