From 57ac8a457b8205ef4f704d71ae3493d2a5970b49 Mon Sep 17 00:00:00 2001
From: Erik Boasson
Date: Wed, 23 Oct 2019 17:38:55 +0200
Subject: [PATCH] Skip irrelevant buckets in hopscotch lookup (#270)

When scanning buckets while looking for a specific element, it is faster
to only inspect those buckets for which the corresponding bit in
"hopinfo" is set. Sadly, simple scanning is faster than the far more
elegant perfect hash based on a De Bruijn sequence.

Add a simple test/performance measurement.

Signed-off-by: Erik Boasson
---
Review notes (below the "---" marker, so not part of the commit message):

* Both lookup loops still shift "hopinfo" right by one per iteration; the
  change is that a bucket's data pointer is only loaded and compared when
  bit 0 of the shifted value is set.
* The new test runs hh, chh and ehh through one "struct ops" table. Each
  of the MAX_ITERS steps either removes a present key or adds an absent
  one, checks lookup before and after, and the run ends by printing the
  average ns per step.
* init() clears nkeys_hist[0 .. MAX_NKEYS-1], but the theory increments
  nkeys_hist[nk] with nk up to MAX_NKEYS, so the last slot is carried over
  between data points. Nothing in this patch reads the histogram.
* The four <...> header names in tests/hopscotch.c were blank in the copy
  under review; they are filled in from the identifiers the file uses
  (printf/fflush, qsort, assert, PRIu64). Confirm against the tree.
* Leading whitespace was also lost and is reconstructed here; use
  "git apply --ignore-whitespace" if the context does not match.
* The address after the author name on the From: and Signed-off-by: lines
  is missing and has not been guessed.

 src/ddsrt/src/hopscotch.c      |  20 ++--
 src/ddsrt/tests/CMakeLists.txt |   1 +
 src/ddsrt/tests/hopscotch.c    | 209 +++++++++++++++++++++++++++++++++
 3 files changed, 222 insertions(+), 8 deletions(-)
 create mode 100644 src/ddsrt/tests/hopscotch.c

diff --git a/src/ddsrt/src/hopscotch.c b/src/ddsrt/src/hopscotch.c
index b6869b1..46522bc 100644
--- a/src/ddsrt/src/hopscotch.c
+++ b/src/ddsrt/src/hopscotch.c
@@ -84,10 +84,12 @@ static void *ddsrt_hh_lookup_internal (const struct ddsrt_hh *rt, const uint32_t
     uint32_t hopinfo = rt->buckets[bucket].hopinfo;
     uint32_t idx;
     for (idx = 0; hopinfo != 0; hopinfo >>= 1, idx++) {
-        const uint32_t bidx = (bucket + idx) & idxmask;
-        void *data = rt->buckets[bidx].data;
-        if (data && rt->equals (data, template))
-            return data;
+        if (hopinfo & 1) {
+            const uint32_t bidx = (bucket + idx) & idxmask;
+            void *data = rt->buckets[bidx].data;
+            if (data && rt->equals (data, template))
+                return data;
+        }
     }
     return NULL;
 }
@@ -453,10 +455,12 @@ static void *ddsrt_chh_lookup_internal (struct ddsrt_chh_bucket_array const * co
         ddsrt_atomic_fence_ldld ();
         hopinfo = ddsrt_atomic_ld32 (&bs[bucket].hopinfo);
         for (idx = 0; hopinfo != 0; hopinfo >>= 1, idx++) {
-            const uint32_t bidx = (bucket + idx) & idxmask;
-            void *data = ddsrt_atomic_ldvoidp (&bs[bidx].data);
-            if (ddsrt_chh_data_valid_p (data) && equals (data, template)) {
-                return data;
+            if (hopinfo & 1) {
+                const uint32_t bidx = (bucket + idx) & idxmask;
+                void *data = ddsrt_atomic_ldvoidp (&bs[bidx].data);
+                if (ddsrt_chh_data_valid_p (data) && equals (data, template)) {
+                    return data;
+                }
             }
         }
         ddsrt_atomic_fence_ldld ();
diff --git a/src/ddsrt/tests/CMakeLists.txt b/src/ddsrt/tests/CMakeLists.txt
index 780a521..0a40ee3 100644
--- a/src/ddsrt/tests/CMakeLists.txt
+++ b/src/ddsrt/tests/CMakeLists.txt
@@ -23,6 +23,7 @@ list(APPEND sources
     "thread_cleanup.c"
     "string.c"
     "log.c"
+    "hopscotch.c"
     "random.c"
     "retcode.c"
     "strlcpy.c"
diff --git a/src/ddsrt/tests/hopscotch.c b/src/ddsrt/tests/hopscotch.c
new file mode 100644
index 0000000..fdb0f0c
--- /dev/null
+++ b/src/ddsrt/tests/hopscotch.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright(c) 2019 ADLINK Technology Limited and others
+ *
+ * This program and the accompanying materials are made available under the
+ * terms of the Eclipse Public License v. 2.0 which is available at
+ * http://www.eclipse.org/legal/epl-2.0, or the Eclipse Distribution License
+ * v. 1.0 which is available at
+ * http://www.eclipse.org/org/documents/edl-v10.php.
+ *
+ * SPDX-License-Identifier: EPL-2.0 OR BSD-3-Clause
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <inttypes.h>
+#include "CUnit/Test.h"
+#include "CUnit/Theory.h"
+
+#include "dds/ddsrt/random.h"
+#include "dds/ddsrt/heap.h"
+#include "dds/ddsrt/time.h"
+#include "dds/ddsrt/hopscotch.h"
+
+#define MAX_NKEYS 10000
+#define MAX_ITERS 1000000
+
+static int nkeys_hist[MAX_NKEYS+1];
+static uint32_t objs[MAX_NKEYS], keys[MAX_NKEYS];
+static uint32_t next_v;
+static ddsrt_prng_t prng;
+
+static uint32_t hash_uint32 (const void *v)
+{
+  const uint64_t m = UINT64_C (10242350189706880077);
+  const uint32_t h = (uint32_t) ((*((uint32_t *) v) * m) >> 32);
+  return h;
+}
+
+static int equals_uint32 (const void *a, const void *b)
+{
+  return *((uint32_t *) a) == *((uint32_t *) b);
+}
+
+static int compare_uint32 (const void *va, const void *vb)
+{
+  const uint32_t *a = va;
+  const uint32_t *b = vb;
+  return (*a == *b) ? 0 : (*a < *b) ? -1 : 1;
+}
+
+static void swap (uint32_t *a, uint32_t *b)
+{
+  uint32_t t = *a;
+  *a = *b;
+  *b = t;
+}
+
+static void init (bool random)
+{
+  uint32_t i;
+  ddsrt_prng_init_simple (&prng, ddsrt_random ());
+  next_v = MAX_NKEYS;
+  for (i = 0; i < MAX_NKEYS; i++)
+  {
+    nkeys_hist[i] = 0;
+    keys[i] = i;
+  }
+  if (random)
+  {
+    /* Generate MAX_NKEYS unique random ints by repeatedly replacing
+       duplicates with other random numbers (this'll take more time the
+       larger MAX_NKEYS is, but for practical values, it is nearly
+       instantaneous) */
+    for (i = 0; i < MAX_NKEYS - 1; i++)
+      objs[i] = ddsrt_prng_random (&prng);
+    do {
+      objs[i] = ddsrt_prng_random (&prng);
+      qsort (objs, MAX_NKEYS, sizeof (*objs), compare_uint32);
+      for (i = 1; i < MAX_NKEYS && objs[i-1] != objs[i]; i++)
+        ;
+    } while (i < MAX_NKEYS);
+  }
+  else
+  {
+    for (i = 0; i < MAX_NKEYS; i++)
+      objs[i] = i;
+  }
+}
+
+struct ops {
+  const char *name;
+  void * (*new) (void);
+  void (*free) (void *h);
+  void * (*lookup) (void *h, const void *v);
+  int (*add) (void *h, const void *v);
+  int (*remove) (void *h, const void *v);
+};
+
+#define WRAP(ret_, f_) static ret_ f_##_w (void *h, const void *v) { return f_ (h, v); }
+WRAP(void *, ddsrt_hh_lookup);
+WRAP(int, ddsrt_hh_add);
+WRAP(int, ddsrt_hh_remove);
+WRAP(void *, ddsrt_chh_lookup);
+WRAP(int, ddsrt_chh_add);
+WRAP(int, ddsrt_chh_remove);
+WRAP(void *, ddsrt_ehh_lookup);
+WRAP(int, ddsrt_ehh_add);
+WRAP(int, ddsrt_ehh_remove);
+#undef WRAP
+
+static void free_buckets (void *bs, void *arg)
+{
+  /* nothing to worry about because this is single threaded */
+  (void) arg;
+  ddsrt_free (bs);
+}
+
+static void *hhnew (void) { return ddsrt_hh_new (1, hash_uint32, equals_uint32); }
+static void hhfree (void *h) { ddsrt_hh_free (h); }
+static void *chhnew (void) { return ddsrt_chh_new (1, hash_uint32, equals_uint32, free_buckets, NULL); }
+static void chhfree (void *h) { ddsrt_chh_free (h); }
+static void *ehhnew (void) { return ddsrt_ehh_new (sizeof (uint32_t), 1, hash_uint32, equals_uint32); }
+static void ehhfree (void *h) { ddsrt_ehh_free (h); }
+
+static const struct ops hhops = {
+  .name = "hh",
+  .new = hhnew,
+  .free = hhfree,
+  .lookup = ddsrt_hh_lookup_w,
+  .add = ddsrt_hh_add_w,
+  .remove = ddsrt_hh_remove_w
+};
+static const struct ops chhops = {
+  .name = "chh",
+  .new = chhnew,
+  .free = chhfree,
+  .lookup = ddsrt_chh_lookup_w,
+  .add = ddsrt_chh_add_w,
+  .remove = ddsrt_chh_remove_w
+};
+static const struct ops ehhops = {
+  .name = "ehh",
+  .new = ehhnew,
+  .free = ehhfree,
+  .lookup = ddsrt_ehh_lookup_w,
+  .add = ddsrt_ehh_add_w,
+  .remove = ddsrt_ehh_remove_w
+};
+
+static void adj_nop (uint32_t *v) { (void) v; }
+static void adj_seq (uint32_t *v) { *v = next_v++; }
+
+typedef void (*adj_fun_t) (uint32_t *v);
+
+CU_TheoryDataPoints (ddsrt_hopscotch, random) = {
+  CU_DataPoints (const struct ops *, &hhops, &chhops, &ehhops, &hhops, &chhops, &ehhops),
+  CU_DataPoints (bool, true, true, true, false, false, false),
+  CU_DataPoints (adj_fun_t, adj_nop, adj_nop, adj_nop, adj_seq, adj_seq, adj_seq),
+  CU_DataPoints (const char *, "nop", "nop", "nop", "seq", "seq", "seq")
+};
+
+CU_Theory ((const struct ops *ops, bool random, adj_fun_t adj, const char *adjname), ddsrt_hopscotch, random)
+{
+  printf ("%s random=%d adj=%s", ops->name, random, adjname);
+  fflush (stdout);
+  init (random);
+  void *h = ops->new ();
+  uint32_t i, nk = 0;
+  uint64_t nn = 0;
+  dds_time_t t0, t1;
+  t0 = ddsrt_time_monotonic ();
+  for (uint32_t iter = 0; iter < MAX_ITERS; iter++)
+  {
+    int r;
+    assert (nk <= MAX_NKEYS);
+    nkeys_hist[nk]++;
+    if (nk == MAX_NKEYS || (nk > 0 && (ddsrt_prng_random (&prng) & 1)))
+    {
+      i = ddsrt_prng_random (&prng) % nk;
+      if (!ops->lookup (h, &objs[keys[i]]))
+        CU_FAIL_FATAL ("key not present\n");
+      r = ops->remove (h, &objs[keys[i]]);
+      if (!r)
+        CU_FAIL_FATAL ("remove failed\n");
+      if (ops->lookup (h, &objs[keys[i]]))
+        CU_FAIL_FATAL ("key still present\n");
+      adj (&objs[keys[i]]);
+      swap (&keys[i], &keys[nk-1]);
+      nk--;
+    }
+    else
+    {
+      i = nk + (ddsrt_prng_random (&prng) % (MAX_NKEYS - nk));
+      if (ops->lookup (h, &objs[keys[i]]))
+        CU_FAIL_FATAL ("key already present\n");
+      r = ops->add (h, &objs[keys[i]]);
+      if (!r)
+        CU_FAIL_FATAL ("add failed\n");
+      if (!ops->lookup (h, &objs[keys[i]]))
+        CU_FAIL_FATAL ("key still not present\n");
+      swap (&keys[i], &keys[nk]);
+      nk++;
+    }
+    nn++;
+  }
+  t1 = ddsrt_time_monotonic ();
+  ops->free (h);
+  printf (" %"PRIu64" %.0f ns/cycle\n", nn, (double) (t1 - t0) / (double) nn);
+}