[PATCH v4 1/3] erofs-utils: add xxh32 library

From: Jingbo Xu
Date: Thu Jul 13 2023 - 22:53:42 EST


Add xxh32 library which could be used by following xattr bloom filter
feature.

Signed-off-by: Jingbo Xu <jefflexu@xxxxxxxxxxxxxxxxx>
---
include/erofs/xxhash.h | 35 +++++++++++++++++
lib/Makefile.am | 3 +-
lib/xxhash.c | 85 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 122 insertions(+), 1 deletion(-)
create mode 100644 include/erofs/xxhash.h
create mode 100644 lib/xxhash.c

diff --git a/include/erofs/xxhash.h b/include/erofs/xxhash.h
new file mode 100644
index 0000000..fd9384e
--- /dev/null
+++ b/include/erofs/xxhash.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __EROFS_XXHASH_H
+#define __EROFS_XXHASH_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "defs.h"
+
+/*
+ * Copied from
+ * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
+ * xxHash - Extremely Fast Hash algorithm
+ */
+
+/**
+ * xxh32() - calculate the 32-bit hash of the input with a given seed.
+ *
+ * @input: The data to hash.
+ * @length: The length of the data to hash.
+ * @seed: The seed can be used to alter the result predictably.
+ *
+ * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
+ *
+ * Return: The 32-bit hash of the data.
+ */
+uint32_t xxh32(const void *input, size_t length, uint32_t seed);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/Makefile.am b/lib/Makefile.am
index faa7311..a049af6 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -23,13 +23,14 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
$(top_srcdir)/include/erofs/xattr.h \
$(top_srcdir)/include/erofs/compress_hints.h \
$(top_srcdir)/include/erofs/fragments.h \
+ $(top_srcdir)/include/erofs/xxhash.h \
$(top_srcdir)/lib/liberofs_private.h

noinst_HEADERS += compressor.h
liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
namei.c data.c compress.c compressor.c zmap.c decompress.c \
compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \
- fragments.c rb_tree.c dedupe.c
+ fragments.c rb_tree.c dedupe.c xxhash.c

liberofs_la_CFLAGS = -Wall -I$(top_srcdir)/include
if ENABLE_LZ4
diff --git a/lib/xxhash.c b/lib/xxhash.c
new file mode 100644
index 0000000..e5f511c
--- /dev/null
+++ b/lib/xxhash.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copied from
+ * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
+ * xxHash - Extremely Fast Hash algorithm
+ */
+#include "erofs/xxhash.h"
+
+/*-*************************************
+ * Macros
+ **************************************/
+#define xxh_rotl32(x, r) ((x << r) | (x >> (32 - r)))
+
+/*-*************************************
+ * Constants
+ **************************************/
+static const uint32_t PRIME32_1 = 2654435761U;
+static const uint32_t PRIME32_2 = 2246822519U;
+static const uint32_t PRIME32_3 = 3266489917U;
+static const uint32_t PRIME32_4 = 668265263U;
+static const uint32_t PRIME32_5 = 374761393U;
+
+/*-***************************
+ * Simple Hash Functions
+ ****************************/
+static uint32_t xxh32_round(uint32_t seed, const uint32_t input)
+{
+ seed += input * PRIME32_2;
+ seed = xxh_rotl32(seed, 13);
+ seed *= PRIME32_1;
+ return seed;
+}
+
+uint32_t xxh32(const void *input, const size_t len, const uint32_t seed)
+{
+ const uint8_t *p = (const uint8_t *)input;
+ const uint8_t *b_end = p + len;
+ uint32_t h32;
+
+ if (len >= 16) {
+ const uint8_t *const limit = b_end - 16;
+ uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
+ uint32_t v2 = seed + PRIME32_2;
+ uint32_t v3 = seed + 0;
+ uint32_t v4 = seed - PRIME32_1;
+
+ do {
+ v1 = xxh32_round(v1, get_unaligned_le32(p));
+ p += 4;
+ v2 = xxh32_round(v2, get_unaligned_le32(p));
+ p += 4;
+ v3 = xxh32_round(v3, get_unaligned_le32(p));
+ p += 4;
+ v4 = xxh32_round(v4, get_unaligned_le32(p));
+ p += 4;
+ } while (p <= limit);
+
+ h32 = xxh_rotl32(v1, 1) + xxh_rotl32(v2, 7) +
+ xxh_rotl32(v3, 12) + xxh_rotl32(v4, 18);
+ } else {
+ h32 = seed + PRIME32_5;
+ }
+
+ h32 += (uint32_t)len;
+
+ while (p + 4 <= b_end) {
+ h32 += get_unaligned_le32(p) * PRIME32_3;
+ h32 = xxh_rotl32(h32, 17) * PRIME32_4;
+ p += 4;
+ }
+
+ while (p < b_end) {
+ h32 += (*p) * PRIME32_5;
+ h32 = xxh_rotl32(h32, 11) * PRIME32_1;
+ p++;
+ }
+
+ h32 ^= h32 >> 15;
+ h32 *= PRIME32_2;
+ h32 ^= h32 >> 13;
+ h32 *= PRIME32_3;
+ h32 ^= h32 >> 16;
+
+ return h32;
+}
--
2.19.1.6.gb485710b