From aa250a3dabdfe4010cd31fb2fb8a068d281644be Mon Sep 17 00:00:00 2001 From: slothnian Date: Wed, 9 Jul 2025 09:04:02 +0000 Subject: [PATCH] lib: Import LZ4KD compression algorithm from HUAWEI ABR-AL60_HarmonyOS4.0.0_opensource.tar.gz Signed-off-by: 0wnerDied --- crypto/Kconfig | 9 + crypto/Makefile | 1 + crypto/lz4kd.c | 95 +++++++ drivers/block/zram/zcomp.c | 3 + include/linux/lz4kd.h | 216 ++++++++++++++++ lib/Kconfig | 8 +- lib/Makefile | 2 + lib/lz4kd/Makefile | 3 + lib/lz4kd/lz4kd_decode.c | 243 ++++++++++++++++++ lib/lz4kd/lz4kd_encode.c | 418 +++++++++++++++++++++++++++++++ lib/lz4kd/lz4kd_encode_private.h | 135 ++++++++++ lib/lz4kd/lz4kd_private.h | 304 ++++++++++++++++++++++ 12 files changed, 1436 insertions(+), 1 deletion(-) create mode 100644 crypto/lz4kd.c create mode 100644 include/linux/lz4kd.h create mode 100644 lib/lz4kd/Makefile create mode 100644 lib/lz4kd/lz4kd_decode.c create mode 100644 lib/lz4kd/lz4kd_encode.c create mode 100644 lib/lz4kd/lz4kd_encode_private.h create mode 100644 lib/lz4kd/lz4kd_private.h diff --git a/crypto/Kconfig b/crypto/Kconfig index 6ea544a67757..13f0a3b3eaf8 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1682,6 +1682,15 @@ config CRYPTO_LZ4 help This is the LZ4 algorithm. +config CRYPTO_LZ4KD + tristate "LZ4KD compression algorithm" + select CRYPTO_ALGAPI + select CRYPTO_ACOMP2 + select LZ4KD_COMPRESS + select LZ4KD_DECOMPRESS + help + This is the LZ4KD algorithm. 
+ config CRYPTO_LZ4HC tristate "LZ4HC compression algorithm" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index 423475b082d8..d292feff17dd 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -123,6 +123,7 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o obj-$(CONFIG_CRYPTO_LZ4) += lz4.o +obj-$(CONFIG_CRYPTO_LZ4KD) += lz4kd.o obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o obj-$(CONFIG_CRYPTO_842) += 842.o obj-$(CONFIG_CRYPTO_RNG2) += rng.o diff --git a/crypto/lz4kd.c b/crypto/lz4kd.c new file mode 100644 index 000000000000..03e8045d9c16 --- /dev/null +++ b/crypto/lz4kd.c @@ -0,0 +1,95 @@ +/* + * Cryptographic API. + * + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * Description: LZ4KD compression algorithm for ZRAM + */ + +#include +#include +#include +#include +#include + + +struct lz4kd_ctx { + void *lz4kd_comp_mem; +}; + +static int lz4kd_init(struct crypto_tfm *tfm) +{ + struct lz4kd_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->lz4kd_comp_mem = vmalloc(lz4kd_encode_state_bytes_min()); + if (!ctx->lz4kd_comp_mem) + return -ENOMEM; + + return 0; +} + +static void lz4kd_exit(struct crypto_tfm *tfm) +{ + struct lz4kd_ctx *ctx = crypto_tfm_ctx(tfm); + vfree(ctx->lz4kd_comp_mem); +} + +static int lz4kd_compress_crypto(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, + unsigned int *dlen) +{ + struct lz4kd_ctx *ctx = crypto_tfm_ctx(tfm); + int ret = 0; + + ret = lz4kd_encode(ctx->lz4kd_comp_mem, src, dst, slen, *dlen, 0); + if (ret < 0) + return -EINVAL; + + if (ret) + *dlen = ret; + + return 0; +} + +static int lz4kd_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, + unsigned int *dlen) +{ + int ret = 0; + + ret = lz4kd_decode(src, dst, slen, *dlen); + if (ret <= 0) + return -EINVAL; + *dlen = ret; + return 0; +} + +static struct crypto_alg alg_lz4kd = { + 
.cra_name = "lz4kd", + .cra_driver_name = "lz4kd-generic", + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_ctxsize = sizeof(struct lz4kd_ctx), + .cra_module = THIS_MODULE, + .cra_init = lz4kd_init, + .cra_exit = lz4kd_exit, + .cra_u = { + .compress = { + .coa_compress = lz4kd_compress_crypto, + .coa_decompress = lz4kd_decompress_crypto + } + } +}; + +static int __init lz4kd_mod_init(void) +{ + return crypto_register_alg(&alg_lz4kd); +} + +static void __exit lz4kd_mod_fini(void) +{ + crypto_unregister_alg(&alg_lz4kd); +} + +module_init(lz4kd_mod_init); +module_exit(lz4kd_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("LZ4KD Compression Algorithm"); +MODULE_ALIAS_CRYPTO("lz4kd"); diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index cc66daec7bbc..31683549fcfc 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -29,6 +29,9 @@ static const char * const backends[] = { #if IS_ENABLED(CONFIG_CRYPTO_LZ4HC) "lz4hc", #endif +#if IS_ENABLED(CONFIG_CRYPTO_LZ4KD) + "lz4kd", +#endif #if IS_ENABLED(CONFIG_CRYPTO_842) "842", #endif diff --git a/include/linux/lz4kd.h b/include/linux/lz4kd.h new file mode 100644 index 000000000000..3f7482821b3f --- /dev/null +++ b/include/linux/lz4kd.h @@ -0,0 +1,216 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * Description: LZ4K compression algorithm with delta compression + */ + +#ifndef _LZ4KD_H +#define _LZ4KD_H + +/* file lz4kd.h + This file contains the platform-independent API of LZ-class + lossless codecs (compressors/decompressors) with complete + in-place documentation. The documentation is formatted + in accordance with DOXYGEN mark-up format. So, one can + generate proper documentation, e.g. in HTML format, using DOXYGEN. 
+ + Currently, LZ-class codecs, documented here, implement following + algorithms for lossless data compression/decompression: + \li "LZ" proprietary codec competing with LZ4 - lz4kd_encode(), + lz4kd_encode_delta(), lz4kd_decode(), lz4kd_decode_delta() + + The LZ compressors accept any data as input and compress it + without loss to a smaller size if possible. + Compressed data produced by LZ compressor API lz4kd_encode*(), + can be decompressed only by lz4kd_decode() API documented below.\n + */ + +/* + lz4kd_status defines simple set of status values returned by APIs + */ +typedef enum { + LZ4K_STATUS_INCOMPRESSIBLE = 0, /* !< Return when data is incompressible */ + LZ4K_STATUS_FAILED = -1, /* !< Return on general failure */ + LZ4K_STATUS_READ_ERROR = -2, /* !< Return when data reading failed */ + LZ4K_STATUS_WRITE_ERROR = -3 /* !< Return when data writing failed */ +} lz4kd_status; + +/* + lz4kd_version() returns a static immutable string with the algorithm version + */ +const char *lz4kd_version(void); + +/* + lz4kd_encode_state_bytes_min() returns number of bytes for state parameter, + supplied to lz4kd_encode(), lz4kd_encode_delta(). + So, state should occupy at least lz4kd_encode_state_bytes_min() for mentioned + functions to work correctly. + */ +unsigned lz4kd_encode_state_bytes_min(void); + +/* + lz4kd_encode() encodes/compresses one input buffer at *in, places + result of encoding into one output buffer at *out if encoded data + size fits specified values of out_max and out_limit. + It returns the size of encoded data in case of success or value<=0 otherwise. + The result of successful encoding is in proprietary format, that + is the encoded data can be decoded only by lz4kd_decode(). + + \return + \li positive value\n + if encoding was successful. The value returned is the size of encoded + (compressed) data always <=out_max. + \li non-positive value\n + if in==0||in_max==0||out==0||out_max==0 or + if out_max is less than needed for encoded (compressed) data. 
+ \li 0 value\n + if encoded data size >= out_limit + + \param[in] state + !=0, pointer to state buffer used internally by the function. Size of + state in bytes should be at least lz4kd_encode_state_bytes_min(). The content + of state buffer will be changed during encoding. + + \param[in] in + !=0, pointer to the input buffer to encode (compress). The content of + the input buffer does not change during encoding. + + \param[in] out + !=0, pointer to the output buffer where to place result of encoding + (compression). + If encoding is unsuccessful, e.g. out_max or out_limit are less than + needed for encoded data then content of out buffer may be arbitrary. + + \param[in] in_max + !=0, size in bytes of the input buffer at *in + + \param[in] out_max + !=0, size in bytes of the output buffer at *out + + \param[in] out_limit + encoded data size soft limit in bytes. Due to performance reasons it is + not guaranteed that + lz4kd_encode will always detect that resulting encoded data size is + bigger than out_limit. + However, when reaching out_limit is detected, lz4kd_encode() returns + earlier and spares CPU cycles. Caller code should recheck result + returned by lz4kd_encode() (value greater than 0) if it is really + less than or equal to out_limit. + out_limit is ignored if it is equal to 0. + */ +int lz4kd_encode( + void *const state, + const void *const in, + void *out, + unsigned in_max, + unsigned out_max, + unsigned out_limit); + +int lz4kd_encode2( + void *const state, + const void *const in, + void *out, + unsigned in_max, + unsigned out_max, + unsigned out_limit); + +int lz4kd_encode_pattern( + void *const state, + const void *const in, + void *out, + unsigned in_max, + unsigned out_max, + unsigned out_limit); + +/* + lz4kd_encode_max_cr() encodes/compresses one input buffer at *in, places + result of encoding into one output buffer at *out if encoded data + size fits specified value of out_max. 
+ It returns the size of encoded data in case of success or value<=0 otherwise. + The result of successful encoding is in proprietary format, that + is the encoded data can be decoded only by lz4kd_decode(). + + \return + \li positive value\n + if encoding was successful. The value returned is the size of encoded + (compressed) data always <=out_max. + \li non-positive value\n + if in==0||in_max==0||out==0||out_max==0 or + if out_max is less than needed for encoded (compressed) data. + + \param[in] state + !=0, pointer to state buffer used internally by the function. Size of + state in bytes should be at least lz4kd_encode_state_bytes_min(). The content + of state buffer will be changed during encoding. + + \param[in] in + !=0, pointer to the input buffer to encode (compress). The content of + the input buffer does not change during encoding. + + \param[in] out + !=0, pointer to the output buffer where to place result of encoding + (compression). + If encoding is unsuccessful, e.g. out_max is less than + needed for encoded data then content of out buffer may be arbitrary. + + \param[in] in_max + !=0, size in bytes of the input buffer at *in + + \param[in] out_max + !=0, size in bytes of the output buffer at *out + + \param[in] out_limit + encoded data size soft limit in bytes. Due to performance reasons it is + not guaranteed that + lz4kd_encode will always detect that resulting encoded data size is + bigger than out_limit. + However, when reaching out_limit is detected, lz4kd_encode() returns + earlier and spares CPU cycles. Caller code should recheck result + returned by lz4kd_encode() (value greater than 0) if it is really + less than or equal to out_limit. + out_limit is ignored if it is equal to 0. 
+ */ +int lz4kd_encode_max_cr( + void *const state, + const void *const in, + void *out, + unsigned in_max, + unsigned out_max, + unsigned out_limit); + +/* + lz4kd_decode() decodes (decompresses) data from one input buffer and places + the result of decompression into one output buffer. The encoded data in input + buffer should be in proprietary format, produced by lz4kd_encode() + or by lz4kd_encode_delta(). + + \return + \li positive value\n + if decoding was successful. The value returned is the size of decoded + (decompressed) data. + \li non-positive value\n + if in==0||in_max==0||out==0||out_max==0 or + if out_max is less than needed for decoded (decompressed) data or + if input encoded data format is corrupted. + + \param[in] in + !=0, pointer to the input buffer to decode (decompress). The content of + the input buffer does not change during decoding. + + \param[in] out + !=0, pointer to the output buffer where to place result of decoding + (decompression). If decompression is unsuccessful then content of out + buffer may be arbitrary. 
+ + \param[in] in_max + !=0, size in bytes of the input buffer at in + + \param[in] out_max + !=0, size in bytes of the output buffer at out + */ +int lz4kd_decode( + const void *const in, + void *const out, + unsigned in_max, + unsigned out_max); + +#endif /* _LZ4KD_H */ diff --git a/lib/Kconfig b/lib/Kconfig index 1a33e9365951..54efe6eabfa7 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -245,8 +245,14 @@ config LZ4_COMPRESS config LZ4HC_COMPRESS tristate +config LZ4KD_COMPRESS + tristate + config LZ4_DECOMPRESS tristate + +config LZ4KD_DECOMPRESS + tristate config ZSTD_COMPRESS select XXHASH @@ -295,7 +301,7 @@ config GENERIC_ALLOCATOR # config REED_SOLOMON tristate - + config REED_SOLOMON_ENC8 bool diff --git a/lib/Makefile b/lib/Makefile index 2b1c74fb7a91..3f819180f439 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -135,6 +135,8 @@ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ obj-$(CONFIG_LZ4_COMPRESS) += lz4/ obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/ obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/ +obj-$(CONFIG_LZ4KD_COMPRESS) += lz4kd/ +obj-$(CONFIG_LZ4KD_DECOMPRESS) += lz4kd/ obj-$(CONFIG_ZSTD_COMPRESS) += zstd/ obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd/ obj-$(CONFIG_XZ_DEC) += xz/ diff --git a/lib/lz4kd/Makefile b/lib/lz4kd/Makefile new file mode 100644 index 000000000000..af032809dbae --- /dev/null +++ b/lib/lz4kd/Makefile @@ -0,0 +1,3 @@ +ccflags-y += -O3 +obj-$(CONFIG_LZ4KD_COMPRESS) += lz4kd_encode.o +obj-$(CONFIG_LZ4KD_DECOMPRESS) += lz4kd_decode.o diff --git a/lib/lz4kd/lz4kd_decode.c b/lib/lz4kd/lz4kd_decode.c new file mode 100644 index 000000000000..e3829eaa6049 --- /dev/null +++ b/lib/lz4kd/lz4kd_decode.c @@ -0,0 +1,243 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. 
+ * Description: LZ4K compression algorithm with delta compression + */ + +#if !defined(__KERNEL__) +#include "lz4kd.h" +#else +#include +#include +#endif + +#include "lz4kd_private.h" /* types, etc */ + +static const uint8_t *get_size( + uint_fast32_t *size, + const uint8_t *in_at, + const uint8_t *const in_end) +{ + uint_fast32_t u; + do { + if (unlikely(in_at >= in_end)) + return NULL; + *size += (u = *(const uint8_t*)in_at); + ++in_at; + } while (BYTE_MAX == u); + return in_at; +} + +static int end_of_block( + const uint_fast32_t nr_bytes_max, + const uint_fast32_t r_bytes_max, + const uint8_t *const in_at, + const uint8_t *const in_end, + const uint8_t *const out, + const uint8_t *const out_at) +{ + if (!nr_bytes_max) + return LZ4K_STATUS_FAILED; /* should be the last one in block */ + if (r_bytes_max != REPEAT_MIN) + return LZ4K_STATUS_FAILED; /* should be the last one in block */ + if (in_at != in_end) + return LZ4K_STATUS_FAILED; /* should be the last one in block */ + return (int)(out_at - out); +} + +enum { + NR_COPY_MIN = 16, + R_COPY_MIN = 16, + R_COPY_SAFE = R_COPY_MIN - 1, + R_COPY_SAFE_2X = (R_COPY_MIN << 1) - 1 +}; + +static bool out_non_repeat( + const uint8_t **in_at, + uint8_t **out_at, + uint_fast32_t nr_bytes_max, + const uint8_t *const in_end, + const uint8_t *const out_end) +{ + const uint8_t *const in_copy_end = *in_at + nr_bytes_max; + uint8_t *const out_copy_end = *out_at + nr_bytes_max; + if (likely(nr_bytes_max <= NR_COPY_MIN)) { + if (likely(*in_at <= in_end - NR_COPY_MIN && + *out_at <= out_end - NR_COPY_MIN)) + m_copy(*out_at, *in_at, NR_COPY_MIN); + else if (in_copy_end <= in_end && out_copy_end <= out_end) + m_copy(*out_at, *in_at, nr_bytes_max); + else + return false; + } else { /* nr_bytes_max>NR_COPY_MIN */ + if (likely(in_copy_end <= in_end - NR_COPY_MIN && + out_copy_end <= out_end - NR_COPY_MIN)) { + m_copy(*out_at, *in_at, NR_COPY_MIN); + copy_x_while_lt(*out_at + NR_COPY_MIN, + *in_at + NR_COPY_MIN, + out_copy_end, 
NR_COPY_MIN); + } else if (in_copy_end <= in_end && out_copy_end <= out_end) { + m_copy(*out_at, *in_at, nr_bytes_max); + } else { /* in_copy_end > in_end || out_copy_end > out_end */ + return false; + } + } /* if (nr_bytes_max <= NR_COPY_MIN) */ + *in_at = in_copy_end; + *out_at = out_copy_end; + return true; +} + +static void out_repeat_overlap( + uint_fast32_t offset, + uint8_t *out_at, + const uint8_t *out_from, + const uint8_t *const out_copy_end) +{ + enum { + COPY_MIN = R_COPY_MIN >> 1, + OFFSET_LIMIT = COPY_MIN >> 1 + }; + m_copy(out_at, out_from, COPY_MIN); +/* (1 < offset < R_COPY_MIN/2) && out_copy_end + R_COPY_SAFE_2X <= out_end */ + out_at += offset; + if (offset <= OFFSET_LIMIT) + offset <<= 1; + do { + m_copy(out_at, out_from, COPY_MIN); + out_at += offset; + if (offset <= OFFSET_LIMIT) + offset <<= 1; + } while (out_at - out_from < R_COPY_MIN); + while_lt_copy_2x_as_x2(out_at, out_from, out_copy_end, R_COPY_MIN); +} + +static bool out_repeat_slow( + uint_fast32_t r_bytes_max, + uint_fast32_t offset, + uint8_t *out_at, + const uint8_t *out_from, + const uint8_t *const out_copy_end, + const uint8_t *const out_end) +{ + if (offset > 1 && out_copy_end <= out_end - R_COPY_SAFE_2X) { + out_repeat_overlap(offset, out_at, out_from, out_copy_end); + } else { + if (unlikely(out_copy_end > out_end)) + return false; + if (offset == 1) { + m_set(out_at, *out_from, r_bytes_max); + } else { + do + *out_at++ = *out_from++; + while (out_at < out_copy_end); + } + } + return true; +} + +static int decode( + const uint8_t *in_at, + uint8_t *const out, + const uint8_t *const in_end, + const uint8_t *const out_end, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2) +{ + const uint_fast32_t r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2); + const uint8_t *const in_end_minus_x = in_end - TAG_BYTES_MAX; + uint8_t *out_at = out; + while (likely(in_at <= in_end_minus_x)) { + const uint_fast32_t utag = read4_at(in_at - 1) >> BYTE_BITS; + const uint_fast32_t offset = 
utag & mask(off_log2); + uint_fast32_t nr_bytes_max = utag >> (off_log2 + r_log2), + r_bytes_max = ((utag >> off_log2) & mask(r_log2)) + + REPEAT_MIN; + const uint8_t *out_from = 0; + uint8_t *out_copy_end = 0; + const uint8_t *out_safe_end = 0; + in_at += TAG_BYTES_MAX; + if (unlikely(nr_bytes_max == mask(nr_log2))) { + in_at = get_size(&nr_bytes_max, in_at, in_end); + if (unlikely(in_at == NULL)) + return LZ4K_STATUS_READ_ERROR; + } + if (!out_non_repeat(&in_at, &out_at, nr_bytes_max, in_end, out_end)) + return LZ4K_STATUS_FAILED; + if (unlikely(r_bytes_max == mask(r_log2) + REPEAT_MIN)) { + in_at = get_size(&r_bytes_max, in_at, in_end); + if (unlikely(in_at == NULL)) + return LZ4K_STATUS_READ_ERROR; + } + out_from = out_at - offset; + if (unlikely(out_from < out)) + return LZ4K_STATUS_FAILED; + out_copy_end = out_at + r_bytes_max; + out_safe_end = out_end - R_COPY_SAFE_2X; + if (likely(offset >= R_COPY_MIN && out_copy_end <= out_safe_end)) { + copy_2x_as_x2_while_lt(out_at, out_from, out_copy_end, + R_COPY_MIN); + } else if (likely(offset >= (R_COPY_MIN >> 1) && + out_copy_end <= out_safe_end)) { + m_copy(out_at, out_from, R_COPY_MIN); + out_at += offset; + while_lt_copy_x(out_at, out_from, out_copy_end, R_COPY_MIN); + } else if (likely(offset > 0)) { + if (!out_repeat_slow(r_bytes_max, offset, out_at, out_from, + out_copy_end, out_end)) + return LZ4K_STATUS_FAILED; + } else { /* offset == 0: EOB, last literal */ + return end_of_block(nr_bytes_max, r_bytes_max, in_at, + in_end, out, out_at); + } + out_at = out_copy_end; + } /* while (likely(in_at <= in_end_minus_x)) */ + return in_at == in_end ? 
(int)(out_at - out) : LZ4K_STATUS_FAILED; +} + +static int decode_pattern_4kb( + const uint8_t *const in, + uint8_t *const out, + const uint8_t *const out_end) +{ + const uint64_t pattern = *(const uint64_t*)in; + uint64_t *o64 = (uint64_t*)out; + const uint64_t *const o64_end = (const uint64_t*)out_end - 1; + for (; o64 <= o64_end; ++o64) + *o64 = pattern; + return (uint8_t*)o64 == out_end ? (int)(out_end - out) : LZ4K_STATUS_FAILED; +} + +static int decode_4kb( + const uint8_t *const in, + uint8_t *const out, + const uint8_t *const in_end, + const uint8_t *const out_end) +{ + return decode(in, out, in_end, out_end, NR_4KB_LOG2, BLOCK_4KB_LOG2); +} + +int lz4kd_decode( + const void *in, + void *const out, + unsigned in_max, + unsigned out_max) +{ + /* ++use volatile pointers to prevent compiler optimizations */ + const uint8_t *volatile in_end = (const uint8_t*)in + in_max; + const uint8_t *volatile out_end = (uint8_t*)out + min_u64(out_max, 1 << BLOCK_4KB_LOG2); + if (unlikely(in == NULL || out == NULL)) + return LZ4K_STATUS_FAILED; + if (unlikely(in_max <= 1 + TAG_BYTES_MAX || out_max <= 0)) + return LZ4K_STATUS_FAILED; + /* invalid buffer size or pointer overflow */ + if (unlikely((const uint8_t*)in >= in_end || (uint8_t*)out >= out_end)) + return LZ4K_STATUS_FAILED; + /* -- */ + if (unlikely(in_max == PATTERN_BYTES_MAX)) + return decode_pattern_4kb((const uint8_t*)in, (uint8_t*)out, + out_end); + return decode_4kb((const uint8_t*)in + 1, (uint8_t*)out, in_end, out_end); +} +EXPORT_SYMBOL(lz4kd_decode); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("LZ4K decoder"); diff --git a/lib/lz4kd/lz4kd_encode.c b/lib/lz4kd/lz4kd_encode.c new file mode 100644 index 000000000000..37ca9243ef04 --- /dev/null +++ b/lib/lz4kd/lz4kd_encode.c @@ -0,0 +1,418 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. 
+ * Description: LZ4K compression algorithm with delta compression + */ + +#if !defined(__KERNEL__) +#include "lz4kd.h" +#else +#include +#include +#endif + +#include "lz4kd_private.h" +#include "lz4kd_encode_private.h" + +enum { + HT_LOG2 = 12, /* ==11 #3 max drop in CR */ + STEP_LOG2 = 5 /* ==3 #2 avg drop in CR */ +}; + +static unsigned encode_state_bytes_min(void) +{ + enum { + BYTES_LOG2 = HT_LOG2 + 1 + }; + const unsigned bytes_total = (1U << BYTES_LOG2); + return bytes_total; +} + +#if !defined(LZ4K_DELTA) && !defined(LZ4K_MAX_CR) + +unsigned lz4kd_encode_state_bytes_min(void) +{ + return encode_state_bytes_min(); +} +EXPORT_SYMBOL(lz4kd_encode_state_bytes_min); + +#endif /* !defined(LZ4K_DELTA) && !defined(LZ4K_MAX_CR) */ + +/* minimum encoded size for non-compressible data */ +inline static uint_fast32_t encoded_bytes_min( + uint_fast32_t nr_log2, + uint_fast32_t in_max) +{ + return in_max < mask(nr_log2) ? + TAG_BYTES_MAX + in_max : + TAG_BYTES_MAX + size_bytes_count(in_max - mask(nr_log2)) + in_max; +} + +inline static void update_utag( + uint_fast32_t r_bytes_max, + uint_fast32_t *utag, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2) +{ + const uint_fast32_t r_mask = mask(TAG_BITS_MAX - (off_log2 + nr_log2)); + *utag |= likely(r_bytes_max - REPEAT_MIN < r_mask) ? 
+ ((r_bytes_max - REPEAT_MIN) << off_log2) : (r_mask << off_log2); +} + +inline static uint8_t *out_size_bytes(uint8_t *out_at, uint_fast32_t u) +{ + for (; u >= BYTE_MAX; *out_at++ = (uint8_t)BYTE_MAX, u -= BYTE_MAX); + *out_at++ = (uint8_t)u; + return out_at; +} + +inline static uint8_t *out_utag_then_bytes_left( + uint8_t *out_at, + uint_fast32_t utag, + uint_fast32_t bytes_left) +{ + m_copy(out_at, &utag, TAG_BYTES_MAX); + return out_size_bytes(out_at + TAG_BYTES_MAX, bytes_left); +} + +static int out_tail( + uint8_t *out_at, + uint8_t *const out_end, + const uint8_t *const out, + const uint8_t *const nr0, + const uint8_t *const in_end, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2) +{ + const uint_fast32_t nr_mask = mask(nr_log2); + const uint_fast32_t r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2); + const uint_fast32_t nr_bytes_now = u_32(in_end - nr0); + if (encoded_bytes_min(nr_log2, nr_bytes_now) > u_32(out_end - out_at)) + return LZ4K_STATUS_INCOMPRESSIBLE; + if (nr_bytes_now < nr_mask) { + /* caller guarantees at least one nr-byte */ + uint_fast32_t utag = (nr_bytes_now << (off_log2 + r_log2)); + m_copy(out_at, &utag, TAG_BYTES_MAX); + out_at += TAG_BYTES_MAX; + } else { /* nr_bytes_now>=nr_mask */ + uint_fast32_t bytes_left = nr_bytes_now - nr_mask; + uint_fast32_t utag = (nr_mask << (off_log2 + r_log2)); + out_at = out_utag_then_bytes_left(out_at, utag, bytes_left); + } /* if (nr_bytes_now= nr_mask */ + uint_fast32_t bytes_left = nr_bytes_max - nr_mask; + utag |= (nr_mask << (off_log2 + r_log2)); + out_at = out_utag_then_bytes_left(out_at, utag, bytes_left); + } /* if (nr_bytes_max> BYTE_BITS_LOG2); + } + /* some bytes differ: count of trailing 0-bits/bytes */ + q += sizeof(uint64_t); + r += sizeof(uint64_t); + } while (likely(r <= in_end_safe)); /* once, at input block end */ + while (r < in_end) { + if (*q != *r) return r; + ++q; + ++r; + } + return r; +} + +const uint8_t *lz4kd_repeat_end( + const uint8_t *q, + const uint8_t *r, + 
const uint8_t *const in_end_safe, + const uint8_t *const in_end) +{ + return repeat_end(q, r, in_end_safe, in_end); +} + +/* CR increase order: +STEP, have OFFSETS, use _5b(most impact) */ +/* *_6b to compete with LZ4 */ +inline static uint_fast32_t hash(const uint8_t *r) +{ + return hash64_5b(r, HT_LOG2); +} + +/* + * Proof that 'r' increments are safe-NO pointer overflows are possible: + * + * While using STEP_LOG2=5, step_start=1<= (1< + * 1<<(off_log2-STEP_LOG2+1) >= x^2+x-1 ==> + * x^2+x-1-1<<(off_log2-STEP_LOG2+1) == 0, which is solved by standard + * method. + * To avoid overhead here conservative approximate value of x is calculated + * as average of two nearest square roots, see STEP_LIMIT above. + */ + +static int encode_any( + uint16_t *const ht, + const uint8_t *const in0, + const uint8_t *const in_end, + uint8_t *const out, + uint8_t *const out_end) +{ + enum { + NR_LOG2 = NR_4KB_LOG2, + OFF_LOG2 = BLOCK_4KB_LOG2 + }; + const uint8_t *const in_end_safe = in_end - NR_COPY_MIN; + const uint8_t *r = in0; + const uint8_t *nr0 = r++; + uint8_t *out_at = out + 1; /* +1 for header */ + for (; ; nr0 = r) { + const uint8_t *q = 0; + uint_fast32_t step = 1 << STEP_LOG2; + uint_fast32_t utag = 0; + const uint8_t *r_end = 0; + uint_fast32_t r_bytes_max = 0; + while (true) { + if (equal4(q = hashed(in0, ht, hash(r), r), r)) + break; + ++r; + if (equal4(q = hashed(in0, ht, hash(r), r), r)) + break; + if (unlikely((r += (++step >> STEP_LOG2)) > in_end_safe)) + return out_tail(out_at, out_end, out, nr0, in_end, + NR_LOG2, OFF_LOG2); + } + utag = u_32(r - q); + r_end = repeat_end(q, r, in_end_safe, in_end); + r_bytes_max = u_32(r_end - r); + if (unlikely(nr0 == r)) + out_at = out_repeat(out_at, utag, r_bytes_max, + NR_LOG2, OFF_LOG2); + else + out_at = out_tuple(out_at, out_end, utag, nr0, r, r_bytes_max, + NR_LOG2, OFF_LOG2); + if (unlikely((r += r_bytes_max) > in_end_safe)) + return out_tail2(out_at, out_end, out, r, in_end, + NR_LOG2, OFF_LOG2); + ht[hash(r - 1)] = 
(uint16_t)(r - 1 - in0); + } +} + +/* not static for inlining optimization */ +int lz4kd_encode_fast( + void *const state, + const uint8_t *const in, + uint8_t *const out, + const uint_fast32_t in_max, + const uint_fast32_t out_max) +{ + return encode_any((uint16_t*)state, in, in + in_max, out, out + out_max); +} + +int lz4kd_encode( + void *const state, + const void *const in, + void *out, + unsigned in_max, + unsigned out_max, + unsigned out_limit) +{ + const uint64_t io_min = min_u64(in_max, out_max); + const uint64_t gain_max = max_u64(GAIN_BYTES_MAX, (io_min >> GAIN_BYTES_LOG2)); + /* ++use volatile pointers to prevent compiler optimizations */ + const uint8_t *volatile in_end = (const uint8_t*)in + in_max; + const uint8_t *volatile out_end = (uint8_t*)out + out_max; + const void *volatile state_end = + (uint8_t*)state + encode_state_bytes_min(); + if (unlikely(state == NULL)) + return LZ4K_STATUS_FAILED; + if (unlikely(in == NULL || out == NULL)) + return LZ4K_STATUS_FAILED; + if (unlikely(out_max <= gain_max)) + return LZ4K_STATUS_FAILED; + if (unlikely((const uint8_t*)in >= in_end || (uint8_t*)out >= out_end)) + return LZ4K_STATUS_FAILED; + if (unlikely(state >= state_end)) + return LZ4K_STATUS_FAILED; /* pointer overflow */ + if (in_max > (1 << BLOCK_4KB_LOG2)) + return LZ4K_STATUS_FAILED; + if (unlikely(!out_limit || out_limit > io_min)) + out_limit = (unsigned)io_min; + m_set(state, 0, encode_state_bytes_min()); + *((uint8_t*)out) = 0; /* lz4kd header */ + if (unlikely(nr_encoded_bytes_max(in_max, NR_4KB_LOG2) > out_max)) + return 0; + return lz4kd_encode_fast(state, (const uint8_t*)in, (uint8_t*)out, + in_max, out_limit); +} +EXPORT_SYMBOL(lz4kd_encode); + +/* maximum encoded size for repeat and non-repeat data if "fast" encoder is used */ +uint_fast32_t lz4kd_encoded_bytes_max( + uint_fast32_t nr_max, + uint_fast32_t r_max, + uint_fast32_t nr_log2, + uint_fast32_t off_log2) +{ + uint_fast32_t r = 1 + TAG_BYTES_MAX + + (uint32_t)round_up_to_log2(nr_max, 
NR_COPY_LOG2); + uint_fast32_t r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2); + if (nr_max >= mask(nr_log2)) + r += size_bytes_count(nr_max - mask(nr_log2)); + if (r_max >= mask(r_log2)) { + r_max -= mask(r_log2); + r += (uint_fast32_t)max_u64(size_bytes_count(r_max), + r_max - r_max / REPEAT_MIN); /* worst case: one tag for each REPEAT_MIN */ + } + return r; +} +EXPORT_SYMBOL(lz4kd_encoded_bytes_max); + +const char *lz4kd_version(void) +{ + static const char *version = "2022.03.20"; + return version; +} +EXPORT_SYMBOL(lz4kd_version); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("LZ4K encoder"); diff --git a/lib/lz4kd/lz4kd_encode_private.h b/lib/lz4kd/lz4kd_encode_private.h new file mode 100644 index 000000000000..becb1beeb9d0 --- /dev/null +++ b/lib/lz4kd/lz4kd_encode_private.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved. + * Description: LZ4K compression algorithm with delta compression + */ + +#ifndef _LZ4KD_ENCODE_PRIVATE_H +#define _LZ4KD_ENCODE_PRIVATE_H + +#include "lz4kd_private.h" + +#ifdef __cplusplus +extern "C" { +#endif + +enum { + GAIN_BYTES_LOG2 = 6, + GAIN_BYTES_MAX = 1 << GAIN_BYTES_LOG2, + NR_COPY_LOG2 = 4, + NR_COPY_MIN = 1 << NR_COPY_LOG2 +}; + +inline static uint32_t u_32(int64_t i) +{ + return (uint32_t)i; +} + +/* + * Compressed data format (where {} means 0 or more occurrences, [] means + * optional) + * <24bits tag: (off_log2 rOffset| r_log2 rSize|nr_log2 nrSize)> + * {}[]{} + * and sequences are terminated by byte != 255 + * + * +<1 terminating 0 byte> + */ +inline static uint_fast32_t size_bytes_count(uint_fast32_t u) +{ + return ((u + BYTE_MAX) >> BYTE_BITS) + 1; /* (u + BYTE_MAX - 1) / BYTE_MAX; */ +} + +/* maximum encoded size for non-compressible data if "fast" encoder is used */ +inline static uint_fast32_t nr_encoded_bytes_max( + uint_fast32_t nr_max, + uint_fast32_t nr_log2) +{ + uint_fast32_t r = 1 + TAG_BYTES_MAX + (uint32_t)round_up_to_log2(nr_max, NR_COPY_LOG2); + 
return nr_max < mask(nr_log2) ? r : r + size_bytes_count(nr_max - mask(nr_log2)); +} + +/* maximum encoded size for repeat and non-repeat data if "fast" encoder is used */ +uint_fast32_t lz4kd_encoded_bytes_max( + uint_fast32_t nr_max, + uint_fast32_t r_max, + uint_fast32_t nr_log2, + uint_fast32_t off_log2); + +inline static const uint8_t *hashed( + const uint8_t *const in0, + uint16_t *const ht, + uint_fast32_t h, + const uint8_t *r) +{ + const uint8_t *q = in0 + ht[h]; + ht[h] = (uint16_t)(r - in0); + return q; +} + +inline static const uint8_t *repeat_start( + const uint8_t *q, + const uint8_t *r, + const uint8_t *const nr0, + const uint8_t *const in0) +{ + for (; r > nr0 && likely(q > in0) && unlikely(q[-1] == r[-1]); --q, --r); + return r; +} + +static inline bool match_max( + const uint8_t *q, + const uint8_t *s, + const uint_fast32_t r_max) +{ + return equal4(q + r_max - REPEAT_MIN, s + r_max - REPEAT_MIN) && + equal4(q, s); +} + +int lz4kd_out_tail( + uint8_t *out_at, + uint8_t *const out_end, + const uint8_t *const out, + const uint8_t *const nr0, + const uint8_t *const in_end, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + bool check_out); + +uint8_t *lz4kd_out_tuple( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t utag, + const uint8_t *const nr0, + const uint8_t *const r, + uint_fast32_t r_bytes_max, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + bool check_out); + +uint8_t *lz4kd_out_repeat( + uint8_t *out_at, + uint8_t *const out_end, + uint_fast32_t utag, + uint_fast32_t r_bytes_max, + const uint_fast32_t nr_log2, + const uint_fast32_t off_log2, + const bool check_out); + +const uint8_t *lz4kd_repeat_end( + const uint8_t *q, + const uint8_t *r, + const uint8_t *const in_end_safe, + const uint8_t *const in_end); + +int lz4kd_encode_fast( + void *const state, + const uint8_t *const in, + uint8_t *const out, + const uint_fast32_t in_max, + const uint_fast32_t out_max); + +#ifdef __cplusplus +} +#endif + 
+#endif /* _LZ4KD_ENCODE_PRIVATE_H */
+
diff --git a/lib/lz4kd/lz4kd_private.h b/lib/lz4kd/lz4kd_private.h
new file mode 100644
index 000000000000..174438a6ecc5
--- /dev/null
+++ b/lib/lz4kd/lz4kd_private.h
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
+ * Description: LZ4K compression algorithm with delta compression
+ */
+
+#ifndef _LZ4KD_PRIVATE_H
+#define _LZ4KD_PRIVATE_H
+
+#if !defined(__KERNEL__)
+
+/* for userspace only */
+
+#else /* __KERNEL__ */
+
+/*
+ * NOTE(review): the header names of the #include directives below are missing
+ * from this copy of the patch -- restore them from the original source.
+ */
+#include
+#define __STDC_WANT_LIB_EXT1__ 1
+#include /* memcpy() */
+#include /* uint8_t, int8_t, uint16_t, int16_t,
+uint32_t, int32_t, uint64_t, int64_t */
+#include
+
+/* In kernel builds the "fast" 32-bit integer types are 64 bits wide. */
+typedef uint64_t uint_fast32_t;
+typedef int64_t int_fast32_t;
+
+#endif /* __KERNEL__ */
+
+#if defined(__GNUC__) && (__GNUC__>=4)
+#define LZ4K_WITH_GCC_INTRINSICS
+#endif
+
+/* Bit widths and format constants used by the helpers in this header. */
+enum {
+	BYTE_BITS = 8UL,
+	WORD_BITS = 32U,
+	DWORD_BITS = 64UL,
+	BYTE_BITS_LOG2 = 3,
+	BYTE_MAX = 255U,
+	REPEAT_MIN = 4,
+	TAG_BYTES_MAX = 3,
+	TAG_BITS_MAX = TAG_BYTES_MAX * 8,
+	BLOCK_4KB_LOG2 = 12,
+	BLOCK_8KB_LOG2 = 13,
+	NR_8KB_LOG2 = 5, /* for encoded_bytes_max */
+	NR_4KB_LOG2 = 6,
+	PATTERN_BYTES_MAX = 8 /* 1 bytes for header, 8 bytes for pattern */
+};
+
+/*
+ * Returns a 32-bit value whose low @log2 bits are set.
+ * @log2 must be smaller than the width of unsigned int: the shift is applied
+ * to 1U, so a larger count is undefined behaviour.
+ */
+static inline uint32_t mask(uint_fast32_t log2)
+{
+	return (1U << log2) - 1U;
+}
+
+/*
+ * Returns a 64-bit value whose low @log2 bits are set.
+ * @log2 must be below 64 for the shift to be defined.
+ */
+static inline uint64_t mask64(uint_fast32_t log2)
+{
+	return (1ULL << log2) - 1ULL;
+}
+
+#if defined LZ4K_WITH_GCC_INTRINSICS
+/*
+ * Returns the index (0..63) of the highest set bit of @u, or -1 when @u is 0.
+ *
+ * The leading zeros are counted over all 64 bits.  Counting on the value
+ * truncated to 32 bits would ignore the upper half and would hand 0 to the
+ * builtin (undefined result) whenever only upper bits are set.  For non-zero
+ * values below 2^32 the result is the same either way.
+ */
+static inline int most_significant_bit_of(uint64_t u)
+{
+	if (__builtin_expect(u == 0, false))
+		return -1;
+	return (int)((DWORD_BITS - 1) ^
+		(uint32_t)__builtin_clzll((unsigned long long)u));
+}
+#else /* #!defined LZ4K_WITH_GCC_INTRINSICS */
+#error undefined most_significant_bit_of(unsigned u)
+#endif /* #if defined LZ4K_WITH_GCC_INTRINSICS */
+
+/* Returns the larger of @a and @b. */
+static inline uint64_t max_u64(uint64_t a, uint64_t b)
+{
+	return a > b ? a : b;
+}
+
+/* Returns the smaller of @a and @b. */
+static inline uint64_t min_u64(uint64_t a, uint64_t b)
+{
+	return a < b ? a : b;
+}
+
+/*
+ * Copies @total bytes from @src to @dst.
+ * memcpy_s() is used when the C library advertises __STDC_LIB_EXT1__,
+ * otherwise the compiler builtin.  (total * 2) >> 1 equals @total for every
+ * size below half the size_t range.
+ */
+static inline void m_copy(void *dst, const void *src, size_t total)
+{
+#if defined(__STDC_LIB_EXT1__)
+	(void)memcpy_s(dst, total, src, (total * 2) >> 1); /* *2 >> 1 to avoid bot errors */
+#else
+	(void)__builtin_memcpy(dst, src, total);
+#endif
+}
+
+/*
+ * Fills @total bytes at @dst with @value.
+ * Same library selection as m_copy().
+ */
+static inline void m_set(void *dst, uint8_t value, size_t total)
+{
+#if defined(__STDC_LIB_EXT1__)
+	(void)memset_s(dst, total, value, (total * 2) >> 1); /* *2 >> 1 to avoid bot errors */
+#else
+	(void)__builtin_memset(dst, value, total);
+#endif
+}
+
+/* Clears the low @log2 bits of @u, i.e. rounds down to a multiple of 2^log2. */
+static inline uint64_t round_down_to_log2(uint64_t u, uint8_t log2)
+{
+	return (uint64_t)(u & ~mask64(log2));
+}
+
+/*
+ * Rounds @u up to the next multiple of 2^log2.
+ * The addition wraps modulo 2^64 when @u is within 2^log2 - 1 of UINT64_MAX.
+ */
+static inline uint64_t round_up_to_log2(uint64_t u, uint8_t log2)
+{
+	return (uint64_t)((u + mask64(log2)) & ~mask64(log2));
+}
+
+/*
+ * Rounds @u up to a power of two; a power of two is returned unchanged.
+ * Returns 0 for @u == 0, and also wraps to 0 when @u is above 2^63.
+ */
+static inline uint64_t round_up_to_power_of2(uint64_t u)
+{
+	const int msb = most_significant_bit_of(u);
+
+	/* no bit set: -1 cast to uint8_t would become a shift count of 255 */
+	if (msb < 0)
+		return 0;
+	return round_up_to_log2(u, (uint8_t)msb);
+}
+
+/*
+ * Rounds the address @p up to a multiple of 2^log2.
+ * The const qualifier of @p is dropped in the returned pointer.
+ */
+static inline void *align_pointer_up_to_log2(const void *p, uint8_t log2)
+{
+	return (void*)round_up_to_log2((uint64_t)p, log2);
+}
+
+/*
+ * Loads 3 bytes from @p into the three lowest-addressed bytes of a zeroed
+ * uint32_t.  No alignment is required because the load goes through m_copy().
+ */
+static inline uint32_t read3_at(const void *p)
+{
+	uint32_t result = 0;
+	m_copy(&result, p, 1 + 1 + 1);
+	return result;
+}
+
+/* Loads 4 bytes from @p in memory order; no alignment required. */
+static inline uint32_t read4_at(const void *p)
+{
+	uint32_t result;
+	m_copy(&result, p, sizeof(result));
+	return result;
+}
+
+/* Loads 8 bytes from @p in memory order; no alignment required. */
+static inline uint64_t read8_at(const void *p)
+{
+	uint64_t result;
+	m_copy(&result, p, sizeof(result));
+	return result;
+}
+
+/*
+ * Compares the low 24 bits of the 4-byte loads at @q and @r.
+ * Four bytes are read from each pointer.  On a little-endian CPU the low
+ * 24 bits are the first three bytes in memory.
+ */
+static inline bool equal3(const uint8_t *const q, const uint8_t *const r)
+{
+	return (read4_at(q) << BYTE_BITS) == (read4_at(r) << BYTE_BITS);
+}
+
+/* Like equal3(), with the right-hand side given as an already loaded value. */
+static inline bool equal3pv(const uint8_t *const q, const uint64_t rv)
+{
+	return (read4_at(q) << BYTE_BITS) == ((uint32_t)rv << BYTE_BITS);
+}
+
+/* True when the 4 bytes at @q equal the 4 bytes at @r. */
+static inline bool equal4(const uint8_t *const q, const uint8_t *const r)
+{
+	return read4_at(q) == read4_at(r);
+}
+
+/* True when the 4 bytes at @q equal the low 32 bits of @rv. */
+static inline bool equal4pv(const uint8_t *const q, const uint64_t rv)
+{
+	return read4_at(q) == (uint32_t)rv;
+}
+
+/* True when the 8 bytes loaded from @q equal the 8 bytes loaded from @r. */
+static inline bool equal8(const uint8_t *const q, const uint8_t *const r)
+{
+	const uint64_t lhs = read8_at(q);
+	const uint64_t rhs = read8_at(r);
+
+	return lhs == rhs;
+}
+
+/*
+ * Multiplicative hash of the low 24 bits of @r.
+ * The top byte of the 32-bit value is shifted out, the rest is multiplied
+ * modulo 2^32 and the top @shift bits of the product are returned.
+ * @shift must be in 1..32 for the final shift to be defined.
+ */
+static inline uint_fast32_t hash24v(const uint64_t r, uint32_t shift)
+{
+	const uint32_t multiplier = 3266489917U;
+	const uint32_t low24 = (uint32_t)r << BYTE_BITS;
+	const uint32_t product = low24 * multiplier;
+
+	return product >> (WORD_BITS - shift);
+}
+
+/* hash24v() of the 4 bytes loaded from @r. */
+static inline uint_fast32_t hash24(const uint8_t *r, uint32_t shift)
+{
+	const uint32_t word = read4_at(r);
+
+	return hash24v(word, shift);
+}
+
+/*
+ * Multiplicative hash of the low 32 bits of @r, using the same multiplier as
+ * hash24v().  Returns the top @shift bits of the 32-bit product; @shift must
+ * be in 1..32.
+ */
+static inline uint_fast32_t hash32v_2(const uint64_t r, uint32_t shift)
+{
+	const uint32_t multiplier = 3266489917U;
+	const uint32_t product = (uint32_t)r * multiplier;
+
+	return product >> (WORD_BITS - shift);
+}
+
+/* hash32v_2() of the 4 bytes loaded from @r. */
+static inline uint_fast32_t hash32_2(const uint8_t *r, uint32_t shift)
+{
+	const uint32_t word = read4_at(r);
+
+	return hash32v_2(word, shift);
+}
+
+/*
+ * Multiplicative hash of the low 32 bits of @r with multiplier 2654435761.
+ * Returns the top @shift bits of the 32-bit product; @shift must be in 1..32.
+ */
+static inline uint_fast32_t hash32v(const uint64_t r, uint32_t shift)
+{
+	const uint32_t multiplier = 2654435761U;
+	const uint32_t product = (uint32_t)r * multiplier;
+
+	return product >> (WORD_BITS - shift);
+}
+
+/* hash32v() of the 4 bytes loaded from @r. */
+static inline uint_fast32_t hash32(const uint8_t *r, uint32_t shift)
+{
+	const uint32_t word = read4_at(r);
+
+	return hash32v(word, shift);
+}
+
+/*
+ * Hash of the low 5 bytes (40 bits) of @r: the upper 24 bits are shifted out,
+ * the rest is multiplied modulo 2^64 and the top @shift bits of the product
+ * are returned, truncated to 32 bits.  @shift must be in 1..64.
+ */
+static inline uint_fast32_t hash64v_5b(const uint64_t r, uint32_t shift)
+{
+	const uint64_t multiplier = 889523592379ULL;
+	const uint64_t drop_bits = 24;
+	const uint64_t product = (r << drop_bits) * multiplier;
+
+	return (uint32_t)(product >> (DWORD_BITS - shift));
+}
+
+/* hash64v_5b() of the 8 bytes loaded from @r. */
+static inline uint_fast32_t hash64_5b(const uint8_t *r, uint32_t shift)
+{
+	const uint64_t dword = read8_at(r);
+
+	return hash64v_5b(dword, shift);
+}
+
+/*
+ * Hash of the low 6 bytes (48 bits) of @r; same scheme as hash64v_5b() with
+ * 16 bits shifted out and a different multiplier.  @shift must be in 1..64.
+ */
+static inline uint_fast32_t hash64v_6b(const uint64_t r, uint32_t shift)
+{
+	const uint64_t multiplier = 227718039650203ULL;
+	const uint64_t drop_bits = 16;
+	const uint64_t product = (r << drop_bits) * multiplier;
+
+	return (uint32_t)(product >> (DWORD_BITS - shift));
+}
+
+/* hash64v_6b() of the 8 bytes loaded from @r. */
+static inline uint_fast32_t hash64_6b(const uint8_t *r, uint32_t shift)
+{
+	const uint64_t dword = read8_at(r);
+
+	return hash64v_6b(dword, shift);
+}
+
+/*
+ * Hash of the low 7 bytes (56 bits) of @r; same scheme as hash64v_5b() with
+ * 8 bits shifted out and a different multiplier.  @shift must be in 1..64.
+ */
+static inline uint_fast32_t hash64v_7b(const uint64_t r, uint32_t shift)
+{
+	const uint64_t multiplier = 58295818150454627ULL;
+	const uint64_t drop_bits = 8;
+	const uint64_t product = (r << drop_bits) * multiplier;
+
+	return (uint32_t)(product >> (DWORD_BITS - shift));
+}
+
+/* hash64v_7b() of the 8 bytes loaded from @r. */
+static inline uint_fast32_t hash64_7b(const uint8_t *r, uint32_t shift)
+{
+	const uint64_t dword = read8_at(r);
+
+	return hash64v_7b(dword, shift);
+}
+
+/*
+ * Hash of all 8 bytes of @r: multiplied modulo 2^64, top @shift bits of the
+ * product returned, truncated to 32 bits.  @shift must be in 1..64.
+ */
+static inline uint_fast32_t hash64v_8b(const uint64_t r, uint32_t shift)
+{
+	const uint64_t multiplier = 2870177450012600261ULL;
+	const uint64_t product = r * multiplier;
+
+	return (uint32_t)(product >> (DWORD_BITS - shift));
+}
+
+/* hash64v_8b() of the 8 bytes loaded from @r. */
+static inline uint_fast32_t hash64_8b(const uint8_t *r, uint32_t shift)
+{
+	const uint64_t dword = read8_at(r);
+
+	return hash64v_8b(dword, shift);
+}
+
+/*
+ * Copies @copy_min-byte chunks from @src to @dst for as long as @dst is below
+ * @dst_end.  Nothing is copied when @dst starts at or past @dst_end; the last
+ * chunk may run past @dst_end by up to @copy_min - 1 bytes.
+ */
+static inline void while_lt_copy_x(
+	uint8_t *dst,
+	const uint8_t *src,
+	const uint8_t *dst_end,
+	const size_t copy_min)
+{
+	while (dst < dst_end) {
+		m_copy(dst, src, copy_min);
+		dst += copy_min;
+		src += copy_min;
+	}
+}
+
+/*
+ * Copies one @copy_min-byte chunk unconditionally, then keeps copying chunks
+ * while the end of the chunk just written is still below @dst_end.
+ */
+static inline void copy_x_while_lt(
+	uint8_t *dst,
+	const uint8_t *src,
+	const uint8_t *dst_end,
+	const size_t copy_min)
+{
+	for (;;) {
+		m_copy(dst, src, copy_min);
+		if (dst + copy_min >= dst_end)
+			break;
+		dst += copy_min;
+		src += copy_min;
+	}
+}
+
+/*
+ * Copies one @copy_min-byte chunk unconditionally, then one more chunk for
+ * every @copy_min bytes by which @total still exceeds @copy_min.
+ */
+static inline void copy_x_while_total(
+	uint8_t *dst,
+	const uint8_t *src,
+	size_t total,
+	const size_t copy_min)
+{
+	m_copy(dst, src, copy_min);
+	while (total > copy_min) {
+		dst += copy_min;
+		src += copy_min;
+		m_copy(dst, src, copy_min);
+		total -= copy_min;
+	}
+}
+
+/* Copies 2 * @copy_min bytes from @src to @dst as two separate chunks. */
+static inline void copy_2x(
+	uint8_t *dst,
+	const uint8_t *src,
+	const size_t copy_min)
+{
+	uint8_t *const dst_hi = dst + copy_min;
+	const uint8_t *const src_hi = src + copy_min;
+
+	m_copy(dst, src, copy_min);
+	m_copy(dst_hi, src_hi, copy_min);
+}
+
+/*
+ * Same loop shape as copy_x_while_lt(), advancing by 2 * @copy_min bytes per
+ * step and copying each step with copy_2x().
+ */
+static inline void copy_2x_as_x2_while_lt(
+	uint8_t *dst,
+	const uint8_t *src,
+	const uint8_t *dst_end,
+	const size_t copy_min)
+{
+	const size_t stride = copy_min << 1;
+
+	for (;;) {
+		copy_2x(dst, src, copy_min);
+		if (dst + stride >= dst_end)
+			break;
+		dst += stride;
+		src += stride;
+	}
+}
+
+/*
+ * Same loop shape as while_lt_copy_x(), advancing by 2 * @copy_min bytes per
+ * step and copying each step with copy_2x().
+ */
+static inline void while_lt_copy_2x_as_x2(
+	uint8_t *dst,
+	const uint8_t *src,
+	const uint8_t *dst_end,
+	const size_t copy_min)
+{
+	const size_t stride = copy_min << 1;
+
+	while (dst < dst_end) {
+		copy_2x(dst, src, copy_min);
+		dst += stride;
+		src += stride;
+	}
+}
+
+#endif /* _LZ4KD_PRIVATE_H */