lib: Import LZ4KD compression algorithm from HUAWEI

ABR-AL60_HarmonyOS4.0.0_opensource.tar.gz

Signed-off-by: 0wnerDied <z1281552865@gmail.com>
This commit is contained in:
slothnian
2025-07-09 09:04:02 +00:00
committed by Ansh
parent 16cf78a7ff
commit aa250a3dab
12 changed files with 1436 additions and 1 deletions

View File

@@ -1682,6 +1682,15 @@ config CRYPTO_LZ4
help
This is the LZ4 algorithm.
config CRYPTO_LZ4KD
tristate "LZ4KD compression algorithm"
select CRYPTO_ALGAPI
select CRYPTO_ACOMP2
select LZ4KD_COMPRESS
select LZ4KD_DECOMPRESS
help
This is the LZ4KD algorithm.
config CRYPTO_LZ4HC
tristate "LZ4HC compression algorithm"
select CRYPTO_ALGAPI

View File

@@ -123,6 +123,7 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o
obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
obj-$(CONFIG_CRYPTO_LZO) += lzo.o
obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
obj-$(CONFIG_CRYPTO_LZ4KD) += lz4kd.o
obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o
obj-$(CONFIG_CRYPTO_842) += 842.o
obj-$(CONFIG_CRYPTO_RNG2) += rng.o

95
crypto/lz4kd.c Normal file
View File

@@ -0,0 +1,95 @@
/*
* Cryptographic API.
*
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
* Description: LZ4KD compression algorithm for ZRAM
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/vmalloc.h>
#include <linux/lz4kd.h>
/* Per-transform context: scratch memory for the LZ4KD encoder state. */
struct lz4kd_ctx {
    void *lz4kd_comp_mem; /* vmalloc'ed; sized by lz4kd_encode_state_bytes_min() */
};
/* Allocate the encoder scratch state for a new transform instance. */
static int lz4kd_init(struct crypto_tfm *tfm)
{
    struct lz4kd_ctx *c = crypto_tfm_ctx(tfm);

    c->lz4kd_comp_mem = vmalloc(lz4kd_encode_state_bytes_min());
    return c->lz4kd_comp_mem ? 0 : -ENOMEM;
}
/* Release the encoder scratch state; vfree(NULL) is a no-op. */
static void lz4kd_exit(struct crypto_tfm *tfm)
{
    struct lz4kd_ctx *c = crypto_tfm_ctx(tfm);

    vfree(c->lz4kd_comp_mem);
}
/*
 * Compress @slen bytes from @src into @dst; on entry *dlen is the output
 * capacity, on success it holds the compressed size.
 * NOTE(review): when lz4kd_encode() returns 0 (output would not fit), this
 * still returns 0 but leaves *dlen untouched — presumably the zram caller
 * treats that as "incompressible"; confirm before reusing elsewhere.
 */
static int lz4kd_compress_crypto(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst,
    unsigned int *dlen)
{
    struct lz4kd_ctx *ctx = crypto_tfm_ctx(tfm);
    int ret = 0;
    /* final 0 argument: no soft output-size limit */
    ret = lz4kd_encode(ctx->lz4kd_comp_mem, src, dst, slen, *dlen, 0);
    if (ret < 0)
        return -EINVAL;
    if (ret)
        *dlen = ret;
    return 0;
}
/*
 * Decompress @slen bytes from @src into @dst; *dlen is the output capacity
 * on entry and the decompressed size on success. @tfm is unused: decoding
 * needs no per-transform state.
 */
static int lz4kd_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst,
    unsigned int *dlen)
{
    int ret = 0;
    ret = lz4kd_decode(src, dst, slen, *dlen);
    if (ret <= 0)
        return -EINVAL; /* corrupt input or output buffer too small */
    *dlen = ret;
    return 0;
}
/* Legacy (non-acomp) compression algorithm descriptor for "lz4kd". */
static struct crypto_alg alg_lz4kd = {
    .cra_name = "lz4kd",
    .cra_driver_name = "lz4kd-generic",
    .cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
    .cra_ctxsize = sizeof(struct lz4kd_ctx),
    .cra_module = THIS_MODULE,
    .cra_init = lz4kd_init,   /* allocates per-tfm encoder scratch */
    .cra_exit = lz4kd_exit,   /* frees it */
    .cra_u = {
        .compress = {
            .coa_compress = lz4kd_compress_crypto,
            .coa_decompress = lz4kd_decompress_crypto
        }
    }
};
/* Register the "lz4kd" algorithm with the crypto API on module load. */
static int __init lz4kd_mod_init(void)
{
    return crypto_register_alg(&alg_lz4kd);
}
/* Unregister on module unload. */
static void __exit lz4kd_mod_fini(void)
{
    crypto_unregister_alg(&alg_lz4kd);
}
module_init(lz4kd_mod_init);
module_exit(lz4kd_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("LZ4KD Compression Algorithm");
MODULE_ALIAS_CRYPTO("lz4kd");

View File

@@ -29,6 +29,9 @@ static const char * const backends[] = {
#if IS_ENABLED(CONFIG_CRYPTO_LZ4HC)
"lz4hc",
#endif
#if IS_ENABLED(CONFIG_CRYPTO_LZ4KD)
"lz4kd",
#endif
#if IS_ENABLED(CONFIG_CRYPTO_842)
"842",
#endif

216
include/linux/lz4kd.h Normal file
View File

@@ -0,0 +1,216 @@
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
* Description: LZ4K compression algorithm with delta compression
*/
#ifndef _LZ4KD_H
#define _LZ4KD_H
/* file lz4kd.h
This file contains the platform-independent API of LZ-class
lossless codecs (compressors/decompressors) with complete
in-place documentation. The documentation is formatted
in accordance with DOXYGEN mark-up format. So, one can
generate proper documentation, e.g. in HTML format, using DOXYGEN.
Currently, LZ-class codecs, documented here, implement following
algorithms for lossless data compression/decompression:
\li "LZ" proprietary codec competing with LZ4 - lz4kd_encode(),
lz4kd_encode_delta(), lz4kd_decode(), lz4kd_decode_delta()
The LZ compressors accept any data as input and compress it
without loss to a smaller size if possible.
Compressed data produced by LZ compressor API lz4kd_encode*(),
can be decompressed only by lz4kd_decode() API documented below.\n
*/
/*
lz4kd_status defines simple set of status values returned by APIs
*/
typedef enum {
LZ4K_STATUS_INCOMPRESSIBLE = 0, /* !< Return when data is incompressible */
LZ4K_STATUS_FAILED = -1, /* !< Return on general failure */
LZ4K_STATUS_READ_ERROR = -2, /* !< Return when data reading failed */
LZ4K_STATUS_WRITE_ERROR = -3 /* !< Return when data writing failed */
} lz4kd_status;
/*
lz4kd_version() returns a static immutable string with the algorithm version
*/
const char *lz4kd_version(void);
/*
lz4kd_encode_state_bytes_min() returns number of bytes for state parameter,
supplied to lz4kd_encode(), lz4kd_encode_delta().
So, state should occupy at least lz4kd_encode_state_bytes_min() for mentioned
functions to work correctly.
*/
unsigned lz4kd_encode_state_bytes_min(void);
/*
lz4kd_encode() encodes/compresses one input buffer at *in, places
result of encoding into one output buffer at *out if encoded data
size fits specified values of out_max and out_limit.
It returns the size of encoded data in case of success or value<=0 otherwise.
The result of successful encoding is in proprietary format, that
is the encoded data can be decoded only by lz4kd_decode().
\return
\li positive value\n
if encoding was successful. The value returned is the size of encoded
(compressed) data always <=out_max.
\li non-positive value\n
if in==0||in_max==0||out==0||out_max==0 or
if out_max is less than needed for encoded (compressed) data.
\li 0 value\n
if encoded data size >= out_limit
\param[in] state
!=0, pointer to state buffer used internally by the function. Size of
state in bytes should be at least lz4kd_encode_state_bytes_min(). The content
of state buffer will be changed during encoding.
\param[in] in
!=0, pointer to the input buffer to encode (compress). The content of
the input buffer does not change during encoding.
\param[in] out
!=0, pointer to the output buffer where to place result of encoding
(compression).
If encoding is unsuccessful, e.g. out_max or out_limit are less than
needed for encoded data then content of out buffer may be arbitrary.
\param[in] in_max
!=0, size in bytes of the input buffer at *in
\param[in] out_max
!=0, size in bytes of the output buffer at *out
\param[in] out_limit
encoded data size soft limit in bytes. Due to performance reasons it is
not guaranteed that
lz4kd_encode will always detect that resulting encoded data size is
bigger than out_limit.
However, when reaching out_limit is detected, lz4kd_encode() returns
earlier and spares CPU cycles. Caller code should recheck whether the result
returned by lz4kd_encode() (a value greater than 0) is really
less than or equal to out_limit.
out_limit is ignored if it is equal to 0.
*/
int lz4kd_encode(
void *const state,
const void *const in,
void *out,
unsigned in_max,
unsigned out_max,
unsigned out_limit);
int lz4kd_encode2(
void *const state,
const void *const in,
void *out,
unsigned in_max,
unsigned out_max,
unsigned out_limit);
int lz4kd_encode_pattern(
void *const state,
const void *const in,
void *out,
unsigned in_max,
unsigned out_max,
unsigned out_limit);
/*
lz4kd_encode_max_cr() encodes/compresses one input buffer at *in, places
result of encoding into one output buffer at *out if encoded data
size fits specified value of out_max.
It returns the size of encoded data in case of success or value<=0 otherwise.
The result of successful encoding is in proprietary format, that
is the encoded data can be decoded only by lz4kd_decode().
\return
\li positive value\n
if encoding was successful. The value returned is the size of encoded
(compressed) data always <=out_max.
\li non-positive value\n
if in==0||in_max==0||out==0||out_max==0 or
if out_max is less than needed for encoded (compressed) data.
\param[in] state
!=0, pointer to state buffer used internally by the function. Size of
state in bytes should be at least lz4kd_encode_state_bytes_min(). The content
of state buffer will be changed during encoding.
\param[in] in
!=0, pointer to the input buffer to encode (compress). The content of
the input buffer does not change during encoding.
\param[in] out
!=0, pointer to the output buffer where to place result of encoding
(compression).
If encoding is unsuccessful, e.g. out_max is less than
needed for encoded data then content of out buffer may be arbitrary.
\param[in] in_max
!=0, size in bytes of the input buffer at *in
\param[in] out_max
!=0, size in bytes of the output buffer at *out
\param[in] out_limit
encoded data size soft limit in bytes. Due to performance reasons it is
not guaranteed that
lz4kd_encode will always detect that resulting encoded data size is
bigger than out_limit.
However, when reaching out_limit is detected, lz4kd_encode() returns
earlier and spares CPU cycles. Caller code should recheck whether the result
returned by lz4kd_encode() (a value greater than 0) is really
less than or equal to out_limit.
out_limit is ignored if it is equal to 0.
*/
int lz4kd_encode_max_cr(
void *const state,
const void *const in,
void *out,
unsigned in_max,
unsigned out_max,
unsigned out_limit);
/*
lz4kd_decode() decodes (decompresses) data from one input buffer and places
the result of decompression into one output buffer. The encoded data in input
buffer should be in proprietary format, produced by lz4kd_encode()
or by lz4kd_encode_delta().
\return
\li positive value\n
if decoding was successful. The value returned is the size of decoded
(decompressed) data.
\li non-positive value\n
if in==0||in_max==0||out==0||out_max==0 or
if out_max is less than needed for decoded (decompressed) data or
if input encoded data format is corrupted.
\param[in] in
!=0, pointer to the input buffer to decode (decompress). The content of
the input buffer does not change during decoding.
\param[in] out
!=0, pointer to the output buffer where to place result of decoding
(decompression). If decompression is unsuccessful then content of out
buffer may be arbitrary.
\param[in] in_max
!=0, size in bytes of the input buffer at in
\param[in] out_max
!=0, size in bytes of the output buffer at out
*/
int lz4kd_decode(
const void *const in,
void *const out,
unsigned in_max,
unsigned out_max);
#endif /* _LZ4KD_H */

View File

@@ -245,8 +245,14 @@ config LZ4_COMPRESS
config LZ4HC_COMPRESS
tristate
config LZ4KD_COMPRESS
tristate
config LZ4_DECOMPRESS
tristate
config LZ4KD_DECOMPRESS
tristate
config ZSTD_COMPRESS
select XXHASH
@@ -295,7 +301,7 @@ config GENERIC_ALLOCATOR
#
config REED_SOLOMON
tristate
config REED_SOLOMON_ENC8
bool

View File

@@ -135,6 +135,8 @@ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
obj-$(CONFIG_LZ4_COMPRESS) += lz4/
obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/
obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/
obj-$(CONFIG_LZ4KD_COMPRESS) += lz4kd/
obj-$(CONFIG_LZ4KD_DECOMPRESS) += lz4kd/
obj-$(CONFIG_ZSTD_COMPRESS) += zstd/
obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd/
obj-$(CONFIG_XZ_DEC) += xz/

3
lib/lz4kd/Makefile Normal file
View File

@@ -0,0 +1,3 @@
ccflags-y += -O3
obj-$(CONFIG_LZ4KD_COMPRESS) += lz4kd_encode.o
obj-$(CONFIG_LZ4KD_DECOMPRESS) += lz4kd_decode.o

243
lib/lz4kd/lz4kd_decode.c Normal file
View File

@@ -0,0 +1,243 @@
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
* Description: LZ4K compression algorithm with delta compression
*/
#if !defined(__KERNEL__)
#include "lz4kd.h"
#else
#include <linux/lz4kd.h>
#include <linux/module.h>
#endif
#include "lz4kd_private.h" /* types, etc */
/*
 * Accumulate a variable-length size field into *size: a run of 255-valued
 * bytes terminated by the first byte != 255, each byte added to *size.
 * Returns the position after the terminator, or NULL on input overrun.
 */
static const uint8_t *get_size(
    uint_fast32_t *size,
    const uint8_t *in_at,
    const uint8_t *const in_end)
{
    for (;;) {
        uint_fast32_t byte;

        if (unlikely(in_at >= in_end))
            return NULL;
        byte = *in_at++;
        *size += byte;
        if (byte != BYTE_MAX)
            return in_at;
    }
}
/*
 * Validate the end-of-block tag (offset == 0): it must carry literals,
 * exactly REPEAT_MIN repeat bytes, and consume the input exactly.
 * Returns the decoded size on success, LZ4K_STATUS_FAILED otherwise.
 */
static int end_of_block(
    const uint_fast32_t nr_bytes_max,
    const uint_fast32_t r_bytes_max,
    const uint8_t *const in_at,
    const uint8_t *const in_end,
    const uint8_t *const out,
    const uint8_t *const out_at)
{
    const bool valid = nr_bytes_max &&
        r_bytes_max == REPEAT_MIN &&
        in_at == in_end;

    return valid ? (int)(out_at - out) : LZ4K_STATUS_FAILED;
}
enum {
NR_COPY_MIN = 16,
R_COPY_MIN = 16,
R_COPY_SAFE = R_COPY_MIN - 1,
R_COPY_SAFE_2X = (R_COPY_MIN << 1) - 1
};
/*
 * Copy nr_bytes_max literal bytes from *in_at to *out_at.
 * Prefers fixed NR_COPY_MIN-sized copies (may over-read/over-write into
 * slack space) when both buffers have at least NR_COPY_MIN headroom,
 * falls back to an exact-length copy, and returns false when either
 * buffer would overflow. Advances both cursors on success.
 */
static bool out_non_repeat(
    const uint8_t **in_at,
    uint8_t **out_at,
    uint_fast32_t nr_bytes_max,
    const uint8_t *const in_end,
    const uint8_t *const out_end)
{
    const uint8_t *const in_copy_end = *in_at + nr_bytes_max;
    uint8_t *const out_copy_end = *out_at + nr_bytes_max;
    if (likely(nr_bytes_max <= NR_COPY_MIN)) {
        /* one fixed-size copy when slack allows, else exact copy */
        if (likely(*in_at <= in_end - NR_COPY_MIN &&
            *out_at <= out_end - NR_COPY_MIN))
            m_copy(*out_at, *in_at, NR_COPY_MIN);
        else if (in_copy_end <= in_end && out_copy_end <= out_end)
            m_copy(*out_at, *in_at, nr_bytes_max);
        else
            return false;
    } else { /* nr_bytes_max>NR_COPY_MIN */
        if (likely(in_copy_end <= in_end - NR_COPY_MIN &&
            out_copy_end <= out_end - NR_COPY_MIN)) {
            /* chunked NR_COPY_MIN copies up to out_copy_end */
            m_copy(*out_at, *in_at, NR_COPY_MIN);
            copy_x_while_lt(*out_at + NR_COPY_MIN,
                *in_at + NR_COPY_MIN,
                out_copy_end, NR_COPY_MIN);
        } else if (in_copy_end <= in_end && out_copy_end <= out_end) {
            m_copy(*out_at, *in_at, nr_bytes_max);
        } else { /* in_copy_end > in_end || out_copy_end > out_end */
            return false;
        }
    } /* if (nr_bytes_max <= NR_COPY_MIN) */
    *in_at = in_copy_end;
    *out_at = out_copy_end;
    return true;
}
/*
 * Expand an overlapping repeat with a small offset by repeatedly copying
 * COPY_MIN bytes and doubling the effective offset until the source and
 * destination are at least R_COPY_MIN apart, then finishing with wide
 * copies. Caller guarantees out_copy_end + R_COPY_SAFE_2X <= out_end.
 */
static void out_repeat_overlap(
    uint_fast32_t offset,
    uint8_t *out_at,
    const uint8_t *out_from,
    const uint8_t *const out_copy_end)
{
    enum {
        COPY_MIN = R_COPY_MIN >> 1,
        OFFSET_LIMIT = COPY_MIN >> 1
    };
    m_copy(out_at, out_from, COPY_MIN);
    /* (1 < offset < R_COPY_MIN/2) && out_copy_end + R_COPY_SAFE_2X <= out_end */
    out_at += offset;
    if (offset <= OFFSET_LIMIT)
        offset <<= 1; /* widen the stride: copied data now repeats at 2*offset */
    do {
        m_copy(out_at, out_from, COPY_MIN);
        out_at += offset;
        if (offset <= OFFSET_LIMIT)
            offset <<= 1;
    } while (out_at - out_from < R_COPY_MIN);
    while_lt_copy_2x_as_x2(out_at, out_from, out_copy_end, R_COPY_MIN);
}
/*
 * Slow-path repeat expansion: overlapping small offsets (delegated to
 * out_repeat_overlap when there is enough tail slack), offset == 1
 * (a pure byte run, expanded with m_set), or a strict byte-by-byte copy
 * near the end of the output buffer. Returns false on output overflow.
 */
static bool out_repeat_slow(
    uint_fast32_t r_bytes_max,
    uint_fast32_t offset,
    uint8_t *out_at,
    const uint8_t *out_from,
    const uint8_t *const out_copy_end,
    const uint8_t *const out_end)
{
    if (offset > 1 && out_copy_end <= out_end - R_COPY_SAFE_2X) {
        out_repeat_overlap(offset, out_at, out_from, out_copy_end);
    } else {
        if (unlikely(out_copy_end > out_end))
            return false;
        if (offset == 1) {
            /* repeat of a single byte */
            m_set(out_at, *out_from, r_bytes_max);
        } else {
            /* overlapping copy without slack: must go byte by byte */
            do
                *out_at++ = *out_from++;
            while (out_at < out_copy_end);
        }
    }
    return true;
}
/*
 * Core tag-stream decoder for one block.
 * Each 3-byte (TAG_BYTES_MAX) tag packs literal count, repeat length and
 * match offset into nr_log2/r_log2/off_log2 bit fields; counts that hit
 * the field maximum continue in following size bytes (see get_size()).
 * Returns the number of decoded bytes or a negative LZ4K_STATUS_* code.
 */
static int decode(
    const uint8_t *in_at,
    uint8_t *const out,
    const uint8_t *const in_end,
    const uint8_t *const out_end,
    const uint_fast32_t nr_log2,
    const uint_fast32_t off_log2)
{
    const uint_fast32_t r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2);
    const uint8_t *const in_end_minus_x = in_end - TAG_BYTES_MAX;
    uint8_t *out_at = out;
    while (likely(in_at <= in_end_minus_x)) {
        /* read 4 bytes starting one byte early, drop the low byte: 3-byte tag */
        const uint_fast32_t utag = read4_at(in_at - 1) >> BYTE_BITS;
        const uint_fast32_t offset = utag & mask(off_log2);
        uint_fast32_t nr_bytes_max = utag >> (off_log2 + r_log2),
            r_bytes_max = ((utag >> off_log2) & mask(r_log2)) +
            REPEAT_MIN;
        const uint8_t *out_from = 0;
        uint8_t *out_copy_end = 0;
        const uint8_t *out_safe_end = 0;
        in_at += TAG_BYTES_MAX;
        /* literal count saturated the tag field: read continuation bytes */
        if (unlikely(nr_bytes_max == mask(nr_log2))) {
            in_at = get_size(&nr_bytes_max, in_at, in_end);
            if (unlikely(in_at == NULL))
                return LZ4K_STATUS_READ_ERROR;
        }
        if (!out_non_repeat(&in_at, &out_at, nr_bytes_max, in_end, out_end))
            return LZ4K_STATUS_FAILED;
        /* repeat length saturated the tag field: read continuation bytes */
        if (unlikely(r_bytes_max == mask(r_log2) + REPEAT_MIN)) {
            in_at = get_size(&r_bytes_max, in_at, in_end);
            if (unlikely(in_at == NULL))
                return LZ4K_STATUS_READ_ERROR;
        }
        out_from = out_at - offset;
        if (unlikely(out_from < out))
            return LZ4K_STATUS_FAILED; /* offset points before block start */
        out_copy_end = out_at + r_bytes_max;
        out_safe_end = out_end - R_COPY_SAFE_2X;
        if (likely(offset >= R_COPY_MIN && out_copy_end <= out_safe_end)) {
            /* non-overlapping, plenty of slack: widest copies */
            copy_2x_as_x2_while_lt(out_at, out_from, out_copy_end,
                R_COPY_MIN);
        } else if (likely(offset >= (R_COPY_MIN >> 1) &&
            out_copy_end <= out_safe_end)) {
            m_copy(out_at, out_from, R_COPY_MIN);
            out_at += offset;
            while_lt_copy_x(out_at, out_from, out_copy_end, R_COPY_MIN);
        } else if (likely(offset > 0)) {
            if (!out_repeat_slow(r_bytes_max, offset, out_at, out_from,
                out_copy_end, out_end))
                return LZ4K_STATUS_FAILED;
        } else { /* offset == 0: EOB, last literal */
            return end_of_block(nr_bytes_max, r_bytes_max, in_at,
                in_end, out, out_at);
        }
        out_at = out_copy_end;
    } /* while (likely(in_at <= in_end_minus_x)) */
    return in_at == in_end ? (int)(out_at - out) : LZ4K_STATUS_FAILED;
}
/*
 * Expand a pattern block: replicate the 8-byte pattern at *in across the
 * whole output. Fails unless the output size is a multiple of 8 bytes.
 */
static int decode_pattern_4kb(
    const uint8_t *const in,
    uint8_t *const out,
    const uint8_t *const out_end)
{
    const uint64_t pattern = *(const uint64_t*)in;
    uint64_t *dst = (uint64_t*)out;
    const uint64_t *const last = (const uint64_t*)out_end - 1;

    while (dst <= last)
        *dst++ = pattern;
    return (uint8_t*)dst == out_end ? (int)(out_end - out) : LZ4K_STATUS_FAILED;
}
/* Decode one block with the 4KB parameter set (NR_4KB_LOG2/BLOCK_4KB_LOG2). */
static int decode_4kb(
    const uint8_t *const in,
    uint8_t *const out,
    const uint8_t *const in_end,
    const uint8_t *const out_end)
{
    return decode(in, out, in_end, out_end, NR_4KB_LOG2, BLOCK_4KB_LOG2);
}
/*
 * Public entry: validate buffers, then decode one block of at most 4KB
 * (out_max is clamped to 1 << BLOCK_4KB_LOG2). Decoding starts at in+1,
 * skipping the 1-byte format header written by lz4kd_encode().
 * NOTE(review): an input of exactly PATTERN_BYTES_MAX (8) bytes takes the
 * pattern path with no header byte — presumably produced by
 * lz4kd_encode_pattern(), whose definition is not visible here; confirm.
 */
int lz4kd_decode(
    const void *in,
    void *const out,
    unsigned in_max,
    unsigned out_max)
{
    /* ++use volatile pointers to prevent compiler optimizations */
    const uint8_t *volatile in_end = (const uint8_t*)in + in_max;
    const uint8_t *volatile out_end = (uint8_t*)out + min_u64(out_max, 1 << BLOCK_4KB_LOG2);
    if (unlikely(in == NULL || out == NULL))
        return LZ4K_STATUS_FAILED;
    if (unlikely(in_max <= 1 + TAG_BYTES_MAX || out_max <= 0))
        return LZ4K_STATUS_FAILED;
    /* invalid buffer size or pointer overflow */
    if (unlikely((const uint8_t*)in >= in_end || (uint8_t*)out >= out_end))
        return LZ4K_STATUS_FAILED;
    /* -- */
    if (unlikely(in_max == PATTERN_BYTES_MAX))
        return decode_pattern_4kb((const uint8_t*)in, (uint8_t*)out,
            out_end);
    return decode_4kb((const uint8_t*)in + 1, (uint8_t*)out, in_end, out_end);
}
EXPORT_SYMBOL(lz4kd_decode);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("LZ4K decoder");

418
lib/lz4kd/lz4kd_encode.c Normal file
View File

@@ -0,0 +1,418 @@
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
* Description: LZ4K compression algorithm with delta compression
*/
#if !defined(__KERNEL__)
#include "lz4kd.h"
#else
#include <linux/lz4kd.h>
#include <linux/module.h>
#endif
#include "lz4kd_private.h"
#include "lz4kd_encode_private.h"
enum {
HT_LOG2 = 12, /* ==11 #3 max drop in CR */
STEP_LOG2 = 5 /* ==3 #2 avg drop in CR */
};
/*
 * Minimum byte size of the encoder state: the hash table holds
 * 1 << HT_LOG2 uint16_t entries, i.e. 2 bytes per entry.
 */
static unsigned encode_state_bytes_min(void)
{
    return 1U << (HT_LOG2 + 1);
}
#if !defined(LZ4K_DELTA) && !defined(LZ4K_MAX_CR)
/* Public wrapper: minimum size of the state buffer for lz4kd_encode*(). */
unsigned lz4kd_encode_state_bytes_min(void)
{
    return encode_state_bytes_min();
}
EXPORT_SYMBOL(lz4kd_encode_state_bytes_min);
#endif /* !defined(LZ4K_DELTA) && !defined(LZ4K_MAX_CR) */
/* minimum encoded size for non-compressible data:
 * one tag + the literals, plus continuation size bytes when the literal
 * count exceeds the tag's nr field maximum (mask(nr_log2)). */
inline static uint_fast32_t encoded_bytes_min(
    uint_fast32_t nr_log2,
    uint_fast32_t in_max)
{
    return in_max < mask(nr_log2) ?
        TAG_BYTES_MAX + in_max :
        TAG_BYTES_MAX + size_bytes_count(in_max - mask(nr_log2)) + in_max;
}
/*
 * Merge the repeat length into the tag's r field (shifted past the offset
 * bits), saturating at the field maximum; the excess, if any, is emitted
 * later as continuation size bytes (see out_r_bytes_left()).
 */
inline static void update_utag(
    uint_fast32_t r_bytes_max,
    uint_fast32_t *utag,
    const uint_fast32_t nr_log2,
    const uint_fast32_t off_log2)
{
    const uint_fast32_t r_mask = mask(TAG_BITS_MAX - (off_log2 + nr_log2));
    *utag |= likely(r_bytes_max - REPEAT_MIN < r_mask) ?
        ((r_bytes_max - REPEAT_MIN) << off_log2) : (r_mask << off_log2);
}
/*
 * Emit a variable-length size field: a run of 255-valued bytes followed by
 * a terminating byte < 255 whose values sum to u. Returns the new cursor.
 */
inline static uint8_t *out_size_bytes(uint8_t *out_at, uint_fast32_t u)
{
    while (u >= BYTE_MAX) {
        *out_at++ = (uint8_t)BYTE_MAX;
        u -= BYTE_MAX;
    }
    *out_at++ = (uint8_t)u;
    return out_at;
}
/* Write the 3-byte tag, then the continuation size bytes for bytes_left. */
inline static uint8_t *out_utag_then_bytes_left(
    uint8_t *out_at,
    uint_fast32_t utag,
    uint_fast32_t bytes_left)
{
    m_copy(out_at, &utag, TAG_BYTES_MAX);
    return out_size_bytes(out_at + TAG_BYTES_MAX, bytes_left);
}
/*
 * Emit the final literals-only run [nr0, in_end) at end of block:
 * a tag with zero offset/repeat fields, optional continuation size bytes,
 * then the literals. Returns the total encoded size, or
 * LZ4K_STATUS_INCOMPRESSIBLE when the tail would not fit in the output.
 */
static int out_tail(
    uint8_t *out_at,
    uint8_t *const out_end,
    const uint8_t *const out,
    const uint8_t *const nr0,
    const uint8_t *const in_end,
    const uint_fast32_t nr_log2,
    const uint_fast32_t off_log2)
{
    const uint_fast32_t nr_mask = mask(nr_log2);
    const uint_fast32_t r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2);
    const uint_fast32_t nr_bytes_now = u_32(in_end - nr0);
    if (encoded_bytes_min(nr_log2, nr_bytes_now) > u_32(out_end - out_at))
        return LZ4K_STATUS_INCOMPRESSIBLE;
    if (nr_bytes_now < nr_mask) {
        /* caller guarantees at least one nr-byte */
        uint_fast32_t utag = (nr_bytes_now << (off_log2 + r_log2));
        m_copy(out_at, &utag, TAG_BYTES_MAX);
        out_at += TAG_BYTES_MAX;
    } else { /* nr_bytes_now>=nr_mask */
        uint_fast32_t bytes_left = nr_bytes_now - nr_mask;
        uint_fast32_t utag = (nr_mask << (off_log2 + r_log2));
        out_at = out_utag_then_bytes_left(out_at, utag, bytes_left);
    } /* if (nr_bytes_now<nr_mask) */
    m_copy(out_at, nr0, nr_bytes_now);
    return (int)(out_at + nr_bytes_now - out);
}
/*
 * Finish the block: if the cursor already reached in_end there is nothing
 * left to emit, otherwise flush the remaining literals via out_tail().
 */
inline static int out_tail2(
    uint8_t *out_at,
    uint8_t *const out_end,
    const uint8_t *const out,
    const uint8_t *const r,
    const uint8_t *const in_end,
    const uint_fast32_t nr_log2,
    const uint_fast32_t off_log2)
{
    if (r == in_end)
        return (int)(out_at - out);
    return out_tail(out_at, out_end, out, r, in_end,
        nr_log2, off_log2);
}
/*
 * Exported wrapper around out_tail(); check_out is accepted for interface
 * compatibility and ignored here.
 */
int lz4kd_out_tail(
    uint8_t *out_at,
    uint8_t *const out_end,
    const uint8_t *const out,
    const uint8_t *const nr0,
    const uint8_t *const in_end,
    const uint_fast32_t nr_log2,
    const uint_fast32_t off_log2,
    bool check_out)
{
    return out_tail(out_at, out_end, out, nr0, in_end,
        nr_log2, off_log2);
}
/*
 * Write the tag (with the literal count merged into its nr field, plus
 * continuation size bytes when saturated) followed by the literal bytes
 * [nr0, r). Copies in NR_COPY_MIN chunks, which may over-write into slack.
 * NOTE(review): out_end is unchecked here — presumably the caller's
 * nr_encoded_bytes_max() pre-check guarantees room; confirm.
 */
static uint8_t *out_non_repeat(
    uint8_t *out_at,
    uint8_t *const out_end,
    uint_fast32_t utag,
    const uint8_t *const nr0,
    const uint8_t *const r,
    const uint_fast32_t nr_log2,
    const uint_fast32_t off_log2)
{
    const uint_fast32_t nr_bytes_max = u_32(r - nr0);
    const uint_fast32_t nr_mask = mask(nr_log2),
        r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2);
    if (likely(nr_bytes_max < nr_mask)) {
        utag |= (nr_bytes_max << (off_log2 + r_log2));
        m_copy(out_at, &utag, TAG_BYTES_MAX);
        out_at += TAG_BYTES_MAX;
    } else { /* nr_bytes_max >= nr_mask */
        uint_fast32_t bytes_left = nr_bytes_max - nr_mask;
        utag |= (nr_mask << (off_log2 + r_log2));
        out_at = out_utag_then_bytes_left(out_at, utag, bytes_left);
    } /* if (nr_bytes_max<nr_mask) */
    copy_x_while_total(out_at, nr0, nr_bytes_max, NR_COPY_MIN);
    out_at += nr_bytes_max;
    return out_at;
}
/*
 * Emit continuation size bytes for a repeat whose length exceeded the
 * tag's r field; no-op when the length fit in the tag.
 */
inline static uint8_t *out_r_bytes_left(
    uint8_t *out_at,
    uint_fast32_t r_bytes_max,
    const uint_fast32_t nr_log2,
    const uint_fast32_t off_log2)
{
    const uint_fast32_t r_mask = mask(TAG_BITS_MAX - (off_log2 + nr_log2));
    return likely(r_bytes_max - REPEAT_MIN < r_mask) ?
        out_at : out_size_bytes(out_at, r_bytes_max - REPEAT_MIN - r_mask);
}
/*
 * Emit a tag for a repeat-only match (no preceding literals): repeat
 * length goes into the tag's r field, with continuation size bytes when
 * it exceeds the field maximum.
 */
static uint8_t *out_repeat(
    uint8_t *out_at,
    uint_fast32_t utag,
    uint_fast32_t r_bytes_max,
    const uint_fast32_t nr_log2,
    const uint_fast32_t off_log2)
{
    const uint_fast32_t r_mask = mask(TAG_BITS_MAX - (off_log2 + nr_log2));
    if (likely(r_bytes_max - REPEAT_MIN < r_mask)) {
        utag |= ((r_bytes_max - REPEAT_MIN) << off_log2);
        m_copy(out_at, &utag, TAG_BYTES_MAX);
        out_at += TAG_BYTES_MAX;
    } else {
        uint_fast32_t bytes_left = r_bytes_max - REPEAT_MIN - r_mask;
        utag |= (r_mask << off_log2);
        out_at = out_utag_then_bytes_left(out_at, utag, bytes_left);
    }
    return out_at; /* SUCCESS: continue compression */
}
/*
 * Exported wrapper around out_repeat(); out_end and check_out are accepted
 * for interface compatibility and ignored here.
 */
uint8_t *lz4kd_out_repeat(
    uint8_t *out_at,
    uint8_t *const out_end,
    uint_fast32_t utag,
    uint_fast32_t r_bytes_max,
    const uint_fast32_t nr_log2,
    const uint_fast32_t off_log2,
    const bool check_out)
{
    return out_repeat(out_at, utag, r_bytes_max, nr_log2, off_log2);
}
/*
 * Emit one (literals, repeat) tuple: fold the repeat length into the tag,
 * write the tag + literals, then any repeat-length continuation bytes.
 */
inline static uint8_t *out_tuple(
    uint8_t *out_at,
    uint8_t *const out_end,
    uint_fast32_t utag,
    const uint8_t *const nr0,
    const uint8_t *const r,
    uint_fast32_t r_bytes_max,
    const uint_fast32_t nr_log2,
    const uint_fast32_t off_log2)
{
    update_utag(r_bytes_max, &utag, nr_log2, off_log2);
    out_at = out_non_repeat(out_at, out_end, utag, nr0, r, nr_log2, off_log2);
    return out_r_bytes_left(out_at, r_bytes_max, nr_log2, off_log2);
}
/*
 * Exported wrapper around out_tuple(); check_out is accepted for interface
 * compatibility and ignored here.
 */
uint8_t *lz4kd_out_tuple(
    uint8_t *out_at,
    uint8_t *const out_end,
    uint_fast32_t utag,
    const uint8_t *const nr0,
    const uint8_t *const r,
    uint_fast32_t r_bytes_max,
    const uint_fast32_t nr_log2,
    const uint_fast32_t off_log2,
    bool check_out)
{
    return out_tuple(out_at, out_end, utag, nr0, r, r_bytes_max,
        nr_log2, off_log2);
}
/*
 * Extend a match already known to span REPEAT_MIN bytes: compare 8 bytes
 * at a time until a difference or in_end_safe, then finish byte by byte.
 * Returns the first position after the match in the r stream.
 */
static const uint8_t *repeat_end(
    const uint8_t *q,
    const uint8_t *r,
    const uint8_t *const in_end_safe,
    const uint8_t *const in_end)
{
    q += REPEAT_MIN;
    r += REPEAT_MIN;
    /* caller guarantees r+12<=in_end */
    do {
        const uint64_t x = read8_at(q) ^ read8_at(r);
        if (x) {
            /* some bytes differ: count of trailing 0-bits/bytes
             * locates the first mismatching byte (little-endian) */
            const uint16_t ctz = (uint16_t)__builtin_ctzl(x);
            return r + (ctz >> BYTE_BITS_LOG2);
        }
        q += sizeof(uint64_t);
        r += sizeof(uint64_t);
    } while (likely(r <= in_end_safe)); /* once, at input block end */
    while (r < in_end) {
        if (*q != *r) return r;
        ++q;
        ++r;
    }
    return r;
}
/* Exported wrapper around repeat_end(). */
const uint8_t *lz4kd_repeat_end(
    const uint8_t *q,
    const uint8_t *r,
    const uint8_t *const in_end_safe,
    const uint8_t *const in_end)
{
    return repeat_end(q, r, in_end_safe, in_end);
}
/* CR increase order: +STEP, have OFFSETS, use _5b(most impact) */
/* *_6b to compete with LZ4 */
/* Map the bytes at r to an HT_LOG2-bit hash-table index
 * (presumably a 5-byte hash, per the helper name — confirm). */
inline static uint_fast32_t hash(const uint8_t *r)
{
    return hash64_5b(r, HT_LOG2);
}
/*
* Proof that 'r' increments are safe-NO pointer overflows are possible:
*
* While using STEP_LOG2=5, step_start=1<<STEP_LOG2 == 32 we increment s
* 32 times by 1, 32 times by 2, 32 times by 3, and so on:
* 32*1+32*2+32*3+...+32*31 == 32*SUM(1..31) == 32*((1+31)*15+16).
* So, we can safely increment s by at most 31 for input block size <=
* 1<<13 < 15872.
*
* More precisely, STEP_LIMIT == x for any input block calculated as follows:
* 1<<off_log2 >= (1<<STEP_LOG2)*((x+1)(x-1)/2+x/2) ==>
* 1<<(off_log2-STEP_LOG2+1) >= x^2+x-1 ==>
* x^2+x-1-1<<(off_log2-STEP_LOG2+1) == 0, which is solved by standard
* method.
* To avoid overhead here conservative approximate value of x is calculated
* as average of two nearest square roots, see STEP_LIMIT above.
*/
/*
 * Main compression loop for one block with the 4KB parameter set.
 * Scans for 4-byte matches via the hash table (with an accelerating step
 * when no match is found — see the proof comment above), extends each
 * match, and emits repeat or (literals, repeat) tuples; trailing literals
 * are flushed through out_tail()/out_tail2().
 */
static int encode_any(
    uint16_t *const ht,
    const uint8_t *const in0,
    const uint8_t *const in_end,
    uint8_t *const out,
    uint8_t *const out_end)
{
    enum {
        NR_LOG2 = NR_4KB_LOG2,
        OFF_LOG2 = BLOCK_4KB_LOG2
    };
    const uint8_t *const in_end_safe = in_end - NR_COPY_MIN;
    const uint8_t *r = in0;
    const uint8_t *nr0 = r++; /* nr0: start of the pending literal run */
    uint8_t *out_at = out + 1; /* +1 for header */
    for (; ; nr0 = r) {
        const uint8_t *q = 0;
        uint_fast32_t step = 1 << STEP_LOG2;
        uint_fast32_t utag = 0;
        const uint8_t *r_end = 0;
        uint_fast32_t r_bytes_max = 0;
        while (true) {
            /* probe twice at adjacent positions, then skip ahead */
            if (equal4(q = hashed(in0, ht, hash(r), r), r))
                break;
            ++r;
            if (equal4(q = hashed(in0, ht, hash(r), r), r))
                break;
            /* step grows every 1<<STEP_LOG2 misses; stop near block end */
            if (unlikely((r += (++step >> STEP_LOG2)) > in_end_safe))
                return out_tail(out_at, out_end, out, nr0, in_end,
                    NR_LOG2, OFF_LOG2);
        }
        utag = u_32(r - q); /* match offset */
        r_end = repeat_end(q, r, in_end_safe, in_end);
        r_bytes_max = u_32(r_end - r);
        if (unlikely(nr0 == r))
            out_at = out_repeat(out_at, utag, r_bytes_max,
                NR_LOG2, OFF_LOG2);
        else
            out_at = out_tuple(out_at, out_end, utag, nr0, r, r_bytes_max,
                NR_LOG2, OFF_LOG2);
        if (unlikely((r += r_bytes_max) > in_end_safe))
            return out_tail2(out_at, out_end, out, r, in_end,
                NR_LOG2, OFF_LOG2);
        /* remember the position just before the new cursor */
        ht[hash(r - 1)] = (uint16_t)(r - 1 - in0);
    }
}
/* not static for inlining optimization */
/* Thin exported wrapper: run encode_any() over [in, in+in_max). */
int lz4kd_encode_fast(
    void *const state,
    const uint8_t *const in,
    uint8_t *const out,
    const uint_fast32_t in_max,
    const uint_fast32_t out_max)
{
    return encode_any((uint16_t*)state, in, in + in_max, out, out + out_max);
}
/*
 * Public entry: validate arguments, zero the hash-table state, write the
 * 1-byte format header, then compress. Returns the encoded size, 0 when
 * the worst-case literal encoding cannot fit in out_max (incompressible),
 * or LZ4K_STATUS_FAILED on invalid arguments.
 */
int lz4kd_encode(
    void *const state,
    const void *const in,
    void *out,
    unsigned in_max,
    unsigned out_max,
    unsigned out_limit)
{
    const uint64_t io_min = min_u64(in_max, out_max);
    /* required compression gain: at least GAIN_BYTES_MAX or io_min/2^GAIN_BYTES_LOG2 */
    const uint64_t gain_max = max_u64(GAIN_BYTES_MAX, (io_min >> GAIN_BYTES_LOG2));
    /* ++use volatile pointers to prevent compiler optimizations */
    const uint8_t *volatile in_end = (const uint8_t*)in + in_max;
    const uint8_t *volatile out_end = (uint8_t*)out + out_max;
    const void *volatile state_end =
        (uint8_t*)state + encode_state_bytes_min();
    if (unlikely(state == NULL))
        return LZ4K_STATUS_FAILED;
    if (unlikely(in == NULL || out == NULL))
        return LZ4K_STATUS_FAILED;
    if (unlikely(out_max <= gain_max))
        return LZ4K_STATUS_FAILED;
    if (unlikely((const uint8_t*)in >= in_end || (uint8_t*)out >= out_end))
        return LZ4K_STATUS_FAILED;
    if (unlikely(state >= state_end))
        return LZ4K_STATUS_FAILED; /* pointer overflow */
    if (in_max > (1 << BLOCK_4KB_LOG2))
        return LZ4K_STATUS_FAILED; /* only blocks up to 4KB are supported */
    if (unlikely(!out_limit || out_limit > io_min))
        out_limit = (unsigned)io_min;
    m_set(state, 0, encode_state_bytes_min());
    *((uint8_t*)out) = 0; /* lz4kd header */
    if (unlikely(nr_encoded_bytes_max(in_max, NR_4KB_LOG2) > out_max))
        return 0;
    /* out_limit (not out_max) is the effective output bound from here on */
    return lz4kd_encode_fast(state, (const uint8_t*)in, (uint8_t*)out,
        in_max, out_limit);
}
EXPORT_SYMBOL(lz4kd_encode);
/* maximum encoded size for repeat and non-repeat data if "fast" encoder is used */
uint_fast32_t lz4kd_encoded_bytes_max(
    uint_fast32_t nr_max,
    uint_fast32_t r_max,
    uint_fast32_t nr_log2,
    uint_fast32_t off_log2)
{
    /* 1 header byte + one tag + literals rounded up to the copy chunk */
    uint_fast32_t r = 1 + TAG_BYTES_MAX +
        (uint32_t)round_up_to_log2(nr_max, NR_COPY_LOG2);
    uint_fast32_t r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2);
    if (nr_max >= mask(nr_log2))
        r += size_bytes_count(nr_max - mask(nr_log2));
    if (r_max >= mask(r_log2)) {
        r_max -= mask(r_log2);
        r += (uint_fast32_t)max_u64(size_bytes_count(r_max),
            r_max - r_max / REPEAT_MIN); /* worst case: one tag for each REPEAT_MIN */
    }
    return r;
}
EXPORT_SYMBOL(lz4kd_encoded_bytes_max);
/* Return the static algorithm version string. */
const char *lz4kd_version(void)
{
    return "2022.03.20";
}
EXPORT_SYMBOL(lz4kd_version);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("LZ4K encoder");

View File

@@ -0,0 +1,135 @@
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
* Description: LZ4K compression algorithm with delta compression
*/
#ifndef _LZ4KD_ENCODE_PRIVATE_H
#define _LZ4KD_ENCODE_PRIVATE_H
#include "lz4kd_private.h"
#ifdef __cplusplus
extern "C" {
#endif
enum {
GAIN_BYTES_LOG2 = 6,
GAIN_BYTES_MAX = 1 << GAIN_BYTES_LOG2,
NR_COPY_LOG2 = 4,
NR_COPY_MIN = 1 << NR_COPY_LOG2
};
/* Truncate a 64-bit value (typically a pointer difference) to 32 bits;
 * callers pass only in-block distances, which fit. */
inline static uint32_t u_32(int64_t i)
{
    return (uint32_t)i;
}
/*
* Compressed data format (where {} means 0 or more occurrences, [] means
* optional)
* <24bits tag: (off_log2 rOffset| r_log2 rSize|nr_log2 nrSize)>
* {<nrSize byte>}[<nr bytes>]{<rSize byte>}
* <rSize byte> and <nrSize byte> sequences are terminated by byte != 255
*
* <nrSize bytes for whole block>+<1 terminating 0 byte>
*/
/* Bytes needed to encode size u as 255-valued bytes plus a terminator.
 * NOTE(review): uses >> BYTE_BITS (divide by 256) as a cheap stand-in for
 * division by BYTE_MAX (255); exact for block-sized inputs — confirm for
 * larger u. */
inline static uint_fast32_t size_bytes_count(uint_fast32_t u)
{
    return ((u + BYTE_MAX) >> BYTE_BITS) + 1; /* (u + BYTE_MAX - 1) / BYTE_MAX; */
}
/* maximum encoded size for non-compressible data if "fast" encoder is used:
 * 1 header byte + one tag + literals rounded up to the copy chunk, plus
 * continuation size bytes when the count exceeds the tag's nr field. */
inline static uint_fast32_t nr_encoded_bytes_max(
    uint_fast32_t nr_max,
    uint_fast32_t nr_log2)
{
    uint_fast32_t r = 1 + TAG_BYTES_MAX + (uint32_t)round_up_to_log2(nr_max, NR_COPY_LOG2);
    return nr_max < mask(nr_log2) ? r : r + size_bytes_count(nr_max - mask(nr_log2));
}
/* maximum encoded size for repeat and non-repeat data if "fast" encoder is used */
uint_fast32_t lz4kd_encoded_bytes_max(
uint_fast32_t nr_max,
uint_fast32_t r_max,
uint_fast32_t nr_log2,
uint_fast32_t off_log2);
/*
 * Look up the previous position stored for hash bucket h and replace it
 * with the current position r (positions are offsets from in0, so an
 * empty bucket yields in0 itself — rejected later by equal4()).
 */
inline static const uint8_t *hashed(
    const uint8_t *const in0,
    uint16_t *const ht,
    uint_fast32_t h,
    const uint8_t *r)
{
    const uint8_t *q = in0 + ht[h];
    ht[h] = (uint16_t)(r - in0);
    return q;
}
/*
 * Extend a match backwards: step q and r back while the preceding bytes
 * match, without crossing the literal-run start (nr0) or block start (in0).
 * Returns the adjusted match start in the r stream.
 */
inline static const uint8_t *repeat_start(
    const uint8_t *q,
    const uint8_t *r,
    const uint8_t *const nr0,
    const uint8_t *const in0)
{
    for (; r > nr0 && likely(q > in0) && unlikely(q[-1] == r[-1]); --q, --r);
    return r;
}
/*
 * Quick check that q and s agree over a full r_max-byte span:
 * compare the last REPEAT_MIN bytes of the span first, then the first.
 */
static inline bool match_max(
    const uint8_t *q,
    const uint8_t *s,
    const uint_fast32_t r_max)
{
    const uint_fast32_t tail = r_max - REPEAT_MIN;

    return equal4(q + tail, s + tail) && equal4(q, s);
}
/*
 * Flush the trailing literal run [nr0, in_end) into [out_at, out_end).
 * NOTE(review): defined in the encoder .c file; semantics inferred from
 * the names and parameter lists — confirm against the definitions.
 * check_out presumably enables output-bounds checking.
 */
int lz4kd_out_tail(
	uint8_t *out_at,
	uint8_t *const out_end,
	const uint8_t *const out,
	const uint8_t *const nr0,
	const uint8_t *const in_end,
	const uint_fast32_t nr_log2,
	const uint_fast32_t off_log2,
	bool check_out);
/*
 * Emit one encoded tuple: the tag utag, the literal run starting at nr0,
 * and a match of up to r_bytes_max bytes ending at r; returns the
 * advanced output pointer.
 */
uint8_t *lz4kd_out_tuple(
	uint8_t *out_at,
	uint8_t *const out_end,
	uint_fast32_t utag,
	const uint8_t *const nr0,
	const uint8_t *const r,
	uint_fast32_t r_bytes_max,
	const uint_fast32_t nr_log2,
	const uint_fast32_t off_log2,
	bool check_out);
/*
 * Emit a match-only (no literals) tuple of up to r_bytes_max bytes with
 * tag utag; returns the advanced output pointer.
 */
uint8_t *lz4kd_out_repeat(
	uint8_t *out_at,
	uint8_t *const out_end,
	uint_fast32_t utag,
	uint_fast32_t r_bytes_max,
	const uint_fast32_t nr_log2,
	const uint_fast32_t off_log2,
	const bool check_out);
/*
 * Scan forward from the matching positions q and r to find where the
 * match ends, staying within in_end_safe (safe for wide loads) and
 * in_end; returns the first non-matching input position.
 */
const uint8_t *lz4kd_repeat_end(
	const uint8_t *q,
	const uint8_t *r,
	const uint8_t *const in_end_safe,
	const uint8_t *const in_end);
/*
 * Fast-path encoder: compress in[0, in_max) into out[0, out_max) using
 * the caller-provided hash-table state (sized by
 * lz4kd_encode_state_bytes_min()).
 */
int lz4kd_encode_fast(
	void *const state,
	const uint8_t *const in,
	uint8_t *const out,
	const uint_fast32_t in_max,
	const uint_fast32_t out_max);
#ifdef __cplusplus
}
#endif
#endif /* _LZ4KD_ENCODE_PRIVATE_H */

304
lib/lz4kd/lz4kd_private.h Normal file
View File

@@ -0,0 +1,304 @@
/*
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
* Description: LZ4K compression algorithm with delta compression
*/
#ifndef _LZ4KD_PRIVATE_H
#define _LZ4KD_PRIVATE_H
#if !defined(__KERNEL__)
/* for userspace only */
#else /* __KERNEL__ */
#include <linux/lz4kd.h>
#define __STDC_WANT_LIB_EXT1__ 1
#include <linux/string.h> /* memcpy() */
#include <linux/types.h> /* uint8_t, int8_t, uint16_t, int16_t,
uint32_t, int32_t, uint64_t, int64_t */
#include <stddef.h>
typedef uint64_t uint_fast32_t;
typedef int64_t int_fast32_t;
#endif /* __KERNEL__ */
#if defined(__GNUC__) && (__GNUC__>=4)
#define LZ4K_WITH_GCC_INTRINSICS
#endif
enum {
BYTE_BITS = 8UL,
WORD_BITS = 32U,
DWORD_BITS = 64UL,
BYTE_BITS_LOG2 = 3,
BYTE_MAX = 255U,
REPEAT_MIN = 4,
TAG_BYTES_MAX = 3,
TAG_BITS_MAX = TAG_BYTES_MAX * 8,
BLOCK_4KB_LOG2 = 12,
BLOCK_8KB_LOG2 = 13,
NR_8KB_LOG2 = 5, /* for encoded_bytes_max */
NR_4KB_LOG2 = 6,
PATTERN_BYTES_MAX = 8 /* 1 bytes for header, 8 bytes for pattern */
};
inline static uint32_t mask(uint_fast32_t log2)
{
return (1U << log2) - 1U;
}
/* 64-bit mask with the low log2 bits set, e.g. mask64(3) == 0x7. */
inline static uint64_t mask64(uint_fast32_t log2)
{
	const uint64_t one = 1ULL;

	return (one << log2) - one;
}
#if defined LZ4K_WITH_GCC_INTRINSICS
/*
 * 0-based index of the highest set bit of u, or -1 when u == 0.
 *
 * Fixed to use __builtin_clzll: the original called the 32-bit
 * __builtin_clz on (unsigned)u, which truncated 64-bit inputs — wrong
 * results for u >= 2^32 and undefined behavior when the low 32 bits
 * were zero (__builtin_clz(0) is UB).  Results for 0 < u < 2^32 are
 * unchanged.
 */
inline static int most_significant_bit_of(uint64_t u)
{
	return __builtin_expect((u) == 0, false) ?
		-1 : (int)(63 ^ (uint32_t)__builtin_clzll(u)); /* 63 == DWORD_BITS - 1 */
}
#else /* #!defined LZ4K_WITH_GCC_INTRINSICS */
#error undefined most_significant_bit_of(unsigned u)
#endif /* #if defined LZ4K_WITH_GCC_INTRINSICS */
/* Larger of two unsigned 64-bit values. */
inline static uint64_t max_u64(uint64_t a, uint64_t b)
{
	if (a > b)
		return a;
	return b;
}
/* Smaller of two unsigned 64-bit values. */
inline static uint64_t min_u64(uint64_t a, uint64_t b)
{
	if (a < b)
		return a;
	return b;
}
/*
 * memcpy wrapper: bounds-checked memcpy_s when C11 Annex K is available,
 * plain __builtin_memcpy otherwise.  Regions must not overlap.
 * NOTE(review): "(total * 2) >> 1" is a deliberate identity per the
 * original comment ("avoid bot errors"); it would wrap for
 * total > SIZE_MAX / 2 — confirm totals stay block-sized.
 */
inline static void m_copy(void *dst, const void *src, size_t total)
{
#if defined(__STDC_LIB_EXT1__)
	(void)memcpy_s(dst, total, src, (total * 2) >> 1); /* *2 >> 1 to avoid bot errors */
#else
	(void)__builtin_memcpy(dst, src, total);
#endif
}
/*
 * memset wrapper: bounds-checked memset_s when C11 Annex K is available,
 * plain __builtin_memset otherwise.
 * NOTE(review): "(total * 2) >> 1" is a deliberate identity per the
 * original comment ("avoid bot errors"); it would wrap for
 * total > SIZE_MAX / 2 — confirm totals stay block-sized.
 */
inline static void m_set(void *dst, uint8_t value, size_t total)
{
#if defined(__STDC_LIB_EXT1__)
	(void)memset_s(dst, total, value, (total * 2) >> 1); /* *2 >> 1 to avoid bot errors */
#else
	(void)__builtin_memset(dst, value, total);
#endif
}
/* Round u down to the nearest multiple of 2^log2. */
inline static uint64_t round_down_to_log2(uint64_t u, uint8_t log2)
{
	const uint64_t keep = ~mask64(log2);

	return u & keep;
}
/*
 * Round u up to the nearest multiple of 2^log2.
 * (Wraps if u is within mask64(log2) of UINT64_MAX.)
 */
inline static uint64_t round_up_to_log2(uint64_t u, uint8_t log2)
{
	const uint64_t m = mask64(log2);

	return (u + m) & ~m;
}
/*
 * Round u up to the nearest power of two: with msb the index of u's
 * highest set bit, rounding u up to a multiple of 2^msb yields u itself
 * when u is already a power of two and 2^(msb + 1) otherwise
 * (since u < 2^(msb + 1)).
 * NOTE(review): u == 0 makes msb == -1, which casts to 255 and makes the
 * shift inside mask64() undefined; callers presumably never pass 0 —
 * confirm.
 */
inline static uint64_t round_up_to_power_of2(uint64_t u)
{
	const int_fast32_t msb = most_significant_bit_of(u);
	return round_up_to_log2(u, (uint8_t)msb);
}
/*
 * Align pointer p up to the next 2^log2 boundary.
 * NOTE(review): round-trips the pointer through uint64_t rather than
 * uintptr_t — fine on the 64-bit kernel targets this code is built for,
 * not portable to 32-bit hosts where sizeof(void *) != 8 semantics may
 * warn; confirm target assumptions.
 */
inline static void *align_pointer_up_to_log2(const void *p, uint8_t log2)
{
	return (void*)round_up_to_log2((uint64_t)p, log2);
}
/*
 * Unaligned-safe 3-byte load: copies the 3 bytes at p into the storage
 * of a zero-initialized uint32_t (the numeric value is thus
 * endianness-dependent, which is fine for the hashing/equality uses in
 * this file).
 */
inline static uint32_t read3_at(const void *p)
{
	uint32_t result = 0;

	m_copy(&result, p, 3u); /* low 3 storage bytes only */
	return result;
}
/* Unaligned-safe 4-byte load via m_copy (no strict-aliasing violation). */
inline static uint32_t read4_at(const void *p)
{
	uint32_t v;

	m_copy(&v, p, sizeof v);
	return v;
}
/* Unaligned-safe 8-byte load via m_copy (no strict-aliasing violation). */
inline static uint64_t read8_at(const void *p)
{
	uint64_t v;

	m_copy(&v, p, sizeof v);
	return v;
}
/*
 * Word-wise equality probes over possibly-unaligned input (all loads go
 * through read4_at()/read8_at()).  The "3"-variants shift one byte out
 * of the loaded 32-bit word before comparing, so only 3 of the 4 loaded
 * bytes take part (which 3 depends on byte order; on little-endian
 * targets it is the first 3 bytes in memory).  The *pv variants compare
 * against a previously loaded value rv instead of reloading from memory.
 */
inline static bool equal3(const uint8_t *const q, const uint8_t *const r)
{
	return (read4_at(q) << BYTE_BITS) == (read4_at(r) << BYTE_BITS);
}
/* As equal3(), with the second operand already loaded into rv. */
inline static bool equal3pv(const uint8_t *const q, const uint64_t rv)
{
	return (read4_at(q) << BYTE_BITS) == ((uint32_t)rv << BYTE_BITS);
}
/* 4-byte equality of the words at q and r. */
inline static bool equal4(const uint8_t *const q, const uint8_t *const r)
{
	return read4_at(q) == read4_at(r);
}
/* As equal4(), with the second operand already loaded into rv. */
inline static bool equal4pv(const uint8_t *const q, const uint64_t rv)
{
	return read4_at(q) == (uint32_t)rv;
}
/* 8-byte equality of the words at q and r. */
inline static bool equal8(const uint8_t *const q, const uint8_t *const r)
{
	return read8_at(q) == read8_at(r);
}
/*
 * Multiplicative hashes used to index the match-finder hash table.
 * Each *v variant hashes an already-loaded word; the pointer variants
 * load via read4_at() first.  "shift" is log2 of the table size: the
 * top "shift" bits of the 32-bit product form the table index.
 * hash24v() shifts the word left by BYTE_BITS first, so only 3 of the
 * 4 bytes contribute (matching the equal3() comparisons).
 */
inline static uint_fast32_t hash24v(const uint64_t r, uint32_t shift)
{
	const uint32_t hash24_factor = 3266489917U;
	return (((uint32_t)r << BYTE_BITS) * hash24_factor) >> (WORD_BITS - shift);
}
/* 3-byte hash of the bytes at r. */
inline static uint_fast32_t hash24(const uint8_t *r, uint32_t shift)
{
	return hash24v(read4_at(r), shift);
}
/* 4-byte hash, alternate multiplier (same constant as hash24v). */
inline static uint_fast32_t hash32v_2(const uint64_t r, uint32_t shift)
{
	const uint32_t hash32_2_factor = 3266489917U;
	return ((uint32_t)r * hash32_2_factor) >> (WORD_BITS - shift);
}
inline static uint_fast32_t hash32_2(const uint8_t *r, uint32_t shift)
{
	return hash32v_2(read4_at(r), shift);
}
/* 4-byte hash; 2654435761 is the classic Fibonacci-hashing multiplier. */
inline static uint_fast32_t hash32v(const uint64_t r, uint32_t shift)
{
	const uint32_t hash32_factor = 2654435761U;
	return ((uint32_t)r * hash32_factor) >> (WORD_BITS - shift);
}
inline static uint_fast32_t hash32(const uint8_t *r, uint32_t shift)
{
	return hash32v(read4_at(r), shift);
}
/*
 * 64-bit multiplicative hashes over the low 5/6/7/8 bytes of a loaded
 * word: the pre-multiply left shift (24/16/8/0 bits) discards the
 * unwanted high bytes, and the top "shift" bits of the 64-bit product
 * form the table index ("shift" is log2 of the table size).
 */
inline static uint_fast32_t hash64v_5b(const uint64_t r, uint32_t shift)
{
	const uint64_t m = 889523592379ULL;
	const uint64_t up_shift = 24;
	return (uint32_t)(((r << up_shift) * m) >> (DWORD_BITS - shift));
}
/* 5-byte hash of the bytes at r. */
inline static uint_fast32_t hash64_5b(const uint8_t *r, uint32_t shift)
{
	return hash64v_5b(read8_at(r), shift);
}
inline static uint_fast32_t hash64v_6b(const uint64_t r, uint32_t shift)
{
	const uint64_t m = 227718039650203ULL;
	const uint64_t up_shift = 16;
	return (uint32_t)(((r << up_shift) * m) >> (DWORD_BITS - shift));
}
/* 6-byte hash of the bytes at r. */
inline static uint_fast32_t hash64_6b(const uint8_t *r, uint32_t shift)
{
	return hash64v_6b(read8_at(r), shift);
}
inline static uint_fast32_t hash64v_7b(const uint64_t r, uint32_t shift)
{
	const uint64_t m = 58295818150454627ULL;
	const uint64_t up_shift = 8;
	return (uint32_t)(((r << up_shift) * m) >> (DWORD_BITS - shift));
}
/* 7-byte hash of the bytes at r. */
inline static uint_fast32_t hash64_7b(const uint8_t *r, uint32_t shift)
{
	return hash64v_7b(read8_at(r), shift);
}
inline static uint_fast32_t hash64v_8b(const uint64_t r, uint32_t shift)
{
	const uint64_t m = 2870177450012600261ULL;
	return (uint32_t)((r * m) >> (DWORD_BITS - shift));
}
/* 8-byte hash of the bytes at r. */
inline static uint_fast32_t hash64_8b(const uint8_t *r, uint32_t shift)
{
	return hash64v_8b(read8_at(r), shift);
}
/*
 * Copy forward in copy_min-byte chunks until dst has reached dst_end.
 * The final chunk may write up to copy_min - 1 bytes past dst_end, so
 * the caller must guarantee that much slack after the destination
 * (wild-copy idiom, cf. LZ4).  Copies nothing when dst >= dst_end.
 */
inline static void while_lt_copy_x(
	uint8_t *dst,
	const uint8_t *src,
	const uint8_t *dst_end,
	const size_t copy_min)
{
	for (; dst < dst_end; dst += copy_min, src += copy_min)
		m_copy(dst, src, copy_min);
}
/*
 * Copy one copy_min-byte chunk unconditionally, then keep copying while
 * the next chunk would still start before dst_end (strict
 * dst + copy_min < dst_end test).  The total written is a multiple of
 * copy_min and may extend past dst_end; caller must provide slack.
 * Unlike while_lt_copy_x(), the first chunk is written even when
 * dst >= dst_end.
 */
inline static void copy_x_while_lt(
	uint8_t *dst,
	const uint8_t *src,
	const uint8_t *dst_end,
	const size_t copy_min)
{
	m_copy(dst, src, copy_min);
	while (dst + copy_min < dst_end)
		m_copy(dst += copy_min, src += copy_min, copy_min);
}
/*
 * Copy "total" bytes rounded up to whole copy_min-byte chunks: the first
 * chunk is unconditional, then one more chunk per remaining copy_min
 * bytes.  For total >= 1 the overshoot past dst + total is at most
 * copy_min - 1 bytes (a full copy_min for total == 0); caller must
 * provide slack.
 */
inline static void copy_x_while_total(
	uint8_t *dst,
	const uint8_t *src,
	size_t total,
	const size_t copy_min)
{
	m_copy(dst, src, copy_min);
	for (; total > copy_min; total -= copy_min)
		m_copy(dst += copy_min, src += copy_min, copy_min);
}
/* Copy two adjacent copy_min-byte chunks (2 * copy_min bytes in total). */
inline static void copy_2x(
	uint8_t *dst,
	const uint8_t *src,
	const size_t copy_min)
{
	size_t i;

	for (i = 0; i < 2; ++i)
		m_copy(dst + i * copy_min, src + i * copy_min, copy_min);
}
/*
 * Copy in double chunks of 2 * copy_min bytes: one double chunk
 * unconditionally, then more while the next double chunk would still
 * start before dst_end.  Same contract as copy_x_while_lt() with a
 * doubled stride; caller must provide slack past dst_end.
 */
inline static void copy_2x_as_x2_while_lt(
	uint8_t *dst,
	const uint8_t *src,
	const uint8_t *dst_end,
	const size_t copy_min)
{
	copy_2x(dst, src, copy_min);
	while (dst + (copy_min << 1) < dst_end)
		copy_2x(dst += (copy_min << 1), src += (copy_min << 1), copy_min);
}
/*
 * Copy double chunks of 2 * copy_min bytes each until dst has reached
 * dst_end; may overshoot dst_end by up to 2 * copy_min - 1 bytes, so
 * the caller must guarantee slack.  Copies nothing when dst >= dst_end.
 */
inline static void while_lt_copy_2x_as_x2(
	uint8_t *dst,
	const uint8_t *src,
	const uint8_t *dst_end,
	const size_t copy_min)
{
	for (; dst < dst_end; dst += (copy_min << 1), src += (copy_min << 1))
		copy_2x(dst, src, copy_min);
}
#endif /* _LZ4KD_PRIVATE_H */