lib: Import LZ4KD compression algorithm from HUAWEI
ABR-AL60_HarmonyOS4.0.0_opensource.tar.gz Signed-off-by: 0wnerDied <z1281552865@gmail.com>
This commit is contained in:
@@ -1682,6 +1682,15 @@ config CRYPTO_LZ4
|
||||
help
|
||||
This is the LZ4 algorithm.
|
||||
|
||||
config CRYPTO_LZ4KD
|
||||
tristate "LZ4KD compression algorithm"
|
||||
select CRYPTO_ALGAPI
|
||||
select CRYPTO_ACOMP2
|
||||
select LZ4KD_COMPRESS
|
||||
select LZ4KD_DECOMPRESS
|
||||
help
|
||||
This is the LZ4KD algorithm.
|
||||
|
||||
config CRYPTO_LZ4HC
|
||||
tristate "LZ4HC compression algorithm"
|
||||
select CRYPTO_ALGAPI
|
||||
|
||||
@@ -123,6 +123,7 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o
|
||||
obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
|
||||
obj-$(CONFIG_CRYPTO_LZO) += lzo.o
|
||||
obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
|
||||
obj-$(CONFIG_CRYPTO_LZ4KD) += lz4kd.o
|
||||
obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o
|
||||
obj-$(CONFIG_CRYPTO_842) += 842.o
|
||||
obj-$(CONFIG_CRYPTO_RNG2) += rng.o
|
||||
|
||||
95
crypto/lz4kd.c
Normal file
95
crypto/lz4kd.c
Normal file
@@ -0,0 +1,95 @@
|
||||
/*
|
||||
* Cryptographic API.
|
||||
*
|
||||
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
|
||||
* Description: LZ4KD compression algorithm for ZRAM
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/lz4kd.h>
|
||||
|
||||
|
||||
struct lz4kd_ctx {
|
||||
void *lz4kd_comp_mem;
|
||||
};
|
||||
|
||||
static int lz4kd_init(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct lz4kd_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
|
||||
ctx->lz4kd_comp_mem = vmalloc(lz4kd_encode_state_bytes_min());
|
||||
if (!ctx->lz4kd_comp_mem)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void lz4kd_exit(struct crypto_tfm *tfm)
|
||||
{
|
||||
struct lz4kd_ctx *ctx = crypto_tfm_ctx(tfm);
|
||||
vfree(ctx->lz4kd_comp_mem);
|
||||
}
|
||||
|
||||
/*
 * crypto API compress hook: encode src[0..slen) into dst, updating *dlen.
 * Returns 0 on success, -EINVAL when the encoder reports an error.
 */
static int lz4kd_compress_crypto(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst,
				 unsigned int *dlen)
{
	struct lz4kd_ctx *zctx = crypto_tfm_ctx(tfm);
	int encoded = lz4kd_encode(zctx->lz4kd_comp_mem, src, dst, slen, *dlen, 0);

	if (encoded < 0)
		return -EINVAL;

	/*
	 * NOTE(review): encoded == 0 means "incompressible" in the lz4kd API;
	 * *dlen is deliberately left untouched in that case — presumably the
	 * zram caller detects this. Confirm before changing this behavior.
	 */
	if (encoded)
		*dlen = encoded;

	return 0;
}

/*
 * crypto API decompress hook: decode src[0..slen) into dst, updating *dlen.
 * Returns 0 on success, -EINVAL on any decode failure.
 */
static int lz4kd_decompress_crypto(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst,
				   unsigned int *dlen)
{
	int decoded = lz4kd_decode(src, dst, slen, *dlen);

	if (decoded <= 0)
		return -EINVAL;

	*dlen = decoded;
	return 0;
}
|
||||
|
||||
static struct crypto_alg alg_lz4kd = {
|
||||
.cra_name = "lz4kd",
|
||||
.cra_driver_name = "lz4kd-generic",
|
||||
.cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
|
||||
.cra_ctxsize = sizeof(struct lz4kd_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
.cra_init = lz4kd_init,
|
||||
.cra_exit = lz4kd_exit,
|
||||
.cra_u = {
|
||||
.compress = {
|
||||
.coa_compress = lz4kd_compress_crypto,
|
||||
.coa_decompress = lz4kd_decompress_crypto
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static int __init lz4kd_mod_init(void)
|
||||
{
|
||||
return crypto_register_alg(&alg_lz4kd);
|
||||
}
|
||||
|
||||
static void __exit lz4kd_mod_fini(void)
|
||||
{
|
||||
crypto_unregister_alg(&alg_lz4kd);
|
||||
}
|
||||
|
||||
module_init(lz4kd_mod_init);
|
||||
module_exit(lz4kd_mod_fini);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("LZ4KD Compression Algorithm");
|
||||
MODULE_ALIAS_CRYPTO("lz4kd");
|
||||
@@ -29,6 +29,9 @@ static const char * const backends[] = {
|
||||
#if IS_ENABLED(CONFIG_CRYPTO_LZ4HC)
|
||||
"lz4hc",
|
||||
#endif
|
||||
#if IS_ENABLED(CONFIG_CRYPTO_LZ4KD)
|
||||
"lz4kd",
|
||||
#endif
|
||||
#if IS_ENABLED(CONFIG_CRYPTO_842)
|
||||
"842",
|
||||
#endif
|
||||
|
||||
216
include/linux/lz4kd.h
Normal file
216
include/linux/lz4kd.h
Normal file
@@ -0,0 +1,216 @@
|
||||
/*
|
||||
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
|
||||
* Description: LZ4K compression algorithm with delta compression
|
||||
*/
|
||||
|
||||
#ifndef _LZ4KD_H
|
||||
#define _LZ4KD_H
|
||||
|
||||
/* file lz4kd.h
|
||||
This file contains the platform-independent API of LZ-class
|
||||
lossless codecs (compressors/decompressors) with complete
|
||||
in-place documentation. The documentation is formatted
|
||||
in accordance with DOXYGEN mark-up format. So, one can
|
||||
generate proper documentation, e.g. in HTML format, using DOXYGEN.
|
||||
|
||||
Currently, LZ-class codecs, documented here, implement following
|
||||
algorithms for lossless data compression/decompression:
|
||||
\li "LZ" proprietary codec competing with LZ4 - lz4kd_encode(),
|
||||
lz4kd_encode_delta(), lz4kd_decode(), lz4kd_decode_delta()
|
||||
|
||||
The LZ compressors accept any data as input and compress it
|
||||
without loss to a smaller size if possible.
|
||||
Compressed data produced by LZ compressor API lz4kd_encode*(),
|
||||
can be decompressed only by lz4kd_decode() API documented below.\n
|
||||
*/
|
||||
|
||||
/*
|
||||
lz4kd_status defines simple set of status values returned by APIs
|
||||
*/
|
||||
typedef enum {
|
||||
LZ4K_STATUS_INCOMPRESSIBLE = 0, /* !< Return when data is incompressible */
|
||||
LZ4K_STATUS_FAILED = -1, /* !< Return on general failure */
|
||||
LZ4K_STATUS_READ_ERROR = -2, /* !< Return when data reading failed */
|
||||
LZ4K_STATUS_WRITE_ERROR = -3 /* !< Return when data writing failed */
|
||||
} lz4kd_status;
|
||||
|
||||
/*
|
||||
lz4kd_version() returns a static immutable string with the algorithm version
|
||||
*/
|
||||
const char *lz4kd_version(void);
|
||||
|
||||
/*
|
||||
lz4kd_encode_state_bytes_min() returns number of bytes for state parameter,
|
||||
supplied to lz4kd_encode(), lz4kd_encode_delta().
|
||||
So, state should occupy at least lz4kd_encode_state_bytes_min() for mentioned
|
||||
functions to work correctly.
|
||||
*/
|
||||
unsigned lz4kd_encode_state_bytes_min(void);
|
||||
|
||||
/*
|
||||
lz4kd_encode() encodes/compresses one input buffer at *in, places
|
||||
result of encoding into one output buffer at *out if encoded data
|
||||
size fits specified values of out_max and out_limit.
|
||||
It returns the size of the encoded data in case of success, or a value<=0 otherwise.
|
||||
The result of successful encoding is in proprietary format, that
|
||||
is the encoded data can be decoded only by lz4kd_decode().
|
||||
|
||||
\return
|
||||
\li positive value\n
|
||||
if encoding was successful. The value returned is the size of encoded
|
||||
(compressed) data always <=out_max.
|
||||
\li non-positive value\n
|
||||
if in==0||in_max==0||out==0||out_max==0 or
|
||||
if out_max is less than needed for encoded (compressed) data.
|
||||
\li 0 value\n
|
||||
if encoded data size >= out_limit
|
||||
|
||||
\param[in] state
|
||||
!=0, pointer to state buffer used internally by the function. Size of
|
||||
state in bytes should be at least lz4kd_encode_state_bytes_min(). The content
|
||||
of state buffer will be changed during encoding.
|
||||
|
||||
\param[in] in
|
||||
!=0, pointer to the input buffer to encode (compress). The content of
|
||||
the input buffer does not change during encoding.
|
||||
|
||||
\param[in] out
|
||||
!=0, pointer to the output buffer where to place result of encoding
|
||||
(compression).
|
||||
If encoding is unsuccessful, e.g. out_max or out_limit are less than
|
||||
needed for encoded data then content of out buffer may be arbitrary.
|
||||
|
||||
\param[in] in_max
|
||||
!=0, size in bytes of the input buffer at *in
|
||||
|
||||
\param[in] out_max
|
||||
!=0, size in bytes of the output buffer at *out
|
||||
|
||||
\param[in] out_limit
|
||||
encoded data size soft limit in bytes. Due to performance reasons it is
|
||||
not guaranteed that
|
||||
lz4kd_encode will always detect that resulting encoded data size is
|
||||
bigger than out_limit.
|
||||
However, when reaching out_limit is detected, lz4kd_encode() returns
|
||||
earlier and spares CPU cycles. Caller code should recheck result
|
||||
returned by lz4kd_encode() (value greater than 0) if it is really
|
||||
less or equal than out_limit.
|
||||
out_limit is ignored if it is equal to 0.
|
||||
*/
|
||||
int lz4kd_encode(
|
||||
void *const state,
|
||||
const void *const in,
|
||||
void *out,
|
||||
unsigned in_max,
|
||||
unsigned out_max,
|
||||
unsigned out_limit);
|
||||
|
||||
int lz4kd_encode2(
|
||||
void *const state,
|
||||
const void *const in,
|
||||
void *out,
|
||||
unsigned in_max,
|
||||
unsigned out_max,
|
||||
unsigned out_limit);
|
||||
|
||||
int lz4kd_encode_pattern(
|
||||
void *const state,
|
||||
const void *const in,
|
||||
void *out,
|
||||
unsigned in_max,
|
||||
unsigned out_max,
|
||||
unsigned out_limit);
|
||||
|
||||
/*
|
||||
lz4kd_encode_max_cr() encodes/compresses one input buffer at *in, places
|
||||
result of encoding into one output buffer at *out if encoded data
|
||||
size fits specified value of out_max.
|
||||
It returns the size of the encoded data in case of success, or a value<=0 otherwise.
|
||||
The result of successful encoding is in proprietary format, that
|
||||
is the encoded data can be decoded only by lz4kd_decode().
|
||||
|
||||
\return
|
||||
\li positive value\n
|
||||
if encoding was successful. The value returned is the size of encoded
|
||||
(compressed) data always <=out_max.
|
||||
\li non-positive value\n
|
||||
if in==0||in_max==0||out==0||out_max==0 or
|
||||
if out_max is less than needed for encoded (compressed) data.
|
||||
|
||||
\param[in] state
|
||||
!=0, pointer to state buffer used internally by the function. Size of
|
||||
state in bytes should be at least lz4kd_encode_state_bytes_min(). The content
|
||||
of state buffer will be changed during encoding.
|
||||
|
||||
\param[in] in
|
||||
!=0, pointer to the input buffer to encode (compress). The content of
|
||||
the input buffer does not change during encoding.
|
||||
|
||||
\param[in] out
|
||||
!=0, pointer to the output buffer where to place result of encoding
|
||||
(compression).
|
||||
If encoding is unsuccessful, e.g. out_max is less than
|
||||
needed for encoded data then content of out buffer may be arbitrary.
|
||||
|
||||
\param[in] in_max
|
||||
!=0, size in bytes of the input buffer at *in
|
||||
|
||||
\param[in] out_max
|
||||
!=0, size in bytes of the output buffer at *out
|
||||
|
||||
\param[in] out_limit
|
||||
encoded data size soft limit in bytes. Due to performance reasons it is
|
||||
not guaranteed that
|
||||
lz4kd_encode will always detect that resulting encoded data size is
|
||||
bigger than out_limit.
|
||||
However, when reaching out_limit is detected, lz4kd_encode() returns
|
||||
earlier and spares CPU cycles. Caller code should recheck result
|
||||
returned by lz4kd_encode() (value greater than 0) if it is really
|
||||
less or equal than out_limit.
|
||||
out_limit is ignored if it is equal to 0.
|
||||
*/
|
||||
int lz4kd_encode_max_cr(
|
||||
void *const state,
|
||||
const void *const in,
|
||||
void *out,
|
||||
unsigned in_max,
|
||||
unsigned out_max,
|
||||
unsigned out_limit);
|
||||
|
||||
/*
|
||||
lz4kd_decode() decodes (decompresses) data from one input buffer and places
|
||||
the result of decompression into one output buffer. The encoded data in input
|
||||
buffer should be in proprietary format, produced by lz4kd_encode()
|
||||
or by lz4kd_encode_delta().
|
||||
|
||||
\return
|
||||
\li positive value\n
|
||||
if decoding was successful. The value returned is the size of decoded
|
||||
(decompressed) data.
|
||||
\li non-positive value\n
|
||||
if in==0||in_max==0||out==0||out_max==0 or
|
||||
if out_max is less than needed for decoded (decompressed) data or
|
||||
if input encoded data format is corrupted.
|
||||
|
||||
\param[in] in
|
||||
!=0, pointer to the input buffer to decode (decompress). The content of
|
||||
the input buffer does not change during decoding.
|
||||
|
||||
\param[in] out
|
||||
!=0, pointer to the output buffer where to place result of decoding
|
||||
(decompression). If decompression is unsuccessful then content of out
|
||||
buffer may be arbitrary.
|
||||
|
||||
\param[in] in_max
|
||||
!=0, size in bytes of the input buffer at in
|
||||
|
||||
\param[in] out_max
|
||||
!=0, size in bytes of the output buffer at out
|
||||
*/
|
||||
int lz4kd_decode(
|
||||
const void *const in,
|
||||
void *const out,
|
||||
unsigned in_max,
|
||||
unsigned out_max);
|
||||
|
||||
#endif /* _LZ4KD_H */
|
||||
@@ -245,8 +245,14 @@ config LZ4_COMPRESS
|
||||
config LZ4HC_COMPRESS
|
||||
tristate
|
||||
|
||||
config LZ4KD_COMPRESS
|
||||
tristate
|
||||
|
||||
config LZ4_DECOMPRESS
|
||||
tristate
|
||||
|
||||
config LZ4KD_DECOMPRESS
|
||||
tristate
|
||||
|
||||
config ZSTD_COMPRESS
|
||||
select XXHASH
|
||||
@@ -295,7 +301,7 @@ config GENERIC_ALLOCATOR
|
||||
#
|
||||
config REED_SOLOMON
|
||||
tristate
|
||||
|
||||
|
||||
config REED_SOLOMON_ENC8
|
||||
bool
|
||||
|
||||
|
||||
@@ -135,6 +135,8 @@ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
|
||||
obj-$(CONFIG_LZ4_COMPRESS) += lz4/
|
||||
obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/
|
||||
obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/
|
||||
obj-$(CONFIG_LZ4KD_COMPRESS) += lz4kd/
|
||||
obj-$(CONFIG_LZ4KD_DECOMPRESS) += lz4kd/
|
||||
obj-$(CONFIG_ZSTD_COMPRESS) += zstd/
|
||||
obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd/
|
||||
obj-$(CONFIG_XZ_DEC) += xz/
|
||||
|
||||
3
lib/lz4kd/Makefile
Normal file
3
lib/lz4kd/Makefile
Normal file
@@ -0,0 +1,3 @@
|
||||
ccflags-y += -O3
|
||||
obj-$(CONFIG_LZ4KD_COMPRESS) += lz4kd_encode.o
|
||||
obj-$(CONFIG_LZ4KD_DECOMPRESS) += lz4kd_decode.o
|
||||
243
lib/lz4kd/lz4kd_decode.c
Normal file
243
lib/lz4kd/lz4kd_decode.c
Normal file
@@ -0,0 +1,243 @@
|
||||
/*
|
||||
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
|
||||
* Description: LZ4K compression algorithm with delta compression
|
||||
*/
|
||||
|
||||
#if !defined(__KERNEL__)
|
||||
#include "lz4kd.h"
|
||||
#else
|
||||
#include <linux/lz4kd.h>
|
||||
#include <linux/module.h>
|
||||
#endif
|
||||
|
||||
#include "lz4kd_private.h" /* types, etc */
|
||||
|
||||
static const uint8_t *get_size(
|
||||
uint_fast32_t *size,
|
||||
const uint8_t *in_at,
|
||||
const uint8_t *const in_end)
|
||||
{
|
||||
uint_fast32_t u;
|
||||
do {
|
||||
if (unlikely(in_at >= in_end))
|
||||
return NULL;
|
||||
*size += (u = *(const uint8_t*)in_at);
|
||||
++in_at;
|
||||
} while (BYTE_MAX == u);
|
||||
return in_at;
|
||||
}
|
||||
|
||||
static int end_of_block(
|
||||
const uint_fast32_t nr_bytes_max,
|
||||
const uint_fast32_t r_bytes_max,
|
||||
const uint8_t *const in_at,
|
||||
const uint8_t *const in_end,
|
||||
const uint8_t *const out,
|
||||
const uint8_t *const out_at)
|
||||
{
|
||||
if (!nr_bytes_max)
|
||||
return LZ4K_STATUS_FAILED; /* should be the last one in block */
|
||||
if (r_bytes_max != REPEAT_MIN)
|
||||
return LZ4K_STATUS_FAILED; /* should be the last one in block */
|
||||
if (in_at != in_end)
|
||||
return LZ4K_STATUS_FAILED; /* should be the last one in block */
|
||||
return (int)(out_at - out);
|
||||
}
|
||||
|
||||
/* Chunked-copy tuning constants for the decode fast paths. */
enum {
	NR_COPY_MIN = 16,	/* literal copy granularity */
	R_COPY_MIN = 16,	/* repeat copy granularity */
	R_COPY_SAFE = R_COPY_MIN - 1,
	R_COPY_SAFE_2X = (R_COPY_MIN << 1) - 1
};

/*
 * Copy nr_bytes_max literal bytes from *in_at to *out_at, preferring
 * NR_COPY_MIN-sized chunk copies (which may over-copy) when enough slack
 * remains in both buffers. Advances both cursors by exactly nr_bytes_max.
 * Returns false when the copy would overrun either buffer.
 */
static bool out_non_repeat(
	const uint8_t **in_at,
	uint8_t **out_at,
	uint_fast32_t nr_bytes_max,
	const uint8_t *const in_end,
	const uint8_t *const out_end)
{
	const uint8_t *const in_copy_end = *in_at + nr_bytes_max;
	uint8_t *const out_copy_end = *out_at + nr_bytes_max;

	if (likely(nr_bytes_max <= NR_COPY_MIN)) {
		if (likely(*in_at <= in_end - NR_COPY_MIN &&
			   *out_at <= out_end - NR_COPY_MIN))
			m_copy(*out_at, *in_at, NR_COPY_MIN); /* fast over-copy */
		else if (in_copy_end <= in_end && out_copy_end <= out_end)
			m_copy(*out_at, *in_at, nr_bytes_max);
		else
			return false;
	} else { /* nr_bytes_max > NR_COPY_MIN */
		if (likely(in_copy_end <= in_end - NR_COPY_MIN &&
			   out_copy_end <= out_end - NR_COPY_MIN)) {
			m_copy(*out_at, *in_at, NR_COPY_MIN);
			copy_x_while_lt(*out_at + NR_COPY_MIN,
					*in_at + NR_COPY_MIN,
					out_copy_end, NR_COPY_MIN);
		} else if (in_copy_end <= in_end && out_copy_end <= out_end) {
			m_copy(*out_at, *in_at, nr_bytes_max);
		} else { /* would overrun one of the buffers */
			return false;
		}
	}
	*in_at = in_copy_end;
	*out_at = out_copy_end;
	return true;
}
|
||||
|
||||
static void out_repeat_overlap(
|
||||
uint_fast32_t offset,
|
||||
uint8_t *out_at,
|
||||
const uint8_t *out_from,
|
||||
const uint8_t *const out_copy_end)
|
||||
{
|
||||
enum {
|
||||
COPY_MIN = R_COPY_MIN >> 1,
|
||||
OFFSET_LIMIT = COPY_MIN >> 1
|
||||
};
|
||||
m_copy(out_at, out_from, COPY_MIN);
|
||||
/* (1 < offset < R_COPY_MIN/2) && out_copy_end + R_COPY_SAFE_2X <= out_end */
|
||||
out_at += offset;
|
||||
if (offset <= OFFSET_LIMIT)
|
||||
offset <<= 1;
|
||||
do {
|
||||
m_copy(out_at, out_from, COPY_MIN);
|
||||
out_at += offset;
|
||||
if (offset <= OFFSET_LIMIT)
|
||||
offset <<= 1;
|
||||
} while (out_at - out_from < R_COPY_MIN);
|
||||
while_lt_copy_2x_as_x2(out_at, out_from, out_copy_end, R_COPY_MIN);
|
||||
}
|
||||
|
||||
static bool out_repeat_slow(
|
||||
uint_fast32_t r_bytes_max,
|
||||
uint_fast32_t offset,
|
||||
uint8_t *out_at,
|
||||
const uint8_t *out_from,
|
||||
const uint8_t *const out_copy_end,
|
||||
const uint8_t *const out_end)
|
||||
{
|
||||
if (offset > 1 && out_copy_end <= out_end - R_COPY_SAFE_2X) {
|
||||
out_repeat_overlap(offset, out_at, out_from, out_copy_end);
|
||||
} else {
|
||||
if (unlikely(out_copy_end > out_end))
|
||||
return false;
|
||||
if (offset == 1) {
|
||||
m_set(out_at, *out_from, r_bytes_max);
|
||||
} else {
|
||||
do
|
||||
*out_at++ = *out_from++;
|
||||
while (out_at < out_copy_end);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static int decode(
|
||||
const uint8_t *in_at,
|
||||
uint8_t *const out,
|
||||
const uint8_t *const in_end,
|
||||
const uint8_t *const out_end,
|
||||
const uint_fast32_t nr_log2,
|
||||
const uint_fast32_t off_log2)
|
||||
{
|
||||
const uint_fast32_t r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2);
|
||||
const uint8_t *const in_end_minus_x = in_end - TAG_BYTES_MAX;
|
||||
uint8_t *out_at = out;
|
||||
while (likely(in_at <= in_end_minus_x)) {
|
||||
const uint_fast32_t utag = read4_at(in_at - 1) >> BYTE_BITS;
|
||||
const uint_fast32_t offset = utag & mask(off_log2);
|
||||
uint_fast32_t nr_bytes_max = utag >> (off_log2 + r_log2),
|
||||
r_bytes_max = ((utag >> off_log2) & mask(r_log2)) +
|
||||
REPEAT_MIN;
|
||||
const uint8_t *out_from = 0;
|
||||
uint8_t *out_copy_end = 0;
|
||||
const uint8_t *out_safe_end = 0;
|
||||
in_at += TAG_BYTES_MAX;
|
||||
if (unlikely(nr_bytes_max == mask(nr_log2))) {
|
||||
in_at = get_size(&nr_bytes_max, in_at, in_end);
|
||||
if (unlikely(in_at == NULL))
|
||||
return LZ4K_STATUS_READ_ERROR;
|
||||
}
|
||||
if (!out_non_repeat(&in_at, &out_at, nr_bytes_max, in_end, out_end))
|
||||
return LZ4K_STATUS_FAILED;
|
||||
if (unlikely(r_bytes_max == mask(r_log2) + REPEAT_MIN)) {
|
||||
in_at = get_size(&r_bytes_max, in_at, in_end);
|
||||
if (unlikely(in_at == NULL))
|
||||
return LZ4K_STATUS_READ_ERROR;
|
||||
}
|
||||
out_from = out_at - offset;
|
||||
if (unlikely(out_from < out))
|
||||
return LZ4K_STATUS_FAILED;
|
||||
out_copy_end = out_at + r_bytes_max;
|
||||
out_safe_end = out_end - R_COPY_SAFE_2X;
|
||||
if (likely(offset >= R_COPY_MIN && out_copy_end <= out_safe_end)) {
|
||||
copy_2x_as_x2_while_lt(out_at, out_from, out_copy_end,
|
||||
R_COPY_MIN);
|
||||
} else if (likely(offset >= (R_COPY_MIN >> 1) &&
|
||||
out_copy_end <= out_safe_end)) {
|
||||
m_copy(out_at, out_from, R_COPY_MIN);
|
||||
out_at += offset;
|
||||
while_lt_copy_x(out_at, out_from, out_copy_end, R_COPY_MIN);
|
||||
} else if (likely(offset > 0)) {
|
||||
if (!out_repeat_slow(r_bytes_max, offset, out_at, out_from,
|
||||
out_copy_end, out_end))
|
||||
return LZ4K_STATUS_FAILED;
|
||||
} else { /* offset == 0: EOB, last literal */
|
||||
return end_of_block(nr_bytes_max, r_bytes_max, in_at,
|
||||
in_end, out, out_at);
|
||||
}
|
||||
out_at = out_copy_end;
|
||||
} /* while (likely(in_at <= in_end_minus_x)) */
|
||||
return in_at == in_end ? (int)(out_at - out) : LZ4K_STATUS_FAILED;
|
||||
}
|
||||
|
||||
static int decode_pattern_4kb(
|
||||
const uint8_t *const in,
|
||||
uint8_t *const out,
|
||||
const uint8_t *const out_end)
|
||||
{
|
||||
const uint64_t pattern = *(const uint64_t*)in;
|
||||
uint64_t *o64 = (uint64_t*)out;
|
||||
const uint64_t *const o64_end = (const uint64_t*)out_end - 1;
|
||||
for (; o64 <= o64_end; ++o64)
|
||||
*o64 = pattern;
|
||||
return (uint8_t*)o64 == out_end ? (int)(out_end - out) : LZ4K_STATUS_FAILED;
|
||||
}
|
||||
|
||||
static int decode_4kb(
|
||||
const uint8_t *const in,
|
||||
uint8_t *const out,
|
||||
const uint8_t *const in_end,
|
||||
const uint8_t *const out_end)
|
||||
{
|
||||
return decode(in, out, in_end, out_end, NR_4KB_LOG2, BLOCK_4KB_LOG2);
|
||||
}
|
||||
|
||||
int lz4kd_decode(
|
||||
const void *in,
|
||||
void *const out,
|
||||
unsigned in_max,
|
||||
unsigned out_max)
|
||||
{
|
||||
/* ++use volatile pointers to prevent compiler optimizations */
|
||||
const uint8_t *volatile in_end = (const uint8_t*)in + in_max;
|
||||
const uint8_t *volatile out_end = (uint8_t*)out + min_u64(out_max, 1 << BLOCK_4KB_LOG2);
|
||||
if (unlikely(in == NULL || out == NULL))
|
||||
return LZ4K_STATUS_FAILED;
|
||||
if (unlikely(in_max <= 1 + TAG_BYTES_MAX || out_max <= 0))
|
||||
return LZ4K_STATUS_FAILED;
|
||||
/* invalid buffer size or pointer overflow */
|
||||
if (unlikely((const uint8_t*)in >= in_end || (uint8_t*)out >= out_end))
|
||||
return LZ4K_STATUS_FAILED;
|
||||
/* -- */
|
||||
if (unlikely(in_max == PATTERN_BYTES_MAX))
|
||||
return decode_pattern_4kb((const uint8_t*)in, (uint8_t*)out,
|
||||
out_end);
|
||||
return decode_4kb((const uint8_t*)in + 1, (uint8_t*)out, in_end, out_end);
|
||||
}
|
||||
EXPORT_SYMBOL(lz4kd_decode);
|
||||
|
||||
MODULE_LICENSE("Dual BSD/GPL");
|
||||
MODULE_DESCRIPTION("LZ4K decoder");
|
||||
418
lib/lz4kd/lz4kd_encode.c
Normal file
418
lib/lz4kd/lz4kd_encode.c
Normal file
@@ -0,0 +1,418 @@
|
||||
/*
|
||||
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
|
||||
* Description: LZ4K compression algorithm with delta compression
|
||||
*/
|
||||
|
||||
#if !defined(__KERNEL__)
|
||||
#include "lz4kd.h"
|
||||
#else
|
||||
#include <linux/lz4kd.h>
|
||||
#include <linux/module.h>
|
||||
#endif
|
||||
|
||||
#include "lz4kd_private.h"
|
||||
#include "lz4kd_encode_private.h"
|
||||
|
||||
/* Encoder tuning constants: hash-table size and match-search stride. */
enum {
	HT_LOG2 = 12, /* ==11 #3 max drop in CR */
	STEP_LOG2 = 5 /* ==3 #2 avg drop in CR */
};

/*
 * Size in bytes of the caller-supplied encoder state: a hash table of
 * (1 << HT_LOG2) 16-bit entries, i.e. 1 << (HT_LOG2 + 1) bytes.
 */
static unsigned encode_state_bytes_min(void)
{
	enum {
		BYTES_LOG2 = HT_LOG2 + 1
	};

	return 1U << BYTES_LOG2;
}
|
||||
|
||||
#if !defined(LZ4K_DELTA) && !defined(LZ4K_MAX_CR)

/*
 * Public wrapper: minimum byte size of the state buffer passed to
 * lz4kd_encode*() (only built for the plain lz4kd variant).
 */
unsigned lz4kd_encode_state_bytes_min(void)
{
	return encode_state_bytes_min();
}
EXPORT_SYMBOL(lz4kd_encode_state_bytes_min);

#endif /* !defined(LZ4K_DELTA) && !defined(LZ4K_MAX_CR) */
|
||||
|
||||
/* minimum encoded size for non-compressible data */
|
||||
inline static uint_fast32_t encoded_bytes_min(
|
||||
uint_fast32_t nr_log2,
|
||||
uint_fast32_t in_max)
|
||||
{
|
||||
return in_max < mask(nr_log2) ?
|
||||
TAG_BYTES_MAX + in_max :
|
||||
TAG_BYTES_MAX + size_bytes_count(in_max - mask(nr_log2)) + in_max;
|
||||
}
|
||||
|
||||
inline static void update_utag(
|
||||
uint_fast32_t r_bytes_max,
|
||||
uint_fast32_t *utag,
|
||||
const uint_fast32_t nr_log2,
|
||||
const uint_fast32_t off_log2)
|
||||
{
|
||||
const uint_fast32_t r_mask = mask(TAG_BITS_MAX - (off_log2 + nr_log2));
|
||||
*utag |= likely(r_bytes_max - REPEAT_MIN < r_mask) ?
|
||||
((r_bytes_max - REPEAT_MIN) << off_log2) : (r_mask << off_log2);
|
||||
}
|
||||
|
||||
inline static uint8_t *out_size_bytes(uint8_t *out_at, uint_fast32_t u)
|
||||
{
|
||||
for (; u >= BYTE_MAX; *out_at++ = (uint8_t)BYTE_MAX, u -= BYTE_MAX);
|
||||
*out_at++ = (uint8_t)u;
|
||||
return out_at;
|
||||
}
|
||||
|
||||
inline static uint8_t *out_utag_then_bytes_left(
|
||||
uint8_t *out_at,
|
||||
uint_fast32_t utag,
|
||||
uint_fast32_t bytes_left)
|
||||
{
|
||||
m_copy(out_at, &utag, TAG_BYTES_MAX);
|
||||
return out_size_bytes(out_at + TAG_BYTES_MAX, bytes_left);
|
||||
}
|
||||
|
||||
static int out_tail(
|
||||
uint8_t *out_at,
|
||||
uint8_t *const out_end,
|
||||
const uint8_t *const out,
|
||||
const uint8_t *const nr0,
|
||||
const uint8_t *const in_end,
|
||||
const uint_fast32_t nr_log2,
|
||||
const uint_fast32_t off_log2)
|
||||
{
|
||||
const uint_fast32_t nr_mask = mask(nr_log2);
|
||||
const uint_fast32_t r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2);
|
||||
const uint_fast32_t nr_bytes_now = u_32(in_end - nr0);
|
||||
if (encoded_bytes_min(nr_log2, nr_bytes_now) > u_32(out_end - out_at))
|
||||
return LZ4K_STATUS_INCOMPRESSIBLE;
|
||||
if (nr_bytes_now < nr_mask) {
|
||||
/* caller guarantees at least one nr-byte */
|
||||
uint_fast32_t utag = (nr_bytes_now << (off_log2 + r_log2));
|
||||
m_copy(out_at, &utag, TAG_BYTES_MAX);
|
||||
out_at += TAG_BYTES_MAX;
|
||||
} else { /* nr_bytes_now>=nr_mask */
|
||||
uint_fast32_t bytes_left = nr_bytes_now - nr_mask;
|
||||
uint_fast32_t utag = (nr_mask << (off_log2 + r_log2));
|
||||
out_at = out_utag_then_bytes_left(out_at, utag, bytes_left);
|
||||
} /* if (nr_bytes_now<nr_mask) */
|
||||
m_copy(out_at, nr0, nr_bytes_now);
|
||||
return (int)(out_at + nr_bytes_now - out);
|
||||
}
|
||||
|
||||
inline static int out_tail2(
|
||||
uint8_t *out_at,
|
||||
uint8_t *const out_end,
|
||||
const uint8_t *const out,
|
||||
const uint8_t *const r,
|
||||
const uint8_t *const in_end,
|
||||
const uint_fast32_t nr_log2,
|
||||
const uint_fast32_t off_log2)
|
||||
{
|
||||
return r == in_end ? (int)(out_at - out) :
|
||||
out_tail(out_at, out_end, out, r, in_end,
|
||||
nr_log2, off_log2);
|
||||
}
|
||||
|
||||
int lz4kd_out_tail(
|
||||
uint8_t *out_at,
|
||||
uint8_t *const out_end,
|
||||
const uint8_t *const out,
|
||||
const uint8_t *const nr0,
|
||||
const uint8_t *const in_end,
|
||||
const uint_fast32_t nr_log2,
|
||||
const uint_fast32_t off_log2,
|
||||
bool check_out)
|
||||
{
|
||||
return out_tail(out_at, out_end, out, nr0, in_end,
|
||||
nr_log2, off_log2);
|
||||
}
|
||||
|
||||
static uint8_t *out_non_repeat(
|
||||
uint8_t *out_at,
|
||||
uint8_t *const out_end,
|
||||
uint_fast32_t utag,
|
||||
const uint8_t *const nr0,
|
||||
const uint8_t *const r,
|
||||
const uint_fast32_t nr_log2,
|
||||
const uint_fast32_t off_log2)
|
||||
{
|
||||
const uint_fast32_t nr_bytes_max = u_32(r - nr0);
|
||||
const uint_fast32_t nr_mask = mask(nr_log2),
|
||||
r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2);
|
||||
if (likely(nr_bytes_max < nr_mask)) {
|
||||
utag |= (nr_bytes_max << (off_log2 + r_log2));
|
||||
m_copy(out_at, &utag, TAG_BYTES_MAX);
|
||||
out_at += TAG_BYTES_MAX;
|
||||
} else { /* nr_bytes_max >= nr_mask */
|
||||
uint_fast32_t bytes_left = nr_bytes_max - nr_mask;
|
||||
utag |= (nr_mask << (off_log2 + r_log2));
|
||||
out_at = out_utag_then_bytes_left(out_at, utag, bytes_left);
|
||||
} /* if (nr_bytes_max<nr_mask) */
|
||||
copy_x_while_total(out_at, nr0, nr_bytes_max, NR_COPY_MIN);
|
||||
out_at += nr_bytes_max;
|
||||
return out_at;
|
||||
}
|
||||
|
||||
inline static uint8_t *out_r_bytes_left(
|
||||
uint8_t *out_at,
|
||||
uint_fast32_t r_bytes_max,
|
||||
const uint_fast32_t nr_log2,
|
||||
const uint_fast32_t off_log2)
|
||||
{
|
||||
const uint_fast32_t r_mask = mask(TAG_BITS_MAX - (off_log2 + nr_log2));
|
||||
return likely(r_bytes_max - REPEAT_MIN < r_mask) ?
|
||||
out_at : out_size_bytes(out_at, r_bytes_max - REPEAT_MIN - r_mask);
|
||||
}
|
||||
|
||||
static uint8_t *out_repeat(
|
||||
uint8_t *out_at,
|
||||
uint_fast32_t utag,
|
||||
uint_fast32_t r_bytes_max,
|
||||
const uint_fast32_t nr_log2,
|
||||
const uint_fast32_t off_log2)
|
||||
{
|
||||
const uint_fast32_t r_mask = mask(TAG_BITS_MAX - (off_log2 + nr_log2));
|
||||
if (likely(r_bytes_max - REPEAT_MIN < r_mask)) {
|
||||
utag |= ((r_bytes_max - REPEAT_MIN) << off_log2);
|
||||
m_copy(out_at, &utag, TAG_BYTES_MAX);
|
||||
out_at += TAG_BYTES_MAX;
|
||||
} else {
|
||||
uint_fast32_t bytes_left = r_bytes_max - REPEAT_MIN - r_mask;
|
||||
utag |= (r_mask << off_log2);
|
||||
out_at = out_utag_then_bytes_left(out_at, utag, bytes_left);
|
||||
}
|
||||
return out_at; /* SUCCESS: continue compression */
|
||||
}
|
||||
|
||||
uint8_t *lz4kd_out_repeat(
|
||||
uint8_t *out_at,
|
||||
uint8_t *const out_end,
|
||||
uint_fast32_t utag,
|
||||
uint_fast32_t r_bytes_max,
|
||||
const uint_fast32_t nr_log2,
|
||||
const uint_fast32_t off_log2,
|
||||
const bool check_out)
|
||||
{
|
||||
return out_repeat(out_at, utag, r_bytes_max, nr_log2, off_log2);
|
||||
}
|
||||
|
||||
inline static uint8_t *out_tuple(
|
||||
uint8_t *out_at,
|
||||
uint8_t *const out_end,
|
||||
uint_fast32_t utag,
|
||||
const uint8_t *const nr0,
|
||||
const uint8_t *const r,
|
||||
uint_fast32_t r_bytes_max,
|
||||
const uint_fast32_t nr_log2,
|
||||
const uint_fast32_t off_log2)
|
||||
{
|
||||
update_utag(r_bytes_max, &utag, nr_log2, off_log2);
|
||||
out_at = out_non_repeat(out_at, out_end, utag, nr0, r, nr_log2, off_log2);
|
||||
return out_r_bytes_left(out_at, r_bytes_max, nr_log2, off_log2);
|
||||
}
|
||||
|
||||
uint8_t *lz4kd_out_tuple(
|
||||
uint8_t *out_at,
|
||||
uint8_t *const out_end,
|
||||
uint_fast32_t utag,
|
||||
const uint8_t *const nr0,
|
||||
const uint8_t *const r,
|
||||
uint_fast32_t r_bytes_max,
|
||||
const uint_fast32_t nr_log2,
|
||||
const uint_fast32_t off_log2,
|
||||
bool check_out)
|
||||
{
|
||||
return out_tuple(out_at, out_end, utag, nr0, r, r_bytes_max,
|
||||
nr_log2, off_log2);
|
||||
}
|
||||
|
||||
static const uint8_t *repeat_end(
|
||||
const uint8_t *q,
|
||||
const uint8_t *r,
|
||||
const uint8_t *const in_end_safe,
|
||||
const uint8_t *const in_end)
|
||||
{
|
||||
q += REPEAT_MIN;
|
||||
r += REPEAT_MIN;
|
||||
/* caller guarantees r+12<=in_end */
|
||||
do {
|
||||
const uint64_t x = read8_at(q) ^ read8_at(r);
|
||||
if (x) {
|
||||
const uint16_t ctz = (uint16_t)__builtin_ctzl(x);
|
||||
return r + (ctz >> BYTE_BITS_LOG2);
|
||||
}
|
||||
/* some bytes differ: count of trailing 0-bits/bytes */
|
||||
q += sizeof(uint64_t);
|
||||
r += sizeof(uint64_t);
|
||||
} while (likely(r <= in_end_safe)); /* once, at input block end */
|
||||
while (r < in_end) {
|
||||
if (*q != *r) return r;
|
||||
++q;
|
||||
++r;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
const uint8_t *lz4kd_repeat_end(
|
||||
const uint8_t *q,
|
||||
const uint8_t *r,
|
||||
const uint8_t *const in_end_safe,
|
||||
const uint8_t *const in_end)
|
||||
{
|
||||
return repeat_end(q, r, in_end_safe, in_end);
|
||||
}
|
||||
|
||||
/* CR increase order: +STEP, have OFFSETS, use _5b(most impact) */
|
||||
/* *_6b to compete with LZ4 */
|
||||
inline static uint_fast32_t hash(const uint8_t *r)
|
||||
{
|
||||
return hash64_5b(r, HT_LOG2);
|
||||
}
|
||||
|
||||
/*
|
||||
* Proof that 'r' increments are safe-NO pointer overflows are possible:
|
||||
*
|
||||
* While using STEP_LOG2=5, step_start=1<<STEP_LOG2 == 32 we increment s
|
||||
* 32 times by 1, 32 times by 2, 32 times by 3, and so on:
|
||||
* 32*1+32*2+32*3+...+32*31 == 32*SUM(1..31) == 32*((1+31)*15+16).
|
||||
* So, we can safely increment s by at most 31 for input block size <=
|
||||
* 1<<13 < 15872.
|
||||
*
|
||||
* More precisely, STEP_LIMIT == x for any input block calculated as follows:
|
||||
* 1<<off_log2 >= (1<<STEP_LOG2)*((x+1)(x-1)/2+x/2) ==>
|
||||
* 1<<(off_log2-STEP_LOG2+1) >= x^2+x-1 ==>
|
||||
* x^2+x-1-1<<(off_log2-STEP_LOG2+1) == 0, which is solved by standard
|
||||
* method.
|
||||
* To avoid overhead here conservative approximate value of x is calculated
|
||||
* as average of two nearest square roots, see STEP_LIMIT above.
|
||||
*/
|
||||
|
||||
/*
 * Core "fast" encoder loop for one input block.
 *
 * ht      : hash table of 16-bit positions relative to in0 (caller pre-zeroes it)
 * in0     : start of the input block
 * in_end  : one past the last input byte
 * out     : start of the output buffer; out[0] is the 1-byte lz4kd header
 * out_end : one past the last output byte
 *
 * Returns via out_tail()/out_tail2() once the scan reaches the end of the
 * input.  NOTE(review): the exact return contract (encoded size vs. error)
 * is inherited from those helpers — confirm against their definitions.
 */
static int encode_any(
	uint16_t *const ht,
	const uint8_t *const in0,
	const uint8_t *const in_end,
	uint8_t *const out,
	uint8_t *const out_end)
{
	enum {
		NR_LOG2 = NR_4KB_LOG2,
		OFF_LOG2 = BLOCK_4KB_LOG2
	};
	/* keep an NR_COPY_MIN safety margin so wide probes stay in bounds */
	const uint8_t *const in_end_safe = in_end - NR_COPY_MIN;
	const uint8_t *r = in0;
	const uint8_t *nr0 = r++; /* start of the current non-repeat (literal) run */
	uint8_t *out_at = out + 1; /* +1 for header */
	for (; ; nr0 = r) {
		const uint8_t *q = 0;
		/* accelerating skip counter — see the step proof comment above */
		uint_fast32_t step = 1 << STEP_LOG2;
		uint_fast32_t utag = 0;
		const uint8_t *r_end = 0;
		uint_fast32_t r_bytes_max = 0;
		/* scan forward until the hash table yields a 4-byte match */
		while (true) {
			if (equal4(q = hashed(in0, ht, hash(r), r), r))
				break;
			++r;
			if (equal4(q = hashed(in0, ht, hash(r), r), r))
				break;
			/* no match: skip ahead, faster as misses accumulate */
			if (unlikely((r += (++step >> STEP_LOG2)) > in_end_safe))
				return out_tail(out_at, out_end, out, nr0, in_end,
						NR_LOG2, OFF_LOG2);
		}
		utag = u_32(r - q); /* match offset */
		r_end = repeat_end(q, r, in_end_safe, in_end);
		r_bytes_max = u_32(r_end - r); /* match length */
		if (unlikely(nr0 == r))
			/* no pending literals: emit a bare repeat record */
			out_at = out_repeat(out_at, utag, r_bytes_max,
					NR_LOG2, OFF_LOG2);
		else
			/* literals + match in one tuple */
			out_at = out_tuple(out_at, out_end, utag, nr0, r, r_bytes_max,
					NR_LOG2, OFF_LOG2);
		if (unlikely((r += r_bytes_max) > in_end_safe))
			return out_tail2(out_at, out_end, out, r, in_end,
					NR_LOG2, OFF_LOG2);
		/* refresh the table entry just behind the new scan position */
		ht[hash(r - 1)] = (uint16_t)(r - 1 - in0);
	}
}
|
||||
|
||||
/* not static for inlining optimization */
int lz4kd_encode_fast(
	void *const state,
	const uint8_t *const in,
	uint8_t *const out,
	const uint_fast32_t in_max,
	const uint_fast32_t out_max)
{
	/* state is the encoder's hash table of 16-bit positions */
	uint16_t *const ht = (uint16_t*)state;

	return encode_any(ht, in, in + in_max, out, out + out_max);
}
|
||||
|
||||
/*
 * Compress in[0..in_max) into out[0..out_max).
 *
 * state     : scratch hash table (>= lz4kd_encode_state_bytes_min() bytes)
 * out_limit : soft output cap; 0 or an out-of-range value falls back to
 *             min(in_max, out_max)
 *
 * Returns LZ4K_STATUS_FAILED on invalid arguments, 0 when the worst-case
 * encoded size cannot fit in out_max (NOTE(review): presumed
 * "incompressible" signal — confirm against callers), otherwise the
 * result of lz4kd_encode_fast().
 */
int lz4kd_encode(
	void *const state,
	const void *const in,
	void *out,
	unsigned in_max,
	unsigned out_max,
	unsigned out_limit)
{
	const uint64_t io_min = min_u64(in_max, out_max);
	/* minimum gain compression must achieve to be worthwhile */
	const uint64_t gain_max = max_u64(GAIN_BYTES_MAX, (io_min >> GAIN_BYTES_LOG2));
	/* ++use volatile pointers to prevent compiler optimizations */
	const uint8_t *volatile in_end = (const uint8_t*)in + in_max;
	const uint8_t *volatile out_end = (uint8_t*)out + out_max;
	const void *volatile state_end =
		(uint8_t*)state + encode_state_bytes_min();
	if (unlikely(state == NULL))
		return LZ4K_STATUS_FAILED;
	if (unlikely(in == NULL || out == NULL))
		return LZ4K_STATUS_FAILED;
	if (unlikely(out_max <= gain_max))
		return LZ4K_STATUS_FAILED;
	/* end pointers must be strictly past the start: rejects size 0 and wrap */
	if (unlikely((const uint8_t*)in >= in_end || (uint8_t*)out >= out_end))
		return LZ4K_STATUS_FAILED;
	if (unlikely(state >= state_end))
		return LZ4K_STATUS_FAILED; /* pointer overflow */
	/* this encoder handles at most one 4KB block */
	if (in_max > (1 << BLOCK_4KB_LOG2))
		return LZ4K_STATUS_FAILED;
	if (unlikely(!out_limit || out_limit > io_min))
		out_limit = (unsigned)io_min;
	m_set(state, 0, encode_state_bytes_min());
	*((uint8_t*)out) = 0; /* lz4kd header */
	if (unlikely(nr_encoded_bytes_max(in_max, NR_4KB_LOG2) > out_max))
		return 0;
	/* note: out_limit (not out_max) bounds the fast encoder's output */
	return lz4kd_encode_fast(state, (const uint8_t*)in, (uint8_t*)out,
			in_max, out_limit);
}
EXPORT_SYMBOL(lz4kd_encode);
|
||||
|
||||
/* maximum encoded size for repeat and non-repeat data if "fast" encoder is used */
uint_fast32_t lz4kd_encoded_bytes_max(
	uint_fast32_t nr_max,
	uint_fast32_t r_max,
	uint_fast32_t nr_log2,
	uint_fast32_t off_log2)
{
	/* 1 header byte + one full tag + literals rounded up to copy granularity */
	uint_fast32_t r = 1 + TAG_BYTES_MAX +
		(uint32_t)round_up_to_log2(nr_max, NR_COPY_LOG2);
	/* tag bits left for the repeat size after the offset and nr fields */
	uint_fast32_t r_log2 = TAG_BITS_MAX - (off_log2 + nr_log2);
	if (nr_max >= mask(nr_log2))
		/* literal count overflows its tag field: extra length bytes follow */
		r += size_bytes_count(nr_max - mask(nr_log2));
	if (r_max >= mask(r_log2)) {
		r_max -= mask(r_log2);
		r += (uint_fast32_t)max_u64(size_bytes_count(r_max),
			r_max - r_max / REPEAT_MIN); /* worst case: one tag for each REPEAT_MIN */
	}
	return r;
}
EXPORT_SYMBOL(lz4kd_encoded_bytes_max);
|
||||
|
||||
/* Date-stamped library version string. */
const char *lz4kd_version(void)
{
	return "2022.03.20";
}
|
||||
EXPORT_SYMBOL(lz4kd_version);
|
||||
|
||||
MODULE_LICENSE("Dual BSD/GPL");
|
||||
MODULE_DESCRIPTION("LZ4K encoder");
|
||||
135
lib/lz4kd/lz4kd_encode_private.h
Normal file
135
lib/lz4kd/lz4kd_encode_private.h
Normal file
@@ -0,0 +1,135 @@
|
||||
/*
|
||||
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
|
||||
* Description: LZ4K compression algorithm with delta compression
|
||||
*/
|
||||
|
||||
#ifndef _LZ4KD_ENCODE_PRIVATE_H
|
||||
#define _LZ4KD_ENCODE_PRIVATE_H
|
||||
|
||||
#include "lz4kd_private.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Encoder tuning constants. */
enum {
	GAIN_BYTES_LOG2 = 6,
	/* minimum byte gain compression must achieve; the effective threshold
	 * is max(GAIN_BYTES_MAX, io_min >> GAIN_BYTES_LOG2) — see lz4kd_encode() */
	GAIN_BYTES_MAX = 1 << GAIN_BYTES_LOG2,
	NR_COPY_LOG2 = 4,
	/* copy granularity for literal runs; also the safety margin the
	 * encoder keeps before in_end */
	NR_COPY_MIN = 1 << NR_COPY_LOG2
};
|
||||
|
||||
/* Truncating cast: the low 32 bits of a signed 64-bit value. */
inline static uint32_t u_32(int64_t i)
{
	return (uint32_t)(uint64_t)i;
}
|
||||
|
||||
/*
|
||||
* Compressed data format (where {} means 0 or more occurrences, [] means
|
||||
* optional)
|
||||
* <24bits tag: (off_log2 rOffset| r_log2 rSize|nr_log2 nrSize)>
|
||||
* {<nrSize byte>}[<nr bytes>]{<rSize byte>}
|
||||
* <rSize byte> and <nrSize byte> sequences are terminated by byte != 255
|
||||
*
|
||||
* <nrSize bytes for whole block>+<1 terminating 0 byte>
|
||||
*/
|
||||
inline static uint_fast32_t size_bytes_count(uint_fast32_t u)
|
||||
{
|
||||
return ((u + BYTE_MAX) >> BYTE_BITS) + 1; /* (u + BYTE_MAX - 1) / BYTE_MAX; */
|
||||
}
|
||||
|
||||
/* maximum encoded size for non-compressible data if "fast" encoder is used */
|
||||
inline static uint_fast32_t nr_encoded_bytes_max(
|
||||
uint_fast32_t nr_max,
|
||||
uint_fast32_t nr_log2)
|
||||
{
|
||||
uint_fast32_t r = 1 + TAG_BYTES_MAX + (uint32_t)round_up_to_log2(nr_max, NR_COPY_LOG2);
|
||||
return nr_max < mask(nr_log2) ? r : r + size_bytes_count(nr_max - mask(nr_log2));
|
||||
}
|
||||
|
||||
/* maximum encoded size for repeat and non-repeat data if "fast" encoder is used */
|
||||
uint_fast32_t lz4kd_encoded_bytes_max(
|
||||
uint_fast32_t nr_max,
|
||||
uint_fast32_t r_max,
|
||||
uint_fast32_t nr_log2,
|
||||
uint_fast32_t off_log2);
|
||||
|
||||
/*
 * Exchange step on the hash table: return the candidate match position
 * previously recorded in slot h, then record the current position r there.
 */
inline static const uint8_t *hashed(
	const uint8_t *const in0,
	uint16_t *const ht,
	uint_fast32_t h,
	const uint8_t *r)
{
	const uint8_t *const candidate = in0 + ht[h];

	ht[h] = (uint16_t)(r - in0);
	return candidate;
}
|
||||
|
||||
/*
 * Extend a match backwards: walk q and r left while the preceding bytes
 * match, without crossing nr0 (literal-run start) or in0 (block start).
 * Returns the new match start on the r side.
 */
inline static const uint8_t *repeat_start(
	const uint8_t *q,
	const uint8_t *r,
	const uint8_t *const nr0,
	const uint8_t *const in0)
{
	while (r > nr0 && likely(q > in0) && unlikely(q[-1] == r[-1])) {
		--q;
		--r;
	}
	return r;
}
|
||||
|
||||
static inline bool match_max(
|
||||
const uint8_t *q,
|
||||
const uint8_t *s,
|
||||
const uint_fast32_t r_max)
|
||||
{
|
||||
return equal4(q + r_max - REPEAT_MIN, s + r_max - REPEAT_MIN) &&
|
||||
equal4(q, s);
|
||||
}
|
||||
|
||||
/* Flush the trailing literal run [nr0, in_end) — presumably returns the
 * total encoded size or an error; NOTE(review): semantics inferred from
 * encode_any()'s "return out_tail(...)" usage, confirm in the definition. */
int lz4kd_out_tail(
	uint8_t *out_at,
	uint8_t *const out_end,
	const uint8_t *const out,
	const uint8_t *const nr0,
	const uint8_t *const in_end,
	const uint_fast32_t nr_log2,
	const uint_fast32_t off_log2,
	bool check_out);

/* Emit one <tag, literals [nr0, r), repeat length> record; returns the
 * advanced output cursor. */
uint8_t *lz4kd_out_tuple(
	uint8_t *out_at,
	uint8_t *const out_end,
	uint_fast32_t utag,
	const uint8_t *const nr0,
	const uint8_t *const r,
	uint_fast32_t r_bytes_max,
	const uint_fast32_t nr_log2,
	const uint_fast32_t off_log2,
	bool check_out);

/* Emit a repeat-only record (no pending literals); returns the advanced
 * output cursor. */
uint8_t *lz4kd_out_repeat(
	uint8_t *out_at,
	uint8_t *const out_end,
	uint_fast32_t utag,
	uint_fast32_t r_bytes_max,
	const uint_fast32_t nr_log2,
	const uint_fast32_t off_log2,
	const bool check_out);

/* Forward match extension: returns one past the last byte where the
 * sequences at q and r still agree (bounded by in_end). */
const uint8_t *lz4kd_repeat_end(
	const uint8_t *q,
	const uint8_t *r,
	const uint8_t *const in_end_safe,
	const uint8_t *const in_end);

/* Argument-check-free encoder entry; out_max here is the output budget
 * (lz4kd_encode passes its out_limit). */
int lz4kd_encode_fast(
	void *const state,
	const uint8_t *const in,
	uint8_t *const out,
	const uint_fast32_t in_max,
	const uint_fast32_t out_max);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _LZ4KD_ENCODE_PRIVATE_H */
|
||||
|
||||
304
lib/lz4kd/lz4kd_private.h
Normal file
304
lib/lz4kd/lz4kd_private.h
Normal file
@@ -0,0 +1,304 @@
|
||||
/*
|
||||
* Copyright (c) Huawei Technologies Co., Ltd. 2022. All rights reserved.
|
||||
* Description: LZ4K compression algorithm with delta compression
|
||||
*/
|
||||
|
||||
#ifndef _LZ4KD_PRIVATE_H
|
||||
#define _LZ4KD_PRIVATE_H
|
||||
|
||||
#if !defined(__KERNEL__)
|
||||
|
||||
/* for userspace only */
|
||||
|
||||
#else /* __KERNEL__ */
|
||||
|
||||
#include <linux/lz4kd.h>
|
||||
#define __STDC_WANT_LIB_EXT1__ 1
|
||||
#include <linux/string.h> /* memcpy() */
|
||||
#include <linux/types.h> /* uint8_t, int8_t, uint16_t, int16_t,
|
||||
uint32_t, int32_t, uint64_t, int64_t */
|
||||
#include <stddef.h>
|
||||
|
||||
typedef uint64_t uint_fast32_t;
|
||||
typedef int64_t int_fast32_t;
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
#if defined(__GNUC__) && (__GNUC__>=4)
|
||||
#define LZ4K_WITH_GCC_INTRINSICS
|
||||
#endif
|
||||
|
||||
/* Bit-width and format constants shared by encoder and decoder. */
enum {
	BYTE_BITS = 8UL,		/* bits per byte */
	WORD_BITS = 32U,		/* bits per 32-bit word */
	DWORD_BITS = 64UL,		/* bits per 64-bit word */
	BYTE_BITS_LOG2 = 3,
	BYTE_MAX = 255U,
	REPEAT_MIN = 4,			/* shortest match worth encoding */
	TAG_BYTES_MAX = 3,		/* record tag is 24 bits, see format comment */
	TAG_BITS_MAX = TAG_BYTES_MAX * 8,
	BLOCK_4KB_LOG2 = 12,
	BLOCK_8KB_LOG2 = 13,
	NR_8KB_LOG2 = 5, /* for encoded_bytes_max */
	NR_4KB_LOG2 = 6,
	PATTERN_BYTES_MAX = 8 /* 1 byte for header, 8 bytes for pattern */
};
|
||||
|
||||
/* 32-bit mask with the low `log2` bits set. */
inline static uint32_t mask(uint_fast32_t log2)
{
	return ~(~0U << log2);
}
|
||||
|
||||
/* 64-bit mask with the low `log2` bits set. */
inline static uint64_t mask64(uint_fast32_t log2)
{
	return ~(~0ULL << log2);
}
|
||||
|
||||
#if defined LZ4K_WITH_GCC_INTRINSICS
|
||||
/*
 * Index of the most significant set bit of u, or -1 when u == 0.
 *
 * Fix: the previous implementation truncated u to `unsigned` before
 * __builtin_clz(), so for u >= 2^32 it returned a wrong index, and for
 * nonzero u whose low 32 bits are all zero it called __builtin_clz(0),
 * which is undefined behavior.  Use the 64-bit builtin instead; results
 * for u < 2^32 are unchanged (63 ^ clzll == 31 ^ clz for those values).
 */
inline static int most_significant_bit_of(uint64_t u)
{
	return (int)(__builtin_expect((u) == 0, 0) ?
		-1 : (int)(63U ^ (uint32_t)__builtin_clzll(u))); /* 63 == DWORD_BITS-1 */
}
|
||||
#else /* #!defined LZ4K_WITH_GCC_INTRINSICS */
|
||||
#error undefined most_significant_bit_of(unsigned u)
|
||||
#endif /* #if defined LZ4K_WITH_GCC_INTRINSICS */
|
||||
|
||||
/* Larger of two unsigned 64-bit values. */
inline static uint64_t max_u64(uint64_t a, uint64_t b)
{
	return b > a ? b : a;
}

/* Smaller of two unsigned 64-bit values. */
inline static uint64_t min_u64(uint64_t a, uint64_t b)
{
	return b < a ? b : a;
}
|
||||
|
||||
/*
 * memcpy wrapper for unaligned loads/stores; uses bounds-checked memcpy_s
 * when C11 Annex K is available.  The "(total * 2) >> 1" obfuscation is
 * kept deliberately (per the original comment) to silence a static
 * analysis bot.  Regions must not overlap.
 */
inline static void m_copy(void *dst, const void *src, size_t total)
{
#if defined(__STDC_LIB_EXT1__)
	(void)memcpy_s(dst, total, src, (total * 2) >> 1); /* *2 >> 1 to avoid bot errors */
#else
	(void)__builtin_memcpy(dst, src, total);
#endif
}

/*
 * memset wrapper, same Annex-K / builtin split and the same deliberate
 * size obfuscation as m_copy() above.
 */
inline static void m_set(void *dst, uint8_t value, size_t total)
{
#if defined(__STDC_LIB_EXT1__)
	(void)memset_s(dst, total, value, (total * 2) >> 1); /* *2 >> 1 to avoid bot errors */
#else
	(void)__builtin_memset(dst, value, total);
#endif
}
|
||||
|
||||
/* Clear the low `log2` bits: round u down to a 2^log2 boundary. */
inline static uint64_t round_down_to_log2(uint64_t u, uint8_t log2)
{
	return u & ~mask64(log2);
}

/* Round u up to the next multiple of 2^log2. */
inline static uint64_t round_up_to_log2(uint64_t u, uint8_t log2)
{
	const uint64_t m = mask64(log2);

	return (u + m) & ~m;
}

/* Round u up to a multiple of 2^msb(u) (caller must pass u != 0). */
inline static uint64_t round_up_to_power_of2(uint64_t u)
{
	const int_fast32_t top = most_significant_bit_of(u);

	return round_up_to_log2(u, (uint8_t)top);
}

/* Align pointer p up to a 2^log2 boundary. */
inline static void *align_pointer_up_to_log2(const void *p, uint8_t log2)
{
	const uint64_t aligned = round_up_to_log2((uint64_t)p, log2);

	return (void*)aligned;
}
|
||||
|
||||
inline static uint32_t read3_at(const void *p)
|
||||
{
|
||||
uint32_t result = 0;
|
||||
m_copy(&result, p, 1 + 1 + 1);
|
||||
return result;
|
||||
}
|
||||
|
||||
inline static uint32_t read4_at(const void *p)
|
||||
{
|
||||
uint32_t result;
|
||||
m_copy(&result, p, sizeof(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
inline static uint64_t read8_at(const void *p)
|
||||
{
|
||||
uint64_t result;
|
||||
m_copy(&result, p, sizeof(result));
|
||||
return result;
|
||||
}
|
||||
|
||||
inline static bool equal3(const uint8_t *const q, const uint8_t *const r)
|
||||
{
|
||||
return (read4_at(q) << BYTE_BITS) == (read4_at(r) << BYTE_BITS);
|
||||
}
|
||||
|
||||
inline static bool equal3pv(const uint8_t *const q, const uint64_t rv)
|
||||
{
|
||||
return (read4_at(q) << BYTE_BITS) == ((uint32_t)rv << BYTE_BITS);
|
||||
}
|
||||
|
||||
inline static bool equal4(const uint8_t *const q, const uint8_t *const r)
|
||||
{
|
||||
return read4_at(q) == read4_at(r);
|
||||
}
|
||||
|
||||
inline static bool equal4pv(const uint8_t *const q, const uint64_t rv)
|
||||
{
|
||||
return read4_at(q) == (uint32_t)rv;
|
||||
}
|
||||
|
||||
inline static bool equal8(const uint8_t *const q, const uint8_t *const r)
|
||||
{
|
||||
return read8_at(q) == read8_at(r);
|
||||
}
|
||||
|
||||
inline static uint_fast32_t hash24v(const uint64_t r, uint32_t shift)
|
||||
{
|
||||
const uint32_t hash24_factor = 3266489917U;
|
||||
return (((uint32_t)r << BYTE_BITS) * hash24_factor) >> (WORD_BITS - shift);
|
||||
}
|
||||
|
||||
inline static uint_fast32_t hash24(const uint8_t *r, uint32_t shift)
|
||||
{
|
||||
return hash24v(read4_at(r), shift);
|
||||
}
|
||||
|
||||
inline static uint_fast32_t hash32v_2(const uint64_t r, uint32_t shift)
|
||||
{
|
||||
const uint32_t hash32_2_factor = 3266489917U;
|
||||
return ((uint32_t)r * hash32_2_factor) >> (WORD_BITS - shift);
|
||||
}
|
||||
|
||||
inline static uint_fast32_t hash32_2(const uint8_t *r, uint32_t shift)
|
||||
{
|
||||
return hash32v_2(read4_at(r), shift);
|
||||
}
|
||||
|
||||
inline static uint_fast32_t hash32v(const uint64_t r, uint32_t shift)
|
||||
{
|
||||
const uint32_t hash32_factor = 2654435761U;
|
||||
return ((uint32_t)r * hash32_factor) >> (WORD_BITS - shift);
|
||||
}
|
||||
|
||||
inline static uint_fast32_t hash32(const uint8_t *r, uint32_t shift)
|
||||
{
|
||||
return hash32v(read4_at(r), shift);
|
||||
}
|
||||
|
||||
inline static uint_fast32_t hash64v_5b(const uint64_t r, uint32_t shift)
|
||||
{
|
||||
const uint64_t m = 889523592379ULL;
|
||||
const uint64_t up_shift = 24;
|
||||
return (uint32_t)(((r << up_shift) * m) >> (DWORD_BITS - shift));
|
||||
}
|
||||
|
||||
inline static uint_fast32_t hash64_5b(const uint8_t *r, uint32_t shift)
|
||||
{
|
||||
return hash64v_5b(read8_at(r), shift);
|
||||
}
|
||||
|
||||
inline static uint_fast32_t hash64v_6b(const uint64_t r, uint32_t shift)
|
||||
{
|
||||
const uint64_t m = 227718039650203ULL;
|
||||
const uint64_t up_shift = 16;
|
||||
return (uint32_t)(((r << up_shift) * m) >> (DWORD_BITS - shift));
|
||||
}
|
||||
|
||||
inline static uint_fast32_t hash64_6b(const uint8_t *r, uint32_t shift)
|
||||
{
|
||||
return hash64v_6b(read8_at(r), shift);
|
||||
}
|
||||
|
||||
inline static uint_fast32_t hash64v_7b(const uint64_t r, uint32_t shift)
|
||||
{
|
||||
const uint64_t m = 58295818150454627ULL;
|
||||
const uint64_t up_shift = 8;
|
||||
return (uint32_t)(((r << up_shift) * m) >> (DWORD_BITS - shift));
|
||||
}
|
||||
|
||||
inline static uint_fast32_t hash64_7b(const uint8_t *r, uint32_t shift)
|
||||
{
|
||||
return hash64v_7b(read8_at(r), shift);
|
||||
}
|
||||
|
||||
inline static uint_fast32_t hash64v_8b(const uint64_t r, uint32_t shift)
|
||||
{
|
||||
const uint64_t m = 2870177450012600261ULL;
|
||||
return (uint32_t)((r * m) >> (DWORD_BITS - shift));
|
||||
}
|
||||
|
||||
inline static uint_fast32_t hash64_8b(const uint8_t *r, uint32_t shift)
|
||||
{
|
||||
return hash64v_8b(read8_at(r), shift);
|
||||
}
|
||||
|
||||
/* Copy in copy_min-byte chunks while dst is below dst_end (may overrun
 * dst_end by up to copy_min-1 bytes, as the original does). */
inline static void while_lt_copy_x(
	uint8_t *dst,
	const uint8_t *src,
	const uint8_t *dst_end,
	const size_t copy_min)
{
	while (dst < dst_end) {
		m_copy(dst, src, copy_min);
		dst += copy_min;
		src += copy_min;
	}
}

/* Copy one chunk unconditionally, then keep copying while the next chunk
 * still starts below dst_end. */
inline static void copy_x_while_lt(
	uint8_t *dst,
	const uint8_t *src,
	const uint8_t *dst_end,
	const size_t copy_min)
{
	m_copy(dst, src, copy_min);
	for (dst += copy_min, src += copy_min; dst < dst_end;
	     dst += copy_min, src += copy_min)
		m_copy(dst, src, copy_min);
}

/* Copy one chunk unconditionally, then more chunks until `total` bytes
 * (rounded up to copy_min) are covered. */
inline static void copy_x_while_total(
	uint8_t *dst,
	const uint8_t *src,
	size_t total,
	const size_t copy_min)
{
	m_copy(dst, src, copy_min);
	while (total > copy_min) {
		dst += copy_min;
		src += copy_min;
		m_copy(dst, src, copy_min);
		total -= copy_min;
	}
}

/* Copy two adjacent copy_min-byte chunks. */
inline static void copy_2x(
	uint8_t *dst,
	const uint8_t *src,
	const size_t copy_min)
{
	size_t off = 0;

	m_copy(dst + off, src + off, copy_min);
	off += copy_min;
	m_copy(dst + off, src + off, copy_min);
}

/* Copy one double chunk unconditionally, then keep going while the next
 * double chunk still starts below dst_end. */
inline static void copy_2x_as_x2_while_lt(
	uint8_t *dst,
	const uint8_t *src,
	const uint8_t *dst_end,
	const size_t copy_min)
{
	const size_t stride = copy_min << 1;

	copy_2x(dst, src, copy_min);
	while (dst + stride < dst_end) {
		dst += stride;
		src += stride;
		copy_2x(dst, src, copy_min);
	}
}

/* Copy in double chunks while dst is below dst_end. */
inline static void while_lt_copy_2x_as_x2(
	uint8_t *dst,
	const uint8_t *src,
	const uint8_t *dst_end,
	const size_t copy_min)
{
	const size_t stride = copy_min << 1;

	while (dst < dst_end) {
		copy_2x(dst, src, copy_min);
		dst += stride;
		src += stride;
	}
}
|
||||
|
||||
#endif /* _LZ4KD_PRIVATE_H */
|
||||
Reference in New Issue
Block a user