From 8f6f68732412b415519c9458038faf5dd79805bd Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 30 Oct 2018 15:07:28 -0700 Subject: [PATCH 01/85] lib/lz4: update LZ4 decompressor module Update the LZ4 compression module based on LZ4 v1.8.3 in order for the erofs file system to use the newest LZ4_decompress_safe_partial() which can now decode exactly the nb of bytes requested [1] to take place of the open hacked code in the erofs file system itself. Currently, apart from the erofs file system, no other users use LZ4_decompress_safe_partial, so no worry about the interface. In addition, LZ4 v1.8.x boosts up decompression speed compared to the current code which is based on LZ4 v1.7.3, mainly due to shortcut optimization for the specific common LZ4-sequences [2]. lzbench testdata (tested in kirin710, 8 cores, 4 big cores at 2189Mhz, 2GB DDR RAM at 1622Mhz, with enwik8 testdata [3]): Compressor name Compress. Decompress. Compr. size Ratio Filename memcpy 5004 MB/s 4924 MB/s 100000000 100.00 enwik8 lz4hc 1.7.3 -9 12 MB/s 653 MB/s 42203253 42.20 enwik8 lz4hc 1.8.0 -9 12 MB/s 908 MB/s 42203096 42.20 enwik8 lz4hc 1.8.3 -9 11 MB/s 965 MB/s 42203094 42.20 enwik8 [1] https://github.com/lz4/lz4/issues/566 https://github.com/lz4/lz4/commit/08d347b5b217b011ff7487130b79480d8cfdaeb8 [2] v1.8.1 perf: slightly faster compression and decompression speed https://github.com/lz4/lz4/commit/a31b7058cb97e4393da55e78a77a1c6f0c9ae038 v1.8.2 perf: slightly faster HC compression and decompression speed https://github.com/lz4/lz4/commit/45f8603aae389d34c689d3ff7427b314071ccd2c https://github.com/lz4/lz4/commit/1a191b3f8d26b50a7c1d41590b529ec308d768cd [3] http://mattmahoney.net/dc/textdata.html http://mattmahoney.net/dc/enwik8.zip Link: http: //lkml.kernel.org/r/1537181207-21932-1-git-send-email-gaoxiang25@huawei.com Signed-off-by: Gao Xiang Tested-by: Guo Xuenan Cc: Colin Ian King Cc: Yann Collet Cc: Greg Kroah-Hartman Cc: Fang Wei Cc: Chao Yu Cc: Miao Xie Cc: Sven Schmidt 
<4sschmid@informatik.uni-hamburg.de> Cc: Kyungsik Lee Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Change-Id: I457b806e87ee22830537cf3927140202de78c11a --- lib/lz4/lz4_decompress.c | 495 +++++++++++++++++++++++++++------------ lib/lz4/lz4defs.h | 9 +- 2 files changed, 356 insertions(+), 148 deletions(-) diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 141734d255e4..0c9d3ad17e0f 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -43,30 +43,36 @@ /*-***************************** * Decompression functions *******************************/ -/* LZ4_decompress_generic() : - * This generic decompression function cover all use cases. - * It shall be instantiated several times, using different sets of directives - * Note that it is important this generic function is really inlined, + +#define DEBUGLOG(l, ...) {} /* disabled */ + +#ifndef assert +#define assert(condition) ((void)0) +#endif + +/* + * LZ4_decompress_generic() : + * This generic decompression function covers all use cases. + * It shall be instantiated several times, using different sets of directives. + * Note that it is important for performance that this function really get inlined, * in order to remove useless branches during compilation optimization. */ static FORCE_INLINE int LZ4_decompress_generic( - const char * const source, - char * const dest, - int inputSize, + const char * const src, + char * const dst, + int srcSize, /* * If endOnInput == endOnInputSize, - * this value is the max size of Output Buffer. 
+ * this value is `dstCapacity` */ int outputSize, /* endOnOutputSize, endOnInputSize */ - int endOnInput, + endCondition_directive endOnInput, /* full, partial */ - int partialDecoding, - /* only used if partialDecoding == partial */ - int targetOutputSize, + earlyEnd_directive partialDecoding, /* noDict, withPrefix64k, usingExtDict */ - int dict, - /* == dest when no prefix */ + dict_directive dict, + /* always <= dst, == dst when no prefix */ const BYTE * const lowPrefix, /* only if dict == usingExtDict */ const BYTE * const dictStart, @@ -74,35 +80,43 @@ static FORCE_INLINE int LZ4_decompress_generic( const size_t dictSize ) { - /* Local Variables */ - const BYTE *ip = (const BYTE *) source; - const BYTE * const iend = ip + inputSize; + const BYTE *ip = (const BYTE *) src; + const BYTE * const iend = ip + srcSize; - BYTE *op = (BYTE *) dest; + BYTE *op = (BYTE *) dst; BYTE * const oend = op + outputSize; BYTE *cpy; - BYTE *oexit = op + targetOutputSize; - const BYTE * const lowLimit = lowPrefix - dictSize; const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize; - static const unsigned int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; - static const int dec64table[] = { 0, 0, 0, -1, 0, 1, 2, 3 }; + static const unsigned int inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4}; + static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3}; const int safeDecode = (endOnInput == endOnInputSize); const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB))); + /* Set up the "end" pointers for the shortcut. */ + const BYTE *const shortiend = iend - + (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/; + const BYTE *const shortoend = oend - + (endOnInput ? 
14 : 8) /*maxLL*/ - 18 /*maxML*/; + + DEBUGLOG(5, "%s (srcSize:%i, dstSize:%i)", __func__, + srcSize, outputSize); + /* Special cases */ - /* targetOutputSize too high => decode everything */ - if ((partialDecoding) && (oexit > oend - MFLIMIT)) - oexit = oend - MFLIMIT; + assert(lowPrefix <= op); + assert(src != NULL); /* Empty output buffer */ if ((endOnInput) && (unlikely(outputSize == 0))) - return ((inputSize == 1) && (*ip == 0)) ? 0 : -1; + return ((srcSize == 1) && (*ip == 0)) ? 0 : -1; if ((!endOnInput) && (unlikely(outputSize == 0))) return (*ip == 0 ? 1 : -1); + if ((endOnInput) && unlikely(srcSize == 0)) + return -1; + /* Main Loop : decode sequences */ while (1) { size_t length; @@ -111,12 +125,74 @@ static FORCE_INLINE int LZ4_decompress_generic( /* get literal length */ unsigned int const token = *ip++; - length = token>>ML_BITS; + /* ip < iend before the increment */ + assert(!endOnInput || ip <= iend); + + /* + * A two-stage shortcut for the most common case: + * 1) If the literal length is 0..14, and there is enough + * space, enter the shortcut and copy 16 bytes on behalf + * of the literals (in the fast mode, only 8 bytes can be + * safely copied this way). + * 2) Further if the match length is 4..18, copy 18 bytes + * in a similar manner; but we ensure that there's enough + * space in the output for those 18 bytes earlier, upon + * entering the shortcut (in other words, there is a + * combined check for both stages). + */ + if ((endOnInput ? length != RUN_MASK : length <= 8) + /* + * strictly "less than" on input, to re-enter + * the loop with at least one byte + */ + && likely((endOnInput ? ip < shortiend : 1) & + (op <= shortoend))) { + /* Copy the literals */ + memcpy(op, ip, endOnInput ? 16 : 8); + op += length; ip += length; + + /* + * The second stage: + * prepare for match copying, decode full info. + * If it doesn't work out, the info won't be wasted. 
+ */ + length = token & ML_MASK; /* match length */ + offset = LZ4_readLE16(ip); + ip += 2; + match = op - offset; + assert(match <= op); /* check overflow */ + + /* Do not deal with overlapping matches. */ + if ((length != ML_MASK) && + (offset >= 8) && + (dict == withPrefix64k || match >= lowPrefix)) { + /* Copy the match. */ + memcpy(op + 0, match + 0, 8); + memcpy(op + 8, match + 8, 8); + memcpy(op + 16, match + 16, 2); + op += length + MINMATCH; + /* Both stages worked, load the next token. */ + continue; + } + + /* + * The second stage didn't work out, but the info + * is ready. Propel it right to the point of match + * copying. + */ + goto _copy_match; + } + + /* decode literal length */ if (length == RUN_MASK) { unsigned int s; + if (unlikely(endOnInput ? ip >= iend - RUN_MASK : 0)) { + /* overflow detection */ + goto _output_error; + } do { s = *ip++; length += s; @@ -125,14 +201,14 @@ static FORCE_INLINE int LZ4_decompress_generic( : 1) & (s == 255)); if ((safeDecode) - && unlikely( - (size_t)(op + length) < (size_t)(op))) { + && unlikely((uptrval)(op) + + length < (uptrval)(op))) { /* overflow detection */ goto _output_error; } if ((safeDecode) - && unlikely( - (size_t)(ip + length) < (size_t)(ip))) { + && unlikely((uptrval)(ip) + + length < (uptrval)(ip))) { /* overflow detection */ goto _output_error; } @@ -140,16 +216,19 @@ static FORCE_INLINE int LZ4_decompress_generic( /* copy literals */ cpy = op + length; - if (((endOnInput) && ((cpy > (partialDecoding ? 
oexit : oend - MFLIMIT)) + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + + if (((endOnInput) && ((cpy > oend - MFLIMIT) || (ip + length > iend - (2 + 1 + LASTLITERALS)))) || ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) { if (partialDecoding) { if (cpy > oend) { /* - * Error : - * write attempt beyond end of output buffer + * Partial decoding : + * stop in the middle of literal segment */ - goto _output_error; + cpy = oend; + length = oend - op; } if ((endOnInput) && (ip + length > iend)) { @@ -184,29 +263,43 @@ static FORCE_INLINE int LZ4_decompress_generic( memcpy(op, ip, length); ip += length; op += length; - /* Necessarily EOF, due to parsing restrictions */ - break; - } - LZ4_wildCopy(op, ip, cpy); - ip += length; - op = cpy; + /* Necessarily EOF, due to parsing restrictions */ + if (!partialDecoding || (cpy == oend)) + break; + } else { + /* may overwrite up to WILDCOPYLENGTH beyond cpy */ + LZ4_wildCopy(op, ip, cpy); + ip += length; + op = cpy; + } /* get offset */ offset = LZ4_readLE16(ip); ip += 2; match = op - offset; - if ((checkOffset) && (unlikely(match < lowLimit))) { + /* get matchlength */ + length = token & ML_MASK; + +_copy_match: + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { /* Error : offset outside buffers */ goto _output_error; } /* costs ~1%; silence an msan warning when offset == 0 */ - LZ4_write32(op, (U32)offset); + /* + * note : when partialDecoding, there is no guarantee that + * at least 4 bytes remain available in output buffer + */ + if (!partialDecoding) { + assert(oend > op); + assert(oend - op >= 4); + + LZ4_write32(op, (U32)offset); + } - /* get matchlength */ - length = token & ML_MASK; if (length == ML_MASK) { unsigned int s; @@ -221,7 +314,7 @@ static FORCE_INLINE int LZ4_decompress_generic( if ((safeDecode) && unlikely( - (size_t)(op + length) < (size_t)op)) { + (uptrval)(op) + length < (uptrval)op)) { /* overflow detection */ goto _output_error; } @@ -229,24 +322,26 @@ static FORCE_INLINE int 
LZ4_decompress_generic( length += MINMATCH; - /* check external dictionary */ + /* match starting within external dictionary */ if ((dict == usingExtDict) && (match < lowPrefix)) { if (unlikely(op + length > oend - LASTLITERALS)) { /* doesn't respect parsing restriction */ - goto _output_error; + if (!partialDecoding) + goto _output_error; + length = min(length, (size_t)(oend - op)); } if (length <= (size_t)(lowPrefix - match)) { /* - * match can be copied as a single segment - * from external dictionary + * match fits entirely within external + * dictionary : just copy */ memmove(op, dictEnd - (lowPrefix - match), length); op += length; } else { /* - * match encompass external + * match stretches into both external * dictionary and current block */ size_t const copySize = (size_t)(lowPrefix - match); @@ -254,7 +349,6 @@ static FORCE_INLINE int LZ4_decompress_generic( memcpy(op, dictEnd - copySize, copySize); op += copySize; - if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ BYTE * const endOfMatch = op + restSize; @@ -267,23 +361,44 @@ static FORCE_INLINE int LZ4_decompress_generic( op += restSize; } } - continue; } /* copy match within block */ cpy = op + length; - if (unlikely(offset < 8)) { - const int dec64 = dec64table[offset]; + /* + * partialDecoding : + * may not respect endBlock parsing restrictions + */ + assert(op <= oend); + if (partialDecoding && + (cpy > oend - MATCH_SAFEGUARD_DISTANCE)) { + size_t const mlen = min(length, (size_t)(oend - op)); + const BYTE * const matchEnd = match + mlen; + BYTE * const copyEnd = op + mlen; + if (matchEnd > op) { + /* overlap copy */ + while (op < copyEnd) + *op++ = *match++; + } else { + memcpy(op, match, mlen); + } + op = copyEnd; + if (op == oend) + break; + continue; + } + + if (unlikely(offset < 8)) { op[0] = match[0]; op[1] = match[1]; op[2] = match[2]; op[3] = match[3]; - match += dec32table[offset]; + match += inc32table[offset]; memcpy(op + 4, match, 4); - match -= dec64; + match -= 
dec64table[offset]; } else { LZ4_copy8(op, match); match += 8; @@ -291,7 +406,7 @@ static FORCE_INLINE int LZ4_decompress_generic( op += 8; - if (unlikely(cpy > oend - 12)) { + if (unlikely(cpy > oend - MATCH_SAFEGUARD_DISTANCE)) { BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1); if (cpy > oend - LASTLITERALS) { @@ -307,60 +422,139 @@ static FORCE_INLINE int LZ4_decompress_generic( match += oCopyLimit - op; op = oCopyLimit; } - while (op < cpy) *op++ = *match++; } else { LZ4_copy8(op, match); - if (length > 16) LZ4_wildCopy(op + 8, match + 8, cpy); } - - op = cpy; /* correction */ + op = cpy; /* wildcopy correction */ } /* end of decoding */ if (endOnInput) { /* Nb of output bytes decoded */ - return (int) (((char *)op) - dest); + return (int) (((char *)op) - dst); } else { /* Nb of input bytes read */ - return (int) (((const char *)ip) - source); + return (int) (((const char *)ip) - src); } /* Overflow error detected */ _output_error: - return -1; + return (int) (-(((const char *)ip) - src)) - 1; } int LZ4_decompress_safe(const char *source, char *dest, int compressedSize, int maxDecompressedSize) { - return LZ4_decompress_generic(source, dest, compressedSize, - maxDecompressedSize, endOnInputSize, full, 0, - noDict, (BYTE *)dest, NULL, 0); + return LZ4_decompress_generic(source, dest, + compressedSize, maxDecompressedSize, + endOnInputSize, decode_full_block, + noDict, (BYTE *)dest, NULL, 0); } -int LZ4_decompress_safe_partial(const char *source, char *dest, - int compressedSize, int targetOutputSize, int maxDecompressedSize) +int LZ4_decompress_safe_partial(const char *src, char *dst, + int compressedSize, int targetOutputSize, int dstCapacity) { - return LZ4_decompress_generic(source, dest, compressedSize, - maxDecompressedSize, endOnInputSize, partial, - targetOutputSize, noDict, (BYTE *)dest, NULL, 0); + dstCapacity = min(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, + endOnInputSize, 
partial_decode, + noDict, (BYTE *)dst, NULL, 0); } int LZ4_decompress_fast(const char *source, char *dest, int originalSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, withPrefix64k, - (BYTE *)(dest - 64 * KB), NULL, 64 * KB); + endOnOutputSize, decode_full_block, + withPrefix64k, + (BYTE *)dest - 64 * KB, NULL, 0); } +/* ===== Instantiate a few more decoding cases, used more than once. ===== */ + +int LZ4_decompress_safe_withPrefix64k(const char *source, char *dest, + int compressedSize, int maxOutputSize) +{ + return LZ4_decompress_generic(source, dest, + compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, + withPrefix64k, + (BYTE *)dest - 64 * KB, NULL, 0); +} + +static int LZ4_decompress_safe_withSmallPrefix(const char *source, char *dest, + int compressedSize, + int maxOutputSize, + size_t prefixSize) +{ + return LZ4_decompress_generic(source, dest, + compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, + noDict, + (BYTE *)dest - prefixSize, NULL, 0); +} + +int LZ4_decompress_safe_forceExtDict(const char *source, char *dest, + int compressedSize, int maxOutputSize, + const void *dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, + compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, + usingExtDict, (BYTE *)dest, + (const BYTE *)dictStart, dictSize); +} + +static int LZ4_decompress_fast_extDict(const char *source, char *dest, + int originalSize, + const void *dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, + 0, originalSize, + endOnOutputSize, decode_full_block, + usingExtDict, (BYTE *)dest, + (const BYTE *)dictStart, dictSize); +} + +/* + * The "double dictionary" mode, for use with e.g. ring buffers: the first part + * of the dictionary is passed as prefix, and the second via dictStart + dictSize. + * These routines are used only once, in LZ4_decompress_*_continue(). 
+ */ +static FORCE_INLINE +int LZ4_decompress_safe_doubleDict(const char *source, char *dest, + int compressedSize, int maxOutputSize, + size_t prefixSize, + const void *dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, + compressedSize, maxOutputSize, + endOnInputSize, decode_full_block, + usingExtDict, (BYTE *)dest - prefixSize, + (const BYTE *)dictStart, dictSize); +} + +static FORCE_INLINE +int LZ4_decompress_fast_doubleDict(const char *source, char *dest, + int originalSize, size_t prefixSize, + const void *dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, + 0, originalSize, + endOnOutputSize, decode_full_block, + usingExtDict, (BYTE *)dest - prefixSize, + (const BYTE *)dictStart, dictSize); +} + +/* ===== streaming decompression functions ===== */ + int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode, const char *dictionary, int dictSize) { - LZ4_streamDecode_t_internal *lz4sd = (LZ4_streamDecode_t_internal *) LZ4_streamDecode; + LZ4_streamDecode_t_internal *lz4sd = + &LZ4_streamDecode->internal_donotuse; lz4sd->prefixSize = (size_t) dictSize; lz4sd->prefixEnd = (const BYTE *) dictionary + dictSize; @@ -382,35 +576,51 @@ int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode, int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode, const char *source, char *dest, int compressedSize, int maxOutputSize) { - LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse; + LZ4_streamDecode_t_internal *lz4sd = + &LZ4_streamDecode->internal_donotuse; int result; - if (lz4sd->prefixEnd == (BYTE *)dest) { - result = LZ4_decompress_generic(source, dest, - compressedSize, - maxOutputSize, - endOnInputSize, full, 0, - usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize, - lz4sd->externalDict, - lz4sd->extDictSize); - + if (lz4sd->prefixSize == 0) { + /* The first call, no dictionary yet. 
*/ + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_safe(source, dest, + compressedSize, maxOutputSize); + if (result <= 0) + return result; + lz4sd->prefixSize = result; + lz4sd->prefixEnd = (BYTE *)dest + result; + } else if (lz4sd->prefixEnd == (BYTE *)dest) { + /* They're rolling the current segment. */ + if (lz4sd->prefixSize >= 64 * KB - 1) + result = LZ4_decompress_safe_withPrefix64k(source, dest, + compressedSize, maxOutputSize); + else if (lz4sd->extDictSize == 0) + result = LZ4_decompress_safe_withSmallPrefix(source, + dest, compressedSize, maxOutputSize, + lz4sd->prefixSize); + else + result = LZ4_decompress_safe_doubleDict(source, dest, + compressedSize, maxOutputSize, + lz4sd->prefixSize, + lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; - lz4sd->prefixSize += result; - lz4sd->prefixEnd += result; + lz4sd->prefixEnd += result; } else { + /* + * The buffer wraps around, or they're + * switching to another buffer. + */ lz4sd->extDictSize = lz4sd->prefixSize; lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_generic(source, dest, + result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, - endOnInputSize, full, 0, - usingExtDict, (BYTE *)dest, lz4sd->externalDict, lz4sd->extDictSize); if (result <= 0) return result; lz4sd->prefixSize = result; - lz4sd->prefixEnd = (BYTE *)dest + result; + lz4sd->prefixEnd = (BYTE *)dest + result; } return result; @@ -422,75 +632,66 @@ int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode, LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse; int result; - if (lz4sd->prefixEnd == (BYTE *)dest) { - result = LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, - usingExtDict, - lz4sd->prefixEnd - lz4sd->prefixSize, - lz4sd->externalDict, lz4sd->extDictSize); - - if (result <= 0) - return result; - - lz4sd->prefixSize += originalSize; - lz4sd->prefixEnd += 
originalSize; - } else { - lz4sd->extDictSize = lz4sd->prefixSize; - lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, full, 0, - usingExtDict, (BYTE *)dest, - lz4sd->externalDict, lz4sd->extDictSize); + if (lz4sd->prefixSize == 0) { + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_fast(source, dest, originalSize); if (result <= 0) return result; lz4sd->prefixSize = originalSize; - lz4sd->prefixEnd = (BYTE *)dest + originalSize; + lz4sd->prefixEnd = (BYTE *)dest + originalSize; + } else if (lz4sd->prefixEnd == (BYTE *)dest) { + if (lz4sd->prefixSize >= 64 * KB - 1 || + lz4sd->extDictSize == 0) + result = LZ4_decompress_fast(source, dest, + originalSize); + else + result = LZ4_decompress_fast_doubleDict(source, dest, + originalSize, lz4sd->prefixSize, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) + return result; + lz4sd->prefixSize += originalSize; + lz4sd->prefixEnd += originalSize; + } else { + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_fast_extDict(source, dest, + originalSize, lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) + return result; + lz4sd->prefixSize = originalSize; + lz4sd->prefixEnd = (BYTE *)dest + originalSize; } - return result; } -/* - * Advanced decoding functions : - * *_usingDict() : - * These decoding functions work the same as "_continue" ones, - * the dictionary must be explicitly provided within parameters - */ -static FORCE_INLINE int LZ4_decompress_usingDict_generic(const char *source, - char *dest, int compressedSize, int maxOutputSize, int safe, - const char *dictStart, int dictSize) +int LZ4_decompress_safe_usingDict(const char *source, char *dest, + int compressedSize, int maxOutputSize, + const char *dictStart, int dictSize) { if (dictSize == 0) - return LZ4_decompress_generic(source, dest, - compressedSize, 
maxOutputSize, safe, full, 0, - noDict, (BYTE *)dest, NULL, 0); - if (dictStart + dictSize == dest) { - if (dictSize >= (int)(64 * KB - 1)) - return LZ4_decompress_generic(source, dest, - compressedSize, maxOutputSize, safe, full, 0, - withPrefix64k, (BYTE *)dest - 64 * KB, NULL, 0); - return LZ4_decompress_generic(source, dest, compressedSize, - maxOutputSize, safe, full, 0, noDict, - (BYTE *)dest - dictSize, NULL, 0); + return LZ4_decompress_safe(source, dest, + compressedSize, maxOutputSize); + if (dictStart+dictSize == dest) { + if (dictSize >= 64 * KB - 1) + return LZ4_decompress_safe_withPrefix64k(source, dest, + compressedSize, maxOutputSize); + return LZ4_decompress_safe_withSmallPrefix(source, dest, + compressedSize, maxOutputSize, dictSize); } - return LZ4_decompress_generic(source, dest, compressedSize, - maxOutputSize, safe, full, 0, usingExtDict, - (BYTE *)dest, (const BYTE *)dictStart, dictSize); -} - -int LZ4_decompress_safe_usingDict(const char *source, char *dest, - int compressedSize, int maxOutputSize, - const char *dictStart, int dictSize) -{ - return LZ4_decompress_usingDict_generic(source, dest, - compressedSize, maxOutputSize, 1, dictStart, dictSize); + return LZ4_decompress_safe_forceExtDict(source, dest, + compressedSize, maxOutputSize, dictStart, dictSize); } int LZ4_decompress_fast_usingDict(const char *source, char *dest, - int originalSize, const char *dictStart, int dictSize) + int originalSize, + const char *dictStart, int dictSize) { - return LZ4_decompress_usingDict_generic(source, dest, 0, - originalSize, 0, dictStart, dictSize); + if (dictSize == 0 || dictStart + dictSize == dest) + return LZ4_decompress_fast(source, dest, originalSize); + + return LZ4_decompress_fast_extDict(source, dest, originalSize, + dictStart, dictSize); } #ifndef STATIC diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h index 00a0b58a0871..1a7fa9d9170f 100644 --- a/lib/lz4/lz4defs.h +++ b/lib/lz4/lz4defs.h @@ -75,6 +75,11 @@ typedef uintptr_t uptrval; 
#define WILDCOPYLENGTH 8 #define LASTLITERALS 5 #define MFLIMIT (WILDCOPYLENGTH + MINMATCH) +/* + * ensure it's possible to write 2 x wildcopyLength + * without overflowing output buffer + */ +#define MATCH_SAFEGUARD_DISTANCE ((2 * WILDCOPYLENGTH) - MINMATCH) /* Increase this value ==> compression run slower on incompressible data */ #define LZ4_SKIPTRIGGER 6 @@ -222,6 +227,8 @@ typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive; typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; -typedef enum { full = 0, partial = 1 } earlyEnd_directive; +typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; + +#define LZ4_STATIC_ASSERT(c) BUILD_BUG_ON(!(c)) #endif From c7d5cb0d8e1de8907136bf6ea85d71c192f2cf18 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Fri, 14 Aug 2020 17:30:10 -0700 Subject: [PATCH 02/85] lz4: fix kernel decompression speed This patch replaces all memcpy() calls with LZ4_memcpy() which calls __builtin_memcpy() so the compiler can inline it. LZ4 relies heavily on memcpy() with a constant size being inlined. In x86 and i386 pre-boot environments memcpy() cannot be inlined because memcpy() doesn't get defined as __builtin_memcpy(). An equivalent patch has been applied upstream so that the next import won't lose this change [1]. I've measured the kernel decompression speed using QEMU before and after this patch for the x86_64 and i386 architectures. The speed-up is about 10x as shown below. Code Arch Kernel Size Time Speed v5.8 x86_64 11504832 B 148 ms 79 MB/s patch x86_64 11503872 B 13 ms 885 MB/s v5.8 i386 9621216 B 91 ms 106 MB/s patch i386 9620224 B 10 ms 962 MB/s I also measured the time to decompress the initramfs on x86_64, i386, and arm. All three show the same decompression speed before and after, as expected. 
[1] https://github.com/lz4/lz4/pull/890 Signed-off-by: Nick Terrell Signed-off-by: Andrew Morton Cc: Yann Collet Cc: Gao Xiang Cc: Sven Schmidt <4sschmid@informatik.uni-hamburg.de> Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Arvind Sankar Link: http: //lkml.kernel.org/r/20200803194022.2966806-1-nickrterrell@gmail.com Signed-off-by: Linus Torvalds Change-Id: I3e725b70595227145a8c8b42a6626cb0629fdddf --- lib/lz4/lz4_compress.c | 4 ++-- lib/lz4/lz4_decompress.c | 18 +++++++++--------- lib/lz4/lz4defs.h | 10 ++++++++++ lib/lz4/lz4hc_compress.c | 2 +- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c index cc7b6d4cc7c7..90bb67994688 100644 --- a/lib/lz4/lz4_compress.c +++ b/lib/lz4/lz4_compress.c @@ -446,7 +446,7 @@ _last_literals: *op++ = (BYTE)(lastRun << ML_BITS); } - memcpy(op, anchor, lastRun); + LZ4_memcpy(op, anchor, lastRun); op += lastRun; } @@ -708,7 +708,7 @@ _last_literals: } else { *op++ = (BYTE)(lastRunSize<= 8) && (dict == withPrefix64k || match >= lowPrefix)) { /* Copy the match. */ - memcpy(op + 0, match + 0, 8); - memcpy(op + 8, match + 8, 8); - memcpy(op + 16, match + 16, 2); + LZ4_memcpy(op + 0, match + 0, 8); + LZ4_memcpy(op + 8, match + 8, 8); + LZ4_memcpy(op + 16, match + 16, 2); op += length + MINMATCH; /* Both stages worked, load the next token. 
*/ continue; @@ -260,7 +260,7 @@ static FORCE_INLINE int LZ4_decompress_generic( } } - memcpy(op, ip, length); + LZ4_memcpy(op, ip, length); ip += length; op += length; @@ -347,7 +347,7 @@ _copy_match: size_t const copySize = (size_t)(lowPrefix - match); size_t const restSize = length - copySize; - memcpy(op, dictEnd - copySize, copySize); + LZ4_memcpy(op, dictEnd - copySize, copySize); op += copySize; if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ @@ -357,7 +357,7 @@ _copy_match: while (op < endOfMatch) *op++ = *copyFrom++; } else { - memcpy(op, lowPrefix, restSize); + LZ4_memcpy(op, lowPrefix, restSize); op += restSize; } } @@ -383,7 +383,7 @@ _copy_match: while (op < copyEnd) *op++ = *match++; } else { - memcpy(op, match, mlen); + LZ4_memcpy(op, match, mlen); } op = copyEnd; if (op == oend) @@ -397,7 +397,7 @@ _copy_match: op[2] = match[2]; op[3] = match[3]; match += inc32table[offset]; - memcpy(op + 4, match, 4); + LZ4_memcpy(op + 4, match, 4); match -= dec64table[offset]; } else { LZ4_copy8(op, match); diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h index 1a7fa9d9170f..c91dd96ef629 100644 --- a/lib/lz4/lz4defs.h +++ b/lib/lz4/lz4defs.h @@ -137,6 +137,16 @@ static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value) return put_unaligned_le16(value, memPtr); } +/* + * LZ4 relies on memcpy with a constant size being inlined. In freestanding + * environments, the compiler can't assume the implementation of memcpy() is + * standard compliant, so apply its specialized memcpy() inlining logic. When + * possible, use __builtin_memcpy() to tell the compiler to analyze memcpy() + * as-if it were standard compliant, so it can inline it in freestanding + * environments. This is needed when decompressing the Linux Kernel, for example. 
+ */ +#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) + static FORCE_INLINE void LZ4_copy8(void *dst, const void *src) { #if LZ4_ARCH64 diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c index 176f03b83e56..ccbf0e89d713 100644 --- a/lib/lz4/lz4hc_compress.c +++ b/lib/lz4/lz4hc_compress.c @@ -570,7 +570,7 @@ _Search3: *op++ = (BYTE) lastRun; } else *op++ = (BYTE)(lastRun< Date: Tue, 15 Dec 2020 20:44:03 -0800 Subject: [PATCH 03/85] lib/lz4: explicitly support in-place decompression LZ4 final literal copy could be overlapped when doing in-place decompression, so it's unsafe to just use memcpy() on an optimized memcpy approach but memmove() instead. Upstream LZ4 has updated this years ago [1] (and the impact is non-sensible [2] plus only a few bytes remain), this commit just synchronizes LZ4 upstream code to the kernel side as well. It can be observed as EROFS in-place decompression failure on specific files when X86_FEATURE_ERMS is unsupported, memcpy() optimization of commit 59daa706fbec ("x86, mem: Optimize memcpy by avoiding memory false dependece") will be enabled then. Currently most modern x86-CPUs support ERMS, these CPUs just use "rep movsb" approach so no problem at all. However, it can still be verified with forcely disabling ERMS feature... arch/x86/lib/memcpy_64.S: ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ - "jmp memcpy_erms", X86_FEATURE_ERMS + "jmp memcpy_orig", X86_FEATURE_ERMS We didn't observe any strange on arm64/arm/x86 platform before since most memcpy() would behave in an increasing address order ("copy upwards" [3]) and it's the correct order of in-place decompression but it really needs an update to memmove() for sure considering it's an undefined behavior according to the standard and some unique optimization already exists in the kernel. 
[1] https://github.com/lz4/lz4/commit/33cb8518ac385835cc17be9a770b27b40cd0e15b [2] https://github.com/lz4/lz4/pull/717#issuecomment-497818921 [3] https://sourceware.org/bugzilla/show_bug.cgi?id=12518 Link: https: //lkml.kernel.org/r/20201122030749.2698994-1-hsiangkao@redhat.com Signed-off-by: Gao Xiang Reviewed-by: Nick Terrell Cc: Yann Collet Cc: Miao Xie Cc: Chao Yu Cc: Li Guifu Cc: Guo Xuenan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Change-Id: Iec77608d7cd5201f761ac78a34b2fc617294c495 --- lib/lz4/lz4_decompress.c | 6 +++++- lib/lz4/lz4defs.h | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index fea6ad57cab7..cda8b794da04 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -260,7 +260,11 @@ static FORCE_INLINE int LZ4_decompress_generic( } } - LZ4_memcpy(op, ip, length); + /* + * supports overlapping memory regions; only matters + * for in-place decompression scenarios + */ + LZ4_memmove(op, ip, length); ip += length; op += length; diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h index c91dd96ef629..673bd206aa98 100644 --- a/lib/lz4/lz4defs.h +++ b/lib/lz4/lz4defs.h @@ -146,6 +146,7 @@ static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value) * environments. This is needed when decompressing the Linux Kernel, for example. */ #define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) +#define LZ4_memmove(dst, src, size) __builtin_memmove(dst, src, size) static FORCE_INLINE void LZ4_copy8(void *dst, const void *src) { From 3eda101d26fcce402ba3759592cbce602a9c55d9 Mon Sep 17 00:00:00 2001 From: Guo Xuenan Date: Fri, 8 Apr 2022 13:08:58 -0700 Subject: [PATCH 04/85] lz4: fix LZ4_decompress_safe_partial read out of bound commit eafc0a02391b7b36617b36c97c4b5d6832cf5e24 upstream. When partialDecoding, it is EOF if we've either filled the output buffer or can't proceed with reading an offset for following match. 
In some extreme corner cases when compressed data is suitably corrupted, UAF will occur. As reported by KASAN [1], LZ4_decompress_safe_partial may lead to read out of bound problem during decoding. lz4 upstream has fixed it [2] and this issue has been discussed here [3] before. current decompression routine was ported from lz4 v1.8.3, bumping lib/lz4 to v1.9.+ is certainly a huge work to be done later, so, we'd better fix it first. [1] https://lore.kernel.org/all/000000000000830d1205cf7f0477@google.com/ [2] https://github.com/lz4/lz4/commit/c5d6f8a8be3927c0bec91bcc58667a6cfad244ad# [3] https://lore.kernel.org/all/CC666AE8-4CA4-4951-B6FB-A2EFDE3AC03B@fb.com/ Link: https://lkml.kernel.org/r/20211111105048.2006070-1-guoxuenan@huawei.com Reported-by: syzbot+63d688f1d899c588fb71@syzkaller.appspotmail.com Change-Id: I24b1fe4aaed8b89b65f66d753b72a2f9f32ac79b Signed-off-by: Guo Xuenan Reviewed-by: Nick Terrell Acked-by: Gao Xiang Cc: Yann Collet Cc: Chengyang Fan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- lib/lz4/lz4_decompress.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index cda8b794da04..3c30bf193a40 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -268,8 +268,12 @@ static FORCE_INLINE int LZ4_decompress_generic( ip += length; op += length; - /* Necessarily EOF, due to parsing restrictions */ - if (!partialDecoding || (cpy == oend)) + /* Necessarily EOF when !partialDecoding. + * When partialDecoding, it is EOF if we've either + * filled the output buffer or + * can't proceed with reading an offset for following match.
+ */ + if (!partialDecoding || (cpy == oend) || (ip >= (iend - 2))) break; } else { /* may overwrite up to WILDCOPYLENGTH beyond cpy */ From f031c99645f23a9a6451279b77dce3d11281cfbf Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Sun, 13 Apr 2025 20:15:42 +0300 Subject: [PATCH 05/85] lz4: import v1.10.0 from upstream Change-Id: Ic8937ac5cc952272ab8cb26cc73361f255813264 Signed-off-by: Juhyung Park --- include/linux/lz4.h | 649 +-------- lib/lz4/Makefile | 5 - lib/lz4/lz4.c | 2829 ++++++++++++++++++++++++++++++++++++++ lib/lz4/lz4.h | 884 ++++++++++++ lib/lz4/lz4_compress.c | 940 ------------- lib/lz4/lz4_decompress.c | 717 ---------- lib/lz4/lz4defs.h | 245 ---- lib/lz4/lz4hc.c | 2193 +++++++++++++++++++++++++++++ lib/lz4/lz4hc.h | 414 ++++++ lib/lz4/lz4hc_compress.c | 769 ----------- 10 files changed, 6321 insertions(+), 3324 deletions(-) mode change 100644 => 120000 include/linux/lz4.h delete mode 100644 lib/lz4/Makefile create mode 100644 lib/lz4/lz4.c create mode 100644 lib/lz4/lz4.h delete mode 100644 lib/lz4/lz4_compress.c delete mode 100644 lib/lz4/lz4_decompress.c delete mode 100644 lib/lz4/lz4defs.h create mode 100644 lib/lz4/lz4hc.c create mode 100644 lib/lz4/lz4hc.h delete mode 100644 lib/lz4/lz4hc_compress.c diff --git a/include/linux/lz4.h b/include/linux/lz4.h deleted file mode 100644 index 394e3d9213b8..000000000000 --- a/include/linux/lz4.h +++ /dev/null @@ -1,648 +0,0 @@ -/* LZ4 Kernel Interface - * - * Copyright (C) 2013, LG Electronics, Kyungsik Lee - * Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This file is based on the original header file - * for LZ4 - Fast LZ compression algorithm. - * - * LZ4 - Fast LZ compression algorithm - * Copyright (C) 2011-2016, Yann Collet. 
- * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * You can contact the author at : - * - LZ4 homepage : http://www.lz4.org - * - LZ4 source repository : https://github.com/lz4/lz4 - */ - -#ifndef __LZ4_H__ -#define __LZ4_H__ - -#include -#include /* memset, memcpy */ - -/*-************************************************************************ - * CONSTANTS - **************************************************************************/ -/* - * LZ4_MEMORY_USAGE : - * Memory usage formula : N->2^N Bytes - * (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) 
- * Increasing memory usage improves compression ratio - * Reduced memory usage can improve speed, due to cache effect - * Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache - */ -#define LZ4_MEMORY_USAGE 14 - -#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ -#define LZ4_COMPRESSBOUND(isize) (\ - (unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE \ - ? 0 \ - : (isize) + ((isize)/255) + 16) - -#define LZ4_ACCELERATION_DEFAULT 1 -#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2) -#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE) -#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) - -#define LZ4HC_MIN_CLEVEL 3 -#define LZ4HC_DEFAULT_CLEVEL 9 -#define LZ4HC_MAX_CLEVEL 16 - -#define LZ4HC_DICTIONARY_LOGSIZE 16 -#define LZ4HC_MAXD (1<= LZ4_compressBound(inputSize). - * It also runs faster, so it's a recommended setting. - * If the function cannot compress 'source' into a more limited 'dest' budget, - * compression stops *immediately*, and the function result is zero. - * As a consequence, 'dest' content is not valid. - * - * Return: Number of bytes written into buffer 'dest' - * (necessarily <= maxOutputSize) or 0 if compression fails - */ -int LZ4_compress_default(const char *source, char *dest, int inputSize, - int maxOutputSize, void *wrkmem); - -/** - * LZ4_compress_fast() - As LZ4_compress_default providing an acceleration param - * @source: source address of the original data - * @dest: output buffer address of the compressed data - * @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE - * @maxOutputSize: full or partial size of buffer 'dest' - * which must be already allocated - * @acceleration: acceleration factor - * @wrkmem: address of the working memory. - * This requires 'workmem' of LZ4_MEM_COMPRESS. - * - * Same as LZ4_compress_default(), but allows to select an "acceleration" - * factor. The larger the acceleration value, the faster the algorithm, - * but also the lesser the compression. 
It's a trade-off. It can be fine tuned, - * with each successive value providing roughly +~3% to speed. - * An acceleration value of "1" is the same as regular LZ4_compress_default() - * Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT, which is 1. - * - * Return: Number of bytes written into buffer 'dest' - * (necessarily <= maxOutputSize) or 0 if compression fails - */ -int LZ4_compress_fast(const char *source, char *dest, int inputSize, - int maxOutputSize, int acceleration, void *wrkmem); - -/** - * LZ4_compress_destSize() - Compress as much data as possible - * from source to dest - * @source: source address of the original data - * @dest: output buffer address of the compressed data - * @sourceSizePtr: will be modified to indicate how many bytes where read - * from 'source' to fill 'dest'. New value is necessarily <= old value. - * @targetDestSize: Size of buffer 'dest' which must be already allocated - * @wrkmem: address of the working memory. - * This requires 'workmem' of LZ4_MEM_COMPRESS. - * - * Reverse the logic, by compressing as much data as possible - * from 'source' buffer into already allocated buffer 'dest' - * of size 'targetDestSize'. - * This function either compresses the entire 'source' content into 'dest' - * if it's large enough, or fill 'dest' buffer completely with as much data as - * possible from 'source'. 
- * - * Return: Number of bytes written into 'dest' (necessarily <= targetDestSize) - * or 0 if compression fails - */ -int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr, - int targetDestSize, void *wrkmem); - -/*-************************************************************************ - * Decompression Functions - **************************************************************************/ - -/** - * LZ4_decompress_fast() - Decompresses data from 'source' into 'dest' - * @source: source address of the compressed data - * @dest: output buffer address of the uncompressed data - * which must be already allocated with 'originalSize' bytes - * @originalSize: is the original and therefore uncompressed size - * - * Decompresses data from 'source' into 'dest'. - * This function fully respect memory boundaries for properly formed - * compressed data. - * It is a bit faster than LZ4_decompress_safe(). - * However, it does not provide any protection against intentionally - * modified data stream (malicious input). - * Use this function in trusted environment only - * (data to decode comes from a trusted source). - * - * Return: number of bytes read from the source buffer - * or a negative result if decompression fails. - */ -int LZ4_decompress_fast(const char *source, char *dest, int originalSize); - -/** - * LZ4_decompress_safe() - Decompression protected against buffer overflow - * @source: source address of the compressed data - * @dest: output buffer address of the uncompressed data - * which must be already allocated - * @compressedSize: is the precise full size of the compressed block - * @maxDecompressedSize: is the size of 'dest' buffer - * - * Decompresses data fom 'source' into 'dest'. - * If the source stream is detected malformed, the function will - * stop decoding and return a negative result. - * This function is protected against buffer overflow exploits, - * including malicious data packets. 
It never writes outside output buffer, - * nor reads outside input buffer. - * - * Return: number of bytes decompressed into destination buffer - * (necessarily <= maxDecompressedSize) - * or a negative result in case of error - */ -int LZ4_decompress_safe(const char *source, char *dest, int compressedSize, - int maxDecompressedSize); - -/** - * LZ4_decompress_safe_partial() - Decompress a block of size 'compressedSize' - * at position 'source' into buffer 'dest' - * @source: source address of the compressed data - * @dest: output buffer address of the decompressed data which must be - * already allocated - * @compressedSize: is the precise full size of the compressed block. - * @targetOutputSize: the decompression operation will try - * to stop as soon as 'targetOutputSize' has been reached - * @maxDecompressedSize: is the size of destination buffer - * - * This function decompresses a compressed block of size 'compressedSize' - * at position 'source' into destination buffer 'dest' - * of size 'maxDecompressedSize'. - * The function tries to stop decompressing operation as soon as - * 'targetOutputSize' has been reached, reducing decompression time. - * This function never writes outside of output buffer, - * and never reads outside of input buffer. - * It is therefore protected against malicious data packets. 
- * - * Return: the number of bytes decoded in the destination buffer - * (necessarily <= maxDecompressedSize) - * or a negative result in case of error - * - */ -int LZ4_decompress_safe_partial(const char *source, char *dest, - int compressedSize, int targetOutputSize, int maxDecompressedSize); - -/*-************************************************************************ - * LZ4 HC Compression - **************************************************************************/ - -/** - * LZ4_compress_HC() - Compress data from `src` into `dst`, using HC algorithm - * @src: source address of the original data - * @dst: output buffer address of the compressed data - * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE - * @dstCapacity: full or partial size of buffer 'dst', - * which must be already allocated - * @compressionLevel: Recommended values are between 4 and 9, although any - * value between 1 and LZ4HC_MAX_CLEVEL will work. - * Values >LZ4HC_MAX_CLEVEL behave the same as 16. - * @wrkmem: address of the working memory. - * This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS. - * - * Compress data from 'src' into 'dst', using the more powerful - * but slower "HC" algorithm. Compression is guaranteed to succeed if - * `dstCapacity >= LZ4_compressBound(srcSize) - * - * Return : the number of bytes written into 'dst' or 0 if compression fails. - */ -int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity, - int compressionLevel, void *wrkmem); - -/** - * LZ4_resetStreamHC() - Init an allocated 'LZ4_streamHC_t' structure - * @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure - * @compressionLevel: Recommended values are between 4 and 9, although any - * value between 1 and LZ4HC_MAX_CLEVEL will work. - * Values >LZ4HC_MAX_CLEVEL behave the same as 16. - * - * An LZ4_streamHC_t structure can be allocated once - * and re-used multiple times. 
- * Use this function to init an allocated `LZ4_streamHC_t` structure - * and start a new compression. - */ -void LZ4_resetStreamHC(LZ4_streamHC_t *streamHCPtr, int compressionLevel); - -/** - * LZ4_loadDictHC() - Load a static dictionary into LZ4_streamHC - * @streamHCPtr: pointer to the LZ4HC_stream_t - * @dictionary: dictionary to load - * @dictSize: size of dictionary - * - * Use this function to load a static dictionary into LZ4HC_stream. - * Any previous data will be forgotten, only 'dictionary' - * will remain in memory. - * Loading a size of 0 is allowed. - * - * Return : dictionary size, in bytes (necessarily <= 64 KB) - */ -int LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr, const char *dictionary, - int dictSize); - -/** - * LZ4_compress_HC_continue() - Compress 'src' using data from previously - * compressed blocks as a dictionary using the HC algorithm - * @streamHCPtr: Pointer to the previous 'LZ4_streamHC_t' structure - * @src: source address of the original data - * @dst: output buffer address of the compressed data, - * which must be already allocated - * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE - * @maxDstSize: full or partial size of buffer 'dest' - * which must be already allocated - * - * These functions compress data in successive blocks of any size, using - * previous blocks as dictionary. One key assumption is that previous - * blocks (up to 64 KB) remain read-accessible while - * compressing next blocks. There is an exception for ring buffers, - * which can be smaller than 64 KB. - * Ring buffers scenario is automatically detected and handled by - * LZ4_compress_HC_continue(). - * Before starting compression, state must be properly initialized, - * using LZ4_resetStreamHC(). - * A first "fictional block" can then be designated as - * initial dictionary, using LZ4_loadDictHC() (Optional). - * Then, use LZ4_compress_HC_continue() - * to compress each successive block. 
Previous memory blocks - * (including initial dictionary when present) must remain accessible - * and unmodified during compression. - * 'dst' buffer should be sized to handle worst case scenarios, using - * LZ4_compressBound(), to ensure operation success. - * If, for any reason, previous data blocks can't be preserved unmodified - * in memory during next compression block, - * you must save it to a safer memory space, using LZ4_saveDictHC(). - * Return value of LZ4_saveDictHC() is the size of dictionary - * effectively saved into 'safeBuffer'. - * - * Return: Number of bytes written into buffer 'dst' or 0 if compression fails - */ -int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr, const char *src, - char *dst, int srcSize, int maxDstSize); - -/** - * LZ4_saveDictHC() - Save static dictionary from LZ4HC_stream - * @streamHCPtr: pointer to the 'LZ4HC_stream_t' structure - * @safeBuffer: buffer to save dictionary to, must be already allocated - * @maxDictSize: size of 'safeBuffer' - * - * If previously compressed data block is not guaranteed - * to remain available at its memory location, - * save it into a safer place (char *safeBuffer). - * Note : you don't need to call LZ4_loadDictHC() afterwards, - * dictionary is immediately usable, you can therefore call - * LZ4_compress_HC_continue(). - * - * Return : saved dictionary size in bytes (necessarily <= maxDictSize), - * or 0 if error. - */ -int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer, - int maxDictSize); - -/*-********************************************* - * Streaming Compression Functions - ***********************************************/ - -/** - * LZ4_resetStream() - Init an allocated 'LZ4_stream_t' structure - * @LZ4_stream: pointer to the 'LZ4_stream_t' structure - * - * An LZ4_stream_t structure can be allocated once - * and re-used multiple times. - * Use this function to init an allocated `LZ4_stream_t` structure - * and start a new compression. 
- */ -void LZ4_resetStream(LZ4_stream_t *LZ4_stream); - -/** - * LZ4_loadDict() - Load a static dictionary into LZ4_stream - * @streamPtr: pointer to the LZ4_stream_t - * @dictionary: dictionary to load - * @dictSize: size of dictionary - * - * Use this function to load a static dictionary into LZ4_stream. - * Any previous data will be forgotten, only 'dictionary' - * will remain in memory. - * Loading a size of 0 is allowed. - * - * Return : dictionary size, in bytes (necessarily <= 64 KB) - */ -int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary, - int dictSize); - -/** - * LZ4_saveDict() - Save static dictionary from LZ4_stream - * @streamPtr: pointer to the 'LZ4_stream_t' structure - * @safeBuffer: buffer to save dictionary to, must be already allocated - * @dictSize: size of 'safeBuffer' - * - * If previously compressed data block is not guaranteed - * to remain available at its memory location, - * save it into a safer place (char *safeBuffer). - * Note : you don't need to call LZ4_loadDict() afterwards, - * dictionary is immediately usable, you can therefore call - * LZ4_compress_fast_continue(). - * - * Return : saved dictionary size in bytes (necessarily <= dictSize), - * or 0 if error. - */ -int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer, int dictSize); - -/** - * LZ4_compress_fast_continue() - Compress 'src' using data from previously - * compressed blocks as a dictionary - * @streamPtr: Pointer to the previous 'LZ4_stream_t' structure - * @src: source address of the original data - * @dst: output buffer address of the compressed data, - * which must be already allocated - * @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE - * @maxDstSize: full or partial size of buffer 'dest' - * which must be already allocated - * @acceleration: acceleration factor - * - * Compress buffer content 'src', using data from previously compressed blocks - * as dictionary to improve compression ratio. 
- * Important : Previous data blocks are assumed to still - * be present and unmodified ! - * If maxDstSize >= LZ4_compressBound(srcSize), - * compression is guaranteed to succeed, and runs faster. - * - * Return: Number of bytes written into buffer 'dst' or 0 if compression fails - */ -int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr, const char *src, - char *dst, int srcSize, int maxDstSize, int acceleration); - -/** - * LZ4_setStreamDecode() - Instruct where to find dictionary - * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure - * @dictionary: dictionary to use - * @dictSize: size of dictionary - * - * Use this function to instruct where to find the dictionary. - * Setting a size of 0 is allowed (same effect as reset). - * - * Return: 1 if OK, 0 if error - */ -int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode, - const char *dictionary, int dictSize); - -/** - * LZ4_decompress_fast_continue() - Decompress blocks in streaming mode - * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure - * @source: source address of the compressed data - * @dest: output buffer address of the uncompressed data - * which must be already allocated - * @compressedSize: is the precise full size of the compressed block - * @maxDecompressedSize: is the size of 'dest' buffer - * - * These decoding function allows decompression of multiple blocks - * in "streaming" mode. - * Previously decoded blocks *must* remain available at the memory position - * where they were decoded (up to 64 KB) - * In the case of a ring buffers, decoding buffer must be either : - * - Exactly same size as encoding buffer, with same update rule - * (block boundaries at same positions) In which case, - * the decoding & encoding ring buffer can have any size, - * including very small ones ( < 64 KB). - * - Larger than encoding buffer, by a minimum of maxBlockSize more bytes. - * maxBlockSize is implementation dependent. - * It's the maximum size you intend to compress into a single block. 
- * In which case, encoding and decoding buffers do not need - * to be synchronized, and encoding ring buffer can have any size, - * including small ones ( < 64 KB). - * - _At least_ 64 KB + 8 bytes + maxBlockSize. - * In which case, encoding and decoding buffers do not need to be - * synchronized, and encoding ring buffer can have any size, - * including larger than decoding buffer. W - * Whenever these conditions are not possible, save the last 64KB of decoded - * data into a safe buffer, and indicate where it is saved - * using LZ4_setStreamDecode() - * - * Return: number of bytes decompressed into destination buffer - * (necessarily <= maxDecompressedSize) - * or a negative result in case of error - */ -int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode, - const char *source, char *dest, int compressedSize, - int maxDecompressedSize); - -/** - * LZ4_decompress_fast_continue() - Decompress blocks in streaming mode - * @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure - * @source: source address of the compressed data - * @dest: output buffer address of the uncompressed data - * which must be already allocated with 'originalSize' bytes - * @originalSize: is the original and therefore uncompressed size - * - * These decoding function allows decompression of multiple blocks - * in "streaming" mode. - * Previously decoded blocks *must* remain available at the memory position - * where they were decoded (up to 64 KB) - * In the case of a ring buffers, decoding buffer must be either : - * - Exactly same size as encoding buffer, with same update rule - * (block boundaries at same positions) In which case, - * the decoding & encoding ring buffer can have any size, - * including very small ones ( < 64 KB). - * - Larger than encoding buffer, by a minimum of maxBlockSize more bytes. - * maxBlockSize is implementation dependent. - * It's the maximum size you intend to compress into a single block. 
- * In which case, encoding and decoding buffers do not need - * to be synchronized, and encoding ring buffer can have any size, - * including small ones ( < 64 KB). - * - _At least_ 64 KB + 8 bytes + maxBlockSize. - * In which case, encoding and decoding buffers do not need to be - * synchronized, and encoding ring buffer can have any size, - * including larger than decoding buffer. W - * Whenever these conditions are not possible, save the last 64KB of decoded - * data into a safe buffer, and indicate where it is saved - * using LZ4_setStreamDecode() - * - * Return: number of bytes decompressed into destination buffer - * (necessarily <= maxDecompressedSize) - * or a negative result in case of error - */ -int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode, - const char *source, char *dest, int originalSize); - -/** - * LZ4_decompress_safe_usingDict() - Same as LZ4_setStreamDecode() - * followed by LZ4_decompress_safe_continue() - * @source: source address of the compressed data - * @dest: output buffer address of the uncompressed data - * which must be already allocated - * @compressedSize: is the precise full size of the compressed block - * @maxDecompressedSize: is the size of 'dest' buffer - * @dictStart: pointer to the start of the dictionary in memory - * @dictSize: size of dictionary - * - * These decoding function works the same as - * a combination of LZ4_setStreamDecode() followed by - * LZ4_decompress_safe_continue() - * It is stand-alone, and don'tn eed a LZ4_streamDecode_t structure. 
- * - * Return: number of bytes decompressed into destination buffer - * (necessarily <= maxDecompressedSize) - * or a negative result in case of error - */ -int LZ4_decompress_safe_usingDict(const char *source, char *dest, - int compressedSize, int maxDecompressedSize, const char *dictStart, - int dictSize); - -/** - * LZ4_decompress_fast_usingDict() - Same as LZ4_setStreamDecode() - * followed by LZ4_decompress_fast_continue() - * @source: source address of the compressed data - * @dest: output buffer address of the uncompressed data - * which must be already allocated with 'originalSize' bytes - * @originalSize: is the original and therefore uncompressed size - * @dictStart: pointer to the start of the dictionary in memory - * @dictSize: size of dictionary - * - * These decoding function works the same as - * a combination of LZ4_setStreamDecode() followed by - * LZ4_decompress_safe_continue() - * It is stand-alone, and don'tn eed a LZ4_streamDecode_t structure. - * - * Return: number of bytes decompressed into destination buffer - * (necessarily <= maxDecompressedSize) - * or a negative result in case of error - */ -int LZ4_decompress_fast_usingDict(const char *source, char *dest, - int originalSize, const char *dictStart, int dictSize); - -#endif diff --git a/include/linux/lz4.h b/include/linux/lz4.h new file mode 120000 index 000000000000..9ff890762b05 --- /dev/null +++ b/include/linux/lz4.h @@ -0,0 +1 @@ +../../lib/lz4/lz4.h \ No newline at end of file diff --git a/lib/lz4/Makefile b/lib/lz4/Makefile deleted file mode 100644 index f7b113271d13..000000000000 --- a/lib/lz4/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -ccflags-y += -O3 - -obj-$(CONFIG_LZ4_COMPRESS) += lz4_compress.o -obj-$(CONFIG_LZ4HC_COMPRESS) += lz4hc_compress.o -obj-$(CONFIG_LZ4_DECOMPRESS) += lz4_decompress.o diff --git a/lib/lz4/lz4.c b/lib/lz4/lz4.c new file mode 100644 index 000000000000..a2f7abee19fb --- /dev/null +++ b/lib/lz4/lz4.c @@ -0,0 +1,2829 @@ +/* + LZ4 - Fast LZ compression 
algorithm + Copyright (C) 2011-2023, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repository : https://github.com/lz4/lz4 +*/ + +/*-************************************ +* Tuning parameters +**************************************/ +/* + * LZ4_HEAPMODE : + * Select how stateless compression functions like `LZ4_compress_default()` + * allocate memory for their hash table, + * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()). 
+ */ +#ifndef LZ4_HEAPMODE +# define LZ4_HEAPMODE 0 +#endif + +/* + * LZ4_ACCELERATION_DEFAULT : + * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0 + */ +#define LZ4_ACCELERATION_DEFAULT 1 +/* + * LZ4_ACCELERATION_MAX : + * Any "acceleration" value higher than this threshold + * get treated as LZ4_ACCELERATION_MAX instead (fix #876) + */ +#define LZ4_ACCELERATION_MAX 65537 + + +/*-************************************ +* CPU Feature Detection +**************************************/ +/* LZ4_FORCE_MEMORY_ACCESS + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets which assembly generation depends on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ +# if defined(__GNUC__) && \ + ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \ + || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define LZ4_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) || defined(_MSC_VER) +# define LZ4_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/* + * LZ4_FORCE_SW_BITCOUNT + * Define this parameter if your target system or compiler does not support hardware bit count + */ +#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */ +# undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */ +# define LZ4_FORCE_SW_BITCOUNT +#endif + + + +/*-************************************ +* Dependency +**************************************/ +/* + * LZ4_SRC_INCLUDED: + * Amalgamation flag, whether lz4.c is included + */ +#ifndef LZ4_SRC_INCLUDED +# define LZ4_SRC_INCLUDED 1 +#endif + +#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS +# define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */ +#endif + +#ifndef LZ4_STATIC_LINKING_ONLY +# define LZ4_STATIC_LINKING_ONLY +#endif +#include "lz4.h" +/* see also "memory routines" below */ + + +/*-************************************ +* Compiler Options +**************************************/ +#if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */ +# include /* only present in VS2005+ */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 6237) /* disable: C6237: conditional expression is always 0 */ +# pragma warning(disable : 6239) /* disable: C6239: ( && ) always evaluates to the result of */ +# pragma warning(disable : 6240) /* disable: C6240: ( && ) always evaluates to the result of */ +# pragma warning(disable : 
6326) /* disable: C6326: Potential comparison of a constant with another constant */ +#endif /* _MSC_VER */ + +#ifndef LZ4_FORCE_INLINE +# if defined (_MSC_VER) && !defined (__clang__) /* MSVC */ +# define LZ4_FORCE_INLINE static __forceinline +# else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# if defined (__GNUC__) || defined (__clang__) +# define LZ4_FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define LZ4_FORCE_INLINE static inline +# endif +# else +# define LZ4_FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +# endif /* _MSC_VER */ +#endif /* LZ4_FORCE_INLINE */ + +/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE + * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8, + * together with a simple 8-byte copy loop as a fall-back path. + * However, this optimization hurts the decompression speed by >30%, + * because the execution does not go to the optimized loop + * for typical compressible data, and all of the preamble checks + * before going to the fall-back path become useless overhead. + * This optimization happens only with the -O3 flag, and -O2 generates + * a simple 8-byte copy loop. + * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8 + * functions are annotated with __attribute__((optimize("O2"))), + * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute + * of LZ4_wildCopy8 does not affect the compression speed. 
+ */ +#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__) +# define LZ4_FORCE_O2 __attribute__((optimize("O2"))) +# undef LZ4_FORCE_INLINE +# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline)) +#else +# define LZ4_FORCE_O2 +#endif + +#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__) +# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#else +# define expect(expr,value) (expr) +#endif + +#ifndef likely +#define likely(expr) expect((expr) != 0, 1) +#endif +#ifndef unlikely +#define unlikely(expr) expect((expr) != 0, 0) +#endif + +/* Should the alignment test prove unreliable, for some reason, + * it can be disabled by setting LZ4_ALIGN_TEST to 0 */ +#ifndef LZ4_ALIGN_TEST /* can be externally provided */ +# define LZ4_ALIGN_TEST 1 +#endif + + +/*-************************************ +* Memory routines +**************************************/ + +/*! LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION : + * Disable relatively high-level LZ4/HC functions that use dynamic memory + * allocation functions (malloc(), calloc(), free()). + * + * Note that this is a compile-time switch. And since it disables + * public/stable LZ4 v1 API functions, we don't recommend using this + * symbol to generate a library for distribution. + * + * The following public functions are removed when this symbol is defined. 
 + * - lz4 : LZ4_createStream, LZ4_freeStream, + * LZ4_createStreamDecode, LZ4_freeStreamDecode, LZ4_create (deprecated) + * - lz4hc : LZ4_createStreamHC, LZ4_freeStreamHC, + * LZ4_createHC (deprecated), LZ4_freeHC (deprecated) + * - lz4frame, lz4file : All LZ4F_* functions + */ +#if defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +# define ALLOC(s) lz4_error_memory_allocation_is_disabled +# define ALLOC_AND_ZERO(s) lz4_error_memory_allocation_is_disabled +# define FREEMEM(p) lz4_error_memory_allocation_is_disabled +#elif defined(LZ4_USER_MEMORY_FUNCTIONS) +/* memory management functions can be customized by user project. + * Below functions must exist somewhere in the Project + * and be available at link time */ +void* LZ4_malloc(size_t s); +void* LZ4_calloc(size_t n, size_t s); +void LZ4_free(void* p); +# define ALLOC(s) LZ4_malloc(s) +# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s) +# define FREEMEM(p) LZ4_free(p) +#else +# include <stdlib.h> /* malloc, calloc, free */ +# define ALLOC(s) malloc(s) +# define ALLOC_AND_ZERO(s) calloc(1,s) +# define FREEMEM(p) free(p) +#endif + +#if !
LZ4_FREESTANDING +# include <string.h> /* memset, memcpy */ +#endif +#if !defined(LZ4_memset) +# define LZ4_memset(p,v,s) memset((p),(v),(s)) +#endif +#define MEM_INIT(p,v,s) LZ4_memset((p),(v),(s)) + + +/*-************************************ +* Common Constants +**************************************/ +#define MINMATCH 4 + +#define WILDCOPYLENGTH 8 +#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */ +#define FASTLOOP_SAFE_DISTANCE 64 +static const int LZ4_minLength = (MFLIMIT+1); + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define LZ4_DISTANCE_ABSOLUTE_MAX 65535 +#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */ +# error "LZ4_DISTANCE_MAX is too big : must be <= 65535" +#endif + +#define ML_BITS 4 +#define ML_MASK ((1U<<ML_BITS)-1) +#define RUN_BITS (8-ML_BITS) +#define RUN_MASK ((1U<<RUN_BITS)-1) + + +/*-************************************ +* Error detection +**************************************/ +#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1) +# include <assert.h> +#else +# ifndef assert +# define assert(condition) ((void)0) +# endif +#endif + +#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */ + +#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) +# include <stdio.h> + static int g_debuglog_enable = 1; +# define DEBUGLOG(l, ...) { \ + if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \ + fprintf(stderr, __FILE__ " %i: ", __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, " \n"); \ + } } +#else +# define DEBUGLOG(l, ...) 
{} /* disabled */ +#endif + +static int LZ4_isAligned(const void* ptr, size_t alignment) +{ + return ((size_t)ptr & (alignment -1)) == 0; +} + + +/*-************************************ +* Types +**************************************/ +#include <limits.h> +#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# include <stdint.h> + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef uintptr_t uptrval; +#else +# if UINT_MAX != 4294967295UL +# error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4" +# endif + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef size_t uptrval; /* generally true, except OpenVMS-64 */ +#endif + +#if defined(__x86_64__) + typedef U64 reg_t; /* 64-bits in x32 mode */ +#else + typedef size_t reg_t; /* 32-bits in x32 mode */ +#endif + +typedef enum { + notLimited = 0, + limitedOutput = 1, + fillOutput = 2 +} limitedOutput_directive; + + +/*-************************************ +* Reading and writing into memory +**************************************/ + +/** + * LZ4 relies on memcpy with a constant size being inlined. In freestanding + * environments, the compiler can't assume the implementation of memcpy() is + * standard compliant, so it can't apply its specialized memcpy() inlining + * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze + * memcpy() as if it were standard compliant, so it can inline it in freestanding + * environments. This is needed when decompressing the Linux Kernel, for example. 
+ */ +#if !defined(LZ4_memcpy) +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) +# else +# define LZ4_memcpy(dst, src, size) memcpy(dst, src, size) +# endif +#endif + +#if !defined(LZ4_memmove) +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define LZ4_memmove __builtin_memmove +# else +# define LZ4_memmove memmove +# endif +#endif + +static unsigned LZ4_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#define LZ4_PACK( __Declaration__ ) __Declaration__ __attribute__((__packed__)) +#elif defined(_MSC_VER) +#define LZ4_PACK( __Declaration__ ) __pragma( pack(push, 1) ) __Declaration__ __pragma( pack(pop)) +#endif + +#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2) +/* lie to the compiler about data alignment; use with caution */ + +static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; } +static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; } +static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; } + +static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } + +#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +LZ4_PACK(typedef struct { U16 u16; }) LZ4_unalign16; +LZ4_PACK(typedef struct { U32 u32; }) LZ4_unalign32; +LZ4_PACK(typedef struct { reg_t uArch; }) LZ4_unalignST; + +static U16 LZ4_read16(const void* ptr) { return ((const LZ4_unalign16*)ptr)->u16; } +static U32 LZ4_read32(const void* ptr) { return ((const LZ4_unalign32*)ptr)->u32; } +static reg_t LZ4_read_ARCH(const void* ptr) { return ((const 
LZ4_unalignST*)ptr)->uArch; } + +static void LZ4_write16(void* memPtr, U16 value) { ((LZ4_unalign16*)memPtr)->u16 = value; } +static void LZ4_write32(void* memPtr, U32 value) { ((LZ4_unalign32*)memPtr)->u32 = value; } + +#else /* safe and portable access using memcpy() */ + +static U16 LZ4_read16(const void* memPtr) +{ + U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +static U32 LZ4_read32(const void* memPtr) +{ + U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +static reg_t LZ4_read_ARCH(const void* memPtr) +{ + reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +static void LZ4_write16(void* memPtr, U16 value) +{ + LZ4_memcpy(memPtr, &value, sizeof(value)); +} + +static void LZ4_write32(void* memPtr, U32 value) +{ + LZ4_memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* LZ4_FORCE_MEMORY_ACCESS */ + + +static U16 LZ4_readLE16(const void* memPtr) +{ + if (LZ4_isLittleEndian()) { + return LZ4_read16(memPtr); + } else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)((U16)p[0] | (p[1]<<8)); + } +} + +#ifdef LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT +static U32 LZ4_readLE32(const void* memPtr) +{ + if (LZ4_isLittleEndian()) { + return LZ4_read32(memPtr); + } else { + const BYTE* p = (const BYTE*)memPtr; + return (U32)p[0] | (p[1]<<8) | (p[2]<<16) | (p[3]<<24); + } +} +#endif + +static void LZ4_writeLE16(void* memPtr, U16 value) +{ + if (LZ4_isLittleEndian()) { + LZ4_write16(memPtr, value); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE) value; + p[1] = (BYTE)(value>>8); + } +} + +/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */ +LZ4_FORCE_INLINE +void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* const e = (BYTE*)dstEnd; + + do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d= 16. 
*/ +LZ4_FORCE_INLINE void +LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* const e = (BYTE*)dstEnd; + + do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d= dstPtr + MINMATCH + * - there is at least 12 bytes available to write after dstEnd */ +LZ4_FORCE_INLINE void +LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) +{ + BYTE v[8]; + + assert(dstEnd >= dstPtr + MINMATCH); + + switch(offset) { + case 1: + MEM_INIT(v, *srcPtr, 8); + break; + case 2: + LZ4_memcpy(v, srcPtr, 2); + LZ4_memcpy(&v[2], srcPtr, 2); +#if defined(_MSC_VER) && (_MSC_VER <= 1937) /* MSVC 2022 ver 17.7 or earlier */ +# pragma warning(push) +# pragma warning(disable : 6385) /* warning C6385: Reading invalid data from 'v'. */ +#endif + LZ4_memcpy(&v[4], v, 4); +#if defined(_MSC_VER) && (_MSC_VER <= 1937) /* MSVC 2022 ver 17.7 or earlier */ +# pragma warning(pop) +#endif + break; + case 4: + LZ4_memcpy(v, srcPtr, 4); + LZ4_memcpy(&v[4], srcPtr, 4); + break; + default: + LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset); + return; + } + + LZ4_memcpy(dstPtr, v, 8); + dstPtr += 8; + while (dstPtr < dstEnd) { + LZ4_memcpy(dstPtr, v, 8); + dstPtr += 8; + } +} +#endif + + +/*-************************************ +* Common functions +**************************************/ +static unsigned LZ4_NbCommonBytes (reg_t val) +{ + assert(val != 0); + if (LZ4_isLittleEndian()) { + if (sizeof(val) == 8) { +# if defined(_MSC_VER) && (_MSC_VER >= 1800) && (defined(_M_AMD64) && !defined(_M_ARM64EC)) && !defined(LZ4_FORCE_SW_BITCOUNT) +/*-************************************************************************************************* +* ARM64EC is a Microsoft-designed ARM64 ABI compatible with AMD64 applications on ARM64 Windows 11. +* The ARM64EC ABI does not support AVX/AVX2/AVX512 instructions, nor their relevant intrinsics +* including _tzcnt_u64. 
Therefore, we need to neuter the _tzcnt_u64 code path for ARM64EC. +****************************************************************************************************/ +# if defined(__clang__) && (__clang_major__ < 10) + /* Avoid undefined clang-cl intrinsics issue. + * See https://github.com/lz4/lz4/pull/1017 for details. */ + return (unsigned)__builtin_ia32_tzcnt_u64(val) >> 3; +# else + /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */ + return (unsigned)_tzcnt_u64(val) >> 3; +# endif +# elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanForward64(&r, (U64)val); + return (unsigned)r >> 3; +# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_ctzll((U64)val) >> 3; +# else + const U64 m = 0x0101010101010101ULL; + val ^= val - 1; + return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56); +# endif + } else /* 32 bits */ { +# if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanForward(&r, (U32)val); + return (unsigned)r >> 3; +# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_ctz((U32)val) >> 3; +# else + const U32 m = 0x01010101; + return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24; +# endif + } + } else /* Big Endian CPU */ { + if (sizeof(val)==8) { +# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_clzll((U64)val) >> 3; +# else +#if 1 + /* this method is probably faster, + * but adds a 128 bytes lookup table */ + static const unsigned char ctz7_tab[128] = { + 7, 0, 1, 0, 2, 0, 1, 
0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + }; + U64 const mask = 0x0101010101010101ULL; + U64 const t = (((val >> 8) - mask) | val) & mask; + return ctz7_tab[(t * 0x0080402010080402ULL) >> 57]; +#else + /* this method doesn't consume memory space like the previous one, + * but it contains several branches, + * that may end up slowing execution */ + static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits. + Just to avoid some static analyzer complaining about shift by 32 on 32-bits target. + Note that this code path is never triggered in 32-bits mode. */ + unsigned r; + if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +#endif +# endif + } else /* 32 bits */ { +# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_clz((U32)val) >> 3; +# else + val >>= 8; + val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) | + (val + 0x00FF0000)) >> 24; + return (unsigned)val ^ 3; +# endif + } + } +} + + +#define STEPSIZE sizeof(reg_t) +LZ4_FORCE_INLINE +unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) +{ + const BYTE* const pStart = pIn; + + if (likely(pIn < pInLimit-(STEPSIZE-1))) { + reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + if (!diff) { + pIn+=STEPSIZE; pMatch+=STEPSIZE; + } else { + return LZ4_NbCommonBytes(diff); + } } + + while (likely(pIn < pInLimit-(STEPSIZE-1))) { + reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + if (!diff) { pIn+=STEPSIZE; 
pMatch+=STEPSIZE; continue; } + pIn += LZ4_NbCommonBytes(diff); + return (unsigned)(pIn - pStart); + } + + if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn compression run slower on incompressible data */ + + +/*-************************************ +* Local Structures and types +**************************************/ +typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t; + +/** + * This enum distinguishes several different modes of accessing previous + * content in the stream. + * + * - noDict : There is no preceding content. + * - withPrefix64k : Table entries up to ctx->dictSize before the current blob + * blob being compressed are valid and refer to the preceding + * content (of length ctx->dictSize), which is available + * contiguously preceding in memory the content currently + * being compressed. + * - usingExtDict : Like withPrefix64k, but the preceding content is somewhere + * else in memory, starting at ctx->dictionary with length + * ctx->dictSize. + * - usingDictCtx : Everything concerning the preceding content is + * in a separate context, pointed to by ctx->dictCtx. + * ctx->dictionary, ctx->dictSize, and table entries + * in the current context that refer to positions + * preceding the beginning of the current compression are + * ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx + * ->dictSize describe the location and size of the preceding + * content, and matches are found by looking in the ctx + * ->dictCtx->hashTable. 
+ */ +typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive; +typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; + + +/*-************************************ +* Local Utils +**************************************/ +int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; } +const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; } +int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } +int LZ4_sizeofState(void) { return sizeof(LZ4_stream_t); } + + +/*-**************************************** +* Internal Definitions, used only in Tests +*******************************************/ +#if defined (__cplusplus) +extern "C" { +#endif + +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize); + +int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, + int compressedSize, int maxOutputSize, + const void* dictStart, size_t dictSize); +int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest, + int compressedSize, int targetOutputSize, int dstCapacity, + const void* dictStart, size_t dictSize); +#if defined (__cplusplus) +} +#endif + +/*-****************************** +* Compression functions +********************************/ +LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType) +{ + if (tableType == byU16) + return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); + else + return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); +} + +LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType) +{ + const U32 hashLog = (tableType == byU16) ? 
LZ4_HASHLOG+1 : LZ4_HASHLOG; + if (LZ4_isLittleEndian()) { + const U64 prime5bytes = 889523592379ULL; + return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog)); + } else { + const U64 prime8bytes = 11400714785074694791ULL; + return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog)); + } +} + +LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType) +{ + if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType); + +#ifdef LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT + return LZ4_hash4(LZ4_readLE32(p), tableType); +#else + return LZ4_hash4(LZ4_read32(p), tableType); +#endif +} + +LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType) +{ + switch (tableType) + { + default: /* fallthrough */ + case clearedTable: { /* illegal! */ assert(0); return; } + case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; } + } +} + +LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType) +{ + switch (tableType) + { + default: /* fallthrough */ + case clearedTable: /* fallthrough */ + case byPtr: { /* illegal! 
*/ assert(0); return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; } + case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; } + } +} + +/* LZ4_putPosition*() : only used in byPtr mode */ +LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, + void* tableBase, tableType_t const tableType) +{ + const BYTE** const hashTable = (const BYTE**)tableBase; + assert(tableType == byPtr); (void)tableType; + hashTable[h] = p; +} + +LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType) +{ + U32 const h = LZ4_hashPosition(p, tableType); + LZ4_putPositionOnHash(p, h, tableBase, tableType); +} + +/* LZ4_getIndexOnHash() : + * Index of match position registered in hash table. + * hash position must be calculated by using base+index, or dictBase+index. + * Assumption 1 : only valid if tableType == byU32 or byU16. + * Assumption 2 : h is presumed valid (within limits of hash table) + */ +LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType) +{ + LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2); + if (tableType == byU32) { + const U32* const hashTable = (const U32*) tableBase; + assert(h < (1U << (LZ4_MEMORY_USAGE-2))); + return hashTable[h]; + } + if (tableType == byU16) { + const U16* const hashTable = (const U16*) tableBase; + assert(h < (1U << (LZ4_MEMORY_USAGE-1))); + return hashTable[h]; + } + assert(0); return 0; /* forbidden case */ +} + +static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType) +{ + assert(tableType == byPtr); (void)tableType; + { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; } +} + +LZ4_FORCE_INLINE const BYTE* +LZ4_getPosition(const BYTE* p, + const void* tableBase, tableType_t tableType) +{ + U32 const h = LZ4_hashPosition(p, tableType); + return LZ4_getPositionOnHash(h, tableBase, tableType); +} + +LZ4_FORCE_INLINE 
void +LZ4_prepareTable(LZ4_stream_t_internal* const cctx, + const int inputSize, + const tableType_t tableType) { + /* If the table hasn't been used, it's guaranteed to be zeroed out, and is + * therefore safe to use no matter what mode we're in. Otherwise, we figure + * out if it's safe to leave as is or whether it needs to be reset. + */ + if ((tableType_t)cctx->tableType != clearedTable) { + assert(inputSize >= 0); + if ((tableType_t)cctx->tableType != tableType + || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU) + || ((tableType == byU32) && cctx->currentOffset > 1 GB) + || tableType == byPtr + || inputSize >= 4 KB) + { + DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx); + MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE); + cctx->currentOffset = 0; + cctx->tableType = (U32)clearedTable; + } else { + DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)"); + } + } + + /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back, + * is faster than compressing without a gap. + * However, compressing with currentOffset == 0 is faster still, + * so we preserve that case. + */ + if (cctx->currentOffset != 0 && tableType == byU32) { + DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset"); + cctx->currentOffset += 64 KB; + } + + /* Finally, clear history */ + cctx->dictCtx = NULL; + cctx->dictionary = NULL; + cctx->dictSize = 0; +} + +/** LZ4_compress_generic_validated() : + * inlined, to ensure branches are decided at compilation time. 
+ * The following conditions are presumed already validated: + * - source != NULL + * - inputSize > 0 + */ +LZ4_FORCE_INLINE int LZ4_compress_generic_validated( + LZ4_stream_t_internal* const cctx, + const char* const source, + char* const dest, + const int inputSize, + int* inputConsumed, /* only written when outputDirective == fillOutput */ + const int maxOutputSize, + const limitedOutput_directive outputDirective, + const tableType_t tableType, + const dict_directive dictDirective, + const dictIssue_directive dictIssue, + const int acceleration) +{ + int result; + const BYTE* ip = (const BYTE*)source; + + U32 const startIndex = cctx->currentOffset; + const BYTE* base = (const BYTE*)source - startIndex; + const BYTE* lowLimit; + + const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx; + const BYTE* const dictionary = + dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary; + const U32 dictSize = + dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize; + const U32 dictDelta = + (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0; /* make indexes in dictCtx comparable with indexes in current context */ + + int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx); + U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictDirective == dictSmall */ + const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary; + const BYTE* anchor = (const BYTE*) source; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1; + const BYTE* const matchlimit = iend - LASTLITERALS; + + /* the dictCtx currentOffset is indexed on the start of the dictionary, + * while a dictionary in the current context precedes the currentOffset */ + const BYTE* dictBase = (dictionary == NULL) ? NULL : + (dictDirective == usingDictCtx) ? 
+ dictionary + dictSize - dictCtx->currentOffset : + dictionary + dictSize - startIndex; + + BYTE* op = (BYTE*) dest; + BYTE* const olimit = op + maxOutputSize; + + U32 offset = 0; + U32 forwardH; + + DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType); + assert(ip != NULL); + if (tableType == byU16) assert(inputSize= 1); + + lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0); + + /* Update context state */ + if (dictDirective == usingDictCtx) { + /* Subsequent linked blocks can't use the dictionary. */ + /* Instead, they use the block we just compressed. */ + cctx->dictCtx = NULL; + cctx->dictSize = (U32)inputSize; + } else { + cctx->dictSize += (U32)inputSize; + } + cctx->currentOffset += (U32)inputSize; + cctx->tableType = (U32)tableType; + + if (inputSizehashTable, byPtr); + } else { + LZ4_putIndexOnHash(startIndex, h, cctx->hashTable, tableType); + } } + ip++; forwardH = LZ4_hashPosition(ip, tableType); + + /* Main Loop */ + for ( ; ; ) { + const BYTE* match; + BYTE* token; + const BYTE* filledIp; + + /* Find a match */ + if (tableType == byPtr) { + const BYTE* forwardIp = ip; + int step = 1; + int searchMatchNb = acceleration << LZ4_skipTrigger; + do { + U32 const h = forwardH; + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); + + if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; + assert(ip < mflimitPlusOne); + + match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType); + forwardH = LZ4_hashPosition(forwardIp, tableType); + LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType); + + } while ( (match+LZ4_DISTANCE_MAX < ip) + || (LZ4_read32(match) != LZ4_read32(ip)) ); + + } else { /* byU32, byU16 */ + + const BYTE* forwardIp = ip; + int step = 1; + int searchMatchNb = acceleration << LZ4_skipTrigger; + do { + U32 const h = forwardH; + U32 const current = (U32)(forwardIp - base); + U32 matchIndex = LZ4_getIndexOnHash(h, 
cctx->hashTable, tableType); + assert(matchIndex <= current); + assert(forwardIp - base < (ptrdiff_t)(2 GB - 1)); + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); + + if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; + assert(ip < mflimitPlusOne); + + if (dictDirective == usingDictCtx) { + if (matchIndex < startIndex) { + /* there was no match, try the dictionary */ + assert(tableType == byU32); + matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); + match = dictBase + matchIndex; + matchIndex += dictDelta; /* make dictCtx index comparable with current context */ + lowLimit = dictionary; + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; + } + } else if (dictDirective == usingExtDict) { + if (matchIndex < startIndex) { + DEBUGLOG(7, "extDict candidate: matchIndex=%5u < startIndex=%5u", matchIndex, startIndex); + assert(startIndex - matchIndex >= MINMATCH); + assert(dictBase); + match = dictBase + matchIndex; + lowLimit = dictionary; + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; + } + } else { /* single continuous memory segment */ + match = base + matchIndex; + } + forwardH = LZ4_hashPosition(forwardIp, tableType); + LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); + + DEBUGLOG(7, "candidate at pos=%u (offset=%u \n", matchIndex, current - matchIndex); + if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; } /* match outside of valid area */ + assert(matchIndex < current); + if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX)) + && (matchIndex+LZ4_DISTANCE_MAX < current)) { + continue; + } /* too far */ + assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* match now expected within distance */ + + if (LZ4_read32(match) == LZ4_read32(ip)) { + if (maybe_extMem) offset = current - matchIndex; + break; /* match found */ + } + + } while(1); + } + + /* Catch up */ + filledIp = ip; + assert(ip > 
anchor); /* this is always true as ip has been advanced before entering the main loop */ + if ((match > lowLimit) && unlikely(ip[-1] == match[-1])) { + do { ip--; match--; } while (((ip > anchor) & (match > lowLimit)) && (unlikely(ip[-1] == match[-1]))); + } + + /* Encode Literals */ + { unsigned const litLength = (unsigned)(ip - anchor); + token = op++; + if ((outputDirective == limitedOutput) && /* Check output buffer overflow */ + (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) { + return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ + } + if ((outputDirective == fillOutput) && + (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) { + op--; + goto _last_literals; + } + if (litLength >= RUN_MASK) { + unsigned len = litLength - RUN_MASK; + *token = (RUN_MASK<= 255 ; len-=255) *op++ = 255; + *op++ = (BYTE)len; + } + else *token = (BYTE)(litLength< olimit)) { + /* the match was too close to the end, rewind and go to last literals */ + op = token; + goto _last_literals; + } + + /* Encode Offset */ + if (maybe_extMem) { /* static test */ + DEBUGLOG(6, " with offset=%u (ext if > %i)", offset, (int)(ip - (const BYTE*)source)); + assert(offset <= LZ4_DISTANCE_MAX && offset > 0); + LZ4_writeLE16(op, (U16)offset); op+=2; + } else { + DEBUGLOG(6, " with offset=%u (same segment)", (U32)(ip - match)); + assert(ip-match <= LZ4_DISTANCE_MAX); + LZ4_writeLE16(op, (U16)(ip - match)); op+=2; + } + + /* Encode MatchLength */ + { unsigned matchCode; + + if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx) + && (lowLimit==dictionary) /* match within extDict */ ) { + const BYTE* limit = ip + (dictEnd-match); + assert(dictEnd > match); + if (limit > matchlimit) limit = matchlimit; + matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit); + ip 
+= (size_t)matchCode + MINMATCH; + if (ip==limit) { + unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit); + matchCode += more; + ip += more; + } + DEBUGLOG(6, " with matchLength=%u starting in extDict", matchCode+MINMATCH); + } else { + matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit); + ip += (size_t)matchCode + MINMATCH; + DEBUGLOG(6, " with matchLength=%u", matchCode+MINMATCH); + } + + if ((outputDirective) && /* Check output buffer overflow */ + (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) { + if (outputDirective == fillOutput) { + /* Match description too long : reduce it */ + U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255; + ip -= matchCode - newMatchCode; + assert(newMatchCode < matchCode); + matchCode = newMatchCode; + if (unlikely(ip <= filledIp)) { + /* We have already filled up to filledIp so if ip ends up less than filledIp + * we have positions in the hash table beyond the current position. This is + * a problem if we reuse the hash table. So we have to remove these positions + * from the hash table. + */ + const BYTE* ptr; + DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip)); + for (ptr = ip; ptr <= filledIp; ++ptr) { + U32 const h = LZ4_hashPosition(ptr, tableType); + LZ4_clearHash(h, cctx->hashTable, tableType); + } + } + } else { + assert(outputDirective == limitedOutput); + return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ + } + } + if (matchCode >= ML_MASK) { + *token += ML_MASK; + matchCode -= ML_MASK; + LZ4_write32(op, 0xFFFFFFFF); + while (matchCode >= 4*255) { + op+=4; + LZ4_write32(op, 0xFFFFFFFF); + matchCode -= 4*255; + } + op += matchCode / 255; + *op++ = (BYTE)(matchCode % 255); + } else + *token += (BYTE)(matchCode); + } + /* Ensure we have enough space for the last literals. 
*/ + assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit)); + + anchor = ip; + + /* Test end of chunk */ + if (ip >= mflimitPlusOne) break; + + /* Fill table */ + { U32 const h = LZ4_hashPosition(ip-2, tableType); + if (tableType == byPtr) { + LZ4_putPositionOnHash(ip-2, h, cctx->hashTable, byPtr); + } else { + U32 const idx = (U32)((ip-2) - base); + LZ4_putIndexOnHash(idx, h, cctx->hashTable, tableType); + } } + + /* Test next position */ + if (tableType == byPtr) { + + match = LZ4_getPosition(ip, cctx->hashTable, tableType); + LZ4_putPosition(ip, cctx->hashTable, tableType); + if ( (match+LZ4_DISTANCE_MAX >= ip) + && (LZ4_read32(match) == LZ4_read32(ip)) ) + { token=op++; *token=0; goto _next_match; } + + } else { /* byU32, byU16 */ + + U32 const h = LZ4_hashPosition(ip, tableType); + U32 const current = (U32)(ip-base); + U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); + assert(matchIndex < current); + if (dictDirective == usingDictCtx) { + if (matchIndex < startIndex) { + /* there was no match, try the dictionary */ + assert(tableType == byU32); + matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); + match = dictBase + matchIndex; + lowLimit = dictionary; /* required for match length counter */ + matchIndex += dictDelta; + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; /* required for match length counter */ + } + } else if (dictDirective==usingExtDict) { + if (matchIndex < startIndex) { + assert(dictBase); + match = dictBase + matchIndex; + lowLimit = dictionary; /* required for match length counter */ + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; /* required for match length counter */ + } + } else { /* single memory segment */ + match = base + matchIndex; + } + LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); + assert(matchIndex < current); + if ( ((dictIssue==dictSmall) ? 
(matchIndex >= prefixIdxLimit) : 1) + && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current)) + && (LZ4_read32(match) == LZ4_read32(ip)) ) { + token=op++; + *token=0; + if (maybe_extMem) offset = current - matchIndex; + DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i", + (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source)); + goto _next_match; + } + } + + /* Prepare next loop */ + forwardH = LZ4_hashPosition(++ip, tableType); + + } + +_last_literals: + /* Encode Last Literals */ + { size_t lastRun = (size_t)(iend - anchor); + if ( (outputDirective) && /* Check output buffer overflow */ + (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) { + if (outputDirective == fillOutput) { + /* adapt lastRun to fill 'dst' */ + assert(olimit >= op); + lastRun = (size_t)(olimit-op) - 1/*token*/; + lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/ + } else { + assert(outputDirective == limitedOutput); + return 0; /* cannot compress within `dst` budget. 
Stored indexes in hash table are nonetheless fine */ + } + } + DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun); + if (lastRun >= RUN_MASK) { + size_t accumulator = lastRun - RUN_MASK; + *op++ = RUN_MASK << ML_BITS; + for(; accumulator >= 255 ; accumulator-=255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRun< 0); + DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result); + return result; +} + +/** LZ4_compress_generic() : + * inlined, to ensure branches are decided at compilation time; + * takes care of src == (NULL, 0) + * and forward the rest to LZ4_compress_generic_validated */ +LZ4_FORCE_INLINE int LZ4_compress_generic( + LZ4_stream_t_internal* const cctx, + const char* const src, + char* const dst, + const int srcSize, + int *inputConsumed, /* only written when outputDirective == fillOutput */ + const int dstCapacity, + const limitedOutput_directive outputDirective, + const tableType_t tableType, + const dict_directive dictDirective, + const dictIssue_directive dictIssue, + const int acceleration) +{ + DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i", + srcSize, dstCapacity); + + if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported srcSize, too large (or negative) */ + if (srcSize == 0) { /* src == NULL supported if srcSize == 0 */ + if (outputDirective != notLimited && dstCapacity <= 0) return 0; /* no output, can't write anything */ + DEBUGLOG(5, "Generating an empty block"); + assert(outputDirective == notLimited || dstCapacity >= 1); + assert(dst != NULL); + dst[0] = 0; + if (outputDirective == fillOutput) { + assert (inputConsumed != NULL); + *inputConsumed = 0; + } + return 1; + } + assert(src != NULL); + + return LZ4_compress_generic_validated(cctx, src, dst, srcSize, + inputConsumed, /* only written into if outputDirective == fillOutput */ + dstCapacity, outputDirective, + tableType, dictDirective, dictIssue, acceleration); +} + + +int 
LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) +{ + LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse; + assert(ctx != NULL); + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + if (maxOutputSize >= LZ4_compressBound(inputSize)) { + if (inputSize < LZ4_64Klimit) { + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration); + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + if (inputSize < LZ4_64Klimit) { + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } +} + +/** + * LZ4_compress_fast_extState_fastReset() : + * A variant of LZ4_compress_fast_extState(). + * + * Using this variant avoids an expensive initialization step. It is only safe + * to call if the state buffer is known to be correctly initialized already + * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of + * "correctly initialized"). 
+ */ +int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration) +{ + LZ4_stream_t_internal* const ctx = &((LZ4_stream_t*)state)->internal_donotuse; + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + assert(ctx != NULL); + + if (dstCapacity >= LZ4_compressBound(srcSize)) { + if (srcSize < LZ4_64Klimit) { + const tableType_t tableType = byU16; + LZ4_prepareTable(ctx, srcSize, tableType); + if (ctx->currentOffset) { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration); + } else { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + LZ4_prepareTable(ctx, srcSize, tableType); + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + if (srcSize < LZ4_64Klimit) { + const tableType_t tableType = byU16; + LZ4_prepareTable(ctx, srcSize, tableType); + if (ctx->currentOffset) { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration); + } else { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? 
byPtr : byU32; + LZ4_prepareTable(ctx, srcSize, tableType); + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } +} + + +int LZ4_compress_fast(const char* src, char* dest, int srcSize, int dstCapacity, int acceleration) +{ + int result; +#if (LZ4_HEAPMODE) + LZ4_stream_t* const ctxPtr = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ + if (ctxPtr == NULL) return 0; +#else + LZ4_stream_t ctx; + LZ4_stream_t* const ctxPtr = &ctx; +#endif + result = LZ4_compress_fast_extState(ctxPtr, src, dest, srcSize, dstCapacity, acceleration); + +#if (LZ4_HEAPMODE) + FREEMEM(ctxPtr); +#endif + return result; +} + + +int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity) +{ + return LZ4_compress_fast(src, dst, srcSize, dstCapacity, 1); +} + + +/* Note!: This function leaves the stream in an unclean/broken state! + * It is not safe to subsequently use the same state with a _fastReset() or + * _continue() call without resetting it. */ +static int LZ4_compress_destSize_extState_internal(LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration) +{ + void* const s = LZ4_initStream(state, sizeof (*state)); + assert(s != NULL); (void)s; + + if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) { /* compression success is guaranteed */ + return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, acceleration); + } else { + if (*srcSizePtr < LZ4_64Klimit) { + return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, acceleration); + } else { + tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? 
byPtr : byU32; + return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, acceleration); + } } +} + +int LZ4_compress_destSize_extState(void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration) +{ + int const r = LZ4_compress_destSize_extState_internal((LZ4_stream_t*)state, src, dst, srcSizePtr, targetDstSize, acceleration); + /* clean the state on exit */ + LZ4_initStream(state, sizeof (LZ4_stream_t)); + return r; +} + + +int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize) +{ +#if (LZ4_HEAPMODE) + LZ4_stream_t* const ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ + if (ctx == NULL) return 0; +#else + LZ4_stream_t ctxBody; + LZ4_stream_t* const ctx = &ctxBody; +#endif + + int result = LZ4_compress_destSize_extState_internal(ctx, src, dst, srcSizePtr, targetDstSize, 1); + +#if (LZ4_HEAPMODE) + FREEMEM(ctx); +#endif + return result; +} + + + +/*-****************************** +* Streaming functions +********************************/ + +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +LZ4_stream_t* LZ4_createStream(void) +{ + LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); + LZ4_STATIC_ASSERT(sizeof(LZ4_stream_t) >= sizeof(LZ4_stream_t_internal)); + DEBUGLOG(4, "LZ4_createStream %p", lz4s); + if (lz4s == NULL) return NULL; + LZ4_initStream(lz4s, sizeof(*lz4s)); + return lz4s; +} +#endif + +static size_t LZ4_stream_t_alignment(void) +{ +#if LZ4_ALIGN_TEST + typedef struct { char c; LZ4_stream_t t; } t_a; + return sizeof(t_a) - sizeof(LZ4_stream_t); +#else + return 1; /* effectively disabled */ +#endif +} + +LZ4_stream_t* LZ4_initStream (void* buffer, size_t size) +{ + DEBUGLOG(5, "LZ4_initStream"); + if (buffer == NULL) { return NULL; } + if (size < sizeof(LZ4_stream_t)) { return NULL; } + if (!LZ4_isAligned(buffer, 
LZ4_stream_t_alignment())) return NULL; + MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal)); + return (LZ4_stream_t*)buffer; +} + +/* resetStream is now deprecated, + * prefer initStream() which is more general */ +void LZ4_resetStream (LZ4_stream_t* LZ4_stream) +{ + DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream); + MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal)); +} + +void LZ4_resetStream_fast(LZ4_stream_t* ctx) { + LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32); +} + +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +int LZ4_freeStream (LZ4_stream_t* LZ4_stream) +{ + if (!LZ4_stream) return 0; /* support free on NULL */ + DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream); + FREEMEM(LZ4_stream); + return (0); +} +#endif + + +typedef enum { _ld_fast, _ld_slow } LoadDict_mode_e; +#define HASH_UNIT sizeof(reg_t) +int LZ4_loadDict_internal(LZ4_stream_t* LZ4_dict, + const char* dictionary, int dictSize, + LoadDict_mode_e _ld) +{ + LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse; + const tableType_t tableType = byU32; + const BYTE* p = (const BYTE*)dictionary; + const BYTE* const dictEnd = p + dictSize; + U32 idx32; + + DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict); + + /* It's necessary to reset the context, + * and not just continue it with prepareTable() + * to avoid any risk of generating overflowing matchIndex + * when compressing using this dictionary */ + LZ4_resetStream(LZ4_dict); + + /* We always increment the offset by 64 KB, since, if the dict is longer, + * we truncate it to the last 64k, and if it's shorter, we still want to + * advance by a whole window length so we can provide the guarantee that + * there are only valid offsets in the window, which allows an optimization + * in LZ4_compress_fast_continue() where it uses noDictIssue even when the + * dictionary isn't a full 64k. 
*/ + dict->currentOffset += 64 KB; + + if (dictSize < (int)HASH_UNIT) { + return 0; + } + + if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB; + dict->dictionary = p; + dict->dictSize = (U32)(dictEnd - p); + dict->tableType = (U32)tableType; + idx32 = dict->currentOffset - dict->dictSize; + + while (p <= dictEnd-HASH_UNIT) { + U32 const h = LZ4_hashPosition(p, tableType); + /* Note: overwriting => favors positions end of dictionary */ + LZ4_putIndexOnHash(idx32, h, dict->hashTable, tableType); + p+=3; idx32+=3; + } + + if (_ld == _ld_slow) { + /* Fill hash table with additional references, to improve compression capability */ + p = dict->dictionary; + idx32 = dict->currentOffset - dict->dictSize; + while (p <= dictEnd-HASH_UNIT) { + U32 const h = LZ4_hashPosition(p, tableType); + U32 const limit = dict->currentOffset - 64 KB; + if (LZ4_getIndexOnHash(h, dict->hashTable, tableType) <= limit) { + /* Note: not overwriting => favors positions beginning of dictionary */ + LZ4_putIndexOnHash(idx32, h, dict->hashTable, tableType); + } + p++; idx32++; + } + } + + return (int)dict->dictSize; +} + +int LZ4_loadDict(LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) +{ + return LZ4_loadDict_internal(LZ4_dict, dictionary, dictSize, _ld_fast); +} + +int LZ4_loadDictSlow(LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) +{ + return LZ4_loadDict_internal(LZ4_dict, dictionary, dictSize, _ld_slow); +} + +void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream) +{ + const LZ4_stream_t_internal* dictCtx = (dictionaryStream == NULL) ? NULL : + &(dictionaryStream->internal_donotuse); + + DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)", + workingStream, dictionaryStream, + dictCtx != NULL ? dictCtx->dictSize : 0); + + if (dictCtx != NULL) { + /* If the current offset is zero, we will never look in the + * external dictionary context, since there is no value a table + * entry can take that indicate a miss. 
In that case, we need + * to bump the offset to something non-zero. + */ + if (workingStream->internal_donotuse.currentOffset == 0) { + workingStream->internal_donotuse.currentOffset = 64 KB; + } + + /* Don't actually attach an empty dictionary. + */ + if (dictCtx->dictSize == 0) { + dictCtx = NULL; + } + } + workingStream->internal_donotuse.dictCtx = dictCtx; +} + + +static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize) +{ + assert(nextSize >= 0); + if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) { /* potential ptrdiff_t overflow (32-bits mode) */ + /* rescale hash table */ + U32 const delta = LZ4_dict->currentOffset - 64 KB; + const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; + int i; + DEBUGLOG(4, "LZ4_renormDictT"); + for (i=0; ihashTable[i] < delta) LZ4_dict->hashTable[i]=0; + else LZ4_dict->hashTable[i] -= delta; + } + LZ4_dict->currentOffset = 64 KB; + if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB; + LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; + } +} + + +int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, + const char* source, char* dest, + int inputSize, int maxOutputSize, + int acceleration) +{ + const tableType_t tableType = byU32; + LZ4_stream_t_internal* const streamPtr = &LZ4_stream->internal_donotuse; + const char* dictEnd = streamPtr->dictSize ? 
(const char*)streamPtr->dictionary + streamPtr->dictSize : NULL; + + DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i, dictSize=%u)", inputSize, streamPtr->dictSize); + + LZ4_renormDictT(streamPtr, inputSize); /* fix index overflow */ + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + + /* invalidate tiny dictionaries */ + if ( (streamPtr->dictSize < 4) /* tiny dictionary : not enough for a hash */ + && (dictEnd != source) /* prefix mode */ + && (inputSize > 0) /* tolerance : don't lose history, in case next invocation would use prefix mode */ + && (streamPtr->dictCtx == NULL) /* usingDictCtx */ + ) { + DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary); + /* remove dictionary existence from history, to employ faster prefix mode */ + streamPtr->dictSize = 0; + streamPtr->dictionary = (const BYTE*)source; + dictEnd = source; + } + + /* Check overlapping input/dictionary space */ + { const char* const sourceEnd = source + inputSize; + if ((sourceEnd > (const char*)streamPtr->dictionary) && (sourceEnd < dictEnd)) { + streamPtr->dictSize = (U32)(dictEnd - sourceEnd); + if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB; + if (streamPtr->dictSize < 4) streamPtr->dictSize = 0; + streamPtr->dictionary = (const BYTE*)dictEnd - streamPtr->dictSize; + } + } + + /* prefix mode : source data follows dictionary */ + if (dictEnd == source) { + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) + return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration); + else + return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration); + } + + /* external dictionary mode */ + { int result; + if 
(streamPtr->dictCtx) { + /* We depend here on the fact that dictCtx'es (produced by + * LZ4_loadDict) guarantee that their tables contain no references + * to offsets between dictCtx->currentOffset - 64 KB and + * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe + * to use noDictIssue even when the dict isn't a full 64 KB. + */ + if (inputSize > 4 KB) { + /* For compressing large blobs, it is faster to pay the setup + * cost to copy the dictionary's tables into the active context, + * so that the compression loop is only looking into one table. + */ + LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr)); + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration); + } + } else { /* small data <= 4 KB */ + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); + } + } + streamPtr->dictionary = (const BYTE*)source; + streamPtr->dictSize = (U32)inputSize; + return result; + } +} + + +/* Hidden debug function, to force-test external dictionary mode */ +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize) +{ + LZ4_stream_t_internal* const streamPtr = &LZ4_dict->internal_donotuse; + int result; + + LZ4_renormDictT(streamPtr, srcSize); + + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic(streamPtr, source, dest, srcSize, 
NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1); + } + + streamPtr->dictionary = (const BYTE*)source; + streamPtr->dictSize = (U32)srcSize; + + return result; +} + + +/*! LZ4_saveDict() : + * If previously compressed data block is not guaranteed to remain available at its memory location, + * save it into a safer place (char* safeBuffer). + * Note : no need to call LZ4_loadDict() afterwards, dictionary is immediately usable, + * one can therefore call LZ4_compress_fast_continue() right after. + * @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error. + */ +int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) +{ + LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse; + + DEBUGLOG(5, "LZ4_saveDict : dictSize=%i, safeBuffer=%p", dictSize, safeBuffer); + + if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */ + if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; } + + if (safeBuffer == NULL) assert(dictSize == 0); + if (dictSize > 0) { + const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize; + assert(dict->dictionary); + LZ4_memmove(safeBuffer, previousDictEnd - dictSize, (size_t)dictSize); + } + + dict->dictionary = (const BYTE*)safeBuffer; + dict->dictSize = (U32)dictSize; + + return dictSize; +} + + + +/*-******************************* + * Decompression functions + ********************************/ + +typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; + +#undef MIN +#define MIN(a,b) ( (a) < (b) ? 
(a) : (b) ) + + +/* variant for decompress_unsafe() + * does not know end of input + * presumes input is well formed + * note : will consume at least one byte */ +static size_t read_long_length_no_check(const BYTE** pp) +{ + size_t b, l = 0; + do { b = **pp; (*pp)++; l += b; } while (b==255); + DEBUGLOG(6, "read_long_length_no_check: +length=%zu using %zu input bytes", l, l/255 + 1) + return l; +} + +/* core decoder variant for LZ4_decompress_fast*() + * for legacy support only : these entry points are deprecated. + * - Presumes input is correctly formed (no defense vs malformed inputs) + * - Does not know input size (presume input buffer is "large enough") + * - Decompress a full block (only) + * @return : nb of bytes read from input. + * Note : this variant is not optimized for speed, just for maintenance. + * the goal is to remove support of decompress_fast*() variants by v2.0 +**/ +LZ4_FORCE_INLINE int +LZ4_decompress_unsafe_generic( + const BYTE* const istart, + BYTE* const ostart, + int decompressedSize, + + size_t prefixSize, + const BYTE* const dictStart, /* only if dict==usingExtDict */ + const size_t dictSize /* note: =0 if dictStart==NULL */ + ) +{ + const BYTE* ip = istart; + BYTE* op = (BYTE*)ostart; + BYTE* const oend = ostart + decompressedSize; + const BYTE* const prefixStart = ostart - prefixSize; + + DEBUGLOG(5, "LZ4_decompress_unsafe_generic"); + if (dictStart == NULL) assert(dictSize == 0); + + while (1) { + /* start new sequence */ + unsigned token = *ip++; + + /* literals */ + { size_t ll = token >> ML_BITS; + if (ll==15) { + /* long literal length */ + ll += read_long_length_no_check(&ip); + } + if ((size_t)(oend-op) < ll) return -1; /* output buffer overflow */ + LZ4_memmove(op, ip, ll); /* support in-place decompression */ + op += ll; + ip += ll; + if ((size_t)(oend-op) < MFLIMIT) { + if (op==oend) break; /* end of block */ + DEBUGLOG(5, "invalid: literals end at distance %zi from end of block", oend-op); + /* incorrect end of block : + * 
last match must start at least MFLIMIT==12 bytes before end of output block */ + return -1; + } } + + /* match */ + { size_t ml = token & 15; + size_t const offset = LZ4_readLE16(ip); + ip+=2; + + if (ml==15) { + /* long literal length */ + ml += read_long_length_no_check(&ip); + } + ml += MINMATCH; + + if ((size_t)(oend-op) < ml) return -1; /* output buffer overflow */ + + { const BYTE* match = op - offset; + + /* out of range */ + if (offset > (size_t)(op - prefixStart) + dictSize) { + DEBUGLOG(6, "offset out of range"); + return -1; + } + + /* check special case : extDict */ + if (offset > (size_t)(op - prefixStart)) { + /* extDict scenario */ + const BYTE* const dictEnd = dictStart + dictSize; + const BYTE* extMatch = dictEnd - (offset - (size_t)(op-prefixStart)); + size_t const extml = (size_t)(dictEnd - extMatch); + if (extml > ml) { + /* match entirely within extDict */ + LZ4_memmove(op, extMatch, ml); + op += ml; + ml = 0; + } else { + /* match split between extDict & prefix */ + LZ4_memmove(op, extMatch, extml); + op += extml; + ml -= extml; + } + match = prefixStart; + } + + /* match copy - slow variant, supporting overlap copy */ + { size_t u; + for (u=0; u= ipmax before start of loop. Returns initial_error if so. + * @error (output) - error code. Must be set to 0 before call. 
+**/ +typedef size_t Rvl_t; +static const Rvl_t rvl_error = (Rvl_t)(-1); +LZ4_FORCE_INLINE Rvl_t +read_variable_length(const BYTE** ip, const BYTE* ilimit, + int initial_check) +{ + Rvl_t s, length = 0; + assert(ip != NULL); + assert(*ip != NULL); + assert(ilimit != NULL); + if (initial_check && unlikely((*ip) >= ilimit)) { /* read limit reached */ + return rvl_error; + } + s = **ip; + (*ip)++; + length += s; + if (unlikely((*ip) > ilimit)) { /* read limit reached */ + return rvl_error; + } + /* accumulator overflow detection (32-bit mode only) */ + if ((sizeof(length) < 8) && unlikely(length > ((Rvl_t)(-1)/2)) ) { + return rvl_error; + } + if (likely(s != 255)) return length; + do { + s = **ip; + (*ip)++; + length += s; + if (unlikely((*ip) > ilimit)) { /* read limit reached */ + return rvl_error; + } + /* accumulator overflow detection (32-bit mode only) */ + if ((sizeof(length) < 8) && unlikely(length > ((Rvl_t)(-1)/2)) ) { + return rvl_error; + } + } while (s == 255); + + return length; +} + +/*! LZ4_decompress_generic() : + * This generic decompression function covers all use cases. + * It shall be instantiated several times, using different sets of directives. + * Note that it is important for performance that this function really get inlined, + * in order to remove useless branches during compilation optimization. 
+ */ +LZ4_FORCE_INLINE int +LZ4_decompress_generic( + const char* const src, + char* const dst, + int srcSize, + int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */ + + earlyEnd_directive partialDecoding, /* full, partial */ + dict_directive dict, /* noDict, withPrefix64k, usingExtDict */ + const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */ + const BYTE* const dictStart, /* only if dict==usingExtDict */ + const size_t dictSize /* note : = 0 if noDict */ + ) +{ + if ((src == NULL) || (outputSize < 0)) { return -1; } + + { const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + + BYTE* op = (BYTE*) dst; + BYTE* const oend = op + outputSize; + BYTE* cpy; + + const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize; + + const int checkOffset = (dictSize < (int)(64 KB)); + + + /* Set up the "end" pointers for the shortcut. */ + const BYTE* const shortiend = iend - 14 /*maxLL*/ - 2 /*offset*/; + const BYTE* const shortoend = oend - 14 /*maxLL*/ - 18 /*maxML*/; + + const BYTE* match; + size_t offset; + unsigned token; + size_t length; + + + DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize); + + /* Special cases */ + assert(lowPrefix <= op); + if (unlikely(outputSize==0)) { + /* Empty output buffer */ + if (partialDecoding) return 0; + return ((srcSize==1) && (*ip==0)) ? 0 : -1; + } + if (unlikely(srcSize==0)) { return -1; } + + /* LZ4_FAST_DEC_LOOP: + * designed for modern OoO performance cpus, + * where copying reliably 32-bytes is preferable to an unpredictable branch. + * note : fast loop may show a regression for some client arm chips. 
*/ +#if LZ4_FAST_DEC_LOOP + if ((oend - op) < FASTLOOP_SAFE_DISTANCE) { + DEBUGLOG(6, "move to safe decode loop"); + goto safe_decode; + } + + /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */ + DEBUGLOG(6, "using fast decode loop"); + while (1) { + /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */ + assert(oend - op >= FASTLOOP_SAFE_DISTANCE); + assert(ip < iend); + token = *ip++; + length = token >> ML_BITS; /* literal length */ + DEBUGLOG(7, "blockPos%6u: litLength token = %u", (unsigned)(op-(BYTE*)dst), (unsigned)length); + + /* decode literal length */ + if (length == RUN_MASK) { + size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1); + if (addl == rvl_error) { + DEBUGLOG(6, "error reading long literal length"); + goto _output_error; + } + length += addl; + if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ + if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ + + /* copy literals */ + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if ((op+length>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; } + LZ4_wildCopy32(op, ip, op+length); + ip += length; op += length; + } else if (ip <= iend-(16 + 1/*max lit + offset + nextToken*/)) { + /* We don't need to check oend, since we check it once for each loop below */ + DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length); + /* Literals can only be <= 14, but hope compilers optimize better when copy by a register size */ + LZ4_memcpy(op, ip, 16); + ip += length; op += length; + } else { + goto safe_literal_copy; + } + + /* get offset */ + offset = LZ4_readLE16(ip); ip+=2; + DEBUGLOG(6, "blockPos%6u: offset = %u", (unsigned)(op-(BYTE*)dst), (unsigned)offset); + match = op - offset; + assert(match <= op); /* overflow check */ + + /* get matchlength */ + length = token & ML_MASK; + DEBUGLOG(7, " match length token = %u (len==%u)", 
(unsigned)length, (unsigned)length+MINMATCH); + + if (length == ML_MASK) { + size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0); + if (addl == rvl_error) { + DEBUGLOG(5, "error reading long match length"); + goto _output_error; + } + length += addl; + length += MINMATCH; + DEBUGLOG(7, " long match length == %u", (unsigned)length); + if (unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */ + if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { + goto safe_match_copy; + } + } else { + length += MINMATCH; + if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { + DEBUGLOG(7, "moving to safe_match_copy (ml==%u)", (unsigned)length); + goto safe_match_copy; + } + + /* Fastpath check: skip LZ4_wildCopy32 when true */ + if ((dict == withPrefix64k) || (match >= lowPrefix)) { + if (offset >= 8) { + assert(match >= lowPrefix); + assert(match <= op); + assert(op + 18 <= oend); + + LZ4_memcpy(op, match, 8); + LZ4_memcpy(op+8, match+8, 8); + LZ4_memcpy(op+16, match+16, 2); + op += length; + continue; + } } } + + if ( checkOffset && (unlikely(match + dictSize < lowPrefix)) ) { + DEBUGLOG(5, "Error : pos=%zi, offset=%zi => outside buffers", op-lowPrefix, op-match); + goto _output_error; + } + /* match starting within external dictionary */ + if ((dict==usingExtDict) && (match < lowPrefix)) { + assert(dictEnd != NULL); + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) { + DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd"); + length = MIN(length, (size_t)(oend-op)); + } else { + DEBUGLOG(6, "end-of-block condition violated") + goto _output_error; + } } + + if (length <= (size_t)(lowPrefix-match)) { + /* match fits entirely within external dictionary : just copy */ + LZ4_memmove(op, dictEnd - (lowPrefix-match), length); + op += length; + } else { + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); + size_t const 
restSize = length - copySize; + LZ4_memcpy(op, dictEnd - copySize, copySize); + op += copySize; + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ + BYTE* const endOfMatch = op + restSize; + const BYTE* copyFrom = lowPrefix; + while (op < endOfMatch) { *op++ = *copyFrom++; } + } else { + LZ4_memcpy(op, lowPrefix, restSize); + op += restSize; + } } + continue; + } + + /* copy match within block */ + cpy = op + length; + + assert((op <= oend) && (oend-op >= 32)); + if (unlikely(offset<16)) { + LZ4_memcpy_using_offset(op, match, cpy, offset); + } else { + LZ4_wildCopy32(op, match, cpy); + } + + op = cpy; /* wildcopy correction */ + } + safe_decode: +#endif + + /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */ + DEBUGLOG(6, "using safe decode loop"); + while (1) { + assert(ip < iend); + token = *ip++; + length = token >> ML_BITS; /* literal length */ + DEBUGLOG(7, "blockPos%6u: litLength token = %u", (unsigned)(op-(BYTE*)dst), (unsigned)length); + + /* A two-stage shortcut for the most common case: + * 1) If the literal length is 0..14, and there is enough space, + * enter the shortcut and copy 16 bytes on behalf of the literals + * (in the fast mode, only 8 bytes can be safely copied this way). + * 2) Further if the match length is 4..18, copy 18 bytes in a similar + * manner; but we ensure that there's enough space in the output for + * those 18 bytes earlier, upon entering the shortcut (in other words, + * there is a combined check for both stages). + */ + if ( (length != RUN_MASK) + /* strictly "less than" on input, to re-enter the loop with at least one byte */ + && likely((ip < shortiend) & (op <= shortoend)) ) { + /* Copy the literals */ + LZ4_memcpy(op, ip, 16); + op += length; ip += length; + + /* The second stage: prepare for match copying, decode full info. + * If it doesn't work out, the info won't be wasted. 
*/ + length = token & ML_MASK; /* match length */ + DEBUGLOG(7, "blockPos%6u: matchLength token = %u (len=%u)", (unsigned)(op-(BYTE*)dst), (unsigned)length, (unsigned)length + 4); + offset = LZ4_readLE16(ip); ip += 2; + match = op - offset; + assert(match <= op); /* check overflow */ + + /* Do not deal with overlapping matches. */ + if ( (length != ML_MASK) + && (offset >= 8) + && (dict==withPrefix64k || match >= lowPrefix) ) { + /* Copy the match. */ + LZ4_memcpy(op + 0, match + 0, 8); + LZ4_memcpy(op + 8, match + 8, 8); + LZ4_memcpy(op +16, match +16, 2); + op += length + MINMATCH; + /* Both stages worked, load the next token. */ + continue; + } + + /* The second stage didn't work out, but the info is ready. + * Propel it right to the point of match copying. */ + goto _copy_match; + } + + /* decode literal length */ + if (length == RUN_MASK) { + size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1); + if (addl == rvl_error) { goto _output_error; } + length += addl; + if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ + if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ + } + +#if LZ4_FAST_DEC_LOOP + safe_literal_copy: +#endif + /* copy literals */ + cpy = op+length; + + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) { + /* We've either hit the input parsing restriction or the output parsing restriction. + * In the normal scenario, decoding a full block, it must be the last sequence, + * otherwise it's an error (invalid input or dimensions). + * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow. + */ + if (partialDecoding) { + /* Since we are partial decoding we may be in this block because of the output parsing + * restriction, which is not valid since the output buffer is allowed to be undersized. 
+ */ + DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end") + DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length); + DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op)); + DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip)); + /* Finishing in the middle of a literals segment, + * due to lack of input. + */ + if (ip+length > iend) { + length = (size_t)(iend-ip); + cpy = op + length; + } + /* Finishing in the middle of a literals segment, + * due to lack of output space. + */ + if (cpy > oend) { + cpy = oend; + assert(op<=oend); + length = (size_t)(oend-op); + } + } else { + /* We must be on the last sequence (or invalid) because of the parsing limitations + * so check that we exactly consume the input and don't overrun the output buffer. + */ + if ((ip+length != iend) || (cpy > oend)) { + DEBUGLOG(5, "should have been last run of literals") + DEBUGLOG(5, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend); + DEBUGLOG(5, "or cpy(%p) > (oend-MFLIMIT)(%p)", cpy, oend-MFLIMIT); + DEBUGLOG(5, "after writing %u bytes / %i bytes available", (unsigned)(op-(BYTE*)dst), outputSize); + goto _output_error; + } + } + LZ4_memmove(op, ip, length); /* supports overlapping memory regions, for in-place decompression scenarios */ + ip += length; + op += length; + /* Necessarily EOF when !partialDecoding. + * When partialDecoding, it is EOF if we've either + * filled the output buffer or + * can't proceed with reading an offset for following match. 
+ */ + if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) { + break; + } + } else { + LZ4_wildCopy8(op, ip, cpy); /* can overwrite up to 8 bytes beyond cpy */ + ip += length; op = cpy; + } + + /* get offset */ + offset = LZ4_readLE16(ip); ip+=2; + match = op - offset; + + /* get matchlength */ + length = token & ML_MASK; + DEBUGLOG(7, "blockPos%6u: matchLength token = %u", (unsigned)(op-(BYTE*)dst), (unsigned)length); + + _copy_match: + if (length == ML_MASK) { + size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0); + if (addl == rvl_error) { goto _output_error; } + length += addl; + if (unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */ + } + length += MINMATCH; + +#if LZ4_FAST_DEC_LOOP + safe_match_copy: +#endif + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ + /* match starting within external dictionary */ + if ((dict==usingExtDict) && (match < lowPrefix)) { + assert(dictEnd != NULL); + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) length = MIN(length, (size_t)(oend-op)); + else goto _output_error; /* doesn't respect parsing restriction */ + } + + if (length <= (size_t)(lowPrefix-match)) { + /* match fits entirely within external dictionary : just copy */ + LZ4_memmove(op, dictEnd - (lowPrefix-match), length); + op += length; + } else { + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); + size_t const restSize = length - copySize; + LZ4_memcpy(op, dictEnd - copySize, copySize); + op += copySize; + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ + BYTE* const endOfMatch = op + restSize; + const BYTE* copyFrom = lowPrefix; + while (op < endOfMatch) *op++ = *copyFrom++; + } else { + LZ4_memcpy(op, lowPrefix, restSize); + op += restSize; + } } + continue; + } + assert(match >= lowPrefix); + + /* copy match within 
block */ + cpy = op + length; + + /* partialDecoding : may end anywhere within the block */ + assert(op<=oend); + if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + size_t const mlen = MIN(length, (size_t)(oend-op)); + const BYTE* const matchEnd = match + mlen; + BYTE* const copyEnd = op + mlen; + if (matchEnd > op) { /* overlap copy */ + while (op < copyEnd) { *op++ = *match++; } + } else { + LZ4_memcpy(op, match, mlen); + } + op = copyEnd; + if (op == oend) { break; } + continue; + } + + if (unlikely(offset<8)) { + LZ4_write32(op, 0); /* silence msan warning when offset==0 */ + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += inc32table[offset]; + LZ4_memcpy(op+4, match, 4); + match -= dec64table[offset]; + } else { + LZ4_memcpy(op, match, 8); + match += 8; + } + op += 8; + + if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1); + if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ + if (op < oCopyLimit) { + LZ4_wildCopy8(op, match, oCopyLimit); + match += oCopyLimit - op; + op = oCopyLimit; + } + while (op < cpy) { *op++ = *match++; } + } else { + LZ4_memcpy(op, match, 8); + if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); } + } + op = cpy; /* wildcopy correction */ + } + + /* end of decoding */ + DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst)); + return (int) (((char*)op)-dst); /* Nb of output bytes decoded */ + + /* Overflow error detected */ + _output_error: + return (int) (-(((const char*)ip)-src))-1; + } +} + + +/*===== Instantiate the API decoding functions. 
=====*/ + +LZ4_FORCE_O2 +int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, + decode_full_block, noDict, + (BYTE*)dest, NULL, 0); +} + +LZ4_FORCE_O2 +int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity) +{ + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, + partial_decode, + noDict, (BYTE*)dst, NULL, 0); +} + +LZ4_FORCE_O2 +int LZ4_decompress_fast(const char* source, char* dest, int originalSize) +{ + DEBUGLOG(5, "LZ4_decompress_fast"); + return LZ4_decompress_unsafe_generic( + (const BYTE*)source, (BYTE*)dest, originalSize, + 0, NULL, 0); +} + +/*===== Instantiate a few more decoding cases, used more than once. =====*/ + +LZ4_FORCE_O2 /* Exported, an obsolete API function. */ +int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + decode_full_block, withPrefix64k, + (BYTE*)dest - 64 KB, NULL, 0); +} + +LZ4_FORCE_O2 +static int LZ4_decompress_safe_partial_withPrefix64k(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity) +{ + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity, + partial_decode, withPrefix64k, + (BYTE*)dest - 64 KB, NULL, 0); +} + +/* Another obsolete API function, paired with the previous one. 
*/ +int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) +{ + return LZ4_decompress_unsafe_generic( + (const BYTE*)source, (BYTE*)dest, originalSize, + 64 KB, NULL, 0); +} + +LZ4_FORCE_O2 +static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize, + size_t prefixSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + decode_full_block, noDict, + (BYTE*)dest-prefixSize, NULL, 0); +} + +LZ4_FORCE_O2 +static int LZ4_decompress_safe_partial_withSmallPrefix(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity, + size_t prefixSize) +{ + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity, + partial_decode, noDict, + (BYTE*)dest-prefixSize, NULL, 0); +} + +LZ4_FORCE_O2 +int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, + int compressedSize, int maxOutputSize, + const void* dictStart, size_t dictSize) +{ + DEBUGLOG(5, "LZ4_decompress_safe_forceExtDict"); + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + decode_full_block, usingExtDict, + (BYTE*)dest, (const BYTE*)dictStart, dictSize); +} + +LZ4_FORCE_O2 +int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest, + int compressedSize, int targetOutputSize, int dstCapacity, + const void* dictStart, size_t dictSize) +{ + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity, + partial_decode, usingExtDict, + (BYTE*)dest, (const BYTE*)dictStart, dictSize); +} + +LZ4_FORCE_O2 +static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize, + const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_unsafe_generic( + (const BYTE*)source, (BYTE*)dest, originalSize, + 0, (const BYTE*)dictStart, dictSize); +} + +/* The "double 
dictionary" mode, for use with e.g. ring buffers: the first part + * of the dictionary is passed as prefix, and the second via dictStart + dictSize. + * These routines are used only once, in LZ4_decompress_*_continue(). + */ +LZ4_FORCE_INLINE +int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize, + size_t prefixSize, const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + decode_full_block, usingExtDict, + (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); +} + +/*===== streaming decompression functions =====*/ + +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +LZ4_streamDecode_t* LZ4_createStreamDecode(void) +{ + LZ4_STATIC_ASSERT(sizeof(LZ4_streamDecode_t) >= sizeof(LZ4_streamDecode_t_internal)); + return (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t)); +} + +int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) +{ + if (LZ4_stream == NULL) { return 0; } /* support free on NULL */ + FREEMEM(LZ4_stream); + return 0; +} +#endif + +/*! LZ4_setStreamDecode() : + * Use this function to instruct where to find the dictionary. + * This function is not necessary if previous data is still available where it was decoded. + * Loading a size of 0 is allowed (same effect as no dictionary). + * @return : 1 if OK, 0 if error + */ +int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + lz4sd->prefixSize = (size_t)dictSize; + if (dictSize) { + assert(dictionary != NULL); + lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize; + } else { + lz4sd->prefixEnd = (const BYTE*) dictionary; + } + lz4sd->externalDict = NULL; + lz4sd->extDictSize = 0; + return 1; +} + +/*! 
LZ4_decoderRingBufferSize() : + * when setting a ring buffer for streaming decompression (optional scenario), + * provides the minimum size of this ring buffer + * to be compatible with any source respecting maxBlockSize condition. + * Note : in a ring buffer scenario, + * blocks are presumed decompressed next to each other. + * When not enough space remains for next block (remainingSize < maxBlockSize), + * decoding resumes from beginning of ring buffer. + * @return : minimum ring buffer size, + * or 0 if there is an error (invalid maxBlockSize). + */ +int LZ4_decoderRingBufferSize(int maxBlockSize) +{ + if (maxBlockSize < 0) return 0; + if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0; + if (maxBlockSize < 16) maxBlockSize = 16; + return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize); +} + +/* +*_continue() : + These decoding functions allow decompression of multiple blocks in "streaming" mode. + Previously decoded blocks must still be available at the memory position where they were decoded. + If it's not possible, save the relevant part of decoded data into a safe buffer, + and indicate where it stands using LZ4_setStreamDecode() +*/ +LZ4_FORCE_O2 +int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + int result; + + if (lz4sd->prefixSize == 0) { + /* The first call, no dictionary yet. */ + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)result; + lz4sd->prefixEnd = (BYTE*)dest + result; + } else if (lz4sd->prefixEnd == (BYTE*)dest) { + /* They're rolling the current segment. 
*/ + if (lz4sd->prefixSize >= 64 KB - 1) + result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); + else if (lz4sd->extDictSize == 0) + result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, + lz4sd->prefixSize); + else + result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize, + lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize += (size_t)result; + lz4sd->prefixEnd += result; + } else { + /* The buffer wraps around, or they're switching to another buffer. */ + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)result; + lz4sd->prefixEnd = (BYTE*)dest + result; + } + + return result; +} + +LZ4_FORCE_O2 int +LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, + const char* source, char* dest, int originalSize) +{ + LZ4_streamDecode_t_internal* const lz4sd = + (assert(LZ4_streamDecode!=NULL), &LZ4_streamDecode->internal_donotuse); + int result; + + DEBUGLOG(5, "LZ4_decompress_fast_continue (toDecodeSize=%i)", originalSize); + assert(originalSize >= 0); + + if (lz4sd->prefixSize == 0) { + DEBUGLOG(5, "first invocation : no prefix nor extDict"); + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_fast(source, dest, originalSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)originalSize; + lz4sd->prefixEnd = (BYTE*)dest + originalSize; + } else if (lz4sd->prefixEnd == (BYTE*)dest) { + DEBUGLOG(5, "continue using existing prefix"); + result = LZ4_decompress_unsafe_generic( + (const BYTE*)source, (BYTE*)dest, originalSize, + lz4sd->prefixSize, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + 
lz4sd->prefixSize += (size_t)originalSize; + lz4sd->prefixEnd += originalSize; + } else { + DEBUGLOG(5, "prefix becomes extDict"); + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_fast_extDict(source, dest, originalSize, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)originalSize; + lz4sd->prefixEnd = (BYTE*)dest + originalSize; + } + + return result; +} + + +/* +Advanced decoding functions : +*_usingDict() : + These decoding functions work the same as "_continue" ones, + the dictionary must be explicitly provided within parameters +*/ + +int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) +{ + if (dictSize==0) + return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); + if (dictStart+dictSize == dest) { + if (dictSize >= 64 KB - 1) { + return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize); +} + +int LZ4_decompress_safe_partial_usingDict(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity, const char* dictStart, int dictSize) +{ + if (dictSize==0) + return LZ4_decompress_safe_partial(source, dest, compressedSize, targetOutputSize, dstCapacity); + if (dictStart+dictSize == dest) { + if (dictSize >= 64 KB - 1) { + return LZ4_decompress_safe_partial_withPrefix64k(source, dest, compressedSize, targetOutputSize, dstCapacity); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_partial_withSmallPrefix(source, dest, compressedSize, targetOutputSize, dstCapacity, 
(size_t)dictSize); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_partial_forceExtDict(source, dest, compressedSize, targetOutputSize, dstCapacity, dictStart, (size_t)dictSize); +} + +int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) +{ + if (dictSize==0 || dictStart+dictSize == dest) + return LZ4_decompress_unsafe_generic( + (const BYTE*)source, (BYTE*)dest, originalSize, + (size_t)dictSize, NULL, 0); + assert(dictSize >= 0); + return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize); +} + + +/*=************************************************* +* Obsolete Functions +***************************************************/ +/* obsolete compression functions */ +int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) +{ + return LZ4_compress_default(source, dest, inputSize, maxOutputSize); +} +int LZ4_compress(const char* src, char* dest, int srcSize) +{ + return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize)); +} +int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) +{ + return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); +} +int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) +{ + return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); +} +int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity) +{ + return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1); +} +int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) +{ + return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); +} + +/* +These decompression functions are deprecated and should no longer be used. 
+They are only provided here for compatibility with older user programs. +- LZ4_uncompress is totally equivalent to LZ4_decompress_fast +- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe +*/ +int LZ4_uncompress (const char* source, char* dest, int outputSize) +{ + return LZ4_decompress_fast(source, dest, outputSize); +} +int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) +{ + return LZ4_decompress_safe(source, dest, isize, maxOutputSize); +} + +/* Obsolete Streaming functions */ + +int LZ4_sizeofStreamState(void) { return sizeof(LZ4_stream_t); } + +int LZ4_resetStreamState(void* state, char* inputBuffer) +{ + (void)inputBuffer; + LZ4_resetStream((LZ4_stream_t*)state); + return 0; +} + +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +void* LZ4_create (char* inputBuffer) +{ + (void)inputBuffer; + return LZ4_createStream(); +} +#endif + +char* LZ4_slideInputBuffer (void* state) +{ + /* avoid const char * -> char * conversion warning */ + return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary; +} + +#endif /* LZ4_COMMONDEFS_ONLY */ diff --git a/lib/lz4/lz4.h b/lib/lz4/lz4.h new file mode 100644 index 000000000000..80e3e5ca04d2 --- /dev/null +++ b/lib/lz4/lz4.h @@ -0,0 +1,884 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Header File + * Copyright (C) 2011-2023, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repository : https://github.com/lz4/lz4 +*/ +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef LZ4_H_2983827168210 +#define LZ4_H_2983827168210 + +/* --- Dependency --- */ +#include <stddef.h> /* size_t */ + + +/** + Introduction + + LZ4 is lossless compression algorithm, providing compression speed >500 MB/s per core, + scalable with multi-cores CPU. It features an extremely fast decoder, with speed in + multiple GB/s per core, typically reaching RAM speed limits on multi-core systems. + + The LZ4 compression library provides in-memory compression and decompression functions. + It gives full buffer control to user. + Compression can be done in: + - a single step (described as Simple Functions) + - a single step, reusing a context (described in Advanced Functions) + - unbounded multiple steps (described as Streaming compression) + + lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md). + Decompressing such a compressed block requires additional metadata. + Exact metadata depends on exact decompression function.
+ For the typical case of LZ4_decompress_safe(), + metadata includes block's compressed size, and maximum bound of decompressed size. + Each application is free to encode and pass such metadata in whichever way it wants. + + lz4.h only handle blocks, it can not generate Frames. + + Blocks are different from Frames (doc/lz4_Frame_format.md). + Frames bundle both blocks and metadata in a specified manner. + Embedding metadata is required for compressed data to be self-contained and portable. + Frame format is delivered through a companion API, declared in lz4frame.h. + The `lz4` CLI can only manage frames. +*/ + +/*^*************************************************************** +* Export parameters +*****************************************************************/ +/* +* LZ4_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +* LZ4LIB_VISIBILITY : +* Control library symbols visibility. +*/ +#ifndef LZ4LIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define LZ4LIB_VISIBILITY +# endif +#endif +#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1) +# define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY +#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1) +# define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define LZ4LIB_API LZ4LIB_VISIBILITY +#endif + +/*! LZ4_FREESTANDING : + * When this macro is set to 1, it enables "freestanding mode" that is + * suitable for typical freestanding environment which doesn't support + * standard C library. + * + * - LZ4_FREESTANDING is a compile-time switch. + * - It requires the following macros to be defined: + * LZ4_memcpy, LZ4_memmove, LZ4_memset. + * - It only enables LZ4/HC functions which don't use heap. + * All LZ4F_* functions are not supported. 
+ * - See tests/freestanding.c to check its basic setup. + */ +#if defined(LZ4_FREESTANDING) && (LZ4_FREESTANDING == 1) +# define LZ4_HEAPMODE 0 +# define LZ4HC_HEAPMODE 0 +# define LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION 1 +# if !defined(LZ4_memcpy) +# error "LZ4_FREESTANDING requires macro 'LZ4_memcpy'." +# endif +# if !defined(LZ4_memset) +# error "LZ4_FREESTANDING requires macro 'LZ4_memset'." +# endif +# if !defined(LZ4_memmove) +# error "LZ4_FREESTANDING requires macro 'LZ4_memmove'." +# endif +#elif ! defined(LZ4_FREESTANDING) +# define LZ4_FREESTANDING 0 +#endif + + +/*------ Version ------*/ +#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ +#define LZ4_VERSION_MINOR 10 /* for new (non-breaking) interface capabilities */ +#define LZ4_VERSION_RELEASE 0 /* for tweaks, bug-fixes, or development */ + +#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) + +#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE +#define LZ4_QUOTE(str) #str +#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str) +#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION) /* requires v1.7.3+ */ + +LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; useful to check dll version; requires v1.3.0+ */ +LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; useful to check dll version; requires v1.7.5+ */ + + +/*-************************************ +* Tuning memory usage +**************************************/ +/*! + * LZ4_MEMORY_USAGE : + * Can be selected at compile time, by setting LZ4_MEMORY_USAGE. + * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB) + * Increasing memory usage improves compression ratio, generally at the cost of speed. + * Reduced memory usage may improve speed at the cost of ratio, thanks to better cache locality. 
+ * Default value is 14, for 16KB, which nicely fits into most L1 caches. + */ +#ifndef LZ4_MEMORY_USAGE +# define LZ4_MEMORY_USAGE LZ4_MEMORY_USAGE_DEFAULT +#endif + +/* These are absolute limits, they should not be changed by users */ +#define LZ4_MEMORY_USAGE_MIN 10 +#define LZ4_MEMORY_USAGE_DEFAULT 14 +#define LZ4_MEMORY_USAGE_MAX 20 + +#if (LZ4_MEMORY_USAGE < LZ4_MEMORY_USAGE_MIN) +# error "LZ4_MEMORY_USAGE is too small !" +#endif + +#if (LZ4_MEMORY_USAGE > LZ4_MEMORY_USAGE_MAX) +# error "LZ4_MEMORY_USAGE is too large !" +#endif + +/*-************************************ +* Simple Functions +**************************************/ +/*! LZ4_compress_default() : + * Compresses 'srcSize' bytes from buffer 'src' + * into already allocated 'dst' buffer of size 'dstCapacity'. + * Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize). + * It also runs faster, so it's a recommended setting. + * If the function cannot compress 'src' into a more limited 'dst' budget, + * compression stops *immediately*, and the function result is zero. + * In which case, 'dst' content is undefined (invalid). + * srcSize : max supported value is LZ4_MAX_INPUT_SIZE. + * dstCapacity : size of buffer 'dst' (which must be already allocated) + * @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity) + * or 0 if compression fails + * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer). + */ +LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity); + +/*! LZ4_decompress_safe() : + * @compressedSize : is the exact complete size of the compressed block. + * @dstCapacity : is the size of destination buffer (which must be already allocated), + * presumed an upper bound of decompressed size. 
+ * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
+ * If destination buffer is not large enough, decoding will stop and output an error code (negative value).
+ * If the source stream is detected malformed, the function will stop decoding and return a negative result.
+ * Note 1 : This function is protected against malicious data packets :
+ * it will never write outside 'dst' buffer, nor read outside 'source' buffer,
+ * even if the compressed block is maliciously modified to order the decoder to do these actions.
+ * In such case, the decoder stops immediately, and considers the compressed block malformed.
+ * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them.
+ * The implementation is free to send / store / derive this information in whichever way is most beneficial.
+ * If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead.
+ */
+LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
+
+
+/*-************************************
+* Advanced Functions
+**************************************/
+#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
+
+/*! LZ4_compressBound() :
+ Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
+ This function is primarily useful for memory allocation purposes (destination buffer size).
+ Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
+ Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize) + inputSize : max supported value is LZ4_MAX_INPUT_SIZE + return : maximum output size in a "worst case" scenario + or 0, if input size is incorrect (too large or negative) +*/ +LZ4LIB_API int LZ4_compressBound(int inputSize); + +/*! LZ4_compress_fast() : + Same as LZ4_compress_default(), but allows selection of "acceleration" factor. + The larger the acceleration value, the faster the algorithm, but also the lesser the compression. + It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed. + An acceleration value of "1" is the same as regular LZ4_compress_default() + Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c). + Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c). +*/ +LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + + +/*! LZ4_compress_fast_extState() : + * Same as LZ4_compress_fast(), using an externally allocated memory space for its state. + * Use LZ4_sizeofState() to know how much memory must be allocated, + * and allocate it on 8-bytes boundaries (using `malloc()` typically). + * Then, provide this buffer as `void* state` to compression function. + */ +LZ4LIB_API int LZ4_sizeofState(void); +LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + +/*! LZ4_compress_destSize() : + * Reverse the logic : compresses as much data as possible from 'src' buffer + * into already allocated buffer 'dst', of size >= 'dstCapacity'. + * This function either compresses the entire 'src' content into 'dst' if it's large enough, + * or fill 'dst' buffer completely with as much data as possible from 'src'. + * note: acceleration parameter is fixed to "default". + * + * *srcSizePtr : in+out parameter. 
 Initially contains size of input.
+ * Will be modified to indicate how many bytes were read from 'src' to fill 'dst'.
+ * New value is necessarily <= input value.
+ * @return : Nb bytes written into 'dst' (necessarily <= dstCapacity)
+ * or 0 if compression fails.
+ *
+ * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed in v1.9.2+):
+ * the produced compressed content could, in specific circumstances,
+ * require to be decompressed into a destination buffer larger
+ * by at least 1 byte than the content to decompress.
+ * If an application uses `LZ4_compress_destSize()`,
+ * it's highly recommended to update liblz4 to v1.9.2 or better.
+ * If this can't be done or ensured,
+ * the receiving decompression function should provide
+ * a dstCapacity which is > decompressedSize, by at least 1 byte.
+ * See https://github.com/lz4/lz4/issues/859 for details
+ */
+LZ4LIB_API int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize);
+
+/*! LZ4_decompress_safe_partial() :
+ * Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
+ * into destination buffer 'dst' of size 'dstCapacity'.
+ * Up to 'targetOutputSize' bytes will be decoded.
+ * The function stops decoding on reaching this objective.
+ * This can be useful to boost performance
+ * whenever only the beginning of a block is required.
+ *
+ * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
+ * If source stream is detected malformed, function returns a negative result.
+ *
+ * Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
+ *
+ * Note 2 : targetOutputSize must be <= dstCapacity
+ *
+ * Note 3 : this function effectively stops decoding on reaching targetOutputSize,
+ * so dstCapacity is kind of redundant.
+ * This is because in older versions of this function,
+ * decoding operation would still write complete sequences.
+ * Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize, + * it could write more bytes, though only up to dstCapacity. + * Some "margin" used to be required for this operation to work properly. + * Thankfully, this is no longer necessary. + * The function nonetheless keeps the same signature, in an effort to preserve API compatibility. + * + * Note 4 : If srcSize is the exact size of the block, + * then targetOutputSize can be any value, + * including larger than the block's decompressed size. + * The function will, at most, generate block's decompressed size. + * + * Note 5 : If srcSize is _larger_ than block's compressed size, + * then targetOutputSize **MUST** be <= block's decompressed size. + * Otherwise, *silent corruption will occur*. + */ +LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity); + + +/*-********************************************* +* Streaming Compression Functions +***********************************************/ +typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */ + +/*! + Note about RC_INVOKED + + - RC_INVOKED is predefined symbol of rc.exe (the resource compiler which is part of MSVC/Visual Studio). + https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros + + - Since rc.exe is a legacy compiler, it truncates long symbol (> 30 chars) + and reports warning "RC4011: identifier truncated". + + - To eliminate the warning, we surround long preprocessor symbol with + "#if !defined(RC_INVOKED) ... #endif" block that means + "skip this block when rc.exe is trying to read it". 
+*/
+#if !defined(RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros */
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
+LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
+LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr);
+#endif /* !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) */
+#endif
+
+/*! LZ4_resetStream_fast() : v1.9.0+
+ * Use this to prepare an LZ4_stream_t for a new chain of dependent blocks
+ * (e.g., LZ4_compress_fast_continue()).
+ *
+ * An LZ4_stream_t must be initialized once before usage.
+ * This is automatically done when created by LZ4_createStream().
+ * However, should the LZ4_stream_t be simply declared on stack (for example),
+ * it's necessary to initialize it first, using LZ4_initStream().
+ *
+ * After init, start any new stream with LZ4_resetStream_fast().
+ * A same LZ4_stream_t can be re-used multiple times consecutively
+ * and compress multiple streams,
+ * provided that it starts each new stream with LZ4_resetStream_fast().
+ *
+ * LZ4_resetStream_fast() is much faster than LZ4_initStream(),
+ * but is not compatible with memory regions containing garbage data.
+ *
+ * Note: it's only useful to call LZ4_resetStream_fast()
+ * in the context of streaming compression.
+ * The *extState* functions perform their own resets.
+ * Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive.
+ */
+LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
+
+/*! LZ4_loadDict() :
+ * Use this function to reference a static dictionary into LZ4_stream_t.
+ * The dictionary must remain available during compression.
+ * LZ4_loadDict() triggers a reset, so any previous data will be forgotten.
+ * The same dictionary will have to be loaded on decompression side for successful decoding.
+ * Dictionaries are useful for better compression of small data (KB range).
+ * While LZ4 itself accepts any input as dictionary, dictionary efficiency is also a topic.
+ * When in doubt, employ the Zstandard's Dictionary Builder. + * Loading a size of 0 is allowed, and is the same as reset. + * @return : loaded dictionary size, in bytes (note: only the last 64 KB are loaded) + */ +LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize); + +/*! LZ4_loadDictSlow() : v1.10.0+ + * Same as LZ4_loadDict(), + * but uses a bit more cpu to reference the dictionary content more thoroughly. + * This is expected to slightly improve compression ratio. + * The extra-cpu cost is likely worth it if the dictionary is re-used across multiple sessions. + * @return : loaded dictionary size, in bytes (note: only the last 64 KB are loaded) + */ +LZ4LIB_API int LZ4_loadDictSlow(LZ4_stream_t* streamPtr, const char* dictionary, int dictSize); + +/*! LZ4_attach_dictionary() : stable since v1.10.0 + * + * This allows efficient re-use of a static dictionary multiple times. + * + * Rather than re-loading the dictionary buffer into a working context before + * each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a + * working LZ4_stream_t, this function introduces a no-copy setup mechanism, + * in which the working stream references @dictionaryStream in-place. + * + * Several assumptions are made about the state of @dictionaryStream. + * Currently, only states which have been prepared by LZ4_loadDict() or + * LZ4_loadDictSlow() should be expected to work. + * + * Alternatively, the provided @dictionaryStream may be NULL, + * in which case any existing dictionary stream is unset. + * + * If a dictionary is provided, it replaces any pre-existing stream history. + * The dictionary contents are the only history that can be referenced and + * logically immediately precede the data compressed in the first subsequent + * compression call. + * + * The dictionary will only remain attached to the working stream through the + * first compression call, at the end of which it is cleared. 
+ * @dictionaryStream stream (and source buffer) must remain in-place / accessible / unchanged + * through the completion of the compression session. + * + * Note: there is no equivalent LZ4_attach_*() method on the decompression side + * because there is no initialization cost, hence no need to share the cost across multiple sessions. + * To decompress LZ4 blocks using dictionary, attached or not, + * just employ the regular LZ4_setStreamDecode() for streaming, + * or the stateless LZ4_decompress_safe_usingDict() for one-shot decompression. + */ +LZ4LIB_API void +LZ4_attach_dictionary(LZ4_stream_t* workingStream, + const LZ4_stream_t* dictionaryStream); + +/*! LZ4_compress_fast_continue() : + * Compress 'src' content using data from previously compressed blocks, for better compression ratio. + * 'dst' buffer must be already allocated. + * If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster. + * + * @return : size of compressed block + * or 0 if there is an error (typically, cannot fit into 'dst'). + * + * Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block. + * Each block has precise boundaries. + * Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata. + * It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together. + * + * Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory ! + * + * Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB. + * Make sure that buffers are separated, by at least one byte. + * This construction ensures that each block only depends on previous block. + * + * Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB. + * + * Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed. 
+ */ +LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + +/*! LZ4_saveDict() : + * If last 64KB data cannot be guaranteed to remain available at its current memory location, + * save it into a safer place (char* safeBuffer). + * This is schematically equivalent to a memcpy() followed by LZ4_loadDict(), + * but is much faster, because LZ4_saveDict() doesn't need to rebuild tables. + * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error. + */ +LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize); + + +/*-********************************************** +* Streaming Decompression Functions +* Bufferless synchronous API +************************************************/ +typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */ + +/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() : + * creation / destruction of streaming decompression tracking context. + * A tracking context can be re-used multiple times. + */ +#if !defined(RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros */ +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void); +LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream); +#endif /* !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) */ +#endif + +/*! LZ4_setStreamDecode() : + * An LZ4_streamDecode_t context can be allocated once and re-used multiple times. + * Use this function to start decompression of a new stream of blocks. + * A dictionary can optionally be set. Use NULL or size 0 for a reset order. + * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. 
+ * @return : 1 if OK, 0 if error + */ +LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); + +/*! LZ4_decoderRingBufferSize() : v1.8.2+ + * Note : in a ring buffer scenario (optional), + * blocks are presumed decompressed next to each other + * up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize), + * at which stage it resumes from beginning of ring buffer. + * When setting such a ring buffer for streaming decompression, + * provides the minimum size of this ring buffer + * to be compatible with any source respecting maxBlockSize condition. + * @return : minimum ring buffer size, + * or 0 if there is an error (invalid maxBlockSize). + */ +LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize); +#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize)) /* for static allocation; maxBlockSize presumed valid */ + +/*! LZ4_decompress_safe_continue() : + * This decoding function allows decompression of consecutive blocks in "streaming" mode. + * The difference with the usual independent blocks is that + * new blocks are allowed to find references into former blocks. + * A block is an unsplittable entity, and must be presented entirely to the decompression function. + * LZ4_decompress_safe_continue() only accepts one block at a time. + * It's modeled after `LZ4_decompress_safe()` and behaves similarly. + * + * @LZ4_streamDecode : decompression state, tracking the position in memory of past data + * @compressedSize : exact complete size of one compressed block. + * @dstCapacity : size of destination buffer (which must be already allocated), + * must be an upper bound of decompressed size. + * @return : number of bytes decompressed into destination buffer (necessarily <= dstCapacity) + * If destination buffer is not large enough, decoding will stop and output an error code (negative value). 
+ * If the source stream is detected malformed, the function will stop decoding and return a negative result. + * + * The last 64KB of previously decoded data *must* remain available and unmodified + * at the memory position where they were previously decoded. + * If less than 64KB of data has been decoded, all the data must be present. + * + * Special : if decompression side sets a ring buffer, it must respect one of the following conditions : + * - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize). + * maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes. + * In which case, encoding and decoding buffers do not need to be synchronized. + * Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize. + * - Synchronized mode : + * Decompression buffer size is _exactly_ the same as compression buffer size, + * and follows exactly same update rule (block boundaries at same positions), + * and decoding function is provided with exact decompressed size of each block (exception for last block of the stream), + * _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB). + * - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes. + * In which case, encoding and decoding buffers do not need to be synchronized, + * and encoding ring buffer can have any size, including small ones ( < 64 KB). + * + * Whenever these conditions are not possible, + * save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression, + * then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block. +*/ +LZ4LIB_API int +LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, + const char* src, char* dst, + int srcSize, int dstCapacity); + + +/*! 
LZ4_decompress_safe_usingDict() : + * Works the same as + * a combination of LZ4_setStreamDecode() followed by LZ4_decompress_safe_continue() + * However, it's stateless: it doesn't need any LZ4_streamDecode_t state. + * Dictionary is presumed stable : it must remain accessible and unmodified during decompression. + * Performance tip : Decompression speed can be substantially increased + * when dst == dictStart + dictSize. + */ +LZ4LIB_API int +LZ4_decompress_safe_usingDict(const char* src, char* dst, + int srcSize, int dstCapacity, + const char* dictStart, int dictSize); + +/*! LZ4_decompress_safe_partial_usingDict() : + * Behaves the same as LZ4_decompress_safe_partial() + * with the added ability to specify a memory segment for past data. + * Performance tip : Decompression speed can be substantially increased + * when dst == dictStart + dictSize. + */ +LZ4LIB_API int +LZ4_decompress_safe_partial_usingDict(const char* src, char* dst, + int compressedSize, + int targetOutputSize, int maxOutputSize, + const char* dictStart, int dictSize); + +#endif /* LZ4_H_2983827168210 */ + + +/*^************************************* + * !!!!!! STATIC LINKING ONLY !!!!!! + ***************************************/ + +/*-**************************************************************************** + * Experimental section + * + * Symbols declared in this section must be considered unstable. Their + * signatures or semantics may change, or they may be removed altogether in the + * future. They are therefore only safe to depend on when the caller is + * statically linked against the library. + * + * To protect against unsafe usage, not only are the declarations guarded, + * the definitions are hidden by default + * when building LZ4 as a shared/dynamic library. + * + * In order to access these declarations, + * define LZ4_STATIC_LINKING_ONLY in your application + * before including LZ4's headers. 
+ * + * In order to make their implementations accessible dynamically, you must + * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library. + ******************************************************************************/ + +#ifdef LZ4_STATIC_LINKING_ONLY + +#ifndef LZ4_STATIC_3504398509 +#define LZ4_STATIC_3504398509 + +#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS +# define LZ4LIB_STATIC_API LZ4LIB_API +#else +# define LZ4LIB_STATIC_API +#endif + + +/*! LZ4_compress_fast_extState_fastReset() : + * A variant of LZ4_compress_fast_extState(). + * + * Using this variant avoids an expensive initialization step. + * It is only safe to call if the state buffer is known to be correctly initialized already + * (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized"). + * From a high level, the difference is that + * this function initializes the provided state with a call to something like LZ4_resetStream_fast() + * while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream(). + */ +LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + +/*! LZ4_compress_destSize_extState() : introduced in v1.10.0 + * Same as LZ4_compress_destSize(), but using an externally allocated state. + * Also: exposes @acceleration + */ +int LZ4_compress_destSize_extState(void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration); + +/*! In-place compression and decompression + * + * It's possible to have input and output sharing the same buffer, + * for highly constrained memory environments. + * In both cases, it requires input to lay at the end of the buffer, + * and decompression to start at beginning of the buffer. + * Buffer size must feature some margin, hence be larger than final size. 
+ * + * |<------------------------buffer--------------------------------->| + * |<-----------compressed data--------->| + * |<-----------decompressed size------------------>| + * |<----margin---->| + * + * This technique is more useful for decompression, + * since decompressed size is typically larger, + * and margin is short. + * + * In-place decompression will work inside any buffer + * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize). + * This presumes that decompressedSize > compressedSize. + * Otherwise, it means compression actually expanded data, + * and it would be more efficient to store such data with a flag indicating it's not compressed. + * This can happen when data is not compressible (already compressed, or encrypted). + * + * For in-place compression, margin is larger, as it must be able to cope with both + * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX, + * and data expansion, which can happen when input is not compressible. + * As a consequence, buffer size requirements are much higher, + * and memory savings offered by in-place compression are more limited. + * + * There are ways to limit this cost for compression : + * - Reduce history size, by modifying LZ4_DISTANCE_MAX. + * Note that it is a compile-time constant, so all compressions will apply this limit. + * Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX, + * so it's a reasonable trick when inputs are known to be small. + * - Require the compressor to deliver a "maximum compressed size". + * This is the `dstCapacity` parameter in `LZ4_compress*()`. + * When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail, + * in which case, the return code will be 0 (zero). + * The caller must be ready for these cases to happen, + * and typically design a backup scheme to send data uncompressed. 
+ * The combination of both techniques can significantly reduce + * the amount of margin required for in-place compression. + * + * In-place compression can work in any buffer + * which size is >= (maxCompressedSize) + * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success. + * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX, + * so it's possible to reduce memory requirements by playing with them. + */ + +#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32) +#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */ + +#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */ +# define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */ +#endif + +#define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */ +#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */ + +#endif /* LZ4_STATIC_3504398509 */ +#endif /* LZ4_STATIC_LINKING_ONLY */ + + + +#ifndef LZ4_H_98237428734687 +#define LZ4_H_98237428734687 + +/*-************************************************************ + * Private Definitions + ************************************************************** + * Do not use these definitions directly. + * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`. + * Accessing members will expose user code to API and/or ABI break in future versions of the library. 
+ **************************************************************/
+#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */
+
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+ typedef int8_t LZ4_i8;
+ typedef uint8_t LZ4_byte;
+ typedef uint16_t LZ4_u16;
+ typedef uint32_t LZ4_u32;
+#else
+ typedef signed char LZ4_i8;
+ typedef unsigned char LZ4_byte;
+ typedef unsigned short LZ4_u16;
+ typedef unsigned int LZ4_u32;
+#endif
+
+/*! LZ4_stream_t :
+ * Never ever use below internal definitions directly !
+ * These definitions are not API/ABI safe, and may change in future versions.
+ * If you need static allocation, declare or allocate an LZ4_stream_t object.
+**/
+
+typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
+struct LZ4_stream_t_internal {
+ LZ4_u32 hashTable[LZ4_HASH_SIZE_U32];
+ const LZ4_byte* dictionary;
+ const LZ4_stream_t_internal* dictCtx;
+ LZ4_u32 currentOffset;
+ LZ4_u32 tableType;
+ LZ4_u32 dictSize;
+ /* Implicit padding to ensure structure is aligned */
+};
+
+#define LZ4_STREAM_MINSIZE ((1UL << (LZ4_MEMORY_USAGE)) + 32) /* static size, for inter-version compatibility */
+union LZ4_stream_u {
+ char minStateSize[LZ4_STREAM_MINSIZE];
+ LZ4_stream_t_internal internal_donotuse;
+}; /* previously typedef'd to LZ4_stream_t */
+
+
+/*! LZ4_initStream() : v1.9.0+
+ * An LZ4_stream_t structure must be initialized at least once.
+ * This is automatically done when invoking LZ4_createStream(),
+ * but it's not when the structure is simply declared on stack (for example).
+ *
+ * Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t.
+ * It can also initialize any arbitrary buffer of sufficient size,
+ * and will @return a pointer of proper type upon initialization.
+ * + * Note : initialization fails if size and alignment conditions are not respected. + * In which case, the function will @return NULL. + * Note2: An LZ4_stream_t structure guarantees correct alignment and size. + * Note3: Before v1.9.0, use LZ4_resetStream() instead +**/ +LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* stateBuffer, size_t size); + + +/*! LZ4_streamDecode_t : + * Never ever use below internal definitions directly ! + * These definitions are not API/ABI safe, and may change in future versions. + * If you need static allocation, declare or allocate an LZ4_streamDecode_t object. +**/ +typedef struct { + const LZ4_byte* externalDict; + const LZ4_byte* prefixEnd; + size_t extDictSize; + size_t prefixSize; +} LZ4_streamDecode_t_internal; + +#define LZ4_STREAMDECODE_MINSIZE 32 +union LZ4_streamDecode_u { + char minStateSize[LZ4_STREAMDECODE_MINSIZE]; + LZ4_streamDecode_t_internal internal_donotuse; +} ; /* previously typedef'd to LZ4_streamDecode_t */ + + + +/*-************************************ +* Obsolete Functions +**************************************/ + +/*! Deprecation warnings + * + * Deprecated functions make the compiler generate a warning when invoked. + * This is meant to invite users to update their source code. + * Should deprecation warnings be a problem, it is generally possible to disable them, + * typically with -Wno-deprecated-declarations for gcc + * or _CRT_SECURE_NO_WARNINGS in Visual. + * + * Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS + * before including the header file. 
+ */ +#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS +# define LZ4_DEPRECATED(message) /* disable deprecation warnings */ +#else +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define LZ4_DEPRECATED(message) [[deprecated(message)]] +# elif defined(_MSC_VER) +# define LZ4_DEPRECATED(message) __declspec(deprecated(message)) +# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45)) +# define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) +# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31) +# define LZ4_DEPRECATED(message) __attribute__((deprecated)) +# else +# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler") +# define LZ4_DEPRECATED(message) /* disabled */ +# endif +#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */ + +/*! Obsolete compression functions (since v1.7.3) */ +LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize); +LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize); + +/*! 
Obsolete decompression functions (since v1.8.0) */ +LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize); +LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); + +/* Obsolete streaming functions (since v1.7.0) + * degraded functionality; do not use! + * + * In order to perform streaming compression, these functions depended on data + * that is no longer tracked in the state. They have been preserved as well as + * possible: using them will still produce a correct output. However, they don't + * actually retain any history between compression calls. The compression ratio + * achieved will therefore be no better than compressing each chunk + * independently. + */ +LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer); +LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStreamState(void); +LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer); +LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state); + +/*! Obsolete streaming decoding functions (since v1.7.0) */ +LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize); +LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize); + +/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) : + * These functions used to be faster than LZ4_decompress_safe(), + * but this is no longer the case. They are now slower. 
+ * This is because LZ4_decompress_fast() doesn't know the input size, + * and therefore must progress more cautiously into the input buffer to not read beyond the end of block. + * On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability. + * As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated. + * + * The last remaining LZ4_decompress_fast() specificity is that + * it can decompress a block without knowing its compressed size. + * Such functionality can be achieved in a more secure manner + * by employing LZ4_decompress_safe_partial(). + * + * Parameters: + * originalSize : is the uncompressed size to regenerate. + * `dst` must be already allocated, its size must be >= 'originalSize' bytes. + * @return : number of bytes read from source buffer (== compressed size). + * The function expects to finish at block's end exactly. + * If the source stream is detected malformed, the function stops decoding and returns a negative result. + * note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer. + * However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds. + * Also, since match offsets are not validated, match reads from 'src' may underflow too. + * These issues never happen if input (compressed) data is correct. + * But they may happen if input data is invalid (error or intentional tampering). + * As a consequence, use these functions in trusted environments with trusted data **only**. + */ +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_partial() instead") +LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider migrating towards LZ4_decompress_safe_continue() instead. 
" + "Note that the contract will change (requires block's compressed size, instead of decompressed size)") +LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_partial_usingDict() instead") +LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize); + +/*! LZ4_resetStream() : + * An LZ4_stream_t structure must be initialized at least once. + * This is done with LZ4_initStream(), or LZ4_resetStream(). + * Consider switching to LZ4_initStream(), + * invoking LZ4_resetStream() will trigger deprecation warnings in the future. + */ +LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr); + + +#endif /* LZ4_H_98237428734687 */ + + +#if defined (__cplusplus) +} +#endif diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c deleted file mode 100644 index 90bb67994688..000000000000 --- a/lib/lz4/lz4_compress.c +++ /dev/null @@ -1,940 +0,0 @@ -/* - * LZ4 - Fast LZ compression algorithm - * Copyright (C) 2011 - 2016, Yann Collet. - * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php) - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * You can contact the author at : - * - LZ4 homepage : http://www.lz4.org - * - LZ4 source repository : https://github.com/lz4/lz4 - * - * Changed for kernel usage by: - * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> - */ - -/*-************************************ - * Dependencies - **************************************/ -#include -#include "lz4defs.h" -#include -#include -#include - -static const int LZ4_minLength = (MFLIMIT + 1); -static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT - 1)); - -/*-****************************** - * Compression functions - ********************************/ -static FORCE_INLINE U32 LZ4_hash4( - U32 sequence, - tableType_t const tableType) -{ - if (tableType == byU16) - return ((sequence * 2654435761U) - >> ((MINMATCH * 8) - (LZ4_HASHLOG + 1))); - else - return ((sequence * 2654435761U) - >> ((MINMATCH * 8) - LZ4_HASHLOG)); -} - -static FORCE_INLINE U32 LZ4_hash5( - U64 sequence, - tableType_t const tableType) -{ - const U32 hashLog = (tableType == byU16) - ? 
LZ4_HASHLOG + 1 - : LZ4_HASHLOG; - -#if LZ4_LITTLE_ENDIAN - static const U64 prime5bytes = 889523592379ULL; - - return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog)); -#else - static const U64 prime8bytes = 11400714785074694791ULL; - - return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog)); -#endif -} - -static FORCE_INLINE U32 LZ4_hashPosition( - const void *p, - tableType_t const tableType) -{ -#if LZ4_ARCH64 - if (tableType == byU32) - return LZ4_hash5(LZ4_read_ARCH(p), tableType); -#endif - - return LZ4_hash4(LZ4_read32(p), tableType); -} - -static void LZ4_putPositionOnHash( - const BYTE *p, - U32 h, - void *tableBase, - tableType_t const tableType, - const BYTE *srcBase) -{ - switch (tableType) { - case byPtr: - { - const BYTE **hashTable = (const BYTE **)tableBase; - - hashTable[h] = p; - return; - } - case byU32: - { - U32 *hashTable = (U32 *) tableBase; - - hashTable[h] = (U32)(p - srcBase); - return; - } - case byU16: - { - U16 *hashTable = (U16 *) tableBase; - - hashTable[h] = (U16)(p - srcBase); - return; - } - } -} - -static FORCE_INLINE void LZ4_putPosition( - const BYTE *p, - void *tableBase, - tableType_t tableType, - const BYTE *srcBase) -{ - U32 const h = LZ4_hashPosition(p, tableType); - - LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); -} - -static const BYTE *LZ4_getPositionOnHash( - U32 h, - void *tableBase, - tableType_t tableType, - const BYTE *srcBase) -{ - if (tableType == byPtr) { - const BYTE **hashTable = (const BYTE **) tableBase; - - return hashTable[h]; - } - - if (tableType == byU32) { - const U32 * const hashTable = (U32 *) tableBase; - - return hashTable[h] + srcBase; - } - - { - /* default, to ensure a return */ - const U16 * const hashTable = (U16 *) tableBase; - - return hashTable[h] + srcBase; - } -} - -static FORCE_INLINE const BYTE *LZ4_getPosition( - const BYTE *p, - void *tableBase, - tableType_t tableType, - const BYTE *srcBase) -{ - U32 const h = LZ4_hashPosition(p, tableType); - - return 
LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); -} - - -/* - * LZ4_compress_generic() : - * inlined, to ensure branches are decided at compilation time - */ -static FORCE_INLINE int LZ4_compress_generic( - LZ4_stream_t_internal * const dictPtr, - const char * const source, - char * const dest, - const int inputSize, - const int maxOutputSize, - const limitedOutput_directive outputLimited, - const tableType_t tableType, - const dict_directive dict, - const dictIssue_directive dictIssue, - const U32 acceleration) -{ - const BYTE *ip = (const BYTE *) source; - const BYTE *base; - const BYTE *lowLimit; - const BYTE * const lowRefLimit = ip - dictPtr->dictSize; - const BYTE * const dictionary = dictPtr->dictionary; - const BYTE * const dictEnd = dictionary + dictPtr->dictSize; - const size_t dictDelta = dictEnd - (const BYTE *)source; - const BYTE *anchor = (const BYTE *) source; - const BYTE * const iend = ip + inputSize; - const BYTE * const mflimit = iend - MFLIMIT; - const BYTE * const matchlimit = iend - LASTLITERALS; - - BYTE *op = (BYTE *) dest; - BYTE * const olimit = op + maxOutputSize; - - U32 forwardH; - size_t refDelta = 0; - - /* Init conditions */ - if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) { - /* Unsupported inputSize, too large (or negative) */ - return 0; - } - - switch (dict) { - case noDict: - default: - base = (const BYTE *)source; - lowLimit = (const BYTE *)source; - break; - case withPrefix64k: - base = (const BYTE *)source - dictPtr->currentOffset; - lowLimit = (const BYTE *)source - dictPtr->dictSize; - break; - case usingExtDict: - base = (const BYTE *)source - dictPtr->currentOffset; - lowLimit = (const BYTE *)source; - break; - } - - if ((tableType == byU16) - && (inputSize >= LZ4_64Klimit)) { - /* Size too large (not within 64K limit) */ - return 0; - } - - if (inputSize < LZ4_minLength) { - /* Input too small, no compression (all literals) */ - goto _last_literals; - } - - /* First Byte */ - LZ4_putPosition(ip, 
dictPtr->hashTable, tableType, base); - ip++; - forwardH = LZ4_hashPosition(ip, tableType); - - /* Main Loop */ - for ( ; ; ) { - const BYTE *match; - BYTE *token; - - /* Find a match */ - { - const BYTE *forwardIp = ip; - unsigned int step = 1; - unsigned int searchMatchNb = acceleration << LZ4_SKIPTRIGGER; - - do { - U32 const h = forwardH; - - ip = forwardIp; - forwardIp += step; - step = (searchMatchNb++ >> LZ4_SKIPTRIGGER); - - if (unlikely(forwardIp > mflimit)) - goto _last_literals; - - match = LZ4_getPositionOnHash(h, - dictPtr->hashTable, - tableType, base); - - if (dict == usingExtDict) { - if (match < (const BYTE *)source) { - refDelta = dictDelta; - lowLimit = dictionary; - } else { - refDelta = 0; - lowLimit = (const BYTE *)source; - } } - - forwardH = LZ4_hashPosition(forwardIp, - tableType); - - LZ4_putPositionOnHash(ip, h, dictPtr->hashTable, - tableType, base); - } while (((dictIssue == dictSmall) - ? (match < lowRefLimit) - : 0) - || ((tableType == byU16) - ? 0 - : (match + MAX_DISTANCE < ip)) - || (LZ4_read32(match + refDelta) - != LZ4_read32(ip))); - } - - /* Catch up */ - while (((ip > anchor) & (match + refDelta > lowLimit)) - && (unlikely(ip[-1] == match[refDelta - 1]))) { - ip--; - match--; - } - - /* Encode Literals */ - { - unsigned const int litLength = (unsigned int)(ip - anchor); - - token = op++; - - if ((outputLimited) && - /* Check output buffer overflow */ - (unlikely(op + litLength + - (2 + 1 + LASTLITERALS) + - (litLength / 255) > olimit))) - return 0; - - if (litLength >= RUN_MASK) { - int len = (int)litLength - RUN_MASK; - - *token = (RUN_MASK << ML_BITS); - - for (; len >= 255; len -= 255) - *op++ = 255; - *op++ = (BYTE)len; - } else - *token = (BYTE)(litLength << ML_BITS); - - /* Copy Literals */ - LZ4_wildCopy(op, anchor, op + litLength); - op += litLength; - } - -_next_match: - /* Encode Offset */ - LZ4_writeLE16(op, (U16)(ip - match)); - op += 2; - - /* Encode MatchLength */ - { - unsigned int matchCode; - - if ((dict == 
usingExtDict) - && (lowLimit == dictionary)) { - const BYTE *limit; - - match += refDelta; - limit = ip + (dictEnd - match); - - if (limit > matchlimit) - limit = matchlimit; - - matchCode = LZ4_count(ip + MINMATCH, - match + MINMATCH, limit); - - ip += MINMATCH + matchCode; - - if (ip == limit) { - unsigned const int more = LZ4_count(ip, - (const BYTE *)source, - matchlimit); - - matchCode += more; - ip += more; - } - } else { - matchCode = LZ4_count(ip + MINMATCH, - match + MINMATCH, matchlimit); - ip += MINMATCH + matchCode; - } - - if (outputLimited && - /* Check output buffer overflow */ - (unlikely(op + - (1 + LASTLITERALS) + - (matchCode >> 8) > olimit))) - return 0; - - if (matchCode >= ML_MASK) { - *token += ML_MASK; - matchCode -= ML_MASK; - LZ4_write32(op, 0xFFFFFFFF); - - while (matchCode >= 4 * 255) { - op += 4; - LZ4_write32(op, 0xFFFFFFFF); - matchCode -= 4 * 255; - } - - op += matchCode / 255; - *op++ = (BYTE)(matchCode % 255); - } else - *token += (BYTE)(matchCode); - } - - anchor = ip; - - /* Test end of chunk */ - if (ip > mflimit) - break; - - /* Fill table */ - LZ4_putPosition(ip - 2, dictPtr->hashTable, tableType, base); - - /* Test next position */ - match = LZ4_getPosition(ip, dictPtr->hashTable, - tableType, base); - - if (dict == usingExtDict) { - if (match < (const BYTE *)source) { - refDelta = dictDelta; - lowLimit = dictionary; - } else { - refDelta = 0; - lowLimit = (const BYTE *)source; - } - } - - LZ4_putPosition(ip, dictPtr->hashTable, tableType, base); - - if (((dictIssue == dictSmall) ? 
(match >= lowRefLimit) : 1) - && (match + MAX_DISTANCE >= ip) - && (LZ4_read32(match + refDelta) == LZ4_read32(ip))) { - token = op++; - *token = 0; - goto _next_match; - } - - /* Prepare next loop */ - forwardH = LZ4_hashPosition(++ip, tableType); - } - -_last_literals: - /* Encode Last Literals */ - { - size_t const lastRun = (size_t)(iend - anchor); - - if ((outputLimited) && - /* Check output buffer overflow */ - ((op - (BYTE *)dest) + lastRun + 1 + - ((lastRun + 255 - RUN_MASK) / 255) > (U32)maxOutputSize)) - return 0; - - if (lastRun >= RUN_MASK) { - size_t accumulator = lastRun - RUN_MASK; - *op++ = RUN_MASK << ML_BITS; - for (; accumulator >= 255; accumulator -= 255) - *op++ = 255; - *op++ = (BYTE) accumulator; - } else { - *op++ = (BYTE)(lastRun << ML_BITS); - } - - LZ4_memcpy(op, anchor, lastRun); - - op += lastRun; - } - - /* End */ - return (int) (((char *)op) - dest); -} - -static int LZ4_compress_fast_extState( - void *state, - const char *source, - char *dest, - int inputSize, - int maxOutputSize, - int acceleration) -{ - LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse; -#if LZ4_ARCH64 - const tableType_t tableType = byU32; -#else - const tableType_t tableType = byPtr; -#endif - - LZ4_resetStream((LZ4_stream_t *)state); - - if (acceleration < 1) - acceleration = LZ4_ACCELERATION_DEFAULT; - - if (maxOutputSize >= LZ4_COMPRESSBOUND(inputSize)) { - if (inputSize < LZ4_64Klimit) - return LZ4_compress_generic(ctx, source, - dest, inputSize, 0, - noLimit, byU16, noDict, - noDictIssue, acceleration); - else - return LZ4_compress_generic(ctx, source, - dest, inputSize, 0, - noLimit, tableType, noDict, - noDictIssue, acceleration); - } else { - if (inputSize < LZ4_64Klimit) - return LZ4_compress_generic(ctx, source, - dest, inputSize, - maxOutputSize, limitedOutput, byU16, noDict, - noDictIssue, acceleration); - else - return LZ4_compress_generic(ctx, source, - dest, inputSize, - maxOutputSize, limitedOutput, tableType, noDict, - 
noDictIssue, acceleration); - } -} - -int LZ4_compress_fast(const char *source, char *dest, int inputSize, - int maxOutputSize, int acceleration, void *wrkmem) -{ - return LZ4_compress_fast_extState(wrkmem, source, dest, inputSize, - maxOutputSize, acceleration); -} -EXPORT_SYMBOL(LZ4_compress_fast); - -int LZ4_compress_default(const char *source, char *dest, int inputSize, - int maxOutputSize, void *wrkmem) -{ - return LZ4_compress_fast(source, dest, inputSize, - maxOutputSize, LZ4_ACCELERATION_DEFAULT, wrkmem); -} -EXPORT_SYMBOL(LZ4_compress_default); - -/*-****************************** - * *_destSize() variant - ********************************/ -static int LZ4_compress_destSize_generic( - LZ4_stream_t_internal * const ctx, - const char * const src, - char * const dst, - int * const srcSizePtr, - const int targetDstSize, - const tableType_t tableType) -{ - const BYTE *ip = (const BYTE *) src; - const BYTE *base = (const BYTE *) src; - const BYTE *lowLimit = (const BYTE *) src; - const BYTE *anchor = ip; - const BYTE * const iend = ip + *srcSizePtr; - const BYTE * const mflimit = iend - MFLIMIT; - const BYTE * const matchlimit = iend - LASTLITERALS; - - BYTE *op = (BYTE *) dst; - BYTE * const oend = op + targetDstSize; - BYTE * const oMaxLit = op + targetDstSize - 2 /* offset */ - - 8 /* because 8 + MINMATCH == MFLIMIT */ - 1 /* token */; - BYTE * const oMaxMatch = op + targetDstSize - - (LASTLITERALS + 1 /* token */); - BYTE * const oMaxSeq = oMaxLit - 1 /* token */; - - U32 forwardH; - - /* Init conditions */ - /* Impossible to store anything */ - if (targetDstSize < 1) - return 0; - /* Unsupported input size, too large (or negative) */ - if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) - return 0; - /* Size too large (not within 64K limit) */ - if ((tableType == byU16) && (*srcSizePtr >= LZ4_64Klimit)) - return 0; - /* Input too small, no compression (all literals) */ - if (*srcSizePtr < LZ4_minLength) - goto _last_literals; - - /* First Byte */ - *srcSizePtr 
= 0; - LZ4_putPosition(ip, ctx->hashTable, tableType, base); - ip++; forwardH = LZ4_hashPosition(ip, tableType); - - /* Main Loop */ - for ( ; ; ) { - const BYTE *match; - BYTE *token; - - /* Find a match */ - { - const BYTE *forwardIp = ip; - unsigned int step = 1; - unsigned int searchMatchNb = 1 << LZ4_SKIPTRIGGER; - - do { - U32 h = forwardH; - - ip = forwardIp; - forwardIp += step; - step = (searchMatchNb++ >> LZ4_SKIPTRIGGER); - - if (unlikely(forwardIp > mflimit)) - goto _last_literals; - - match = LZ4_getPositionOnHash(h, ctx->hashTable, - tableType, base); - forwardH = LZ4_hashPosition(forwardIp, - tableType); - LZ4_putPositionOnHash(ip, h, - ctx->hashTable, tableType, - base); - - } while (((tableType == byU16) - ? 0 - : (match + MAX_DISTANCE < ip)) - || (LZ4_read32(match) != LZ4_read32(ip))); - } - - /* Catch up */ - while ((ip > anchor) - && (match > lowLimit) - && (unlikely(ip[-1] == match[-1]))) { - ip--; - match--; - } - - /* Encode Literal length */ - { - unsigned int litLength = (unsigned int)(ip - anchor); - - token = op++; - if (op + ((litLength + 240) / 255) - + litLength > oMaxLit) { - /* Not enough space for a last match */ - op--; - goto _last_literals; - } - if (litLength >= RUN_MASK) { - unsigned int len = litLength - RUN_MASK; - *token = (RUN_MASK<= 255; len -= 255) - *op++ = 255; - *op++ = (BYTE)len; - } else - *token = (BYTE)(litLength << ML_BITS); - - /* Copy Literals */ - LZ4_wildCopy(op, anchor, op + litLength); - op += litLength; - } - -_next_match: - /* Encode Offset */ - LZ4_writeLE16(op, (U16)(ip - match)); op += 2; - - /* Encode MatchLength */ - { - size_t matchLength = LZ4_count(ip + MINMATCH, - match + MINMATCH, matchlimit); - - if (op + ((matchLength + 240)/255) > oMaxMatch) { - /* Match description too long : reduce it */ - matchLength = (15 - 1) + (oMaxMatch - op) * 255; - } - ip += MINMATCH + matchLength; - - if (matchLength >= ML_MASK) { - *token += ML_MASK; - matchLength -= ML_MASK; - while (matchLength >= 255) { - 
matchLength -= 255; - *op++ = 255; - } - *op++ = (BYTE)matchLength; - } else - *token += (BYTE)(matchLength); - } - - anchor = ip; - - /* Test end of block */ - if (ip > mflimit) - break; - if (op > oMaxSeq) - break; - - /* Fill table */ - LZ4_putPosition(ip - 2, ctx->hashTable, tableType, base); - - /* Test next position */ - match = LZ4_getPosition(ip, ctx->hashTable, tableType, base); - LZ4_putPosition(ip, ctx->hashTable, tableType, base); - - if ((match + MAX_DISTANCE >= ip) - && (LZ4_read32(match) == LZ4_read32(ip))) { - token = op++; *token = 0; - goto _next_match; - } - - /* Prepare next loop */ - forwardH = LZ4_hashPosition(++ip, tableType); - } - -_last_literals: - /* Encode Last Literals */ - { - size_t lastRunSize = (size_t)(iend - anchor); - - if (op + 1 /* token */ - + ((lastRunSize + 240) / 255) /* litLength */ - + lastRunSize /* literals */ > oend) { - /* adapt lastRunSize to fill 'dst' */ - lastRunSize = (oend - op) - 1; - lastRunSize -= (lastRunSize + 240) / 255; - } - ip = anchor + lastRunSize; - - if (lastRunSize >= RUN_MASK) { - size_t accumulator = lastRunSize - RUN_MASK; - - *op++ = RUN_MASK << ML_BITS; - for (; accumulator >= 255; accumulator -= 255) - *op++ = 255; - *op++ = (BYTE) accumulator; - } else { - *op++ = (BYTE)(lastRunSize<= LZ4_COMPRESSBOUND(*srcSizePtr)) { - /* compression success is guaranteed */ - return LZ4_compress_fast_extState( - state, src, dst, *srcSizePtr, - targetDstSize, 1); - } else { - if (*srcSizePtr < LZ4_64Klimit) - return LZ4_compress_destSize_generic( - &state->internal_donotuse, - src, dst, srcSizePtr, - targetDstSize, byU16); - else - return LZ4_compress_destSize_generic( - &state->internal_donotuse, - src, dst, srcSizePtr, - targetDstSize, tableType); - } -} - - -int LZ4_compress_destSize( - const char *src, - char *dst, - int *srcSizePtr, - int targetDstSize, - void *wrkmem) -{ - return LZ4_compress_destSize_extState(wrkmem, src, dst, srcSizePtr, - targetDstSize); -} -EXPORT_SYMBOL(LZ4_compress_destSize); - 
-/*-****************************** - * Streaming functions - ********************************/ -void LZ4_resetStream(LZ4_stream_t *LZ4_stream) -{ - memset(LZ4_stream, 0, sizeof(LZ4_stream_t)); -} - -int LZ4_loadDict(LZ4_stream_t *LZ4_dict, - const char *dictionary, int dictSize) -{ - LZ4_stream_t_internal *dict = &LZ4_dict->internal_donotuse; - const BYTE *p = (const BYTE *)dictionary; - const BYTE * const dictEnd = p + dictSize; - const BYTE *base; - - if ((dict->initCheck) - || (dict->currentOffset > 1 * GB)) { - /* Uninitialized structure, or reuse overflow */ - LZ4_resetStream(LZ4_dict); - } - - if (dictSize < (int)HASH_UNIT) { - dict->dictionary = NULL; - dict->dictSize = 0; - return 0; - } - - if ((dictEnd - p) > 64 * KB) - p = dictEnd - 64 * KB; - dict->currentOffset += 64 * KB; - base = p - dict->currentOffset; - dict->dictionary = p; - dict->dictSize = (U32)(dictEnd - p); - dict->currentOffset += dict->dictSize; - - while (p <= dictEnd - HASH_UNIT) { - LZ4_putPosition(p, dict->hashTable, byU32, base); - p += 3; - } - - return dict->dictSize; -} -EXPORT_SYMBOL(LZ4_loadDict); - -static void LZ4_renormDictT(LZ4_stream_t_internal *LZ4_dict, - const BYTE *src) -{ - if ((LZ4_dict->currentOffset > 0x80000000) || - ((uptrval)LZ4_dict->currentOffset > (uptrval)src)) { - /* address space overflow */ - /* rescale hash table */ - U32 const delta = LZ4_dict->currentOffset - 64 * KB; - const BYTE *dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; - int i; - - for (i = 0; i < LZ4_HASH_SIZE_U32; i++) { - if (LZ4_dict->hashTable[i] < delta) - LZ4_dict->hashTable[i] = 0; - else - LZ4_dict->hashTable[i] -= delta; - } - LZ4_dict->currentOffset = 64 * KB; - if (LZ4_dict->dictSize > 64 * KB) - LZ4_dict->dictSize = 64 * KB; - LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; - } -} - -int LZ4_saveDict(LZ4_stream_t *LZ4_dict, char *safeBuffer, int dictSize) -{ - LZ4_stream_t_internal * const dict = &LZ4_dict->internal_donotuse; - const BYTE * const previousDictEnd = 
dict->dictionary + dict->dictSize; - - if ((U32)dictSize > 64 * KB) { - /* useless to define a dictionary > 64 * KB */ - dictSize = 64 * KB; - } - if ((U32)dictSize > dict->dictSize) - dictSize = dict->dictSize; - - memmove(safeBuffer, previousDictEnd - dictSize, dictSize); - - dict->dictionary = (const BYTE *)safeBuffer; - dict->dictSize = (U32)dictSize; - - return dictSize; -} -EXPORT_SYMBOL(LZ4_saveDict); - -int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source, - char *dest, int inputSize, int maxOutputSize, int acceleration) -{ - LZ4_stream_t_internal *streamPtr = &LZ4_stream->internal_donotuse; - const BYTE * const dictEnd = streamPtr->dictionary - + streamPtr->dictSize; - - const BYTE *smallest = (const BYTE *) source; - - if (streamPtr->initCheck) { - /* Uninitialized structure detected */ - return 0; - } - - if ((streamPtr->dictSize > 0) && (smallest > dictEnd)) - smallest = dictEnd; - - LZ4_renormDictT(streamPtr, smallest); - - if (acceleration < 1) - acceleration = LZ4_ACCELERATION_DEFAULT; - - /* Check overlapping input/dictionary space */ - { - const BYTE *sourceEnd = (const BYTE *) source + inputSize; - - if ((sourceEnd > streamPtr->dictionary) - && (sourceEnd < dictEnd)) { - streamPtr->dictSize = (U32)(dictEnd - sourceEnd); - if (streamPtr->dictSize > 64 * KB) - streamPtr->dictSize = 64 * KB; - if (streamPtr->dictSize < 4) - streamPtr->dictSize = 0; - streamPtr->dictionary = dictEnd - streamPtr->dictSize; - } - } - - /* prefix mode : source data follows dictionary */ - if (dictEnd == (const BYTE *)source) { - int result; - - if ((streamPtr->dictSize < 64 * KB) && - (streamPtr->dictSize < streamPtr->currentOffset)) { - result = LZ4_compress_generic( - streamPtr, source, dest, inputSize, - maxOutputSize, limitedOutput, byU32, - withPrefix64k, dictSmall, acceleration); - } else { - result = LZ4_compress_generic( - streamPtr, source, dest, inputSize, - maxOutputSize, limitedOutput, byU32, - withPrefix64k, noDictIssue, acceleration); 
- } - streamPtr->dictSize += (U32)inputSize; - streamPtr->currentOffset += (U32)inputSize; - return result; - } - - /* external dictionary mode */ - { - int result; - - if ((streamPtr->dictSize < 64 * KB) && - (streamPtr->dictSize < streamPtr->currentOffset)) { - result = LZ4_compress_generic( - streamPtr, source, dest, inputSize, - maxOutputSize, limitedOutput, byU32, - usingExtDict, dictSmall, acceleration); - } else { - result = LZ4_compress_generic( - streamPtr, source, dest, inputSize, - maxOutputSize, limitedOutput, byU32, - usingExtDict, noDictIssue, acceleration); - } - streamPtr->dictionary = (const BYTE *)source; - streamPtr->dictSize = (U32)inputSize; - streamPtr->currentOffset += (U32)inputSize; - return result; - } -} -EXPORT_SYMBOL(LZ4_compress_fast_continue); - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("LZ4 compressor"); diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c deleted file mode 100644 index 3c30bf193a40..000000000000 --- a/lib/lz4/lz4_decompress.c +++ /dev/null @@ -1,717 +0,0 @@ -/* - * LZ4 - Fast LZ compression algorithm - * Copyright (C) 2011 - 2016, Yann Collet. - * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php) - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * You can contact the author at : - * - LZ4 homepage : http://www.lz4.org - * - LZ4 source repository : https://github.com/lz4/lz4 - * - * Changed for kernel usage by: - * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> - */ - -/*-************************************ - * Dependencies - **************************************/ -#include -#include "lz4defs.h" -#include -#include -#include -#include - -/*-***************************** - * Decompression functions - *******************************/ - -#define DEBUGLOG(l, ...) {} /* disabled */ - -#ifndef assert -#define assert(condition) ((void)0) -#endif - -/* - * LZ4_decompress_generic() : - * This generic decompression function covers all use cases. - * It shall be instantiated several times, using different sets of directives. - * Note that it is important for performance that this function really get inlined, - * in order to remove useless branches during compilation optimization. 
- */ -static FORCE_INLINE int LZ4_decompress_generic( - const char * const src, - char * const dst, - int srcSize, - /* - * If endOnInput == endOnInputSize, - * this value is `dstCapacity` - */ - int outputSize, - /* endOnOutputSize, endOnInputSize */ - endCondition_directive endOnInput, - /* full, partial */ - earlyEnd_directive partialDecoding, - /* noDict, withPrefix64k, usingExtDict */ - dict_directive dict, - /* always <= dst, == dst when no prefix */ - const BYTE * const lowPrefix, - /* only if dict == usingExtDict */ - const BYTE * const dictStart, - /* note : = 0 if noDict */ - const size_t dictSize - ) -{ - const BYTE *ip = (const BYTE *) src; - const BYTE * const iend = ip + srcSize; - - BYTE *op = (BYTE *) dst; - BYTE * const oend = op + outputSize; - BYTE *cpy; - - const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize; - static const unsigned int inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4}; - static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3}; - - const int safeDecode = (endOnInput == endOnInputSize); - const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB))); - - /* Set up the "end" pointers for the shortcut. */ - const BYTE *const shortiend = iend - - (endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/; - const BYTE *const shortoend = oend - - (endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/; - - DEBUGLOG(5, "%s (srcSize:%i, dstSize:%i)", __func__, - srcSize, outputSize); - - /* Special cases */ - assert(lowPrefix <= op); - assert(src != NULL); - - /* Empty output buffer */ - if ((endOnInput) && (unlikely(outputSize == 0))) - return ((srcSize == 1) && (*ip == 0)) ? 0 : -1; - - if ((!endOnInput) && (unlikely(outputSize == 0))) - return (*ip == 0 ? 
1 : -1); - - if ((endOnInput) && unlikely(srcSize == 0)) - return -1; - - /* Main Loop : decode sequences */ - while (1) { - size_t length; - const BYTE *match; - size_t offset; - - /* get literal length */ - unsigned int const token = *ip++; - length = token>>ML_BITS; - - /* ip < iend before the increment */ - assert(!endOnInput || ip <= iend); - - /* - * A two-stage shortcut for the most common case: - * 1) If the literal length is 0..14, and there is enough - * space, enter the shortcut and copy 16 bytes on behalf - * of the literals (in the fast mode, only 8 bytes can be - * safely copied this way). - * 2) Further if the match length is 4..18, copy 18 bytes - * in a similar manner; but we ensure that there's enough - * space in the output for those 18 bytes earlier, upon - * entering the shortcut (in other words, there is a - * combined check for both stages). - */ - if ((endOnInput ? length != RUN_MASK : length <= 8) - /* - * strictly "less than" on input, to re-enter - * the loop with at least one byte - */ - && likely((endOnInput ? ip < shortiend : 1) & - (op <= shortoend))) { - /* Copy the literals */ - LZ4_memcpy(op, ip, endOnInput ? 16 : 8); - op += length; ip += length; - - /* - * The second stage: - * prepare for match copying, decode full info. - * If it doesn't work out, the info won't be wasted. - */ - length = token & ML_MASK; /* match length */ - offset = LZ4_readLE16(ip); - ip += 2; - match = op - offset; - assert(match <= op); /* check overflow */ - - /* Do not deal with overlapping matches. */ - if ((length != ML_MASK) && - (offset >= 8) && - (dict == withPrefix64k || match >= lowPrefix)) { - /* Copy the match. */ - LZ4_memcpy(op + 0, match + 0, 8); - LZ4_memcpy(op + 8, match + 8, 8); - LZ4_memcpy(op + 16, match + 16, 2); - op += length + MINMATCH; - /* Both stages worked, load the next token. */ - continue; - } - - /* - * The second stage didn't work out, but the info - * is ready. Propel it right to the point of match - * copying. 
- */ - goto _copy_match; - } - - /* decode literal length */ - if (length == RUN_MASK) { - unsigned int s; - - if (unlikely(endOnInput ? ip >= iend - RUN_MASK : 0)) { - /* overflow detection */ - goto _output_error; - } - do { - s = *ip++; - length += s; - } while (likely(endOnInput - ? ip < iend - RUN_MASK - : 1) & (s == 255)); - - if ((safeDecode) - && unlikely((uptrval)(op) + - length < (uptrval)(op))) { - /* overflow detection */ - goto _output_error; - } - if ((safeDecode) - && unlikely((uptrval)(ip) + - length < (uptrval)(ip))) { - /* overflow detection */ - goto _output_error; - } - } - - /* copy literals */ - cpy = op + length; - LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); - - if (((endOnInput) && ((cpy > oend - MFLIMIT) - || (ip + length > iend - (2 + 1 + LASTLITERALS)))) - || ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) { - if (partialDecoding) { - if (cpy > oend) { - /* - * Partial decoding : - * stop in the middle of literal segment - */ - cpy = oend; - length = oend - op; - } - if ((endOnInput) - && (ip + length > iend)) { - /* - * Error : - * read attempt beyond - * end of input buffer - */ - goto _output_error; - } - } else { - if ((!endOnInput) - && (cpy != oend)) { - /* - * Error : - * block decoding must - * stop exactly there - */ - goto _output_error; - } - if ((endOnInput) - && ((ip + length != iend) - || (cpy > oend))) { - /* - * Error : - * input must be consumed - */ - goto _output_error; - } - } - - /* - * supports overlapping memory regions; only matters - * for in-place decompression scenarios - */ - LZ4_memmove(op, ip, length); - ip += length; - op += length; - - /* Necessarily EOF when !partialDecoding. - * When partialDecoding, it is EOF if we've either - * filled the output buffer or - * can't proceed with reading an offset for following match. 
- */ - if (!partialDecoding || (cpy == oend) || (ip >= (iend - 2))) - break; - } else { - /* may overwrite up to WILDCOPYLENGTH beyond cpy */ - LZ4_wildCopy(op, ip, cpy); - ip += length; - op = cpy; - } - - /* get offset */ - offset = LZ4_readLE16(ip); - ip += 2; - match = op - offset; - - /* get matchlength */ - length = token & ML_MASK; - -_copy_match: - if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { - /* Error : offset outside buffers */ - goto _output_error; - } - - /* costs ~1%; silence an msan warning when offset == 0 */ - /* - * note : when partialDecoding, there is no guarantee that - * at least 4 bytes remain available in output buffer - */ - if (!partialDecoding) { - assert(oend > op); - assert(oend - op >= 4); - - LZ4_write32(op, (U32)offset); - } - - if (length == ML_MASK) { - unsigned int s; - - do { - s = *ip++; - - if ((endOnInput) && (ip > iend - LASTLITERALS)) - goto _output_error; - - length += s; - } while (s == 255); - - if ((safeDecode) - && unlikely( - (uptrval)(op) + length < (uptrval)op)) { - /* overflow detection */ - goto _output_error; - } - } - - length += MINMATCH; - - /* match starting within external dictionary */ - if ((dict == usingExtDict) && (match < lowPrefix)) { - if (unlikely(op + length > oend - LASTLITERALS)) { - /* doesn't respect parsing restriction */ - if (!partialDecoding) - goto _output_error; - length = min(length, (size_t)(oend - op)); - } - - if (length <= (size_t)(lowPrefix - match)) { - /* - * match fits entirely within external - * dictionary : just copy - */ - memmove(op, dictEnd - (lowPrefix - match), - length); - op += length; - } else { - /* - * match stretches into both external - * dictionary and current block - */ - size_t const copySize = (size_t)(lowPrefix - match); - size_t const restSize = length - copySize; - - LZ4_memcpy(op, dictEnd - copySize, copySize); - op += copySize; - if (restSize > (size_t)(op - lowPrefix)) { - /* overlap copy */ - BYTE * const endOfMatch = op + restSize; - 
const BYTE *copyFrom = lowPrefix; - - while (op < endOfMatch) - *op++ = *copyFrom++; - } else { - LZ4_memcpy(op, lowPrefix, restSize); - op += restSize; - } - } - continue; - } - - /* copy match within block */ - cpy = op + length; - - /* - * partialDecoding : - * may not respect endBlock parsing restrictions - */ - assert(op <= oend); - if (partialDecoding && - (cpy > oend - MATCH_SAFEGUARD_DISTANCE)) { - size_t const mlen = min(length, (size_t)(oend - op)); - const BYTE * const matchEnd = match + mlen; - BYTE * const copyEnd = op + mlen; - - if (matchEnd > op) { - /* overlap copy */ - while (op < copyEnd) - *op++ = *match++; - } else { - LZ4_memcpy(op, match, mlen); - } - op = copyEnd; - if (op == oend) - break; - continue; - } - - if (unlikely(offset < 8)) { - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += inc32table[offset]; - LZ4_memcpy(op + 4, match, 4); - match -= dec64table[offset]; - } else { - LZ4_copy8(op, match); - match += 8; - } - - op += 8; - - if (unlikely(cpy > oend - MATCH_SAFEGUARD_DISTANCE)) { - BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1); - - if (cpy > oend - LASTLITERALS) { - /* - * Error : last LASTLITERALS bytes - * must be literals (uncompressed) - */ - goto _output_error; - } - - if (op < oCopyLimit) { - LZ4_wildCopy(op, match, oCopyLimit); - match += oCopyLimit - op; - op = oCopyLimit; - } - while (op < cpy) - *op++ = *match++; - } else { - LZ4_copy8(op, match); - if (length > 16) - LZ4_wildCopy(op + 8, match + 8, cpy); - } - op = cpy; /* wildcopy correction */ - } - - /* end of decoding */ - if (endOnInput) { - /* Nb of output bytes decoded */ - return (int) (((char *)op) - dst); - } else { - /* Nb of input bytes read */ - return (int) (((const char *)ip) - src); - } - - /* Overflow error detected */ -_output_error: - return (int) (-(((const char *)ip) - src)) - 1; -} - -int LZ4_decompress_safe(const char *source, char *dest, - int compressedSize, int maxDecompressedSize) -{ - return 
LZ4_decompress_generic(source, dest, - compressedSize, maxDecompressedSize, - endOnInputSize, decode_full_block, - noDict, (BYTE *)dest, NULL, 0); -} - -int LZ4_decompress_safe_partial(const char *src, char *dst, - int compressedSize, int targetOutputSize, int dstCapacity) -{ - dstCapacity = min(targetOutputSize, dstCapacity); - return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, - endOnInputSize, partial_decode, - noDict, (BYTE *)dst, NULL, 0); -} - -int LZ4_decompress_fast(const char *source, char *dest, int originalSize) -{ - return LZ4_decompress_generic(source, dest, 0, originalSize, - endOnOutputSize, decode_full_block, - withPrefix64k, - (BYTE *)dest - 64 * KB, NULL, 0); -} - -/* ===== Instantiate a few more decoding cases, used more than once. ===== */ - -int LZ4_decompress_safe_withPrefix64k(const char *source, char *dest, - int compressedSize, int maxOutputSize) -{ - return LZ4_decompress_generic(source, dest, - compressedSize, maxOutputSize, - endOnInputSize, decode_full_block, - withPrefix64k, - (BYTE *)dest - 64 * KB, NULL, 0); -} - -static int LZ4_decompress_safe_withSmallPrefix(const char *source, char *dest, - int compressedSize, - int maxOutputSize, - size_t prefixSize) -{ - return LZ4_decompress_generic(source, dest, - compressedSize, maxOutputSize, - endOnInputSize, decode_full_block, - noDict, - (BYTE *)dest - prefixSize, NULL, 0); -} - -int LZ4_decompress_safe_forceExtDict(const char *source, char *dest, - int compressedSize, int maxOutputSize, - const void *dictStart, size_t dictSize) -{ - return LZ4_decompress_generic(source, dest, - compressedSize, maxOutputSize, - endOnInputSize, decode_full_block, - usingExtDict, (BYTE *)dest, - (const BYTE *)dictStart, dictSize); -} - -static int LZ4_decompress_fast_extDict(const char *source, char *dest, - int originalSize, - const void *dictStart, size_t dictSize) -{ - return LZ4_decompress_generic(source, dest, - 0, originalSize, - endOnOutputSize, decode_full_block, - usingExtDict, 
(BYTE *)dest, - (const BYTE *)dictStart, dictSize); -} - -/* - * The "double dictionary" mode, for use with e.g. ring buffers: the first part - * of the dictionary is passed as prefix, and the second via dictStart + dictSize. - * These routines are used only once, in LZ4_decompress_*_continue(). - */ -static FORCE_INLINE -int LZ4_decompress_safe_doubleDict(const char *source, char *dest, - int compressedSize, int maxOutputSize, - size_t prefixSize, - const void *dictStart, size_t dictSize) -{ - return LZ4_decompress_generic(source, dest, - compressedSize, maxOutputSize, - endOnInputSize, decode_full_block, - usingExtDict, (BYTE *)dest - prefixSize, - (const BYTE *)dictStart, dictSize); -} - -static FORCE_INLINE -int LZ4_decompress_fast_doubleDict(const char *source, char *dest, - int originalSize, size_t prefixSize, - const void *dictStart, size_t dictSize) -{ - return LZ4_decompress_generic(source, dest, - 0, originalSize, - endOnOutputSize, decode_full_block, - usingExtDict, (BYTE *)dest - prefixSize, - (const BYTE *)dictStart, dictSize); -} - -/* ===== streaming decompression functions ===== */ - -int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode, - const char *dictionary, int dictSize) -{ - LZ4_streamDecode_t_internal *lz4sd = - &LZ4_streamDecode->internal_donotuse; - - lz4sd->prefixSize = (size_t) dictSize; - lz4sd->prefixEnd = (const BYTE *) dictionary + dictSize; - lz4sd->externalDict = NULL; - lz4sd->extDictSize = 0; - return 1; -} - -/* - * *_continue() : - * These decoding functions allow decompression of multiple blocks - * in "streaming" mode. - * Previously decoded blocks must still be available at the memory - * position where they were decoded. 
- * If it's not possible, save the relevant part of - * decoded data into a safe buffer, - * and indicate where it stands using LZ4_setStreamDecode() - */ -int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode, - const char *source, char *dest, int compressedSize, int maxOutputSize) -{ - LZ4_streamDecode_t_internal *lz4sd = - &LZ4_streamDecode->internal_donotuse; - int result; - - if (lz4sd->prefixSize == 0) { - /* The first call, no dictionary yet. */ - assert(lz4sd->extDictSize == 0); - result = LZ4_decompress_safe(source, dest, - compressedSize, maxOutputSize); - if (result <= 0) - return result; - lz4sd->prefixSize = result; - lz4sd->prefixEnd = (BYTE *)dest + result; - } else if (lz4sd->prefixEnd == (BYTE *)dest) { - /* They're rolling the current segment. */ - if (lz4sd->prefixSize >= 64 * KB - 1) - result = LZ4_decompress_safe_withPrefix64k(source, dest, - compressedSize, maxOutputSize); - else if (lz4sd->extDictSize == 0) - result = LZ4_decompress_safe_withSmallPrefix(source, - dest, compressedSize, maxOutputSize, - lz4sd->prefixSize); - else - result = LZ4_decompress_safe_doubleDict(source, dest, - compressedSize, maxOutputSize, - lz4sd->prefixSize, - lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) - return result; - lz4sd->prefixSize += result; - lz4sd->prefixEnd += result; - } else { - /* - * The buffer wraps around, or they're - * switching to another buffer. 
- */ - lz4sd->extDictSize = lz4sd->prefixSize; - lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_safe_forceExtDict(source, dest, - compressedSize, maxOutputSize, - lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) - return result; - lz4sd->prefixSize = result; - lz4sd->prefixEnd = (BYTE *)dest + result; - } - - return result; -} - -int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode, - const char *source, char *dest, int originalSize) -{ - LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse; - int result; - - if (lz4sd->prefixSize == 0) { - assert(lz4sd->extDictSize == 0); - result = LZ4_decompress_fast(source, dest, originalSize); - if (result <= 0) - return result; - lz4sd->prefixSize = originalSize; - lz4sd->prefixEnd = (BYTE *)dest + originalSize; - } else if (lz4sd->prefixEnd == (BYTE *)dest) { - if (lz4sd->prefixSize >= 64 * KB - 1 || - lz4sd->extDictSize == 0) - result = LZ4_decompress_fast(source, dest, - originalSize); - else - result = LZ4_decompress_fast_doubleDict(source, dest, - originalSize, lz4sd->prefixSize, - lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) - return result; - lz4sd->prefixSize += originalSize; - lz4sd->prefixEnd += originalSize; - } else { - lz4sd->extDictSize = lz4sd->prefixSize; - lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_fast_extDict(source, dest, - originalSize, lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) - return result; - lz4sd->prefixSize = originalSize; - lz4sd->prefixEnd = (BYTE *)dest + originalSize; - } - return result; -} - -int LZ4_decompress_safe_usingDict(const char *source, char *dest, - int compressedSize, int maxOutputSize, - const char *dictStart, int dictSize) -{ - if (dictSize == 0) - return LZ4_decompress_safe(source, dest, - compressedSize, maxOutputSize); - if (dictStart+dictSize == dest) { - if (dictSize >= 64 * KB - 1) - return 
LZ4_decompress_safe_withPrefix64k(source, dest, - compressedSize, maxOutputSize); - return LZ4_decompress_safe_withSmallPrefix(source, dest, - compressedSize, maxOutputSize, dictSize); - } - return LZ4_decompress_safe_forceExtDict(source, dest, - compressedSize, maxOutputSize, dictStart, dictSize); -} - -int LZ4_decompress_fast_usingDict(const char *source, char *dest, - int originalSize, - const char *dictStart, int dictSize) -{ - if (dictSize == 0 || dictStart + dictSize == dest) - return LZ4_decompress_fast(source, dest, originalSize); - - return LZ4_decompress_fast_extDict(source, dest, originalSize, - dictStart, dictSize); -} - -#ifndef STATIC -EXPORT_SYMBOL(LZ4_decompress_safe); -EXPORT_SYMBOL(LZ4_decompress_safe_partial); -EXPORT_SYMBOL(LZ4_decompress_fast); -EXPORT_SYMBOL(LZ4_setStreamDecode); -EXPORT_SYMBOL(LZ4_decompress_safe_continue); -EXPORT_SYMBOL(LZ4_decompress_fast_continue); -EXPORT_SYMBOL(LZ4_decompress_safe_usingDict); -EXPORT_SYMBOL(LZ4_decompress_fast_usingDict); - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("LZ4 decompressor"); -#endif diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h deleted file mode 100644 index 673bd206aa98..000000000000 --- a/lib/lz4/lz4defs.h +++ /dev/null @@ -1,245 +0,0 @@ -#ifndef __LZ4DEFS_H__ -#define __LZ4DEFS_H__ - -/* - * lz4defs.h -- common and architecture specific defines for the kernel usage - - * LZ4 - Fast LZ compression algorithm - * Copyright (C) 2011-2016, Yann Collet. - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * You can contact the author at : - * - LZ4 homepage : http://www.lz4.org - * - LZ4 source repository : https://github.com/lz4/lz4 - * - * Changed for kernel usage by: - * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> - */ - -#include -#include /* memset, memcpy */ - -#define FORCE_INLINE __always_inline - -/*-************************************ - * Basic Types - **************************************/ -#include - -typedef uint8_t BYTE; -typedef uint16_t U16; -typedef uint32_t U32; -typedef int32_t S32; -typedef uint64_t U64; -typedef uintptr_t uptrval; - -/*-************************************ - * Architecture specifics - **************************************/ -#if defined(CONFIG_64BIT) -#define LZ4_ARCH64 1 -#else -#define LZ4_ARCH64 0 -#endif - -#if defined(__LITTLE_ENDIAN) -#define LZ4_LITTLE_ENDIAN 1 -#else -#define LZ4_LITTLE_ENDIAN 0 -#endif - -/*-************************************ - * Constants - **************************************/ -#define MINMATCH 4 - -#define WILDCOPYLENGTH 8 -#define LASTLITERALS 5 -#define MFLIMIT (WILDCOPYLENGTH + MINMATCH) -/* - * ensure it's possible to write 2 x wildcopyLength - * without overflowing output buffer - */ -#define MATCH_SAFEGUARD_DISTANCE ((2 * WILDCOPYLENGTH) - MINMATCH) - -/* Increase this value ==> compression run slower on incompressible data */ -#define LZ4_SKIPTRIGGER 6 - -#define HASH_UNIT sizeof(size_t) - -#define KB (1 << 10) -#define MB (1 << 20) -#define GB (1U << 30) - -#define MAXD_LOG 16 -#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) -#define STEPSIZE sizeof(size_t) - -#define ML_BITS 4 -#define ML_MASK ((1U << ML_BITS) - 1) -#define RUN_BITS (8 - ML_BITS) -#define RUN_MASK ((1U << RUN_BITS) - 1) - -/*-************************************ - * Reading and writing into memory - **************************************/ -static FORCE_INLINE U16 LZ4_read16(const void *ptr) -{ - return get_unaligned((const U16 *)ptr); -} - -static FORCE_INLINE U32 LZ4_read32(const void *ptr) -{ - return 
get_unaligned((const U32 *)ptr); -} - -static FORCE_INLINE size_t LZ4_read_ARCH(const void *ptr) -{ - return get_unaligned((const size_t *)ptr); -} - -static FORCE_INLINE void LZ4_write16(void *memPtr, U16 value) -{ - put_unaligned(value, (U16 *)memPtr); -} - -static FORCE_INLINE void LZ4_write32(void *memPtr, U32 value) -{ - put_unaligned(value, (U32 *)memPtr); -} - -static FORCE_INLINE U16 LZ4_readLE16(const void *memPtr) -{ - return get_unaligned_le16(memPtr); -} - -static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value) -{ - return put_unaligned_le16(value, memPtr); -} - -/* - * LZ4 relies on memcpy with a constant size being inlined. In freestanding - * environments, the compiler can't assume the implementation of memcpy() is - * standard compliant, so apply its specialized memcpy() inlining logic. When - * possible, use __builtin_memcpy() to tell the compiler to analyze memcpy() - * as-if it were standard compliant, so it can inline it in freestanding - * environments. This is needed when decompressing the Linux Kernel, for example. 
- */ -#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) -#define LZ4_memmove(dst, src, size) __builtin_memmove(dst, src, size) - -static FORCE_INLINE void LZ4_copy8(void *dst, const void *src) -{ -#if LZ4_ARCH64 - U64 a = get_unaligned((const U64 *)src); - - put_unaligned(a, (U64 *)dst); -#else - U32 a = get_unaligned((const U32 *)src); - U32 b = get_unaligned((const U32 *)src + 1); - - put_unaligned(a, (U32 *)dst); - put_unaligned(b, (U32 *)dst + 1); -#endif -} - -/* - * customized variant of memcpy, - * which can overwrite up to 7 bytes beyond dstEnd - */ -static FORCE_INLINE void LZ4_wildCopy(void *dstPtr, - const void *srcPtr, void *dstEnd) -{ - BYTE *d = (BYTE *)dstPtr; - const BYTE *s = (const BYTE *)srcPtr; - BYTE *const e = (BYTE *)dstEnd; - - do { - LZ4_copy8(d, s); - d += 8; - s += 8; - } while (d < e); -} - -static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val) -{ -#if LZ4_LITTLE_ENDIAN - return __ffs(val) >> 3; -#else - return (BITS_PER_LONG - 1 - __fls(val)) >> 3; -#endif -} - -static FORCE_INLINE unsigned int LZ4_count( - const BYTE *pIn, - const BYTE *pMatch, - const BYTE *pInLimit) -{ - const BYTE *const pStart = pIn; - - while (likely(pIn < pInLimit - (STEPSIZE - 1))) { - size_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); - - if (!diff) { - pIn += STEPSIZE; - pMatch += STEPSIZE; - continue; - } - - pIn += LZ4_NbCommonBytes(diff); - - return (unsigned int)(pIn - pStart); - } - -#if LZ4_ARCH64 - if ((pIn < (pInLimit - 3)) - && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { - pIn += 4; - pMatch += 4; - } -#endif - - if ((pIn < (pInLimit - 1)) - && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { - pIn += 2; - pMatch += 2; - } - - if ((pIn < pInLimit) && (*pMatch == *pIn)) - pIn++; - - return (unsigned int)(pIn - pStart); -} - -typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive; -typedef enum { byPtr, byU32, byU16 } tableType_t; - -typedef enum { noDict = 0, withPrefix64k, usingExtDict } 
dict_directive; -typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; - -typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive; -typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; - -#define LZ4_STATIC_ASSERT(c) BUILD_BUG_ON(!(c)) - -#endif diff --git a/lib/lz4/lz4hc.c b/lib/lz4/lz4hc.c new file mode 100644 index 000000000000..41d2399037c5 --- /dev/null +++ b/lib/lz4/lz4hc.c @@ -0,0 +1,2193 @@ + +/* + LZ4 HC - High Compression Mode of LZ4 + Copyright (C) 2011-2020, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */ + + +/* ************************************* +* Tuning Parameter +***************************************/ + +/*! HEAPMODE : + * Select how stateless HC compression functions like `LZ4_compress_HC()` + * allocate memory for their workspace: + * in stack (0:fastest), or in heap (1:default, requires malloc()). + * Since workspace is rather large, heap mode is recommended. +**/ +#ifndef LZ4HC_HEAPMODE +# define LZ4HC_HEAPMODE 1 +#endif + + +/*=== Dependency ===*/ +#define LZ4_HC_STATIC_LINKING_ONLY +#include "lz4hc.h" +#include + + +/*=== Shared lz4.c code ===*/ +#ifndef LZ4_SRC_INCLUDED +# if defined(__GNUC__) +# pragma GCC diagnostic ignored "-Wunused-function" +# endif +# if defined (__clang__) +# pragma clang diagnostic ignored "-Wunused-function" +# endif +# define LZ4_COMMONDEFS_ONLY +# include "lz4.c" /* LZ4_count, constants, mem */ +#endif + + +/*=== Enums ===*/ +typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive; + + +/*=== Constants ===*/ +#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) +#define LZ4_OPT_NUM (1<<12) + + +/*=== Macros ===*/ +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) +#define MAX(a,b) ( (a) > (b) ? 
(a) : (b) ) + + +/*=== Levels definition ===*/ +typedef enum { lz4mid, lz4hc, lz4opt } lz4hc_strat_e; +typedef struct { + lz4hc_strat_e strat; + int nbSearches; + U32 targetLength; +} cParams_t; +static const cParams_t k_clTable[LZ4HC_CLEVEL_MAX+1] = { + { lz4mid, 2, 16 }, /* 0, unused */ + { lz4mid, 2, 16 }, /* 1, unused */ + { lz4mid, 2, 16 }, /* 2 */ + { lz4hc, 4, 16 }, /* 3 */ + { lz4hc, 8, 16 }, /* 4 */ + { lz4hc, 16, 16 }, /* 5 */ + { lz4hc, 32, 16 }, /* 6 */ + { lz4hc, 64, 16 }, /* 7 */ + { lz4hc, 128, 16 }, /* 8 */ + { lz4hc, 256, 16 }, /* 9 */ + { lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/ + { lz4opt, 512,128 }, /*11 */ + { lz4opt,16384,LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */ +}; + +static cParams_t LZ4HC_getCLevelParams(int cLevel) +{ + /* note : clevel convention is a bit different from lz4frame, + * possibly something worth revisiting for consistency */ + if (cLevel < 1) + cLevel = LZ4HC_CLEVEL_DEFAULT; + cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel); + return k_clTable[cLevel]; +} + + +/*=== Hashing ===*/ +#define LZ4HC_HASHSIZE 4 +#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG)) +static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); } + +#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2) +/* lie to the compiler about data alignment; use with caution */ +static U64 LZ4_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1) +/* __pack instructions are safer, but compiler specific */ +LZ4_PACK(typedef struct { U64 u64; }) LZ4_unalign64; +static U64 LZ4_read64(const void* ptr) { return ((const LZ4_unalign64*)ptr)->u64; } + +#else /* safe and portable access using memcpy() */ +static U64 LZ4_read64(const void* memPtr) +{ + U64 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +#endif /* LZ4_FORCE_MEMORY_ACCESS */ + +#define LZ4MID_HASHSIZE 8 +#define LZ4MID_HASHLOG (LZ4HC_HASH_LOG-1) +#define 
LZ4MID_HASHTABLESIZE (1 << LZ4MID_HASHLOG) + +static U32 LZ4MID_hash4(U32 v) { return (v * 2654435761U) >> (32-LZ4MID_HASHLOG); } +static U32 LZ4MID_hash4Ptr(const void* ptr) { return LZ4MID_hash4(LZ4_read32(ptr)); } +/* note: hash7 hashes the lower 56-bits. + * It presumes input was read using little endian.*/ +static U32 LZ4MID_hash7(U64 v) { return (U32)(((v << (64-56)) * 58295818150454627ULL) >> (64-LZ4MID_HASHLOG)) ; } +static U64 LZ4_readLE64(const void* memPtr); +static U32 LZ4MID_hash8Ptr(const void* ptr) { return LZ4MID_hash7(LZ4_readLE64(ptr)); } + +static U64 LZ4_readLE64(const void* memPtr) +{ + if (LZ4_isLittleEndian()) { + return LZ4_read64(memPtr); + } else { + const BYTE* p = (const BYTE*)memPtr; + /* note: relies on the compiler to simplify this expression */ + return (U64)p[0] | ((U64)p[1]<<8) | ((U64)p[2]<<16) | ((U64)p[3]<<24) + | ((U64)p[4]<<32) | ((U64)p[5]<<40) | ((U64)p[6]<<48) | ((U64)p[7]<<56); + } +} + + +/*=== Count match length ===*/ +LZ4_FORCE_INLINE +unsigned LZ4HC_NbCommonBytes32(U32 val) +{ + assert(val != 0); + if (LZ4_isLittleEndian()) { +# if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanReverse(&r, val); + return (unsigned)((31 - r) >> 3); +# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_clz(val) >> 3; +# else + val >>= 8; + val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) | + (val + 0x00FF0000)) >> 24; + return (unsigned)val ^ 3; +# endif + } else { +# if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanForward(&r, val); + return (unsigned)(r >> 3); +# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_ctz(val) >> 3; +# else + const U32 m = 
0x01010101; + return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24; +# endif + } +} + +/** LZ4HC_countBack() : + * @return : negative value, nb of common bytes before ip/match */ +LZ4_FORCE_INLINE +int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, + const BYTE* const iMin, const BYTE* const mMin) +{ + int back = 0; + int const min = (int)MAX(iMin - ip, mMin - match); + assert(min <= 0); + assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31)); + assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31)); + + while ((back - min) > 3) { + U32 const v = LZ4_read32(ip + back - 4) ^ LZ4_read32(match + back - 4); + if (v) { + return (back - (int)LZ4HC_NbCommonBytes32(v)); + } else back -= 4; /* 4-byte step */ + } + /* check remainder if any */ + while ( (back > min) + && (ip[back-1] == match[back-1]) ) + back--; + return back; +} + +/*=== Chain table updates ===*/ +#define DELTANEXTU16(table, pos) table[(U16)(pos)] /* faster */ +/* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */ +#define UPDATABLE(ip, op, anchor) &ip, &op, &anchor + + +/************************************** +* Init +**************************************/ +static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4) +{ + MEM_INIT(hc4->hashTable, 0, sizeof(hc4->hashTable)); + MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); +} + +static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start) +{ + size_t const bufferSize = (size_t)(hc4->end - hc4->prefixStart); + size_t newStartingOffset = bufferSize + hc4->dictLimit; + DEBUGLOG(5, "LZ4HC_init_internal"); + assert(newStartingOffset >= bufferSize); /* check overflow */ + if (newStartingOffset > 1 GB) { + LZ4HC_clearTables(hc4); + newStartingOffset = 0; + } + newStartingOffset += 64 KB; + hc4->nextToUpdate = (U32)newStartingOffset; + hc4->prefixStart = start; + hc4->end = start; + hc4->dictStart = start; + hc4->dictLimit = (U32)newStartingOffset; + hc4->lowLimit = 
(U32)newStartingOffset; +} + + +/************************************** +* Encode +**************************************/ +/* LZ4HC_encodeSequence() : + * @return : 0 if ok, + * 1 if buffer issue detected */ +LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( + const BYTE** _ip, + BYTE** _op, + const BYTE** _anchor, + int matchLength, + int offset, + limitedOutput_directive limit, + BYTE* oend) +{ +#define ip (*_ip) +#define op (*_op) +#define anchor (*_anchor) + + size_t length; + BYTE* const token = op++; + +#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6) + static const BYTE* start = NULL; + static U32 totalCost = 0; + U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start); + U32 const ll = (U32)(ip - anchor); + U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0; + U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0; + U32 const cost = 1 + llAdd + ll + 2 + mlAdd; + if (start==NULL) start = anchor; /* only works for single segment */ + /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */ + DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5i, cost:%4u + %5u", + pos, + (U32)(ip - anchor), matchLength, offset, + cost, totalCost); + totalCost += cost; +#endif + + /* Encode Literal length */ + length = (size_t)(ip - anchor); + LZ4_STATIC_ASSERT(notLimited == 0); + /* Check output limit */ + if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) { + DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)", + (int)length, (int)(oend - op)); + return 1; + } + if (length >= RUN_MASK) { + size_t len = length - RUN_MASK; + *token = (RUN_MASK << ML_BITS); + for(; len >= 255 ; len -= 255) *op++ = 255; + *op++ = (BYTE)len; + } else { + *token = (BYTE)(length << ML_BITS); + } + + /* Copy Literals */ + LZ4_wildCopy8(op, anchor, op + length); + op += length; + + /* Encode Offset */ + assert(offset <= LZ4_DISTANCE_MAX ); + assert(offset > 0); + LZ4_writeLE16(op, (U16)(offset)); op += 2; + + /* Encode MatchLength */ + 
assert(matchLength >= MINMATCH); + length = (size_t)matchLength - MINMATCH; + if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) { + DEBUGLOG(6, "Not enough room to write match length"); + return 1; /* Check output limit */ + } + if (length >= ML_MASK) { + *token += ML_MASK; + length -= ML_MASK; + for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; } + if (length >= 255) { length -= 255; *op++ = 255; } + *op++ = (BYTE)length; + } else { + *token += (BYTE)(length); + } + + /* Prepare next loop */ + ip += matchLength; + anchor = ip; + + return 0; + +#undef ip +#undef op +#undef anchor +} + + +typedef struct { + int off; + int len; + int back; /* negative value */ +} LZ4HC_match_t; + +LZ4HC_match_t LZ4HC_searchExtDict(const BYTE* ip, U32 ipIndex, + const BYTE* const iLowLimit, const BYTE* const iHighLimit, + const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex, + int currentBestML, int nbAttempts) +{ + size_t const lDictEndIndex = (size_t)(dictCtx->end - dictCtx->prefixStart) + dictCtx->dictLimit; + U32 lDictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)]; + U32 matchIndex = lDictMatchIndex + gDictEndIndex - (U32)lDictEndIndex; + int offset = 0, sBack = 0; + assert(lDictEndIndex <= 1 GB); + if (lDictMatchIndex>0) + DEBUGLOG(7, "lDictEndIndex = %zu, lDictMatchIndex = %u", lDictEndIndex, lDictMatchIndex); + while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) { + const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + lDictMatchIndex; + + if (LZ4_read32(matchPtr) == LZ4_read32(ip)) { + int mlt; + int back = 0; + const BYTE* vLimit = ip + (lDictEndIndex - lDictMatchIndex); + if (vLimit > iHighLimit) vLimit = iHighLimit; + mlt = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; + back = (ip > iLowLimit) ? 
LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->prefixStart) : 0; + mlt -= back; + if (mlt > currentBestML) { + currentBestML = mlt; + offset = (int)(ipIndex - matchIndex); + sBack = back; + DEBUGLOG(7, "found match of length %i within extDictCtx", currentBestML); + } } + + { U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, lDictMatchIndex); + lDictMatchIndex -= nextOffset; + matchIndex -= nextOffset; + } } + + { LZ4HC_match_t md; + md.len = currentBestML; + md.off = offset; + md.back = sBack; + return md; + } +} + +typedef LZ4HC_match_t (*LZ4MID_searchIntoDict_f)(const BYTE* ip, U32 ipIndex, + const BYTE* const iHighLimit, + const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex); + +static LZ4HC_match_t LZ4MID_searchHCDict(const BYTE* ip, U32 ipIndex, + const BYTE* const iHighLimit, + const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex) +{ + return LZ4HC_searchExtDict(ip,ipIndex, + ip, iHighLimit, + dictCtx, gDictEndIndex, + MINMATCH-1, 2); +} + +static LZ4HC_match_t LZ4MID_searchExtDict(const BYTE* ip, U32 ipIndex, + const BYTE* const iHighLimit, + const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex) +{ + size_t const lDictEndIndex = (size_t)(dictCtx->end - dictCtx->prefixStart) + dictCtx->dictLimit; + const U32* const hash4Table = dictCtx->hashTable; + const U32* const hash8Table = hash4Table + LZ4MID_HASHTABLESIZE; + DEBUGLOG(7, "LZ4MID_searchExtDict (ipIdx=%u)", ipIndex); + + /* search long match first */ + { U32 l8DictMatchIndex = hash8Table[LZ4MID_hash8Ptr(ip)]; + U32 m8Index = l8DictMatchIndex + gDictEndIndex - (U32)lDictEndIndex; + assert(lDictEndIndex <= 1 GB); + if (ipIndex - m8Index <= LZ4_DISTANCE_MAX) { + const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + l8DictMatchIndex; + const size_t safeLen = MIN(lDictEndIndex - l8DictMatchIndex, (size_t)(iHighLimit - ip)); + int mlt = (int)LZ4_count(ip, matchPtr, ip + safeLen); + if (mlt >= MINMATCH) { + LZ4HC_match_t md; + DEBUGLOG(7, "Found long ExtDict match of len=%u", 
mlt); + md.len = mlt; + md.off = (int)(ipIndex - m8Index); + md.back = 0; + return md; + } + } + } + + /* search for short match second */ + { U32 l4DictMatchIndex = hash4Table[LZ4MID_hash4Ptr(ip)]; + U32 m4Index = l4DictMatchIndex + gDictEndIndex - (U32)lDictEndIndex; + if (ipIndex - m4Index <= LZ4_DISTANCE_MAX) { + const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + l4DictMatchIndex; + const size_t safeLen = MIN(lDictEndIndex - l4DictMatchIndex, (size_t)(iHighLimit - ip)); + int mlt = (int)LZ4_count(ip, matchPtr, ip + safeLen); + if (mlt >= MINMATCH) { + LZ4HC_match_t md; + DEBUGLOG(7, "Found short ExtDict match of len=%u", mlt); + md.len = mlt; + md.off = (int)(ipIndex - m4Index); + md.back = 0; + return md; + } + } + } + + /* nothing found */ + { LZ4HC_match_t const md = {0, 0, 0 }; + return md; + } +} + +/************************************** +* Mid Compression (level 2) +**************************************/ + +LZ4_FORCE_INLINE void +LZ4MID_addPosition(U32* hTable, U32 hValue, U32 index) +{ + hTable[hValue] = index; +} + +#define ADDPOS8(_p, _idx) LZ4MID_addPosition(hash8Table, LZ4MID_hash8Ptr(_p), _idx) +#define ADDPOS4(_p, _idx) LZ4MID_addPosition(hash4Table, LZ4MID_hash4Ptr(_p), _idx) + +/* Fill hash tables with references into dictionary. 
+ * The resulting table is only exploitable by LZ4MID (level 2) */ +static void +LZ4MID_fillHTable (LZ4HC_CCtx_internal* cctx, const void* dict, size_t size) +{ + U32* const hash4Table = cctx->hashTable; + U32* const hash8Table = hash4Table + LZ4MID_HASHTABLESIZE; + const BYTE* const prefixPtr = (const BYTE*)dict; + U32 const prefixIdx = cctx->dictLimit; + U32 const target = prefixIdx + (U32)size - LZ4MID_HASHSIZE; + U32 idx = cctx->nextToUpdate; + assert(dict == cctx->prefixStart); + DEBUGLOG(4, "LZ4MID_fillHTable (size:%zu)", size); + if (size <= LZ4MID_HASHSIZE) + return; + + for (; idx < target; idx += 3) { + ADDPOS4(prefixPtr+idx-prefixIdx, idx); + ADDPOS8(prefixPtr+idx+1-prefixIdx, idx+1); + } + + idx = (size > 32 KB + LZ4MID_HASHSIZE) ? target - 32 KB : cctx->nextToUpdate; + for (; idx < target; idx += 1) { + ADDPOS8(prefixPtr+idx-prefixIdx, idx); + } + + cctx->nextToUpdate = target; +} + +static LZ4MID_searchIntoDict_f select_searchDict_function(const LZ4HC_CCtx_internal* dictCtx) +{ + if (dictCtx == NULL) return NULL; + if (LZ4HC_getCLevelParams(dictCtx->compressionLevel).strat == lz4mid) + return LZ4MID_searchExtDict; + return LZ4MID_searchHCDict; +} + +static int LZ4MID_compress ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* srcSizePtr, + int const maxOutputSize, + const limitedOutput_directive limit, + const dictCtx_directive dict + ) +{ + U32* const hash4Table = ctx->hashTable; + U32* const hash8Table = hash4Table + LZ4MID_HASHTABLESIZE; + const BYTE* ip = (const BYTE*)src; + const BYTE* anchor = ip; + const BYTE* const iend = ip + *srcSizePtr; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = (iend - LASTLITERALS); + const BYTE* const ilimit = (iend - LZ4MID_HASHSIZE); + BYTE* op = (BYTE*)dst; + BYTE* oend = op + maxOutputSize; + + const BYTE* const prefixPtr = ctx->prefixStart; + const U32 prefixIdx = ctx->dictLimit; + const U32 ilimitIdx = (U32)(ilimit - prefixPtr) + prefixIdx; + 
const BYTE* const dictStart = ctx->dictStart; + const U32 dictIdx = ctx->lowLimit; + const U32 gDictEndIndex = ctx->lowLimit; + const LZ4MID_searchIntoDict_f searchIntoDict = (dict == usingDictCtxHc) ? select_searchDict_function(ctx->dictCtx) : NULL; + unsigned matchLength; + unsigned matchDistance; + + /* input sanitization */ + DEBUGLOG(5, "LZ4MID_compress (%i bytes)", *srcSizePtr); + if (dict == usingDictCtxHc) DEBUGLOG(5, "usingDictCtxHc"); + assert(*srcSizePtr >= 0); + if (*srcSizePtr) assert(src != NULL); + if (maxOutputSize) assert(dst != NULL); + if (*srcSizePtr < 0) return 0; /* invalid */ + if (maxOutputSize < 0) return 0; /* invalid */ + if (*srcSizePtr > LZ4_MAX_INPUT_SIZE) { + /* forbidden: no input is allowed to be that large */ + return 0; + } + if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ + if (*srcSizePtr < LZ4_minLength) + goto _lz4mid_last_literals; /* Input too small, no compression (all literals) */ + + /* main loop */ + while (ip <= mflimit) { + const U32 ipIndex = (U32)(ip - prefixPtr) + prefixIdx; + /* search long match */ + { U32 const h8 = LZ4MID_hash8Ptr(ip); + U32 const pos8 = hash8Table[h8]; + assert(h8 < LZ4MID_HASHTABLESIZE); + assert(pos8 < ipIndex); + LZ4MID_addPosition(hash8Table, h8, ipIndex); + if (ipIndex - pos8 <= LZ4_DISTANCE_MAX) { + /* match candidate found */ + if (pos8 >= prefixIdx) { + const BYTE* const matchPtr = prefixPtr + pos8 - prefixIdx; + assert(matchPtr < ip); + matchLength = LZ4_count(ip, matchPtr, matchlimit); + if (matchLength >= MINMATCH) { + DEBUGLOG(7, "found long match at pos %u (len=%u)", pos8, matchLength); + matchDistance = ipIndex - pos8; + goto _lz4mid_encode_sequence; + } + } else { + if (pos8 >= dictIdx) { + /* extDict match candidate */ + const BYTE* const matchPtr = dictStart + (pos8 - dictIdx); + const size_t safeLen = MIN(prefixIdx - pos8, (size_t)(matchlimit - ip)); + matchLength = LZ4_count(ip, matchPtr, ip + safeLen); + if (matchLength >= 
MINMATCH) { + DEBUGLOG(7, "found long match at ExtDict pos %u (len=%u)", pos8, matchLength); + matchDistance = ipIndex - pos8; + goto _lz4mid_encode_sequence; + } + } + } + } } + /* search short match */ + { U32 const h4 = LZ4MID_hash4Ptr(ip); + U32 const pos4 = hash4Table[h4]; + assert(h4 < LZ4MID_HASHTABLESIZE); + assert(pos4 < ipIndex); + LZ4MID_addPosition(hash4Table, h4, ipIndex); + if (ipIndex - pos4 <= LZ4_DISTANCE_MAX) { + /* match candidate found */ + if (pos4 >= prefixIdx) { + /* only search within prefix */ + const BYTE* const matchPtr = prefixPtr + (pos4 - prefixIdx); + assert(matchPtr < ip); + assert(matchPtr >= prefixPtr); + matchLength = LZ4_count(ip, matchPtr, matchlimit); + if (matchLength >= MINMATCH) { + /* short match found, let's just check ip+1 for longer */ + U32 const h8 = LZ4MID_hash8Ptr(ip+1); + U32 const pos8 = hash8Table[h8]; + U32 const m2Distance = ipIndex + 1 - pos8; + matchDistance = ipIndex - pos4; + if ( m2Distance <= LZ4_DISTANCE_MAX + && pos8 >= prefixIdx /* only search within prefix */ + && likely(ip < mflimit) + ) { + const BYTE* const m2Ptr = prefixPtr + (pos8 - prefixIdx); + unsigned ml2 = LZ4_count(ip+1, m2Ptr, matchlimit); + if (ml2 > matchLength) { + LZ4MID_addPosition(hash8Table, h8, ipIndex+1); + ip++; + matchLength = ml2; + matchDistance = m2Distance; + } } + goto _lz4mid_encode_sequence; + } + } else { + if (pos4 >= dictIdx) { + /* extDict match candidate */ + const BYTE* const matchPtr = dictStart + (pos4 - dictIdx); + const size_t safeLen = MIN(prefixIdx - pos4, (size_t)(matchlimit - ip)); + matchLength = LZ4_count(ip, matchPtr, ip + safeLen); + if (matchLength >= MINMATCH) { + DEBUGLOG(7, "found match at ExtDict pos %u (len=%u)", pos4, matchLength); + matchDistance = ipIndex - pos4; + goto _lz4mid_encode_sequence; + } + } + } + } } + /* no match found in prefix */ + if ( (dict == usingDictCtxHc) + && (ipIndex - gDictEndIndex < LZ4_DISTANCE_MAX - 8) ) { + /* search a match into external dictionary */ + LZ4HC_match_t 
dMatch = searchIntoDict(ip, ipIndex, + matchlimit, + ctx->dictCtx, gDictEndIndex); + if (dMatch.len >= MINMATCH) { + DEBUGLOG(7, "found Dictionary match (offset=%i)", dMatch.off); + assert(dMatch.back == 0); + matchLength = (unsigned)dMatch.len; + matchDistance = (unsigned)dMatch.off; + goto _lz4mid_encode_sequence; + } + } + /* no match found */ + ip += 1 + ((ip-anchor) >> 9); /* skip faster over incompressible data */ + continue; + +_lz4mid_encode_sequence: + /* catch back */ + while (((ip > anchor) & ((U32)(ip-prefixPtr) > matchDistance)) && (unlikely(ip[-1] == ip[-(int)matchDistance-1]))) { + ip--; matchLength++; + }; + + /* fill table with beginning of match */ + ADDPOS8(ip+1, ipIndex+1); + ADDPOS8(ip+2, ipIndex+2); + ADDPOS4(ip+1, ipIndex+1); + + /* encode */ + { BYTE* const saved_op = op; + /* LZ4HC_encodeSequence always updates @op; on success, it updates @ip and @anchor */ + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), + (int)matchLength, (int)matchDistance, + limit, oend) ) { + op = saved_op; /* restore @op value before failed LZ4HC_encodeSequence */ + goto _lz4mid_dest_overflow; + } + } + + /* fill table with end of match */ + { U32 endMatchIdx = (U32)(ip-prefixPtr) + prefixIdx; + U32 pos_m2 = endMatchIdx - 2; + if (pos_m2 < ilimitIdx) { + if (likely(ip - prefixPtr > 5)) { + ADDPOS8(ip-5, endMatchIdx - 5); + } + ADDPOS8(ip-3, endMatchIdx - 3); + ADDPOS8(ip-2, endMatchIdx - 2); + ADDPOS4(ip-2, endMatchIdx - 2); + ADDPOS4(ip-1, endMatchIdx - 1); + } + } + } + +_lz4mid_last_literals: + /* Encode Last Literals */ + { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ + size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; + size_t const totalSize = 1 + llAdd + lastRunSize; + if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ + if (limit && (op + totalSize > oend)) { + if (limit == limitedOutput) return 0; /* not enough space in @dst */ + /* adapt lastRunSize to fill 'dest' */ + lastRunSize = (size_t)(oend - op) - 1 
/*token*/; + llAdd = (lastRunSize + 256 - RUN_MASK) / 256; + lastRunSize -= llAdd; + } + DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize); + ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */ + + if (lastRunSize >= RUN_MASK) { + size_t accumulator = lastRunSize - RUN_MASK; + *op++ = (RUN_MASK << ML_BITS); + for(; accumulator >= 255 ; accumulator -= 255) + *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRunSize << ML_BITS); + } + assert(lastRunSize <= (size_t)(oend - op)); + LZ4_memcpy(op, anchor, lastRunSize); + op += lastRunSize; + } + + /* End */ + DEBUGLOG(5, "compressed %i bytes into %i bytes", *srcSizePtr, (int)((char*)op - dst)); + assert(ip >= (const BYTE*)src); + assert(ip <= iend); + *srcSizePtr = (int)(ip - (const BYTE*)src); + assert((char*)op >= dst); + assert(op <= oend); + assert((char*)op - dst < INT_MAX); + return (int)((char*)op - dst); + +_lz4mid_dest_overflow: + if (limit == fillOutput) { + /* Assumption : @ip, @anchor, @optr and @matchLength must be set correctly */ + size_t const ll = (size_t)(ip - anchor); + size_t const ll_addbytes = (ll + 240) / 255; + size_t const ll_totalCost = 1 + ll_addbytes + ll; + BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */ + DEBUGLOG(6, "Last sequence is overflowing : %u literals, %u remaining space", + (unsigned)ll, (unsigned)(oend-op)); + if (op + ll_totalCost <= maxLitPos) { + /* ll validated; now adjust match length */ + size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost)); + size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255); + assert(maxMlSize < INT_MAX); + if ((size_t)matchLength > maxMlSize) matchLength= (unsigned)maxMlSize; + if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + matchLength >= MFLIMIT) { + DEBUGLOG(6, "Let's encode a last sequence (ll=%u, ml=%u)", (unsigned)ll, matchLength); + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), + (int)matchLength, (int)matchDistance, + 
notLimited, oend); + } } + DEBUGLOG(6, "Let's finish with a run of literals (%u bytes left)", (unsigned)(oend-op)); + goto _lz4mid_last_literals; + } + /* compression failed */ + return 0; +} + + +/************************************** +* HC Compression - Search +**************************************/ + +/* Update chains up to ip (excluded) */ +LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip) +{ + U16* const chainTable = hc4->chainTable; + U32* const hashTable = hc4->hashTable; + const BYTE* const prefixPtr = hc4->prefixStart; + U32 const prefixIdx = hc4->dictLimit; + U32 const target = (U32)(ip - prefixPtr) + prefixIdx; + U32 idx = hc4->nextToUpdate; + assert(ip >= prefixPtr); + assert(target >= prefixIdx); + + while (idx < target) { + U32 const h = LZ4HC_hashPtr(prefixPtr+idx-prefixIdx); + size_t delta = idx - hashTable[h]; + if (delta>LZ4_DISTANCE_MAX) delta = LZ4_DISTANCE_MAX; + DELTANEXTU16(chainTable, idx) = (U16)delta; + hashTable[h] = idx; + idx++; + } + + hc4->nextToUpdate = target; +} + +#if defined(_MSC_VER) +# define LZ4HC_rotl32(x,r) _rotl(x,r) +#else +# define LZ4HC_rotl32(x,r) ((x << r) | (x >> (32 - r))) +#endif + + +static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern) +{ + size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3; + if (bitsToRotate == 0) return pattern; + return LZ4HC_rotl32(pattern, (int)bitsToRotate); +} + +/* LZ4HC_countPattern() : + * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */ +static unsigned +LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32) +{ + const BYTE* const iStart = ip; + reg_t const pattern = (sizeof(pattern)==8) ? 
+ (reg_t)pattern32 + (((reg_t)pattern32) << (sizeof(pattern)*4)) : pattern32; + + while (likely(ip < iEnd-(sizeof(pattern)-1))) { + reg_t const diff = LZ4_read_ARCH(ip) ^ pattern; + if (!diff) { ip+=sizeof(pattern); continue; } + ip += LZ4_NbCommonBytes(diff); + return (unsigned)(ip - iStart); + } + + if (LZ4_isLittleEndian()) { + reg_t patternByte = pattern; + while ((ip>= 8; + } + } else { /* big endian */ + U32 bitOffset = (sizeof(pattern)*8) - 8; + while (ip < iEnd) { + BYTE const byte = (BYTE)(pattern >> bitOffset); + if (*ip != byte) break; + ip ++; bitOffset -= 8; + } } + + return (unsigned)(ip - iStart); +} + +/* LZ4HC_reverseCountPattern() : + * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) + * read using natural platform endianness */ +static unsigned +LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern) +{ + const BYTE* const iStart = ip; + + while (likely(ip >= iLow+4)) { + if (LZ4_read32(ip-4) != pattern) break; + ip -= 4; + } + { const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* works for any endianness */ + while (likely(ip>iLow)) { + if (ip[-1] != *bytePtr) break; + ip--; bytePtr--; + } } + return (unsigned)(iStart - ip); +} + +/* LZ4HC_protectDictEnd() : + * Checks if the match is in the last 3 bytes of the dictionary, so reading the + * 4 byte MINMATCH would overflow. + * @returns true if the match index is okay. 
+ */ +static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex) +{ + return ((U32)((dictLimit - 1) - matchIndex) >= 3); +} + +typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e; +typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e; + + +LZ4_FORCE_INLINE LZ4HC_match_t +LZ4HC_InsertAndGetWiderMatch ( + LZ4HC_CCtx_internal* const hc4, + const BYTE* const ip, + const BYTE* const iLowLimit, const BYTE* const iHighLimit, + int longest, + const int maxNbAttempts, + const int patternAnalysis, const int chainSwap, + const dictCtx_directive dict, + const HCfavor_e favorDecSpeed) +{ + U16* const chainTable = hc4->chainTable; + U32* const hashTable = hc4->hashTable; + const LZ4HC_CCtx_internal* const dictCtx = hc4->dictCtx; + const BYTE* const prefixPtr = hc4->prefixStart; + const U32 prefixIdx = hc4->dictLimit; + const U32 ipIndex = (U32)(ip - prefixPtr) + prefixIdx; + const int withinStartDistance = (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex); + const U32 lowestMatchIndex = (withinStartDistance) ? 
hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX; + const BYTE* const dictStart = hc4->dictStart; + const U32 dictIdx = hc4->lowLimit; + const BYTE* const dictEnd = dictStart + prefixIdx - dictIdx; + int const lookBackLength = (int)(ip-iLowLimit); + int nbAttempts = maxNbAttempts; + U32 matchChainPos = 0; + U32 const pattern = LZ4_read32(ip); + U32 matchIndex; + repeat_state_e repeat = rep_untested; + size_t srcPatternLength = 0; + int offset = 0, sBack = 0; + + DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch"); + /* First Match */ + LZ4HC_Insert(hc4, ip); /* insert all prior positions up to ip (excluded) */ + matchIndex = hashTable[LZ4HC_hashPtr(ip)]; + DEBUGLOG(7, "First candidate match for pos %u found at index %u / %u (lowestMatchIndex)", + ipIndex, matchIndex, lowestMatchIndex); + + while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) { + int matchLength=0; + nbAttempts--; + assert(matchIndex < ipIndex); + if (favorDecSpeed && (ipIndex - matchIndex < 8)) { + /* do nothing: + * favorDecSpeed intentionally skips matches with offset < 8 */ + } else if (matchIndex >= prefixIdx) { /* within current Prefix */ + const BYTE* const matchPtr = prefixPtr + (matchIndex - prefixIdx); + assert(matchPtr < ip); + assert(longest >= 1); + if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) { + if (LZ4_read32(matchPtr) == pattern) { + int const back = lookBackLength ? 
LZ4HC_countBack(ip, matchPtr, iLowLimit, prefixPtr) : 0; + matchLength = MINMATCH + (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); + matchLength -= back; + if (matchLength > longest) { + longest = matchLength; + offset = (int)(ipIndex - matchIndex); + sBack = back; + DEBUGLOG(7, "Found match of len=%i within prefix, offset=%i, back=%i", longest, offset, -back); + } } } + } else { /* lowestMatchIndex <= matchIndex < dictLimit : within Ext Dict */ + const BYTE* const matchPtr = dictStart + (matchIndex - dictIdx); + assert(matchIndex >= dictIdx); + if ( likely(matchIndex <= prefixIdx - 4) + && (LZ4_read32(matchPtr) == pattern) ) { + int back = 0; + const BYTE* vLimit = ip + (prefixIdx - matchIndex); + if (vLimit > iHighLimit) vLimit = iHighLimit; + matchLength = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; + if ((ip+matchLength == vLimit) && (vLimit < iHighLimit)) + matchLength += LZ4_count(ip+matchLength, prefixPtr, iHighLimit); + back = lookBackLength ? 
LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0; + matchLength -= back; + if (matchLength > longest) { + longest = matchLength; + offset = (int)(ipIndex - matchIndex); + sBack = back; + DEBUGLOG(7, "Found match of len=%i within dict, offset=%i, back=%i", longest, offset, -back); + } } } + + if (chainSwap && matchLength==longest) { /* better match => select a better chain */ + assert(lookBackLength==0); /* search forward only */ + if (matchIndex + (U32)longest <= ipIndex) { + int const kTrigger = 4; + U32 distanceToNextMatch = 1; + int const end = longest - MINMATCH + 1; + int step = 1; + int accel = 1 << kTrigger; + int pos; + for (pos = 0; pos < end; pos += step) { + U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos); + step = (accel++ >> kTrigger); + if (candidateDist > distanceToNextMatch) { + distanceToNextMatch = candidateDist; + matchChainPos = (U32)pos; + accel = 1 << kTrigger; + } } + if (distanceToNextMatch > 1) { + if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ + matchIndex -= distanceToNextMatch; + continue; + } } } + + { U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex); + if (patternAnalysis && distNextMatch==1 && matchChainPos==0) { + U32 const matchCandidateIdx = matchIndex-1; + /* may be a repeated pattern */ + if (repeat == rep_untested) { + if ( ((pattern & 0xFFFF) == (pattern >> 16)) + & ((pattern & 0xFF) == (pattern >> 24)) ) { + DEBUGLOG(7, "Repeat pattern detected, char %02X", pattern >> 24); + repeat = rep_confirmed; + srcPatternLength = LZ4HC_countPattern(ip+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); + } else { + repeat = rep_not; + } } + if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex) + && LZ4HC_protectDictEnd(prefixIdx, matchCandidateIdx) ) { + const int extDict = matchCandidateIdx < prefixIdx; + const BYTE* const matchPtr = extDict ? 
dictStart + (matchCandidateIdx - dictIdx) : prefixPtr + (matchCandidateIdx - prefixIdx); + if (LZ4_read32(matchPtr) == pattern) { /* good candidate */ + const BYTE* const iLimit = extDict ? dictEnd : iHighLimit; + size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern); + if (extDict && matchPtr + forwardPatternLength == iLimit) { + U32 const rotatedPattern = LZ4HC_rotatePattern(forwardPatternLength, pattern); + forwardPatternLength += LZ4HC_countPattern(prefixPtr, iHighLimit, rotatedPattern); + } + { const BYTE* const lowestMatchPtr = extDict ? dictStart : prefixPtr; + size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern); + size_t currentSegmentLength; + if (!extDict + && matchPtr - backLength == prefixPtr + && dictIdx < prefixIdx) { + U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern); + backLength += LZ4HC_reverseCountPattern(dictEnd, dictStart, rotatedPattern); + } + /* Limit backLength not go further than lowestMatchIndex */ + backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex); + assert(matchCandidateIdx - backLength >= lowestMatchIndex); + currentSegmentLength = backLength + forwardPatternLength; + /* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */ + if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ + && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ + U32 const newMatchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ + if (LZ4HC_protectDictEnd(prefixIdx, newMatchIndex)) + matchIndex = newMatchIndex; + else { + /* Can only happen if started in the prefix */ + assert(newMatchIndex >= prefixIdx - 3 && newMatchIndex < prefixIdx && !extDict); + matchIndex = 
prefixIdx; + } + } else { + U32 const newMatchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */ + if (!LZ4HC_protectDictEnd(prefixIdx, newMatchIndex)) { + assert(newMatchIndex >= prefixIdx - 3 && newMatchIndex < prefixIdx && !extDict); + matchIndex = prefixIdx; + } else { + matchIndex = newMatchIndex; + if (lookBackLength==0) { /* no back possible */ + size_t const maxML = MIN(currentSegmentLength, srcPatternLength); + if ((size_t)longest < maxML) { + assert(prefixPtr - prefixIdx + matchIndex != ip); + if ((size_t)(ip - prefixPtr) + prefixIdx - matchIndex > LZ4_DISTANCE_MAX) break; + assert(maxML < 2 GB); + longest = (int)maxML; + offset = (int)(ipIndex - matchIndex); + assert(sBack == 0); + DEBUGLOG(7, "Found repeat pattern match of len=%i, offset=%i", longest, offset); + } + { U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex); + if (distToNextPattern > matchIndex) break; /* avoid overflow */ + matchIndex -= distToNextPattern; + } } } } } + continue; + } } + } } /* PA optimization */ + + /* follow current chain */ + matchIndex -= DELTANEXTU16(chainTable, matchIndex + matchChainPos); + + } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ + + if ( dict == usingDictCtxHc + && nbAttempts > 0 + && withinStartDistance) { + size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->prefixStart) + dictCtx->dictLimit; + U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)]; + assert(dictEndOffset <= 1 GB); + matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset; + if (dictMatchIndex>0) DEBUGLOG(7, "dictEndOffset = %zu, dictMatchIndex = %u => relative matchIndex = %i", dictEndOffset, dictMatchIndex, (int)dictMatchIndex - (int)dictEndOffset); + while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) { + const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + dictMatchIndex; + + if 
(LZ4_read32(matchPtr) == pattern) { + int mlt; + int back = 0; + const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex); + if (vLimit > iHighLimit) vLimit = iHighLimit; + mlt = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; + back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->prefixStart) : 0; + mlt -= back; + if (mlt > longest) { + longest = mlt; + offset = (int)(ipIndex - matchIndex); + sBack = back; + DEBUGLOG(7, "found match of length %i within extDictCtx", longest); + } } + + { U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex); + dictMatchIndex -= nextOffset; + matchIndex -= nextOffset; + } } } + + { LZ4HC_match_t md; + assert(longest >= 0); + md.len = longest; + md.off = offset; + md.back = sBack; + return md; + } +} + +LZ4_FORCE_INLINE LZ4HC_match_t +LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ + const BYTE* const ip, const BYTE* const iLimit, + const int maxNbAttempts, + const int patternAnalysis, + const dictCtx_directive dict) +{ + DEBUGLOG(7, "LZ4HC_InsertAndFindBestMatch"); + /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), + * but this won't be the case here, as we define iLowLimit==ip, + * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ + return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio); +} + + +LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( + LZ4HC_CCtx_internal* const ctx, + const char* const source, + char* const dest, + int* srcSizePtr, + int const maxOutputSize, + int maxNbAttempts, + const limitedOutput_directive limit, + const dictCtx_directive dict + ) +{ + const int inputSize = *srcSizePtr; + const int patternAnalysis = (maxNbAttempts > 128); /* levels 9+ */ + + const BYTE* ip = (const BYTE*) source; + const BYTE* anchor = ip; + const BYTE* const 
iend = ip + inputSize; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = (iend - LASTLITERALS); + + BYTE* optr = (BYTE*) dest; + BYTE* op = (BYTE*) dest; + BYTE* oend = op + maxOutputSize; + + const BYTE* start0; + const BYTE* start2 = NULL; + const BYTE* start3 = NULL; + LZ4HC_match_t m0, m1, m2, m3; + const LZ4HC_match_t nomatch = {0, 0, 0}; + + /* init */ + DEBUGLOG(5, "LZ4HC_compress_hashChain (dict?=>%i)", dict); + *srcSizePtr = 0; + if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ + if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */ + + /* Main Loop */ + while (ip <= mflimit) { + m1 = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, maxNbAttempts, patternAnalysis, dict); + if (m1.len encode ML1 immediately */ + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), + m1.len, m1.off, + limit, oend) ) + goto _dest_overflow; + continue; + } + + if (start0 < ip) { /* first match was skipped at least once */ + if (start2 < ip + m0.len) { /* squeezing ML1 between ML0(original ML1) and ML2 */ + ip = start0; m1 = m0; /* restore initial Match1 */ + } } + + /* Here, start0==ip */ + if ((start2 - ip) < 3) { /* First Match too small : removed */ + ip = start2; + m1 = m2; + goto _Search2; + } + +_Search3: + if ((start2 - ip) < OPTIMAL_ML) { + int correction; + int new_ml = m1.len; + if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; + if (ip+new_ml > start2 + m2.len - MINMATCH) + new_ml = (int)(start2 - ip) + m2.len - MINMATCH; + correction = new_ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + m2.len -= correction; + } + } + + if (start2 + m2.len <= mflimit) { + start3 = start2 + m2.len - 3; + m3 = LZ4HC_InsertAndGetWiderMatch(ctx, + start3, start2, matchlimit, m2.len, + maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio); + start3 += m3.back; + } else { + m3 = nomatch; /* do not search further */ + } + 
+ if (m3.len <= m2.len) { /* No better match => encode ML1 and ML2 */ + /* ip & ref are known; Now for ml */ + if (start2 < ip+m1.len) m1.len = (int)(start2 - ip); + /* Now, encode 2 sequences */ + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), + m1.len, m1.off, + limit, oend) ) + goto _dest_overflow; + ip = start2; + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), + m2.len, m2.off, + limit, oend) ) { + m1 = m2; + goto _dest_overflow; + } + continue; + } + + if (start3 < ip+m1.len+3) { /* Not enough space for match 2 : remove it */ + if (start3 >= (ip+m1.len)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */ + if (start2 < ip+m1.len) { + int correction = (int)(ip+m1.len - start2); + start2 += correction; + m2.len -= correction; + if (m2.len < MINMATCH) { + start2 = start3; + m2 = m3; + } + } + + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), + m1.len, m1.off, + limit, oend) ) + goto _dest_overflow; + ip = start3; + m1 = m3; + + start0 = start2; + m0 = m2; + goto _Search2; + } + + start2 = start3; + m2 = m3; + goto _Search3; + } + + /* + * OK, now we have 3 ascending matches; + * let's write the first one ML1. + * ip & ref are known; Now decide ml. 
+ */ + if (start2 < ip+m1.len) { + if ((start2 - ip) < OPTIMAL_ML) { + int correction; + if (m1.len > OPTIMAL_ML) m1.len = OPTIMAL_ML; + if (ip + m1.len > start2 + m2.len - MINMATCH) + m1.len = (int)(start2 - ip) + m2.len - MINMATCH; + correction = m1.len - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + m2.len -= correction; + } + } else { + m1.len = (int)(start2 - ip); + } + } + optr = op; + if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), + m1.len, m1.off, + limit, oend) ) + goto _dest_overflow; + + /* ML2 becomes ML1 */ + ip = start2; m1 = m2; + + /* ML3 becomes ML2 */ + start2 = start3; m2 = m3; + + /* let's find a new ML3 */ + goto _Search3; + } + +_last_literals: + /* Encode Last Literals */ + { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ + size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; + size_t const totalSize = 1 + llAdd + lastRunSize; + if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ + if (limit && (op + totalSize > oend)) { + if (limit == limitedOutput) return 0; + /* adapt lastRunSize to fill 'dest' */ + lastRunSize = (size_t)(oend - op) - 1 /*token*/; + llAdd = (lastRunSize + 256 - RUN_MASK) / 256; + lastRunSize -= llAdd; + } + DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize); + ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */ + + if (lastRunSize >= RUN_MASK) { + size_t accumulator = lastRunSize - RUN_MASK; + *op++ = (RUN_MASK << ML_BITS); + for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRunSize << ML_BITS); + } + LZ4_memcpy(op, anchor, lastRunSize); + op += lastRunSize; + } + + /* End */ + *srcSizePtr = (int) (((const char*)ip) - source); + return (int) (((char*)op)-dest); + +_dest_overflow: + if (limit == fillOutput) { + /* Assumption : @ip, @anchor, @optr and @m1 must be set correctly */ + size_t const ll = (size_t)(ip - anchor); + size_t const ll_addbytes = (ll + 
240) / 255; + size_t const ll_totalCost = 1 + ll_addbytes + ll; + BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */ + DEBUGLOG(6, "Last sequence overflowing"); + op = optr; /* restore correct out pointer */ + if (op + ll_totalCost <= maxLitPos) { + /* ll validated; now adjust match length */ + size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost)); + size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255); + assert(maxMlSize < INT_MAX); assert(m1.len >= 0); + if ((size_t)m1.len > maxMlSize) m1.len = (int)maxMlSize; + if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + m1.len >= MFLIMIT) { + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), m1.len, m1.off, notLimited, oend); + } } + goto _last_literals; + } + /* compression failed */ + return 0; +} + + +static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx, + const char* const source, char* dst, + int* srcSizePtr, int dstCapacity, + int const nbSearches, size_t sufficient_len, + const limitedOutput_directive limit, int const fullUpdate, + const dictCtx_directive dict, + const HCfavor_e favorDecSpeed); + +LZ4_FORCE_INLINE int +LZ4HC_compress_generic_internal ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + int const dstCapacity, + int cLevel, + const limitedOutput_directive limit, + const dictCtx_directive dict + ) +{ + DEBUGLOG(5, "LZ4HC_compress_generic_internal(src=%p, srcSize=%d)", + src, *srcSizePtr); + + if (limit == fillOutput && dstCapacity < 1) return 0; /* Impossible to store anything */ + if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */ + + ctx->end += *srcSizePtr; + { cParams_t const cParam = LZ4HC_getCLevelParams(cLevel); + HCfavor_e const favor = ctx->favorDecSpeed ? 
favorDecompressionSpeed : favorCompressionRatio; + int result; + + if (cParam.strat == lz4mid) { + result = LZ4MID_compress(ctx, + src, dst, srcSizePtr, dstCapacity, + limit, dict); + } else if (cParam.strat == lz4hc) { + result = LZ4HC_compress_hashChain(ctx, + src, dst, srcSizePtr, dstCapacity, + cParam.nbSearches, limit, dict); + } else { + assert(cParam.strat == lz4opt); + result = LZ4HC_compress_optimal(ctx, + src, dst, srcSizePtr, dstCapacity, + cParam.nbSearches, cParam.targetLength, limit, + cLevel >= LZ4HC_CLEVEL_MAX, /* ultra mode */ + dict, favor); + } + if (result <= 0) ctx->dirty = 1; + return result; + } +} + +static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock); + +static int +LZ4HC_compress_generic_noDictCtx ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + int const dstCapacity, + int cLevel, + limitedOutput_directive limit + ) +{ + assert(ctx->dictCtx == NULL); + return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, noDictCtx); +} + +static int isStateCompatible(const LZ4HC_CCtx_internal* ctx1, const LZ4HC_CCtx_internal* ctx2) +{ + int const isMid1 = LZ4HC_getCLevelParams(ctx1->compressionLevel).strat == lz4mid; + int const isMid2 = LZ4HC_getCLevelParams(ctx2->compressionLevel).strat == lz4mid; + return !(isMid1 ^ isMid2); +} + +static int +LZ4HC_compress_generic_dictCtx ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + int const dstCapacity, + int cLevel, + limitedOutput_directive limit + ) +{ + const size_t position = (size_t)(ctx->end - ctx->prefixStart) + (ctx->dictLimit - ctx->lowLimit); + assert(ctx->dictCtx != NULL); + if (position >= 64 KB) { + ctx->dictCtx = NULL; + return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } else if (position == 0 && *srcSizePtr > 4 KB && isStateCompatible(ctx, ctx->dictCtx)) { + 
LZ4_memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal)); + LZ4HC_setExternalDict(ctx, (const BYTE *)src); + ctx->compressionLevel = (short)cLevel; + return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } else { + return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, usingDictCtxHc); + } +} + +static int +LZ4HC_compress_generic ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + int const dstCapacity, + int cLevel, + limitedOutput_directive limit + ) +{ + if (ctx->dictCtx == NULL) { + return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } else { + return LZ4HC_compress_generic_dictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } +} + + +int LZ4_sizeofStateHC(void) { return (int)sizeof(LZ4_streamHC_t); } + +static size_t LZ4_streamHC_t_alignment(void) +{ +#if LZ4_ALIGN_TEST + typedef struct { char c; LZ4_streamHC_t t; } t_a; + return sizeof(t_a) - sizeof(LZ4_streamHC_t); +#else + return 1; /* effectively disabled */ +#endif +} + +/* state is presumed correctly initialized, + * in which case its size and alignment have already been validate */ +int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) +{ + LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)state)->internal_donotuse; + if (!LZ4_isAligned(state, LZ4_streamHC_t_alignment())) return 0; + LZ4_resetStreamHC_fast((LZ4_streamHC_t*)state, compressionLevel); + LZ4HC_init_internal (ctx, (const BYTE*)src); + if (dstCapacity < LZ4_compressBound(srcSize)) + return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, limitedOutput); + else + return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, notLimited); +} + +int LZ4_compress_HC_extStateHC (void* state, const char* src, char* 
dst, int srcSize, int dstCapacity, int compressionLevel) +{ + LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx)); + if (ctx==NULL) return 0; /* init failure */ + return LZ4_compress_HC_extStateHC_fastReset(state, src, dst, srcSize, dstCapacity, compressionLevel); +} + +int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) +{ + int cSize; +#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 + LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t)); + if (statePtr==NULL) return 0; +#else + LZ4_streamHC_t state; + LZ4_streamHC_t* const statePtr = &state; +#endif + DEBUGLOG(5, "LZ4_compress_HC") + cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, dstCapacity, compressionLevel); +#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 + FREEMEM(statePtr); +#endif + return cSize; +} + +/* state is presumed sized correctly (>= sizeof(LZ4_streamHC_t)) */ +int LZ4_compress_HC_destSize(void* state, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel) +{ + LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx)); + if (ctx==NULL) return 0; /* init failure */ + LZ4HC_init_internal(&ctx->internal_donotuse, (const BYTE*) source); + LZ4_setCompressionLevel(ctx, cLevel); + return LZ4HC_compress_generic(&ctx->internal_donotuse, source, dest, sourceSizePtr, targetDestSize, cLevel, fillOutput); +} + + + +/************************************** +* Streaming Functions +**************************************/ +/* allocation */ +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +LZ4_streamHC_t* LZ4_createStreamHC(void) +{ + LZ4_streamHC_t* const state = + (LZ4_streamHC_t*)ALLOC_AND_ZERO(sizeof(LZ4_streamHC_t)); + if (state == NULL) return NULL; + LZ4_setCompressionLevel(state, LZ4HC_CLEVEL_DEFAULT); + return state; +} + +int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) +{ + DEBUGLOG(4, "LZ4_freeStreamHC(%p)", LZ4_streamHCPtr); + if 
(!LZ4_streamHCPtr) return 0; /* support free on NULL */ + FREEMEM(LZ4_streamHCPtr); + return 0; +} +#endif + + +LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size) +{ + LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)buffer; + DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", buffer, (unsigned)size); + /* check conditions */ + if (buffer == NULL) return NULL; + if (size < sizeof(LZ4_streamHC_t)) return NULL; + if (!LZ4_isAligned(buffer, LZ4_streamHC_t_alignment())) return NULL; + /* init */ + { LZ4HC_CCtx_internal* const hcstate = &(LZ4_streamHCPtr->internal_donotuse); + MEM_INIT(hcstate, 0, sizeof(*hcstate)); } + LZ4_setCompressionLevel(LZ4_streamHCPtr, LZ4HC_CLEVEL_DEFAULT); + return LZ4_streamHCPtr; +} + +/* just a stub */ +void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) +{ + LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); + LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); +} + +void LZ4_resetStreamHC_fast (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) +{ + LZ4HC_CCtx_internal* const s = &LZ4_streamHCPtr->internal_donotuse; + DEBUGLOG(5, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel); + if (s->dirty) { + LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); + } else { + assert(s->end >= s->prefixStart); + s->dictLimit += (U32)(s->end - s->prefixStart); + s->prefixStart = NULL; + s->end = NULL; + s->dictCtx = NULL; + } + LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); +} + +void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) +{ + DEBUGLOG(5, "LZ4_setCompressionLevel(%p, %d)", LZ4_streamHCPtr, compressionLevel); + if (compressionLevel < 1) compressionLevel = LZ4HC_CLEVEL_DEFAULT; + if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX; + LZ4_streamHCPtr->internal_donotuse.compressionLevel = (short)compressionLevel; +} + +void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor) +{ 
+ LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = (favor!=0); +} + +/* LZ4_loadDictHC() : + * LZ4_streamHCPtr is presumed properly initialized */ +int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, + const char* dictionary, int dictSize) +{ + LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; + cParams_t cp; + DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d, clevel=%d)", LZ4_streamHCPtr, dictionary, dictSize, ctxPtr->compressionLevel); + assert(dictSize >= 0); + assert(LZ4_streamHCPtr != NULL); + if (dictSize > 64 KB) { + dictionary += (size_t)dictSize - 64 KB; + dictSize = 64 KB; + } + /* need a full initialization, there are bad side-effects when using resetFast() */ + { int const cLevel = ctxPtr->compressionLevel; + LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); + LZ4_setCompressionLevel(LZ4_streamHCPtr, cLevel); + cp = LZ4HC_getCLevelParams(cLevel); + } + LZ4HC_init_internal (ctxPtr, (const BYTE*)dictionary); + ctxPtr->end = (const BYTE*)dictionary + dictSize; + if (cp.strat == lz4mid) { + LZ4MID_fillHTable (ctxPtr, dictionary, (size_t)dictSize); + } else { + if (dictSize >= LZ4HC_HASHSIZE) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); + } + return dictSize; +} + +void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream) { + working_stream->internal_donotuse.dictCtx = dictionary_stream != NULL ? 
&(dictionary_stream->internal_donotuse) : NULL; +} + +/* compression */ + +static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock) +{ + DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock); + if ( (ctxPtr->end >= ctxPtr->prefixStart + 4) + && (LZ4HC_getCLevelParams(ctxPtr->compressionLevel).strat != lz4mid) ) { + LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */ + } + + /* Only one memory segment for extDict, so any previous extDict is lost at this stage */ + ctxPtr->lowLimit = ctxPtr->dictLimit; + ctxPtr->dictStart = ctxPtr->prefixStart; + ctxPtr->dictLimit += (U32)(ctxPtr->end - ctxPtr->prefixStart); + ctxPtr->prefixStart = newBlock; + ctxPtr->end = newBlock; + ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */ + + /* cannot reference an extDict and a dictCtx at the same time */ + ctxPtr->dictCtx = NULL; +} + +static int +LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr, + const char* src, char* dst, + int* srcSizePtr, int dstCapacity, + limitedOutput_directive limit) +{ + LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; + DEBUGLOG(5, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)", + LZ4_streamHCPtr, src, *srcSizePtr, limit); + assert(ctxPtr != NULL); + /* auto-init if forgotten */ + if (ctxPtr->prefixStart == NULL) + LZ4HC_init_internal (ctxPtr, (const BYTE*) src); + + /* Check overflow */ + if ((size_t)(ctxPtr->end - ctxPtr->prefixStart) + ctxPtr->dictLimit > 2 GB) { + size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->prefixStart); + if (dictSize > 64 KB) dictSize = 64 KB; + LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize); + } + + /* Check if blocks follow each other */ + if ((const BYTE*)src != ctxPtr->end) + LZ4HC_setExternalDict(ctxPtr, (const BYTE*)src); + + /* Check overlapping input/dictionary space */ + { const BYTE* sourceEnd = (const BYTE*) 
src + *srcSizePtr; + const BYTE* const dictBegin = ctxPtr->dictStart; + const BYTE* const dictEnd = ctxPtr->dictStart + (ctxPtr->dictLimit - ctxPtr->lowLimit); + if ((sourceEnd > dictBegin) && ((const BYTE*)src < dictEnd)) { + if (sourceEnd > dictEnd) sourceEnd = dictEnd; + ctxPtr->lowLimit += (U32)(sourceEnd - ctxPtr->dictStart); + ctxPtr->dictStart += (U32)(sourceEnd - ctxPtr->dictStart); + /* invalidate dictionary is it's too small */ + if (ctxPtr->dictLimit - ctxPtr->lowLimit < LZ4HC_HASHSIZE) { + ctxPtr->lowLimit = ctxPtr->dictLimit; + ctxPtr->dictStart = ctxPtr->prefixStart; + } } } + + return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit); +} + +int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity) +{ + DEBUGLOG(5, "LZ4_compress_HC_continue"); + if (dstCapacity < LZ4_compressBound(srcSize)) + return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput); + else + return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, notLimited); +} + +int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize) +{ + return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, fillOutput); +} + + +/* LZ4_saveDictHC : + * save history content + * into a user-provided buffer + * which is then used to continue compression + */ +int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize) +{ + LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse; + int const prefixSize = (int)(streamPtr->end - streamPtr->prefixStart); + DEBUGLOG(5, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize); + assert(prefixSize >= 0); + if (dictSize > 64 KB) dictSize = 64 KB; + if (dictSize < 4) dictSize = 0; + if (dictSize > prefixSize) 
dictSize = prefixSize; + if (safeBuffer == NULL) assert(dictSize == 0); + if (dictSize > 0) + LZ4_memmove(safeBuffer, streamPtr->end - dictSize, (size_t)dictSize); + { U32 const endIndex = (U32)(streamPtr->end - streamPtr->prefixStart) + streamPtr->dictLimit; + streamPtr->end = (safeBuffer == NULL) ? NULL : (const BYTE*)safeBuffer + dictSize; + streamPtr->prefixStart = (const BYTE*)safeBuffer; + streamPtr->dictLimit = endIndex - (U32)dictSize; + streamPtr->lowLimit = endIndex - (U32)dictSize; + streamPtr->dictStart = streamPtr->prefixStart; + if (streamPtr->nextToUpdate < streamPtr->dictLimit) + streamPtr->nextToUpdate = streamPtr->dictLimit; + } + return dictSize; +} + + +/* ================================================ + * LZ4 Optimal parser (levels [LZ4HC_CLEVEL_OPT_MIN - LZ4HC_CLEVEL_MAX]) + * ===============================================*/ +typedef struct { + int price; + int off; + int mlen; + int litlen; +} LZ4HC_optimal_t; + +/* price in bytes */ +LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen) +{ + int price = litlen; + assert(litlen >= 0); + if (litlen >= (int)RUN_MASK) + price += 1 + ((litlen-(int)RUN_MASK) / 255); + return price; +} + +/* requires mlen >= MINMATCH */ +LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) +{ + int price = 1 + 2 ; /* token + 16-bit offset */ + assert(litlen >= 0); + assert(mlen >= MINMATCH); + + price += LZ4HC_literalsPrice(litlen); + + if (mlen >= (int)(ML_MASK+MINMATCH)) + price += 1 + ((mlen-(int)(ML_MASK+MINMATCH)) / 255); + + return price; +} + +LZ4_FORCE_INLINE LZ4HC_match_t +LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, + const BYTE* ip, const BYTE* const iHighLimit, + int minLen, int nbSearches, + const dictCtx_directive dict, + const HCfavor_e favorDecSpeed) +{ + LZ4HC_match_t const match0 = { 0 , 0, 0 }; + /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), + * but this won't be the case here, as we define iLowLimit==ip, + ** 
so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ + LZ4HC_match_t md = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed); + assert(md.back == 0); + if (md.len <= minLen) return match0; + if (favorDecSpeed) { + if ((md.len>18) & (md.len<=36)) md.len=18; /* favor dec.speed (shortcut) */ + } + return md; +} + + +static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, + const char* const source, + char* dst, + int* srcSizePtr, + int dstCapacity, + int const nbSearches, + size_t sufficient_len, + const limitedOutput_directive limit, + int const fullUpdate, + const dictCtx_directive dict, + const HCfavor_e favorDecSpeed) +{ + int retval = 0; +#define TRAILING_LITERALS 3 +#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 + LZ4HC_optimal_t* const opt = (LZ4HC_optimal_t*)ALLOC(sizeof(LZ4HC_optimal_t) * (LZ4_OPT_NUM + TRAILING_LITERALS)); +#else + LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which is a bit large for stack... 
*/ +#endif + + const BYTE* ip = (const BYTE*) source; + const BYTE* anchor = ip; + const BYTE* const iend = ip + *srcSizePtr; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = iend - LASTLITERALS; + BYTE* op = (BYTE*) dst; + BYTE* opSaved = (BYTE*) dst; + BYTE* oend = op + dstCapacity; + int ovml = MINMATCH; /* overflow - last sequence */ + int ovoff = 0; + + /* init */ +#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 + if (opt == NULL) goto _return_label; +#endif + DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity); + *srcSizePtr = 0; + if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ + if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1; + + /* Main Loop */ + while (ip <= mflimit) { + int const llen = (int)(ip - anchor); + int best_mlen, best_off; + int cur, last_match_pos = 0; + + LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed); + if (firstMatch.len==0) { ip++; continue; } + + if ((size_t)firstMatch.len > sufficient_len) { + /* good enough solution : immediate encoding */ + int const firstML = firstMatch.len; + opSaved = op; + if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, firstMatch.off, limit, oend) ) { /* updates ip, op and anchor */ + ovml = firstML; + ovoff = firstMatch.off; + goto _dest_overflow; + } + continue; + } + + /* set prices for first positions (literals) */ + { int rPos; + for (rPos = 0 ; rPos < MINMATCH ; rPos++) { + int const cost = LZ4HC_literalsPrice(llen + rPos); + opt[rPos].mlen = 1; + opt[rPos].off = 0; + opt[rPos].litlen = llen + rPos; + opt[rPos].price = cost; + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", + rPos, cost, opt[rPos].litlen); + } } + /* set prices using initial match */ + { int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ + int const offset = firstMatch.off; + 
int mlen; + assert(matchML < LZ4_OPT_NUM); + for (mlen = MINMATCH ; mlen <= matchML ; mlen++) { + int const cost = LZ4HC_sequencePrice(llen, mlen); + opt[mlen].mlen = mlen; + opt[mlen].off = offset; + opt[mlen].litlen = llen; + opt[mlen].price = cost; + DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup", + mlen, cost, mlen); + } } + last_match_pos = firstMatch.len; + { int addLit; + for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { + opt[last_match_pos+addLit].mlen = 1; /* literal */ + opt[last_match_pos+addLit].off = 0; + opt[last_match_pos+addLit].litlen = addLit; + opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", + last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); + } } + + /* check further positions */ + for (cur = 1; cur < last_match_pos; cur++) { + const BYTE* const curPtr = ip + cur; + LZ4HC_match_t newMatch; + + if (curPtr > mflimit) break; + DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u", + cur, opt[cur].price, opt[cur+1].price, cur+1); + if (fullUpdate) { + /* not useful to search here if next position has same (or lower) cost */ + if ( (opt[cur+1].price <= opt[cur].price) + /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */ + && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) ) + continue; + } else { + /* not useful to search here if next position has same (or lower) cost */ + if (opt[cur+1].price <= opt[cur].price) continue; + } + + DEBUGLOG(7, "search at rPos:%u", cur); + if (fullUpdate) + newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed); + else + /* only test matches of minimum length; slightly faster, but misses a few bytes */ + newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches, dict, favorDecSpeed); + if (!newMatch.len) continue; + 
+ if ( ((size_t)newMatch.len > sufficient_len) + || (newMatch.len + cur >= LZ4_OPT_NUM) ) { + /* immediate encoding */ + best_mlen = newMatch.len; + best_off = newMatch.off; + last_match_pos = cur + 1; + goto encode; + } + + /* before match : set price with literals at beginning */ + { int const baseLitlen = opt[cur].litlen; + int litlen; + for (litlen = 1; litlen < MINMATCH; litlen++) { + int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen); + int const pos = cur + litlen; + if (price < opt[pos].price) { + opt[pos].mlen = 1; /* literal */ + opt[pos].off = 0; + opt[pos].litlen = baseLitlen+litlen; + opt[pos].price = price; + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", + pos, price, opt[pos].litlen); + } } } + + /* set prices using match at position = cur */ + { int const matchML = newMatch.len; + int ml = MINMATCH; + + assert(cur + newMatch.len < LZ4_OPT_NUM); + for ( ; ml <= matchML ; ml++) { + int const pos = cur + ml; + int const offset = newMatch.off; + int price; + int ll; + DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)", + pos, last_match_pos); + if (opt[cur].mlen == 1) { + ll = opt[cur].litlen; + price = ((cur > ll) ? 
opt[cur - ll].price : 0) + + LZ4HC_sequencePrice(ll, ml); + } else { + ll = 0; + price = opt[cur].price + LZ4HC_sequencePrice(0, ml); + } + + assert((U32)favorDecSpeed <= 1); + if (pos > last_match_pos+TRAILING_LITERALS + || price <= opt[pos].price - (int)favorDecSpeed) { + DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)", + pos, price, ml); + assert(pos < LZ4_OPT_NUM); + if ( (ml == matchML) /* last pos of last match */ + && (last_match_pos < pos) ) + last_match_pos = pos; + opt[pos].mlen = ml; + opt[pos].off = offset; + opt[pos].litlen = ll; + opt[pos].price = price; + } } } + /* complete following positions with literals */ + { int addLit; + for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { + opt[last_match_pos+addLit].mlen = 1; /* literal */ + opt[last_match_pos+addLit].off = 0; + opt[last_match_pos+addLit].litlen = addLit; + opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); + } } + } /* for (cur = 1; cur <= last_match_pos; cur++) */ + + assert(last_match_pos < LZ4_OPT_NUM + TRAILING_LITERALS); + best_mlen = opt[last_match_pos].mlen; + best_off = opt[last_match_pos].off; + cur = last_match_pos - best_mlen; + +encode: /* cur, last_match_pos, best_mlen, best_off must be set */ + assert(cur < LZ4_OPT_NUM); + assert(last_match_pos >= 1); /* == 1 when only one candidate */ + DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos); + { int candidate_pos = cur; + int selected_matchLength = best_mlen; + int selected_offset = best_off; + while (1) { /* from end to beginning */ + int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */ + int const next_offset = opt[candidate_pos].off; + DEBUGLOG(7, "pos %i: sequence length %i", candidate_pos, selected_matchLength); + opt[candidate_pos].mlen = selected_matchLength; + opt[candidate_pos].off 
= selected_offset; + selected_matchLength = next_matchLength; + selected_offset = next_offset; + if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */ + assert(next_matchLength > 0); /* can be 1, means literal */ + candidate_pos -= next_matchLength; + } } + + /* encode all recorded sequences in order */ + { int rPos = 0; /* relative position (to ip) */ + while (rPos < last_match_pos) { + int const ml = opt[rPos].mlen; + int const offset = opt[rPos].off; + if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */ + rPos += ml; + assert(ml >= MINMATCH); + assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX)); + opSaved = op; + if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, offset, limit, oend) ) { /* updates ip, op and anchor */ + ovml = ml; + ovoff = offset; + goto _dest_overflow; + } } } + } /* while (ip <= mflimit) */ + +_last_literals: + /* Encode Last Literals */ + { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ + size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; + size_t const totalSize = 1 + llAdd + lastRunSize; + if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ + if (limit && (op + totalSize > oend)) { + if (limit == limitedOutput) { /* Check output limit */ + retval = 0; + goto _return_label; + } + /* adapt lastRunSize to fill 'dst' */ + lastRunSize = (size_t)(oend - op) - 1 /*token*/; + llAdd = (lastRunSize + 256 - RUN_MASK) / 256; + lastRunSize -= llAdd; + } + DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize); + ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */ + + if (lastRunSize >= RUN_MASK) { + size_t accumulator = lastRunSize - RUN_MASK; + *op++ = (RUN_MASK << ML_BITS); + for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRunSize << ML_BITS); + } + LZ4_memcpy(op, anchor, lastRunSize); + op += 
lastRunSize; + } + + /* End */ + *srcSizePtr = (int) (((const char*)ip) - source); + retval = (int) ((char*)op-dst); + goto _return_label; + +_dest_overflow: +if (limit == fillOutput) { + /* Assumption : ip, anchor, ovml and ovref must be set correctly */ + size_t const ll = (size_t)(ip - anchor); + size_t const ll_addbytes = (ll + 240) / 255; + size_t const ll_totalCost = 1 + ll_addbytes + ll; + BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */ + DEBUGLOG(6, "Last sequence overflowing (only %i bytes remaining)", (int)(oend-1-opSaved)); + op = opSaved; /* restore correct out pointer */ + if (op + ll_totalCost <= maxLitPos) { + /* ll validated; now adjust match length */ + size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost)); + size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255); + assert(maxMlSize < INT_MAX); assert(ovml >= 0); + if ((size_t)ovml > maxMlSize) ovml = (int)maxMlSize; + if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) { + DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml); + DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor); + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovoff, notLimited, oend); + DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor); + } } + goto _last_literals; +} +_return_label: +#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 + if (opt) FREEMEM(opt); +#endif + return retval; +} + + +/*************************************************** +* Deprecated Functions +***************************************************/ + +/* These functions currently generate deprecation warnings */ + +/* Wrappers for deprecated compression functions */ +int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); } +int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return 
LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); } +int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } +int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); } +int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); } +int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); } +int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } +int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); } +int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); } +int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); } + + +/* Deprecated streaming functions */ +int LZ4_sizeofStreamStateHC(void) { return sizeof(LZ4_streamHC_t); } + +/* state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t) + * @return : 0 on success, !=0 if error */ +int LZ4_resetStreamStateHC(void* state, char* inputBuffer) +{ + LZ4_streamHC_t* const hc4 = LZ4_initStreamHC(state, sizeof(*hc4)); + if (hc4 == NULL) return 1; /* init failed */ + LZ4HC_init_internal 
(&hc4->internal_donotuse, (const BYTE*)inputBuffer); + return 0; +} + +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +void* LZ4_createHC (const char* inputBuffer) +{ + LZ4_streamHC_t* const hc4 = LZ4_createStreamHC(); + if (hc4 == NULL) return NULL; /* not enough memory */ + LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer); + return hc4; +} + +int LZ4_freeHC (void* LZ4HC_Data) +{ + if (!LZ4HC_Data) return 0; /* support free on NULL */ + FREEMEM(LZ4HC_Data); + return 0; +} +#endif + +int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel) +{ + return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited); +} + +int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel) +{ + return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput); +} + +char* LZ4_slideInputBufferHC(void* LZ4HC_Data) +{ + LZ4HC_CCtx_internal* const s = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse; + const BYTE* const bufferStart = s->prefixStart - s->dictLimit + s->lowLimit; + LZ4_resetStreamHC_fast((LZ4_streamHC_t*)LZ4HC_Data, s->compressionLevel); + /* ugly conversion trick, required to evade (const char*) -> (char*) cast-qual warning :( */ + return (char*)(uptrval)bufferStart; +} diff --git a/lib/lz4/lz4hc.h b/lib/lz4/lz4hc.h new file mode 100644 index 000000000000..992bc8cdd770 --- /dev/null +++ b/lib/lz4/lz4hc.h @@ -0,0 +1,414 @@ +/* + LZ4 HC - High Compression Mode of LZ4 + Header File + Copyright (C) 2011-2020, Yann Collet. 
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +#ifndef LZ4_HC_H_19834876238432 +#define LZ4_HC_H_19834876238432 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* --- Dependency --- */ +/* note : lz4hc requires lz4.h/lz4.c for compilation */ +#include "lz4.h" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */ + + +/* --- Useful constants --- */ +#define LZ4HC_CLEVEL_MIN 2 +#define LZ4HC_CLEVEL_DEFAULT 9 +#define LZ4HC_CLEVEL_OPT_MIN 10 +#define LZ4HC_CLEVEL_MAX 12 + + +/*-************************************ + * Block Compression + **************************************/ +/*! LZ4_compress_HC() : + * Compress data from `src` into `dst`, using the powerful but slower "HC" algorithm. + * `dst` must be already allocated. + * Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h") + * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h") + * `compressionLevel` : any value between 1 and LZ4HC_CLEVEL_MAX will work. + * Values > LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX. + * @return : the number of bytes written into 'dst' + * or 0 if compression fails. + */ +LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel); + + +/* Note : + * Decompression functions are provided within "lz4.h" (BSD license) + */ + + +/*! LZ4_compress_HC_extStateHC() : + * Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`. + * `state` size is provided by LZ4_sizeofStateHC(). + * Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() should do properly). + */ +LZ4LIB_API int LZ4_sizeofStateHC(void); +LZ4LIB_API int LZ4_compress_HC_extStateHC(void* stateHC, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel); + + +/*! 
LZ4_compress_HC_destSize() : v1.9.0+ + * Will compress as much data as possible from `src` + * to fit into `targetDstSize` budget. + * Result is provided in 2 parts : + * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize) + * or 0 if compression fails. + * `srcSizePtr` : on success, *srcSizePtr is updated to indicate how much bytes were read from `src` + */ +LZ4LIB_API int LZ4_compress_HC_destSize(void* stateHC, + const char* src, char* dst, + int* srcSizePtr, int targetDstSize, + int compressionLevel); + + +/*-************************************ + * Streaming Compression + * Bufferless synchronous API + **************************************/ + typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */ + +/*! LZ4_createStreamHC() and LZ4_freeStreamHC() : + * These functions create and release memory for LZ4 HC streaming state. + * Newly created states are automatically initialized. + * A same state can be used multiple times consecutively, + * starting with LZ4_resetStreamHC_fast() to start a new stream of blocks. + */ +LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void); +LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr); + +/* + These functions compress data in successive blocks of any size, + using previous blocks as dictionary, to improve compression ratio. + One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks. + There is an exception for ring buffers, which can be smaller than 64 KB. + Ring-buffer scenario is automatically detected and handled within LZ4_compress_HC_continue(). + + Before starting compression, state must be allocated and properly initialized. + LZ4_createStreamHC() does both, though compression level is set to LZ4HC_CLEVEL_DEFAULT. + + Selecting the compression level can be done with LZ4_resetStreamHC_fast() (starts a new stream) + or LZ4_setCompressionLevel() (anytime, between blocks in the same stream) (experimental). 
+ LZ4_resetStreamHC_fast() only works on states which have been properly initialized at least once, + which is automatically the case when state is created using LZ4_createStreamHC(). + + After reset, a first "fictional block" can be designated as initial dictionary, + using LZ4_loadDictHC() (Optional). + Note: In order for LZ4_loadDictHC() to create the correct data structure, + it is essential to set the compression level _before_ loading the dictionary. + + Invoke LZ4_compress_HC_continue() to compress each successive block. + The number of blocks is unlimited. + Previous input blocks, including initial dictionary when present, + must remain accessible and unmodified during compression. + + It's allowed to update compression level anytime between blocks, + using LZ4_setCompressionLevel() (experimental). + + @dst buffer should be sized to handle worst case scenarios + (see LZ4_compressBound(), it ensures compression success). + In case of failure, the API does not guarantee recovery, + so the state _must_ be reset. + To ensure compression success + whenever @dst buffer size cannot be made >= LZ4_compressBound(), + consider using LZ4_compress_HC_continue_destSize(). + + Whenever previous input blocks can't be preserved unmodified in-place during compression of next blocks, + it's possible to copy the last blocks into a more stable memory space, using LZ4_saveDictHC(). + Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer' (<= 64 KB) + + After completing a streaming compression, + it's possible to start a new stream of blocks, using the same LZ4_streamHC_t state, + just by resetting it, using LZ4_resetStreamHC_fast(). 
+*/ + +LZ4LIB_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t* streamHCPtr, int compressionLevel); /* v1.9.0+ */ +LZ4LIB_API int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize); + +LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, + const char* src, char* dst, + int srcSize, int maxDstSize); + +/*! LZ4_compress_HC_continue_destSize() : v1.9.0+ + * Similar to LZ4_compress_HC_continue(), + * but will read as much data as possible from `src` + * to fit into `targetDstSize` budget. + * Result is provided into 2 parts : + * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize) + * or 0 if compression fails. + * `srcSizePtr` : on success, *srcSizePtr will be updated to indicate how much bytes were read from `src`. + * Note that this function may not consume the entire input. + */ +LZ4LIB_API int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr, + const char* src, char* dst, + int* srcSizePtr, int targetDstSize); + +LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize); + + +/*! LZ4_attach_HC_dictionary() : stable since v1.10.0 + * This API allows for the efficient re-use of a static dictionary many times. + * + * Rather than re-loading the dictionary buffer into a working context before + * each compression, or copying a pre-loaded dictionary's LZ4_streamHC_t into a + * working LZ4_streamHC_t, this function introduces a no-copy setup mechanism, + * in which the working stream references the dictionary stream in-place. + * + * Several assumptions are made about the state of the dictionary stream. + * Currently, only streams which have been prepared by LZ4_loadDictHC() should + * be expected to work. + * + * Alternatively, the provided dictionary stream pointer may be NULL, in which + * case any existing dictionary stream is unset. 
+ * + * A dictionary should only be attached to a stream without any history (i.e., + * a stream that has just been reset). + * + * The dictionary will remain attached to the working stream only for the + * current stream session. Calls to LZ4_resetStreamHC(_fast) will remove the + * dictionary context association from the working stream. The dictionary + * stream (and source buffer) must remain in-place / accessible / unchanged + * through the lifetime of the stream session. + */ +LZ4LIB_API void +LZ4_attach_HC_dictionary(LZ4_streamHC_t* working_stream, + const LZ4_streamHC_t* dictionary_stream); + + +/*^********************************************** + * !!!!!! STATIC LINKING ONLY !!!!!! + ***********************************************/ + +/*-****************************************************************** + * PRIVATE DEFINITIONS : + * Do not use these definitions directly. + * They are merely exposed to allow static allocation of `LZ4_streamHC_t`. + * Declare an `LZ4_streamHC_t` directly, rather than any type below. + * Even then, only do so in the context of static linking, as definitions may change between versions. + ********************************************************************/ + +#define LZ4HC_DICTIONARY_LOGSIZE 16 +#define LZ4HC_MAXD (1<= LZ4HC_CLEVEL_OPT_MIN. + */ +LZ4LIB_STATIC_API void LZ4_favorDecompressionSpeed( + LZ4_streamHC_t* LZ4_streamHCPtr, int favor); + +/*! LZ4_resetStreamHC_fast() : v1.9.0+ + * When an LZ4_streamHC_t is known to be in a internally coherent state, + * it can often be prepared for a new compression with almost no work, only + * sometimes falling back to the full, expensive reset that is always required + * when the stream is in an indeterminate state (i.e., the reset performed by + * LZ4_resetStreamHC()). 
+ * + * LZ4_streamHCs are guaranteed to be in a valid state when: + * - returned from LZ4_createStreamHC() + * - reset by LZ4_resetStreamHC() + * - memset(stream, 0, sizeof(LZ4_streamHC_t)) + * - the stream was in a valid state and was reset by LZ4_resetStreamHC_fast() + * - the stream was in a valid state and was then used in any compression call + * that returned success + * - the stream was in an indeterminate state and was used in a compression + * call that fully reset the state (LZ4_compress_HC_extStateHC()) and that + * returned success + * + * Note: + * A stream that was last used in a compression call that returned an error + * may be passed to this function. However, it will be fully reset, which will + * clear any existing history and settings from the context. + */ +LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast( + LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel); + +/*! LZ4_compress_HC_extStateHC_fastReset() : + * A variant of LZ4_compress_HC_extStateHC(). + * + * Using this variant avoids an expensive initialization step. It is only safe + * to call if the state buffer is known to be correctly initialized already + * (see above comment on LZ4_resetStreamHC_fast() for a definition of + * "correctly initialized"). From a high level, the difference is that this + * function initializes the provided state with a call to + * LZ4_resetStreamHC_fast() while LZ4_compress_HC_extStateHC() starts with a + * call to LZ4_resetStreamHC(). 
+ */ +LZ4LIB_STATIC_API int LZ4_compress_HC_extStateHC_fastReset ( + void* state, + const char* src, char* dst, + int srcSize, int dstCapacity, + int compressionLevel); + +#if defined (__cplusplus) +} +#endif + +#endif /* LZ4_HC_SLO_098092834 */ +#endif /* LZ4_HC_STATIC_LINKING_ONLY */ diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c deleted file mode 100644 index ccbf0e89d713..000000000000 --- a/lib/lz4/lz4hc_compress.c +++ /dev/null @@ -1,769 +0,0 @@ -/* - * LZ4 HC - High Compression Mode of LZ4 - * Copyright (C) 2011-2015, Yann Collet. - * - * BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php) - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- * You can contact the author at : - * - LZ4 homepage : http://www.lz4.org - * - LZ4 source repository : https://github.com/lz4/lz4 - * - * Changed for kernel usage by: - * Sven Schmidt <4sschmid@informatik.uni-hamburg.de> - */ - -/*-************************************ - * Dependencies - **************************************/ -#include -#include "lz4defs.h" -#include -#include -#include /* memset */ - -/* ************************************* - * Local Constants and types - ***************************************/ - -#define OPTIMAL_ML (int)((ML_MASK - 1) + MINMATCH) - -#define HASH_FUNCTION(i) (((i) * 2654435761U) \ - >> ((MINMATCH*8) - LZ4HC_HASH_LOG)) -#define DELTANEXTU16(p) chainTable[(U16)(p)] /* faster */ - -static U32 LZ4HC_hashPtr(const void *ptr) -{ - return HASH_FUNCTION(LZ4_read32(ptr)); -} - -/************************************** - * HC Compression - **************************************/ -static void LZ4HC_init(LZ4HC_CCtx_internal *hc4, const BYTE *start) -{ - memset((void *)hc4->hashTable, 0, sizeof(hc4->hashTable)); - memset(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); - hc4->nextToUpdate = 64 * KB; - hc4->base = start - 64 * KB; - hc4->end = start; - hc4->dictBase = start - 64 * KB; - hc4->dictLimit = 64 * KB; - hc4->lowLimit = 64 * KB; -} - -/* Update chains up to ip (excluded) */ -static FORCE_INLINE void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4, - const BYTE *ip) -{ - U16 * const chainTable = hc4->chainTable; - U32 * const hashTable = hc4->hashTable; - const BYTE * const base = hc4->base; - U32 const target = (U32)(ip - base); - U32 idx = hc4->nextToUpdate; - - while (idx < target) { - U32 const h = LZ4HC_hashPtr(base + idx); - size_t delta = idx - hashTable[h]; - - if (delta > MAX_DISTANCE) - delta = MAX_DISTANCE; - - DELTANEXTU16(idx) = (U16)delta; - - hashTable[h] = idx; - idx++; - } - - hc4->nextToUpdate = target; -} - -static FORCE_INLINE int LZ4HC_InsertAndFindBestMatch( - LZ4HC_CCtx_internal *hc4, /* Index table will be updated */ 
- const BYTE *ip, - const BYTE * const iLimit, - const BYTE **matchpos, - const int maxNbAttempts) -{ - U16 * const chainTable = hc4->chainTable; - U32 * const HashTable = hc4->hashTable; - const BYTE * const base = hc4->base; - const BYTE * const dictBase = hc4->dictBase; - const U32 dictLimit = hc4->dictLimit; - const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base)) - ? hc4->lowLimit - : (U32)(ip - base) - (64 * KB - 1); - U32 matchIndex; - int nbAttempts = maxNbAttempts; - size_t ml = 0; - - /* HC4 match finder */ - LZ4HC_Insert(hc4, ip); - matchIndex = HashTable[LZ4HC_hashPtr(ip)]; - - while ((matchIndex >= lowLimit) - && (nbAttempts)) { - nbAttempts--; - if (matchIndex >= dictLimit) { - const BYTE * const match = base + matchIndex; - - if (*(match + ml) == *(ip + ml) - && (LZ4_read32(match) == LZ4_read32(ip))) { - size_t const mlt = LZ4_count(ip + MINMATCH, - match + MINMATCH, iLimit) + MINMATCH; - - if (mlt > ml) { - ml = mlt; - *matchpos = match; - } - } - } else { - const BYTE * const match = dictBase + matchIndex; - - if (LZ4_read32(match) == LZ4_read32(ip)) { - size_t mlt; - const BYTE *vLimit = ip - + (dictLimit - matchIndex); - - if (vLimit > iLimit) - vLimit = iLimit; - mlt = LZ4_count(ip + MINMATCH, - match + MINMATCH, vLimit) + MINMATCH; - if ((ip + mlt == vLimit) - && (vLimit < iLimit)) - mlt += LZ4_count(ip + mlt, - base + dictLimit, - iLimit); - if (mlt > ml) { - /* virtual matchpos */ - ml = mlt; - *matchpos = base + matchIndex; - } - } - } - matchIndex -= DELTANEXTU16(matchIndex); - } - - return (int)ml; -} - -static FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch( - LZ4HC_CCtx_internal *hc4, - const BYTE * const ip, - const BYTE * const iLowLimit, - const BYTE * const iHighLimit, - int longest, - const BYTE **matchpos, - const BYTE **startpos, - const int maxNbAttempts) -{ - U16 * const chainTable = hc4->chainTable; - U32 * const HashTable = hc4->hashTable; - const BYTE * const base = hc4->base; - const U32 dictLimit = hc4->dictLimit; 
- const BYTE * const lowPrefixPtr = base + dictLimit; - const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base)) - ? hc4->lowLimit - : (U32)(ip - base) - (64 * KB - 1); - const BYTE * const dictBase = hc4->dictBase; - U32 matchIndex; - int nbAttempts = maxNbAttempts; - int delta = (int)(ip - iLowLimit); - - /* First Match */ - LZ4HC_Insert(hc4, ip); - matchIndex = HashTable[LZ4HC_hashPtr(ip)]; - - while ((matchIndex >= lowLimit) - && (nbAttempts)) { - nbAttempts--; - if (matchIndex >= dictLimit) { - const BYTE *matchPtr = base + matchIndex; - - if (*(iLowLimit + longest) - == *(matchPtr - delta + longest)) { - if (LZ4_read32(matchPtr) == LZ4_read32(ip)) { - int mlt = MINMATCH + LZ4_count( - ip + MINMATCH, - matchPtr + MINMATCH, - iHighLimit); - int back = 0; - - while ((ip + back > iLowLimit) - && (matchPtr + back > lowPrefixPtr) - && (ip[back - 1] == matchPtr[back - 1])) - back--; - - mlt -= back; - - if (mlt > longest) { - longest = (int)mlt; - *matchpos = matchPtr + back; - *startpos = ip + back; - } - } - } - } else { - const BYTE * const matchPtr = dictBase + matchIndex; - - if (LZ4_read32(matchPtr) == LZ4_read32(ip)) { - size_t mlt; - int back = 0; - const BYTE *vLimit = ip + (dictLimit - matchIndex); - - if (vLimit > iHighLimit) - vLimit = iHighLimit; - - mlt = LZ4_count(ip + MINMATCH, - matchPtr + MINMATCH, vLimit) + MINMATCH; - - if ((ip + mlt == vLimit) && (vLimit < iHighLimit)) - mlt += LZ4_count(ip + mlt, base + dictLimit, - iHighLimit); - while ((ip + back > iLowLimit) - && (matchIndex + back > lowLimit) - && (ip[back - 1] == matchPtr[back - 1])) - back--; - - mlt -= back; - - if ((int)mlt > longest) { - longest = (int)mlt; - *matchpos = base + matchIndex + back; - *startpos = ip + back; - } - } - } - - matchIndex -= DELTANEXTU16(matchIndex); - } - - return longest; -} - -static FORCE_INLINE int LZ4HC_encodeSequence( - const BYTE **ip, - BYTE **op, - const BYTE **anchor, - int matchLength, - const BYTE * const match, - limitedOutput_directive 
limitedOutputBuffer, - BYTE *oend) -{ - int length; - BYTE *token; - - /* Encode Literal length */ - length = (int)(*ip - *anchor); - token = (*op)++; - - if ((limitedOutputBuffer) - && ((*op + (length>>8) - + length + (2 + 1 + LASTLITERALS)) > oend)) { - /* Check output limit */ - return 1; - } - if (length >= (int)RUN_MASK) { - int len; - - *token = (RUN_MASK< 254 ; len -= 255) - *(*op)++ = 255; - *(*op)++ = (BYTE)len; - } else - *token = (BYTE)(length<>8) - + (1 + LASTLITERALS) > oend)) { - /* Check output limit */ - return 1; - } - - if (length >= (int)ML_MASK) { - *token += ML_MASK; - length -= ML_MASK; - - for (; length > 509 ; length -= 510) { - *(*op)++ = 255; - *(*op)++ = 255; - } - - if (length > 254) { - length -= 255; - *(*op)++ = 255; - } - - *(*op)++ = (BYTE)length; - } else - *token += (BYTE)(length); - - /* Prepare next loop */ - *ip += matchLength; - *anchor = *ip; - - return 0; -} - -static int LZ4HC_compress_generic( - LZ4HC_CCtx_internal *const ctx, - const char * const source, - char * const dest, - int const inputSize, - int const maxOutputSize, - int compressionLevel, - limitedOutput_directive limit - ) -{ - const BYTE *ip = (const BYTE *) source; - const BYTE *anchor = ip; - const BYTE * const iend = ip + inputSize; - const BYTE * const mflimit = iend - MFLIMIT; - const BYTE * const matchlimit = (iend - LASTLITERALS); - - BYTE *op = (BYTE *) dest; - BYTE * const oend = op + maxOutputSize; - - unsigned int maxNbAttempts; - int ml, ml2, ml3, ml0; - const BYTE *ref = NULL; - const BYTE *start2 = NULL; - const BYTE *ref2 = NULL; - const BYTE *start3 = NULL; - const BYTE *ref3 = NULL; - const BYTE *start0; - const BYTE *ref0; - - /* init */ - if (compressionLevel > LZ4HC_MAX_CLEVEL) - compressionLevel = LZ4HC_MAX_CLEVEL; - if (compressionLevel < 1) - compressionLevel = LZ4HC_DEFAULT_CLEVEL; - maxNbAttempts = 1 << (compressionLevel - 1); - ctx->end += inputSize; - - ip++; - - /* Main Loop */ - while (ip < mflimit) { - ml = 
LZ4HC_InsertAndFindBestMatch(ctx, ip, - matchlimit, (&ref), maxNbAttempts); - if (!ml) { - ip++; - continue; - } - - /* saved, in case we would skip too much */ - start0 = ip; - ref0 = ref; - ml0 = ml; - -_Search2: - if (ip + ml < mflimit) - ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, - ip + ml - 2, ip + 0, - matchlimit, ml, &ref2, - &start2, maxNbAttempts); - else - ml2 = ml; - - if (ml2 == ml) { - /* No better match */ - if (LZ4HC_encodeSequence(&ip, &op, - &anchor, ml, ref, limit, oend)) - return 0; - continue; - } - - if (start0 < ip) { - if (start2 < ip + ml0) { - /* empirical */ - ip = start0; - ref = ref0; - ml = ml0; - } - } - - /* Here, start0 == ip */ - if ((start2 - ip) < 3) { - /* First Match too small : removed */ - ml = ml2; - ip = start2; - ref = ref2; - goto _Search2; - } - -_Search3: - /* - * Currently we have : - * ml2 > ml1, and - * ip1 + 3 <= ip2 (usually < ip1 + ml1) - */ - if ((start2 - ip) < OPTIMAL_ML) { - int correction; - int new_ml = ml; - - if (new_ml > OPTIMAL_ML) - new_ml = OPTIMAL_ML; - if (ip + new_ml > start2 + ml2 - MINMATCH) - new_ml = (int)(start2 - ip) + ml2 - MINMATCH; - - correction = new_ml - (int)(start2 - ip); - - if (correction > 0) { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } - /* - * Now, we have start2 = ip + new_ml, - * with new_ml = min(ml, OPTIMAL_ML = 18) - */ - - if (start2 + ml2 < mflimit) - ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, - start2 + ml2 - 3, start2, - matchlimit, ml2, &ref3, &start3, - maxNbAttempts); - else - ml3 = ml2; - - if (ml3 == ml2) { - /* No better match : 2 sequences to encode */ - /* ip & ref are known; Now for ml */ - if (start2 < ip + ml) - ml = (int)(start2 - ip); - /* Now, encode 2 sequences */ - if (LZ4HC_encodeSequence(&ip, &op, &anchor, - ml, ref, limit, oend)) - return 0; - ip = start2; - if (LZ4HC_encodeSequence(&ip, &op, &anchor, - ml2, ref2, limit, oend)) - return 0; - continue; - } - - if (start3 < ip + ml + 3) { - /* Not enough space for match 2 : remove 
it */ - if (start3 >= (ip + ml)) { - /* can write Seq1 immediately - * ==> Seq2 is removed, - * so Seq3 becomes Seq1 - */ - if (start2 < ip + ml) { - int correction = (int)(ip + ml - start2); - - start2 += correction; - ref2 += correction; - ml2 -= correction; - if (ml2 < MINMATCH) { - start2 = start3; - ref2 = ref3; - ml2 = ml3; - } - } - - if (LZ4HC_encodeSequence(&ip, &op, &anchor, - ml, ref, limit, oend)) - return 0; - ip = start3; - ref = ref3; - ml = ml3; - - start0 = start2; - ref0 = ref2; - ml0 = ml2; - goto _Search2; - } - - start2 = start3; - ref2 = ref3; - ml2 = ml3; - goto _Search3; - } - - /* - * OK, now we have 3 ascending matches; - * let's write at least the first one - * ip & ref are known; Now for ml - */ - if (start2 < ip + ml) { - if ((start2 - ip) < (int)ML_MASK) { - int correction; - - if (ml > OPTIMAL_ML) - ml = OPTIMAL_ML; - if (ip + ml > start2 + ml2 - MINMATCH) - ml = (int)(start2 - ip) + ml2 - MINMATCH; - correction = ml - (int)(start2 - ip); - if (correction > 0) { - start2 += correction; - ref2 += correction; - ml2 -= correction; - } - } else - ml = (int)(start2 - ip); - } - if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml, - ref, limit, oend)) - return 0; - - ip = start2; - ref = ref2; - ml = ml2; - - start2 = start3; - ref2 = ref3; - ml2 = ml3; - - goto _Search3; - } - - /* Encode Last Literals */ - { - int lastRun = (int)(iend - anchor); - - if ((limit) - && (((char *)op - dest) + lastRun + 1 - + ((lastRun + 255 - RUN_MASK)/255) - > (U32)maxOutputSize)) { - /* Check output limit */ - return 0; - } - if (lastRun >= (int)RUN_MASK) { - *op++ = (RUN_MASK< 254 ; lastRun -= 255) - *op++ = 255; - *op++ = (BYTE) lastRun; - } else - *op++ = (BYTE)(lastRun<internal_donotuse; - - if (((size_t)(state)&(sizeof(void *) - 1)) != 0) { - /* Error : state is not aligned - * for pointers (32 or 64 bits) - */ - return 0; - } - - LZ4HC_init(ctx, (const BYTE *)src); - - if (maxDstSize < LZ4_compressBound(srcSize)) - return LZ4HC_compress_generic(ctx, src, 
dst, - srcSize, maxDstSize, compressionLevel, limitedOutput); - else - return LZ4HC_compress_generic(ctx, src, dst, - srcSize, maxDstSize, compressionLevel, noLimit); -} - -int LZ4_compress_HC(const char *src, char *dst, int srcSize, - int maxDstSize, int compressionLevel, void *wrkmem) -{ - return LZ4_compress_HC_extStateHC(wrkmem, src, dst, - srcSize, maxDstSize, compressionLevel); -} -EXPORT_SYMBOL(LZ4_compress_HC); - -/************************************** - * Streaming Functions - **************************************/ -void LZ4_resetStreamHC(LZ4_streamHC_t *LZ4_streamHCPtr, int compressionLevel) -{ - LZ4_streamHCPtr->internal_donotuse.base = NULL; - LZ4_streamHCPtr->internal_donotuse.compressionLevel = (unsigned int)compressionLevel; -} - -int LZ4_loadDictHC(LZ4_streamHC_t *LZ4_streamHCPtr, - const char *dictionary, - int dictSize) -{ - LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse; - - if (dictSize > 64 * KB) { - dictionary += dictSize - 64 * KB; - dictSize = 64 * KB; - } - LZ4HC_init(ctxPtr, (const BYTE *)dictionary); - if (dictSize >= 4) - LZ4HC_Insert(ctxPtr, (const BYTE *)dictionary + (dictSize - 3)); - ctxPtr->end = (const BYTE *)dictionary + dictSize; - return dictSize; -} -EXPORT_SYMBOL(LZ4_loadDictHC); - -/* compression */ - -static void LZ4HC_setExternalDict( - LZ4HC_CCtx_internal *ctxPtr, - const BYTE *newBlock) -{ - if (ctxPtr->end >= ctxPtr->base + 4) { - /* Referencing remaining dictionary content */ - LZ4HC_Insert(ctxPtr, ctxPtr->end - 3); - } - - /* - * Only one memory segment for extDict, - * so any previous extDict is lost at this stage - */ - ctxPtr->lowLimit = ctxPtr->dictLimit; - ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base); - ctxPtr->dictBase = ctxPtr->base; - ctxPtr->base = newBlock - ctxPtr->dictLimit; - ctxPtr->end = newBlock; - /* match referencing will resume from there */ - ctxPtr->nextToUpdate = ctxPtr->dictLimit; -} -EXPORT_SYMBOL(LZ4HC_setExternalDict); - -static int 
LZ4_compressHC_continue_generic( - LZ4_streamHC_t *LZ4_streamHCPtr, - const char *source, - char *dest, - int inputSize, - int maxOutputSize, - limitedOutput_directive limit) -{ - LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse; - - /* auto - init if forgotten */ - if (ctxPtr->base == NULL) - LZ4HC_init(ctxPtr, (const BYTE *) source); - - /* Check overflow */ - if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 * GB) { - size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base) - - ctxPtr->dictLimit; - if (dictSize > 64 * KB) - dictSize = 64 * KB; - LZ4_loadDictHC(LZ4_streamHCPtr, - (const char *)(ctxPtr->end) - dictSize, (int)dictSize); - } - - /* Check if blocks follow each other */ - if ((const BYTE *)source != ctxPtr->end) - LZ4HC_setExternalDict(ctxPtr, (const BYTE *)source); - - /* Check overlapping input/dictionary space */ - { - const BYTE *sourceEnd = (const BYTE *) source + inputSize; - const BYTE * const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit; - const BYTE * const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit; - - if ((sourceEnd > dictBegin) - && ((const BYTE *)source < dictEnd)) { - if (sourceEnd > dictEnd) - sourceEnd = dictEnd; - ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase); - - if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) - ctxPtr->lowLimit = ctxPtr->dictLimit; - } - } - - return LZ4HC_compress_generic(ctxPtr, source, dest, - inputSize, maxOutputSize, ctxPtr->compressionLevel, limit); -} - -int LZ4_compress_HC_continue( - LZ4_streamHC_t *LZ4_streamHCPtr, - const char *source, - char *dest, - int inputSize, - int maxOutputSize) -{ - if (maxOutputSize < LZ4_compressBound(inputSize)) - return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, - source, dest, inputSize, maxOutputSize, limitedOutput); - else - return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, - source, dest, inputSize, maxOutputSize, noLimit); -} -EXPORT_SYMBOL(LZ4_compress_HC_continue); - -/* dictionary saving */ - -int LZ4_saveDictHC( - LZ4_streamHC_t 
*LZ4_streamHCPtr, - char *safeBuffer, - int dictSize) -{ - LZ4HC_CCtx_internal *const streamPtr = &LZ4_streamHCPtr->internal_donotuse; - int const prefixSize = (int)(streamPtr->end - - (streamPtr->base + streamPtr->dictLimit)); - - if (dictSize > 64 * KB) - dictSize = 64 * KB; - if (dictSize < 4) - dictSize = 0; - if (dictSize > prefixSize) - dictSize = prefixSize; - - memmove(safeBuffer, streamPtr->end - dictSize, dictSize); - - { - U32 const endIndex = (U32)(streamPtr->end - streamPtr->base); - - streamPtr->end = (const BYTE *)safeBuffer + dictSize; - streamPtr->base = streamPtr->end - endIndex; - streamPtr->dictLimit = endIndex - dictSize; - streamPtr->lowLimit = endIndex - dictSize; - - if (streamPtr->nextToUpdate < streamPtr->dictLimit) - streamPtr->nextToUpdate = streamPtr->dictLimit; - } - return dictSize; -} -EXPORT_SYMBOL(LZ4_saveDictHC); - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("LZ4 HC compressor"); From 34caf531417e050987c89d54001fef79ceab1396 Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Sun, 13 Apr 2025 20:19:38 +0300 Subject: [PATCH 06/85] lz4: adapt to Linux kernel A quick benchmark shows this improves zram performance by 3.8% in 4K blocks, 3.4% in 1M blocks. 
Signed-off-by: Juhyung Park --- include/linux/lz4.h | 17 +++++- lib/lz4/Makefile | 4 ++ lib/lz4/lz4.c | 137 +++++--------------------------------------- lib/lz4/lz4.h | 46 +++++++++------ lib/lz4/lz4hc.c | 51 +++-------------- lib/lz4/lz4hc.h | 2 +- 6 files changed, 71 insertions(+), 186 deletions(-) mode change 120000 => 100644 include/linux/lz4.h create mode 100644 lib/lz4/Makefile diff --git a/include/linux/lz4.h b/include/linux/lz4.h deleted file mode 120000 index 9ff890762b05..000000000000 --- a/include/linux/lz4.h +++ /dev/null @@ -1 +0,0 @@ -../../lib/lz4/lz4.h \ No newline at end of file diff --git a/include/linux/lz4.h b/include/linux/lz4.h new file mode 100644 index 000000000000..580a9caa5cc7 --- /dev/null +++ b/include/linux/lz4.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: BSD-2-Clause */ +// LZ4 compatibility wrapper for Linux kernel + +#ifndef __LINUX_LZ4_H__ +#define __LINUX_LZ4_H__ + +#include "../../lib/lz4/lz4.h" +#include "../../lib/lz4/lz4hc.h" + +#define LZ4_MEM_COMPRESS LZ4_STREAM_MINSIZE +#define LZ4HC_MEM_COMPRESS LZ4_STREAMHC_MINSIZE + +#define LZ4HC_MIN_CLEVEL LZ4HC_CLEVEL_MIN +#define LZ4HC_MAX_CLEVEL LZ4HC_CLEVEL_MAX + +#endif diff --git a/lib/lz4/Makefile b/lib/lz4/Makefile new file mode 100644 index 000000000000..63b2279c5a2d --- /dev/null +++ b/lib/lz4/Makefile @@ -0,0 +1,4 @@ +ccflags-y += -O3 \ + -DLZ4_FREESTANDING=1 + +obj-y += lz4.o lz4hc.o diff --git a/lib/lz4/lz4.c b/lib/lz4/lz4.c index a2f7abee19fb..0dcabf4d54bd 100644 --- a/lib/lz4/lz4.c +++ b/lib/lz4/lz4.c @@ -173,13 +173,6 @@ # define expect(expr,value) (expr) #endif -#ifndef likely -#define likely(expr) expect((expr) != 0, 1) -#endif -#ifndef unlikely -#define unlikely(expr) expect((expr) != 0, 0) -#endif - /* Should the alignment test prove unreliable, for some reason, * it can be disabled by setting LZ4_ALIGN_TEST to 0 */ #ifndef LZ4_ALIGN_TEST /* can be externally provided */ @@ -191,44 +184,8 @@ * Memory routines **************************************/ -/*! 
LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION : - * Disable relatively high-level LZ4/HC functions that use dynamic memory - * allocation functions (malloc(), calloc(), free()). - * - * Note that this is a compile-time switch. And since it disables - * public/stable LZ4 v1 API functions, we don't recommend using this - * symbol to generate a library for distribution. - * - * The following public functions are removed when this symbol is defined. - * - lz4 : LZ4_createStream, LZ4_freeStream, - * LZ4_createStreamDecode, LZ4_freeStreamDecode, LZ4_create (deprecated) - * - lz4hc : LZ4_createStreamHC, LZ4_freeStreamHC, - * LZ4_createHC (deprecated), LZ4_freeHC (deprecated) - * - lz4frame, lz4file : All LZ4F_* functions - */ -#if defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) -# define ALLOC(s) lz4_error_memory_allocation_is_disabled -# define ALLOC_AND_ZERO(s) lz4_error_memory_allocation_is_disabled -# define FREEMEM(p) lz4_error_memory_allocation_is_disabled -#elif defined(LZ4_USER_MEMORY_FUNCTIONS) -/* memory management functions can be customized by user project. - * Below functions must exist somewhere in the Project - * and be available at link time */ -void* LZ4_malloc(size_t s); -void* LZ4_calloc(size_t n, size_t s); -void LZ4_free(void* p); -# define ALLOC(s) LZ4_malloc(s) -# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s) -# define FREEMEM(p) LZ4_free(p) -#else -# include /* malloc, calloc, free */ -# define ALLOC(s) malloc(s) -# define ALLOC_AND_ZERO(s) calloc(1,s) -# define FREEMEM(p) free(p) -#endif - #if ! 
LZ4_FREESTANDING -# include /* memset, memcpy */ +# include /* memset, memcpy */ #endif #if !defined(LZ4_memset) # define LZ4_memset(p,v,s) memset((p),(v),(s)) @@ -298,26 +255,13 @@ static int LZ4_isAligned(const void* ptr, size_t alignment) /*-************************************ * Types **************************************/ -#include -#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; - typedef uintptr_t uptrval; -#else -# if UINT_MAX != 4294967295UL -# error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4" -# endif - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; - typedef size_t uptrval; /* generally true, except OpenVMS-64 */ -#endif +#include +typedef uint8_t BYTE; +typedef uint16_t U16; +typedef uint32_t U32; +typedef int32_t S32; +typedef uint64_t U64; +typedef uintptr_t uptrval; #if defined(__x86_64__) typedef U64 reg_t; /* 64-bits in x32 mode */ @@ -332,34 +276,6 @@ typedef enum { } limitedOutput_directive; -/*-************************************ -* Reading and writing into memory -**************************************/ - -/** - * LZ4 relies on memcpy with a constant size being inlined. In freestanding - * environments, the compiler can't assume the implementation of memcpy() is - * standard compliant, so it can't apply its specialized memcpy() inlining - * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze - * memcpy() as if it were standard compliant, so it can inline it in freestanding - * environments. This is needed when decompressing the Linux Kernel, for example. 
- */ -#if !defined(LZ4_memcpy) -# if defined(__GNUC__) && (__GNUC__ >= 4) -# define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) -# else -# define LZ4_memcpy(dst, src, size) memcpy(dst, src, size) -# endif -#endif - -#if !defined(LZ4_memmove) -# if defined(__GNUC__) && (__GNUC__ >= 4) -# define LZ4_memmove __builtin_memmove -# else -# define LZ4_memmove memmove -# endif -#endif - static unsigned LZ4_isLittleEndian(void) { const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ @@ -1469,10 +1385,11 @@ int LZ4_compress_fast(const char* src, char* dest, int srcSize, int dstCapacity, } -int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity) +int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity, void *wrkmem) { - return LZ4_compress_fast(src, dst, srcSize, dstCapacity, 1); + return LZ4_compress_fast_extState(wrkmem, src, dst, srcSize, dstCapacity, 1); } +EXPORT_SYMBOL(LZ4_compress_default); /* Note!: This function leaves the stream in an unclean/broken state! 
@@ -1649,6 +1566,7 @@ int LZ4_loadDict(LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) { return LZ4_loadDict_internal(LZ4_dict, dictionary, dictSize, _ld_fast); } +EXPORT_SYMBOL(LZ4_loadDict); int LZ4_loadDictSlow(LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) { @@ -1832,6 +1750,7 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) return dictSize; } +EXPORT_SYMBOL(LZ4_saveDict); @@ -2756,36 +2675,6 @@ int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSi return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize); } - -/*=************************************************* -* Obsolete Functions -***************************************************/ -/* obsolete compression functions */ -int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) -{ - return LZ4_compress_default(source, dest, inputSize, maxOutputSize); -} -int LZ4_compress(const char* src, char* dest, int srcSize) -{ - return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize)); -} -int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) -{ - return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); -} -int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) -{ - return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); -} -int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity) -{ - return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1); -} -int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) -{ - return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); -} - /* These decompression functions are deprecated and should no 
longer be used. They are only provided here for compatibility with older user programs. diff --git a/lib/lz4/lz4.h b/lib/lz4/lz4.h index 80e3e5ca04d2..905e7757d186 100644 --- a/lib/lz4/lz4.h +++ b/lib/lz4/lz4.h @@ -39,10 +39,6 @@ extern "C" { #ifndef LZ4_H_2983827168210 #define LZ4_H_2983827168210 -/* --- Dependency --- */ -#include /* size_t */ - - /** Introduction @@ -73,6 +69,12 @@ extern "C" { The `lz4` CLI can only manage frames. */ +#include +#include +#include + +#define LZ4_FORCE_INLINE static inline __attribute__((always_inline)) + /*^*************************************************************** * Export parameters *****************************************************************/ @@ -97,6 +99,22 @@ extern "C" { # define LZ4LIB_API LZ4LIB_VISIBILITY #endif +/*-************************************ +* Reading and writing into memory +**************************************/ + +/** + * LZ4 relies on memcpy with a constant size being inlined. In freestanding + * environments, the compiler can't assume the implementation of memcpy() is + * standard compliant, so it can't apply its specialized memcpy() inlining + * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze + * memcpy() as if it were standard compliant, so it can inline it in freestanding + * environments. This is needed when decompressing the Linux Kernel, for example. + */ +#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) +#define LZ4_memset(dst, src, size) __builtin_memset(dst, src, size) +#define LZ4_memmove(dst, src, size) __builtin_memmove(dst, src, size) + /*! 
LZ4_FREESTANDING : * When this macro is set to 1, it enables "freestanding mode" that is * suitable for typical freestanding environment which doesn't support @@ -188,7 +206,7 @@ LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; * or 0 if compression fails * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer). */ -LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity); +LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity, void *wrkmem); /*! LZ4_decompress_safe() : * @compressedSize : is the exact complete size of the compressed block. @@ -696,18 +714,12 @@ int LZ4_compress_destSize_extState(void* state, const char* src, char* dst, int* #define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE) #define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */ -#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# include - typedef int8_t LZ4_i8; - typedef uint8_t LZ4_byte; - typedef uint16_t LZ4_u16; - typedef uint32_t LZ4_u32; -#else - typedef signed char LZ4_i8; - typedef unsigned char LZ4_byte; - typedef unsigned short LZ4_u16; - typedef unsigned int LZ4_u32; -#endif +#include +#include +typedef int8_t LZ4_i8; +typedef uint8_t LZ4_byte; +typedef uint16_t LZ4_u16; +typedef uint32_t LZ4_u32; /*! LZ4_stream_t : * Never ever use below internal definitions directly ! diff --git a/lib/lz4/lz4hc.c b/lib/lz4/lz4hc.c index 41d2399037c5..ba8efff7dd67 100644 --- a/lib/lz4/lz4hc.c +++ b/lib/lz4/lz4hc.c @@ -1,4 +1,3 @@ - /* LZ4 HC - High Compression Mode of LZ4 Copyright (C) 2011-2020, Yann Collet. @@ -39,6 +38,11 @@ * Tuning Parameter ***************************************/ +/*=== Dependency ===*/ +#define LZ4_HC_STATIC_LINKING_ONLY +#include "lz4hc.h" + + /*! 
HEAPMODE : * Select how stateless HC compression functions like `LZ4_compress_HC()` * allocate memory for their workspace: @@ -50,12 +54,6 @@ #endif -/*=== Dependency ===*/ -#define LZ4_HC_STATIC_LINKING_ONLY -#include "lz4hc.h" -#include - - /*=== Shared lz4.c code ===*/ #ifndef LZ4_SRC_INCLUDED # if defined(__GNUC__) @@ -1517,23 +1515,12 @@ int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int src return LZ4_compress_HC_extStateHC_fastReset(state, src, dst, srcSize, dstCapacity, compressionLevel); } -int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) +int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel, void *wrkmem) { - int cSize; -#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 - LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t)); - if (statePtr==NULL) return 0; -#else - LZ4_streamHC_t state; - LZ4_streamHC_t* const statePtr = &state; -#endif DEBUGLOG(5, "LZ4_compress_HC") - cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, dstCapacity, compressionLevel); -#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 - FREEMEM(statePtr); -#endif - return cSize; + return LZ4_compress_HC_extStateHC(wrkmem, src, dst, srcSize, dstCapacity, compressionLevel); } +EXPORT_SYMBOL(LZ4_compress_HC); /* state is presumed sized correctly (>= sizeof(LZ4_streamHC_t)) */ int LZ4_compress_HC_destSize(void* state, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel) @@ -2124,28 +2111,6 @@ _return_label: } -/*************************************************** -* Deprecated Functions -***************************************************/ - -/* These functions currently generate deprecation warnings */ - -/* Wrappers for deprecated compression functions */ -int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); } -int 
LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); } -int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } -int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); } -int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); } -int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); } -int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } -int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); } -int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); } -int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); } - - -/* Deprecated streaming functions */ -int LZ4_sizeofStreamStateHC(void) { return sizeof(LZ4_streamHC_t); } - /* state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t) * @return : 0 on success, !=0 if error */ int LZ4_resetStreamStateHC(void* state, char* inputBuffer) diff --git a/lib/lz4/lz4hc.h b/lib/lz4/lz4hc.h index 
992bc8cdd770..bed6793534c8 100644 --- a/lib/lz4/lz4hc.h +++ b/lib/lz4/lz4hc.h @@ -63,7 +63,7 @@ extern "C" { * @return : the number of bytes written into 'dst' * or 0 if compression fails. */ -LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel); +LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel, void *wrkmem); /* Note : From 1ee5706da6dd048e418bb7e886ebf63f95f40d5a Mon Sep 17 00:00:00 2001 From: Chenyang Zhong Date: Sun, 13 Apr 2025 20:20:22 +0300 Subject: [PATCH 07/85] lz4: enable LZ4_FAST_DEC_LOOP on aarch64 Clang builds Upstream lz4 mentioned a performance regression on Qualcomm SoCs when built with Clang, but not with GCC [1]. However, according to my testing on sm8350 with LLVM Clang 15, this patch does offer a nice 10% boost in decompression, so enable the fast dec loop for Clang as well. Testing procedure: - pre-fill zram with 1GB of real-word zram data dumped under memory pressure, for example $ dd if=/sdcard/zram.test of=/dev/block/zram0 bs=1m count=1000 - $ fio --readonly --name=randread --direct=1 --rw=randread \ --ioengine=psync --randrepeat=0 --numjobs=4 --iodepth=1 \ --group_reporting=1 --filename=/dev/block/zram0 --bs=4K --size=1000M Results: - vanilla lz4: read: IOPS=1646k, BW=6431MiB/s (6743MB/s)(4000MiB/622msec) - lz4 fast dec: read: IOPS=1775k, BW=6932MiB/s (7269MB/s)(4000MiB/577msec) [1] lz4/lz4#707 Signed-off-by: Chenyang Zhong Signed-off-by: Juhyung Park --- lib/lz4/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/lz4/Makefile b/lib/lz4/Makefile index 63b2279c5a2d..cc0f596aa108 100644 --- a/lib/lz4/Makefile +++ b/lib/lz4/Makefile @@ -1,4 +1,5 @@ ccflags-y += -O3 \ - -DLZ4_FREESTANDING=1 + -DLZ4_FREESTANDING=1 \ + -DLZ4_FAST_DEC_LOOP=1 obj-y += lz4.o lz4hc.o From d65ac21a9508aca5c83146ac2f43f4bd186cf7e6 Mon Sep 17 00:00:00 2001 From: EmanuelCN Date: Sun, 13 Apr 2025 20:26:46 +0300 Subject: 
[PATCH 08/85] lz4: Run clang-format --- lib/lz4/lz4.c | 4301 +++++++++++++++++++++++++++-------------------- lib/lz4/lz4.h | 386 +++-- lib/lz4/lz4hc.c | 3998 +++++++++++++++++++++++++------------------ lib/lz4/lz4hc.h | 225 +-- 4 files changed, 5183 insertions(+), 3727 deletions(-) diff --git a/lib/lz4/lz4.c b/lib/lz4/lz4.c index 0dcabf4d54bd..bb5c90add31a 100644 --- a/lib/lz4/lz4.c +++ b/lib/lz4/lz4.c @@ -42,7 +42,7 @@ * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()). */ #ifndef LZ4_HEAPMODE -# define LZ4_HEAPMODE 0 +#define LZ4_HEAPMODE 0 #endif /* @@ -57,7 +57,6 @@ */ #define LZ4_ACCELERATION_MAX 65537 - /*-************************************ * CPU Feature Detection **************************************/ @@ -74,27 +73,28 @@ * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. * Prefer these methods in priority order (0 > 1 > 2) */ -#ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ -# if defined(__GNUC__) && \ - ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \ - || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define LZ4_FORCE_MEMORY_ACCESS 2 -# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) || defined(_MSC_VER) -# define LZ4_FORCE_MEMORY_ACCESS 1 -# endif +#ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ +#if defined(__GNUC__) && \ + (defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__)) +#define LZ4_FORCE_MEMORY_ACCESS 2 +#elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) || \ + defined(_MSC_VER) +#define LZ4_FORCE_MEMORY_ACCESS 1 +#endif #endif /* * LZ4_FORCE_SW_BITCOUNT * Define this parameter if your target system or compiler does not support hardware bit count */ -#if defined(_MSC_VER) && defined(_WIN32_WCE) /* 
Visual Studio for WinCE doesn't support Hardware bit count */ -# undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */ -# define LZ4_FORCE_SW_BITCOUNT +#if defined(_MSC_VER) && \ + defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */ +#undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */ +#define LZ4_FORCE_SW_BITCOUNT #endif - - /*-************************************ * Dependency **************************************/ @@ -103,46 +103,51 @@ * Amalgamation flag, whether lz4.c is included */ #ifndef LZ4_SRC_INCLUDED -# define LZ4_SRC_INCLUDED 1 +#define LZ4_SRC_INCLUDED 1 #endif #ifndef LZ4_DISABLE_DEPRECATE_WARNINGS -# define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */ +#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */ #endif #ifndef LZ4_STATIC_LINKING_ONLY -# define LZ4_STATIC_LINKING_ONLY +#define LZ4_STATIC_LINKING_ONLY #endif #include "lz4.h" /* see also "memory routines" below */ - /*-************************************ * Compiler Options **************************************/ -#if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */ -# include /* only present in VS2005+ */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 6237) /* disable: C6237: conditional expression is always 0 */ -# pragma warning(disable : 6239) /* disable: C6239: ( && ) always evaluates to the result of */ -# pragma warning(disable : 6240) /* disable: C6240: ( && ) always evaluates to the result of */ -# pragma warning(disable : 6326) /* disable: C6326: Potential comparison of a constant with another constant */ -#endif /* _MSC_VER */ +#if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */ +#include /* only present in VS2005+ */ +#pragma warning( \ + disable : 4127) /* disable: C4127: conditional expression is constant */ +#pragma warning( \ + disable : 6237) /* disable: C6237: conditional 
expression is always 0 */ +#pragma warning( \ + disable : 6239) /* disable: C6239: ( && ) always evaluates to the result of */ +#pragma warning( \ + disable : 6240) /* disable: C6240: ( && ) always evaluates to the result of */ +#pragma warning( \ + disable : 6326) /* disable: C6326: Potential comparison of a constant with another constant */ +#endif /* _MSC_VER */ #ifndef LZ4_FORCE_INLINE -# if defined (_MSC_VER) && !defined (__clang__) /* MSVC */ -# define LZ4_FORCE_INLINE static __forceinline -# else -# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# if defined (__GNUC__) || defined (__clang__) -# define LZ4_FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define LZ4_FORCE_INLINE static inline -# endif -# else -# define LZ4_FORCE_INLINE static -# endif /* __STDC_VERSION__ */ -# endif /* _MSC_VER */ +#if defined(_MSC_VER) && !defined(__clang__) /* MSVC */ +#define LZ4_FORCE_INLINE static __forceinline +#else +#if defined(__cplusplus) || \ + defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +#if defined(__GNUC__) || defined(__clang__) +#define LZ4_FORCE_INLINE static inline __attribute__((always_inline)) +#else +#define LZ4_FORCE_INLINE static inline +#endif +#else +#define LZ4_FORCE_INLINE static +#endif /* __STDC_VERSION__ */ +#endif /* _MSC_VER */ #endif /* LZ4_FORCE_INLINE */ /* LZ4_FORCE_O2 and LZ4_FORCE_INLINE @@ -159,39 +164,41 @@ * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute * of LZ4_wildCopy8 does not affect the compression speed. 
*/ -#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__) -# define LZ4_FORCE_O2 __attribute__((optimize("O2"))) -# undef LZ4_FORCE_INLINE -# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline)) +#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && \ + !defined(__clang__) +#define LZ4_FORCE_O2 __attribute__((optimize("O2"))) +#undef LZ4_FORCE_INLINE +#define LZ4_FORCE_INLINE \ + static __inline __attribute__((optimize("O2"), always_inline)) #else -# define LZ4_FORCE_O2 +#define LZ4_FORCE_O2 #endif -#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__) -# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#if (defined(__GNUC__) && (__GNUC__ >= 3)) || \ + (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || \ + defined(__clang__) +#define expect(expr, value) (__builtin_expect((expr), (value))) #else -# define expect(expr,value) (expr) +#define expect(expr, value) (expr) #endif /* Should the alignment test prove unreliable, for some reason, * it can be disabled by setting LZ4_ALIGN_TEST to 0 */ -#ifndef LZ4_ALIGN_TEST /* can be externally provided */ -# define LZ4_ALIGN_TEST 1 +#ifndef LZ4_ALIGN_TEST /* can be externally provided */ +#define LZ4_ALIGN_TEST 1 #endif - /*-************************************ * Memory routines **************************************/ -#if ! 
LZ4_FREESTANDING -# include /* memset, memcpy */ +#if !LZ4_FREESTANDING +#include /* memset, memcpy */ #endif #if !defined(LZ4_memset) -# define LZ4_memset(p,v,s) memset((p),(v),(s)) +#define LZ4_memset(p, v, s) memset((p), (v), (s)) #endif -#define MEM_INIT(p,v,s) LZ4_memset((p),(v),(s)) - +#define MEM_INIT(p, v, s) LZ4_memset((p), (v), (s)) /*-************************************ * Common Constants @@ -199,106 +206,132 @@ #define MINMATCH 4 #define WILDCOPYLENGTH 8 -#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ -#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ -#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */ +#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */ +#define MATCH_SAFEGUARD_DISTANCE \ + ((2 * WILDCOPYLENGTH) - \ + MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */ #define FASTLOOP_SAFE_DISTANCE 64 -static const int LZ4_minLength = (MFLIMIT+1); +static const int LZ4_minLength = (MFLIMIT + 1); -#define KB *(1 <<10) -#define MB *(1 <<20) -#define GB *(1U<<30) +#define KB *(1 << 10) +#define MB *(1 << 20) +#define GB *(1U << 30) #define LZ4_DISTANCE_ABSOLUTE_MAX 65535 -#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */ -# error "LZ4_DISTANCE_MAX is too big : must be <= 65535" +#if (LZ4_DISTANCE_MAX > \ + LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */ +#error "LZ4_DISTANCE_MAX is too big : must be <= 65535" #endif -#define ML_BITS 4 -#define ML_MASK ((1U<=1) -# include +#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 1) +#include #else -# ifndef assert -# define assert(condition) ((void)0) -# endif +#ifndef assert +#define assert(condition) ((void)0) +#endif #endif -#define LZ4_STATIC_ASSERT(c) { 
enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */ +#define LZ4_STATIC_ASSERT(c) \ + { \ + enum { LZ4_static_assert = 1 / (int)(!!(c)) }; \ + } /* use after variable declarations */ -#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) -# include - static int g_debuglog_enable = 1; -# define DEBUGLOG(l, ...) { \ - if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \ - fprintf(stderr, __FILE__ " %i: ", __LINE__); \ - fprintf(stderr, __VA_ARGS__); \ - fprintf(stderr, " \n"); \ - } } +#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 2) +#include +static int g_debuglog_enable = 1; +#define DEBUGLOG(l, ...) \ + { \ + if ((g_debuglog_enable) && (l <= LZ4_DEBUG)) { \ + fprintf(stderr, __FILE__ " %i: ", __LINE__); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, " \n"); \ + } \ + } #else -# define DEBUGLOG(l, ...) {} /* disabled */ +#define DEBUGLOG(l, ...) \ + { \ + } /* disabled */ #endif -static int LZ4_isAligned(const void* ptr, size_t alignment) +static int LZ4_isAligned(const void *ptr, size_t alignment) { - return ((size_t)ptr & (alignment -1)) == 0; + return ((size_t)ptr & (alignment - 1)) == 0; } - /*-************************************ * Types **************************************/ #include -typedef uint8_t BYTE; +typedef uint8_t BYTE; typedef uint16_t U16; typedef uint32_t U32; -typedef int32_t S32; +typedef int32_t S32; typedef uint64_t U64; typedef uintptr_t uptrval; #if defined(__x86_64__) - typedef U64 reg_t; /* 64-bits in x32 mode */ +typedef U64 reg_t; /* 64-bits in x32 mode */ #else - typedef size_t reg_t; /* 32-bits in x32 mode */ +typedef size_t reg_t; /* 32-bits in x32 mode */ #endif typedef enum { - notLimited = 0, - limitedOutput = 1, - fillOutput = 2 + notLimited = 0, + limitedOutput = 1, + fillOutput = 2 } limitedOutput_directive; - static unsigned LZ4_isLittleEndian(void) { - const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ - return one.c[0]; + const union { + U32 u; + BYTE c[4]; + } one 
= { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; } #if defined(__GNUC__) || defined(__INTEL_COMPILER) -#define LZ4_PACK( __Declaration__ ) __Declaration__ __attribute__((__packed__)) +#define LZ4_PACK(__Declaration__) __Declaration__ __attribute__((__packed__)) #elif defined(_MSC_VER) -#define LZ4_PACK( __Declaration__ ) __pragma( pack(push, 1) ) __Declaration__ __pragma( pack(pop)) +#define LZ4_PACK(__Declaration__) \ + __pragma(pack(push, 1)) __Declaration__ __pragma(pack(pop)) #endif -#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2) +#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS == 2) /* lie to the compiler about data alignment; use with caution */ -static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; } -static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; } -static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; } +static U16 LZ4_read16(const void *memPtr) +{ + return *(const U16 *)memPtr; +} +static U32 LZ4_read32(const void *memPtr) +{ + return *(const U32 *)memPtr; +} +static reg_t LZ4_read_ARCH(const void *memPtr) +{ + return *(const reg_t *)memPtr; +} -static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } -static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } +static void LZ4_write16(void *memPtr, U16 value) +{ + *(U16 *)memPtr = value; +} +static void LZ4_write32(void *memPtr, U32 value) +{ + *(U32 *)memPtr = value; +} -#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1) +#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS == 1) /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ /* currently only defined for gcc and icc */ @@ -306,326 +339,389 @@ LZ4_PACK(typedef struct { U16 u16; }) LZ4_unalign16; LZ4_PACK(typedef struct { U32 u32; }) LZ4_unalign32; LZ4_PACK(typedef struct { reg_t uArch; }) 
LZ4_unalignST; -static U16 LZ4_read16(const void* ptr) { return ((const LZ4_unalign16*)ptr)->u16; } -static U32 LZ4_read32(const void* ptr) { return ((const LZ4_unalign32*)ptr)->u32; } -static reg_t LZ4_read_ARCH(const void* ptr) { return ((const LZ4_unalignST*)ptr)->uArch; } - -static void LZ4_write16(void* memPtr, U16 value) { ((LZ4_unalign16*)memPtr)->u16 = value; } -static void LZ4_write32(void* memPtr, U32 value) { ((LZ4_unalign32*)memPtr)->u32 = value; } - -#else /* safe and portable access using memcpy() */ - -static U16 LZ4_read16(const void* memPtr) +static U16 LZ4_read16(const void *ptr) { - U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; + return ((const LZ4_unalign16 *)ptr)->u16; +} +static U32 LZ4_read32(const void *ptr) +{ + return ((const LZ4_unalign32 *)ptr)->u32; +} +static reg_t LZ4_read_ARCH(const void *ptr) +{ + return ((const LZ4_unalignST *)ptr)->uArch; } -static U32 LZ4_read32(const void* memPtr) +static void LZ4_write16(void *memPtr, U16 value) { - U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; + ((LZ4_unalign16 *)memPtr)->u16 = value; +} +static void LZ4_write32(void *memPtr, U32 value) +{ + ((LZ4_unalign32 *)memPtr)->u32 = value; } -static reg_t LZ4_read_ARCH(const void* memPtr) +#else /* safe and portable access using memcpy() */ + +static U16 LZ4_read16(const void *memPtr) { - reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; + U16 val; + LZ4_memcpy(&val, memPtr, sizeof(val)); + return val; } -static void LZ4_write16(void* memPtr, U16 value) +static U32 LZ4_read32(const void *memPtr) { - LZ4_memcpy(memPtr, &value, sizeof(value)); + U32 val; + LZ4_memcpy(&val, memPtr, sizeof(val)); + return val; } -static void LZ4_write32(void* memPtr, U32 value) +static reg_t LZ4_read_ARCH(const void *memPtr) { - LZ4_memcpy(memPtr, &value, sizeof(value)); + reg_t val; + LZ4_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +static void LZ4_write16(void *memPtr, U16 value) +{ + LZ4_memcpy(memPtr, &value, 
sizeof(value)); +} + +static void LZ4_write32(void *memPtr, U32 value) +{ + LZ4_memcpy(memPtr, &value, sizeof(value)); } #endif /* LZ4_FORCE_MEMORY_ACCESS */ - -static U16 LZ4_readLE16(const void* memPtr) +static U16 LZ4_readLE16(const void *memPtr) { - if (LZ4_isLittleEndian()) { - return LZ4_read16(memPtr); - } else { - const BYTE* p = (const BYTE*)memPtr; - return (U16)((U16)p[0] | (p[1]<<8)); - } + if (LZ4_isLittleEndian()) { + return LZ4_read16(memPtr); + } else { + const BYTE *p = (const BYTE *)memPtr; + return (U16)((U16)p[0] | (p[1] << 8)); + } } #ifdef LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT -static U32 LZ4_readLE32(const void* memPtr) +static U32 LZ4_readLE32(const void *memPtr) { - if (LZ4_isLittleEndian()) { - return LZ4_read32(memPtr); - } else { - const BYTE* p = (const BYTE*)memPtr; - return (U32)p[0] | (p[1]<<8) | (p[2]<<16) | (p[3]<<24); - } + if (LZ4_isLittleEndian()) { + return LZ4_read32(memPtr); + } else { + const BYTE *p = (const BYTE *)memPtr; + return (U32)p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24); + } } #endif -static void LZ4_writeLE16(void* memPtr, U16 value) +static void LZ4_writeLE16(void *memPtr, U16 value) { - if (LZ4_isLittleEndian()) { - LZ4_write16(memPtr, value); - } else { - BYTE* p = (BYTE*)memPtr; - p[0] = (BYTE) value; - p[1] = (BYTE)(value>>8); - } + if (LZ4_isLittleEndian()) { + LZ4_write16(memPtr, value); + } else { + BYTE *p = (BYTE *)memPtr; + p[0] = (BYTE)value; + p[1] = (BYTE)(value >> 8); + } } /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */ LZ4_FORCE_INLINE -void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd) +void LZ4_wildCopy8(void *dstPtr, const void *srcPtr, void *dstEnd) { - BYTE* d = (BYTE*)dstPtr; - const BYTE* s = (const BYTE*)srcPtr; - BYTE* const e = (BYTE*)dstEnd; + BYTE *d = (BYTE *)dstPtr; + const BYTE *s = (const BYTE *)srcPtr; + BYTE *const e = (BYTE *)dstEnd; - do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d= 16. 
*/ -LZ4_FORCE_INLINE void -LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd) +LZ4_FORCE_INLINE void LZ4_wildCopy32(void *dstPtr, const void *srcPtr, + void *dstEnd) { - BYTE* d = (BYTE*)dstPtr; - const BYTE* s = (const BYTE*)srcPtr; - BYTE* const e = (BYTE*)dstEnd; + BYTE *d = (BYTE *)dstPtr; + const BYTE *s = (const BYTE *)srcPtr; + BYTE *const e = (BYTE *)dstEnd; - do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d= dstPtr + MINMATCH * - there is at least 12 bytes available to write after dstEnd */ -LZ4_FORCE_INLINE void -LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) +LZ4_FORCE_INLINE void LZ4_memcpy_using_offset(BYTE *dstPtr, const BYTE *srcPtr, + BYTE *dstEnd, const size_t offset) { - BYTE v[8]; + BYTE v[8]; - assert(dstEnd >= dstPtr + MINMATCH); + assert(dstEnd >= dstPtr + MINMATCH); - switch(offset) { - case 1: - MEM_INIT(v, *srcPtr, 8); - break; - case 2: - LZ4_memcpy(v, srcPtr, 2); - LZ4_memcpy(&v[2], srcPtr, 2); + switch (offset) { + case 1: + MEM_INIT(v, *srcPtr, 8); + break; + case 2: + LZ4_memcpy(v, srcPtr, 2); + LZ4_memcpy(&v[2], srcPtr, 2); #if defined(_MSC_VER) && (_MSC_VER <= 1937) /* MSVC 2022 ver 17.7 or earlier */ -# pragma warning(push) -# pragma warning(disable : 6385) /* warning C6385: Reading invalid data from 'v'. */ +#pragma warning(push) +#pragma warning( \ + disable : 6385) /* warning C6385: Reading invalid data from 'v'. 
*/ #endif - LZ4_memcpy(&v[4], v, 4); + LZ4_memcpy(&v[4], v, 4); #if defined(_MSC_VER) && (_MSC_VER <= 1937) /* MSVC 2022 ver 17.7 or earlier */ -# pragma warning(pop) +#pragma warning(pop) #endif - break; - case 4: - LZ4_memcpy(v, srcPtr, 4); - LZ4_memcpy(&v[4], srcPtr, 4); - break; - default: - LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset); - return; - } + break; + case 4: + LZ4_memcpy(v, srcPtr, 4); + LZ4_memcpy(&v[4], srcPtr, 4); + break; + default: + LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset); + return; + } - LZ4_memcpy(dstPtr, v, 8); - dstPtr += 8; - while (dstPtr < dstEnd) { - LZ4_memcpy(dstPtr, v, 8); - dstPtr += 8; - } + LZ4_memcpy(dstPtr, v, 8); + dstPtr += 8; + while (dstPtr < dstEnd) { + LZ4_memcpy(dstPtr, v, 8); + dstPtr += 8; + } } #endif - /*-************************************ * Common functions **************************************/ -static unsigned LZ4_NbCommonBytes (reg_t val) +static unsigned LZ4_NbCommonBytes(reg_t val) { - assert(val != 0); - if (LZ4_isLittleEndian()) { - if (sizeof(val) == 8) { -# if defined(_MSC_VER) && (_MSC_VER >= 1800) && (defined(_M_AMD64) && !defined(_M_ARM64EC)) && !defined(LZ4_FORCE_SW_BITCOUNT) + assert(val != 0); + if (LZ4_isLittleEndian()) { + if (sizeof(val) == 8) { +#if defined(_MSC_VER) && (_MSC_VER >= 1800) && \ + (defined(_M_AMD64) && !defined(_M_ARM64EC)) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) /*-************************************************************************************************* * ARM64EC is a Microsoft-designed ARM64 ABI compatible with AMD64 applications on ARM64 Windows 11. * The ARM64EC ABI does not support AVX/AVX2/AVX512 instructions, nor their relevant intrinsics * including _tzcnt_u64. Therefore, we need to neuter the _tzcnt_u64 code path for ARM64EC. ****************************************************************************************************/ -# if defined(__clang__) && (__clang_major__ < 10) - /* Avoid undefined clang-cl intrinsics issue. 
+#if defined(__clang__) && (__clang_major__ < 10) + /* Avoid undefined clang-cl intrinsics issue. * See https://github.com/lz4/lz4/pull/1017 for details. */ - return (unsigned)__builtin_ia32_tzcnt_u64(val) >> 3; -# else - /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */ - return (unsigned)_tzcnt_u64(val) >> 3; -# endif -# elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanForward64(&r, (U64)val); - return (unsigned)r >> 3; -# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ - ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ - !defined(LZ4_FORCE_SW_BITCOUNT) - return (unsigned)__builtin_ctzll((U64)val) >> 3; -# else - const U64 m = 0x0101010101010101ULL; - val ^= val - 1; - return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56); -# endif - } else /* 32 bits */ { -# if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanForward(&r, (U32)val); - return (unsigned)r >> 3; -# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ - ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ - !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (unsigned)__builtin_ctz((U32)val) >> 3; -# else - const U32 m = 0x01010101; - return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24; -# endif - } - } else /* Big Endian CPU */ { - if (sizeof(val)==8) { -# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ - ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ - !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (unsigned)__builtin_clzll((U64)val) >> 3; -# else -#if 1 - /* this method is probably faster, - * but adds a 128 bytes lookup table */ - static const unsigned char ctz7_tab[128] = { - 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 
1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - }; - U64 const mask = 0x0101010101010101ULL; - U64 const t = (((val >> 8) - mask) | val) & mask; - return ctz7_tab[(t * 0x0080402010080402ULL) >> 57]; + return (unsigned)__builtin_ia32_tzcnt_u64(val) >> 3; #else - /* this method doesn't consume memory space like the previous one, + /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */ + return (unsigned)_tzcnt_u64(val) >> 3; +#endif +#elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanForward64(&r, (U64)val); + return (unsigned)r >> 3; +#elif (defined(__clang__) || \ + (defined(__GNUC__) && \ + ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_ctzll((U64)val) >> 3; +#else + const U64 m = 0x0101010101010101ULL; + val ^= val - 1; + return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56); +#endif + } else /* 32 bits */ { +#if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanForward(&r, (U32)val); + return (unsigned)r >> 3; +#elif (defined(__clang__) || \ + (defined(__GNUC__) && \ + ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_ctz((U32)val) >> 3; +#else + const U32 m = 0x01010101; + return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> + 24; +#endif + } + } else /* Big Endian CPU */ { + if (sizeof(val) == 8) { +#if (defined(__clang__) || \ + (defined(__GNUC__) && \ + ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_clzll((U64)val) >> 3; +#else +#if 1 + /* this method is probably faster, + * but 
adds a 128 bytes lookup table */ + static const unsigned char ctz7_tab[128] = { + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + }; + U64 const mask = 0x0101010101010101ULL; + U64 const t = (((val >> 8) - mask) | val) & mask; + return ctz7_tab[(t * 0x0080402010080402ULL) >> 57]; +#else + /* this method doesn't consume memory space like the previous one, * but it contains several branches, * that may end up slowing execution */ - static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits. + static const U32 by32 = + sizeof(val) * + 4; /* 32 on 64 bits (goal), 16 on 32 bits. Just to avoid some static analyzer complaining about shift by 32 on 32-bits target. Note that this code path is never triggered in 32-bits mode. 
*/ - unsigned r; - if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; + unsigned r; + if (!(val >> by32)) { + r = 4; + } else { + r = 0; + val >>= by32; + } + if (!(val >> 16)) { + r += 2; + val >>= 8; + } else { + val >>= 24; + } + r += (!val); + return r; #endif -# endif - } else /* 32 bits */ { -# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ - ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ - !defined(LZ4_FORCE_SW_BITCOUNT) - return (unsigned)__builtin_clz((U32)val) >> 3; -# else - val >>= 8; - val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) | - (val + 0x00FF0000)) >> 24; - return (unsigned)val ^ 3; -# endif - } - } +#endif + } else /* 32 bits */ { +#if (defined(__clang__) || \ + (defined(__GNUC__) && \ + ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_clz((U32)val) >> 3; +#else + val >>= 8; + val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) | + (val + 0x00FF0000)) >> + 24; + return (unsigned)val ^ 3; +#endif + } + } } - #define STEPSIZE sizeof(reg_t) LZ4_FORCE_INLINE -unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) +unsigned LZ4_count(const BYTE *pIn, const BYTE *pMatch, const BYTE *pInLimit) { - const BYTE* const pStart = pIn; + const BYTE *const pStart = pIn; - if (likely(pIn < pInLimit-(STEPSIZE-1))) { - reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); - if (!diff) { - pIn+=STEPSIZE; pMatch+=STEPSIZE; - } else { - return LZ4_NbCommonBytes(diff); - } } + if (likely(pIn < pInLimit - (STEPSIZE - 1))) { + reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + if (!diff) { + pIn += STEPSIZE; + pMatch += STEPSIZE; + } else { + return LZ4_NbCommonBytes(diff); + } + } - while (likely(pIn < pInLimit-(STEPSIZE-1))) { - reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); - if (!diff) { pIn+=STEPSIZE; 
pMatch+=STEPSIZE; continue; } - pIn += LZ4_NbCommonBytes(diff); - return (unsigned)(pIn - pStart); - } + while (likely(pIn < pInLimit - (STEPSIZE - 1))) { + reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn); + if (!diff) { + pIn += STEPSIZE; + pMatch += STEPSIZE; + continue; + } + pIn += LZ4_NbCommonBytes(diff); + return (unsigned)(pIn - pStart); + } - if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; } - if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; } - if ((pIn compression run slower on incompressible data */ - +static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT - 1)); +static const U32 LZ4_skipTrigger = + 6; /* Increase this value ==> compression run slower on incompressible data */ /*-************************************ * Local Structures and types @@ -655,35 +751,54 @@ typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t; * content, and matches are found by looking in the ctx * ->dictCtx->hashTable. 
*/ -typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive; +typedef enum { + noDict = 0, + withPrefix64k, + usingExtDict, + usingDictCtx +} dict_directive; typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; - /*-************************************ * Local Utils **************************************/ -int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; } -const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; } -int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } -int LZ4_sizeofState(void) { return sizeof(LZ4_stream_t); } - +int LZ4_versionNumber(void) +{ + return LZ4_VERSION_NUMBER; +} +const char *LZ4_versionString(void) +{ + return LZ4_VERSION_STRING; +} +int LZ4_compressBound(int isize) +{ + return LZ4_COMPRESSBOUND(isize); +} +int LZ4_sizeofState(void) +{ + return sizeof(LZ4_stream_t); +} /*-**************************************** * Internal Definitions, used only in Tests *******************************************/ -#if defined (__cplusplus) +#if defined(__cplusplus) extern "C" { #endif -int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize); +int LZ4_compress_forceExtDict(LZ4_stream_t *LZ4_dict, const char *source, + char *dest, int srcSize); -int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, - int compressedSize, int maxOutputSize, - const void* dictStart, size_t dictSize); -int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest, - int compressedSize, int targetOutputSize, int dstCapacity, - const void* dictStart, size_t dictSize); -#if defined (__cplusplus) +int LZ4_decompress_safe_forceExtDict(const char *source, char *dest, + int compressedSize, int maxOutputSize, + const void *dictStart, size_t dictSize); +int LZ4_decompress_safe_partial_forceExtDict(const char *source, char *dest, + int compressedSize, + int targetOutputSize, + int dstCapacity, + const void *dictStart, + size_t 
dictSize); +#if defined(__cplusplus) } #endif @@ -692,72 +807,109 @@ int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest, ********************************/ LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType) { - if (tableType == byU16) - return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); - else - return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); + if (tableType == byU16) + return ((sequence * 2654435761U) >> + ((MINMATCH * 8) - (LZ4_HASHLOG + 1))); + else + return ((sequence * 2654435761U) >> + ((MINMATCH * 8) - LZ4_HASHLOG)); } LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType) { - const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG; - if (LZ4_isLittleEndian()) { - const U64 prime5bytes = 889523592379ULL; - return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog)); - } else { - const U64 prime8bytes = 11400714785074694791ULL; - return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog)); - } + const U32 hashLog = + (tableType == byU16) ? 
LZ4_HASHLOG + 1 : LZ4_HASHLOG; + if (LZ4_isLittleEndian()) { + const U64 prime5bytes = 889523592379ULL; + return (U32)(((sequence << 24) * prime5bytes) >> + (64 - hashLog)); + } else { + const U64 prime8bytes = 11400714785074694791ULL; + return (U32)(((sequence >> 24) * prime8bytes) >> + (64 - hashLog)); + } } -LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType) +LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void *const p, + tableType_t const tableType) { - if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType); + if ((sizeof(reg_t) == 8) && (tableType != byU16)) + return LZ4_hash5(LZ4_read_ARCH(p), tableType); #ifdef LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT - return LZ4_hash4(LZ4_readLE32(p), tableType); + return LZ4_hash4(LZ4_readLE32(p), tableType); #else - return LZ4_hash4(LZ4_read32(p), tableType); + return LZ4_hash4(LZ4_read32(p), tableType); #endif } -LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType) +LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void *tableBase, + tableType_t const tableType) { - switch (tableType) - { - default: /* fallthrough */ - case clearedTable: { /* illegal! */ assert(0); return; } - case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; } - case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; } - case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; } - } + switch (tableType) { + default: /* fallthrough */ + case clearedTable: { /* illegal! 
*/ + assert(0); + return; + } + case byPtr: { + const BYTE **hashTable = (const BYTE **)tableBase; + hashTable[h] = NULL; + return; + } + case byU32: { + U32 *hashTable = (U32 *)tableBase; + hashTable[h] = 0; + return; + } + case byU16: { + U16 *hashTable = (U16 *)tableBase; + hashTable[h] = 0; + return; + } + } } -LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType) +LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void *tableBase, + tableType_t const tableType) { - switch (tableType) - { - default: /* fallthrough */ - case clearedTable: /* fallthrough */ - case byPtr: { /* illegal! */ assert(0); return; } - case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; } - case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; } - } + switch (tableType) { + default: /* fallthrough */ + case clearedTable: /* fallthrough */ + case byPtr: { /* illegal! */ + assert(0); + return; + } + case byU32: { + U32 *hashTable = (U32 *)tableBase; + hashTable[h] = idx; + return; + } + case byU16: { + U16 *hashTable = (U16 *)tableBase; + assert(idx < 65536); + hashTable[h] = (U16)idx; + return; + } + } } /* LZ4_putPosition*() : only used in byPtr mode */ -LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, - void* tableBase, tableType_t const tableType) +LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE *p, U32 h, + void *tableBase, + tableType_t const tableType) { - const BYTE** const hashTable = (const BYTE**)tableBase; - assert(tableType == byPtr); (void)tableType; - hashTable[h] = p; + const BYTE **const hashTable = (const BYTE **)tableBase; + assert(tableType == byPtr); + (void)tableType; + hashTable[h] = p; } -LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType) +LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE *p, void *tableBase, + tableType_t tableType) { - U32 const h = LZ4_hashPosition(p, 
tableType); - LZ4_putPositionOnHash(p, h, tableBase, tableType); + U32 const h = LZ4_hashPosition(p, tableType); + LZ4_putPositionOnHash(p, h, tableBase, tableType); } /* LZ4_getIndexOnHash() : @@ -766,75 +918,83 @@ LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_ * Assumption 1 : only valid if tableType == byU32 or byU16. * Assumption 2 : h is presumed valid (within limits of hash table) */ -LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType) +LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void *tableBase, + tableType_t tableType) { - LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2); - if (tableType == byU32) { - const U32* const hashTable = (const U32*) tableBase; - assert(h < (1U << (LZ4_MEMORY_USAGE-2))); - return hashTable[h]; - } - if (tableType == byU16) { - const U16* const hashTable = (const U16*) tableBase; - assert(h < (1U << (LZ4_MEMORY_USAGE-1))); - return hashTable[h]; - } - assert(0); return 0; /* forbidden case */ + LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2); + if (tableType == byU32) { + const U32 *const hashTable = (const U32 *)tableBase; + assert(h < (1U << (LZ4_MEMORY_USAGE - 2))); + return hashTable[h]; + } + if (tableType == byU16) { + const U16 *const hashTable = (const U16 *)tableBase; + assert(h < (1U << (LZ4_MEMORY_USAGE - 1))); + return hashTable[h]; + } + assert(0); + return 0; /* forbidden case */ } -static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType) +static const BYTE *LZ4_getPositionOnHash(U32 h, const void *tableBase, + tableType_t tableType) { - assert(tableType == byPtr); (void)tableType; - { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; } + assert(tableType == byPtr); + (void)tableType; + { + const BYTE *const *hashTable = (const BYTE *const *)tableBase; + return hashTable[h]; + } } -LZ4_FORCE_INLINE const BYTE* -LZ4_getPosition(const BYTE* p, - const void* tableBase, tableType_t 
tableType) +LZ4_FORCE_INLINE const BYTE * +LZ4_getPosition(const BYTE *p, const void *tableBase, tableType_t tableType) { - U32 const h = LZ4_hashPosition(p, tableType); - return LZ4_getPositionOnHash(h, tableBase, tableType); + U32 const h = LZ4_hashPosition(p, tableType); + return LZ4_getPositionOnHash(h, tableBase, tableType); } -LZ4_FORCE_INLINE void -LZ4_prepareTable(LZ4_stream_t_internal* const cctx, - const int inputSize, - const tableType_t tableType) { - /* If the table hasn't been used, it's guaranteed to be zeroed out, and is +LZ4_FORCE_INLINE void LZ4_prepareTable(LZ4_stream_t_internal *const cctx, + const int inputSize, + const tableType_t tableType) +{ + /* If the table hasn't been used, it's guaranteed to be zeroed out, and is * therefore safe to use no matter what mode we're in. Otherwise, we figure * out if it's safe to leave as is or whether it needs to be reset. */ - if ((tableType_t)cctx->tableType != clearedTable) { - assert(inputSize >= 0); - if ((tableType_t)cctx->tableType != tableType - || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU) - || ((tableType == byU32) && cctx->currentOffset > 1 GB) - || tableType == byPtr - || inputSize >= 4 KB) - { - DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx); - MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE); - cctx->currentOffset = 0; - cctx->tableType = (U32)clearedTable; - } else { - DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)"); - } - } + if ((tableType_t)cctx->tableType != clearedTable) { + assert(inputSize >= 0); + if ((tableType_t)cctx->tableType != tableType || + ((tableType == byU16) && + cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU) || + ((tableType == byU32) && cctx->currentOffset > 1 GB) || + tableType == byPtr || inputSize >= 4 KB) { + DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", + cctx); + MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE); + cctx->currentOffset = 0; + cctx->tableType = (U32)clearedTable; + } else { + 
DEBUGLOG( + 4, + "LZ4_prepareTable: Re-use hash table (no reset)"); + } + } - /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back, + /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back, * is faster than compressing without a gap. * However, compressing with currentOffset == 0 is faster still, * so we preserve that case. */ - if (cctx->currentOffset != 0 && tableType == byU32) { - DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset"); - cctx->currentOffset += 64 KB; - } + if (cctx->currentOffset != 0 && tableType == byU32) { + DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset"); + cctx->currentOffset += 64 KB; + } - /* Finally, clear history */ - cctx->dictCtx = NULL; - cctx->dictionary = NULL; - cctx->dictSize = 0; + /* Finally, clear history */ + cctx->dictCtx = NULL; + cctx->dictionary = NULL; + cctx->dictSize = 0; } /** LZ4_compress_generic_validated() : @@ -844,215 +1004,276 @@ LZ4_prepareTable(LZ4_stream_t_internal* const cctx, * - inputSize > 0 */ LZ4_FORCE_INLINE int LZ4_compress_generic_validated( - LZ4_stream_t_internal* const cctx, - const char* const source, - char* const dest, - const int inputSize, - int* inputConsumed, /* only written when outputDirective == fillOutput */ - const int maxOutputSize, - const limitedOutput_directive outputDirective, - const tableType_t tableType, - const dict_directive dictDirective, - const dictIssue_directive dictIssue, - const int acceleration) + LZ4_stream_t_internal *const cctx, const char *const source, + char *const dest, const int inputSize, + int *inputConsumed, /* only written when outputDirective == fillOutput */ + const int maxOutputSize, const limitedOutput_directive outputDirective, + const tableType_t tableType, const dict_directive dictDirective, + const dictIssue_directive dictIssue, const int acceleration) { - int result; - const BYTE* ip = (const BYTE*)source; + int result; + const BYTE *ip = (const BYTE *)source; - U32 const startIndex = 
cctx->currentOffset; - const BYTE* base = (const BYTE*)source - startIndex; - const BYTE* lowLimit; + U32 const startIndex = cctx->currentOffset; + const BYTE *base = (const BYTE *)source - startIndex; + const BYTE *lowLimit; - const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx; - const BYTE* const dictionary = - dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary; - const U32 dictSize = - dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize; - const U32 dictDelta = - (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0; /* make indexes in dictCtx comparable with indexes in current context */ + const LZ4_stream_t_internal *dictCtx = + (const LZ4_stream_t_internal *)cctx->dictCtx; + const BYTE *const dictionary = dictDirective == usingDictCtx ? + dictCtx->dictionary : + cctx->dictionary; + const U32 dictSize = dictDirective == usingDictCtx ? dictCtx->dictSize : + cctx->dictSize; + const U32 dictDelta = + (dictDirective == usingDictCtx) ? + startIndex - dictCtx->currentOffset : + 0; /* make indexes in dictCtx comparable with indexes in current context */ - int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx); - U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictDirective == dictSmall */ - const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary; - const BYTE* anchor = (const BYTE*) source; - const BYTE* const iend = ip + inputSize; - const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1; - const BYTE* const matchlimit = iend - LASTLITERALS; + int const maybe_extMem = (dictDirective == usingExtDict) || + (dictDirective == usingDictCtx); + U32 const prefixIdxLimit = + startIndex - + dictSize; /* used when dictDirective == dictSmall */ + const BYTE *const dictEnd = + dictionary ? 
dictionary + dictSize : dictionary; + const BYTE *anchor = (const BYTE *)source; + const BYTE *const iend = ip + inputSize; + const BYTE *const mflimitPlusOne = iend - MFLIMIT + 1; + const BYTE *const matchlimit = iend - LASTLITERALS; - /* the dictCtx currentOffset is indexed on the start of the dictionary, + /* the dictCtx currentOffset is indexed on the start of the dictionary, * while a dictionary in the current context precedes the currentOffset */ - const BYTE* dictBase = (dictionary == NULL) ? NULL : - (dictDirective == usingDictCtx) ? - dictionary + dictSize - dictCtx->currentOffset : - dictionary + dictSize - startIndex; + const BYTE *dictBase = + (dictionary == NULL) ? + NULL : + (dictDirective == usingDictCtx) ? + dictionary + dictSize - dictCtx->currentOffset : + dictionary + dictSize - startIndex; - BYTE* op = (BYTE*) dest; - BYTE* const olimit = op + maxOutputSize; + BYTE *op = (BYTE *)dest; + BYTE *const olimit = op + maxOutputSize; - U32 offset = 0; - U32 forwardH; + U32 offset = 0; + U32 forwardH; - DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType); - assert(ip != NULL); - if (tableType == byU16) assert(inputSize= 1); + if (outputDirective == fillOutput && maxOutputSize < 1) { + return 0; + } /* Impossible to store anything */ + assert(acceleration >= 1); - lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0); + lowLimit = (const BYTE *)source - + (dictDirective == withPrefix64k ? dictSize : 0); - /* Update context state */ - if (dictDirective == usingDictCtx) { - /* Subsequent linked blocks can't use the dictionary. */ - /* Instead, they use the block we just compressed. 
*/ - cctx->dictCtx = NULL; - cctx->dictSize = (U32)inputSize; - } else { - cctx->dictSize += (U32)inputSize; - } - cctx->currentOffset += (U32)inputSize; - cctx->tableType = (U32)tableType; + /* Update context state */ + if (dictDirective == usingDictCtx) { + /* Subsequent linked blocks can't use the dictionary. */ + /* Instead, they use the block we just compressed. */ + cctx->dictCtx = NULL; + cctx->dictSize = (U32)inputSize; + } else { + cctx->dictSize += (U32)inputSize; + } + cctx->currentOffset += (U32)inputSize; + cctx->tableType = (U32)tableType; - if (inputSizehashTable, byPtr); - } else { - LZ4_putIndexOnHash(startIndex, h, cctx->hashTable, tableType); - } } - ip++; forwardH = LZ4_hashPosition(ip, tableType); + /* First Byte */ + { + U32 const h = LZ4_hashPosition(ip, tableType); + if (tableType == byPtr) { + LZ4_putPositionOnHash(ip, h, cctx->hashTable, byPtr); + } else { + LZ4_putIndexOnHash(startIndex, h, cctx->hashTable, + tableType); + } + } + ip++; + forwardH = LZ4_hashPosition(ip, tableType); - /* Main Loop */ - for ( ; ; ) { - const BYTE* match; - BYTE* token; - const BYTE* filledIp; + /* Main Loop */ + for (;;) { + const BYTE *match; + BYTE *token; + const BYTE *filledIp; - /* Find a match */ - if (tableType == byPtr) { - const BYTE* forwardIp = ip; - int step = 1; - int searchMatchNb = acceleration << LZ4_skipTrigger; - do { - U32 const h = forwardH; - ip = forwardIp; - forwardIp += step; - step = (searchMatchNb++ >> LZ4_skipTrigger); + /* Find a match */ + if (tableType == byPtr) { + const BYTE *forwardIp = ip; + int step = 1; + int searchMatchNb = acceleration << LZ4_skipTrigger; + do { + U32 const h = forwardH; + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); - if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; - assert(ip < mflimitPlusOne); + if (unlikely(forwardIp > mflimitPlusOne)) + goto _last_literals; + assert(ip < mflimitPlusOne); - match = LZ4_getPositionOnHash(h, cctx->hashTable, 
tableType); - forwardH = LZ4_hashPosition(forwardIp, tableType); - LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType); + match = LZ4_getPositionOnHash( + h, cctx->hashTable, tableType); + forwardH = + LZ4_hashPosition(forwardIp, tableType); + LZ4_putPositionOnHash(ip, h, cctx->hashTable, + tableType); - } while ( (match+LZ4_DISTANCE_MAX < ip) - || (LZ4_read32(match) != LZ4_read32(ip)) ); + } while ((match + LZ4_DISTANCE_MAX < ip) || + (LZ4_read32(match) != LZ4_read32(ip))); - } else { /* byU32, byU16 */ + } else { /* byU32, byU16 */ - const BYTE* forwardIp = ip; - int step = 1; - int searchMatchNb = acceleration << LZ4_skipTrigger; - do { - U32 const h = forwardH; - U32 const current = (U32)(forwardIp - base); - U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); - assert(matchIndex <= current); - assert(forwardIp - base < (ptrdiff_t)(2 GB - 1)); - ip = forwardIp; - forwardIp += step; - step = (searchMatchNb++ >> LZ4_skipTrigger); + const BYTE *forwardIp = ip; + int step = 1; + int searchMatchNb = acceleration << LZ4_skipTrigger; + do { + U32 const h = forwardH; + U32 const current = (U32)(forwardIp - base); + U32 matchIndex = LZ4_getIndexOnHash( + h, cctx->hashTable, tableType); + assert(matchIndex <= current); + assert(forwardIp - base < + (ptrdiff_t)(2 GB - 1)); + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); - if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; - assert(ip < mflimitPlusOne); + if (unlikely(forwardIp > mflimitPlusOne)) + goto _last_literals; + assert(ip < mflimitPlusOne); - if (dictDirective == usingDictCtx) { - if (matchIndex < startIndex) { - /* there was no match, try the dictionary */ - assert(tableType == byU32); - matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); - match = dictBase + matchIndex; - matchIndex += dictDelta; /* make dictCtx index comparable with current context */ - lowLimit = dictionary; - } else { - match = base + matchIndex; - lowLimit = 
(const BYTE*)source; - } - } else if (dictDirective == usingExtDict) { - if (matchIndex < startIndex) { - DEBUGLOG(7, "extDict candidate: matchIndex=%5u < startIndex=%5u", matchIndex, startIndex); - assert(startIndex - matchIndex >= MINMATCH); - assert(dictBase); - match = dictBase + matchIndex; - lowLimit = dictionary; - } else { - match = base + matchIndex; - lowLimit = (const BYTE*)source; - } - } else { /* single continuous memory segment */ - match = base + matchIndex; - } - forwardH = LZ4_hashPosition(forwardIp, tableType); - LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); + if (dictDirective == usingDictCtx) { + if (matchIndex < startIndex) { + /* there was no match, try the dictionary */ + assert(tableType == byU32); + matchIndex = LZ4_getIndexOnHash( + h, dictCtx->hashTable, + byU32); + match = dictBase + matchIndex; + matchIndex += + dictDelta; /* make dictCtx index comparable with current context */ + lowLimit = dictionary; + } else { + match = base + matchIndex; + lowLimit = (const BYTE *)source; + } + } else if (dictDirective == usingExtDict) { + if (matchIndex < startIndex) { + DEBUGLOG( + 7, + "extDict candidate: matchIndex=%5u < startIndex=%5u", + matchIndex, startIndex); + assert(startIndex - + matchIndex >= + MINMATCH); + assert(dictBase); + match = dictBase + matchIndex; + lowLimit = dictionary; + } else { + match = base + matchIndex; + lowLimit = (const BYTE *)source; + } + } else { /* single continuous memory segment */ + match = base + matchIndex; + } + forwardH = + LZ4_hashPosition(forwardIp, tableType); + LZ4_putIndexOnHash(current, h, cctx->hashTable, + tableType); - DEBUGLOG(7, "candidate at pos=%u (offset=%u \n", matchIndex, current - matchIndex); - if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; } /* match outside of valid area */ - assert(matchIndex < current); - if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX)) - && (matchIndex+LZ4_DISTANCE_MAX < current)) { - continue; 
- } /* too far */ - assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* match now expected within distance */ + DEBUGLOG(7, + "candidate at pos=%u (offset=%u \n", + matchIndex, current - matchIndex); + if ((dictIssue == dictSmall) && + (matchIndex < prefixIdxLimit)) { + continue; + } /* match outside of valid area */ + assert(matchIndex < current); + if (((tableType != byU16) || + (LZ4_DISTANCE_MAX < + LZ4_DISTANCE_ABSOLUTE_MAX)) && + (matchIndex + LZ4_DISTANCE_MAX < current)) { + continue; + } /* too far */ + assert((current - matchIndex) <= + LZ4_DISTANCE_MAX); /* match now expected within distance */ - if (LZ4_read32(match) == LZ4_read32(ip)) { - if (maybe_extMem) offset = current - matchIndex; - break; /* match found */ - } + if (LZ4_read32(match) == LZ4_read32(ip)) { + if (maybe_extMem) + offset = current - matchIndex; + break; /* match found */ + } - } while(1); - } + } while (1); + } - /* Catch up */ - filledIp = ip; - assert(ip > anchor); /* this is always true as ip has been advanced before entering the main loop */ - if ((match > lowLimit) && unlikely(ip[-1] == match[-1])) { - do { ip--; match--; } while (((ip > anchor) & (match > lowLimit)) && (unlikely(ip[-1] == match[-1]))); - } + /* Catch up */ + filledIp = ip; + assert(ip > + anchor); /* this is always true as ip has been advanced before entering the main loop */ + if ((match > lowLimit) && unlikely(ip[-1] == match[-1])) { + do { + ip--; + match--; + } while (((ip > anchor) & (match > lowLimit)) && + (unlikely(ip[-1] == match[-1]))); + } - /* Encode Literals */ - { unsigned const litLength = (unsigned)(ip - anchor); - token = op++; - if ((outputDirective == limitedOutput) && /* Check output buffer overflow */ - (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) { - return 0; /* cannot compress within `dst` budget. 
Stored indexes in hash table are nonetheless fine */ - } - if ((outputDirective == fillOutput) && - (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) { - op--; - goto _last_literals; - } - if (litLength >= RUN_MASK) { - unsigned len = litLength - RUN_MASK; - *token = (RUN_MASK<= 255 ; len-=255) *op++ = 255; - *op++ = (BYTE)len; - } - else *token = (BYTE)(litLength< + olimit))) { + return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ + } + if ((outputDirective == fillOutput) && + (unlikely( + op + (litLength + 240) / 255 /* litlen */ + + litLength /* literals */ + + 2 /* offset */ + 1 /* token */ + + MFLIMIT - + MINMATCH /* min last literals so last match is <= end - MFLIMIT */ + > olimit))) { + op--; + goto _last_literals; + } + if (litLength >= RUN_MASK) { + unsigned len = litLength - RUN_MASK; + *token = (RUN_MASK << ML_BITS); + for (; len >= 255; len -= 255) + *op++ = 255; + *op++ = (BYTE)len; + } else + *token = (BYTE)(litLength << ML_BITS); - /* Copy Literals */ - LZ4_wildCopy8(op, anchor, op+litLength); - op+=litLength; - DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i", - (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source)); - } + /* Copy Literals */ + LZ4_wildCopy8(op, anchor, op + litLength); + op += litLength; + DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i", + (int)(anchor - (const BYTE *)source), + litLength, (int)(ip - (const BYTE *)source)); + } -_next_match: - /* at this stage, the following variables must be correctly set : + _next_match: + /* at this stage, the following variables must be correctly set : * - ip : at start of LZ operation * - match : at start of previous pattern occurrence; can be within current prefix, or within extDict * - offset : if maybe_ext_memSegment==1 (constant) @@ -1060,197 +1281,266 @@ 
_next_match: * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written */ - if ((outputDirective == fillOutput) && - (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) { - /* the match was too close to the end, rewind and go to last literals */ - op = token; - goto _last_literals; - } + if ((outputDirective == fillOutput) && + (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - + MINMATCH /* min last literals so last match is <= end - MFLIMIT */ + > olimit)) { + /* the match was too close to the end, rewind and go to last literals */ + op = token; + goto _last_literals; + } - /* Encode Offset */ - if (maybe_extMem) { /* static test */ - DEBUGLOG(6, " with offset=%u (ext if > %i)", offset, (int)(ip - (const BYTE*)source)); - assert(offset <= LZ4_DISTANCE_MAX && offset > 0); - LZ4_writeLE16(op, (U16)offset); op+=2; - } else { - DEBUGLOG(6, " with offset=%u (same segment)", (U32)(ip - match)); - assert(ip-match <= LZ4_DISTANCE_MAX); - LZ4_writeLE16(op, (U16)(ip - match)); op+=2; - } + /* Encode Offset */ + if (maybe_extMem) { /* static test */ + DEBUGLOG(6, + " with offset=%u (ext if > %i)", + offset, (int)(ip - (const BYTE *)source)); + assert(offset <= LZ4_DISTANCE_MAX && offset > 0); + LZ4_writeLE16(op, (U16)offset); + op += 2; + } else { + DEBUGLOG(6, + " with offset=%u (same segment)", + (U32)(ip - match)); + assert(ip - match <= LZ4_DISTANCE_MAX); + LZ4_writeLE16(op, (U16)(ip - match)); + op += 2; + } - /* Encode MatchLength */ - { unsigned matchCode; + /* Encode MatchLength */ + { + unsigned matchCode; - if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx) - && (lowLimit==dictionary) /* match within extDict */ ) { - const BYTE* limit = ip + (dictEnd-match); - assert(dictEnd > match); - if (limit > matchlimit) limit = matchlimit; - matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit); - ip += 
(size_t)matchCode + MINMATCH; - if (ip==limit) { - unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit); - matchCode += more; - ip += more; - } - DEBUGLOG(6, " with matchLength=%u starting in extDict", matchCode+MINMATCH); - } else { - matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit); - ip += (size_t)matchCode + MINMATCH; - DEBUGLOG(6, " with matchLength=%u", matchCode+MINMATCH); - } + if ((dictDirective == usingExtDict || + dictDirective == usingDictCtx) && + (lowLimit == + dictionary) /* match within extDict */) { + const BYTE *limit = ip + (dictEnd - match); + assert(dictEnd > match); + if (limit > matchlimit) + limit = matchlimit; + matchCode = LZ4_count(ip + MINMATCH, + match + MINMATCH, limit); + ip += (size_t)matchCode + MINMATCH; + if (ip == limit) { + unsigned const more = + LZ4_count(limit, + (const BYTE *)source, + matchlimit); + matchCode += more; + ip += more; + } + DEBUGLOG( + 6, + " with matchLength=%u starting in extDict", + matchCode + MINMATCH); + } else { + matchCode = + LZ4_count(ip + MINMATCH, + match + MINMATCH, matchlimit); + ip += (size_t)matchCode + MINMATCH; + DEBUGLOG(6, " with matchLength=%u", + matchCode + MINMATCH); + } - if ((outputDirective) && /* Check output buffer overflow */ - (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) { - if (outputDirective == fillOutput) { - /* Match description too long : reduce it */ - U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255; - ip -= matchCode - newMatchCode; - assert(newMatchCode < matchCode); - matchCode = newMatchCode; - if (unlikely(ip <= filledIp)) { - /* We have already filled up to filledIp so if ip ends up less than filledIp + if ((outputDirective) && /* Check output buffer overflow */ + (unlikely(op + (1 + LASTLITERALS) + + (matchCode + 240) / 255 > + olimit))) { + if (outputDirective == fillOutput) { + /* Match description too long : reduce it */ + U32 
newMatchCode = + 15 /* in token */ - + 1 /* to avoid needing a zero byte */ + + ((U32)(olimit - op) - 1 - + LASTLITERALS) * + 255; + ip -= matchCode - newMatchCode; + assert(newMatchCode < matchCode); + matchCode = newMatchCode; + if (unlikely(ip <= filledIp)) { + /* We have already filled up to filledIp so if ip ends up less than filledIp * we have positions in the hash table beyond the current position. This is * a problem if we reuse the hash table. So we have to remove these positions * from the hash table. */ - const BYTE* ptr; - DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip)); - for (ptr = ip; ptr <= filledIp; ++ptr) { - U32 const h = LZ4_hashPosition(ptr, tableType); - LZ4_clearHash(h, cctx->hashTable, tableType); - } - } - } else { - assert(outputDirective == limitedOutput); - return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ - } - } - if (matchCode >= ML_MASK) { - *token += ML_MASK; - matchCode -= ML_MASK; - LZ4_write32(op, 0xFFFFFFFF); - while (matchCode >= 4*255) { - op+=4; - LZ4_write32(op, 0xFFFFFFFF); - matchCode -= 4*255; - } - op += matchCode / 255; - *op++ = (BYTE)(matchCode % 255); - } else - *token += (BYTE)(matchCode); - } - /* Ensure we have enough space for the last literals. */ - assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit)); + const BYTE *ptr; + DEBUGLOG( + 5, + "Clearing %u positions", + (U32)(filledIp - ip)); + for (ptr = ip; ptr <= filledIp; + ++ptr) { + U32 const h = + LZ4_hashPosition( + ptr, + tableType); + LZ4_clearHash( + h, + cctx->hashTable, + tableType); + } + } + } else { + assert(outputDirective == + limitedOutput); + return 0; /* cannot compress within `dst` budget. 
Stored indexes in hash table are nonetheless fine */ + } + } + if (matchCode >= ML_MASK) { + *token += ML_MASK; + matchCode -= ML_MASK; + LZ4_write32(op, 0xFFFFFFFF); + while (matchCode >= 4 * 255) { + op += 4; + LZ4_write32(op, 0xFFFFFFFF); + matchCode -= 4 * 255; + } + op += matchCode / 255; + *op++ = (BYTE)(matchCode % 255); + } else + *token += (BYTE)(matchCode); + } + /* Ensure we have enough space for the last literals. */ + assert(!(outputDirective == fillOutput && + op + 1 + LASTLITERALS > olimit)); - anchor = ip; + anchor = ip; - /* Test end of chunk */ - if (ip >= mflimitPlusOne) break; + /* Test end of chunk */ + if (ip >= mflimitPlusOne) + break; - /* Fill table */ - { U32 const h = LZ4_hashPosition(ip-2, tableType); - if (tableType == byPtr) { - LZ4_putPositionOnHash(ip-2, h, cctx->hashTable, byPtr); - } else { - U32 const idx = (U32)((ip-2) - base); - LZ4_putIndexOnHash(idx, h, cctx->hashTable, tableType); - } } + /* Fill table */ + { + U32 const h = LZ4_hashPosition(ip - 2, tableType); + if (tableType == byPtr) { + LZ4_putPositionOnHash(ip - 2, h, + cctx->hashTable, byPtr); + } else { + U32 const idx = (U32)((ip - 2) - base); + LZ4_putIndexOnHash(idx, h, cctx->hashTable, + tableType); + } + } - /* Test next position */ - if (tableType == byPtr) { + /* Test next position */ + if (tableType == byPtr) { + match = LZ4_getPosition(ip, cctx->hashTable, tableType); + LZ4_putPosition(ip, cctx->hashTable, tableType); + if ((match + LZ4_DISTANCE_MAX >= ip) && + (LZ4_read32(match) == LZ4_read32(ip))) { + token = op++; + *token = 0; + goto _next_match; + } - match = LZ4_getPosition(ip, cctx->hashTable, tableType); - LZ4_putPosition(ip, cctx->hashTable, tableType); - if ( (match+LZ4_DISTANCE_MAX >= ip) - && (LZ4_read32(match) == LZ4_read32(ip)) ) - { token=op++; *token=0; goto _next_match; } + } else { /* byU32, byU16 */ - } else { /* byU32, byU16 */ + U32 const h = LZ4_hashPosition(ip, tableType); + U32 const current = (U32)(ip - base); + U32 matchIndex = 
LZ4_getIndexOnHash(h, cctx->hashTable, + tableType); + assert(matchIndex < current); + if (dictDirective == usingDictCtx) { + if (matchIndex < startIndex) { + /* there was no match, try the dictionary */ + assert(tableType == byU32); + matchIndex = LZ4_getIndexOnHash( + h, dictCtx->hashTable, byU32); + match = dictBase + matchIndex; + lowLimit = + dictionary; /* required for match length counter */ + matchIndex += dictDelta; + } else { + match = base + matchIndex; + lowLimit = (const BYTE *) + source; /* required for match length counter */ + } + } else if (dictDirective == usingExtDict) { + if (matchIndex < startIndex) { + assert(dictBase); + match = dictBase + matchIndex; + lowLimit = + dictionary; /* required for match length counter */ + } else { + match = base + matchIndex; + lowLimit = (const BYTE *) + source; /* required for match length counter */ + } + } else { /* single memory segment */ + match = base + matchIndex; + } + LZ4_putIndexOnHash(current, h, cctx->hashTable, + tableType); + assert(matchIndex < current); + if (((dictIssue == dictSmall) ? + (matchIndex >= prefixIdxLimit) : + 1) && + (((tableType == byU16) && + (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 
+ 1 : + (matchIndex + LZ4_DISTANCE_MAX >= + current)) && + (LZ4_read32(match) == LZ4_read32(ip))) { + token = op++; + *token = 0; + if (maybe_extMem) + offset = current - matchIndex; + DEBUGLOG( + 6, + "seq.start:%i, literals=%u, match.start:%i", + (int)(anchor - (const BYTE *)source), 0, + (int)(ip - (const BYTE *)source)); + goto _next_match; + } + } - U32 const h = LZ4_hashPosition(ip, tableType); - U32 const current = (U32)(ip-base); - U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); - assert(matchIndex < current); - if (dictDirective == usingDictCtx) { - if (matchIndex < startIndex) { - /* there was no match, try the dictionary */ - assert(tableType == byU32); - matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); - match = dictBase + matchIndex; - lowLimit = dictionary; /* required for match length counter */ - matchIndex += dictDelta; - } else { - match = base + matchIndex; - lowLimit = (const BYTE*)source; /* required for match length counter */ - } - } else if (dictDirective==usingExtDict) { - if (matchIndex < startIndex) { - assert(dictBase); - match = dictBase + matchIndex; - lowLimit = dictionary; /* required for match length counter */ - } else { - match = base + matchIndex; - lowLimit = (const BYTE*)source; /* required for match length counter */ - } - } else { /* single memory segment */ - match = base + matchIndex; - } - LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); - assert(matchIndex < current); - if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1) - && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 
1 : (matchIndex+LZ4_DISTANCE_MAX >= current)) - && (LZ4_read32(match) == LZ4_read32(ip)) ) { - token=op++; - *token=0; - if (maybe_extMem) offset = current - matchIndex; - DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i", - (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source)); - goto _next_match; - } - } - - /* Prepare next loop */ - forwardH = LZ4_hashPosition(++ip, tableType); - - } + /* Prepare next loop */ + forwardH = LZ4_hashPosition(++ip, tableType); + } _last_literals: - /* Encode Last Literals */ - { size_t lastRun = (size_t)(iend - anchor); - if ( (outputDirective) && /* Check output buffer overflow */ - (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) { - if (outputDirective == fillOutput) { - /* adapt lastRun to fill 'dst' */ - assert(olimit >= op); - lastRun = (size_t)(olimit-op) - 1/*token*/; - lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/ - } else { - assert(outputDirective == limitedOutput); - return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ - } - } - DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun); - if (lastRun >= RUN_MASK) { - size_t accumulator = lastRun - RUN_MASK; - *op++ = RUN_MASK << ML_BITS; - for(; accumulator >= 255 ; accumulator-=255) *op++ = 255; - *op++ = (BYTE) accumulator; - } else { - *op++ = (BYTE)(lastRun< + olimit)) { + if (outputDirective == fillOutput) { + /* adapt lastRun to fill 'dst' */ + assert(olimit >= op); + lastRun = (size_t)(olimit - op) - 1 /*token*/; + lastRun -= (lastRun + 256 - RUN_MASK) / + 256; /*additional length tokens*/ + } else { + assert(outputDirective == limitedOutput); + return 0; /* cannot compress within `dst` budget. 
Stored indexes in hash table are nonetheless fine */ + } + } + DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun); + if (lastRun >= RUN_MASK) { + size_t accumulator = lastRun - RUN_MASK; + *op++ = RUN_MASK << ML_BITS; + for (; accumulator >= 255; accumulator -= 255) + *op++ = 255; + *op++ = (BYTE)accumulator; + } else { + *op++ = (BYTE)(lastRun << ML_BITS); + } + LZ4_memcpy(op, anchor, lastRun); + ip = anchor + lastRun; + op += lastRun; + } - if (outputDirective == fillOutput) { - *inputConsumed = (int) (((const char*)ip)-source); - } - result = (int)(((char*)op) - dest); - assert(result > 0); - DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result); - return result; + if (outputDirective == fillOutput) { + *inputConsumed = (int)(((const char *)ip) - source); + } + result = (int)(((char *)op) - dest); + assert(result > 0); + DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", + inputSize, result); + return result; } /** LZ4_compress_generic() : @@ -1258,64 +1548,88 @@ _last_literals: * takes care of src == (NULL, 0) * and forward the rest to LZ4_compress_generic_validated */ LZ4_FORCE_INLINE int LZ4_compress_generic( - LZ4_stream_t_internal* const cctx, - const char* const src, - char* const dst, - const int srcSize, - int *inputConsumed, /* only written when outputDirective == fillOutput */ - const int dstCapacity, - const limitedOutput_directive outputDirective, - const tableType_t tableType, - const dict_directive dictDirective, - const dictIssue_directive dictIssue, - const int acceleration) + LZ4_stream_t_internal *const cctx, const char *const src, + char *const dst, const int srcSize, + int *inputConsumed, /* only written when outputDirective == fillOutput */ + const int dstCapacity, const limitedOutput_directive outputDirective, + const tableType_t tableType, const dict_directive dictDirective, + const dictIssue_directive dictIssue, const int acceleration) { - DEBUGLOG(5, "LZ4_compress_generic: 
srcSize=%i, dstCapacity=%i", - srcSize, dstCapacity); + DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i", srcSize, + dstCapacity); - if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported srcSize, too large (or negative) */ - if (srcSize == 0) { /* src == NULL supported if srcSize == 0 */ - if (outputDirective != notLimited && dstCapacity <= 0) return 0; /* no output, can't write anything */ - DEBUGLOG(5, "Generating an empty block"); - assert(outputDirective == notLimited || dstCapacity >= 1); - assert(dst != NULL); - dst[0] = 0; - if (outputDirective == fillOutput) { - assert (inputConsumed != NULL); - *inputConsumed = 0; - } - return 1; - } - assert(src != NULL); + if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { + return 0; + } /* Unsupported srcSize, too large (or negative) */ + if (srcSize == 0) { /* src == NULL supported if srcSize == 0 */ + if (outputDirective != notLimited && dstCapacity <= 0) + return 0; /* no output, can't write anything */ + DEBUGLOG(5, "Generating an empty block"); + assert(outputDirective == notLimited || dstCapacity >= 1); + assert(dst != NULL); + dst[0] = 0; + if (outputDirective == fillOutput) { + assert(inputConsumed != NULL); + *inputConsumed = 0; + } + return 1; + } + assert(src != NULL); - return LZ4_compress_generic_validated(cctx, src, dst, srcSize, - inputConsumed, /* only written into if outputDirective == fillOutput */ - dstCapacity, outputDirective, - tableType, dictDirective, dictIssue, acceleration); + return LZ4_compress_generic_validated( + cctx, src, dst, srcSize, + inputConsumed, /* only written into if outputDirective == fillOutput */ + dstCapacity, outputDirective, tableType, dictDirective, + dictIssue, acceleration); } - -int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) +int LZ4_compress_fast_extState(void *state, const char *source, char *dest, + int inputSize, int maxOutputSize, + int acceleration) { - 
LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse; - assert(ctx != NULL); - if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; - if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; - if (maxOutputSize >= LZ4_compressBound(inputSize)) { - if (inputSize < LZ4_64Klimit) { - return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration); - } else { - const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32; - return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); - } - } else { - if (inputSize < LZ4_64Klimit) { - return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration); - } else { - const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32; - return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration); - } - } + LZ4_stream_t_internal *const ctx = + &LZ4_initStream(state, sizeof(LZ4_stream_t))->internal_donotuse; + assert(ctx != NULL); + if (acceleration < 1) + acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) + acceleration = LZ4_ACCELERATION_MAX; + if (maxOutputSize >= LZ4_compressBound(inputSize)) { + if (inputSize < LZ4_64Klimit) { + return LZ4_compress_generic(ctx, source, dest, + inputSize, NULL, 0, + notLimited, byU16, noDict, + noDictIssue, acceleration); + } else { + const tableType_t tableType = + ((sizeof(void *) == 4) && + ((uptrval)source > LZ4_DISTANCE_MAX)) ? 
+ byPtr : + byU32; + return LZ4_compress_generic(ctx, source, dest, + inputSize, NULL, 0, + notLimited, tableType, + noDict, noDictIssue, + acceleration); + } + } else { + if (inputSize < LZ4_64Klimit) { + return LZ4_compress_generic( + ctx, source, dest, inputSize, NULL, + maxOutputSize, limitedOutput, byU16, noDict, + noDictIssue, acceleration); + } else { + const tableType_t tableType = + ((sizeof(void *) == 4) && + ((uptrval)source > LZ4_DISTANCE_MAX)) ? + byPtr : + byU32; + return LZ4_compress_generic( + ctx, source, dest, inputSize, NULL, + maxOutputSize, limitedOutput, tableType, noDict, + noDictIssue, acceleration); + } + } } /** @@ -1327,401 +1641,530 @@ int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of * "correctly initialized"). */ -int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration) +int LZ4_compress_fast_extState_fastReset(void *state, const char *src, + char *dst, int srcSize, + int dstCapacity, int acceleration) { - LZ4_stream_t_internal* const ctx = &((LZ4_stream_t*)state)->internal_donotuse; - if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; - if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; - assert(ctx != NULL); + LZ4_stream_t_internal *const ctx = + &((LZ4_stream_t *)state)->internal_donotuse; + if (acceleration < 1) + acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) + acceleration = LZ4_ACCELERATION_MAX; + assert(ctx != NULL); - if (dstCapacity >= LZ4_compressBound(srcSize)) { - if (srcSize < LZ4_64Klimit) { - const tableType_t tableType = byU16; - LZ4_prepareTable(ctx, srcSize, tableType); - if (ctx->currentOffset) { - return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration); - } else { - return LZ4_compress_generic(ctx, src, dst, 
srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); - } - } else { - const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; - LZ4_prepareTable(ctx, srcSize, tableType); - return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); - } - } else { - if (srcSize < LZ4_64Klimit) { - const tableType_t tableType = byU16; - LZ4_prepareTable(ctx, srcSize, tableType); - if (ctx->currentOffset) { - return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration); - } else { - return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); - } - } else { - const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; - LZ4_prepareTable(ctx, srcSize, tableType); - return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); - } - } + if (dstCapacity >= LZ4_compressBound(srcSize)) { + if (srcSize < LZ4_64Klimit) { + const tableType_t tableType = byU16; + LZ4_prepareTable(ctx, srcSize, tableType); + if (ctx->currentOffset) { + return LZ4_compress_generic( + ctx, src, dst, srcSize, NULL, 0, + notLimited, tableType, noDict, + dictSmall, acceleration); + } else { + return LZ4_compress_generic( + ctx, src, dst, srcSize, NULL, 0, + notLimited, tableType, noDict, + noDictIssue, acceleration); + } + } else { + const tableType_t tableType = + ((sizeof(void *) == 4) && + ((uptrval)src > LZ4_DISTANCE_MAX)) ? 
+ byPtr : + byU32; + LZ4_prepareTable(ctx, srcSize, tableType); + return LZ4_compress_generic(ctx, src, dst, srcSize, + NULL, 0, notLimited, + tableType, noDict, + noDictIssue, acceleration); + } + } else { + if (srcSize < LZ4_64Klimit) { + const tableType_t tableType = byU16; + LZ4_prepareTable(ctx, srcSize, tableType); + if (ctx->currentOffset) { + return LZ4_compress_generic( + ctx, src, dst, srcSize, NULL, + dstCapacity, limitedOutput, tableType, + noDict, dictSmall, acceleration); + } else { + return LZ4_compress_generic( + ctx, src, dst, srcSize, NULL, + dstCapacity, limitedOutput, tableType, + noDict, noDictIssue, acceleration); + } + } else { + const tableType_t tableType = + ((sizeof(void *) == 4) && + ((uptrval)src > LZ4_DISTANCE_MAX)) ? + byPtr : + byU32; + LZ4_prepareTable(ctx, srcSize, tableType); + return LZ4_compress_generic(ctx, src, dst, srcSize, + NULL, dstCapacity, + limitedOutput, tableType, + noDict, noDictIssue, + acceleration); + } + } } - -int LZ4_compress_fast(const char* src, char* dest, int srcSize, int dstCapacity, int acceleration) +int LZ4_compress_fast(const char *src, char *dest, int srcSize, int dstCapacity, + int acceleration) { - int result; + int result; #if (LZ4_HEAPMODE) - LZ4_stream_t* const ctxPtr = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ - if (ctxPtr == NULL) return 0; + LZ4_stream_t *const ctxPtr = (LZ4_stream_t *)ALLOC(sizeof( + LZ4_stream_t)); /* malloc-calloc always properly aligned */ + if (ctxPtr == NULL) + return 0; #else - LZ4_stream_t ctx; - LZ4_stream_t* const ctxPtr = &ctx; + LZ4_stream_t ctx; + LZ4_stream_t *const ctxPtr = &ctx; #endif - result = LZ4_compress_fast_extState(ctxPtr, src, dest, srcSize, dstCapacity, acceleration); + result = LZ4_compress_fast_extState(ctxPtr, src, dest, srcSize, + dstCapacity, acceleration); #if (LZ4_HEAPMODE) - FREEMEM(ctxPtr); + FREEMEM(ctxPtr); #endif - return result; + return result; } - -int LZ4_compress_default(const char* src, 
char* dst, int srcSize, int dstCapacity, void *wrkmem) +int LZ4_compress_default(const char *src, char *dst, int srcSize, + int dstCapacity, void *wrkmem) { - return LZ4_compress_fast_extState(wrkmem, src, dst, srcSize, dstCapacity, 1); + return LZ4_compress_fast_extState(wrkmem, src, dst, srcSize, + dstCapacity, 1); } EXPORT_SYMBOL(LZ4_compress_default); - /* Note!: This function leaves the stream in an unclean/broken state! * It is not safe to subsequently use the same state with a _fastReset() or * _continue() call without resetting it. */ -static int LZ4_compress_destSize_extState_internal(LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration) +static int LZ4_compress_destSize_extState_internal(LZ4_stream_t *state, + const char *src, char *dst, + int *srcSizePtr, + int targetDstSize, + int acceleration) { - void* const s = LZ4_initStream(state, sizeof (*state)); - assert(s != NULL); (void)s; + void *const s = LZ4_initStream(state, sizeof(*state)); + assert(s != NULL); + (void)s; - if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) { /* compression success is guaranteed */ - return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, acceleration); - } else { - if (*srcSizePtr < LZ4_64Klimit) { - return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, acceleration); - } else { - tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? 
byPtr : byU32; - return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, acceleration); - } } + if (targetDstSize >= + LZ4_compressBound( + *srcSizePtr)) { /* compression success is guaranteed */ + return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, + targetDstSize, acceleration); + } else { + if (*srcSizePtr < LZ4_64Klimit) { + return LZ4_compress_generic(&state->internal_donotuse, + src, dst, *srcSizePtr, + srcSizePtr, targetDstSize, + fillOutput, byU16, noDict, + noDictIssue, acceleration); + } else { + tableType_t const addrMode = + ((sizeof(void *) == 4) && + ((uptrval)src > LZ4_DISTANCE_MAX)) ? + byPtr : + byU32; + return LZ4_compress_generic(&state->internal_donotuse, + src, dst, *srcSizePtr, + srcSizePtr, targetDstSize, + fillOutput, addrMode, + noDict, noDictIssue, + acceleration); + } + } } -int LZ4_compress_destSize_extState(void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration) +int LZ4_compress_destSize_extState(void *state, const char *src, char *dst, + int *srcSizePtr, int targetDstSize, + int acceleration) { - int const r = LZ4_compress_destSize_extState_internal((LZ4_stream_t*)state, src, dst, srcSizePtr, targetDstSize, acceleration); - /* clean the state on exit */ - LZ4_initStream(state, sizeof (LZ4_stream_t)); - return r; + int const r = LZ4_compress_destSize_extState_internal( + (LZ4_stream_t *)state, src, dst, srcSizePtr, targetDstSize, + acceleration); + /* clean the state on exit */ + LZ4_initStream(state, sizeof(LZ4_stream_t)); + return r; } - -int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize) +int LZ4_compress_destSize(const char *src, char *dst, int *srcSizePtr, + int targetDstSize) { #if (LZ4_HEAPMODE) - LZ4_stream_t* const ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ - if (ctx == NULL) return 0; + 
LZ4_stream_t *const ctx = (LZ4_stream_t *)ALLOC(sizeof( + LZ4_stream_t)); /* malloc-calloc always properly aligned */ + if (ctx == NULL) + return 0; #else - LZ4_stream_t ctxBody; - LZ4_stream_t* const ctx = &ctxBody; + LZ4_stream_t ctxBody; + LZ4_stream_t *const ctx = &ctxBody; #endif - int result = LZ4_compress_destSize_extState_internal(ctx, src, dst, srcSizePtr, targetDstSize, 1); + int result = LZ4_compress_destSize_extState_internal( + ctx, src, dst, srcSizePtr, targetDstSize, 1); #if (LZ4_HEAPMODE) - FREEMEM(ctx); + FREEMEM(ctx); #endif - return result; + return result; } - - /*-****************************** * Streaming functions ********************************/ #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) -LZ4_stream_t* LZ4_createStream(void) +LZ4_stream_t *LZ4_createStream(void) { - LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); - LZ4_STATIC_ASSERT(sizeof(LZ4_stream_t) >= sizeof(LZ4_stream_t_internal)); - DEBUGLOG(4, "LZ4_createStream %p", lz4s); - if (lz4s == NULL) return NULL; - LZ4_initStream(lz4s, sizeof(*lz4s)); - return lz4s; + LZ4_stream_t *const lz4s = (LZ4_stream_t *)ALLOC(sizeof(LZ4_stream_t)); + LZ4_STATIC_ASSERT(sizeof(LZ4_stream_t) >= + sizeof(LZ4_stream_t_internal)); + DEBUGLOG(4, "LZ4_createStream %p", lz4s); + if (lz4s == NULL) + return NULL; + LZ4_initStream(lz4s, sizeof(*lz4s)); + return lz4s; } #endif static size_t LZ4_stream_t_alignment(void) { #if LZ4_ALIGN_TEST - typedef struct { char c; LZ4_stream_t t; } t_a; - return sizeof(t_a) - sizeof(LZ4_stream_t); + typedef struct { + char c; + LZ4_stream_t t; + } t_a; + return sizeof(t_a) - sizeof(LZ4_stream_t); #else - return 1; /* effectively disabled */ + return 1; /* effectively disabled */ #endif } -LZ4_stream_t* LZ4_initStream (void* buffer, size_t size) +LZ4_stream_t *LZ4_initStream(void *buffer, size_t size) { - DEBUGLOG(5, "LZ4_initStream"); - if (buffer == NULL) { return NULL; } - if (size < sizeof(LZ4_stream_t)) { return NULL; } - if 
(!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL; - MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal)); - return (LZ4_stream_t*)buffer; + DEBUGLOG(5, "LZ4_initStream"); + if (buffer == NULL) { + return NULL; + } + if (size < sizeof(LZ4_stream_t)) { + return NULL; + } + if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) + return NULL; + MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal)); + return (LZ4_stream_t *)buffer; } /* resetStream is now deprecated, * prefer initStream() which is more general */ -void LZ4_resetStream (LZ4_stream_t* LZ4_stream) +void LZ4_resetStream(LZ4_stream_t *LZ4_stream) { - DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream); - MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal)); + DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream); + MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal)); } -void LZ4_resetStream_fast(LZ4_stream_t* ctx) { - LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32); +void LZ4_resetStream_fast(LZ4_stream_t *ctx) +{ + LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32); } #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) -int LZ4_freeStream (LZ4_stream_t* LZ4_stream) +int LZ4_freeStream(LZ4_stream_t *LZ4_stream) { - if (!LZ4_stream) return 0; /* support free on NULL */ - DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream); - FREEMEM(LZ4_stream); - return (0); + if (!LZ4_stream) + return 0; /* support free on NULL */ + DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream); + FREEMEM(LZ4_stream); + return (0); } #endif - typedef enum { _ld_fast, _ld_slow } LoadDict_mode_e; #define HASH_UNIT sizeof(reg_t) -int LZ4_loadDict_internal(LZ4_stream_t* LZ4_dict, - const char* dictionary, int dictSize, - LoadDict_mode_e _ld) +int LZ4_loadDict_internal(LZ4_stream_t *LZ4_dict, const char *dictionary, + int dictSize, LoadDict_mode_e _ld) { - LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse; - const tableType_t tableType = byU32; - const BYTE* p = (const BYTE*)dictionary; - const BYTE* const 
dictEnd = p + dictSize; - U32 idx32; + LZ4_stream_t_internal *const dict = &LZ4_dict->internal_donotuse; + const tableType_t tableType = byU32; + const BYTE *p = (const BYTE *)dictionary; + const BYTE *const dictEnd = p + dictSize; + U32 idx32; - DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict); + DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, + dictionary, LZ4_dict); - /* It's necessary to reset the context, + /* It's necessary to reset the context, * and not just continue it with prepareTable() * to avoid any risk of generating overflowing matchIndex * when compressing using this dictionary */ - LZ4_resetStream(LZ4_dict); + LZ4_resetStream(LZ4_dict); - /* We always increment the offset by 64 KB, since, if the dict is longer, + /* We always increment the offset by 64 KB, since, if the dict is longer, * we truncate it to the last 64k, and if it's shorter, we still want to * advance by a whole window length so we can provide the guarantee that * there are only valid offsets in the window, which allows an optimization * in LZ4_compress_fast_continue() where it uses noDictIssue even when the * dictionary isn't a full 64k. 
*/ - dict->currentOffset += 64 KB; + dict->currentOffset += 64 KB; - if (dictSize < (int)HASH_UNIT) { - return 0; - } + if (dictSize < (int)HASH_UNIT) { + return 0; + } - if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB; - dict->dictionary = p; - dict->dictSize = (U32)(dictEnd - p); - dict->tableType = (U32)tableType; - idx32 = dict->currentOffset - dict->dictSize; + if ((dictEnd - p) > 64 KB) + p = dictEnd - 64 KB; + dict->dictionary = p; + dict->dictSize = (U32)(dictEnd - p); + dict->tableType = (U32)tableType; + idx32 = dict->currentOffset - dict->dictSize; - while (p <= dictEnd-HASH_UNIT) { - U32 const h = LZ4_hashPosition(p, tableType); - /* Note: overwriting => favors positions end of dictionary */ - LZ4_putIndexOnHash(idx32, h, dict->hashTable, tableType); - p+=3; idx32+=3; - } + while (p <= dictEnd - HASH_UNIT) { + U32 const h = LZ4_hashPosition(p, tableType); + /* Note: overwriting => favors positions end of dictionary */ + LZ4_putIndexOnHash(idx32, h, dict->hashTable, tableType); + p += 3; + idx32 += 3; + } - if (_ld == _ld_slow) { - /* Fill hash table with additional references, to improve compression capability */ - p = dict->dictionary; - idx32 = dict->currentOffset - dict->dictSize; - while (p <= dictEnd-HASH_UNIT) { - U32 const h = LZ4_hashPosition(p, tableType); - U32 const limit = dict->currentOffset - 64 KB; - if (LZ4_getIndexOnHash(h, dict->hashTable, tableType) <= limit) { - /* Note: not overwriting => favors positions beginning of dictionary */ - LZ4_putIndexOnHash(idx32, h, dict->hashTable, tableType); - } - p++; idx32++; - } - } + if (_ld == _ld_slow) { + /* Fill hash table with additional references, to improve compression capability */ + p = dict->dictionary; + idx32 = dict->currentOffset - dict->dictSize; + while (p <= dictEnd - HASH_UNIT) { + U32 const h = LZ4_hashPosition(p, tableType); + U32 const limit = dict->currentOffset - 64 KB; + if (LZ4_getIndexOnHash(h, dict->hashTable, tableType) <= + limit) { + /* Note: not overwriting => favors 
positions beginning of dictionary */ + LZ4_putIndexOnHash(idx32, h, dict->hashTable, + tableType); + } + p++; + idx32++; + } + } - return (int)dict->dictSize; + return (int)dict->dictSize; } -int LZ4_loadDict(LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) +int LZ4_loadDict(LZ4_stream_t *LZ4_dict, const char *dictionary, int dictSize) { - return LZ4_loadDict_internal(LZ4_dict, dictionary, dictSize, _ld_fast); + return LZ4_loadDict_internal(LZ4_dict, dictionary, dictSize, _ld_fast); } EXPORT_SYMBOL(LZ4_loadDict); -int LZ4_loadDictSlow(LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) +int LZ4_loadDictSlow(LZ4_stream_t *LZ4_dict, const char *dictionary, + int dictSize) { - return LZ4_loadDict_internal(LZ4_dict, dictionary, dictSize, _ld_slow); + return LZ4_loadDict_internal(LZ4_dict, dictionary, dictSize, _ld_slow); } -void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream) +void LZ4_attach_dictionary(LZ4_stream_t *workingStream, + const LZ4_stream_t *dictionaryStream) { - const LZ4_stream_t_internal* dictCtx = (dictionaryStream == NULL) ? NULL : - &(dictionaryStream->internal_donotuse); + const LZ4_stream_t_internal *dictCtx = + (dictionaryStream == NULL) ? + NULL : + &(dictionaryStream->internal_donotuse); - DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)", - workingStream, dictionaryStream, - dictCtx != NULL ? dictCtx->dictSize : 0); + DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)", workingStream, + dictionaryStream, dictCtx != NULL ? dictCtx->dictSize : 0); - if (dictCtx != NULL) { - /* If the current offset is zero, we will never look in the + if (dictCtx != NULL) { + /* If the current offset is zero, we will never look in the * external dictionary context, since there is no value a table * entry can take that indicate a miss. In that case, we need * to bump the offset to something non-zero. 
*/ - if (workingStream->internal_donotuse.currentOffset == 0) { - workingStream->internal_donotuse.currentOffset = 64 KB; - } + if (workingStream->internal_donotuse.currentOffset == 0) { + workingStream->internal_donotuse.currentOffset = 64 KB; + } - /* Don't actually attach an empty dictionary. + /* Don't actually attach an empty dictionary. */ - if (dictCtx->dictSize == 0) { - dictCtx = NULL; - } - } - workingStream->internal_donotuse.dictCtx = dictCtx; + if (dictCtx->dictSize == 0) { + dictCtx = NULL; + } + } + workingStream->internal_donotuse.dictCtx = dictCtx; } - -static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize) +static void LZ4_renormDictT(LZ4_stream_t_internal *LZ4_dict, int nextSize) { - assert(nextSize >= 0); - if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) { /* potential ptrdiff_t overflow (32-bits mode) */ - /* rescale hash table */ - U32 const delta = LZ4_dict->currentOffset - 64 KB; - const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; - int i; - DEBUGLOG(4, "LZ4_renormDictT"); - for (i=0; ihashTable[i] < delta) LZ4_dict->hashTable[i]=0; - else LZ4_dict->hashTable[i] -= delta; - } - LZ4_dict->currentOffset = 64 KB; - if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB; - LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; - } + assert(nextSize >= 0); + if (LZ4_dict->currentOffset + (unsigned)nextSize > + 0x80000000) { /* potential ptrdiff_t overflow (32-bits mode) */ + /* rescale hash table */ + U32 const delta = LZ4_dict->currentOffset - 64 KB; + const BYTE *dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize; + int i; + DEBUGLOG(4, "LZ4_renormDictT"); + for (i = 0; i < LZ4_HASH_SIZE_U32; i++) { + if (LZ4_dict->hashTable[i] < delta) + LZ4_dict->hashTable[i] = 0; + else + LZ4_dict->hashTable[i] -= delta; + } + LZ4_dict->currentOffset = 64 KB; + if (LZ4_dict->dictSize > 64 KB) + LZ4_dict->dictSize = 64 KB; + LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize; + } } - -int 
LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, - const char* source, char* dest, - int inputSize, int maxOutputSize, - int acceleration) +int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source, + char *dest, int inputSize, int maxOutputSize, + int acceleration) { - const tableType_t tableType = byU32; - LZ4_stream_t_internal* const streamPtr = &LZ4_stream->internal_donotuse; - const char* dictEnd = streamPtr->dictSize ? (const char*)streamPtr->dictionary + streamPtr->dictSize : NULL; + const tableType_t tableType = byU32; + LZ4_stream_t_internal *const streamPtr = &LZ4_stream->internal_donotuse; + const char *dictEnd = streamPtr->dictSize ? + (const char *)streamPtr->dictionary + + streamPtr->dictSize : + NULL; - DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i, dictSize=%u)", inputSize, streamPtr->dictSize); + DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i, dictSize=%u)", + inputSize, streamPtr->dictSize); - LZ4_renormDictT(streamPtr, inputSize); /* fix index overflow */ - if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; - if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + LZ4_renormDictT(streamPtr, inputSize); /* fix index overflow */ + if (acceleration < 1) + acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) + acceleration = LZ4_ACCELERATION_MAX; - /* invalidate tiny dictionaries */ - if ( (streamPtr->dictSize < 4) /* tiny dictionary : not enough for a hash */ - && (dictEnd != source) /* prefix mode */ - && (inputSize > 0) /* tolerance : don't lose history, in case next invocation would use prefix mode */ - && (streamPtr->dictCtx == NULL) /* usingDictCtx */ - ) { - DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary); - /* remove dictionary existence from history, to employ faster prefix mode */ - streamPtr->dictSize = 0; - streamPtr->dictionary = (const BYTE*)source; - dictEnd 
= source; - } + /* invalidate tiny dictionaries */ + if ((streamPtr->dictSize < + 4) /* tiny dictionary : not enough for a hash */ + && (dictEnd != source) /* prefix mode */ + && + (inputSize > + 0) /* tolerance : don't lose history, in case next invocation would use prefix mode */ + && (streamPtr->dictCtx == NULL) /* usingDictCtx */ + ) { + DEBUGLOG( + 5, + "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", + streamPtr->dictSize, streamPtr->dictionary); + /* remove dictionary existence from history, to employ faster prefix mode */ + streamPtr->dictSize = 0; + streamPtr->dictionary = (const BYTE *)source; + dictEnd = source; + } - /* Check overlapping input/dictionary space */ - { const char* const sourceEnd = source + inputSize; - if ((sourceEnd > (const char*)streamPtr->dictionary) && (sourceEnd < dictEnd)) { - streamPtr->dictSize = (U32)(dictEnd - sourceEnd); - if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB; - if (streamPtr->dictSize < 4) streamPtr->dictSize = 0; - streamPtr->dictionary = (const BYTE*)dictEnd - streamPtr->dictSize; - } - } + /* Check overlapping input/dictionary space */ + { + const char *const sourceEnd = source + inputSize; + if ((sourceEnd > (const char *)streamPtr->dictionary) && + (sourceEnd < dictEnd)) { + streamPtr->dictSize = (U32)(dictEnd - sourceEnd); + if (streamPtr->dictSize > 64 KB) + streamPtr->dictSize = 64 KB; + if (streamPtr->dictSize < 4) + streamPtr->dictSize = 0; + streamPtr->dictionary = + (const BYTE *)dictEnd - streamPtr->dictSize; + } + } - /* prefix mode : source data follows dictionary */ - if (dictEnd == source) { - if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) - return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration); - else - return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, 
noDictIssue, acceleration); - } + /* prefix mode : source data follows dictionary */ + if (dictEnd == source) { + if ((streamPtr->dictSize < 64 KB) && + (streamPtr->dictSize < streamPtr->currentOffset)) + return LZ4_compress_generic( + streamPtr, source, dest, inputSize, NULL, + maxOutputSize, limitedOutput, tableType, + withPrefix64k, dictSmall, acceleration); + else + return LZ4_compress_generic( + streamPtr, source, dest, inputSize, NULL, + maxOutputSize, limitedOutput, tableType, + withPrefix64k, noDictIssue, acceleration); + } - /* external dictionary mode */ - { int result; - if (streamPtr->dictCtx) { - /* We depend here on the fact that dictCtx'es (produced by + /* external dictionary mode */ + { + int result; + if (streamPtr->dictCtx) { + /* We depend here on the fact that dictCtx'es (produced by * LZ4_loadDict) guarantee that their tables contain no references * to offsets between dictCtx->currentOffset - 64 KB and * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe * to use noDictIssue even when the dict isn't a full 64 KB. */ - if (inputSize > 4 KB) { - /* For compressing large blobs, it is faster to pay the setup + if (inputSize > 4 KB) { + /* For compressing large blobs, it is faster to pay the setup * cost to copy the dictionary's tables into the active context, * so that the compression loop is only looking into one table. 
*/ - LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr)); - result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); - } else { - result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration); - } - } else { /* small data <= 4 KB */ - if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { - result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration); - } else { - result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); - } - } - streamPtr->dictionary = (const BYTE*)source; - streamPtr->dictSize = (U32)inputSize; - return result; - } + LZ4_memcpy(streamPtr, streamPtr->dictCtx, + sizeof(*streamPtr)); + result = LZ4_compress_generic( + streamPtr, source, dest, inputSize, + NULL, maxOutputSize, limitedOutput, + tableType, usingExtDict, noDictIssue, + acceleration); + } else { + result = LZ4_compress_generic( + streamPtr, source, dest, inputSize, + NULL, maxOutputSize, limitedOutput, + tableType, usingDictCtx, noDictIssue, + acceleration); + } + } else { /* small data <= 4 KB */ + if ((streamPtr->dictSize < 64 KB) && + (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic( + streamPtr, source, dest, inputSize, + NULL, maxOutputSize, limitedOutput, + tableType, usingExtDict, dictSmall, + acceleration); + } else { + result = LZ4_compress_generic( + streamPtr, source, dest, inputSize, + NULL, maxOutputSize, limitedOutput, + tableType, usingExtDict, noDictIssue, + acceleration); + } + } + streamPtr->dictionary = (const BYTE *)source; + streamPtr->dictSize = (U32)inputSize; + return result; + } } - /* Hidden debug function, to 
force-test external dictionary mode */ -int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize) +int LZ4_compress_forceExtDict(LZ4_stream_t *LZ4_dict, const char *source, + char *dest, int srcSize) { - LZ4_stream_t_internal* const streamPtr = &LZ4_dict->internal_donotuse; - int result; + LZ4_stream_t_internal *const streamPtr = &LZ4_dict->internal_donotuse; + int result; - LZ4_renormDictT(streamPtr, srcSize); + LZ4_renormDictT(streamPtr, srcSize); - if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { - result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1); - } else { - result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1); - } + if ((streamPtr->dictSize < 64 KB) && + (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic(streamPtr, source, dest, srcSize, + NULL, 0, notLimited, byU32, + usingExtDict, dictSmall, 1); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, srcSize, + NULL, 0, notLimited, byU32, + usingExtDict, noDictIssue, 1); + } - streamPtr->dictionary = (const BYTE*)source; - streamPtr->dictSize = (U32)srcSize; + streamPtr->dictionary = (const BYTE *)source; + streamPtr->dictSize = (U32)srcSize; - return result; + return result; } - /*! LZ4_saveDict() : * If previously compressed data block is not guaranteed to remain available at its memory location, * save it into a safer place (char* safeBuffer). @@ -1729,31 +2172,37 @@ int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* * one can therefore call LZ4_compress_fast_continue() right after. * @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error. 
*/ -int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) +int LZ4_saveDict(LZ4_stream_t *LZ4_dict, char *safeBuffer, int dictSize) { - LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse; + LZ4_stream_t_internal *const dict = &LZ4_dict->internal_donotuse; - DEBUGLOG(5, "LZ4_saveDict : dictSize=%i, safeBuffer=%p", dictSize, safeBuffer); + DEBUGLOG(5, "LZ4_saveDict : dictSize=%i, safeBuffer=%p", dictSize, + safeBuffer); - if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */ - if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; } + if ((U32)dictSize > 64 KB) { + dictSize = 64 KB; + } /* useless to define a dictionary > 64 KB */ + if ((U32)dictSize > dict->dictSize) { + dictSize = (int)dict->dictSize; + } - if (safeBuffer == NULL) assert(dictSize == 0); - if (dictSize > 0) { - const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize; - assert(dict->dictionary); - LZ4_memmove(safeBuffer, previousDictEnd - dictSize, (size_t)dictSize); - } + if (safeBuffer == NULL) + assert(dictSize == 0); + if (dictSize > 0) { + const BYTE *const previousDictEnd = + dict->dictionary + dict->dictSize; + assert(dict->dictionary); + LZ4_memmove(safeBuffer, previousDictEnd - dictSize, + (size_t)dictSize); + } - dict->dictionary = (const BYTE*)safeBuffer; - dict->dictSize = (U32)dictSize; + dict->dictionary = (const BYTE *)safeBuffer; + dict->dictSize = (U32)dictSize; - return dictSize; + return dictSize; } EXPORT_SYMBOL(LZ4_saveDict); - - /*-******************************* * Decompression functions ********************************/ @@ -1761,19 +2210,24 @@ EXPORT_SYMBOL(LZ4_saveDict); typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; #undef MIN -#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) - +#define MIN(a, b) ((a) < (b) ? 
(a) : (b)) /* variant for decompress_unsafe() * does not know end of input * presumes input is well formed * note : will consume at least one byte */ -static size_t read_long_length_no_check(const BYTE** pp) +static size_t read_long_length_no_check(const BYTE **pp) { - size_t b, l = 0; - do { b = **pp; (*pp)++; l += b; } while (b==255); - DEBUGLOG(6, "read_long_length_no_check: +length=%zu using %zu input bytes", l, l/255 + 1) - return l; + size_t b, l = 0; + do { + b = **pp; + (*pp)++; + l += b; + } while (b == 255); + DEBUGLOG(6, + "read_long_length_no_check: +length=%zu using %zu input bytes", + l, l / 255 + 1) + return l; } /* core decoder variant for LZ4_decompress_fast*() @@ -1785,106 +2239,127 @@ static size_t read_long_length_no_check(const BYTE** pp) * Note : this variant is not optimized for speed, just for maintenance. * the goal is to remove support of decompress_fast*() variants by v2.0 **/ -LZ4_FORCE_INLINE int -LZ4_decompress_unsafe_generic( - const BYTE* const istart, - BYTE* const ostart, - int decompressedSize, +LZ4_FORCE_INLINE int LZ4_decompress_unsafe_generic( + const BYTE *const istart, BYTE *const ostart, int decompressedSize, - size_t prefixSize, - const BYTE* const dictStart, /* only if dict==usingExtDict */ - const size_t dictSize /* note: =0 if dictStart==NULL */ - ) + size_t prefixSize, + const BYTE *const dictStart, /* only if dict==usingExtDict */ + const size_t dictSize /* note: =0 if dictStart==NULL */ +) { - const BYTE* ip = istart; - BYTE* op = (BYTE*)ostart; - BYTE* const oend = ostart + decompressedSize; - const BYTE* const prefixStart = ostart - prefixSize; + const BYTE *ip = istart; + BYTE *op = (BYTE *)ostart; + BYTE *const oend = ostart + decompressedSize; + const BYTE *const prefixStart = ostart - prefixSize; - DEBUGLOG(5, "LZ4_decompress_unsafe_generic"); - if (dictStart == NULL) assert(dictSize == 0); + DEBUGLOG(5, "LZ4_decompress_unsafe_generic"); + if (dictStart == NULL) + assert(dictSize == 0); - while (1) { - /* start 
new sequence */ - unsigned token = *ip++; + while (1) { + /* start new sequence */ + unsigned token = *ip++; - /* literals */ - { size_t ll = token >> ML_BITS; - if (ll==15) { - /* long literal length */ - ll += read_long_length_no_check(&ip); - } - if ((size_t)(oend-op) < ll) return -1; /* output buffer overflow */ - LZ4_memmove(op, ip, ll); /* support in-place decompression */ - op += ll; - ip += ll; - if ((size_t)(oend-op) < MFLIMIT) { - if (op==oend) break; /* end of block */ - DEBUGLOG(5, "invalid: literals end at distance %zi from end of block", oend-op); - /* incorrect end of block : + /* literals */ + { + size_t ll = token >> ML_BITS; + if (ll == 15) { + /* long literal length */ + ll += read_long_length_no_check(&ip); + } + if ((size_t)(oend - op) < ll) + return -1; /* output buffer overflow */ + LZ4_memmove(op, ip, + ll); /* support in-place decompression */ + op += ll; + ip += ll; + if ((size_t)(oend - op) < MFLIMIT) { + if (op == oend) + break; /* end of block */ + DEBUGLOG( + 5, + "invalid: literals end at distance %zi from end of block", + oend - op); + /* incorrect end of block : * last match must start at least MFLIMIT==12 bytes before end of output block */ - return -1; - } } + return -1; + } + } - /* match */ - { size_t ml = token & 15; - size_t const offset = LZ4_readLE16(ip); - ip+=2; + /* match */ + { + size_t ml = token & 15; + size_t const offset = LZ4_readLE16(ip); + ip += 2; - if (ml==15) { - /* long literal length */ - ml += read_long_length_no_check(&ip); - } - ml += MINMATCH; + if (ml == 15) { + /* long literal length */ + ml += read_long_length_no_check(&ip); + } + ml += MINMATCH; - if ((size_t)(oend-op) < ml) return -1; /* output buffer overflow */ + if ((size_t)(oend - op) < ml) + return -1; /* output buffer overflow */ - { const BYTE* match = op - offset; + { + const BYTE *match = op - offset; - /* out of range */ - if (offset > (size_t)(op - prefixStart) + dictSize) { - DEBUGLOG(6, "offset out of range"); - return -1; - } + /* out 
of range */ + if (offset > + (size_t)(op - prefixStart) + dictSize) { + DEBUGLOG(6, "offset out of range"); + return -1; + } - /* check special case : extDict */ - if (offset > (size_t)(op - prefixStart)) { - /* extDict scenario */ - const BYTE* const dictEnd = dictStart + dictSize; - const BYTE* extMatch = dictEnd - (offset - (size_t)(op-prefixStart)); - size_t const extml = (size_t)(dictEnd - extMatch); - if (extml > ml) { - /* match entirely within extDict */ - LZ4_memmove(op, extMatch, ml); - op += ml; - ml = 0; - } else { - /* match split between extDict & prefix */ - LZ4_memmove(op, extMatch, extml); - op += extml; - ml -= extml; - } - match = prefixStart; - } + /* check special case : extDict */ + if (offset > (size_t)(op - prefixStart)) { + /* extDict scenario */ + const BYTE *const dictEnd = + dictStart + dictSize; + const BYTE *extMatch = + dictEnd - + (offset - + (size_t)(op - prefixStart)); + size_t const extml = + (size_t)(dictEnd - extMatch); + if (extml > ml) { + /* match entirely within extDict */ + LZ4_memmove(op, extMatch, ml); + op += ml; + ml = 0; + } else { + /* match split between extDict & prefix */ + LZ4_memmove(op, extMatch, + extml); + op += extml; + ml -= extml; + } + match = prefixStart; + } - /* match copy - slow variant, supporting overlap copy */ - { size_t u; - for (u=0; u= ilimit)) { /* read limit reached */ - return rvl_error; - } - s = **ip; - (*ip)++; - length += s; - if (unlikely((*ip) > ilimit)) { /* read limit reached */ - return rvl_error; - } - /* accumulator overflow detection (32-bit mode only) */ - if ((sizeof(length) < 8) && unlikely(length > ((Rvl_t)(-1)/2)) ) { - return rvl_error; - } - if (likely(s != 255)) return length; - do { - s = **ip; - (*ip)++; - length += s; - if (unlikely((*ip) > ilimit)) { /* read limit reached */ - return rvl_error; - } - /* accumulator overflow detection (32-bit mode only) */ - if ((sizeof(length) < 8) && unlikely(length > ((Rvl_t)(-1)/2)) ) { - return rvl_error; - } - } while (s == 255); 
+ Rvl_t s, length = 0; + assert(ip != NULL); + assert(*ip != NULL); + assert(ilimit != NULL); + if (initial_check && + unlikely((*ip) >= ilimit)) { /* read limit reached */ + return rvl_error; + } + s = **ip; + (*ip)++; + length += s; + if (unlikely((*ip) > ilimit)) { /* read limit reached */ + return rvl_error; + } + /* accumulator overflow detection (32-bit mode only) */ + if ((sizeof(length) < 8) && unlikely(length > ((Rvl_t)(-1) / 2))) { + return rvl_error; + } + if (likely(s != 255)) + return length; + do { + s = **ip; + (*ip)++; + length += s; + if (unlikely((*ip) > ilimit)) { /* read limit reached */ + return rvl_error; + } + /* accumulator overflow detection (32-bit mode only) */ + if ((sizeof(length) < 8) && + unlikely(length > ((Rvl_t)(-1) / 2))) { + return rvl_error; + } + } while (s == 255); - return length; + return length; } /*! LZ4_decompress_generic() : @@ -1938,206 +2415,280 @@ read_variable_length(const BYTE** ip, const BYTE* ilimit, * Note that it is important for performance that this function really get inlined, * in order to remove useless branches during compilation optimization. 
*/ -LZ4_FORCE_INLINE int -LZ4_decompress_generic( - const char* const src, - char* const dst, - int srcSize, - int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */ +LZ4_FORCE_INLINE int LZ4_decompress_generic( + const char *const src, char *const dst, int srcSize, + int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */ - earlyEnd_directive partialDecoding, /* full, partial */ - dict_directive dict, /* noDict, withPrefix64k, usingExtDict */ - const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */ - const BYTE* const dictStart, /* only if dict==usingExtDict */ - const size_t dictSize /* note : = 0 if noDict */ - ) + earlyEnd_directive partialDecoding, /* full, partial */ + dict_directive dict, /* noDict, withPrefix64k, usingExtDict */ + const BYTE *const lowPrefix, /* always <= dst, == dst when no prefix */ + const BYTE *const dictStart, /* only if dict==usingExtDict */ + const size_t dictSize /* note : = 0 if noDict */ +) { - if ((src == NULL) || (outputSize < 0)) { return -1; } + if ((src == NULL) || (outputSize < 0)) { + return -1; + } - { const BYTE* ip = (const BYTE*) src; - const BYTE* const iend = ip + srcSize; + { + const BYTE *ip = (const BYTE *)src; + const BYTE *const iend = ip + srcSize; - BYTE* op = (BYTE*) dst; - BYTE* const oend = op + outputSize; - BYTE* cpy; + BYTE *op = (BYTE *)dst; + BYTE *const oend = op + outputSize; + BYTE *cpy; - const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize; + const BYTE *const dictEnd = + (dictStart == NULL) ? NULL : dictStart + dictSize; - const int checkOffset = (dictSize < (int)(64 KB)); + const int checkOffset = (dictSize < (int)(64 KB)); + /* Set up the "end" pointers for the shortcut. */ + const BYTE *const shortiend = + iend - 14 /*maxLL*/ - 2 /*offset*/; + const BYTE *const shortoend = + oend - 14 /*maxLL*/ - 18 /*maxML*/; - /* Set up the "end" pointers for the shortcut. 
*/ - const BYTE* const shortiend = iend - 14 /*maxLL*/ - 2 /*offset*/; - const BYTE* const shortoend = oend - 14 /*maxLL*/ - 18 /*maxML*/; + const BYTE *match; + size_t offset; + unsigned token; + size_t length; - const BYTE* match; - size_t offset; - unsigned token; - size_t length; + DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", + srcSize, outputSize); + /* Special cases */ + assert(lowPrefix <= op); + if (unlikely(outputSize == 0)) { + /* Empty output buffer */ + if (partialDecoding) + return 0; + return ((srcSize == 1) && (*ip == 0)) ? 0 : -1; + } + if (unlikely(srcSize == 0)) { + return -1; + } - DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize); - - /* Special cases */ - assert(lowPrefix <= op); - if (unlikely(outputSize==0)) { - /* Empty output buffer */ - if (partialDecoding) return 0; - return ((srcSize==1) && (*ip==0)) ? 0 : -1; - } - if (unlikely(srcSize==0)) { return -1; } - - /* LZ4_FAST_DEC_LOOP: + /* LZ4_FAST_DEC_LOOP: * designed for modern OoO performance cpus, * where copying reliably 32-bytes is preferable to an unpredictable branch. * note : fast loop may show a regression for some client arm chips. 
*/ #if LZ4_FAST_DEC_LOOP - if ((oend - op) < FASTLOOP_SAFE_DISTANCE) { - DEBUGLOG(6, "move to safe decode loop"); - goto safe_decode; - } + if ((oend - op) < FASTLOOP_SAFE_DISTANCE) { + DEBUGLOG(6, "move to safe decode loop"); + goto safe_decode; + } - /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */ - DEBUGLOG(6, "using fast decode loop"); - while (1) { - /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */ - assert(oend - op >= FASTLOOP_SAFE_DISTANCE); - assert(ip < iend); - token = *ip++; - length = token >> ML_BITS; /* literal length */ - DEBUGLOG(7, "blockPos%6u: litLength token = %u", (unsigned)(op-(BYTE*)dst), (unsigned)length); + /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */ + DEBUGLOG(6, "using fast decode loop"); + while (1) { + /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */ + assert(oend - op >= FASTLOOP_SAFE_DISTANCE); + assert(ip < iend); + token = *ip++; + length = token >> ML_BITS; /* literal length */ + DEBUGLOG(7, "blockPos%6u: litLength token = %u", + (unsigned)(op - (BYTE *)dst), + (unsigned)length); - /* decode literal length */ - if (length == RUN_MASK) { - size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1); - if (addl == rvl_error) { - DEBUGLOG(6, "error reading long literal length"); - goto _output_error; - } - length += addl; - if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ - if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ + /* decode literal length */ + if (length == RUN_MASK) { + size_t const addl = read_variable_length( + &ip, iend - RUN_MASK, 1); + if (addl == rvl_error) { + DEBUGLOG( + 6, + "error reading long literal length"); + goto _output_error; + } + length += addl; + if (unlikely((uptrval)(op) + length < + (uptrval)(op))) { + goto _output_error; + } /* overflow detection */ + if 
(unlikely((uptrval)(ip) + length < + (uptrval)(ip))) { + goto _output_error; + } /* overflow detection */ - /* copy literals */ - LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); - if ((op+length>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; } - LZ4_wildCopy32(op, ip, op+length); - ip += length; op += length; - } else if (ip <= iend-(16 + 1/*max lit + offset + nextToken*/)) { - /* We don't need to check oend, since we check it once for each loop below */ - DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length); - /* Literals can only be <= 14, but hope compilers optimize better when copy by a register size */ - LZ4_memcpy(op, ip, 16); - ip += length; op += length; - } else { - goto safe_literal_copy; - } + /* copy literals */ + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if ((op + length > oend - 32) || + (ip + length > iend - 32)) { + goto safe_literal_copy; + } + LZ4_wildCopy32(op, ip, op + length); + ip += length; + op += length; + } else if (ip <= + iend - (16 + + 1 /*max lit + offset + nextToken*/)) { + /* We don't need to check oend, since we check it once for each loop below */ + DEBUGLOG(7, + "copy %u bytes in a 16-bytes stripe", + (unsigned)length); + /* Literals can only be <= 14, but hope compilers optimize better when copy by a register size */ + LZ4_memcpy(op, ip, 16); + ip += length; + op += length; + } else { + goto safe_literal_copy; + } - /* get offset */ - offset = LZ4_readLE16(ip); ip+=2; - DEBUGLOG(6, "blockPos%6u: offset = %u", (unsigned)(op-(BYTE*)dst), (unsigned)offset); - match = op - offset; - assert(match <= op); /* overflow check */ + /* get offset */ + offset = LZ4_readLE16(ip); + ip += 2; + DEBUGLOG(6, "blockPos%6u: offset = %u", + (unsigned)(op - (BYTE *)dst), + (unsigned)offset); + match = op - offset; + assert(match <= op); /* overflow check */ - /* get matchlength */ - length = token & ML_MASK; - DEBUGLOG(7, " match length token = %u (len==%u)", (unsigned)length, (unsigned)length+MINMATCH); + /* get 
matchlength */ + length = token & ML_MASK; + DEBUGLOG(7, " match length token = %u (len==%u)", + (unsigned)length, (unsigned)length + MINMATCH); - if (length == ML_MASK) { - size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0); - if (addl == rvl_error) { - DEBUGLOG(5, "error reading long match length"); - goto _output_error; - } - length += addl; - length += MINMATCH; - DEBUGLOG(7, " long match length == %u", (unsigned)length); - if (unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */ - if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { - goto safe_match_copy; - } - } else { - length += MINMATCH; - if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { - DEBUGLOG(7, "moving to safe_match_copy (ml==%u)", (unsigned)length); - goto safe_match_copy; - } + if (length == ML_MASK) { + size_t const addl = read_variable_length( + &ip, iend - LASTLITERALS + 1, 0); + if (addl == rvl_error) { + DEBUGLOG( + 5, + "error reading long match length"); + goto _output_error; + } + length += addl; + length += MINMATCH; + DEBUGLOG(7, " long match length == %u", + (unsigned)length); + if (unlikely((uptrval)(op) + length < + (uptrval)op)) { + goto _output_error; + } /* overflow detection */ + if (op + length >= + oend - FASTLOOP_SAFE_DISTANCE) { + goto safe_match_copy; + } + } else { + length += MINMATCH; + if (op + length >= + oend - FASTLOOP_SAFE_DISTANCE) { + DEBUGLOG( + 7, + "moving to safe_match_copy (ml==%u)", + (unsigned)length); + goto safe_match_copy; + } - /* Fastpath check: skip LZ4_wildCopy32 when true */ - if ((dict == withPrefix64k) || (match >= lowPrefix)) { - if (offset >= 8) { - assert(match >= lowPrefix); - assert(match <= op); - assert(op + 18 <= oend); + /* Fastpath check: skip LZ4_wildCopy32 when true */ + if ((dict == withPrefix64k) || + (match >= lowPrefix)) { + if (offset >= 8) { + assert(match >= lowPrefix); + assert(match <= op); + assert(op + 18 <= oend); - LZ4_memcpy(op, match, 8); - LZ4_memcpy(op+8, 
match+8, 8); - LZ4_memcpy(op+16, match+16, 2); - op += length; - continue; - } } } + LZ4_memcpy(op, match, 8); + LZ4_memcpy(op + 8, match + 8, + 8); + LZ4_memcpy(op + 16, match + 16, + 2); + op += length; + continue; + } + } + } - if ( checkOffset && (unlikely(match + dictSize < lowPrefix)) ) { - DEBUGLOG(5, "Error : pos=%zi, offset=%zi => outside buffers", op-lowPrefix, op-match); - goto _output_error; - } - /* match starting within external dictionary */ - if ((dict==usingExtDict) && (match < lowPrefix)) { - assert(dictEnd != NULL); - if (unlikely(op+length > oend-LASTLITERALS)) { - if (partialDecoding) { - DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd"); - length = MIN(length, (size_t)(oend-op)); - } else { - DEBUGLOG(6, "end-of-block condition violated") - goto _output_error; - } } + if (checkOffset && + (unlikely(match + dictSize < lowPrefix))) { + DEBUGLOG( + 5, + "Error : pos=%zi, offset=%zi => outside buffers", + op - lowPrefix, op - match); + goto _output_error; + } + /* match starting within external dictionary */ + if ((dict == usingExtDict) && (match < lowPrefix)) { + assert(dictEnd != NULL); + if (unlikely(op + length > + oend - LASTLITERALS)) { + if (partialDecoding) { + DEBUGLOG( + 7, + "partialDecoding: dictionary match, close to dstEnd"); + length = MIN( + length, + (size_t)(oend - op)); + } else { + DEBUGLOG( + 6, + "end-of-block condition violated") + goto _output_error; + } + } - if (length <= (size_t)(lowPrefix-match)) { - /* match fits entirely within external dictionary : just copy */ - LZ4_memmove(op, dictEnd - (lowPrefix-match), length); - op += length; - } else { - /* match stretches into both external dictionary and current block */ - size_t const copySize = (size_t)(lowPrefix - match); - size_t const restSize = length - copySize; - LZ4_memcpy(op, dictEnd - copySize, copySize); - op += copySize; - if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ - BYTE* const endOfMatch = op + restSize; - const BYTE* 
copyFrom = lowPrefix; - while (op < endOfMatch) { *op++ = *copyFrom++; } - } else { - LZ4_memcpy(op, lowPrefix, restSize); - op += restSize; - } } - continue; - } + if (length <= (size_t)(lowPrefix - match)) { + /* match fits entirely within external dictionary : just copy */ + LZ4_memmove(op, + dictEnd - + (lowPrefix - match), + length); + op += length; + } else { + /* match stretches into both external dictionary and current block */ + size_t const copySize = + (size_t)(lowPrefix - match); + size_t const restSize = + length - copySize; + LZ4_memcpy(op, dictEnd - copySize, + copySize); + op += copySize; + if (restSize > + (size_t)(op - + lowPrefix)) { /* overlap copy */ + BYTE *const endOfMatch = + op + restSize; + const BYTE *copyFrom = + lowPrefix; + while (op < endOfMatch) { + *op++ = *copyFrom++; + } + } else { + LZ4_memcpy(op, lowPrefix, + restSize); + op += restSize; + } + } + continue; + } - /* copy match within block */ - cpy = op + length; + /* copy match within block */ + cpy = op + length; - assert((op <= oend) && (oend-op >= 32)); - if (unlikely(offset<16)) { - LZ4_memcpy_using_offset(op, match, cpy, offset); - } else { - LZ4_wildCopy32(op, match, cpy); - } + assert((op <= oend) && (oend - op >= 32)); + if (unlikely(offset < 16)) { + LZ4_memcpy_using_offset(op, match, cpy, offset); + } else { + LZ4_wildCopy32(op, match, cpy); + } - op = cpy; /* wildcopy correction */ - } - safe_decode: + op = cpy; /* wildcopy correction */ + } + safe_decode: #endif - /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */ - DEBUGLOG(6, "using safe decode loop"); - while (1) { - assert(ip < iend); - token = *ip++; - length = token >> ML_BITS; /* literal length */ - DEBUGLOG(7, "blockPos%6u: litLength token = %u", (unsigned)(op-(BYTE*)dst), (unsigned)length); + /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */ + DEBUGLOG(6, "using safe decode loop"); + while (1) { + assert(ip < iend); + token = *ip++; + length = 
token >> ML_BITS; /* literal length */ + DEBUGLOG(7, "blockPos%6u: litLength token = %u", + (unsigned)(op - (BYTE *)dst), + (unsigned)length); - /* A two-stage shortcut for the most common case: + /* A two-stage shortcut for the most common case: * 1) If the literal length is 0..14, and there is enough space, * enter the shortcut and copy 16 bytes on behalf of the literals * (in the fast mode, only 8 bytes can be safely copied this way). @@ -2146,327 +2697,435 @@ LZ4_decompress_generic( * those 18 bytes earlier, upon entering the shortcut (in other words, * there is a combined check for both stages). */ - if ( (length != RUN_MASK) - /* strictly "less than" on input, to re-enter the loop with at least one byte */ - && likely((ip < shortiend) & (op <= shortoend)) ) { - /* Copy the literals */ - LZ4_memcpy(op, ip, 16); - op += length; ip += length; + if ((length != RUN_MASK) + /* strictly "less than" on input, to re-enter the loop with at least one byte */ + && likely((ip < shortiend) & (op <= shortoend))) { + /* Copy the literals */ + LZ4_memcpy(op, ip, 16); + op += length; + ip += length; - /* The second stage: prepare for match copying, decode full info. + /* The second stage: prepare for match copying, decode full info. * If it doesn't work out, the info won't be wasted. */ - length = token & ML_MASK; /* match length */ - DEBUGLOG(7, "blockPos%6u: matchLength token = %u (len=%u)", (unsigned)(op-(BYTE*)dst), (unsigned)length, (unsigned)length + 4); - offset = LZ4_readLE16(ip); ip += 2; - match = op - offset; - assert(match <= op); /* check overflow */ + length = token & ML_MASK; /* match length */ + DEBUGLOG( + 7, + "blockPos%6u: matchLength token = %u (len=%u)", + (unsigned)(op - (BYTE *)dst), + (unsigned)length, (unsigned)length + 4); + offset = LZ4_readLE16(ip); + ip += 2; + match = op - offset; + assert(match <= op); /* check overflow */ - /* Do not deal with overlapping matches. 
*/ - if ( (length != ML_MASK) - && (offset >= 8) - && (dict==withPrefix64k || match >= lowPrefix) ) { - /* Copy the match. */ - LZ4_memcpy(op + 0, match + 0, 8); - LZ4_memcpy(op + 8, match + 8, 8); - LZ4_memcpy(op +16, match +16, 2); - op += length + MINMATCH; - /* Both stages worked, load the next token. */ - continue; - } + /* Do not deal with overlapping matches. */ + if ((length != ML_MASK) && (offset >= 8) && + (dict == withPrefix64k || + match >= lowPrefix)) { + /* Copy the match. */ + LZ4_memcpy(op + 0, match + 0, 8); + LZ4_memcpy(op + 8, match + 8, 8); + LZ4_memcpy(op + 16, match + 16, 2); + op += length + MINMATCH; + /* Both stages worked, load the next token. */ + continue; + } - /* The second stage didn't work out, but the info is ready. + /* The second stage didn't work out, but the info is ready. * Propel it right to the point of match copying. */ - goto _copy_match; - } + goto _copy_match; + } - /* decode literal length */ - if (length == RUN_MASK) { - size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1); - if (addl == rvl_error) { goto _output_error; } - length += addl; - if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ - if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ - } + /* decode literal length */ + if (length == RUN_MASK) { + size_t const addl = read_variable_length( + &ip, iend - RUN_MASK, 1); + if (addl == rvl_error) { + goto _output_error; + } + length += addl; + if (unlikely((uptrval)(op) + length < + (uptrval)(op))) { + goto _output_error; + } /* overflow detection */ + if (unlikely((uptrval)(ip) + length < + (uptrval)(ip))) { + goto _output_error; + } /* overflow detection */ + } #if LZ4_FAST_DEC_LOOP - safe_literal_copy: + safe_literal_copy: #endif - /* copy literals */ - cpy = op+length; + /* copy literals */ + cpy = op + length; - LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); - if ((cpy>oend-MFLIMIT) || 
(ip+length>iend-(2+1+LASTLITERALS))) { - /* We've either hit the input parsing restriction or the output parsing restriction. + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if ((cpy > oend - MFLIMIT) || + (ip + length > iend - (2 + 1 + LASTLITERALS))) { + /* We've either hit the input parsing restriction or the output parsing restriction. * In the normal scenario, decoding a full block, it must be the last sequence, * otherwise it's an error (invalid input or dimensions). * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow. */ - if (partialDecoding) { - /* Since we are partial decoding we may be in this block because of the output parsing + if (partialDecoding) { + /* Since we are partial decoding we may be in this block because of the output parsing * restriction, which is not valid since the output buffer is allowed to be undersized. */ - DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end") - DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length); - DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op)); - DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip)); - /* Finishing in the middle of a literals segment, + DEBUGLOG( + 7, + "partialDecoding: copying literals, close to input or output end") + DEBUGLOG( + 7, + "partialDecoding: literal length = %u", + (unsigned)length); + DEBUGLOG( + 7, + "partialDecoding: remaining space in dstBuffer : %i", + (int)(oend - op)); + DEBUGLOG( + 7, + "partialDecoding: remaining space in srcBuffer : %i", + (int)(iend - ip)); + /* Finishing in the middle of a literals segment, * due to lack of input. */ - if (ip+length > iend) { - length = (size_t)(iend-ip); - cpy = op + length; - } - /* Finishing in the middle of a literals segment, + if (ip + length > iend) { + length = (size_t)(iend - ip); + cpy = op + length; + } + /* Finishing in the middle of a literals segment, * due to lack of output space. 
*/ - if (cpy > oend) { - cpy = oend; - assert(op<=oend); - length = (size_t)(oend-op); - } - } else { - /* We must be on the last sequence (or invalid) because of the parsing limitations + if (cpy > oend) { + cpy = oend; + assert(op <= oend); + length = (size_t)(oend - op); + } + } else { + /* We must be on the last sequence (or invalid) because of the parsing limitations * so check that we exactly consume the input and don't overrun the output buffer. */ - if ((ip+length != iend) || (cpy > oend)) { - DEBUGLOG(5, "should have been last run of literals") - DEBUGLOG(5, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend); - DEBUGLOG(5, "or cpy(%p) > (oend-MFLIMIT)(%p)", cpy, oend-MFLIMIT); - DEBUGLOG(5, "after writing %u bytes / %i bytes available", (unsigned)(op-(BYTE*)dst), outputSize); - goto _output_error; - } - } - LZ4_memmove(op, ip, length); /* supports overlapping memory regions, for in-place decompression scenarios */ - ip += length; - op += length; - /* Necessarily EOF when !partialDecoding. + if ((ip + length != iend) || + (cpy > oend)) { + DEBUGLOG( + 5, + "should have been last run of literals") + DEBUGLOG( + 5, + "ip(%p) + length(%i) = %p != iend (%p)", + ip, (int)length, + ip + length, iend); + DEBUGLOG( + 5, + "or cpy(%p) > (oend-MFLIMIT)(%p)", + cpy, oend - MFLIMIT); + DEBUGLOG( + 5, + "after writing %u bytes / %i bytes available", + (unsigned)(op - + (BYTE *)dst), + outputSize); + goto _output_error; + } + } + LZ4_memmove( + op, ip, + length); /* supports overlapping memory regions, for in-place decompression scenarios */ + ip += length; + op += length; + /* Necessarily EOF when !partialDecoding. * When partialDecoding, it is EOF if we've either * filled the output buffer or * can't proceed with reading an offset for following match. 
*/ - if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) { - break; - } - } else { - LZ4_wildCopy8(op, ip, cpy); /* can overwrite up to 8 bytes beyond cpy */ - ip += length; op = cpy; - } + if (!partialDecoding || (cpy == oend) || + (ip >= (iend - 2))) { + break; + } + } else { + LZ4_wildCopy8( + op, ip, + cpy); /* can overwrite up to 8 bytes beyond cpy */ + ip += length; + op = cpy; + } - /* get offset */ - offset = LZ4_readLE16(ip); ip+=2; - match = op - offset; + /* get offset */ + offset = LZ4_readLE16(ip); + ip += 2; + match = op - offset; - /* get matchlength */ - length = token & ML_MASK; - DEBUGLOG(7, "blockPos%6u: matchLength token = %u", (unsigned)(op-(BYTE*)dst), (unsigned)length); + /* get matchlength */ + length = token & ML_MASK; + DEBUGLOG(7, "blockPos%6u: matchLength token = %u", + (unsigned)(op - (BYTE *)dst), + (unsigned)length); - _copy_match: - if (length == ML_MASK) { - size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0); - if (addl == rvl_error) { goto _output_error; } - length += addl; - if (unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */ - } - length += MINMATCH; + _copy_match: + if (length == ML_MASK) { + size_t const addl = read_variable_length( + &ip, iend - LASTLITERALS + 1, 0); + if (addl == rvl_error) { + goto _output_error; + } + length += addl; + if (unlikely((uptrval)(op) + length < + (uptrval)op)) + goto _output_error; /* overflow detection */ + } + length += MINMATCH; #if LZ4_FAST_DEC_LOOP - safe_match_copy: + safe_match_copy: #endif - if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ - /* match starting within external dictionary */ - if ((dict==usingExtDict) && (match < lowPrefix)) { - assert(dictEnd != NULL); - if (unlikely(op+length > oend-LASTLITERALS)) { - if (partialDecoding) length = MIN(length, (size_t)(oend-op)); - else goto _output_error; /* doesn't respect parsing restriction */ - 
} + if ((checkOffset) && + (unlikely(match + dictSize < lowPrefix))) + goto _output_error; /* Error : offset outside buffers */ + /* match starting within external dictionary */ + if ((dict == usingExtDict) && (match < lowPrefix)) { + assert(dictEnd != NULL); + if (unlikely(op + length > + oend - LASTLITERALS)) { + if (partialDecoding) + length = MIN( + length, + (size_t)(oend - op)); + else + goto _output_error; /* doesn't respect parsing restriction */ + } - if (length <= (size_t)(lowPrefix-match)) { - /* match fits entirely within external dictionary : just copy */ - LZ4_memmove(op, dictEnd - (lowPrefix-match), length); - op += length; - } else { - /* match stretches into both external dictionary and current block */ - size_t const copySize = (size_t)(lowPrefix - match); - size_t const restSize = length - copySize; - LZ4_memcpy(op, dictEnd - copySize, copySize); - op += copySize; - if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ - BYTE* const endOfMatch = op + restSize; - const BYTE* copyFrom = lowPrefix; - while (op < endOfMatch) *op++ = *copyFrom++; - } else { - LZ4_memcpy(op, lowPrefix, restSize); - op += restSize; - } } - continue; - } - assert(match >= lowPrefix); + if (length <= (size_t)(lowPrefix - match)) { + /* match fits entirely within external dictionary : just copy */ + LZ4_memmove(op, + dictEnd - + (lowPrefix - match), + length); + op += length; + } else { + /* match stretches into both external dictionary and current block */ + size_t const copySize = + (size_t)(lowPrefix - match); + size_t const restSize = + length - copySize; + LZ4_memcpy(op, dictEnd - copySize, + copySize); + op += copySize; + if (restSize > + (size_t)(op - + lowPrefix)) { /* overlap copy */ + BYTE *const endOfMatch = + op + restSize; + const BYTE *copyFrom = + lowPrefix; + while (op < endOfMatch) + *op++ = *copyFrom++; + } else { + LZ4_memcpy(op, lowPrefix, + restSize); + op += restSize; + } + } + continue; + } + assert(match >= lowPrefix); - /* copy match within 
block */ - cpy = op + length; + /* copy match within block */ + cpy = op + length; - /* partialDecoding : may end anywhere within the block */ - assert(op<=oend); - if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { - size_t const mlen = MIN(length, (size_t)(oend-op)); - const BYTE* const matchEnd = match + mlen; - BYTE* const copyEnd = op + mlen; - if (matchEnd > op) { /* overlap copy */ - while (op < copyEnd) { *op++ = *match++; } - } else { - LZ4_memcpy(op, match, mlen); - } - op = copyEnd; - if (op == oend) { break; } - continue; - } + /* partialDecoding : may end anywhere within the block */ + assert(op <= oend); + if (partialDecoding && + (cpy > oend - MATCH_SAFEGUARD_DISTANCE)) { + size_t const mlen = + MIN(length, (size_t)(oend - op)); + const BYTE *const matchEnd = match + mlen; + BYTE *const copyEnd = op + mlen; + if (matchEnd > op) { /* overlap copy */ + while (op < copyEnd) { + *op++ = *match++; + } + } else { + LZ4_memcpy(op, match, mlen); + } + op = copyEnd; + if (op == oend) { + break; + } + continue; + } - if (unlikely(offset<8)) { - LZ4_write32(op, 0); /* silence msan warning when offset==0 */ - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += inc32table[offset]; - LZ4_memcpy(op+4, match, 4); - match -= dec64table[offset]; - } else { - LZ4_memcpy(op, match, 8); - match += 8; - } - op += 8; + if (unlikely(offset < 8)) { + LZ4_write32( + op, + 0); /* silence msan warning when offset==0 */ + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += inc32table[offset]; + LZ4_memcpy(op + 4, match, 4); + match -= dec64table[offset]; + } else { + LZ4_memcpy(op, match, 8); + match += 8; + } + op += 8; - if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { - BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1); - if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ - if (op < oCopyLimit) { - 
LZ4_wildCopy8(op, match, oCopyLimit); - match += oCopyLimit - op; - op = oCopyLimit; - } - while (op < cpy) { *op++ = *match++; } - } else { - LZ4_memcpy(op, match, 8); - if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); } - } - op = cpy; /* wildcopy correction */ - } + if (unlikely(cpy > oend - MATCH_SAFEGUARD_DISTANCE)) { + BYTE *const oCopyLimit = + oend - (WILDCOPYLENGTH - 1); + if (cpy > oend - LASTLITERALS) { + goto _output_error; + } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ + if (op < oCopyLimit) { + LZ4_wildCopy8(op, match, oCopyLimit); + match += oCopyLimit - op; + op = oCopyLimit; + } + while (op < cpy) { + *op++ = *match++; + } + } else { + LZ4_memcpy(op, match, 8); + if (length > 16) { + LZ4_wildCopy8(op + 8, match + 8, cpy); + } + } + op = cpy; /* wildcopy correction */ + } - /* end of decoding */ - DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst)); - return (int) (((char*)op)-dst); /* Nb of output bytes decoded */ + /* end of decoding */ + DEBUGLOG(5, "decoded %i bytes", (int)(((char *)op) - dst)); + return (int)(((char *)op) - + dst); /* Nb of output bytes decoded */ - /* Overflow error detected */ - _output_error: - return (int) (-(((const char*)ip)-src))-1; - } + /* Overflow error detected */ + _output_error: + return (int)(-(((const char *)ip) - src)) - 1; + } } - /*===== Instantiate the API decoding functions. 
=====*/ LZ4_FORCE_O2 -int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) +int LZ4_decompress_safe(const char *source, char *dest, int compressedSize, + int maxDecompressedSize) { - return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, - decode_full_block, noDict, - (BYTE*)dest, NULL, 0); + return LZ4_decompress_generic(source, dest, compressedSize, + maxDecompressedSize, decode_full_block, + noDict, (BYTE *)dest, NULL, 0); } LZ4_FORCE_O2 -int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity) +int LZ4_decompress_safe_partial(const char *src, char *dst, int compressedSize, + int targetOutputSize, int dstCapacity) { - dstCapacity = MIN(targetOutputSize, dstCapacity); - return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, - partial_decode, - noDict, (BYTE*)dst, NULL, 0); + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, + partial_decode, noDict, (BYTE *)dst, NULL, + 0); } LZ4_FORCE_O2 -int LZ4_decompress_fast(const char* source, char* dest, int originalSize) +int LZ4_decompress_fast(const char *source, char *dest, int originalSize) { - DEBUGLOG(5, "LZ4_decompress_fast"); - return LZ4_decompress_unsafe_generic( - (const BYTE*)source, (BYTE*)dest, originalSize, - 0, NULL, 0); + DEBUGLOG(5, "LZ4_decompress_fast"); + return LZ4_decompress_unsafe_generic((const BYTE *)source, (BYTE *)dest, + originalSize, 0, NULL, 0); } /*===== Instantiate a few more decoding cases, used more than once. =====*/ LZ4_FORCE_O2 /* Exported, an obsolete API function. 
*/ -int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) + int + LZ4_decompress_safe_withPrefix64k(const char *source, char *dest, + int compressedSize, int maxOutputSize) { - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - decode_full_block, withPrefix64k, - (BYTE*)dest - 64 KB, NULL, 0); + return LZ4_decompress_generic(source, dest, compressedSize, + maxOutputSize, decode_full_block, + withPrefix64k, (BYTE *)dest - 64 KB, NULL, + 0); } LZ4_FORCE_O2 -static int LZ4_decompress_safe_partial_withPrefix64k(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity) +static int LZ4_decompress_safe_partial_withPrefix64k(const char *source, + char *dest, + int compressedSize, + int targetOutputSize, + int dstCapacity) { - dstCapacity = MIN(targetOutputSize, dstCapacity); - return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity, - partial_decode, withPrefix64k, - (BYTE*)dest - 64 KB, NULL, 0); + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity, + partial_decode, withPrefix64k, + (BYTE *)dest - 64 KB, NULL, 0); } /* Another obsolete API function, paired with the previous one. 
*/ -int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) +int LZ4_decompress_fast_withPrefix64k(const char *source, char *dest, + int originalSize) { - return LZ4_decompress_unsafe_generic( - (const BYTE*)source, (BYTE*)dest, originalSize, - 64 KB, NULL, 0); + return LZ4_decompress_unsafe_generic((const BYTE *)source, (BYTE *)dest, + originalSize, 64 KB, NULL, 0); } LZ4_FORCE_O2 -static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize, - size_t prefixSize) +static int LZ4_decompress_safe_withSmallPrefix(const char *source, char *dest, + int compressedSize, + int maxOutputSize, + size_t prefixSize) { - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - decode_full_block, noDict, - (BYTE*)dest-prefixSize, NULL, 0); + return LZ4_decompress_generic(source, dest, compressedSize, + maxOutputSize, decode_full_block, noDict, + (BYTE *)dest - prefixSize, NULL, 0); } LZ4_FORCE_O2 -static int LZ4_decompress_safe_partial_withSmallPrefix(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity, - size_t prefixSize) +static int LZ4_decompress_safe_partial_withSmallPrefix( + const char *source, char *dest, int compressedSize, + int targetOutputSize, int dstCapacity, size_t prefixSize) { - dstCapacity = MIN(targetOutputSize, dstCapacity); - return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity, - partial_decode, noDict, - (BYTE*)dest-prefixSize, NULL, 0); + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity, + partial_decode, noDict, + (BYTE *)dest - prefixSize, NULL, 0); } LZ4_FORCE_O2 -int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, - int compressedSize, int maxOutputSize, - const void* dictStart, size_t dictSize) +int LZ4_decompress_safe_forceExtDict(const char *source, char *dest, + int compressedSize, int 
maxOutputSize, + const void *dictStart, size_t dictSize) { - DEBUGLOG(5, "LZ4_decompress_safe_forceExtDict"); - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - decode_full_block, usingExtDict, - (BYTE*)dest, (const BYTE*)dictStart, dictSize); + DEBUGLOG(5, "LZ4_decompress_safe_forceExtDict"); + return LZ4_decompress_generic(source, dest, compressedSize, + maxOutputSize, decode_full_block, + usingExtDict, (BYTE *)dest, + (const BYTE *)dictStart, dictSize); } LZ4_FORCE_O2 -int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest, - int compressedSize, int targetOutputSize, int dstCapacity, - const void* dictStart, size_t dictSize) +int LZ4_decompress_safe_partial_forceExtDict(const char *source, char *dest, + int compressedSize, + int targetOutputSize, + int dstCapacity, + const void *dictStart, + size_t dictSize) { - dstCapacity = MIN(targetOutputSize, dstCapacity); - return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity, - partial_decode, usingExtDict, - (BYTE*)dest, (const BYTE*)dictStart, dictSize); + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity, + partial_decode, usingExtDict, + (BYTE *)dest, (const BYTE *)dictStart, + dictSize); } LZ4_FORCE_O2 -static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize, - const void* dictStart, size_t dictSize) +static int LZ4_decompress_fast_extDict(const char *source, char *dest, + int originalSize, const void *dictStart, + size_t dictSize) { - return LZ4_decompress_unsafe_generic( - (const BYTE*)source, (BYTE*)dest, originalSize, - 0, (const BYTE*)dictStart, dictSize); + return LZ4_decompress_unsafe_generic((const BYTE *)source, (BYTE *)dest, + originalSize, 0, + (const BYTE *)dictStart, dictSize); } /* The "double dictionary" mode, for use with e.g. 
ring buffers: the first part @@ -2474,28 +3133,34 @@ static int LZ4_decompress_fast_extDict(const char* source, char* dest, int origi * These routines are used only once, in LZ4_decompress_*_continue(). */ LZ4_FORCE_INLINE -int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize, - size_t prefixSize, const void* dictStart, size_t dictSize) +int LZ4_decompress_safe_doubleDict(const char *source, char *dest, + int compressedSize, int maxOutputSize, + size_t prefixSize, const void *dictStart, + size_t dictSize) { - return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, - decode_full_block, usingExtDict, - (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); + return LZ4_decompress_generic(source, dest, compressedSize, + maxOutputSize, decode_full_block, + usingExtDict, (BYTE *)dest - prefixSize, + (const BYTE *)dictStart, dictSize); } /*===== streaming decompression functions =====*/ #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) -LZ4_streamDecode_t* LZ4_createStreamDecode(void) +LZ4_streamDecode_t *LZ4_createStreamDecode(void) { - LZ4_STATIC_ASSERT(sizeof(LZ4_streamDecode_t) >= sizeof(LZ4_streamDecode_t_internal)); - return (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t)); + LZ4_STATIC_ASSERT(sizeof(LZ4_streamDecode_t) >= + sizeof(LZ4_streamDecode_t_internal)); + return (LZ4_streamDecode_t *)ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t)); } -int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) +int LZ4_freeStreamDecode(LZ4_streamDecode_t *LZ4_stream) { - if (LZ4_stream == NULL) { return 0; } /* support free on NULL */ - FREEMEM(LZ4_stream); - return 0; + if (LZ4_stream == NULL) { + return 0; + } /* support free on NULL */ + FREEMEM(LZ4_stream); + return 0; } #endif @@ -2505,19 +3170,21 @@ int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) * Loading a size of 0 is allowed (same effect as no dictionary). 
* @return : 1 if OK, 0 if error */ -int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize) +int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode, + const char *dictionary, int dictSize) { - LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; - lz4sd->prefixSize = (size_t)dictSize; - if (dictSize) { - assert(dictionary != NULL); - lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize; - } else { - lz4sd->prefixEnd = (const BYTE*) dictionary; - } - lz4sd->externalDict = NULL; - lz4sd->extDictSize = 0; - return 1; + LZ4_streamDecode_t_internal *lz4sd = + &LZ4_streamDecode->internal_donotuse; + lz4sd->prefixSize = (size_t)dictSize; + if (dictSize) { + assert(dictionary != NULL); + lz4sd->prefixEnd = (const BYTE *)dictionary + dictSize; + } else { + lz4sd->prefixEnd = (const BYTE *)dictionary; + } + lz4sd->externalDict = NULL; + lz4sd->extDictSize = 0; + return 1; } /*! LZ4_decoderRingBufferSize() : @@ -2533,10 +3200,13 @@ int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dicti */ int LZ4_decoderRingBufferSize(int maxBlockSize) { - if (maxBlockSize < 0) return 0; - if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0; - if (maxBlockSize < 16) maxBlockSize = 16; - return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize); + if (maxBlockSize < 0) + return 0; + if (maxBlockSize > LZ4_MAX_INPUT_SIZE) + return 0; + if (maxBlockSize < 16) + maxBlockSize = 16; + return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize); } /* @@ -2547,87 +3217,104 @@ int LZ4_decoderRingBufferSize(int maxBlockSize) and indicate where it stands using LZ4_setStreamDecode() */ LZ4_FORCE_O2 -int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) +int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode, + const char *source, char *dest, + int compressedSize, int maxOutputSize) { - LZ4_streamDecode_t_internal* lz4sd = 
&LZ4_streamDecode->internal_donotuse; - int result; + LZ4_streamDecode_t_internal *lz4sd = + &LZ4_streamDecode->internal_donotuse; + int result; - if (lz4sd->prefixSize == 0) { - /* The first call, no dictionary yet. */ - assert(lz4sd->extDictSize == 0); - result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); - if (result <= 0) return result; - lz4sd->prefixSize = (size_t)result; - lz4sd->prefixEnd = (BYTE*)dest + result; - } else if (lz4sd->prefixEnd == (BYTE*)dest) { - /* They're rolling the current segment. */ - if (lz4sd->prefixSize >= 64 KB - 1) - result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); - else if (lz4sd->extDictSize == 0) - result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, - lz4sd->prefixSize); - else - result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize, - lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize += (size_t)result; - lz4sd->prefixEnd += result; - } else { - /* The buffer wraps around, or they're switching to another buffer. */ - lz4sd->extDictSize = lz4sd->prefixSize; - lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, - lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize = (size_t)result; - lz4sd->prefixEnd = (BYTE*)dest + result; - } + if (lz4sd->prefixSize == 0) { + /* The first call, no dictionary yet. */ + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_safe(source, dest, compressedSize, + maxOutputSize); + if (result <= 0) + return result; + lz4sd->prefixSize = (size_t)result; + lz4sd->prefixEnd = (BYTE *)dest + result; + } else if (lz4sd->prefixEnd == (BYTE *)dest) { + /* They're rolling the current segment. 
*/ + if (lz4sd->prefixSize >= 64 KB - 1) + result = LZ4_decompress_safe_withPrefix64k( + source, dest, compressedSize, maxOutputSize); + else if (lz4sd->extDictSize == 0) + result = LZ4_decompress_safe_withSmallPrefix( + source, dest, compressedSize, maxOutputSize, + lz4sd->prefixSize); + else + result = LZ4_decompress_safe_doubleDict( + source, dest, compressedSize, maxOutputSize, + lz4sd->prefixSize, lz4sd->externalDict, + lz4sd->extDictSize); + if (result <= 0) + return result; + lz4sd->prefixSize += (size_t)result; + lz4sd->prefixEnd += result; + } else { + /* The buffer wraps around, or they're switching to another buffer. */ + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_safe_forceExtDict( + source, dest, compressedSize, maxOutputSize, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) + return result; + lz4sd->prefixSize = (size_t)result; + lz4sd->prefixEnd = (BYTE *)dest + result; + } - return result; + return result; } LZ4_FORCE_O2 int -LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, - const char* source, char* dest, int originalSize) +LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode, + const char *source, char *dest, int originalSize) { - LZ4_streamDecode_t_internal* const lz4sd = - (assert(LZ4_streamDecode!=NULL), &LZ4_streamDecode->internal_donotuse); - int result; + LZ4_streamDecode_t_internal *const lz4sd = + (assert(LZ4_streamDecode != NULL), + &LZ4_streamDecode->internal_donotuse); + int result; - DEBUGLOG(5, "LZ4_decompress_fast_continue (toDecodeSize=%i)", originalSize); - assert(originalSize >= 0); + DEBUGLOG(5, "LZ4_decompress_fast_continue (toDecodeSize=%i)", + originalSize); + assert(originalSize >= 0); - if (lz4sd->prefixSize == 0) { - DEBUGLOG(5, "first invocation : no prefix nor extDict"); - assert(lz4sd->extDictSize == 0); - result = LZ4_decompress_fast(source, dest, originalSize); - if (result <= 0) return 
result; - lz4sd->prefixSize = (size_t)originalSize; - lz4sd->prefixEnd = (BYTE*)dest + originalSize; - } else if (lz4sd->prefixEnd == (BYTE*)dest) { - DEBUGLOG(5, "continue using existing prefix"); - result = LZ4_decompress_unsafe_generic( - (const BYTE*)source, (BYTE*)dest, originalSize, - lz4sd->prefixSize, - lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize += (size_t)originalSize; - lz4sd->prefixEnd += originalSize; - } else { - DEBUGLOG(5, "prefix becomes extDict"); - lz4sd->extDictSize = lz4sd->prefixSize; - lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; - result = LZ4_decompress_fast_extDict(source, dest, originalSize, - lz4sd->externalDict, lz4sd->extDictSize); - if (result <= 0) return result; - lz4sd->prefixSize = (size_t)originalSize; - lz4sd->prefixEnd = (BYTE*)dest + originalSize; - } + if (lz4sd->prefixSize == 0) { + DEBUGLOG(5, "first invocation : no prefix nor extDict"); + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_fast(source, dest, originalSize); + if (result <= 0) + return result; + lz4sd->prefixSize = (size_t)originalSize; + lz4sd->prefixEnd = (BYTE *)dest + originalSize; + } else if (lz4sd->prefixEnd == (BYTE *)dest) { + DEBUGLOG(5, "continue using existing prefix"); + result = LZ4_decompress_unsafe_generic( + (const BYTE *)source, (BYTE *)dest, originalSize, + lz4sd->prefixSize, lz4sd->externalDict, + lz4sd->extDictSize); + if (result <= 0) + return result; + lz4sd->prefixSize += (size_t)originalSize; + lz4sd->prefixEnd += originalSize; + } else { + DEBUGLOG(5, "prefix becomes extDict"); + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_fast_extDict(source, dest, originalSize, + lz4sd->externalDict, + lz4sd->extDictSize); + if (result <= 0) + return result; + lz4sd->prefixSize = (size_t)originalSize; + lz4sd->prefixEnd = (BYTE *)dest + originalSize; + } - return result; + return result; } 
- /* Advanced decoding functions : *_usingDict() : @@ -2635,44 +3322,67 @@ Advanced decoding functions : the dictionary must be explicitly provided within parameters */ -int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) +int LZ4_decompress_safe_usingDict(const char *source, char *dest, + int compressedSize, int maxOutputSize, + const char *dictStart, int dictSize) { - if (dictSize==0) - return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); - if (dictStart+dictSize == dest) { - if (dictSize >= 64 KB - 1) { - return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); - } - assert(dictSize >= 0); - return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize); - } - assert(dictSize >= 0); - return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize); + if (dictSize == 0) + return LZ4_decompress_safe(source, dest, compressedSize, + maxOutputSize); + if (dictStart + dictSize == dest) { + if (dictSize >= 64 KB - 1) { + return LZ4_decompress_safe_withPrefix64k( + source, dest, compressedSize, maxOutputSize); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_withSmallPrefix(source, dest, + compressedSize, + maxOutputSize, + (size_t)dictSize); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, + maxOutputSize, dictStart, + (size_t)dictSize); } -int LZ4_decompress_safe_partial_usingDict(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity, const char* dictStart, int dictSize) +int LZ4_decompress_safe_partial_usingDict(const char *source, char *dest, + int compressedSize, + int targetOutputSize, int dstCapacity, + const char *dictStart, int dictSize) { - if (dictSize==0) - return LZ4_decompress_safe_partial(source, dest, compressedSize, 
targetOutputSize, dstCapacity); - if (dictStart+dictSize == dest) { - if (dictSize >= 64 KB - 1) { - return LZ4_decompress_safe_partial_withPrefix64k(source, dest, compressedSize, targetOutputSize, dstCapacity); - } - assert(dictSize >= 0); - return LZ4_decompress_safe_partial_withSmallPrefix(source, dest, compressedSize, targetOutputSize, dstCapacity, (size_t)dictSize); - } - assert(dictSize >= 0); - return LZ4_decompress_safe_partial_forceExtDict(source, dest, compressedSize, targetOutputSize, dstCapacity, dictStart, (size_t)dictSize); + if (dictSize == 0) + return LZ4_decompress_safe_partial(source, dest, compressedSize, + targetOutputSize, + dstCapacity); + if (dictStart + dictSize == dest) { + if (dictSize >= 64 KB - 1) { + return LZ4_decompress_safe_partial_withPrefix64k( + source, dest, compressedSize, targetOutputSize, + dstCapacity); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_partial_withSmallPrefix( + source, dest, compressedSize, targetOutputSize, + dstCapacity, (size_t)dictSize); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_partial_forceExtDict( + source, dest, compressedSize, targetOutputSize, dstCapacity, + dictStart, (size_t)dictSize); } -int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) +int LZ4_decompress_fast_usingDict(const char *source, char *dest, + int originalSize, const char *dictStart, + int dictSize) { - if (dictSize==0 || dictStart+dictSize == dest) - return LZ4_decompress_unsafe_generic( - (const BYTE*)source, (BYTE*)dest, originalSize, - (size_t)dictSize, NULL, 0); - assert(dictSize >= 0); - return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize); + if (dictSize == 0 || dictStart + dictSize == dest) + return LZ4_decompress_unsafe_generic((const BYTE *)source, + (BYTE *)dest, originalSize, + (size_t)dictSize, NULL, 0); + assert(dictSize >= 0); + return LZ4_decompress_fast_extDict(source, dest, 
originalSize, + dictStart, (size_t)dictSize); } /* @@ -2681,38 +3391,43 @@ They are only provided here for compatibility with older user programs. - LZ4_uncompress is totally equivalent to LZ4_decompress_fast - LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe */ -int LZ4_uncompress (const char* source, char* dest, int outputSize) +int LZ4_uncompress(const char *source, char *dest, int outputSize) { - return LZ4_decompress_fast(source, dest, outputSize); + return LZ4_decompress_fast(source, dest, outputSize); } -int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) +int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize, + int maxOutputSize) { - return LZ4_decompress_safe(source, dest, isize, maxOutputSize); + return LZ4_decompress_safe(source, dest, isize, maxOutputSize); } /* Obsolete Streaming functions */ -int LZ4_sizeofStreamState(void) { return sizeof(LZ4_stream_t); } - -int LZ4_resetStreamState(void* state, char* inputBuffer) +int LZ4_sizeofStreamState(void) { - (void)inputBuffer; - LZ4_resetStream((LZ4_stream_t*)state); - return 0; + return sizeof(LZ4_stream_t); +} + +int LZ4_resetStreamState(void *state, char *inputBuffer) +{ + (void)inputBuffer; + LZ4_resetStream((LZ4_stream_t *)state); + return 0; } #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) -void* LZ4_create (char* inputBuffer) +void *LZ4_create(char *inputBuffer) { - (void)inputBuffer; - return LZ4_createStream(); + (void)inputBuffer; + return LZ4_createStream(); } #endif -char* LZ4_slideInputBuffer (void* state) +char *LZ4_slideInputBuffer(void *state) { - /* avoid const char * -> char * conversion warning */ - return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary; + /* avoid const char * -> char * conversion warning */ + return (char *)(uptrval)((LZ4_stream_t *)state) + ->internal_donotuse.dictionary; } -#endif /* LZ4_COMMONDEFS_ONLY */ +#endif /* 
LZ4_COMMONDEFS_ONLY */ diff --git a/lib/lz4/lz4.h b/lib/lz4/lz4.h index 905e7757d186..052153de37df 100644 --- a/lib/lz4/lz4.h +++ b/lib/lz4/lz4.h @@ -32,7 +32,7 @@ - LZ4 homepage : http://www.lz4.org - LZ4 source repository : https://github.com/lz4/lz4 */ -#if defined (__cplusplus) +#if defined(__cplusplus) extern "C" { #endif @@ -85,18 +85,20 @@ extern "C" { * Control library symbols visibility. */ #ifndef LZ4LIB_VISIBILITY -# if defined(__GNUC__) && (__GNUC__ >= 4) -# define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default"))) -# else -# define LZ4LIB_VISIBILITY -# endif -#endif -#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1) -# define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY -#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1) -# define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#if defined(__GNUC__) && (__GNUC__ >= 4) +#define LZ4LIB_VISIBILITY __attribute__((visibility("default"))) #else -# define LZ4LIB_API LZ4LIB_VISIBILITY +#define LZ4LIB_VISIBILITY +#endif +#endif +#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT == 1) +#define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY +#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT == 1) +#define LZ4LIB_API \ + __declspec(dllimport) \ + LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +#define LZ4LIB_API LZ4LIB_VISIBILITY #endif /*-************************************ @@ -128,38 +130,41 @@ extern "C" { * - See tests/freestanding.c to check its basic setup. */ #if defined(LZ4_FREESTANDING) && (LZ4_FREESTANDING == 1) -# define LZ4_HEAPMODE 0 -# define LZ4HC_HEAPMODE 0 -# define LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION 1 -# if !defined(LZ4_memcpy) -# error "LZ4_FREESTANDING requires macro 'LZ4_memcpy'." 
-# endif -# if !defined(LZ4_memset) -# error "LZ4_FREESTANDING requires macro 'LZ4_memset'." -# endif -# if !defined(LZ4_memmove) -# error "LZ4_FREESTANDING requires macro 'LZ4_memmove'." -# endif -#elif ! defined(LZ4_FREESTANDING) -# define LZ4_FREESTANDING 0 +#define LZ4_HEAPMODE 0 +#define LZ4HC_HEAPMODE 0 +#define LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION 1 +#if !defined(LZ4_memcpy) +#error "LZ4_FREESTANDING requires macro 'LZ4_memcpy'." +#endif +#if !defined(LZ4_memset) +#error "LZ4_FREESTANDING requires macro 'LZ4_memset'." +#endif +#if !defined(LZ4_memmove) +#error "LZ4_FREESTANDING requires macro 'LZ4_memmove'." +#endif +#elif !defined(LZ4_FREESTANDING) +#define LZ4_FREESTANDING 0 #endif - /*------ Version ------*/ -#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ -#define LZ4_VERSION_MINOR 10 /* for new (non-breaking) interface capabilities */ -#define LZ4_VERSION_RELEASE 0 /* for tweaks, bug-fixes, or development */ +#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ +#define LZ4_VERSION_MINOR 10 /* for new (non-breaking) interface capabilities */ +#define LZ4_VERSION_RELEASE 0 /* for tweaks, bug-fixes, or development */ -#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) +#define LZ4_VERSION_NUMBER \ + (LZ4_VERSION_MAJOR * 100 * 100 + LZ4_VERSION_MINOR * 100 + \ + LZ4_VERSION_RELEASE) #define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE #define LZ4_QUOTE(str) #str #define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str) -#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION) /* requires v1.7.3+ */ - -LZ4LIB_API int LZ4_versionNumber (void); /**< library version number; useful to check dll version; requires v1.3.0+ */ -LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; useful to check dll version; requires v1.7.5+ */ +#define LZ4_VERSION_STRING \ + LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION) /* requires v1.7.3+ */ 
+LZ4LIB_API int LZ4_versionNumber( + void); /**< library version number; useful to check dll version; requires v1.3.0+ */ +LZ4LIB_API const char *LZ4_versionString( + void); /**< library version string; useful to check dll version; requires v1.7.5+ */ /*-************************************ * Tuning memory usage @@ -173,7 +178,7 @@ LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; * Default value is 14, for 16KB, which nicely fits into most L1 caches. */ #ifndef LZ4_MEMORY_USAGE -# define LZ4_MEMORY_USAGE LZ4_MEMORY_USAGE_DEFAULT +#define LZ4_MEMORY_USAGE LZ4_MEMORY_USAGE_DEFAULT #endif /* These are absolute limits, they should not be changed by users */ @@ -182,11 +187,11 @@ LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; #define LZ4_MEMORY_USAGE_MAX 20 #if (LZ4_MEMORY_USAGE < LZ4_MEMORY_USAGE_MIN) -# error "LZ4_MEMORY_USAGE is too small !" +#error "LZ4_MEMORY_USAGE is too small !" #endif #if (LZ4_MEMORY_USAGE > LZ4_MEMORY_USAGE_MAX) -# error "LZ4_MEMORY_USAGE is too large !" +#error "LZ4_MEMORY_USAGE is too large !" #endif /*-************************************ @@ -206,7 +211,8 @@ LZ4LIB_API const char* LZ4_versionString (void); /**< library version string; * or 0 if compression fails * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer). */ -LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity, void *wrkmem); +LZ4LIB_API int LZ4_compress_default(const char *src, char *dst, int srcSize, + int dstCapacity, void *wrkmem); /*! LZ4_decompress_safe() : * @compressedSize : is the exact complete size of the compressed block. @@ -223,14 +229,17 @@ LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int * The implementation is free to send / store / derive this information in whichever way is most beneficial. 
* If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead. */ -LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity); - +LZ4LIB_API int LZ4_decompress_safe(const char *src, char *dst, + int compressedSize, int dstCapacity); /*-************************************ * Advanced Functions **************************************/ -#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ -#define LZ4_COMPRESSBOUND(isize) ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) +#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */ +#define LZ4_COMPRESSBOUND(isize) \ + ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? \ + 0 : \ + (isize) + ((isize) / 255) + 16) /*! LZ4_compressBound() : Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible) @@ -251,8 +260,8 @@ LZ4LIB_API int LZ4_compressBound(int inputSize); Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c). Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c). */ -LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); - +LZ4LIB_API int LZ4_compress_fast(const char *src, char *dst, int srcSize, + int dstCapacity, int acceleration); /*! LZ4_compress_fast_extState() : * Same as LZ4_compress_fast(), using an externally allocated memory space for its state. @@ -261,7 +270,9 @@ LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int d * Then, provide this buffer as `void* state` to compression function. 
*/ LZ4LIB_API int LZ4_sizeofState(void); -LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); +LZ4LIB_API int LZ4_compress_fast_extState(void *state, const char *src, + char *dst, int srcSize, + int dstCapacity, int acceleration); /*! LZ4_compress_destSize() : * Reverse the logic : compresses as much data as possible from 'src' buffer @@ -287,7 +298,8 @@ LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* d * a dstCapacity which is > decompressedSize, by at least 1 byte. * See https://github.com/lz4/lz4/issues/859 for details */ -LZ4LIB_API int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize); +LZ4LIB_API int LZ4_compress_destSize(const char *src, char *dst, + int *srcSizePtr, int targetDstSize); /*! LZ4_decompress_safe_partial() : * Decompress an LZ4 compressed block, of size 'srcSize' at position 'src', @@ -323,13 +335,14 @@ LZ4LIB_API int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr * then targetOutputSize **MUST** be <= block's decompressed size. * Otherwise, *silent corruption will occur*. */ -LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity); - +LZ4LIB_API int LZ4_decompress_safe_partial(const char *src, char *dst, + int srcSize, int targetOutputSize, + int dstCapacity); /*-********************************************* * Streaming Compression Functions ***********************************************/ -typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */ +typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */ /*! Note about RC_INVOKED @@ -344,10 +357,11 @@ typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */ "#if !defined(RC_INVOKED) ... #endif" block that means "skip this block when rc.exe is trying to read it". 
*/ -#if !defined(RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros */ +#if !defined( \ + RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros */ #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) -LZ4LIB_API LZ4_stream_t* LZ4_createStream(void); -LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr); +LZ4LIB_API LZ4_stream_t *LZ4_createStream(void); +LZ4LIB_API int LZ4_freeStream(LZ4_stream_t *streamPtr); #endif /* !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) */ #endif @@ -373,7 +387,7 @@ LZ4LIB_API int LZ4_freeStream (LZ4_stream_t* streamPtr); * The *extState* functions perform their own resets. * Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive. */ -LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr); +LZ4LIB_API void LZ4_resetStream_fast(LZ4_stream_t *streamPtr); /*! LZ4_loadDict() : * Use this function to reference a static dictionary into LZ4_stream_t. @@ -386,7 +400,8 @@ LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr); * Loading a size of 0 is allowed, and is the same as reset. * @return : loaded dictionary size, in bytes (note: only the last 64 KB are loaded) */ -LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize); +LZ4LIB_API int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary, + int dictSize); /*! LZ4_loadDictSlow() : v1.10.0+ * Same as LZ4_loadDict(), @@ -395,7 +410,8 @@ LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, in * The extra-cpu cost is likely worth it if the dictionary is re-used across multiple sessions. * @return : loaded dictionary size, in bytes (note: only the last 64 KB are loaded) */ -LZ4LIB_API int LZ4_loadDictSlow(LZ4_stream_t* streamPtr, const char* dictionary, int dictSize); +LZ4LIB_API int LZ4_loadDictSlow(LZ4_stream_t *streamPtr, const char *dictionary, + int dictSize); /*! 
LZ4_attach_dictionary() : stable since v1.10.0 * @@ -429,9 +445,8 @@ LZ4LIB_API int LZ4_loadDictSlow(LZ4_stream_t* streamPtr, const char* dictionary, * just employ the regular LZ4_setStreamDecode() for streaming, * or the stateless LZ4_decompress_safe_usingDict() for one-shot decompression. */ -LZ4LIB_API void -LZ4_attach_dictionary(LZ4_stream_t* workingStream, - const LZ4_stream_t* dictionaryStream); +LZ4LIB_API void LZ4_attach_dictionary(LZ4_stream_t *workingStream, + const LZ4_stream_t *dictionaryStream); /*! LZ4_compress_fast_continue() : * Compress 'src' content using data from previously compressed blocks, for better compression ratio. @@ -456,7 +471,10 @@ LZ4_attach_dictionary(LZ4_stream_t* workingStream, * * Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed. */ -LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); +LZ4LIB_API int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr, + const char *src, char *dst, + int srcSize, int dstCapacity, + int acceleration); /*! LZ4_saveDict() : * If last 64KB data cannot be guaranteed to remain available at its current memory location, @@ -465,23 +483,24 @@ LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* * but is much faster, because LZ4_saveDict() doesn't need to rebuild tables. * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error. 
*/ -LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize); - +LZ4LIB_API int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer, + int maxDictSize); /*-********************************************** * Streaming Decompression Functions * Bufferless synchronous API ************************************************/ -typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */ +typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */ /*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() : * creation / destruction of streaming decompression tracking context. * A tracking context can be re-used multiple times. */ -#if !defined(RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros */ +#if !defined( \ + RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros */ #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) -LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void); -LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream); +LZ4LIB_API LZ4_streamDecode_t *LZ4_createStreamDecode(void); +LZ4LIB_API int LZ4_freeStreamDecode(LZ4_streamDecode_t *LZ4_stream); #endif /* !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) */ #endif @@ -492,7 +511,8 @@ LZ4LIB_API int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_str * Dictionary is presumed stable : it must remain accessible and unmodified during next decompression. * @return : 1 if OK, 0 if error */ -LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize); +LZ4LIB_API int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode, + const char *dictionary, int dictSize); /*! 
LZ4_decoderRingBufferSize() : v1.8.2+ * Note : in a ring buffer scenario (optional), @@ -506,7 +526,9 @@ LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const * or 0 if there is an error (invalid maxBlockSize). */ LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize); -#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize)) /* for static allocation; maxBlockSize presumed valid */ +#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) \ + (65536 + 14 + \ + (maxBlockSize)) /* for static allocation; maxBlockSize presumed valid */ /*! LZ4_decompress_safe_continue() : * This decoding function allows decompression of consecutive blocks in "streaming" mode. @@ -547,10 +569,9 @@ LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize); * then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block. */ LZ4LIB_API int -LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, - const char* src, char* dst, - int srcSize, int dstCapacity); - +LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode, + const char *src, char *dst, int srcSize, + int dstCapacity); /*! LZ4_decompress_safe_usingDict() : * Works the same as @@ -560,10 +581,10 @@ LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, * Performance tip : Decompression speed can be substantially increased * when dst == dictStart + dictSize. */ -LZ4LIB_API int -LZ4_decompress_safe_usingDict(const char* src, char* dst, - int srcSize, int dstCapacity, - const char* dictStart, int dictSize); +LZ4LIB_API int LZ4_decompress_safe_usingDict(const char *src, char *dst, + int srcSize, int dstCapacity, + const char *dictStart, + int dictSize); /*! 
LZ4_decompress_safe_partial_usingDict() : * Behaves the same as LZ4_decompress_safe_partial() @@ -571,15 +592,12 @@ LZ4_decompress_safe_usingDict(const char* src, char* dst, * Performance tip : Decompression speed can be substantially increased * when dst == dictStart + dictSize. */ -LZ4LIB_API int -LZ4_decompress_safe_partial_usingDict(const char* src, char* dst, - int compressedSize, - int targetOutputSize, int maxOutputSize, - const char* dictStart, int dictSize); +LZ4LIB_API int LZ4_decompress_safe_partial_usingDict( + const char *src, char *dst, int compressedSize, int targetOutputSize, + int maxOutputSize, const char *dictStart, int dictSize); #endif /* LZ4_H_2983827168210 */ - /*^************************************* * !!!!!! STATIC LINKING ONLY !!!!!! ***************************************/ @@ -610,12 +628,11 @@ LZ4_decompress_safe_partial_usingDict(const char* src, char* dst, #define LZ4_STATIC_3504398509 #ifdef LZ4_PUBLISH_STATIC_FUNCTIONS -# define LZ4LIB_STATIC_API LZ4LIB_API +#define LZ4LIB_STATIC_API LZ4LIB_API #else -# define LZ4LIB_STATIC_API +#define LZ4LIB_STATIC_API #endif - /*! LZ4_compress_fast_extState_fastReset() : * A variant of LZ4_compress_fast_extState(). * @@ -626,13 +643,18 @@ LZ4_decompress_safe_partial_usingDict(const char* src, char* dst, * this function initializes the provided state with a call to something like LZ4_resetStream_fast() * while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream(). */ -LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); +LZ4LIB_STATIC_API int +LZ4_compress_fast_extState_fastReset(void *state, const char *src, char *dst, + int srcSize, int dstCapacity, + int acceleration); /*! LZ4_compress_destSize_extState() : introduced in v1.10.0 * Same as LZ4_compress_destSize(), but using an externally allocated state. 
* Also: exposes @acceleration */ -int LZ4_compress_destSize_extState(void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration); +int LZ4_compress_destSize_extState(void *state, const char *src, char *dst, + int *srcSizePtr, int targetDstSize, + int acceleration); /*! In-place compression and decompression * @@ -685,20 +707,26 @@ int LZ4_compress_destSize_extState(void* state, const char* src, char* dst, int* * so it's possible to reduce memory requirements by playing with them. */ -#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32) -#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */ +#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) \ + (((compressedSize) >> 8) + 32) +#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) \ + ((decompressedSize) + \ + LZ4_DECOMPRESS_INPLACE_MARGIN( \ + decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. 
note2: margin is overestimated a bit, since it could use compressedSize instead */ -#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */ -# define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */ +#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */ +#define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */ #endif -#define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */ -#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */ - -#endif /* LZ4_STATIC_3504398509 */ -#endif /* LZ4_STATIC_LINKING_ONLY */ - +#define LZ4_COMPRESS_INPLACE_MARGIN \ + (LZ4_DISTANCE_MAX + \ + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */ +#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) \ + ((maxCompressedSize) + \ + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */ +#endif /* LZ4_STATIC_3504398509 */ +#endif /* LZ4_STATIC_LINKING_ONLY */ #ifndef LZ4_H_98237428734687 #define LZ4_H_98237428734687 @@ -710,14 +738,15 @@ int LZ4_compress_destSize_extState(void* state, const char* src, char* dst, int* * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`. * Accessing members will expose user code to API and/or ABI break in future versions of the library. 
**************************************************************/ -#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2) +#define LZ4_HASHLOG (LZ4_MEMORY_USAGE - 2) #define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE) -#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */ +#define LZ4_HASH_SIZE_U32 \ + (1 << LZ4_HASHLOG) /* required as macro for static allocation */ #include #include -typedef int8_t LZ4_i8; -typedef uint8_t LZ4_byte; +typedef int8_t LZ4_i8; +typedef uint8_t LZ4_byte; typedef uint16_t LZ4_u16; typedef uint32_t LZ4_u32; @@ -729,22 +758,23 @@ typedef uint32_t LZ4_u32; typedef struct LZ4_stream_t_internal LZ4_stream_t_internal; struct LZ4_stream_t_internal { - LZ4_u32 hashTable[LZ4_HASH_SIZE_U32]; - const LZ4_byte* dictionary; - const LZ4_stream_t_internal* dictCtx; - LZ4_u32 currentOffset; - LZ4_u32 tableType; - LZ4_u32 dictSize; - /* Implicit padding to ensure structure is aligned */ + LZ4_u32 hashTable[LZ4_HASH_SIZE_U32]; + const LZ4_byte *dictionary; + const LZ4_stream_t_internal *dictCtx; + LZ4_u32 currentOffset; + LZ4_u32 tableType; + LZ4_u32 dictSize; + /* Implicit padding to ensure structure is aligned */ }; -#define LZ4_STREAM_MINSIZE ((1UL << (LZ4_MEMORY_USAGE)) + 32) /* static size, for inter-version compatibility */ +#define LZ4_STREAM_MINSIZE \ + ((1UL << (LZ4_MEMORY_USAGE)) + \ + 32) /* static size, for inter-version compatibility */ union LZ4_stream_u { - char minStateSize[LZ4_STREAM_MINSIZE]; - LZ4_stream_t_internal internal_donotuse; + char minStateSize[LZ4_STREAM_MINSIZE]; + LZ4_stream_t_internal internal_donotuse; }; /* previously typedef'd to LZ4_stream_t */ - /*! LZ4_initStream() : v1.9.0+ * An LZ4_stream_t structure must be initialized at least once. * This is automatically done when invoking LZ4_createStream(), @@ -759,8 +789,7 @@ union LZ4_stream_u { * Note2: An LZ4_stream_t structure guarantees correct alignment and size. 
* Note3: Before v1.9.0, use LZ4_resetStream() instead **/ -LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* stateBuffer, size_t size); - +LZ4LIB_API LZ4_stream_t *LZ4_initStream(void *stateBuffer, size_t size); /*! LZ4_streamDecode_t : * Never ever use below internal definitions directly ! @@ -768,19 +797,17 @@ LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* stateBuffer, size_t size); * If you need static allocation, declare or allocate an LZ4_streamDecode_t object. **/ typedef struct { - const LZ4_byte* externalDict; - const LZ4_byte* prefixEnd; - size_t extDictSize; - size_t prefixSize; + const LZ4_byte *externalDict; + const LZ4_byte *prefixEnd; + size_t extDictSize; + size_t prefixSize; } LZ4_streamDecode_t_internal; #define LZ4_STREAMDECODE_MINSIZE 32 union LZ4_streamDecode_u { - char minStateSize[LZ4_STREAMDECODE_MINSIZE]; - LZ4_streamDecode_t_internal internal_donotuse; -} ; /* previously typedef'd to LZ4_streamDecode_t */ - - + char minStateSize[LZ4_STREAMDECODE_MINSIZE]; + LZ4_streamDecode_t_internal internal_donotuse; +}; /* previously typedef'd to LZ4_streamDecode_t */ /*-************************************ * Obsolete Functions @@ -798,33 +825,54 @@ union LZ4_streamDecode_u { * before including the header file. 
*/ #ifdef LZ4_DISABLE_DEPRECATE_WARNINGS -# define LZ4_DEPRECATED(message) /* disable deprecation warnings */ +#define LZ4_DEPRECATED(message) /* disable deprecation warnings */ #else -# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ -# define LZ4_DEPRECATED(message) [[deprecated(message)]] -# elif defined(_MSC_VER) -# define LZ4_DEPRECATED(message) __declspec(deprecated(message)) -# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45)) -# define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) -# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31) -# define LZ4_DEPRECATED(message) __attribute__((deprecated)) -# else -# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler") -# define LZ4_DEPRECATED(message) /* disabled */ -# endif +#if defined(__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +#define LZ4_DEPRECATED(message) [[deprecated(message)]] +#elif defined(_MSC_VER) +#define LZ4_DEPRECATED(message) __declspec(deprecated(message)) +#elif defined(__clang__) || \ + (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45)) +#define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) +#elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31) +#define LZ4_DEPRECATED(message) __attribute__((deprecated)) +#else +#pragma message( \ + "WARNING: LZ4_DEPRECATED needs custom implementation for this compiler") +#define LZ4_DEPRECATED(message) /* disabled */ +#endif #endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */ /*! 
Obsolete compression functions (since v1.7.3) */ -LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize); -LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize); -LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); -LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); -LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize); -LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_default() instead") +LZ4LIB_API int LZ4_compress(const char *src, char *dest, int srcSize); +LZ4_DEPRECATED("use LZ4_compress_default() instead") +LZ4LIB_API int LZ4_compress_limitedOutput(const char *src, char *dest, + int srcSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") +LZ4LIB_API int LZ4_compress_withState(void *state, const char *source, + char *dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") +LZ4LIB_API int LZ4_compress_limitedOutput_withState(void *state, + const char *source, + char *dest, int inputSize, + int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") +LZ4LIB_API int LZ4_compress_continue(LZ4_stream_t *LZ4_streamPtr, + const char *source, char *dest, + int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") +LZ4LIB_API int 
LZ4_compress_limitedOutput_continue(LZ4_stream_t *LZ4_streamPtr, + const char *source, + char *dest, int inputSize, + int maxOutputSize); /*! Obsolete decompression functions (since v1.8.0) */ -LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize); -LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_decompress_fast() instead") +LZ4LIB_API int LZ4_uncompress(const char *source, char *dest, int outputSize); +LZ4_DEPRECATED("use LZ4_decompress_safe() instead") +LZ4LIB_API int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, + int isize, int maxOutputSize); /* Obsolete streaming functions (since v1.7.0) * degraded functionality; do not use! @@ -836,14 +884,23 @@ LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompres * achieved will therefore be no better than compressing each chunk * independently. */ -LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer); -LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStreamState(void); -LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer); -LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state); +LZ4_DEPRECATED("Use LZ4_createStream() instead") +LZ4LIB_API void *LZ4_create(char *inputBuffer); +LZ4_DEPRECATED("Use LZ4_createStream() instead") +LZ4LIB_API int LZ4_sizeofStreamState(void); +LZ4_DEPRECATED("Use LZ4_resetStream() instead") +LZ4LIB_API int LZ4_resetStreamState(void *state, char *inputBuffer); +LZ4_DEPRECATED("Use LZ4_saveDict() instead") +LZ4LIB_API char *LZ4_slideInputBuffer(void *state); /*! 
Obsolete streaming decoding functions (since v1.7.0) */ -LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize); -LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") +LZ4LIB_API int LZ4_decompress_safe_withPrefix64k(const char *src, char *dst, + int compressedSize, + int maxDstSize); +LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") +LZ4LIB_API int LZ4_decompress_fast_withPrefix64k(const char *src, char *dst, + int originalSize); /*! Obsolete LZ4_decompress_fast variants (since v1.9.0) : * These functions used to be faster than LZ4_decompress_safe(), @@ -871,13 +928,22 @@ LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4 * But they may happen if input data is invalid (error or intentional tampering). * As a consequence, use these functions in trusted environments with trusted data **only**. */ -LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_partial() instead") -LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize); -LZ4_DEPRECATED("This function is deprecated and unsafe. Consider migrating towards LZ4_decompress_safe_continue() instead. " - "Note that the contract will change (requires block's compressed size, instead of decompressed size)") -LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize); -LZ4_DEPRECATED("This function is deprecated and unsafe. 
Consider using LZ4_decompress_safe_partial_usingDict() instead") -LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize); +LZ4_DEPRECATED( + "This function is deprecated and unsafe. Consider using LZ4_decompress_safe_partial() instead") +LZ4LIB_API int LZ4_decompress_fast(const char *src, char *dst, + int originalSize); +LZ4_DEPRECATED( + "This function is deprecated and unsafe. Consider migrating towards LZ4_decompress_safe_continue() instead. " + "Note that the contract will change (requires block's compressed size, instead of decompressed size)") +LZ4LIB_API int +LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode, + const char *src, char *dst, int originalSize); +LZ4_DEPRECATED( + "This function is deprecated and unsafe. Consider using LZ4_decompress_safe_partial_usingDict() instead") +LZ4LIB_API int LZ4_decompress_fast_usingDict(const char *src, char *dst, + int originalSize, + const char *dictStart, + int dictSize); /*! LZ4_resetStream() : * An LZ4_stream_t structure must be initialized at least once. @@ -885,12 +951,10 @@ LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int or * Consider switching to LZ4_initStream(), * invoking LZ4_resetStream() will trigger deprecation warnings in the future. */ -LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr); - +LZ4LIB_API void LZ4_resetStream(LZ4_stream_t *streamPtr); #endif /* LZ4_H_98237428734687 */ - -#if defined (__cplusplus) +#if defined(__cplusplus) } #endif diff --git a/lib/lz4/lz4hc.c b/lib/lz4/lz4hc.c index ba8efff7dd67..fe76c0e870a9 100644 --- a/lib/lz4/lz4hc.c +++ b/lib/lz4/lz4hc.c @@ -33,7 +33,6 @@ */ /* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */ - /* ************************************* * Tuning Parameter ***************************************/ @@ -42,7 +41,6 @@ #define LZ4_HC_STATIC_LINKING_ONLY #include "lz4hc.h" - /*! 
HEAPMODE : * Select how stateless HC compression functions like `LZ4_compress_HC()` * allocate memory for their workspace: @@ -50,820 +48,997 @@ * Since workspace is rather large, heap mode is recommended. **/ #ifndef LZ4HC_HEAPMODE -# define LZ4HC_HEAPMODE 1 +#define LZ4HC_HEAPMODE 1 #endif - /*=== Shared lz4.c code ===*/ #ifndef LZ4_SRC_INCLUDED -# if defined(__GNUC__) -# pragma GCC diagnostic ignored "-Wunused-function" -# endif -# if defined (__clang__) -# pragma clang diagnostic ignored "-Wunused-function" -# endif -# define LZ4_COMMONDEFS_ONLY -# include "lz4.c" /* LZ4_count, constants, mem */ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wunused-function" +#endif +#if defined(__clang__) +#pragma clang diagnostic ignored "-Wunused-function" +#endif +#define LZ4_COMMONDEFS_ONLY +#include "lz4.c" /* LZ4_count, constants, mem */ #endif - /*=== Enums ===*/ typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive; - /*=== Constants ===*/ -#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) -#define LZ4_OPT_NUM (1<<12) - +#define OPTIMAL_ML (int)((ML_MASK - 1) + MINMATCH) +#define LZ4_OPT_NUM (1 << 12) /*=== Macros ===*/ -#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) -#define MAX(a,b) ( (a) > (b) ? (a) : (b) ) - +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define MAX(a, b) ((a) > (b) ? 
(a) : (b)) /*=== Levels definition ===*/ typedef enum { lz4mid, lz4hc, lz4opt } lz4hc_strat_e; typedef struct { - lz4hc_strat_e strat; - int nbSearches; - U32 targetLength; + lz4hc_strat_e strat; + int nbSearches; + U32 targetLength; } cParams_t; -static const cParams_t k_clTable[LZ4HC_CLEVEL_MAX+1] = { - { lz4mid, 2, 16 }, /* 0, unused */ - { lz4mid, 2, 16 }, /* 1, unused */ - { lz4mid, 2, 16 }, /* 2 */ - { lz4hc, 4, 16 }, /* 3 */ - { lz4hc, 8, 16 }, /* 4 */ - { lz4hc, 16, 16 }, /* 5 */ - { lz4hc, 32, 16 }, /* 6 */ - { lz4hc, 64, 16 }, /* 7 */ - { lz4hc, 128, 16 }, /* 8 */ - { lz4hc, 256, 16 }, /* 9 */ - { lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/ - { lz4opt, 512,128 }, /*11 */ - { lz4opt,16384,LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */ +static const cParams_t k_clTable[LZ4HC_CLEVEL_MAX + 1] = { + { lz4mid, 2, 16 }, /* 0, unused */ + { lz4mid, 2, 16 }, /* 1, unused */ + { lz4mid, 2, 16 }, /* 2 */ + { lz4hc, 4, 16 }, /* 3 */ + { lz4hc, 8, 16 }, /* 4 */ + { lz4hc, 16, 16 }, /* 5 */ + { lz4hc, 32, 16 }, /* 6 */ + { lz4hc, 64, 16 }, /* 7 */ + { lz4hc, 128, 16 }, /* 8 */ + { lz4hc, 256, 16 }, /* 9 */ + { lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/ + { lz4opt, 512, 128 }, /*11 */ + { lz4opt, 16384, LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */ }; static cParams_t LZ4HC_getCLevelParams(int cLevel) { - /* note : clevel convention is a bit different from lz4frame, + /* note : clevel convention is a bit different from lz4frame, * possibly something worth revisiting for consistency */ - if (cLevel < 1) - cLevel = LZ4HC_CLEVEL_DEFAULT; - cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel); - return k_clTable[cLevel]; + if (cLevel < 1) + cLevel = LZ4HC_CLEVEL_DEFAULT; + cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel); + return k_clTable[cLevel]; } - /*=== Hashing ===*/ #define LZ4HC_HASHSIZE 4 -#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG)) -static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); } +#define HASH_FUNCTION(i) \ + (((i) * 
2654435761U) >> ((MINMATCH * 8) - LZ4HC_HASH_LOG)) +static U32 LZ4HC_hashPtr(const void *ptr) +{ + return HASH_FUNCTION(LZ4_read32(ptr)); +} -#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2) +#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS == 2) /* lie to the compiler about data alignment; use with caution */ -static U64 LZ4_read64(const void* memPtr) { return *(const U64*) memPtr; } +static U64 LZ4_read64(const void *memPtr) +{ + return *(const U64 *)memPtr; +} -#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1) +#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS == 1) /* __pack instructions are safer, but compiler specific */ LZ4_PACK(typedef struct { U64 u64; }) LZ4_unalign64; -static U64 LZ4_read64(const void* ptr) { return ((const LZ4_unalign64*)ptr)->u64; } - -#else /* safe and portable access using memcpy() */ -static U64 LZ4_read64(const void* memPtr) +static U64 LZ4_read64(const void *ptr) { - U64 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; + return ((const LZ4_unalign64 *)ptr)->u64; +} + +#else /* safe and portable access using memcpy() */ +static U64 LZ4_read64(const void *memPtr) +{ + U64 val; + LZ4_memcpy(&val, memPtr, sizeof(val)); + return val; } #endif /* LZ4_FORCE_MEMORY_ACCESS */ #define LZ4MID_HASHSIZE 8 -#define LZ4MID_HASHLOG (LZ4HC_HASH_LOG-1) +#define LZ4MID_HASHLOG (LZ4HC_HASH_LOG - 1) #define LZ4MID_HASHTABLESIZE (1 << LZ4MID_HASHLOG) -static U32 LZ4MID_hash4(U32 v) { return (v * 2654435761U) >> (32-LZ4MID_HASHLOG); } -static U32 LZ4MID_hash4Ptr(const void* ptr) { return LZ4MID_hash4(LZ4_read32(ptr)); } +static U32 LZ4MID_hash4(U32 v) +{ + return (v * 2654435761U) >> (32 - LZ4MID_HASHLOG); +} +static U32 LZ4MID_hash4Ptr(const void *ptr) +{ + return LZ4MID_hash4(LZ4_read32(ptr)); +} /* note: hash7 hashes the lower 56-bits. 
* It presumes input was read using little endian.*/ -static U32 LZ4MID_hash7(U64 v) { return (U32)(((v << (64-56)) * 58295818150454627ULL) >> (64-LZ4MID_HASHLOG)) ; } -static U64 LZ4_readLE64(const void* memPtr); -static U32 LZ4MID_hash8Ptr(const void* ptr) { return LZ4MID_hash7(LZ4_readLE64(ptr)); } - -static U64 LZ4_readLE64(const void* memPtr) +static U32 LZ4MID_hash7(U64 v) { - if (LZ4_isLittleEndian()) { - return LZ4_read64(memPtr); - } else { - const BYTE* p = (const BYTE*)memPtr; - /* note: relies on the compiler to simplify this expression */ - return (U64)p[0] | ((U64)p[1]<<8) | ((U64)p[2]<<16) | ((U64)p[3]<<24) - | ((U64)p[4]<<32) | ((U64)p[5]<<40) | ((U64)p[6]<<48) | ((U64)p[7]<<56); - } + return (U32)(((v << (64 - 56)) * 58295818150454627ULL) >> + (64 - LZ4MID_HASHLOG)); +} +static U64 LZ4_readLE64(const void *memPtr); +static U32 LZ4MID_hash8Ptr(const void *ptr) +{ + return LZ4MID_hash7(LZ4_readLE64(ptr)); } +static U64 LZ4_readLE64(const void *memPtr) +{ + if (LZ4_isLittleEndian()) { + return LZ4_read64(memPtr); + } else { + const BYTE *p = (const BYTE *)memPtr; + /* note: relies on the compiler to simplify this expression */ + return (U64)p[0] | ((U64)p[1] << 8) | ((U64)p[2] << 16) | + ((U64)p[3] << 24) | ((U64)p[4] << 32) | + ((U64)p[5] << 40) | ((U64)p[6] << 48) | + ((U64)p[7] << 56); + } +} /*=== Count match length ===*/ LZ4_FORCE_INLINE unsigned LZ4HC_NbCommonBytes32(U32 val) { - assert(val != 0); - if (LZ4_isLittleEndian()) { -# if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanReverse(&r, val); - return (unsigned)((31 - r) >> 3); -# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ - ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ - !defined(LZ4_FORCE_SW_BITCOUNT) - return (unsigned)__builtin_clz(val) >> 3; -# else - val >>= 8; - val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) | - (val + 0x00FF0000)) >> 24; - return (unsigned)val ^ 3; -# endif - } else { -# if 
defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanForward(&r, val); - return (unsigned)(r >> 3); -# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ - ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ - !defined(LZ4_FORCE_SW_BITCOUNT) - return (unsigned)__builtin_ctz(val) >> 3; -# else - const U32 m = 0x01010101; - return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24; -# endif - } + assert(val != 0); + if (LZ4_isLittleEndian()) { +#if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanReverse(&r, val); + return (unsigned)((31 - r) >> 3); +#elif (defined(__clang__) || \ + (defined(__GNUC__) && \ + ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_clz(val) >> 3; +#else + val >>= 8; + val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) | + (val + 0x00FF0000)) >> + 24; + return (unsigned)val ^ 3; +#endif + } else { +#if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanForward(&r, val); + return (unsigned)(r >> 3); +#elif (defined(__clang__) || \ + (defined(__GNUC__) && \ + ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_ctz(val) >> 3; +#else + const U32 m = 0x01010101; + return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24; +#endif + } } /** LZ4HC_countBack() : * @return : negative value, nb of common bytes before ip/match */ LZ4_FORCE_INLINE -int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, - const BYTE* const iMin, const BYTE* const mMin) +int LZ4HC_countBack(const BYTE *const ip, const BYTE *const match, + const BYTE *const iMin, const BYTE *const mMin) { - int back = 0; - int const min = (int)MAX(iMin - ip, mMin - match); - assert(min <= 0); - assert(ip >= iMin); assert((size_t)(ip-iMin) < 
(1U<<31)); - assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31)); + int back = 0; + int const min = (int)MAX(iMin - ip, mMin - match); + assert(min <= 0); + assert(ip >= iMin); + assert((size_t)(ip - iMin) < (1U << 31)); + assert(match >= mMin); + assert((size_t)(match - mMin) < (1U << 31)); - while ((back - min) > 3) { - U32 const v = LZ4_read32(ip + back - 4) ^ LZ4_read32(match + back - 4); - if (v) { - return (back - (int)LZ4HC_NbCommonBytes32(v)); - } else back -= 4; /* 4-byte step */ - } - /* check remainder if any */ - while ( (back > min) - && (ip[back-1] == match[back-1]) ) - back--; - return back; + while ((back - min) > 3) { + U32 const v = LZ4_read32(ip + back - 4) ^ + LZ4_read32(match + back - 4); + if (v) { + return (back - (int)LZ4HC_NbCommonBytes32(v)); + } else + back -= 4; /* 4-byte step */ + } + /* check remainder if any */ + while ((back > min) && (ip[back - 1] == match[back - 1])) + back--; + return back; } /*=== Chain table updates ===*/ -#define DELTANEXTU16(table, pos) table[(U16)(pos)] /* faster */ +#define DELTANEXTU16(table, pos) table[(U16)(pos)] /* faster */ /* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */ #define UPDATABLE(ip, op, anchor) &ip, &op, &anchor - /************************************** * Init **************************************/ -static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4) +static void LZ4HC_clearTables(LZ4HC_CCtx_internal *hc4) { - MEM_INIT(hc4->hashTable, 0, sizeof(hc4->hashTable)); - MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); + MEM_INIT(hc4->hashTable, 0, sizeof(hc4->hashTable)); + MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); } -static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start) +static void LZ4HC_init_internal(LZ4HC_CCtx_internal *hc4, const BYTE *start) { - size_t const bufferSize = (size_t)(hc4->end - hc4->prefixStart); - size_t newStartingOffset = bufferSize + hc4->dictLimit; - DEBUGLOG(5, 
"LZ4HC_init_internal"); - assert(newStartingOffset >= bufferSize); /* check overflow */ - if (newStartingOffset > 1 GB) { - LZ4HC_clearTables(hc4); - newStartingOffset = 0; - } - newStartingOffset += 64 KB; - hc4->nextToUpdate = (U32)newStartingOffset; - hc4->prefixStart = start; - hc4->end = start; - hc4->dictStart = start; - hc4->dictLimit = (U32)newStartingOffset; - hc4->lowLimit = (U32)newStartingOffset; + size_t const bufferSize = (size_t)(hc4->end - hc4->prefixStart); + size_t newStartingOffset = bufferSize + hc4->dictLimit; + DEBUGLOG(5, "LZ4HC_init_internal"); + assert(newStartingOffset >= bufferSize); /* check overflow */ + if (newStartingOffset > 1 GB) { + LZ4HC_clearTables(hc4); + newStartingOffset = 0; + } + newStartingOffset += 64 KB; + hc4->nextToUpdate = (U32)newStartingOffset; + hc4->prefixStart = start; + hc4->end = start; + hc4->dictStart = start; + hc4->dictLimit = (U32)newStartingOffset; + hc4->lowLimit = (U32)newStartingOffset; } - /************************************** * Encode **************************************/ /* LZ4HC_encodeSequence() : * @return : 0 if ok, * 1 if buffer issue detected */ -LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( - const BYTE** _ip, - BYTE** _op, - const BYTE** _anchor, - int matchLength, - int offset, - limitedOutput_directive limit, - BYTE* oend) +LZ4_FORCE_INLINE int LZ4HC_encodeSequence(const BYTE **_ip, BYTE **_op, + const BYTE **_anchor, int matchLength, + int offset, + limitedOutput_directive limit, + BYTE *oend) { -#define ip (*_ip) -#define op (*_op) -#define anchor (*_anchor) +#define ip (*_ip) +#define op (*_op) +#define anchor (*_anchor) - size_t length; - BYTE* const token = op++; + size_t length; + BYTE *const token = op++; #if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6) - static const BYTE* start = NULL; - static U32 totalCost = 0; - U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start); - U32 const ll = (U32)(ip - anchor); - U32 const llAdd = (ll>=15) ? 
((ll-15) / 255) + 1 : 0; - U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0; - U32 const cost = 1 + llAdd + ll + 2 + mlAdd; - if (start==NULL) start = anchor; /* only works for single segment */ - /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */ - DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5i, cost:%4u + %5u", - pos, - (U32)(ip - anchor), matchLength, offset, - cost, totalCost); - totalCost += cost; + static const BYTE *start = NULL; + static U32 totalCost = 0; + U32 const pos = (start == NULL) ? 0 : (U32)(anchor - start); + U32 const ll = (U32)(ip - anchor); + U32 const llAdd = (ll >= 15) ? ((ll - 15) / 255) + 1 : 0; + U32 const mlAdd = + (matchLength >= 19) ? ((matchLength - 19) / 255) + 1 : 0; + U32 const cost = 1 + llAdd + ll + 2 + mlAdd; + if (start == NULL) + start = anchor; /* only works for single segment */ + /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */ + DEBUGLOG( + 6, + "pos:%7u -- literals:%4u, match:%4i, offset:%5i, cost:%4u + %5u", + pos, (U32)(ip - anchor), matchLength, offset, cost, totalCost); + totalCost += cost; #endif - /* Encode Literal length */ - length = (size_t)(ip - anchor); - LZ4_STATIC_ASSERT(notLimited == 0); - /* Check output limit */ - if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) { - DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)", - (int)length, (int)(oend - op)); - return 1; - } - if (length >= RUN_MASK) { - size_t len = length - RUN_MASK; - *token = (RUN_MASK << ML_BITS); - for(; len >= 255 ; len -= 255) *op++ = 255; - *op++ = (BYTE)len; - } else { - *token = (BYTE)(length << ML_BITS); - } + /* Encode Literal length */ + length = (size_t)(ip - anchor); + LZ4_STATIC_ASSERT(notLimited == 0); + /* Check output limit */ + if (limit && + ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) { + DEBUGLOG( + 6, + "Not enough room to write %i literals (%i bytes remaining)", + (int)length, (int)(oend - op)); + return 
1; + } + if (length >= RUN_MASK) { + size_t len = length - RUN_MASK; + *token = (RUN_MASK << ML_BITS); + for (; len >= 255; len -= 255) + *op++ = 255; + *op++ = (BYTE)len; + } else { + *token = (BYTE)(length << ML_BITS); + } - /* Copy Literals */ - LZ4_wildCopy8(op, anchor, op + length); - op += length; + /* Copy Literals */ + LZ4_wildCopy8(op, anchor, op + length); + op += length; - /* Encode Offset */ - assert(offset <= LZ4_DISTANCE_MAX ); - assert(offset > 0); - LZ4_writeLE16(op, (U16)(offset)); op += 2; + /* Encode Offset */ + assert(offset <= LZ4_DISTANCE_MAX); + assert(offset > 0); + LZ4_writeLE16(op, (U16)(offset)); + op += 2; - /* Encode MatchLength */ - assert(matchLength >= MINMATCH); - length = (size_t)matchLength - MINMATCH; - if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) { - DEBUGLOG(6, "Not enough room to write match length"); - return 1; /* Check output limit */ - } - if (length >= ML_MASK) { - *token += ML_MASK; - length -= ML_MASK; - for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; } - if (length >= 255) { length -= 255; *op++ = 255; } - *op++ = (BYTE)length; - } else { - *token += (BYTE)(length); - } + /* Encode MatchLength */ + assert(matchLength >= MINMATCH); + length = (size_t)matchLength - MINMATCH; + if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) { + DEBUGLOG(6, "Not enough room to write match length"); + return 1; /* Check output limit */ + } + if (length >= ML_MASK) { + *token += ML_MASK; + length -= ML_MASK; + for (; length >= 510; length -= 510) { + *op++ = 255; + *op++ = 255; + } + if (length >= 255) { + length -= 255; + *op++ = 255; + } + *op++ = (BYTE)length; + } else { + *token += (BYTE)(length); + } - /* Prepare next loop */ - ip += matchLength; - anchor = ip; + /* Prepare next loop */ + ip += matchLength; + anchor = ip; - return 0; + return 0; #undef ip #undef op #undef anchor } - typedef struct { - int off; - int len; - int back; /* negative value */ + int off; + int len; + int 
back; /* negative value */ } LZ4HC_match_t; -LZ4HC_match_t LZ4HC_searchExtDict(const BYTE* ip, U32 ipIndex, - const BYTE* const iLowLimit, const BYTE* const iHighLimit, - const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex, - int currentBestML, int nbAttempts) +LZ4HC_match_t LZ4HC_searchExtDict(const BYTE *ip, U32 ipIndex, + const BYTE *const iLowLimit, + const BYTE *const iHighLimit, + const LZ4HC_CCtx_internal *dictCtx, + U32 gDictEndIndex, int currentBestML, + int nbAttempts) { - size_t const lDictEndIndex = (size_t)(dictCtx->end - dictCtx->prefixStart) + dictCtx->dictLimit; - U32 lDictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)]; - U32 matchIndex = lDictMatchIndex + gDictEndIndex - (U32)lDictEndIndex; - int offset = 0, sBack = 0; - assert(lDictEndIndex <= 1 GB); - if (lDictMatchIndex>0) - DEBUGLOG(7, "lDictEndIndex = %zu, lDictMatchIndex = %u", lDictEndIndex, lDictMatchIndex); - while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) { - const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + lDictMatchIndex; + size_t const lDictEndIndex = + (size_t)(dictCtx->end - dictCtx->prefixStart) + + dictCtx->dictLimit; + U32 lDictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)]; + U32 matchIndex = lDictMatchIndex + gDictEndIndex - (U32)lDictEndIndex; + int offset = 0, sBack = 0; + assert(lDictEndIndex <= 1 GB); + if (lDictMatchIndex > 0) + DEBUGLOG(7, "lDictEndIndex = %zu, lDictMatchIndex = %u", + lDictEndIndex, lDictMatchIndex); + while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) { + const BYTE *const matchPtr = dictCtx->prefixStart - + dictCtx->dictLimit + + lDictMatchIndex; - if (LZ4_read32(matchPtr) == LZ4_read32(ip)) { - int mlt; - int back = 0; - const BYTE* vLimit = ip + (lDictEndIndex - lDictMatchIndex); - if (vLimit > iHighLimit) vLimit = iHighLimit; - mlt = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; - back = (ip > iLowLimit) ? 
LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->prefixStart) : 0; - mlt -= back; - if (mlt > currentBestML) { - currentBestML = mlt; - offset = (int)(ipIndex - matchIndex); - sBack = back; - DEBUGLOG(7, "found match of length %i within extDictCtx", currentBestML); - } } + if (LZ4_read32(matchPtr) == LZ4_read32(ip)) { + int mlt; + int back = 0; + const BYTE *vLimit = + ip + (lDictEndIndex - lDictMatchIndex); + if (vLimit > iHighLimit) + vLimit = iHighLimit; + mlt = (int)LZ4_count(ip + MINMATCH, matchPtr + MINMATCH, + vLimit) + + MINMATCH; + back = (ip > iLowLimit) ? + LZ4HC_countBack(ip, matchPtr, iLowLimit, + dictCtx->prefixStart) : + 0; + mlt -= back; + if (mlt > currentBestML) { + currentBestML = mlt; + offset = (int)(ipIndex - matchIndex); + sBack = back; + DEBUGLOG( + 7, + "found match of length %i within extDictCtx", + currentBestML); + } + } - { U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, lDictMatchIndex); - lDictMatchIndex -= nextOffset; - matchIndex -= nextOffset; - } } + { + U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, + lDictMatchIndex); + lDictMatchIndex -= nextOffset; + matchIndex -= nextOffset; + } + } - { LZ4HC_match_t md; - md.len = currentBestML; - md.off = offset; - md.back = sBack; - return md; - } + { + LZ4HC_match_t md; + md.len = currentBestML; + md.off = offset; + md.back = sBack; + return md; + } } -typedef LZ4HC_match_t (*LZ4MID_searchIntoDict_f)(const BYTE* ip, U32 ipIndex, - const BYTE* const iHighLimit, - const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex); +typedef LZ4HC_match_t (*LZ4MID_searchIntoDict_f)( + const BYTE *ip, U32 ipIndex, const BYTE *const iHighLimit, + const LZ4HC_CCtx_internal *dictCtx, U32 gDictEndIndex); -static LZ4HC_match_t LZ4MID_searchHCDict(const BYTE* ip, U32 ipIndex, - const BYTE* const iHighLimit, - const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex) +static LZ4HC_match_t LZ4MID_searchHCDict(const BYTE *ip, U32 ipIndex, + const BYTE *const iHighLimit, + const LZ4HC_CCtx_internal 
*dictCtx, + U32 gDictEndIndex) { - return LZ4HC_searchExtDict(ip,ipIndex, - ip, iHighLimit, - dictCtx, gDictEndIndex, - MINMATCH-1, 2); + return LZ4HC_searchExtDict(ip, ipIndex, ip, iHighLimit, dictCtx, + gDictEndIndex, MINMATCH - 1, 2); } -static LZ4HC_match_t LZ4MID_searchExtDict(const BYTE* ip, U32 ipIndex, - const BYTE* const iHighLimit, - const LZ4HC_CCtx_internal* dictCtx, U32 gDictEndIndex) +static LZ4HC_match_t LZ4MID_searchExtDict(const BYTE *ip, U32 ipIndex, + const BYTE *const iHighLimit, + const LZ4HC_CCtx_internal *dictCtx, + U32 gDictEndIndex) { - size_t const lDictEndIndex = (size_t)(dictCtx->end - dictCtx->prefixStart) + dictCtx->dictLimit; - const U32* const hash4Table = dictCtx->hashTable; - const U32* const hash8Table = hash4Table + LZ4MID_HASHTABLESIZE; - DEBUGLOG(7, "LZ4MID_searchExtDict (ipIdx=%u)", ipIndex); + size_t const lDictEndIndex = + (size_t)(dictCtx->end - dictCtx->prefixStart) + + dictCtx->dictLimit; + const U32 *const hash4Table = dictCtx->hashTable; + const U32 *const hash8Table = hash4Table + LZ4MID_HASHTABLESIZE; + DEBUGLOG(7, "LZ4MID_searchExtDict (ipIdx=%u)", ipIndex); - /* search long match first */ - { U32 l8DictMatchIndex = hash8Table[LZ4MID_hash8Ptr(ip)]; - U32 m8Index = l8DictMatchIndex + gDictEndIndex - (U32)lDictEndIndex; - assert(lDictEndIndex <= 1 GB); - if (ipIndex - m8Index <= LZ4_DISTANCE_MAX) { - const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + l8DictMatchIndex; - const size_t safeLen = MIN(lDictEndIndex - l8DictMatchIndex, (size_t)(iHighLimit - ip)); - int mlt = (int)LZ4_count(ip, matchPtr, ip + safeLen); - if (mlt >= MINMATCH) { - LZ4HC_match_t md; - DEBUGLOG(7, "Found long ExtDict match of len=%u", mlt); - md.len = mlt; - md.off = (int)(ipIndex - m8Index); - md.back = 0; - return md; - } - } - } + /* search long match first */ + { + U32 l8DictMatchIndex = hash8Table[LZ4MID_hash8Ptr(ip)]; + U32 m8Index = + l8DictMatchIndex + gDictEndIndex - (U32)lDictEndIndex; + assert(lDictEndIndex <= 1 
GB); + if (ipIndex - m8Index <= LZ4_DISTANCE_MAX) { + const BYTE *const matchPtr = dictCtx->prefixStart - + dictCtx->dictLimit + + l8DictMatchIndex; + const size_t safeLen = + MIN(lDictEndIndex - l8DictMatchIndex, + (size_t)(iHighLimit - ip)); + int mlt = (int)LZ4_count(ip, matchPtr, ip + safeLen); + if (mlt >= MINMATCH) { + LZ4HC_match_t md; + DEBUGLOG(7, + "Found long ExtDict match of len=%u", + mlt); + md.len = mlt; + md.off = (int)(ipIndex - m8Index); + md.back = 0; + return md; + } + } + } - /* search for short match second */ - { U32 l4DictMatchIndex = hash4Table[LZ4MID_hash4Ptr(ip)]; - U32 m4Index = l4DictMatchIndex + gDictEndIndex - (U32)lDictEndIndex; - if (ipIndex - m4Index <= LZ4_DISTANCE_MAX) { - const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + l4DictMatchIndex; - const size_t safeLen = MIN(lDictEndIndex - l4DictMatchIndex, (size_t)(iHighLimit - ip)); - int mlt = (int)LZ4_count(ip, matchPtr, ip + safeLen); - if (mlt >= MINMATCH) { - LZ4HC_match_t md; - DEBUGLOG(7, "Found short ExtDict match of len=%u", mlt); - md.len = mlt; - md.off = (int)(ipIndex - m4Index); - md.back = 0; - return md; - } - } - } + /* search for short match second */ + { + U32 l4DictMatchIndex = hash4Table[LZ4MID_hash4Ptr(ip)]; + U32 m4Index = + l4DictMatchIndex + gDictEndIndex - (U32)lDictEndIndex; + if (ipIndex - m4Index <= LZ4_DISTANCE_MAX) { + const BYTE *const matchPtr = dictCtx->prefixStart - + dictCtx->dictLimit + + l4DictMatchIndex; + const size_t safeLen = + MIN(lDictEndIndex - l4DictMatchIndex, + (size_t)(iHighLimit - ip)); + int mlt = (int)LZ4_count(ip, matchPtr, ip + safeLen); + if (mlt >= MINMATCH) { + LZ4HC_match_t md; + DEBUGLOG(7, + "Found short ExtDict match of len=%u", + mlt); + md.len = mlt; + md.off = (int)(ipIndex - m4Index); + md.back = 0; + return md; + } + } + } - /* nothing found */ - { LZ4HC_match_t const md = {0, 0, 0 }; - return md; - } + /* nothing found */ + { + LZ4HC_match_t const md = { 0, 0, 0 }; + return md; + } } 
/************************************** * Mid Compression (level 2) **************************************/ -LZ4_FORCE_INLINE void -LZ4MID_addPosition(U32* hTable, U32 hValue, U32 index) +LZ4_FORCE_INLINE void LZ4MID_addPosition(U32 *hTable, U32 hValue, U32 index) { - hTable[hValue] = index; + hTable[hValue] = index; } -#define ADDPOS8(_p, _idx) LZ4MID_addPosition(hash8Table, LZ4MID_hash8Ptr(_p), _idx) -#define ADDPOS4(_p, _idx) LZ4MID_addPosition(hash4Table, LZ4MID_hash4Ptr(_p), _idx) +#define ADDPOS8(_p, _idx) \ + LZ4MID_addPosition(hash8Table, LZ4MID_hash8Ptr(_p), _idx) +#define ADDPOS4(_p, _idx) \ + LZ4MID_addPosition(hash4Table, LZ4MID_hash4Ptr(_p), _idx) /* Fill hash tables with references into dictionary. * The resulting table is only exploitable by LZ4MID (level 2) */ -static void -LZ4MID_fillHTable (LZ4HC_CCtx_internal* cctx, const void* dict, size_t size) +static void LZ4MID_fillHTable(LZ4HC_CCtx_internal *cctx, const void *dict, + size_t size) { - U32* const hash4Table = cctx->hashTable; - U32* const hash8Table = hash4Table + LZ4MID_HASHTABLESIZE; - const BYTE* const prefixPtr = (const BYTE*)dict; - U32 const prefixIdx = cctx->dictLimit; - U32 const target = prefixIdx + (U32)size - LZ4MID_HASHSIZE; - U32 idx = cctx->nextToUpdate; - assert(dict == cctx->prefixStart); - DEBUGLOG(4, "LZ4MID_fillHTable (size:%zu)", size); - if (size <= LZ4MID_HASHSIZE) - return; + U32 *const hash4Table = cctx->hashTable; + U32 *const hash8Table = hash4Table + LZ4MID_HASHTABLESIZE; + const BYTE *const prefixPtr = (const BYTE *)dict; + U32 const prefixIdx = cctx->dictLimit; + U32 const target = prefixIdx + (U32)size - LZ4MID_HASHSIZE; + U32 idx = cctx->nextToUpdate; + assert(dict == cctx->prefixStart); + DEBUGLOG(4, "LZ4MID_fillHTable (size:%zu)", size); + if (size <= LZ4MID_HASHSIZE) + return; - for (; idx < target; idx += 3) { - ADDPOS4(prefixPtr+idx-prefixIdx, idx); - ADDPOS8(prefixPtr+idx+1-prefixIdx, idx+1); - } + for (; idx < target; idx += 3) { + ADDPOS4(prefixPtr + idx 
- prefixIdx, idx); + ADDPOS8(prefixPtr + idx + 1 - prefixIdx, idx + 1); + } - idx = (size > 32 KB + LZ4MID_HASHSIZE) ? target - 32 KB : cctx->nextToUpdate; - for (; idx < target; idx += 1) { - ADDPOS8(prefixPtr+idx-prefixIdx, idx); - } + idx = (size > 32 KB + LZ4MID_HASHSIZE) ? target - 32 KB : + cctx->nextToUpdate; + for (; idx < target; idx += 1) { + ADDPOS8(prefixPtr + idx - prefixIdx, idx); + } - cctx->nextToUpdate = target; + cctx->nextToUpdate = target; } -static LZ4MID_searchIntoDict_f select_searchDict_function(const LZ4HC_CCtx_internal* dictCtx) +static LZ4MID_searchIntoDict_f +select_searchDict_function(const LZ4HC_CCtx_internal *dictCtx) { - if (dictCtx == NULL) return NULL; - if (LZ4HC_getCLevelParams(dictCtx->compressionLevel).strat == lz4mid) - return LZ4MID_searchExtDict; - return LZ4MID_searchHCDict; + if (dictCtx == NULL) + return NULL; + if (LZ4HC_getCLevelParams(dictCtx->compressionLevel).strat == lz4mid) + return LZ4MID_searchExtDict; + return LZ4MID_searchHCDict; } -static int LZ4MID_compress ( - LZ4HC_CCtx_internal* const ctx, - const char* const src, - char* const dst, - int* srcSizePtr, - int const maxOutputSize, - const limitedOutput_directive limit, - const dictCtx_directive dict - ) +static int LZ4MID_compress(LZ4HC_CCtx_internal *const ctx, + const char *const src, char *const dst, + int *srcSizePtr, int const maxOutputSize, + const limitedOutput_directive limit, + const dictCtx_directive dict) { - U32* const hash4Table = ctx->hashTable; - U32* const hash8Table = hash4Table + LZ4MID_HASHTABLESIZE; - const BYTE* ip = (const BYTE*)src; - const BYTE* anchor = ip; - const BYTE* const iend = ip + *srcSizePtr; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = (iend - LASTLITERALS); - const BYTE* const ilimit = (iend - LZ4MID_HASHSIZE); - BYTE* op = (BYTE*)dst; - BYTE* oend = op + maxOutputSize; + U32 *const hash4Table = ctx->hashTable; + U32 *const hash8Table = hash4Table + LZ4MID_HASHTABLESIZE; + const BYTE *ip = 
(const BYTE *)src; + const BYTE *anchor = ip; + const BYTE *const iend = ip + *srcSizePtr; + const BYTE *const mflimit = iend - MFLIMIT; + const BYTE *const matchlimit = (iend - LASTLITERALS); + const BYTE *const ilimit = (iend - LZ4MID_HASHSIZE); + BYTE *op = (BYTE *)dst; + BYTE *oend = op + maxOutputSize; - const BYTE* const prefixPtr = ctx->prefixStart; - const U32 prefixIdx = ctx->dictLimit; - const U32 ilimitIdx = (U32)(ilimit - prefixPtr) + prefixIdx; - const BYTE* const dictStart = ctx->dictStart; - const U32 dictIdx = ctx->lowLimit; - const U32 gDictEndIndex = ctx->lowLimit; - const LZ4MID_searchIntoDict_f searchIntoDict = (dict == usingDictCtxHc) ? select_searchDict_function(ctx->dictCtx) : NULL; - unsigned matchLength; - unsigned matchDistance; + const BYTE *const prefixPtr = ctx->prefixStart; + const U32 prefixIdx = ctx->dictLimit; + const U32 ilimitIdx = (U32)(ilimit - prefixPtr) + prefixIdx; + const BYTE *const dictStart = ctx->dictStart; + const U32 dictIdx = ctx->lowLimit; + const U32 gDictEndIndex = ctx->lowLimit; + const LZ4MID_searchIntoDict_f searchIntoDict = + (dict == usingDictCtxHc) ? 
+ select_searchDict_function(ctx->dictCtx) : + NULL; + unsigned matchLength; + unsigned matchDistance; - /* input sanitization */ - DEBUGLOG(5, "LZ4MID_compress (%i bytes)", *srcSizePtr); - if (dict == usingDictCtxHc) DEBUGLOG(5, "usingDictCtxHc"); - assert(*srcSizePtr >= 0); - if (*srcSizePtr) assert(src != NULL); - if (maxOutputSize) assert(dst != NULL); - if (*srcSizePtr < 0) return 0; /* invalid */ - if (maxOutputSize < 0) return 0; /* invalid */ - if (*srcSizePtr > LZ4_MAX_INPUT_SIZE) { - /* forbidden: no input is allowed to be that large */ - return 0; - } - if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ - if (*srcSizePtr < LZ4_minLength) - goto _lz4mid_last_literals; /* Input too small, no compression (all literals) */ + /* input sanitization */ + DEBUGLOG(5, "LZ4MID_compress (%i bytes)", *srcSizePtr); + if (dict == usingDictCtxHc) + DEBUGLOG(5, "usingDictCtxHc"); + assert(*srcSizePtr >= 0); + if (*srcSizePtr) + assert(src != NULL); + if (maxOutputSize) + assert(dst != NULL); + if (*srcSizePtr < 0) + return 0; /* invalid */ + if (maxOutputSize < 0) + return 0; /* invalid */ + if (*srcSizePtr > LZ4_MAX_INPUT_SIZE) { + /* forbidden: no input is allowed to be that large */ + return 0; + } + if (limit == fillOutput) + oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ + if (*srcSizePtr < LZ4_minLength) + goto _lz4mid_last_literals; /* Input too small, no compression (all literals) */ - /* main loop */ - while (ip <= mflimit) { - const U32 ipIndex = (U32)(ip - prefixPtr) + prefixIdx; - /* search long match */ - { U32 const h8 = LZ4MID_hash8Ptr(ip); - U32 const pos8 = hash8Table[h8]; - assert(h8 < LZ4MID_HASHTABLESIZE); - assert(pos8 < ipIndex); - LZ4MID_addPosition(hash8Table, h8, ipIndex); - if (ipIndex - pos8 <= LZ4_DISTANCE_MAX) { - /* match candidate found */ - if (pos8 >= prefixIdx) { - const BYTE* const matchPtr = prefixPtr + pos8 - prefixIdx; - assert(matchPtr < ip); - matchLength = 
LZ4_count(ip, matchPtr, matchlimit); - if (matchLength >= MINMATCH) { - DEBUGLOG(7, "found long match at pos %u (len=%u)", pos8, matchLength); - matchDistance = ipIndex - pos8; - goto _lz4mid_encode_sequence; - } - } else { - if (pos8 >= dictIdx) { - /* extDict match candidate */ - const BYTE* const matchPtr = dictStart + (pos8 - dictIdx); - const size_t safeLen = MIN(prefixIdx - pos8, (size_t)(matchlimit - ip)); - matchLength = LZ4_count(ip, matchPtr, ip + safeLen); - if (matchLength >= MINMATCH) { - DEBUGLOG(7, "found long match at ExtDict pos %u (len=%u)", pos8, matchLength); - matchDistance = ipIndex - pos8; - goto _lz4mid_encode_sequence; - } - } - } - } } - /* search short match */ - { U32 const h4 = LZ4MID_hash4Ptr(ip); - U32 const pos4 = hash4Table[h4]; - assert(h4 < LZ4MID_HASHTABLESIZE); - assert(pos4 < ipIndex); - LZ4MID_addPosition(hash4Table, h4, ipIndex); - if (ipIndex - pos4 <= LZ4_DISTANCE_MAX) { - /* match candidate found */ - if (pos4 >= prefixIdx) { - /* only search within prefix */ - const BYTE* const matchPtr = prefixPtr + (pos4 - prefixIdx); - assert(matchPtr < ip); - assert(matchPtr >= prefixPtr); - matchLength = LZ4_count(ip, matchPtr, matchlimit); - if (matchLength >= MINMATCH) { - /* short match found, let's just check ip+1 for longer */ - U32 const h8 = LZ4MID_hash8Ptr(ip+1); - U32 const pos8 = hash8Table[h8]; - U32 const m2Distance = ipIndex + 1 - pos8; - matchDistance = ipIndex - pos4; - if ( m2Distance <= LZ4_DISTANCE_MAX - && pos8 >= prefixIdx /* only search within prefix */ - && likely(ip < mflimit) - ) { - const BYTE* const m2Ptr = prefixPtr + (pos8 - prefixIdx); - unsigned ml2 = LZ4_count(ip+1, m2Ptr, matchlimit); - if (ml2 > matchLength) { - LZ4MID_addPosition(hash8Table, h8, ipIndex+1); - ip++; - matchLength = ml2; - matchDistance = m2Distance; - } } - goto _lz4mid_encode_sequence; - } - } else { - if (pos4 >= dictIdx) { - /* extDict match candidate */ - const BYTE* const matchPtr = dictStart + (pos4 - dictIdx); - const size_t 
safeLen = MIN(prefixIdx - pos4, (size_t)(matchlimit - ip)); - matchLength = LZ4_count(ip, matchPtr, ip + safeLen); - if (matchLength >= MINMATCH) { - DEBUGLOG(7, "found match at ExtDict pos %u (len=%u)", pos4, matchLength); - matchDistance = ipIndex - pos4; - goto _lz4mid_encode_sequence; - } - } - } - } } - /* no match found in prefix */ - if ( (dict == usingDictCtxHc) - && (ipIndex - gDictEndIndex < LZ4_DISTANCE_MAX - 8) ) { - /* search a match into external dictionary */ - LZ4HC_match_t dMatch = searchIntoDict(ip, ipIndex, - matchlimit, - ctx->dictCtx, gDictEndIndex); - if (dMatch.len >= MINMATCH) { - DEBUGLOG(7, "found Dictionary match (offset=%i)", dMatch.off); - assert(dMatch.back == 0); - matchLength = (unsigned)dMatch.len; - matchDistance = (unsigned)dMatch.off; - goto _lz4mid_encode_sequence; - } - } - /* no match found */ - ip += 1 + ((ip-anchor) >> 9); /* skip faster over incompressible data */ - continue; + /* main loop */ + while (ip <= mflimit) { + const U32 ipIndex = (U32)(ip - prefixPtr) + prefixIdx; + /* search long match */ + { + U32 const h8 = LZ4MID_hash8Ptr(ip); + U32 const pos8 = hash8Table[h8]; + assert(h8 < LZ4MID_HASHTABLESIZE); + assert(pos8 < ipIndex); + LZ4MID_addPosition(hash8Table, h8, ipIndex); + if (ipIndex - pos8 <= LZ4_DISTANCE_MAX) { + /* match candidate found */ + if (pos8 >= prefixIdx) { + const BYTE *const matchPtr = + prefixPtr + pos8 - prefixIdx; + assert(matchPtr < ip); + matchLength = LZ4_count(ip, matchPtr, + matchlimit); + if (matchLength >= MINMATCH) { + DEBUGLOG( + 7, + "found long match at pos %u (len=%u)", + pos8, matchLength); + matchDistance = ipIndex - pos8; + goto _lz4mid_encode_sequence; + } + } else { + if (pos8 >= dictIdx) { + /* extDict match candidate */ + const BYTE *const matchPtr = + dictStart + + (pos8 - dictIdx); + const size_t safeLen = MIN( + prefixIdx - pos8, + (size_t)(matchlimit - + ip)); + matchLength = + LZ4_count(ip, matchPtr, + ip + safeLen); + if (matchLength >= MINMATCH) { + DEBUGLOG( + 7, + 
"found long match at ExtDict pos %u (len=%u)", + pos8, + matchLength); + matchDistance = + ipIndex - pos8; + goto _lz4mid_encode_sequence; + } + } + } + } + } + /* search short match */ + { + U32 const h4 = LZ4MID_hash4Ptr(ip); + U32 const pos4 = hash4Table[h4]; + assert(h4 < LZ4MID_HASHTABLESIZE); + assert(pos4 < ipIndex); + LZ4MID_addPosition(hash4Table, h4, ipIndex); + if (ipIndex - pos4 <= LZ4_DISTANCE_MAX) { + /* match candidate found */ + if (pos4 >= prefixIdx) { + /* only search within prefix */ + const BYTE *const matchPtr = + prefixPtr + (pos4 - prefixIdx); + assert(matchPtr < ip); + assert(matchPtr >= prefixPtr); + matchLength = LZ4_count(ip, matchPtr, + matchlimit); + if (matchLength >= MINMATCH) { + /* short match found, let's just check ip+1 for longer */ + U32 const h8 = + LZ4MID_hash8Ptr(ip + 1); + U32 const pos8 = hash8Table[h8]; + U32 const m2Distance = + ipIndex + 1 - pos8; + matchDistance = ipIndex - pos4; + if (m2Distance <= + LZ4_DISTANCE_MAX && + pos8 >= prefixIdx /* only search within prefix */ + && likely(ip < mflimit)) { + const BYTE *const m2Ptr = + prefixPtr + + (pos8 - + prefixIdx); + unsigned ml2 = LZ4_count( + ip + 1, m2Ptr, + matchlimit); + if (ml2 > matchLength) { + LZ4MID_addPosition( + hash8Table, + h8, + ipIndex + + 1); + ip++; + matchLength = + ml2; + matchDistance = + m2Distance; + } + } + goto _lz4mid_encode_sequence; + } + } else { + if (pos4 >= dictIdx) { + /* extDict match candidate */ + const BYTE *const matchPtr = + dictStart + + (pos4 - dictIdx); + const size_t safeLen = MIN( + prefixIdx - pos4, + (size_t)(matchlimit - + ip)); + matchLength = + LZ4_count(ip, matchPtr, + ip + safeLen); + if (matchLength >= MINMATCH) { + DEBUGLOG( + 7, + "found match at ExtDict pos %u (len=%u)", + pos4, + matchLength); + matchDistance = + ipIndex - pos4; + goto _lz4mid_encode_sequence; + } + } + } + } + } + /* no match found in prefix */ + if ((dict == usingDictCtxHc) && + (ipIndex - gDictEndIndex < LZ4_DISTANCE_MAX - 8)) { + /* search a 
match into external dictionary */ + LZ4HC_match_t dMatch = + searchIntoDict(ip, ipIndex, matchlimit, + ctx->dictCtx, gDictEndIndex); + if (dMatch.len >= MINMATCH) { + DEBUGLOG(7, + "found Dictionary match (offset=%i)", + dMatch.off); + assert(dMatch.back == 0); + matchLength = (unsigned)dMatch.len; + matchDistance = (unsigned)dMatch.off; + goto _lz4mid_encode_sequence; + } + } + /* no match found */ + ip += 1 + ((ip - anchor) >> + 9); /* skip faster over incompressible data */ + continue; -_lz4mid_encode_sequence: - /* catch back */ - while (((ip > anchor) & ((U32)(ip-prefixPtr) > matchDistance)) && (unlikely(ip[-1] == ip[-(int)matchDistance-1]))) { - ip--; matchLength++; - }; + _lz4mid_encode_sequence: + /* catch back */ + while (((ip > anchor) & + ((U32)(ip - prefixPtr) > matchDistance)) && + (unlikely(ip[-1] == ip[-(int)matchDistance - 1]))) { + ip--; + matchLength++; + }; - /* fill table with beginning of match */ - ADDPOS8(ip+1, ipIndex+1); - ADDPOS8(ip+2, ipIndex+2); - ADDPOS4(ip+1, ipIndex+1); + /* fill table with beginning of match */ + ADDPOS8(ip + 1, ipIndex + 1); + ADDPOS8(ip + 2, ipIndex + 2); + ADDPOS4(ip + 1, ipIndex + 1); - /* encode */ - { BYTE* const saved_op = op; - /* LZ4HC_encodeSequence always updates @op; on success, it updates @ip and @anchor */ - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), - (int)matchLength, (int)matchDistance, - limit, oend) ) { - op = saved_op; /* restore @op value before failed LZ4HC_encodeSequence */ - goto _lz4mid_dest_overflow; - } - } + /* encode */ + { + BYTE *const saved_op = op; + /* LZ4HC_encodeSequence always updates @op; on success, it updates @ip and @anchor */ + if (LZ4HC_encodeSequence( + UPDATABLE(ip, op, anchor), (int)matchLength, + (int)matchDistance, limit, oend)) { + op = saved_op; /* restore @op value before failed LZ4HC_encodeSequence */ + goto _lz4mid_dest_overflow; + } + } - /* fill table with end of match */ - { U32 endMatchIdx = (U32)(ip-prefixPtr) + prefixIdx; - U32 pos_m2 = endMatchIdx - 
2; - if (pos_m2 < ilimitIdx) { - if (likely(ip - prefixPtr > 5)) { - ADDPOS8(ip-5, endMatchIdx - 5); - } - ADDPOS8(ip-3, endMatchIdx - 3); - ADDPOS8(ip-2, endMatchIdx - 2); - ADDPOS4(ip-2, endMatchIdx - 2); - ADDPOS4(ip-1, endMatchIdx - 1); - } - } - } + /* fill table with end of match */ + { + U32 endMatchIdx = (U32)(ip - prefixPtr) + prefixIdx; + U32 pos_m2 = endMatchIdx - 2; + if (pos_m2 < ilimitIdx) { + if (likely(ip - prefixPtr > 5)) { + ADDPOS8(ip - 5, endMatchIdx - 5); + } + ADDPOS8(ip - 3, endMatchIdx - 3); + ADDPOS8(ip - 2, endMatchIdx - 2); + ADDPOS4(ip - 2, endMatchIdx - 2); + ADDPOS4(ip - 1, endMatchIdx - 1); + } + } + } _lz4mid_last_literals: - /* Encode Last Literals */ - { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ - size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; - size_t const totalSize = 1 + llAdd + lastRunSize; - if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ - if (limit && (op + totalSize > oend)) { - if (limit == limitedOutput) return 0; /* not enough space in @dst */ - /* adapt lastRunSize to fill 'dest' */ - lastRunSize = (size_t)(oend - op) - 1 /*token*/; - llAdd = (lastRunSize + 256 - RUN_MASK) / 256; - lastRunSize -= llAdd; - } - DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize); - ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */ + /* Encode Last Literals */ + { + size_t lastRunSize = (size_t)(iend - anchor); /* literals */ + size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; + size_t const totalSize = 1 + llAdd + lastRunSize; + if (limit == fillOutput) + oend += LASTLITERALS; /* restore correct value */ + if (limit && (op + totalSize > oend)) { + if (limit == limitedOutput) + return 0; /* not enough space in @dst */ + /* adapt lastRunSize to fill 'dest' */ + lastRunSize = (size_t)(oend - op) - 1 /*token*/; + llAdd = (lastRunSize + 256 - RUN_MASK) / 256; + lastRunSize -= llAdd; + } + DEBUGLOG(6, "Final literal run : %i literals", + (int)lastRunSize); + 
ip = anchor + + lastRunSize; /* can be != iend if limit==fillOutput */ - if (lastRunSize >= RUN_MASK) { - size_t accumulator = lastRunSize - RUN_MASK; - *op++ = (RUN_MASK << ML_BITS); - for(; accumulator >= 255 ; accumulator -= 255) - *op++ = 255; - *op++ = (BYTE) accumulator; - } else { - *op++ = (BYTE)(lastRunSize << ML_BITS); - } - assert(lastRunSize <= (size_t)(oend - op)); - LZ4_memcpy(op, anchor, lastRunSize); - op += lastRunSize; - } + if (lastRunSize >= RUN_MASK) { + size_t accumulator = lastRunSize - RUN_MASK; + *op++ = (RUN_MASK << ML_BITS); + for (; accumulator >= 255; accumulator -= 255) + *op++ = 255; + *op++ = (BYTE)accumulator; + } else { + *op++ = (BYTE)(lastRunSize << ML_BITS); + } + assert(lastRunSize <= (size_t)(oend - op)); + LZ4_memcpy(op, anchor, lastRunSize); + op += lastRunSize; + } - /* End */ - DEBUGLOG(5, "compressed %i bytes into %i bytes", *srcSizePtr, (int)((char*)op - dst)); - assert(ip >= (const BYTE*)src); - assert(ip <= iend); - *srcSizePtr = (int)(ip - (const BYTE*)src); - assert((char*)op >= dst); - assert(op <= oend); - assert((char*)op - dst < INT_MAX); - return (int)((char*)op - dst); + /* End */ + DEBUGLOG(5, "compressed %i bytes into %i bytes", *srcSizePtr, + (int)((char *)op - dst)); + assert(ip >= (const BYTE *)src); + assert(ip <= iend); + *srcSizePtr = (int)(ip - (const BYTE *)src); + assert((char *)op >= dst); + assert(op <= oend); + assert((char *)op - dst < INT_MAX); + return (int)((char *)op - dst); _lz4mid_dest_overflow: - if (limit == fillOutput) { - /* Assumption : @ip, @anchor, @optr and @matchLength must be set correctly */ - size_t const ll = (size_t)(ip - anchor); - size_t const ll_addbytes = (ll + 240) / 255; - size_t const ll_totalCost = 1 + ll_addbytes + ll; - BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */ - DEBUGLOG(6, "Last sequence is overflowing : %u literals, %u remaining space", - (unsigned)ll, (unsigned)(oend-op)); - if (op + ll_totalCost <= maxLitPos) { - /* ll validated; now 
adjust match length */ - size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost)); - size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255); - assert(maxMlSize < INT_MAX); - if ((size_t)matchLength > maxMlSize) matchLength= (unsigned)maxMlSize; - if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + matchLength >= MFLIMIT) { - DEBUGLOG(6, "Let's encode a last sequence (ll=%u, ml=%u)", (unsigned)ll, matchLength); - LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), - (int)matchLength, (int)matchDistance, - notLimited, oend); - } } - DEBUGLOG(6, "Let's finish with a run of literals (%u bytes left)", (unsigned)(oend-op)); - goto _lz4mid_last_literals; - } - /* compression failed */ - return 0; + if (limit == fillOutput) { + /* Assumption : @ip, @anchor, @optr and @matchLength must be set correctly */ + size_t const ll = (size_t)(ip - anchor); + size_t const ll_addbytes = (ll + 240) / 255; + size_t const ll_totalCost = 1 + ll_addbytes + ll; + BYTE *const maxLitPos = + oend - 3; /* 2 for offset, 1 for token */ + DEBUGLOG( + 6, + "Last sequence is overflowing : %u literals, %u remaining space", + (unsigned)ll, (unsigned)(oend - op)); + if (op + ll_totalCost <= maxLitPos) { + /* ll validated; now adjust match length */ + size_t const bytesLeftForMl = + (size_t)(maxLitPos - (op + ll_totalCost)); + size_t const maxMlSize = MINMATCH + (ML_MASK - 1) + + (bytesLeftForMl * 255); + assert(maxMlSize < INT_MAX); + if ((size_t)matchLength > maxMlSize) + matchLength = (unsigned)maxMlSize; + if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - + 1 + matchLength >= + MFLIMIT) { + DEBUGLOG( + 6, + "Let's encode a last sequence (ll=%u, ml=%u)", + (unsigned)ll, matchLength); + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), + (int)matchLength, + (int)matchDistance, + notLimited, oend); + } + } + DEBUGLOG(6, + "Let's finish with a run of literals (%u bytes left)", + (unsigned)(oend - op)); + goto _lz4mid_last_literals; + } + /* compression failed */ + 
return 0; } - /************************************** * HC Compression - Search **************************************/ /* Update chains up to ip (excluded) */ -LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip) +LZ4_FORCE_INLINE void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4, const BYTE *ip) { - U16* const chainTable = hc4->chainTable; - U32* const hashTable = hc4->hashTable; - const BYTE* const prefixPtr = hc4->prefixStart; - U32 const prefixIdx = hc4->dictLimit; - U32 const target = (U32)(ip - prefixPtr) + prefixIdx; - U32 idx = hc4->nextToUpdate; - assert(ip >= prefixPtr); - assert(target >= prefixIdx); + U16 *const chainTable = hc4->chainTable; + U32 *const hashTable = hc4->hashTable; + const BYTE *const prefixPtr = hc4->prefixStart; + U32 const prefixIdx = hc4->dictLimit; + U32 const target = (U32)(ip - prefixPtr) + prefixIdx; + U32 idx = hc4->nextToUpdate; + assert(ip >= prefixPtr); + assert(target >= prefixIdx); - while (idx < target) { - U32 const h = LZ4HC_hashPtr(prefixPtr+idx-prefixIdx); - size_t delta = idx - hashTable[h]; - if (delta>LZ4_DISTANCE_MAX) delta = LZ4_DISTANCE_MAX; - DELTANEXTU16(chainTable, idx) = (U16)delta; - hashTable[h] = idx; - idx++; - } + while (idx < target) { + U32 const h = LZ4HC_hashPtr(prefixPtr + idx - prefixIdx); + size_t delta = idx - hashTable[h]; + if (delta > LZ4_DISTANCE_MAX) + delta = LZ4_DISTANCE_MAX; + DELTANEXTU16(chainTable, idx) = (U16)delta; + hashTable[h] = idx; + idx++; + } - hc4->nextToUpdate = target; + hc4->nextToUpdate = target; } #if defined(_MSC_VER) -# define LZ4HC_rotl32(x,r) _rotl(x,r) +#define LZ4HC_rotl32(x, r) _rotl(x, r) #else -# define LZ4HC_rotl32(x,r) ((x << r) | (x >> (32 - r))) +#define LZ4HC_rotl32(x, r) ((x << r) | (x >> (32 - r))) #endif - static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern) { - size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3; - if (bitsToRotate == 0) return pattern; - return LZ4HC_rotl32(pattern, 
(int)bitsToRotate); + size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3; + if (bitsToRotate == 0) + return pattern; + return LZ4HC_rotl32(pattern, (int)bitsToRotate); } /* LZ4HC_countPattern() : * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */ -static unsigned -LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32) +static unsigned LZ4HC_countPattern(const BYTE *ip, const BYTE *const iEnd, + U32 const pattern32) { - const BYTE* const iStart = ip; - reg_t const pattern = (sizeof(pattern)==8) ? - (reg_t)pattern32 + (((reg_t)pattern32) << (sizeof(pattern)*4)) : pattern32; + const BYTE *const iStart = ip; + reg_t const pattern = + (sizeof(pattern) == 8) ? + (reg_t)pattern32 + + (((reg_t)pattern32) << (sizeof(pattern) * 4)) : + pattern32; - while (likely(ip < iEnd-(sizeof(pattern)-1))) { - reg_t const diff = LZ4_read_ARCH(ip) ^ pattern; - if (!diff) { ip+=sizeof(pattern); continue; } - ip += LZ4_NbCommonBytes(diff); - return (unsigned)(ip - iStart); - } + while (likely(ip < iEnd - (sizeof(pattern) - 1))) { + reg_t const diff = LZ4_read_ARCH(ip) ^ pattern; + if (!diff) { + ip += sizeof(pattern); + continue; + } + ip += LZ4_NbCommonBytes(diff); + return (unsigned)(ip - iStart); + } - if (LZ4_isLittleEndian()) { - reg_t patternByte = pattern; - while ((ip>= 8; - } - } else { /* big endian */ - U32 bitOffset = (sizeof(pattern)*8) - 8; - while (ip < iEnd) { - BYTE const byte = (BYTE)(pattern >> bitOffset); - if (*ip != byte) break; - ip ++; bitOffset -= 8; - } } + if (LZ4_isLittleEndian()) { + reg_t patternByte = pattern; + while ((ip < iEnd) && (*ip == (BYTE)patternByte)) { + ip++; + patternByte >>= 8; + } + } else { /* big endian */ + U32 bitOffset = (sizeof(pattern) * 8) - 8; + while (ip < iEnd) { + BYTE const byte = (BYTE)(pattern >> bitOffset); + if (*ip != byte) + break; + ip++; + bitOffset -= 8; + } + } - return (unsigned)(ip - iStart); + return (unsigned)(ip - iStart); } /* 
LZ4HC_reverseCountPattern() : * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) * read using natural platform endianness */ -static unsigned -LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern) +static unsigned LZ4HC_reverseCountPattern(const BYTE *ip, + const BYTE *const iLow, U32 pattern) { - const BYTE* const iStart = ip; + const BYTE *const iStart = ip; - while (likely(ip >= iLow+4)) { - if (LZ4_read32(ip-4) != pattern) break; - ip -= 4; - } - { const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* works for any endianness */ - while (likely(ip>iLow)) { - if (ip[-1] != *bytePtr) break; - ip--; bytePtr--; - } } - return (unsigned)(iStart - ip); + while (likely(ip >= iLow + 4)) { + if (LZ4_read32(ip - 4) != pattern) + break; + ip -= 4; + } + { + const BYTE *bytePtr = (const BYTE *)(&pattern) + + 3; /* works for any endianness */ + while (likely(ip > iLow)) { + if (ip[-1] != *bytePtr) + break; + ip--; + bytePtr--; + } + } + return (unsigned)(iStart - ip); } /* LZ4HC_protectDictEnd() : @@ -873,1286 +1048,1751 @@ LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern) */ static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex) { - return ((U32)((dictLimit - 1) - matchIndex) >= 3); + return ((U32)((dictLimit - 1) - matchIndex) >= 3); } typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e; -typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e; +typedef enum { favorCompressionRatio = 0, favorDecompressionSpeed } HCfavor_e; - -LZ4_FORCE_INLINE LZ4HC_match_t -LZ4HC_InsertAndGetWiderMatch ( - LZ4HC_CCtx_internal* const hc4, - const BYTE* const ip, - const BYTE* const iLowLimit, const BYTE* const iHighLimit, - int longest, - const int maxNbAttempts, - const int patternAnalysis, const int chainSwap, - const dictCtx_directive dict, - const HCfavor_e favorDecSpeed) +LZ4_FORCE_INLINE LZ4HC_match_t LZ4HC_InsertAndGetWiderMatch( + 
LZ4HC_CCtx_internal *const hc4, const BYTE *const ip, + const BYTE *const iLowLimit, const BYTE *const iHighLimit, int longest, + const int maxNbAttempts, const int patternAnalysis, const int chainSwap, + const dictCtx_directive dict, const HCfavor_e favorDecSpeed) { - U16* const chainTable = hc4->chainTable; - U32* const hashTable = hc4->hashTable; - const LZ4HC_CCtx_internal* const dictCtx = hc4->dictCtx; - const BYTE* const prefixPtr = hc4->prefixStart; - const U32 prefixIdx = hc4->dictLimit; - const U32 ipIndex = (U32)(ip - prefixPtr) + prefixIdx; - const int withinStartDistance = (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex); - const U32 lowestMatchIndex = (withinStartDistance) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX; - const BYTE* const dictStart = hc4->dictStart; - const U32 dictIdx = hc4->lowLimit; - const BYTE* const dictEnd = dictStart + prefixIdx - dictIdx; - int const lookBackLength = (int)(ip-iLowLimit); - int nbAttempts = maxNbAttempts; - U32 matchChainPos = 0; - U32 const pattern = LZ4_read32(ip); - U32 matchIndex; - repeat_state_e repeat = rep_untested; - size_t srcPatternLength = 0; - int offset = 0, sBack = 0; + U16 *const chainTable = hc4->chainTable; + U32 *const hashTable = hc4->hashTable; + const LZ4HC_CCtx_internal *const dictCtx = hc4->dictCtx; + const BYTE *const prefixPtr = hc4->prefixStart; + const U32 prefixIdx = hc4->dictLimit; + const U32 ipIndex = (U32)(ip - prefixPtr) + prefixIdx; + const int withinStartDistance = + (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex); + const U32 lowestMatchIndex = (withinStartDistance) ? 
+ hc4->lowLimit : + ipIndex - LZ4_DISTANCE_MAX; + const BYTE *const dictStart = hc4->dictStart; + const U32 dictIdx = hc4->lowLimit; + const BYTE *const dictEnd = dictStart + prefixIdx - dictIdx; + int const lookBackLength = (int)(ip - iLowLimit); + int nbAttempts = maxNbAttempts; + U32 matchChainPos = 0; + U32 const pattern = LZ4_read32(ip); + U32 matchIndex; + repeat_state_e repeat = rep_untested; + size_t srcPatternLength = 0; + int offset = 0, sBack = 0; - DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch"); - /* First Match */ - LZ4HC_Insert(hc4, ip); /* insert all prior positions up to ip (excluded) */ - matchIndex = hashTable[LZ4HC_hashPtr(ip)]; - DEBUGLOG(7, "First candidate match for pos %u found at index %u / %u (lowestMatchIndex)", - ipIndex, matchIndex, lowestMatchIndex); + DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch"); + /* First Match */ + LZ4HC_Insert(hc4, + ip); /* insert all prior positions up to ip (excluded) */ + matchIndex = hashTable[LZ4HC_hashPtr(ip)]; + DEBUGLOG( + 7, + "First candidate match for pos %u found at index %u / %u (lowestMatchIndex)", + ipIndex, matchIndex, lowestMatchIndex); - while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) { - int matchLength=0; - nbAttempts--; - assert(matchIndex < ipIndex); - if (favorDecSpeed && (ipIndex - matchIndex < 8)) { - /* do nothing: + while ((matchIndex >= lowestMatchIndex) && (nbAttempts > 0)) { + int matchLength = 0; + nbAttempts--; + assert(matchIndex < ipIndex); + if (favorDecSpeed && (ipIndex - matchIndex < 8)) { + /* do nothing: * favorDecSpeed intentionally skips matches with offset < 8 */ - } else if (matchIndex >= prefixIdx) { /* within current Prefix */ - const BYTE* const matchPtr = prefixPtr + (matchIndex - prefixIdx); - assert(matchPtr < ip); - assert(longest >= 1); - if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) { - if (LZ4_read32(matchPtr) == pattern) { - int const back = lookBackLength ? 
LZ4HC_countBack(ip, matchPtr, iLowLimit, prefixPtr) : 0; - matchLength = MINMATCH + (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); - matchLength -= back; - if (matchLength > longest) { - longest = matchLength; - offset = (int)(ipIndex - matchIndex); - sBack = back; - DEBUGLOG(7, "Found match of len=%i within prefix, offset=%i, back=%i", longest, offset, -back); - } } } - } else { /* lowestMatchIndex <= matchIndex < dictLimit : within Ext Dict */ - const BYTE* const matchPtr = dictStart + (matchIndex - dictIdx); - assert(matchIndex >= dictIdx); - if ( likely(matchIndex <= prefixIdx - 4) - && (LZ4_read32(matchPtr) == pattern) ) { - int back = 0; - const BYTE* vLimit = ip + (prefixIdx - matchIndex); - if (vLimit > iHighLimit) vLimit = iHighLimit; - matchLength = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; - if ((ip+matchLength == vLimit) && (vLimit < iHighLimit)) - matchLength += LZ4_count(ip+matchLength, prefixPtr, iHighLimit); - back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0; - matchLength -= back; - if (matchLength > longest) { - longest = matchLength; - offset = (int)(ipIndex - matchIndex); - sBack = back; - DEBUGLOG(7, "Found match of len=%i within dict, offset=%i, back=%i", longest, offset, -back); - } } } + } else if (matchIndex >= + prefixIdx) { /* within current Prefix */ + const BYTE *const matchPtr = + prefixPtr + (matchIndex - prefixIdx); + assert(matchPtr < ip); + assert(longest >= 1); + if (LZ4_read16(iLowLimit + longest - 1) == + LZ4_read16(matchPtr - lookBackLength + longest - + 1)) { + if (LZ4_read32(matchPtr) == pattern) { + int const back = + lookBackLength ? 
+ LZ4HC_countBack( + ip, matchPtr, + iLowLimit, + prefixPtr) : + 0; + matchLength = + MINMATCH + + (int)LZ4_count(ip + MINMATCH, + matchPtr + + MINMATCH, + iHighLimit); + matchLength -= back; + if (matchLength > longest) { + longest = matchLength; + offset = (int)(ipIndex - + matchIndex); + sBack = back; + DEBUGLOG( + 7, + "Found match of len=%i within prefix, offset=%i, back=%i", + longest, offset, -back); + } + } + } + } else { /* lowestMatchIndex <= matchIndex < dictLimit : within Ext Dict */ + const BYTE *const matchPtr = + dictStart + (matchIndex - dictIdx); + assert(matchIndex >= dictIdx); + if (likely(matchIndex <= prefixIdx - 4) && + (LZ4_read32(matchPtr) == pattern)) { + int back = 0; + const BYTE *vLimit = + ip + (prefixIdx - matchIndex); + if (vLimit > iHighLimit) + vLimit = iHighLimit; + matchLength = + (int)LZ4_count(ip + MINMATCH, + matchPtr + MINMATCH, + vLimit) + + MINMATCH; + if ((ip + matchLength == vLimit) && + (vLimit < iHighLimit)) + matchLength += + LZ4_count(ip + matchLength, + prefixPtr, + iHighLimit); + back = lookBackLength ? 
+ LZ4HC_countBack(ip, matchPtr, + iLowLimit, + dictStart) : + 0; + matchLength -= back; + if (matchLength > longest) { + longest = matchLength; + offset = (int)(ipIndex - matchIndex); + sBack = back; + DEBUGLOG( + 7, + "Found match of len=%i within dict, offset=%i, back=%i", + longest, offset, -back); + } + } + } - if (chainSwap && matchLength==longest) { /* better match => select a better chain */ - assert(lookBackLength==0); /* search forward only */ - if (matchIndex + (U32)longest <= ipIndex) { - int const kTrigger = 4; - U32 distanceToNextMatch = 1; - int const end = longest - MINMATCH + 1; - int step = 1; - int accel = 1 << kTrigger; - int pos; - for (pos = 0; pos < end; pos += step) { - U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos); - step = (accel++ >> kTrigger); - if (candidateDist > distanceToNextMatch) { - distanceToNextMatch = candidateDist; - matchChainPos = (U32)pos; - accel = 1 << kTrigger; - } } - if (distanceToNextMatch > 1) { - if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ - matchIndex -= distanceToNextMatch; - continue; - } } } + if (chainSwap && + matchLength == + longest) { /* better match => select a better chain */ + assert(lookBackLength == 0); /* search forward only */ + if (matchIndex + (U32)longest <= ipIndex) { + int const kTrigger = 4; + U32 distanceToNextMatch = 1; + int const end = longest - MINMATCH + 1; + int step = 1; + int accel = 1 << kTrigger; + int pos; + for (pos = 0; pos < end; pos += step) { + U32 const candidateDist = DELTANEXTU16( + chainTable, + matchIndex + (U32)pos); + step = (accel++ >> kTrigger); + if (candidateDist > + distanceToNextMatch) { + distanceToNextMatch = + candidateDist; + matchChainPos = (U32)pos; + accel = 1 << kTrigger; + } + } + if (distanceToNextMatch > 1) { + if (distanceToNextMatch > matchIndex) + break; /* avoid overflow */ + matchIndex -= distanceToNextMatch; + continue; + } + } + } - { U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex); - 
if (patternAnalysis && distNextMatch==1 && matchChainPos==0) { - U32 const matchCandidateIdx = matchIndex-1; - /* may be a repeated pattern */ - if (repeat == rep_untested) { - if ( ((pattern & 0xFFFF) == (pattern >> 16)) - & ((pattern & 0xFF) == (pattern >> 24)) ) { - DEBUGLOG(7, "Repeat pattern detected, char %02X", pattern >> 24); - repeat = rep_confirmed; - srcPatternLength = LZ4HC_countPattern(ip+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); - } else { - repeat = rep_not; - } } - if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex) - && LZ4HC_protectDictEnd(prefixIdx, matchCandidateIdx) ) { - const int extDict = matchCandidateIdx < prefixIdx; - const BYTE* const matchPtr = extDict ? dictStart + (matchCandidateIdx - dictIdx) : prefixPtr + (matchCandidateIdx - prefixIdx); - if (LZ4_read32(matchPtr) == pattern) { /* good candidate */ - const BYTE* const iLimit = extDict ? dictEnd : iHighLimit; - size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern); - if (extDict && matchPtr + forwardPatternLength == iLimit) { - U32 const rotatedPattern = LZ4HC_rotatePattern(forwardPatternLength, pattern); - forwardPatternLength += LZ4HC_countPattern(prefixPtr, iHighLimit, rotatedPattern); - } - { const BYTE* const lowestMatchPtr = extDict ? 
dictStart : prefixPtr; - size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern); - size_t currentSegmentLength; - if (!extDict - && matchPtr - backLength == prefixPtr - && dictIdx < prefixIdx) { - U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern); - backLength += LZ4HC_reverseCountPattern(dictEnd, dictStart, rotatedPattern); - } - /* Limit backLength not go further than lowestMatchIndex */ - backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex); - assert(matchCandidateIdx - backLength >= lowestMatchIndex); - currentSegmentLength = backLength + forwardPatternLength; - /* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */ - if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ - && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ - U32 const newMatchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ - if (LZ4HC_protectDictEnd(prefixIdx, newMatchIndex)) - matchIndex = newMatchIndex; - else { - /* Can only happen if started in the prefix */ - assert(newMatchIndex >= prefixIdx - 3 && newMatchIndex < prefixIdx && !extDict); - matchIndex = prefixIdx; - } - } else { - U32 const newMatchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */ - if (!LZ4HC_protectDictEnd(prefixIdx, newMatchIndex)) { - assert(newMatchIndex >= prefixIdx - 3 && newMatchIndex < prefixIdx && !extDict); - matchIndex = prefixIdx; - } else { - matchIndex = newMatchIndex; - if (lookBackLength==0) { /* no back possible */ - size_t const maxML = MIN(currentSegmentLength, srcPatternLength); - if ((size_t)longest < maxML) { - assert(prefixPtr - prefixIdx + matchIndex != 
ip); - if ((size_t)(ip - prefixPtr) + prefixIdx - matchIndex > LZ4_DISTANCE_MAX) break; - assert(maxML < 2 GB); - longest = (int)maxML; - offset = (int)(ipIndex - matchIndex); - assert(sBack == 0); - DEBUGLOG(7, "Found repeat pattern match of len=%i, offset=%i", longest, offset); - } - { U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex); - if (distToNextPattern > matchIndex) break; /* avoid overflow */ - matchIndex -= distToNextPattern; - } } } } } - continue; - } } - } } /* PA optimization */ + { + U32 const distNextMatch = + DELTANEXTU16(chainTable, matchIndex); + if (patternAnalysis && distNextMatch == 1 && + matchChainPos == 0) { + U32 const matchCandidateIdx = matchIndex - 1; + /* may be a repeated pattern */ + if (repeat == rep_untested) { + if (((pattern & 0xFFFF) == + (pattern >> 16)) & + ((pattern & 0xFF) == + (pattern >> 24))) { + DEBUGLOG( + 7, + "Repeat pattern detected, char %02X", + pattern >> 24); + repeat = rep_confirmed; + srcPatternLength = + LZ4HC_countPattern( + ip + sizeof(pattern), + iHighLimit, + pattern) + + sizeof(pattern); + } else { + repeat = rep_not; + } + } + if ((repeat == rep_confirmed) && + (matchCandidateIdx >= lowestMatchIndex) && + LZ4HC_protectDictEnd(prefixIdx, + matchCandidateIdx)) { + const int extDict = + matchCandidateIdx < prefixIdx; + const BYTE *const matchPtr = + extDict ? + dictStart + + (matchCandidateIdx - + dictIdx) : + prefixPtr + + (matchCandidateIdx - + prefixIdx); + if (LZ4_read32(matchPtr) == + pattern) { /* good candidate */ + const BYTE *const iLimit = + extDict ? 
dictEnd : + iHighLimit; + size_t forwardPatternLength = + LZ4HC_countPattern( + matchPtr + + sizeof(pattern), + iLimit, + pattern) + + sizeof(pattern); + if (extDict && + matchPtr + forwardPatternLength == + iLimit) { + U32 const rotatedPattern = + LZ4HC_rotatePattern( + forwardPatternLength, + pattern); + forwardPatternLength += + LZ4HC_countPattern( + prefixPtr, + iHighLimit, + rotatedPattern); + } + { + const BYTE *const lowestMatchPtr = + extDict ? + dictStart : + prefixPtr; + size_t backLength = + LZ4HC_reverseCountPattern( + matchPtr, + lowestMatchPtr, + pattern); + size_t currentSegmentLength; + if (!extDict && + matchPtr - backLength == + prefixPtr && + dictIdx < + prefixIdx) { + U32 const rotatedPattern = LZ4HC_rotatePattern( + (U32)(-(int)backLength), + pattern); + backLength += LZ4HC_reverseCountPattern( + dictEnd, + dictStart, + rotatedPattern); + } + /* Limit backLength not go further than lowestMatchIndex */ + backLength = + matchCandidateIdx - + MAX(matchCandidateIdx - + (U32)backLength, + lowestMatchIndex); + assert(matchCandidateIdx - + backLength >= + lowestMatchIndex); + currentSegmentLength = + backLength + + forwardPatternLength; + /* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */ + if ((currentSegmentLength >= + srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ + && + (forwardPatternLength <= + srcPatternLength)) { /* haven't reached this position yet */ + U32 const newMatchIndex = + matchCandidateIdx + + (U32)forwardPatternLength - + (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ + if (LZ4HC_protectDictEnd( + prefixIdx, + newMatchIndex)) + matchIndex = + newMatchIndex; + else { + /* Can only happen if started in the prefix */ + assert(newMatchIndex >= + prefixIdx - + 3 && + newMatchIndex < + prefixIdx && + !extDict); + matchIndex = + prefixIdx; + } + } else { + U32 const newMatchIndex = + matchCandidateIdx - + 
(U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */ + if (!LZ4HC_protectDictEnd( + prefixIdx, + newMatchIndex)) { + assert(newMatchIndex >= + prefixIdx - + 3 && + newMatchIndex < + prefixIdx && + !extDict); + matchIndex = + prefixIdx; + } else { + matchIndex = + newMatchIndex; + if (lookBackLength == + 0) { /* no back possible */ + size_t const maxML = + MIN(currentSegmentLength, + srcPatternLength); + if ((size_t)longest < + maxML) { + assert(prefixPtr - + prefixIdx + + matchIndex != + ip); + if ((size_t)(ip - + prefixPtr) + + prefixIdx - + matchIndex > + LZ4_DISTANCE_MAX) + break; + assert(maxML < + 2 GB); + longest = (int) + maxML; + offset = + (int)(ipIndex - + matchIndex); + assert(sBack == + 0); + DEBUGLOG( + 7, + "Found repeat pattern match of len=%i, offset=%i", + longest, + offset); + } + { + U32 const distToNextPattern = + DELTANEXTU16( + chainTable, + matchIndex); + if (distToNextPattern > + matchIndex) + break; /* avoid overflow */ + matchIndex -= + distToNextPattern; + } + } + } + } + } + continue; + } + } + } + } /* PA optimization */ - /* follow current chain */ - matchIndex -= DELTANEXTU16(chainTable, matchIndex + matchChainPos); + /* follow current chain */ + matchIndex -= + DELTANEXTU16(chainTable, matchIndex + matchChainPos); - } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ + } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ - if ( dict == usingDictCtxHc - && nbAttempts > 0 - && withinStartDistance) { - size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->prefixStart) + dictCtx->dictLimit; - U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)]; - assert(dictEndOffset <= 1 GB); - matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset; - if (dictMatchIndex>0) DEBUGLOG(7, "dictEndOffset = %zu, dictMatchIndex = %u => relative matchIndex = %i", dictEndOffset, dictMatchIndex, (int)dictMatchIndex - (int)dictEndOffset); - 
while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) { - const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + dictMatchIndex; + if (dict == usingDictCtxHc && nbAttempts > 0 && withinStartDistance) { + size_t const dictEndOffset = + (size_t)(dictCtx->end - dictCtx->prefixStart) + + dictCtx->dictLimit; + U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)]; + assert(dictEndOffset <= 1 GB); + matchIndex = + dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset; + if (dictMatchIndex > 0) + DEBUGLOG( + 7, + "dictEndOffset = %zu, dictMatchIndex = %u => relative matchIndex = %i", + dictEndOffset, dictMatchIndex, + (int)dictMatchIndex - (int)dictEndOffset); + while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && + nbAttempts--) { + const BYTE *const matchPtr = dictCtx->prefixStart - + dictCtx->dictLimit + + dictMatchIndex; - if (LZ4_read32(matchPtr) == pattern) { - int mlt; - int back = 0; - const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex); - if (vLimit > iHighLimit) vLimit = iHighLimit; - mlt = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; - back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->prefixStart) : 0; - mlt -= back; - if (mlt > longest) { - longest = mlt; - offset = (int)(ipIndex - matchIndex); - sBack = back; - DEBUGLOG(7, "found match of length %i within extDictCtx", longest); - } } + if (LZ4_read32(matchPtr) == pattern) { + int mlt; + int back = 0; + const BYTE *vLimit = + ip + (dictEndOffset - dictMatchIndex); + if (vLimit > iHighLimit) + vLimit = iHighLimit; + mlt = (int)LZ4_count(ip + MINMATCH, + matchPtr + MINMATCH, + vLimit) + + MINMATCH; + back = lookBackLength ? 
+ LZ4HC_countBack( + ip, matchPtr, iLowLimit, + dictCtx->prefixStart) : + 0; + mlt -= back; + if (mlt > longest) { + longest = mlt; + offset = (int)(ipIndex - matchIndex); + sBack = back; + DEBUGLOG( + 7, + "found match of length %i within extDictCtx", + longest); + } + } - { U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex); - dictMatchIndex -= nextOffset; - matchIndex -= nextOffset; - } } } + { + U32 const nextOffset = DELTANEXTU16( + dictCtx->chainTable, dictMatchIndex); + dictMatchIndex -= nextOffset; + matchIndex -= nextOffset; + } + } + } - { LZ4HC_match_t md; - assert(longest >= 0); - md.len = longest; - md.off = offset; - md.back = sBack; - return md; - } + { + LZ4HC_match_t md; + assert(longest >= 0); + md.len = longest; + md.off = offset; + md.back = sBack; + return md; + } } -LZ4_FORCE_INLINE LZ4HC_match_t -LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ - const BYTE* const ip, const BYTE* const iLimit, - const int maxNbAttempts, - const int patternAnalysis, - const dictCtx_directive dict) +LZ4_FORCE_INLINE LZ4HC_match_t LZ4HC_InsertAndFindBestMatch( + LZ4HC_CCtx_internal *const hc4, /* Index table will be updated */ + const BYTE *const ip, const BYTE *const iLimit, const int maxNbAttempts, + const int patternAnalysis, const dictCtx_directive dict) { - DEBUGLOG(7, "LZ4HC_InsertAndFindBestMatch"); - /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), + DEBUGLOG(7, "LZ4HC_InsertAndFindBestMatch"); + /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), * but this won't be the case here, as we define iLowLimit==ip, * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ - return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio); + return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, 
iLimit, MINMATCH - 1, + maxNbAttempts, patternAnalysis, + 0 /*chainSwap*/, dict, + favorCompressionRatio); } - -LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( - LZ4HC_CCtx_internal* const ctx, - const char* const source, - char* const dest, - int* srcSizePtr, - int const maxOutputSize, - int maxNbAttempts, - const limitedOutput_directive limit, - const dictCtx_directive dict - ) +LZ4_FORCE_INLINE int +LZ4HC_compress_hashChain(LZ4HC_CCtx_internal *const ctx, + const char *const source, char *const dest, + int *srcSizePtr, int const maxOutputSize, + int maxNbAttempts, const limitedOutput_directive limit, + const dictCtx_directive dict) { - const int inputSize = *srcSizePtr; - const int patternAnalysis = (maxNbAttempts > 128); /* levels 9+ */ + const int inputSize = *srcSizePtr; + const int patternAnalysis = (maxNbAttempts > 128); /* levels 9+ */ - const BYTE* ip = (const BYTE*) source; - const BYTE* anchor = ip; - const BYTE* const iend = ip + inputSize; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = (iend - LASTLITERALS); + const BYTE *ip = (const BYTE *)source; + const BYTE *anchor = ip; + const BYTE *const iend = ip + inputSize; + const BYTE *const mflimit = iend - MFLIMIT; + const BYTE *const matchlimit = (iend - LASTLITERALS); - BYTE* optr = (BYTE*) dest; - BYTE* op = (BYTE*) dest; - BYTE* oend = op + maxOutputSize; + BYTE *optr = (BYTE *)dest; + BYTE *op = (BYTE *)dest; + BYTE *oend = op + maxOutputSize; - const BYTE* start0; - const BYTE* start2 = NULL; - const BYTE* start3 = NULL; - LZ4HC_match_t m0, m1, m2, m3; - const LZ4HC_match_t nomatch = {0, 0, 0}; + const BYTE *start0; + const BYTE *start2 = NULL; + const BYTE *start3 = NULL; + LZ4HC_match_t m0, m1, m2, m3; + const LZ4HC_match_t nomatch = { 0, 0, 0 }; - /* init */ - DEBUGLOG(5, "LZ4HC_compress_hashChain (dict?=>%i)", dict); - *srcSizePtr = 0; - if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ - if (inputSize < LZ4_minLength) 
goto _last_literals; /* Input too small, no compression (all literals) */ + /* init */ + DEBUGLOG(5, "LZ4HC_compress_hashChain (dict?=>%i)", dict); + *srcSizePtr = 0; + if (limit == fillOutput) + oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ + if (inputSize < LZ4_minLength) + goto _last_literals; /* Input too small, no compression (all literals) */ - /* Main Loop */ - while (ip <= mflimit) { - m1 = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, maxNbAttempts, patternAnalysis, dict); - if (m1.len encode ML1 immediately */ - optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), - m1.len, m1.off, - limit, oend) ) - goto _dest_overflow; - continue; - } + if (m2.len <= + m1.len) { /* No better match => encode ML1 immediately */ + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), + m1.len, m1.off, limit, oend)) + goto _dest_overflow; + continue; + } - if (start0 < ip) { /* first match was skipped at least once */ - if (start2 < ip + m0.len) { /* squeezing ML1 between ML0(original ML1) and ML2 */ - ip = start0; m1 = m0; /* restore initial Match1 */ - } } + if (start0 < ip) { /* first match was skipped at least once */ + if (start2 < + ip + m0.len) { /* squeezing ML1 between ML0(original ML1) and ML2 */ + ip = start0; + m1 = m0; /* restore initial Match1 */ + } + } - /* Here, start0==ip */ - if ((start2 - ip) < 3) { /* First Match too small : removed */ - ip = start2; - m1 = m2; - goto _Search2; - } + /* Here, start0==ip */ + if ((start2 - ip) < 3) { /* First Match too small : removed */ + ip = start2; + m1 = m2; + goto _Search2; + } -_Search3: - if ((start2 - ip) < OPTIMAL_ML) { - int correction; - int new_ml = m1.len; - if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; - if (ip+new_ml > start2 + m2.len - MINMATCH) - new_ml = (int)(start2 - ip) + m2.len - MINMATCH; - correction = new_ml - (int)(start2 - ip); - if (correction > 0) { - start2 += correction; - m2.len -= correction; - } - } + _Search3: + if ((start2 - ip) < 
OPTIMAL_ML) { + int correction; + int new_ml = m1.len; + if (new_ml > OPTIMAL_ML) + new_ml = OPTIMAL_ML; + if (ip + new_ml > start2 + m2.len - MINMATCH) + new_ml = (int)(start2 - ip) + m2.len - MINMATCH; + correction = new_ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + m2.len -= correction; + } + } - if (start2 + m2.len <= mflimit) { - start3 = start2 + m2.len - 3; - m3 = LZ4HC_InsertAndGetWiderMatch(ctx, - start3, start2, matchlimit, m2.len, - maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio); - start3 += m3.back; - } else { - m3 = nomatch; /* do not search further */ - } + if (start2 + m2.len <= mflimit) { + start3 = start2 + m2.len - 3; + m3 = LZ4HC_InsertAndGetWiderMatch( + ctx, start3, start2, matchlimit, m2.len, + maxNbAttempts, patternAnalysis, 0, dict, + favorCompressionRatio); + start3 += m3.back; + } else { + m3 = nomatch; /* do not search further */ + } - if (m3.len <= m2.len) { /* No better match => encode ML1 and ML2 */ - /* ip & ref are known; Now for ml */ - if (start2 < ip+m1.len) m1.len = (int)(start2 - ip); - /* Now, encode 2 sequences */ - optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), - m1.len, m1.off, - limit, oend) ) - goto _dest_overflow; - ip = start2; - optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), - m2.len, m2.off, - limit, oend) ) { - m1 = m2; - goto _dest_overflow; - } - continue; - } + if (m3.len <= + m2.len) { /* No better match => encode ML1 and ML2 */ + /* ip & ref are known; Now for ml */ + if (start2 < ip + m1.len) + m1.len = (int)(start2 - ip); + /* Now, encode 2 sequences */ + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), + m1.len, m1.off, limit, oend)) + goto _dest_overflow; + ip = start2; + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), + m2.len, m2.off, limit, oend)) { + m1 = m2; + goto _dest_overflow; + } + continue; + } - if (start3 < ip+m1.len+3) { /* Not enough space for match 2 : remove it */ - if (start3 >= 
(ip+m1.len)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */ - if (start2 < ip+m1.len) { - int correction = (int)(ip+m1.len - start2); - start2 += correction; - m2.len -= correction; - if (m2.len < MINMATCH) { - start2 = start3; - m2 = m3; - } - } + if (start3 < + ip + m1.len + + 3) { /* Not enough space for match 2 : remove it */ + if (start3 >= + (ip + + m1.len)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */ + if (start2 < ip + m1.len) { + int correction = + (int)(ip + m1.len - start2); + start2 += correction; + m2.len -= correction; + if (m2.len < MINMATCH) { + start2 = start3; + m2 = m3; + } + } - optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), - m1.len, m1.off, - limit, oend) ) - goto _dest_overflow; - ip = start3; - m1 = m3; + optr = op; + if (LZ4HC_encodeSequence( + UPDATABLE(ip, op, anchor), m1.len, + m1.off, limit, oend)) + goto _dest_overflow; + ip = start3; + m1 = m3; - start0 = start2; - m0 = m2; - goto _Search2; - } + start0 = start2; + m0 = m2; + goto _Search2; + } - start2 = start3; - m2 = m3; - goto _Search3; - } + start2 = start3; + m2 = m3; + goto _Search3; + } - /* + /* * OK, now we have 3 ascending matches; * let's write the first one ML1. * ip & ref are known; Now decide ml. 
*/ - if (start2 < ip+m1.len) { - if ((start2 - ip) < OPTIMAL_ML) { - int correction; - if (m1.len > OPTIMAL_ML) m1.len = OPTIMAL_ML; - if (ip + m1.len > start2 + m2.len - MINMATCH) - m1.len = (int)(start2 - ip) + m2.len - MINMATCH; - correction = m1.len - (int)(start2 - ip); - if (correction > 0) { - start2 += correction; - m2.len -= correction; - } - } else { - m1.len = (int)(start2 - ip); - } - } - optr = op; - if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), - m1.len, m1.off, - limit, oend) ) - goto _dest_overflow; + if (start2 < ip + m1.len) { + if ((start2 - ip) < OPTIMAL_ML) { + int correction; + if (m1.len > OPTIMAL_ML) + m1.len = OPTIMAL_ML; + if (ip + m1.len > start2 + m2.len - MINMATCH) + m1.len = (int)(start2 - ip) + m2.len - + MINMATCH; + correction = m1.len - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + m2.len -= correction; + } + } else { + m1.len = (int)(start2 - ip); + } + } + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), m1.len, + m1.off, limit, oend)) + goto _dest_overflow; - /* ML2 becomes ML1 */ - ip = start2; m1 = m2; + /* ML2 becomes ML1 */ + ip = start2; + m1 = m2; - /* ML3 becomes ML2 */ - start2 = start3; m2 = m3; + /* ML3 becomes ML2 */ + start2 = start3; + m2 = m3; - /* let's find a new ML3 */ - goto _Search3; - } + /* let's find a new ML3 */ + goto _Search3; + } _last_literals: - /* Encode Last Literals */ - { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ - size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; - size_t const totalSize = 1 + llAdd + lastRunSize; - if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ - if (limit && (op + totalSize > oend)) { - if (limit == limitedOutput) return 0; - /* adapt lastRunSize to fill 'dest' */ - lastRunSize = (size_t)(oend - op) - 1 /*token*/; - llAdd = (lastRunSize + 256 - RUN_MASK) / 256; - lastRunSize -= llAdd; - } - DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize); - ip = anchor + lastRunSize; 
/* can be != iend if limit==fillOutput */ + /* Encode Last Literals */ + { + size_t lastRunSize = (size_t)(iend - anchor); /* literals */ + size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; + size_t const totalSize = 1 + llAdd + lastRunSize; + if (limit == fillOutput) + oend += LASTLITERALS; /* restore correct value */ + if (limit && (op + totalSize > oend)) { + if (limit == limitedOutput) + return 0; + /* adapt lastRunSize to fill 'dest' */ + lastRunSize = (size_t)(oend - op) - 1 /*token*/; + llAdd = (lastRunSize + 256 - RUN_MASK) / 256; + lastRunSize -= llAdd; + } + DEBUGLOG(6, "Final literal run : %i literals", + (int)lastRunSize); + ip = anchor + + lastRunSize; /* can be != iend if limit==fillOutput */ - if (lastRunSize >= RUN_MASK) { - size_t accumulator = lastRunSize - RUN_MASK; - *op++ = (RUN_MASK << ML_BITS); - for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; - *op++ = (BYTE) accumulator; - } else { - *op++ = (BYTE)(lastRunSize << ML_BITS); - } - LZ4_memcpy(op, anchor, lastRunSize); - op += lastRunSize; - } + if (lastRunSize >= RUN_MASK) { + size_t accumulator = lastRunSize - RUN_MASK; + *op++ = (RUN_MASK << ML_BITS); + for (; accumulator >= 255; accumulator -= 255) + *op++ = 255; + *op++ = (BYTE)accumulator; + } else { + *op++ = (BYTE)(lastRunSize << ML_BITS); + } + LZ4_memcpy(op, anchor, lastRunSize); + op += lastRunSize; + } - /* End */ - *srcSizePtr = (int) (((const char*)ip) - source); - return (int) (((char*)op)-dest); + /* End */ + *srcSizePtr = (int)(((const char *)ip) - source); + return (int)(((char *)op) - dest); _dest_overflow: - if (limit == fillOutput) { - /* Assumption : @ip, @anchor, @optr and @m1 must be set correctly */ - size_t const ll = (size_t)(ip - anchor); - size_t const ll_addbytes = (ll + 240) / 255; - size_t const ll_totalCost = 1 + ll_addbytes + ll; - BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */ - DEBUGLOG(6, "Last sequence overflowing"); - op = optr; /* restore correct out pointer */ - if (op + 
ll_totalCost <= maxLitPos) { - /* ll validated; now adjust match length */ - size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost)); - size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255); - assert(maxMlSize < INT_MAX); assert(m1.len >= 0); - if ((size_t)m1.len > maxMlSize) m1.len = (int)maxMlSize; - if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + m1.len >= MFLIMIT) { - LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), m1.len, m1.off, notLimited, oend); - } } - goto _last_literals; - } - /* compression failed */ - return 0; + if (limit == fillOutput) { + /* Assumption : @ip, @anchor, @optr and @m1 must be set correctly */ + size_t const ll = (size_t)(ip - anchor); + size_t const ll_addbytes = (ll + 240) / 255; + size_t const ll_totalCost = 1 + ll_addbytes + ll; + BYTE *const maxLitPos = + oend - 3; /* 2 for offset, 1 for token */ + DEBUGLOG(6, "Last sequence overflowing"); + op = optr; /* restore correct out pointer */ + if (op + ll_totalCost <= maxLitPos) { + /* ll validated; now adjust match length */ + size_t const bytesLeftForMl = + (size_t)(maxLitPos - (op + ll_totalCost)); + size_t const maxMlSize = MINMATCH + (ML_MASK - 1) + + (bytesLeftForMl * 255); + assert(maxMlSize < INT_MAX); + assert(m1.len >= 0); + if ((size_t)m1.len > maxMlSize) + m1.len = (int)maxMlSize; + if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - + 1 + m1.len >= + MFLIMIT) { + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), + m1.len, m1.off, notLimited, + oend); + } + } + goto _last_literals; + } + /* compression failed */ + return 0; } +static int LZ4HC_compress_optimal(LZ4HC_CCtx_internal *ctx, + const char *const source, char *dst, + int *srcSizePtr, int dstCapacity, + int const nbSearches, size_t sufficient_len, + const limitedOutput_directive limit, + int const fullUpdate, + const dictCtx_directive dict, + const HCfavor_e favorDecSpeed); -static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx, - const char* const source, char* dst, - 
int* srcSizePtr, int dstCapacity, - int const nbSearches, size_t sufficient_len, - const limitedOutput_directive limit, int const fullUpdate, - const dictCtx_directive dict, - const HCfavor_e favorDecSpeed); - -LZ4_FORCE_INLINE int -LZ4HC_compress_generic_internal ( - LZ4HC_CCtx_internal* const ctx, - const char* const src, - char* const dst, - int* const srcSizePtr, - int const dstCapacity, - int cLevel, - const limitedOutput_directive limit, - const dictCtx_directive dict - ) +LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal( + LZ4HC_CCtx_internal *const ctx, const char *const src, char *const dst, + int *const srcSizePtr, int const dstCapacity, int cLevel, + const limitedOutput_directive limit, const dictCtx_directive dict) { - DEBUGLOG(5, "LZ4HC_compress_generic_internal(src=%p, srcSize=%d)", - src, *srcSizePtr); + DEBUGLOG(5, "LZ4HC_compress_generic_internal(src=%p, srcSize=%d)", src, + *srcSizePtr); - if (limit == fillOutput && dstCapacity < 1) return 0; /* Impossible to store anything */ - if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */ + if (limit == fillOutput && dstCapacity < 1) + return 0; /* Impossible to store anything */ + if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) + return 0; /* Unsupported input size (too large or negative) */ - ctx->end += *srcSizePtr; - { cParams_t const cParam = LZ4HC_getCLevelParams(cLevel); - HCfavor_e const favor = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio; - int result; + ctx->end += *srcSizePtr; + { + cParams_t const cParam = LZ4HC_getCLevelParams(cLevel); + HCfavor_e const favor = ctx->favorDecSpeed ? 
+ favorDecompressionSpeed : + favorCompressionRatio; + int result; - if (cParam.strat == lz4mid) { - result = LZ4MID_compress(ctx, - src, dst, srcSizePtr, dstCapacity, - limit, dict); - } else if (cParam.strat == lz4hc) { - result = LZ4HC_compress_hashChain(ctx, - src, dst, srcSizePtr, dstCapacity, - cParam.nbSearches, limit, dict); - } else { - assert(cParam.strat == lz4opt); - result = LZ4HC_compress_optimal(ctx, - src, dst, srcSizePtr, dstCapacity, - cParam.nbSearches, cParam.targetLength, limit, - cLevel >= LZ4HC_CLEVEL_MAX, /* ultra mode */ - dict, favor); - } - if (result <= 0) ctx->dirty = 1; - return result; - } + if (cParam.strat == lz4mid) { + result = LZ4MID_compress(ctx, src, dst, srcSizePtr, + dstCapacity, limit, dict); + } else if (cParam.strat == lz4hc) { + result = LZ4HC_compress_hashChain( + ctx, src, dst, srcSizePtr, dstCapacity, + cParam.nbSearches, limit, dict); + } else { + assert(cParam.strat == lz4opt); + result = LZ4HC_compress_optimal( + ctx, src, dst, srcSizePtr, dstCapacity, + cParam.nbSearches, cParam.targetLength, limit, + cLevel >= LZ4HC_CLEVEL_MAX, /* ultra mode */ + dict, favor); + } + if (result <= 0) + ctx->dirty = 1; + return result; + } } -static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock); +static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal *ctxPtr, + const BYTE *newBlock); -static int -LZ4HC_compress_generic_noDictCtx ( - LZ4HC_CCtx_internal* const ctx, - const char* const src, - char* const dst, - int* const srcSizePtr, - int const dstCapacity, - int cLevel, - limitedOutput_directive limit - ) +static int LZ4HC_compress_generic_noDictCtx(LZ4HC_CCtx_internal *const ctx, + const char *const src, + char *const dst, + int *const srcSizePtr, + int const dstCapacity, int cLevel, + limitedOutput_directive limit) { - assert(ctx->dictCtx == NULL); - return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, noDictCtx); + assert(ctx->dictCtx == NULL); + return 
LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, + dstCapacity, cLevel, limit, + noDictCtx); } -static int isStateCompatible(const LZ4HC_CCtx_internal* ctx1, const LZ4HC_CCtx_internal* ctx2) +static int isStateCompatible(const LZ4HC_CCtx_internal *ctx1, + const LZ4HC_CCtx_internal *ctx2) { - int const isMid1 = LZ4HC_getCLevelParams(ctx1->compressionLevel).strat == lz4mid; - int const isMid2 = LZ4HC_getCLevelParams(ctx2->compressionLevel).strat == lz4mid; - return !(isMid1 ^ isMid2); + int const isMid1 = + LZ4HC_getCLevelParams(ctx1->compressionLevel).strat == lz4mid; + int const isMid2 = + LZ4HC_getCLevelParams(ctx2->compressionLevel).strat == lz4mid; + return !(isMid1 ^ isMid2); } -static int -LZ4HC_compress_generic_dictCtx ( - LZ4HC_CCtx_internal* const ctx, - const char* const src, - char* const dst, - int* const srcSizePtr, - int const dstCapacity, - int cLevel, - limitedOutput_directive limit - ) +static int LZ4HC_compress_generic_dictCtx(LZ4HC_CCtx_internal *const ctx, + const char *const src, + char *const dst, + int *const srcSizePtr, + int const dstCapacity, int cLevel, + limitedOutput_directive limit) { - const size_t position = (size_t)(ctx->end - ctx->prefixStart) + (ctx->dictLimit - ctx->lowLimit); - assert(ctx->dictCtx != NULL); - if (position >= 64 KB) { - ctx->dictCtx = NULL; - return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); - } else if (position == 0 && *srcSizePtr > 4 KB && isStateCompatible(ctx, ctx->dictCtx)) { - LZ4_memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal)); - LZ4HC_setExternalDict(ctx, (const BYTE *)src); - ctx->compressionLevel = (short)cLevel; - return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); - } else { - return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, usingDictCtxHc); - } + const size_t position = (size_t)(ctx->end - ctx->prefixStart) + + (ctx->dictLimit - ctx->lowLimit); + 
assert(ctx->dictCtx != NULL); + if (position >= 64 KB) { + ctx->dictCtx = NULL; + return LZ4HC_compress_generic_noDictCtx( + ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } else if (position == 0 && *srcSizePtr > 4 KB && + isStateCompatible(ctx, ctx->dictCtx)) { + LZ4_memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal)); + LZ4HC_setExternalDict(ctx, (const BYTE *)src); + ctx->compressionLevel = (short)cLevel; + return LZ4HC_compress_generic_noDictCtx( + ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } else { + return LZ4HC_compress_generic_internal(ctx, src, dst, + srcSizePtr, dstCapacity, + cLevel, limit, + usingDictCtxHc); + } } -static int -LZ4HC_compress_generic ( - LZ4HC_CCtx_internal* const ctx, - const char* const src, - char* const dst, - int* const srcSizePtr, - int const dstCapacity, - int cLevel, - limitedOutput_directive limit - ) +static int LZ4HC_compress_generic(LZ4HC_CCtx_internal *const ctx, + const char *const src, char *const dst, + int *const srcSizePtr, int const dstCapacity, + int cLevel, limitedOutput_directive limit) { - if (ctx->dictCtx == NULL) { - return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); - } else { - return LZ4HC_compress_generic_dictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); - } + if (ctx->dictCtx == NULL) { + return LZ4HC_compress_generic_noDictCtx( + ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } else { + return LZ4HC_compress_generic_dictCtx( + ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } } - -int LZ4_sizeofStateHC(void) { return (int)sizeof(LZ4_streamHC_t); } +int LZ4_sizeofStateHC(void) +{ + return (int)sizeof(LZ4_streamHC_t); +} static size_t LZ4_streamHC_t_alignment(void) { #if LZ4_ALIGN_TEST - typedef struct { char c; LZ4_streamHC_t t; } t_a; - return sizeof(t_a) - sizeof(LZ4_streamHC_t); + typedef struct { + char c; + LZ4_streamHC_t t; + } t_a; + return sizeof(t_a) - sizeof(LZ4_streamHC_t); #else - return 
1; /* effectively disabled */ + return 1; /* effectively disabled */ #endif } /* state is presumed correctly initialized, * in which case its size and alignment have already been validate */ -int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) +int LZ4_compress_HC_extStateHC_fastReset(void *state, const char *src, + char *dst, int srcSize, + int dstCapacity, int compressionLevel) { - LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)state)->internal_donotuse; - if (!LZ4_isAligned(state, LZ4_streamHC_t_alignment())) return 0; - LZ4_resetStreamHC_fast((LZ4_streamHC_t*)state, compressionLevel); - LZ4HC_init_internal (ctx, (const BYTE*)src); - if (dstCapacity < LZ4_compressBound(srcSize)) - return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, limitedOutput); - else - return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, notLimited); + LZ4HC_CCtx_internal *const ctx = + &((LZ4_streamHC_t *)state)->internal_donotuse; + if (!LZ4_isAligned(state, LZ4_streamHC_t_alignment())) + return 0; + LZ4_resetStreamHC_fast((LZ4_streamHC_t *)state, compressionLevel); + LZ4HC_init_internal(ctx, (const BYTE *)src); + if (dstCapacity < LZ4_compressBound(srcSize)) + return LZ4HC_compress_generic(ctx, src, dst, &srcSize, + dstCapacity, compressionLevel, + limitedOutput); + else + return LZ4HC_compress_generic(ctx, src, dst, &srcSize, + dstCapacity, compressionLevel, + notLimited); } -int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) +int LZ4_compress_HC_extStateHC(void *state, const char *src, char *dst, + int srcSize, int dstCapacity, + int compressionLevel) { - LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx)); - if (ctx==NULL) return 0; /* init failure */ - return LZ4_compress_HC_extStateHC_fastReset(state, src, dst, srcSize, dstCapacity, 
compressionLevel); + LZ4_streamHC_t *const ctx = LZ4_initStreamHC(state, sizeof(*ctx)); + if (ctx == NULL) + return 0; /* init failure */ + return LZ4_compress_HC_extStateHC_fastReset( + state, src, dst, srcSize, dstCapacity, compressionLevel); } -int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel, void *wrkmem) +int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity, + int compressionLevel, void *wrkmem) { - DEBUGLOG(5, "LZ4_compress_HC") - return LZ4_compress_HC_extStateHC(wrkmem, src, dst, srcSize, dstCapacity, compressionLevel); + DEBUGLOG(5, "LZ4_compress_HC") + return LZ4_compress_HC_extStateHC(wrkmem, src, dst, srcSize, + dstCapacity, compressionLevel); } EXPORT_SYMBOL(LZ4_compress_HC); /* state is presumed sized correctly (>= sizeof(LZ4_streamHC_t)) */ -int LZ4_compress_HC_destSize(void* state, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel) +int LZ4_compress_HC_destSize(void *state, const char *source, char *dest, + int *sourceSizePtr, int targetDestSize, int cLevel) { - LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx)); - if (ctx==NULL) return 0; /* init failure */ - LZ4HC_init_internal(&ctx->internal_donotuse, (const BYTE*) source); - LZ4_setCompressionLevel(ctx, cLevel); - return LZ4HC_compress_generic(&ctx->internal_donotuse, source, dest, sourceSizePtr, targetDestSize, cLevel, fillOutput); + LZ4_streamHC_t *const ctx = LZ4_initStreamHC(state, sizeof(*ctx)); + if (ctx == NULL) + return 0; /* init failure */ + LZ4HC_init_internal(&ctx->internal_donotuse, (const BYTE *)source); + LZ4_setCompressionLevel(ctx, cLevel); + return LZ4HC_compress_generic(&ctx->internal_donotuse, source, dest, + sourceSizePtr, targetDestSize, cLevel, + fillOutput); } - - /************************************** * Streaming Functions **************************************/ /* allocation */ #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) 
-LZ4_streamHC_t* LZ4_createStreamHC(void) +LZ4_streamHC_t *LZ4_createStreamHC(void) { - LZ4_streamHC_t* const state = - (LZ4_streamHC_t*)ALLOC_AND_ZERO(sizeof(LZ4_streamHC_t)); - if (state == NULL) return NULL; - LZ4_setCompressionLevel(state, LZ4HC_CLEVEL_DEFAULT); - return state; + LZ4_streamHC_t *const state = + (LZ4_streamHC_t *)ALLOC_AND_ZERO(sizeof(LZ4_streamHC_t)); + if (state == NULL) + return NULL; + LZ4_setCompressionLevel(state, LZ4HC_CLEVEL_DEFAULT); + return state; } -int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) +int LZ4_freeStreamHC(LZ4_streamHC_t *LZ4_streamHCPtr) { - DEBUGLOG(4, "LZ4_freeStreamHC(%p)", LZ4_streamHCPtr); - if (!LZ4_streamHCPtr) return 0; /* support free on NULL */ - FREEMEM(LZ4_streamHCPtr); - return 0; + DEBUGLOG(4, "LZ4_freeStreamHC(%p)", LZ4_streamHCPtr); + if (!LZ4_streamHCPtr) + return 0; /* support free on NULL */ + FREEMEM(LZ4_streamHCPtr); + return 0; } #endif - -LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size) +LZ4_streamHC_t *LZ4_initStreamHC(void *buffer, size_t size) { - LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)buffer; - DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", buffer, (unsigned)size); - /* check conditions */ - if (buffer == NULL) return NULL; - if (size < sizeof(LZ4_streamHC_t)) return NULL; - if (!LZ4_isAligned(buffer, LZ4_streamHC_t_alignment())) return NULL; - /* init */ - { LZ4HC_CCtx_internal* const hcstate = &(LZ4_streamHCPtr->internal_donotuse); - MEM_INIT(hcstate, 0, sizeof(*hcstate)); } - LZ4_setCompressionLevel(LZ4_streamHCPtr, LZ4HC_CLEVEL_DEFAULT); - return LZ4_streamHCPtr; + LZ4_streamHC_t *const LZ4_streamHCPtr = (LZ4_streamHC_t *)buffer; + DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", buffer, (unsigned)size); + /* check conditions */ + if (buffer == NULL) + return NULL; + if (size < sizeof(LZ4_streamHC_t)) + return NULL; + if (!LZ4_isAligned(buffer, LZ4_streamHC_t_alignment())) + return NULL; + /* init */ + { + LZ4HC_CCtx_internal *const hcstate = + 
&(LZ4_streamHCPtr->internal_donotuse); + MEM_INIT(hcstate, 0, sizeof(*hcstate)); + } + LZ4_setCompressionLevel(LZ4_streamHCPtr, LZ4HC_CLEVEL_DEFAULT); + return LZ4_streamHCPtr; } /* just a stub */ -void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) +void LZ4_resetStreamHC(LZ4_streamHC_t *LZ4_streamHCPtr, int compressionLevel) { - LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); - LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); + LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); + LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); } -void LZ4_resetStreamHC_fast (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) +void LZ4_resetStreamHC_fast(LZ4_streamHC_t *LZ4_streamHCPtr, + int compressionLevel) { - LZ4HC_CCtx_internal* const s = &LZ4_streamHCPtr->internal_donotuse; - DEBUGLOG(5, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel); - if (s->dirty) { - LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); - } else { - assert(s->end >= s->prefixStart); - s->dictLimit += (U32)(s->end - s->prefixStart); - s->prefixStart = NULL; - s->end = NULL; - s->dictCtx = NULL; - } - LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); + LZ4HC_CCtx_internal *const s = &LZ4_streamHCPtr->internal_donotuse; + DEBUGLOG(5, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, + compressionLevel); + if (s->dirty) { + LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); + } else { + assert(s->end >= s->prefixStart); + s->dictLimit += (U32)(s->end - s->prefixStart); + s->prefixStart = NULL; + s->end = NULL; + s->dictCtx = NULL; + } + LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); } -void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) +void LZ4_setCompressionLevel(LZ4_streamHC_t *LZ4_streamHCPtr, + int compressionLevel) { - DEBUGLOG(5, "LZ4_setCompressionLevel(%p, %d)", LZ4_streamHCPtr, compressionLevel); - if (compressionLevel < 1) 
compressionLevel = LZ4HC_CLEVEL_DEFAULT; - if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX; - LZ4_streamHCPtr->internal_donotuse.compressionLevel = (short)compressionLevel; + DEBUGLOG(5, "LZ4_setCompressionLevel(%p, %d)", LZ4_streamHCPtr, + compressionLevel); + if (compressionLevel < 1) + compressionLevel = LZ4HC_CLEVEL_DEFAULT; + if (compressionLevel > LZ4HC_CLEVEL_MAX) + compressionLevel = LZ4HC_CLEVEL_MAX; + LZ4_streamHCPtr->internal_donotuse.compressionLevel = + (short)compressionLevel; } -void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor) +void LZ4_favorDecompressionSpeed(LZ4_streamHC_t *LZ4_streamHCPtr, int favor) { - LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = (favor!=0); + LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = (favor != 0); } /* LZ4_loadDictHC() : * LZ4_streamHCPtr is presumed properly initialized */ -int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, - const char* dictionary, int dictSize) +int LZ4_loadDictHC(LZ4_streamHC_t *LZ4_streamHCPtr, const char *dictionary, + int dictSize) { - LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; - cParams_t cp; - DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d, clevel=%d)", LZ4_streamHCPtr, dictionary, dictSize, ctxPtr->compressionLevel); - assert(dictSize >= 0); - assert(LZ4_streamHCPtr != NULL); - if (dictSize > 64 KB) { - dictionary += (size_t)dictSize - 64 KB; - dictSize = 64 KB; - } - /* need a full initialization, there are bad side-effects when using resetFast() */ - { int const cLevel = ctxPtr->compressionLevel; - LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); - LZ4_setCompressionLevel(LZ4_streamHCPtr, cLevel); - cp = LZ4HC_getCLevelParams(cLevel); - } - LZ4HC_init_internal (ctxPtr, (const BYTE*)dictionary); - ctxPtr->end = (const BYTE*)dictionary + dictSize; - if (cp.strat == lz4mid) { - LZ4MID_fillHTable (ctxPtr, dictionary, (size_t)dictSize); - } else { - if (dictSize >= LZ4HC_HASHSIZE) 
LZ4HC_Insert (ctxPtr, ctxPtr->end-3); - } - return dictSize; + LZ4HC_CCtx_internal *const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; + cParams_t cp; + DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d, clevel=%d)", + LZ4_streamHCPtr, dictionary, dictSize, + ctxPtr->compressionLevel); + assert(dictSize >= 0); + assert(LZ4_streamHCPtr != NULL); + if (dictSize > 64 KB) { + dictionary += (size_t)dictSize - 64 KB; + dictSize = 64 KB; + } + /* need a full initialization, there are bad side-effects when using resetFast() */ + { + int const cLevel = ctxPtr->compressionLevel; + LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); + LZ4_setCompressionLevel(LZ4_streamHCPtr, cLevel); + cp = LZ4HC_getCLevelParams(cLevel); + } + LZ4HC_init_internal(ctxPtr, (const BYTE *)dictionary); + ctxPtr->end = (const BYTE *)dictionary + dictSize; + if (cp.strat == lz4mid) { + LZ4MID_fillHTable(ctxPtr, dictionary, (size_t)dictSize); + } else { + if (dictSize >= LZ4HC_HASHSIZE) + LZ4HC_Insert(ctxPtr, ctxPtr->end - 3); + } + return dictSize; } -void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream) { - working_stream->internal_donotuse.dictCtx = dictionary_stream != NULL ? &(dictionary_stream->internal_donotuse) : NULL; +void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, + const LZ4_streamHC_t *dictionary_stream) +{ + working_stream->internal_donotuse.dictCtx = + dictionary_stream != NULL ? 
+ &(dictionary_stream->internal_donotuse) : + NULL; } /* compression */ -static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock) +static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal *ctxPtr, + const BYTE *newBlock) { - DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock); - if ( (ctxPtr->end >= ctxPtr->prefixStart + 4) - && (LZ4HC_getCLevelParams(ctxPtr->compressionLevel).strat != lz4mid) ) { - LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */ - } + DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock); + if ((ctxPtr->end >= ctxPtr->prefixStart + 4) && + (LZ4HC_getCLevelParams(ctxPtr->compressionLevel).strat != lz4mid)) { + LZ4HC_Insert( + ctxPtr, + ctxPtr->end - + 3); /* Referencing remaining dictionary content */ + } - /* Only one memory segment for extDict, so any previous extDict is lost at this stage */ - ctxPtr->lowLimit = ctxPtr->dictLimit; - ctxPtr->dictStart = ctxPtr->prefixStart; - ctxPtr->dictLimit += (U32)(ctxPtr->end - ctxPtr->prefixStart); - ctxPtr->prefixStart = newBlock; - ctxPtr->end = newBlock; - ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */ + /* Only one memory segment for extDict, so any previous extDict is lost at this stage */ + ctxPtr->lowLimit = ctxPtr->dictLimit; + ctxPtr->dictStart = ctxPtr->prefixStart; + ctxPtr->dictLimit += (U32)(ctxPtr->end - ctxPtr->prefixStart); + ctxPtr->prefixStart = newBlock; + ctxPtr->end = newBlock; + ctxPtr->nextToUpdate = + ctxPtr->dictLimit; /* match referencing will resume from there */ - /* cannot reference an extDict and a dictCtx at the same time */ - ctxPtr->dictCtx = NULL; + /* cannot reference an extDict and a dictCtx at the same time */ + ctxPtr->dictCtx = NULL; } -static int -LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr, - const char* src, char* dst, - int* srcSizePtr, int dstCapacity, - limitedOutput_directive limit) +static int 
LZ4_compressHC_continue_generic(LZ4_streamHC_t *LZ4_streamHCPtr, + const char *src, char *dst, + int *srcSizePtr, int dstCapacity, + limitedOutput_directive limit) { - LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; - DEBUGLOG(5, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)", - LZ4_streamHCPtr, src, *srcSizePtr, limit); - assert(ctxPtr != NULL); - /* auto-init if forgotten */ - if (ctxPtr->prefixStart == NULL) - LZ4HC_init_internal (ctxPtr, (const BYTE*) src); + LZ4HC_CCtx_internal *const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; + DEBUGLOG( + 5, + "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)", + LZ4_streamHCPtr, src, *srcSizePtr, limit); + assert(ctxPtr != NULL); + /* auto-init if forgotten */ + if (ctxPtr->prefixStart == NULL) + LZ4HC_init_internal(ctxPtr, (const BYTE *)src); - /* Check overflow */ - if ((size_t)(ctxPtr->end - ctxPtr->prefixStart) + ctxPtr->dictLimit > 2 GB) { - size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->prefixStart); - if (dictSize > 64 KB) dictSize = 64 KB; - LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize); - } + /* Check overflow */ + if ((size_t)(ctxPtr->end - ctxPtr->prefixStart) + ctxPtr->dictLimit > + 2 GB) { + size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->prefixStart); + if (dictSize > 64 KB) + dictSize = 64 KB; + LZ4_loadDictHC(LZ4_streamHCPtr, + (const char *)(ctxPtr->end) - dictSize, + (int)dictSize); + } - /* Check if blocks follow each other */ - if ((const BYTE*)src != ctxPtr->end) - LZ4HC_setExternalDict(ctxPtr, (const BYTE*)src); + /* Check if blocks follow each other */ + if ((const BYTE *)src != ctxPtr->end) + LZ4HC_setExternalDict(ctxPtr, (const BYTE *)src); - /* Check overlapping input/dictionary space */ - { const BYTE* sourceEnd = (const BYTE*) src + *srcSizePtr; - const BYTE* const dictBegin = ctxPtr->dictStart; - const BYTE* const dictEnd = ctxPtr->dictStart + (ctxPtr->dictLimit - 
ctxPtr->lowLimit); - if ((sourceEnd > dictBegin) && ((const BYTE*)src < dictEnd)) { - if (sourceEnd > dictEnd) sourceEnd = dictEnd; - ctxPtr->lowLimit += (U32)(sourceEnd - ctxPtr->dictStart); - ctxPtr->dictStart += (U32)(sourceEnd - ctxPtr->dictStart); - /* invalidate dictionary is it's too small */ - if (ctxPtr->dictLimit - ctxPtr->lowLimit < LZ4HC_HASHSIZE) { - ctxPtr->lowLimit = ctxPtr->dictLimit; - ctxPtr->dictStart = ctxPtr->prefixStart; - } } } + /* Check overlapping input/dictionary space */ + { + const BYTE *sourceEnd = (const BYTE *)src + *srcSizePtr; + const BYTE *const dictBegin = ctxPtr->dictStart; + const BYTE *const dictEnd = + ctxPtr->dictStart + + (ctxPtr->dictLimit - ctxPtr->lowLimit); + if ((sourceEnd > dictBegin) && ((const BYTE *)src < dictEnd)) { + if (sourceEnd > dictEnd) + sourceEnd = dictEnd; + ctxPtr->lowLimit += + (U32)(sourceEnd - ctxPtr->dictStart); + ctxPtr->dictStart += + (U32)(sourceEnd - ctxPtr->dictStart); + /* invalidate dictionary is it's too small */ + if (ctxPtr->dictLimit - ctxPtr->lowLimit < + LZ4HC_HASHSIZE) { + ctxPtr->lowLimit = ctxPtr->dictLimit; + ctxPtr->dictStart = ctxPtr->prefixStart; + } + } + } - return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit); + return LZ4HC_compress_generic(ctxPtr, src, dst, srcSizePtr, dstCapacity, + ctxPtr->compressionLevel, limit); } -int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity) +int LZ4_compress_HC_continue(LZ4_streamHC_t *LZ4_streamHCPtr, const char *src, + char *dst, int srcSize, int dstCapacity) { - DEBUGLOG(5, "LZ4_compress_HC_continue"); - if (dstCapacity < LZ4_compressBound(srcSize)) - return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput); - else - return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, notLimited); + DEBUGLOG(5, "LZ4_compress_HC_continue"); + if 
(dstCapacity < LZ4_compressBound(srcSize)) + return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, + dst, &srcSize, + dstCapacity, + limitedOutput); + else + return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, + dst, &srcSize, + dstCapacity, notLimited); } -int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize) +int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t *LZ4_streamHCPtr, + const char *src, char *dst, + int *srcSizePtr, int targetDestSize) { - return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, fillOutput); + return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, + srcSizePtr, targetDestSize, + fillOutput); } - /* LZ4_saveDictHC : * save history content * into a user-provided buffer * which is then used to continue compression */ -int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize) +int LZ4_saveDictHC(LZ4_streamHC_t *LZ4_streamHCPtr, char *safeBuffer, + int dictSize) { - LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse; - int const prefixSize = (int)(streamPtr->end - streamPtr->prefixStart); - DEBUGLOG(5, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize); - assert(prefixSize >= 0); - if (dictSize > 64 KB) dictSize = 64 KB; - if (dictSize < 4) dictSize = 0; - if (dictSize > prefixSize) dictSize = prefixSize; - if (safeBuffer == NULL) assert(dictSize == 0); - if (dictSize > 0) - LZ4_memmove(safeBuffer, streamPtr->end - dictSize, (size_t)dictSize); - { U32 const endIndex = (U32)(streamPtr->end - streamPtr->prefixStart) + streamPtr->dictLimit; - streamPtr->end = (safeBuffer == NULL) ? 
NULL : (const BYTE*)safeBuffer + dictSize; - streamPtr->prefixStart = (const BYTE*)safeBuffer; - streamPtr->dictLimit = endIndex - (U32)dictSize; - streamPtr->lowLimit = endIndex - (U32)dictSize; - streamPtr->dictStart = streamPtr->prefixStart; - if (streamPtr->nextToUpdate < streamPtr->dictLimit) - streamPtr->nextToUpdate = streamPtr->dictLimit; - } - return dictSize; + LZ4HC_CCtx_internal *const streamPtr = + &LZ4_streamHCPtr->internal_donotuse; + int const prefixSize = (int)(streamPtr->end - streamPtr->prefixStart); + DEBUGLOG(5, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, + dictSize); + assert(prefixSize >= 0); + if (dictSize > 64 KB) + dictSize = 64 KB; + if (dictSize < 4) + dictSize = 0; + if (dictSize > prefixSize) + dictSize = prefixSize; + if (safeBuffer == NULL) + assert(dictSize == 0); + if (dictSize > 0) + LZ4_memmove(safeBuffer, streamPtr->end - dictSize, + (size_t)dictSize); + { + U32 const endIndex = + (U32)(streamPtr->end - streamPtr->prefixStart) + + streamPtr->dictLimit; + streamPtr->end = (safeBuffer == NULL) ? 
+ NULL : + (const BYTE *)safeBuffer + dictSize; + streamPtr->prefixStart = (const BYTE *)safeBuffer; + streamPtr->dictLimit = endIndex - (U32)dictSize; + streamPtr->lowLimit = endIndex - (U32)dictSize; + streamPtr->dictStart = streamPtr->prefixStart; + if (streamPtr->nextToUpdate < streamPtr->dictLimit) + streamPtr->nextToUpdate = streamPtr->dictLimit; + } + return dictSize; } - /* ================================================ * LZ4 Optimal parser (levels [LZ4HC_CLEVEL_OPT_MIN - LZ4HC_CLEVEL_MAX]) * ===============================================*/ typedef struct { - int price; - int off; - int mlen; - int litlen; + int price; + int off; + int mlen; + int litlen; } LZ4HC_optimal_t; /* price in bytes */ LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen) { - int price = litlen; - assert(litlen >= 0); - if (litlen >= (int)RUN_MASK) - price += 1 + ((litlen-(int)RUN_MASK) / 255); - return price; + int price = litlen; + assert(litlen >= 0); + if (litlen >= (int)RUN_MASK) + price += 1 + ((litlen - (int)RUN_MASK) / 255); + return price; } /* requires mlen >= MINMATCH */ LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) { - int price = 1 + 2 ; /* token + 16-bit offset */ - assert(litlen >= 0); - assert(mlen >= MINMATCH); + int price = 1 + 2; /* token + 16-bit offset */ + assert(litlen >= 0); + assert(mlen >= MINMATCH); - price += LZ4HC_literalsPrice(litlen); + price += LZ4HC_literalsPrice(litlen); - if (mlen >= (int)(ML_MASK+MINMATCH)) - price += 1 + ((mlen-(int)(ML_MASK+MINMATCH)) / 255); + if (mlen >= (int)(ML_MASK + MINMATCH)) + price += 1 + ((mlen - (int)(ML_MASK + MINMATCH)) / 255); - return price; + return price; } -LZ4_FORCE_INLINE LZ4HC_match_t -LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, - const BYTE* ip, const BYTE* const iHighLimit, - int minLen, int nbSearches, - const dictCtx_directive dict, - const HCfavor_e favorDecSpeed) +LZ4_FORCE_INLINE LZ4HC_match_t LZ4HC_FindLongerMatch( + LZ4HC_CCtx_internal *const ctx, const BYTE *ip, 
+ const BYTE *const iHighLimit, int minLen, int nbSearches, + const dictCtx_directive dict, const HCfavor_e favorDecSpeed) { - LZ4HC_match_t const match0 = { 0 , 0, 0 }; - /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), + LZ4HC_match_t const match0 = { 0, 0, 0 }; + /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), * but this won't be the case here, as we define iLowLimit==ip, ** so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ - LZ4HC_match_t md = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed); - assert(md.back == 0); - if (md.len <= minLen) return match0; - if (favorDecSpeed) { - if ((md.len>18) & (md.len<=36)) md.len=18; /* favor dec.speed (shortcut) */ - } - return md; + LZ4HC_match_t md = LZ4HC_InsertAndGetWiderMatch( + ctx, ip, ip, iHighLimit, minLen, nbSearches, + 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed); + assert(md.back == 0); + if (md.len <= minLen) + return match0; + if (favorDecSpeed) { + if ((md.len > 18) & (md.len <= 36)) + md.len = 18; /* favor dec.speed (shortcut) */ + } + return md; } - -static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, - const char* const source, - char* dst, - int* srcSizePtr, - int dstCapacity, - int const nbSearches, - size_t sufficient_len, - const limitedOutput_directive limit, - int const fullUpdate, - const dictCtx_directive dict, - const HCfavor_e favorDecSpeed) +static int LZ4HC_compress_optimal(LZ4HC_CCtx_internal *ctx, + const char *const source, char *dst, + int *srcSizePtr, int dstCapacity, + int const nbSearches, size_t sufficient_len, + const limitedOutput_directive limit, + int const fullUpdate, + const dictCtx_directive dict, + const HCfavor_e favorDecSpeed) { - int retval = 0; + int retval = 0; #define TRAILING_LITERALS 3 -#if defined(LZ4HC_HEAPMODE) && 
LZ4HC_HEAPMODE==1 - LZ4HC_optimal_t* const opt = (LZ4HC_optimal_t*)ALLOC(sizeof(LZ4HC_optimal_t) * (LZ4_OPT_NUM + TRAILING_LITERALS)); +#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE == 1 + LZ4HC_optimal_t *const opt = (LZ4HC_optimal_t *)ALLOC( + sizeof(LZ4HC_optimal_t) * (LZ4_OPT_NUM + TRAILING_LITERALS)); #else - LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which is a bit large for stack... */ + LZ4HC_optimal_t + opt[LZ4_OPT_NUM + + TRAILING_LITERALS]; /* ~64 KB, which is a bit large for stack... */ #endif - const BYTE* ip = (const BYTE*) source; - const BYTE* anchor = ip; - const BYTE* const iend = ip + *srcSizePtr; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = iend - LASTLITERALS; - BYTE* op = (BYTE*) dst; - BYTE* opSaved = (BYTE*) dst; - BYTE* oend = op + dstCapacity; - int ovml = MINMATCH; /* overflow - last sequence */ - int ovoff = 0; + const BYTE *ip = (const BYTE *)source; + const BYTE *anchor = ip; + const BYTE *const iend = ip + *srcSizePtr; + const BYTE *const mflimit = iend - MFLIMIT; + const BYTE *const matchlimit = iend - LASTLITERALS; + BYTE *op = (BYTE *)dst; + BYTE *opSaved = (BYTE *)dst; + BYTE *oend = op + dstCapacity; + int ovml = MINMATCH; /* overflow - last sequence */ + int ovoff = 0; - /* init */ -#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 - if (opt == NULL) goto _return_label; + /* init */ +#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE == 1 + if (opt == NULL) + goto _return_label; #endif - DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity); - *srcSizePtr = 0; - if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ - if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1; + DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, + (unsigned)dstCapacity); + *srcSizePtr = 0; + if (limit == fillOutput) + oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ + if (sufficient_len >= 
LZ4_OPT_NUM) + sufficient_len = LZ4_OPT_NUM - 1; - /* Main Loop */ - while (ip <= mflimit) { - int const llen = (int)(ip - anchor); - int best_mlen, best_off; - int cur, last_match_pos = 0; + /* Main Loop */ + while (ip <= mflimit) { + int const llen = (int)(ip - anchor); + int best_mlen, best_off; + int cur, last_match_pos = 0; - LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed); - if (firstMatch.len==0) { ip++; continue; } + LZ4HC_match_t const firstMatch = + LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH - 1, + nbSearches, dict, favorDecSpeed); + if (firstMatch.len == 0) { + ip++; + continue; + } - if ((size_t)firstMatch.len > sufficient_len) { - /* good enough solution : immediate encoding */ - int const firstML = firstMatch.len; - opSaved = op; - if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, firstMatch.off, limit, oend) ) { /* updates ip, op and anchor */ - ovml = firstML; - ovoff = firstMatch.off; - goto _dest_overflow; - } - continue; - } + if ((size_t)firstMatch.len > sufficient_len) { + /* good enough solution : immediate encoding */ + int const firstML = firstMatch.len; + opSaved = op; + if (LZ4HC_encodeSequence( + UPDATABLE(ip, op, anchor), firstML, + firstMatch.off, limit, + oend)) { /* updates ip, op and anchor */ + ovml = firstML; + ovoff = firstMatch.off; + goto _dest_overflow; + } + continue; + } - /* set prices for first positions (literals) */ - { int rPos; - for (rPos = 0 ; rPos < MINMATCH ; rPos++) { - int const cost = LZ4HC_literalsPrice(llen + rPos); - opt[rPos].mlen = 1; - opt[rPos].off = 0; - opt[rPos].litlen = llen + rPos; - opt[rPos].price = cost; - DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", - rPos, cost, opt[rPos].litlen); - } } - /* set prices using initial match */ - { int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ - int const offset = firstMatch.off; - int mlen; - assert(matchML < 
LZ4_OPT_NUM); - for (mlen = MINMATCH ; mlen <= matchML ; mlen++) { - int const cost = LZ4HC_sequencePrice(llen, mlen); - opt[mlen].mlen = mlen; - opt[mlen].off = offset; - opt[mlen].litlen = llen; - opt[mlen].price = cost; - DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup", - mlen, cost, mlen); - } } - last_match_pos = firstMatch.len; - { int addLit; - for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { - opt[last_match_pos+addLit].mlen = 1; /* literal */ - opt[last_match_pos+addLit].off = 0; - opt[last_match_pos+addLit].litlen = addLit; - opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); - DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", - last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); - } } + /* set prices for first positions (literals) */ + { + int rPos; + for (rPos = 0; rPos < MINMATCH; rPos++) { + int const cost = + LZ4HC_literalsPrice(llen + rPos); + opt[rPos].mlen = 1; + opt[rPos].off = 0; + opt[rPos].litlen = llen + rPos; + opt[rPos].price = cost; + DEBUGLOG( + 7, + "rPos:%3i => price:%3i (litlen=%i) -- initial setup", + rPos, cost, opt[rPos].litlen); + } + } + /* set prices using initial match */ + { + int const matchML = + firstMatch + .len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ + int const offset = firstMatch.off; + int mlen; + assert(matchML < LZ4_OPT_NUM); + for (mlen = MINMATCH; mlen <= matchML; mlen++) { + int const cost = + LZ4HC_sequencePrice(llen, mlen); + opt[mlen].mlen = mlen; + opt[mlen].off = offset; + opt[mlen].litlen = llen; + opt[mlen].price = cost; + DEBUGLOG( + 7, + "rPos:%3i => price:%3i (matchlen=%i) -- initial setup", + mlen, cost, mlen); + } + } + last_match_pos = firstMatch.len; + { + int addLit; + for (addLit = 1; addLit <= TRAILING_LITERALS; + addLit++) { + opt[last_match_pos + addLit].mlen = + 1; /* literal */ + opt[last_match_pos + addLit].off = 0; + opt[last_match_pos + addLit].litlen = addLit; + opt[last_match_pos + 
addLit].price = + opt[last_match_pos].price + + LZ4HC_literalsPrice(addLit); + DEBUGLOG( + 7, + "rPos:%3i => price:%3i (litlen=%i) -- initial setup", + last_match_pos + addLit, + opt[last_match_pos + addLit].price, + addLit); + } + } - /* check further positions */ - for (cur = 1; cur < last_match_pos; cur++) { - const BYTE* const curPtr = ip + cur; - LZ4HC_match_t newMatch; + /* check further positions */ + for (cur = 1; cur < last_match_pos; cur++) { + const BYTE *const curPtr = ip + cur; + LZ4HC_match_t newMatch; - if (curPtr > mflimit) break; - DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u", - cur, opt[cur].price, opt[cur+1].price, cur+1); - if (fullUpdate) { - /* not useful to search here if next position has same (or lower) cost */ - if ( (opt[cur+1].price <= opt[cur].price) - /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */ - && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) ) - continue; - } else { - /* not useful to search here if next position has same (or lower) cost */ - if (opt[cur+1].price <= opt[cur].price) continue; - } + if (curPtr > mflimit) + break; + DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u", cur, + opt[cur].price, opt[cur + 1].price, cur + 1); + if (fullUpdate) { + /* not useful to search here if next position has same (or lower) cost */ + if ((opt[cur + 1].price <= opt[cur].price) + /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */ + && (opt[cur + MINMATCH].price < + opt[cur].price + 3 /*min seq price*/)) + continue; + } else { + /* not useful to search here if next position has same (or lower) cost */ + if (opt[cur + 1].price <= opt[cur].price) + continue; + } - DEBUGLOG(7, "search at rPos:%u", cur); - if (fullUpdate) - newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed); - else - /* only test matches of minimum length; slightly faster, but misses 
a few bytes */ - newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches, dict, favorDecSpeed); - if (!newMatch.len) continue; + DEBUGLOG(7, "search at rPos:%u", cur); + if (fullUpdate) + newMatch = LZ4HC_FindLongerMatch( + ctx, curPtr, matchlimit, MINMATCH - 1, + nbSearches, dict, favorDecSpeed); + else + /* only test matches of minimum length; slightly faster, but misses a few bytes */ + newMatch = LZ4HC_FindLongerMatch( + ctx, curPtr, matchlimit, + last_match_pos - cur, nbSearches, dict, + favorDecSpeed); + if (!newMatch.len) + continue; - if ( ((size_t)newMatch.len > sufficient_len) - || (newMatch.len + cur >= LZ4_OPT_NUM) ) { - /* immediate encoding */ - best_mlen = newMatch.len; - best_off = newMatch.off; - last_match_pos = cur + 1; - goto encode; - } + if (((size_t)newMatch.len > sufficient_len) || + (newMatch.len + cur >= LZ4_OPT_NUM)) { + /* immediate encoding */ + best_mlen = newMatch.len; + best_off = newMatch.off; + last_match_pos = cur + 1; + goto encode; + } - /* before match : set price with literals at beginning */ - { int const baseLitlen = opt[cur].litlen; - int litlen; - for (litlen = 1; litlen < MINMATCH; litlen++) { - int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen); - int const pos = cur + litlen; - if (price < opt[pos].price) { - opt[pos].mlen = 1; /* literal */ - opt[pos].off = 0; - opt[pos].litlen = baseLitlen+litlen; - opt[pos].price = price; - DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", - pos, price, opt[pos].litlen); - } } } + /* before match : set price with literals at beginning */ + { + int const baseLitlen = opt[cur].litlen; + int litlen; + for (litlen = 1; litlen < MINMATCH; litlen++) { + int const price = + opt[cur].price - + LZ4HC_literalsPrice( + baseLitlen) + + LZ4HC_literalsPrice(baseLitlen + + litlen); + int const pos = cur + litlen; + if (price < opt[pos].price) { + opt[pos].mlen = 1; /* literal */ + opt[pos].off = 0; + 
opt[pos].litlen = + baseLitlen + litlen; + opt[pos].price = price; + DEBUGLOG( + 7, + "rPos:%3i => price:%3i (litlen=%i)", + pos, price, + opt[pos].litlen); + } + } + } - /* set prices using match at position = cur */ - { int const matchML = newMatch.len; - int ml = MINMATCH; + /* set prices using match at position = cur */ + { + int const matchML = newMatch.len; + int ml = MINMATCH; - assert(cur + newMatch.len < LZ4_OPT_NUM); - for ( ; ml <= matchML ; ml++) { - int const pos = cur + ml; - int const offset = newMatch.off; - int price; - int ll; - DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)", - pos, last_match_pos); - if (opt[cur].mlen == 1) { - ll = opt[cur].litlen; - price = ((cur > ll) ? opt[cur - ll].price : 0) - + LZ4HC_sequencePrice(ll, ml); - } else { - ll = 0; - price = opt[cur].price + LZ4HC_sequencePrice(0, ml); - } + assert(cur + newMatch.len < LZ4_OPT_NUM); + for (; ml <= matchML; ml++) { + int const pos = cur + ml; + int const offset = newMatch.off; + int price; + int ll; + DEBUGLOG( + 7, + "testing price rPos %i (last_match_pos=%i)", + pos, last_match_pos); + if (opt[cur].mlen == 1) { + ll = opt[cur].litlen; + price = ((cur > ll) ? 
+ opt[cur - ll] + .price : + 0) + + LZ4HC_sequencePrice(ll, + ml); + } else { + ll = 0; + price = opt[cur].price + + LZ4HC_sequencePrice(0, + ml); + } - assert((U32)favorDecSpeed <= 1); - if (pos > last_match_pos+TRAILING_LITERALS - || price <= opt[pos].price - (int)favorDecSpeed) { - DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)", - pos, price, ml); - assert(pos < LZ4_OPT_NUM); - if ( (ml == matchML) /* last pos of last match */ - && (last_match_pos < pos) ) - last_match_pos = pos; - opt[pos].mlen = ml; - opt[pos].off = offset; - opt[pos].litlen = ll; - opt[pos].price = price; - } } } - /* complete following positions with literals */ - { int addLit; - for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { - opt[last_match_pos+addLit].mlen = 1; /* literal */ - opt[last_match_pos+addLit].off = 0; - opt[last_match_pos+addLit].litlen = addLit; - opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); - DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); - } } - } /* for (cur = 1; cur <= last_match_pos; cur++) */ + assert((U32)favorDecSpeed <= 1); + if (pos > last_match_pos + + TRAILING_LITERALS || + price <= + opt[pos].price - + (int)favorDecSpeed) { + DEBUGLOG( + 7, + "rPos:%3i => price:%3i (matchlen=%i)", + pos, price, ml); + assert(pos < LZ4_OPT_NUM); + if ((ml == + matchML) /* last pos of last match */ + && (last_match_pos < pos)) + last_match_pos = pos; + opt[pos].mlen = ml; + opt[pos].off = offset; + opt[pos].litlen = ll; + opt[pos].price = price; + } + } + } + /* complete following positions with literals */ + { + int addLit; + for (addLit = 1; addLit <= TRAILING_LITERALS; + addLit++) { + opt[last_match_pos + addLit].mlen = + 1; /* literal */ + opt[last_match_pos + addLit].off = 0; + opt[last_match_pos + addLit].litlen = + addLit; + opt[last_match_pos + addLit].price = + opt[last_match_pos].price + + LZ4HC_literalsPrice(addLit); + DEBUGLOG( + 7, + 
"rPos:%3i => price:%3i (litlen=%i)", + last_match_pos + addLit, + opt[last_match_pos + addLit] + .price, + addLit); + } + } + } /* for (cur = 1; cur <= last_match_pos; cur++) */ - assert(last_match_pos < LZ4_OPT_NUM + TRAILING_LITERALS); - best_mlen = opt[last_match_pos].mlen; - best_off = opt[last_match_pos].off; - cur = last_match_pos - best_mlen; + assert(last_match_pos < LZ4_OPT_NUM + TRAILING_LITERALS); + best_mlen = opt[last_match_pos].mlen; + best_off = opt[last_match_pos].off; + cur = last_match_pos - best_mlen; -encode: /* cur, last_match_pos, best_mlen, best_off must be set */ - assert(cur < LZ4_OPT_NUM); - assert(last_match_pos >= 1); /* == 1 when only one candidate */ - DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos); - { int candidate_pos = cur; - int selected_matchLength = best_mlen; - int selected_offset = best_off; - while (1) { /* from end to beginning */ - int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */ - int const next_offset = opt[candidate_pos].off; - DEBUGLOG(7, "pos %i: sequence length %i", candidate_pos, selected_matchLength); - opt[candidate_pos].mlen = selected_matchLength; - opt[candidate_pos].off = selected_offset; - selected_matchLength = next_matchLength; - selected_offset = next_offset; - if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */ - assert(next_matchLength > 0); /* can be 1, means literal */ - candidate_pos -= next_matchLength; - } } + encode: /* cur, last_match_pos, best_mlen, best_off must be set */ + assert(cur < LZ4_OPT_NUM); + assert(last_match_pos >= 1); /* == 1 when only one candidate */ + DEBUGLOG( + 6, + "reverse traversal, looking for shortest path (last_match_pos=%i)", + last_match_pos); + { + int candidate_pos = cur; + int selected_matchLength = best_mlen; + int selected_offset = best_off; + while (1) { /* from end to beginning */ + int const next_matchLength = + opt[candidate_pos] + .mlen; 
/* can be 1, means literal */ + int const next_offset = opt[candidate_pos].off; + DEBUGLOG(7, "pos %i: sequence length %i", + candidate_pos, selected_matchLength); + opt[candidate_pos].mlen = selected_matchLength; + opt[candidate_pos].off = selected_offset; + selected_matchLength = next_matchLength; + selected_offset = next_offset; + if (next_matchLength > candidate_pos) + break; /* last match elected, first match to encode */ + assert(next_matchLength > + 0); /* can be 1, means literal */ + candidate_pos -= next_matchLength; + } + } - /* encode all recorded sequences in order */ - { int rPos = 0; /* relative position (to ip) */ - while (rPos < last_match_pos) { - int const ml = opt[rPos].mlen; - int const offset = opt[rPos].off; - if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */ - rPos += ml; - assert(ml >= MINMATCH); - assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX)); - opSaved = op; - if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, offset, limit, oend) ) { /* updates ip, op and anchor */ - ovml = ml; - ovoff = offset; - goto _dest_overflow; - } } } - } /* while (ip <= mflimit) */ + /* encode all recorded sequences in order */ + { + int rPos = 0; /* relative position (to ip) */ + while (rPos < last_match_pos) { + int const ml = opt[rPos].mlen; + int const offset = opt[rPos].off; + if (ml == 1) { + ip++; + rPos++; + continue; + } /* literal; note: can end up with several literals, in which case, skip them */ + rPos += ml; + assert(ml >= MINMATCH); + assert((offset >= 1) && + (offset <= LZ4_DISTANCE_MAX)); + opSaved = op; + if (LZ4HC_encodeSequence( + UPDATABLE(ip, op, anchor), ml, + offset, limit, + oend)) { /* updates ip, op and anchor */ + ovml = ml; + ovoff = offset; + goto _dest_overflow; + } + } + } + } /* while (ip <= mflimit) */ _last_literals: - /* Encode Last Literals */ - { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ - size_t llAdd = (lastRunSize + 255 
- RUN_MASK) / 255; - size_t const totalSize = 1 + llAdd + lastRunSize; - if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ - if (limit && (op + totalSize > oend)) { - if (limit == limitedOutput) { /* Check output limit */ - retval = 0; - goto _return_label; - } - /* adapt lastRunSize to fill 'dst' */ - lastRunSize = (size_t)(oend - op) - 1 /*token*/; - llAdd = (lastRunSize + 256 - RUN_MASK) / 256; - lastRunSize -= llAdd; - } - DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize); - ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */ + /* Encode Last Literals */ + { + size_t lastRunSize = (size_t)(iend - anchor); /* literals */ + size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; + size_t const totalSize = 1 + llAdd + lastRunSize; + if (limit == fillOutput) + oend += LASTLITERALS; /* restore correct value */ + if (limit && (op + totalSize > oend)) { + if (limit == limitedOutput) { /* Check output limit */ + retval = 0; + goto _return_label; + } + /* adapt lastRunSize to fill 'dst' */ + lastRunSize = (size_t)(oend - op) - 1 /*token*/; + llAdd = (lastRunSize + 256 - RUN_MASK) / 256; + lastRunSize -= llAdd; + } + DEBUGLOG(6, "Final literal run : %i literals", + (int)lastRunSize); + ip = anchor + + lastRunSize; /* can be != iend if limit==fillOutput */ - if (lastRunSize >= RUN_MASK) { - size_t accumulator = lastRunSize - RUN_MASK; - *op++ = (RUN_MASK << ML_BITS); - for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; - *op++ = (BYTE) accumulator; - } else { - *op++ = (BYTE)(lastRunSize << ML_BITS); - } - LZ4_memcpy(op, anchor, lastRunSize); - op += lastRunSize; - } + if (lastRunSize >= RUN_MASK) { + size_t accumulator = lastRunSize - RUN_MASK; + *op++ = (RUN_MASK << ML_BITS); + for (; accumulator >= 255; accumulator -= 255) + *op++ = 255; + *op++ = (BYTE)accumulator; + } else { + *op++ = (BYTE)(lastRunSize << ML_BITS); + } + LZ4_memcpy(op, anchor, lastRunSize); + op += lastRunSize; + } - /* End */ - 
*srcSizePtr = (int) (((const char*)ip) - source); - retval = (int) ((char*)op-dst); - goto _return_label; + /* End */ + *srcSizePtr = (int)(((const char *)ip) - source); + retval = (int)((char *)op - dst); + goto _return_label; _dest_overflow: -if (limit == fillOutput) { - /* Assumption : ip, anchor, ovml and ovref must be set correctly */ - size_t const ll = (size_t)(ip - anchor); - size_t const ll_addbytes = (ll + 240) / 255; - size_t const ll_totalCost = 1 + ll_addbytes + ll; - BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */ - DEBUGLOG(6, "Last sequence overflowing (only %i bytes remaining)", (int)(oend-1-opSaved)); - op = opSaved; /* restore correct out pointer */ - if (op + ll_totalCost <= maxLitPos) { - /* ll validated; now adjust match length */ - size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost)); - size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255); - assert(maxMlSize < INT_MAX); assert(ovml >= 0); - if ((size_t)ovml > maxMlSize) ovml = (int)maxMlSize; - if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) { - DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml); - DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor); - LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovoff, notLimited, oend); - DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor); - } } - goto _last_literals; -} + if (limit == fillOutput) { + /* Assumption : ip, anchor, ovml and ovref must be set correctly */ + size_t const ll = (size_t)(ip - anchor); + size_t const ll_addbytes = (ll + 240) / 255; + size_t const ll_totalCost = 1 + ll_addbytes + ll; + BYTE *const maxLitPos = + oend - 3; /* 2 for offset, 1 for token */ + DEBUGLOG(6, + "Last sequence overflowing (only %i bytes remaining)", + (int)(oend - 1 - opSaved)); + op = opSaved; /* restore correct out pointer */ + if (op + ll_totalCost <= maxLitPos) { + /* ll validated; now adjust match 
length */ + size_t const bytesLeftForMl = + (size_t)(maxLitPos - (op + ll_totalCost)); + size_t const maxMlSize = MINMATCH + (ML_MASK - 1) + + (bytesLeftForMl * 255); + assert(maxMlSize < INT_MAX); + assert(ovml >= 0); + if ((size_t)ovml > maxMlSize) + ovml = (int)maxMlSize; + if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - + 1 + ovml >= + MFLIMIT) { + DEBUGLOG(6, "Space to end : %i + ml (%i)", + (int)((oend + LASTLITERALS) - + (op + ll_totalCost + 2) - 1), + ovml); + DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, + anchor); + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), + ovml, ovoff, notLimited, + oend); + DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, + anchor); + } + } + goto _last_literals; + } _return_label: -#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 - if (opt) FREEMEM(opt); +#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE == 1 + if (opt) + FREEMEM(opt); #endif - return retval; + return retval; } - /* state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t) * @return : 0 on success, !=0 if error */ -int LZ4_resetStreamStateHC(void* state, char* inputBuffer) +int LZ4_resetStreamStateHC(void *state, char *inputBuffer) { - LZ4_streamHC_t* const hc4 = LZ4_initStreamHC(state, sizeof(*hc4)); - if (hc4 == NULL) return 1; /* init failed */ - LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer); - return 0; + LZ4_streamHC_t *const hc4 = LZ4_initStreamHC(state, sizeof(*hc4)); + if (hc4 == NULL) + return 1; /* init failed */ + LZ4HC_init_internal(&hc4->internal_donotuse, (const BYTE *)inputBuffer); + return 0; } #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) -void* LZ4_createHC (const char* inputBuffer) +void *LZ4_createHC(const char *inputBuffer) { - LZ4_streamHC_t* const hc4 = LZ4_createStreamHC(); - if (hc4 == NULL) return NULL; /* not enough memory */ - LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer); - return hc4; + LZ4_streamHC_t *const hc4 = LZ4_createStreamHC(); + if (hc4 == 
NULL) + return NULL; /* not enough memory */ + LZ4HC_init_internal(&hc4->internal_donotuse, (const BYTE *)inputBuffer); + return hc4; } -int LZ4_freeHC (void* LZ4HC_Data) +int LZ4_freeHC(void *LZ4HC_Data) { - if (!LZ4HC_Data) return 0; /* support free on NULL */ - FREEMEM(LZ4HC_Data); - return 0; + if (!LZ4HC_Data) + return 0; /* support free on NULL */ + FREEMEM(LZ4HC_Data); + return 0; } #endif -int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel) +int LZ4_compressHC2_continue(void *LZ4HC_Data, const char *src, char *dst, + int srcSize, int cLevel) { - return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited); + return LZ4HC_compress_generic( + &((LZ4_streamHC_t *)LZ4HC_Data)->internal_donotuse, src, dst, + &srcSize, 0, cLevel, notLimited); } -int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel) +int LZ4_compressHC2_limitedOutput_continue(void *LZ4HC_Data, const char *src, + char *dst, int srcSize, + int dstCapacity, int cLevel) { - return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput); + return LZ4HC_compress_generic( + &((LZ4_streamHC_t *)LZ4HC_Data)->internal_donotuse, src, dst, + &srcSize, dstCapacity, cLevel, limitedOutput); } -char* LZ4_slideInputBufferHC(void* LZ4HC_Data) +char *LZ4_slideInputBufferHC(void *LZ4HC_Data) { - LZ4HC_CCtx_internal* const s = &((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse; - const BYTE* const bufferStart = s->prefixStart - s->dictLimit + s->lowLimit; - LZ4_resetStreamHC_fast((LZ4_streamHC_t*)LZ4HC_Data, s->compressionLevel); - /* ugly conversion trick, required to evade (const char*) -> (char*) cast-qual warning :( */ - return (char*)(uptrval)bufferStart; + LZ4HC_CCtx_internal *const s = + &((LZ4_streamHC_t *)LZ4HC_Data)->internal_donotuse; + const BYTE 
*const bufferStart = + s->prefixStart - s->dictLimit + s->lowLimit; + LZ4_resetStreamHC_fast((LZ4_streamHC_t *)LZ4HC_Data, + s->compressionLevel); + /* ugly conversion trick, required to evade (const char*) -> (char*) cast-qual warning :( */ + return (char *)(uptrval)bufferStart; } diff --git a/lib/lz4/lz4hc.h b/lib/lz4/lz4hc.h index bed6793534c8..92d2aef17b6f 100644 --- a/lib/lz4/lz4hc.h +++ b/lib/lz4/lz4hc.h @@ -34,21 +34,19 @@ #ifndef LZ4_HC_H_19834876238432 #define LZ4_HC_H_19834876238432 -#if defined (__cplusplus) +#if defined(__cplusplus) extern "C" { #endif /* --- Dependency --- */ /* note : lz4hc requires lz4.h/lz4.c for compilation */ -#include "lz4.h" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */ - +#include "lz4.h" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */ /* --- Useful constants --- */ -#define LZ4HC_CLEVEL_MIN 2 -#define LZ4HC_CLEVEL_DEFAULT 9 -#define LZ4HC_CLEVEL_OPT_MIN 10 -#define LZ4HC_CLEVEL_MAX 12 - +#define LZ4HC_CLEVEL_MIN 2 +#define LZ4HC_CLEVEL_DEFAULT 9 +#define LZ4HC_CLEVEL_OPT_MIN 10 +#define LZ4HC_CLEVEL_MAX 12 /*-************************************ * Block Compression @@ -63,22 +61,23 @@ extern "C" { * @return : the number of bytes written into 'dst' * or 0 if compression fails. */ -LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel, void *wrkmem) - +LZ4LIB_API int LZ4_compress_HC(const char *src, char *dst, int srcSize, + int dstCapacity, int compressionLevel, + void *wrkmem); /* Note : * Decompression functions are provided within "lz4.h" (BSD license) */ - /*! LZ4_compress_HC_extStateHC() : * Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`. * `state` size is provided by LZ4_sizeofStateHC(). * Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() should do properly). 
*/ LZ4LIB_API int LZ4_sizeofStateHC(void); -LZ4LIB_API int LZ4_compress_HC_extStateHC(void* stateHC, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel); - +LZ4LIB_API int LZ4_compress_HC_extStateHC(void *stateHC, const char *src, + char *dst, int srcSize, + int maxDstSize, int compressionLevel); /*! LZ4_compress_HC_destSize() : v1.9.0+ * Will compress as much data as possible from `src` @@ -88,17 +87,16 @@ LZ4LIB_API int LZ4_compress_HC_extStateHC(void* stateHC, const char* src, char* * or 0 if compression fails. * `srcSizePtr` : on success, *srcSizePtr is updated to indicate how much bytes were read from `src` */ -LZ4LIB_API int LZ4_compress_HC_destSize(void* stateHC, - const char* src, char* dst, - int* srcSizePtr, int targetDstSize, - int compressionLevel); - +LZ4LIB_API int LZ4_compress_HC_destSize(void *stateHC, const char *src, + char *dst, int *srcSizePtr, + int targetDstSize, + int compressionLevel); /*-************************************ * Streaming Compression * Bufferless synchronous API **************************************/ - typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */ +typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */ /*! LZ4_createStreamHC() and LZ4_freeStreamHC() : * These functions create and release memory for LZ4 HC streaming state. @@ -106,8 +104,8 @@ LZ4LIB_API int LZ4_compress_HC_destSize(void* stateHC, * A same state can be used multiple times consecutively, * starting with LZ4_resetStreamHC_fast() to start a new stream of blocks. 
*/ -LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void); -LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr); +LZ4LIB_API LZ4_streamHC_t *LZ4_createStreamHC(void); +LZ4LIB_API int LZ4_freeStreamHC(LZ4_streamHC_t *streamHCPtr); /* These functions compress data in successive blocks of any size, @@ -154,12 +152,14 @@ LZ4LIB_API int LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr); just by resetting it, using LZ4_resetStreamHC_fast(). */ -LZ4LIB_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t* streamHCPtr, int compressionLevel); /* v1.9.0+ */ -LZ4LIB_API int LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize); +LZ4LIB_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t *streamHCPtr, + int compressionLevel); /* v1.9.0+ */ +LZ4LIB_API int LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr, + const char *dictionary, int dictSize); -LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, - const char* src, char* dst, - int srcSize, int maxDstSize); +LZ4LIB_API int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr, + const char *src, char *dst, int srcSize, + int maxDstSize); /*! LZ4_compress_HC_continue_destSize() : v1.9.0+ * Similar to LZ4_compress_HC_continue(), @@ -171,12 +171,13 @@ LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr, * `srcSizePtr` : on success, *srcSizePtr will be updated to indicate how much bytes were read from `src`. * Note that this function may not consume the entire input. */ -LZ4LIB_API int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr, - const char* src, char* dst, - int* srcSizePtr, int targetDstSize); - -LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize); +LZ4LIB_API int +LZ4_compress_HC_continue_destSize(LZ4_streamHC_t *LZ4_streamHCPtr, + const char *src, char *dst, int *srcSizePtr, + int targetDstSize); +LZ4LIB_API int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer, + int maxDictSize); /*! 
LZ4_attach_HC_dictionary() : stable since v1.10.0 * This API allows for the efficient re-use of a static dictionary many times. @@ -203,9 +204,8 @@ LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, in * through the lifetime of the stream session. */ LZ4LIB_API void -LZ4_attach_HC_dictionary(LZ4_streamHC_t* working_stream, - const LZ4_streamHC_t* dictionary_stream); - +LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, + const LZ4_streamHC_t *dictionary_stream); /*^********************************************** * !!!!!! STATIC LINKING ONLY !!!!!! @@ -220,39 +220,38 @@ LZ4_attach_HC_dictionary(LZ4_streamHC_t* working_stream, ********************************************************************/ #define LZ4HC_DICTIONARY_LOGSIZE 16 -#define LZ4HC_MAXD (1<= LZ4HC_CLEVEL_OPT_MIN. */ -LZ4LIB_STATIC_API void LZ4_favorDecompressionSpeed( - LZ4_streamHC_t* LZ4_streamHCPtr, int favor); +LZ4LIB_STATIC_API void +LZ4_favorDecompressionSpeed(LZ4_streamHC_t *LZ4_streamHCPtr, int favor); /*! LZ4_resetStreamHC_fast() : v1.9.0+ * When an LZ4_streamHC_t is known to be in a internally coherent state, @@ -386,8 +424,8 @@ LZ4LIB_STATIC_API void LZ4_favorDecompressionSpeed( * may be passed to this function. However, it will be fully reset, which will * clear any existing history and settings from the context. */ -LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast( - LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel); +LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t *LZ4_streamHCPtr, + int compressionLevel); /*! LZ4_compress_HC_extStateHC_fastReset() : * A variant of LZ4_compress_HC_extStateHC(). @@ -400,15 +438,14 @@ LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast( * LZ4_resetStreamHC_fast() while LZ4_compress_HC_extStateHC() starts with a * call to LZ4_resetStreamHC(). 
*/ -LZ4LIB_STATIC_API int LZ4_compress_HC_extStateHC_fastReset ( - void* state, - const char* src, char* dst, - int srcSize, int dstCapacity, - int compressionLevel); +LZ4LIB_STATIC_API int +LZ4_compress_HC_extStateHC_fastReset(void *state, const char *src, char *dst, + int srcSize, int dstCapacity, + int compressionLevel); -#if defined (__cplusplus) +#if defined(__cplusplus) } #endif -#endif /* LZ4_HC_SLO_098092834 */ -#endif /* LZ4_HC_STATIC_LINKING_ONLY */ +#endif /* LZ4_HC_SLO_098092834 */ +#endif /* LZ4_HC_STATIC_LINKING_ONLY */ From c7699b13a904d03a39cfbe961f55beeeb40a7c0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=98=BF=E8=8F=8C=E2=80=A2=E6=9C=AA=E9=9C=9C?= <799620521@qq.com> Date: Tue, 2 Aug 2022 00:23:53 +0800 Subject: [PATCH 09/85] lib/lz4: Import arm64 V8 ASM lz4 decompression acceleration Change-Id: I3c8dd91df090bb692784a6b7a61c8877b1e1dfba --- lib/lz4/Makefile | 2 + lib/lz4/lz4.c | 87 ++++++++++- lib/lz4/lz4.h | 12 ++ lib/lz4/lz4armv8/lz4accel.c | 48 ++++++ lib/lz4/lz4armv8/lz4accel.h | 56 +++++++ lib/lz4/lz4armv8/lz4armv8.S | 283 ++++++++++++++++++++++++++++++++++++ 6 files changed, 482 insertions(+), 6 deletions(-) create mode 100644 lib/lz4/lz4armv8/lz4accel.c create mode 100644 lib/lz4/lz4armv8/lz4accel.h create mode 100644 lib/lz4/lz4armv8/lz4armv8.S diff --git a/lib/lz4/Makefile b/lib/lz4/Makefile index cc0f596aa108..cce094ffab6a 100644 --- a/lib/lz4/Makefile +++ b/lib/lz4/Makefile @@ -3,3 +3,5 @@ ccflags-y += -O3 \ -DLZ4_FAST_DEC_LOOP=1 obj-y += lz4.o lz4hc.o + +obj-$(CONFIG_ARM64) += $(addprefix lz4armv8/, lz4accel.o lz4armv8.o) diff --git a/lib/lz4/lz4.c b/lib/lz4/lz4.c index bb5c90add31a..ac9179f6ca8d 100644 --- a/lib/lz4/lz4.c +++ b/lib/lz4/lz4.c @@ -2415,8 +2415,9 @@ LZ4_FORCE_INLINE Rvl_t read_variable_length(const BYTE **ip, const BYTE *ilimit, * Note that it is important for performance that this function really get inlined, * in order to remove useless branches during compilation optimization. 
*/ -LZ4_FORCE_INLINE int LZ4_decompress_generic( - const char *const src, char *const dst, int srcSize, +LZ4_FORCE_INLINE int __LZ4_decompress_generic( + const char *const src, char *const dst, const BYTE *ip, BYTE *op, + int srcSize, int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */ earlyEnd_directive partialDecoding, /* full, partial */ @@ -2431,11 +2432,9 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( } { - const BYTE *ip = (const BYTE *)src; - const BYTE *const iend = ip + srcSize; + const BYTE *const iend = src + srcSize; - BYTE *op = (BYTE *)dst; - BYTE *const oend = op + outputSize; + BYTE *const oend = dst + outputSize; BYTE *cpy; const BYTE *const dictEnd = @@ -3008,6 +3007,30 @@ LZ4_FORCE_INLINE int LZ4_decompress_generic( /*===== Instantiate the API decoding functions. =====*/ +LZ4_FORCE_INLINE int +LZ4_decompress_generic(const char *const src, char *const dst, int srcSize, + /* + * If endOnInput == endOnInputSize, + * this value is `dstCapacity` + */ + int outputSize, + /* full, partial */ + earlyEnd_directive partialDecoding, + /* noDict, withPrefix64k, usingExtDict */ + dict_directive dict, + /* always <= dst, == dst when no prefix */ + const BYTE *const lowPrefix, + /* only if dict == usingExtDict */ + const BYTE *const dictStart, + /* note : = 0 if noDict */ + const size_t dictSize) +{ + return __LZ4_decompress_generic(src, dst, (const BYTE *)src, + (BYTE *)dst, srcSize, outputSize, + partialDecoding, dict, lowPrefix, + dictStart, dictSize); +} + LZ4_FORCE_O2 int LZ4_decompress_safe(const char *source, char *dest, int compressedSize, int maxDecompressedSize) @@ -3268,6 +3291,58 @@ int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode, return result; } +LZ4_FORCE_O2 ssize_t LZ4_arm64_decompress_safe_partial(const void *source, + void *dest, + size_t inputSize, + size_t outputSize, + bool dip) +{ + uint8_t *dstPtr = dest; + const uint8_t *srcPtr = source; + ssize_t ret; + +#ifdef 
__ARCH_HAS_LZ4_ACCELERATOR + /* Go fast if we can, keeping away from the end of buffers */ + if (outputSize > LZ4_FAST_MARGIN && inputSize > LZ4_FAST_MARGIN && + lz4_decompress_accel_enable()) { + ret = lz4_decompress_asm( + &dstPtr, dest, dest + outputSize - LZ4_FAST_MARGIN, + &srcPtr, source + inputSize - LZ4_FAST_MARGIN, dip); + if (ret) + return -EIO; + } +#endif + /* Finish in safe */ + return __LZ4_decompress_generic(source, dest, srcPtr, dstPtr, inputSize, + outputSize, partial_decode, noDict, + (BYTE *)dest, NULL, 0); +} + +LZ4_FORCE_O2 ssize_t LZ4_arm64_decompress_safe(const void *source, void *dest, + size_t inputSize, + size_t outputSize, bool dip) +{ + uint8_t *dstPtr = dest; + const uint8_t *srcPtr = source; + ssize_t ret; + +#ifdef __ARCH_HAS_LZ4_ACCELERATOR + /* Go fast if we can, keeping away from the end of buffers */ + if (outputSize > LZ4_FAST_MARGIN && inputSize > LZ4_FAST_MARGIN && + lz4_decompress_accel_enable()) { + ret = lz4_decompress_asm( + &dstPtr, dest, dest + outputSize - LZ4_FAST_MARGIN, + &srcPtr, source + inputSize - LZ4_FAST_MARGIN, dip); + if (ret) + return -EIO; + } +#endif + /* Finish in safe */ + return __LZ4_decompress_generic(source, dest, srcPtr, dstPtr, inputSize, + outputSize, decode_full_block, noDict, + (BYTE *)dest, NULL, 0); +} + LZ4_FORCE_O2 int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode, const char *source, char *dest, int originalSize) diff --git a/lib/lz4/lz4.h b/lib/lz4/lz4.h index 052153de37df..d21ce150bc7d 100644 --- a/lib/lz4/lz4.h +++ b/lib/lz4/lz4.h @@ -73,6 +73,8 @@ extern "C" { #include #include +#include "lz4armv8/lz4accel.h" + #define LZ4_FORCE_INLINE static inline __attribute__((always_inline)) /*^*************************************************************** @@ -573,6 +575,16 @@ LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode, const char *src, char *dst, int srcSize, int dstCapacity); +LZ4LIB_API ssize_t LZ4_arm64_decompress_safe_partial(const void *source, + 
void *dest, + size_t inputSize, + size_t outputSize, + bool dip); + +LZ4LIB_API ssize_t LZ4_arm64_decompress_safe(const void *source, void *dest, + size_t inputSize, + size_t outputSize, bool dip); + /*! LZ4_decompress_safe_usingDict() : * Works the same as * a combination of LZ4_setStreamDecode() followed by LZ4_decompress_safe_continue() diff --git a/lib/lz4/lz4armv8/lz4accel.c b/lib/lz4/lz4armv8/lz4accel.c new file mode 100644 index 000000000000..e7144b4a2295 --- /dev/null +++ b/lib/lz4/lz4armv8/lz4accel.c @@ -0,0 +1,48 @@ +#include "lz4accel.h" +#include + +#ifdef CONFIG_CFI_CLANG +static inline int +__cfi_lz4_decompress_asm(uint8_t **dst_ptr, uint8_t *dst_begin, + uint8_t *dst_end, const uint8_t **src_ptr, + const uint8_t *src_end, bool dip) +{ + return _lz4_decompress_asm(dst_ptr, dst_begin, dst_end, + src_ptr, src_end, dip); +} + +static inline int +__cfi_lz4_decompress_asm_noprfm(uint8_t **dst_ptr, uint8_t *dst_begin, + uint8_t *dst_end, const uint8_t **src_ptr, + const uint8_t *src_end, bool dip) +{ + return _lz4_decompress_asm_noprfm(dst_ptr, dst_begin, dst_end, + src_ptr, src_end, dip); +} + +#define _lz4_decompress_asm __cfi_lz4_decompress_asm +#define _lz4_decompress_asm_noprfm __cfi_lz4_decompress_asm_noprfm +#endif + +int lz4_decompress_asm_select(uint8_t **dst_ptr, uint8_t *dst_begin, + uint8_t *dst_end, const uint8_t **src_ptr, + const uint8_t *src_end, bool dip) { + const unsigned i = smp_processor_id(); + + switch(read_cpuid_part_number()) { + case ARM_CPU_PART_CORTEX_A53: + lz4_decompress_asm_fn[i] = _lz4_decompress_asm_noprfm; + return _lz4_decompress_asm_noprfm(dst_ptr, dst_begin, dst_end, + src_ptr, src_end, dip); + } + lz4_decompress_asm_fn[i] = _lz4_decompress_asm; + return _lz4_decompress_asm(dst_ptr, dst_begin, dst_end, + src_ptr, src_end, dip); +} + +int (*lz4_decompress_asm_fn[NR_CPUS])(uint8_t **dst_ptr, uint8_t *dst_begin, + uint8_t *dst_end, const uint8_t **src_ptr, + const uint8_t *src_end, bool dip) +__read_mostly = { + [0 ... 
NR_CPUS-1] = lz4_decompress_asm_select, +}; diff --git a/lib/lz4/lz4armv8/lz4accel.h b/lib/lz4/lz4armv8/lz4accel.h new file mode 100644 index 000000000000..a8862c44a1d8 --- /dev/null +++ b/lib/lz4/lz4armv8/lz4accel.h @@ -0,0 +1,56 @@ +#include +#include + +#define LZ4_FAST_MARGIN (128) + +#if defined(CONFIG_ARM64) && defined(CONFIG_KERNEL_MODE_NEON) +#include +#include + +asmlinkage int _lz4_decompress_asm(uint8_t **dst_ptr, uint8_t *dst_begin, + uint8_t *dst_end, const uint8_t **src_ptr, + const uint8_t *src_end, bool dip); + +asmlinkage int _lz4_decompress_asm_noprfm(uint8_t **dst_ptr, uint8_t *dst_begin, + uint8_t *dst_end, const uint8_t **src_ptr, + const uint8_t *src_end, bool dip); + +static inline int lz4_decompress_accel_enable(void) +{ + return may_use_simd(); +} + +extern int (*lz4_decompress_asm_fn[])(uint8_t **dst_ptr, uint8_t *dst_begin, + uint8_t *dst_end, const uint8_t **src_ptr, + const uint8_t *src_end, bool dip); + +static inline ssize_t lz4_decompress_asm( + uint8_t **dst_ptr, uint8_t *dst_begin, uint8_t *dst_end, + const uint8_t **src_ptr, const uint8_t *src_end, bool dip) +{ + int ret; + + kernel_neon_begin(); + ret = lz4_decompress_asm_fn[smp_processor_id()](dst_ptr, dst_begin, + dst_end, src_ptr, + src_end, dip); + kernel_neon_end(); + return (ssize_t)ret; +} + +#define __ARCH_HAS_LZ4_ACCELERATOR + +#else + +static inline int lz4_decompress_accel_enable(void) +{ + return 0; +} + +static inline ssize_t lz4_decompress_asm( + uint8_t **dst_ptr, uint8_t *dst_begin, uint8_t *dst_end, + const uint8_t **src_ptr, const uint8_t *src_end, bool dip) +{ + return 0; +} +#endif diff --git a/lib/lz4/lz4armv8/lz4armv8.S b/lib/lz4/lz4armv8/lz4armv8.S new file mode 100644 index 000000000000..5f134fd42041 --- /dev/null +++ b/lib/lz4/lz4armv8/lz4armv8.S @@ -0,0 +1,283 @@ +/* + * lz4armv8.S + * LZ4 decompression optimization based on arm64 NEON instruction + */ + +#include +#include + +/** + * _lz4_decompress_asm: The fast LZ4 decompression, lz4 decompression 
algothrim asm + * routine,support Huawei EROFS filesystem striving for maximum decompression speed. + * Entry point _lz4_decompress_asm. + * @para: + * x0 = current destination address ptr + * x1 = destination start position + * x2 = destination end position + * x3 = current source address ptr + * x4 = source end position + * x5 = flag for DIP + * @ret: + * 0 on success, -1 on failure + * + * x7: match_length + * x8: literal_legth + * x9: copy start ptr + * x10: copy end ptr + */ + + +#define match_length x7 +#define literal_length x8 +#define copy_from_ptr x9 /* copy source ptr*/ +#define copy_to_ptr x10 /* copy destination ptr*/ +#define w_tmp w11 /* temp var */ +#define tmp x11 +#define w_offset w12 +#define offset x12 +#define permtable_addr x13 +#define cplen_table_addr x14 +#define save_dst x15 +#define save_src x16 +#define offset_src_ptr x17 +#define w_tmp_match_length w18 +#define tmp_match_length x18 + + +/* x3 >= x4 src overflow */ +.macro check_src_overflow + cmp x3, x4 + b.hs Done +.endm + +.macro check_src_overflow1 + cmp x3, x4 + b.hs Done1 +.endm +/* x0 >= x2 dst overflow */ +.macro check_dst_overflow + cmp x0, x2 + b.hs Done +.endm + +.macro check_dst_overflow1 + cmp x0, x2 + b.hs Done1 +.endm + +.altmacro +.macro lz4_decompress_asm_generic doprfm=1 + stp x29, x30, [sp, #-16]! + mov x29, sp + stp x3, x0, [sp, #-16]! /* push src and dst in stack */ + ldr x3, [x3] /* x3 = *src_ptr */ + ldr x0, [x0] /* x0 = *dst_ptr */ + adr permtable_addr, Permtable + adr cplen_table_addr, Copylength_table + + /* + * save current dst and src ,ensure when return from asm routine + * current both of "dst" and "src" save good position. 
+ */ +1: mov save_dst, x0 + mov save_src, x3 + + check_dst_overflow + check_src_overflow + +.if \doprfm + add tmp, x0, #512 + cmp x2, tmp + b.ls 2f + prfm pstl2strm,[x0,#512] +.endif + + /* Decode Token Byte: */ +2: ldrb w_tmp, [x3], #1 /* read Token Byte */ + lsr literal_length, tmp, #4 /* get literal_length */ + and tmp_match_length, tmp, #0xf /* get match_length */ + add match_length, tmp_match_length, #4 /* match_length >=4 */ + + /* + * literal_length <= 14 : no more literal length byte,fllowing zero + * or more bytes are liteal bytes. + */ + cmp literal_length, #14 + b.ls 5f + + /* + * literal_length == 15 : more literal length bytes after TokenByte. + * continue decoding more literal length bytes. + */ + +3: check_src_overflow + ldrb w_tmp, [x3], #1 + add literal_length, literal_length, tmp + cmp tmp, #255 + b.eq 3b + +/* literal copy */ + +4: mov copy_from_ptr, x3 + mov copy_to_ptr, x0 + add x3, x3, literal_length + add x0, x0, literal_length + check_dst_overflow + check_src_overflow + +4: ldr q0, [copy_from_ptr], #16 + str q0, [copy_to_ptr], #16 + + cmp x0, copy_to_ptr + b.ls 6f + b 4b + +5: ldr q0, [x3] + str q0, [x0] + add x3, x3, literal_length + add x0, x0, literal_length + + /* Decode offset and match_length */ +6: mov offset_src_ptr, x3 + ldrh w_offset, [x3], #2 /* 2Byte:offset bytes */ + cbz offset, Failed /* match_length == 0 is invalid */ + sub copy_from_ptr, x0, offset + cmp copy_from_ptr, x1 + b.lo Failed + mov copy_to_ptr, x0 + /* + * set x0 to the end of "match copy"; + */ + add x0, x0, match_length + cmp match_length, #19 + b.lo 8f + /* + * continue decoding more match length bytes. + */ + +7: check_src_overflow1 + ldrb w_tmp, [x3], #1 + add x0, x0, tmp + add match_length, match_length, tmp + cmp tmp, #255 + b.eq 7b + + /* + * here got the matchlength,start "match copy". 
+ */ + +8: check_dst_overflow1 + cmp offset , match_length + b.hs 13f + +9: cmp offset , #32 + b.hs 13f + +10: ldr q1, [copy_from_ptr] + add tmp, permtable_addr, offset, lsl #5 + ldp q2, q3, [tmp] + tbl v0.16b, {v1.16b}, v2.16b + tbl v1.16b, {v1.16b}, v3.16b + cmp offset , #16 + b.lo 11f + ldp q0, q1, [copy_from_ptr] + +11: ldrb w_tmp, [cplen_table_addr, offset] + stp q0, q1, [copy_to_ptr] + add copy_to_ptr, copy_to_ptr, tmp + cmp x0, copy_to_ptr + b.ls 1b + +12: stp q0, q1, [copy_to_ptr] + add copy_to_ptr, copy_to_ptr, tmp + stp q0, q1, [copy_to_ptr] + add copy_to_ptr, copy_to_ptr, tmp + cmp x0, copy_to_ptr + b.hi 12b + b 1b + +/* offset >= match */ + +13: ldr q0, [copy_from_ptr], #16 + str q0, [copy_to_ptr], #16 + + cmp x0, copy_to_ptr + b.ls 1b + +14: ldp q0, q1, [copy_from_ptr], #32 + stp q0, q1, [copy_to_ptr], #32 + + cmp x0, copy_to_ptr + b.hi 14b + b 1b +.endm + +.text +.p2align 4 + +ENTRY(_lz4_decompress_asm) + lz4_decompress_asm_generic +ENDPROC(_lz4_decompress_asm) + +Failed: + mov tmp, #-1 + b Exit_here + +Done1: + cbz x5, Done + sub save_src, offset_src_ptr, #1 + strb w_tmp_match_length, [save_src] + add save_dst,save_dst,literal_length +Done: + mov tmp, #0 + +Exit_here: + ldp x3, x0, [sp], #16 + str save_src, [x3] + str save_dst, [x0] + mov x0, tmp + ldp x29, x30, [sp], #16 + ret x30 + + +/* + * In case of offset <= 31 < matchlength ,expand the pattern and store in + * repeating pattern size(RPS),store the RPS in Copylength_table. + * case 1): 1 <= offset <= 15 + * expand the pattern according to the Permtable and store their repeating pattern in q0 q1; + * RPS = 32 - (32 % offset) offset <= 31 + * case 2): offset >= 16 + * read the pattern and store in q0 q1. + * RPS = offset. 
+ */ +.text +.p2align 8 +Permtable: +.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 //offset = 0 +.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 //offset = 1 +.byte 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 //offset = 2 +.byte 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1 //offset = 3 +.byte 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 //offset = 4 +.byte 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1 //offset = 5 +.byte 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1 //offset = 6 +.byte 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3 //offset = 7 +.byte 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 //offset = 8 +.byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4 //offset = 9 +.byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1 //offset = 10 +.byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 //offset = 11 +.byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11, 0, 1, 2, 3, 4, 5, 6, 7 //offset = 12 +.byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12, 0, 1, 2, 3, 4, 5 //offset = 13 +.byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13, 0, 1, 2, 3 //offset = 14 +.byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, 0, 1 //offset = 15 + +.p2align 8 +Copylength_table: +.byte 32,32,32,30,32,30,30,28,32,27,30,22,24,26,28,30 // 0 .. 
15 +.byte 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 // 16 .. 31 + + +.text +.p2align 4 +ENTRY(_lz4_decompress_asm_noprfm) + lz4_decompress_asm_generic 0 +ENDPROC(_lz4_decompress_asm_noprfm) From 8a9ee3a4b1ce98ea1de444bf80f8d83e3c1d2766 Mon Sep 17 00:00:00 2001 From: Dark-Matter7232 Date: Mon, 17 Jun 2024 11:57:19 +0500 Subject: [PATCH 10/85] lz4armv8: Update assembly instructions from Huawei kernel drop Signed-off-by: Dark-Matter7232 [Tashar02: Fragment from original commit, improve indentations and reword commit message] Signed-off-by: Tashfin Shakeer Rhythm --- lib/lz4/lz4armv8/lz4armv8.S | 125 ++++++++++++++++++++++-------------- 1 file changed, 77 insertions(+), 48 deletions(-) diff --git a/lib/lz4/lz4armv8/lz4armv8.S b/lib/lz4/lz4armv8/lz4armv8.S index 5f134fd42041..1d836f686dd9 100644 --- a/lib/lz4/lz4armv8/lz4armv8.S +++ b/lib/lz4/lz4armv8/lz4armv8.S @@ -5,6 +5,7 @@ #include #include +#include /** * _lz4_decompress_asm: The fast LZ4 decompression, lz4 decompression algothrim asm @@ -40,9 +41,13 @@ #define save_dst x15 #define save_src x16 #define offset_src_ptr x17 +#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 0) +#define w_tmp_match_length w6 +#define tmp_match_length x6 +#else #define w_tmp_match_length w18 #define tmp_match_length x18 - +#endif /* x3 >= x4 src overflow */ .macro check_src_overflow @@ -69,17 +74,19 @@ .macro lz4_decompress_asm_generic doprfm=1 stp x29, x30, [sp, #-16]! mov x29, sp - stp x3, x0, [sp, #-16]! /* push src and dst in stack */ + stp x3, x0, [sp, #-16]! /* push src and dst in stack */ ldr x3, [x3] /* x3 = *src_ptr */ ldr x0, [x0] /* x0 = *dst_ptr */ - adr permtable_addr, Permtable - adr cplen_table_addr, Copylength_table + adr_l permtable_addr, Permtable + adr_l cplen_table_addr, Copylength_table +1: /* + * Lz4_decompress_begin: * save current dst and src ,ensure when return from asm routine * current both of "dst" and "src" save good position. 
*/ -1: mov save_dst, x0 + mov save_dst, x0 mov save_src, x3 check_dst_overflow @@ -92,10 +99,11 @@ prfm pstl2strm,[x0,#512] .endif - /* Decode Token Byte: */ -2: ldrb w_tmp, [x3], #1 /* read Token Byte */ - lsr literal_length, tmp, #4 /* get literal_length */ - and tmp_match_length, tmp, #0xf /* get match_length */ +2: + /* Decode_token: */ + ldrb w_tmp, [x3], #1 /* read Token Byte */ + lsr literal_length, tmp, #4 /* get literal_length */ + and tmp_match_length, tmp, #0xf /* get match_length */ add match_length, tmp_match_length, #4 /* match_length >=4 */ /* @@ -103,43 +111,51 @@ * or more bytes are liteal bytes. */ cmp literal_length, #14 - b.ls 5f + b.ls 6f /* * literal_length == 15 : more literal length bytes after TokenByte. * continue decoding more literal length bytes. */ - -3: check_src_overflow +3: + /* Get_literal_length: */ + check_src_overflow ldrb w_tmp, [x3], #1 add literal_length, literal_length, tmp cmp tmp, #255 b.eq 3b -/* literal copy */ - -4: mov copy_from_ptr, x3 + /* literal copy */ +4: + /* Copy_long_literal_hs_15: */ + mov copy_from_ptr, x3 mov copy_to_ptr, x0 add x3, x3, literal_length add x0, x0, literal_length check_dst_overflow check_src_overflow -4: ldr q0, [copy_from_ptr], #16 +5: + /* Copy_long_literal_loop: */ + ldr q0, [copy_from_ptr], #16 str q0, [copy_to_ptr], #16 cmp x0, copy_to_ptr - b.ls 6f - b 4b + b.ls 7f + b 5b -5: ldr q0, [x3] +6: + /* Copy_literal_lt_15: */ + ldr q0, [x3] str q0, [x0] add x3, x3, literal_length add x0, x0, literal_length /* Decode offset and match_length */ -6: mov offset_src_ptr, x3 - ldrh w_offset, [x3], #2 /* 2Byte:offset bytes */ +7: + /* Decode_offset_matchlength: */ + mov offset_src_ptr, x3 + ldrh w_offset, [x3], #2 /* 2Byte: offset bytes */ cbz offset, Failed /* match_length == 0 is invalid */ sub copy_from_ptr, x0, offset cmp copy_from_ptr, x1 @@ -150,76 +166,90 @@ */ add x0, x0, match_length cmp match_length, #19 - b.lo 8f + b.lo 9f + /* * continue decoding more match length bytes. 
*/ - -7: check_src_overflow1 +8: + /* Get_long_matchlength: */ + check_src_overflow1 ldrb w_tmp, [x3], #1 add x0, x0, tmp add match_length, match_length, tmp cmp tmp, #255 - b.eq 7b + b.eq 8b /* * here got the matchlength,start "match copy". */ - -8: check_dst_overflow1 +9: + /* Copy_match_begin: */ + check_dst_overflow1 cmp offset , match_length - b.hs 13f + b.hs 14f -9: cmp offset , #32 - b.hs 13f +10: + /* Cond_offset_lt_matchlength: */ + cmp offset , #32 + b.hs 14f -10: ldr q1, [copy_from_ptr] +11: + /* Copy_offset_lt_32: */ + ldr q1, [copy_from_ptr] add tmp, permtable_addr, offset, lsl #5 ldp q2, q3, [tmp] tbl v0.16b, {v1.16b}, v2.16b tbl v1.16b, {v1.16b}, v3.16b cmp offset , #16 - b.lo 11f + b.lo 12f ldp q0, q1, [copy_from_ptr] -11: ldrb w_tmp, [cplen_table_addr, offset] +12: + /* Copy_match_perm: */ + ldrb w_tmp, [cplen_table_addr, offset] stp q0, q1, [copy_to_ptr] add copy_to_ptr, copy_to_ptr, tmp cmp x0, copy_to_ptr b.ls 1b -12: stp q0, q1, [copy_to_ptr] +13: + /* Copy_offset_lt_32_loop: */ + stp q0, q1, [copy_to_ptr] add copy_to_ptr, copy_to_ptr, tmp stp q0, q1, [copy_to_ptr] add copy_to_ptr, copy_to_ptr, tmp cmp x0, copy_to_ptr - b.hi 12b + b.hi 13b b 1b -/* offset >= match */ - -13: ldr q0, [copy_from_ptr], #16 + /* offset >= match */ +14: + /* Cond_offset_ge_matchlength: */ + ldr q0, [copy_from_ptr], #16 str q0, [copy_to_ptr], #16 cmp x0, copy_to_ptr b.ls 1b -14: ldp q0, q1, [copy_from_ptr], #32 +15: + /* Copy_offset_ge_match_loop: */ + ldp q0, q1, [copy_from_ptr], #32 stp q0, q1, [copy_to_ptr], #32 cmp x0, copy_to_ptr - b.hi 14b + b.hi 15b b 1b .endm .text .p2align 4 -ENTRY(_lz4_decompress_asm) +SYM_FUNC_START(_lz4_decompress_asm) lz4_decompress_asm_generic -ENDPROC(_lz4_decompress_asm) +SYM_FUNC_END(_lz4_decompress_asm) -Failed: +SYM_INNER_LABEL(Failed, SYM_L_LOCAL) mov tmp, #-1 b Exit_here @@ -239,7 +269,6 @@ Exit_here: ldp x29, x30, [sp], #16 ret x30 - /* * In case of offset <= 31 < matchlength ,expand the pattern and store in * repeating pattern 
size(RPS),store the RPS in Copylength_table. @@ -250,7 +279,7 @@ Exit_here: * read the pattern and store in q0 q1. * RPS = offset. */ -.text +.pushsection ".rodata", "a" .p2align 8 Permtable: .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 //offset = 0 @@ -274,10 +303,10 @@ Permtable: Copylength_table: .byte 32,32,32,30,32,30,30,28,32,27,30,22,24,26,28,30 // 0 .. 15 .byte 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 // 16 .. 31 - +.popsection .text .p2align 4 -ENTRY(_lz4_decompress_asm_noprfm) +SYM_FUNC_START(_lz4_decompress_asm_noprfm) lz4_decompress_asm_generic 0 -ENDPROC(_lz4_decompress_asm_noprfm) +SYM_FUNC_END(_lz4_decompress_asm_noprfm) From a4518203d407dcf35ab4067e7ab8c2f2f92d8a1f Mon Sep 17 00:00:00 2001 From: Tashfin Shakeer Rhythm Date: Wed, 22 Feb 2023 19:09:38 +0600 Subject: [PATCH 11/85] lz4: Use ARM64 v8 ASM to accelerate lz4 decompression Signed-off-by: Tashfin Shakeer Rhythm --- lib/lz4/lz4.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib/lz4/lz4.c b/lib/lz4/lz4.c index ac9179f6ca8d..d754ec1c324c 100644 --- a/lib/lz4/lz4.c +++ b/lib/lz4/lz4.c @@ -3251,8 +3251,13 @@ int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode, if (lz4sd->prefixSize == 0) { /* The first call, no dictionary yet. */ assert(lz4sd->extDictSize == 0); +#if defined(CONFIG_ARM64) && defined(CONFIG_KERNEL_MODE_NEON) + result = LZ4_arm64_decompress_safe(source, dest, compressedSize, + maxOutputSize, false); +#else result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); +#endif if (result <= 0) return result; lz4sd->prefixSize = (size_t)result; From 5c71eb07fb993ceff4c842cac0d832dc19579151 Mon Sep 17 00:00:00 2001 From: EmanuelCN Date: Sun, 13 Apr 2025 21:12:17 +0300 Subject: [PATCH 12/85] lz4: Rename conflicting macro Rename current to curr because lz4accel.h imports asm/current.h indirectly which defines current as get_current(). 
--- lib/lz4/lz4.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/lib/lz4/lz4.c b/lib/lz4/lz4.c index d754ec1c324c..23ac1387b397 100644 --- a/lib/lz4/lz4.c +++ b/lib/lz4/lz4.c @@ -1142,10 +1142,10 @@ LZ4_FORCE_INLINE int LZ4_compress_generic_validated( int searchMatchNb = acceleration << LZ4_skipTrigger; do { U32 const h = forwardH; - U32 const current = (U32)(forwardIp - base); + U32 const curr = (U32)(forwardIp - base); U32 matchIndex = LZ4_getIndexOnHash( h, cctx->hashTable, tableType); - assert(matchIndex <= current); + assert(matchIndex <= curr); assert(forwardIp - base < (ptrdiff_t)(2 GB - 1)); ip = forwardIp; @@ -1192,29 +1192,29 @@ LZ4_FORCE_INLINE int LZ4_compress_generic_validated( } forwardH = LZ4_hashPosition(forwardIp, tableType); - LZ4_putIndexOnHash(current, h, cctx->hashTable, + LZ4_putIndexOnHash(curr, h, cctx->hashTable, tableType); DEBUGLOG(7, "candidate at pos=%u (offset=%u \n", - matchIndex, current - matchIndex); + matchIndex, curr - matchIndex); if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; } /* match outside of valid area */ - assert(matchIndex < current); + assert(matchIndex < curr); if (((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX)) && - (matchIndex + LZ4_DISTANCE_MAX < current)) { + (matchIndex + LZ4_DISTANCE_MAX < curr)) { continue; } /* too far */ - assert((current - matchIndex) <= + assert((curr - matchIndex) <= LZ4_DISTANCE_MAX); /* match now expected within distance */ if (LZ4_read32(match) == LZ4_read32(ip)) { if (maybe_extMem) - offset = current - matchIndex; + offset = curr - matchIndex; break; /* match found */ } @@ -1438,10 +1438,10 @@ LZ4_FORCE_INLINE int LZ4_compress_generic_validated( } else { /* byU32, byU16 */ U32 const h = LZ4_hashPosition(ip, tableType); - U32 const current = (U32)(ip - base); + U32 const curr = (U32)(ip - base); U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); - assert(matchIndex < 
current); + assert(matchIndex < curr); if (dictDirective == usingDictCtx) { if (matchIndex < startIndex) { /* there was no match, try the dictionary */ @@ -1471,9 +1471,9 @@ LZ4_FORCE_INLINE int LZ4_compress_generic_validated( } else { /* single memory segment */ match = base + matchIndex; } - LZ4_putIndexOnHash(current, h, cctx->hashTable, + LZ4_putIndexOnHash(curr, h, cctx->hashTable, tableType); - assert(matchIndex < current); + assert(matchIndex < curr); if (((dictIssue == dictSmall) ? (matchIndex >= prefixIdxLimit) : 1) && @@ -1481,12 +1481,12 @@ LZ4_FORCE_INLINE int LZ4_compress_generic_validated( (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex + LZ4_DISTANCE_MAX >= - current)) && + curr)) && (LZ4_read32(match) == LZ4_read32(ip))) { token = op++; *token = 0; if (maybe_extMem) - offset = current - matchIndex; + offset = curr - matchIndex; DEBUGLOG( 6, "seq.start:%i, literals=%u, match.start:%i", From d51b289108110dd366208fcb9b5e3bf9f2d4e4ab Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Mon, 14 Apr 2025 16:12:40 -0700 Subject: [PATCH 13/85] lz4: define LZ4HC_DEFAULT_CLEVEL for compatibility Signed-off-by: Juhyung Park Signed-off-by: Tashfin Shakeer Rhythm --- include/linux/lz4.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/lz4.h b/include/linux/lz4.h index 580a9caa5cc7..025b2a0cc125 100644 --- a/include/linux/lz4.h +++ b/include/linux/lz4.h @@ -11,6 +11,7 @@ #define LZ4HC_MEM_COMPRESS LZ4_STREAMHC_MINSIZE #define LZ4HC_MIN_CLEVEL LZ4HC_CLEVEL_MIN +#define LZ4HC_DEFAULT_CLEVEL LZ4HC_CLEVEL_DEFAULT #define LZ4HC_MAX_CLEVEL LZ4HC_CLEVEL_MAX #endif From e0792f4bb3483d067c8adb7eff132a1a34214dfe Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Mon, 14 Apr 2025 16:13:04 -0700 Subject: [PATCH 14/85] lz4: move LZ4_ACCELERATION_* macros to lz4.h zram uses this. 
Signed-off-by: Juhyung Park Signed-off-by: Tashfin Shakeer Rhythm --- lib/lz4/lz4.c | 12 ------------ lib/lz4/lz4.h | 12 ++++++++++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/lib/lz4/lz4.c b/lib/lz4/lz4.c index 23ac1387b397..92c447437cac 100644 --- a/lib/lz4/lz4.c +++ b/lib/lz4/lz4.c @@ -45,18 +45,6 @@ #define LZ4_HEAPMODE 0 #endif -/* - * LZ4_ACCELERATION_DEFAULT : - * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0 - */ -#define LZ4_ACCELERATION_DEFAULT 1 -/* - * LZ4_ACCELERATION_MAX : - * Any "acceleration" value higher than this threshold - * get treated as LZ4_ACCELERATION_MAX instead (fix #876) - */ -#define LZ4_ACCELERATION_MAX 65537 - /*-************************************ * CPU Feature Detection **************************************/ diff --git a/lib/lz4/lz4.h b/lib/lz4/lz4.h index d21ce150bc7d..cf488fbe8d93 100644 --- a/lib/lz4/lz4.h +++ b/lib/lz4/lz4.h @@ -196,6 +196,18 @@ LZ4LIB_API const char *LZ4_versionString( #error "LZ4_MEMORY_USAGE is too large !" #endif +/* + * LZ4_ACCELERATION_DEFAULT : + * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0 + */ +#define LZ4_ACCELERATION_DEFAULT 1 +/* + * LZ4_ACCELERATION_MAX : + * Any "acceleration" value higher than this threshold + * get treated as LZ4_ACCELERATION_MAX instead (fix #876) + */ +#define LZ4_ACCELERATION_MAX 65537 + /*-************************************ * Simple Functions **************************************/ From b9b4b69b95ba69731c5a06b99338f59bf0b495ab Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Mon, 14 Apr 2025 16:13:49 -0700 Subject: [PATCH 15/85] lz4: fix LZ4_compress_fast() definition LZ4_compress_fast() should be exported with wrkmem. 
Signed-off-by: Juhyung Park Signed-off-by: Tashfin Shakeer Rhythm --- lib/lz4/lz4.c | 21 +++------------------ lib/lz4/lz4.h | 2 +- 2 files changed, 4 insertions(+), 19 deletions(-) diff --git a/lib/lz4/lz4.c b/lib/lz4/lz4.c index 92c447437cac..000c4fa9ceb6 100644 --- a/lib/lz4/lz4.c +++ b/lib/lz4/lz4.c @@ -1700,26 +1700,11 @@ int LZ4_compress_fast_extState_fastReset(void *state, const char *src, } int LZ4_compress_fast(const char *src, char *dest, int srcSize, int dstCapacity, - int acceleration) + int acceleration, void *wrkmem) { - int result; -#if (LZ4_HEAPMODE) - LZ4_stream_t *const ctxPtr = (LZ4_stream_t *)ALLOC(sizeof( - LZ4_stream_t)); /* malloc-calloc always properly aligned */ - if (ctxPtr == NULL) - return 0; -#else - LZ4_stream_t ctx; - LZ4_stream_t *const ctxPtr = &ctx; -#endif - result = LZ4_compress_fast_extState(ctxPtr, src, dest, srcSize, - dstCapacity, acceleration); - -#if (LZ4_HEAPMODE) - FREEMEM(ctxPtr); -#endif - return result; + return LZ4_compress_fast_extState(wrkmem, src, dest, srcSize, dstCapacity, acceleration); } +EXPORT_SYMBOL(LZ4_compress_fast); int LZ4_compress_default(const char *src, char *dst, int srcSize, int dstCapacity, void *wrkmem) diff --git a/lib/lz4/lz4.h b/lib/lz4/lz4.h index cf488fbe8d93..3348f214ee47 100644 --- a/lib/lz4/lz4.h +++ b/lib/lz4/lz4.h @@ -275,7 +275,7 @@ LZ4LIB_API int LZ4_compressBound(int inputSize); Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c). */ LZ4LIB_API int LZ4_compress_fast(const char *src, char *dst, int srcSize, - int dstCapacity, int acceleration); + int dstCapacity, int acceleration, void *wrkmem); /*! LZ4_compress_fast_extState() : * Same as LZ4_compress_fast(), using an externally allocated memory space for its state. 
From a41b337a431204c8672d618331ed33fc932d5edb Mon Sep 17 00:00:00 2001 From: Samuel Pascua Date: Tue, 19 Aug 2025 06:44:53 +0800 Subject: [PATCH 16/85] lz4: armv8: use old annotations Signed-off-by: Samuel Pascua --- lib/lz4/lz4armv8/lz4armv8.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/lz4/lz4armv8/lz4armv8.S b/lib/lz4/lz4armv8/lz4armv8.S index 1d836f686dd9..a549cd9145e2 100644 --- a/lib/lz4/lz4armv8/lz4armv8.S +++ b/lib/lz4/lz4armv8/lz4armv8.S @@ -245,11 +245,11 @@ .text .p2align 4 -SYM_FUNC_START(_lz4_decompress_asm) +ENTRY(_lz4_decompress_asm) lz4_decompress_asm_generic -SYM_FUNC_END(_lz4_decompress_asm) +ENDPROC(_lz4_decompress_asm) -SYM_INNER_LABEL(Failed, SYM_L_LOCAL) +Failed: mov tmp, #-1 b Exit_here @@ -307,6 +307,6 @@ Copylength_table: .text .p2align 4 -SYM_FUNC_START(_lz4_decompress_asm_noprfm) +ENTRY(_lz4_decompress_asm_noprfm) lz4_decompress_asm_generic 0 -SYM_FUNC_END(_lz4_decompress_asm_noprfm) +ENDPROC(_lz4_decompress_asm_noprfm) From a037977eb009e61c099c55a53e91a5b2b42b3769 Mon Sep 17 00:00:00 2001 From: Shalini Manjunatha Date: Tue, 23 May 2023 11:29:05 +0530 Subject: [PATCH 17/85] BACKPORT: dsp: afe: check for param size before copying Check for the proper param size before copying, to avoid buffer overflow. 
Original-Change-Id: I70c52e6ab76f528ea3714784ab9013b070839c40 Signed-off-by: Shalini Manjunatha Change-Id: Ic7fa9b3dd047d8eeba3cea02b99d6bc5b9df8daf --- techpack/audio/dsp/q6afe.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/techpack/audio/dsp/q6afe.c b/techpack/audio/dsp/q6afe.c index 0cee4c134aec..05d4828839ff 100644 --- a/techpack/audio/dsp/q6afe.c +++ b/techpack/audio/dsp/q6afe.c @@ -404,24 +404,49 @@ static int32_t sp_make_afe_callback(uint32_t opcode, uint32_t *payload, switch (param_hdr.param_id) { case AFE_PARAM_ID_CALIB_RES_CFG_V2: expected_size += sizeof(struct asm_calib_res_cfg); + if (param_hdr.param_size != sizeof(struct asm_calib_res_cfg)) { + pr_err("%s: Error: param_size %d is greater than expected\n", + __func__,param_hdr.param_size); + return -EINVAL; + } data_dest = (u32 *) &this_afe.calib_data; break; case AFE_PARAM_ID_SP_V2_TH_VI_FTM_PARAMS: expected_size += sizeof(struct afe_sp_th_vi_ftm_params); + if (param_hdr.param_size != sizeof(struct afe_sp_th_vi_ftm_params)) { + pr_err("%s: Error: param_size %d is greater than expected\n", + __func__,param_hdr.param_size); + return -EINVAL; + } data_dest = (u32 *) &this_afe.th_vi_resp; break; case AFE_PARAM_ID_SP_V2_TH_VI_V_VALI_PARAMS: pr_err("%s: got response pkt\n", __func__); expected_size += sizeof(struct afe_sp_th_vi_v_vali_params); + if (param_hdr.param_size != sizeof(struct afe_sp_th_vi_v_vali_params)) { + pr_err("%s: Error: param_size %d is greater than expected\n", + __func__,param_hdr.param_size); + return -EINVAL; + } data_dest = (u32 *) &this_afe.th_vi_v_vali_resp; break; case AFE_PARAM_ID_SP_V2_EX_VI_FTM_PARAMS: expected_size += sizeof(struct afe_sp_ex_vi_ftm_params); + if (param_hdr.param_size != sizeof(struct afe_sp_ex_vi_ftm_params)) { + pr_err("%s: Error: param_size %d is greater than expected\n", + __func__,param_hdr.param_size); + return -EINVAL; + } data_dest = (u32 *) &this_afe.ex_vi_resp; break; case AFE_PARAM_ID_SP_RX_TMAX_XMAX_LOGGING: 
expected_size += sizeof( struct afe_sp_rx_tmax_xmax_logging_param); + if (param_hdr.param_size != sizeof(struct afe_sp_rx_tmax_xmax_logging_param)) { + pr_err("%s: Error: param_size %d is greater than expected\n", + __func__,param_hdr.param_size); + return -EINVAL; + } data_dest = (u32 *) &this_afe.xt_logging_resp; break; default: From f8b865aaa655303636650214b51eea9630758e63 Mon Sep 17 00:00:00 2001 From: Santosh Sakore Date: Thu, 23 May 2024 18:58:00 +0530 Subject: [PATCH 18/85] msm: adsprpc: use-after-free (UAF) in global maps Currently, remote heap maps get added to the global list before the fastrpc_internal_mmap function completes the mapping. Meanwhile, the fastrpc_internal_munmap function accesses the map, starts unmapping, and frees the map before the fastrpc_internal_mmap function completes, resulting in a use-after-free (UAF) issue. Add the map to the list after the fastrpc_internal_mmap function completes the mapping. Change-Id: I73c536718f3228b7cbb7a19b76270e0dd3e32bd1 Acked-by: Abhishek Singh Signed-off-by: Santosh Sakore (cherry picked from commit 6f39d9be6244a1c23397fd959bee425be4440849) --- drivers/char/adsprpc.c | 83 ++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 47 deletions(-) diff --git a/drivers/char/adsprpc.c b/drivers/char/adsprpc.c index 1baa32c4cfcb..bdc4348c7f79 100644 --- a/drivers/char/adsprpc.c +++ b/drivers/char/adsprpc.c @@ -596,64 +596,44 @@ static void fastrpc_remote_buf_list_free(struct fastrpc_file *fl) } while (free); } +static void fastrpc_mmap_add_global(struct fastrpc_mmap *map) +{ + struct fastrpc_apps *me = &gfa; + unsigned long irq_flags = 0; + + spin_lock_irqsave(&me->hlock, irq_flags); + hlist_add_head(&map->hn, &me->maps); + spin_unlock_irqrestore(&me->hlock, irq_flags); +} + static void fastrpc_mmap_add(struct fastrpc_mmap *map) { - if (map->flags == ADSP_MMAP_HEAP_ADDR || - map->flags == ADSP_MMAP_REMOTE_HEAP_ADDR) { - struct fastrpc_apps *me = &gfa; + struct fastrpc_file *fl = map->fl; - 
spin_lock(&me->hlock); - hlist_add_head(&map->hn, &me->maps); - spin_unlock(&me->hlock); - } else { - struct fastrpc_file *fl = map->fl; - - hlist_add_head(&map->hn, &fl->maps); - } + hlist_add_head(&map->hn, &fl->maps); } static int fastrpc_mmap_find(struct fastrpc_file *fl, int fd, uintptr_t va, size_t len, int mflags, int refs, struct fastrpc_mmap **ppmap) { - struct fastrpc_apps *me = &gfa; struct fastrpc_mmap *match = NULL, *map = NULL; struct hlist_node *n; if ((va + len) < va) return -EOVERFLOW; - if (mflags == ADSP_MMAP_HEAP_ADDR || - mflags == ADSP_MMAP_REMOTE_HEAP_ADDR) { - spin_lock(&me->hlock); - hlist_for_each_entry_safe(map, n, &me->maps, hn) { - if (va >= map->va && - va + len <= map->va + map->len && - map->fd == fd) { - if (refs) { - if (map->refs + 1 == INT_MAX) { - spin_unlock(&me->hlock); - return -ETOOMANYREFS; - } - map->refs++; - } - match = map; - break; - } - } - spin_unlock(&me->hlock); - } else { - hlist_for_each_entry_safe(map, n, &fl->maps, hn) { - if (va >= map->va && - va + len <= map->va + map->len && - map->fd == fd) { - if (refs) { - if (map->refs + 1 == INT_MAX) - return -ETOOMANYREFS; - map->refs++; - } - match = map; - break; + + hlist_for_each_entry_safe(map, n, &fl->maps, hn) { + if (va >= map->va && + va + len <= map->va + map->len && + map->fd == fd) { + if (refs) { + if (map->refs + 1 == INT_MAX) + return -ETOOMANYREFS; + map->refs++; } + match = map; + break; } } if (match) { @@ -997,8 +977,9 @@ static int fastrpc_mmap_create(struct fastrpc_file *fl, int fd, map->va = va; } map->len = len; - - fastrpc_mmap_add(map); + if ((mflags != ADSP_MMAP_HEAP_ADDR) && + (mflags != ADSP_MMAP_REMOTE_HEAP_ADDR)) + fastrpc_mmap_add(map); *ppmap = map; bail: @@ -2311,6 +2292,7 @@ static int fastrpc_init_process(struct fastrpc_file *fl, mutex_unlock(&fl->map_mutex); if (err) goto bail; + fastrpc_mmap_add_global(mem); phys = mem->phys; size = mem->size; if (me->channel[fl->cid].rhvm.vmid) { @@ -2641,7 +2623,7 @@ static int 
fastrpc_mmap_remove_ssr(struct fastrpc_file *fl) } while (match); bail: if (err && match) - fastrpc_mmap_add(match); + fastrpc_mmap_add_global(match); return err; } @@ -2758,7 +2740,11 @@ static int fastrpc_internal_munmap(struct fastrpc_file *fl, bail: if (err && map) { mutex_lock(&fl->map_mutex); - fastrpc_mmap_add(map); + if ((map->flags == ADSP_MMAP_HEAP_ADDR) || + (map->flags == ADSP_MMAP_REMOTE_HEAP_ADDR)) + fastrpc_mmap_add_global(map); + else + fastrpc_mmap_add(map); mutex_unlock(&fl->map_mutex); } mutex_unlock(&fl->internal_map_mutex); @@ -2865,6 +2851,9 @@ static int fastrpc_internal_mmap(struct fastrpc_file *fl, if (err) goto bail; map->raddr = raddr; + if (ud->flags == ADSP_MMAP_HEAP_ADDR || + ud->flags == ADSP_MMAP_REMOTE_HEAP_ADDR) + fastrpc_mmap_add_global(map); } ud->vaddrout = raddr; bail: From 0739c908f6bd73326bb35f94072a79181c3a338e Mon Sep 17 00:00:00 2001 From: Benoit Sevens Date: Thu, 7 Nov 2024 14:22:02 +0000 Subject: [PATCH 19/85] UPSTREAM: USB: media: uvcvideo: Skip parsing frames of type UVC_VS_UNDEFINED in uvc_parse_format This can lead to out of bounds writes since frames of this type were not taken into account when calculating the size of the frames buffer in uvc_parse_streaming. 
Fixes: c0efd232929c ("V4L/DVB (8145a): USB Video Class driver") Signed-off-by: Benoit Sevens Cc: stable@vger.kernel.org Acked-by: Greg Kroah-Hartman Reviewed-by: Laurent Pinchart Signed-off-by: Hans Verkuil Signed-off-by: Greg Kroah-Hartman Bug: 378455392 (cherry picked from commit ecf2b43018da9579842c774b7f35dbe11b5c38dd) Signed-off-by: Greg Kroah-Hartman Change-Id: I959a6374ba7adf021fc19da755f5c7611fef9b8c --- drivers/media/usb/uvc/uvc_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index b814ca282ff5..f02130d16daf 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -531,7 +531,7 @@ static int uvc_parse_format(struct uvc_device *dev, /* Parse the frame descriptors. Only uncompressed, MJPEG and frame * based formats have frame descriptors. */ - while (buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE && + while (ftype && buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE && buffer[2] == ftype) { frame = &format->frame[format->nframes]; if (ftype != UVC_VS_FRAME_FRAME_BASED) From 2aedf65efd6bf8e4e3c42c60e2e445c730667323 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Sevens?= Date: Wed, 20 Nov 2024 12:41:44 +0000 Subject: [PATCH 20/85] UPSTREAM: ALSA: usb-audio: Fix potential out-of-bound accesses for Extigy and Mbox devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b909df18ce2a998afef81d58bbd1a05dc0788c40 upstream. A bogus device can provide a bNumConfigurations value that exceeds the initial value used in usb_get_configuration for allocating dev->config. This can lead to out-of-bounds accesses later, e.g. in usb_destroy_configuration. 
Bug: 382243530 Signed-off-by: Benoît Sevens Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: stable@kernel.org Link: https://patch.msgid.link/20241120124144.3814457-1-bsevens@google.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 9887d859cd60727432a01564e8f91302d361b72b) Signed-off-by: Lee Jones Change-Id: I2df0d59750943fa34747bd4bae2e549320f2a0ce --- sound/usb/quirks.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 313f83f2b75e..f3c61f5ca4b3 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -582,6 +582,7 @@ int snd_usb_create_quirk(struct snd_usb_audio *chip, static int snd_usb_extigy_boot_quirk(struct usb_device *dev, struct usb_interface *intf) { struct usb_host_config *config = dev->actconfig; + struct usb_device_descriptor new_device_descriptor; int err; if (le16_to_cpu(get_cfg_desc(config)->wTotalLength) == EXTIGY_FIRMWARE_SIZE_OLD || @@ -593,10 +594,14 @@ static int snd_usb_extigy_boot_quirk(struct usb_device *dev, struct usb_interfac if (err < 0) dev_dbg(&dev->dev, "error sending boot message: %d\n", err); err = usb_get_descriptor(dev, USB_DT_DEVICE, 0, - &dev->descriptor, sizeof(dev->descriptor)); - config = dev->actconfig; + &new_device_descriptor, sizeof(new_device_descriptor)); if (err < 0) dev_dbg(&dev->dev, "error usb_get_descriptor: %d\n", err); + if (new_device_descriptor.bNumConfigurations > dev->descriptor.bNumConfigurations) + dev_dbg(&dev->dev, "error too large bNumConfigurations: %d\n", + new_device_descriptor.bNumConfigurations); + else + memcpy(&dev->descriptor, &new_device_descriptor, sizeof(dev->descriptor)); err = usb_reset_configuration(dev); if (err < 0) dev_dbg(&dev->dev, "error usb_reset_configuration: %d\n", err); @@ -812,6 +817,7 @@ static void mbox2_setup_48_24_magic(struct usb_device *dev) static int snd_usb_mbox2_boot_quirk(struct usb_device *dev) { struct usb_host_config *config = dev->actconfig; 
+ struct usb_device_descriptor new_device_descriptor; int err; u8 bootresponse[0x12]; int fwsize; @@ -847,10 +853,14 @@ static int snd_usb_mbox2_boot_quirk(struct usb_device *dev) dev_dbg(&dev->dev, "device initialised!\n"); err = usb_get_descriptor(dev, USB_DT_DEVICE, 0, - &dev->descriptor, sizeof(dev->descriptor)); - config = dev->actconfig; + &new_device_descriptor, sizeof(new_device_descriptor)); if (err < 0) dev_dbg(&dev->dev, "error usb_get_descriptor: %d\n", err); + if (new_device_descriptor.bNumConfigurations > dev->descriptor.bNumConfigurations) + dev_dbg(&dev->dev, "error too large bNumConfigurations: %d\n", + new_device_descriptor.bNumConfigurations); + else + memcpy(&dev->descriptor, &new_device_descriptor, sizeof(dev->descriptor)); err = usb_reset_configuration(dev); if (err < 0) From 259196ff11da748ce0124e2b258cdb479194f872 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 2 Dec 2024 15:57:54 +0300 Subject: [PATCH 21/85] UPSTREAM: ALSA: usb-audio: Fix a DMA to stack memory bug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f7d306b47a24367302bd4fe846854e07752ffcd9 upstream. The usb_get_descriptor() function does DMA so we're not allowed to use a stack buffer for that. Doing DMA to the stack is not portable all architectures. Move the "new_device_descriptor" from being stored on the stack and allocate it with kmalloc() instead. 
Bug: 382243530 Fixes: b909df18ce2a ("ALSA: usb-audio: Fix potential out-of-bound accesses for Extigy and Mbox devices") Cc: stable@kernel.org Signed-off-by: Dan Carpenter Link: https://patch.msgid.link/60e3aa09-039d-46d2-934c-6f123026c2eb@stanley.mountain Signed-off-by: Takashi Iwai Signed-off-by: Benoît Sevens Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 4e54dc4bbc602133217de301d9f814f3e6d22eee) Signed-off-by: Lee Jones Change-Id: I469212aa538584e3d8cc5b0087b68c99acf43f64 --- sound/usb/quirks.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index f3c61f5ca4b3..04bc4c16e353 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -582,7 +582,7 @@ int snd_usb_create_quirk(struct snd_usb_audio *chip, static int snd_usb_extigy_boot_quirk(struct usb_device *dev, struct usb_interface *intf) { struct usb_host_config *config = dev->actconfig; - struct usb_device_descriptor new_device_descriptor; + struct usb_device_descriptor *new_device_descriptor = NULL; int err; if (le16_to_cpu(get_cfg_desc(config)->wTotalLength) == EXTIGY_FIRMWARE_SIZE_OLD || @@ -593,15 +593,20 @@ static int snd_usb_extigy_boot_quirk(struct usb_device *dev, struct usb_interfac 0x10, 0x43, 0x0001, 0x000a, NULL, 0); if (err < 0) dev_dbg(&dev->dev, "error sending boot message: %d\n", err); + + new_device_descriptor = kmalloc(sizeof(*new_device_descriptor), GFP_KERNEL); + if (!new_device_descriptor) + return -ENOMEM; err = usb_get_descriptor(dev, USB_DT_DEVICE, 0, - &new_device_descriptor, sizeof(new_device_descriptor)); + new_device_descriptor, sizeof(*new_device_descriptor)); if (err < 0) dev_dbg(&dev->dev, "error usb_get_descriptor: %d\n", err); - if (new_device_descriptor.bNumConfigurations > dev->descriptor.bNumConfigurations) + if (new_device_descriptor->bNumConfigurations > dev->descriptor.bNumConfigurations) dev_dbg(&dev->dev, "error too large bNumConfigurations: %d\n", - 
new_device_descriptor.bNumConfigurations); + new_device_descriptor->bNumConfigurations); else - memcpy(&dev->descriptor, &new_device_descriptor, sizeof(dev->descriptor)); + memcpy(&dev->descriptor, new_device_descriptor, sizeof(dev->descriptor)); + kfree(new_device_descriptor); err = usb_reset_configuration(dev); if (err < 0) dev_dbg(&dev->dev, "error usb_reset_configuration: %d\n", err); @@ -817,7 +822,7 @@ static void mbox2_setup_48_24_magic(struct usb_device *dev) static int snd_usb_mbox2_boot_quirk(struct usb_device *dev) { struct usb_host_config *config = dev->actconfig; - struct usb_device_descriptor new_device_descriptor; + struct usb_device_descriptor *new_device_descriptor = NULL; int err; u8 bootresponse[0x12]; int fwsize; @@ -852,15 +857,21 @@ static int snd_usb_mbox2_boot_quirk(struct usb_device *dev) dev_dbg(&dev->dev, "device initialised!\n"); + new_device_descriptor = kmalloc(sizeof(*new_device_descriptor), GFP_KERNEL); + if (!new_device_descriptor) + return -ENOMEM; + err = usb_get_descriptor(dev, USB_DT_DEVICE, 0, - &new_device_descriptor, sizeof(new_device_descriptor)); + new_device_descriptor, sizeof(*new_device_descriptor)); if (err < 0) dev_dbg(&dev->dev, "error usb_get_descriptor: %d\n", err); - if (new_device_descriptor.bNumConfigurations > dev->descriptor.bNumConfigurations) + if (new_device_descriptor->bNumConfigurations > dev->descriptor.bNumConfigurations) dev_dbg(&dev->dev, "error too large bNumConfigurations: %d\n", - new_device_descriptor.bNumConfigurations); + new_device_descriptor->bNumConfigurations); else - memcpy(&dev->descriptor, &new_device_descriptor, sizeof(dev->descriptor)); + memcpy(&dev->descriptor, new_device_descriptor, sizeof(dev->descriptor)); + + kfree(new_device_descriptor); err = usb_reset_configuration(dev); if (err < 0) From abdabd6880566f4088efe61da40af68f89186b32 Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 29 Oct 2024 15:44:35 +0100 Subject: [PATCH 22/85] UPSTREAM: HID: core: zero-initialize the report 
buffer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 177f25d1292c7e16e1199b39c85480f7f8815552 ] Since the report buffer is used by all kinds of drivers in various ways, let's zero-initialize it during allocation to make sure that it can't be ever used to leak kernel memory via specially-crafted report. Bug: 380395346 Fixes: 27ce405039bf ("HID: fix data access in implement()") Reported-by: Benoît Sevens Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin (cherry picked from commit 9d9f5c75c0c7f31766ec27d90f7a6ac673193191) Signed-off-by: Lee Jones Change-Id: I31f64f2745347137bbc415eb35b7fab5761867f3 --- drivers/hid/hid-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 7c4616f47692..7fb732861b95 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1476,7 +1476,7 @@ u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags) u32 len = hid_report_len(report) + 7; - return kmalloc(len, flags); + return kzalloc(len, flags); } EXPORT_SYMBOL_GPL(hid_alloc_report_buf); From 4fa5939ec9dae08642bff07df74f658f96adeb93 Mon Sep 17 00:00:00 2001 From: Abinath S Date: Fri, 9 Aug 2024 17:53:45 +0530 Subject: [PATCH 23/85] asoc: codec: avoid out of bound write to map array added check for port num and channel iteration are less than 8 to avoid out of bound write to 8x8 map array.
Change-Id: I4c6fe13a5eb09be623a1c40ce16c5a5e4246e021 Signed-off-by: Abinath S --- techpack/audio/4.0/asoc/codecs/wcd937x/wcd937x.c | 5 +++++ techpack/audio/4.0/asoc/codecs/wcd938x/wcd938x.c | 6 ++++++ techpack/audio/asoc/codecs/wcd937x/wcd937x.c | 5 +++++ 3 files changed, 16 insertions(+) diff --git a/techpack/audio/4.0/asoc/codecs/wcd937x/wcd937x.c b/techpack/audio/4.0/asoc/codecs/wcd937x/wcd937x.c index 5a12f0be2bc7..f11930b3aef5 100644 --- a/techpack/audio/4.0/asoc/codecs/wcd937x/wcd937x.c +++ b/techpack/audio/4.0/asoc/codecs/wcd937x/wcd937x.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -226,6 +227,10 @@ static int wcd937x_parse_port_mapping(struct device *dev, for (i = 0; i < map_length; i++) { port_num = dt_array[NUM_SWRS_DT_PARAMS * i]; + if (port_num >= MAX_PORT || ch_iter >= MAX_CH_PER_PORT) { + dev_err(dev, "%s: Invalid port or channel number\n", __func__); + goto err_pdata_fail; + } slave_port_type = dt_array[NUM_SWRS_DT_PARAMS * i + 1]; ch_mask = dt_array[NUM_SWRS_DT_PARAMS * i + 2]; ch_rate = dt_array[NUM_SWRS_DT_PARAMS * i + 3]; diff --git a/techpack/audio/4.0/asoc/codecs/wcd938x/wcd938x.c b/techpack/audio/4.0/asoc/codecs/wcd938x/wcd938x.c index 83adf9d8fb8f..4e1388e62cb8 100644 --- a/techpack/audio/4.0/asoc/codecs/wcd938x/wcd938x.c +++ b/techpack/audio/4.0/asoc/codecs/wcd938x/wcd938x.c @@ -377,6 +377,12 @@ static int wcd938x_parse_port_mapping(struct device *dev, for (i = 0; i < map_length; i++) { port_num = dt_array[NUM_SWRS_DT_PARAMS * i]; + + if (port_num >= MAX_PORT || ch_iter >= MAX_CH_PER_PORT) { + dev_err(dev, "%s: Invalid port or channel number\n", __func__); + goto err_pdata_fail; + } + slave_port_type = dt_array[NUM_SWRS_DT_PARAMS * i + 1]; ch_mask = dt_array[NUM_SWRS_DT_PARAMS * i + 2]; ch_rate = dt_array[NUM_SWRS_DT_PARAMS * i + 3]; diff --git 
a/techpack/audio/asoc/codecs/wcd937x/wcd937x.c b/techpack/audio/asoc/codecs/wcd937x/wcd937x.c index ee49f4ffbcf1..f562b2358fbf 100644 --- a/techpack/audio/asoc/codecs/wcd937x/wcd937x.c +++ b/techpack/audio/asoc/codecs/wcd937x/wcd937x.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 and @@ -227,6 +228,10 @@ static int wcd937x_parse_port_mapping(struct device *dev, for (i = 0; i < map_length; i++) { port_num = dt_array[NUM_SWRS_DT_PARAMS * i]; + if (port_num >= MAX_PORT || ch_iter >= MAX_CH_PER_PORT) { + dev_err(dev, "%s: Invalid port or channel number\n", __func__); + goto err_pdata_fail; + } slave_port_type = dt_array[NUM_SWRS_DT_PARAMS * i + 1]; ch_mask = dt_array[NUM_SWRS_DT_PARAMS * i + 2]; ch_rate = dt_array[NUM_SWRS_DT_PARAMS * i + 3]; From d1dfb7d4543308091eba926b481600c73b85f502 Mon Sep 17 00:00:00 2001 From: Jinfeng Gu Date: Thu, 22 Aug 2024 15:51:37 +0800 Subject: [PATCH 24/85] disp: msm: dsi: add null pointer check in dsi_display_dev_remove This change add display null pointer check in dsi_display_dev_remove. 
Change-Id: Ib31756c3b22256d19cbcb508f60de4550e3834e1 Signed-off-by: Jinfeng Gu --- drivers/gpu/drm/msm/dsi-staging/dsi_display.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/msm/dsi-staging/dsi_display.c b/drivers/gpu/drm/msm/dsi-staging/dsi_display.c index cb87e40bf2f6..1461f612e765 100644 --- a/drivers/gpu/drm/msm/dsi-staging/dsi_display.c +++ b/drivers/gpu/drm/msm/dsi-staging/dsi_display.c @@ -5912,6 +5912,10 @@ int dsi_display_dev_remove(struct platform_device *pdev) } display = platform_get_drvdata(pdev); + if (!display || !display->disp_node) { + pr_err("invalid display\n"); + return -EINVAL; + } /* decrement ref count */ of_node_put(display->disp_node); From e6812d5584eae1ecf5026d8bc781ffafa2e8f61f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 24 Aug 2019 15:28:46 +0200 Subject: [PATCH 25/85] of: Let of_for_each_phandle fallback to non-negative cell_count MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Referencing device tree nodes from a property allows to pass arguments. This is for example used for referencing gpios. This looks as follows: gpio_ctrl: gpio-controller { #gpio-cells = <2> ... } someothernode { gpios = <&gpio_ctrl 5 0 &gpio_ctrl 3 0>; ... } To know the number of arguments this must be either fixed, or the referenced node is checked for a $cells_name (here: "#gpio-cells") property and with this information the start of the second reference can be determined. Currently regulators are referenced with no additional arguments. To allow some optional arguments without having to change all referenced nodes this change introduces a way to specify a default cell_count. So when a phandle is parsed we check for the $cells_name property and use it as before if present. If it is not present we fall back to cells_count if non-negative and only fail if cells_count is smaller than zero. 
Change-Id: Ic7a6a5e667d46847becb2a9593a00ba6db49fc98 Signed-off-by: Uwe Kleine-König Signed-off-by: Rob Herring --- drivers/of/base.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index 90a47d88901c..920dcb8e8796 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1216,11 +1216,20 @@ int of_phandle_iterator_next(struct of_phandle_iterator *it) if (of_property_read_u32(it->node, it->cells_name, &count)) { - pr_err("%pOF: could not get %s for %pOF\n", - it->parent, - it->cells_name, - it->node); - goto err; + /* + * If both cell_count and cells_name is given, + * fall back to cell_count in absence + * of the cells_name property + */ + if (it->cell_count >= 0) { + count = it->cell_count; + } else { + pr_err("%pOF: could not get %s for %pOF\n", + it->parent, + it->cells_name, + it->node); + goto err; + } } } else { count = it->cell_count; @@ -1385,7 +1394,7 @@ int of_parse_phandle_with_args(const struct device_node *np, const char *list_na { if (index < 0) return -EINVAL; - return __of_parse_phandle_with_args(np, list_name, cells_name, 0, + return __of_parse_phandle_with_args(np, list_name, cells_name, -1, index, out_args); } EXPORT_SYMBOL(of_parse_phandle_with_args); @@ -1452,7 +1461,7 @@ int of_count_phandle_with_args(const struct device_node *np, const char *list_na struct of_phandle_iterator it; int rc, cur_index = 0; - rc = of_phandle_iterator_init(&it, np, list_name, cells_name, 0); + rc = of_phandle_iterator_init(&it, np, list_name, cells_name, -1); if (rc) return rc; From fadf3cfa47a1c26b2e3fbb64a3aaf0946dd1c9c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 18 Sep 2019 10:47:48 +0200 Subject: [PATCH 26/85] of: restore old handling of cells_name=NULL in of_*_phandle_with_args() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before commit e42ee61017f5 ("of: Let of_for_each_phandle fallback to 
non-negative cell_count") the iterator functions calling of_for_each_phandle assumed a cell count of 0 if cells_name was NULL. This corner case was missed when implementing the fallback logic in e42ee61017f5 and resulted in an endless loop. Restore the old behaviour of of_count_phandle_with_args() and of_parse_phandle_with_args() and add a check to of_phandle_iterator_init() to prevent a similar failure as a safety precaution. of_parse_phandle_with_args_map() doesn't need a similar fix as cells_name isn't NULL there. Affected drivers are: - drivers/base/power/domain.c - drivers/base/power/domain.c - drivers/clk/ti/clk-dra7-atl.c - drivers/hwmon/ibmpowernv.c - drivers/i2c/muxes/i2c-demux-pinctrl.c - drivers/iommu/mtk_iommu.c - drivers/net/ethernet/freescale/fman/mac.c - drivers/opp/of.c - drivers/perf/arm_dsu_pmu.c - drivers/regulator/of_regulator.c - drivers/remoteproc/imx_rproc.c - drivers/soc/rockchip/pm_domains.c - sound/soc/fsl/imx-audmix.c - sound/soc/fsl/imx-audmix.c - sound/soc/meson/axg-card.c - sound/soc/samsung/tm2_wm5110.c - sound/soc/samsung/tm2_wm5110.c Thanks to Geert Uytterhoeven for reporting the issue, Peter Rosin for helping pinpoint the actual problem and the testers for confirming this fix. Fixes: e42ee61017f5 ("of: Let of_for_each_phandle fallback to non-negative cell_count") Tested-by: Marek Szyprowski Tested-by: Geert Uytterhoeven Change-Id: I684efc01df23ea32c578c1da4f8ea6fcf6f03ced Signed-off-by: Uwe Kleine-König Signed-off-by: Rob Herring --- drivers/of/base.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index 920dcb8e8796..d3babccedfbc 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1167,6 +1167,13 @@ int of_phandle_iterator_init(struct of_phandle_iterator *it, memset(it, 0, sizeof(*it)); + /* + * one of cell_count or cells_name must be provided to determine the + * argument length. 
+ */ + if (cell_count < 0 && !cells_name) return -EINVAL; + list = of_get_property(np, list_name, &size); if (!list) return -ENOENT; @@ -1392,10 +1399,17 @@ int of_parse_phandle_with_args(const struct device_node *np, const char *list_na const char *cells_name, int index, struct of_phandle_args *out_args) { + int cell_count = -1; + if (index < 0) return -EINVAL; - return __of_parse_phandle_with_args(np, list_name, cells_name, -1, - index, out_args); + + /* If cells_name is NULL we assume a cell count of 0 */ + if (!cells_name) + cell_count = 0; + + return __of_parse_phandle_with_args(np, list_name, cells_name, + cell_count, index, out_args); } EXPORT_SYMBOL(of_parse_phandle_with_args); @@ -1461,6 +1475,23 @@ int of_count_phandle_with_args(const struct device_node *np, const char *list_na struct of_phandle_iterator it; int rc, cur_index = 0; + /* + * If cells_name is NULL we assume a cell count of 0. This makes + * counting the phandles trivial as each 32bit word in the list is a + * phandle and no arguments are to consider. So we don't iterate through + * the list but just use the length to determine the phandle count. + */ + if (!cells_name) { + const __be32 *list; + int size; + + list = of_get_property(np, list_name, &size); + if (!list) + return -ENOENT; + + return size / sizeof(*list); + } + rc = of_phandle_iterator_init(&it, np, list_name, cells_name, -1); if (rc) return rc; From 600992995179a82611e626d80d93a0f79bc04ccd Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 23 Aug 2018 18:47:09 +1000 Subject: [PATCH 27/85] UPSTREAM: mm: mmu_notifier fix for tlb_end_vma The generic tlb_end_vma does not call invalidate_range mmu notifier, and it resets the mmu_gather range, which means the notifier won't be called on part of the range in case of an unmap that spans multiple vmas. ARM64 seems to be the only arch I could see that has notifiers and uses the generic tlb_end_vma. I have not actually tested it. 
[ Catalin and Will point out that ARM64 currently only uses the notifiers for KVM, which doesn't use the ->invalidate_range() callback right now, so it's a bug, but one that happens to not affect them. So not necessary for stable. - Linus ] Change-Id: Id7b31c8a84be494b2f6341beb3be23485b5dd6bb Signed-off-by: Nicholas Piggin Acked-by: Catalin Marinas Acked-by: Will Deacon Signed-off-by: Linus Torvalds Signed-off-by: Cyber Knight --- include/asm-generic/tlb.h | 17 +++++++++++++---- mm/memory.c | 10 ---------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 43409a047480..e6c235035544 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -15,6 +15,7 @@ #ifndef _ASM_GENERIC__TLB_H #define _ASM_GENERIC__TLB_H +#include #include #include #include @@ -146,6 +147,16 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) } } +static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) +{ + if (!tlb->end) + return; + + tlb_flush(tlb); + mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end); + __tlb_reset_range(tlb); +} + static inline void tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) { @@ -194,10 +205,8 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define __tlb_end_vma(tlb, vma) \ do { \ - if (!tlb->fullmm && tlb->end) { \ - tlb_flush(tlb); \ - __tlb_reset_range(tlb); \ - } \ + if (!tlb->fullmm) \ + tlb_flush_mmu_tlbonly(tlb); \ } while (0) #ifndef tlb_end_vma diff --git a/mm/memory.c b/mm/memory.c index d68a43b3d6d2..7a1f66986aa4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -251,16 +251,6 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, __tlb_reset_range(tlb); } -static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) -{ - if (!tlb->end) - return; - - tlb_flush(tlb); - mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end); - __tlb_reset_range(tlb); -} - 
static void tlb_flush_mmu_free(struct mmu_gather *tlb) { struct mmu_gather_batch *batch; From 008d866c951d82f0252dd0cfc5aac657dcf15b7b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 24 Aug 2018 00:23:04 +0100 Subject: [PATCH 28/85] UPSTREAM: arm64: tlb: Provide forward declaration of tlb_flush() before including tlb.h As of commit fd1102f0aade ("mm: mmu_notifier fix for tlb_end_vma"), asm-generic/tlb.h now calls tlb_flush() from a static inline function, so we need to make sure that it's declared before #including the asm-generic header in the arch header. Change-Id: Ib914ff3a30a5f081a05eeccff3d59dd7e084838a Signed-off-by: Will Deacon Acked-by: Nicholas Piggin Signed-off-by: Linus Torvalds Signed-off-by: Cyber Knight --- arch/arm64/include/asm/tlb.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index f85c97422072..9efe52747651 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -36,6 +36,8 @@ static inline void __tlb_remove_table(void *_table) #define tlb_remove_entry(tlb, entry) tlb_remove_page(tlb, entry) #endif /* CONFIG_HAVE_RCU_TABLE_FREE */ +static void tlb_flush(struct mmu_gather *tlb); + #include static inline void tlb_flush(struct mmu_gather *tlb) From b5571fa155f02d0b0f549058146662fba1ce9ab4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 24 Aug 2018 13:28:28 +0100 Subject: [PATCH 29/85] UPSTREAM: asm-generic/tlb: Guard with #ifdef CONFIG_MMU The inner workings of the mmu_gather-based TLB invalidation mechanism are not relevant to nommu configurations, so guard them with an #ifdef. This allows us to implement future functions using static inlines without breaking the build. 
Acked-by: Nicholas Piggin Acked-by: Peter Zijlstra (Intel) Change-Id: I8d6673a8daa1ff4de448477b8f0bfc5cd0ec5719 Signed-off-by: Will Deacon Signed-off-by: Cyber Knight --- include/asm-generic/tlb.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index e6c235035544..7939f29e611d 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -20,6 +20,8 @@ #include #include +#ifdef CONFIG_MMU + #ifdef CONFIG_HAVE_RCU_TABLE_FREE /* * Semi RCU freeing of the page directories. @@ -310,6 +312,8 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, } while (0) #endif +#endif /* CONFIG_MMU */ + #define tlb_migrate_finish(mm) do {} while (0) #endif /* _ASM_GENERIC__TLB_H */ From 50136c947b724c94d4987b76e87ac0f7b51028db Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Aug 2018 20:27:25 +0100 Subject: [PATCH 30/85] UPSTREAM: asm-generic/tlb: Track freeing of page-table directories in struct mmu_gather Some architectures require different TLB invalidation instructions depending on whether it is only the last-level of page table being changed, or whether there are also changes to the intermediate (directory) entries higher up the tree. Add a new bit to the flags bitfield in struct mmu_gather so that the architecture code can operate accordingly if it's the intermediate levels being invalidated. 
Acked-by: Nicholas Piggin Change-Id: I9a19a09e1ddff1e2386a29fe1392b0cb0de9cfe7 Signed-off-by: Peter Zijlstra Signed-off-by: Will Deacon Signed-off-by: Cyber Knight --- include/asm-generic/tlb.h | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 7939f29e611d..83e767761760 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -105,12 +105,22 @@ struct mmu_gather { #endif unsigned long start; unsigned long end; - /* we are in the middle of an operation to clear - * a full mm and can make some optimizations */ - unsigned int fullmm : 1, - /* we have performed an operation which - * requires a complete flush of the tlb */ - need_flush_all : 1; + /* + * we are in the middle of an operation to clear + * a full mm and can make some optimizations + */ + unsigned int fullmm : 1; + + /* + * we have performed an operation which + * requires a complete flush of the tlb + */ + unsigned int need_flush_all : 1; + + /* + * we have removed page directories + */ + unsigned int freed_tables : 1; struct mmu_gather_batch *active; struct mmu_gather_batch local; @@ -147,6 +157,7 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) tlb->start = TASK_SIZE; tlb->end = 0; } + tlb->freed_tables = 0; } static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) @@ -287,12 +298,14 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define pte_free_tlb(tlb, ptep, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ __pte_free_tlb(tlb, ptep, address); \ } while (0) #define pmd_free_tlb(tlb, pmdp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ __pmd_free_tlb(tlb, pmdp, address); \ } while (0) @@ -300,6 +313,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define 
pud_free_tlb(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif @@ -307,7 +321,8 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #ifndef __ARCH_HAS_5LEVEL_HACK #define p4d_free_tlb(tlb, pudp, address) \ do { \ - __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->freed_tables = 1; \ __p4d_free_tlb(tlb, pudp, address); \ } while (0) #endif From f497d3676b9e43112ef883a3b6cb9069366b9eba Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Aug 2018 21:01:46 +0100 Subject: [PATCH 31/85] UPSTREAM: asm-generic/tlb: Track which levels of the page tables have been cleared It is common for architectures with hugepage support to require only a single TLB invalidation operation per hugepage during unmap(), rather than iterating through the mapping at a PAGE_SIZE increment. Currently, however, the level in the page table where the unmap() operation occurs is not stored in the mmu_gather structure, therefore forcing architectures to issue additional TLB invalidation operations or to give up and over-invalidate by e.g. invalidating the entire TLB. Ideally, we could add an interval rbtree to the mmu_gather structure, which would allow us to associate the correct mapping granule with the various sub-mappings within the range being invalidated. However, this is costly in terms of book-keeping and memory management, so instead we approximate by keeping track of the page table levels that are cleared and provide a means to query the smallest granule required for invalidation. 
Acked-by: Peter Zijlstra (Intel) Acked-by: Nicholas Piggin Change-Id: Ifb486381b6e71f4e05c9d38a246bf82de2d224ac Signed-off-by: Will Deacon Signed-off-by: Cyber Knight --- include/asm-generic/tlb.h | 58 +++++++++++++++++++++++++++++++++------ mm/memory.c | 4 ++- 2 files changed, 53 insertions(+), 9 deletions(-) diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 83e767761760..3e228e547f00 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -122,6 +122,14 @@ struct mmu_gather { */ unsigned int freed_tables : 1; + /* + * at which levels have we cleared entries? + */ + unsigned int cleared_ptes : 1; + unsigned int cleared_pmds : 1; + unsigned int cleared_puds : 1; + unsigned int cleared_p4ds : 1; + struct mmu_gather_batch *active; struct mmu_gather_batch local; struct page *__pages[MMU_GATHER_BUNDLE]; @@ -158,6 +166,10 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) tlb->end = 0; } tlb->freed_tables = 0; + tlb->cleared_ptes = 0; + tlb->cleared_pmds = 0; + tlb->cleared_puds = 0; + tlb->cleared_p4ds = 0; } static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) @@ -207,6 +219,25 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, } #endif +static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb) +{ + if (tlb->cleared_ptes) + return PAGE_SHIFT; + if (tlb->cleared_pmds) + return PMD_SHIFT; + if (tlb->cleared_puds) + return PUD_SHIFT; + if (tlb->cleared_p4ds) + return P4D_SHIFT; + + return PAGE_SHIFT; +} + +static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb) +{ + return 1UL << tlb_get_unmap_shift(tlb); +} + /* * In the case of tlb vma handling, we can optimise these away in the * case where we're doing a full MM flush. 
When we're doing a munmap, @@ -240,13 +271,19 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define tlb_remove_tlb_entry(tlb, ptep, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ + tlb->cleared_ptes = 1; \ __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) -#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ - do { \ - __tlb_adjust_range(tlb, address, huge_page_size(h)); \ - __tlb_remove_tlb_entry(tlb, ptep, address); \ +#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \ + do { \ + unsigned long _sz = huge_page_size(h); \ + __tlb_adjust_range(tlb, address, _sz); \ + if (_sz == PMD_SIZE) \ + tlb->cleared_pmds = 1; \ + else if (_sz == PUD_SIZE) \ + tlb->cleared_puds = 1; \ + __tlb_remove_tlb_entry(tlb, ptep, address); \ } while (0) /** @@ -260,6 +297,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \ do { \ __tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE); \ + tlb->cleared_pmds = 1; \ __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \ } while (0) @@ -274,6 +312,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define tlb_remove_pud_tlb_entry(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE); \ + tlb->cleared_puds = 1; \ __tlb_remove_pud_tlb_entry(tlb, pudp, address); \ } while (0) @@ -298,14 +337,16 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define pte_free_tlb(tlb, ptep, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ - tlb->freed_tables = 1; \ + tlb->freed_tables = 1; \ + tlb->cleared_pmds = 1; \ __pte_free_tlb(tlb, ptep, address); \ } while (0) #define pmd_free_tlb(tlb, pmdp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ - tlb->freed_tables = 1; \ + tlb->freed_tables = 1; \ + tlb->cleared_puds = 1; \ __pmd_free_tlb(tlb, pmdp, address); \ } while (0) @@ -313,7 +354,8 @@ static 
inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define pud_free_tlb(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ - tlb->freed_tables = 1; \ + tlb->freed_tables = 1; \ + tlb->cleared_p4ds = 1; \ __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif @@ -322,7 +364,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, #define p4d_free_tlb(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ - tlb->freed_tables = 1; \ + tlb->freed_tables = 1; \ __p4d_free_tlb(tlb, pudp, address); \ } while (0) #endif diff --git a/mm/memory.c b/mm/memory.c index 7a1f66986aa4..e027bf9e3b85 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -280,8 +280,10 @@ void arch_tlb_finish_mmu(struct mmu_gather *tlb, { struct mmu_gather_batch *batch, *next; - if (force) + if (force) { + __tlb_reset_range(tlb); __tlb_adjust_range(tlb, start, end - start); + } tlb_flush_mmu(tlb); From cbfc0b77b568bb7fcc8a1aadcc26a5e6b1974f79 Mon Sep 17 00:00:00 2001 From: Cyber Knight Date: Wed, 9 Jul 2025 00:24:02 +0800 Subject: [PATCH 32/85] Revert "hugetlbfs: flush TLBs correctly after huge_pmd_unshare" This reverts commit 7bf1f5cb5150b1a53f6ccaadc0bc77f8f33206c8 to reapply it with changes in accordance with an upcoming commit that moves the TLB flushing logic into mmu_gather.c. 
Change-Id: I706c51a56b083669f70822d6ad148f2d6f91d8bf Signed-off-by: Cyber Knight --- arch/arm/include/asm/tlb.h | 8 -------- arch/ia64/include/asm/tlb.h | 10 ---------- arch/s390/include/asm/tlb.h | 14 -------------- arch/sh/include/asm/tlb.h | 10 ---------- arch/um/include/asm/tlb.h | 12 ------------ include/asm-generic/tlb.h | 2 -- mm/hugetlb.c | 19 ------------------- mm/memory.c | 10 ---------- 8 files changed, 85 deletions(-) diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index fd9717e9fee3..d5562f9ce600 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -280,14 +280,6 @@ tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr tlb_add_flush(tlb, addr); } -static inline void -tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address, - unsigned long size) -{ - tlb_add_flush(tlb, address); - tlb_add_flush(tlb, address + size - PMD_SIZE); -} - #define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr) #define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr) #define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp) diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index 26d49489498b..44f0ac0df308 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -269,16 +269,6 @@ __tlb_remove_tlb_entry (struct mmu_gather *tlb, pte_t *ptep, unsigned long addre tlb->end_addr = address + PAGE_SIZE; } -static inline void -tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address, - unsigned long size) -{ - if (tlb->start_addr > address) - tlb->start_addr = address; - if (tlb->end_addr < address + size) - tlb->end_addr = address + size; -} - #define tlb_migrate_finish(mm) platform_tlb_migrate_finish(mm) #define tlb_start_vma(tlb, vma) do { } while (0) diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index c216a24ddff8..457b7ba0fbb6 100644 --- a/arch/s390/include/asm/tlb.h +++ 
b/arch/s390/include/asm/tlb.h @@ -116,20 +116,6 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb, return tlb_remove_page(tlb, page); } -static inline void tlb_flush_pmd_range(struct mmu_gather *tlb, - unsigned long address, unsigned long size) -{ - /* - * the range might exceed the original range that was provided to - * tlb_gather_mmu(), so we need to update it despite the fact it is - * usually not updated. - */ - if (tlb->start > address) - tlb->start = address; - if (tlb->end < address + size) - tlb->end = address + size; -} - /* * pte_free_tlb frees a pte table and clears the CRSTE for the * page table from the tlb. diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index adcb0bfe238e..77abe192fb43 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -127,16 +127,6 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb, return tlb_remove_page(tlb, page); } -static inline void -tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address, - unsigned long size) -{ - if (tlb->start > address) - tlb->start = address; - if (tlb->end < address + size) - tlb->end = address + size; -} - #define tlb_remove_check_page_size_change tlb_remove_check_page_size_change static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, unsigned int page_size) diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index 02e61f6abfca..dce6db147f24 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -130,18 +130,6 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb, return tlb_remove_page(tlb, page); } -static inline void -tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address, - unsigned long size) -{ - tlb->need_flush = 1; - - if (tlb->start > address) - tlb->start = address; - if (tlb->end < address + size) - tlb->end = address + size; -} - /** * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation. 
* diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 3e228e547f00..b3a176928a39 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -144,8 +144,6 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, void tlb_flush_mmu(struct mmu_gather *tlb); void arch_tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end, bool force); -void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address, - unsigned long size); extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e3775096a338..3f7b3f6d7d03 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3385,7 +3385,6 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long sz = huge_page_size(h); unsigned long mmun_start = start; /* For mmu_notifiers */ unsigned long mmun_end = end; /* For mmu_notifiers */ - bool force_flush = false; WARN_ON(!is_vm_hugetlb_page(vma)); BUG_ON(start & ~huge_page_mask(h)); @@ -3412,8 +3411,6 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, ptl = huge_pte_lock(h, mm, ptep); if (huge_pmd_unshare(mm, &address, ptep)) { spin_unlock(ptl); - tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE); - force_flush = true; continue; } @@ -3470,22 +3467,6 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, } mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); tlb_end_vma(tlb, vma); - - /* - * If we unshared PMDs, the TLB flush was not recorded in mmu_gather. We - * could defer the flush until now, since by holding i_mmap_rwsem we - * guaranteed that the last refernece would not be dropped. But we must - * do the flushing before we return, as otherwise i_mmap_rwsem will be - * dropped and the last reference to the shared PMDs page might be - * dropped as well. 
- * - * In theory we could defer the freeing of the PMD pages as well, but - * huge_pmd_unshare() relies on the exact page_count for the PMD page to - * detect sharing, so we cannot defer the release of the page either. - * Instead, do flush now. - */ - if (force_flush) - tlb_flush_mmu(tlb); } void __unmap_hugepage_range_final(struct mmu_gather *tlb, diff --git a/mm/memory.c b/mm/memory.c index e027bf9e3b85..15437a873e3b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -327,16 +327,6 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ return false; } -void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address, - unsigned long size) -{ - if (tlb->page_size != 0 && tlb->page_size != PMD_SIZE) - tlb_flush_mmu(tlb); - - tlb->page_size = PMD_SIZE; - tlb->start = min(tlb->start, address); - tlb->end = max(tlb->end, address + size); -} #endif /* HAVE_GENERIC_MMU_GATHER */ #ifdef CONFIG_HAVE_RCU_TABLE_FREE From 1e15e942558469dca39c56f033d47f17850159b4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 3 Sep 2018 15:07:36 +0100 Subject: [PATCH 33/85] BACKPORT: mm/memory: Move mmu_gather and TLB invalidation code into its own file In preparation for maintaining the mmu_gather code as its own entity, move the implementation out of memory.c and into its own file. Cc: "Kirill A. 
Shutemov" Cc: Andrew Morton Cc: Michal Hocko Change-Id: Ia925c303703e188a89bd3e66e6cc7302cb651826 Signed-off-by: Peter Zijlstra Signed-off-by: Will Deacon [cyberknight777: Backport to msm-4.14 & move tlb_remove_table_sync_one() to mmu_gather.c] Signed-off-by: Cyber Knight --- include/asm-generic/tlb.h | 1 + mm/Makefile | 6 +- mm/memory.c | 247 ----------------------------------- mm/mmu_gather.c | 266 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 270 insertions(+), 250 deletions(-) create mode 100644 mm/mmu_gather.c diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index b3a176928a39..3e8190bc5ae0 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -144,6 +144,7 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, void tlb_flush_mmu(struct mmu_gather *tlb); void arch_tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end, bool force); +void tlb_flush_mmu_free(struct mmu_gather *tlb); extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); diff --git a/mm/Makefile b/mm/Makefile index 1d7c5586f4c6..7871d13e4e2d 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -23,9 +23,9 @@ KCOV_INSTRUMENT_vmstat.o := n mmu-y := nommu.o mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \ - mlock.o mmap.o mprotect.o mremap.o msync.o \ - page_vma_mapped.o pagewalk.o pgtable-generic.o \ - rmap.o vmalloc.o + mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \ + msync.o page_vma_mapped.o pagewalk.o \ + pgtable-generic.o rmap.o vmalloc.o ifdef CONFIG_CROSS_MEMORY_ATTACH diff --git a/mm/memory.c b/mm/memory.c index 15437a873e3b..bc6e5c47a1c4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -199,253 +199,6 @@ static void check_sync_rss_stat(struct task_struct *task) #endif /* SPLIT_RSS_COUNTING */ -#ifdef HAVE_GENERIC_MMU_GATHER - -static bool tlb_next_batch(struct mmu_gather *tlb) -{ - struct mmu_gather_batch *batch; - - batch = tlb->active; - if (batch->next) { - tlb->active = 
batch->next; - return true; - } - - if (tlb->batch_count == MAX_GATHER_BATCH_COUNT) - return false; - - batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); - if (!batch) - return false; - - tlb->batch_count++; - batch->next = NULL; - batch->nr = 0; - batch->max = MAX_GATHER_BATCH; - - tlb->active->next = batch; - tlb->active = batch; - - return true; -} - -void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - tlb->mm = mm; - - /* Is it from 0 to ~0? */ - tlb->fullmm = !(start | (end+1)); - tlb->need_flush_all = 0; - tlb->local.next = NULL; - tlb->local.nr = 0; - tlb->local.max = ARRAY_SIZE(tlb->__pages); - tlb->active = &tlb->local; - tlb->batch_count = 0; - -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - tlb->batch = NULL; -#endif - tlb->page_size = 0; - - __tlb_reset_range(tlb); -} - -static void tlb_flush_mmu_free(struct mmu_gather *tlb) -{ - struct mmu_gather_batch *batch; - -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - tlb_table_flush(tlb); -#endif - for (batch = &tlb->local; batch && batch->nr; batch = batch->next) { - free_pages_and_swap_cache(batch->pages, batch->nr); - batch->nr = 0; - } - tlb->active = &tlb->local; -} - -void tlb_flush_mmu(struct mmu_gather *tlb) -{ - tlb_flush_mmu_tlbonly(tlb); - tlb_flush_mmu_free(tlb); -} - -/* tlb_finish_mmu - * Called at the end of the shootdown operation to free up any resources - * that were required. 
- */ -void arch_tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end, bool force) -{ - struct mmu_gather_batch *batch, *next; - - if (force) { - __tlb_reset_range(tlb); - __tlb_adjust_range(tlb, start, end - start); - } - - tlb_flush_mmu(tlb); - - /* keep the page table cache within bounds */ - check_pgt_cache(); - - for (batch = tlb->local.next; batch; batch = next) { - next = batch->next; - free_pages((unsigned long)batch, 0); - } - tlb->local.next = NULL; -} - -/* __tlb_remove_page - * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while - * handling the additional races in SMP caused by other CPUs caching valid - * mappings in their TLBs. Returns the number of free page slots left. - * When out of page slots we must call tlb_flush_mmu(). - *returns true if the caller should flush. - */ -bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) -{ - struct mmu_gather_batch *batch; - - VM_BUG_ON(!tlb->end); - VM_WARN_ON(tlb->page_size != page_size); - - batch = tlb->active; - /* - * Add the page and check if we are full. If so - * force a flush. - */ - batch->pages[batch->nr++] = page; - if (batch->nr == batch->max) { - if (!tlb_next_batch(tlb)) - return true; - batch = tlb->active; - } - VM_BUG_ON_PAGE(batch->nr > batch->max, page); - - return false; -} - -#endif /* HAVE_GENERIC_MMU_GATHER */ - -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - -/* - * See the comment near struct mmu_table_batch. - */ - -/* - * If we want tlb_remove_table() to imply TLB invalidates. - */ -static inline void tlb_table_invalidate(struct mmu_gather *tlb) -{ -#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE - /* - * Invalidate page-table caches used by hardware walkers. Then we still - * need to RCU-sched wait while freeing the pages because software - * walkers can still be in-flight. 
- */ - tlb_flush_mmu_tlbonly(tlb); -#endif -} - -static void tlb_remove_table_smp_sync(void *arg) -{ - /* Simply deliver the interrupt */ -} - -void tlb_remove_table_sync_one(void) -{ - smp_call_function(tlb_remove_table_smp_sync, NULL, 1); -} - -static void tlb_remove_table_one(void *table) -{ - /* - * This isn't an RCU grace period and hence the page-tables cannot be - * assumed to be actually RCU-freed. - * - * It is however sufficient for software page-table walkers that rely on - * IRQ disabling. See the comment near struct mmu_table_batch. - */ - smp_call_function(tlb_remove_table_smp_sync, NULL, 1); - __tlb_remove_table(table); -} - -static void tlb_remove_table_rcu(struct rcu_head *head) -{ - struct mmu_table_batch *batch; - int i; - - batch = container_of(head, struct mmu_table_batch, rcu); - - for (i = 0; i < batch->nr; i++) - __tlb_remove_table(batch->tables[i]); - - free_page((unsigned long)batch); -} - -void tlb_table_flush(struct mmu_gather *tlb) -{ - struct mmu_table_batch **batch = &tlb->batch; - - if (*batch) { - tlb_table_invalidate(tlb); - call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); - *batch = NULL; - } -} - -void tlb_remove_table(struct mmu_gather *tlb, void *table) -{ - struct mmu_table_batch **batch = &tlb->batch; - - if (*batch == NULL) { - *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN); - if (*batch == NULL) { - tlb_table_invalidate(tlb); - tlb_remove_table_one(table); - return; - } - (*batch)->nr = 0; - } - - (*batch)->tables[(*batch)->nr++] = table; - if ((*batch)->nr == MAX_TABLE_BATCH) - tlb_table_flush(tlb); -} - -#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ - -/* tlb_gather_mmu - * Called to initialize an (on-stack) mmu_gather structure for page-table - * tear-down from @mm. The @fullmm argument is used when @mm is without - * users and we're going to destroy the full address space (exit/execve). 
- */ -void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - arch_tlb_gather_mmu(tlb, mm, start, end); - inc_tlb_flush_pending(tlb->mm); -} - -void tlb_finish_mmu(struct mmu_gather *tlb, - unsigned long start, unsigned long end) -{ - /* - * If there are parallel threads are doing PTE changes on same range - * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB - * flush by batching, a thread has stable TLB entry can fail to flush - * the TLB by observing pte_none|!pte_dirty, for example so flush TLB - * forcefully if we detect parallel PTE batching threads. - */ - bool force = mm_tlb_flush_nested(tlb->mm); - - arch_tlb_finish_mmu(tlb, start, end, force); - dec_tlb_flush_pending(tlb->mm); -} - /* * Note: this doesn't free the actual pages themselves. That * has been handled earlier when unmapping all the memory regions. diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c new file mode 100644 index 000000000000..57168caf6002 --- /dev/null +++ b/mm/mmu_gather.c @@ -0,0 +1,266 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#ifdef HAVE_GENERIC_MMU_GATHER + +static bool tlb_next_batch(struct mmu_gather *tlb) +{ + struct mmu_gather_batch *batch; + + batch = tlb->active; + if (batch->next) { + tlb->active = batch->next; + return true; + } + + if (tlb->batch_count == MAX_GATHER_BATCH_COUNT) + return false; + + batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0); + if (!batch) + return false; + + tlb->batch_count++; + batch->next = NULL; + batch->nr = 0; + batch->max = MAX_GATHER_BATCH; + + tlb->active->next = batch; + tlb->active = batch; + + return true; +} + +void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + tlb->mm = mm; + + /* Is it from 0 to ~0? 
*/ + tlb->fullmm = !(start | (end+1)); + tlb->need_flush_all = 0; + tlb->local.next = NULL; + tlb->local.nr = 0; + tlb->local.max = ARRAY_SIZE(tlb->__pages); + tlb->active = &tlb->local; + tlb->batch_count = 0; + +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + tlb->batch = NULL; +#endif + tlb->page_size = 0; + + __tlb_reset_range(tlb); +} + +void tlb_flush_mmu_free(struct mmu_gather *tlb) +{ + struct mmu_gather_batch *batch; + +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + tlb_table_flush(tlb); +#endif + for (batch = &tlb->local; batch && batch->nr; batch = batch->next) { + free_pages_and_swap_cache(batch->pages, batch->nr); + batch->nr = 0; + } + tlb->active = &tlb->local; +} + +void tlb_flush_mmu(struct mmu_gather *tlb) +{ + tlb_flush_mmu_tlbonly(tlb); + tlb_flush_mmu_free(tlb); +} + +/* tlb_finish_mmu + * Called at the end of the shootdown operation to free up any resources + * that were required. + */ +void arch_tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end, bool force) +{ + struct mmu_gather_batch *batch, *next; + + if (force) { + __tlb_reset_range(tlb); + __tlb_adjust_range(tlb, start, end - start); + } + + tlb_flush_mmu(tlb); + + /* keep the page table cache within bounds */ + check_pgt_cache(); + + for (batch = tlb->local.next; batch; batch = next) { + next = batch->next; + free_pages((unsigned long)batch, 0); + } + tlb->local.next = NULL; +} + +/* __tlb_remove_page + * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while + * handling the additional races in SMP caused by other CPUs caching valid + * mappings in their TLBs. Returns the number of free page slots left. + * When out of page slots we must call tlb_flush_mmu(). + *returns true if the caller should flush. 
+ */ +bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) +{ + struct mmu_gather_batch *batch; + + VM_BUG_ON(!tlb->end); + VM_WARN_ON(tlb->page_size != page_size); + + batch = tlb->active; + /* + * Add the page and check if we are full. If so + * force a flush. + */ + batch->pages[batch->nr++] = page; + if (batch->nr == batch->max) { + if (!tlb_next_batch(tlb)) + return true; + batch = tlb->active; + } + VM_BUG_ON_PAGE(batch->nr > batch->max, page); + + return false; +} + +#endif /* HAVE_GENERIC_MMU_GATHER */ + +#ifdef CONFIG_HAVE_RCU_TABLE_FREE + +/* + * See the comment near struct mmu_table_batch. + */ + +/* + * If we want tlb_remove_table() to imply TLB invalidates. + */ +static inline void tlb_table_invalidate(struct mmu_gather *tlb) +{ +#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE + /* + * Invalidate page-table caches used by hardware walkers. Then we still + * need to RCU-sched wait while freeing the pages because software + * walkers can still be in-flight. + */ + tlb_flush_mmu_tlbonly(tlb); +#endif +} + +static void tlb_remove_table_smp_sync(void *arg) +{ + /* Simply deliver the interrupt */ +} + +void tlb_remove_table_sync_one(void) +{ + smp_call_function(tlb_remove_table_smp_sync, NULL, 1); +} + +static void tlb_remove_table_one(void *table) +{ + /* + * This isn't an RCU grace period and hence the page-tables cannot be + * assumed to be actually RCU-freed. + * + * It is however sufficient for software page-table walkers that rely on + * IRQ disabling. See the comment near struct mmu_table_batch. 
+ */ + smp_call_function(tlb_remove_table_smp_sync, NULL, 1); + __tlb_remove_table(table); +} + +static void tlb_remove_table_rcu(struct rcu_head *head) +{ + struct mmu_table_batch *batch; + int i; + + batch = container_of(head, struct mmu_table_batch, rcu); + + for (i = 0; i < batch->nr; i++) + __tlb_remove_table(batch->tables[i]); + + free_page((unsigned long)batch); +} + +void tlb_table_flush(struct mmu_gather *tlb) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch) { + tlb_table_invalidate(tlb); + call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + *batch = NULL; + } +} + +void tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch == NULL) { + *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN); + if (*batch == NULL) { + tlb_table_invalidate(tlb); + tlb_remove_table_one(table); + return; + } + (*batch)->nr = 0; + } + + (*batch)->tables[(*batch)->nr++] = table; + if ((*batch)->nr == MAX_TABLE_BATCH) + tlb_table_flush(tlb); +} + +#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ + +/** + * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down + * @tlb: the mmu_gather structure to initialize + * @mm: the mm_struct of the target address space + * @start: start of the region that will be removed from the page-table + * @end: end of the region that will be removed from the page-table + * + * Called to initialize an (on-stack) mmu_gather structure for page-table + * tear-down from @mm. The @start and @end are set to 0 and -1 + * respectively when @mm is without users and we're going to destroy + * the full address space (exit/execve). 
+ */ +void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + arch_tlb_gather_mmu(tlb, mm, start, end); + inc_tlb_flush_pending(tlb->mm); +} + +void tlb_finish_mmu(struct mmu_gather *tlb, + unsigned long start, unsigned long end) +{ + /* + * If there are parallel threads are doing PTE changes on same range + * under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB + * flush by batching, a thread has stable TLB entry can fail to flush + * the TLB by observing pte_none|!pte_dirty, for example so flush TLB + * forcefully if we detect parallel PTE batching threads. + */ + bool force = mm_tlb_flush_nested(tlb->mm); + + arch_tlb_finish_mmu(tlb, start, end, force); + dec_tlb_flush_pending(tlb->mm); +} From 5d04f78905be005f795f6fb33c1a8763c86b7a55 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sun, 21 Nov 2021 12:40:07 -0800 Subject: [PATCH 34/85] RELAND: hugetlbfs: flush TLBs correctly after huge_pmd_unshare commit a4a118f2eead1d6c49e00765de89878288d4b890 upstream. When __unmap_hugepage_range() calls to huge_pmd_unshare() succeed, a TLB flush is missing. This TLB flush must be performed before releasing the i_mmap_rwsem, in order to prevent an unshared PMDs page from being released and reused before the TLB flush took place. Arguably, a comprehensive solution would use mmu_gather interface to batch the TLB flushes and the PMDs page release, however it is not an easy solution: (1) try_to_unmap_one() and try_to_migrate_one() also call huge_pmd_unshare() and they cannot use the mmu_gather interface; and (2) deferring the release of the page reference for the PMDs page until after i_mmap_rwsem is dropeed can confuse huge_pmd_unshare() into thinking PMDs are shared when they are not. Fix __unmap_hugepage_range() by adding the missing TLB flush, and forcing a flush when unshare is successful. 
Fixes: 24669e58477e ("hugetlb: use mmu_gather instead of a temporary linked list for accumulating pages)" # 3.6 [Jebaitedneko: move tlb_flush_pmd_range() into mmu_gather.c] Change-Id: Ic0b2a2b47792a24ee2ea4112c34152b0d263009a Signed-off-by: Nadav Amit Reviewed-by: Mike Kravetz Cc: Aneesh Kumar K.V Cc: KAMEZAWA Hiroyuki Cc: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Co-authored-by: Jebaitedneko --- arch/arm/include/asm/tlb.h | 8 ++++++++ arch/ia64/include/asm/tlb.h | 10 ++++++++++ arch/s390/include/asm/tlb.h | 14 ++++++++++++++ arch/sh/include/asm/tlb.h | 10 ++++++++++ arch/um/include/asm/tlb.h | 12 ++++++++++++ include/asm-generic/tlb.h | 2 ++ mm/hugetlb.c | 19 +++++++++++++++++++ mm/mmu_gather.c | 10 ++++++++++ 8 files changed, 85 insertions(+) diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h index d5562f9ce600..fd9717e9fee3 100644 --- a/arch/arm/include/asm/tlb.h +++ b/arch/arm/include/asm/tlb.h @@ -280,6 +280,14 @@ tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr tlb_add_flush(tlb, addr); } +static inline void +tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address, + unsigned long size) +{ + tlb_add_flush(tlb, address); + tlb_add_flush(tlb, address + size - PMD_SIZE); +} + #define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr) #define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr) #define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp) diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h index 44f0ac0df308..26d49489498b 100644 --- a/arch/ia64/include/asm/tlb.h +++ b/arch/ia64/include/asm/tlb.h @@ -269,6 +269,16 @@ __tlb_remove_tlb_entry (struct mmu_gather *tlb, pte_t *ptep, unsigned long addre tlb->end_addr = address + PAGE_SIZE; } +static inline void +tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address, + unsigned long size) +{ + if (tlb->start_addr > address) + tlb->start_addr = address; + if 
(tlb->end_addr < address + size) + tlb->end_addr = address + size; +} + #define tlb_migrate_finish(mm) platform_tlb_migrate_finish(mm) #define tlb_start_vma(tlb, vma) do { } while (0) diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 457b7ba0fbb6..c216a24ddff8 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -116,6 +116,20 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb, return tlb_remove_page(tlb, page); } +static inline void tlb_flush_pmd_range(struct mmu_gather *tlb, + unsigned long address, unsigned long size) +{ + /* + * the range might exceed the original range that was provided to + * tlb_gather_mmu(), so we need to update it despite the fact it is + * usually not updated. + */ + if (tlb->start > address) + tlb->start = address; + if (tlb->end < address + size) + tlb->end = address + size; +} + /* * pte_free_tlb frees a pte table and clears the CRSTE for the * page table from the tlb. diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index 77abe192fb43..adcb0bfe238e 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -127,6 +127,16 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb, return tlb_remove_page(tlb, page); } +static inline void +tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address, + unsigned long size) +{ + if (tlb->start > address) + tlb->start = address; + if (tlb->end < address + size) + tlb->end = address + size; +} + #define tlb_remove_check_page_size_change tlb_remove_check_page_size_change static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, unsigned int page_size) diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h index dce6db147f24..02e61f6abfca 100644 --- a/arch/um/include/asm/tlb.h +++ b/arch/um/include/asm/tlb.h @@ -130,6 +130,18 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb, return tlb_remove_page(tlb, page); } +static inline void 
+tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address, + unsigned long size) +{ + tlb->need_flush = 1; + + if (tlb->start > address) + tlb->start = address; + if (tlb->end < address + size) + tlb->end = address + size; +} + /** * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation. * diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 3e8190bc5ae0..cf112bb7d529 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -144,6 +144,8 @@ void arch_tlb_gather_mmu(struct mmu_gather *tlb, void tlb_flush_mmu(struct mmu_gather *tlb); void arch_tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end, bool force); +void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address, + unsigned long size); void tlb_flush_mmu_free(struct mmu_gather *tlb); extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 3f7b3f6d7d03..e3775096a338 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3385,6 +3385,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long sz = huge_page_size(h); unsigned long mmun_start = start; /* For mmu_notifiers */ unsigned long mmun_end = end; /* For mmu_notifiers */ + bool force_flush = false; WARN_ON(!is_vm_hugetlb_page(vma)); BUG_ON(start & ~huge_page_mask(h)); @@ -3411,6 +3412,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, ptl = huge_pte_lock(h, mm, ptep); if (huge_pmd_unshare(mm, &address, ptep)) { spin_unlock(ptl); + tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE); + force_flush = true; continue; } @@ -3467,6 +3470,22 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, } mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); tlb_end_vma(tlb, vma); + + /* + * If we unshared PMDs, the TLB flush was not recorded in mmu_gather. 
We + * could defer the flush until now, since by holding i_mmap_rwsem we + * guaranteed that the last refernece would not be dropped. But we must + * do the flushing before we return, as otherwise i_mmap_rwsem will be + * dropped and the last reference to the shared PMDs page might be + * dropped as well. + * + * In theory we could defer the freeing of the PMD pages as well, but + * huge_pmd_unshare() relies on the exact page_count for the PMD page to + * detect sharing, so we cannot defer the release of the page either. + * Instead, do flush now. + */ + if (force_flush) + tlb_flush_mmu(tlb); } void __unmap_hugepage_range_final(struct mmu_gather *tlb, diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 57168caf6002..a44cf211ffee 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -139,6 +139,16 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_ return false; } +void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address, + unsigned long size) +{ + if (tlb->page_size != 0 && tlb->page_size != PMD_SIZE) + tlb_flush_mmu(tlb); + + tlb->page_size = PMD_SIZE; + tlb->start = min(tlb->start, address); + tlb->end = max(tlb->end, address + size); +} #endif /* HAVE_GENERIC_MMU_GATHER */ #ifdef CONFIG_HAVE_RCU_TABLE_FREE From af36119f839635e74fd33f357f09d187fb864dc7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 3 Sep 2018 15:19:37 +0100 Subject: [PATCH 35/85] UPSTREAM: MAINTAINERS: Add entry for MMU GATHER AND TLB INVALIDATION We recently had to debug a TLB invalidation problem on the munmap() path, which was made more difficult than necessary because: (a) The MMU gather code had changed without people realising (b) Many people subtly misunderstood the operation of the MMU gather code and its interactions with RCU and arch-specific TLB invalidation (c) Untangling the intended behaviour involved educated guesswork and plenty of discussion Hopefully, we can avoid getting into this mess again by designating a cross-arch group 
of people to look after this code. It is not intended that they will have a separate tree, but they at least provide a point of contact for anybody working in this area and can co-ordinate any proposed future changes to the internal API. Cc: Peter Zijlstra Cc: Nicholas Piggin Cc: Linus Torvalds Cc: "Aneesh Kumar K.V" Cc: "Kirill A. Shutemov" Cc: Andrew Morton Cc: Michal Hocko Change-Id: Ie434451c6fea97908ce566d3ce5cf8976207d2fb Signed-off-by: Will Deacon Signed-off-by: Cyber Knight --- MAINTAINERS | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index a17554af111f..e7059ebbd85f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9057,6 +9057,19 @@ S: Maintained F: arch/arm/boot/dts/mmp* F: arch/arm/mach-mmp/ +MMU GATHER AND TLB INVALIDATION +M: Will Deacon +M: "Aneesh Kumar K.V" +M: Andrew Morton +M: Nick Piggin +M: Peter Zijlstra +L: linux-arch@vger.kernel.org +L: linux-mm@kvack.org +S: Maintained +F: arch/*/include/asm/tlb.h +F: include/asm-generic/tlb.h +F: mm/mmu_gather.c + MN88472 MEDIA DRIVER M: Antti Palosaari L: linux-media@vger.kernel.org From 0d93d4fbf1519a1d4b5bf67a119d9532e8937d0a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 22 Aug 2018 21:23:05 +0100 Subject: [PATCH 36/85] UPSTREAM: arm64: tlb: Use last-level invalidation in flush_tlb_kernel_range() flush_tlb_kernel_range() is only ever used to invalidate last-level entries, so we can restrict the scope of the TLB invalidation instruction. 
Acked-by: Peter Zijlstra (Intel) Change-Id: I1c7944e35ba4c39e0736419f8fc5fce37c1eebd8 Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Cyber Knight --- arch/arm64/include/asm/tlbflush.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index c008c38f799a..9e6382aadde1 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -199,7 +199,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end dsb(ishst); for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - __tlbi(vaae1is, addr); + __tlbi(vaale1is, addr); dsb(ish); isb(); } From a4e0fe14f170d0ab05fdf556ad68f26293d2e00c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 22 Aug 2018 21:40:30 +0100 Subject: [PATCH 37/85] UPSTREAM: arm64: tlb: Add DSB ISHST prior to TLBI in __flush_tlb_[kernel_]pgtable() __flush_tlb_[kernel_]pgtable() rely on set_pXd() having a DSB after writing the new table entry and therefore avoid the barrier prior to the TLBI instruction. In preparation for delaying our walk-cache invalidation on the unmap() path, move the DSB into the TLB invalidation routines. 
Acked-by: Peter Zijlstra (Intel) Change-Id: I7a8a259d78b6d4410c4a6e59b2f229dbd58244af Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Cyber Knight --- arch/arm64/include/asm/tlbflush.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 9e6382aadde1..549d824e44b5 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -213,6 +213,7 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm, { unsigned long addr = __TLBI_VADDR(uaddr, ASID(mm)); + dsb(ishst); __tlbi(vae1is, addr); __tlbi_user(vae1is, addr); dsb(ish); @@ -222,6 +223,7 @@ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) { unsigned long addr = __TLBI_VADDR(kaddr, 0); + dsb(ishst); __tlbi(vaae1is, addr); dsb(ish); } From 74f612dc646ef245a38f0085ca4edff812d380fd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 22 Aug 2018 21:36:31 +0100 Subject: [PATCH 38/85] BACKPORT: arm64: pgtable: Implement p[mu]d_valid() and check in set_p[mu]d() Now that our walk-cache invalidation routines imply a DSB before the invalidation, we no longer need one when we are clearing an entry during unmap. 
Acked-by: Peter Zijlstra (Intel) Change-Id: Ib0ad415b232f766fb93455f39de5449f4bf45dfb Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas [cyberknight777: Backport to msm-4.14] Signed-off-by: Cyber Knight --- arch/arm64/include/asm/pgtable.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index f07b0517860d..0cf93a2faabe 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -381,6 +381,7 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_present(pmd) pte_present(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) +#define pmd_valid(pmd) pte_valid(pmd_pte(pmd)) #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) #define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd))) #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) @@ -459,8 +460,11 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) #else *pmdp = pmd; #endif - dsb(ishst); - isb(); + + if (pmd_valid(pmd)) { + dsb(ishst); + isb(); + } } static inline void pmd_clear(pmd_t *pmdp) @@ -512,6 +516,7 @@ static inline void pte_unmap(pte_t *pte) { } #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) #define pud_present(pud) pte_present(pud_pte(pud)) +#define pud_valid(pud) pte_valid(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) { @@ -529,8 +534,11 @@ static inline void set_pud(pud_t *pudp, pud_t pud) #else *pudp = pud; #endif - dsb(ishst); - isb(); + + if (pud_valid(pud)) { + dsb(ishst); + isb(); + } } static inline void pud_clear(pud_t *pudp) From ab4a8660077ebc73083466874e48db7530dd8e1a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Aug 2018 19:08:15 +0100 Subject: [PATCH 39/85] UPSTREAM: arm64: tlb: Justify non-leaf invalidation in flush_tlb_range() Add a comment to explain why we can't get away with last-level invalidation 
in flush_tlb_range() Acked-by: Peter Zijlstra (Intel) Change-Id: I6e5251011b20a0270206b0cf50c34f991752792a Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Cyber Knight --- arch/arm64/include/asm/tlbflush.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 549d824e44b5..eee2bbd3c374 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -182,6 +182,10 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, static inline void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { + /* + * We cannot use leaf-only invalidation here, since we may be invalidating + * table entries as part of collapsing hugepages or moving page tables. + */ __flush_tlb_range(vma, start, end, false); } From 7e2c7b29afda9fba92924c80099ce6c6e86af72c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Aug 2018 19:26:21 +0100 Subject: [PATCH 40/85] UPSTREAM: arm64: tlbflush: Allow stride to be specified for __flush_tlb_range() When we are unmapping intermediate page-table entries or huge pages, we don't need to issue a TLBI instruction for every PAGE_SIZE chunk in the VA range being unmapped. Allow the invalidation stride to be passed to __flush_tlb_range(), and adjust our "just nuke the ASID" heuristic to take this into account. 
Acked-by: Peter Zijlstra (Intel) Change-Id: I75dd94e14ea9920b3500e8003cad2ee0a74bb05f Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Cyber Knight --- arch/arm64/include/asm/tlb.h | 2 +- arch/arm64/include/asm/tlbflush.h | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 9efe52747651..f7004bed997b 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -56,7 +56,7 @@ static inline void tlb_flush(struct mmu_gather *tlb) * the __(pte|pmd|pud)_free_tlb() functions, so last level * TLBI is sufficient here. */ - __flush_tlb_range(&vma, tlb->start, tlb->end, true); + __flush_tlb_range(&vma, tlb->start, tlb->end, PAGE_SIZE, true); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index eee2bbd3c374..a42be789aac6 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -149,25 +149,28 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, * This is meant to avoid soft lock-ups on large TLB flushing ranges and not * necessarily a performance improvement. 
*/ -#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) +#define MAX_TLBI_OPS 1024UL static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, - bool last_level) + unsigned long stride, bool last_level) { unsigned long asid = ASID(vma->vm_mm); unsigned long addr; - if ((end - start) > MAX_TLB_RANGE) { + if ((end - start) > (MAX_TLBI_OPS * stride)) { flush_tlb_mm(vma->vm_mm); return; } + /* Convert the stride into units of 4k */ + stride >>= 12; + start = __TLBI_VADDR(start, asid); end = __TLBI_VADDR(end, asid); dsb(ishst); - for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { + for (addr = start; addr < end; addr += stride) { if (last_level) { __tlbi(vale1is, addr); __tlbi_user(vale1is, addr); @@ -186,14 +189,14 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, * We cannot use leaf-only invalidation here, since we may be invalidating * table entries as part of collapsing hugepages or moving page tables. */ - __flush_tlb_range(vma, start, end, false); + __flush_tlb_range(vma, start, end, PAGE_SIZE, false); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { unsigned long addr; - if ((end - start) > MAX_TLB_RANGE) { + if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) { flush_tlb_all(); return; } From c05b72865cea41621b998f7488c2c42dfb83272a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Aug 2018 19:48:44 +0100 Subject: [PATCH 41/85] UPSTREAM: arm64: tlb: Remove redundant !CONFIG_HAVE_RCU_TABLE_FREE code If there's one thing the RCU-based table freeing doesn't need, it's more ifdeffery. Remove the redundant !CONFIG_HAVE_RCU_TABLE_FREE code, since this option is unconditionally selected in our Kconfig. 
Acked-by: Peter Zijlstra (Intel) Change-Id: Ifbe6dc2d8ce9e7e0d17c1c594325b04c3d39ca95 Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Cyber Knight --- arch/arm64/include/asm/tlb.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index f7004bed997b..7affa4a4eb3b 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -25,16 +25,10 @@ #include #endif -#ifdef CONFIG_HAVE_RCU_TABLE_FREE - -#define tlb_remove_entry(tlb, entry) tlb_remove_table(tlb, entry) static inline void __tlb_remove_table(void *_table) { free_page_and_swap_cache((struct page *)_table); } -#else -#define tlb_remove_entry(tlb, entry) tlb_remove_page(tlb, entry) -#endif /* CONFIG_HAVE_RCU_TABLE_FREE */ static void tlb_flush(struct mmu_gather *tlb); @@ -64,7 +58,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, { __flush_tlb_pgtable(tlb->mm, addr); pgtable_page_dtor(pte); - tlb_remove_entry(tlb, pte); + tlb_remove_table(tlb, pte); } #if CONFIG_PGTABLE_LEVELS > 2 @@ -76,7 +70,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, rkp_ro_free((void *)pmdp); } else #endif - tlb_remove_entry(tlb, virt_to_page(pmdp)); + tlb_remove_table(tlb, virt_to_page(pmdp)); } #endif @@ -89,7 +83,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, rkp_ro_free((void *)pudp); else #endif - tlb_remove_entry(tlb, virt_to_page(pudp)); + tlb_remove_table(tlb, virt_to_page(pudp)); } #endif From ef0e7e7e3a267163a5a6e587191e5f572bf13714 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Aug 2018 21:08:31 +0100 Subject: [PATCH 42/85] BACKPORT: arm64: tlb: Adjust stride and type of TLBI according to mmu_gather Now that the core mmu_gather code keeps track of both the levels of page table cleared and also whether or not these entries correspond to intermediate entries, we can use this in our tlb_flush() callback to 
reduce the number of invalidations we issue as well as their scope. Acked-by: Peter Zijlstra (Intel) Change-Id: Ibe3adb99f9f7b64517c614fd08cf3fa5c034c7ee Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas [cyberknight777: Backport to msm-4.14] Signed-off-by: Cyber Knight --- arch/arm64/include/asm/tlb.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 7affa4a4eb3b..e333141b8a53 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -37,20 +37,21 @@ static void tlb_flush(struct mmu_gather *tlb); static inline void tlb_flush(struct mmu_gather *tlb) { struct vm_area_struct vma = { .vm_mm = tlb->mm, }; + bool last_level = !tlb->freed_tables; + unsigned long stride = tlb_get_unmap_size(tlb); /* - * The ASID allocator will either invalidate the ASID or mark - * it as used. + * If we're tearing down the address space then we only care about + * invalidating the walk-cache, since the ASID allocator won't + * reallocate our ASID without invalidating the entire TLB. */ - if (tlb->fullmm) + if (tlb->fullmm) { + if (!last_level) + flush_tlb_mm(tlb->mm); return; + } - /* - * The intermediate page table levels are already handled by - * the __(pte|pmd|pud)_free_tlb() functions, so last level - * TLBI is sufficient here. - */ - __flush_tlb_range(&vma, tlb->start, tlb->end, PAGE_SIZE, true); + __flush_tlb_range(&vma, tlb->start, tlb->end, stride, last_level); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, From 1af59a7f9e8a634461dad153452f7b32dd7c65e2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Aug 2018 21:16:50 +0100 Subject: [PATCH 43/85] BACKPORT: arm64: tlb: Avoid synchronous TLBIs when freeing page tables By selecting HAVE_RCU_TABLE_INVALIDATE, we can rely on tlb_flush() being called if we fail to batch table pages for freeing. 
This in turn allows us to postpone walk-cache invalidation until tlb_finish_mmu(), which avoids lots of unnecessary DSBs and means we can shoot down the ASID if the range is large enough. Acked-by: Peter Zijlstra (Intel) Change-Id: Ie25f4be366f5a170adbb0e64c7d57ecc2b379a58 Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas [cyberknight777: Backport to msm-4.14] Signed-off-by: Cyber Knight --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/tlb.h | 1 - arch/arm64/include/asm/tlbflush.h | 11 ----------- 3 files changed, 1 insertion(+), 12 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 6212f843614c..460165e82a4f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -127,6 +127,7 @@ config ARM64 select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RCU_TABLE_FREE + select HAVE_RCU_TABLE_INVALIDATE select HAVE_SYSCALL_TRACEPOINTS select HAVE_KPROBES select HAVE_KRETPROBES diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index e333141b8a53..e09a2da6d6da 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -57,7 +57,6 @@ static inline void tlb_flush(struct mmu_gather *tlb) static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr) { - __flush_tlb_pgtable(tlb->mm, addr); pgtable_page_dtor(pte); tlb_remove_table(tlb, pte); } diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index a42be789aac6..44de513f998a 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -215,17 +215,6 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end * Used to invalidate the TLB (walk caches) corresponding to intermediate page * table levels (pgd/pud/pmd). 
*/ -static inline void __flush_tlb_pgtable(struct mm_struct *mm, - unsigned long uaddr) -{ - unsigned long addr = __TLBI_VADDR(uaddr, ASID(mm)); - - dsb(ishst); - __tlbi(vae1is, addr); - __tlbi_user(vae1is, addr); - dsb(ish); -} - static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) { unsigned long addr = __TLBI_VADDR(kaddr, 0); From c9df1b74dd38a2834a34913eafd5e21fe453728e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 28 Aug 2018 14:52:17 +0100 Subject: [PATCH 44/85] UPSTREAM: arm64: tlb: Rewrite stale comment in asm/tlbflush.h Peter Z asked me to justify the barrier usage in asm/tlbflush.h, but actually that whole block comment needs to be rewritten. Reported-by: Peter Zijlstra Acked-by: Peter Zijlstra (Intel) Change-Id: If49b019942043655d3ce72021e4daa66a82c60fb Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Cyber Knight --- arch/arm64/include/asm/tlbflush.h | 80 +++++++++++++++++++++---------- 1 file changed, 55 insertions(+), 25 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 44de513f998a..5dfd23897dea 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -70,43 +70,73 @@ }) /* - * TLB Management - * ============== + * TLB Invalidation + * ================ * - * The TLB specific code is expected to perform whatever tests it needs - * to determine if it should invalidate the TLB for each call. Start - * addresses are inclusive and end addresses are exclusive; it is safe to - * round these addresses down. + * This header file implements the low-level TLB invalidation routines + * (sometimes referred to as "flushing" in the kernel) for arm64. + * + * Every invalidation operation uses the following template: + * + * DSB ISHST // Ensure prior page-table updates have completed + * TLBI ... 
// Invalidate the TLB + * DSB ISH // Ensure the TLB invalidation has completed + * if (invalidated kernel mappings) + * ISB // Discard any instructions fetched from the old mapping + * + * + * The following functions form part of the "core" TLB invalidation API, + * as documented in Documentation/core-api/cachetlb.rst: * * flush_tlb_all() - * - * Invalidate the entire TLB. + * Invalidate the entire TLB (kernel + user) on all CPUs * * flush_tlb_mm(mm) + * Invalidate an entire user address space on all CPUs. + * The 'mm' argument identifies the ASID to invalidate. * - * Invalidate all TLB entries in a particular address space. - * - mm - mm_struct describing address space + * flush_tlb_range(vma, start, end) + * Invalidate the virtual-address range '[start, end)' on all + * CPUs for the user address space corresponding to 'vma->mm'. + * Note that this operation also invalidates any walk-cache + * entries associated with translations for the specified address + * range. * - * flush_tlb_range(mm,start,end) + * flush_tlb_kernel_range(start, end) + * Same as flush_tlb_range(..., start, end), but applies to + * kernel mappings rather than a particular user address space. + * Whilst not explicitly documented, this function is used when + * unmapping pages from vmalloc/io space. * - * Invalidate a range of TLB entries in the specified address - * space. - * - mm - mm_struct describing address space - * - start - start address (may not be aligned) - * - end - end address (exclusive, may not be aligned) + * flush_tlb_page(vma, addr) + * Invalidate a single user mapping for address 'addr' in the + * address space corresponding to 'vma->mm'. Note that this + * operation only invalidates a single, last-level page-table + * entry and therefore does not affect any walk-caches. * - * flush_tlb_page(vaddr,vma) * - * Invalidate the specified page in the specified address range. 
- * - vaddr - virtual address (may not be aligned) - * - vma - vma_struct describing address range + * Next, we have some undocumented invalidation routines that you probably + * don't want to call unless you know what you're doing: * - * flush_kern_tlb_page(kaddr) + * local_flush_tlb_all() + * Same as flush_tlb_all(), but only applies to the calling CPU. * - * Invalidate the TLB entry for the specified page. The address - * will be in the kernels virtual memory space. Current uses - * only require the D-TLB to be invalidated. - * - kaddr - Kernel virtual memory address + * __flush_tlb_kernel_pgtable(addr) + * Invalidate a single kernel mapping for address 'addr' on all + * CPUs, ensuring that any walk-cache entries associated with the + * translation are also invalidated. + * + * __flush_tlb_range(vma, start, end, stride, last_level) + * Invalidate the virtual-address range '[start, end)' on all + * CPUs for the user address space corresponding to 'vma->mm'. + * The invalidation operations are issued at a granularity + * determined by 'stride' and only affect any walk-cache entries + * if 'last_level' is equal to false. + * + * + * Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented + * on top of these routines, since that is our interface to the mmu_gather + * API as used by munmap() and friends. */ static inline void local_flush_tlb_all(void) { From 8a26f3eda67df47957a54c0a85da314869637eec Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Tue, 16 Jul 2019 03:39:42 +0900 Subject: [PATCH 45/85] Revert "drivers: GICv3: remove the rtb logs of gic write and read" This reverts commit 1bfe1dd120ce3cecf853a0ddbe34ed1c59cc006c. 
Signed-off-by: Park Ju Hyung --- arch/arm64/include/asm/arch_gicv3.h | 4 ++-- drivers/irqchip/irq-gic-v3.c | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index a658464763a8..475a88f0029d 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -120,8 +120,8 @@ static inline void gic_write_bpr1(u32 val) write_sysreg_s(val, SYS_ICC_BPR1_EL1); } -#define gic_read_typer(c) readq_relaxed_no_log(c) -#define gic_write_irouter(v, c) writeq_relaxed_no_log(v, c) +#define gic_read_typer(c) readq_relaxed(c) +#define gic_write_irouter(v, c) writeq_relaxed(v, c) #define gic_read_lpir(c) readq_relaxed(c) #define gic_write_lpir(v, c) writeq_relaxed(v, c) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index d904490fbbb5..cb3dbe33d934 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -116,7 +116,7 @@ static void gic_do_wait_for_rwp(void __iomem *base) { u32 count = 1000000; /* 1s! */ - while (readl_relaxed_no_log(base + GICD_CTLR) & GICD_CTLR_RWP) { + while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) { count--; if (!count) { pr_err_ratelimited("RWP timeout, gone fishing\n"); @@ -234,8 +234,7 @@ static int gic_peek_irq(struct irq_data *d, u32 offset) else base = gic_data.dist_base; - return !!(readl_relaxed_no_log - (base + offset + (gic_irq(d) / 32) * 4) & mask); + return !!(readl_relaxed(base + offset + (gic_irq(d) / 32) * 4) & mask); } static void gic_poke_irq(struct irq_data *d, u32 offset) From 3d43b88a930f2adba81a0f0d6d860c89d23ff6c7 Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Tue, 16 Jul 2019 03:39:52 +0900 Subject: [PATCH 46/85] Revert "arm64: Prevent msm-rtb tracing in memcpy_{from,to}io and memset_io" This reverts commit 9bbe8bfbb6cc3f8c2758aa3e58577644655dfd51. 
Change-Id: Iecddbfc9a5e7f0449ee5837f9b6c70828ea26282 Signed-off-by: Park Ju Hyung --- arch/arm64/kernel/io.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/io.c b/arch/arm64/kernel/io.c index b08c4ebc7f0d..354be2a872ae 100644 --- a/arch/arm64/kernel/io.c +++ b/arch/arm64/kernel/io.c @@ -27,21 +27,21 @@ void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) { while (count && (!IS_ALIGNED((unsigned long)from, 8) || !IS_ALIGNED((unsigned long)to, 8))) { - *(u8 *)to = __raw_readb_no_log(from); + *(u8 *)to = __raw_readb(from); from++; to++; count--; } while (count >= 8) { - *(u64 *)to = __raw_readq_no_log(from); + *(u64 *)to = __raw_readq(from); from += 8; to += 8; count -= 8; } while (count) { - *(u8 *)to = __raw_readb_no_log(from); + *(u8 *)to = __raw_readb(from); from++; to++; count--; @@ -56,21 +56,21 @@ void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count) { while (count && (!IS_ALIGNED((unsigned long)to, 8) || !IS_ALIGNED((unsigned long)from, 8))) { - __raw_writeb_no_log(*(volatile u8 *)from, to); + __raw_writeb(*(volatile u8 *)from, to); from++; to++; count--; } while (count >= 8) { - __raw_writeq_no_log(*(volatile u64 *)from, to); + __raw_writeq(*(volatile u64 *)from, to); from += 8; to += 8; count -= 8; } while (count) { - __raw_writeb_no_log(*(volatile u8 *)from, to); + __raw_writeb(*(volatile u8 *)from, to); from++; to++; count--; @@ -90,19 +90,19 @@ void __memset_io(volatile void __iomem *dst, int c, size_t count) qc |= qc << 32; while (count && !IS_ALIGNED((unsigned long)dst, 8)) { - __raw_writeb_no_log(c, dst); + __raw_writeb(c, dst); dst++; count--; } while (count >= 8) { - __raw_writeq_no_log(qc, dst); + __raw_writeq(qc, dst); dst += 8; count -= 8; } while (count) { - __raw_writeb_no_log(c, dst); + __raw_writeb(c, dst); dst++; count--; } From a49d9eeed0c14d2ff5deed0c2843cad91d2f0836 Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Tue, 16 Jul 2019 
03:40:57 +0900 Subject: [PATCH 47/85] Revert "msm: redefine __raw_{read, write}v for RTB" This reverts commit f45fe19bc51e68d759c09a5d2f1f64aaaaa02aa6. Signed-off-by: Park Ju Hyung --- arch/arm64/include/asm/io.h | 102 ++++++------------------------------ lib/iomap.c | 23 +++----- 2 files changed, 22 insertions(+), 103 deletions(-) diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h index 29cb5f1e2e51..49bb9a020a09 100644 --- a/arch/arm64/include/asm/io.h +++ b/arch/arm64/include/asm/io.h @@ -30,35 +30,38 @@ #include #include #include -#include #include /* * Generic IO read/write. These perform native-endian accesses. - * that some architectures will want to re-define __raw_{read,write}w. */ -static inline void __raw_writeb_no_log(u8 val, volatile void __iomem *addr) +#define __raw_writeb __raw_writeb +static inline void __raw_writeb(u8 val, volatile void __iomem *addr) { asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr)); } -static inline void __raw_writew_no_log(u16 val, volatile void __iomem *addr) +#define __raw_writew __raw_writew +static inline void __raw_writew(u16 val, volatile void __iomem *addr) { asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr)); } -static inline void __raw_writel_no_log(u32 val, volatile void __iomem *addr) +#define __raw_writel __raw_writel +static inline void __raw_writel(u32 val, volatile void __iomem *addr) { asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr)); } -static inline void __raw_writeq_no_log(u64 val, volatile void __iomem *addr) +#define __raw_writeq __raw_writeq +static inline void __raw_writeq(u64 val, volatile void __iomem *addr) { asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr)); } -static inline u8 __raw_readb_no_log(const volatile void __iomem *addr) +#define __raw_readb __raw_readb +static inline u8 __raw_readb(const volatile void __iomem *addr) { u8 val; asm volatile(ALTERNATIVE("ldrb %w0, [%1]", @@ -68,7 +71,8 @@ static inline u8 __raw_readb_no_log(const volatile 
void __iomem *addr) return val; } -static inline u16 __raw_readw_no_log(const volatile void __iomem *addr) +#define __raw_readw __raw_readw +static inline u16 __raw_readw(const volatile void __iomem *addr) { u16 val; @@ -79,7 +83,8 @@ static inline u16 __raw_readw_no_log(const volatile void __iomem *addr) return val; } -static inline u32 __raw_readl_no_log(const volatile void __iomem *addr) +#define __raw_readl __raw_readl +static inline u32 __raw_readl(const volatile void __iomem *addr) { u32 val; asm volatile(ALTERNATIVE("ldr %w0, [%1]", @@ -89,7 +94,8 @@ static inline u32 __raw_readl_no_log(const volatile void __iomem *addr) return val; } -static inline u64 __raw_readq_no_log(const volatile void __iomem *addr) +#define __raw_readq __raw_readq +static inline u64 __raw_readq(const volatile void __iomem *addr) { u64 val; asm volatile(ALTERNATIVE("ldr %0, [%1]", @@ -99,48 +105,6 @@ static inline u64 __raw_readq_no_log(const volatile void __iomem *addr) return val; } -/* - * There may be cases when clients don't want to support or can't support the - * logging, The appropriate functions can be used but clinets should carefully - * consider why they can't support the logging - */ - -#define __raw_write_logged(v, a, _t) ({ \ - int _ret; \ - volatile void __iomem *_a = (a); \ - void *_addr = (void __force *)(_a); \ - _ret = uncached_logk(LOGK_WRITEL, _addr); \ - if (_ret) /* COFNIG_SEC_DEBUG */\ - ETB_WAYPOINT; \ - __raw_write##_t##_no_log((v), _a); \ - if (_ret) \ - LOG_BARRIER; \ - }) - -#define __raw_writeb(v, a) __raw_write_logged((v), a, b) -#define __raw_writew(v, a) __raw_write_logged((v), a, w) -#define __raw_writel(v, a) __raw_write_logged((v), a, l) -#define __raw_writeq(v, a) __raw_write_logged((v), a, q) - -#define __raw_read_logged(a, _l, _t) ({ \ - _t __a; \ - const volatile void __iomem *_a = (a); \ - void *_addr = (void __force *)(_a); \ - int _ret; \ - _ret = uncached_logk(LOGK_READL, _addr); \ - if (_ret) /* CONFIG_SEC_DEBUG */ \ - ETB_WAYPOINT; \ - 
__a = __raw_read##_l##_no_log(_a); \ - if (_ret) \ - LOG_BARRIER; \ - __a; \ - }) - -#define __raw_readb(a) __raw_read_logged((a), b, u8) -#define __raw_readw(a) __raw_read_logged((a), w, u16) -#define __raw_readl(a) __raw_read_logged((a), l, u32) -#define __raw_readq(a) __raw_read_logged((a), q, u64) - /* IO barriers */ #define __iormb(v) \ ({ \ @@ -178,22 +142,6 @@ static inline u64 __raw_readq_no_log(const volatile void __iomem *addr) #define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c))) #define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c))) -#define readb_relaxed_no_log(c) ({ u8 __v = __raw_readb_no_log(c); __v; }) -#define readw_relaxed_no_log(c) \ - ({ u16 __v = le16_to_cpu((__force __le16)__raw_readw_no_log(c)); __v; }) -#define readl_relaxed_no_log(c) \ - ({ u32 __v = le32_to_cpu((__force __le32)__raw_readl_no_log(c)); __v; }) -#define readq_relaxed_no_log(c) \ - ({ u64 __v = le64_to_cpu((__force __le64)__raw_readq_no_log(c)); __v; }) - -#define writeb_relaxed_no_log(v, c) ((void)__raw_writeb_no_log((v), (c))) -#define writew_relaxed_no_log(v, c) \ - ((void)__raw_writew_no_log((__force u16)cpu_to_le32(v), (c))) -#define writel_relaxed_no_log(v, c) \ - ((void)__raw_writel_no_log((__force u32)cpu_to_le32(v), (c))) -#define writeq_relaxed_no_log(v, c) \ - ((void)__raw_writeq_no_log((__force u64)cpu_to_le32(v), (c))) - /* * I/O memory access primitives. Reads are ordered relative to any * following Normal memory access. 
Writes are ordered relative to any prior @@ -209,24 +157,6 @@ static inline u64 __raw_readq_no_log(const volatile void __iomem *addr) #define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c)); }) #define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c)); }) -#define readb_no_log(c) \ - ({ u8 __v = readb_relaxed_no_log(c); __iormb(__v); __v; }) -#define readw_no_log(c) \ - ({ u16 __v = readw_relaxed_no_log(c); __iormb(__v); __v; }) -#define readl_no_log(c) \ - ({ u32 __v = readl_relaxed_no_log(c); __iormb(__v); __v; }) -#define readq_no_log(c) \ - ({ u64 __v = readq_relaxed_no_log(c); __iormb(__v); __v; }) - -#define writeb_no_log(v, c) \ - ({ __iowmb(); writeb_relaxed_no_log((v), (c)); }) -#define writew_no_log(v, c) \ - ({ __iowmb(); writew_relaxed_no_log((v), (c)); }) -#define writel_no_log(v, c) \ - ({ __iowmb(); writel_relaxed_no_log((v), (c)); }) -#define writeq_no_log(v, c) \ - ({ __iowmb(); writeq_relaxed_no_log((v), (c)); }) - /* * I/O port access primitives. */ diff --git a/lib/iomap.c b/lib/iomap.c index f45ea96c77a2..541d926da95e 100644 --- a/lib/iomap.c +++ b/lib/iomap.c @@ -6,7 +6,6 @@ */ #include #include -#include #include @@ -72,31 +71,26 @@ static void bad_io_access(unsigned long port, const char *access) unsigned int ioread8(void __iomem *addr) { - uncached_logk_pc(LOGK_READL, __builtin_return_address(0), addr); - IO_COND(addr, return inb(port), return readb_no_log(addr)); + IO_COND(addr, return inb(port), return readb(addr)); return 0xff; } unsigned int ioread16(void __iomem *addr) { - uncached_logk_pc(LOGK_READL, __builtin_return_address(0), addr); - IO_COND(addr, return inw(port), return readw_no_log(addr)); + IO_COND(addr, return inw(port), return readw(addr)); return 0xffff; } unsigned int ioread16be(void __iomem *addr) { - uncached_logk_pc(LOGK_READL, __builtin_return_address(0), addr); IO_COND(addr, return pio_read16be(port), return mmio_read16be(addr)); return 0xffff; } unsigned int ioread32(void __iomem *addr) { - 
uncached_logk_pc(LOGK_READL, __builtin_return_address(0), addr); - IO_COND(addr, return inl(port), return readl_no_log(addr)); + IO_COND(addr, return inl(port), return readl(addr)); return 0xffffffff; } unsigned int ioread32be(void __iomem *addr) { - uncached_logk_pc(LOGK_READL, __builtin_return_address(0), addr); IO_COND(addr, return pio_read32be(port), return mmio_read32be(addr)); return 0xffffffff; } @@ -118,27 +112,22 @@ EXPORT_SYMBOL(ioread32be); void iowrite8(u8 val, void __iomem *addr) { - uncached_logk_pc(LOGK_WRITEL, __builtin_return_address(0), addr); - IO_COND(addr, outb(val, port), writeb_no_log(val, addr)); + IO_COND(addr, outb(val,port), writeb(val, addr)); } void iowrite16(u16 val, void __iomem *addr) { - uncached_logk_pc(LOGK_WRITEL, __builtin_return_address(0), addr); - IO_COND(addr, outw(val, port), writew_no_log(val, addr)); + IO_COND(addr, outw(val,port), writew(val, addr)); } void iowrite16be(u16 val, void __iomem *addr) { - uncached_logk_pc(LOGK_WRITEL, __builtin_return_address(0), addr); IO_COND(addr, pio_write16be(val,port), mmio_write16be(val, addr)); } void iowrite32(u32 val, void __iomem *addr) { - uncached_logk_pc(LOGK_WRITEL, __builtin_return_address(0), addr); - IO_COND(addr, outl(val, port), writel_no_log(val, addr)); + IO_COND(addr, outl(val,port), writel(val, addr)); } void iowrite32be(u32 val, void __iomem *addr) { - uncached_logk_pc(LOGK_WRITEL, __builtin_return_address(0), addr); IO_COND(addr, pio_write32be(val,port), mmio_write32be(val, addr)); } EXPORT_SYMBOL(iowrite8); From 1ed9fa92d9f77aa04c77ff2ca400a578131010be Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Tue, 16 Jul 2019 03:41:30 +0900 Subject: [PATCH 48/85] Revert "trace: rtb: add msm_rtb register tracing feature snapshot" This reverts commit 122e0ddaadac64b56b5d118b0aed16edffc09796. 
Signed-off-by: Park Ju Hyung --- kernel/trace/Kconfig | 18 ------------------ kernel/trace/Makefile | 1 - 2 files changed, 19 deletions(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index aaffbfba4a33..7bb797a19230 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -96,24 +96,6 @@ config IPC_LOGGING If in doubt, say no. -config QCOM_RTB - bool "Register tracing" - help - Enable the kernel to trace every kernel function. This is done - Add support for logging different events to a small uncached - region. This is designed to aid in debugging reset cases where the - caches may not be flushed before the target resets. - -config QCOM_RTB_SEPARATE_CPUS - bool "Separate entries for each cpu" - depends on QCOM_RTB - depends on SMP - help - Under some circumstances, it may be beneficial to give dedicated space - for each cpu to log accesses. Selecting this option will log each cpu - separately. This will guarantee that the last acesses for each cpu - will be logged but there will be fewer entries per cpu - # All tracer options should select GENERIC_TRACER. For those options that are # enabled by all tracers (context switch and event tracer) they select TRACING. # This allows those options to appear when no other tracer is selected. But the diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 6883b7951cb8..f3ff62da1f7e 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -73,7 +73,6 @@ obj-$(CONFIG_GPU_TRACEPOINTS) += gpu-traces.o obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o -obj-$(CONFIG_QCOM_RTB) += msm_rtb.o obj-$(CONFIG_IPC_LOGGING) += ipc_logging.o ifdef CONFIG_DEBUG_FS obj-$(CONFIG_IPC_LOGGING) += ipc_logging_debug.o From 286b7e43c53f51c694e49b6487d07b4b05438c2f Mon Sep 17 00:00:00 2001 From: idkwhoiam322 Date: Thu, 23 Jan 2020 05:16:56 +0530 Subject: [PATCH 49/85] Revert "ARM: gic: Add support for logging interrupts in RTB" This reverts commit b6f137ab06e63f2323d102c562a7381000a76bd2. 
--- drivers/irqchip/irq-gic.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 43ebc5026ebf..967297647bca 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -41,7 +41,6 @@ #include #include #include -#include #ifdef CONFIG_PM #include #endif @@ -506,7 +505,6 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI); isb(); handle_domain_irq(gic->domain, irqnr, regs); - uncached_logk(LOGK_IRQ, (void *)(uintptr_t)irqnr); continue; } if (irqnr < 16) { @@ -524,7 +522,6 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) smp_rmb(); handle_IPI(irqnr, regs); #endif - uncached_logk(LOGK_IRQ, (void *)(uintptr_t)irqnr); continue; } break; From 81a3ee23f604d7e6e54ecb02fce96cc73c707760 Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Tue, 16 Jul 2019 03:42:50 +0900 Subject: [PATCH 50/85] Revert "ARM: gic-v3: Log the IRQs in RTB before handling an IRQ" This reverts commit 5f0823d3f6524e9531be2d3cf1f11d7e2e68946d. 
Signed-off-by: Park Ju Hyung --- drivers/irqchip/irq-gic-v3.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index cb3dbe33d934..5ddbe25e9974 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -578,7 +577,6 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs if (likely(irqnr > 15 && irqnr < 1020) || irqnr >= 8192) { int err; - uncached_logk(LOGK_IRQ, (void *)(uintptr_t)irqnr); if (static_key_true(&supports_deactivate)) gic_write_eoir(irqnr); else @@ -599,7 +597,6 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs continue; } if (irqnr < 16) { - uncached_logk(LOGK_IRQ, (void *)(uintptr_t)irqnr); gic_write_eoir(irqnr); if (static_key_true(&supports_deactivate)) gic_write_dir(irqnr); From 419d15dca17e5a0439f7ff0202205b909a3b94af Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Tue, 16 Jul 2019 03:43:12 +0900 Subject: [PATCH 51/85] Revert "sched: move logging process id in the rtb to sched" This reverts commit d21bdd9c8802b8227a815ca0df7ceb2e6da7037d. 
Signed-off-by: Park Ju Hyung --- arch/arm64/include/asm/mmu_context.h | 1 + kernel/sched/core.c | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 014d8d07dc81..58a0930b80f6 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -46,6 +46,7 @@ static inline void contextidr_thread_switch(struct task_struct *next) write_sysreg(pid, contextidr_el1); isb(); + uncached_logk(LOGK_CTXID, (void *)(u64)pid); } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c85b870325c2..4a80d3a3ea69 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -34,7 +34,6 @@ #include #include -#include #include #ifdef CONFIG_PARAVIRT #include @@ -2947,7 +2946,7 @@ context_switch(struct rq *rq, struct task_struct *prev, */ rq_unpin_lock(rq, rf); spin_release(&rq->lock.dep_map, 1, _THIS_IP_); - uncached_logk(LOGK_CTXID, (void *)(u64)next->pid); + /* Here we just switch the register state and the stack. */ switch_to(prev, next, prev); barrier(); From 3e359209845e8e25c45246642c1f261f4dc68357 Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Tue, 16 Jul 2019 03:43:32 +0900 Subject: [PATCH 52/85] Revert "arm64: mm: Log the process id in the rtb" This reverts commit c3c9f7649543536205e87d848b143fef4629be73. 
Signed-off-by: Park Ju Hyung --- arch/arm64/include/asm/mmu_context.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 58a0930b80f6..ecb5a1115455 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -34,20 +34,14 @@ #include #include #include -#include static inline void contextidr_thread_switch(struct task_struct *next) { - pid_t pid = task_pid_nr(next); - if (!IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR)) return; - write_sysreg(pid, contextidr_el1); + write_sysreg(task_pid_nr(next), contextidr_el1); isb(); - - uncached_logk(LOGK_CTXID, (void *)(u64)pid); - } /* From 8d8a89145bcf2162d2bab681911b6a4f5a8d4163 Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Tue, 16 Jul 2019 03:48:15 +0900 Subject: [PATCH 53/85] Revert "ARM: msm: add support for logged IO accessors" This reverts commit 7a0322f8a16bfea525e37d094b6effe45aa20316. Signed-off-by: Park Ju Hyung --- arch/arm/include/asm/io.h | 104 +++++++------------------------------- 1 file changed, 18 insertions(+), 86 deletions(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index c61b1db9e4c9..6527a81d30b5 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -28,7 +28,6 @@ #include #include #include -#include #include /* @@ -62,24 +61,23 @@ void __raw_readsl(const volatile void __iomem *addr, void *data, int longlen); * the bus. Rather than special-case the machine, just let the compiler * generate the access for CPUs prior to ARMv6. 
*/ -#define __raw_readw_no_log(a) (__chk_io_ptr(a), \ - *(volatile unsigned short __force *)(a)) -#define __raw_writew_no_log(v, a) ((void)(__chk_io_ptr(a), \ - *(volatile unsigned short __force *)\ - (a) = (v))) +#define __raw_readw(a) (__chk_io_ptr(a), *(volatile unsigned short __force *)(a)) +#define __raw_writew(v,a) ((void)(__chk_io_ptr(a), *(volatile unsigned short __force *)(a) = (v))) #else /* * When running under a hypervisor, we want to avoid I/O accesses with * writeback addressing modes as these incur a significant performance * overhead (the address generation must be emulated in software). */ -static inline void __raw_writew_no_log(u16 val, volatile void __iomem *addr) +#define __raw_writew __raw_writew +static inline void __raw_writew(u16 val, volatile void __iomem *addr) { asm volatile("strh %1, %0" : : "Q" (*(volatile u16 __force *)addr), "r" (val)); } -static inline u16 __raw_readw_no_log(const volatile void __iomem *addr) +#define __raw_readw __raw_readw +static inline u16 __raw_readw(const volatile void __iomem *addr) { u16 val; asm volatile("ldrh %0, %1" @@ -89,19 +87,22 @@ static inline u16 __raw_readw_no_log(const volatile void __iomem *addr) } #endif -static inline void __raw_writeb_no_log(u8 val, volatile void __iomem *addr) +#define __raw_writeb __raw_writeb +static inline void __raw_writeb(u8 val, volatile void __iomem *addr) { asm volatile("strb %1, %0" : : "Qo" (*(volatile u8 __force *)addr), "r" (val)); } -static inline void __raw_writel_no_log(u32 val, volatile void __iomem *addr) +#define __raw_writel __raw_writel +static inline void __raw_writel(u32 val, volatile void __iomem *addr) { asm volatile("str %1, %0" : : "Qo" (*(volatile u32 __force *)addr), "r" (val)); } -static inline void __raw_writeq_no_log(u64 val, volatile void __iomem *addr) +#define __raw_writeq __raw_writeq +static inline void __raw_writeq(u64 val, volatile void __iomem *addr) { register u64 v asm ("r2"); @@ -112,7 +113,8 @@ static inline void 
__raw_writeq_no_log(u64 val, volatile void __iomem *addr) : "r" (v)); } -static inline u8 __raw_readb_no_log(const volatile void __iomem *addr) +#define __raw_readb __raw_readb +static inline u8 __raw_readb(const volatile void __iomem *addr) { u8 val; asm volatile("ldrb %0, %1" @@ -121,7 +123,8 @@ static inline u8 __raw_readb_no_log(const volatile void __iomem *addr) return val; } -static inline u32 __raw_readl_no_log(const volatile void __iomem *addr) +#define __raw_readl __raw_readl +static inline u32 __raw_readl(const volatile void __iomem *addr) { u32 val; asm volatile("ldr %0, %1" @@ -130,7 +133,8 @@ static inline u32 __raw_readl_no_log(const volatile void __iomem *addr) return val; } -static inline u64 __raw_readq_no_log(const volatile void __iomem *addr) +#define __raw_readq __raw_readq +static inline u64 __raw_readq(const volatile void __iomem *addr) { register u64 val asm ("r2"); @@ -140,48 +144,6 @@ static inline u64 __raw_readq_no_log(const volatile void __iomem *addr) return val; } -/* - * There may be cases when clients don't want to support or can't support the - * logging. The appropriate functions can be used but clients should carefully - * consider why they can't support the logging. 
- */ - -#define __raw_write_logged(v, a, _t) ({ \ - int _ret; \ - volatile void __iomem *_a = (a); \ - void *_addr = (void __force *)(_a); \ - _ret = uncached_logk(LOGK_WRITEL, _addr); \ - ETB_WAYPOINT; \ - __raw_write##_t##_no_log((v), _a); \ - if (_ret) \ - LOG_BARRIER; \ - }) - - -#define __raw_writeb(v, a) __raw_write_logged((v), (a), b) -#define __raw_writew(v, a) __raw_write_logged((v), (a), w) -#define __raw_writel(v, a) __raw_write_logged((v), (a), l) -#define __raw_writeq(v, a) __raw_write_logged((v), (a), q) - -#define __raw_read_logged(a, _l, _t) ({ \ - unsigned _t __a; \ - const volatile void __iomem *_a = (a); \ - void *_addr = (void __force *)(_a); \ - int _ret; \ - _ret = uncached_logk(LOGK_READL, _addr); \ - ETB_WAYPOINT; \ - __a = __raw_read##_l##_no_log(_a);\ - if (_ret) \ - LOG_BARRIER; \ - __a; \ - }) - - -#define __raw_readb(a) __raw_read_logged((a), b, char) -#define __raw_readw(a) __raw_read_logged((a), w, short) -#define __raw_readl(a) __raw_read_logged((a), l, int) -#define __raw_readq(a) __raw_read_logged((a), q, long long) - /* * Architecture ioremap implementation. 
*/ @@ -363,24 +325,12 @@ extern void _memset_io(volatile void __iomem *, int, size_t); __raw_readl(c)); __r; }) #define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64) \ __raw_readq(c)); __r; }) -#define readb_relaxed_no_log(c) ({ u8 __r = __raw_readb_no_log(c); __r; }) -#define readl_relaxed_no_log(c) ({ u32 __r = le32_to_cpu((__force __le32) \ - __raw_readl_no_log(c)); __r; }) -#define readq_relaxed_no_log(c) ({ u64 __r = le64_to_cpu((__force __le64) \ - __raw_readq_no_log(c)); __r; }) #define writeb_relaxed(v, c) __raw_writeb(v, c) #define writew_relaxed(v, c) __raw_writew((__force u16) cpu_to_le16(v), c) #define writel_relaxed(v, c) __raw_writel((__force u32) cpu_to_le32(v), c) #define writeq_relaxed(v, c) __raw_writeq((__force u64) cpu_to_le64(v), c) -#define writeb_relaxed_no_log(v, c) ((void)__raw_writeb_no_log((v), (c))) -#define writew_relaxed_no_log(v, c) __raw_writew_no_log((__force u16) \ - cpu_to_le16(v), c) -#define writel_relaxed_no_log(v, c) __raw_writel_no_log((__force u32) \ - cpu_to_le32(v), c) -#define writeq_relaxed_no_log(v, c) __raw_writeq_no_log((__force u64) \ - cpu_to_le64(v), c) #define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; }) #define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; }) @@ -401,24 +351,6 @@ extern void _memset_io(volatile void __iomem *, int, size_t); #define writesw(p,d,l) __raw_writesw(p,d,l) #define writesl(p,d,l) __raw_writesl(p,d,l) -#define readb_no_log(c) \ - ({ u8 __v = readb_relaxed_no_log(c); __iormb(); __v; }) -#define readw_no_log(c) \ - ({ u16 __v = readw_relaxed_no_log(c); __iormb(); __v; }) -#define readl_no_log(c) \ - ({ u32 __v = readl_relaxed_no_log(c); __iormb(); __v; }) -#define readq_no_log(c) \ - ({ u64 __v = readq_relaxed_no_log(c); __iormb(); __v; }) - -#define writeb_no_log(v, c) \ - ({ __iowmb(); writeb_relaxed_no_log((v), (c)); }) -#define writew_no_log(v, c) \ - ({ __iowmb(); writew_relaxed_no_log((v), (c)); }) -#define writel_no_log(v, c) \ - ({ __iowmb(); 
writel_relaxed_no_log((v), (c)); }) -#define writeq_no_log(v, c) \ - ({ __iowmb(); writeq_relaxed_no_log((v), (c)); }) - #ifndef __ARMBE__ static inline void memset_io(volatile void __iomem *dst, unsigned c, size_t count) From ccf94d3627cd74bd7bf5b89f6c00f9a8386ee213 Mon Sep 17 00:00:00 2001 From: Park Ju Hyung Date: Tue, 19 Mar 2019 14:27:01 +0900 Subject: [PATCH 54/85] treewide: remove remaining _no_log() usage sed -i -e 's/_no_log//g' $(git grep -l _no_log | tr '\n' ' ') and manually fix drivers/clk/qcom/clk-cpu-osm.c. Signed-off-by: Park Ju Hyung --- arch/arm/kernel/io.c | 18 +++++------ drivers/bus/mhi/core/mhi_internal.h | 2 +- drivers/bus/mhi/core/mhi_main.c | 4 +-- drivers/clk/qcom/clk-cpu-osm.c | 9 ++---- drivers/clocksource/arm_arch_timer.c | 30 +++++++++---------- drivers/hwtracing/coresight/coresight-ost.c | 10 +++---- .../platform/msm/ais/cam_utils/cam_io_util.c | 4 +-- .../msm/camera/cam_utils/cam_io_util.c | 4 +-- .../platform/msm/sde/rotator/sde_rotator_r3.c | 20 ++++++------- drivers/platform/msm/qcom-geni-se.c | 4 +-- drivers/soc/qcom/boot_stats.c | 4 +-- drivers/soc/qcom/dcc_v2.c | 6 ++-- drivers/soc/qcom/jtagv8-etm.c | 8 ++--- drivers/thermal/tsens2xxx.c | 10 +++---- 14 files changed, 64 insertions(+), 69 deletions(-) diff --git a/arch/arm/kernel/io.c b/arch/arm/kernel/io.c index a20e48c50d85..c894fdddf592 100644 --- a/arch/arm/kernel/io.c +++ b/arch/arm/kernel/io.c @@ -46,21 +46,21 @@ EXPORT_SYMBOL(atomic_io_modify); void _memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) { while (count && (!IO_CHECK_ALIGN(from, 8) || !IO_CHECK_ALIGN(to, 8))) { - *(u8 *)to = readb_relaxed_no_log(from); + *(u8 *)to = readb_relaxed(from); from++; to++; count--; } while (count >= 8) { - *(u64 *)to = readq_relaxed_no_log(from); + *(u64 *)to = readq_relaxed(from); from += 8; to += 8; count -= 8; } while (count) { - *(u8 *)to = readb_relaxed_no_log(from); + *(u8 *)to = readb_relaxed(from); from++; to++; count--; @@ -76,21 +76,21 @@ void 
_memcpy_toio(volatile void __iomem *to, const void *from, size_t count) void *p = (void __force *)to; while (count && (!IO_CHECK_ALIGN(p, 8) || !IO_CHECK_ALIGN(from, 8))) { - writeb_relaxed_no_log(*(volatile u8 *)from, p); + writeb_relaxed(*(volatile u8 *)from, p); from++; p++; count--; } while (count >= 8) { - writeq_relaxed_no_log(*(volatile u64 *)from, p); + writeq_relaxed(*(volatile u64 *)from, p); from += 8; p += 8; count -= 8; } while (count) { - writeb_relaxed_no_log(*(volatile u8 *)from, p); + writeb_relaxed(*(volatile u8 *)from, p); from++; p++; count--; @@ -111,19 +111,19 @@ void _memset_io(volatile void __iomem *dst, int c, size_t count) qc |= qc << 32; while (count && !IO_CHECK_ALIGN(p, 8)) { - writeb_relaxed_no_log(c, p); + writeb_relaxed(c, p); p++; count--; } while (count >= 8) { - writeq_relaxed_no_log(qc, p); + writeq_relaxed(qc, p); p += 8; count -= 8; } while (count) { - writeb_relaxed_no_log(c, p); + writeb_relaxed(c, p); p++; count--; } diff --git a/drivers/bus/mhi/core/mhi_internal.h b/drivers/bus/mhi/core/mhi_internal.h index 9743cf0043df..c568878c59d9 100644 --- a/drivers/bus/mhi/core/mhi_internal.h +++ b/drivers/bus/mhi/core/mhi_internal.h @@ -861,7 +861,7 @@ static inline void mhi_timesync_log(struct mhi_controller *mhi_cntrl) if (mhi_tsync && mhi_cntrl->tsync_log) mhi_cntrl->tsync_log(mhi_cntrl, - readq_no_log(mhi_tsync->time_reg)); + readq(mhi_tsync->time_reg)); } /* memory allocation methods */ diff --git a/drivers/bus/mhi/core/mhi_main.c b/drivers/bus/mhi/core/mhi_main.c index 3a4c2a47190c..127b91607cdd 100644 --- a/drivers/bus/mhi/core/mhi_main.c +++ b/drivers/bus/mhi/core/mhi_main.c @@ -2621,7 +2621,7 @@ int mhi_get_remote_time_sync(struct mhi_device *mhi_dev, local_irq_disable(); *t_host = mhi_cntrl->time_get(mhi_cntrl, mhi_cntrl->priv_data); - *t_dev = readq_relaxed_no_log(mhi_tsync->time_reg); + *t_dev = readq_relaxed(mhi_tsync->time_reg); local_irq_enable(); preempt_enable(); @@ -2726,7 +2726,7 @@ int mhi_get_remote_time(struct 
mhi_device *mhi_dev, mhi_tsync->local_time = mhi_cntrl->time_get(mhi_cntrl, mhi_cntrl->priv_data); - writel_relaxed_no_log(mhi_tsync->int_sequence, mhi_cntrl->tsync_db); + writel_relaxed(mhi_tsync->int_sequence, mhi_cntrl->tsync_db); /* write must go thru immediately */ wmb(); diff --git a/drivers/clk/qcom/clk-cpu-osm.c b/drivers/clk/qcom/clk-cpu-osm.c index fb3b748792d3..fa3b540c8975 100644 --- a/drivers/clk/qcom/clk-cpu-osm.c +++ b/drivers/clk/qcom/clk-cpu-osm.c @@ -110,14 +110,9 @@ static inline int clk_osm_read_reg(struct clk_osm *c, u32 offset) return readl_relaxed(c->vbase + offset); } -static inline int clk_osm_read_reg_no_log(struct clk_osm *c, u32 offset) -{ - return readl_relaxed_no_log(c->vbase + offset); -} - static inline int clk_osm_mb(struct clk_osm *c) { - return readl_relaxed_no_log(c->vbase + ENABLE_REG); + return readl_relaxed(c->vbase + ENABLE_REG); } static long clk_osm_list_rate(struct clk_hw *hw, unsigned int n, @@ -924,7 +919,7 @@ static u64 clk_osm_get_cpu_cycle_counter(int cpu) * core DCVS is disabled. */ core_num = parent->per_core_dcvs ? 
c->core_num : 0; - val = clk_osm_read_reg_no_log(parent, + val = clk_osm_read_reg(parent, OSM_CYCLE_COUNTER_STATUS_REG(core_num)); if (val < c->prev_cycle_counter) { diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index f7791e2c3b5a..2301b0327836 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -100,20 +100,20 @@ void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val, struct arch_timer *timer = to_arch_timer(clk); switch (reg) { case ARCH_TIMER_REG_CTRL: - writel_relaxed_no_log(val, timer->base + CNTP_CTL); + writel_relaxed(val, timer->base + CNTP_CTL); break; case ARCH_TIMER_REG_TVAL: - writel_relaxed_no_log(val, timer->base + CNTP_TVAL); + writel_relaxed(val, timer->base + CNTP_TVAL); break; } } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) { struct arch_timer *timer = to_arch_timer(clk); switch (reg) { case ARCH_TIMER_REG_CTRL: - writel_relaxed_no_log(val, timer->base + CNTV_CTL); + writel_relaxed(val, timer->base + CNTV_CTL); break; case ARCH_TIMER_REG_TVAL: - writel_relaxed_no_log(val, timer->base + CNTV_TVAL); + writel_relaxed(val, timer->base + CNTV_TVAL); break; } } else { @@ -131,20 +131,20 @@ u32 arch_timer_reg_read(int access, enum arch_timer_reg reg, struct arch_timer *timer = to_arch_timer(clk); switch (reg) { case ARCH_TIMER_REG_CTRL: - val = readl_relaxed_no_log(timer->base + CNTP_CTL); + val = readl_relaxed(timer->base + CNTP_CTL); break; case ARCH_TIMER_REG_TVAL: - val = readl_relaxed_no_log(timer->base + CNTP_TVAL); + val = readl_relaxed(timer->base + CNTP_TVAL); break; } } else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) { struct arch_timer *timer = to_arch_timer(clk); switch (reg) { case ARCH_TIMER_REG_CTRL: - val = readl_relaxed_no_log(timer->base + CNTV_CTL); + val = readl_relaxed(timer->base + CNTV_CTL); break; case ARCH_TIMER_REG_TVAL: - val = readl_relaxed_no_log(timer->base + CNTV_TVAL); + val = readl_relaxed(timer->base + CNTV_TVAL); 
break; } } else { @@ -900,11 +900,11 @@ void arch_timer_mem_get_cval(u32 *lo, u32 *hi) if (!arch_counter_base) return; - ctrl = readl_relaxed_no_log(arch_counter_base + CNTV_CTL); + ctrl = readl_relaxed(arch_counter_base + CNTV_CTL); if (ctrl & ARCH_TIMER_CTRL_ENABLE) { - *lo = readl_relaxed_no_log(arch_counter_base + CNTCVAL_LO); - *hi = readl_relaxed_no_log(arch_counter_base + CNTCVAL_HI); + *lo = readl_relaxed(arch_counter_base + CNTCVAL_LO); + *hi = readl_relaxed(arch_counter_base + CNTCVAL_HI); } } @@ -913,9 +913,9 @@ static u64 arch_counter_get_cntvct_mem(void) u32 vct_lo, vct_hi, tmp_hi; do { - vct_hi = readl_relaxed_no_log(arch_counter_base + CNTVCT_HI); - vct_lo = readl_relaxed_no_log(arch_counter_base + CNTVCT_LO); - tmp_hi = readl_relaxed_no_log(arch_counter_base + CNTVCT_HI); + vct_hi = readl_relaxed(arch_counter_base + CNTVCT_HI); + vct_lo = readl_relaxed(arch_counter_base + CNTVCT_LO); + tmp_hi = readl_relaxed(arch_counter_base + CNTVCT_HI); } while (vct_hi != tmp_hi); return ((u64) vct_hi << 32) | vct_lo; @@ -1285,7 +1285,7 @@ arch_timer_mem_find_best_frame(struct arch_timer_mem *timer_mem) return NULL; } - cnttidr = readl_relaxed_no_log(cntctlbase + CNTTIDR); + cnttidr = readl_relaxed(cntctlbase + CNTTIDR); /* * Try to find a virtual capable frame. 
Otherwise fall back to a diff --git a/drivers/hwtracing/coresight/coresight-ost.c b/drivers/hwtracing/coresight/coresight-ost.c index 4a277148196b..662196cb39b6 100644 --- a/drivers/hwtracing/coresight/coresight-ost.c +++ b/drivers/hwtracing/coresight/coresight-ost.c @@ -62,30 +62,30 @@ static int stm_ost_send(void __iomem *addr, const void *data, uint32_t size) uint32_t len = size; if (((unsigned long)data & 0x1) && (size >= 1)) { - writeb_relaxed_no_log(*(uint8_t *)data, addr); + writeb_relaxed(*(uint8_t *)data, addr); data++; size--; } if (((unsigned long)data & 0x2) && (size >= 2)) { - writew_relaxed_no_log(*(uint16_t *)data, addr); + writew_relaxed(*(uint16_t *)data, addr); data += 2; size -= 2; } /* now we are 32bit aligned */ while (size >= 4) { - writel_relaxed_no_log(*(uint32_t *)data, addr); + writel_relaxed(*(uint32_t *)data, addr); data += 4; size -= 4; } if (size >= 2) { - writew_relaxed_no_log(*(uint16_t *)data, addr); + writew_relaxed(*(uint16_t *)data, addr); data += 2; size -= 2; } if (size >= 1) { - writeb_relaxed_no_log(*(uint8_t *)data, addr); + writeb_relaxed(*(uint8_t *)data, addr); data++; size--; } diff --git a/drivers/media/platform/msm/ais/cam_utils/cam_io_util.c b/drivers/media/platform/msm/ais/cam_utils/cam_io_util.c index 8d5f96ac816f..13a7fdcfa718 100644 --- a/drivers/media/platform/msm/ais/cam_utils/cam_io_util.c +++ b/drivers/media/platform/msm/ais/cam_utils/cam_io_util.c @@ -23,7 +23,7 @@ int cam_io_w(uint32_t data, void __iomem *addr) return -EINVAL; CAM_DBG(CAM_UTIL, "0x%pK %08x", addr, data); - writel_relaxed_no_log(data, addr); + writel_relaxed(data, addr); return 0; } @@ -36,7 +36,7 @@ int cam_io_w_mb(uint32_t data, void __iomem *addr) CAM_DBG(CAM_UTIL, "0x%pK %08x", addr, data); /* Ensure previous writes are done */ wmb(); - writel_relaxed_no_log(data, addr); + writel_relaxed(data, addr); /* Ensure previous writes are done */ wmb(); diff --git a/drivers/media/platform/msm/camera/cam_utils/cam_io_util.c 
b/drivers/media/platform/msm/camera/cam_utils/cam_io_util.c index 34e309ff0055..d7af6a77aad9 100644 --- a/drivers/media/platform/msm/camera/cam_utils/cam_io_util.c +++ b/drivers/media/platform/msm/camera/cam_utils/cam_io_util.c @@ -23,7 +23,7 @@ int cam_io_w(uint32_t data, void __iomem *addr) return -EINVAL; CAM_DBG(CAM_UTIL, "0x%pK %08x", addr, data); - writel_relaxed_no_log(data, addr); + writel_relaxed(data, addr); return 0; } @@ -36,7 +36,7 @@ int cam_io_w_mb(uint32_t data, void __iomem *addr) CAM_DBG(CAM_UTIL, "0x%pK %08x", addr, data); /* Ensure previous writes are done */ wmb(); - writel_relaxed_no_log(data, addr); + writel_relaxed(data, addr); /* Ensure previous writes are done */ wmb(); diff --git a/drivers/media/platform/msm/sde/rotator/sde_rotator_r3.c b/drivers/media/platform/msm/sde/rotator/sde_rotator_r3.c index 88d0b2e76ff6..6af18bc61314 100644 --- a/drivers/media/platform/msm/sde/rotator/sde_rotator_r3.c +++ b/drivers/media/platform/msm/sde/rotator/sde_rotator_r3.c @@ -68,12 +68,12 @@ do { \ SDEROT_DBG("SDEREG.W:[%s:0x%X] <= 0x%X\n", #off, (off),\ (u32)(data));\ - writel_relaxed_no_log( \ + writel_relaxed( \ (REGDMA_OP_REGWRITE | \ ((off) & REGDMA_ADDR_OFFSET_MASK)), \ p); \ p += sizeof(u32); \ - writel_relaxed_no_log(data, p); \ + writel_relaxed(data, p); \ p += sizeof(u32); \ } while (0) @@ -81,14 +81,14 @@ do { \ SDEROT_DBG("SDEREG.M:[%s:0x%X] <= 0x%X\n", #off, (off),\ (u32)(data));\ - writel_relaxed_no_log( \ + writel_relaxed( \ (REGDMA_OP_REGMODIFY | \ ((off) & REGDMA_ADDR_OFFSET_MASK)), \ p); \ p += sizeof(u32); \ - writel_relaxed_no_log(mask, p); \ + writel_relaxed(mask, p); \ p += sizeof(u32); \ - writel_relaxed_no_log(data, p); \ + writel_relaxed(data, p); \ p += sizeof(u32); \ } while (0) @@ -96,25 +96,25 @@ do { \ SDEROT_DBG("SDEREG.B:[%s:0x%X:0x%X]\n", #off, (off),\ (u32)(len));\ - writel_relaxed_no_log( \ + writel_relaxed( \ (REGDMA_OP_BLKWRITE_INC | \ ((off) & REGDMA_ADDR_OFFSET_MASK)), \ p); \ p += sizeof(u32); \ - 
writel_relaxed_no_log(len, p); \ + writel_relaxed(len, p); \ p += sizeof(u32); \ } while (0) #define SDE_REGDMA_BLKWRITE_DATA(p, data) \ do { \ SDEROT_DBG("SDEREG.I:[:] <= 0x%X\n", (u32)(data));\ - writel_relaxed_no_log(data, p); \ + writel_relaxed(data, p); \ p += sizeof(u32); \ } while (0) #define SDE_REGDMA_READ(p, data) \ do { \ - data = readl_relaxed_no_log(p); \ + data = readl_relaxed(p); \ p += sizeof(u32); \ } while (0) @@ -2041,7 +2041,7 @@ static u32 sde_hw_rotator_start_no_regdma(struct sde_hw_rotator_context *ctx, /* Write all command stream to Rotator blocks */ /* Rotator will start right away after command stream finish writing */ while (mem_rdptr < wrptr) { - u32 op = REGDMA_OP_MASK & readl_relaxed_no_log(mem_rdptr); + u32 op = REGDMA_OP_MASK & readl_relaxed(mem_rdptr); switch (op) { case REGDMA_OP_NOP: diff --git a/drivers/platform/msm/qcom-geni-se.c b/drivers/platform/msm/qcom-geni-se.c index 279e45efefca..fc48851a5728 100644 --- a/drivers/platform/msm/qcom-geni-se.c +++ b/drivers/platform/msm/qcom-geni-se.c @@ -159,7 +159,7 @@ static int geni_se_iommu_map_and_attach(struct geni_se_device *geni_se_dev); */ unsigned int geni_read_reg_nolog(void __iomem *base, int offset) { - return readl_relaxed_no_log(base + offset); + return readl_relaxed(base + offset); } EXPORT_SYMBOL(geni_read_reg_nolog); @@ -171,7 +171,7 @@ EXPORT_SYMBOL(geni_read_reg_nolog); */ void geni_write_reg_nolog(unsigned int value, void __iomem *base, int offset) { - return writel_relaxed_no_log(value, (base + offset)); + return writel_relaxed(value, (base + offset)); } EXPORT_SYMBOL(geni_write_reg_nolog); diff --git a/drivers/soc/qcom/boot_stats.c b/drivers/soc/qcom/boot_stats.c index 86512038bd1a..7ca6961d9fba 100644 --- a/drivers/soc/qcom/boot_stats.c +++ b/drivers/soc/qcom/boot_stats.c @@ -127,11 +127,11 @@ unsigned long long int msm_timer_get_sclk_ticks(void) if (!sclk_tick) return -EINVAL; while (loop_zero_count--) { - t1 = __raw_readl_no_log(sclk_tick); + t1 = 
__raw_readl(sclk_tick); do { udelay(1); t2 = t1; - t1 = __raw_readl_no_log(sclk_tick); + t1 = __raw_readl(sclk_tick); } while ((t2 != t1) && --loop_count); if (!loop_count) { pr_err("boot_stats: SCLK did not stabilize\n"); diff --git a/drivers/soc/qcom/dcc_v2.c b/drivers/soc/qcom/dcc_v2.c index 4ce4eb0bddb3..b0df7ca34f3e 100644 --- a/drivers/soc/qcom/dcc_v2.c +++ b/drivers/soc/qcom/dcc_v2.c @@ -197,7 +197,7 @@ static void dcc_sram_memset(const struct device *dev, void __iomem *dst, } while (count >= 4) { - __raw_writel_no_log(qc, dst); + __raw_writel(qc, dst); dst += 4; count -= 4; } @@ -213,7 +213,7 @@ static int dcc_sram_memcpy(void *to, const void __iomem *from, } while (count >= 4) { - *(unsigned int *)to = __raw_readl_no_log(from); + *(unsigned int *)to = __raw_readl(from); to += 4; from += 4; count -= 4; @@ -1929,7 +1929,7 @@ static int dcc_v2_restore(struct device *dev) data = drvdata->sram_save_state; for (i = 0; i < drvdata->ram_size / 4; i++) - __raw_writel_no_log(data[i], + __raw_writel(data[i], drvdata->ram_base + (i * 4)); state = drvdata->reg_save_state; diff --git a/drivers/soc/qcom/jtagv8-etm.c b/drivers/soc/qcom/jtagv8-etm.c index e118e51e624e..a3080251aaf8 100644 --- a/drivers/soc/qcom/jtagv8-etm.c +++ b/drivers/soc/qcom/jtagv8-etm.c @@ -186,7 +186,7 @@ /* spread out etm register write */ #define etm_writel(etm, val, off) \ do { \ - writel_relaxed_no_log(val, etm->base + off); \ + writel_relaxed(val, etm->base + off); \ udelay(20); \ } while (0) @@ -194,13 +194,13 @@ do { \ __raw_writel(val, etm->base + off) #define etm_readl(etm, off) \ - readl_relaxed_no_log(etm->base + off) + readl_relaxed(etm->base + off) #define etm_writeq(etm, val, off) \ - writeq_relaxed_no_log(val, etm->base + off) + writeq_relaxed(val, etm->base + off) #define etm_readq(etm, off) \ - readq_relaxed_no_log(etm->base + off) + readq_relaxed(etm->base + off) #define ETM_LOCK(base) \ do { \ diff --git a/drivers/thermal/tsens2xxx.c b/drivers/thermal/tsens2xxx.c index 
0f50a15ea931..15c7aee0b7ff 100644 --- a/drivers/thermal/tsens2xxx.c +++ b/drivers/thermal/tsens2xxx.c @@ -155,7 +155,7 @@ static int tsens2xxx_get_temp(struct tsens_sensor *sensor, int *temp) sensor_addr = TSENS_TM_SN_STATUS(tmdev->tsens_tm_addr); trdy = TSENS_TM_TRDY(tmdev->tsens_tm_addr); - code = readl_relaxed_no_log(trdy); + code = readl_relaxed(trdy); if (!((code & TSENS_TM_TRDY_FIRST_ROUND_COMPLETE) >> TSENS_TM_TRDY_FIRST_ROUND_COMPLETE_SHIFT)) { @@ -170,7 +170,7 @@ static int tsens2xxx_get_temp(struct tsens_sensor *sensor, int *temp) /* Wait for 2.5 ms for tsens controller to recover */ do { udelay(500); - code = readl_relaxed_no_log(trdy); + code = readl_relaxed(trdy); if (code & TSENS_TM_TRDY_FIRST_ROUND_COMPLETE) { TSENS_DUMP(tmdev, "%s", "tsens controller recovered\n"); @@ -296,7 +296,7 @@ sensor_read: tmdev->trdy_fail_ctr = 0; - code = readl_relaxed_no_log(sensor_addr + + code = readl_relaxed(sensor_addr + (sensor->hw_id << TSENS_STATUS_ADDR_OFFSET)); last_temp = code & TSENS_TM_SN_LAST_TEMP_MASK; @@ -305,7 +305,7 @@ sensor_read: goto dbg; } - code = readl_relaxed_no_log(sensor_addr + + code = readl_relaxed(sensor_addr + (sensor->hw_id << TSENS_STATUS_ADDR_OFFSET)); last_temp2 = code & TSENS_TM_SN_LAST_TEMP_MASK; if (code & TSENS_TM_SN_STATUS_VALID_BIT) { @@ -314,7 +314,7 @@ sensor_read: goto dbg; } - code = readl_relaxed_no_log(sensor_addr + + code = readl_relaxed(sensor_addr + (sensor->hw_id << TSENS_STATUS_ADDR_OFFSET)); last_temp3 = code & TSENS_TM_SN_LAST_TEMP_MASK; From 7f2e6f150f55b1beb3f28353b81f3a393441c561 Mon Sep 17 00:00:00 2001 From: Samuel Pascua Date: Fri, 26 Dec 2025 17:13:58 +0800 Subject: [PATCH 55/85] ARM64: configs: a71: remove QCOM_RTB Change-Id: I1d15b4b2302c356a311b4f48d6370b83d7addde3 Signed-off-by: Samuel Pascua --- arch/arm64/configs/a71_defconfig | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/configs/a71_defconfig b/arch/arm64/configs/a71_defconfig index b025acb3fc27..1e046855de76 100644 --- 
a/arch/arm64/configs/a71_defconfig +++ b/arch/arm64/configs/a71_defconfig @@ -6191,8 +6191,6 @@ CONFIG_RING_BUFFER=y CONFIG_EVENT_TRACING=y CONFIG_CONTEXT_SWITCH_TRACER=y CONFIG_IPC_LOGGING=y -CONFIG_QCOM_RTB=y -CONFIG_QCOM_RTB_SEPARATE_CPUS=y CONFIG_TRACING=y CONFIG_GENERIC_TRACER=y CONFIG_TRACING_SUPPORT=y From f34bd568d6c33ec5087f35fb7b75aa22ab29e552 Mon Sep 17 00:00:00 2001 From: "J. Avila" Date: Fri, 4 Dec 2020 22:36:19 +0000 Subject: [PATCH 56/85] hwtracing: Add a driver for disabling coresight clocks In certain configs which don't use coresight, the clocks are left on, leading to power regressions. Add a driver which can disable them. Bug: 170753932 Signed-off-by: J. Avila Signed-off-by: Yabin Cui Signed-off-by: Adithya R Signed-off-by: Fiqri Ardyansyah --- drivers/Kconfig | 2 + drivers/Makefile | 1 + drivers/hwtracing/google/Kconfig | 15 +++ drivers/hwtracing/google/Makefile | 3 + .../google/coresight-clk-amba-placeholder.c | 95 +++++++++++++++++++ .../google/coresight-clk-placeholder.c | 43 +++++++++ 6 files changed, 159 insertions(+) create mode 100644 drivers/hwtracing/google/Kconfig create mode 100644 drivers/hwtracing/google/Makefile create mode 100644 drivers/hwtracing/google/coresight-clk-amba-placeholder.c create mode 100644 drivers/hwtracing/google/coresight-clk-placeholder.c diff --git a/drivers/Kconfig b/drivers/Kconfig index 53c2f5dd9b75..3fba14ef5157 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -221,6 +221,8 @@ source "drivers/hwtracing/stm/Kconfig" source "drivers/hwtracing/intel_th/Kconfig" +source "drivers/hwtracing/google/Kconfig" + source "drivers/fpga/Kconfig" source "drivers/fsi/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index efc6d69b45d7..3756b212ae51 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -187,6 +187,7 @@ obj-$(CONFIG_RAS) += ras/ obj-$(CONFIG_THUNDERBOLT) += thunderbolt/ obj-$(CONFIG_CORESIGHT) += hwtracing/coresight/ obj-y += hwtracing/intel_th/ +obj-y += hwtracing/google/ obj-$(CONFIG_STM) 
+= hwtracing/stm/ obj-$(CONFIG_ANDROID) += android/ obj-$(CONFIG_NVMEM) += nvmem/ diff --git a/drivers/hwtracing/google/Kconfig b/drivers/hwtracing/google/Kconfig new file mode 100644 index 000000000000..434e45009a9a --- /dev/null +++ b/drivers/hwtracing/google/Kconfig @@ -0,0 +1,15 @@ +config CORESIGHT_PLACEHOLDER + tristate "Coresight device placeholder driver" + default y + depends on !CORESIGHT + help + For targets which do not use coresight, this option enables a placeholder + which probes coresight devices to turn down clocks to save power. + +config CORESIGHT_AMBA_PLACEHOLDER + tristate "Coresight primecell device placeholder driver" + default y + depends on !CORESIGHT + help + For targets which do not use coresight, this option enables a placeholder + which probes coresight AMBA devices to turn down clocks to save power. diff --git a/drivers/hwtracing/google/Makefile b/drivers/hwtracing/google/Makefile new file mode 100644 index 000000000000..5234112456cd --- /dev/null +++ b/drivers/hwtracing/google/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_CORESIGHT_PLACEHOLDER) += coresight-clk-placeholder.o +obj-$(CONFIG_CORESIGHT_AMBA_PLACEHOLDER) += coresight-clk-amba-placeholder.o diff --git a/drivers/hwtracing/google/coresight-clk-amba-placeholder.c b/drivers/hwtracing/google/coresight-clk-amba-placeholder.c new file mode 100644 index 000000000000..f221212febc5 --- /dev/null +++ b/drivers/hwtracing/google/coresight-clk-amba-placeholder.c @@ -0,0 +1,95 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, Google LLC. All rights reserved. 
+ * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static int coresight_clk_disable_amba_probe(struct amba_device *adev, + const struct amba_id *id) +{ + pm_runtime_put(&adev->dev); + return 0; +} + +#define ETM4x_AMBA_ID(pid) \ + { \ + .id = pid, .mask = 0x000fffff, \ + } + +#define TMC_ETR_AXI_ARCACHE (0x1U << 1) +#define TMC_ETR_SAVE_RESTORE (0x1U << 2) +#define CORESIGHT_SOC_600_ETR_CAPS (TMC_ETR_SAVE_RESTORE | TMC_ETR_AXI_ARCACHE) + +static const struct amba_id coresight_ids[] = { + /* ETM4 IDs */ + ETM4x_AMBA_ID(0x000bb95d), /* Cortex-A53 */ + ETM4x_AMBA_ID(0x000bb95e), /* Cortex-A57 */ + ETM4x_AMBA_ID(0x000bb95a), /* Cortex-A72 */ + ETM4x_AMBA_ID(0x000bb959), /* Cortex-A73 */ + ETM4x_AMBA_ID(0x000bb9da), /* Cortex-A35 */ + /* dynamic-replicator IDs */ + { + .id = 0x000bb909, + .mask = 0x000fffff, + }, + { + /* Coresight SoC-600 */ + .id = 0x000bb9ec, + .mask = 0x000fffff, + }, + /* dynamic-funnel IDs */ + { + .id = 0x000bb908, + .mask = 0x000fffff, + }, + { + /* Coresight SoC-600 */ + .id = 0x000bb9eb, + .mask = 0x000fffff, + }, + /* coresight-tmc IDs */ + { + .id = 0x000bb961, + .mask = 0x000fffff, + }, + { + /* Coresight SoC 600 TMC-ETR/ETS */ + .id = 0x000bb9e8, + .mask = 0x000fffff, + .data = (void *)(unsigned long)CORESIGHT_SOC_600_ETR_CAPS, + }, + { + /* Coresight SoC 600 TMC-ETB */ + .id = 0x000bb9e9, + .mask = 0x000fffff, + }, + { + /* Coresight SoC 600 TMC-ETF */ + .id = 0x000bb9ea, + .mask = 0x000fffff, + }, + { 0, 0 }, +}; + +static struct amba_driver coresight_clk_disable_amba_driver = { + .drv = { + .name = "coresight-clk-disable-amba", + .suppress_bind_attrs = true, + }, + .probe = coresight_clk_disable_amba_probe, + .id_table = coresight_ids, +}; + +module_amba_driver(coresight_clk_disable_amba_driver); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("CoreSight DEBUGv8 and ETMv4 clock disable AMBA driver stub"); +MODULE_AUTHOR("J. 
Avila "); diff --git a/drivers/hwtracing/google/coresight-clk-placeholder.c b/drivers/hwtracing/google/coresight-clk-placeholder.c new file mode 100644 index 000000000000..8ac1a9df0a1e --- /dev/null +++ b/drivers/hwtracing/google/coresight-clk-placeholder.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, Google LLC. All rights reserved. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +static int coresight_clk_disable_probe(struct platform_device *pdev) +{ + return 0; +} + +static int coresight_clk_disable_remove(struct platform_device *pdev) +{ + return 0; +} + +static const struct of_device_id coresight_clk_disable_match[] = { + { .compatible = "qcom,coresight-csr" }, + {} +}; + +static struct platform_driver coresight_clk_disable_driver = { + .probe = coresight_clk_disable_probe, + .remove = coresight_clk_disable_remove, + .driver = { + .name = "coresight-clk-disable", + .of_match_table = coresight_clk_disable_match, + }, +}; + +module_platform_driver(coresight_clk_disable_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("CoreSight DEBUGv8 and ETMv4 clock disable driver stub"); +MODULE_AUTHOR("J. 
Avila "); From 8ea1c50f0ce2d820bce3e871c82c26d61c144089 Mon Sep 17 00:00:00 2001 From: Fiqri Ardyansyah Date: Sat, 27 Jan 2024 14:13:37 +0700 Subject: [PATCH 57/85] hwtracing: coresight: Add coresight IDs from sdmmagpie cat arch/arm64/boot/dts/qcom/sdmmagpie-coresight.dtsi | grep primecell-periphid | cut -c29- | sed "s/>;//g;s/^/ETM4x_AMBA_ID(/g;s/$/),/g" | sort -u Signed-off-by: Fiqri Ardyansyah --- .../hwtracing/google/coresight-clk-amba-placeholder.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/hwtracing/google/coresight-clk-amba-placeholder.c b/drivers/hwtracing/google/coresight-clk-amba-placeholder.c index f221212febc5..0a73cdbfa96c 100644 --- a/drivers/hwtracing/google/coresight-clk-amba-placeholder.c +++ b/drivers/hwtracing/google/coresight-clk-amba-placeholder.c @@ -36,6 +36,16 @@ static const struct amba_id coresight_ids[] = { ETM4x_AMBA_ID(0x000bb95a), /* Cortex-A72 */ ETM4x_AMBA_ID(0x000bb959), /* Cortex-A73 */ ETM4x_AMBA_ID(0x000bb9da), /* Cortex-A35 */ + /* sdmmagpie coresight IDs */ + ETM4x_AMBA_ID(0x0003b908), + ETM4x_AMBA_ID(0x0003b909), + ETM4x_AMBA_ID(0x0003b961), + ETM4x_AMBA_ID(0x0003b962), + ETM4x_AMBA_ID(0x0003b966), + ETM4x_AMBA_ID(0x0003b968), + ETM4x_AMBA_ID(0x0003b969), + ETM4x_AMBA_ID(0x0003b999), + ETM4x_AMBA_ID(0x000bb95d), /* dynamic-replicator IDs */ { .id = 0x000bb909, From 5b33eef4f9b7735309a497dc7d95bca44b655f2f Mon Sep 17 00:00:00 2001 From: Samuel Pascua Date: Fri, 26 Dec 2025 17:16:03 +0800 Subject: [PATCH 58/85] ARM64: configs: a71: disable coresight Change-Id: Iff16307e360f1eff522d5bb998af79e72747a0b9 Signed-off-by: Samuel Pascua --- arch/arm64/configs/a71_defconfig | 32 ++++---------------------------- 1 file changed, 4 insertions(+), 28 deletions(-) diff --git a/arch/arm64/configs/a71_defconfig b/arch/arm64/configs/a71_defconfig index 1e046855de76..513253e051a8 100644 --- a/arch/arm64/configs/a71_defconfig +++ b/arch/arm64/configs/a71_defconfig @@ -5524,11 +5524,10 @@ CONFIG_DAX=y CONFIG_NVMEM=y # 
CONFIG_QCOM_QFPROM is not set CONFIG_NVMEM_SPMI_SDAM=y -CONFIG_STM=y -# CONFIG_STM_DUMMY is not set -# CONFIG_STM_SOURCE_CONSOLE is not set -# CONFIG_STM_SOURCE_HEARTBEAT is not set +# CONFIG_STM is not set # CONFIG_INTEL_TH is not set +CONFIG_CORESIGHT_PLACEHOLDER=y +CONFIG_CORESIGHT_AMBA_PLACEHOLDER=y # CONFIG_FPGA is not set # @@ -6272,30 +6271,7 @@ CONFIG_DEBUG_ALIGN_RODATA=y # CONFIG_SEC_PM=y CONFIG_SEC_PM_DEBUG=y -CONFIG_CORESIGHT=y -CONFIG_CORESIGHT_LINKS_AND_SINKS=y -CONFIG_CORESIGHT_LINK_AND_SINK_TMC=y -# CONFIG_CORESIGHT_CATU is not set -# CONFIG_CORESIGHT_SINK_TPIU is not set -# CONFIG_CORESIGHT_SINK_ETBV10 is not set -# CONFIG_CORESIGHT_SOURCE_ETM4X is not set -CONFIG_CORESIGHT_DYNAMIC_REPLICATOR=y -# CONFIG_CORESIGHT_DBGUI is not set -CONFIG_CORESIGHT_STM=y -# CONFIG_CORESIGHT_CPU_DEBUG is not set -CONFIG_CORESIGHT_CTI=y -CONFIG_CORESIGHT_OST=y -CONFIG_CORESIGHT_TPDA=y -CONFIG_CORESIGHT_TPDM=y -# CONFIG_CORESIGHT_TPDM_DEFAULT_ENABLE is not set -# CONFIG_CORESIGHT_QPDI is not set -CONFIG_CORESIGHT_HWEVENT=y -CONFIG_CORESIGHT_DUMMY=y -CONFIG_CORESIGHT_REMOTE_ETM=y -CONFIG_CORESIGHT_REMOTE_ETM_DEFAULT_ENABLE=0 -CONFIG_CORESIGHT_CSR=y -# CONFIG_CORESIGHT_TGU is not set -CONFIG_CORESIGHT_EVENT=y +# CONFIG_CORESIGHT is not set # # Security options From 00a256b5326c8117c65369a78f93c69a35391a69 Mon Sep 17 00:00:00 2001 From: Kazuki H Date: Sat, 23 Sep 2023 18:39:13 +0900 Subject: [PATCH 59/85] sched/idle: Enter wfi state instead of polling during active migration WFI's wakeup latency is low enough, use that instead of polling and burning power. 
Change-Id: Iee1c1cdf515224267925037a859c6a74fc61abb7 Signed-off-by: Kazuki H Signed-off-by: Alexander Winkowski --- drivers/cpuidle/lpm-levels.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/cpuidle/lpm-levels.c b/drivers/cpuidle/lpm-levels.c index 4fcb80ef9072..c8084bb6a58b 100644 --- a/drivers/cpuidle/lpm-levels.c +++ b/drivers/cpuidle/lpm-levels.c @@ -52,6 +52,7 @@ #elif defined(CONFIG_COMMON_CLK_MSM) #include "../../drivers/clk/msm/clock.h" #endif /* CONFIG_COMMON_CLK */ +#include "../../kernel/sched/sched.h" #define CREATE_TRACE_POINTS #include @@ -723,7 +724,8 @@ static int cpu_power_select(struct cpuidle_device *dev, struct power_params *pwr_params; uint64_t bias_time = 0; - if ((sleep_disabled && !cpu_isolated(dev->cpu)) || sleep_us < 0) + if ((sleep_disabled && !cpu_isolated(dev->cpu)) || + is_reserved(dev->cpu) || sleep_us < 0) return best_level; idx_restrict = cpu->nlevels + 1; From f94b7cd5fbf97e4678f7d083383791c5ee6ddfdd Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Sun, 21 Feb 2021 13:40:26 -0800 Subject: [PATCH 60/85] msm: kgsl: Remove L2PC PM QoS feature KGSL already has PM QoS covering what matters. The L2PC PM QoS code is not only unneeded, but also unused, so remove it. It's poorly designed anyway since it uses a timeout with PM QoS, which is drastically bad for power consumption. 
Change-Id: I3aba9f5c0cf09d8c5e13e5c5e87e20456ca1c5f4 Signed-off-by: Sultan Alsawaf --- .../devicetree/bindings/gpu/adreno.txt | 7 ---- drivers/gpu/msm/adreno.c | 9 ----- drivers/gpu/msm/adreno_dispatch.c | 4 -- drivers/gpu/msm/kgsl.c | 19 --------- drivers/gpu/msm/kgsl_pwrctrl.c | 40 ------------------- drivers/gpu/msm/kgsl_pwrctrl.h | 21 ---------- 6 files changed, 100 deletions(-) diff --git a/Documentation/devicetree/bindings/gpu/adreno.txt b/Documentation/devicetree/bindings/gpu/adreno.txt index 37d769c9ea05..20a4b4504cd7 100644 --- a/Documentation/devicetree/bindings/gpu/adreno.txt +++ b/Documentation/devicetree/bindings/gpu/adreno.txt @@ -194,13 +194,6 @@ Optional Properties: Specify the number of macrotiling channels for this chip. This is programmed into certain registers and also pass to the user as a property. -- qcom,l2pc-cpu-mask: - Disables L2PC on masked CPUs when any of Graphics - rendering thread is running on masked CPUs. - Bit 0 is for CPU-0, bit 1 is for CPU-1... - -- qcom,l2pc-update-queue: - Disables L2PC on masked CPUs at queue time when it's true. - qcom,snapshot-size: Specify the size of snapshot in bytes. 
This will override diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index bc0aa640dc22..ae5a8d96b41b 100644 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -1149,11 +1149,6 @@ static int adreno_of_get_power(struct adreno_device *adreno_dev, &device->pwrctrl.pm_qos_active_latency)) device->pwrctrl.pm_qos_active_latency = 501; - /* get pm-qos-cpu-mask-latency, set it to default if not found */ - if (of_property_read_u32(node, "qcom,l2pc-cpu-mask-latency", - &device->pwrctrl.pm_qos_cpu_mask_latency)) - device->pwrctrl.pm_qos_cpu_mask_latency = 501; - /* get pm-qos-wakeup-latency, set it to default if not found */ if (of_property_read_u32(node, "qcom,pm-qos-wakeup-latency", &device->pwrctrl.pm_qos_wakeup_latency)) @@ -1930,10 +1925,6 @@ static int _adreno_start(struct adreno_device *adreno_dev) /* make sure ADRENO_DEVICE_STARTED is not set here */ WARN_ON(test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)); - /* disallow l2pc during wake up to improve GPU wake up time */ - kgsl_pwrctrl_update_l2pc(&adreno_dev->dev, - KGSL_L2PC_WAKEUP_TIMEOUT); - pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma, pmqos_wakeup_vote); diff --git a/drivers/gpu/msm/adreno_dispatch.c b/drivers/gpu/msm/adreno_dispatch.c index af62806f1868..481ab12ea7ff 100644 --- a/drivers/gpu/msm/adreno_dispatch.c +++ b/drivers/gpu/msm/adreno_dispatch.c @@ -1453,10 +1453,6 @@ int adreno_dispatcher_queue_cmds(struct kgsl_device_private *dev_priv, spin_unlock(&drawctxt->lock); - if (device->pwrctrl.l2pc_update_queue) - kgsl_pwrctrl_update_l2pc(&adreno_dev->dev, - KGSL_L2PC_QUEUE_TIMEOUT); - /* Add the context to the dispatcher pending list */ dispatcher_queue_context(adreno_dev, drawctxt); diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c index a410f48e1bf9..788863647185 100644 --- a/drivers/gpu/msm/kgsl.c +++ b/drivers/gpu/msm/kgsl.c @@ -5166,7 +5166,6 @@ int kgsl_device_platform_probe(struct kgsl_device *device) { int status = -EINVAL; struct resource *res; - 
int cpu; status = _register_device(device); if (status) @@ -5303,22 +5302,6 @@ int kgsl_device_platform_probe(struct kgsl_device *device) PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE); - if (device->pwrctrl.l2pc_cpus_mask) { - struct pm_qos_request *qos = &device->pwrctrl.l2pc_cpus_qos; - - qos->type = PM_QOS_REQ_AFFINE_CORES; - - cpumask_empty(&qos->cpus_affine); - for_each_possible_cpu(cpu) { - if ((1 << cpu) & device->pwrctrl.l2pc_cpus_mask) - cpumask_set_cpu(cpu, &qos->cpus_affine); - } - - pm_qos_add_request(&device->pwrctrl.l2pc_cpus_qos, - PM_QOS_CPU_DMA_LATENCY, - PM_QOS_DEFAULT_VALUE); - } - device->events_wq = alloc_workqueue("kgsl-events", WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); @@ -5355,8 +5338,6 @@ void kgsl_device_platform_remove(struct kgsl_device *device) kgsl_pwrctrl_uninit_sysfs(device); pm_qos_remove_request(&device->pwrctrl.pm_qos_req_dma); - if (device->pwrctrl.l2pc_cpus_mask) - pm_qos_remove_request(&device->pwrctrl.l2pc_cpus_qos); idr_destroy(&device->context_idr); diff --git a/drivers/gpu/msm/kgsl_pwrctrl.c b/drivers/gpu/msm/kgsl_pwrctrl.c index 7eff18dfa1f7..0c6ba90d8781 100644 --- a/drivers/gpu/msm/kgsl_pwrctrl.c +++ b/drivers/gpu/msm/kgsl_pwrctrl.c @@ -592,35 +592,6 @@ void kgsl_pwrctrl_set_constraint(struct kgsl_device *device, } EXPORT_SYMBOL(kgsl_pwrctrl_set_constraint); -/** - * kgsl_pwrctrl_update_l2pc() - Update existing qos request - * @device: Pointer to the kgsl_device struct - * @timeout_us: the effective duration of qos request in usecs. - * - * Updates an existing qos request to avoid L2PC on the - * CPUs (which are selected through dtsi) on which GPU - * thread is running. This would help for performance. 
- */ -void kgsl_pwrctrl_update_l2pc(struct kgsl_device *device, - unsigned long timeout_us) -{ - int cpu; - - if (device->pwrctrl.l2pc_cpus_mask == 0) - return; - - cpu = get_cpu(); - put_cpu(); - - if ((1 << cpu) & device->pwrctrl.l2pc_cpus_mask) { - pm_qos_update_request_timeout( - &device->pwrctrl.l2pc_cpus_qos, - device->pwrctrl.pm_qos_cpu_mask_latency, - timeout_us); - } -} -EXPORT_SYMBOL(kgsl_pwrctrl_update_l2pc); - static ssize_t kgsl_pwrctrl_thermal_pwrlevel_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -2351,13 +2322,6 @@ int kgsl_pwrctrl_init(struct kgsl_device *device) pwr->power_flags = 0; - kgsl_property_read_u32(device, "qcom,l2pc-cpu-mask", - &pwr->l2pc_cpus_mask); - - pwr->l2pc_update_queue = of_property_read_bool( - device->pdev->dev.of_node, - "qcom,l2pc-update-queue"); - pm_runtime_enable(&pdev->dev); ocmem_bus_node = of_find_node_by_name( @@ -3033,10 +2997,6 @@ _slumber(struct kgsl_device *device) kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER); pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma, PM_QOS_DEFAULT_VALUE); - if (device->pwrctrl.l2pc_cpus_mask) - pm_qos_update_request( - &device->pwrctrl.l2pc_cpus_qos, - PM_QOS_DEFAULT_VALUE); break; case KGSL_STATE_SUSPEND: complete_all(&device->hwaccess_gate); diff --git a/drivers/gpu/msm/kgsl_pwrctrl.h b/drivers/gpu/msm/kgsl_pwrctrl.h index 526fa2accdf9..1531b1c07edf 100644 --- a/drivers/gpu/msm/kgsl_pwrctrl.h +++ b/drivers/gpu/msm/kgsl_pwrctrl.h @@ -57,19 +57,6 @@ #define KGSL_PWR_DEL_LIMIT 1 #define KGSL_PWR_SET_LIMIT 2 -/* - * The effective duration of qos request in usecs at queue time. - * After timeout, qos request is cancelled automatically. - * Kept 80ms default, inline with default GPU idle time. - */ -#define KGSL_L2PC_QUEUE_TIMEOUT (80 * 1000) - -/* - * The effective duration of qos request in usecs at wakeup time. - * After timeout, qos request is cancelled automatically. 
- */ -#define KGSL_L2PC_WAKEUP_TIMEOUT (10 * 1000) - enum kgsl_pwrctrl_timer_type { KGSL_PWR_IDLE_TIMER, }; @@ -150,9 +137,6 @@ struct kgsl_regulator { * @ahbpath_pcl - CPU to AHB path bus scale identifier * @irq_name - resource name for the IRQ * @clk_stats - structure of clock statistics - * @l2pc_cpus_mask - mask to avoid L2PC on masked CPUs - * @l2pc_update_queue - Boolean flag to avoid L2PC on masked CPUs at queue time - * @l2pc_cpus_qos - qos structure to avoid L2PC on CPUs * @pm_qos_req_dma - the power management quality of service structure * @pm_qos_active_latency - allowed CPU latency in microseconds when active * @pm_qos_cpu_mask_latency - allowed CPU mask latency in microseconds @@ -211,9 +195,6 @@ struct kgsl_pwrctrl { uint32_t ahbpath_pcl; const char *irq_name; struct kgsl_clk_stats clk_stats; - unsigned int l2pc_cpus_mask; - bool l2pc_update_queue; - struct pm_qos_request l2pc_cpus_qos; struct pm_qos_request pm_qos_req_dma; unsigned int pm_qos_active_latency; unsigned int pm_qos_cpu_mask_latency; @@ -286,7 +267,5 @@ int kgsl_active_count_wait(struct kgsl_device *device, int count); void kgsl_pwrctrl_busy_time(struct kgsl_device *device, u64 time, u64 busy); void kgsl_pwrctrl_set_constraint(struct kgsl_device *device, struct kgsl_pwr_constraint *pwrc, uint32_t id); -void kgsl_pwrctrl_update_l2pc(struct kgsl_device *device, - unsigned long timeout_us); void kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device); #endif /* __KGSL_PWRCTRL_H */ From 6a7d395bead7418400ca0d89ffbc6a5fc5979abe Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Sat, 26 Dec 2020 11:05:42 -0800 Subject: [PATCH 61/85] msm: kgsl: Wake GPU upon receiving an ioctl rather than upon touch input Waking the GPU upon touch wastes power when the screen is being touched in a way that does not induce animation or any actual need for GPU usage. 
Instead of preemptively waking the GPU on touch input, wake it up upon receiving a IOCTL_KGSL_GPU_COMMAND ioctl since it is a sign that the GPU will soon be needed. Change-Id: I6387083562578b229ea0913b5b2fa6562d4a85e9 Signed-off-by: Sultan Alsawaf --- drivers/gpu/msm/adreno.c | 173 ++---------------------------- drivers/gpu/msm/adreno.h | 5 +- drivers/gpu/msm/adreno_dispatch.c | 6 -- drivers/gpu/msm/adreno_sysfs.c | 2 - drivers/gpu/msm/kgsl_device.h | 1 - drivers/gpu/msm/kgsl_ioctl.c | 6 ++ drivers/gpu/msm/kgsl_pwrctrl.h | 2 - 7 files changed, 14 insertions(+), 181 deletions(-) diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c index ae5a8d96b41b..35355631eec7 100644 --- a/drivers/gpu/msm/adreno.c +++ b/drivers/gpu/msm/adreno.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -63,7 +62,7 @@ MODULE_PARM_DESC(swfdetect, "Enable soft fault detection"); #define KGSL_LOG_LEVEL_DEFAULT 3 -static void adreno_input_work(struct work_struct *work); +static void adreno_pwr_on_work(struct work_struct *work); static unsigned int counter_delta(struct kgsl_device *device, unsigned int reg, unsigned int *counter); @@ -104,8 +103,6 @@ static struct adreno_device device_3d0 = { .ft_policy = KGSL_FT_DEFAULT_POLICY, .ft_pf_policy = KGSL_FT_PAGEFAULT_DEFAULT_POLICY, .long_ib_detect = 1, - .input_work = __WORK_INITIALIZER(device_3d0.input_work, - adreno_input_work), .pwrctrl_flag = BIT(ADRENO_HWCG_CTRL) | BIT(ADRENO_THROTTLING_CTRL), .profile.enabled = false, .active_list = LIST_HEAD_INIT(device_3d0.active_list), @@ -117,6 +114,8 @@ static struct adreno_device device_3d0 = { .skipsaverestore = 1, .usesgmem = 1, }, + .pwr_on_work = __WORK_INITIALIZER(device_3d0.pwr_on_work, + adreno_pwr_on_work), }; /* Ptr to array for the current set of fault detect registers */ @@ -138,9 +137,6 @@ static unsigned int adreno_ft_regs_default[] = { /* Nice level for the higher priority GPU start thread */ int adreno_wake_nice = -7; -/* Number of 
milliseconds to stay active active after a wake on touch */ -unsigned int adreno_wake_timeout = 100; - /** * adreno_readreg64() - Read a 64bit register by getting its offset from the * offset array defined in gpudev node @@ -370,152 +366,17 @@ void adreno_fault_detect_stop(struct adreno_device *adreno_dev) adreno_dev->fast_hang_detect = 0; } -/* - * A workqueue callback responsible for actually turning on the GPU after a - * touch event. kgsl_pwrctrl_change_state(ACTIVE) is used without any - * active_count protection to avoid the need to maintain state. Either - * somebody will start using the GPU or the idle timer will fire and put the - * GPU back into slumber. - */ -static void adreno_input_work(struct work_struct *work) +static void adreno_pwr_on_work(struct work_struct *work) { - struct adreno_device *adreno_dev = container_of(work, - struct adreno_device, input_work); + struct adreno_device *adreno_dev = + container_of(work, typeof(*adreno_dev), pwr_on_work); struct kgsl_device *device = KGSL_DEVICE(adreno_dev); mutex_lock(&device->mutex); - - device->flags |= KGSL_FLAG_WAKE_ON_TOUCH; - - /* - * Don't schedule adreno_start in a high priority workqueue, we are - * already in a workqueue which should be sufficient - */ kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE); - - /* - * When waking up from a touch event we want to stay active long enough - * for the user to send a draw command. The default idle timer timeout - * is shorter than we want so go ahead and push the idle timer out - * further for this special case - */ - mod_timer(&device->idle_timer, - jiffies + msecs_to_jiffies(adreno_wake_timeout)); mutex_unlock(&device->mutex); } -/* - * Process input events and schedule work if needed. 
At this point we are only - * interested in groking EV_ABS touchscreen events - */ -static void adreno_input_event(struct input_handle *handle, unsigned int type, - unsigned int code, int value) -{ - struct kgsl_device *device = handle->handler->private; - struct adreno_device *adreno_dev = ADRENO_DEVICE(device); - - /* Only consider EV_ABS (touch) events */ - if (type != EV_ABS) - return; - - /* - * Don't do anything if anything hasn't been rendered since we've been - * here before - */ - - if (device->flags & KGSL_FLAG_WAKE_ON_TOUCH) - return; - - /* - * If the device is in nap, kick the idle timer to make sure that we - * don't go into slumber before the first render. If the device is - * already in slumber schedule the wake. - */ - - if (device->state == KGSL_STATE_NAP) { - /* - * Set the wake on touch bit to keep from coming back here and - * keeping the device in nap without rendering - */ - - device->flags |= KGSL_FLAG_WAKE_ON_TOUCH; - - mod_timer(&device->idle_timer, - jiffies + device->pwrctrl.interval_timeout); - } else if (device->state == KGSL_STATE_SLUMBER) { - schedule_work(&adreno_dev->input_work); - } -} - -#ifdef CONFIG_INPUT -static int adreno_input_connect(struct input_handler *handler, - struct input_dev *dev, const struct input_device_id *id) -{ - struct input_handle *handle; - int ret; - - handle = kzalloc(sizeof(*handle), GFP_KERNEL); - if (handle == NULL) - return -ENOMEM; - - handle->dev = dev; - handle->handler = handler; - handle->name = handler->name; - - ret = input_register_handle(handle); - if (ret) { - kfree(handle); - return ret; - } - - ret = input_open_device(handle); - if (ret) { - input_unregister_handle(handle); - kfree(handle); - } - - return ret; -} - -static void adreno_input_disconnect(struct input_handle *handle) -{ - input_close_device(handle); - input_unregister_handle(handle); - kfree(handle); -} -#else -static int adreno_input_connect(struct input_handler *handler, - struct input_dev *dev, const struct input_device_id 
*id) -{ - return 0; -} -static void adreno_input_disconnect(struct input_handle *handle) {} -#endif - -/* - * We are only interested in EV_ABS events so only register handlers for those - * input devices that have EV_ABS events - */ -static const struct input_device_id adreno_input_ids[] = { - { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT, - .evbit = { BIT_MASK(EV_ABS) }, - /* assumption: MT_.._X & MT_.._Y are in the same long */ - .absbit = { [BIT_WORD(ABS_MT_POSITION_X)] = - BIT_MASK(ABS_MT_POSITION_X) | - BIT_MASK(ABS_MT_POSITION_Y) }, - }, - { }, -}; - -static struct input_handler adreno_input_handler = { - .event = adreno_input_event, - .connect = adreno_input_connect, - .disconnect = adreno_input_disconnect, - .name = "kgsl", - .id_table = adreno_input_ids, -}; - /* * _soft_reset() - Soft reset GPU * @adreno_dev: Pointer to adreno device @@ -1162,9 +1023,6 @@ static int adreno_of_get_power(struct adreno_device *adreno_dev, device->pwrctrl.bus_control = of_property_read_bool(node, "qcom,bus-control"); - device->pwrctrl.input_disable = of_property_read_bool(node, - "qcom,disable-wake-on-touch"); - return 0; } @@ -1466,21 +1324,6 @@ static int adreno_probe(struct platform_device *pdev) "Failed to get gpuhtw LLC slice descriptor %ld\n", PTR_ERR(adreno_dev->gpuhtw_llc_slice)); -#ifdef CONFIG_INPUT - if (!device->pwrctrl.input_disable) { - adreno_input_handler.private = device; - /* - * It isn't fatal if we cannot register the input handler. 
Sad, - * perhaps, but not fatal - */ - if (input_register_handler(&adreno_input_handler)) { - adreno_input_handler.private = NULL; - KGSL_DRV_ERR(device, - "Unable to register the input handler\n"); - } - } -#endif - place_marker("M - DRIVER GPU Ready"); out: if (status) { @@ -1533,10 +1376,6 @@ static int adreno_remove(struct platform_device *pdev) /* The memory is fading */ _adreno_free_memories(adreno_dev); -#ifdef CONFIG_INPUT - if (adreno_input_handler.private) - input_unregister_handler(&adreno_input_handler); -#endif adreno_sysfs_close(adreno_dev); adreno_coresight_remove(adreno_dev); diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h index a3016d77c091..81ce051bed80 100644 --- a/drivers/gpu/msm/adreno.h +++ b/drivers/gpu/msm/adreno.h @@ -485,7 +485,7 @@ enum gpu_coresight_sources { * @dispatcher: Container for adreno GPU dispatcher * @pwron_fixup: Command buffer to run a post-power collapse shader workaround * @pwron_fixup_dwords: Number of dwords in the command buffer - * @input_work: Work struct for turning on the GPU after a touch event + * @pwr_on_work: Work struct for turning on the GPU * @busy_data: Struct holding GPU VBIF busy stats * @ram_cycles_lo: Number of DDR clock cycles for the monitor session (Only * DDR channel 0 read cycles in case of GBIF) @@ -565,7 +565,7 @@ struct adreno_device { struct adreno_dispatcher dispatcher; struct kgsl_memdesc pwron_fixup; unsigned int pwron_fixup_dwords; - struct work_struct input_work; + struct work_struct pwr_on_work; struct adreno_busy_data busy_data; unsigned int ram_cycles_lo; unsigned int ram_cycles_lo_ch1_read; @@ -1141,7 +1141,6 @@ extern struct adreno_gpudev adreno_a5xx_gpudev; extern struct adreno_gpudev adreno_a6xx_gpudev; extern int adreno_wake_nice; -extern unsigned int adreno_wake_timeout; int adreno_start(struct kgsl_device *device, int priority); int adreno_soft_reset(struct kgsl_device *device); diff --git a/drivers/gpu/msm/adreno_dispatch.c b/drivers/gpu/msm/adreno_dispatch.c 
index 481ab12ea7ff..4d2c9780f915 100644 --- a/drivers/gpu/msm/adreno_dispatch.c +++ b/drivers/gpu/msm/adreno_dispatch.c @@ -1153,12 +1153,6 @@ static inline int _verify_cmdobj(struct kgsl_device_private *dev_priv, &ADRENO_CONTEXT(context)->base, ib) == false) return -EINVAL; - /* - * Clear the wake on touch bit to indicate an IB has - * been submitted since the last time we set it. - * But only clear it when we have rendering commands. - */ - device->flags &= ~KGSL_FLAG_WAKE_ON_TOUCH; } /* A3XX does not have support for drawobj profiling */ diff --git a/drivers/gpu/msm/adreno_sysfs.c b/drivers/gpu/msm/adreno_sysfs.c index 13a5c0a67da8..fad09452f65d 100644 --- a/drivers/gpu/msm/adreno_sysfs.c +++ b/drivers/gpu/msm/adreno_sysfs.c @@ -649,7 +649,6 @@ static ADRENO_SYSFS_BOOL(gpu_llc_slice_enable); static ADRENO_SYSFS_BOOL(gpuhtw_llc_slice_enable); static DEVICE_INT_ATTR(wake_nice, 0644, adreno_wake_nice); -static DEVICE_INT_ATTR(wake_timeout, 0644, adreno_wake_timeout); static ADRENO_SYSFS_BOOL(sptp_pc); static ADRENO_SYSFS_BOOL(lm); @@ -674,7 +673,6 @@ static const struct device_attribute *_attr_list[] = { &adreno_attr_ft_long_ib_detect.attr, &adreno_attr_ft_hang_intr_status.attr, &dev_attr_wake_nice.attr, - &dev_attr_wake_timeout.attr, &adreno_attr_sptp_pc.attr, &adreno_attr_lm.attr, &adreno_attr_preemption.attr, diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h index b3e3ea603334..3b610ff5de6a 100644 --- a/drivers/gpu/msm/kgsl_device.h +++ b/drivers/gpu/msm/kgsl_device.h @@ -68,7 +68,6 @@ enum kgsl_event_results { KGSL_EVENT_CANCELLED = 2, }; -#define KGSL_FLAG_WAKE_ON_TOUCH BIT(0) #define KGSL_FLAG_SPARSE BIT(1) /* diff --git a/drivers/gpu/msm/kgsl_ioctl.c b/drivers/gpu/msm/kgsl_ioctl.c index 9b02e1993a09..fce411b3c781 100644 --- a/drivers/gpu/msm/kgsl_ioctl.c +++ b/drivers/gpu/msm/kgsl_ioctl.c @@ -17,6 +17,7 @@ #include #include "kgsl_device.h" #include "kgsl_sync.h" +#include "adreno.h" static const struct kgsl_ioctl kgsl_ioctl_funcs[] = 
{ KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_GETPROPERTY, @@ -168,8 +169,13 @@ long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { struct kgsl_device_private *dev_priv = filep->private_data; struct kgsl_device *device = dev_priv->device; + struct adreno_device *adreno_dev = ADRENO_DEVICE(device); long ret; + if (cmd == IOCTL_KGSL_GPU_COMMAND && + READ_ONCE(device->state) != KGSL_STATE_ACTIVE) + kgsl_schedule_work(&adreno_dev->pwr_on_work); + ret = kgsl_ioctl_helper(filep, cmd, arg, kgsl_ioctl_funcs, ARRAY_SIZE(kgsl_ioctl_funcs)); diff --git a/drivers/gpu/msm/kgsl_pwrctrl.h b/drivers/gpu/msm/kgsl_pwrctrl.h index 1531b1c07edf..efce1ab57f01 100644 --- a/drivers/gpu/msm/kgsl_pwrctrl.h +++ b/drivers/gpu/msm/kgsl_pwrctrl.h @@ -140,7 +140,6 @@ struct kgsl_regulator { * @pm_qos_req_dma - the power management quality of service structure * @pm_qos_active_latency - allowed CPU latency in microseconds when active * @pm_qos_cpu_mask_latency - allowed CPU mask latency in microseconds - * @input_disable - To disable GPU wakeup on touch input event * @pm_qos_wakeup_latency - allowed CPU latency in microseconds during wakeup * @bus_control - true if the bus calculation is independent * @bus_mod - modifier from the current power level for the bus vote @@ -199,7 +198,6 @@ struct kgsl_pwrctrl { unsigned int pm_qos_active_latency; unsigned int pm_qos_cpu_mask_latency; unsigned int pm_qos_wakeup_latency; - bool input_disable; bool bus_control; int bus_mod; unsigned int bus_percent_ab; From 22755a05d371abaedd24a82525bfe3509139d5bb Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Mon, 21 Sep 2020 16:39:49 +0200 Subject: [PATCH 62/85] UPSTREAM: sched/rt: Disable RT_RUNTIME_SHARE by default The RT_RUNTIME_SHARE sched feature enables the sharing of rt_runtime between CPUs, allowing a CPU to run a real-time task up to 100% of the time while leaving more space for non-real-time tasks to run on the CPU that lend rt_runtime. 
The problem is that a CPU can easily borrow enough rt_runtime to allow a spinning rt-task to run forever, starving per-cpu tasks like kworkers, which are non-real-time by design. This patch disables RT_RUNTIME_SHARE by default, avoiding this problem. The feature will still be present for users that want to enable it, though. Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Peter Zijlstra (Intel) Tested-by: Wei Wang Link: https://lkml.kernel.org/r/b776ab46817e3db5d8ef79175fa0d71073c051c7.1600697903.git.bristot@redhat.com (cherry picked from commit 2586af1ac187f6b3a50930a4e33497074e81762d) Change-Id: Ibb1b185d512130783ac9f0a29f0e20e9828c86fd Bug: 169673278 Test: build, boot and check the trace with RT task Signed-off-by: Kyle Lin Change-Id: Iffede8107863b02ad4a0cb902fc8119416931bdb --- kernel/sched/features.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 96636034bb2a..cc27c95ddf5a 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -84,7 +84,7 @@ SCHED_FEAT(WARN_DOUBLE_CLOCK, false) SCHED_FEAT(RT_PUSH_IPI, true) #endif -SCHED_FEAT(RT_RUNTIME_SHARE, true) +SCHED_FEAT(RT_RUNTIME_SHARE, false) SCHED_FEAT(LB_MIN, false) SCHED_FEAT(ATTACH_AGE_LOAD, true) From 6cddf1769c4c8cbf3ce8e38b1d6f3491668dc77a Mon Sep 17 00:00:00 2001 From: Connor O'Brien Date: Mon, 11 Feb 2019 18:07:54 -0800 Subject: [PATCH 63/85] cpufreq: schedutil: fix check for stale utilization values Part of the fix from commit d86ab9cff8b9 ("cpufreq: schedutil: use now as reference when aggregating shared policy requests") is reversed in commit 05d2ca242067 ("cpufreq: schedutil: Ignore CPU load older than WALT window size") due to a porting mistake. Restore it while keeping the relevant change from the latter patch. 
Bug: 117438867 Test: build & boot Change-Id: I21399be760d7c8e2fff6c158368a285dc6261647 Signed-off-by: Connor O'Brien Signed-off-by: Alexander Winkowski --- kernel/sched/cpufreq_schedutil.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 226a4ad15a83..75e603f573ee 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -479,7 +479,6 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; - u64 last_freq_update_time = sg_policy->last_freq_update_time; unsigned long util = 0, max = 1; unsigned int j; @@ -495,7 +494,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) * enough, don't take the CPU into account as it probably is * idle now (and clear iowait_boost for it). */ - delta_ns = last_freq_update_time - j_sg_cpu->last_update; + delta_ns = time - j_sg_cpu->last_update; if (delta_ns > stale_ns) { j_sg_cpu->iowait_boost = 0; j_sg_cpu->iowait_boost_pending = false; From 51d1d0cf50e5ca204df878a87defdd3a60dd170a Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Fri, 24 Nov 2023 23:10:24 -0800 Subject: [PATCH 64/85] cpufreq: schedutil: Use the frequency below the target if they're close Schedutil targets a frequency tipping point of 80% to vote for a higher frequency when utilization crosses that threshold. Since the tipping point calculation is done without regard to the size of the gap between each frequency step, this often results in a large frequency jump when it isn't strictly necessary, which hurts energy efficiency. For example, if a CPU has 2000 MHz and 3000 MHz frequency steps, and schedutil targets a frequency of 2005 MHz, then the 3000 MHz frequency step will be used even though the target frequency of 2005 MHz is very close to 2000 MHz. 
In this hypothetical scenario, using 2000 MHz would clearly satisfy the system's performance needs while consuming less energy than the 3000 MHz step. To counter-balance the frequency tipping point, add a frequency tipping point in the opposite direction to prefer the frequency step below the calculated target frequency when the target frequency is less than 20% higher than that lower step. A threshold of 20% was empirically determined to provide significant energy savings without really impacting performance. This improves schedutil's energy efficiency on CPUs which have large gaps between their frequency steps, as is often the case on ARM. Change-Id: Ie75b79e5eb9f52c966848a9fb1c8016d7ae22098 Signed-off-by: Sultan Alsawaf --- kernel/sched/cpufreq_schedutil.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 75e603f573ee..a2b35f351396 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -287,6 +287,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, struct cpufreq_policy *policy = sg_policy->policy; unsigned int freq = arch_scale_freq_invariant() ? 
policy->cpuinfo.max_freq : policy->cur; + unsigned int idx, l_freq, h_freq; freq = (freq + (freq >> 2)) * util / max; trace_sugov_next_freq(policy->cpu, util, max, freq); @@ -294,7 +295,21 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy, if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX) return sg_policy->next_freq; sg_policy->cached_raw_freq = freq; - return cpufreq_driver_resolve_freq(policy, freq); + l_freq = cpufreq_driver_resolve_freq(policy, freq); + idx = cpufreq_frequency_table_target(policy, freq, CPUFREQ_RELATION_H); + h_freq = policy->freq_table[idx].frequency; + h_freq = clamp(h_freq, policy->min, policy->max); + if (l_freq <= h_freq || l_freq == policy->min) + return l_freq; + + /* + * Use the frequency step below if the calculated frequency is <20% + * higher than it. + */ + if (mult_frac(100, freq - h_freq, l_freq - h_freq) < 20) + return h_freq; + + return l_freq; } static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu) From c1e31c8a1e9a7f271d0b6b2a1bea421cc456315c Mon Sep 17 00:00:00 2001 From: Alexander Winkowski Date: Fri, 13 Aug 2021 15:46:42 +0200 Subject: [PATCH 65/85] Revert "sched: Improve the scheduler" This reverts commit 92daaf50af35dfc47f96090ba4fab8468ed054fe. 
Change-Id: I52d562da3c755f114d459ad09813188697ca81d8 --- kernel/sched/core.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 4a80d3a3ea69..2f88e73fbe96 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4940,10 +4940,8 @@ unsigned int sched_lib_mask_force; bool is_sched_lib_based_app(pid_t pid) { const char *name = NULL; - char *libname, *lib_list; struct vm_area_struct *vma; char path_buf[LIB_PATH_LENGTH]; - char *tmp_lib_name; bool found = false; struct task_struct *p; struct mm_struct *mm; @@ -4951,16 +4949,11 @@ bool is_sched_lib_based_app(pid_t pid) if (strnlen(sched_lib_name, LIB_PATH_LENGTH) == 0) return false; - tmp_lib_name = kmalloc(LIB_PATH_LENGTH, GFP_KERNEL); - if (!tmp_lib_name) - return false; - rcu_read_lock(); p = find_process_by_pid(pid); if (!p) { rcu_read_unlock(); - kfree(tmp_lib_name); return false; } @@ -4980,15 +4973,10 @@ bool is_sched_lib_based_app(pid_t pid) if (IS_ERR(name)) goto release_sem; - strlcpy(tmp_lib_name, sched_lib_name, LIB_PATH_LENGTH); - lib_list = tmp_lib_name; - while ((libname = strsep(&lib_list, ","))) { - libname = skip_spaces(libname); - if (strnstr(name, libname, + if (strnstr(name, sched_lib_name, strnlen(name, LIB_PATH_LENGTH))) { - found = true; - goto release_sem; - } + found = true; + break; } } } @@ -4998,7 +4986,6 @@ release_sem: mmput(mm); put_task_struct: put_task_struct(p); - kfree(tmp_lib_name); return found; } From 77f1ecf303abe773aedcbe8c98df16eed9763e11 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 24 May 2019 19:18:36 -0700 Subject: [PATCH 66/85] Revert "sched: Improve the scheduler" This reverts commit a3dd94a1bb80ec98924070f28ba80d93a4d559a6. 
Bug:133481659 Test: build Change-Id: Ib23609315f3446223521612621fe54469537c172 Signed-off-by: Wei Wang --- drivers/cpufreq/cpufreq.c | 32 +------------------------------- drivers/cpufreq/freq_table.c | 3 --- include/linux/cpufreq.h | 2 -- include/linux/sched/sysctl.h | 2 +- kernel/compat.c | 2 +- kernel/sched/core.c | 18 ++++++++++++++++-- kernel/sysctl.c | 9 +++++++++ 7 files changed, 28 insertions(+), 40 deletions(-) diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 5aa6f91831d5..acf39ae2eb8d 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -31,7 +31,6 @@ #include #include #include -#include #include @@ -660,40 +659,11 @@ static ssize_t show_##file_name \ } show_one(cpuinfo_min_freq, cpuinfo.min_freq); +show_one(cpuinfo_max_freq, cpuinfo.max_freq); show_one(cpuinfo_transition_latency, cpuinfo.transition_latency); show_one(scaling_min_freq, min); show_one(scaling_max_freq, max); -unsigned int cpuinfo_max_freq_cached; - -static bool should_use_cached_freq(int cpu) -{ - /* This is a safe check. may not be needed */ - if (!cpuinfo_max_freq_cached) - return false; - - /* - * perfd already configure sched_lib_mask_force to - * 0xf0 from user space. so re-using it. 
- */ - if (!(BIT(cpu) & sched_lib_mask_force)) - return false; - - return is_sched_lib_based_app(current->pid); -} - -static ssize_t show_cpuinfo_max_freq(struct cpufreq_policy *policy, char *buf) -{ - unsigned int freq = policy->cpuinfo.max_freq; - - if (should_use_cached_freq(policy->cpu)) - freq = cpuinfo_max_freq_cached << 1; - else - freq = policy->cpuinfo.max_freq; - - return scnprintf(buf, PAGE_SIZE, "%u\n", freq); -} - __weak unsigned int arch_freq_get_on_cpu(int cpu) { return 0; diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index df38cc64a87a..deafbdfa31d1 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -62,9 +62,6 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, policy->min = policy->cpuinfo.min_freq = min_freq; policy->max = policy->cpuinfo.max_freq = max_freq; - if (max_freq > cpuinfo_max_freq_cached) - cpuinfo_max_freq_cached = max_freq; - if (policy->min == ~0) return -EINVAL; else diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 834403072a70..e0f6d42b3622 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -957,6 +957,4 @@ unsigned int cpufreq_generic_get(unsigned int cpu); int cpufreq_generic_init(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table, unsigned int transition_latency); - -extern unsigned int cpuinfo_max_freq_cached; #endif /* _LINUX_CPUFREQ_H */ diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index c4da504fbfc0..72e441cc7575 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -136,7 +136,7 @@ extern int sched_little_cluster_coloc_fmin_khz_handler(struct ctl_table *table, #define LIB_PATH_LENGTH 512 extern char sched_lib_name[LIB_PATH_LENGTH]; +extern unsigned int sched_lib_mask_check; extern unsigned int sched_lib_mask_force; -extern bool is_sched_lib_based_app(pid_t pid); #endif /* _LINUX_SCHED_SYSCTL_H */ diff --git a/kernel/compat.c 
b/kernel/compat.c index 63d10b91f80f..78ab1233d86b 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -334,7 +334,7 @@ COMPAT_SYSCALL_DEFINE3(sched_setaffinity, compat_pid_t, pid, if (retval) goto out; - retval = sched_setaffinity(pid, new_mask); + retval = msm_sched_setaffinity(pid, new_mask); out: free_cpumask_var(new_mask); return retval; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2f88e73fbe96..3bb24262bd06 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4936,8 +4936,9 @@ out_put_task: } char sched_lib_name[LIB_PATH_LENGTH]; +unsigned int sched_lib_mask_check; unsigned int sched_lib_mask_force; -bool is_sched_lib_based_app(pid_t pid) +static inline bool is_sched_lib_based_app(pid_t pid) { const char *name = NULL; struct vm_area_struct *vma; @@ -4989,6 +4990,19 @@ put_task_struct: return found; } +long msm_sched_setaffinity(pid_t pid, struct cpumask *new_mask) +{ + if (sched_lib_mask_check != 0 && sched_lib_mask_force != 0 && + (cpumask_bits(new_mask)[0] == sched_lib_mask_check) && + is_sched_lib_based_app(pid)) { + + cpumask_t forced_mask = { {sched_lib_mask_force} }; + + cpumask_copy(new_mask, &forced_mask); + } + return sched_setaffinity(pid, new_mask); +} + static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, struct cpumask *new_mask) { @@ -5019,7 +5033,7 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); if (retval == 0) - retval = sched_setaffinity(pid, new_mask); + retval = msm_sched_setaffinity(pid, new_mask); free_cpumask_var(new_mask); return retval; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 2afaec84bbc0..0a2bd19a86b9 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -658,6 +658,15 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dostring, }, + { + .procname = "sched_lib_mask_check", + .data = &sched_lib_mask_check, + .maxlen = sizeof(unsigned int), + .mode = 0644, + 
.proc_handler = proc_douintvec_minmax, + .extra1 = &zero, + .extra2 = &two_hundred_fifty_five, + }, { .procname = "sched_lib_mask_force", .data = &sched_lib_mask_force, From 62d72466e809be6148042eed02984acae02d3128 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 24 May 2019 19:21:50 -0700 Subject: [PATCH 67/85] Revert "sched/core: Fix use after free issue in is_sched_lib_based_app()" This reverts commit 0e6ca1640cec57004d702e5e7c3e59ba77541e2f. Bug:133481659 Test: build Change-Id: Ie6a0b5e46386c98882614be19dedc61ffd3870e5 Signed-off-by: Wei Wang --- kernel/sched/core.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3bb24262bd06..569d2d03b8cc 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4945,7 +4945,6 @@ static inline bool is_sched_lib_based_app(pid_t pid) char path_buf[LIB_PATH_LENGTH]; bool found = false; struct task_struct *p; - struct mm_struct *mm; if (strnlen(sched_lib_name, LIB_PATH_LENGTH) == 0) return false; @@ -4962,12 +4961,11 @@ static inline bool is_sched_lib_based_app(pid_t pid) get_task_struct(p); rcu_read_unlock(); - mm = get_task_mm(p); - if (!mm) + if (!p->mm) goto put_task_struct; - down_read(&mm->mmap_sem); - for (vma = mm->mmap; vma ; vma = vma->vm_next) { + down_read(&p->mm->mmap_sem); + for (vma = p->mm->mmap; vma ; vma = vma->vm_next) { if (vma->vm_file && vma->vm_flags & VM_EXEC) { name = d_path(&vma->vm_file->f_path, path_buf, LIB_PATH_LENGTH); @@ -4983,8 +4981,7 @@ static inline bool is_sched_lib_based_app(pid_t pid) } release_sem: - up_read(&mm->mmap_sem); - mmput(mm); + up_read(&p->mm->mmap_sem); put_task_struct: put_task_struct(p); return found; From 8cc2ad424a65e6da5cefb2200917f1860cc42ff3 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Fri, 24 May 2019 19:24:34 -0700 Subject: [PATCH 68/85] Revert "sched/core: fix userspace affining threads incorrectly" This reverts commit d43b69c4ad2a977406c84d47fe8a5261e0099e78. 
Bug:133481659 Test: build Change-Id: I615023c611c4de1eb334e4374af7306991f4216b Signed-off-by: Wei Wang --- include/linux/sched.h | 1 - include/linux/sched/sysctl.h | 5 --- kernel/compat.c | 2 +- kernel/sched/core.c | 67 +----------------------------------- kernel/sysctl.c | 26 -------------- 5 files changed, 2 insertions(+), 99 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index e1fbdb9105ec..7153926900cf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1968,7 +1968,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) # define vcpu_is_preempted(cpu) false #endif -extern long msm_sched_setaffinity(pid_t pid, struct cpumask *new_mask); extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 72e441cc7575..71615c95127c 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -134,9 +134,4 @@ extern int sched_little_cluster_coloc_fmin_khz_handler(struct ctl_table *table, size_t *lenp, loff_t *ppos); #endif -#define LIB_PATH_LENGTH 512 -extern char sched_lib_name[LIB_PATH_LENGTH]; -extern unsigned int sched_lib_mask_check; -extern unsigned int sched_lib_mask_force; - #endif /* _LINUX_SCHED_SYSCTL_H */ diff --git a/kernel/compat.c b/kernel/compat.c index 78ab1233d86b..63d10b91f80f 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -334,7 +334,7 @@ COMPAT_SYSCALL_DEFINE3(sched_setaffinity, compat_pid_t, pid, if (retval) goto out; - retval = msm_sched_setaffinity(pid, new_mask); + retval = sched_setaffinity(pid, new_mask); out: free_cpumask_var(new_mask); return retval; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 569d2d03b8cc..a07d3bb1ab78 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4935,71 +4935,6 @@ out_put_task: return retval; } -char sched_lib_name[LIB_PATH_LENGTH]; -unsigned int 
sched_lib_mask_check; -unsigned int sched_lib_mask_force; -static inline bool is_sched_lib_based_app(pid_t pid) -{ - const char *name = NULL; - struct vm_area_struct *vma; - char path_buf[LIB_PATH_LENGTH]; - bool found = false; - struct task_struct *p; - - if (strnlen(sched_lib_name, LIB_PATH_LENGTH) == 0) - return false; - - rcu_read_lock(); - - p = find_process_by_pid(pid); - if (!p) { - rcu_read_unlock(); - return false; - } - - /* Prevent p going away */ - get_task_struct(p); - rcu_read_unlock(); - - if (!p->mm) - goto put_task_struct; - - down_read(&p->mm->mmap_sem); - for (vma = p->mm->mmap; vma ; vma = vma->vm_next) { - if (vma->vm_file && vma->vm_flags & VM_EXEC) { - name = d_path(&vma->vm_file->f_path, - path_buf, LIB_PATH_LENGTH); - if (IS_ERR(name)) - goto release_sem; - - if (strnstr(name, sched_lib_name, - strnlen(name, LIB_PATH_LENGTH))) { - found = true; - break; - } - } - } - -release_sem: - up_read(&p->mm->mmap_sem); -put_task_struct: - put_task_struct(p); - return found; -} - -long msm_sched_setaffinity(pid_t pid, struct cpumask *new_mask) -{ - if (sched_lib_mask_check != 0 && sched_lib_mask_force != 0 && - (cpumask_bits(new_mask)[0] == sched_lib_mask_check) && - is_sched_lib_based_app(pid)) { - - cpumask_t forced_mask = { {sched_lib_mask_force} }; - - cpumask_copy(new_mask, &forced_mask); - } - return sched_setaffinity(pid, new_mask); -} - static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, struct cpumask *new_mask) { @@ -5030,7 +4965,7 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, retval = get_user_cpu_mask(user_mask_ptr, len, new_mask); if (retval == 0) - retval = msm_sched_setaffinity(pid, new_mask); + retval = sched_setaffinity(pid, new_mask); free_cpumask_var(new_mask); return retval; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 0a2bd19a86b9..afa39c97a342 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -149,7 +149,6 @@ static int ten_thousand = 10000; #ifdef 
CONFIG_PERF_EVENTS static int six_hundred_forty_kb = 640 * 1024; #endif -static int two_hundred_fifty_five = 255; /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; @@ -651,31 +650,6 @@ static struct ctl_table kern_table[] = { .extra1 = &one, }, #endif - { - .procname = "sched_lib_name", - .data = sched_lib_name, - .maxlen = LIB_PATH_LENGTH, - .mode = 0644, - .proc_handler = proc_dostring, - }, - { - .procname = "sched_lib_mask_check", - .data = &sched_lib_mask_check, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_douintvec_minmax, - .extra1 = &zero, - .extra2 = &two_hundred_fifty_five, - }, - { - .procname = "sched_lib_mask_force", - .data = &sched_lib_mask_force, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_douintvec_minmax, - .extra1 = &zero, - .extra2 = &two_hundred_fifty_five, - }, #ifdef CONFIG_PROVE_LOCKING { .procname = "prove_locking", From a67055f66551a93f4289349c8594470c3a2d5675 Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Tue, 12 Mar 2024 12:27:34 -0700 Subject: [PATCH 69/85] sched/fair: Set asym priority equally for all CPUs in a performance domain All CPUs in a performance domain share the same capacity, and therefore aren't different from one another when distinguishing between which one is better for asymmetric packing. Instead of unfairly prioritizing lower-numbered CPUs within the same performance domain, treat all CPUs in a performance domain equally for asymmetric packing. 
Change-Id: Ibc18d45fabc2983650ebebec910578e26bd26809 Signed-off-by: Sultan Alsawaf --- kernel/sched/fair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 62893be62841..a61a54a36eff 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -157,11 +157,11 @@ __read_mostly unsigned int sysctl_sched_walt_cpu_high_irqload = #ifdef CONFIG_SMP /* - * For asym packing, by default the lower numbered cpu has higher priority. + * For asym packing, by default the lower max-capacity cpu has higher priority. */ int __weak arch_asym_cpu_priority(int cpu) { - return -cpu; + return -arch_scale_cpu_capacity(NULL, cpu); } #endif From 77f43184dabaea3428c8f47d9b1a4ea2ea574444 Mon Sep 17 00:00:00 2001 From: Alexander Winkowski Date: Sun, 21 Apr 2024 08:29:16 +0000 Subject: [PATCH 70/85] sched: Remove unused core_ctl.h To avoid confusion with include/linux/sched/core_ctl.h Change-Id: I037b1cc0fa09c06737a369b4e7dfdd89cd7ad9af Signed-off-by: Alexander Winkowski --- kernel/sched/core_ctl.h | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 kernel/sched/core_ctl.h diff --git a/kernel/sched/core_ctl.h b/kernel/sched/core_ctl.h deleted file mode 100644 index 98d7cb3e899b..000000000000 --- a/kernel/sched/core_ctl.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2016, The Linux Foundation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- */ - -#ifndef __CORE_CTL_H -#define __CORE_CTL_H - -#ifdef CONFIG_SCHED_CORE_CTL -void core_ctl_check(u64 wallclock); -int core_ctl_set_boost(bool boost); -#else -static inline void core_ctl_check(u64 wallclock) {} -static inline int core_ctl_set_boost(bool boost) -{ - return 0; -} -#endif -#endif From bdf23ea276ed8dabbd28138b20114215031b924c Mon Sep 17 00:00:00 2001 From: Alexander Winkowski Date: Mon, 1 Apr 2024 11:11:31 +0000 Subject: [PATCH 71/85] sched: Introduce rotation_ctl This is WALT rotation logic extracted from core_ctl to avoid CPU isolation overhead while retaining the performance gain. Change-Id: I912d2dabf7e32eaf9da2f30b38898d1b29ff0a53 Signed-off-by: Alexander Winkowski --- include/linux/sched/core_ctl.h | 3 +- kernel/sched/Makefile | 3 + kernel/sched/rotation_ctl.c | 144 +++++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 kernel/sched/rotation_ctl.c diff --git a/include/linux/sched/core_ctl.h b/include/linux/sched/core_ctl.h index 359ad874295a..dcb3a871c725 100644 --- a/include/linux/sched/core_ctl.h +++ b/include/linux/sched/core_ctl.h @@ -25,7 +25,8 @@ int core_ctl_set_boost(bool boost); void core_ctl_notifier_register(struct notifier_block *n); void core_ctl_notifier_unregister(struct notifier_block *n); #else -static inline void core_ctl_check(u64 wallclock) {} +void rotation_ctl_check(u64 wallclock); +#define core_ctl_check rotation_ctl_check static inline int core_ctl_set_boost(bool boost) { return 0; diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index e9adba01c456..020978df1313 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -31,4 +31,7 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq.o obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o obj-$(CONFIG_MEMBARRIER) += membarrier.o obj-$(CONFIG_SCHED_CORE_CTL) += core_ctl.o +ifndef CONFIG_SCHED_CORE_CTL +obj-$(CONFIG_SCHED_WALT) += rotation_ctl.o +endif obj-$(CONFIG_PSI) += psi.o diff --git 
a/kernel/sched/rotation_ctl.c b/kernel/sched/rotation_ctl.c new file mode 100644 index 000000000000..32bec3a43985 --- /dev/null +++ b/kernel/sched/rotation_ctl.c @@ -0,0 +1,144 @@ +/* Copyright (c) 2014-2018, 2020, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) "rotation_ctl: " fmt + +#include "sched.h" +#include "walt.h" + +struct cluster_data { + cpumask_t cpu_mask; + unsigned int first_cpu; + bool inited; +}; + +static struct cluster_data cluster_state[MAX_CLUSTERS]; +static unsigned int num_clusters; + +#define for_each_cluster(cluster, idx) \ + for (; (idx) < num_clusters && ((cluster) = &cluster_state[idx]);\ + (idx)++) + +static bool initialized; + +static int cluster_real_big_tasks(int index, const struct sched_avg_stats *nr_stats) +{ + int nr_big = 0; + int cpu; + const struct cluster_data *cluster = &cluster_state[index]; + + if (!index) { + for_each_cpu(cpu, &cluster->cpu_mask) + nr_big += nr_stats[cpu].nr_misfit; + } else { + for_each_cpu(cpu, &cluster->cpu_mask) + nr_big += nr_stats[cpu].nr; + } + + return nr_big; +} + +static void update_running_avg(void) +{ + struct sched_avg_stats nr_stats[NR_CPUS]; + const struct cluster_data *cluster; + unsigned int index = 0; + int big_avg = 0; + + sched_get_nr_running_avg(nr_stats); + + for_each_cluster(cluster, index) { + if (!cluster->inited) + continue; + + big_avg += cluster_real_big_tasks(index, nr_stats); + } + + walt_rotation_checkpoint(big_avg); +} + +static u64 rotation_ctl_check_timestamp; + +void rotation_ctl_check(u64 
window_start) +{ + if (unlikely(!initialized)) + return; + + if (window_start == rotation_ctl_check_timestamp) + return; + + rotation_ctl_check_timestamp = window_start; + + update_running_avg(); +} + +/* ============================ init code ============================== */ + +static const struct cluster_data *find_cluster_by_first_cpu(unsigned int first_cpu) +{ + unsigned int i; + + for (i = 0; i < num_clusters; ++i) { + if (cluster_state[i].first_cpu == first_cpu) + return &cluster_state[i]; + } + + return NULL; +} + +static int cluster_init(const struct cpumask *mask) +{ + struct device *dev; + unsigned int first_cpu = cpumask_first(mask); + struct cluster_data *cluster; + + if (find_cluster_by_first_cpu(first_cpu)) + return 0; + + dev = get_cpu_device(first_cpu); + if (!dev) + return -ENODEV; + + pr_info("Creating CPU group %d\n", first_cpu); + + if (num_clusters == MAX_CLUSTERS) { + pr_err("Unsupported number of clusters. Only %u supported\n", + MAX_CLUSTERS); + return -EINVAL; + } + cluster = &cluster_state[num_clusters]; + ++num_clusters; + + cpumask_copy(&cluster->cpu_mask, mask); + cluster->first_cpu = first_cpu; + + cluster->inited = true; + + return 0; +} + +static int __init rotation_ctl_init(void) +{ + const struct sched_cluster *cluster; + int ret; + + for_each_sched_cluster(cluster) { + ret = cluster_init(&cluster->cpus); + if (ret) + pr_warn("unable to create rotation ctl group: %d\n", ret); + } + + initialized = true; + return 0; +} + +late_initcall(rotation_ctl_init); From 8c9d3503f46ec98175e8f5fc2d0125bad6a2d69c Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Thu, 18 Apr 2019 14:24:30 +0530 Subject: [PATCH 72/85] sched/fair: Optimize the tick path active migration When a task is upmigrating via tickpath, the lower capacity CPU that is running the task will wake up the migration task to carry the migration to the other higher capacity CPU. 
The migration task dequeue the task from lower capacity CPU and enqueue it on the higher capacity CPU. A rescheduler IPI is sent now to the higher capacity CPU. If the higher capacity CPU was in deep sleep state, it results in more waiting time for the task to be upmigrated. This can be optimized by waking up the higher capacity CPU along with waking the migration task on the lower capacity CPU. Since we reserve the higher capacity CPU, the is_reserved() API can be used to prevent the CPU entering idle again. Change-Id: I7bda9a905a66a9326c1dc74e50fa94eb58e6b705 Signed-off-by: Pavankumar Kondeti [clingutla@codeaurora.org: Resolved minor merge conflicts] Signed-off-by: Lingutla Chandrasekhar Signed-off-by: Alexander Winkowski --- kernel/sched/fair.c | 7 ++++++- kernel/sched/idle.c | 6 ++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a61a54a36eff..66f9287df7e4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -13159,6 +13159,7 @@ void check_for_migration(struct rq *rq, struct task_struct *p) int new_cpu = -1; int cpu = smp_processor_id(); int prev_cpu = task_cpu(p); + int ret; struct sched_domain *sd = NULL; if (rq->misfit_task_load) { @@ -13179,9 +13180,13 @@ void check_for_migration(struct rq *rq, struct task_struct *p) if (active_balance) { mark_reserved(new_cpu); raw_spin_unlock(&migration_lock); - stop_one_cpu_nowait(prev_cpu, + ret = stop_one_cpu_nowait(prev_cpu, active_load_balance_cpu_stop, rq, &rq->active_balance_work); + if (!ret) + clear_reserved(new_cpu); + else + wake_up_if_idle(new_cpu); return; } } else { diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 9e8c1f8b02b3..2bc2b78adb45 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -66,7 +66,8 @@ static noinline int __cpuidle cpu_idle_poll(void) local_irq_enable(); stop_critical_timings(); while (!tif_need_resched() && - (cpu_idle_force_poll || tick_check_broadcast_expired())) + (cpu_idle_force_poll || 
tick_check_broadcast_expired() || + is_reserved(smp_processor_id()))) cpu_relax(); start_critical_timings(); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); @@ -261,7 +262,8 @@ static void do_idle(void) * broadcast device expired for us, we don't want to go deep * idle as we know that the IPI is going to arrive right away. */ - if (cpu_idle_force_poll || tick_check_broadcast_expired()) { + if (cpu_idle_force_poll || tick_check_broadcast_expired() || + is_reserved(smp_processor_id())) { tick_nohz_idle_restart_tick(); cpu_idle_poll(); } else { From e14809d0a351b5318185a1803a52b8f60b65c8d7 Mon Sep 17 00:00:00 2001 From: Abhijeet Dharmapurikar Date: Thu, 13 Aug 2020 15:34:48 -0700 Subject: [PATCH 73/85] sched: Improve the Scheduler This change is for general scheduler improvement. Change-Id: I7cb85ea7133a94923fae97d99f5b0027750ce189 Signed-off-by: Abhijeet Dharmapurikar Signed-off-by: Alexander Winkowski --- kernel/sched/fair.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 66f9287df7e4..a09a9ccc5d2f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -13170,6 +13170,13 @@ void check_for_migration(struct rq *rq, struct task_struct *p) if (task_will_be_throttled(p)) return; + if (walt_rotation_enabled) { + raw_spin_lock(&migration_lock); + walt_check_for_rotation(rq); + raw_spin_unlock(&migration_lock); + return; + } + raw_spin_lock(&migration_lock); rcu_read_lock(); new_cpu = find_energy_efficient_cpu(sd, p, cpu, prev_cpu, 0, 1); @@ -13189,8 +13196,6 @@ void check_for_migration(struct rq *rq, struct task_struct *p) wake_up_if_idle(new_cpu); return; } - } else { - walt_check_for_rotation(rq); } raw_spin_unlock(&migration_lock); } From 7a2e034ebdfb725c0e0a5878f8d9d4bb8ba9f7b0 Mon Sep 17 00:00:00 2001 From: Lingutla Chandrasekhar Date: Tue, 1 Jun 2021 16:58:40 +0530 Subject: [PATCH 74/85] sched: walt: Improve the scheduler This change is for general scheduler improvements. 
Change-Id: Ia2854ae8701151761fe0780b6451133ab09a050b Signed-off-by: Lingutla Chandrasekhar Signed-off-by: Alexander Winkowski --- kernel/sched/core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a07d3bb1ab78..6147c9363a17 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3212,6 +3212,9 @@ void scheduler_tick(void) if (curr->sched_class == &fair_sched_class) check_for_migration(rq, curr); + + if (idle_cpu(cpu) && is_reserved(cpu)) + clear_reserved(cpu); } #ifdef CONFIG_NO_HZ_FULL From 7978413de4f13b0375c7f7457c1dee5b2cfa55be Mon Sep 17 00:00:00 2001 From: Lingutla Chandrasekhar Date: Wed, 30 Jun 2021 20:27:44 +0530 Subject: [PATCH 75/85] sched: Improve the scheduler This change is for general scheduler improvements. Change-Id: I37d6cb75ca8b08d9ca155b86b7d71ff369f46e14 Signed-off-by: Lingutla Chandrasekhar Signed-off-by: Alexander Winkowski --- kernel/sched/core.c | 6 +++++- kernel/sched/fair.c | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 6147c9363a17..ca921ee4cc78 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3213,8 +3213,12 @@ void scheduler_tick(void) if (curr->sched_class == &fair_sched_class) check_for_migration(rq, curr); - if (idle_cpu(cpu) && is_reserved(cpu)) +#ifdef CONFIG_SMP + rq_lock(rq, &rf); + if (idle_cpu(cpu) && is_reserved(cpu) && !rq->active_balance) clear_reserved(cpu); + rq_unlock(rq, &rf); +#endif } #ifdef CONFIG_NO_HZ_FULL diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index a09a9ccc5d2f..18dbd43d27e8 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -13035,12 +13035,23 @@ static void walt_rotate_work_func(struct work_struct *work) { struct walt_rotate_work *wr = container_of(work, struct walt_rotate_work, w); + struct rq *src_rq = cpu_rq(wr->src_cpu), *dst_rq = cpu_rq(wr->dst_cpu); + unsigned long flags; migrate_swap(wr->src_task, wr->dst_task); 
put_task_struct(wr->src_task); put_task_struct(wr->dst_task); + local_irq_save(flags); + double_rq_lock(src_rq, dst_rq); + + dst_rq->active_balance = 0; + src_rq->active_balance = 0; + + double_rq_unlock(src_rq, dst_rq); + local_irq_restore(flags); + clear_reserved(wr->src_cpu); clear_reserved(wr->dst_cpu); } @@ -13127,7 +13138,8 @@ static void walt_check_for_rotation(struct rq *src_rq) dst_rq = cpu_rq(dst_cpu); double_rq_lock(src_rq, dst_rq); - if (dst_rq->curr->sched_class == &fair_sched_class) { + if (dst_rq->curr->sched_class == &fair_sched_class && + !src_rq->active_balance && !dst_rq->active_balance) { get_task_struct(src_rq->curr); get_task_struct(dst_rq->curr); @@ -13140,7 +13152,10 @@ static void walt_check_for_rotation(struct rq *src_rq) wr->src_cpu = src_cpu; wr->dst_cpu = dst_cpu; + dst_rq->active_balance = 1; + src_rq->active_balance = 1; } + double_rq_unlock(src_rq, dst_rq); if (wr) From e5afb625b33c3e04673dcc35d79500e98e815410 Mon Sep 17 00:00:00 2001 From: Abhijeet Dharmapurikar Date: Tue, 2 Nov 2021 17:09:09 -0700 Subject: [PATCH 76/85] sched/walt: Improve the scheduler This change is for general scheduler improvement. 
Change-Id: Iffd4ae221581aaa4aeb244a0cddd40a8b6aac74d Signed-off-by: Abhijeet Dharmapurikar [dereference23: Backport to msm-4.14] Signed-off-by: Alexander Winkowski --- kernel/sched/fair.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 18dbd43d27e8..e1c477ed21d3 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -13139,7 +13139,9 @@ static void walt_check_for_rotation(struct rq *src_rq) double_rq_lock(src_rq, dst_rq); if (dst_rq->curr->sched_class == &fair_sched_class && - !src_rq->active_balance && !dst_rq->active_balance) { + !src_rq->active_balance && !dst_rq->active_balance && + cpumask_test_cpu(dst_cpu, &src_rq->curr->cpus_allowed) && + cpumask_test_cpu(src_cpu, &dst_rq->curr->cpus_allowed)) { get_task_struct(src_rq->curr); get_task_struct(dst_rq->curr); From aeb2647ddbec47a74432716b4e239c6437e0e1f6 Mon Sep 17 00:00:00 2001 From: Pavankumar Kondeti Date: Tue, 27 Apr 2021 16:21:11 +0530 Subject: [PATCH 77/85] sched: walt: Fix stale walt CPU reservation flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CPU trying to move a task to other cpu in active load balance or by other means, then the other helping cpu marked as reserved to avoid it for other scheduler decisions. Once the task moved successfully, the reservation will be cleared enables for other scheduler decisions. The reserved flag is been analogously protected with busy cpu’s rq->active_balance, which is protected with runqueue locks. So whenever rq->active_balance is set for busy cpu, then reserved flag would set for helping cpu. Sometimes, it is observed that, cpu is marked as reserved with no cpu's rq->active_balance set. There are some unlikely possible corner cases may cause this behavior: - On active load balance path, cpu stop machine returns queued status of active_balance work on cpu_stopper, which is not checked on active balance path. 
so when stop machine is not able to queue ( unlikely), then reserved flag wouldn't be cleared. So, catch the return value and on failure, clear reserved flag for cpu. - Clear_walt_request() called on the cpu to clear any pending walt works, it may possible that, push_task might have changed or cleared, then reserved cpu would be left uncleared. So clear the push_cpu independent of push_task. Change-Id: I75d032bf399cb3da8e807186b1bc903114168a4e Signed-off-by: Pavankumar Kondeti Signed-off-by: Lingutla Chandrasekhar Signed-off-by: Alexander Winkowski --- kernel/sched/fair.c | 9 ++++++++- kernel/sched/walt.c | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e1c477ed21d3..3869782f1c12 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -11271,9 +11271,16 @@ no_move: raw_spin_unlock_irqrestore(&busiest->lock, flags); if (active_balance) { - stop_one_cpu_nowait(cpu_of(busiest), + int ret; + + ret = stop_one_cpu_nowait(cpu_of(busiest), active_load_balance_cpu_stop, busiest, &busiest->active_balance_work); + if (!ret) { + clear_reserved(this_cpu); + busiest->active_balance = 0; + active_balance = 0; + } *continue_balancing = 0; } diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c index 1a22866a8d73..e4a363c8c900 100644 --- a/kernel/sched/walt.c +++ b/kernel/sched/walt.c @@ -435,10 +435,10 @@ void clear_walt_request(int cpu) raw_spin_lock_irqsave(&rq->lock, flags); if (rq->push_task) { - clear_reserved(rq->push_cpu); push_task = rq->push_task; rq->push_task = NULL; } + clear_reserved(rq->push_cpu); rq->active_balance = 0; raw_spin_unlock_irqrestore(&rq->lock, flags); if (push_task) From 05712a61e3fb6501314c30b1da3a56763c17d905 Mon Sep 17 00:00:00 2001 From: Rishabh Bhatnagar Date: Fri, 27 Aug 2021 14:35:07 -0700 Subject: [PATCH 78/85] sched: walt: Increase nr_threshold to 40 percent Increase the nr_threshold percentage to 40 from 15. 
Change-Id: I32ce7246fc4cd32d4c8110bef63971c9a2dceb55 Signed-off-by: Rishabh Bhatnagar Signed-off-by: Alexander Winkowski --- kernel/sched/sched_avg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/sched_avg.c b/kernel/sched/sched_avg.c index 42b047330c1a..7119a9549483 100644 --- a/kernel/sched/sched_avg.c +++ b/kernel/sched/sched_avg.c @@ -35,7 +35,7 @@ static s64 last_get_time; static DEFINE_PER_CPU(atomic64_t, last_busy_time) = ATOMIC64_INIT(0); -#define NR_THRESHOLD_PCT 15 +#define NR_THRESHOLD_PCT 40 /** * sched_get_nr_running_avg From 758bd66cf9c1061f835cc11d719fc251ff41702b Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Wed, 20 Jun 2018 22:32:50 +0530 Subject: [PATCH 79/85] sched/numa: Modify migrate_swap() to accept additional parameters There are checks in migrate_swap_stop() that check if the task/CPU combination is as per migrate_swap_arg before migrating. However atleast one of the two tasks to be swapped by migrate_swap() could have migrated to a completely different CPU before updating the migrate_swap_arg. The new CPU where the task is currently running could be a different node too. If the task has migrated, numa balancer might end up placing a task in a wrong node. Instead of achieving node consolidation, it may end up spreading the load across nodes. To avoid that pass the CPUs as additional parameters. While here, place migrate_swap under CONFIG_NUMA_BALANCING. 
Running SPECjbb2005 on a 4 node machine and comparing bops/JVM JVMS LAST_PATCH WITH_PATCH %CHANGE 16 25377.3 25226.6 -0.59 1 72287 73326 1.437 Signed-off-by: Srikar Dronamraju Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Rik van Riel Acked-by: Mel Gorman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1529514181-9842-10-git-send-email-srikar@linux.vnet.ibm.com Signed-off-by: Ingo Molnar (cherry picked from commit 0ad4e3dfe6cf3f207e61cbd8e3e4a943f8c1ad20) Change-Id: Ia520fdeb7233d96891af72f80a44b71658951981 [dereference23: Backport to msm-4.14] Signed-off-by: Alexander Winkowski --- kernel/sched/core.c | 7 ++++--- kernel/sched/fair.c | 5 +++-- kernel/sched/sched.h | 3 ++- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ca921ee4cc78..4e3d46b7f671 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1344,16 +1344,17 @@ unlock: /* * Cross migrate two tasks */ -int migrate_swap(struct task_struct *cur, struct task_struct *p) +int migrate_swap(struct task_struct *cur, struct task_struct *p, + int target_cpu, int curr_cpu) { struct migration_swap_arg arg; int ret = -EINVAL; arg = (struct migration_swap_arg){ .src_task = cur, - .src_cpu = task_cpu(cur), + .src_cpu = curr_cpu, .dst_task = p, - .dst_cpu = task_cpu(p), + .dst_cpu = target_cpu, }; if (arg.src_cpu == arg.dst_cpu) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3869782f1c12..475162848da2 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1938,7 +1938,8 @@ static int task_numa_migrate(struct task_struct *p) return ret; } - ret = migrate_swap(p, env.best_task); + ret = migrate_swap(p, env.best_task, env.best_cpu, env.src_cpu); + if (ret != 0) trace_sched_stick_numa(p, env.src_cpu, task_cpu(env.best_task)); put_task_struct(env.best_task); @@ -13045,7 +13046,7 @@ static void walt_rotate_work_func(struct work_struct *work) struct rq *src_rq = cpu_rq(wr->src_cpu), *dst_rq = 
cpu_rq(wr->dst_cpu); unsigned long flags; - migrate_swap(wr->src_task, wr->dst_task); + migrate_swap(wr->src_task, wr->dst_task, wr->dst_cpu, wr->src_cpu); put_task_struct(wr->src_task); put_task_struct(wr->dst_task); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 31ed518469da..6dab6f7c83c1 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1221,7 +1221,8 @@ enum numa_faults_stats { extern void sched_setnuma(struct task_struct *p, int node); extern int migrate_task_to(struct task_struct *p, int cpu); #endif /* CONFIG_NUMA_BALANCING */ -extern int migrate_swap(struct task_struct *cur, struct task_struct *p); +extern int migrate_swap(struct task_struct *p, struct task_struct *t, + int cpu, int scpu); #ifdef CONFIG_SMP From 2bf55e9119c2f0c51b393382f90bb33c75385330 Mon Sep 17 00:00:00 2001 From: Mimi Wu Date: Thu, 3 May 2018 17:12:44 +0800 Subject: [PATCH 80/85] scsi: ufs: disable clock scaling Disable clock scaling to avoid costly workqueue overheads. Power test results on Blueline: [without this change] Suspend: 9.75mA Idle: 238.26mA Camera Preview: 1309.99mA Partial Wake Lock: 13.67mA [with this change - disable clock scaling] Suspend: 9.73mA (-0.21%) Idle: 215.87mA (-9.4%) Camera Preview: 1181.71mA (-9.79%) Partial Wake Lock: 13.85mA (+1.32%) Bug: 78601190 Signed-off-by: Mimi Wu Change-Id: I09f07619ab3e11b05149358c1d06b0d1039decf3 --- drivers/scsi/ufs/ufs-qcom.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index a8029019cc03..f10678afe451 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -1480,7 +1480,6 @@ static void ufs_qcom_set_caps(struct ufs_hba *hba) if (!host->disable_lpm) { hba->caps |= UFSHCD_CAP_CLK_GATING; hba->caps |= UFSHCD_CAP_HIBERN8_WITH_CLK_GATING; - hba->caps |= UFSHCD_CAP_CLK_SCALING; } hba->caps |= UFSHCD_CAP_AUTO_BKOPS_SUSPEND; From 704df79435675e318a749a7fd0305364058e29af Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: 
Mon, 27 May 2019 19:40:44 -0700 Subject: [PATCH 81/85] scsi: ufs: Only apply pm_qos to the CPU servicing UFS interrupts Applying pm_qos restrictions to multiple CPUs which aren't used for ufs processing is a waste of power. Instead, only apply the pm_qos restrictions to the CPU that services the UFS interrupts to save power. Signed-off-by: Sultan Alsawaf Signed-off-by: Samuel Pascua --- drivers/scsi/ufs/ufs-qcom.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index f10678afe451..7c2f34c3aa67 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -1840,9 +1840,8 @@ static int ufs_qcom_pm_qos_init(struct ufs_qcom_host *host) if (ret) goto free_groups; - host->pm_qos.groups[i].req.type = PM_QOS_REQ_AFFINE_CORES; - host->pm_qos.groups[i].req.cpus_affine = - host->pm_qos.groups[i].mask; + host->pm_qos.groups[i].req.type = PM_QOS_REQ_AFFINE_IRQ; + host->pm_qos.groups[i].req.irq = host->hba->irq; host->pm_qos.groups[i].state = PM_QOS_UNVOTED; host->pm_qos.groups[i].active_reqs = 0; host->pm_qos.groups[i].host = host; From 00290c4372b3b891e920b1ad62e6b821e4c51bbb Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Sun, 21 Feb 2021 16:57:19 -0800 Subject: [PATCH 82/85] scsi: ufs: Scrap Qualcomm's PM QoS implementation This implementation is completely over the top and wastes lots of CPU cycles. It's too convoluted to fix, so just scrap it to make way for a simpler solution. This purges every PM QoS reference in the UFS drivers. 
Signed-off-by: Sultan Alsawaf --- drivers/scsi/ufs/ufs-qcom-debugfs.c | 45 ---- drivers/scsi/ufs/ufs-qcom.c | 404 ---------------------------- drivers/scsi/ufs/ufs-qcom.h | 57 ---- drivers/scsi/ufs/ufshcd.c | 10 - drivers/scsi/ufs/ufshcd.h | 24 -- 5 files changed, 540 deletions(-) diff --git a/drivers/scsi/ufs/ufs-qcom-debugfs.c b/drivers/scsi/ufs/ufs-qcom-debugfs.c index 8e845300f4bd..ba49542084cd 100644 --- a/drivers/scsi/ufs/ufs-qcom-debugfs.c +++ b/drivers/scsi/ufs/ufs-qcom-debugfs.c @@ -244,40 +244,6 @@ static const struct file_operations ufs_qcom_dbg_dbg_regs_desc = { .release = single_release, }; -static int ufs_qcom_dbg_pm_qos_show(struct seq_file *file, void *data) -{ - struct ufs_qcom_host *host = (struct ufs_qcom_host *)file->private; - unsigned long flags; - int i; - - spin_lock_irqsave(host->hba->host->host_lock, flags); - - seq_printf(file, "enabled: %d\n", host->pm_qos.is_enabled); - for (i = 0; i < host->pm_qos.num_groups && host->pm_qos.groups; i++) - seq_printf(file, - "CPU Group #%d(mask=0x%lx): active_reqs=%d, state=%d, latency=%d\n", - i, host->pm_qos.groups[i].mask.bits[0], - host->pm_qos.groups[i].active_reqs, - host->pm_qos.groups[i].state, - host->pm_qos.groups[i].latency_us); - - spin_unlock_irqrestore(host->hba->host->host_lock, flags); - - return 0; -} - -static int ufs_qcom_dbg_pm_qos_open(struct inode *inode, - struct file *file) -{ - return single_open(file, ufs_qcom_dbg_pm_qos_show, inode->i_private); -} - -static const struct file_operations ufs_qcom_dbg_pm_qos_desc = { - .open = ufs_qcom_dbg_pm_qos_open, - .read = seq_read, - .release = single_release, -}; - void ufs_qcom_dbg_add_debugfs(struct ufs_hba *hba, struct dentry *root) { struct ufs_qcom_host *host; @@ -366,17 +332,6 @@ void ufs_qcom_dbg_add_debugfs(struct ufs_hba *hba, struct dentry *root) goto err; } - host->debugfs_files.pm_qos = - debugfs_create_file("pm_qos", 0400, - host->debugfs_files.debugfs_root, host, - &ufs_qcom_dbg_pm_qos_desc); - if 
(!host->debugfs_files.dbg_regs) { - dev_err(host->hba->dev, - "%s: failed create dbg_regs debugfs entry\n", - __func__); - goto err; - } - return; err: diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index 7c2f34c3aa67..862d876306bd 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -35,8 +35,6 @@ #define MAX_PROP_SIZE 32 #define VDDP_REF_CLK_MIN_UV 1200000 #define VDDP_REF_CLK_MAX_UV 1200000 -/* TODO: further tuning for this parameter may be required */ -#define UFS_QCOM_PM_QOS_UNVOTE_TIMEOUT_US (10000) /* microseconds */ #define UFS_QCOM_DEFAULT_DBG_PRINT_EN \ (UFS_QCOM_DBG_PRINT_REGS_EN | UFS_QCOM_DBG_PRINT_TEST_BUS_EN) @@ -64,7 +62,6 @@ static void ufs_qcom_get_default_testbus_cfg(struct ufs_qcom_host *host); static int ufs_qcom_set_dme_vs_core_clk_ctrl_clear_div(struct ufs_hba *hba, u32 clk_1us_cycles, u32 clk_40ns_cycles); -static void ufs_qcom_pm_qos_suspend(struct ufs_qcom_host *host); static void ufs_qcom_dump_regs(struct ufs_hba *hba, int offset, int len, char *prefix) @@ -847,8 +844,6 @@ static int ufs_qcom_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) goto out; } } - /* Unvote PM QoS */ - ufs_qcom_pm_qos_suspend(host); out: return ret; @@ -1557,394 +1552,6 @@ static int ufs_qcom_setup_clocks(struct ufs_hba *hba, bool on, return 0; } -#ifdef CONFIG_SMP /* CONFIG_SMP */ -static int ufs_qcom_cpu_to_group(struct ufs_qcom_host *host, int cpu) -{ - int i; - - if (cpu >= 0 && cpu < num_possible_cpus()) - for (i = 0; i < host->pm_qos.num_groups; i++) - if (cpumask_test_cpu(cpu, &host->pm_qos.groups[i].mask)) - return i; - - return host->pm_qos.default_cpu; -} - -static void ufs_qcom_pm_qos_req_start(struct ufs_hba *hba, struct request *req) -{ - unsigned long flags; - struct ufs_qcom_host *host; - struct ufs_qcom_pm_qos_cpu_group *group; - - if (!hba || !req) - return; - - host = ufshcd_get_variant(hba); - if (!host->pm_qos.groups) - return; - - group = &host->pm_qos.groups[ufs_qcom_cpu_to_group(host, 
req->cpu)]; - - spin_lock_irqsave(hba->host->host_lock, flags); - if (!host->pm_qos.is_enabled) - goto out; - - group->active_reqs++; - if (group->state != PM_QOS_REQ_VOTE && - group->state != PM_QOS_VOTED) { - group->state = PM_QOS_REQ_VOTE; - queue_work(host->pm_qos.workq, &group->vote_work); - } -out: - spin_unlock_irqrestore(hba->host->host_lock, flags); -} - -/* hba->host->host_lock is assumed to be held by caller */ -static void __ufs_qcom_pm_qos_req_end(struct ufs_qcom_host *host, int req_cpu) -{ - struct ufs_qcom_pm_qos_cpu_group *group; - - if (!host->pm_qos.groups || !host->pm_qos.is_enabled) - return; - - group = &host->pm_qos.groups[ufs_qcom_cpu_to_group(host, req_cpu)]; - - if (--group->active_reqs) - return; - group->state = PM_QOS_REQ_UNVOTE; - queue_work(host->pm_qos.workq, &group->unvote_work); -} - -static void ufs_qcom_pm_qos_req_end(struct ufs_hba *hba, struct request *req, - bool should_lock) -{ - unsigned long flags = 0; - - if (!hba || !req) - return; - - if (should_lock) - spin_lock_irqsave(hba->host->host_lock, flags); - __ufs_qcom_pm_qos_req_end(ufshcd_get_variant(hba), req->cpu); - if (should_lock) - spin_unlock_irqrestore(hba->host->host_lock, flags); -} - -static void ufs_qcom_pm_qos_vote_work(struct work_struct *work) -{ - struct ufs_qcom_pm_qos_cpu_group *group = - container_of(work, struct ufs_qcom_pm_qos_cpu_group, vote_work); - struct ufs_qcom_host *host = group->host; - unsigned long flags; - - spin_lock_irqsave(host->hba->host->host_lock, flags); - - if (!host->pm_qos.is_enabled || !group->active_reqs) { - spin_unlock_irqrestore(host->hba->host->host_lock, flags); - return; - } - - group->state = PM_QOS_VOTED; - spin_unlock_irqrestore(host->hba->host->host_lock, flags); - - pm_qos_update_request(&group->req, group->latency_us); -} - -static void ufs_qcom_pm_qos_unvote_work(struct work_struct *work) -{ - struct ufs_qcom_pm_qos_cpu_group *group = container_of(work, - struct ufs_qcom_pm_qos_cpu_group, unvote_work); - struct 
ufs_qcom_host *host = group->host; - unsigned long flags; - - /* - * Check if new requests were submitted in the meantime and do not - * unvote if so. - */ - spin_lock_irqsave(host->hba->host->host_lock, flags); - - if (!host->pm_qos.is_enabled || group->active_reqs) { - spin_unlock_irqrestore(host->hba->host->host_lock, flags); - return; - } - - group->state = PM_QOS_UNVOTED; - spin_unlock_irqrestore(host->hba->host->host_lock, flags); - - pm_qos_update_request_timeout(&group->req, - group->latency_us, UFS_QCOM_PM_QOS_UNVOTE_TIMEOUT_US); -} - -static ssize_t ufs_qcom_pm_qos_enable_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct ufs_hba *hba = dev_get_drvdata(dev->parent); - struct ufs_qcom_host *host = ufshcd_get_variant(hba); - - return snprintf(buf, PAGE_SIZE, "%d\n", host->pm_qos.is_enabled); -} - -static ssize_t ufs_qcom_pm_qos_enable_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct ufs_hba *hba = dev_get_drvdata(dev->parent); - struct ufs_qcom_host *host = ufshcd_get_variant(hba); - unsigned long value; - unsigned long flags; - bool enable; - int i; - - if (kstrtoul(buf, 0, &value)) - return -EINVAL; - - enable = !!value; - - /* - * Must take the spinlock and save irqs before changing the enabled - * flag in order to keep correctness of PM QoS release. 
- */ - spin_lock_irqsave(hba->host->host_lock, flags); - if (enable == host->pm_qos.is_enabled) { - spin_unlock_irqrestore(hba->host->host_lock, flags); - return count; - } - host->pm_qos.is_enabled = enable; - spin_unlock_irqrestore(hba->host->host_lock, flags); - - if (!enable) - for (i = 0; i < host->pm_qos.num_groups; i++) { - cancel_work_sync(&host->pm_qos.groups[i].vote_work); - cancel_work_sync(&host->pm_qos.groups[i].unvote_work); - spin_lock_irqsave(hba->host->host_lock, flags); - host->pm_qos.groups[i].state = PM_QOS_UNVOTED; - host->pm_qos.groups[i].active_reqs = 0; - spin_unlock_irqrestore(hba->host->host_lock, flags); - pm_qos_update_request(&host->pm_qos.groups[i].req, - PM_QOS_DEFAULT_VALUE); - } - - return count; -} - -static ssize_t ufs_qcom_pm_qos_latency_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct ufs_hba *hba = dev_get_drvdata(dev->parent); - struct ufs_qcom_host *host = ufshcd_get_variant(hba); - int ret; - int i; - int offset = 0; - - for (i = 0; i < host->pm_qos.num_groups; i++) { - ret = snprintf(&buf[offset], PAGE_SIZE, - "cpu group #%d(mask=0x%lx): %d\n", i, - host->pm_qos.groups[i].mask.bits[0], - host->pm_qos.groups[i].latency_us); - if (ret > 0) - offset += ret; - else - break; - } - - return offset; -} - -static ssize_t ufs_qcom_pm_qos_latency_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t count) -{ - struct ufs_hba *hba = dev_get_drvdata(dev->parent); - struct ufs_qcom_host *host = ufshcd_get_variant(hba); - unsigned long value; - unsigned long flags; - char *strbuf; - char *strbuf_copy; - char *token; - int i; - int ret; - - /* reserve one byte for null termination */ - strbuf = kmalloc(count + 1, GFP_KERNEL); - if (!strbuf) - return -ENOMEM; - strbuf_copy = strbuf; - strlcpy(strbuf, buf, count + 1); - - for (i = 0; i < host->pm_qos.num_groups; i++) { - token = strsep(&strbuf, ","); - if (!token) - break; - - ret = kstrtoul(token, 0, &value); - if (ret) - break; 
- - spin_lock_irqsave(hba->host->host_lock, flags); - host->pm_qos.groups[i].latency_us = value; - spin_unlock_irqrestore(hba->host->host_lock, flags); - } - - kfree(strbuf_copy); - return count; -} - -static int ufs_qcom_pm_qos_init(struct ufs_qcom_host *host) -{ - struct device_node *node = host->hba->dev->of_node; - struct device_attribute *attr; - int ret = 0; - int num_groups; - int num_values; - char wq_name[sizeof("ufs_pm_qos_00")]; - int i; - - num_groups = of_property_count_u32_elems(node, - "qcom,pm-qos-cpu-groups"); - if (num_groups <= 0) - goto no_pm_qos; - - num_values = of_property_count_u32_elems(node, - "qcom,pm-qos-cpu-group-latency-us"); - if (num_values <= 0) - goto no_pm_qos; - - if (num_values != num_groups || num_groups > num_possible_cpus()) { - dev_err(host->hba->dev, "%s: invalid count: num_groups=%d, num_values=%d, num_possible_cpus=%d\n", - __func__, num_groups, num_values, num_possible_cpus()); - goto no_pm_qos; - } - - host->pm_qos.num_groups = num_groups; - host->pm_qos.groups = kcalloc(host->pm_qos.num_groups, - sizeof(struct ufs_qcom_pm_qos_cpu_group), GFP_KERNEL); - if (!host->pm_qos.groups) - return -ENOMEM; - - for (i = 0; i < host->pm_qos.num_groups; i++) { - u32 mask; - - ret = of_property_read_u32_index(node, "qcom,pm-qos-cpu-groups", - i, &mask); - if (ret) - goto free_groups; - host->pm_qos.groups[i].mask.bits[0] = mask; - if (!cpumask_subset(&host->pm_qos.groups[i].mask, - cpu_possible_mask)) { - dev_err(host->hba->dev, "%s: invalid mask 0x%x for cpu group\n", - __func__, mask); - goto free_groups; - } - - ret = of_property_read_u32_index(node, - "qcom,pm-qos-cpu-group-latency-us", i, - &host->pm_qos.groups[i].latency_us); - if (ret) - goto free_groups; - - host->pm_qos.groups[i].req.type = PM_QOS_REQ_AFFINE_IRQ; - host->pm_qos.groups[i].req.irq = host->hba->irq; - host->pm_qos.groups[i].state = PM_QOS_UNVOTED; - host->pm_qos.groups[i].active_reqs = 0; - host->pm_qos.groups[i].host = host; - - 
INIT_WORK(&host->pm_qos.groups[i].vote_work, - ufs_qcom_pm_qos_vote_work); - INIT_WORK(&host->pm_qos.groups[i].unvote_work, - ufs_qcom_pm_qos_unvote_work); - } - - ret = of_property_read_u32(node, "qcom,pm-qos-default-cpu", - &host->pm_qos.default_cpu); - if (ret || host->pm_qos.default_cpu > num_possible_cpus()) - host->pm_qos.default_cpu = 0; - - /* - * Use a single-threaded workqueue to assure work submitted to the queue - * is performed in order. Consider the following 2 possible cases: - * - * 1. A new request arrives and voting work is scheduled for it. Before - * the voting work is performed the request is finished and unvote - * work is also scheduled. - * 2. A request is finished and unvote work is scheduled. Before the - * work is performed a new request arrives and voting work is also - * scheduled. - * - * In both cases a vote work and unvote work wait to be performed. - * If ordering is not guaranteed, then the end state might be the - * opposite of the desired state. - */ - snprintf(wq_name, ARRAY_SIZE(wq_name), "%s_%d", "ufs_pm_qos", - host->hba->host->host_no); - host->pm_qos.workq = create_singlethread_workqueue(wq_name); - if (!host->pm_qos.workq) { - dev_err(host->hba->dev, "%s: failed to create the workqueue\n", - __func__); - ret = -ENOMEM; - goto free_groups; - } - - /* Initialization was ok, add all PM QoS requests */ - for (i = 0; i < host->pm_qos.num_groups; i++) - pm_qos_add_request(&host->pm_qos.groups[i].req, - PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE); - - /* PM QoS latency sys-fs attribute */ - attr = &host->pm_qos.latency_attr; - attr->show = ufs_qcom_pm_qos_latency_show; - attr->store = ufs_qcom_pm_qos_latency_store; - sysfs_attr_init(&attr->attr); - attr->attr.name = "pm_qos_latency_us"; - attr->attr.mode = 0644; - if (device_create_file(host->hba->var->dev, attr)) - dev_dbg(host->hba->dev, "Failed to create sysfs for pm_qos_latency_us\n"); - - /* PM QoS enable sys-fs attribute */ - attr = &host->pm_qos.enable_attr; - attr->show 
= ufs_qcom_pm_qos_enable_show; - attr->store = ufs_qcom_pm_qos_enable_store; - sysfs_attr_init(&attr->attr); - attr->attr.name = "pm_qos_enable"; - attr->attr.mode = 0644; - if (device_create_file(host->hba->var->dev, attr)) - dev_dbg(host->hba->dev, "Failed to create sysfs for pm_qos enable\n"); - - host->pm_qos.is_enabled = true; - - return 0; - -free_groups: - kfree(host->pm_qos.groups); -no_pm_qos: - host->pm_qos.groups = NULL; - return ret ? ret : -ENOTSUPP; -} - -static void ufs_qcom_pm_qos_suspend(struct ufs_qcom_host *host) -{ - int i; - - if (!host->pm_qos.groups) - return; - - for (i = 0; i < host->pm_qos.num_groups; i++) - flush_work(&host->pm_qos.groups[i].unvote_work); -} - -static void ufs_qcom_pm_qos_remove(struct ufs_qcom_host *host) -{ - int i; - - if (!host->pm_qos.groups) - return; - - for (i = 0; i < host->pm_qos.num_groups; i++) - pm_qos_remove_request(&host->pm_qos.groups[i].req); - destroy_workqueue(host->pm_qos.workq); - - kfree(host->pm_qos.groups); - host->pm_qos.groups = NULL; -} -#endif /* CONFIG_SMP */ - #define ANDROID_BOOT_DEV_MAX 30 static char android_boot_dev[ANDROID_BOOT_DEV_MAX]; @@ -2107,10 +1714,6 @@ static int ufs_qcom_init(struct ufs_hba *hba) goto out_variant_clear; } - err = ufs_qcom_pm_qos_init(host); - if (err) - dev_info(dev, "%s: PM QoS will be disabled\n", __func__); - /* restore the secure configuration */ ufs_qcom_update_sec_cfg(hba, true); @@ -2239,7 +1842,6 @@ static void ufs_qcom_exit(struct ufs_hba *hba) host->is_phy_pwr_on = false; } phy_exit(host->generic_phy); - ufs_qcom_pm_qos_remove(host); } static int ufs_qcom_set_dme_vs_core_clk_ctrl_clear_div(struct ufs_hba *hba, @@ -2706,15 +2308,9 @@ static struct ufs_hba_variant_ops ufs_hba_qcom_vops = { #endif }; -static struct ufs_hba_pm_qos_variant_ops ufs_hba_pm_qos_variant_ops = { - .req_start = ufs_qcom_pm_qos_req_start, - .req_end = ufs_qcom_pm_qos_req_end, -}; - static struct ufs_hba_variant ufs_hba_qcom_variant = { .name = "qcom", .vops = &ufs_hba_qcom_vops, - 
.pm_qos_vops = &ufs_hba_pm_qos_variant_ops, }; /** diff --git a/drivers/scsi/ufs/ufs-qcom.h b/drivers/scsi/ufs/ufs-qcom.h index 44315ab110f8..22914790c9d8 100644 --- a/drivers/scsi/ufs/ufs-qcom.h +++ b/drivers/scsi/ufs/ufs-qcom.h @@ -15,7 +15,6 @@ #define UFS_QCOM_H_ #include -#include #include "ufshcd.h" #define MAX_UFS_QCOM_HOSTS 2 @@ -245,62 +244,9 @@ struct qcom_debugfs_files { struct dentry *testbus_cfg; struct dentry *testbus_bus; struct dentry *dbg_regs; - struct dentry *pm_qos; }; #endif -/* PM QoS voting state */ -enum ufs_qcom_pm_qos_state { - PM_QOS_UNVOTED, - PM_QOS_VOTED, - PM_QOS_REQ_VOTE, - PM_QOS_REQ_UNVOTE, -}; - -/** - * struct ufs_qcom_pm_qos_cpu_group - data related to cluster PM QoS voting - * logic - * @req: request object for PM QoS - * @vote_work: work object for voting procedure - * @unvote_work: work object for un-voting procedure - * @host: back pointer to the main structure - * @state: voting state machine current state - * @latency_us: requested latency value used for cluster voting, in - * microseconds - * @mask: cpu mask defined for this cluster - * @active_reqs: number of active requests on this cluster - */ -struct ufs_qcom_pm_qos_cpu_group { - struct pm_qos_request req; - struct work_struct vote_work; - struct work_struct unvote_work; - struct ufs_qcom_host *host; - enum ufs_qcom_pm_qos_state state; - s32 latency_us; - cpumask_t mask; - int active_reqs; -}; - -/** - * struct ufs_qcom_pm_qos - data related to PM QoS voting logic - * @groups: PM QoS cpu group state array - * @enable_attr: sysfs attribute to enable/disable PM QoS voting logic - * @latency_attr: sysfs attribute to set latency value - * @workq: single threaded workqueue to run PM QoS voting/unvoting - * @num_clusters: number of clusters defined - * @default_cpu: cpu to use for voting for request not specifying a cpu - * @is_enabled: flag specifying whether voting logic is enabled - */ -struct ufs_qcom_pm_qos { - struct ufs_qcom_pm_qos_cpu_group *groups; - struct 
device_attribute enable_attr; - struct device_attribute latency_attr; - struct workqueue_struct *workq; - int num_groups; - int default_cpu; - bool is_enabled; -}; - struct ufs_qcom_host { /* * Set this capability if host controller supports the QUniPro mode @@ -337,9 +283,6 @@ struct ufs_qcom_host { struct clk *rx_l1_sync_clk; struct clk *tx_l1_sync_clk; - /* PM Quality-of-Service (QoS) data */ - struct ufs_qcom_pm_qos pm_qos; - bool disable_lpm; bool is_lane_clks_enabled; bool sec_cfg_updated; diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 9c84935bf443..75fa8714d07b 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4337,9 +4337,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) if (ufshcd_is_hibern8_on_idle_allowed(hba)) WARN_ON(hba->hibern8_on_idle.state != HIBERN8_EXITED); - /* Vote PM QoS for the request */ - ufshcd_vops_pm_qos_req_start(hba, cmd->request); - /* IO svc time latency histogram */ if (hba != NULL && cmd->request != NULL) { if (hba->latency_hist_enabled) { @@ -4384,7 +4381,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) lrbp->cmd = NULL; clear_bit_unlock(tag, &hba->lrb_in_use); ufshcd_release_all(hba); - ufshcd_vops_pm_qos_req_end(hba, cmd->request, true); goto out; } @@ -4394,7 +4390,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) lrbp->cmd = NULL; clear_bit_unlock(tag, &hba->lrb_in_use); ufshcd_release_all(hba); - ufshcd_vops_pm_qos_req_end(hba, cmd->request, true); goto out; } @@ -4412,7 +4407,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) lrbp->cmd = NULL; clear_bit_unlock(tag, &hba->lrb_in_use); ufshcd_release_all(hba); - ufshcd_vops_pm_qos_req_end(hba, cmd->request, true); dev_err(hba->dev, "%s: failed sending command, %d\n", __func__, err); err = DID_ERROR; @@ -7481,8 +7475,6 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, * this must be 
called before calling * ->scsi_done() callback. */ - ufshcd_vops_pm_qos_req_end(hba, cmd->request, - false); } req = cmd->request; @@ -7563,8 +7555,6 @@ void ufshcd_abort_outstanding_transfer_requests(struct ufs_hba *hba, int result) * this must be called before calling * ->scsi_done() callback. */ - ufshcd_vops_pm_qos_req_end(hba, cmd->request, - true); } /* Do not touch lrbp after scsi done */ cmd->scsi_done(cmd); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index a7f722210694..f5eae627e1b7 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -402,14 +402,6 @@ struct ufs_hba_variant_ops { const union ufs_crypto_cfg_entry *cfg, int slot); }; -/** -* struct ufs_hba_pm_qos_variant_ops - variant specific PM QoS callbacks -*/ -struct ufs_hba_pm_qos_variant_ops { - void (*req_start)(struct ufs_hba *, struct request *); - void (*req_end)(struct ufs_hba *, struct request *, bool); -}; - /** * struct ufs_hba_variant - variant specific parameters * @name: variant name @@ -418,7 +410,6 @@ struct ufs_hba_variant { struct device *dev; const char *name; struct ufs_hba_variant_ops *vops; - struct ufs_hba_pm_qos_variant_ops *pm_qos_vops; }; struct keyslot_mgmt_ll_ops; @@ -1694,21 +1685,6 @@ static inline void ufshcd_vops_remove_debugfs(struct ufs_hba *hba) } #endif -static inline void ufshcd_vops_pm_qos_req_start(struct ufs_hba *hba, - struct request *req) -{ - if (hba->var && hba->var->pm_qos_vops && - hba->var->pm_qos_vops->req_start) - hba->var->pm_qos_vops->req_start(hba, req); -} - -static inline void ufshcd_vops_pm_qos_req_end(struct ufs_hba *hba, - struct request *req, bool lock) -{ - if (hba->var && hba->var->pm_qos_vops && hba->var->pm_qos_vops->req_end) - hba->var->pm_qos_vops->req_end(hba, req, lock); -} - #define UFS_DEV_ATTR(name, fmt, args...) 
\ static ssize_t ufs_##name##_show(struct device *dev, struct device_attribute *attr, char *buf) \ { \ From 07aeced86bb2e6cd106f19451189f702daef1307 Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Sun, 21 Feb 2021 16:57:06 -0800 Subject: [PATCH 83/85] scsi: ufs: Fix compilation when command logging is disabled Signed-off-by: Sultan Alsawaf --- drivers/scsi/ufs/ufshcd.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 75fa8714d07b..0bfeb29e0156 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -1220,22 +1220,6 @@ static void ufshcd_cmd_log_init(struct ufs_hba *hba) { } -static void __ufshcd_cmd_log(struct ufs_hba *hba, char *str, char *cmd_type, - unsigned int tag, u8 cmd_id, u8 idn, u8 lun, - sector_t lba, int transfer_len) -{ - struct ufshcd_cmd_log_entry entry; - - entry.str = str; - entry.lba = lba; - entry.cmd_id = cmd_id; - entry.transfer_len = transfer_len; - entry.doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); - entry.tag = tag; - - ufshcd_add_command_trace(hba, &entry); -} - static void ufshcd_dme_cmd_log(struct ufs_hba *hba, char *str, u8 cmd_id) { } From 577e047f9f93e87f1a5e54ff3206b8673914dc10 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 24 May 2018 23:06:58 -0700 Subject: [PATCH 84/85] scsi: ufs: disallow SECURITY_PROTOCOL_IN without _OUT This merged the following fix: 6a317b49c98c ("scsi: ufs: revise commit ecd2676bd513 ("disallow SECURITY_PROTOCOL_IN without _OUT")") If we allow this, Hynix will give timeout due to spec violation. The latest Hynix controller gives error instead of timeout. 
Bug: 113580864 Bug: 79898356 Bug: 109850759 Bug: 117682499 Bug: 112560467 Change-Id: Ie7820a9604e4c7bc4cc530acf41bb5bb72f33d5b Signed-off-by: Jaegeuk Kim Signed-off-by: Randall Huang (cherry picked from commit 003012f13632af193b7ec5656e5ed5a6747ee0dd) Signed-off-by: alk3pInjection Signed-off-by: UtsavBalar1231 --- drivers/scsi/ufs/ufshcd.c | 24 +++++++++++++++++++++--- drivers/scsi/ufs/ufshcd.h | 2 ++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 0bfeb29e0156..86abec5f5c0f 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -3495,7 +3495,19 @@ static void ufshcd_clk_scaling_update_busy(struct ufs_hba *hba) static inline int ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag) { - int ret = 0; + if (hba->lrb[task_tag].cmd) { + u8 opcode = (u8)(*hba->lrb[task_tag].cmd->cmnd); + + if (opcode == SECURITY_PROTOCOL_OUT && hba->security_in) { + hba->security_in--; + } else if (opcode == SECURITY_PROTOCOL_IN) { + if (hba->security_in) { + WARN_ON(1); + return -EINVAL; + } + hba->security_in++; + } + } hba->lrb[task_tag].issue_time_stamp = ktime_get(); hba->lrb[task_tag].complete_time_stamp = ktime_set(0, 0); @@ -3507,7 +3519,7 @@ int ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag) ufshcd_cond_add_cmd_trace(hba, task_tag, hba->lrb[task_tag].cmd ? 
"scsi_send" : "dev_cmd_send"); ufshcd_update_tag_stats(hba, task_tag); - return ret; + return 0; } /** @@ -4393,7 +4405,13 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) ufshcd_release_all(hba); dev_err(hba->dev, "%s: failed sending command, %d\n", __func__, err); - err = DID_ERROR; + if (err == -EINVAL) { + set_host_byte(cmd, DID_ERROR); + if (has_read_lock) + ufshcd_put_read_lock(hba); + cmd->scsi_done(cmd); + return 0; + } goto out; } diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index f5eae627e1b7..935ba39d3436 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -1103,6 +1103,8 @@ struct ufs_hba { /* Number of requests aborts */ int req_abort_count; + u32 security_in; + /* Number of lanes available (1 or 2) for Rx/Tx */ u32 lanes_per_direction; From 3340d216db6673c879c807e3e6ab52b0a08689bf Mon Sep 17 00:00:00 2001 From: Sultan Alsawaf Date: Mon, 22 Feb 2021 00:43:24 -0800 Subject: [PATCH 85/85] scsi: ufs: Add simple IRQ-affined PM QoS operations Qualcomm's PM QoS solution suffers from a number of issues: applying PM QoS to all CPUs, convoluted spaghetti code that wastes CPU cycles, and keeping PM QoS applied for 10 ms after all requests finish processing. This implements a simple IRQ-affined PM QoS mechanism for each UFS adapter which uses atomics to elide locking, and enqueues a worker to apply PM QoS to the target CPU as soon as a command request is issued. 
Signed-off-by: Sultan Alsawaf Signed-off-by: alk3pInjection Signed-off-by: UtsavBalar1231 --- drivers/scsi/ufs/ufshcd.c | 85 ++++++++++++++++++++++++++++++++++----- drivers/scsi/ufs/ufshcd.h | 10 +++++ 2 files changed, 84 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 86abec5f5c0f..dc95fe1d2a59 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -4217,6 +4217,48 @@ static inline void ufshcd_put_read_lock(struct ufs_hba *hba) up_read(&hba->lock); } +static void ufshcd_pm_qos_get_worker(struct work_struct *work) +{ + struct ufs_hba *hba = container_of(work, typeof(*hba), pm_qos.get_work); + + if (!atomic_read(&hba->pm_qos.count)) + return; + + mutex_lock(&hba->pm_qos.lock); + if (atomic_read(&hba->pm_qos.count) && !hba->pm_qos.active) { + pm_qos_update_request(&hba->pm_qos.req, 100); + hba->pm_qos.active = true; + } + mutex_unlock(&hba->pm_qos.lock); +} + +static void ufshcd_pm_qos_put_worker(struct work_struct *work) +{ + struct ufs_hba *hba = container_of(work, typeof(*hba), pm_qos.put_work); + + if (atomic_read(&hba->pm_qos.count)) + return; + + mutex_lock(&hba->pm_qos.lock); + if (!atomic_read(&hba->pm_qos.count) && hba->pm_qos.active) { + pm_qos_update_request(&hba->pm_qos.req, PM_QOS_DEFAULT_VALUE); + hba->pm_qos.active = false; + } + mutex_unlock(&hba->pm_qos.lock); +} + +static void ufshcd_pm_qos_get(struct ufs_hba *hba) +{ + if (atomic_inc_return(&hba->pm_qos.count) == 1) + queue_work(system_unbound_wq, &hba->pm_qos.get_work); +} + +static void ufshcd_pm_qos_put(struct ufs_hba *hba) +{ + if (atomic_dec_return(&hba->pm_qos.count) == 0) + queue_work(system_unbound_wq, &hba->pm_qos.put_work); +} + /** * ufshcd_queuecommand - main entry point for SCSI requests * @cmd: command from SCSI Midlayer @@ -4232,12 +4274,16 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) int tag; int err = 0; bool has_read_lock = false; + bool cmd_sent = false; hba = 
shost_priv(host); if (!cmd || !cmd->request || !hba) return -EINVAL; + /* Wake the CPU managing the IRQ as soon as possible */ + ufshcd_pm_qos_get(hba); + tag = cmd->request->tag; if (!ufshcd_valid_tag(hba, tag)) { dev_err(hba->dev, @@ -4249,10 +4295,13 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) err = ufshcd_get_read_lock(hba, cmd->device->lun); if (unlikely(err < 0)) { if (err == -EPERM) { - return SCSI_MLQUEUE_HOST_BUSY; + err = SCSI_MLQUEUE_HOST_BUSY; + goto out_pm_qos; + } + if (err == -EAGAIN) { + err = SCSI_MLQUEUE_HOST_BUSY; + goto out_pm_qos; } - if (err == -EAGAIN) - return SCSI_MLQUEUE_HOST_BUSY; } else if (err == 1) { has_read_lock = true; } @@ -4410,16 +4459,22 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) if (has_read_lock) ufshcd_put_read_lock(hba); cmd->scsi_done(cmd); - return 0; + err = 0; + goto out_pm_qos; } goto out; } + cmd_sent = true; + out_unlock: spin_unlock_irqrestore(hba->host->host_lock, flags); out: if (has_read_lock) ufshcd_put_read_lock(hba); +out_pm_qos: + if (!cmd_sent) + ufshcd_pm_qos_put(hba); return err; } @@ -7481,6 +7536,7 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, req = cmd->request; if (req) { + ufshcd_pm_qos_put(hba); /* Update IO svc time latency histogram */ if (req->lat_hist_enabled) { ktime_t completion; @@ -7551,13 +7607,8 @@ void ufshcd_abort_outstanding_transfer_requests(struct ufs_hba *hba, int result) /* Mark completed command as NULL in LRB */ lrbp->cmd = NULL; ufshcd_release_all(hba); - if (cmd->request) { - /* - * As we are accessing the "request" structure, - * this must be called before calling - * ->scsi_done() callback. 
- */ - } + if (cmd->request) + ufshcd_pm_qos_put(hba); /* Do not touch lrbp after scsi done */ cmd->scsi_done(cmd); } else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE) { @@ -12691,6 +12742,9 @@ void ufshcd_remove(struct ufs_hba *hba) /* disable interrupts */ ufshcd_disable_intr(hba, hba->intr_mask); ufshcd_hba_stop(hba, true); + cancel_work_sync(&hba->pm_qos.put_work); + cancel_work_sync(&hba->pm_qos.get_work); + pm_qos_remove_request(&hba->pm_qos.req); ufshcd_exit_clk_gating(hba); ufshcd_exit_hibern8_on_idle(hba); @@ -12969,6 +13023,14 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) */ ufshcd_readl(hba, REG_INTERRUPT_ENABLE); + mutex_init(&hba->pm_qos.lock); + INIT_WORK(&hba->pm_qos.get_work, ufshcd_pm_qos_get_worker); + INIT_WORK(&hba->pm_qos.put_work, ufshcd_pm_qos_put_worker); + hba->pm_qos.req.type = PM_QOS_REQ_AFFINE_IRQ; + hba->pm_qos.req.irq = irq; + pm_qos_add_request(&hba->pm_qos.req, PM_QOS_CPU_DMA_LATENCY, + PM_QOS_DEFAULT_VALUE); + /* IRQ registration */ err = devm_request_irq(dev, irq, ufshcd_intr, IRQF_SHARED, dev_name(dev), hba); @@ -13075,6 +13137,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) out_remove_scsi_host: scsi_remove_host(hba->host); exit_gating: + pm_qos_remove_request(&hba->pm_qos.req); ufshcd_exit_clk_gating(hba); ufshcd_exit_latency_hist(hba); out_disable: diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index 935ba39d3436..b769fe40943c 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -58,6 +58,7 @@ #include #include #include +#include #include "unipro.h" #include @@ -1214,6 +1215,15 @@ struct ufs_hba { void *crypto_DO_NOT_USE[8]; #endif /* CONFIG_SCSI_UFS_CRYPTO */ + struct { + struct pm_qos_request req; + struct work_struct get_work; + struct work_struct put_work; + struct mutex lock; + atomic_t count; + bool active; + } pm_qos; + #if IS_ENABLED(CONFIG_BLK_TURBO_WRITE) bool support_tw; bool tw_state_not_allowed;