Merge branch 'android-4.14' of https://github.com/pascua28/android_kernel_samsung_sm7150 into 16.0
Change-Id: I0b8c22853de7baba34abdc8c4792d4b2bf07cfef Signed-off-by: Samuel Pascua <pascua.samuel.14@gmail.com>
This commit is contained in:
@@ -194,13 +194,6 @@ Optional Properties:
|
||||
Specify the number of macrotiling channels for this chip.
|
||||
This is programmed into certain registers and also pass to
|
||||
the user as a property.
|
||||
- qcom,l2pc-cpu-mask:
|
||||
Disables L2PC on masked CPUs when any of Graphics
|
||||
rendering thread is running on masked CPUs.
|
||||
Bit 0 is for CPU-0, bit 1 is for CPU-1...
|
||||
|
||||
- qcom,l2pc-update-queue:
|
||||
Disables L2PC on masked CPUs at queue time when it's true.
|
||||
|
||||
- qcom,snapshot-size:
|
||||
Specify the size of snapshot in bytes. This will override
|
||||
|
||||
13
MAINTAINERS
13
MAINTAINERS
@@ -9057,6 +9057,19 @@ S: Maintained
|
||||
F: arch/arm/boot/dts/mmp*
|
||||
F: arch/arm/mach-mmp/
|
||||
|
||||
MMU GATHER AND TLB INVALIDATION
|
||||
M: Will Deacon <will.deacon@arm.com>
|
||||
M: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
|
||||
M: Andrew Morton <akpm@linux-foundation.org>
|
||||
M: Nick Piggin <npiggin@gmail.com>
|
||||
M: Peter Zijlstra <peterz@infradead.org>
|
||||
L: linux-arch@vger.kernel.org
|
||||
L: linux-mm@kvack.org
|
||||
S: Maintained
|
||||
F: arch/*/include/asm/tlb.h
|
||||
F: include/asm-generic/tlb.h
|
||||
F: mm/mmu_gather.c
|
||||
|
||||
MN88472 MEDIA DRIVER
|
||||
M: Antti Palosaari <crope@iki.fi>
|
||||
L: linux-media@vger.kernel.org
|
||||
|
||||
@@ -28,7 +28,6 @@
|
||||
#include <asm/byteorder.h>
|
||||
#include <asm/memory.h>
|
||||
#include <asm-generic/pci_iomap.h>
|
||||
#include <linux/msm_rtb.h>
|
||||
#include <xen/xen.h>
|
||||
|
||||
/*
|
||||
@@ -62,24 +61,23 @@ void __raw_readsl(const volatile void __iomem *addr, void *data, int longlen);
|
||||
* the bus. Rather than special-case the machine, just let the compiler
|
||||
* generate the access for CPUs prior to ARMv6.
|
||||
*/
|
||||
#define __raw_readw_no_log(a) (__chk_io_ptr(a), \
|
||||
*(volatile unsigned short __force *)(a))
|
||||
#define __raw_writew_no_log(v, a) ((void)(__chk_io_ptr(a), \
|
||||
*(volatile unsigned short __force *)\
|
||||
(a) = (v)))
|
||||
#define __raw_readw(a) (__chk_io_ptr(a), *(volatile unsigned short __force *)(a))
|
||||
#define __raw_writew(v,a) ((void)(__chk_io_ptr(a), *(volatile unsigned short __force *)(a) = (v)))
|
||||
#else
|
||||
/*
|
||||
* When running under a hypervisor, we want to avoid I/O accesses with
|
||||
* writeback addressing modes as these incur a significant performance
|
||||
* overhead (the address generation must be emulated in software).
|
||||
*/
|
||||
static inline void __raw_writew_no_log(u16 val, volatile void __iomem *addr)
|
||||
#define __raw_writew __raw_writew
|
||||
static inline void __raw_writew(u16 val, volatile void __iomem *addr)
|
||||
{
|
||||
asm volatile("strh %1, %0"
|
||||
: : "Q" (*(volatile u16 __force *)addr), "r" (val));
|
||||
}
|
||||
|
||||
static inline u16 __raw_readw_no_log(const volatile void __iomem *addr)
|
||||
#define __raw_readw __raw_readw
|
||||
static inline u16 __raw_readw(const volatile void __iomem *addr)
|
||||
{
|
||||
u16 val;
|
||||
asm volatile("ldrh %0, %1"
|
||||
@@ -89,19 +87,22 @@ static inline u16 __raw_readw_no_log(const volatile void __iomem *addr)
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void __raw_writeb_no_log(u8 val, volatile void __iomem *addr)
|
||||
#define __raw_writeb __raw_writeb
|
||||
static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
|
||||
{
|
||||
asm volatile("strb %1, %0"
|
||||
: : "Qo" (*(volatile u8 __force *)addr), "r" (val));
|
||||
}
|
||||
|
||||
static inline void __raw_writel_no_log(u32 val, volatile void __iomem *addr)
|
||||
#define __raw_writel __raw_writel
|
||||
static inline void __raw_writel(u32 val, volatile void __iomem *addr)
|
||||
{
|
||||
asm volatile("str %1, %0"
|
||||
: : "Qo" (*(volatile u32 __force *)addr), "r" (val));
|
||||
}
|
||||
|
||||
static inline void __raw_writeq_no_log(u64 val, volatile void __iomem *addr)
|
||||
#define __raw_writeq __raw_writeq
|
||||
static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
|
||||
{
|
||||
register u64 v asm ("r2");
|
||||
|
||||
@@ -112,7 +113,8 @@ static inline void __raw_writeq_no_log(u64 val, volatile void __iomem *addr)
|
||||
: "r" (v));
|
||||
}
|
||||
|
||||
static inline u8 __raw_readb_no_log(const volatile void __iomem *addr)
|
||||
#define __raw_readb __raw_readb
|
||||
static inline u8 __raw_readb(const volatile void __iomem *addr)
|
||||
{
|
||||
u8 val;
|
||||
asm volatile("ldrb %0, %1"
|
||||
@@ -121,7 +123,8 @@ static inline u8 __raw_readb_no_log(const volatile void __iomem *addr)
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline u32 __raw_readl_no_log(const volatile void __iomem *addr)
|
||||
#define __raw_readl __raw_readl
|
||||
static inline u32 __raw_readl(const volatile void __iomem *addr)
|
||||
{
|
||||
u32 val;
|
||||
asm volatile("ldr %0, %1"
|
||||
@@ -130,7 +133,8 @@ static inline u32 __raw_readl_no_log(const volatile void __iomem *addr)
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline u64 __raw_readq_no_log(const volatile void __iomem *addr)
|
||||
#define __raw_readq __raw_readq
|
||||
static inline u64 __raw_readq(const volatile void __iomem *addr)
|
||||
{
|
||||
register u64 val asm ("r2");
|
||||
|
||||
@@ -140,48 +144,6 @@ static inline u64 __raw_readq_no_log(const volatile void __iomem *addr)
|
||||
return val;
|
||||
}
|
||||
|
||||
/*
|
||||
* There may be cases when clients don't want to support or can't support the
|
||||
* logging. The appropriate functions can be used but clients should carefully
|
||||
* consider why they can't support the logging.
|
||||
*/
|
||||
|
||||
#define __raw_write_logged(v, a, _t) ({ \
|
||||
int _ret; \
|
||||
volatile void __iomem *_a = (a); \
|
||||
void *_addr = (void __force *)(_a); \
|
||||
_ret = uncached_logk(LOGK_WRITEL, _addr); \
|
||||
ETB_WAYPOINT; \
|
||||
__raw_write##_t##_no_log((v), _a); \
|
||||
if (_ret) \
|
||||
LOG_BARRIER; \
|
||||
})
|
||||
|
||||
|
||||
#define __raw_writeb(v, a) __raw_write_logged((v), (a), b)
|
||||
#define __raw_writew(v, a) __raw_write_logged((v), (a), w)
|
||||
#define __raw_writel(v, a) __raw_write_logged((v), (a), l)
|
||||
#define __raw_writeq(v, a) __raw_write_logged((v), (a), q)
|
||||
|
||||
#define __raw_read_logged(a, _l, _t) ({ \
|
||||
unsigned _t __a; \
|
||||
const volatile void __iomem *_a = (a); \
|
||||
void *_addr = (void __force *)(_a); \
|
||||
int _ret; \
|
||||
_ret = uncached_logk(LOGK_READL, _addr); \
|
||||
ETB_WAYPOINT; \
|
||||
__a = __raw_read##_l##_no_log(_a);\
|
||||
if (_ret) \
|
||||
LOG_BARRIER; \
|
||||
__a; \
|
||||
})
|
||||
|
||||
|
||||
#define __raw_readb(a) __raw_read_logged((a), b, char)
|
||||
#define __raw_readw(a) __raw_read_logged((a), w, short)
|
||||
#define __raw_readl(a) __raw_read_logged((a), l, int)
|
||||
#define __raw_readq(a) __raw_read_logged((a), q, long long)
|
||||
|
||||
/*
|
||||
* Architecture ioremap implementation.
|
||||
*/
|
||||
@@ -363,24 +325,12 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
|
||||
__raw_readl(c)); __r; })
|
||||
#define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64) \
|
||||
__raw_readq(c)); __r; })
|
||||
#define readb_relaxed_no_log(c) ({ u8 __r = __raw_readb_no_log(c); __r; })
|
||||
#define readl_relaxed_no_log(c) ({ u32 __r = le32_to_cpu((__force __le32) \
|
||||
__raw_readl_no_log(c)); __r; })
|
||||
#define readq_relaxed_no_log(c) ({ u64 __r = le64_to_cpu((__force __le64) \
|
||||
__raw_readq_no_log(c)); __r; })
|
||||
|
||||
|
||||
#define writeb_relaxed(v, c) __raw_writeb(v, c)
|
||||
#define writew_relaxed(v, c) __raw_writew((__force u16) cpu_to_le16(v), c)
|
||||
#define writel_relaxed(v, c) __raw_writel((__force u32) cpu_to_le32(v), c)
|
||||
#define writeq_relaxed(v, c) __raw_writeq((__force u64) cpu_to_le64(v), c)
|
||||
#define writeb_relaxed_no_log(v, c) ((void)__raw_writeb_no_log((v), (c)))
|
||||
#define writew_relaxed_no_log(v, c) __raw_writew_no_log((__force u16) \
|
||||
cpu_to_le16(v), c)
|
||||
#define writel_relaxed_no_log(v, c) __raw_writel_no_log((__force u32) \
|
||||
cpu_to_le32(v), c)
|
||||
#define writeq_relaxed_no_log(v, c) __raw_writeq_no_log((__force u64) \
|
||||
cpu_to_le64(v), c)
|
||||
|
||||
#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; })
|
||||
#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; })
|
||||
@@ -401,24 +351,6 @@ extern void _memset_io(volatile void __iomem *, int, size_t);
|
||||
#define writesw(p,d,l) __raw_writesw(p,d,l)
|
||||
#define writesl(p,d,l) __raw_writesl(p,d,l)
|
||||
|
||||
#define readb_no_log(c) \
|
||||
({ u8 __v = readb_relaxed_no_log(c); __iormb(); __v; })
|
||||
#define readw_no_log(c) \
|
||||
({ u16 __v = readw_relaxed_no_log(c); __iormb(); __v; })
|
||||
#define readl_no_log(c) \
|
||||
({ u32 __v = readl_relaxed_no_log(c); __iormb(); __v; })
|
||||
#define readq_no_log(c) \
|
||||
({ u64 __v = readq_relaxed_no_log(c); __iormb(); __v; })
|
||||
|
||||
#define writeb_no_log(v, c) \
|
||||
({ __iowmb(); writeb_relaxed_no_log((v), (c)); })
|
||||
#define writew_no_log(v, c) \
|
||||
({ __iowmb(); writew_relaxed_no_log((v), (c)); })
|
||||
#define writel_no_log(v, c) \
|
||||
({ __iowmb(); writel_relaxed_no_log((v), (c)); })
|
||||
#define writeq_no_log(v, c) \
|
||||
({ __iowmb(); writeq_relaxed_no_log((v), (c)); })
|
||||
|
||||
#ifndef __ARMBE__
|
||||
static inline void memset_io(volatile void __iomem *dst, unsigned c,
|
||||
size_t count)
|
||||
|
||||
@@ -46,21 +46,21 @@ EXPORT_SYMBOL(atomic_io_modify);
|
||||
void _memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
|
||||
{
|
||||
while (count && (!IO_CHECK_ALIGN(from, 8) || !IO_CHECK_ALIGN(to, 8))) {
|
||||
*(u8 *)to = readb_relaxed_no_log(from);
|
||||
*(u8 *)to = readb_relaxed(from);
|
||||
from++;
|
||||
to++;
|
||||
count--;
|
||||
}
|
||||
|
||||
while (count >= 8) {
|
||||
*(u64 *)to = readq_relaxed_no_log(from);
|
||||
*(u64 *)to = readq_relaxed(from);
|
||||
from += 8;
|
||||
to += 8;
|
||||
count -= 8;
|
||||
}
|
||||
|
||||
while (count) {
|
||||
*(u8 *)to = readb_relaxed_no_log(from);
|
||||
*(u8 *)to = readb_relaxed(from);
|
||||
from++;
|
||||
to++;
|
||||
count--;
|
||||
@@ -76,21 +76,21 @@ void _memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
|
||||
void *p = (void __force *)to;
|
||||
|
||||
while (count && (!IO_CHECK_ALIGN(p, 8) || !IO_CHECK_ALIGN(from, 8))) {
|
||||
writeb_relaxed_no_log(*(volatile u8 *)from, p);
|
||||
writeb_relaxed(*(volatile u8 *)from, p);
|
||||
from++;
|
||||
p++;
|
||||
count--;
|
||||
}
|
||||
|
||||
while (count >= 8) {
|
||||
writeq_relaxed_no_log(*(volatile u64 *)from, p);
|
||||
writeq_relaxed(*(volatile u64 *)from, p);
|
||||
from += 8;
|
||||
p += 8;
|
||||
count -= 8;
|
||||
}
|
||||
|
||||
while (count) {
|
||||
writeb_relaxed_no_log(*(volatile u8 *)from, p);
|
||||
writeb_relaxed(*(volatile u8 *)from, p);
|
||||
from++;
|
||||
p++;
|
||||
count--;
|
||||
@@ -111,19 +111,19 @@ void _memset_io(volatile void __iomem *dst, int c, size_t count)
|
||||
qc |= qc << 32;
|
||||
|
||||
while (count && !IO_CHECK_ALIGN(p, 8)) {
|
||||
writeb_relaxed_no_log(c, p);
|
||||
writeb_relaxed(c, p);
|
||||
p++;
|
||||
count--;
|
||||
}
|
||||
|
||||
while (count >= 8) {
|
||||
writeq_relaxed_no_log(qc, p);
|
||||
writeq_relaxed(qc, p);
|
||||
p += 8;
|
||||
count -= 8;
|
||||
}
|
||||
|
||||
while (count) {
|
||||
writeb_relaxed_no_log(c, p);
|
||||
writeb_relaxed(c, p);
|
||||
p++;
|
||||
count--;
|
||||
}
|
||||
|
||||
@@ -127,6 +127,7 @@ config ARM64
|
||||
select HAVE_PERF_USER_STACK_DUMP
|
||||
select HAVE_REGS_AND_STACK_ACCESS_API
|
||||
select HAVE_RCU_TABLE_FREE
|
||||
select HAVE_RCU_TABLE_INVALIDATE
|
||||
select HAVE_SYSCALL_TRACEPOINTS
|
||||
select HAVE_KPROBES
|
||||
select HAVE_KRETPROBES
|
||||
|
||||
@@ -5524,11 +5524,10 @@ CONFIG_DAX=y
|
||||
CONFIG_NVMEM=y
|
||||
# CONFIG_QCOM_QFPROM is not set
|
||||
CONFIG_NVMEM_SPMI_SDAM=y
|
||||
CONFIG_STM=y
|
||||
# CONFIG_STM_DUMMY is not set
|
||||
# CONFIG_STM_SOURCE_CONSOLE is not set
|
||||
# CONFIG_STM_SOURCE_HEARTBEAT is not set
|
||||
# CONFIG_STM is not set
|
||||
# CONFIG_INTEL_TH is not set
|
||||
CONFIG_CORESIGHT_PLACEHOLDER=y
|
||||
CONFIG_CORESIGHT_AMBA_PLACEHOLDER=y
|
||||
# CONFIG_FPGA is not set
|
||||
|
||||
#
|
||||
@@ -6222,8 +6221,6 @@ CONFIG_RING_BUFFER=y
|
||||
CONFIG_EVENT_TRACING=y
|
||||
CONFIG_CONTEXT_SWITCH_TRACER=y
|
||||
CONFIG_IPC_LOGGING=y
|
||||
CONFIG_QCOM_RTB=y
|
||||
CONFIG_QCOM_RTB_SEPARATE_CPUS=y
|
||||
CONFIG_TRACING=y
|
||||
CONFIG_GENERIC_TRACER=y
|
||||
CONFIG_TRACING_SUPPORT=y
|
||||
@@ -6305,30 +6302,7 @@ CONFIG_DEBUG_ALIGN_RODATA=y
|
||||
#
|
||||
CONFIG_SEC_PM=y
|
||||
CONFIG_SEC_PM_DEBUG=y
|
||||
CONFIG_CORESIGHT=y
|
||||
CONFIG_CORESIGHT_LINKS_AND_SINKS=y
|
||||
CONFIG_CORESIGHT_LINK_AND_SINK_TMC=y
|
||||
# CONFIG_CORESIGHT_CATU is not set
|
||||
# CONFIG_CORESIGHT_SINK_TPIU is not set
|
||||
# CONFIG_CORESIGHT_SINK_ETBV10 is not set
|
||||
# CONFIG_CORESIGHT_SOURCE_ETM4X is not set
|
||||
CONFIG_CORESIGHT_DYNAMIC_REPLICATOR=y
|
||||
# CONFIG_CORESIGHT_DBGUI is not set
|
||||
CONFIG_CORESIGHT_STM=y
|
||||
# CONFIG_CORESIGHT_CPU_DEBUG is not set
|
||||
CONFIG_CORESIGHT_CTI=y
|
||||
CONFIG_CORESIGHT_OST=y
|
||||
CONFIG_CORESIGHT_TPDA=y
|
||||
CONFIG_CORESIGHT_TPDM=y
|
||||
# CONFIG_CORESIGHT_TPDM_DEFAULT_ENABLE is not set
|
||||
# CONFIG_CORESIGHT_QPDI is not set
|
||||
CONFIG_CORESIGHT_HWEVENT=y
|
||||
CONFIG_CORESIGHT_DUMMY=y
|
||||
CONFIG_CORESIGHT_REMOTE_ETM=y
|
||||
CONFIG_CORESIGHT_REMOTE_ETM_DEFAULT_ENABLE=0
|
||||
CONFIG_CORESIGHT_CSR=y
|
||||
# CONFIG_CORESIGHT_TGU is not set
|
||||
CONFIG_CORESIGHT_EVENT=y
|
||||
# CONFIG_CORESIGHT is not set
|
||||
|
||||
#
|
||||
# Security options
|
||||
|
||||
@@ -120,8 +120,8 @@ static inline void gic_write_bpr1(u32 val)
|
||||
write_sysreg_s(val, SYS_ICC_BPR1_EL1);
|
||||
}
|
||||
|
||||
#define gic_read_typer(c) readq_relaxed_no_log(c)
|
||||
#define gic_write_irouter(v, c) writeq_relaxed_no_log(v, c)
|
||||
#define gic_read_typer(c) readq_relaxed(c)
|
||||
#define gic_write_irouter(v, c) writeq_relaxed(v, c)
|
||||
#define gic_read_lpir(c) readq_relaxed(c)
|
||||
#define gic_write_lpir(v, c) writeq_relaxed(v, c)
|
||||
|
||||
|
||||
@@ -30,35 +30,38 @@
|
||||
#include <asm/early_ioremap.h>
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <linux/msm_rtb.h>
|
||||
|
||||
#include <xen/xen.h>
|
||||
|
||||
/*
|
||||
* Generic IO read/write. These perform native-endian accesses.
|
||||
* that some architectures will want to re-define __raw_{read,write}w.
|
||||
*/
|
||||
static inline void __raw_writeb_no_log(u8 val, volatile void __iomem *addr)
|
||||
#define __raw_writeb __raw_writeb
|
||||
static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
|
||||
{
|
||||
asm volatile("strb %w0, [%1]" : : "rZ" (val), "r" (addr));
|
||||
}
|
||||
|
||||
static inline void __raw_writew_no_log(u16 val, volatile void __iomem *addr)
|
||||
#define __raw_writew __raw_writew
|
||||
static inline void __raw_writew(u16 val, volatile void __iomem *addr)
|
||||
{
|
||||
asm volatile("strh %w0, [%1]" : : "rZ" (val), "r" (addr));
|
||||
}
|
||||
|
||||
static inline void __raw_writel_no_log(u32 val, volatile void __iomem *addr)
|
||||
#define __raw_writel __raw_writel
|
||||
static inline void __raw_writel(u32 val, volatile void __iomem *addr)
|
||||
{
|
||||
asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
|
||||
}
|
||||
|
||||
static inline void __raw_writeq_no_log(u64 val, volatile void __iomem *addr)
|
||||
#define __raw_writeq __raw_writeq
|
||||
static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
|
||||
{
|
||||
asm volatile("str %x0, [%1]" : : "rZ" (val), "r" (addr));
|
||||
}
|
||||
|
||||
static inline u8 __raw_readb_no_log(const volatile void __iomem *addr)
|
||||
#define __raw_readb __raw_readb
|
||||
static inline u8 __raw_readb(const volatile void __iomem *addr)
|
||||
{
|
||||
u8 val;
|
||||
asm volatile(ALTERNATIVE("ldrb %w0, [%1]",
|
||||
@@ -68,7 +71,8 @@ static inline u8 __raw_readb_no_log(const volatile void __iomem *addr)
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline u16 __raw_readw_no_log(const volatile void __iomem *addr)
|
||||
#define __raw_readw __raw_readw
|
||||
static inline u16 __raw_readw(const volatile void __iomem *addr)
|
||||
{
|
||||
u16 val;
|
||||
|
||||
@@ -79,7 +83,8 @@ static inline u16 __raw_readw_no_log(const volatile void __iomem *addr)
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline u32 __raw_readl_no_log(const volatile void __iomem *addr)
|
||||
#define __raw_readl __raw_readl
|
||||
static inline u32 __raw_readl(const volatile void __iomem *addr)
|
||||
{
|
||||
u32 val;
|
||||
asm volatile(ALTERNATIVE("ldr %w0, [%1]",
|
||||
@@ -89,7 +94,8 @@ static inline u32 __raw_readl_no_log(const volatile void __iomem *addr)
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline u64 __raw_readq_no_log(const volatile void __iomem *addr)
|
||||
#define __raw_readq __raw_readq
|
||||
static inline u64 __raw_readq(const volatile void __iomem *addr)
|
||||
{
|
||||
u64 val;
|
||||
asm volatile(ALTERNATIVE("ldr %0, [%1]",
|
||||
@@ -99,48 +105,6 @@ static inline u64 __raw_readq_no_log(const volatile void __iomem *addr)
|
||||
return val;
|
||||
}
|
||||
|
||||
/*
|
||||
* There may be cases when clients don't want to support or can't support the
|
||||
* logging, The appropriate functions can be used but clinets should carefully
|
||||
* consider why they can't support the logging
|
||||
*/
|
||||
|
||||
#define __raw_write_logged(v, a, _t) ({ \
|
||||
int _ret; \
|
||||
volatile void __iomem *_a = (a); \
|
||||
void *_addr = (void __force *)(_a); \
|
||||
_ret = uncached_logk(LOGK_WRITEL, _addr); \
|
||||
if (_ret) /* COFNIG_SEC_DEBUG */\
|
||||
ETB_WAYPOINT; \
|
||||
__raw_write##_t##_no_log((v), _a); \
|
||||
if (_ret) \
|
||||
LOG_BARRIER; \
|
||||
})
|
||||
|
||||
#define __raw_writeb(v, a) __raw_write_logged((v), a, b)
|
||||
#define __raw_writew(v, a) __raw_write_logged((v), a, w)
|
||||
#define __raw_writel(v, a) __raw_write_logged((v), a, l)
|
||||
#define __raw_writeq(v, a) __raw_write_logged((v), a, q)
|
||||
|
||||
#define __raw_read_logged(a, _l, _t) ({ \
|
||||
_t __a; \
|
||||
const volatile void __iomem *_a = (a); \
|
||||
void *_addr = (void __force *)(_a); \
|
||||
int _ret; \
|
||||
_ret = uncached_logk(LOGK_READL, _addr); \
|
||||
if (_ret) /* CONFIG_SEC_DEBUG */ \
|
||||
ETB_WAYPOINT; \
|
||||
__a = __raw_read##_l##_no_log(_a); \
|
||||
if (_ret) \
|
||||
LOG_BARRIER; \
|
||||
__a; \
|
||||
})
|
||||
|
||||
#define __raw_readb(a) __raw_read_logged((a), b, u8)
|
||||
#define __raw_readw(a) __raw_read_logged((a), w, u16)
|
||||
#define __raw_readl(a) __raw_read_logged((a), l, u32)
|
||||
#define __raw_readq(a) __raw_read_logged((a), q, u64)
|
||||
|
||||
/* IO barriers */
|
||||
#define __iormb(v) \
|
||||
({ \
|
||||
@@ -178,22 +142,6 @@ static inline u64 __raw_readq_no_log(const volatile void __iomem *addr)
|
||||
#define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
|
||||
#define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))
|
||||
|
||||
#define readb_relaxed_no_log(c) ({ u8 __v = __raw_readb_no_log(c); __v; })
|
||||
#define readw_relaxed_no_log(c) \
|
||||
({ u16 __v = le16_to_cpu((__force __le16)__raw_readw_no_log(c)); __v; })
|
||||
#define readl_relaxed_no_log(c) \
|
||||
({ u32 __v = le32_to_cpu((__force __le32)__raw_readl_no_log(c)); __v; })
|
||||
#define readq_relaxed_no_log(c) \
|
||||
({ u64 __v = le64_to_cpu((__force __le64)__raw_readq_no_log(c)); __v; })
|
||||
|
||||
#define writeb_relaxed_no_log(v, c) ((void)__raw_writeb_no_log((v), (c)))
|
||||
#define writew_relaxed_no_log(v, c) \
|
||||
((void)__raw_writew_no_log((__force u16)cpu_to_le32(v), (c)))
|
||||
#define writel_relaxed_no_log(v, c) \
|
||||
((void)__raw_writel_no_log((__force u32)cpu_to_le32(v), (c)))
|
||||
#define writeq_relaxed_no_log(v, c) \
|
||||
((void)__raw_writeq_no_log((__force u64)cpu_to_le32(v), (c)))
|
||||
|
||||
/*
|
||||
* I/O memory access primitives. Reads are ordered relative to any
|
||||
* following Normal memory access. Writes are ordered relative to any prior
|
||||
@@ -209,24 +157,6 @@ static inline u64 __raw_readq_no_log(const volatile void __iomem *addr)
|
||||
#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c)); })
|
||||
#define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c)); })
|
||||
|
||||
#define readb_no_log(c) \
|
||||
({ u8 __v = readb_relaxed_no_log(c); __iormb(__v); __v; })
|
||||
#define readw_no_log(c) \
|
||||
({ u16 __v = readw_relaxed_no_log(c); __iormb(__v); __v; })
|
||||
#define readl_no_log(c) \
|
||||
({ u32 __v = readl_relaxed_no_log(c); __iormb(__v); __v; })
|
||||
#define readq_no_log(c) \
|
||||
({ u64 __v = readq_relaxed_no_log(c); __iormb(__v); __v; })
|
||||
|
||||
#define writeb_no_log(v, c) \
|
||||
({ __iowmb(); writeb_relaxed_no_log((v), (c)); })
|
||||
#define writew_no_log(v, c) \
|
||||
({ __iowmb(); writew_relaxed_no_log((v), (c)); })
|
||||
#define writel_no_log(v, c) \
|
||||
({ __iowmb(); writel_relaxed_no_log((v), (c)); })
|
||||
#define writeq_no_log(v, c) \
|
||||
({ __iowmb(); writeq_relaxed_no_log((v), (c)); })
|
||||
|
||||
/*
|
||||
* I/O port access primitives.
|
||||
*/
|
||||
|
||||
@@ -34,19 +34,14 @@
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/sysreg.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <linux/msm_rtb.h>
|
||||
|
||||
static inline void contextidr_thread_switch(struct task_struct *next)
|
||||
{
|
||||
pid_t pid = task_pid_nr(next);
|
||||
|
||||
if (!IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR))
|
||||
return;
|
||||
|
||||
write_sysreg(pid, contextidr_el1);
|
||||
write_sysreg(task_pid_nr(next), contextidr_el1);
|
||||
isb();
|
||||
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -381,6 +381,7 @@ static inline int pmd_protnone(pmd_t pmd)
|
||||
#define pmd_present(pmd) pte_present(pmd_pte(pmd))
|
||||
#define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd))
|
||||
#define pmd_young(pmd) pte_young(pmd_pte(pmd))
|
||||
#define pmd_valid(pmd) pte_valid(pmd_pte(pmd))
|
||||
#define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd)))
|
||||
#define pmd_mkold(pmd) pte_pmd(pte_mkold(pmd_pte(pmd)))
|
||||
#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))
|
||||
@@ -459,8 +460,11 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
|
||||
#else
|
||||
*pmdp = pmd;
|
||||
#endif
|
||||
dsb(ishst);
|
||||
isb();
|
||||
|
||||
if (pmd_valid(pmd)) {
|
||||
dsb(ishst);
|
||||
isb();
|
||||
}
|
||||
}
|
||||
|
||||
static inline void pmd_clear(pmd_t *pmdp)
|
||||
@@ -512,6 +516,7 @@ static inline void pte_unmap(pte_t *pte) { }
|
||||
#define pud_none(pud) (!pud_val(pud))
|
||||
#define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT))
|
||||
#define pud_present(pud) pte_present(pud_pte(pud))
|
||||
#define pud_valid(pud) pte_valid(pud_pte(pud))
|
||||
|
||||
static inline void set_pud(pud_t *pudp, pud_t pud)
|
||||
{
|
||||
@@ -529,8 +534,11 @@ static inline void set_pud(pud_t *pudp, pud_t pud)
|
||||
#else
|
||||
*pudp = pud;
|
||||
#endif
|
||||
dsb(ishst);
|
||||
isb();
|
||||
|
||||
if (pud_valid(pud)) {
|
||||
dsb(ishst);
|
||||
isb();
|
||||
}
|
||||
}
|
||||
|
||||
static inline void pud_clear(pud_t *pudp)
|
||||
|
||||
@@ -25,44 +25,40 @@
|
||||
#include <linux/rkp.h>
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
|
||||
|
||||
#define tlb_remove_entry(tlb, entry) tlb_remove_table(tlb, entry)
|
||||
static inline void __tlb_remove_table(void *_table)
|
||||
{
|
||||
free_page_and_swap_cache((struct page *)_table);
|
||||
}
|
||||
#else
|
||||
#define tlb_remove_entry(tlb, entry) tlb_remove_page(tlb, entry)
|
||||
#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
|
||||
|
||||
static void tlb_flush(struct mmu_gather *tlb);
|
||||
|
||||
#include <asm-generic/tlb.h>
|
||||
|
||||
static inline void tlb_flush(struct mmu_gather *tlb)
|
||||
{
|
||||
struct vm_area_struct vma = { .vm_mm = tlb->mm, };
|
||||
bool last_level = !tlb->freed_tables;
|
||||
unsigned long stride = tlb_get_unmap_size(tlb);
|
||||
|
||||
/*
|
||||
* The ASID allocator will either invalidate the ASID or mark
|
||||
* it as used.
|
||||
* If we're tearing down the address space then we only care about
|
||||
* invalidating the walk-cache, since the ASID allocator won't
|
||||
* reallocate our ASID without invalidating the entire TLB.
|
||||
*/
|
||||
if (tlb->fullmm)
|
||||
if (tlb->fullmm) {
|
||||
if (!last_level)
|
||||
flush_tlb_mm(tlb->mm);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* The intermediate page table levels are already handled by
|
||||
* the __(pte|pmd|pud)_free_tlb() functions, so last level
|
||||
* TLBI is sufficient here.
|
||||
*/
|
||||
__flush_tlb_range(&vma, tlb->start, tlb->end, true);
|
||||
__flush_tlb_range(&vma, tlb->start, tlb->end, stride, last_level);
|
||||
}
|
||||
|
||||
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
|
||||
unsigned long addr)
|
||||
{
|
||||
__flush_tlb_pgtable(tlb->mm, addr);
|
||||
pgtable_page_dtor(pte);
|
||||
tlb_remove_entry(tlb, pte);
|
||||
tlb_remove_table(tlb, pte);
|
||||
}
|
||||
|
||||
#if CONFIG_PGTABLE_LEVELS > 2
|
||||
@@ -74,7 +70,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
|
||||
rkp_ro_free((void *)pmdp);
|
||||
} else
|
||||
#endif
|
||||
tlb_remove_entry(tlb, virt_to_page(pmdp));
|
||||
tlb_remove_table(tlb, virt_to_page(pmdp));
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -87,7 +83,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
|
||||
rkp_ro_free((void *)pudp);
|
||||
else
|
||||
#endif
|
||||
tlb_remove_entry(tlb, virt_to_page(pudp));
|
||||
tlb_remove_table(tlb, virt_to_page(pudp));
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -70,43 +70,73 @@
|
||||
})
|
||||
|
||||
/*
|
||||
* TLB Management
|
||||
* ==============
|
||||
* TLB Invalidation
|
||||
* ================
|
||||
*
|
||||
* The TLB specific code is expected to perform whatever tests it needs
|
||||
* to determine if it should invalidate the TLB for each call. Start
|
||||
* addresses are inclusive and end addresses are exclusive; it is safe to
|
||||
* round these addresses down.
|
||||
* This header file implements the low-level TLB invalidation routines
|
||||
* (sometimes referred to as "flushing" in the kernel) for arm64.
|
||||
*
|
||||
* Every invalidation operation uses the following template:
|
||||
*
|
||||
* DSB ISHST // Ensure prior page-table updates have completed
|
||||
* TLBI ... // Invalidate the TLB
|
||||
* DSB ISH // Ensure the TLB invalidation has completed
|
||||
* if (invalidated kernel mappings)
|
||||
* ISB // Discard any instructions fetched from the old mapping
|
||||
*
|
||||
*
|
||||
* The following functions form part of the "core" TLB invalidation API,
|
||||
* as documented in Documentation/core-api/cachetlb.rst:
|
||||
*
|
||||
* flush_tlb_all()
|
||||
*
|
||||
* Invalidate the entire TLB.
|
||||
* Invalidate the entire TLB (kernel + user) on all CPUs
|
||||
*
|
||||
* flush_tlb_mm(mm)
|
||||
* Invalidate an entire user address space on all CPUs.
|
||||
* The 'mm' argument identifies the ASID to invalidate.
|
||||
*
|
||||
* Invalidate all TLB entries in a particular address space.
|
||||
* - mm - mm_struct describing address space
|
||||
* flush_tlb_range(vma, start, end)
|
||||
* Invalidate the virtual-address range '[start, end)' on all
|
||||
* CPUs for the user address space corresponding to 'vma->mm'.
|
||||
* Note that this operation also invalidates any walk-cache
|
||||
* entries associated with translations for the specified address
|
||||
* range.
|
||||
*
|
||||
* flush_tlb_range(mm,start,end)
|
||||
* flush_tlb_kernel_range(start, end)
|
||||
* Same as flush_tlb_range(..., start, end), but applies to
|
||||
* kernel mappings rather than a particular user address space.
|
||||
* Whilst not explicitly documented, this function is used when
|
||||
* unmapping pages from vmalloc/io space.
|
||||
*
|
||||
* Invalidate a range of TLB entries in the specified address
|
||||
* space.
|
||||
* - mm - mm_struct describing address space
|
||||
* - start - start address (may not be aligned)
|
||||
* - end - end address (exclusive, may not be aligned)
|
||||
* flush_tlb_page(vma, addr)
|
||||
* Invalidate a single user mapping for address 'addr' in the
|
||||
* address space corresponding to 'vma->mm'. Note that this
|
||||
* operation only invalidates a single, last-level page-table
|
||||
* entry and therefore does not affect any walk-caches.
|
||||
*
|
||||
* flush_tlb_page(vaddr,vma)
|
||||
*
|
||||
* Invalidate the specified page in the specified address range.
|
||||
* - vaddr - virtual address (may not be aligned)
|
||||
* - vma - vma_struct describing address range
|
||||
* Next, we have some undocumented invalidation routines that you probably
|
||||
* don't want to call unless you know what you're doing:
|
||||
*
|
||||
* flush_kern_tlb_page(kaddr)
|
||||
* local_flush_tlb_all()
|
||||
* Same as flush_tlb_all(), but only applies to the calling CPU.
|
||||
*
|
||||
* Invalidate the TLB entry for the specified page. The address
|
||||
* will be in the kernels virtual memory space. Current uses
|
||||
* only require the D-TLB to be invalidated.
|
||||
* - kaddr - Kernel virtual memory address
|
||||
* __flush_tlb_kernel_pgtable(addr)
|
||||
* Invalidate a single kernel mapping for address 'addr' on all
|
||||
* CPUs, ensuring that any walk-cache entries associated with the
|
||||
* translation are also invalidated.
|
||||
*
|
||||
* __flush_tlb_range(vma, start, end, stride, last_level)
|
||||
* Invalidate the virtual-address range '[start, end)' on all
|
||||
* CPUs for the user address space corresponding to 'vma->mm'.
|
||||
* The invalidation operations are issued at a granularity
|
||||
* determined by 'stride' and only affect any walk-cache entries
|
||||
* if 'last_level' is equal to false.
|
||||
*
|
||||
*
|
||||
* Finally, take a look at asm/tlb.h to see how tlb_flush() is implemented
|
||||
* on top of these routines, since that is our interface to the mmu_gather
|
||||
* API as used by munmap() and friends.
|
||||
*/
|
||||
static inline void local_flush_tlb_all(void)
|
||||
{
|
||||
@@ -149,25 +179,28 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
|
||||
* This is meant to avoid soft lock-ups on large TLB flushing ranges and not
|
||||
* necessarily a performance improvement.
|
||||
*/
|
||||
#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT)
|
||||
#define MAX_TLBI_OPS 1024UL
|
||||
|
||||
static inline void __flush_tlb_range(struct vm_area_struct *vma,
|
||||
unsigned long start, unsigned long end,
|
||||
bool last_level)
|
||||
unsigned long stride, bool last_level)
|
||||
{
|
||||
unsigned long asid = ASID(vma->vm_mm);
|
||||
unsigned long addr;
|
||||
|
||||
if ((end - start) > MAX_TLB_RANGE) {
|
||||
if ((end - start) > (MAX_TLBI_OPS * stride)) {
|
||||
flush_tlb_mm(vma->vm_mm);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Convert the stride into units of 4k */
|
||||
stride >>= 12;
|
||||
|
||||
start = __TLBI_VADDR(start, asid);
|
||||
end = __TLBI_VADDR(end, asid);
|
||||
|
||||
dsb(ishst);
|
||||
for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) {
|
||||
for (addr = start; addr < end; addr += stride) {
|
||||
if (last_level) {
|
||||
__tlbi(vale1is, addr);
|
||||
__tlbi_user(vale1is, addr);
|
||||
@@ -182,14 +215,18 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
|
||||
static inline void flush_tlb_range(struct vm_area_struct *vma,
|
||||
unsigned long start, unsigned long end)
|
||||
{
|
||||
__flush_tlb_range(vma, start, end, false);
|
||||
/*
|
||||
* We cannot use leaf-only invalidation here, since we may be invalidating
|
||||
* table entries as part of collapsing hugepages or moving page tables.
|
||||
*/
|
||||
__flush_tlb_range(vma, start, end, PAGE_SIZE, false);
|
||||
}
|
||||
|
||||
static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
|
||||
{
|
||||
unsigned long addr;
|
||||
|
||||
if ((end - start) > MAX_TLB_RANGE) {
|
||||
if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) {
|
||||
flush_tlb_all();
|
||||
return;
|
||||
}
|
||||
@@ -199,7 +236,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
|
||||
|
||||
dsb(ishst);
|
||||
for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
|
||||
__tlbi(vaae1is, addr);
|
||||
__tlbi(vaale1is, addr);
|
||||
dsb(ish);
|
||||
isb();
|
||||
}
|
||||
@@ -208,20 +245,11 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
|
||||
* Used to invalidate the TLB (walk caches) corresponding to intermediate page
|
||||
* table levels (pgd/pud/pmd).
|
||||
*/
|
||||
static inline void __flush_tlb_pgtable(struct mm_struct *mm,
|
||||
unsigned long uaddr)
|
||||
{
|
||||
unsigned long addr = __TLBI_VADDR(uaddr, ASID(mm));
|
||||
|
||||
__tlbi(vae1is, addr);
|
||||
__tlbi_user(vae1is, addr);
|
||||
dsb(ish);
|
||||
}
|
||||
|
||||
static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr)
|
||||
{
|
||||
unsigned long addr = __TLBI_VADDR(kaddr, 0);
|
||||
|
||||
dsb(ishst);
|
||||
__tlbi(vaae1is, addr);
|
||||
dsb(ish);
|
||||
}
|
||||
|
||||
@@ -27,21 +27,21 @@ void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
|
||||
{
|
||||
while (count && (!IS_ALIGNED((unsigned long)from, 8) ||
|
||||
!IS_ALIGNED((unsigned long)to, 8))) {
|
||||
*(u8 *)to = __raw_readb_no_log(from);
|
||||
*(u8 *)to = __raw_readb(from);
|
||||
from++;
|
||||
to++;
|
||||
count--;
|
||||
}
|
||||
|
||||
while (count >= 8) {
|
||||
*(u64 *)to = __raw_readq_no_log(from);
|
||||
*(u64 *)to = __raw_readq(from);
|
||||
from += 8;
|
||||
to += 8;
|
||||
count -= 8;
|
||||
}
|
||||
|
||||
while (count) {
|
||||
*(u8 *)to = __raw_readb_no_log(from);
|
||||
*(u8 *)to = __raw_readb(from);
|
||||
from++;
|
||||
to++;
|
||||
count--;
|
||||
@@ -56,21 +56,21 @@ void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
|
||||
{
|
||||
while (count && (!IS_ALIGNED((unsigned long)to, 8) ||
|
||||
!IS_ALIGNED((unsigned long)from, 8))) {
|
||||
__raw_writeb_no_log(*(volatile u8 *)from, to);
|
||||
__raw_writeb(*(volatile u8 *)from, to);
|
||||
from++;
|
||||
to++;
|
||||
count--;
|
||||
}
|
||||
|
||||
while (count >= 8) {
|
||||
__raw_writeq_no_log(*(volatile u64 *)from, to);
|
||||
__raw_writeq(*(volatile u64 *)from, to);
|
||||
from += 8;
|
||||
to += 8;
|
||||
count -= 8;
|
||||
}
|
||||
|
||||
while (count) {
|
||||
__raw_writeb_no_log(*(volatile u8 *)from, to);
|
||||
__raw_writeb(*(volatile u8 *)from, to);
|
||||
from++;
|
||||
to++;
|
||||
count--;
|
||||
@@ -90,19 +90,19 @@ void __memset_io(volatile void __iomem *dst, int c, size_t count)
|
||||
qc |= qc << 32;
|
||||
|
||||
while (count && !IS_ALIGNED((unsigned long)dst, 8)) {
|
||||
__raw_writeb_no_log(c, dst);
|
||||
__raw_writeb(c, dst);
|
||||
dst++;
|
||||
count--;
|
||||
}
|
||||
|
||||
while (count >= 8) {
|
||||
__raw_writeq_no_log(qc, dst);
|
||||
__raw_writeq(qc, dst);
|
||||
dst += 8;
|
||||
count -= 8;
|
||||
}
|
||||
|
||||
while (count) {
|
||||
__raw_writeb_no_log(c, dst);
|
||||
__raw_writeb(c, dst);
|
||||
dst++;
|
||||
count--;
|
||||
}
|
||||
|
||||
@@ -221,6 +221,8 @@ source "drivers/hwtracing/stm/Kconfig"
|
||||
|
||||
source "drivers/hwtracing/intel_th/Kconfig"
|
||||
|
||||
source "drivers/hwtracing/google/Kconfig"
|
||||
|
||||
source "drivers/fpga/Kconfig"
|
||||
|
||||
source "drivers/fsi/Kconfig"
|
||||
|
||||
@@ -187,6 +187,7 @@ obj-$(CONFIG_RAS) += ras/
|
||||
obj-$(CONFIG_THUNDERBOLT) += thunderbolt/
|
||||
obj-$(CONFIG_CORESIGHT) += hwtracing/coresight/
|
||||
obj-y += hwtracing/intel_th/
|
||||
obj-y += hwtracing/google/
|
||||
obj-$(CONFIG_STM) += hwtracing/stm/
|
||||
obj-$(CONFIG_ANDROID) += android/
|
||||
obj-$(CONFIG_NVMEM) += nvmem/
|
||||
|
||||
@@ -861,7 +861,7 @@ static inline void mhi_timesync_log(struct mhi_controller *mhi_cntrl)
|
||||
|
||||
if (mhi_tsync && mhi_cntrl->tsync_log)
|
||||
mhi_cntrl->tsync_log(mhi_cntrl,
|
||||
readq_no_log(mhi_tsync->time_reg));
|
||||
readq(mhi_tsync->time_reg));
|
||||
}
|
||||
|
||||
/* memory allocation methods */
|
||||
|
||||
@@ -2621,7 +2621,7 @@ int mhi_get_remote_time_sync(struct mhi_device *mhi_dev,
|
||||
local_irq_disable();
|
||||
|
||||
*t_host = mhi_cntrl->time_get(mhi_cntrl, mhi_cntrl->priv_data);
|
||||
*t_dev = readq_relaxed_no_log(mhi_tsync->time_reg);
|
||||
*t_dev = readq_relaxed(mhi_tsync->time_reg);
|
||||
|
||||
local_irq_enable();
|
||||
preempt_enable();
|
||||
@@ -2726,7 +2726,7 @@ int mhi_get_remote_time(struct mhi_device *mhi_dev,
|
||||
|
||||
mhi_tsync->local_time =
|
||||
mhi_cntrl->time_get(mhi_cntrl, mhi_cntrl->priv_data);
|
||||
writel_relaxed_no_log(mhi_tsync->int_sequence, mhi_cntrl->tsync_db);
|
||||
writel_relaxed(mhi_tsync->int_sequence, mhi_cntrl->tsync_db);
|
||||
/* write must go thru immediately */
|
||||
wmb();
|
||||
|
||||
|
||||
@@ -596,64 +596,44 @@ static void fastrpc_remote_buf_list_free(struct fastrpc_file *fl)
|
||||
} while (free);
|
||||
}
|
||||
|
||||
static void fastrpc_mmap_add_global(struct fastrpc_mmap *map)
|
||||
{
|
||||
struct fastrpc_apps *me = &gfa;
|
||||
unsigned long irq_flags = 0;
|
||||
|
||||
spin_lock_irqsave(&me->hlock, irq_flags);
|
||||
hlist_add_head(&map->hn, &me->maps);
|
||||
spin_unlock_irqrestore(&me->hlock, irq_flags);
|
||||
}
|
||||
|
||||
static void fastrpc_mmap_add(struct fastrpc_mmap *map)
|
||||
{
|
||||
if (map->flags == ADSP_MMAP_HEAP_ADDR ||
|
||||
map->flags == ADSP_MMAP_REMOTE_HEAP_ADDR) {
|
||||
struct fastrpc_apps *me = &gfa;
|
||||
struct fastrpc_file *fl = map->fl;
|
||||
|
||||
spin_lock(&me->hlock);
|
||||
hlist_add_head(&map->hn, &me->maps);
|
||||
spin_unlock(&me->hlock);
|
||||
} else {
|
||||
struct fastrpc_file *fl = map->fl;
|
||||
|
||||
hlist_add_head(&map->hn, &fl->maps);
|
||||
}
|
||||
hlist_add_head(&map->hn, &fl->maps);
|
||||
}
|
||||
|
||||
static int fastrpc_mmap_find(struct fastrpc_file *fl, int fd,
|
||||
uintptr_t va, size_t len, int mflags, int refs,
|
||||
struct fastrpc_mmap **ppmap)
|
||||
{
|
||||
struct fastrpc_apps *me = &gfa;
|
||||
struct fastrpc_mmap *match = NULL, *map = NULL;
|
||||
struct hlist_node *n;
|
||||
|
||||
if ((va + len) < va)
|
||||
return -EOVERFLOW;
|
||||
if (mflags == ADSP_MMAP_HEAP_ADDR ||
|
||||
mflags == ADSP_MMAP_REMOTE_HEAP_ADDR) {
|
||||
spin_lock(&me->hlock);
|
||||
hlist_for_each_entry_safe(map, n, &me->maps, hn) {
|
||||
if (va >= map->va &&
|
||||
va + len <= map->va + map->len &&
|
||||
map->fd == fd) {
|
||||
if (refs) {
|
||||
if (map->refs + 1 == INT_MAX) {
|
||||
spin_unlock(&me->hlock);
|
||||
return -ETOOMANYREFS;
|
||||
}
|
||||
map->refs++;
|
||||
}
|
||||
match = map;
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock(&me->hlock);
|
||||
} else {
|
||||
hlist_for_each_entry_safe(map, n, &fl->maps, hn) {
|
||||
if (va >= map->va &&
|
||||
va + len <= map->va + map->len &&
|
||||
map->fd == fd) {
|
||||
if (refs) {
|
||||
if (map->refs + 1 == INT_MAX)
|
||||
return -ETOOMANYREFS;
|
||||
map->refs++;
|
||||
}
|
||||
match = map;
|
||||
break;
|
||||
|
||||
hlist_for_each_entry_safe(map, n, &fl->maps, hn) {
|
||||
if (va >= map->va &&
|
||||
va + len <= map->va + map->len &&
|
||||
map->fd == fd) {
|
||||
if (refs) {
|
||||
if (map->refs + 1 == INT_MAX)
|
||||
return -ETOOMANYREFS;
|
||||
map->refs++;
|
||||
}
|
||||
match = map;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (match) {
|
||||
@@ -997,8 +977,9 @@ static int fastrpc_mmap_create(struct fastrpc_file *fl, int fd,
|
||||
map->va = va;
|
||||
}
|
||||
map->len = len;
|
||||
|
||||
fastrpc_mmap_add(map);
|
||||
if ((mflags != ADSP_MMAP_HEAP_ADDR) &&
|
||||
(mflags != ADSP_MMAP_REMOTE_HEAP_ADDR))
|
||||
fastrpc_mmap_add(map);
|
||||
*ppmap = map;
|
||||
|
||||
bail:
|
||||
@@ -2311,6 +2292,7 @@ static int fastrpc_init_process(struct fastrpc_file *fl,
|
||||
mutex_unlock(&fl->map_mutex);
|
||||
if (err)
|
||||
goto bail;
|
||||
fastrpc_mmap_add_global(mem);
|
||||
phys = mem->phys;
|
||||
size = mem->size;
|
||||
if (me->channel[fl->cid].rhvm.vmid) {
|
||||
@@ -2641,7 +2623,7 @@ static int fastrpc_mmap_remove_ssr(struct fastrpc_file *fl)
|
||||
} while (match);
|
||||
bail:
|
||||
if (err && match)
|
||||
fastrpc_mmap_add(match);
|
||||
fastrpc_mmap_add_global(match);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -2758,7 +2740,11 @@ static int fastrpc_internal_munmap(struct fastrpc_file *fl,
|
||||
bail:
|
||||
if (err && map) {
|
||||
mutex_lock(&fl->map_mutex);
|
||||
fastrpc_mmap_add(map);
|
||||
if ((map->flags == ADSP_MMAP_HEAP_ADDR) ||
|
||||
(map->flags == ADSP_MMAP_REMOTE_HEAP_ADDR))
|
||||
fastrpc_mmap_add_global(map);
|
||||
else
|
||||
fastrpc_mmap_add(map);
|
||||
mutex_unlock(&fl->map_mutex);
|
||||
}
|
||||
mutex_unlock(&fl->internal_map_mutex);
|
||||
@@ -2865,6 +2851,9 @@ static int fastrpc_internal_mmap(struct fastrpc_file *fl,
|
||||
if (err)
|
||||
goto bail;
|
||||
map->raddr = raddr;
|
||||
if (ud->flags == ADSP_MMAP_HEAP_ADDR ||
|
||||
ud->flags == ADSP_MMAP_REMOTE_HEAP_ADDR)
|
||||
fastrpc_mmap_add_global(map);
|
||||
}
|
||||
ud->vaddrout = raddr;
|
||||
bail:
|
||||
|
||||
@@ -110,14 +110,9 @@ static inline int clk_osm_read_reg(struct clk_osm *c, u32 offset)
|
||||
return readl_relaxed(c->vbase + offset);
|
||||
}
|
||||
|
||||
static inline int clk_osm_read_reg_no_log(struct clk_osm *c, u32 offset)
|
||||
{
|
||||
return readl_relaxed_no_log(c->vbase + offset);
|
||||
}
|
||||
|
||||
static inline int clk_osm_mb(struct clk_osm *c)
|
||||
{
|
||||
return readl_relaxed_no_log(c->vbase + ENABLE_REG);
|
||||
return readl_relaxed(c->vbase + ENABLE_REG);
|
||||
}
|
||||
|
||||
static long clk_osm_list_rate(struct clk_hw *hw, unsigned int n,
|
||||
@@ -924,7 +919,7 @@ static u64 clk_osm_get_cpu_cycle_counter(int cpu)
|
||||
* core DCVS is disabled.
|
||||
*/
|
||||
core_num = parent->per_core_dcvs ? c->core_num : 0;
|
||||
val = clk_osm_read_reg_no_log(parent,
|
||||
val = clk_osm_read_reg(parent,
|
||||
OSM_CYCLE_COUNTER_STATUS_REG(core_num));
|
||||
|
||||
if (val < c->prev_cycle_counter) {
|
||||
|
||||
@@ -100,20 +100,20 @@ void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val,
|
||||
struct arch_timer *timer = to_arch_timer(clk);
|
||||
switch (reg) {
|
||||
case ARCH_TIMER_REG_CTRL:
|
||||
writel_relaxed_no_log(val, timer->base + CNTP_CTL);
|
||||
writel_relaxed(val, timer->base + CNTP_CTL);
|
||||
break;
|
||||
case ARCH_TIMER_REG_TVAL:
|
||||
writel_relaxed_no_log(val, timer->base + CNTP_TVAL);
|
||||
writel_relaxed(val, timer->base + CNTP_TVAL);
|
||||
break;
|
||||
}
|
||||
} else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) {
|
||||
struct arch_timer *timer = to_arch_timer(clk);
|
||||
switch (reg) {
|
||||
case ARCH_TIMER_REG_CTRL:
|
||||
writel_relaxed_no_log(val, timer->base + CNTV_CTL);
|
||||
writel_relaxed(val, timer->base + CNTV_CTL);
|
||||
break;
|
||||
case ARCH_TIMER_REG_TVAL:
|
||||
writel_relaxed_no_log(val, timer->base + CNTV_TVAL);
|
||||
writel_relaxed(val, timer->base + CNTV_TVAL);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
@@ -131,20 +131,20 @@ u32 arch_timer_reg_read(int access, enum arch_timer_reg reg,
|
||||
struct arch_timer *timer = to_arch_timer(clk);
|
||||
switch (reg) {
|
||||
case ARCH_TIMER_REG_CTRL:
|
||||
val = readl_relaxed_no_log(timer->base + CNTP_CTL);
|
||||
val = readl_relaxed(timer->base + CNTP_CTL);
|
||||
break;
|
||||
case ARCH_TIMER_REG_TVAL:
|
||||
val = readl_relaxed_no_log(timer->base + CNTP_TVAL);
|
||||
val = readl_relaxed(timer->base + CNTP_TVAL);
|
||||
break;
|
||||
}
|
||||
} else if (access == ARCH_TIMER_MEM_VIRT_ACCESS) {
|
||||
struct arch_timer *timer = to_arch_timer(clk);
|
||||
switch (reg) {
|
||||
case ARCH_TIMER_REG_CTRL:
|
||||
val = readl_relaxed_no_log(timer->base + CNTV_CTL);
|
||||
val = readl_relaxed(timer->base + CNTV_CTL);
|
||||
break;
|
||||
case ARCH_TIMER_REG_TVAL:
|
||||
val = readl_relaxed_no_log(timer->base + CNTV_TVAL);
|
||||
val = readl_relaxed(timer->base + CNTV_TVAL);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
@@ -900,11 +900,11 @@ void arch_timer_mem_get_cval(u32 *lo, u32 *hi)
|
||||
if (!arch_counter_base)
|
||||
return;
|
||||
|
||||
ctrl = readl_relaxed_no_log(arch_counter_base + CNTV_CTL);
|
||||
ctrl = readl_relaxed(arch_counter_base + CNTV_CTL);
|
||||
|
||||
if (ctrl & ARCH_TIMER_CTRL_ENABLE) {
|
||||
*lo = readl_relaxed_no_log(arch_counter_base + CNTCVAL_LO);
|
||||
*hi = readl_relaxed_no_log(arch_counter_base + CNTCVAL_HI);
|
||||
*lo = readl_relaxed(arch_counter_base + CNTCVAL_LO);
|
||||
*hi = readl_relaxed(arch_counter_base + CNTCVAL_HI);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -913,9 +913,9 @@ static u64 arch_counter_get_cntvct_mem(void)
|
||||
u32 vct_lo, vct_hi, tmp_hi;
|
||||
|
||||
do {
|
||||
vct_hi = readl_relaxed_no_log(arch_counter_base + CNTVCT_HI);
|
||||
vct_lo = readl_relaxed_no_log(arch_counter_base + CNTVCT_LO);
|
||||
tmp_hi = readl_relaxed_no_log(arch_counter_base + CNTVCT_HI);
|
||||
vct_hi = readl_relaxed(arch_counter_base + CNTVCT_HI);
|
||||
vct_lo = readl_relaxed(arch_counter_base + CNTVCT_LO);
|
||||
tmp_hi = readl_relaxed(arch_counter_base + CNTVCT_HI);
|
||||
} while (vct_hi != tmp_hi);
|
||||
|
||||
return ((u64) vct_hi << 32) | vct_lo;
|
||||
@@ -1285,7 +1285,7 @@ arch_timer_mem_find_best_frame(struct arch_timer_mem *timer_mem)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
cnttidr = readl_relaxed_no_log(cntctlbase + CNTTIDR);
|
||||
cnttidr = readl_relaxed(cntctlbase + CNTTIDR);
|
||||
|
||||
/*
|
||||
* Try to find a virtual capable frame. Otherwise fall back to a
|
||||
|
||||
@@ -31,7 +31,6 @@
|
||||
#include <linux/syscore_ops.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/sched/topology.h>
|
||||
#include <linux/sched/sysctl.h>
|
||||
|
||||
#include <trace/events/power.h>
|
||||
|
||||
@@ -660,40 +659,11 @@ static ssize_t show_##file_name \
|
||||
}
|
||||
|
||||
show_one(cpuinfo_min_freq, cpuinfo.min_freq);
|
||||
show_one(cpuinfo_max_freq, cpuinfo.max_freq);
|
||||
show_one(cpuinfo_transition_latency, cpuinfo.transition_latency);
|
||||
show_one(scaling_min_freq, min);
|
||||
show_one(scaling_max_freq, max);
|
||||
|
||||
unsigned int cpuinfo_max_freq_cached;
|
||||
|
||||
static bool should_use_cached_freq(int cpu)
|
||||
{
|
||||
/* This is a safe check. may not be needed */
|
||||
if (!cpuinfo_max_freq_cached)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* perfd already configure sched_lib_mask_force to
|
||||
* 0xf0 from user space. so re-using it.
|
||||
*/
|
||||
if (!(BIT(cpu) & sched_lib_mask_force))
|
||||
return false;
|
||||
|
||||
return is_sched_lib_based_app(current->pid);
|
||||
}
|
||||
|
||||
static ssize_t show_cpuinfo_max_freq(struct cpufreq_policy *policy, char *buf)
|
||||
{
|
||||
unsigned int freq = policy->cpuinfo.max_freq;
|
||||
|
||||
if (should_use_cached_freq(policy->cpu))
|
||||
freq = cpuinfo_max_freq_cached << 1;
|
||||
else
|
||||
freq = policy->cpuinfo.max_freq;
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "%u\n", freq);
|
||||
}
|
||||
|
||||
__weak unsigned int arch_freq_get_on_cpu(int cpu)
|
||||
{
|
||||
return 0;
|
||||
|
||||
@@ -62,9 +62,6 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
|
||||
policy->min = policy->cpuinfo.min_freq = min_freq;
|
||||
policy->max = policy->cpuinfo.max_freq = max_freq;
|
||||
|
||||
if (max_freq > cpuinfo_max_freq_cached)
|
||||
cpuinfo_max_freq_cached = max_freq;
|
||||
|
||||
if (policy->min == ~0)
|
||||
return -EINVAL;
|
||||
else
|
||||
|
||||
@@ -52,6 +52,7 @@
|
||||
#elif defined(CONFIG_COMMON_CLK_MSM)
|
||||
#include "../../drivers/clk/msm/clock.h"
|
||||
#endif /* CONFIG_COMMON_CLK */
|
||||
#include "../../kernel/sched/sched.h"
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <trace/events/trace_msm_low_power.h>
|
||||
|
||||
@@ -723,7 +724,8 @@ static int cpu_power_select(struct cpuidle_device *dev,
|
||||
struct power_params *pwr_params;
|
||||
uint64_t bias_time = 0;
|
||||
|
||||
if ((sleep_disabled && !cpu_isolated(dev->cpu)) || sleep_us < 0)
|
||||
if ((sleep_disabled && !cpu_isolated(dev->cpu)) ||
|
||||
is_reserved(dev->cpu) || sleep_us < 0)
|
||||
return best_level;
|
||||
|
||||
idx_restrict = cpu->nlevels + 1;
|
||||
|
||||
@@ -5912,6 +5912,10 @@ int dsi_display_dev_remove(struct platform_device *pdev)
|
||||
}
|
||||
|
||||
display = platform_get_drvdata(pdev);
|
||||
if (!display || !display->disp_node) {
|
||||
pr_err("invalid display\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* decrement ref count */
|
||||
of_node_put(display->disp_node);
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
#include <linux/of.h>
|
||||
#include <linux/of_device.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/input.h>
|
||||
#include <linux/io.h>
|
||||
#include <soc/qcom/scm.h>
|
||||
#include <soc/qcom/boot_stats.h>
|
||||
@@ -63,7 +62,7 @@ MODULE_PARM_DESC(swfdetect, "Enable soft fault detection");
|
||||
|
||||
#define KGSL_LOG_LEVEL_DEFAULT 3
|
||||
|
||||
static void adreno_input_work(struct work_struct *work);
|
||||
static void adreno_pwr_on_work(struct work_struct *work);
|
||||
static unsigned int counter_delta(struct kgsl_device *device,
|
||||
unsigned int reg, unsigned int *counter);
|
||||
|
||||
@@ -104,8 +103,6 @@ static struct adreno_device device_3d0 = {
|
||||
.ft_policy = KGSL_FT_DEFAULT_POLICY,
|
||||
.ft_pf_policy = KGSL_FT_PAGEFAULT_DEFAULT_POLICY,
|
||||
.long_ib_detect = 1,
|
||||
.input_work = __WORK_INITIALIZER(device_3d0.input_work,
|
||||
adreno_input_work),
|
||||
.pwrctrl_flag = BIT(ADRENO_HWCG_CTRL) | BIT(ADRENO_THROTTLING_CTRL),
|
||||
.profile.enabled = false,
|
||||
.active_list = LIST_HEAD_INIT(device_3d0.active_list),
|
||||
@@ -117,6 +114,8 @@ static struct adreno_device device_3d0 = {
|
||||
.skipsaverestore = 1,
|
||||
.usesgmem = 1,
|
||||
},
|
||||
.pwr_on_work = __WORK_INITIALIZER(device_3d0.pwr_on_work,
|
||||
adreno_pwr_on_work),
|
||||
};
|
||||
|
||||
/* Ptr to array for the current set of fault detect registers */
|
||||
@@ -138,9 +137,6 @@ static unsigned int adreno_ft_regs_default[] = {
|
||||
/* Nice level for the higher priority GPU start thread */
|
||||
int adreno_wake_nice = -7;
|
||||
|
||||
/* Number of milliseconds to stay active active after a wake on touch */
|
||||
unsigned int adreno_wake_timeout = 100;
|
||||
|
||||
/**
|
||||
* adreno_readreg64() - Read a 64bit register by getting its offset from the
|
||||
* offset array defined in gpudev node
|
||||
@@ -370,152 +366,17 @@ void adreno_fault_detect_stop(struct adreno_device *adreno_dev)
|
||||
adreno_dev->fast_hang_detect = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* A workqueue callback responsible for actually turning on the GPU after a
|
||||
* touch event. kgsl_pwrctrl_change_state(ACTIVE) is used without any
|
||||
* active_count protection to avoid the need to maintain state. Either
|
||||
* somebody will start using the GPU or the idle timer will fire and put the
|
||||
* GPU back into slumber.
|
||||
*/
|
||||
static void adreno_input_work(struct work_struct *work)
|
||||
static void adreno_pwr_on_work(struct work_struct *work)
|
||||
{
|
||||
struct adreno_device *adreno_dev = container_of(work,
|
||||
struct adreno_device, input_work);
|
||||
struct adreno_device *adreno_dev =
|
||||
container_of(work, typeof(*adreno_dev), pwr_on_work);
|
||||
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
|
||||
|
||||
mutex_lock(&device->mutex);
|
||||
|
||||
device->flags |= KGSL_FLAG_WAKE_ON_TOUCH;
|
||||
|
||||
/*
|
||||
* Don't schedule adreno_start in a high priority workqueue, we are
|
||||
* already in a workqueue which should be sufficient
|
||||
*/
|
||||
kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE);
|
||||
|
||||
/*
|
||||
* When waking up from a touch event we want to stay active long enough
|
||||
* for the user to send a draw command. The default idle timer timeout
|
||||
* is shorter than we want so go ahead and push the idle timer out
|
||||
* further for this special case
|
||||
*/
|
||||
mod_timer(&device->idle_timer,
|
||||
jiffies + msecs_to_jiffies(adreno_wake_timeout));
|
||||
mutex_unlock(&device->mutex);
|
||||
}
|
||||
|
||||
/*
|
||||
* Process input events and schedule work if needed. At this point we are only
|
||||
* interested in groking EV_ABS touchscreen events
|
||||
*/
|
||||
static void adreno_input_event(struct input_handle *handle, unsigned int type,
|
||||
unsigned int code, int value)
|
||||
{
|
||||
struct kgsl_device *device = handle->handler->private;
|
||||
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
|
||||
|
||||
/* Only consider EV_ABS (touch) events */
|
||||
if (type != EV_ABS)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Don't do anything if anything hasn't been rendered since we've been
|
||||
* here before
|
||||
*/
|
||||
|
||||
if (device->flags & KGSL_FLAG_WAKE_ON_TOUCH)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If the device is in nap, kick the idle timer to make sure that we
|
||||
* don't go into slumber before the first render. If the device is
|
||||
* already in slumber schedule the wake.
|
||||
*/
|
||||
|
||||
if (device->state == KGSL_STATE_NAP) {
|
||||
/*
|
||||
* Set the wake on touch bit to keep from coming back here and
|
||||
* keeping the device in nap without rendering
|
||||
*/
|
||||
|
||||
device->flags |= KGSL_FLAG_WAKE_ON_TOUCH;
|
||||
|
||||
mod_timer(&device->idle_timer,
|
||||
jiffies + device->pwrctrl.interval_timeout);
|
||||
} else if (device->state == KGSL_STATE_SLUMBER) {
|
||||
schedule_work(&adreno_dev->input_work);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_INPUT
|
||||
static int adreno_input_connect(struct input_handler *handler,
|
||||
struct input_dev *dev, const struct input_device_id *id)
|
||||
{
|
||||
struct input_handle *handle;
|
||||
int ret;
|
||||
|
||||
handle = kzalloc(sizeof(*handle), GFP_KERNEL);
|
||||
if (handle == NULL)
|
||||
return -ENOMEM;
|
||||
|
||||
handle->dev = dev;
|
||||
handle->handler = handler;
|
||||
handle->name = handler->name;
|
||||
|
||||
ret = input_register_handle(handle);
|
||||
if (ret) {
|
||||
kfree(handle);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = input_open_device(handle);
|
||||
if (ret) {
|
||||
input_unregister_handle(handle);
|
||||
kfree(handle);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void adreno_input_disconnect(struct input_handle *handle)
|
||||
{
|
||||
input_close_device(handle);
|
||||
input_unregister_handle(handle);
|
||||
kfree(handle);
|
||||
}
|
||||
#else
|
||||
static int adreno_input_connect(struct input_handler *handler,
|
||||
struct input_dev *dev, const struct input_device_id *id)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
static void adreno_input_disconnect(struct input_handle *handle) {}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We are only interested in EV_ABS events so only register handlers for those
|
||||
* input devices that have EV_ABS events
|
||||
*/
|
||||
static const struct input_device_id adreno_input_ids[] = {
|
||||
{
|
||||
.flags = INPUT_DEVICE_ID_MATCH_EVBIT,
|
||||
.evbit = { BIT_MASK(EV_ABS) },
|
||||
/* assumption: MT_.._X & MT_.._Y are in the same long */
|
||||
.absbit = { [BIT_WORD(ABS_MT_POSITION_X)] =
|
||||
BIT_MASK(ABS_MT_POSITION_X) |
|
||||
BIT_MASK(ABS_MT_POSITION_Y) },
|
||||
},
|
||||
{ },
|
||||
};
|
||||
|
||||
static struct input_handler adreno_input_handler = {
|
||||
.event = adreno_input_event,
|
||||
.connect = adreno_input_connect,
|
||||
.disconnect = adreno_input_disconnect,
|
||||
.name = "kgsl",
|
||||
.id_table = adreno_input_ids,
|
||||
};
|
||||
|
||||
/*
|
||||
* _soft_reset() - Soft reset GPU
|
||||
* @adreno_dev: Pointer to adreno device
|
||||
@@ -1149,11 +1010,6 @@ static int adreno_of_get_power(struct adreno_device *adreno_dev,
|
||||
&device->pwrctrl.pm_qos_active_latency))
|
||||
device->pwrctrl.pm_qos_active_latency = 501;
|
||||
|
||||
/* get pm-qos-cpu-mask-latency, set it to default if not found */
|
||||
if (of_property_read_u32(node, "qcom,l2pc-cpu-mask-latency",
|
||||
&device->pwrctrl.pm_qos_cpu_mask_latency))
|
||||
device->pwrctrl.pm_qos_cpu_mask_latency = 501;
|
||||
|
||||
/* get pm-qos-wakeup-latency, set it to default if not found */
|
||||
if (of_property_read_u32(node, "qcom,pm-qos-wakeup-latency",
|
||||
&device->pwrctrl.pm_qos_wakeup_latency))
|
||||
@@ -1167,9 +1023,6 @@ static int adreno_of_get_power(struct adreno_device *adreno_dev,
|
||||
device->pwrctrl.bus_control = of_property_read_bool(node,
|
||||
"qcom,bus-control");
|
||||
|
||||
device->pwrctrl.input_disable = of_property_read_bool(node,
|
||||
"qcom,disable-wake-on-touch");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1471,21 +1324,6 @@ static int adreno_probe(struct platform_device *pdev)
|
||||
"Failed to get gpuhtw LLC slice descriptor %ld\n",
|
||||
PTR_ERR(adreno_dev->gpuhtw_llc_slice));
|
||||
|
||||
#ifdef CONFIG_INPUT
|
||||
if (!device->pwrctrl.input_disable) {
|
||||
adreno_input_handler.private = device;
|
||||
/*
|
||||
* It isn't fatal if we cannot register the input handler. Sad,
|
||||
* perhaps, but not fatal
|
||||
*/
|
||||
if (input_register_handler(&adreno_input_handler)) {
|
||||
adreno_input_handler.private = NULL;
|
||||
KGSL_DRV_ERR(device,
|
||||
"Unable to register the input handler\n");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
place_marker("M - DRIVER GPU Ready");
|
||||
out:
|
||||
if (status) {
|
||||
@@ -1538,10 +1376,6 @@ static int adreno_remove(struct platform_device *pdev)
|
||||
/* The memory is fading */
|
||||
_adreno_free_memories(adreno_dev);
|
||||
|
||||
#ifdef CONFIG_INPUT
|
||||
if (adreno_input_handler.private)
|
||||
input_unregister_handler(&adreno_input_handler);
|
||||
#endif
|
||||
adreno_sysfs_close(adreno_dev);
|
||||
|
||||
adreno_coresight_remove(adreno_dev);
|
||||
@@ -1930,10 +1764,6 @@ static int _adreno_start(struct adreno_device *adreno_dev)
|
||||
/* make sure ADRENO_DEVICE_STARTED is not set here */
|
||||
WARN_ON(test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv));
|
||||
|
||||
/* disallow l2pc during wake up to improve GPU wake up time */
|
||||
kgsl_pwrctrl_update_l2pc(&adreno_dev->dev,
|
||||
KGSL_L2PC_WAKEUP_TIMEOUT);
|
||||
|
||||
pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma,
|
||||
pmqos_wakeup_vote);
|
||||
|
||||
|
||||
@@ -485,7 +485,7 @@ enum gpu_coresight_sources {
|
||||
* @dispatcher: Container for adreno GPU dispatcher
|
||||
* @pwron_fixup: Command buffer to run a post-power collapse shader workaround
|
||||
* @pwron_fixup_dwords: Number of dwords in the command buffer
|
||||
* @input_work: Work struct for turning on the GPU after a touch event
|
||||
* @pwr_on_work: Work struct for turning on the GPU
|
||||
* @busy_data: Struct holding GPU VBIF busy stats
|
||||
* @ram_cycles_lo: Number of DDR clock cycles for the monitor session (Only
|
||||
* DDR channel 0 read cycles in case of GBIF)
|
||||
@@ -565,7 +565,7 @@ struct adreno_device {
|
||||
struct adreno_dispatcher dispatcher;
|
||||
struct kgsl_memdesc pwron_fixup;
|
||||
unsigned int pwron_fixup_dwords;
|
||||
struct work_struct input_work;
|
||||
struct work_struct pwr_on_work;
|
||||
struct adreno_busy_data busy_data;
|
||||
unsigned int ram_cycles_lo;
|
||||
unsigned int ram_cycles_lo_ch1_read;
|
||||
@@ -1141,7 +1141,6 @@ extern struct adreno_gpudev adreno_a5xx_gpudev;
|
||||
extern struct adreno_gpudev adreno_a6xx_gpudev;
|
||||
|
||||
extern int adreno_wake_nice;
|
||||
extern unsigned int adreno_wake_timeout;
|
||||
|
||||
int adreno_start(struct kgsl_device *device, int priority);
|
||||
int adreno_soft_reset(struct kgsl_device *device);
|
||||
|
||||
@@ -1153,12 +1153,6 @@ static inline int _verify_cmdobj(struct kgsl_device_private *dev_priv,
|
||||
&ADRENO_CONTEXT(context)->base, ib)
|
||||
== false)
|
||||
return -EINVAL;
|
||||
/*
|
||||
* Clear the wake on touch bit to indicate an IB has
|
||||
* been submitted since the last time we set it.
|
||||
* But only clear it when we have rendering commands.
|
||||
*/
|
||||
device->flags &= ~KGSL_FLAG_WAKE_ON_TOUCH;
|
||||
}
|
||||
|
||||
/* A3XX does not have support for drawobj profiling */
|
||||
@@ -1453,10 +1447,6 @@ int adreno_dispatcher_queue_cmds(struct kgsl_device_private *dev_priv,
|
||||
|
||||
spin_unlock(&drawctxt->lock);
|
||||
|
||||
if (device->pwrctrl.l2pc_update_queue)
|
||||
kgsl_pwrctrl_update_l2pc(&adreno_dev->dev,
|
||||
KGSL_L2PC_QUEUE_TIMEOUT);
|
||||
|
||||
/* Add the context to the dispatcher pending list */
|
||||
dispatcher_queue_context(adreno_dev, drawctxt);
|
||||
|
||||
|
||||
@@ -649,7 +649,6 @@ static ADRENO_SYSFS_BOOL(gpu_llc_slice_enable);
|
||||
static ADRENO_SYSFS_BOOL(gpuhtw_llc_slice_enable);
|
||||
|
||||
static DEVICE_INT_ATTR(wake_nice, 0644, adreno_wake_nice);
|
||||
static DEVICE_INT_ATTR(wake_timeout, 0644, adreno_wake_timeout);
|
||||
|
||||
static ADRENO_SYSFS_BOOL(sptp_pc);
|
||||
static ADRENO_SYSFS_BOOL(lm);
|
||||
@@ -674,7 +673,6 @@ static const struct device_attribute *_attr_list[] = {
|
||||
&adreno_attr_ft_long_ib_detect.attr,
|
||||
&adreno_attr_ft_hang_intr_status.attr,
|
||||
&dev_attr_wake_nice.attr,
|
||||
&dev_attr_wake_timeout.attr,
|
||||
&adreno_attr_sptp_pc.attr,
|
||||
&adreno_attr_lm.attr,
|
||||
&adreno_attr_preemption.attr,
|
||||
|
||||
@@ -5166,7 +5166,6 @@ int kgsl_device_platform_probe(struct kgsl_device *device)
|
||||
{
|
||||
int status = -EINVAL;
|
||||
struct resource *res;
|
||||
int cpu;
|
||||
|
||||
status = _register_device(device);
|
||||
if (status)
|
||||
@@ -5303,22 +5302,6 @@ int kgsl_device_platform_probe(struct kgsl_device *device)
|
||||
PM_QOS_CPU_DMA_LATENCY,
|
||||
PM_QOS_DEFAULT_VALUE);
|
||||
|
||||
if (device->pwrctrl.l2pc_cpus_mask) {
|
||||
struct pm_qos_request *qos = &device->pwrctrl.l2pc_cpus_qos;
|
||||
|
||||
qos->type = PM_QOS_REQ_AFFINE_CORES;
|
||||
|
||||
cpumask_empty(&qos->cpus_affine);
|
||||
for_each_possible_cpu(cpu) {
|
||||
if ((1 << cpu) & device->pwrctrl.l2pc_cpus_mask)
|
||||
cpumask_set_cpu(cpu, &qos->cpus_affine);
|
||||
}
|
||||
|
||||
pm_qos_add_request(&device->pwrctrl.l2pc_cpus_qos,
|
||||
PM_QOS_CPU_DMA_LATENCY,
|
||||
PM_QOS_DEFAULT_VALUE);
|
||||
}
|
||||
|
||||
device->events_wq = alloc_workqueue("kgsl-events",
|
||||
WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
|
||||
|
||||
@@ -5355,8 +5338,6 @@ void kgsl_device_platform_remove(struct kgsl_device *device)
|
||||
kgsl_pwrctrl_uninit_sysfs(device);
|
||||
|
||||
pm_qos_remove_request(&device->pwrctrl.pm_qos_req_dma);
|
||||
if (device->pwrctrl.l2pc_cpus_mask)
|
||||
pm_qos_remove_request(&device->pwrctrl.l2pc_cpus_qos);
|
||||
|
||||
idr_destroy(&device->context_idr);
|
||||
|
||||
|
||||
@@ -68,7 +68,6 @@ enum kgsl_event_results {
|
||||
KGSL_EVENT_CANCELLED = 2,
|
||||
};
|
||||
|
||||
#define KGSL_FLAG_WAKE_ON_TOUCH BIT(0)
|
||||
#define KGSL_FLAG_SPARSE BIT(1)
|
||||
|
||||
/*
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <linux/fs.h>
|
||||
#include "kgsl_device.h"
|
||||
#include "kgsl_sync.h"
|
||||
#include "adreno.h"
|
||||
|
||||
static const struct kgsl_ioctl kgsl_ioctl_funcs[] = {
|
||||
KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_GETPROPERTY,
|
||||
@@ -168,8 +169,13 @@ long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
struct kgsl_device_private *dev_priv = filep->private_data;
|
||||
struct kgsl_device *device = dev_priv->device;
|
||||
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
|
||||
long ret;
|
||||
|
||||
if (cmd == IOCTL_KGSL_GPU_COMMAND &&
|
||||
READ_ONCE(device->state) != KGSL_STATE_ACTIVE)
|
||||
kgsl_schedule_work(&adreno_dev->pwr_on_work);
|
||||
|
||||
ret = kgsl_ioctl_helper(filep, cmd, arg, kgsl_ioctl_funcs,
|
||||
ARRAY_SIZE(kgsl_ioctl_funcs));
|
||||
|
||||
|
||||
@@ -592,35 +592,6 @@ void kgsl_pwrctrl_set_constraint(struct kgsl_device *device,
|
||||
}
|
||||
EXPORT_SYMBOL(kgsl_pwrctrl_set_constraint);
|
||||
|
||||
/**
|
||||
* kgsl_pwrctrl_update_l2pc() - Update existing qos request
|
||||
* @device: Pointer to the kgsl_device struct
|
||||
* @timeout_us: the effective duration of qos request in usecs.
|
||||
*
|
||||
* Updates an existing qos request to avoid L2PC on the
|
||||
* CPUs (which are selected through dtsi) on which GPU
|
||||
* thread is running. This would help for performance.
|
||||
*/
|
||||
void kgsl_pwrctrl_update_l2pc(struct kgsl_device *device,
|
||||
unsigned long timeout_us)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
if (device->pwrctrl.l2pc_cpus_mask == 0)
|
||||
return;
|
||||
|
||||
cpu = get_cpu();
|
||||
put_cpu();
|
||||
|
||||
if ((1 << cpu) & device->pwrctrl.l2pc_cpus_mask) {
|
||||
pm_qos_update_request_timeout(
|
||||
&device->pwrctrl.l2pc_cpus_qos,
|
||||
device->pwrctrl.pm_qos_cpu_mask_latency,
|
||||
timeout_us);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(kgsl_pwrctrl_update_l2pc);
|
||||
|
||||
static ssize_t kgsl_pwrctrl_thermal_pwrlevel_store(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
@@ -2351,13 +2322,6 @@ int kgsl_pwrctrl_init(struct kgsl_device *device)
|
||||
|
||||
pwr->power_flags = 0;
|
||||
|
||||
kgsl_property_read_u32(device, "qcom,l2pc-cpu-mask",
|
||||
&pwr->l2pc_cpus_mask);
|
||||
|
||||
pwr->l2pc_update_queue = of_property_read_bool(
|
||||
device->pdev->dev.of_node,
|
||||
"qcom,l2pc-update-queue");
|
||||
|
||||
pm_runtime_enable(&pdev->dev);
|
||||
|
||||
ocmem_bus_node = of_find_node_by_name(
|
||||
@@ -3033,10 +2997,6 @@ _slumber(struct kgsl_device *device)
|
||||
kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER);
|
||||
pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma,
|
||||
PM_QOS_DEFAULT_VALUE);
|
||||
if (device->pwrctrl.l2pc_cpus_mask)
|
||||
pm_qos_update_request(
|
||||
&device->pwrctrl.l2pc_cpus_qos,
|
||||
PM_QOS_DEFAULT_VALUE);
|
||||
break;
|
||||
case KGSL_STATE_SUSPEND:
|
||||
complete_all(&device->hwaccess_gate);
|
||||
|
||||
@@ -57,19 +57,6 @@
|
||||
#define KGSL_PWR_DEL_LIMIT 1
|
||||
#define KGSL_PWR_SET_LIMIT 2
|
||||
|
||||
/*
|
||||
* The effective duration of qos request in usecs at queue time.
|
||||
* After timeout, qos request is cancelled automatically.
|
||||
* Kept 80ms default, inline with default GPU idle time.
|
||||
*/
|
||||
#define KGSL_L2PC_QUEUE_TIMEOUT (80 * 1000)
|
||||
|
||||
/*
|
||||
* The effective duration of qos request in usecs at wakeup time.
|
||||
* After timeout, qos request is cancelled automatically.
|
||||
*/
|
||||
#define KGSL_L2PC_WAKEUP_TIMEOUT (10 * 1000)
|
||||
|
||||
enum kgsl_pwrctrl_timer_type {
|
||||
KGSL_PWR_IDLE_TIMER,
|
||||
};
|
||||
@@ -150,13 +137,9 @@ struct kgsl_regulator {
|
||||
* @ahbpath_pcl - CPU to AHB path bus scale identifier
|
||||
* @irq_name - resource name for the IRQ
|
||||
* @clk_stats - structure of clock statistics
|
||||
* @l2pc_cpus_mask - mask to avoid L2PC on masked CPUs
|
||||
* @l2pc_update_queue - Boolean flag to avoid L2PC on masked CPUs at queue time
|
||||
* @l2pc_cpus_qos - qos structure to avoid L2PC on CPUs
|
||||
* @pm_qos_req_dma - the power management quality of service structure
|
||||
* @pm_qos_active_latency - allowed CPU latency in microseconds when active
|
||||
* @pm_qos_cpu_mask_latency - allowed CPU mask latency in microseconds
|
||||
* @input_disable - To disable GPU wakeup on touch input event
|
||||
* @pm_qos_wakeup_latency - allowed CPU latency in microseconds during wakeup
|
||||
* @bus_control - true if the bus calculation is independent
|
||||
* @bus_mod - modifier from the current power level for the bus vote
|
||||
@@ -211,14 +194,10 @@ struct kgsl_pwrctrl {
|
||||
uint32_t ahbpath_pcl;
|
||||
const char *irq_name;
|
||||
struct kgsl_clk_stats clk_stats;
|
||||
unsigned int l2pc_cpus_mask;
|
||||
bool l2pc_update_queue;
|
||||
struct pm_qos_request l2pc_cpus_qos;
|
||||
struct pm_qos_request pm_qos_req_dma;
|
||||
unsigned int pm_qos_active_latency;
|
||||
unsigned int pm_qos_cpu_mask_latency;
|
||||
unsigned int pm_qos_wakeup_latency;
|
||||
bool input_disable;
|
||||
bool bus_control;
|
||||
int bus_mod;
|
||||
unsigned int bus_percent_ab;
|
||||
@@ -286,7 +265,5 @@ int kgsl_active_count_wait(struct kgsl_device *device, int count);
|
||||
void kgsl_pwrctrl_busy_time(struct kgsl_device *device, u64 time, u64 busy);
|
||||
void kgsl_pwrctrl_set_constraint(struct kgsl_device *device,
|
||||
struct kgsl_pwr_constraint *pwrc, uint32_t id);
|
||||
void kgsl_pwrctrl_update_l2pc(struct kgsl_device *device,
|
||||
unsigned long timeout_us);
|
||||
void kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device);
|
||||
#endif /* __KGSL_PWRCTRL_H */
|
||||
|
||||
@@ -1476,7 +1476,7 @@ u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags)
|
||||
|
||||
u32 len = hid_report_len(report) + 7;
|
||||
|
||||
return kmalloc(len, flags);
|
||||
return kzalloc(len, flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(hid_alloc_report_buf);
|
||||
|
||||
|
||||
@@ -62,30 +62,30 @@ static int stm_ost_send(void __iomem *addr, const void *data, uint32_t size)
|
||||
uint32_t len = size;
|
||||
|
||||
if (((unsigned long)data & 0x1) && (size >= 1)) {
|
||||
writeb_relaxed_no_log(*(uint8_t *)data, addr);
|
||||
writeb_relaxed(*(uint8_t *)data, addr);
|
||||
data++;
|
||||
size--;
|
||||
}
|
||||
if (((unsigned long)data & 0x2) && (size >= 2)) {
|
||||
writew_relaxed_no_log(*(uint16_t *)data, addr);
|
||||
writew_relaxed(*(uint16_t *)data, addr);
|
||||
data += 2;
|
||||
size -= 2;
|
||||
}
|
||||
|
||||
/* now we are 32bit aligned */
|
||||
while (size >= 4) {
|
||||
writel_relaxed_no_log(*(uint32_t *)data, addr);
|
||||
writel_relaxed(*(uint32_t *)data, addr);
|
||||
data += 4;
|
||||
size -= 4;
|
||||
}
|
||||
|
||||
if (size >= 2) {
|
||||
writew_relaxed_no_log(*(uint16_t *)data, addr);
|
||||
writew_relaxed(*(uint16_t *)data, addr);
|
||||
data += 2;
|
||||
size -= 2;
|
||||
}
|
||||
if (size >= 1) {
|
||||
writeb_relaxed_no_log(*(uint8_t *)data, addr);
|
||||
writeb_relaxed(*(uint8_t *)data, addr);
|
||||
data++;
|
||||
size--;
|
||||
}
|
||||
|
||||
15
drivers/hwtracing/google/Kconfig
Normal file
15
drivers/hwtracing/google/Kconfig
Normal file
@@ -0,0 +1,15 @@
|
||||
config CORESIGHT_PLACEHOLDER
|
||||
tristate "Coresight device placeholder driver"
|
||||
default y
|
||||
depends on !CORESIGHT
|
||||
help
|
||||
For targets which do not use coresight, this option enables a placeholder
|
||||
which probes coresight devices to turn down clocks to save power.
|
||||
|
||||
config CORESIGHT_AMBA_PLACEHOLDER
|
||||
tristate "Coresight primecell device placeholder driver"
|
||||
default y
|
||||
depends on !CORESIGHT
|
||||
help
|
||||
For targets which do not use coresight, this option enables a placeholder
|
||||
which probes coresight AMBA devices to turn down clocks to save power.
|
||||
3
drivers/hwtracing/google/Makefile
Normal file
3
drivers/hwtracing/google/Makefile
Normal file
@@ -0,0 +1,3 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
obj-$(CONFIG_CORESIGHT_PLACEHOLDER) += coresight-clk-placeholder.o
|
||||
obj-$(CONFIG_CORESIGHT_AMBA_PLACEHOLDER) += coresight-clk-amba-placeholder.o
|
||||
105
drivers/hwtracing/google/coresight-clk-amba-placeholder.c
Normal file
105
drivers/hwtracing/google/coresight-clk-amba-placeholder.c
Normal file
@@ -0,0 +1,105 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2021, Google LLC. All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/amba/bus.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/pm_runtime.h>
|
||||
|
||||
static int coresight_clk_disable_amba_probe(struct amba_device *adev,
|
||||
const struct amba_id *id)
|
||||
{
|
||||
pm_runtime_put(&adev->dev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define ETM4x_AMBA_ID(pid) \
|
||||
{ \
|
||||
.id = pid, .mask = 0x000fffff, \
|
||||
}
|
||||
|
||||
#define TMC_ETR_AXI_ARCACHE (0x1U << 1)
|
||||
#define TMC_ETR_SAVE_RESTORE (0x1U << 2)
|
||||
#define CORESIGHT_SOC_600_ETR_CAPS (TMC_ETR_SAVE_RESTORE | TMC_ETR_AXI_ARCACHE)
|
||||
|
||||
static const struct amba_id coresight_ids[] = {
|
||||
/* ETM4 IDs */
|
||||
ETM4x_AMBA_ID(0x000bb95d), /* Cortex-A53 */
|
||||
ETM4x_AMBA_ID(0x000bb95e), /* Cortex-A57 */
|
||||
ETM4x_AMBA_ID(0x000bb95a), /* Cortex-A72 */
|
||||
ETM4x_AMBA_ID(0x000bb959), /* Cortex-A73 */
|
||||
ETM4x_AMBA_ID(0x000bb9da), /* Cortex-A35 */
|
||||
/* sdmmagpie coresight IDs */
|
||||
ETM4x_AMBA_ID(0x0003b908),
|
||||
ETM4x_AMBA_ID(0x0003b909),
|
||||
ETM4x_AMBA_ID(0x0003b961),
|
||||
ETM4x_AMBA_ID(0x0003b962),
|
||||
ETM4x_AMBA_ID(0x0003b966),
|
||||
ETM4x_AMBA_ID(0x0003b968),
|
||||
ETM4x_AMBA_ID(0x0003b969),
|
||||
ETM4x_AMBA_ID(0x0003b999),
|
||||
ETM4x_AMBA_ID(0x000bb95d),
|
||||
/* dynamic-replicator IDs */
|
||||
{
|
||||
.id = 0x000bb909,
|
||||
.mask = 0x000fffff,
|
||||
},
|
||||
{
|
||||
/* Coresight SoC-600 */
|
||||
.id = 0x000bb9ec,
|
||||
.mask = 0x000fffff,
|
||||
},
|
||||
/* dynamic-funnel IDs */
|
||||
{
|
||||
.id = 0x000bb908,
|
||||
.mask = 0x000fffff,
|
||||
},
|
||||
{
|
||||
/* Coresight SoC-600 */
|
||||
.id = 0x000bb9eb,
|
||||
.mask = 0x000fffff,
|
||||
},
|
||||
/* coresight-tmc IDs */
|
||||
{
|
||||
.id = 0x000bb961,
|
||||
.mask = 0x000fffff,
|
||||
},
|
||||
{
|
||||
/* Coresight SoC 600 TMC-ETR/ETS */
|
||||
.id = 0x000bb9e8,
|
||||
.mask = 0x000fffff,
|
||||
.data = (void *)(unsigned long)CORESIGHT_SOC_600_ETR_CAPS,
|
||||
},
|
||||
{
|
||||
/* Coresight SoC 600 TMC-ETB */
|
||||
.id = 0x000bb9e9,
|
||||
.mask = 0x000fffff,
|
||||
},
|
||||
{
|
||||
/* Coresight SoC 600 TMC-ETF */
|
||||
.id = 0x000bb9ea,
|
||||
.mask = 0x000fffff,
|
||||
},
|
||||
{ 0, 0 },
|
||||
};
|
||||
|
||||
static struct amba_driver coresight_clk_disable_amba_driver = {
|
||||
.drv = {
|
||||
.name = "coresight-clk-disable-amba",
|
||||
.suppress_bind_attrs = true,
|
||||
},
|
||||
.probe = coresight_clk_disable_amba_probe,
|
||||
.id_table = coresight_ids,
|
||||
};
|
||||
|
||||
module_amba_driver(coresight_clk_disable_amba_driver);
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_DESCRIPTION("CoreSight DEBUGv8 and ETMv4 clock disable AMBA driver stub");
|
||||
MODULE_AUTHOR("J. Avila <elavila@google.com>");
|
||||
43
drivers/hwtracing/google/coresight-clk-placeholder.c
Normal file
43
drivers/hwtracing/google/coresight-clk-placeholder.c
Normal file
@@ -0,0 +1,43 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2021, Google LLC. All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/of.h>
|
||||
|
||||
static int coresight_clk_disable_probe(struct platform_device *pdev)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int coresight_clk_disable_remove(struct platform_device *pdev)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct of_device_id coresight_clk_disable_match[] = {
|
||||
{ .compatible = "qcom,coresight-csr" },
|
||||
{}
|
||||
};
|
||||
|
||||
static struct platform_driver coresight_clk_disable_driver = {
|
||||
.probe = coresight_clk_disable_probe,
|
||||
.remove = coresight_clk_disable_remove,
|
||||
.driver = {
|
||||
.name = "coresight-clk-disable",
|
||||
.of_match_table = coresight_clk_disable_match,
|
||||
},
|
||||
};
|
||||
|
||||
module_platform_driver(coresight_clk_disable_driver);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_DESCRIPTION("CoreSight DEBUGv8 and ETMv4 clock disable driver stub");
|
||||
MODULE_AUTHOR("J. Avila <elavila@google.com>");
|
||||
@@ -28,7 +28,6 @@
|
||||
#include <linux/of_irq.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/msm_rtb.h>
|
||||
#include <linux/wakeup_reason.h>
|
||||
|
||||
#include <linux/irqchip.h>
|
||||
@@ -116,7 +115,7 @@ static void gic_do_wait_for_rwp(void __iomem *base)
|
||||
{
|
||||
u32 count = 1000000; /* 1s! */
|
||||
|
||||
while (readl_relaxed_no_log(base + GICD_CTLR) & GICD_CTLR_RWP) {
|
||||
while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) {
|
||||
count--;
|
||||
if (!count) {
|
||||
pr_err_ratelimited("RWP timeout, gone fishing\n");
|
||||
@@ -234,8 +233,7 @@ static int gic_peek_irq(struct irq_data *d, u32 offset)
|
||||
else
|
||||
base = gic_data.dist_base;
|
||||
|
||||
return !!(readl_relaxed_no_log
|
||||
(base + offset + (gic_irq(d) / 32) * 4) & mask);
|
||||
return !!(readl_relaxed(base + offset + (gic_irq(d) / 32) * 4) & mask);
|
||||
}
|
||||
|
||||
static void gic_poke_irq(struct irq_data *d, u32 offset)
|
||||
@@ -579,7 +577,6 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
|
||||
if (likely(irqnr > 15 && irqnr < 1020) || irqnr >= 8192) {
|
||||
int err;
|
||||
|
||||
uncached_logk(LOGK_IRQ, (void *)(uintptr_t)irqnr);
|
||||
if (static_key_true(&supports_deactivate))
|
||||
gic_write_eoir(irqnr);
|
||||
else
|
||||
@@ -600,7 +597,6 @@ static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs
|
||||
continue;
|
||||
}
|
||||
if (irqnr < 16) {
|
||||
uncached_logk(LOGK_IRQ, (void *)(uintptr_t)irqnr);
|
||||
gic_write_eoir(irqnr);
|
||||
if (static_key_true(&supports_deactivate))
|
||||
gic_write_dir(irqnr);
|
||||
|
||||
@@ -41,7 +41,6 @@
|
||||
#include <linux/irqchip.h>
|
||||
#include <linux/irqchip/chained_irq.h>
|
||||
#include <linux/irqchip/arm-gic.h>
|
||||
#include <linux/msm_rtb.h>
|
||||
#ifdef CONFIG_PM
|
||||
#include <linux/syscore_ops.h>
|
||||
#endif
|
||||
@@ -506,7 +505,6 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
|
||||
writel_relaxed(irqstat, cpu_base + GIC_CPU_EOI);
|
||||
isb();
|
||||
handle_domain_irq(gic->domain, irqnr, regs);
|
||||
uncached_logk(LOGK_IRQ, (void *)(uintptr_t)irqnr);
|
||||
continue;
|
||||
}
|
||||
if (irqnr < 16) {
|
||||
@@ -524,7 +522,6 @@ static void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
|
||||
smp_rmb();
|
||||
handle_IPI(irqnr, regs);
|
||||
#endif
|
||||
uncached_logk(LOGK_IRQ, (void *)(uintptr_t)irqnr);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -23,7 +23,7 @@ int cam_io_w(uint32_t data, void __iomem *addr)
|
||||
return -EINVAL;
|
||||
|
||||
CAM_DBG(CAM_UTIL, "0x%pK %08x", addr, data);
|
||||
writel_relaxed_no_log(data, addr);
|
||||
writel_relaxed(data, addr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -36,7 +36,7 @@ int cam_io_w_mb(uint32_t data, void __iomem *addr)
|
||||
CAM_DBG(CAM_UTIL, "0x%pK %08x", addr, data);
|
||||
/* Ensure previous writes are done */
|
||||
wmb();
|
||||
writel_relaxed_no_log(data, addr);
|
||||
writel_relaxed(data, addr);
|
||||
/* Ensure previous writes are done */
|
||||
wmb();
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ int cam_io_w(uint32_t data, void __iomem *addr)
|
||||
return -EINVAL;
|
||||
|
||||
CAM_DBG(CAM_UTIL, "0x%pK %08x", addr, data);
|
||||
writel_relaxed_no_log(data, addr);
|
||||
writel_relaxed(data, addr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -36,7 +36,7 @@ int cam_io_w_mb(uint32_t data, void __iomem *addr)
|
||||
CAM_DBG(CAM_UTIL, "0x%pK %08x", addr, data);
|
||||
/* Ensure previous writes are done */
|
||||
wmb();
|
||||
writel_relaxed_no_log(data, addr);
|
||||
writel_relaxed(data, addr);
|
||||
/* Ensure previous writes are done */
|
||||
wmb();
|
||||
|
||||
|
||||
@@ -68,12 +68,12 @@
|
||||
do { \
|
||||
SDEROT_DBG("SDEREG.W:[%s:0x%X] <= 0x%X\n", #off, (off),\
|
||||
(u32)(data));\
|
||||
writel_relaxed_no_log( \
|
||||
writel_relaxed( \
|
||||
(REGDMA_OP_REGWRITE | \
|
||||
((off) & REGDMA_ADDR_OFFSET_MASK)), \
|
||||
p); \
|
||||
p += sizeof(u32); \
|
||||
writel_relaxed_no_log(data, p); \
|
||||
writel_relaxed(data, p); \
|
||||
p += sizeof(u32); \
|
||||
} while (0)
|
||||
|
||||
@@ -81,14 +81,14 @@
|
||||
do { \
|
||||
SDEROT_DBG("SDEREG.M:[%s:0x%X] <= 0x%X\n", #off, (off),\
|
||||
(u32)(data));\
|
||||
writel_relaxed_no_log( \
|
||||
writel_relaxed( \
|
||||
(REGDMA_OP_REGMODIFY | \
|
||||
((off) & REGDMA_ADDR_OFFSET_MASK)), \
|
||||
p); \
|
||||
p += sizeof(u32); \
|
||||
writel_relaxed_no_log(mask, p); \
|
||||
writel_relaxed(mask, p); \
|
||||
p += sizeof(u32); \
|
||||
writel_relaxed_no_log(data, p); \
|
||||
writel_relaxed(data, p); \
|
||||
p += sizeof(u32); \
|
||||
} while (0)
|
||||
|
||||
@@ -96,25 +96,25 @@
|
||||
do { \
|
||||
SDEROT_DBG("SDEREG.B:[%s:0x%X:0x%X]\n", #off, (off),\
|
||||
(u32)(len));\
|
||||
writel_relaxed_no_log( \
|
||||
writel_relaxed( \
|
||||
(REGDMA_OP_BLKWRITE_INC | \
|
||||
((off) & REGDMA_ADDR_OFFSET_MASK)), \
|
||||
p); \
|
||||
p += sizeof(u32); \
|
||||
writel_relaxed_no_log(len, p); \
|
||||
writel_relaxed(len, p); \
|
||||
p += sizeof(u32); \
|
||||
} while (0)
|
||||
|
||||
#define SDE_REGDMA_BLKWRITE_DATA(p, data) \
|
||||
do { \
|
||||
SDEROT_DBG("SDEREG.I:[:] <= 0x%X\n", (u32)(data));\
|
||||
writel_relaxed_no_log(data, p); \
|
||||
writel_relaxed(data, p); \
|
||||
p += sizeof(u32); \
|
||||
} while (0)
|
||||
|
||||
#define SDE_REGDMA_READ(p, data) \
|
||||
do { \
|
||||
data = readl_relaxed_no_log(p); \
|
||||
data = readl_relaxed(p); \
|
||||
p += sizeof(u32); \
|
||||
} while (0)
|
||||
|
||||
@@ -2041,7 +2041,7 @@ static u32 sde_hw_rotator_start_no_regdma(struct sde_hw_rotator_context *ctx,
|
||||
/* Write all command stream to Rotator blocks */
|
||||
/* Rotator will start right away after command stream finish writing */
|
||||
while (mem_rdptr < wrptr) {
|
||||
u32 op = REGDMA_OP_MASK & readl_relaxed_no_log(mem_rdptr);
|
||||
u32 op = REGDMA_OP_MASK & readl_relaxed(mem_rdptr);
|
||||
|
||||
switch (op) {
|
||||
case REGDMA_OP_NOP:
|
||||
|
||||
@@ -531,7 +531,7 @@ static int uvc_parse_format(struct uvc_device *dev,
|
||||
/* Parse the frame descriptors. Only uncompressed, MJPEG and frame
|
||||
* based formats have frame descriptors.
|
||||
*/
|
||||
while (buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE &&
|
||||
while (ftype && buflen > 2 && buffer[1] == USB_DT_CS_INTERFACE &&
|
||||
buffer[2] == ftype) {
|
||||
frame = &format->frame[format->nframes];
|
||||
if (ftype != UVC_VS_FRAME_FRAME_BASED)
|
||||
|
||||
@@ -1167,6 +1167,13 @@ int of_phandle_iterator_init(struct of_phandle_iterator *it,
|
||||
|
||||
memset(it, 0, sizeof(*it));
|
||||
|
||||
/*
|
||||
* one of cell_count or cells_name must be provided to determine the
|
||||
* argument length.
|
||||
*/
|
||||
if (cell_count < 0 && !cells_name)
|
||||
return -EINVAL;
|
||||
|
||||
list = of_get_property(np, list_name, &size);
|
||||
if (!list)
|
||||
return -ENOENT;
|
||||
@@ -1216,11 +1223,20 @@ int of_phandle_iterator_next(struct of_phandle_iterator *it)
|
||||
|
||||
if (of_property_read_u32(it->node, it->cells_name,
|
||||
&count)) {
|
||||
pr_err("%pOF: could not get %s for %pOF\n",
|
||||
it->parent,
|
||||
it->cells_name,
|
||||
it->node);
|
||||
goto err;
|
||||
/*
|
||||
* If both cell_count and cells_name is given,
|
||||
* fall back to cell_count in absence
|
||||
* of the cells_name property
|
||||
*/
|
||||
if (it->cell_count >= 0) {
|
||||
count = it->cell_count;
|
||||
} else {
|
||||
pr_err("%pOF: could not get %s for %pOF\n",
|
||||
it->parent,
|
||||
it->cells_name,
|
||||
it->node);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
count = it->cell_count;
|
||||
@@ -1383,10 +1399,17 @@ int of_parse_phandle_with_args(const struct device_node *np, const char *list_na
|
||||
const char *cells_name, int index,
|
||||
struct of_phandle_args *out_args)
|
||||
{
|
||||
int cell_count = -1;
|
||||
|
||||
if (index < 0)
|
||||
return -EINVAL;
|
||||
return __of_parse_phandle_with_args(np, list_name, cells_name, 0,
|
||||
index, out_args);
|
||||
|
||||
/* If cells_name is NULL we assume a cell count of 0 */
|
||||
if (!cells_name)
|
||||
cell_count = 0;
|
||||
|
||||
return __of_parse_phandle_with_args(np, list_name, cells_name,
|
||||
cell_count, index, out_args);
|
||||
}
|
||||
EXPORT_SYMBOL(of_parse_phandle_with_args);
|
||||
|
||||
@@ -1452,7 +1475,24 @@ int of_count_phandle_with_args(const struct device_node *np, const char *list_na
|
||||
struct of_phandle_iterator it;
|
||||
int rc, cur_index = 0;
|
||||
|
||||
rc = of_phandle_iterator_init(&it, np, list_name, cells_name, 0);
|
||||
/*
|
||||
* If cells_name is NULL we assume a cell count of 0. This makes
|
||||
* counting the phandles trivial as each 32bit word in the list is a
|
||||
* phandle and no arguments are to consider. So we don't iterate through
|
||||
* the list but just use the length to determine the phandle count.
|
||||
*/
|
||||
if (!cells_name) {
|
||||
const __be32 *list;
|
||||
int size;
|
||||
|
||||
list = of_get_property(np, list_name, &size);
|
||||
if (!list)
|
||||
return -ENOENT;
|
||||
|
||||
return size / sizeof(*list);
|
||||
}
|
||||
|
||||
rc = of_phandle_iterator_init(&it, np, list_name, cells_name, -1);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
|
||||
@@ -159,7 +159,7 @@ static int geni_se_iommu_map_and_attach(struct geni_se_device *geni_se_dev);
|
||||
*/
|
||||
unsigned int geni_read_reg_nolog(void __iomem *base, int offset)
|
||||
{
|
||||
return readl_relaxed_no_log(base + offset);
|
||||
return readl_relaxed(base + offset);
|
||||
}
|
||||
EXPORT_SYMBOL(geni_read_reg_nolog);
|
||||
|
||||
@@ -171,7 +171,7 @@ EXPORT_SYMBOL(geni_read_reg_nolog);
|
||||
*/
|
||||
void geni_write_reg_nolog(unsigned int value, void __iomem *base, int offset)
|
||||
{
|
||||
return writel_relaxed_no_log(value, (base + offset));
|
||||
return writel_relaxed(value, (base + offset));
|
||||
}
|
||||
EXPORT_SYMBOL(geni_write_reg_nolog);
|
||||
|
||||
|
||||
@@ -244,40 +244,6 @@ static const struct file_operations ufs_qcom_dbg_dbg_regs_desc = {
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
static int ufs_qcom_dbg_pm_qos_show(struct seq_file *file, void *data)
|
||||
{
|
||||
struct ufs_qcom_host *host = (struct ufs_qcom_host *)file->private;
|
||||
unsigned long flags;
|
||||
int i;
|
||||
|
||||
spin_lock_irqsave(host->hba->host->host_lock, flags);
|
||||
|
||||
seq_printf(file, "enabled: %d\n", host->pm_qos.is_enabled);
|
||||
for (i = 0; i < host->pm_qos.num_groups && host->pm_qos.groups; i++)
|
||||
seq_printf(file,
|
||||
"CPU Group #%d(mask=0x%lx): active_reqs=%d, state=%d, latency=%d\n",
|
||||
i, host->pm_qos.groups[i].mask.bits[0],
|
||||
host->pm_qos.groups[i].active_reqs,
|
||||
host->pm_qos.groups[i].state,
|
||||
host->pm_qos.groups[i].latency_us);
|
||||
|
||||
spin_unlock_irqrestore(host->hba->host->host_lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ufs_qcom_dbg_pm_qos_open(struct inode *inode,
|
||||
struct file *file)
|
||||
{
|
||||
return single_open(file, ufs_qcom_dbg_pm_qos_show, inode->i_private);
|
||||
}
|
||||
|
||||
static const struct file_operations ufs_qcom_dbg_pm_qos_desc = {
|
||||
.open = ufs_qcom_dbg_pm_qos_open,
|
||||
.read = seq_read,
|
||||
.release = single_release,
|
||||
};
|
||||
|
||||
void ufs_qcom_dbg_add_debugfs(struct ufs_hba *hba, struct dentry *root)
|
||||
{
|
||||
struct ufs_qcom_host *host;
|
||||
@@ -366,17 +332,6 @@ void ufs_qcom_dbg_add_debugfs(struct ufs_hba *hba, struct dentry *root)
|
||||
goto err;
|
||||
}
|
||||
|
||||
host->debugfs_files.pm_qos =
|
||||
debugfs_create_file("pm_qos", 0400,
|
||||
host->debugfs_files.debugfs_root, host,
|
||||
&ufs_qcom_dbg_pm_qos_desc);
|
||||
if (!host->debugfs_files.dbg_regs) {
|
||||
dev_err(host->hba->dev,
|
||||
"%s: failed create dbg_regs debugfs entry\n",
|
||||
__func__);
|
||||
goto err;
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
err:
|
||||
|
||||
@@ -35,8 +35,6 @@
|
||||
#define MAX_PROP_SIZE 32
|
||||
#define VDDP_REF_CLK_MIN_UV 1200000
|
||||
#define VDDP_REF_CLK_MAX_UV 1200000
|
||||
/* TODO: further tuning for this parameter may be required */
|
||||
#define UFS_QCOM_PM_QOS_UNVOTE_TIMEOUT_US (10000) /* microseconds */
|
||||
|
||||
#define UFS_QCOM_DEFAULT_DBG_PRINT_EN \
|
||||
(UFS_QCOM_DBG_PRINT_REGS_EN | UFS_QCOM_DBG_PRINT_TEST_BUS_EN)
|
||||
@@ -64,7 +62,6 @@ static void ufs_qcom_get_default_testbus_cfg(struct ufs_qcom_host *host);
|
||||
static int ufs_qcom_set_dme_vs_core_clk_ctrl_clear_div(struct ufs_hba *hba,
|
||||
u32 clk_1us_cycles,
|
||||
u32 clk_40ns_cycles);
|
||||
static void ufs_qcom_pm_qos_suspend(struct ufs_qcom_host *host);
|
||||
|
||||
static void ufs_qcom_dump_regs(struct ufs_hba *hba, int offset, int len,
|
||||
char *prefix)
|
||||
@@ -847,8 +844,6 @@ static int ufs_qcom_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
/* Unvote PM QoS */
|
||||
ufs_qcom_pm_qos_suspend(host);
|
||||
|
||||
out:
|
||||
return ret;
|
||||
@@ -1480,7 +1475,6 @@ static void ufs_qcom_set_caps(struct ufs_hba *hba)
|
||||
if (!host->disable_lpm) {
|
||||
hba->caps |= UFSHCD_CAP_CLK_GATING;
|
||||
hba->caps |= UFSHCD_CAP_HIBERN8_WITH_CLK_GATING;
|
||||
hba->caps |= UFSHCD_CAP_CLK_SCALING;
|
||||
}
|
||||
hba->caps |= UFSHCD_CAP_AUTO_BKOPS_SUSPEND;
|
||||
|
||||
@@ -1558,395 +1552,6 @@ static int ufs_qcom_setup_clocks(struct ufs_hba *hba, bool on,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP /* CONFIG_SMP */
|
||||
static int ufs_qcom_cpu_to_group(struct ufs_qcom_host *host, int cpu)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (cpu >= 0 && cpu < num_possible_cpus())
|
||||
for (i = 0; i < host->pm_qos.num_groups; i++)
|
||||
if (cpumask_test_cpu(cpu, &host->pm_qos.groups[i].mask))
|
||||
return i;
|
||||
|
||||
return host->pm_qos.default_cpu;
|
||||
}
|
||||
|
||||
static void ufs_qcom_pm_qos_req_start(struct ufs_hba *hba, struct request *req)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct ufs_qcom_host *host;
|
||||
struct ufs_qcom_pm_qos_cpu_group *group;
|
||||
|
||||
if (!hba || !req)
|
||||
return;
|
||||
|
||||
host = ufshcd_get_variant(hba);
|
||||
if (!host->pm_qos.groups)
|
||||
return;
|
||||
|
||||
group = &host->pm_qos.groups[ufs_qcom_cpu_to_group(host, req->cpu)];
|
||||
|
||||
spin_lock_irqsave(hba->host->host_lock, flags);
|
||||
if (!host->pm_qos.is_enabled)
|
||||
goto out;
|
||||
|
||||
group->active_reqs++;
|
||||
if (group->state != PM_QOS_REQ_VOTE &&
|
||||
group->state != PM_QOS_VOTED) {
|
||||
group->state = PM_QOS_REQ_VOTE;
|
||||
queue_work(host->pm_qos.workq, &group->vote_work);
|
||||
}
|
||||
out:
|
||||
spin_unlock_irqrestore(hba->host->host_lock, flags);
|
||||
}
|
||||
|
||||
/* hba->host->host_lock is assumed to be held by caller */
|
||||
static void __ufs_qcom_pm_qos_req_end(struct ufs_qcom_host *host, int req_cpu)
|
||||
{
|
||||
struct ufs_qcom_pm_qos_cpu_group *group;
|
||||
|
||||
if (!host->pm_qos.groups || !host->pm_qos.is_enabled)
|
||||
return;
|
||||
|
||||
group = &host->pm_qos.groups[ufs_qcom_cpu_to_group(host, req_cpu)];
|
||||
|
||||
if (--group->active_reqs)
|
||||
return;
|
||||
group->state = PM_QOS_REQ_UNVOTE;
|
||||
queue_work(host->pm_qos.workq, &group->unvote_work);
|
||||
}
|
||||
|
||||
static void ufs_qcom_pm_qos_req_end(struct ufs_hba *hba, struct request *req,
|
||||
bool should_lock)
|
||||
{
|
||||
unsigned long flags = 0;
|
||||
|
||||
if (!hba || !req)
|
||||
return;
|
||||
|
||||
if (should_lock)
|
||||
spin_lock_irqsave(hba->host->host_lock, flags);
|
||||
__ufs_qcom_pm_qos_req_end(ufshcd_get_variant(hba), req->cpu);
|
||||
if (should_lock)
|
||||
spin_unlock_irqrestore(hba->host->host_lock, flags);
|
||||
}
|
||||
|
||||
static void ufs_qcom_pm_qos_vote_work(struct work_struct *work)
|
||||
{
|
||||
struct ufs_qcom_pm_qos_cpu_group *group =
|
||||
container_of(work, struct ufs_qcom_pm_qos_cpu_group, vote_work);
|
||||
struct ufs_qcom_host *host = group->host;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(host->hba->host->host_lock, flags);
|
||||
|
||||
if (!host->pm_qos.is_enabled || !group->active_reqs) {
|
||||
spin_unlock_irqrestore(host->hba->host->host_lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
group->state = PM_QOS_VOTED;
|
||||
spin_unlock_irqrestore(host->hba->host->host_lock, flags);
|
||||
|
||||
pm_qos_update_request(&group->req, group->latency_us);
|
||||
}
|
||||
|
||||
static void ufs_qcom_pm_qos_unvote_work(struct work_struct *work)
|
||||
{
|
||||
struct ufs_qcom_pm_qos_cpu_group *group = container_of(work,
|
||||
struct ufs_qcom_pm_qos_cpu_group, unvote_work);
|
||||
struct ufs_qcom_host *host = group->host;
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* Check if new requests were submitted in the meantime and do not
|
||||
* unvote if so.
|
||||
*/
|
||||
spin_lock_irqsave(host->hba->host->host_lock, flags);
|
||||
|
||||
if (!host->pm_qos.is_enabled || group->active_reqs) {
|
||||
spin_unlock_irqrestore(host->hba->host->host_lock, flags);
|
||||
return;
|
||||
}
|
||||
|
||||
group->state = PM_QOS_UNVOTED;
|
||||
spin_unlock_irqrestore(host->hba->host->host_lock, flags);
|
||||
|
||||
pm_qos_update_request_timeout(&group->req,
|
||||
group->latency_us, UFS_QCOM_PM_QOS_UNVOTE_TIMEOUT_US);
|
||||
}
|
||||
|
||||
static ssize_t ufs_qcom_pm_qos_enable_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct ufs_hba *hba = dev_get_drvdata(dev->parent);
|
||||
struct ufs_qcom_host *host = ufshcd_get_variant(hba);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", host->pm_qos.is_enabled);
|
||||
}
|
||||
|
||||
static ssize_t ufs_qcom_pm_qos_enable_store(struct device *dev,
|
||||
struct device_attribute *attr, const char *buf, size_t count)
|
||||
{
|
||||
struct ufs_hba *hba = dev_get_drvdata(dev->parent);
|
||||
struct ufs_qcom_host *host = ufshcd_get_variant(hba);
|
||||
unsigned long value;
|
||||
unsigned long flags;
|
||||
bool enable;
|
||||
int i;
|
||||
|
||||
if (kstrtoul(buf, 0, &value))
|
||||
return -EINVAL;
|
||||
|
||||
enable = !!value;
|
||||
|
||||
/*
|
||||
* Must take the spinlock and save irqs before changing the enabled
|
||||
* flag in order to keep correctness of PM QoS release.
|
||||
*/
|
||||
spin_lock_irqsave(hba->host->host_lock, flags);
|
||||
if (enable == host->pm_qos.is_enabled) {
|
||||
spin_unlock_irqrestore(hba->host->host_lock, flags);
|
||||
return count;
|
||||
}
|
||||
host->pm_qos.is_enabled = enable;
|
||||
spin_unlock_irqrestore(hba->host->host_lock, flags);
|
||||
|
||||
if (!enable)
|
||||
for (i = 0; i < host->pm_qos.num_groups; i++) {
|
||||
cancel_work_sync(&host->pm_qos.groups[i].vote_work);
|
||||
cancel_work_sync(&host->pm_qos.groups[i].unvote_work);
|
||||
spin_lock_irqsave(hba->host->host_lock, flags);
|
||||
host->pm_qos.groups[i].state = PM_QOS_UNVOTED;
|
||||
host->pm_qos.groups[i].active_reqs = 0;
|
||||
spin_unlock_irqrestore(hba->host->host_lock, flags);
|
||||
pm_qos_update_request(&host->pm_qos.groups[i].req,
|
||||
PM_QOS_DEFAULT_VALUE);
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static ssize_t ufs_qcom_pm_qos_latency_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct ufs_hba *hba = dev_get_drvdata(dev->parent);
|
||||
struct ufs_qcom_host *host = ufshcd_get_variant(hba);
|
||||
int ret;
|
||||
int i;
|
||||
int offset = 0;
|
||||
|
||||
for (i = 0; i < host->pm_qos.num_groups; i++) {
|
||||
ret = snprintf(&buf[offset], PAGE_SIZE,
|
||||
"cpu group #%d(mask=0x%lx): %d\n", i,
|
||||
host->pm_qos.groups[i].mask.bits[0],
|
||||
host->pm_qos.groups[i].latency_us);
|
||||
if (ret > 0)
|
||||
offset += ret;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
static ssize_t ufs_qcom_pm_qos_latency_store(struct device *dev,
|
||||
struct device_attribute *attr, const char *buf, size_t count)
|
||||
{
|
||||
struct ufs_hba *hba = dev_get_drvdata(dev->parent);
|
||||
struct ufs_qcom_host *host = ufshcd_get_variant(hba);
|
||||
unsigned long value;
|
||||
unsigned long flags;
|
||||
char *strbuf;
|
||||
char *strbuf_copy;
|
||||
char *token;
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
/* reserve one byte for null termination */
|
||||
strbuf = kmalloc(count + 1, GFP_KERNEL);
|
||||
if (!strbuf)
|
||||
return -ENOMEM;
|
||||
strbuf_copy = strbuf;
|
||||
strlcpy(strbuf, buf, count + 1);
|
||||
|
||||
for (i = 0; i < host->pm_qos.num_groups; i++) {
|
||||
token = strsep(&strbuf, ",");
|
||||
if (!token)
|
||||
break;
|
||||
|
||||
ret = kstrtoul(token, 0, &value);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
spin_lock_irqsave(hba->host->host_lock, flags);
|
||||
host->pm_qos.groups[i].latency_us = value;
|
||||
spin_unlock_irqrestore(hba->host->host_lock, flags);
|
||||
}
|
||||
|
||||
kfree(strbuf_copy);
|
||||
return count;
|
||||
}
|
||||
|
||||
static int ufs_qcom_pm_qos_init(struct ufs_qcom_host *host)
|
||||
{
|
||||
struct device_node *node = host->hba->dev->of_node;
|
||||
struct device_attribute *attr;
|
||||
int ret = 0;
|
||||
int num_groups;
|
||||
int num_values;
|
||||
char wq_name[sizeof("ufs_pm_qos_00")];
|
||||
int i;
|
||||
|
||||
num_groups = of_property_count_u32_elems(node,
|
||||
"qcom,pm-qos-cpu-groups");
|
||||
if (num_groups <= 0)
|
||||
goto no_pm_qos;
|
||||
|
||||
num_values = of_property_count_u32_elems(node,
|
||||
"qcom,pm-qos-cpu-group-latency-us");
|
||||
if (num_values <= 0)
|
||||
goto no_pm_qos;
|
||||
|
||||
if (num_values != num_groups || num_groups > num_possible_cpus()) {
|
||||
dev_err(host->hba->dev, "%s: invalid count: num_groups=%d, num_values=%d, num_possible_cpus=%d\n",
|
||||
__func__, num_groups, num_values, num_possible_cpus());
|
||||
goto no_pm_qos;
|
||||
}
|
||||
|
||||
host->pm_qos.num_groups = num_groups;
|
||||
host->pm_qos.groups = kcalloc(host->pm_qos.num_groups,
|
||||
sizeof(struct ufs_qcom_pm_qos_cpu_group), GFP_KERNEL);
|
||||
if (!host->pm_qos.groups)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < host->pm_qos.num_groups; i++) {
|
||||
u32 mask;
|
||||
|
||||
ret = of_property_read_u32_index(node, "qcom,pm-qos-cpu-groups",
|
||||
i, &mask);
|
||||
if (ret)
|
||||
goto free_groups;
|
||||
host->pm_qos.groups[i].mask.bits[0] = mask;
|
||||
if (!cpumask_subset(&host->pm_qos.groups[i].mask,
|
||||
cpu_possible_mask)) {
|
||||
dev_err(host->hba->dev, "%s: invalid mask 0x%x for cpu group\n",
|
||||
__func__, mask);
|
||||
goto free_groups;
|
||||
}
|
||||
|
||||
ret = of_property_read_u32_index(node,
|
||||
"qcom,pm-qos-cpu-group-latency-us", i,
|
||||
&host->pm_qos.groups[i].latency_us);
|
||||
if (ret)
|
||||
goto free_groups;
|
||||
|
||||
host->pm_qos.groups[i].req.type = PM_QOS_REQ_AFFINE_CORES;
|
||||
host->pm_qos.groups[i].req.cpus_affine =
|
||||
host->pm_qos.groups[i].mask;
|
||||
host->pm_qos.groups[i].state = PM_QOS_UNVOTED;
|
||||
host->pm_qos.groups[i].active_reqs = 0;
|
||||
host->pm_qos.groups[i].host = host;
|
||||
|
||||
INIT_WORK(&host->pm_qos.groups[i].vote_work,
|
||||
ufs_qcom_pm_qos_vote_work);
|
||||
INIT_WORK(&host->pm_qos.groups[i].unvote_work,
|
||||
ufs_qcom_pm_qos_unvote_work);
|
||||
}
|
||||
|
||||
ret = of_property_read_u32(node, "qcom,pm-qos-default-cpu",
|
||||
&host->pm_qos.default_cpu);
|
||||
if (ret || host->pm_qos.default_cpu > num_possible_cpus())
|
||||
host->pm_qos.default_cpu = 0;
|
||||
|
||||
/*
|
||||
* Use a single-threaded workqueue to assure work submitted to the queue
|
||||
* is performed in order. Consider the following 2 possible cases:
|
||||
*
|
||||
* 1. A new request arrives and voting work is scheduled for it. Before
|
||||
* the voting work is performed the request is finished and unvote
|
||||
* work is also scheduled.
|
||||
* 2. A request is finished and unvote work is scheduled. Before the
|
||||
* work is performed a new request arrives and voting work is also
|
||||
* scheduled.
|
||||
*
|
||||
* In both cases a vote work and unvote work wait to be performed.
|
||||
* If ordering is not guaranteed, then the end state might be the
|
||||
* opposite of the desired state.
|
||||
*/
|
||||
snprintf(wq_name, ARRAY_SIZE(wq_name), "%s_%d", "ufs_pm_qos",
|
||||
host->hba->host->host_no);
|
||||
host->pm_qos.workq = create_singlethread_workqueue(wq_name);
|
||||
if (!host->pm_qos.workq) {
|
||||
dev_err(host->hba->dev, "%s: failed to create the workqueue\n",
|
||||
__func__);
|
||||
ret = -ENOMEM;
|
||||
goto free_groups;
|
||||
}
|
||||
|
||||
/* Initialization was ok, add all PM QoS requests */
|
||||
for (i = 0; i < host->pm_qos.num_groups; i++)
|
||||
pm_qos_add_request(&host->pm_qos.groups[i].req,
|
||||
PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE);
|
||||
|
||||
/* PM QoS latency sys-fs attribute */
|
||||
attr = &host->pm_qos.latency_attr;
|
||||
attr->show = ufs_qcom_pm_qos_latency_show;
|
||||
attr->store = ufs_qcom_pm_qos_latency_store;
|
||||
sysfs_attr_init(&attr->attr);
|
||||
attr->attr.name = "pm_qos_latency_us";
|
||||
attr->attr.mode = 0644;
|
||||
if (device_create_file(host->hba->var->dev, attr))
|
||||
dev_dbg(host->hba->dev, "Failed to create sysfs for pm_qos_latency_us\n");
|
||||
|
||||
/* PM QoS enable sys-fs attribute */
|
||||
attr = &host->pm_qos.enable_attr;
|
||||
attr->show = ufs_qcom_pm_qos_enable_show;
|
||||
attr->store = ufs_qcom_pm_qos_enable_store;
|
||||
sysfs_attr_init(&attr->attr);
|
||||
attr->attr.name = "pm_qos_enable";
|
||||
attr->attr.mode = 0644;
|
||||
if (device_create_file(host->hba->var->dev, attr))
|
||||
dev_dbg(host->hba->dev, "Failed to create sysfs for pm_qos enable\n");
|
||||
|
||||
host->pm_qos.is_enabled = true;
|
||||
|
||||
return 0;
|
||||
|
||||
free_groups:
|
||||
kfree(host->pm_qos.groups);
|
||||
no_pm_qos:
|
||||
host->pm_qos.groups = NULL;
|
||||
return ret ? ret : -ENOTSUPP;
|
||||
}
|
||||
|
||||
static void ufs_qcom_pm_qos_suspend(struct ufs_qcom_host *host)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!host->pm_qos.groups)
|
||||
return;
|
||||
|
||||
for (i = 0; i < host->pm_qos.num_groups; i++)
|
||||
flush_work(&host->pm_qos.groups[i].unvote_work);
|
||||
}
|
||||
|
||||
static void ufs_qcom_pm_qos_remove(struct ufs_qcom_host *host)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!host->pm_qos.groups)
|
||||
return;
|
||||
|
||||
for (i = 0; i < host->pm_qos.num_groups; i++)
|
||||
pm_qos_remove_request(&host->pm_qos.groups[i].req);
|
||||
destroy_workqueue(host->pm_qos.workq);
|
||||
|
||||
kfree(host->pm_qos.groups);
|
||||
host->pm_qos.groups = NULL;
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#define ANDROID_BOOT_DEV_MAX 30
|
||||
static char android_boot_dev[ANDROID_BOOT_DEV_MAX];
|
||||
|
||||
@@ -2109,10 +1714,6 @@ static int ufs_qcom_init(struct ufs_hba *hba)
|
||||
goto out_variant_clear;
|
||||
}
|
||||
|
||||
err = ufs_qcom_pm_qos_init(host);
|
||||
if (err)
|
||||
dev_info(dev, "%s: PM QoS will be disabled\n", __func__);
|
||||
|
||||
/* restore the secure configuration */
|
||||
ufs_qcom_update_sec_cfg(hba, true);
|
||||
|
||||
@@ -2241,7 +1842,6 @@ static void ufs_qcom_exit(struct ufs_hba *hba)
|
||||
host->is_phy_pwr_on = false;
|
||||
}
|
||||
phy_exit(host->generic_phy);
|
||||
ufs_qcom_pm_qos_remove(host);
|
||||
}
|
||||
|
||||
static int ufs_qcom_set_dme_vs_core_clk_ctrl_clear_div(struct ufs_hba *hba,
|
||||
@@ -2708,15 +2308,9 @@ static struct ufs_hba_variant_ops ufs_hba_qcom_vops = {
|
||||
#endif
|
||||
};
|
||||
|
||||
static struct ufs_hba_pm_qos_variant_ops ufs_hba_pm_qos_variant_ops = {
|
||||
.req_start = ufs_qcom_pm_qos_req_start,
|
||||
.req_end = ufs_qcom_pm_qos_req_end,
|
||||
};
|
||||
|
||||
static struct ufs_hba_variant ufs_hba_qcom_variant = {
|
||||
.name = "qcom",
|
||||
.vops = &ufs_hba_qcom_vops,
|
||||
.pm_qos_vops = &ufs_hba_pm_qos_variant_ops,
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
#define UFS_QCOM_H_
|
||||
|
||||
#include <linux/phy/phy.h>
|
||||
#include <linux/pm_qos.h>
|
||||
#include "ufshcd.h"
|
||||
|
||||
#define MAX_UFS_QCOM_HOSTS 2
|
||||
@@ -245,62 +244,9 @@ struct qcom_debugfs_files {
|
||||
struct dentry *testbus_cfg;
|
||||
struct dentry *testbus_bus;
|
||||
struct dentry *dbg_regs;
|
||||
struct dentry *pm_qos;
|
||||
};
|
||||
#endif
|
||||
|
||||
/* PM QoS voting state */
|
||||
enum ufs_qcom_pm_qos_state {
|
||||
PM_QOS_UNVOTED,
|
||||
PM_QOS_VOTED,
|
||||
PM_QOS_REQ_VOTE,
|
||||
PM_QOS_REQ_UNVOTE,
|
||||
};
|
||||
|
||||
/**
|
||||
* struct ufs_qcom_pm_qos_cpu_group - data related to cluster PM QoS voting
|
||||
* logic
|
||||
* @req: request object for PM QoS
|
||||
* @vote_work: work object for voting procedure
|
||||
* @unvote_work: work object for un-voting procedure
|
||||
* @host: back pointer to the main structure
|
||||
* @state: voting state machine current state
|
||||
* @latency_us: requested latency value used for cluster voting, in
|
||||
* microseconds
|
||||
* @mask: cpu mask defined for this cluster
|
||||
* @active_reqs: number of active requests on this cluster
|
||||
*/
|
||||
struct ufs_qcom_pm_qos_cpu_group {
|
||||
struct pm_qos_request req;
|
||||
struct work_struct vote_work;
|
||||
struct work_struct unvote_work;
|
||||
struct ufs_qcom_host *host;
|
||||
enum ufs_qcom_pm_qos_state state;
|
||||
s32 latency_us;
|
||||
cpumask_t mask;
|
||||
int active_reqs;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct ufs_qcom_pm_qos - data related to PM QoS voting logic
|
||||
* @groups: PM QoS cpu group state array
|
||||
* @enable_attr: sysfs attribute to enable/disable PM QoS voting logic
|
||||
* @latency_attr: sysfs attribute to set latency value
|
||||
* @workq: single threaded workqueue to run PM QoS voting/unvoting
|
||||
* @num_clusters: number of clusters defined
|
||||
* @default_cpu: cpu to use for voting for request not specifying a cpu
|
||||
* @is_enabled: flag specifying whether voting logic is enabled
|
||||
*/
|
||||
struct ufs_qcom_pm_qos {
|
||||
struct ufs_qcom_pm_qos_cpu_group *groups;
|
||||
struct device_attribute enable_attr;
|
||||
struct device_attribute latency_attr;
|
||||
struct workqueue_struct *workq;
|
||||
int num_groups;
|
||||
int default_cpu;
|
||||
bool is_enabled;
|
||||
};
|
||||
|
||||
struct ufs_qcom_host {
|
||||
/*
|
||||
* Set this capability if host controller supports the QUniPro mode
|
||||
@@ -337,9 +283,6 @@ struct ufs_qcom_host {
|
||||
struct clk *rx_l1_sync_clk;
|
||||
struct clk *tx_l1_sync_clk;
|
||||
|
||||
/* PM Quality-of-Service (QoS) data */
|
||||
struct ufs_qcom_pm_qos pm_qos;
|
||||
|
||||
bool disable_lpm;
|
||||
bool is_lane_clks_enabled;
|
||||
bool sec_cfg_updated;
|
||||
|
||||
@@ -1220,22 +1220,6 @@ static void ufshcd_cmd_log_init(struct ufs_hba *hba)
|
||||
{
|
||||
}
|
||||
|
||||
static void __ufshcd_cmd_log(struct ufs_hba *hba, char *str, char *cmd_type,
|
||||
unsigned int tag, u8 cmd_id, u8 idn, u8 lun,
|
||||
sector_t lba, int transfer_len)
|
||||
{
|
||||
struct ufshcd_cmd_log_entry entry;
|
||||
|
||||
entry.str = str;
|
||||
entry.lba = lba;
|
||||
entry.cmd_id = cmd_id;
|
||||
entry.transfer_len = transfer_len;
|
||||
entry.doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
|
||||
entry.tag = tag;
|
||||
|
||||
ufshcd_add_command_trace(hba, &entry);
|
||||
}
|
||||
|
||||
static void ufshcd_dme_cmd_log(struct ufs_hba *hba, char *str, u8 cmd_id)
|
||||
{
|
||||
}
|
||||
@@ -3511,7 +3495,19 @@ static void ufshcd_clk_scaling_update_busy(struct ufs_hba *hba)
|
||||
static inline
|
||||
int ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
|
||||
{
|
||||
int ret = 0;
|
||||
if (hba->lrb[task_tag].cmd) {
|
||||
u8 opcode = (u8)(*hba->lrb[task_tag].cmd->cmnd);
|
||||
|
||||
if (opcode == SECURITY_PROTOCOL_OUT && hba->security_in) {
|
||||
hba->security_in--;
|
||||
} else if (opcode == SECURITY_PROTOCOL_IN) {
|
||||
if (hba->security_in) {
|
||||
WARN_ON(1);
|
||||
return -EINVAL;
|
||||
}
|
||||
hba->security_in++;
|
||||
}
|
||||
}
|
||||
|
||||
hba->lrb[task_tag].issue_time_stamp = ktime_get();
|
||||
hba->lrb[task_tag].complete_time_stamp = ktime_set(0, 0);
|
||||
@@ -3523,7 +3519,7 @@ int ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
|
||||
ufshcd_cond_add_cmd_trace(hba, task_tag,
|
||||
hba->lrb[task_tag].cmd ? "scsi_send" : "dev_cmd_send");
|
||||
ufshcd_update_tag_stats(hba, task_tag);
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -4221,6 +4217,48 @@ static inline void ufshcd_put_read_lock(struct ufs_hba *hba)
|
||||
up_read(&hba->lock);
|
||||
}
|
||||
|
||||
static void ufshcd_pm_qos_get_worker(struct work_struct *work)
|
||||
{
|
||||
struct ufs_hba *hba = container_of(work, typeof(*hba), pm_qos.get_work);
|
||||
|
||||
if (!atomic_read(&hba->pm_qos.count))
|
||||
return;
|
||||
|
||||
mutex_lock(&hba->pm_qos.lock);
|
||||
if (atomic_read(&hba->pm_qos.count) && !hba->pm_qos.active) {
|
||||
pm_qos_update_request(&hba->pm_qos.req, 100);
|
||||
hba->pm_qos.active = true;
|
||||
}
|
||||
mutex_unlock(&hba->pm_qos.lock);
|
||||
}
|
||||
|
||||
static void ufshcd_pm_qos_put_worker(struct work_struct *work)
|
||||
{
|
||||
struct ufs_hba *hba = container_of(work, typeof(*hba), pm_qos.put_work);
|
||||
|
||||
if (atomic_read(&hba->pm_qos.count))
|
||||
return;
|
||||
|
||||
mutex_lock(&hba->pm_qos.lock);
|
||||
if (!atomic_read(&hba->pm_qos.count) && hba->pm_qos.active) {
|
||||
pm_qos_update_request(&hba->pm_qos.req, PM_QOS_DEFAULT_VALUE);
|
||||
hba->pm_qos.active = false;
|
||||
}
|
||||
mutex_unlock(&hba->pm_qos.lock);
|
||||
}
|
||||
|
||||
static void ufshcd_pm_qos_get(struct ufs_hba *hba)
|
||||
{
|
||||
if (atomic_inc_return(&hba->pm_qos.count) == 1)
|
||||
queue_work(system_unbound_wq, &hba->pm_qos.get_work);
|
||||
}
|
||||
|
||||
static void ufshcd_pm_qos_put(struct ufs_hba *hba)
|
||||
{
|
||||
if (atomic_dec_return(&hba->pm_qos.count) == 0)
|
||||
queue_work(system_unbound_wq, &hba->pm_qos.put_work);
|
||||
}
|
||||
|
||||
/**
|
||||
* ufshcd_queuecommand - main entry point for SCSI requests
|
||||
* @cmd: command from SCSI Midlayer
|
||||
@@ -4236,12 +4274,16 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
|
||||
int tag;
|
||||
int err = 0;
|
||||
bool has_read_lock = false;
|
||||
bool cmd_sent = false;
|
||||
|
||||
hba = shost_priv(host);
|
||||
|
||||
if (!cmd || !cmd->request || !hba)
|
||||
return -EINVAL;
|
||||
|
||||
/* Wake the CPU managing the IRQ as soon as possible */
|
||||
ufshcd_pm_qos_get(hba);
|
||||
|
||||
tag = cmd->request->tag;
|
||||
if (!ufshcd_valid_tag(hba, tag)) {
|
||||
dev_err(hba->dev,
|
||||
@@ -4253,10 +4295,13 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
|
||||
err = ufshcd_get_read_lock(hba, cmd->device->lun);
|
||||
if (unlikely(err < 0)) {
|
||||
if (err == -EPERM) {
|
||||
return SCSI_MLQUEUE_HOST_BUSY;
|
||||
err = SCSI_MLQUEUE_HOST_BUSY;
|
||||
goto out_pm_qos;
|
||||
}
|
||||
if (err == -EAGAIN) {
|
||||
err = SCSI_MLQUEUE_HOST_BUSY;
|
||||
goto out_pm_qos;
|
||||
}
|
||||
if (err == -EAGAIN)
|
||||
return SCSI_MLQUEUE_HOST_BUSY;
|
||||
} else if (err == 1) {
|
||||
has_read_lock = true;
|
||||
}
|
||||
@@ -4337,9 +4382,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
|
||||
if (ufshcd_is_hibern8_on_idle_allowed(hba))
|
||||
WARN_ON(hba->hibern8_on_idle.state != HIBERN8_EXITED);
|
||||
|
||||
/* Vote PM QoS for the request */
|
||||
ufshcd_vops_pm_qos_req_start(hba, cmd->request);
|
||||
|
||||
/* IO svc time latency histogram */
|
||||
if (hba != NULL && cmd->request != NULL) {
|
||||
if (hba->latency_hist_enabled) {
|
||||
@@ -4384,7 +4426,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
|
||||
lrbp->cmd = NULL;
|
||||
clear_bit_unlock(tag, &hba->lrb_in_use);
|
||||
ufshcd_release_all(hba);
|
||||
ufshcd_vops_pm_qos_req_end(hba, cmd->request, true);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -4394,7 +4435,6 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
|
||||
lrbp->cmd = NULL;
|
||||
clear_bit_unlock(tag, &hba->lrb_in_use);
|
||||
ufshcd_release_all(hba);
|
||||
ufshcd_vops_pm_qos_req_end(hba, cmd->request, true);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -4412,18 +4452,29 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
|
||||
lrbp->cmd = NULL;
|
||||
clear_bit_unlock(tag, &hba->lrb_in_use);
|
||||
ufshcd_release_all(hba);
|
||||
ufshcd_vops_pm_qos_req_end(hba, cmd->request, true);
|
||||
dev_err(hba->dev, "%s: failed sending command, %d\n",
|
||||
__func__, err);
|
||||
err = DID_ERROR;
|
||||
if (err == -EINVAL) {
|
||||
set_host_byte(cmd, DID_ERROR);
|
||||
if (has_read_lock)
|
||||
ufshcd_put_read_lock(hba);
|
||||
cmd->scsi_done(cmd);
|
||||
err = 0;
|
||||
goto out_pm_qos;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
cmd_sent = true;
|
||||
|
||||
out_unlock:
|
||||
spin_unlock_irqrestore(hba->host->host_lock, flags);
|
||||
out:
|
||||
if (has_read_lock)
|
||||
ufshcd_put_read_lock(hba);
|
||||
out_pm_qos:
|
||||
if (!cmd_sent)
|
||||
ufshcd_pm_qos_put(hba);
|
||||
return err;
|
||||
}
|
||||
|
||||
@@ -7481,12 +7532,11 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
|
||||
* this must be called before calling
|
||||
* ->scsi_done() callback.
|
||||
*/
|
||||
ufshcd_vops_pm_qos_req_end(hba, cmd->request,
|
||||
false);
|
||||
}
|
||||
|
||||
req = cmd->request;
|
||||
if (req) {
|
||||
ufshcd_pm_qos_put(hba);
|
||||
/* Update IO svc time latency histogram */
|
||||
if (req->lat_hist_enabled) {
|
||||
ktime_t completion;
|
||||
@@ -7557,15 +7607,8 @@ void ufshcd_abort_outstanding_transfer_requests(struct ufs_hba *hba, int result)
|
||||
/* Mark completed command as NULL in LRB */
|
||||
lrbp->cmd = NULL;
|
||||
ufshcd_release_all(hba);
|
||||
if (cmd->request) {
|
||||
/*
|
||||
* As we are accessing the "request" structure,
|
||||
* this must be called before calling
|
||||
* ->scsi_done() callback.
|
||||
*/
|
||||
ufshcd_vops_pm_qos_req_end(hba, cmd->request,
|
||||
true);
|
||||
}
|
||||
if (cmd->request)
|
||||
ufshcd_pm_qos_put(hba);
|
||||
/* Do not touch lrbp after scsi done */
|
||||
cmd->scsi_done(cmd);
|
||||
} else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE) {
|
||||
@@ -12699,6 +12742,9 @@ void ufshcd_remove(struct ufs_hba *hba)
|
||||
/* disable interrupts */
|
||||
ufshcd_disable_intr(hba, hba->intr_mask);
|
||||
ufshcd_hba_stop(hba, true);
|
||||
cancel_work_sync(&hba->pm_qos.put_work);
|
||||
cancel_work_sync(&hba->pm_qos.get_work);
|
||||
pm_qos_remove_request(&hba->pm_qos.req);
|
||||
|
||||
ufshcd_exit_clk_gating(hba);
|
||||
ufshcd_exit_hibern8_on_idle(hba);
|
||||
@@ -12977,6 +13023,14 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
|
||||
*/
|
||||
ufshcd_readl(hba, REG_INTERRUPT_ENABLE);
|
||||
|
||||
mutex_init(&hba->pm_qos.lock);
|
||||
INIT_WORK(&hba->pm_qos.get_work, ufshcd_pm_qos_get_worker);
|
||||
INIT_WORK(&hba->pm_qos.put_work, ufshcd_pm_qos_put_worker);
|
||||
hba->pm_qos.req.type = PM_QOS_REQ_AFFINE_IRQ;
|
||||
hba->pm_qos.req.irq = irq;
|
||||
pm_qos_add_request(&hba->pm_qos.req, PM_QOS_CPU_DMA_LATENCY,
|
||||
PM_QOS_DEFAULT_VALUE);
|
||||
|
||||
/* IRQ registration */
|
||||
err = devm_request_irq(dev, irq, ufshcd_intr, IRQF_SHARED,
|
||||
dev_name(dev), hba);
|
||||
@@ -13083,6 +13137,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
|
||||
out_remove_scsi_host:
|
||||
scsi_remove_host(hba->host);
|
||||
exit_gating:
|
||||
pm_qos_remove_request(&hba->pm_qos.req);
|
||||
ufshcd_exit_clk_gating(hba);
|
||||
ufshcd_exit_latency_hist(hba);
|
||||
out_disable:
|
||||
|
||||
@@ -58,6 +58,7 @@
|
||||
#include <linux/regulator/consumer.h>
|
||||
#include <linux/reset.h>
|
||||
#include <linux/extcon.h>
|
||||
#include <linux/pm_qos.h>
|
||||
#include "unipro.h"
|
||||
|
||||
#include <asm/irq.h>
|
||||
@@ -402,14 +403,6 @@ struct ufs_hba_variant_ops {
|
||||
const union ufs_crypto_cfg_entry *cfg, int slot);
|
||||
};
|
||||
|
||||
/**
|
||||
* struct ufs_hba_pm_qos_variant_ops - variant specific PM QoS callbacks
|
||||
*/
|
||||
struct ufs_hba_pm_qos_variant_ops {
|
||||
void (*req_start)(struct ufs_hba *, struct request *);
|
||||
void (*req_end)(struct ufs_hba *, struct request *, bool);
|
||||
};
|
||||
|
||||
/**
|
||||
* struct ufs_hba_variant - variant specific parameters
|
||||
* @name: variant name
|
||||
@@ -418,7 +411,6 @@ struct ufs_hba_variant {
|
||||
struct device *dev;
|
||||
const char *name;
|
||||
struct ufs_hba_variant_ops *vops;
|
||||
struct ufs_hba_pm_qos_variant_ops *pm_qos_vops;
|
||||
};
|
||||
|
||||
struct keyslot_mgmt_ll_ops;
|
||||
@@ -1112,6 +1104,8 @@ struct ufs_hba {
|
||||
/* Number of requests aborts */
|
||||
int req_abort_count;
|
||||
|
||||
u32 security_in;
|
||||
|
||||
/* Number of lanes available (1 or 2) for Rx/Tx */
|
||||
u32 lanes_per_direction;
|
||||
|
||||
@@ -1221,6 +1215,15 @@ struct ufs_hba {
|
||||
void *crypto_DO_NOT_USE[8];
|
||||
#endif /* CONFIG_SCSI_UFS_CRYPTO */
|
||||
|
||||
struct {
|
||||
struct pm_qos_request req;
|
||||
struct work_struct get_work;
|
||||
struct work_struct put_work;
|
||||
struct mutex lock;
|
||||
atomic_t count;
|
||||
bool active;
|
||||
} pm_qos;
|
||||
|
||||
#if IS_ENABLED(CONFIG_BLK_TURBO_WRITE)
|
||||
bool support_tw;
|
||||
bool tw_state_not_allowed;
|
||||
@@ -1694,21 +1697,6 @@ static inline void ufshcd_vops_remove_debugfs(struct ufs_hba *hba)
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void ufshcd_vops_pm_qos_req_start(struct ufs_hba *hba,
|
||||
struct request *req)
|
||||
{
|
||||
if (hba->var && hba->var->pm_qos_vops &&
|
||||
hba->var->pm_qos_vops->req_start)
|
||||
hba->var->pm_qos_vops->req_start(hba, req);
|
||||
}
|
||||
|
||||
static inline void ufshcd_vops_pm_qos_req_end(struct ufs_hba *hba,
|
||||
struct request *req, bool lock)
|
||||
{
|
||||
if (hba->var && hba->var->pm_qos_vops && hba->var->pm_qos_vops->req_end)
|
||||
hba->var->pm_qos_vops->req_end(hba, req, lock);
|
||||
}
|
||||
|
||||
#define UFS_DEV_ATTR(name, fmt, args...) \
|
||||
static ssize_t ufs_##name##_show(struct device *dev, struct device_attribute *attr, char *buf) \
|
||||
{ \
|
||||
|
||||
@@ -127,11 +127,11 @@ unsigned long long int msm_timer_get_sclk_ticks(void)
|
||||
if (!sclk_tick)
|
||||
return -EINVAL;
|
||||
while (loop_zero_count--) {
|
||||
t1 = __raw_readl_no_log(sclk_tick);
|
||||
t1 = __raw_readl(sclk_tick);
|
||||
do {
|
||||
udelay(1);
|
||||
t2 = t1;
|
||||
t1 = __raw_readl_no_log(sclk_tick);
|
||||
t1 = __raw_readl(sclk_tick);
|
||||
} while ((t2 != t1) && --loop_count);
|
||||
if (!loop_count) {
|
||||
pr_err("boot_stats: SCLK did not stabilize\n");
|
||||
|
||||
@@ -197,7 +197,7 @@ static void dcc_sram_memset(const struct device *dev, void __iomem *dst,
|
||||
}
|
||||
|
||||
while (count >= 4) {
|
||||
__raw_writel_no_log(qc, dst);
|
||||
__raw_writel(qc, dst);
|
||||
dst += 4;
|
||||
count -= 4;
|
||||
}
|
||||
@@ -213,7 +213,7 @@ static int dcc_sram_memcpy(void *to, const void __iomem *from,
|
||||
}
|
||||
|
||||
while (count >= 4) {
|
||||
*(unsigned int *)to = __raw_readl_no_log(from);
|
||||
*(unsigned int *)to = __raw_readl(from);
|
||||
to += 4;
|
||||
from += 4;
|
||||
count -= 4;
|
||||
@@ -1929,7 +1929,7 @@ static int dcc_v2_restore(struct device *dev)
|
||||
data = drvdata->sram_save_state;
|
||||
|
||||
for (i = 0; i < drvdata->ram_size / 4; i++)
|
||||
__raw_writel_no_log(data[i],
|
||||
__raw_writel(data[i],
|
||||
drvdata->ram_base + (i * 4));
|
||||
|
||||
state = drvdata->reg_save_state;
|
||||
|
||||
@@ -186,7 +186,7 @@
|
||||
/* spread out etm register write */
|
||||
#define etm_writel(etm, val, off) \
|
||||
do { \
|
||||
writel_relaxed_no_log(val, etm->base + off); \
|
||||
writel_relaxed(val, etm->base + off); \
|
||||
udelay(20); \
|
||||
} while (0)
|
||||
|
||||
@@ -194,13 +194,13 @@ do { \
|
||||
__raw_writel(val, etm->base + off)
|
||||
|
||||
#define etm_readl(etm, off) \
|
||||
readl_relaxed_no_log(etm->base + off)
|
||||
readl_relaxed(etm->base + off)
|
||||
|
||||
#define etm_writeq(etm, val, off) \
|
||||
writeq_relaxed_no_log(val, etm->base + off)
|
||||
writeq_relaxed(val, etm->base + off)
|
||||
|
||||
#define etm_readq(etm, off) \
|
||||
readq_relaxed_no_log(etm->base + off)
|
||||
readq_relaxed(etm->base + off)
|
||||
|
||||
#define ETM_LOCK(base) \
|
||||
do { \
|
||||
|
||||
@@ -155,7 +155,7 @@ static int tsens2xxx_get_temp(struct tsens_sensor *sensor, int *temp)
|
||||
sensor_addr = TSENS_TM_SN_STATUS(tmdev->tsens_tm_addr);
|
||||
trdy = TSENS_TM_TRDY(tmdev->tsens_tm_addr);
|
||||
|
||||
code = readl_relaxed_no_log(trdy);
|
||||
code = readl_relaxed(trdy);
|
||||
|
||||
if (!((code & TSENS_TM_TRDY_FIRST_ROUND_COMPLETE) >>
|
||||
TSENS_TM_TRDY_FIRST_ROUND_COMPLETE_SHIFT)) {
|
||||
@@ -170,7 +170,7 @@ static int tsens2xxx_get_temp(struct tsens_sensor *sensor, int *temp)
|
||||
/* Wait for 2.5 ms for tsens controller to recover */
|
||||
do {
|
||||
udelay(500);
|
||||
code = readl_relaxed_no_log(trdy);
|
||||
code = readl_relaxed(trdy);
|
||||
if (code & TSENS_TM_TRDY_FIRST_ROUND_COMPLETE) {
|
||||
TSENS_DUMP(tmdev, "%s",
|
||||
"tsens controller recovered\n");
|
||||
@@ -296,7 +296,7 @@ sensor_read:
|
||||
|
||||
tmdev->trdy_fail_ctr = 0;
|
||||
|
||||
code = readl_relaxed_no_log(sensor_addr +
|
||||
code = readl_relaxed(sensor_addr +
|
||||
(sensor->hw_id << TSENS_STATUS_ADDR_OFFSET));
|
||||
last_temp = code & TSENS_TM_SN_LAST_TEMP_MASK;
|
||||
|
||||
@@ -305,7 +305,7 @@ sensor_read:
|
||||
goto dbg;
|
||||
}
|
||||
|
||||
code = readl_relaxed_no_log(sensor_addr +
|
||||
code = readl_relaxed(sensor_addr +
|
||||
(sensor->hw_id << TSENS_STATUS_ADDR_OFFSET));
|
||||
last_temp2 = code & TSENS_TM_SN_LAST_TEMP_MASK;
|
||||
if (code & TSENS_TM_SN_STATUS_VALID_BIT) {
|
||||
@@ -314,7 +314,7 @@ sensor_read:
|
||||
goto dbg;
|
||||
}
|
||||
|
||||
code = readl_relaxed_no_log(sensor_addr +
|
||||
code = readl_relaxed(sensor_addr +
|
||||
(sensor->hw_id <<
|
||||
TSENS_STATUS_ADDR_OFFSET));
|
||||
last_temp3 = code & TSENS_TM_SN_LAST_TEMP_MASK;
|
||||
|
||||
@@ -15,10 +15,13 @@
|
||||
#ifndef _ASM_GENERIC__TLB_H
|
||||
#define _ASM_GENERIC__TLB_H
|
||||
|
||||
#include <linux/mmu_notifier.h>
|
||||
#include <linux/swap.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
|
||||
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
|
||||
/*
|
||||
* Semi RCU freeing of the page directories.
|
||||
@@ -102,12 +105,30 @@ struct mmu_gather {
|
||||
#endif
|
||||
unsigned long start;
|
||||
unsigned long end;
|
||||
/* we are in the middle of an operation to clear
|
||||
* a full mm and can make some optimizations */
|
||||
unsigned int fullmm : 1,
|
||||
/* we have performed an operation which
|
||||
* requires a complete flush of the tlb */
|
||||
need_flush_all : 1;
|
||||
/*
|
||||
* we are in the middle of an operation to clear
|
||||
* a full mm and can make some optimizations
|
||||
*/
|
||||
unsigned int fullmm : 1;
|
||||
|
||||
/*
|
||||
* we have performed an operation which
|
||||
* requires a complete flush of the tlb
|
||||
*/
|
||||
unsigned int need_flush_all : 1;
|
||||
|
||||
/*
|
||||
* we have removed page directories
|
||||
*/
|
||||
unsigned int freed_tables : 1;
|
||||
|
||||
/*
|
||||
* at which levels have we cleared entries?
|
||||
*/
|
||||
unsigned int cleared_ptes : 1;
|
||||
unsigned int cleared_pmds : 1;
|
||||
unsigned int cleared_puds : 1;
|
||||
unsigned int cleared_p4ds : 1;
|
||||
|
||||
struct mmu_gather_batch *active;
|
||||
struct mmu_gather_batch local;
|
||||
@@ -124,7 +145,8 @@ void tlb_flush_mmu(struct mmu_gather *tlb);
|
||||
void arch_tlb_finish_mmu(struct mmu_gather *tlb,
|
||||
unsigned long start, unsigned long end, bool force);
|
||||
void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
|
||||
unsigned long size);
|
||||
unsigned long size);
|
||||
void tlb_flush_mmu_free(struct mmu_gather *tlb);
|
||||
extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
|
||||
int page_size);
|
||||
|
||||
@@ -144,6 +166,21 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb)
|
||||
tlb->start = TASK_SIZE;
|
||||
tlb->end = 0;
|
||||
}
|
||||
tlb->freed_tables = 0;
|
||||
tlb->cleared_ptes = 0;
|
||||
tlb->cleared_pmds = 0;
|
||||
tlb->cleared_puds = 0;
|
||||
tlb->cleared_p4ds = 0;
|
||||
}
|
||||
|
||||
static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
|
||||
{
|
||||
if (!tlb->end)
|
||||
return;
|
||||
|
||||
tlb_flush(tlb);
|
||||
mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
|
||||
__tlb_reset_range(tlb);
|
||||
}
|
||||
|
||||
static inline void tlb_remove_page_size(struct mmu_gather *tlb,
|
||||
@@ -183,6 +220,25 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb)
|
||||
{
|
||||
if (tlb->cleared_ptes)
|
||||
return PAGE_SHIFT;
|
||||
if (tlb->cleared_pmds)
|
||||
return PMD_SHIFT;
|
||||
if (tlb->cleared_puds)
|
||||
return PUD_SHIFT;
|
||||
if (tlb->cleared_p4ds)
|
||||
return P4D_SHIFT;
|
||||
|
||||
return PAGE_SHIFT;
|
||||
}
|
||||
|
||||
static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb)
|
||||
{
|
||||
return 1UL << tlb_get_unmap_shift(tlb);
|
||||
}
|
||||
|
||||
/*
|
||||
* In the case of tlb vma handling, we can optimise these away in the
|
||||
* case where we're doing a full MM flush. When we're doing a munmap,
|
||||
@@ -194,10 +250,8 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
|
||||
|
||||
#define __tlb_end_vma(tlb, vma) \
|
||||
do { \
|
||||
if (!tlb->fullmm && tlb->end) { \
|
||||
tlb_flush(tlb); \
|
||||
__tlb_reset_range(tlb); \
|
||||
} \
|
||||
if (!tlb->fullmm) \
|
||||
tlb_flush_mmu_tlbonly(tlb); \
|
||||
} while (0)
|
||||
|
||||
#ifndef tlb_end_vma
|
||||
@@ -218,13 +272,19 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
|
||||
#define tlb_remove_tlb_entry(tlb, ptep, address) \
|
||||
do { \
|
||||
__tlb_adjust_range(tlb, address, PAGE_SIZE); \
|
||||
tlb->cleared_ptes = 1; \
|
||||
__tlb_remove_tlb_entry(tlb, ptep, address); \
|
||||
} while (0)
|
||||
|
||||
#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
|
||||
do { \
|
||||
__tlb_adjust_range(tlb, address, huge_page_size(h)); \
|
||||
__tlb_remove_tlb_entry(tlb, ptep, address); \
|
||||
#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
|
||||
do { \
|
||||
unsigned long _sz = huge_page_size(h); \
|
||||
__tlb_adjust_range(tlb, address, _sz); \
|
||||
if (_sz == PMD_SIZE) \
|
||||
tlb->cleared_pmds = 1; \
|
||||
else if (_sz == PUD_SIZE) \
|
||||
tlb->cleared_puds = 1; \
|
||||
__tlb_remove_tlb_entry(tlb, ptep, address); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
@@ -238,6 +298,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
|
||||
#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address) \
|
||||
do { \
|
||||
__tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE); \
|
||||
tlb->cleared_pmds = 1; \
|
||||
__tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \
|
||||
} while (0)
|
||||
|
||||
@@ -252,6 +313,7 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
|
||||
#define tlb_remove_pud_tlb_entry(tlb, pudp, address) \
|
||||
do { \
|
||||
__tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE); \
|
||||
tlb->cleared_puds = 1; \
|
||||
__tlb_remove_pud_tlb_entry(tlb, pudp, address); \
|
||||
} while (0)
|
||||
|
||||
@@ -276,12 +338,16 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
|
||||
#define pte_free_tlb(tlb, ptep, address) \
|
||||
do { \
|
||||
__tlb_adjust_range(tlb, address, PAGE_SIZE); \
|
||||
tlb->freed_tables = 1; \
|
||||
tlb->cleared_pmds = 1; \
|
||||
__pte_free_tlb(tlb, ptep, address); \
|
||||
} while (0)
|
||||
|
||||
#define pmd_free_tlb(tlb, pmdp, address) \
|
||||
do { \
|
||||
__tlb_adjust_range(tlb, address, PAGE_SIZE); \
|
||||
__tlb_adjust_range(tlb, address, PAGE_SIZE); \
|
||||
tlb->freed_tables = 1; \
|
||||
tlb->cleared_puds = 1; \
|
||||
__pmd_free_tlb(tlb, pmdp, address); \
|
||||
} while (0)
|
||||
|
||||
@@ -289,6 +355,8 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
|
||||
#define pud_free_tlb(tlb, pudp, address) \
|
||||
do { \
|
||||
__tlb_adjust_range(tlb, address, PAGE_SIZE); \
|
||||
tlb->freed_tables = 1; \
|
||||
tlb->cleared_p4ds = 1; \
|
||||
__pud_free_tlb(tlb, pudp, address); \
|
||||
} while (0)
|
||||
#endif
|
||||
@@ -296,11 +364,14 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
|
||||
#ifndef __ARCH_HAS_5LEVEL_HACK
|
||||
#define p4d_free_tlb(tlb, pudp, address) \
|
||||
do { \
|
||||
__tlb_adjust_range(tlb, address, PAGE_SIZE); \
|
||||
__tlb_adjust_range(tlb, address, PAGE_SIZE); \
|
||||
tlb->freed_tables = 1; \
|
||||
__p4d_free_tlb(tlb, pudp, address); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_MMU */
|
||||
|
||||
#define tlb_migrate_finish(mm) do {} while (0)
|
||||
|
||||
#endif /* _ASM_GENERIC__TLB_H */
|
||||
|
||||
@@ -957,6 +957,4 @@ unsigned int cpufreq_generic_get(unsigned int cpu);
|
||||
int cpufreq_generic_init(struct cpufreq_policy *policy,
|
||||
struct cpufreq_frequency_table *table,
|
||||
unsigned int transition_latency);
|
||||
|
||||
extern unsigned int cpuinfo_max_freq_cached;
|
||||
#endif /* _LINUX_CPUFREQ_H */
|
||||
|
||||
@@ -1,648 +1,17 @@
|
||||
/* LZ4 Kernel Interface
|
||||
*
|
||||
* Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
|
||||
* Copyright (C) 2016, Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This file is based on the original header file
|
||||
* for LZ4 - Fast LZ compression algorithm.
|
||||
*
|
||||
* LZ4 - Fast LZ compression algorithm
|
||||
* Copyright (C) 2011-2016, Yann Collet.
|
||||
* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* You can contact the author at :
|
||||
* - LZ4 homepage : http://www.lz4.org
|
||||
* - LZ4 source repository : https://github.com/lz4/lz4
|
||||
*/
|
||||
/* SPDX-License-Identifier: BSD-2-Clause */
|
||||
// LZ4 compatibility wrapper for Linux kernel
|
||||
|
||||
#ifndef __LZ4_H__
|
||||
#define __LZ4_H__
|
||||
#ifndef __LINUX_LZ4_H__
|
||||
#define __LINUX_LZ4_H__
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/string.h> /* memset, memcpy */
|
||||
#include "../../lib/lz4/lz4.h"
|
||||
#include "../../lib/lz4/lz4hc.h"
|
||||
|
||||
/*-************************************************************************
|
||||
* CONSTANTS
|
||||
**************************************************************************/
|
||||
/*
|
||||
* LZ4_MEMORY_USAGE :
|
||||
* Memory usage formula : N->2^N Bytes
|
||||
* (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
|
||||
* Increasing memory usage improves compression ratio
|
||||
* Reduced memory usage can improve speed, due to cache effect
|
||||
* Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache
|
||||
*/
|
||||
#define LZ4_MEMORY_USAGE 14
|
||||
#define LZ4_MEM_COMPRESS LZ4_STREAM_MINSIZE
|
||||
#define LZ4HC_MEM_COMPRESS LZ4_STREAMHC_MINSIZE
|
||||
|
||||
#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */
|
||||
#define LZ4_COMPRESSBOUND(isize) (\
|
||||
(unsigned int)(isize) > (unsigned int)LZ4_MAX_INPUT_SIZE \
|
||||
? 0 \
|
||||
: (isize) + ((isize)/255) + 16)
|
||||
|
||||
#define LZ4_ACCELERATION_DEFAULT 1
|
||||
#define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2)
|
||||
#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
|
||||
#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)
|
||||
|
||||
#define LZ4HC_MIN_CLEVEL 3
|
||||
#define LZ4HC_DEFAULT_CLEVEL 9
|
||||
#define LZ4HC_MAX_CLEVEL 16
|
||||
|
||||
#define LZ4HC_DICTIONARY_LOGSIZE 16
|
||||
#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
|
||||
#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
|
||||
#define LZ4HC_HASH_LOG (LZ4HC_DICTIONARY_LOGSIZE - 1)
|
||||
#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
|
||||
#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
|
||||
|
||||
/*-************************************************************************
|
||||
* STREAMING CONSTANTS AND STRUCTURES
|
||||
**************************************************************************/
|
||||
#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE - 3)) + 4)
|
||||
#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long))
|
||||
|
||||
#define LZ4_STREAMHCSIZE 262192
|
||||
#define LZ4_STREAMHCSIZE_SIZET (262192 / sizeof(size_t))
|
||||
|
||||
#define LZ4_STREAMDECODESIZE_U64 4
|
||||
#define LZ4_STREAMDECODESIZE (LZ4_STREAMDECODESIZE_U64 * \
|
||||
sizeof(unsigned long long))
|
||||
|
||||
/*
|
||||
* LZ4_stream_t - information structure to track an LZ4 stream.
|
||||
*/
|
||||
typedef struct {
|
||||
uint32_t hashTable[LZ4_HASH_SIZE_U32];
|
||||
uint32_t currentOffset;
|
||||
uint32_t initCheck;
|
||||
const uint8_t *dictionary;
|
||||
uint8_t *bufferStart;
|
||||
uint32_t dictSize;
|
||||
} LZ4_stream_t_internal;
|
||||
typedef union {
|
||||
unsigned long long table[LZ4_STREAMSIZE_U64];
|
||||
LZ4_stream_t_internal internal_donotuse;
|
||||
} LZ4_stream_t;
|
||||
|
||||
/*
|
||||
* LZ4_streamHC_t - information structure to track an LZ4HC stream.
|
||||
*/
|
||||
typedef struct {
|
||||
unsigned int hashTable[LZ4HC_HASHTABLESIZE];
|
||||
unsigned short chainTable[LZ4HC_MAXD];
|
||||
/* next block to continue on current prefix */
|
||||
const unsigned char *end;
|
||||
/* All index relative to this position */
|
||||
const unsigned char *base;
|
||||
/* alternate base for extDict */
|
||||
const unsigned char *dictBase;
|
||||
/* below that point, need extDict */
|
||||
unsigned int dictLimit;
|
||||
/* below that point, no more dict */
|
||||
unsigned int lowLimit;
|
||||
/* index from which to continue dict update */
|
||||
unsigned int nextToUpdate;
|
||||
unsigned int compressionLevel;
|
||||
} LZ4HC_CCtx_internal;
|
||||
typedef union {
|
||||
size_t table[LZ4_STREAMHCSIZE_SIZET];
|
||||
LZ4HC_CCtx_internal internal_donotuse;
|
||||
} LZ4_streamHC_t;
|
||||
|
||||
/*
|
||||
* LZ4_streamDecode_t - information structure to track an
|
||||
* LZ4 stream during decompression.
|
||||
*
|
||||
* init this structure using LZ4_setStreamDecode (or memset()) before first use
|
||||
*/
|
||||
typedef struct {
|
||||
const uint8_t *externalDict;
|
||||
size_t extDictSize;
|
||||
const uint8_t *prefixEnd;
|
||||
size_t prefixSize;
|
||||
} LZ4_streamDecode_t_internal;
|
||||
typedef union {
|
||||
unsigned long long table[LZ4_STREAMDECODESIZE_U64];
|
||||
LZ4_streamDecode_t_internal internal_donotuse;
|
||||
} LZ4_streamDecode_t;
|
||||
|
||||
/*-************************************************************************
|
||||
* SIZE OF STATE
|
||||
**************************************************************************/
|
||||
#define LZ4_MEM_COMPRESS LZ4_STREAMSIZE
|
||||
#define LZ4HC_MEM_COMPRESS LZ4_STREAMHCSIZE
|
||||
|
||||
/*-************************************************************************
|
||||
* Compression Functions
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* LZ4_compressBound() - Max. output size in worst case szenarios
|
||||
* @isize: Size of the input data
|
||||
*
|
||||
* Return: Max. size LZ4 may output in a "worst case" szenario
|
||||
* (data not compressible)
|
||||
*/
|
||||
static inline int LZ4_compressBound(size_t isize)
|
||||
{
|
||||
return LZ4_COMPRESSBOUND(isize);
|
||||
}
|
||||
|
||||
/**
|
||||
* LZ4_compress_default() - Compress data from source to dest
|
||||
* @source: source address of the original data
|
||||
* @dest: output buffer address of the compressed data
|
||||
* @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
|
||||
* @maxOutputSize: full or partial size of buffer 'dest'
|
||||
* which must be already allocated
|
||||
* @wrkmem: address of the working memory.
|
||||
* This requires 'workmem' of LZ4_MEM_COMPRESS.
|
||||
*
|
||||
* Compresses 'sourceSize' bytes from buffer 'source'
|
||||
* into already allocated 'dest' buffer of size 'maxOutputSize'.
|
||||
* Compression is guaranteed to succeed if
|
||||
* 'maxOutputSize' >= LZ4_compressBound(inputSize).
|
||||
* It also runs faster, so it's a recommended setting.
|
||||
* If the function cannot compress 'source' into a more limited 'dest' budget,
|
||||
* compression stops *immediately*, and the function result is zero.
|
||||
* As a consequence, 'dest' content is not valid.
|
||||
*
|
||||
* Return: Number of bytes written into buffer 'dest'
|
||||
* (necessarily <= maxOutputSize) or 0 if compression fails
|
||||
*/
|
||||
int LZ4_compress_default(const char *source, char *dest, int inputSize,
|
||||
int maxOutputSize, void *wrkmem);
|
||||
|
||||
/**
|
||||
* LZ4_compress_fast() - As LZ4_compress_default providing an acceleration param
|
||||
* @source: source address of the original data
|
||||
* @dest: output buffer address of the compressed data
|
||||
* @inputSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
|
||||
* @maxOutputSize: full or partial size of buffer 'dest'
|
||||
* which must be already allocated
|
||||
* @acceleration: acceleration factor
|
||||
* @wrkmem: address of the working memory.
|
||||
* This requires 'workmem' of LZ4_MEM_COMPRESS.
|
||||
*
|
||||
* Same as LZ4_compress_default(), but allows to select an "acceleration"
|
||||
* factor. The larger the acceleration value, the faster the algorithm,
|
||||
* but also the lesser the compression. It's a trade-off. It can be fine tuned,
|
||||
* with each successive value providing roughly +~3% to speed.
|
||||
* An acceleration value of "1" is the same as regular LZ4_compress_default()
|
||||
* Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT, which is 1.
|
||||
*
|
||||
* Return: Number of bytes written into buffer 'dest'
|
||||
* (necessarily <= maxOutputSize) or 0 if compression fails
|
||||
*/
|
||||
int LZ4_compress_fast(const char *source, char *dest, int inputSize,
|
||||
int maxOutputSize, int acceleration, void *wrkmem);
|
||||
|
||||
/**
|
||||
* LZ4_compress_destSize() - Compress as much data as possible
|
||||
* from source to dest
|
||||
* @source: source address of the original data
|
||||
* @dest: output buffer address of the compressed data
|
||||
* @sourceSizePtr: will be modified to indicate how many bytes where read
|
||||
* from 'source' to fill 'dest'. New value is necessarily <= old value.
|
||||
* @targetDestSize: Size of buffer 'dest' which must be already allocated
|
||||
* @wrkmem: address of the working memory.
|
||||
* This requires 'workmem' of LZ4_MEM_COMPRESS.
|
||||
*
|
||||
* Reverse the logic, by compressing as much data as possible
|
||||
* from 'source' buffer into already allocated buffer 'dest'
|
||||
* of size 'targetDestSize'.
|
||||
* This function either compresses the entire 'source' content into 'dest'
|
||||
* if it's large enough, or fill 'dest' buffer completely with as much data as
|
||||
* possible from 'source'.
|
||||
*
|
||||
* Return: Number of bytes written into 'dest' (necessarily <= targetDestSize)
|
||||
* or 0 if compression fails
|
||||
*/
|
||||
int LZ4_compress_destSize(const char *source, char *dest, int *sourceSizePtr,
|
||||
int targetDestSize, void *wrkmem);
|
||||
|
||||
/*-************************************************************************
|
||||
* Decompression Functions
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* LZ4_decompress_fast() - Decompresses data from 'source' into 'dest'
|
||||
* @source: source address of the compressed data
|
||||
* @dest: output buffer address of the uncompressed data
|
||||
* which must be already allocated with 'originalSize' bytes
|
||||
* @originalSize: is the original and therefore uncompressed size
|
||||
*
|
||||
* Decompresses data from 'source' into 'dest'.
|
||||
* This function fully respect memory boundaries for properly formed
|
||||
* compressed data.
|
||||
* It is a bit faster than LZ4_decompress_safe().
|
||||
* However, it does not provide any protection against intentionally
|
||||
* modified data stream (malicious input).
|
||||
* Use this function in trusted environment only
|
||||
* (data to decode comes from a trusted source).
|
||||
*
|
||||
* Return: number of bytes read from the source buffer
|
||||
* or a negative result if decompression fails.
|
||||
*/
|
||||
int LZ4_decompress_fast(const char *source, char *dest, int originalSize);
|
||||
|
||||
/**
|
||||
* LZ4_decompress_safe() - Decompression protected against buffer overflow
|
||||
* @source: source address of the compressed data
|
||||
* @dest: output buffer address of the uncompressed data
|
||||
* which must be already allocated
|
||||
* @compressedSize: is the precise full size of the compressed block
|
||||
* @maxDecompressedSize: is the size of 'dest' buffer
|
||||
*
|
||||
* Decompresses data fom 'source' into 'dest'.
|
||||
* If the source stream is detected malformed, the function will
|
||||
* stop decoding and return a negative result.
|
||||
* This function is protected against buffer overflow exploits,
|
||||
* including malicious data packets. It never writes outside output buffer,
|
||||
* nor reads outside input buffer.
|
||||
*
|
||||
* Return: number of bytes decompressed into destination buffer
|
||||
* (necessarily <= maxDecompressedSize)
|
||||
* or a negative result in case of error
|
||||
*/
|
||||
int LZ4_decompress_safe(const char *source, char *dest, int compressedSize,
|
||||
int maxDecompressedSize);
|
||||
|
||||
/**
|
||||
* LZ4_decompress_safe_partial() - Decompress a block of size 'compressedSize'
|
||||
* at position 'source' into buffer 'dest'
|
||||
* @source: source address of the compressed data
|
||||
* @dest: output buffer address of the decompressed data which must be
|
||||
* already allocated
|
||||
* @compressedSize: is the precise full size of the compressed block.
|
||||
* @targetOutputSize: the decompression operation will try
|
||||
* to stop as soon as 'targetOutputSize' has been reached
|
||||
* @maxDecompressedSize: is the size of destination buffer
|
||||
*
|
||||
* This function decompresses a compressed block of size 'compressedSize'
|
||||
* at position 'source' into destination buffer 'dest'
|
||||
* of size 'maxDecompressedSize'.
|
||||
* The function tries to stop decompressing operation as soon as
|
||||
* 'targetOutputSize' has been reached, reducing decompression time.
|
||||
* This function never writes outside of output buffer,
|
||||
* and never reads outside of input buffer.
|
||||
* It is therefore protected against malicious data packets.
|
||||
*
|
||||
* Return: the number of bytes decoded in the destination buffer
|
||||
* (necessarily <= maxDecompressedSize)
|
||||
* or a negative result in case of error
|
||||
*
|
||||
*/
|
||||
int LZ4_decompress_safe_partial(const char *source, char *dest,
|
||||
int compressedSize, int targetOutputSize, int maxDecompressedSize);
|
||||
|
||||
/*-************************************************************************
|
||||
* LZ4 HC Compression
|
||||
**************************************************************************/
|
||||
|
||||
/**
|
||||
* LZ4_compress_HC() - Compress data from `src` into `dst`, using HC algorithm
|
||||
* @src: source address of the original data
|
||||
* @dst: output buffer address of the compressed data
|
||||
* @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
|
||||
* @dstCapacity: full or partial size of buffer 'dst',
|
||||
* which must be already allocated
|
||||
* @compressionLevel: Recommended values are between 4 and 9, although any
|
||||
* value between 1 and LZ4HC_MAX_CLEVEL will work.
|
||||
* Values >LZ4HC_MAX_CLEVEL behave the same as 16.
|
||||
* @wrkmem: address of the working memory.
|
||||
* This requires 'wrkmem' of size LZ4HC_MEM_COMPRESS.
|
||||
*
|
||||
* Compress data from 'src' into 'dst', using the more powerful
|
||||
* but slower "HC" algorithm. Compression is guaranteed to succeed if
|
||||
* `dstCapacity >= LZ4_compressBound(srcSize)
|
||||
*
|
||||
* Return : the number of bytes written into 'dst' or 0 if compression fails.
|
||||
*/
|
||||
int LZ4_compress_HC(const char *src, char *dst, int srcSize, int dstCapacity,
|
||||
int compressionLevel, void *wrkmem);
|
||||
|
||||
/**
|
||||
* LZ4_resetStreamHC() - Init an allocated 'LZ4_streamHC_t' structure
|
||||
* @streamHCPtr: pointer to the 'LZ4_streamHC_t' structure
|
||||
* @compressionLevel: Recommended values are between 4 and 9, although any
|
||||
* value between 1 and LZ4HC_MAX_CLEVEL will work.
|
||||
* Values >LZ4HC_MAX_CLEVEL behave the same as 16.
|
||||
*
|
||||
* An LZ4_streamHC_t structure can be allocated once
|
||||
* and re-used multiple times.
|
||||
* Use this function to init an allocated `LZ4_streamHC_t` structure
|
||||
* and start a new compression.
|
||||
*/
|
||||
void LZ4_resetStreamHC(LZ4_streamHC_t *streamHCPtr, int compressionLevel);
|
||||
|
||||
/**
|
||||
* LZ4_loadDictHC() - Load a static dictionary into LZ4_streamHC
|
||||
* @streamHCPtr: pointer to the LZ4HC_stream_t
|
||||
* @dictionary: dictionary to load
|
||||
* @dictSize: size of dictionary
|
||||
*
|
||||
* Use this function to load a static dictionary into LZ4HC_stream.
|
||||
* Any previous data will be forgotten, only 'dictionary'
|
||||
* will remain in memory.
|
||||
* Loading a size of 0 is allowed.
|
||||
*
|
||||
* Return : dictionary size, in bytes (necessarily <= 64 KB)
|
||||
*/
|
||||
int LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr, const char *dictionary,
|
||||
int dictSize);
|
||||
|
||||
/**
|
||||
* LZ4_compress_HC_continue() - Compress 'src' using data from previously
|
||||
* compressed blocks as a dictionary using the HC algorithm
|
||||
* @streamHCPtr: Pointer to the previous 'LZ4_streamHC_t' structure
|
||||
* @src: source address of the original data
|
||||
* @dst: output buffer address of the compressed data,
|
||||
* which must be already allocated
|
||||
* @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
|
||||
* @maxDstSize: full or partial size of buffer 'dest'
|
||||
* which must be already allocated
|
||||
*
|
||||
* These functions compress data in successive blocks of any size, using
|
||||
* previous blocks as dictionary. One key assumption is that previous
|
||||
* blocks (up to 64 KB) remain read-accessible while
|
||||
* compressing next blocks. There is an exception for ring buffers,
|
||||
* which can be smaller than 64 KB.
|
||||
* Ring buffers scenario is automatically detected and handled by
|
||||
* LZ4_compress_HC_continue().
|
||||
* Before starting compression, state must be properly initialized,
|
||||
* using LZ4_resetStreamHC().
|
||||
* A first "fictional block" can then be designated as
|
||||
* initial dictionary, using LZ4_loadDictHC() (Optional).
|
||||
* Then, use LZ4_compress_HC_continue()
|
||||
* to compress each successive block. Previous memory blocks
|
||||
* (including initial dictionary when present) must remain accessible
|
||||
* and unmodified during compression.
|
||||
* 'dst' buffer should be sized to handle worst case scenarios, using
|
||||
* LZ4_compressBound(), to ensure operation success.
|
||||
* If, for any reason, previous data blocks can't be preserved unmodified
|
||||
* in memory during next compression block,
|
||||
* you must save it to a safer memory space, using LZ4_saveDictHC().
|
||||
* Return value of LZ4_saveDictHC() is the size of dictionary
|
||||
* effectively saved into 'safeBuffer'.
|
||||
*
|
||||
* Return: Number of bytes written into buffer 'dst' or 0 if compression fails
|
||||
*/
|
||||
int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr, const char *src,
|
||||
char *dst, int srcSize, int maxDstSize);
|
||||
|
||||
/**
|
||||
* LZ4_saveDictHC() - Save static dictionary from LZ4HC_stream
|
||||
* @streamHCPtr: pointer to the 'LZ4HC_stream_t' structure
|
||||
* @safeBuffer: buffer to save dictionary to, must be already allocated
|
||||
* @maxDictSize: size of 'safeBuffer'
|
||||
*
|
||||
* If previously compressed data block is not guaranteed
|
||||
* to remain available at its memory location,
|
||||
* save it into a safer place (char *safeBuffer).
|
||||
* Note : you don't need to call LZ4_loadDictHC() afterwards,
|
||||
* dictionary is immediately usable, you can therefore call
|
||||
* LZ4_compress_HC_continue().
|
||||
*
|
||||
* Return : saved dictionary size in bytes (necessarily <= maxDictSize),
|
||||
* or 0 if error.
|
||||
*/
|
||||
int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer,
|
||||
int maxDictSize);
|
||||
|
||||
/*-*********************************************
|
||||
* Streaming Compression Functions
|
||||
***********************************************/
|
||||
|
||||
/**
|
||||
* LZ4_resetStream() - Init an allocated 'LZ4_stream_t' structure
|
||||
* @LZ4_stream: pointer to the 'LZ4_stream_t' structure
|
||||
*
|
||||
* An LZ4_stream_t structure can be allocated once
|
||||
* and re-used multiple times.
|
||||
* Use this function to init an allocated `LZ4_stream_t` structure
|
||||
* and start a new compression.
|
||||
*/
|
||||
void LZ4_resetStream(LZ4_stream_t *LZ4_stream);
|
||||
|
||||
/**
|
||||
* LZ4_loadDict() - Load a static dictionary into LZ4_stream
|
||||
* @streamPtr: pointer to the LZ4_stream_t
|
||||
* @dictionary: dictionary to load
|
||||
* @dictSize: size of dictionary
|
||||
*
|
||||
* Use this function to load a static dictionary into LZ4_stream.
|
||||
* Any previous data will be forgotten, only 'dictionary'
|
||||
* will remain in memory.
|
||||
* Loading a size of 0 is allowed.
|
||||
*
|
||||
* Return : dictionary size, in bytes (necessarily <= 64 KB)
|
||||
*/
|
||||
int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary,
|
||||
int dictSize);
|
||||
|
||||
/**
|
||||
* LZ4_saveDict() - Save static dictionary from LZ4_stream
|
||||
* @streamPtr: pointer to the 'LZ4_stream_t' structure
|
||||
* @safeBuffer: buffer to save dictionary to, must be already allocated
|
||||
* @dictSize: size of 'safeBuffer'
|
||||
*
|
||||
* If previously compressed data block is not guaranteed
|
||||
* to remain available at its memory location,
|
||||
* save it into a safer place (char *safeBuffer).
|
||||
* Note : you don't need to call LZ4_loadDict() afterwards,
|
||||
* dictionary is immediately usable, you can therefore call
|
||||
* LZ4_compress_fast_continue().
|
||||
*
|
||||
* Return : saved dictionary size in bytes (necessarily <= dictSize),
|
||||
* or 0 if error.
|
||||
*/
|
||||
int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer, int dictSize);
|
||||
|
||||
/**
|
||||
* LZ4_compress_fast_continue() - Compress 'src' using data from previously
|
||||
* compressed blocks as a dictionary
|
||||
* @streamPtr: Pointer to the previous 'LZ4_stream_t' structure
|
||||
* @src: source address of the original data
|
||||
* @dst: output buffer address of the compressed data,
|
||||
* which must be already allocated
|
||||
* @srcSize: size of the input data. Max supported value is LZ4_MAX_INPUT_SIZE
|
||||
* @maxDstSize: full or partial size of buffer 'dest'
|
||||
* which must be already allocated
|
||||
* @acceleration: acceleration factor
|
||||
*
|
||||
* Compress buffer content 'src', using data from previously compressed blocks
|
||||
* as dictionary to improve compression ratio.
|
||||
* Important : Previous data blocks are assumed to still
|
||||
* be present and unmodified !
|
||||
* If maxDstSize >= LZ4_compressBound(srcSize),
|
||||
* compression is guaranteed to succeed, and runs faster.
|
||||
*
|
||||
* Return: Number of bytes written into buffer 'dst' or 0 if compression fails
|
||||
*/
|
||||
int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr, const char *src,
|
||||
char *dst, int srcSize, int maxDstSize, int acceleration);
|
||||
|
||||
/**
|
||||
* LZ4_setStreamDecode() - Instruct where to find dictionary
|
||||
* @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
|
||||
* @dictionary: dictionary to use
|
||||
* @dictSize: size of dictionary
|
||||
*
|
||||
* Use this function to instruct where to find the dictionary.
|
||||
* Setting a size of 0 is allowed (same effect as reset).
|
||||
*
|
||||
* Return: 1 if OK, 0 if error
|
||||
*/
|
||||
int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
|
||||
const char *dictionary, int dictSize);
|
||||
|
||||
/**
|
||||
* LZ4_decompress_fast_continue() - Decompress blocks in streaming mode
|
||||
* @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
|
||||
* @source: source address of the compressed data
|
||||
* @dest: output buffer address of the uncompressed data
|
||||
* which must be already allocated
|
||||
* @compressedSize: is the precise full size of the compressed block
|
||||
* @maxDecompressedSize: is the size of 'dest' buffer
|
||||
*
|
||||
* These decoding function allows decompression of multiple blocks
|
||||
* in "streaming" mode.
|
||||
* Previously decoded blocks *must* remain available at the memory position
|
||||
* where they were decoded (up to 64 KB)
|
||||
* In the case of a ring buffers, decoding buffer must be either :
|
||||
* - Exactly same size as encoding buffer, with same update rule
|
||||
* (block boundaries at same positions) In which case,
|
||||
* the decoding & encoding ring buffer can have any size,
|
||||
* including very small ones ( < 64 KB).
|
||||
* - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
|
||||
* maxBlockSize is implementation dependent.
|
||||
* It's the maximum size you intend to compress into a single block.
|
||||
* In which case, encoding and decoding buffers do not need
|
||||
* to be synchronized, and encoding ring buffer can have any size,
|
||||
* including small ones ( < 64 KB).
|
||||
* - _At least_ 64 KB + 8 bytes + maxBlockSize.
|
||||
* In which case, encoding and decoding buffers do not need to be
|
||||
* synchronized, and encoding ring buffer can have any size,
|
||||
* including larger than decoding buffer. W
|
||||
* Whenever these conditions are not possible, save the last 64KB of decoded
|
||||
* data into a safe buffer, and indicate where it is saved
|
||||
* using LZ4_setStreamDecode()
|
||||
*
|
||||
* Return: number of bytes decompressed into destination buffer
|
||||
* (necessarily <= maxDecompressedSize)
|
||||
* or a negative result in case of error
|
||||
*/
|
||||
int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
|
||||
const char *source, char *dest, int compressedSize,
|
||||
int maxDecompressedSize);
|
||||
|
||||
/**
|
||||
* LZ4_decompress_fast_continue() - Decompress blocks in streaming mode
|
||||
* @LZ4_streamDecode: the 'LZ4_streamDecode_t' structure
|
||||
* @source: source address of the compressed data
|
||||
* @dest: output buffer address of the uncompressed data
|
||||
* which must be already allocated with 'originalSize' bytes
|
||||
* @originalSize: is the original and therefore uncompressed size
|
||||
*
|
||||
* These decoding function allows decompression of multiple blocks
|
||||
* in "streaming" mode.
|
||||
* Previously decoded blocks *must* remain available at the memory position
|
||||
* where they were decoded (up to 64 KB)
|
||||
* In the case of a ring buffers, decoding buffer must be either :
|
||||
* - Exactly same size as encoding buffer, with same update rule
|
||||
* (block boundaries at same positions) In which case,
|
||||
* the decoding & encoding ring buffer can have any size,
|
||||
* including very small ones ( < 64 KB).
|
||||
* - Larger than encoding buffer, by a minimum of maxBlockSize more bytes.
|
||||
* maxBlockSize is implementation dependent.
|
||||
* It's the maximum size you intend to compress into a single block.
|
||||
* In which case, encoding and decoding buffers do not need
|
||||
* to be synchronized, and encoding ring buffer can have any size,
|
||||
* including small ones ( < 64 KB).
|
||||
* - _At least_ 64 KB + 8 bytes + maxBlockSize.
|
||||
* In which case, encoding and decoding buffers do not need to be
|
||||
* synchronized, and encoding ring buffer can have any size,
|
||||
* including larger than decoding buffer. W
|
||||
* Whenever these conditions are not possible, save the last 64KB of decoded
|
||||
* data into a safe buffer, and indicate where it is saved
|
||||
* using LZ4_setStreamDecode()
|
||||
*
|
||||
* Return: number of bytes decompressed into destination buffer
|
||||
* (necessarily <= maxDecompressedSize)
|
||||
* or a negative result in case of error
|
||||
*/
|
||||
int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
|
||||
const char *source, char *dest, int originalSize);
|
||||
|
||||
/**
|
||||
* LZ4_decompress_safe_usingDict() - Same as LZ4_setStreamDecode()
|
||||
* followed by LZ4_decompress_safe_continue()
|
||||
* @source: source address of the compressed data
|
||||
* @dest: output buffer address of the uncompressed data
|
||||
* which must be already allocated
|
||||
* @compressedSize: is the precise full size of the compressed block
|
||||
* @maxDecompressedSize: is the size of 'dest' buffer
|
||||
* @dictStart: pointer to the start of the dictionary in memory
|
||||
* @dictSize: size of dictionary
|
||||
*
|
||||
* These decoding function works the same as
|
||||
* a combination of LZ4_setStreamDecode() followed by
|
||||
* LZ4_decompress_safe_continue()
|
||||
* It is stand-alone, and don'tn eed a LZ4_streamDecode_t structure.
|
||||
*
|
||||
* Return: number of bytes decompressed into destination buffer
|
||||
* (necessarily <= maxDecompressedSize)
|
||||
* or a negative result in case of error
|
||||
*/
|
||||
int LZ4_decompress_safe_usingDict(const char *source, char *dest,
|
||||
int compressedSize, int maxDecompressedSize, const char *dictStart,
|
||||
int dictSize);
|
||||
|
||||
/**
|
||||
* LZ4_decompress_fast_usingDict() - Same as LZ4_setStreamDecode()
|
||||
* followed by LZ4_decompress_fast_continue()
|
||||
* @source: source address of the compressed data
|
||||
* @dest: output buffer address of the uncompressed data
|
||||
* which must be already allocated with 'originalSize' bytes
|
||||
* @originalSize: is the original and therefore uncompressed size
|
||||
* @dictStart: pointer to the start of the dictionary in memory
|
||||
* @dictSize: size of dictionary
|
||||
*
|
||||
* These decoding function works the same as
|
||||
* a combination of LZ4_setStreamDecode() followed by
|
||||
* LZ4_decompress_safe_continue()
|
||||
* It is stand-alone, and don'tn eed a LZ4_streamDecode_t structure.
|
||||
*
|
||||
* Return: number of bytes decompressed into destination buffer
|
||||
* (necessarily <= maxDecompressedSize)
|
||||
* or a negative result in case of error
|
||||
*/
|
||||
int LZ4_decompress_fast_usingDict(const char *source, char *dest,
|
||||
int originalSize, const char *dictStart, int dictSize);
|
||||
#define LZ4HC_MIN_CLEVEL LZ4HC_CLEVEL_MIN
|
||||
#define LZ4HC_DEFAULT_CLEVEL LZ4HC_CLEVEL_DEFAULT
|
||||
#define LZ4HC_MAX_CLEVEL LZ4HC_CLEVEL_MAX
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1972,7 +1972,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
|
||||
# define vcpu_is_preempted(cpu) false
|
||||
#endif
|
||||
|
||||
extern long msm_sched_setaffinity(pid_t pid, struct cpumask *new_mask);
|
||||
extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
|
||||
extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
|
||||
|
||||
|
||||
@@ -25,7 +25,8 @@ int core_ctl_set_boost(bool boost);
|
||||
void core_ctl_notifier_register(struct notifier_block *n);
|
||||
void core_ctl_notifier_unregister(struct notifier_block *n);
|
||||
#else
|
||||
static inline void core_ctl_check(u64 wallclock) {}
|
||||
void rotation_ctl_check(u64 wallclock);
|
||||
#define core_ctl_check rotation_ctl_check
|
||||
static inline int core_ctl_set_boost(bool boost)
|
||||
{
|
||||
return 0;
|
||||
|
||||
@@ -134,9 +134,4 @@ extern int sched_little_cluster_coloc_fmin_khz_handler(struct ctl_table *table,
|
||||
size_t *lenp, loff_t *ppos);
|
||||
#endif
|
||||
|
||||
#define LIB_PATH_LENGTH 512
|
||||
extern char sched_lib_name[LIB_PATH_LENGTH];
|
||||
extern unsigned int sched_lib_mask_force;
|
||||
extern bool is_sched_lib_based_app(pid_t pid);
|
||||
|
||||
#endif /* _LINUX_SCHED_SYSCTL_H */
|
||||
|
||||
@@ -31,4 +31,7 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq.o
|
||||
obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o
|
||||
obj-$(CONFIG_MEMBARRIER) += membarrier.o
|
||||
obj-$(CONFIG_SCHED_CORE_CTL) += core_ctl.o
|
||||
ifndef CONFIG_SCHED_CORE_CTL
|
||||
obj-$(CONFIG_SCHED_WALT) += rotation_ctl.o
|
||||
endif
|
||||
obj-$(CONFIG_PSI) += psi.o
|
||||
|
||||
@@ -34,7 +34,6 @@
|
||||
#include <linux/kthread.h>
|
||||
|
||||
#include <asm/switch_to.h>
|
||||
#include <linux/msm_rtb.h>
|
||||
#include <asm/tlb.h>
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
#include <asm/paravirt.h>
|
||||
@@ -1345,16 +1344,17 @@ unlock:
|
||||
/*
|
||||
* Cross migrate two tasks
|
||||
*/
|
||||
int migrate_swap(struct task_struct *cur, struct task_struct *p)
|
||||
int migrate_swap(struct task_struct *cur, struct task_struct *p,
|
||||
int target_cpu, int curr_cpu)
|
||||
{
|
||||
struct migration_swap_arg arg;
|
||||
int ret = -EINVAL;
|
||||
|
||||
arg = (struct migration_swap_arg){
|
||||
.src_task = cur,
|
||||
.src_cpu = task_cpu(cur),
|
||||
.src_cpu = curr_cpu,
|
||||
.dst_task = p,
|
||||
.dst_cpu = task_cpu(p),
|
||||
.dst_cpu = target_cpu,
|
||||
};
|
||||
|
||||
if (arg.src_cpu == arg.dst_cpu)
|
||||
@@ -2947,7 +2947,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
|
||||
*/
|
||||
rq_unpin_lock(rq, rf);
|
||||
spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
|
||||
uncached_logk(LOGK_CTXID, (void *)(u64)next->pid);
|
||||
|
||||
/* Here we just switch the register state and the stack. */
|
||||
switch_to(prev, next, prev);
|
||||
barrier();
|
||||
@@ -3213,6 +3213,13 @@ void scheduler_tick(void)
|
||||
|
||||
if (curr->sched_class == &fair_sched_class)
|
||||
check_for_migration(rq, curr);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
rq_lock(rq, &rf);
|
||||
if (idle_cpu(cpu) && is_reserved(cpu) && !rq->active_balance)
|
||||
clear_reserved(cpu);
|
||||
rq_unlock(rq, &rf);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NO_HZ_FULL
|
||||
@@ -4936,73 +4943,6 @@ out_put_task:
|
||||
return retval;
|
||||
}
|
||||
|
||||
char sched_lib_name[LIB_PATH_LENGTH];
|
||||
unsigned int sched_lib_mask_force;
|
||||
bool is_sched_lib_based_app(pid_t pid)
|
||||
{
|
||||
const char *name = NULL;
|
||||
char *libname, *lib_list;
|
||||
struct vm_area_struct *vma;
|
||||
char path_buf[LIB_PATH_LENGTH];
|
||||
char *tmp_lib_name;
|
||||
bool found = false;
|
||||
struct task_struct *p;
|
||||
struct mm_struct *mm;
|
||||
|
||||
if (strnlen(sched_lib_name, LIB_PATH_LENGTH) == 0)
|
||||
return false;
|
||||
|
||||
tmp_lib_name = kmalloc(LIB_PATH_LENGTH, GFP_KERNEL);
|
||||
if (!tmp_lib_name)
|
||||
return false;
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
p = find_process_by_pid(pid);
|
||||
if (!p) {
|
||||
rcu_read_unlock();
|
||||
kfree(tmp_lib_name);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Prevent p going away */
|
||||
get_task_struct(p);
|
||||
rcu_read_unlock();
|
||||
|
||||
mm = get_task_mm(p);
|
||||
if (!mm)
|
||||
goto put_task_struct;
|
||||
|
||||
down_read(&mm->mmap_sem);
|
||||
for (vma = mm->mmap; vma ; vma = vma->vm_next) {
|
||||
if (vma->vm_file && vma->vm_flags & VM_EXEC) {
|
||||
name = d_path(&vma->vm_file->f_path,
|
||||
path_buf, LIB_PATH_LENGTH);
|
||||
if (IS_ERR(name))
|
||||
goto release_sem;
|
||||
|
||||
strlcpy(tmp_lib_name, sched_lib_name, LIB_PATH_LENGTH);
|
||||
lib_list = tmp_lib_name;
|
||||
while ((libname = strsep(&lib_list, ","))) {
|
||||
libname = skip_spaces(libname);
|
||||
if (strnstr(name, libname,
|
||||
strnlen(name, LIB_PATH_LENGTH))) {
|
||||
found = true;
|
||||
goto release_sem;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
release_sem:
|
||||
up_read(&mm->mmap_sem);
|
||||
mmput(mm);
|
||||
put_task_struct:
|
||||
put_task_struct(p);
|
||||
kfree(tmp_lib_name);
|
||||
return found;
|
||||
}
|
||||
|
||||
static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len,
|
||||
struct cpumask *new_mask)
|
||||
{
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2016, The Linux Foundation. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 and
|
||||
* only version 2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#ifndef __CORE_CTL_H
|
||||
#define __CORE_CTL_H
|
||||
|
||||
#ifdef CONFIG_SCHED_CORE_CTL
|
||||
void core_ctl_check(u64 wallclock);
|
||||
int core_ctl_set_boost(bool boost);
|
||||
#else
|
||||
static inline void core_ctl_check(u64 wallclock) {}
|
||||
static inline int core_ctl_set_boost(bool boost)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
@@ -287,6 +287,7 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
|
||||
struct cpufreq_policy *policy = sg_policy->policy;
|
||||
unsigned int freq = arch_scale_freq_invariant() ?
|
||||
policy->cpuinfo.max_freq : policy->cur;
|
||||
unsigned int idx, l_freq, h_freq;
|
||||
|
||||
freq = (freq + (freq >> 2)) * util / max;
|
||||
trace_sugov_next_freq(policy->cpu, util, max, freq);
|
||||
@@ -294,7 +295,21 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
|
||||
if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
|
||||
return sg_policy->next_freq;
|
||||
sg_policy->cached_raw_freq = freq;
|
||||
return cpufreq_driver_resolve_freq(policy, freq);
|
||||
l_freq = cpufreq_driver_resolve_freq(policy, freq);
|
||||
idx = cpufreq_frequency_table_target(policy, freq, CPUFREQ_RELATION_H);
|
||||
h_freq = policy->freq_table[idx].frequency;
|
||||
h_freq = clamp(h_freq, policy->min, policy->max);
|
||||
if (l_freq <= h_freq || l_freq == policy->min)
|
||||
return l_freq;
|
||||
|
||||
/*
|
||||
* Use the frequency step below if the calculated frequency is <20%
|
||||
* higher than it.
|
||||
*/
|
||||
if (mult_frac(100, freq - h_freq, l_freq - h_freq) < 20)
|
||||
return h_freq;
|
||||
|
||||
return l_freq;
|
||||
}
|
||||
|
||||
static void sugov_get_util(unsigned long *util, unsigned long *max, int cpu)
|
||||
@@ -479,7 +494,6 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
|
||||
{
|
||||
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
|
||||
struct cpufreq_policy *policy = sg_policy->policy;
|
||||
u64 last_freq_update_time = sg_policy->last_freq_update_time;
|
||||
unsigned long util = 0, max = 1;
|
||||
unsigned int j;
|
||||
|
||||
@@ -495,7 +509,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
|
||||
* enough, don't take the CPU into account as it probably is
|
||||
* idle now (and clear iowait_boost for it).
|
||||
*/
|
||||
delta_ns = last_freq_update_time - j_sg_cpu->last_update;
|
||||
delta_ns = time - j_sg_cpu->last_update;
|
||||
if (delta_ns > stale_ns) {
|
||||
j_sg_cpu->iowait_boost = 0;
|
||||
j_sg_cpu->iowait_boost_pending = false;
|
||||
|
||||
@@ -157,11 +157,11 @@ __read_mostly unsigned int sysctl_sched_walt_cpu_high_irqload =
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* For asym packing, by default the lower numbered cpu has higher priority.
|
||||
* For asym packing, by default the lower max-capacity cpu has higher priority.
|
||||
*/
|
||||
int __weak arch_asym_cpu_priority(int cpu)
|
||||
{
|
||||
return -cpu;
|
||||
return -arch_scale_cpu_capacity(NULL, cpu);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1938,7 +1938,8 @@ static int task_numa_migrate(struct task_struct *p)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = migrate_swap(p, env.best_task);
|
||||
ret = migrate_swap(p, env.best_task, env.best_cpu, env.src_cpu);
|
||||
|
||||
if (ret != 0)
|
||||
trace_sched_stick_numa(p, env.src_cpu, task_cpu(env.best_task));
|
||||
put_task_struct(env.best_task);
|
||||
@@ -11271,9 +11272,16 @@ no_move:
|
||||
raw_spin_unlock_irqrestore(&busiest->lock, flags);
|
||||
|
||||
if (active_balance) {
|
||||
stop_one_cpu_nowait(cpu_of(busiest),
|
||||
int ret;
|
||||
|
||||
ret = stop_one_cpu_nowait(cpu_of(busiest),
|
||||
active_load_balance_cpu_stop, busiest,
|
||||
&busiest->active_balance_work);
|
||||
if (!ret) {
|
||||
clear_reserved(this_cpu);
|
||||
busiest->active_balance = 0;
|
||||
active_balance = 0;
|
||||
}
|
||||
*continue_balancing = 0;
|
||||
}
|
||||
|
||||
@@ -13035,12 +13043,23 @@ static void walt_rotate_work_func(struct work_struct *work)
|
||||
{
|
||||
struct walt_rotate_work *wr = container_of(work,
|
||||
struct walt_rotate_work, w);
|
||||
struct rq *src_rq = cpu_rq(wr->src_cpu), *dst_rq = cpu_rq(wr->dst_cpu);
|
||||
unsigned long flags;
|
||||
|
||||
migrate_swap(wr->src_task, wr->dst_task);
|
||||
migrate_swap(wr->src_task, wr->dst_task, wr->dst_cpu, wr->src_cpu);
|
||||
|
||||
put_task_struct(wr->src_task);
|
||||
put_task_struct(wr->dst_task);
|
||||
|
||||
local_irq_save(flags);
|
||||
double_rq_lock(src_rq, dst_rq);
|
||||
|
||||
dst_rq->active_balance = 0;
|
||||
src_rq->active_balance = 0;
|
||||
|
||||
double_rq_unlock(src_rq, dst_rq);
|
||||
local_irq_restore(flags);
|
||||
|
||||
clear_reserved(wr->src_cpu);
|
||||
clear_reserved(wr->dst_cpu);
|
||||
}
|
||||
@@ -13127,7 +13146,10 @@ static void walt_check_for_rotation(struct rq *src_rq)
|
||||
dst_rq = cpu_rq(dst_cpu);
|
||||
|
||||
double_rq_lock(src_rq, dst_rq);
|
||||
if (dst_rq->curr->sched_class == &fair_sched_class) {
|
||||
if (dst_rq->curr->sched_class == &fair_sched_class &&
|
||||
!src_rq->active_balance && !dst_rq->active_balance &&
|
||||
cpumask_test_cpu(dst_cpu, &src_rq->curr->cpus_allowed) &&
|
||||
cpumask_test_cpu(src_cpu, &dst_rq->curr->cpus_allowed)) {
|
||||
get_task_struct(src_rq->curr);
|
||||
get_task_struct(dst_rq->curr);
|
||||
|
||||
@@ -13140,7 +13162,10 @@ static void walt_check_for_rotation(struct rq *src_rq)
|
||||
|
||||
wr->src_cpu = src_cpu;
|
||||
wr->dst_cpu = dst_cpu;
|
||||
dst_rq->active_balance = 1;
|
||||
src_rq->active_balance = 1;
|
||||
}
|
||||
|
||||
double_rq_unlock(src_rq, dst_rq);
|
||||
|
||||
if (wr)
|
||||
@@ -13159,6 +13184,7 @@ void check_for_migration(struct rq *rq, struct task_struct *p)
|
||||
int new_cpu = -1;
|
||||
int cpu = smp_processor_id();
|
||||
int prev_cpu = task_cpu(p);
|
||||
int ret;
|
||||
struct sched_domain *sd = NULL;
|
||||
|
||||
if (rq->misfit_task_load) {
|
||||
@@ -13169,6 +13195,13 @@ void check_for_migration(struct rq *rq, struct task_struct *p)
|
||||
if (task_will_be_throttled(p))
|
||||
return;
|
||||
|
||||
if (walt_rotation_enabled) {
|
||||
raw_spin_lock(&migration_lock);
|
||||
walt_check_for_rotation(rq);
|
||||
raw_spin_unlock(&migration_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
raw_spin_lock(&migration_lock);
|
||||
rcu_read_lock();
|
||||
new_cpu = find_energy_efficient_cpu(sd, p, cpu, prev_cpu, 0, 1);
|
||||
@@ -13179,13 +13212,15 @@ void check_for_migration(struct rq *rq, struct task_struct *p)
|
||||
if (active_balance) {
|
||||
mark_reserved(new_cpu);
|
||||
raw_spin_unlock(&migration_lock);
|
||||
stop_one_cpu_nowait(prev_cpu,
|
||||
ret = stop_one_cpu_nowait(prev_cpu,
|
||||
active_load_balance_cpu_stop, rq,
|
||||
&rq->active_balance_work);
|
||||
if (!ret)
|
||||
clear_reserved(new_cpu);
|
||||
else
|
||||
wake_up_if_idle(new_cpu);
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
walt_check_for_rotation(rq);
|
||||
}
|
||||
raw_spin_unlock(&migration_lock);
|
||||
}
|
||||
|
||||
@@ -84,7 +84,7 @@ SCHED_FEAT(WARN_DOUBLE_CLOCK, false)
|
||||
SCHED_FEAT(RT_PUSH_IPI, true)
|
||||
#endif
|
||||
|
||||
SCHED_FEAT(RT_RUNTIME_SHARE, true)
|
||||
SCHED_FEAT(RT_RUNTIME_SHARE, false)
|
||||
SCHED_FEAT(LB_MIN, false)
|
||||
SCHED_FEAT(ATTACH_AGE_LOAD, true)
|
||||
|
||||
|
||||
@@ -66,7 +66,8 @@ static noinline int __cpuidle cpu_idle_poll(void)
|
||||
local_irq_enable();
|
||||
stop_critical_timings();
|
||||
while (!tif_need_resched() &&
|
||||
(cpu_idle_force_poll || tick_check_broadcast_expired()))
|
||||
(cpu_idle_force_poll || tick_check_broadcast_expired() ||
|
||||
is_reserved(smp_processor_id())))
|
||||
cpu_relax();
|
||||
start_critical_timings();
|
||||
trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
|
||||
@@ -261,7 +262,8 @@ static void do_idle(void)
|
||||
* broadcast device expired for us, we don't want to go deep
|
||||
* idle as we know that the IPI is going to arrive right away.
|
||||
*/
|
||||
if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
|
||||
if (cpu_idle_force_poll || tick_check_broadcast_expired() ||
|
||||
is_reserved(smp_processor_id())) {
|
||||
tick_nohz_idle_restart_tick();
|
||||
cpu_idle_poll();
|
||||
} else {
|
||||
|
||||
144
kernel/sched/rotation_ctl.c
Normal file
144
kernel/sched/rotation_ctl.c
Normal file
@@ -0,0 +1,144 @@
|
||||
/* Copyright (c) 2014-2018, 2020, The Linux Foundation. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 and
|
||||
* only version 2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "rotation_ctl: " fmt
|
||||
|
||||
#include "sched.h"
|
||||
#include "walt.h"
|
||||
|
||||
struct cluster_data {
|
||||
cpumask_t cpu_mask;
|
||||
unsigned int first_cpu;
|
||||
bool inited;
|
||||
};
|
||||
|
||||
static struct cluster_data cluster_state[MAX_CLUSTERS];
|
||||
static unsigned int num_clusters;
|
||||
|
||||
#define for_each_cluster(cluster, idx) \
|
||||
for (; (idx) < num_clusters && ((cluster) = &cluster_state[idx]);\
|
||||
(idx)++)
|
||||
|
||||
static bool initialized;
|
||||
|
||||
static int cluster_real_big_tasks(int index, const struct sched_avg_stats *nr_stats)
|
||||
{
|
||||
int nr_big = 0;
|
||||
int cpu;
|
||||
const struct cluster_data *cluster = &cluster_state[index];
|
||||
|
||||
if (!index) {
|
||||
for_each_cpu(cpu, &cluster->cpu_mask)
|
||||
nr_big += nr_stats[cpu].nr_misfit;
|
||||
} else {
|
||||
for_each_cpu(cpu, &cluster->cpu_mask)
|
||||
nr_big += nr_stats[cpu].nr;
|
||||
}
|
||||
|
||||
return nr_big;
|
||||
}
|
||||
|
||||
static void update_running_avg(void)
|
||||
{
|
||||
struct sched_avg_stats nr_stats[NR_CPUS];
|
||||
const struct cluster_data *cluster;
|
||||
unsigned int index = 0;
|
||||
int big_avg = 0;
|
||||
|
||||
sched_get_nr_running_avg(nr_stats);
|
||||
|
||||
for_each_cluster(cluster, index) {
|
||||
if (!cluster->inited)
|
||||
continue;
|
||||
|
||||
big_avg += cluster_real_big_tasks(index, nr_stats);
|
||||
}
|
||||
|
||||
walt_rotation_checkpoint(big_avg);
|
||||
}
|
||||
|
||||
static u64 rotation_ctl_check_timestamp;
|
||||
|
||||
void rotation_ctl_check(u64 window_start)
|
||||
{
|
||||
if (unlikely(!initialized))
|
||||
return;
|
||||
|
||||
if (window_start == rotation_ctl_check_timestamp)
|
||||
return;
|
||||
|
||||
rotation_ctl_check_timestamp = window_start;
|
||||
|
||||
update_running_avg();
|
||||
}
|
||||
|
||||
/* ============================ init code ============================== */
|
||||
|
||||
static const struct cluster_data *find_cluster_by_first_cpu(unsigned int first_cpu)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < num_clusters; ++i) {
|
||||
if (cluster_state[i].first_cpu == first_cpu)
|
||||
return &cluster_state[i];
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int cluster_init(const struct cpumask *mask)
|
||||
{
|
||||
struct device *dev;
|
||||
unsigned int first_cpu = cpumask_first(mask);
|
||||
struct cluster_data *cluster;
|
||||
|
||||
if (find_cluster_by_first_cpu(first_cpu))
|
||||
return 0;
|
||||
|
||||
dev = get_cpu_device(first_cpu);
|
||||
if (!dev)
|
||||
return -ENODEV;
|
||||
|
||||
pr_info("Creating CPU group %d\n", first_cpu);
|
||||
|
||||
if (num_clusters == MAX_CLUSTERS) {
|
||||
pr_err("Unsupported number of clusters. Only %u supported\n",
|
||||
MAX_CLUSTERS);
|
||||
return -EINVAL;
|
||||
}
|
||||
cluster = &cluster_state[num_clusters];
|
||||
++num_clusters;
|
||||
|
||||
cpumask_copy(&cluster->cpu_mask, mask);
|
||||
cluster->first_cpu = first_cpu;
|
||||
|
||||
cluster->inited = true;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __init rotation_ctl_init(void)
|
||||
{
|
||||
const struct sched_cluster *cluster;
|
||||
int ret;
|
||||
|
||||
for_each_sched_cluster(cluster) {
|
||||
ret = cluster_init(&cluster->cpus);
|
||||
if (ret)
|
||||
pr_warn("unable to create rotation ctl group: %d\n", ret);
|
||||
}
|
||||
|
||||
initialized = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(rotation_ctl_init);
|
||||
@@ -1221,7 +1221,8 @@ enum numa_faults_stats {
|
||||
extern void sched_setnuma(struct task_struct *p, int node);
|
||||
extern int migrate_task_to(struct task_struct *p, int cpu);
|
||||
#endif /* CONFIG_NUMA_BALANCING */
|
||||
extern int migrate_swap(struct task_struct *cur, struct task_struct *p);
|
||||
extern int migrate_swap(struct task_struct *p, struct task_struct *t,
|
||||
int cpu, int scpu);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
|
||||
@@ -35,7 +35,7 @@ static s64 last_get_time;
|
||||
|
||||
static DEFINE_PER_CPU(atomic64_t, last_busy_time) = ATOMIC64_INIT(0);
|
||||
|
||||
#define NR_THRESHOLD_PCT 15
|
||||
#define NR_THRESHOLD_PCT 40
|
||||
|
||||
/**
|
||||
* sched_get_nr_running_avg
|
||||
|
||||
@@ -435,10 +435,10 @@ void clear_walt_request(int cpu)
|
||||
|
||||
raw_spin_lock_irqsave(&rq->lock, flags);
|
||||
if (rq->push_task) {
|
||||
clear_reserved(rq->push_cpu);
|
||||
push_task = rq->push_task;
|
||||
rq->push_task = NULL;
|
||||
}
|
||||
clear_reserved(rq->push_cpu);
|
||||
rq->active_balance = 0;
|
||||
raw_spin_unlock_irqrestore(&rq->lock, flags);
|
||||
if (push_task)
|
||||
|
||||
@@ -149,7 +149,6 @@ static int ten_thousand = 10000;
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
static int six_hundred_forty_kb = 640 * 1024;
|
||||
#endif
|
||||
static int two_hundred_fifty_five = 255;
|
||||
|
||||
/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
|
||||
static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
|
||||
@@ -651,22 +650,6 @@ static struct ctl_table kern_table[] = {
|
||||
.extra1 = &one,
|
||||
},
|
||||
#endif
|
||||
{
|
||||
.procname = "sched_lib_name",
|
||||
.data = sched_lib_name,
|
||||
.maxlen = LIB_PATH_LENGTH,
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dostring,
|
||||
},
|
||||
{
|
||||
.procname = "sched_lib_mask_force",
|
||||
.data = &sched_lib_mask_force,
|
||||
.maxlen = sizeof(unsigned int),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_douintvec_minmax,
|
||||
.extra1 = &zero,
|
||||
.extra2 = &two_hundred_fifty_five,
|
||||
},
|
||||
#ifdef CONFIG_PROVE_LOCKING
|
||||
{
|
||||
.procname = "prove_locking",
|
||||
|
||||
@@ -96,24 +96,6 @@ config IPC_LOGGING
|
||||
|
||||
If in doubt, say no.
|
||||
|
||||
config QCOM_RTB
|
||||
bool "Register tracing"
|
||||
help
|
||||
Enable the kernel to trace every kernel function. This is done
|
||||
Add support for logging different events to a small uncached
|
||||
region. This is designed to aid in debugging reset cases where the
|
||||
caches may not be flushed before the target resets.
|
||||
|
||||
config QCOM_RTB_SEPARATE_CPUS
|
||||
bool "Separate entries for each cpu"
|
||||
depends on QCOM_RTB
|
||||
depends on SMP
|
||||
help
|
||||
Under some circumstances, it may be beneficial to give dedicated space
|
||||
for each cpu to log accesses. Selecting this option will log each cpu
|
||||
separately. This will guarantee that the last acesses for each cpu
|
||||
will be logged but there will be fewer entries per cpu
|
||||
|
||||
# All tracer options should select GENERIC_TRACER. For those options that are
|
||||
# enabled by all tracers (context switch and event tracer) they select TRACING.
|
||||
# This allows those options to appear when no other tracer is selected. But the
|
||||
|
||||
@@ -73,7 +73,6 @@ obj-$(CONFIG_GPU_TRACEPOINTS) += gpu-traces.o
|
||||
|
||||
obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
|
||||
|
||||
obj-$(CONFIG_QCOM_RTB) += msm_rtb.o
|
||||
obj-$(CONFIG_IPC_LOGGING) += ipc_logging.o
|
||||
ifdef CONFIG_DEBUG_FS
|
||||
obj-$(CONFIG_IPC_LOGGING) += ipc_logging_debug.o
|
||||
|
||||
23
lib/iomap.c
23
lib/iomap.c
@@ -6,7 +6,6 @@
|
||||
*/
|
||||
#include <linux/pci.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/msm_rtb.h>
|
||||
|
||||
#include <linux/export.h>
|
||||
|
||||
@@ -72,31 +71,26 @@ static void bad_io_access(unsigned long port, const char *access)
|
||||
|
||||
unsigned int ioread8(void __iomem *addr)
|
||||
{
|
||||
uncached_logk_pc(LOGK_READL, __builtin_return_address(0), addr);
|
||||
IO_COND(addr, return inb(port), return readb_no_log(addr));
|
||||
IO_COND(addr, return inb(port), return readb(addr));
|
||||
return 0xff;
|
||||
}
|
||||
unsigned int ioread16(void __iomem *addr)
|
||||
{
|
||||
uncached_logk_pc(LOGK_READL, __builtin_return_address(0), addr);
|
||||
IO_COND(addr, return inw(port), return readw_no_log(addr));
|
||||
IO_COND(addr, return inw(port), return readw(addr));
|
||||
return 0xffff;
|
||||
}
|
||||
unsigned int ioread16be(void __iomem *addr)
|
||||
{
|
||||
uncached_logk_pc(LOGK_READL, __builtin_return_address(0), addr);
|
||||
IO_COND(addr, return pio_read16be(port), return mmio_read16be(addr));
|
||||
return 0xffff;
|
||||
}
|
||||
unsigned int ioread32(void __iomem *addr)
|
||||
{
|
||||
uncached_logk_pc(LOGK_READL, __builtin_return_address(0), addr);
|
||||
IO_COND(addr, return inl(port), return readl_no_log(addr));
|
||||
IO_COND(addr, return inl(port), return readl(addr));
|
||||
return 0xffffffff;
|
||||
}
|
||||
unsigned int ioread32be(void __iomem *addr)
|
||||
{
|
||||
uncached_logk_pc(LOGK_READL, __builtin_return_address(0), addr);
|
||||
IO_COND(addr, return pio_read32be(port), return mmio_read32be(addr));
|
||||
return 0xffffffff;
|
||||
}
|
||||
@@ -118,27 +112,22 @@ EXPORT_SYMBOL(ioread32be);
|
||||
|
||||
void iowrite8(u8 val, void __iomem *addr)
|
||||
{
|
||||
uncached_logk_pc(LOGK_WRITEL, __builtin_return_address(0), addr);
|
||||
IO_COND(addr, outb(val, port), writeb_no_log(val, addr));
|
||||
IO_COND(addr, outb(val,port), writeb(val, addr));
|
||||
}
|
||||
void iowrite16(u16 val, void __iomem *addr)
|
||||
{
|
||||
uncached_logk_pc(LOGK_WRITEL, __builtin_return_address(0), addr);
|
||||
IO_COND(addr, outw(val, port), writew_no_log(val, addr));
|
||||
IO_COND(addr, outw(val,port), writew(val, addr));
|
||||
}
|
||||
void iowrite16be(u16 val, void __iomem *addr)
|
||||
{
|
||||
uncached_logk_pc(LOGK_WRITEL, __builtin_return_address(0), addr);
|
||||
IO_COND(addr, pio_write16be(val,port), mmio_write16be(val, addr));
|
||||
}
|
||||
void iowrite32(u32 val, void __iomem *addr)
|
||||
{
|
||||
uncached_logk_pc(LOGK_WRITEL, __builtin_return_address(0), addr);
|
||||
IO_COND(addr, outl(val, port), writel_no_log(val, addr));
|
||||
IO_COND(addr, outl(val,port), writel(val, addr));
|
||||
}
|
||||
void iowrite32be(u32 val, void __iomem *addr)
|
||||
{
|
||||
uncached_logk_pc(LOGK_WRITEL, __builtin_return_address(0), addr);
|
||||
IO_COND(addr, pio_write32be(val,port), mmio_write32be(val, addr));
|
||||
}
|
||||
EXPORT_SYMBOL(iowrite8);
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
ccflags-y += -O3
|
||||
ccflags-y += -O3 \
|
||||
-DLZ4_FREESTANDING=1 \
|
||||
-DLZ4_FAST_DEC_LOOP=1
|
||||
|
||||
obj-$(CONFIG_LZ4_COMPRESS) += lz4_compress.o
|
||||
obj-$(CONFIG_LZ4HC_COMPRESS) += lz4hc_compress.o
|
||||
obj-$(CONFIG_LZ4_DECOMPRESS) += lz4_decompress.o
|
||||
obj-y += lz4.o lz4hc.o
|
||||
|
||||
obj-$(CONFIG_ARM64) += $(addprefix lz4armv8/, lz4accel.o lz4armv8.o)
|
||||
|
||||
3486
lib/lz4/lz4.c
Normal file
3486
lib/lz4/lz4.c
Normal file
File diff suppressed because it is too large
Load Diff
984
lib/lz4/lz4.h
Normal file
984
lib/lz4/lz4.h
Normal file
@@ -0,0 +1,984 @@
|
||||
/*
|
||||
* LZ4 - Fast LZ compression algorithm
|
||||
* Header File
|
||||
* Copyright (C) 2011-2023, Yann Collet.
|
||||
|
||||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
You can contact the author at :
|
||||
- LZ4 homepage : http://www.lz4.org
|
||||
- LZ4 source repository : https://github.com/lz4/lz4
|
||||
*/
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef LZ4_H_2983827168210
|
||||
#define LZ4_H_2983827168210
|
||||
|
||||
/**
|
||||
Introduction
|
||||
|
||||
LZ4 is lossless compression algorithm, providing compression speed >500 MB/s per core,
|
||||
scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
|
||||
multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
|
||||
|
||||
The LZ4 compression library provides in-memory compression and decompression functions.
|
||||
It gives full buffer control to user.
|
||||
Compression can be done in:
|
||||
- a single step (described as Simple Functions)
|
||||
- a single step, reusing a context (described in Advanced Functions)
|
||||
- unbounded multiple steps (described as Streaming compression)
|
||||
|
||||
lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
|
||||
Decompressing such a compressed block requires additional metadata.
|
||||
Exact metadata depends on exact decompression function.
|
||||
For the typical case of LZ4_decompress_safe(),
|
||||
metadata includes block's compressed size, and maximum bound of decompressed size.
|
||||
Each application is free to encode and pass such metadata in whichever way it wants.
|
||||
|
||||
lz4.h only handle blocks, it can not generate Frames.
|
||||
|
||||
Blocks are different from Frames (doc/lz4_Frame_format.md).
|
||||
Frames bundle both blocks and metadata in a specified manner.
|
||||
Embedding metadata is required for compressed data to be self-contained and portable.
|
||||
Frame format is delivered through a companion API, declared in lz4frame.h.
|
||||
The `lz4` CLI can only manage frames.
|
||||
*/
#include <linux/compiler.h>
#include <linux/export.h>
#include <linux/string.h>

#include "lz4armv8/lz4accel.h"

#define LZ4_FORCE_INLINE static inline __attribute__((always_inline))

/*^***************************************************************
 * Export parameters
 *****************************************************************/
/*
 * LZ4_DLL_EXPORT :
 * Enable exporting of functions when building a Windows DLL
 * LZ4LIB_VISIBILITY :
 * Control library symbols visibility.
 */
#ifndef LZ4LIB_VISIBILITY
#if defined(__GNUC__) && (__GNUC__ >= 4)
#define LZ4LIB_VISIBILITY __attribute__((visibility("default")))
#else
#define LZ4LIB_VISIBILITY
#endif
#endif
#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT == 1)
#define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY
#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT == 1)
#define LZ4LIB_API \
	__declspec(dllimport) \
	LZ4LIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
#else
#define LZ4LIB_API LZ4LIB_VISIBILITY
#endif

/*-************************************
 * Reading and writing into memory
 **************************************/

/**
 * LZ4 relies on memcpy with a constant size being inlined. In freestanding
 * environments, the compiler can't assume the implementation of memcpy() is
 * standard compliant, so it can't apply its specialized memcpy() inlining
 * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
 * memcpy() as if it were standard compliant, so it can inline it in freestanding
 * environments. This is needed when decompressing the Linux Kernel, for example.
 */
#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
#define LZ4_memset(dst, src, size) __builtin_memset(dst, src, size)
#define LZ4_memmove(dst, src, size) __builtin_memmove(dst, src, size)

/*! LZ4_FREESTANDING :
 * When this macro is set to 1, it enables "freestanding mode" that is
 * suitable for typical freestanding environment which doesn't support
 * standard C library.
 *
 * - LZ4_FREESTANDING is a compile-time switch.
 * - It requires the following macros to be defined:
 *   LZ4_memcpy, LZ4_memmove, LZ4_memset.
 * - It only enables LZ4/HC functions which don't use heap.
 *   All LZ4F_* functions are not supported.
 * - See tests/freestanding.c to check its basic setup.
 */
#if defined(LZ4_FREESTANDING) && (LZ4_FREESTANDING == 1)
#define LZ4_HEAPMODE 0
#define LZ4HC_HEAPMODE 0
#define LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION 1
#if !defined(LZ4_memcpy)
#error "LZ4_FREESTANDING requires macro 'LZ4_memcpy'."
#endif
#if !defined(LZ4_memset)
#error "LZ4_FREESTANDING requires macro 'LZ4_memset'."
#endif
#if !defined(LZ4_memmove)
#error "LZ4_FREESTANDING requires macro 'LZ4_memmove'."
#endif
#elif !defined(LZ4_FREESTANDING)
#define LZ4_FREESTANDING 0
#endif

/*------ Version ------*/
#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */
#define LZ4_VERSION_MINOR 10 /* for new (non-breaking) interface capabilities */
#define LZ4_VERSION_RELEASE 0 /* for tweaks, bug-fixes, or development */

#define LZ4_VERSION_NUMBER \
	(LZ4_VERSION_MAJOR * 100 * 100 + LZ4_VERSION_MINOR * 100 + \
	 LZ4_VERSION_RELEASE)

#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
#define LZ4_QUOTE(str) #str
#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
#define LZ4_VERSION_STRING \
	LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION) /* requires v1.7.3+ */

LZ4LIB_API int LZ4_versionNumber(
	void); /**< library version number; useful to check dll version; requires v1.3.0+ */
LZ4LIB_API const char *LZ4_versionString(
	void); /**< library version string; useful to check dll version; requires v1.7.5+ */

/*-************************************
 * Tuning memory usage
 **************************************/
/*!
 * LZ4_MEMORY_USAGE :
 * Can be selected at compile time, by setting LZ4_MEMORY_USAGE.
 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB)
 * Increasing memory usage improves compression ratio, generally at the cost of speed.
 * Reduced memory usage may improve speed at the cost of ratio, thanks to better cache locality.
 * Default value is 14, for 16KB, which nicely fits into most L1 caches.
 */
#ifndef LZ4_MEMORY_USAGE
#define LZ4_MEMORY_USAGE LZ4_MEMORY_USAGE_DEFAULT
#endif

/* These are absolute limits, they should not be changed by users */
#define LZ4_MEMORY_USAGE_MIN 10
#define LZ4_MEMORY_USAGE_DEFAULT 14
#define LZ4_MEMORY_USAGE_MAX 20

#if (LZ4_MEMORY_USAGE < LZ4_MEMORY_USAGE_MIN)
#error "LZ4_MEMORY_USAGE is too small !"
#endif

#if (LZ4_MEMORY_USAGE > LZ4_MEMORY_USAGE_MAX)
#error "LZ4_MEMORY_USAGE is too large !"
#endif

/*
 * LZ4_ACCELERATION_DEFAULT :
 * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
 */
#define LZ4_ACCELERATION_DEFAULT 1
/*
 * LZ4_ACCELERATION_MAX :
 * Any "acceleration" value higher than this threshold
 * get treated as LZ4_ACCELERATION_MAX instead (fix #876)
 */
#define LZ4_ACCELERATION_MAX 65537

/*-************************************
 * Simple Functions
 **************************************/
/*! LZ4_compress_default() :
 * Compresses 'srcSize' bytes from buffer 'src'
 * into already allocated 'dst' buffer of size 'dstCapacity'.
 * Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
 * It also runs faster, so it's a recommended setting.
 * If the function cannot compress 'src' into a more limited 'dst' budget,
 * compression stops *immediately*, and the function result is zero.
 * In which case, 'dst' content is undefined (invalid).
 * srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
 * dstCapacity : size of buffer 'dst' (which must be already allocated)
 * @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
 *           or 0 if compression fails
 * Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
 */
LZ4LIB_API int LZ4_compress_default(const char *src, char *dst, int srcSize,
				    int dstCapacity, void *wrkmem);

/*! LZ4_decompress_safe() :
 * @compressedSize : is the exact complete size of the compressed block.
 * @dstCapacity : is the size of destination buffer (which must be already allocated),
 *                presumed an upper bound of decompressed size.
 * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
 *           If destination buffer is not large enough, decoding will stop and output an error code (negative value).
 *           If the source stream is detected malformed, the function will stop decoding and return a negative result.
 * Note 1 : This function is protected against malicious data packets :
 *          it will never writes outside 'dst' buffer, nor read outside 'source' buffer,
 *          even if the compressed block is maliciously modified to order the decoder to do these actions.
 *          In such case, the decoder stops immediately, and considers the compressed block malformed.
 * Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them.
 *          The implementation is free to send / store / derive this information in whichever way is most beneficial.
 *          If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead.
 */
LZ4LIB_API int LZ4_decompress_safe(const char *src, char *dst,
				   int compressedSize, int dstCapacity);

/*-************************************
 * Advanced Functions
 **************************************/
#define LZ4_MAX_INPUT_SIZE 0x7E000000 /* 2 113 929 216 bytes */
#define LZ4_COMPRESSBOUND(isize) \
	((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? \
		 0 : \
		 (isize) + ((isize) / 255) + 16)

/*! LZ4_compressBound() :
    Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
    This function is primarily useful for memory allocation purposes (destination buffer size).
    Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
    Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize)
        inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
        return : maximum output size in a "worst case" scenario
              or 0, if input size is incorrect (too large or negative)
*/
LZ4LIB_API int LZ4_compressBound(int inputSize);

/*! LZ4_compress_fast() :
    Same as LZ4_compress_default(), but allows selection of "acceleration" factor.
    The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
    It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
    An acceleration value of "1" is the same as regular LZ4_compress_default()
    Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c).
    Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c).
*/
LZ4LIB_API int LZ4_compress_fast(const char *src, char *dst, int srcSize,
				 int dstCapacity, int acceleration, void *wrkmem);

/*! LZ4_compress_fast_extState() :
 *  Same as LZ4_compress_fast(), using an externally allocated memory space for its state.
 *  Use LZ4_sizeofState() to know how much memory must be allocated,
 *  and allocate it on 8-bytes boundaries (using `malloc()` typically).
 *  Then, provide this buffer as `void* state` to compression function.
 */
LZ4LIB_API int LZ4_sizeofState(void);
LZ4LIB_API int LZ4_compress_fast_extState(void *state, const char *src,
					  char *dst, int srcSize,
					  int dstCapacity, int acceleration);

/*! LZ4_compress_destSize() :
 *  Reverse the logic : compresses as much data as possible from 'src' buffer
 *  into already allocated buffer 'dst', of size >= 'dstCapacity'.
 *  This function either compresses the entire 'src' content into 'dst' if it's large enough,
 *  or fill 'dst' buffer completely with as much data as possible from 'src'.
 *  note: acceleration parameter is fixed to "default".
 *
 *  *srcSizePtr : in+out parameter. Initially contains size of input.
 *                Will be modified to indicate how many bytes where read from 'src' to fill 'dst'.
 *                New value is necessarily <= input value.
 * @return : Nb bytes written into 'dst' (necessarily <= dstCapacity)
 *           or 0 if compression fails.
 *
 * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed in v1.9.2+):
 *        the produced compressed content could, in specific circumstances,
 *        require to be decompressed into a destination buffer larger
 *        by at least 1 byte than the content to decompress.
 *        If an application uses `LZ4_compress_destSize()`,
 *        it's highly recommended to update liblz4 to v1.9.2 or better.
 *        If this can't be done or ensured,
 *        the receiving decompression function should provide
 *        a dstCapacity which is > decompressedSize, by at least 1 byte.
 *        See https://github.com/lz4/lz4/issues/859 for details
 */
LZ4LIB_API int LZ4_compress_destSize(const char *src, char *dst,
				     int *srcSizePtr, int targetDstSize);

/*! LZ4_decompress_safe_partial() :
 *  Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
 *  into destination buffer 'dst' of size 'dstCapacity'.
 *  Up to 'targetOutputSize' bytes will be decoded.
 *  The function stops decoding on reaching this objective.
 *  This can be useful to boost performance
 *  whenever only the beginning of a block is required.
 *
 * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
 *           If source stream is detected malformed, function returns a negative result.
 *
 *  Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
 *
 *  Note 2 : targetOutputSize must be <= dstCapacity
 *
 *  Note 3 : this function effectively stops decoding on reaching targetOutputSize,
 *           so dstCapacity is kind of redundant.
 *           This is because in older versions of this function,
 *           decoding operation would still write complete sequences.
 *           Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
 *           it could write more bytes, though only up to dstCapacity.
 *           Some "margin" used to be required for this operation to work properly.
 *           Thankfully, this is no longer necessary.
 *           The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
 *
 *  Note 4 : If srcSize is the exact size of the block,
 *           then targetOutputSize can be any value,
 *           including larger than the block's decompressed size.
 *           The function will, at most, generate block's decompressed size.
 *
 *  Note 5 : If srcSize is _larger_ than block's compressed size,
 *           then targetOutputSize **MUST** be <= block's decompressed size.
 *           Otherwise, *silent corruption will occur*.
 */
LZ4LIB_API int LZ4_decompress_safe_partial(const char *src, char *dst,
					   int srcSize, int targetOutputSize,
					   int dstCapacity);

/*-*********************************************
 * Streaming Compression Functions
 ***********************************************/
typedef union LZ4_stream_u LZ4_stream_t; /* incomplete type (defined later) */

/*!
  Note about RC_INVOKED

  - RC_INVOKED is predefined symbol of rc.exe (the resource compiler which is part of MSVC/Visual Studio).
    https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros

  - Since rc.exe is a legacy compiler, it truncates long symbol (> 30 chars)
    and reports warning "RC4011: identifier truncated".

  - To eliminate the warning, we surround long preprocessor symbol with
    "#if !defined(RC_INVOKED) ... #endif" block that means
    "skip this block when rc.exe is trying to read it".
*/
#if !defined( \
	RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros */
#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
LZ4LIB_API LZ4_stream_t *LZ4_createStream(void);
LZ4LIB_API int LZ4_freeStream(LZ4_stream_t *streamPtr);
#endif /* !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) */
#endif

/*! LZ4_resetStream_fast() : v1.9.0+
 *  Use this to prepare an LZ4_stream_t for a new chain of dependent blocks
 *  (e.g., LZ4_compress_fast_continue()).
 *
 *  An LZ4_stream_t must be initialized once before usage.
 *  This is automatically done when created by LZ4_createStream().
 *  However, should the LZ4_stream_t be simply declared on stack (for example),
 *  it's necessary to initialize it first, using LZ4_initStream().
 *
 *  After init, start any new stream with LZ4_resetStream_fast().
 *  A same LZ4_stream_t can be re-used multiple times consecutively
 *  and compress multiple streams,
 *  provided that it starts each new stream with LZ4_resetStream_fast().
 *
 *  LZ4_resetStream_fast() is much faster than LZ4_initStream(),
 *  but is not compatible with memory regions containing garbage data.
 *
 *  Note: it's only useful to call LZ4_resetStream_fast()
 *        in the context of streaming compression.
 *        The *extState* functions perform their own resets.
 *        Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive.
 */
LZ4LIB_API void LZ4_resetStream_fast(LZ4_stream_t *streamPtr);

/*! LZ4_loadDict() :
 *  Use this function to reference a static dictionary into LZ4_stream_t.
 *  The dictionary must remain available during compression.
 *  LZ4_loadDict() triggers a reset, so any previous data will be forgotten.
 *  The same dictionary will have to be loaded on decompression side for successful decoding.
 *  Dictionary are useful for better compression of small data (KB range).
 *  While LZ4 itself accepts any input as dictionary, dictionary efficiency is also a topic.
 *  When in doubt, employ the Zstandard's Dictionary Builder.
 *  Loading a size of 0 is allowed, and is the same as reset.
 * @return : loaded dictionary size, in bytes (note: only the last 64 KB are loaded)
 */
LZ4LIB_API int LZ4_loadDict(LZ4_stream_t *streamPtr, const char *dictionary,
			    int dictSize);

/*! LZ4_loadDictSlow() : v1.10.0+
 *  Same as LZ4_loadDict(),
 *  but uses a bit more cpu to reference the dictionary content more thoroughly.
 *  This is expected to slightly improve compression ratio.
 *  The extra-cpu cost is likely worth it if the dictionary is re-used across multiple sessions.
 * @return : loaded dictionary size, in bytes (note: only the last 64 KB are loaded)
 */
LZ4LIB_API int LZ4_loadDictSlow(LZ4_stream_t *streamPtr, const char *dictionary,
				int dictSize);

/*! LZ4_attach_dictionary() : stable since v1.10.0
 *
 *  This allows efficient re-use of a static dictionary multiple times.
 *
 *  Rather than re-loading the dictionary buffer into a working context before
 *  each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a
 *  working LZ4_stream_t, this function introduces a no-copy setup mechanism,
 *  in which the working stream references @dictionaryStream in-place.
 *
 *  Several assumptions are made about the state of @dictionaryStream.
 *  Currently, only states which have been prepared by LZ4_loadDict() or
 *  LZ4_loadDictSlow() should be expected to work.
 *
 *  Alternatively, the provided @dictionaryStream may be NULL,
 *  in which case any existing dictionary stream is unset.
 *
 *  If a dictionary is provided, it replaces any pre-existing stream history.
 *  The dictionary contents are the only history that can be referenced and
 *  logically immediately precede the data compressed in the first subsequent
 *  compression call.
 *
 *  The dictionary will only remain attached to the working stream through the
 *  first compression call, at the end of which it is cleared.
 * @dictionaryStream stream (and source buffer) must remain in-place / accessible / unchanged
 *  through the completion of the compression session.
 *
 *  Note: there is no equivalent LZ4_attach_*() method on the decompression side
 *  because there is no initialization cost, hence no need to share the cost across multiple sessions.
 *  To decompress LZ4 blocks using dictionary, attached or not,
 *  just employ the regular LZ4_setStreamDecode() for streaming,
 *  or the stateless LZ4_decompress_safe_usingDict() for one-shot decompression.
 */
LZ4LIB_API void LZ4_attach_dictionary(LZ4_stream_t *workingStream,
				      const LZ4_stream_t *dictionaryStream);

/*! LZ4_compress_fast_continue() :
 *  Compress 'src' content using data from previously compressed blocks, for better compression ratio.
 *  'dst' buffer must be already allocated.
 *  If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
 *
 * @return : size of compressed block
 *           or 0 if there is an error (typically, cannot fit into 'dst').
 *
 *  Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block.
 *           Each block has precise boundaries.
 *           Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata.
 *           It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together.
 *
 *  Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at same address in memory !
 *
 *  Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB.
 *           Make sure that buffers are separated, by at least one byte.
 *           This construction ensures that each block only depends on previous block.
 *
 *  Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
 *
 *  Note 5 : After an error, the stream status is undefined (invalid), it can only be reset or freed.
 */
LZ4LIB_API int LZ4_compress_fast_continue(LZ4_stream_t *streamPtr,
					  const char *src, char *dst,
					  int srcSize, int dstCapacity,
					  int acceleration);

/*! LZ4_saveDict() :
 *  If last 64KB data cannot be guaranteed to remain available at its current memory location,
 *  save it into a safer place (char* safeBuffer).
 *  This is schematically equivalent to a memcpy() followed by LZ4_loadDict(),
 *  but is much faster, because LZ4_saveDict() doesn't need to rebuild tables.
 * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error.
 */
LZ4LIB_API int LZ4_saveDict(LZ4_stream_t *streamPtr, char *safeBuffer,
			    int maxDictSize);

/*-**********************************************
 * Streaming Decompression Functions
 * Bufferless synchronous API
 ************************************************/
typedef union LZ4_streamDecode_u LZ4_streamDecode_t; /* tracking context */

/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
 *  creation / destruction of streaming decompression tracking context.
 *  A tracking context can be re-used multiple times.
 */
#if !defined( \
	RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros */
#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
LZ4LIB_API LZ4_streamDecode_t *LZ4_createStreamDecode(void);
LZ4LIB_API int LZ4_freeStreamDecode(LZ4_streamDecode_t *LZ4_stream);
#endif /* !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) */
#endif

/*! LZ4_setStreamDecode() :
 *  An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
 *  Use this function to start decompression of a new stream of blocks.
 *  A dictionary can optionally be set. Use NULL or size 0 for a reset order.
 *  Dictionary is presumed stable : it must remain accessible and unmodified during next decompression.
 * @return : 1 if OK, 0 if error
 */
LZ4LIB_API int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
				   const char *dictionary, int dictSize);

/*! LZ4_decoderRingBufferSize() : v1.8.2+
 *  Note : in a ring buffer scenario (optional),
 *  blocks are presumed decompressed next to each other
 *  up to the moment there is not enough remaining space for next block (remainingSize < maxBlockSize),
 *  at which stage it resumes from beginning of ring buffer.
 *  When setting such a ring buffer for streaming decompression,
 *  provides the minimum size of this ring buffer
 *  to be compatible with any source respecting maxBlockSize condition.
 * @return : minimum ring buffer size,
 *           or 0 if there is an error (invalid maxBlockSize).
 */
LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) \
	(65536 + 14 + \
	 (maxBlockSize)) /* for static allocation; maxBlockSize presumed valid */

/*! LZ4_decompress_safe_continue() :
 *  This decoding function allows decompression of consecutive blocks in "streaming" mode.
 *  The difference with the usual independent blocks is that
 *  new blocks are allowed to find references into former blocks.
 *  A block is an unsplittable entity, and must be presented entirely to the decompression function.
 *  LZ4_decompress_safe_continue() only accepts one block at a time.
 *  It's modeled after `LZ4_decompress_safe()` and behaves similarly.
 *
 * @LZ4_streamDecode : decompression state, tracking the position in memory of past data
 * @compressedSize : exact complete size of one compressed block.
 * @dstCapacity : size of destination buffer (which must be already allocated),
 *                must be an upper bound of decompressed size.
 * @return : number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
 *           If destination buffer is not large enough, decoding will stop and output an error code (negative value).
 *           If the source stream is detected malformed, the function will stop decoding and return a negative result.
 *
 *  The last 64KB of previously decoded data *must* remain available and unmodified
 *  at the memory position where they were previously decoded.
 *  If less than 64KB of data has been decoded, all the data must be present.
 *
 *  Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
 *  - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
 *    maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
 *    In which case, encoding and decoding buffers do not need to be synchronized.
 *    Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
 *  - Synchronized mode :
 *    Decompression buffer size is _exactly_ the same as compression buffer size,
 *    and follows exactly same update rule (block boundaries at same positions),
 *    and decoding function is provided with exact decompressed size of each block (exception for last block of the stream),
 *    _then_ decoding & encoding ring buffer can have any size, including small ones ( < 64 KB).
 *  - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
 *    In which case, encoding and decoding buffers do not need to be synchronized,
 *    and encoding ring buffer can have any size, including small ones ( < 64 KB).
 *
 *  Whenever these conditions are not possible,
 *  save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
 *  then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing next block.
 */
LZ4LIB_API int
LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
			     const char *src, char *dst, int srcSize,
			     int dstCapacity);

LZ4LIB_API ssize_t LZ4_arm64_decompress_safe_partial(const void *source,
						     void *dest,
						     size_t inputSize,
						     size_t outputSize,
						     bool dip);

LZ4LIB_API ssize_t LZ4_arm64_decompress_safe(const void *source, void *dest,
					     size_t inputSize,
					     size_t outputSize, bool dip);

/*! LZ4_decompress_safe_usingDict() :
 *  Works the same as
 *  a combination of LZ4_setStreamDecode() followed by LZ4_decompress_safe_continue()
 *  However, it's stateless: it doesn't need any LZ4_streamDecode_t state.
 *  Dictionary is presumed stable : it must remain accessible and unmodified during decompression.
 *  Performance tip : Decompression speed can be substantially increased
 *                    when dst == dictStart + dictSize.
 */
LZ4LIB_API int LZ4_decompress_safe_usingDict(const char *src, char *dst,
					     int srcSize, int dstCapacity,
					     const char *dictStart,
					     int dictSize);

/*! LZ4_decompress_safe_partial_usingDict() :
 *  Behaves the same as LZ4_decompress_safe_partial()
 *  with the added ability to specify a memory segment for past data.
 *  Performance tip : Decompression speed can be substantially increased
 *                    when dst == dictStart + dictSize.
 */
LZ4LIB_API int LZ4_decompress_safe_partial_usingDict(
	const char *src, char *dst, int compressedSize, int targetOutputSize,
	int maxOutputSize, const char *dictStart, int dictSize);

#endif /* LZ4_H_2983827168210 */

/*^*************************************
 * !!!!!! STATIC LINKING ONLY !!!!!!
 ***************************************/

/*-****************************************************************************
 * Experimental section
 *
 * Symbols declared in this section must be considered unstable. Their
 * signatures or semantics may change, or they may be removed altogether in the
 * future. They are therefore only safe to depend on when the caller is
 * statically linked against the library.
 *
 * To protect against unsafe usage, not only are the declarations guarded,
 * the definitions are hidden by default
 * when building LZ4 as a shared/dynamic library.
 *
 * In order to access these declarations,
 * define LZ4_STATIC_LINKING_ONLY in your application
 * before including LZ4's headers.
 *
 * In order to make their implementations accessible dynamically, you must
 * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
 ******************************************************************************/

#ifdef LZ4_STATIC_LINKING_ONLY

#ifndef LZ4_STATIC_3504398509
#define LZ4_STATIC_3504398509

#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS
#define LZ4LIB_STATIC_API LZ4LIB_API
#else
#define LZ4LIB_STATIC_API
#endif

/*! LZ4_compress_fast_extState_fastReset() :
 *  A variant of LZ4_compress_fast_extState().
 *
 *  Using this variant avoids an expensive initialization step.
 *  It is only safe to call if the state buffer is known to be correctly initialized already
 *  (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized").
 *  From a high level, the difference is that
 *  this function initializes the provided state with a call to something like LZ4_resetStream_fast()
 *  while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream().
 */
LZ4LIB_STATIC_API int
LZ4_compress_fast_extState_fastReset(void *state, const char *src, char *dst,
				     int srcSize, int dstCapacity,
				     int acceleration);

/*! LZ4_compress_destSize_extState() : introduced in v1.10.0
 *  Same as LZ4_compress_destSize(), but using an externally allocated state.
 *  Also: exposes @acceleration
 */
int LZ4_compress_destSize_extState(void *state, const char *src, char *dst,
				   int *srcSizePtr, int targetDstSize,
				   int acceleration);

/*! In-place compression and decompression
 *
 * It's possible to have input and output sharing the same buffer,
 * for highly constrained memory environments.
 * In both cases, it requires input to lay at the end of the buffer,
 * and decompression to start at beginning of the buffer.
 * Buffer size must feature some margin, hence be larger than final size.
 *
 * |<------------------------buffer--------------------------------->|
 *                             |<-----------compressed data--------->|
 * |<-----------decompressed size------------------>|
 *                                                  |<----margin---->|
 *
 * This technique is more useful for decompression,
 * since decompressed size is typically larger,
 * and margin is short.
 *
 * In-place decompression will work inside any buffer
 * which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
 * This presumes that decompressedSize > compressedSize.
 * Otherwise, it means compression actually expanded data,
 * and it would be more efficient to store such data with a flag indicating it's not compressed.
 * This can happen when data is not compressible (already compressed, or encrypted).
 *
 * For in-place compression, margin is larger, as it must be able to cope with both
 * history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
 * and data expansion, which can happen when input is not compressible.
 * As a consequence, buffer size requirements are much higher,
 * and memory savings offered by in-place compression are more limited.
 *
 * There are ways to limit this cost for compression :
 * - Reduce history size, by modifying LZ4_DISTANCE_MAX.
 *   Note that it is a compile-time constant, so all compressions will apply this limit.
 *   Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
 *   so it's a reasonable trick when inputs are known to be small.
 * - Require the compressor to deliver a "maximum compressed size".
 *   This is the `dstCapacity` parameter in `LZ4_compress*()`.
 *   When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
 *   in which case, the return code will be 0 (zero).
 *   The caller must be ready for these cases to happen,
 *   and typically design a backup scheme to send data uncompressed.
 * The combination of both techniques can significantly reduce
 * the amount of margin required for in-place compression.
 *
 * In-place compression can work in any buffer
 * which size is >= (maxCompressedSize)
 * with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success.
 * LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX,
 * so it's possible to reduce memory requirements by playing with them.
 */

#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) \
	(((compressedSize) >> 8) + 32)
#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) \
	((decompressedSize) + \
	 LZ4_DECOMPRESS_INPLACE_MARGIN( \
		 decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */

#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */
#define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */
#endif

#define LZ4_COMPRESS_INPLACE_MARGIN \
	(LZ4_DISTANCE_MAX + \
	 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */
#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) \
	((maxCompressedSize) + \
	 LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */

#endif /* LZ4_STATIC_3504398509 */
#endif /* LZ4_STATIC_LINKING_ONLY */

#ifndef LZ4_H_98237428734687
#define LZ4_H_98237428734687

/*-************************************************************
 * Private Definitions
 **************************************************************
 * Do not use these definitions directly.
 * They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
 * Accessing members will expose user code to API and/or ABI break in future versions of the library.
 **************************************************************/
#define LZ4_HASHLOG (LZ4_MEMORY_USAGE - 2)
#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
#define LZ4_HASH_SIZE_U32 \
	(1 << LZ4_HASHLOG) /* required as macro for static allocation */

#include <linux/types.h>
#include <linux/limits.h>
|
||||
typedef int8_t LZ4_i8;
|
||||
typedef uint8_t LZ4_byte;
|
||||
typedef uint16_t LZ4_u16;
|
||||
typedef uint32_t LZ4_u32;
|
||||
|
||||
/*! LZ4_stream_t :
|
||||
* Never ever use below internal definitions directly !
|
||||
* These definitions are not API/ABI safe, and may change in future versions.
|
||||
* If you need static allocation, declare or allocate an LZ4_stream_t object.
|
||||
**/
|
||||
|
||||
typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
/*
 * Internal compression-stream state.
 * Exposed only so that LZ4_stream_t can be statically allocated; the layout
 * is not API/ABI stable, so user code must never access members directly.
 */
struct LZ4_stream_t_internal {
	/* match-search hash table; entry encoding depends on tableType */
	LZ4_u32 hashTable[LZ4_HASH_SIZE_U32];
	/* start of the external dictionary, if one is attached */
	const LZ4_byte *dictionary;
	/* state of an attached dictionary stream (dictCtx mode) */
	const LZ4_stream_t_internal *dictCtx;
	/* offset of the current position relative to the table base */
	LZ4_u32 currentOffset;
	/* how hashTable entries are encoded — presumably a tableType_t value;
	 * NOTE(review): confirm against the compressor implementation */
	LZ4_u32 tableType;
	/* size in bytes of the attached dictionary */
	LZ4_u32 dictSize;
	/* Implicit padding to ensure structure is aligned */
};
|
||||
|
||||
/* Static size of the stream union, chosen for inter-version compatibility:
 * large enough for the hash table plus a fixed 32-byte reserve. */
#define LZ4_STREAM_MINSIZE \
	((1UL << (LZ4_MEMORY_USAGE)) + \
	 32) /* static size, for inter-version compatibility */
/*
 * Publicly-sized compression stream.
 * The char array fixes the size/alignment contract; internal_donotuse is
 * the real state and, as the name says, must not be touched by callers.
 */
union LZ4_stream_u {
	char minStateSize[LZ4_STREAM_MINSIZE];
	LZ4_stream_t_internal internal_donotuse;
}; /* previously typedef'd to LZ4_stream_t */
|
||||
|
||||
/*! LZ4_initStream() : v1.9.0+
|
||||
* An LZ4_stream_t structure must be initialized at least once.
|
||||
* This is automatically done when invoking LZ4_createStream(),
|
||||
* but it's not when the structure is simply declared on stack (for example).
|
||||
*
|
||||
* Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t.
|
||||
* It can also initialize any arbitrary buffer of sufficient size,
|
||||
* and will @return a pointer of proper type upon initialization.
|
||||
*
|
||||
* Note : initialization fails if size and alignment conditions are not respected.
|
||||
* In which case, the function will @return NULL.
|
||||
* Note2: An LZ4_stream_t structure guarantees correct alignment and size.
|
||||
* Note3: Before v1.9.0, use LZ4_resetStream() instead
|
||||
**/
|
||||
LZ4LIB_API LZ4_stream_t *LZ4_initStream(void *stateBuffer, size_t size);
|
||||
|
||||
/*! LZ4_streamDecode_t :
|
||||
* Never ever use below internal definitions directly !
|
||||
* These definitions are not API/ABI safe, and may change in future versions.
|
||||
* If you need static allocation, declare or allocate an LZ4_streamDecode_t object.
|
||||
**/
|
||||
/*
 * Internal streaming-decode state.
 * Exposed only for static allocation of LZ4_streamDecode_t; not ABI stable.
 */
typedef struct {
	/* external dictionary region used for back-references */
	const LZ4_byte *externalDict;
	/* end of the already-decoded prefix in the current buffer */
	const LZ4_byte *prefixEnd;
	/* size in bytes of the external dictionary */
	size_t extDictSize;
	/* size in bytes of the decoded prefix */
	size_t prefixSize;
} LZ4_streamDecode_t_internal;
|
||||
|
||||
/* Static size of the decode-stream union (fixed, for ABI stability). */
#define LZ4_STREAMDECODE_MINSIZE 32
/*
 * Publicly-sized streaming-decode state.
 * The char array fixes the size contract; internal_donotuse must not be
 * accessed by callers.
 */
union LZ4_streamDecode_u {
	char minStateSize[LZ4_STREAMDECODE_MINSIZE];
	LZ4_streamDecode_t_internal internal_donotuse;
}; /* previously typedef'd to LZ4_streamDecode_t */
|
||||
|
||||
/*-************************************
|
||||
* Obsolete Functions
|
||||
**************************************/
|
||||
|
||||
/*! Deprecation warnings
|
||||
*
|
||||
* Deprecated functions make the compiler generate a warning when invoked.
|
||||
* This is meant to invite users to update their source code.
|
||||
* Should deprecation warnings be a problem, it is generally possible to disable them,
|
||||
* typically with -Wno-deprecated-declarations for gcc
|
||||
* or _CRT_SECURE_NO_WARNINGS in Visual.
|
||||
*
|
||||
* Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS
|
||||
* before including the header file.
|
||||
*/
|
||||
#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS
|
||||
#define LZ4_DEPRECATED(message) /* disable deprecation warnings */
|
||||
#else
|
||||
#if defined(__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
|
||||
#define LZ4_DEPRECATED(message) [[deprecated(message)]]
|
||||
#elif defined(_MSC_VER)
|
||||
#define LZ4_DEPRECATED(message) __declspec(deprecated(message))
|
||||
#elif defined(__clang__) || \
|
||||
(defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45))
|
||||
#define LZ4_DEPRECATED(message) __attribute__((deprecated(message)))
|
||||
#elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31)
|
||||
#define LZ4_DEPRECATED(message) __attribute__((deprecated))
|
||||
#else
|
||||
#pragma message( \
|
||||
"WARNING: LZ4_DEPRECATED needs custom implementation for this compiler")
|
||||
#define LZ4_DEPRECATED(message) /* disabled */
|
||||
#endif
|
||||
#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
|
||||
|
||||
/*! Obsolete compression functions (since v1.7.3) */
|
||||
LZ4_DEPRECATED("use LZ4_compress_default() instead")
|
||||
LZ4LIB_API int LZ4_compress(const char *src, char *dest, int srcSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_default() instead")
|
||||
LZ4LIB_API int LZ4_compress_limitedOutput(const char *src, char *dest,
|
||||
int srcSize, int maxOutputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead")
|
||||
LZ4LIB_API int LZ4_compress_withState(void *state, const char *source,
|
||||
char *dest, int inputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead")
|
||||
LZ4LIB_API int LZ4_compress_limitedOutput_withState(void *state,
|
||||
const char *source,
|
||||
char *dest, int inputSize,
|
||||
int maxOutputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead")
|
||||
LZ4LIB_API int LZ4_compress_continue(LZ4_stream_t *LZ4_streamPtr,
|
||||
const char *source, char *dest,
|
||||
int inputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead")
|
||||
LZ4LIB_API int LZ4_compress_limitedOutput_continue(LZ4_stream_t *LZ4_streamPtr,
|
||||
const char *source,
|
||||
char *dest, int inputSize,
|
||||
int maxOutputSize);
|
||||
|
||||
/*! Obsolete decompression functions (since v1.8.0) */
|
||||
LZ4_DEPRECATED("use LZ4_decompress_fast() instead")
|
||||
LZ4LIB_API int LZ4_uncompress(const char *source, char *dest, int outputSize);
|
||||
LZ4_DEPRECATED("use LZ4_decompress_safe() instead")
|
||||
LZ4LIB_API int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
|
||||
int isize, int maxOutputSize);
|
||||
|
||||
/* Obsolete streaming functions (since v1.7.0)
|
||||
* degraded functionality; do not use!
|
||||
*
|
||||
* In order to perform streaming compression, these functions depended on data
|
||||
* that is no longer tracked in the state. They have been preserved as well as
|
||||
* possible: using them will still produce a correct output. However, they don't
|
||||
* actually retain any history between compression calls. The compression ratio
|
||||
* achieved will therefore be no better than compressing each chunk
|
||||
* independently.
|
||||
*/
|
||||
LZ4_DEPRECATED("Use LZ4_createStream() instead")
|
||||
LZ4LIB_API void *LZ4_create(char *inputBuffer);
|
||||
LZ4_DEPRECATED("Use LZ4_createStream() instead")
|
||||
LZ4LIB_API int LZ4_sizeofStreamState(void);
|
||||
LZ4_DEPRECATED("Use LZ4_resetStream() instead")
|
||||
LZ4LIB_API int LZ4_resetStreamState(void *state, char *inputBuffer);
|
||||
LZ4_DEPRECATED("Use LZ4_saveDict() instead")
|
||||
LZ4LIB_API char *LZ4_slideInputBuffer(void *state);
|
||||
|
||||
/*! Obsolete streaming decoding functions (since v1.7.0) */
|
||||
LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead")
|
||||
LZ4LIB_API int LZ4_decompress_safe_withPrefix64k(const char *src, char *dst,
|
||||
int compressedSize,
|
||||
int maxDstSize);
|
||||
LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead")
|
||||
LZ4LIB_API int LZ4_decompress_fast_withPrefix64k(const char *src, char *dst,
|
||||
int originalSize);
|
||||
|
||||
/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) :
|
||||
* These functions used to be faster than LZ4_decompress_safe(),
|
||||
* but this is no longer the case. They are now slower.
|
||||
* This is because LZ4_decompress_fast() doesn't know the input size,
|
||||
* and therefore must progress more cautiously into the input buffer to not read beyond the end of block.
|
||||
* On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability.
|
||||
* As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated.
|
||||
*
|
||||
* The last remaining LZ4_decompress_fast() specificity is that
|
||||
* it can decompress a block without knowing its compressed size.
|
||||
* Such functionality can be achieved in a more secure manner
|
||||
* by employing LZ4_decompress_safe_partial().
|
||||
*
|
||||
* Parameters:
|
||||
* originalSize : is the uncompressed size to regenerate.
|
||||
* `dst` must be already allocated, its size must be >= 'originalSize' bytes.
|
||||
* @return : number of bytes read from source buffer (== compressed size).
|
||||
* The function expects to finish at block's end exactly.
|
||||
* If the source stream is detected malformed, the function stops decoding and returns a negative result.
|
||||
* note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer.
|
||||
* However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds.
|
||||
* Also, since match offsets are not validated, match reads from 'src' may underflow too.
|
||||
* These issues never happen if input (compressed) data is correct.
|
||||
* But they may happen if input data is invalid (error or intentional tampering).
|
||||
* As a consequence, use these functions in trusted environments with trusted data **only**.
|
||||
*/
|
||||
LZ4_DEPRECATED(
|
||||
"This function is deprecated and unsafe. Consider using LZ4_decompress_safe_partial() instead")
|
||||
LZ4LIB_API int LZ4_decompress_fast(const char *src, char *dst,
|
||||
int originalSize);
|
||||
LZ4_DEPRECATED(
|
||||
"This function is deprecated and unsafe. Consider migrating towards LZ4_decompress_safe_continue() instead. "
|
||||
"Note that the contract will change (requires block's compressed size, instead of decompressed size)")
|
||||
LZ4LIB_API int
|
||||
LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
|
||||
const char *src, char *dst, int originalSize);
|
||||
LZ4_DEPRECATED(
|
||||
"This function is deprecated and unsafe. Consider using LZ4_decompress_safe_partial_usingDict() instead")
|
||||
LZ4LIB_API int LZ4_decompress_fast_usingDict(const char *src, char *dst,
|
||||
int originalSize,
|
||||
const char *dictStart,
|
||||
int dictSize);
|
||||
|
||||
/*! LZ4_resetStream() :
|
||||
* An LZ4_stream_t structure must be initialized at least once.
|
||||
* This is done with LZ4_initStream(), or LZ4_resetStream().
|
||||
* Consider switching to LZ4_initStream(),
|
||||
* invoking LZ4_resetStream() will trigger deprecation warnings in the future.
|
||||
*/
|
||||
LZ4LIB_API void LZ4_resetStream(LZ4_stream_t *streamPtr);
|
||||
|
||||
#endif /* LZ4_H_98237428734687 */
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
@@ -1,940 +0,0 @@
|
||||
/*
|
||||
* LZ4 - Fast LZ compression algorithm
|
||||
* Copyright (C) 2011 - 2016, Yann Collet.
|
||||
* BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* You can contact the author at :
|
||||
* - LZ4 homepage : http://www.lz4.org
|
||||
* - LZ4 source repository : https://github.com/lz4/lz4
|
||||
*
|
||||
* Changed for kernel usage by:
|
||||
* Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
|
||||
*/
|
||||
|
||||
/*-************************************
|
||||
* Dependencies
|
||||
**************************************/
|
||||
#include <linux/lz4.h>
|
||||
#include "lz4defs.h"
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
static const int LZ4_minLength = (MFLIMIT + 1);
|
||||
static const int LZ4_64Klimit = ((64 * KB) + (MFLIMIT - 1));
|
||||
|
||||
/*-******************************
|
||||
* Compression functions
|
||||
********************************/
|
||||
/*
 * Hash the first 4 input bytes into a hash-table index.
 * byU16 tables have twice as many slots (LZ4_HASHLOG + 1 bits), so the
 * shift keeps one extra bit of the multiplicative (Knuth) hash.
 */
static FORCE_INLINE U32 LZ4_hash4(
	U32 sequence,
	tableType_t const tableType)
{
	const U32 hashLog = (tableType == byU16)
		? LZ4_HASHLOG + 1
		: LZ4_HASHLOG;

	return (sequence * 2654435761U) >> ((MINMATCH * 8) - hashLog);
}
|
||||
|
||||
/*
 * Hash 5 input bytes taken from a 64-bit load; used on 64-bit targets
 * where wide reads are cheap.  Returns an index of hashLog bits.
 */
static FORCE_INLINE U32 LZ4_hash5(
	U64 sequence,
	tableType_t const tableType)
{
	/* byU16 tables have twice as many slots: keep one extra hash bit */
	const U32 hashLog = (tableType == byU16)
		? LZ4_HASHLOG + 1
		: LZ4_HASHLOG;

#if LZ4_LITTLE_ENDIAN
	static const U64 prime5bytes = 889523592379ULL;

	/* << 24 discards the 3 high bytes so only 5 input bytes contribute */
	return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
#else
	static const U64 prime8bytes = 11400714785074694791ULL;

	/* big endian: >> 24 keeps the 5 first input bytes of the word */
	return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
#endif
}
|
||||
|
||||
/*
 * Hash the bytes at position p.  On 64-bit builds with a byU32 table the
 * wider 5-byte hash is used; all other configurations use the 4-byte hash.
 */
static FORCE_INLINE U32 LZ4_hashPosition(
	const void *p,
	tableType_t const tableType)
{
#if LZ4_ARCH64
	if (tableType == byU32)
		return LZ4_hash5(LZ4_read_ARCH(p), tableType);
#endif

	return LZ4_hash4(LZ4_read32(p), tableType);
}
|
||||
|
||||
static void LZ4_putPositionOnHash(
|
||||
const BYTE *p,
|
||||
U32 h,
|
||||
void *tableBase,
|
||||
tableType_t const tableType,
|
||||
const BYTE *srcBase)
|
||||
{
|
||||
switch (tableType) {
|
||||
case byPtr:
|
||||
{
|
||||
const BYTE **hashTable = (const BYTE **)tableBase;
|
||||
|
||||
hashTable[h] = p;
|
||||
return;
|
||||
}
|
||||
case byU32:
|
||||
{
|
||||
U32 *hashTable = (U32 *) tableBase;
|
||||
|
||||
hashTable[h] = (U32)(p - srcBase);
|
||||
return;
|
||||
}
|
||||
case byU16:
|
||||
{
|
||||
U16 *hashTable = (U16 *) tableBase;
|
||||
|
||||
hashTable[h] = (U16)(p - srcBase);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static FORCE_INLINE void LZ4_putPosition(
|
||||
const BYTE *p,
|
||||
void *tableBase,
|
||||
tableType_t tableType,
|
||||
const BYTE *srcBase)
|
||||
{
|
||||
U32 const h = LZ4_hashPosition(p, tableType);
|
||||
|
||||
LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase);
|
||||
}
|
||||
|
||||
static const BYTE *LZ4_getPositionOnHash(
|
||||
U32 h,
|
||||
void *tableBase,
|
||||
tableType_t tableType,
|
||||
const BYTE *srcBase)
|
||||
{
|
||||
if (tableType == byPtr) {
|
||||
const BYTE **hashTable = (const BYTE **) tableBase;
|
||||
|
||||
return hashTable[h];
|
||||
}
|
||||
|
||||
if (tableType == byU32) {
|
||||
const U32 * const hashTable = (U32 *) tableBase;
|
||||
|
||||
return hashTable[h] + srcBase;
|
||||
}
|
||||
|
||||
{
|
||||
/* default, to ensure a return */
|
||||
const U16 * const hashTable = (U16 *) tableBase;
|
||||
|
||||
return hashTable[h] + srcBase;
|
||||
}
|
||||
}
|
||||
|
||||
static FORCE_INLINE const BYTE *LZ4_getPosition(
|
||||
const BYTE *p,
|
||||
void *tableBase,
|
||||
tableType_t tableType,
|
||||
const BYTE *srcBase)
|
||||
{
|
||||
U32 const h = LZ4_hashPosition(p, tableType);
|
||||
|
||||
return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
|
||||
}
|
||||
|
||||
|
||||
/*
 * LZ4_compress_generic() :
 * Core compression routine, parameterized by table type, dictionary mode,
 * dictionary-size handling and output-limit policy.
 * Inlined so the directive arguments become compile-time constants and
 * every branch on them is decided at compilation time.
 *
 * Returns the number of bytes written to dest, or 0 when the input is too
 * large, too big for a byU16 table, or (in limitedOutput mode) when the
 * output buffer would overflow.
 */
static FORCE_INLINE int LZ4_compress_generic(
	LZ4_stream_t_internal * const dictPtr,
	const char * const source,
	char * const dest,
	const int inputSize,
	const int maxOutputSize,
	const limitedOutput_directive outputLimited,
	const tableType_t tableType,
	const dict_directive dict,
	const dictIssue_directive dictIssue,
	const U32 acceleration)
{
	const BYTE *ip = (const BYTE *) source;
	const BYTE *base;
	const BYTE *lowLimit;
	/* matches below this point come from a dictionary that may be too
	 * small to trust (dictSmall handling) */
	const BYTE * const lowRefLimit = ip - dictPtr->dictSize;
	const BYTE * const dictionary = dictPtr->dictionary;
	const BYTE * const dictEnd = dictionary + dictPtr->dictSize;
	/* added to a match pointer to translate source-space addresses into
	 * dictionary-space addresses (usingExtDict mode) */
	const size_t dictDelta = dictEnd - (const BYTE *)source;
	const BYTE *anchor = (const BYTE *) source;
	const BYTE * const iend = ip + inputSize;
	const BYTE * const mflimit = iend - MFLIMIT;
	const BYTE * const matchlimit = iend - LASTLITERALS;

	BYTE *op = (BYTE *) dest;
	BYTE * const olimit = op + maxOutputSize;

	U32 forwardH;
	size_t refDelta = 0;

	/* Init conditions */
	if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) {
		/* Unsupported inputSize, too large (or negative) */
		return 0;
	}

	/* Choose address space of the hash table and the lowest address a
	 * literal/match may reference, per dictionary mode. */
	switch (dict) {
	case noDict:
	default:
		base = (const BYTE *)source;
		lowLimit = (const BYTE *)source;
		break;
	case withPrefix64k:
		base = (const BYTE *)source - dictPtr->currentOffset;
		lowLimit = (const BYTE *)source - dictPtr->dictSize;
		break;
	case usingExtDict:
		base = (const BYTE *)source - dictPtr->currentOffset;
		lowLimit = (const BYTE *)source;
		break;
	}

	if ((tableType == byU16)
		&& (inputSize >= LZ4_64Klimit)) {
		/* Size too large (not within 64K limit) */
		return 0;
	}

	if (inputSize < LZ4_minLength) {
		/* Input too small, no compression (all literals) */
		goto _last_literals;
	}

	/* First Byte */
	LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);
	ip++;
	forwardH = LZ4_hashPosition(ip, tableType);

	/* Main Loop */
	for ( ; ; ) {
		const BYTE *match;
		BYTE *token;

		/* Find a match */
		{
			const BYTE *forwardIp = ip;
			unsigned int step = 1;
			/* search speeds up (larger steps) the longer no
			 * match is found; scaled by the acceleration knob */
			unsigned int searchMatchNb = acceleration << LZ4_SKIPTRIGGER;

			do {
				U32 const h = forwardH;

				ip = forwardIp;
				forwardIp += step;
				step = (searchMatchNb++ >> LZ4_SKIPTRIGGER);

				if (unlikely(forwardIp > mflimit))
					goto _last_literals;

				match = LZ4_getPositionOnHash(h,
					dictPtr->hashTable,
					tableType, base);

				if (dict == usingExtDict) {
					/* candidate may live in the external
					 * dictionary; adjust refDelta and the
					 * lower bound accordingly */
					if (match < (const BYTE *)source) {
						refDelta = dictDelta;
						lowLimit = dictionary;
					} else {
						refDelta = 0;
						lowLimit = (const BYTE *)source;
					}
				}

				forwardH = LZ4_hashPosition(forwardIp,
					tableType);

				LZ4_putPositionOnHash(ip, h, dictPtr->hashTable,
					tableType, base);
			} while (((dictIssue == dictSmall)
					? (match < lowRefLimit)
					: 0)
				|| ((tableType == byU16)
					? 0
					: (match + MAX_DISTANCE < ip))
				|| (LZ4_read32(match + refDelta)
					!= LZ4_read32(ip)));
		}

		/* Catch up: extend the match backwards over equal bytes */
		while (((ip > anchor) & (match + refDelta > lowLimit))
			&& (unlikely(ip[-1] == match[refDelta - 1]))) {
			ip--;
			match--;
		}

		/* Encode Literals */
		{
			unsigned const int litLength = (unsigned int)(ip - anchor);

			token = op++;

			if ((outputLimited) &&
				/* Check output buffer overflow */
				(unlikely(op + litLength +
					(2 + 1 + LASTLITERALS) +
					(litLength / 255) > olimit)))
				return 0;

			if (litLength >= RUN_MASK) {
				/* long literal run: 255-byte continuation
				 * encoding after the token nibble */
				int len = (int)litLength - RUN_MASK;

				*token = (RUN_MASK << ML_BITS);

				for (; len >= 255; len -= 255)
					*op++ = 255;
				*op++ = (BYTE)len;
			} else
				*token = (BYTE)(litLength << ML_BITS);

			/* Copy Literals */
			LZ4_wildCopy(op, anchor, op + litLength);
			op += litLength;
		}

_next_match:
		/* Encode Offset */
		LZ4_writeLE16(op, (U16)(ip - match));
		op += 2;

		/* Encode MatchLength */
		{
			unsigned int matchCode;

			if ((dict == usingExtDict)
				&& (lowLimit == dictionary)) {
				/* match starts in the external dictionary:
				 * count within the dictionary first, then
				 * possibly continue into the source prefix */
				const BYTE *limit;

				match += refDelta;
				limit = ip + (dictEnd - match);

				if (limit > matchlimit)
					limit = matchlimit;

				matchCode = LZ4_count(ip + MINMATCH,
					match + MINMATCH, limit);

				ip += MINMATCH + matchCode;

				if (ip == limit) {
					unsigned const int more = LZ4_count(ip,
						(const BYTE *)source,
						matchlimit);

					matchCode += more;
					ip += more;
				}
			} else {
				matchCode = LZ4_count(ip + MINMATCH,
					match + MINMATCH, matchlimit);
				ip += MINMATCH + matchCode;
			}

			if (outputLimited &&
				/* Check output buffer overflow */
				(unlikely(op +
					(1 + LASTLITERALS) +
					(matchCode >> 8) > olimit)))
				return 0;

			if (matchCode >= ML_MASK) {
				/* long match: token nibble saturates, rest is
				 * 255-byte continuation encoding (written in
				 * 4-byte chunks for speed) */
				*token += ML_MASK;
				matchCode -= ML_MASK;
				LZ4_write32(op, 0xFFFFFFFF);

				while (matchCode >= 4 * 255) {
					op += 4;
					LZ4_write32(op, 0xFFFFFFFF);
					matchCode -= 4 * 255;
				}

				op += matchCode / 255;
				*op++ = (BYTE)(matchCode % 255);
			} else
				*token += (BYTE)(matchCode);
		}

		anchor = ip;

		/* Test end of chunk */
		if (ip > mflimit)
			break;

		/* Fill table */
		LZ4_putPosition(ip - 2, dictPtr->hashTable, tableType, base);

		/* Test next position */
		match = LZ4_getPosition(ip, dictPtr->hashTable,
			tableType, base);

		if (dict == usingExtDict) {
			if (match < (const BYTE *)source) {
				refDelta = dictDelta;
				lowLimit = dictionary;
			} else {
				refDelta = 0;
				lowLimit = (const BYTE *)source;
			}
		}

		LZ4_putPosition(ip, dictPtr->hashTable, tableType, base);

		/* immediate re-match at the current position: emit a
		 * zero-literal token and jump straight to match encoding */
		if (((dictIssue == dictSmall) ? (match >= lowRefLimit) : 1)
			&& (match + MAX_DISTANCE >= ip)
			&& (LZ4_read32(match + refDelta) == LZ4_read32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}

		/* Prepare next loop */
		forwardH = LZ4_hashPosition(++ip, tableType);
	}

_last_literals:
	/* Encode Last Literals */
	{
		size_t const lastRun = (size_t)(iend - anchor);

		if ((outputLimited) &&
			/* Check output buffer overflow */
			((op - (BYTE *)dest) + lastRun + 1 +
			((lastRun + 255 - RUN_MASK) / 255) > (U32)maxOutputSize))
			return 0;

		if (lastRun >= RUN_MASK) {
			size_t accumulator = lastRun - RUN_MASK;

			*op++ = RUN_MASK << ML_BITS;
			for (; accumulator >= 255; accumulator -= 255)
				*op++ = 255;
			*op++ = (BYTE) accumulator;
		} else {
			*op++ = (BYTE)(lastRun << ML_BITS);
		}

		memcpy(op, anchor, lastRun);

		op += lastRun;
	}

	/* End */
	return (int) (((char *)op) - dest);
}
|
||||
|
||||
static int LZ4_compress_fast_extState(
|
||||
void *state,
|
||||
const char *source,
|
||||
char *dest,
|
||||
int inputSize,
|
||||
int maxOutputSize,
|
||||
int acceleration)
|
||||
{
|
||||
LZ4_stream_t_internal *ctx = &((LZ4_stream_t *)state)->internal_donotuse;
|
||||
#if LZ4_ARCH64
|
||||
const tableType_t tableType = byU32;
|
||||
#else
|
||||
const tableType_t tableType = byPtr;
|
||||
#endif
|
||||
|
||||
LZ4_resetStream((LZ4_stream_t *)state);
|
||||
|
||||
if (acceleration < 1)
|
||||
acceleration = LZ4_ACCELERATION_DEFAULT;
|
||||
|
||||
if (maxOutputSize >= LZ4_COMPRESSBOUND(inputSize)) {
|
||||
if (inputSize < LZ4_64Klimit)
|
||||
return LZ4_compress_generic(ctx, source,
|
||||
dest, inputSize, 0,
|
||||
noLimit, byU16, noDict,
|
||||
noDictIssue, acceleration);
|
||||
else
|
||||
return LZ4_compress_generic(ctx, source,
|
||||
dest, inputSize, 0,
|
||||
noLimit, tableType, noDict,
|
||||
noDictIssue, acceleration);
|
||||
} else {
|
||||
if (inputSize < LZ4_64Klimit)
|
||||
return LZ4_compress_generic(ctx, source,
|
||||
dest, inputSize,
|
||||
maxOutputSize, limitedOutput, byU16, noDict,
|
||||
noDictIssue, acceleration);
|
||||
else
|
||||
return LZ4_compress_generic(ctx, source,
|
||||
dest, inputSize,
|
||||
maxOutputSize, limitedOutput, tableType, noDict,
|
||||
noDictIssue, acceleration);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Public fast-compression entry point.
 * wrkmem supplies the working state; all real work is delegated to
 * LZ4_compress_fast_extState().  Returns compressed size, or 0 on failure.
 */
int LZ4_compress_fast(const char *source, char *dest, int inputSize,
	int maxOutputSize, int acceleration, void *wrkmem)
{
	int const written = LZ4_compress_fast_extState(wrkmem, source, dest,
		inputSize, maxOutputSize, acceleration);

	return written;
}
EXPORT_SYMBOL(LZ4_compress_fast);
|
||||
|
||||
int LZ4_compress_default(const char *source, char *dest, int inputSize,
|
||||
int maxOutputSize, void *wrkmem)
|
||||
{
|
||||
return LZ4_compress_fast(source, dest, inputSize,
|
||||
maxOutputSize, LZ4_ACCELERATION_DEFAULT, wrkmem);
|
||||
}
|
||||
EXPORT_SYMBOL(LZ4_compress_default);
|
||||
|
||||
/*-******************************
|
||||
* *_destSize() variant
|
||||
********************************/
|
||||
static int LZ4_compress_destSize_generic(
|
||||
LZ4_stream_t_internal * const ctx,
|
||||
const char * const src,
|
||||
char * const dst,
|
||||
int * const srcSizePtr,
|
||||
const int targetDstSize,
|
||||
const tableType_t tableType)
|
||||
{
|
||||
const BYTE *ip = (const BYTE *) src;
|
||||
const BYTE *base = (const BYTE *) src;
|
||||
const BYTE *lowLimit = (const BYTE *) src;
|
||||
const BYTE *anchor = ip;
|
||||
const BYTE * const iend = ip + *srcSizePtr;
|
||||
const BYTE * const mflimit = iend - MFLIMIT;
|
||||
const BYTE * const matchlimit = iend - LASTLITERALS;
|
||||
|
||||
BYTE *op = (BYTE *) dst;
|
||||
BYTE * const oend = op + targetDstSize;
|
||||
BYTE * const oMaxLit = op + targetDstSize - 2 /* offset */
|
||||
- 8 /* because 8 + MINMATCH == MFLIMIT */ - 1 /* token */;
|
||||
BYTE * const oMaxMatch = op + targetDstSize
|
||||
- (LASTLITERALS + 1 /* token */);
|
||||
BYTE * const oMaxSeq = oMaxLit - 1 /* token */;
|
||||
|
||||
U32 forwardH;
|
||||
|
||||
/* Init conditions */
|
||||
/* Impossible to store anything */
|
||||
if (targetDstSize < 1)
|
||||
return 0;
|
||||
/* Unsupported input size, too large (or negative) */
|
||||
if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE)
|
||||
return 0;
|
||||
/* Size too large (not within 64K limit) */
|
||||
if ((tableType == byU16) && (*srcSizePtr >= LZ4_64Klimit))
|
||||
return 0;
|
||||
/* Input too small, no compression (all literals) */
|
||||
if (*srcSizePtr < LZ4_minLength)
|
||||
goto _last_literals;
|
||||
|
||||
/* First Byte */
|
||||
*srcSizePtr = 0;
|
||||
LZ4_putPosition(ip, ctx->hashTable, tableType, base);
|
||||
ip++; forwardH = LZ4_hashPosition(ip, tableType);
|
||||
|
||||
/* Main Loop */
|
||||
for ( ; ; ) {
|
||||
const BYTE *match;
|
||||
BYTE *token;
|
||||
|
||||
/* Find a match */
|
||||
{
|
||||
const BYTE *forwardIp = ip;
|
||||
unsigned int step = 1;
|
||||
unsigned int searchMatchNb = 1 << LZ4_SKIPTRIGGER;
|
||||
|
||||
do {
|
||||
U32 h = forwardH;
|
||||
|
||||
ip = forwardIp;
|
||||
forwardIp += step;
|
||||
step = (searchMatchNb++ >> LZ4_SKIPTRIGGER);
|
||||
|
||||
if (unlikely(forwardIp > mflimit))
|
||||
goto _last_literals;
|
||||
|
||||
match = LZ4_getPositionOnHash(h, ctx->hashTable,
|
||||
tableType, base);
|
||||
forwardH = LZ4_hashPosition(forwardIp,
|
||||
tableType);
|
||||
LZ4_putPositionOnHash(ip, h,
|
||||
ctx->hashTable, tableType,
|
||||
base);
|
||||
|
||||
} while (((tableType == byU16)
|
||||
? 0
|
||||
: (match + MAX_DISTANCE < ip))
|
||||
|| (LZ4_read32(match) != LZ4_read32(ip)));
|
||||
}
|
||||
|
||||
/* Catch up */
|
||||
while ((ip > anchor)
|
||||
&& (match > lowLimit)
|
||||
&& (unlikely(ip[-1] == match[-1]))) {
|
||||
ip--;
|
||||
match--;
|
||||
}
|
||||
|
||||
/* Encode Literal length */
|
||||
{
|
||||
unsigned int litLength = (unsigned int)(ip - anchor);
|
||||
|
||||
token = op++;
|
||||
if (op + ((litLength + 240) / 255)
|
||||
+ litLength > oMaxLit) {
|
||||
/* Not enough space for a last match */
|
||||
op--;
|
||||
goto _last_literals;
|
||||
}
|
||||
if (litLength >= RUN_MASK) {
|
||||
unsigned int len = litLength - RUN_MASK;
|
||||
*token = (RUN_MASK<<ML_BITS);
|
||||
for (; len >= 255; len -= 255)
|
||||
*op++ = 255;
|
||||
*op++ = (BYTE)len;
|
||||
} else
|
||||
*token = (BYTE)(litLength << ML_BITS);
|
||||
|
||||
/* Copy Literals */
|
||||
LZ4_wildCopy(op, anchor, op + litLength);
|
||||
op += litLength;
|
||||
}
|
||||
|
||||
_next_match:
|
||||
/* Encode Offset */
|
||||
LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
|
||||
|
||||
/* Encode MatchLength */
|
||||
{
|
||||
size_t matchLength = LZ4_count(ip + MINMATCH,
|
||||
match + MINMATCH, matchlimit);
|
||||
|
||||
if (op + ((matchLength + 240)/255) > oMaxMatch) {
|
||||
/* Match description too long : reduce it */
|
||||
matchLength = (15 - 1) + (oMaxMatch - op) * 255;
|
||||
}
|
||||
ip += MINMATCH + matchLength;
|
||||
|
||||
if (matchLength >= ML_MASK) {
|
||||
*token += ML_MASK;
|
||||
matchLength -= ML_MASK;
|
||||
while (matchLength >= 255) {
|
||||
matchLength -= 255;
|
||||
*op++ = 255;
|
||||
}
|
||||
*op++ = (BYTE)matchLength;
|
||||
} else
|
||||
*token += (BYTE)(matchLength);
|
||||
}
|
||||
|
||||
anchor = ip;
|
||||
|
||||
/* Test end of block */
|
||||
if (ip > mflimit)
|
||||
break;
|
||||
if (op > oMaxSeq)
|
||||
break;
|
||||
|
||||
/* Fill table */
|
||||
LZ4_putPosition(ip - 2, ctx->hashTable, tableType, base);
|
||||
|
||||
/* Test next position */
|
||||
match = LZ4_getPosition(ip, ctx->hashTable, tableType, base);
|
||||
LZ4_putPosition(ip, ctx->hashTable, tableType, base);
|
||||
|
||||
if ((match + MAX_DISTANCE >= ip)
|
||||
&& (LZ4_read32(match) == LZ4_read32(ip))) {
|
||||
token = op++; *token = 0;
|
||||
goto _next_match;
|
||||
}
|
||||
|
||||
/* Prepare next loop */
|
||||
forwardH = LZ4_hashPosition(++ip, tableType);
|
||||
}
|
||||
|
||||
_last_literals:
|
||||
/* Encode Last Literals */
|
||||
{
|
||||
size_t lastRunSize = (size_t)(iend - anchor);
|
||||
|
||||
if (op + 1 /* token */
|
||||
+ ((lastRunSize + 240) / 255) /* litLength */
|
||||
+ lastRunSize /* literals */ > oend) {
|
||||
/* adapt lastRunSize to fill 'dst' */
|
||||
lastRunSize = (oend - op) - 1;
|
||||
lastRunSize -= (lastRunSize + 240) / 255;
|
||||
}
|
||||
ip = anchor + lastRunSize;
|
||||
|
||||
if (lastRunSize >= RUN_MASK) {
|
||||
size_t accumulator = lastRunSize - RUN_MASK;
|
||||
|
||||
*op++ = RUN_MASK << ML_BITS;
|
||||
for (; accumulator >= 255; accumulator -= 255)
|
||||
*op++ = 255;
|
||||
*op++ = (BYTE) accumulator;
|
||||
} else {
|
||||
*op++ = (BYTE)(lastRunSize<<ML_BITS);
|
||||
}
|
||||
memcpy(op, anchor, lastRunSize);
|
||||
op += lastRunSize;
|
||||
}
|
||||
|
||||
/* End */
|
||||
*srcSizePtr = (int) (((const char *)ip) - src);
|
||||
return (int) (((char *)op) - dst);
|
||||
}
|
||||
|
||||
/*
 * Compress up to *srcSizePtr bytes of 'src' so that the output never
 * exceeds 'targetDstSize' bytes of 'dst', using caller-provided state.
 * On return *srcSizePtr is updated (by the callee) to the number of
 * source bytes actually consumed; the return value is the compressed
 * size, or 0 on failure.
 */
static int LZ4_compress_destSize_extState(
	LZ4_stream_t *state,
	const char *src,
	char *dst,
	int *srcSizePtr,
	int targetDstSize)
{
#if LZ4_ARCH64
	const tableType_t tableType = byU32;
#else
	const tableType_t tableType = byPtr;
#endif

	LZ4_resetStream(state);

	if (targetDstSize >= LZ4_COMPRESSBOUND(*srcSizePtr)) {
		/* compression success is guaranteed */
		return LZ4_compress_fast_extState(
			state, src, dst, *srcSizePtr,
			targetDstSize, 1);
	} else {
		/* destination-bounded path; byU16 hashing for <64K inputs */
		if (*srcSizePtr < LZ4_64Klimit)
			return LZ4_compress_destSize_generic(
				&state->internal_donotuse,
				src, dst, srcSizePtr,
				targetDstSize, byU16);
		else
			return LZ4_compress_destSize_generic(
				&state->internal_donotuse,
				src, dst, srcSizePtr,
				targetDstSize, tableType);
	}
}
|
||||
|
||||
|
||||
/*
 * Public entry point: compress 'src' into at most 'targetDstSize' bytes
 * of 'dst'.  'wrkmem' is caller-supplied scratch memory used as the
 * LZ4_stream_t compression state.  *srcSizePtr is updated to the number
 * of source bytes consumed; returns the compressed size.
 */
int LZ4_compress_destSize(
	const char *src,
	char *dst,
	int *srcSizePtr,
	int targetDstSize,
	void *wrkmem)
{
	return LZ4_compress_destSize_extState(wrkmem, src, dst, srcSizePtr,
		targetDstSize);
}
EXPORT_SYMBOL(LZ4_compress_destSize);
|
||||
|
||||
/*-******************************
|
||||
* Streaming functions
|
||||
********************************/
|
||||
/* Re-initialize a streaming compression context to a pristine state. */
void LZ4_resetStream(LZ4_stream_t *LZ4_stream)
{
	memset(LZ4_stream, 0, sizeof(LZ4_stream_t));
}
|
||||
|
||||
/*
 * Load a dictionary into the streaming compression context so that
 * subsequent blocks may reference it.  Only the last 64 KB of the
 * dictionary are retained.  Returns the number of dictionary bytes kept.
 */
int LZ4_loadDict(LZ4_stream_t *LZ4_dict,
	const char *dictionary, int dictSize)
{
	LZ4_stream_t_internal *dict = &LZ4_dict->internal_donotuse;
	const BYTE *p = (const BYTE *)dictionary;
	const BYTE * const dictEnd = p + dictSize;
	const BYTE *base;

	if ((dict->initCheck)
		|| (dict->currentOffset > 1 * GB)) {
		/* Uninitialized structure, or reuse overflow */
		LZ4_resetStream(LZ4_dict);
	}

	if (dictSize < (int)HASH_UNIT) {
		/* too small to index even one hash unit */
		dict->dictionary = NULL;
		dict->dictSize = 0;
		return 0;
	}

	/* keep only the trailing 64 KB window of the dictionary */
	if ((dictEnd - p) > 64 * KB)
		p = dictEnd - 64 * KB;
	dict->currentOffset += 64 * KB;
	base = p - dict->currentOffset;
	dict->dictionary = p;
	dict->dictSize = (U32)(dictEnd - p);
	dict->currentOffset += dict->dictSize;

	/* seed the hash table with dictionary positions, stepping by 3 */
	while (p <= dictEnd - HASH_UNIT) {
		LZ4_putPosition(p, dict->hashTable, byU32, base);
		p += 3;
	}

	return dict->dictSize;
}
EXPORT_SYMBOL(LZ4_loadDict);
|
||||
|
||||
/*
 * Rescale the context's hash table when currentOffset approaches the
 * 2 GB mark or exceeds the new source address, so that the 32-bit
 * positions stored in the table remain valid relative to a fresh
 * 64 KB base offset.
 */
static void LZ4_renormDictT(LZ4_stream_t_internal *LZ4_dict,
	const BYTE *src)
{
	if ((LZ4_dict->currentOffset > 0x80000000) ||
		((uptrval)LZ4_dict->currentOffset > (uptrval)src)) {
		/* address space overflow */
		/* rescale hash table */
		U32 const delta = LZ4_dict->currentOffset - 64 * KB;
		const BYTE *dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
		int i;

		/* entries older than the new base are discarded (zeroed) */
		for (i = 0; i < LZ4_HASH_SIZE_U32; i++) {
			if (LZ4_dict->hashTable[i] < delta)
				LZ4_dict->hashTable[i] = 0;
			else
				LZ4_dict->hashTable[i] -= delta;
		}
		LZ4_dict->currentOffset = 64 * KB;
		if (LZ4_dict->dictSize > 64 * KB)
			LZ4_dict->dictSize = 64 * KB;
		LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
	}
}
|
||||
|
||||
/*
 * Copy the last 'dictSize' bytes of the current dictionary into
 * 'safeBuffer' (capped to 64 KB and to the available dictionary size)
 * and point the context at the copy.  Returns the number of bytes saved.
 * Uses memmove because safeBuffer may overlap the current dictionary.
 */
int LZ4_saveDict(LZ4_stream_t *LZ4_dict, char *safeBuffer, int dictSize)
{
	LZ4_stream_t_internal * const dict = &LZ4_dict->internal_donotuse;
	const BYTE * const previousDictEnd = dict->dictionary + dict->dictSize;

	if ((U32)dictSize > 64 * KB) {
		/* useless to define a dictionary > 64 * KB */
		dictSize = 64 * KB;
	}
	if ((U32)dictSize > dict->dictSize)
		dictSize = dict->dictSize;

	memmove(safeBuffer, previousDictEnd - dictSize, dictSize);

	dict->dictionary = (const BYTE *)safeBuffer;
	dict->dictSize = (U32)dictSize;

	return dictSize;
}
EXPORT_SYMBOL(LZ4_saveDict);
|
||||
|
||||
/*
 * Streaming compression of one block, referencing previously-seen data
 * held in the context.  Two internal modes:
 *  - prefix mode: the new block directly follows the dictionary in
 *    memory, so the dictionary becomes a contiguous prefix;
 *  - external dictionary mode: the dictionary lives elsewhere.
 * Returns the compressed size, or 0 on error/uninitialized context.
 */
int LZ4_compress_fast_continue(LZ4_stream_t *LZ4_stream, const char *source,
	char *dest, int inputSize, int maxOutputSize, int acceleration)
{
	LZ4_stream_t_internal *streamPtr = &LZ4_stream->internal_donotuse;
	const BYTE * const dictEnd = streamPtr->dictionary
		+ streamPtr->dictSize;

	const BYTE *smallest = (const BYTE *) source;

	if (streamPtr->initCheck) {
		/* Uninitialized structure detected */
		return 0;
	}

	/* renormalize relative to the lowest address still referenced */
	if ((streamPtr->dictSize > 0) && (smallest > dictEnd))
		smallest = dictEnd;

	LZ4_renormDictT(streamPtr, smallest);

	if (acceleration < 1)
		acceleration = LZ4_ACCELERATION_DEFAULT;

	/* Check overlapping input/dictionary space */
	{
		const BYTE *sourceEnd = (const BYTE *) source + inputSize;

		if ((sourceEnd > streamPtr->dictionary)
			&& (sourceEnd < dictEnd)) {
			/* shrink dictionary to the non-overlapped tail */
			streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
			if (streamPtr->dictSize > 64 * KB)
				streamPtr->dictSize = 64 * KB;
			if (streamPtr->dictSize < 4)
				streamPtr->dictSize = 0;
			streamPtr->dictionary = dictEnd - streamPtr->dictSize;
		}
	}

	/* prefix mode : source data follows dictionary */
	if (dictEnd == (const BYTE *)source) {
		int result;

		if ((streamPtr->dictSize < 64 * KB) &&
			(streamPtr->dictSize < streamPtr->currentOffset)) {
			result = LZ4_compress_generic(
				streamPtr, source, dest, inputSize,
				maxOutputSize, limitedOutput, byU32,
				withPrefix64k, dictSmall, acceleration);
		} else {
			result = LZ4_compress_generic(
				streamPtr, source, dest, inputSize,
				maxOutputSize, limitedOutput, byU32,
				withPrefix64k, noDictIssue, acceleration);
		}
		/* the new block extends the prefix dictionary */
		streamPtr->dictSize += (U32)inputSize;
		streamPtr->currentOffset += (U32)inputSize;
		return result;
	}

	/* external dictionary mode */
	{
		int result;

		if ((streamPtr->dictSize < 64 * KB) &&
			(streamPtr->dictSize < streamPtr->currentOffset)) {
			result = LZ4_compress_generic(
				streamPtr, source, dest, inputSize,
				maxOutputSize, limitedOutput, byU32,
				usingExtDict, dictSmall, acceleration);
		} else {
			result = LZ4_compress_generic(
				streamPtr, source, dest, inputSize,
				maxOutputSize, limitedOutput, byU32,
				usingExtDict, noDictIssue, acceleration);
		}
		/* the just-compressed block becomes the next dictionary */
		streamPtr->dictionary = (const BYTE *)source;
		streamPtr->dictSize = (U32)inputSize;
		streamPtr->currentOffset += (U32)inputSize;
		return result;
	}
}
EXPORT_SYMBOL(LZ4_compress_fast_continue);
|
||||
|
||||
MODULE_LICENSE("Dual BSD/GPL");
|
||||
MODULE_DESCRIPTION("LZ4 compressor");
|
||||
@@ -1,508 +0,0 @@
|
||||
/*
|
||||
* LZ4 - Fast LZ compression algorithm
|
||||
* Copyright (C) 2011 - 2016, Yann Collet.
|
||||
* BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* You can contact the author at :
|
||||
* - LZ4 homepage : http://www.lz4.org
|
||||
* - LZ4 source repository : https://github.com/lz4/lz4
|
||||
*
|
||||
* Changed for kernel usage by:
|
||||
* Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
|
||||
*/
|
||||
|
||||
/*-************************************
|
||||
* Dependencies
|
||||
**************************************/
|
||||
#include <linux/lz4.h>
|
||||
#include "lz4defs.h"
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <asm/unaligned.h>
|
||||
|
||||
/*-*****************************
|
||||
* Decompression functions
|
||||
*******************************/
|
||||
/* LZ4_decompress_generic() :
|
||||
* This generic decompression function cover all use cases.
|
||||
* It shall be instantiated several times, using different sets of directives
|
||||
* Note that it is important this generic function is really inlined,
|
||||
* in order to remove useless branches during compilation optimization.
|
||||
*/
|
||||
/*
 * Core LZ4 block decoder.  Instantiated (via forced inlining) with
 * compile-time-constant mode arguments so dead branches are removed.
 * Decodes a stream of sequences: [token][literals][offset][match].
 * Returns the number of bytes written (endOnInput) or read
 * (!endOnInput); -1 on any malformed input or bounds violation.
 */
static FORCE_INLINE int LZ4_decompress_generic(
	const char * const source,
	char * const dest,
	int inputSize,
	/*
	 * If endOnInput == endOnInputSize,
	 * this value is the max size of Output Buffer.
	 */
	int outputSize,
	/* endOnOutputSize, endOnInputSize */
	int endOnInput,
	/* full, partial */
	int partialDecoding,
	/* only used if partialDecoding == partial */
	int targetOutputSize,
	/* noDict, withPrefix64k, usingExtDict */
	int dict,
	/* == dest when no prefix */
	const BYTE * const lowPrefix,
	/* only if dict == usingExtDict */
	const BYTE * const dictStart,
	/* note : = 0 if noDict */
	const size_t dictSize
	)
{
	/* Local Variables */
	const BYTE *ip = (const BYTE *) source;
	const BYTE * const iend = ip + inputSize;

	BYTE *op = (BYTE *) dest;
	BYTE * const oend = op + outputSize;
	BYTE *cpy;
	BYTE *oexit = op + targetOutputSize;
	/* earliest address a match may reference */
	const BYTE * const lowLimit = lowPrefix - dictSize;

	const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize;
	/* helper tables for copying overlapping matches (offset < 8) */
	static const unsigned int dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };
	static const int dec64table[] = { 0, 0, 0, -1, 0, 1, 2, 3 };

	const int safeDecode = (endOnInput == endOnInputSize);
	const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB)));

	/* Special cases */
	/* targetOutputSize too high => decode everything */
	if ((partialDecoding) && (oexit > oend - MFLIMIT))
		oexit = oend - MFLIMIT;

	/* Empty output buffer */
	if ((endOnInput) && (unlikely(outputSize == 0)))
		return ((inputSize == 1) && (*ip == 0)) ? 0 : -1;

	if ((!endOnInput) && (unlikely(outputSize == 0)))
		return (*ip == 0 ? 1 : -1);

	/* Main Loop : decode sequences */
	while (1) {
		size_t length;
		const BYTE *match;
		size_t offset;

		/* get literal length */
		unsigned int const token = *ip++;

		length = token>>ML_BITS;

		if (length == RUN_MASK) {
			/* extended literal length: sum continuation bytes */
			unsigned int s;

			do {
				s = *ip++;
				length += s;
			} while (likely(endOnInput
				? ip < iend - RUN_MASK
				: 1) & (s == 255));

			if ((safeDecode)
				&& unlikely(
					(size_t)(op + length) < (size_t)(op))) {
				/* overflow detection */
				goto _output_error;
			}
			if ((safeDecode)
				&& unlikely(
					(size_t)(ip + length) < (size_t)(ip))) {
				/* overflow detection */
				goto _output_error;
			}
		}

		/* copy literals */
		cpy = op + length;
		if (((endOnInput) && ((cpy > (partialDecoding ? oexit : oend - MFLIMIT))
			|| (ip + length > iend - (2 + 1 + LASTLITERALS))))
			|| ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) {
			/* near the end of either buffer: safe (exact) copy */
			if (partialDecoding) {
				if (cpy > oend) {
					/*
					 * Error :
					 * write attempt beyond end of output buffer
					 */
					goto _output_error;
				}
				if ((endOnInput)
					&& (ip + length > iend)) {
					/*
					 * Error :
					 * read attempt beyond
					 * end of input buffer
					 */
					goto _output_error;
				}
			} else {
				if ((!endOnInput)
					&& (cpy != oend)) {
					/*
					 * Error :
					 * block decoding must
					 * stop exactly there
					 */
					goto _output_error;
				}
				if ((endOnInput)
					&& ((ip + length != iend)
					|| (cpy > oend))) {
					/*
					 * Error :
					 * input must be consumed
					 */
					goto _output_error;
				}
			}

			memcpy(op, ip, length);
			ip += length;
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}

		/* fast path: may overwrite up to WILDCOPYLENGTH-1 extra bytes */
		LZ4_wildCopy(op, ip, cpy);
		ip += length;
		op = cpy;

		/* get offset */
		offset = LZ4_readLE16(ip);
		ip += 2;
		match = op - offset;

		if ((checkOffset) && (unlikely(match < lowLimit))) {
			/* Error : offset outside buffers */
			goto _output_error;
		}

		/* costs ~1%; silence an msan warning when offset == 0 */
		LZ4_write32(op, (U32)offset);

		/* get matchlength */
		length = token & ML_MASK;
		if (length == ML_MASK) {
			unsigned int s;

			do {
				s = *ip++;

				if ((endOnInput) && (ip > iend - LASTLITERALS))
					goto _output_error;

				length += s;
			} while (s == 255);

			if ((safeDecode)
				&& unlikely(
					(size_t)(op + length) < (size_t)op)) {
				/* overflow detection */
				goto _output_error;
			}
		}

		length += MINMATCH;

		/* check external dictionary */
		if ((dict == usingExtDict) && (match < lowPrefix)) {
			if (unlikely(op + length > oend - LASTLITERALS)) {
				/* doesn't respect parsing restriction */
				goto _output_error;
			}

			if (length <= (size_t)(lowPrefix - match)) {
				/*
				 * match can be copied as a single segment
				 * from external dictionary
				 */
				memmove(op, dictEnd - (lowPrefix - match),
					length);
				op += length;
			} else {
				/*
				 * match encompass external
				 * dictionary and current block
				 */
				size_t const copySize = (size_t)(lowPrefix - match);
				size_t const restSize = length - copySize;

				memcpy(op, dictEnd - copySize, copySize);
				op += copySize;

				if (restSize > (size_t)(op - lowPrefix)) {
					/* overlap copy */
					BYTE * const endOfMatch = op + restSize;
					const BYTE *copyFrom = lowPrefix;

					while (op < endOfMatch)
						*op++ = *copyFrom++;
				} else {
					memcpy(op, lowPrefix, restSize);
					op += restSize;
				}
			}

			continue;
		}

		/* copy match within block */
		cpy = op + length;

		if (unlikely(offset < 8)) {
			/* overlapping match: widen via the dec tables */
			const int dec64 = dec64table[offset];

			op[0] = match[0];
			op[1] = match[1];
			op[2] = match[2];
			op[3] = match[3];
			match += dec32table[offset];
			memcpy(op + 4, match, 4);
			match -= dec64;
		} else {
			LZ4_copy8(op, match);
			match += 8;
		}

		op += 8;

		if (unlikely(cpy > oend - 12)) {
			BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1);

			if (cpy > oend - LASTLITERALS) {
				/*
				 * Error : last LASTLITERALS bytes
				 * must be literals (uncompressed)
				 */
				goto _output_error;
			}

			if (op < oCopyLimit) {
				LZ4_wildCopy(op, match, oCopyLimit);
				match += oCopyLimit - op;
				op = oCopyLimit;
			}

			/* finish byte-by-byte to stay within the buffer */
			while (op < cpy)
				*op++ = *match++;
		} else {
			LZ4_copy8(op, match);

			if (length > 16)
				LZ4_wildCopy(op + 8, match + 8, cpy);
		}

		op = cpy; /* correction */
	}

	/* end of decoding */
	if (endOnInput) {
		/* Nb of output bytes decoded */
		return (int) (((char *)op) - dest);
	} else {
		/* Nb of input bytes read */
		return (int) (((const char *)ip) - source);
	}

	/* Overflow error detected */
_output_error:
	return -1;
}
|
||||
|
||||
/*
 * Decompress a complete block, bounded by both the compressed input
 * size and the destination capacity.  Returns decoded byte count,
 * negative on error.
 */
int LZ4_decompress_safe(const char *source, char *dest,
	int compressedSize, int maxDecompressedSize)
{
	return LZ4_decompress_generic(source, dest, compressedSize,
		maxDecompressedSize, endOnInputSize, full, 0,
		noDict, (BYTE *)dest, NULL, 0);
}
|
||||
|
||||
/*
 * Like LZ4_decompress_safe(), but may stop once at least
 * 'targetOutputSize' bytes have been produced.
 */
int LZ4_decompress_safe_partial(const char *source, char *dest,
	int compressedSize, int targetOutputSize, int maxDecompressedSize)
{
	return LZ4_decompress_generic(source, dest, compressedSize,
		maxDecompressedSize, endOnInputSize, partial,
		targetOutputSize, noDict, (BYTE *)dest, NULL, 0);
}
|
||||
|
||||
/*
 * Decompress a block whose decoded size is known exactly
 * ('originalSize'); terminates on output size, not input size.
 * Returns bytes read from 'source', negative on error.
 */
int LZ4_decompress_fast(const char *source, char *dest, int originalSize)
{
	return LZ4_decompress_generic(source, dest, 0, originalSize,
		endOnOutputSize, full, 0, withPrefix64k,
		(BYTE *)(dest - 64 * KB), NULL, 64 * KB);
}
|
||||
|
||||
/*
 * Attach a dictionary/prefix region to a streaming decode context.
 * Only the geometry is recorded; no data is copied.  Always returns 1.
 */
int LZ4_setStreamDecode(LZ4_streamDecode_t *LZ4_streamDecode,
	const char *dictionary, int dictSize)
{
	LZ4_streamDecode_t_internal *sd =
		(LZ4_streamDecode_t_internal *) LZ4_streamDecode;

	/* no external dictionary yet; the supplied region is the prefix */
	sd->externalDict = NULL;
	sd->extDictSize = 0;
	sd->prefixEnd = (const BYTE *) dictionary + dictSize;
	sd->prefixSize = (size_t) dictSize;

	return 1;
}
|
||||
|
||||
/*
|
||||
* *_continue() :
|
||||
* These decoding functions allow decompression of multiple blocks
|
||||
* in "streaming" mode.
|
||||
* Previously decoded blocks must still be available at the memory
|
||||
* position where they were decoded.
|
||||
* If it's not possible, save the relevant part of
|
||||
* decoded data into a safe buffer,
|
||||
* and indicate where it stands using LZ4_setStreamDecode()
|
||||
*/
|
||||
/*
 * Streaming decode of one block.  If 'dest' directly follows the
 * previously decoded data, the prefix is simply extended; otherwise the
 * previous output becomes an external dictionary for this block.
 * Returns the decoded size, or <= 0 on error.
 */
int LZ4_decompress_safe_continue(LZ4_streamDecode_t *LZ4_streamDecode,
	const char *source, char *dest, int compressedSize, int maxOutputSize)
{
	LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
	int result;

	if (lz4sd->prefixEnd == (BYTE *)dest) {
		/* contiguous with previous output: extend the prefix */
		result = LZ4_decompress_generic(source, dest,
			compressedSize,
			maxOutputSize,
			endOnInputSize, full, 0,
			usingExtDict, lz4sd->prefixEnd - lz4sd->prefixSize,
			lz4sd->externalDict,
			lz4sd->extDictSize);

		if (result <= 0)
			return result;

		lz4sd->prefixSize += result;
		lz4sd->prefixEnd += result;
	} else {
		/* non-contiguous: previous output becomes ext. dictionary */
		lz4sd->extDictSize = lz4sd->prefixSize;
		lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
		result = LZ4_decompress_generic(source, dest,
			compressedSize, maxOutputSize,
			endOnInputSize, full, 0,
			usingExtDict, (BYTE *)dest,
			lz4sd->externalDict, lz4sd->extDictSize);
		if (result <= 0)
			return result;
		lz4sd->prefixSize = result;
		lz4sd->prefixEnd = (BYTE *)dest + result;
	}

	return result;
}
|
||||
|
||||
/*
 * Streaming decode of one block of known decoded size 'originalSize'.
 * Same prefix/external-dictionary logic as
 * LZ4_decompress_safe_continue(), but terminates on output size.
 */
int LZ4_decompress_fast_continue(LZ4_streamDecode_t *LZ4_streamDecode,
	const char *source, char *dest, int originalSize)
{
	LZ4_streamDecode_t_internal *lz4sd = &LZ4_streamDecode->internal_donotuse;
	int result;

	if (lz4sd->prefixEnd == (BYTE *)dest) {
		/* contiguous with previous output: extend the prefix */
		result = LZ4_decompress_generic(source, dest, 0, originalSize,
			endOnOutputSize, full, 0,
			usingExtDict,
			lz4sd->prefixEnd - lz4sd->prefixSize,
			lz4sd->externalDict, lz4sd->extDictSize);

		if (result <= 0)
			return result;

		lz4sd->prefixSize += originalSize;
		lz4sd->prefixEnd += originalSize;
	} else {
		/* non-contiguous: previous output becomes ext. dictionary */
		lz4sd->extDictSize = lz4sd->prefixSize;
		lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
		result = LZ4_decompress_generic(source, dest, 0, originalSize,
			endOnOutputSize, full, 0,
			usingExtDict, (BYTE *)dest,
			lz4sd->externalDict, lz4sd->extDictSize);
		if (result <= 0)
			return result;
		lz4sd->prefixSize = originalSize;
		lz4sd->prefixEnd = (BYTE *)dest + originalSize;
	}

	return result;
}
|
||||
|
||||
/*
|
||||
* Advanced decoding functions :
|
||||
* *_usingDict() :
|
||||
* These decoding functions work the same as "_continue" ones,
|
||||
* the dictionary must be explicitly provided within parameters
|
||||
*/
|
||||
/*
 * Shared implementation for the *_usingDict() entry points.  Picks the
 * cheapest dictionary mode: none, prefix (dictionary immediately
 * precedes 'dest'), or external.  'safe' selects input-bounded vs
 * output-bounded decoding.
 */
static FORCE_INLINE int LZ4_decompress_usingDict_generic(const char *source,
	char *dest, int compressedSize, int maxOutputSize, int safe,
	const char *dictStart, int dictSize)
{
	if (dictSize == 0)
		return LZ4_decompress_generic(source, dest,
			compressedSize, maxOutputSize, safe, full, 0,
			noDict, (BYTE *)dest, NULL, 0);
	if (dictStart + dictSize == dest) {
		/* dictionary is a contiguous prefix of the output */
		if (dictSize >= (int)(64 * KB - 1))
			return LZ4_decompress_generic(source, dest,
				compressedSize, maxOutputSize, safe, full, 0,
				withPrefix64k, (BYTE *)dest - 64 * KB, NULL, 0);
		return LZ4_decompress_generic(source, dest, compressedSize,
			maxOutputSize, safe, full, 0, noDict,
			(BYTE *)dest - dictSize, NULL, 0);
	}
	return LZ4_decompress_generic(source, dest, compressedSize,
		maxOutputSize, safe, full, 0, usingExtDict,
		(BYTE *)dest, (const BYTE *)dictStart, dictSize);
}
|
||||
|
||||
/* Input-bounded decode with an explicitly supplied dictionary. */
int LZ4_decompress_safe_usingDict(const char *source, char *dest,
	int compressedSize, int maxOutputSize,
	const char *dictStart, int dictSize)
{
	return LZ4_decompress_usingDict_generic(source, dest,
		compressedSize, maxOutputSize, 1, dictStart, dictSize);
}
|
||||
|
||||
/* Output-bounded decode (known original size) with an explicit dictionary. */
int LZ4_decompress_fast_usingDict(const char *source, char *dest,
	int originalSize, const char *dictStart, int dictSize)
{
	return LZ4_decompress_usingDict_generic(source, dest, 0,
		originalSize, 0, dictStart, dictSize);
}
|
||||
|
||||
#ifndef STATIC
|
||||
EXPORT_SYMBOL(LZ4_decompress_safe);
|
||||
EXPORT_SYMBOL(LZ4_decompress_safe_partial);
|
||||
EXPORT_SYMBOL(LZ4_decompress_fast);
|
||||
EXPORT_SYMBOL(LZ4_setStreamDecode);
|
||||
EXPORT_SYMBOL(LZ4_decompress_safe_continue);
|
||||
EXPORT_SYMBOL(LZ4_decompress_fast_continue);
|
||||
EXPORT_SYMBOL(LZ4_decompress_safe_usingDict);
|
||||
EXPORT_SYMBOL(LZ4_decompress_fast_usingDict);
|
||||
|
||||
MODULE_LICENSE("Dual BSD/GPL");
|
||||
MODULE_DESCRIPTION("LZ4 decompressor");
|
||||
#endif
|
||||
48
lib/lz4/lz4armv8/lz4accel.c
Normal file
48
lib/lz4/lz4armv8/lz4accel.c
Normal file
@@ -0,0 +1,48 @@
|
||||
#include "lz4accel.h"
|
||||
#include <asm/cputype.h>
|
||||
|
||||
#ifdef CONFIG_CFI_CLANG
|
||||
static inline int
|
||||
__cfi_lz4_decompress_asm(uint8_t **dst_ptr, uint8_t *dst_begin,
|
||||
uint8_t *dst_end, const uint8_t **src_ptr,
|
||||
const uint8_t *src_end, bool dip)
|
||||
{
|
||||
return _lz4_decompress_asm(dst_ptr, dst_begin, dst_end,
|
||||
src_ptr, src_end, dip);
|
||||
}
|
||||
|
||||
static inline int
|
||||
__cfi_lz4_decompress_asm_noprfm(uint8_t **dst_ptr, uint8_t *dst_begin,
|
||||
uint8_t *dst_end, const uint8_t **src_ptr,
|
||||
const uint8_t *src_end, bool dip)
|
||||
{
|
||||
return _lz4_decompress_asm_noprfm(dst_ptr, dst_begin, dst_end,
|
||||
src_ptr, src_end, dip);
|
||||
}
|
||||
|
||||
#define _lz4_decompress_asm __cfi_lz4_decompress_asm
|
||||
#define _lz4_decompress_asm_noprfm __cfi_lz4_decompress_asm_noprfm
|
||||
#endif
|
||||
|
||||
/*
 * First-call dispatcher installed as the initial entry in
 * lz4_decompress_asm_fn[] for every CPU.  Probes the CPU part number,
 * caches the matching asm decompressor for this CPU (Cortex-A53 gets
 * the no-prefetch variant, everything else the prefetching one), and
 * then performs the requested decompression itself.
 * NOTE(review): assumes the caller runs with preemption disabled (the
 * wrapper in lz4accel.h calls this inside kernel_neon_begin()), so
 * smp_processor_id() stays stable here — confirm for any new caller.
 */
int lz4_decompress_asm_select(uint8_t **dst_ptr, uint8_t *dst_begin,
			      uint8_t *dst_end, const uint8_t **src_ptr,
			      const uint8_t *src_end, bool dip) {
	const unsigned i = smp_processor_id();

	switch(read_cpuid_part_number()) {
	case ARM_CPU_PART_CORTEX_A53:
		lz4_decompress_asm_fn[i] = _lz4_decompress_asm_noprfm;
		return _lz4_decompress_asm_noprfm(dst_ptr, dst_begin, dst_end,
						  src_ptr, src_end, dip);
	}
	lz4_decompress_asm_fn[i] = _lz4_decompress_asm;
	return _lz4_decompress_asm(dst_ptr, dst_begin, dst_end,
				   src_ptr, src_end, dip);
}

/*
 * Per-CPU decompressor table.  Every slot starts at the selector above
 * and is overwritten with the CPU-appropriate routine on first use.
 */
int (*lz4_decompress_asm_fn[NR_CPUS])(uint8_t **dst_ptr, uint8_t *dst_begin,
				      uint8_t *dst_end, const uint8_t **src_ptr,
				      const uint8_t *src_end, bool dip)
	__read_mostly = {
	[0 ... NR_CPUS-1] = lz4_decompress_asm_select,
};
|
||||
56
lib/lz4/lz4armv8/lz4accel.h
Normal file
56
lib/lz4/lz4armv8/lz4accel.h
Normal file
@@ -0,0 +1,56 @@
|
||||
#include <linux/types.h>
|
||||
#include <asm/simd.h>
|
||||
|
||||
#define LZ4_FAST_MARGIN (128)
|
||||
|
||||
#if defined(CONFIG_ARM64) && defined(CONFIG_KERNEL_MODE_NEON)
|
||||
#include <asm/neon.h>
|
||||
#include <asm/cputype.h>
|
||||
|
||||
asmlinkage int _lz4_decompress_asm(uint8_t **dst_ptr, uint8_t *dst_begin,
|
||||
uint8_t *dst_end, const uint8_t **src_ptr,
|
||||
const uint8_t *src_end, bool dip);
|
||||
|
||||
asmlinkage int _lz4_decompress_asm_noprfm(uint8_t **dst_ptr, uint8_t *dst_begin,
|
||||
uint8_t *dst_end, const uint8_t **src_ptr,
|
||||
const uint8_t *src_end, bool dip);
|
||||
|
||||
/* NEON-accelerated decode is only allowed when SIMD is usable here. */
static inline int lz4_decompress_accel_enable(void)
{
	return may_use_simd();
}
|
||||
|
||||
extern int (*lz4_decompress_asm_fn[])(uint8_t **dst_ptr, uint8_t *dst_begin,
|
||||
uint8_t *dst_end, const uint8_t **src_ptr,
|
||||
const uint8_t *src_end, bool dip);
|
||||
|
||||
/*
 * Run the per-CPU asm decompressor inside a kernel NEON context.
 * kernel_neon_begin() also disables preemption, keeping
 * smp_processor_id() stable across the indirect call.
 */
static inline ssize_t lz4_decompress_asm(
	uint8_t **dst_ptr, uint8_t *dst_begin, uint8_t *dst_end,
	const uint8_t **src_ptr, const uint8_t *src_end, bool dip)
{
	int ret;

	kernel_neon_begin();
	ret = lz4_decompress_asm_fn[smp_processor_id()](dst_ptr, dst_begin,
							dst_end, src_ptr,
							src_end, dip);
	kernel_neon_end();
	return (ssize_t)ret;
}
|
||||
|
||||
#define __ARCH_HAS_LZ4_ACCELERATOR
|
||||
|
||||
#else
|
||||
|
||||
/* No asm accelerator available on this configuration. */
static inline int lz4_decompress_accel_enable(void)
{
	return 0;
}
|
||||
|
||||
/* Stub for builds without the accelerator; never called in practice
 * because lz4_decompress_accel_enable() returns 0 above. */
static inline ssize_t lz4_decompress_asm(
	uint8_t **dst_ptr, uint8_t *dst_begin, uint8_t *dst_end,
	const uint8_t **src_ptr, const uint8_t *src_end, bool dip)
{
	return 0;
}
|
||||
#endif
|
||||
312
lib/lz4/lz4armv8/lz4armv8.S
Normal file
312
lib/lz4/lz4armv8/lz4armv8.S
Normal file
@@ -0,0 +1,312 @@
|
||||
/*
|
||||
* lz4armv8.S
|
||||
* LZ4 decompression optimization based on arm64 NEON instruction
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
#include <linux/version.h>
|
||||
|
||||
/**
|
||||
* _lz4_decompress_asm: The fast LZ4 decompression, lz4 decompression algothrim asm
|
||||
* routine,support Huawei EROFS filesystem striving for maximum decompression speed.
|
||||
* Entry point _lz4_decompress_asm.
|
||||
* @para:
|
||||
* x0 = current destination address ptr
|
||||
* x1 = destination start position
|
||||
* x2 = destination end position
|
||||
* x3 = current source address ptr
|
||||
* x4 = source end position
|
||||
* x5 = flag for DIP
|
||||
* @ret:
|
||||
* 0 on success, -1 on failure
|
||||
*
|
||||
* x7: match_length
|
||||
* x8: literal_legth
|
||||
* x9: copy start ptr
|
||||
* x10: copy end ptr
|
||||
*/
|
||||
|
||||
|
||||
#define match_length x7
|
||||
#define literal_length x8
|
||||
#define copy_from_ptr x9 /* copy source ptr*/
|
||||
#define copy_to_ptr x10 /* copy destination ptr*/
|
||||
#define w_tmp w11 /* temp var */
|
||||
#define tmp x11
|
||||
#define w_offset w12
|
||||
#define offset x12
|
||||
#define permtable_addr x13
|
||||
#define cplen_table_addr x14
|
||||
#define save_dst x15
|
||||
#define save_src x16
|
||||
#define offset_src_ptr x17
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(5, 4, 0)
|
||||
#define w_tmp_match_length w6
|
||||
#define tmp_match_length x6
|
||||
#else
|
||||
#define w_tmp_match_length w18
|
||||
#define tmp_match_length x18
|
||||
#endif
|
||||
|
||||
/* x3 >= x4 src overflow */
|
||||
.macro check_src_overflow
|
||||
cmp x3, x4
|
||||
b.hs Done
|
||||
.endm
|
||||
|
||||
.macro check_src_overflow1
|
||||
cmp x3, x4
|
||||
b.hs Done1
|
||||
.endm
|
||||
/* x0 >= x2 dst overflow */
|
||||
.macro check_dst_overflow
|
||||
cmp x0, x2
|
||||
b.hs Done
|
||||
.endm
|
||||
|
||||
.macro check_dst_overflow1
|
||||
cmp x0, x2
|
||||
b.hs Done1
|
||||
.endm
|
||||
|
||||
.altmacro
|
||||
.macro lz4_decompress_asm_generic doprfm=1
|
||||
stp x29, x30, [sp, #-16]!
|
||||
mov x29, sp
|
||||
stp x3, x0, [sp, #-16]! /* push src and dst in stack */
|
||||
ldr x3, [x3] /* x3 = *src_ptr */
|
||||
ldr x0, [x0] /* x0 = *dst_ptr */
|
||||
adr_l permtable_addr, Permtable
|
||||
adr_l cplen_table_addr, Copylength_table
|
||||
|
||||
1:
|
||||
/*
|
||||
* Lz4_decompress_begin:
|
||||
* save current dst and src ,ensure when return from asm routine
|
||||
* current both of "dst" and "src" save good position.
|
||||
*/
|
||||
mov save_dst, x0
|
||||
mov save_src, x3
|
||||
|
||||
check_dst_overflow
|
||||
check_src_overflow
|
||||
|
||||
.if \doprfm
|
||||
add tmp, x0, #512
|
||||
cmp x2, tmp
|
||||
b.ls 2f
|
||||
prfm pstl2strm,[x0,#512]
|
||||
.endif
|
||||
|
||||
2:
|
||||
/* Decode_token: */
|
||||
ldrb w_tmp, [x3], #1 /* read Token Byte */
|
||||
lsr literal_length, tmp, #4 /* get literal_length */
|
||||
and tmp_match_length, tmp, #0xf /* get match_length */
|
||||
add match_length, tmp_match_length, #4 /* match_length >=4 */
|
||||
|
||||
/*
|
||||
* literal_length <= 14 : no more literal length byte,fllowing zero
|
||||
* or more bytes are liteal bytes.
|
||||
*/
|
||||
cmp literal_length, #14
|
||||
b.ls 6f
|
||||
|
||||
/*
|
||||
* literal_length == 15 : more literal length bytes after TokenByte.
|
||||
* continue decoding more literal length bytes.
|
||||
*/
|
||||
3:
|
||||
/* Get_literal_length: */
|
||||
check_src_overflow
|
||||
ldrb w_tmp, [x3], #1
|
||||
add literal_length, literal_length, tmp
|
||||
cmp tmp, #255
|
||||
b.eq 3b
|
||||
|
||||
/* literal copy */
|
||||
4:
|
||||
/* Copy_long_literal_hs_15: */
|
||||
mov copy_from_ptr, x3
|
||||
mov copy_to_ptr, x0
|
||||
add x3, x3, literal_length
|
||||
add x0, x0, literal_length
|
||||
check_dst_overflow
|
||||
check_src_overflow
|
||||
|
||||
5:
|
||||
/* Copy_long_literal_loop: */
|
||||
ldr q0, [copy_from_ptr], #16
|
||||
str q0, [copy_to_ptr], #16
|
||||
|
||||
cmp x0, copy_to_ptr
|
||||
b.ls 7f
|
||||
b 5b
|
||||
|
||||
6:
|
||||
/* Copy_literal_lt_15: */
|
||||
ldr q0, [x3]
|
||||
str q0, [x0]
|
||||
add x3, x3, literal_length
|
||||
add x0, x0, literal_length
|
||||
|
||||
/* Decode offset and match_length */
|
||||
7:
|
||||
/* Decode_offset_matchlength: */
|
||||
mov offset_src_ptr, x3
|
||||
ldrh w_offset, [x3], #2 /* 2Byte: offset bytes */
|
||||
cbz offset, Failed /* match_length == 0 is invalid */
|
||||
sub copy_from_ptr, x0, offset
|
||||
cmp copy_from_ptr, x1
|
||||
b.lo Failed
|
||||
mov copy_to_ptr, x0
|
||||
/*
|
||||
* set x0 to the end of "match copy";
|
||||
*/
|
||||
add x0, x0, match_length
|
||||
cmp match_length, #19
|
||||
b.lo 9f
|
||||
|
||||
/*
|
||||
* continue decoding more match length bytes.
|
||||
*/
|
||||
8:
|
||||
/* Get_long_matchlength: */
|
||||
check_src_overflow1
|
||||
ldrb w_tmp, [x3], #1
|
||||
add x0, x0, tmp
|
||||
add match_length, match_length, tmp
|
||||
cmp tmp, #255
|
||||
b.eq 8b
|
||||
|
||||
/*
|
||||
* here got the matchlength,start "match copy".
|
||||
*/
|
||||
9:
|
||||
/* Copy_match_begin: */
|
||||
check_dst_overflow1
|
||||
cmp offset , match_length
|
||||
b.hs 14f
|
||||
|
||||
10:
|
||||
/* Cond_offset_lt_matchlength: */
|
||||
cmp offset , #32
|
||||
b.hs 14f
|
||||
|
||||
11:
|
||||
/* Copy_offset_lt_32: */
|
||||
ldr q1, [copy_from_ptr]
|
||||
add tmp, permtable_addr, offset, lsl #5
|
||||
ldp q2, q3, [tmp]
|
||||
tbl v0.16b, {v1.16b}, v2.16b
|
||||
tbl v1.16b, {v1.16b}, v3.16b
|
||||
cmp offset , #16
|
||||
b.lo 12f
|
||||
ldp q0, q1, [copy_from_ptr]
|
||||
|
||||
12:
|
||||
/* Copy_match_perm: */
|
||||
ldrb w_tmp, [cplen_table_addr, offset]
|
||||
stp q0, q1, [copy_to_ptr]
|
||||
add copy_to_ptr, copy_to_ptr, tmp
|
||||
cmp x0, copy_to_ptr
|
||||
b.ls 1b
|
||||
|
||||
13:
|
||||
/* Copy_offset_lt_32_loop: */
|
||||
stp q0, q1, [copy_to_ptr]
|
||||
add copy_to_ptr, copy_to_ptr, tmp
|
||||
stp q0, q1, [copy_to_ptr]
|
||||
add copy_to_ptr, copy_to_ptr, tmp
|
||||
cmp x0, copy_to_ptr
|
||||
b.hi 13b
|
||||
b 1b
|
||||
|
||||
/* offset >= match */
|
||||
14:
|
||||
/* Cond_offset_ge_matchlength: */
|
||||
ldr q0, [copy_from_ptr], #16
|
||||
str q0, [copy_to_ptr], #16
|
||||
|
||||
cmp x0, copy_to_ptr
|
||||
b.ls 1b
|
||||
|
||||
15:
|
||||
/* Copy_offset_ge_match_loop: */
|
||||
ldp q0, q1, [copy_from_ptr], #32
|
||||
stp q0, q1, [copy_to_ptr], #32
|
||||
|
||||
cmp x0, copy_to_ptr
|
||||
b.hi 15b
|
||||
b 1b
|
||||
.endm
|
||||
|
||||
.text
|
||||
.p2align 4
|
||||
|
||||
ENTRY(_lz4_decompress_asm)
|
||||
lz4_decompress_asm_generic
|
||||
ENDPROC(_lz4_decompress_asm)
|
||||
|
||||
Failed:
|
||||
mov tmp, #-1
|
||||
b Exit_here
|
||||
|
||||
Done1:
|
||||
cbz x5, Done
|
||||
sub save_src, offset_src_ptr, #1
|
||||
strb w_tmp_match_length, [save_src]
|
||||
add save_dst,save_dst,literal_length
|
||||
Done:
|
||||
mov tmp, #0
|
||||
|
||||
Exit_here:
|
||||
ldp x3, x0, [sp], #16
|
||||
str save_src, [x3]
|
||||
str save_dst, [x0]
|
||||
mov x0, tmp
|
||||
ldp x29, x30, [sp], #16
|
||||
ret x30
|
||||
|
||||
/*
|
||||
* In case of offset <= 31 < matchlength ,expand the pattern and store in
|
||||
* repeating pattern size(RPS),store the RPS in Copylength_table.
|
||||
* case 1): 1 <= offset <= 15
|
||||
* expand the pattern according to the Permtable and store their repeating pattern in q0 q1;
|
||||
* RPS = 32 - (32 % offset) offset <= 31
|
||||
* case 2): offset >= 16
|
||||
* read the pattern and store in q0 q1.
|
||||
* RPS = offset.
|
||||
*/
|
||||
.pushsection ".rodata", "a"
|
||||
.p2align 8
|
||||
Permtable:
|
||||
.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 //offset = 0
|
||||
.byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 //offset = 1
|
||||
.byte 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 //offset = 2
|
||||
.byte 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1 //offset = 3
|
||||
.byte 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 //offset = 4
|
||||
.byte 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1 //offset = 5
|
||||
.byte 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1 //offset = 6
|
||||
.byte 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3 //offset = 7
|
||||
.byte 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7 //offset = 8
|
||||
.byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4 //offset = 9
|
||||
.byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1 //offset = 10
|
||||
.byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 //offset = 11
|
||||
.byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11, 0, 1, 2, 3, 4, 5, 6, 7 //offset = 12
|
||||
.byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12, 0, 1, 2, 3, 4, 5 //offset = 13
|
||||
.byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13, 0, 1, 2, 3 //offset = 14
|
||||
.byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, 0, 1 //offset = 15
|
||||
|
||||
.p2align 8
|
||||
Copylength_table:
|
||||
.byte 32,32,32,30,32,30,30,28,32,27,30,22,24,26,28,30 // 0 .. 15
|
||||
.byte 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 // 16 .. 31
|
||||
.popsection
|
||||
|
||||
.text
|
||||
.p2align 4
|
||||
ENTRY(_lz4_decompress_asm_noprfm)
|
||||
lz4_decompress_asm_generic 0
|
||||
ENDPROC(_lz4_decompress_asm_noprfm)
|
||||
@@ -1,227 +0,0 @@
|
||||
#ifndef __LZ4DEFS_H__
|
||||
#define __LZ4DEFS_H__
|
||||
|
||||
/*
|
||||
* lz4defs.h -- common and architecture specific defines for the kernel usage
|
||||
|
||||
* LZ4 - Fast LZ compression algorithm
|
||||
* Copyright (C) 2011-2016, Yann Collet.
|
||||
* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* You can contact the author at :
|
||||
* - LZ4 homepage : http://www.lz4.org
|
||||
* - LZ4 source repository : https://github.com/lz4/lz4
|
||||
*
|
||||
* Changed for kernel usage by:
|
||||
* Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
|
||||
*/
|
||||
|
||||
#include <asm/unaligned.h>
|
||||
#include <linux/string.h> /* memset, memcpy */
|
||||
|
||||
#define FORCE_INLINE __always_inline
|
||||
|
||||
/*-************************************
|
||||
* Basic Types
|
||||
**************************************/
|
||||
#include <linux/types.h>
|
||||
|
||||
typedef uint8_t BYTE;
|
||||
typedef uint16_t U16;
|
||||
typedef uint32_t U32;
|
||||
typedef int32_t S32;
|
||||
typedef uint64_t U64;
|
||||
typedef uintptr_t uptrval;
|
||||
|
||||
/*-************************************
|
||||
* Architecture specifics
|
||||
**************************************/
|
||||
#if defined(CONFIG_64BIT)
|
||||
#define LZ4_ARCH64 1
|
||||
#else
|
||||
#define LZ4_ARCH64 0
|
||||
#endif
|
||||
|
||||
#if defined(__LITTLE_ENDIAN)
|
||||
#define LZ4_LITTLE_ENDIAN 1
|
||||
#else
|
||||
#define LZ4_LITTLE_ENDIAN 0
|
||||
#endif
|
||||
|
||||
/*-************************************
|
||||
* Constants
|
||||
**************************************/
|
||||
#define MINMATCH 4
|
||||
|
||||
#define WILDCOPYLENGTH 8
|
||||
#define LASTLITERALS 5
|
||||
#define MFLIMIT (WILDCOPYLENGTH + MINMATCH)
|
||||
|
||||
/* Increase this value ==> compression run slower on incompressible data */
|
||||
#define LZ4_SKIPTRIGGER 6
|
||||
|
||||
#define HASH_UNIT sizeof(size_t)
|
||||
|
||||
#define KB (1 << 10)
|
||||
#define MB (1 << 20)
|
||||
#define GB (1U << 30)
|
||||
|
||||
#define MAXD_LOG 16
|
||||
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
|
||||
#define STEPSIZE sizeof(size_t)
|
||||
|
||||
#define ML_BITS 4
|
||||
#define ML_MASK ((1U << ML_BITS) - 1)
|
||||
#define RUN_BITS (8 - ML_BITS)
|
||||
#define RUN_MASK ((1U << RUN_BITS) - 1)
|
||||
|
||||
/*-************************************
|
||||
* Reading and writing into memory
|
||||
**************************************/
|
||||
static FORCE_INLINE U16 LZ4_read16(const void *ptr)
|
||||
{
|
||||
return get_unaligned((const U16 *)ptr);
|
||||
}
|
||||
|
||||
static FORCE_INLINE U32 LZ4_read32(const void *ptr)
|
||||
{
|
||||
return get_unaligned((const U32 *)ptr);
|
||||
}
|
||||
|
||||
static FORCE_INLINE size_t LZ4_read_ARCH(const void *ptr)
|
||||
{
|
||||
return get_unaligned((const size_t *)ptr);
|
||||
}
|
||||
|
||||
static FORCE_INLINE void LZ4_write16(void *memPtr, U16 value)
|
||||
{
|
||||
put_unaligned(value, (U16 *)memPtr);
|
||||
}
|
||||
|
||||
static FORCE_INLINE void LZ4_write32(void *memPtr, U32 value)
|
||||
{
|
||||
put_unaligned(value, (U32 *)memPtr);
|
||||
}
|
||||
|
||||
static FORCE_INLINE U16 LZ4_readLE16(const void *memPtr)
|
||||
{
|
||||
return get_unaligned_le16(memPtr);
|
||||
}
|
||||
|
||||
static FORCE_INLINE void LZ4_writeLE16(void *memPtr, U16 value)
|
||||
{
|
||||
return put_unaligned_le16(value, memPtr);
|
||||
}
|
||||
|
||||
static FORCE_INLINE void LZ4_copy8(void *dst, const void *src)
|
||||
{
|
||||
#if LZ4_ARCH64
|
||||
U64 a = get_unaligned((const U64 *)src);
|
||||
|
||||
put_unaligned(a, (U64 *)dst);
|
||||
#else
|
||||
U32 a = get_unaligned((const U32 *)src);
|
||||
U32 b = get_unaligned((const U32 *)src + 1);
|
||||
|
||||
put_unaligned(a, (U32 *)dst);
|
||||
put_unaligned(b, (U32 *)dst + 1);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* customized variant of memcpy,
|
||||
* which can overwrite up to 7 bytes beyond dstEnd
|
||||
*/
|
||||
static FORCE_INLINE void LZ4_wildCopy(void *dstPtr,
|
||||
const void *srcPtr, void *dstEnd)
|
||||
{
|
||||
BYTE *d = (BYTE *)dstPtr;
|
||||
const BYTE *s = (const BYTE *)srcPtr;
|
||||
BYTE *const e = (BYTE *)dstEnd;
|
||||
|
||||
do {
|
||||
LZ4_copy8(d, s);
|
||||
d += 8;
|
||||
s += 8;
|
||||
} while (d < e);
|
||||
}
|
||||
|
||||
static FORCE_INLINE unsigned int LZ4_NbCommonBytes(register size_t val)
|
||||
{
|
||||
#if LZ4_LITTLE_ENDIAN
|
||||
return __ffs(val) >> 3;
|
||||
#else
|
||||
return (BITS_PER_LONG - 1 - __fls(val)) >> 3;
|
||||
#endif
|
||||
}
|
||||
|
||||
static FORCE_INLINE unsigned int LZ4_count(
|
||||
const BYTE *pIn,
|
||||
const BYTE *pMatch,
|
||||
const BYTE *pInLimit)
|
||||
{
|
||||
const BYTE *const pStart = pIn;
|
||||
|
||||
while (likely(pIn < pInLimit - (STEPSIZE - 1))) {
|
||||
size_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
|
||||
|
||||
if (!diff) {
|
||||
pIn += STEPSIZE;
|
||||
pMatch += STEPSIZE;
|
||||
continue;
|
||||
}
|
||||
|
||||
pIn += LZ4_NbCommonBytes(diff);
|
||||
|
||||
return (unsigned int)(pIn - pStart);
|
||||
}
|
||||
|
||||
#if LZ4_ARCH64
|
||||
if ((pIn < (pInLimit - 3))
|
||||
&& (LZ4_read32(pMatch) == LZ4_read32(pIn))) {
|
||||
pIn += 4;
|
||||
pMatch += 4;
|
||||
}
|
||||
#endif
|
||||
|
||||
if ((pIn < (pInLimit - 1))
|
||||
&& (LZ4_read16(pMatch) == LZ4_read16(pIn))) {
|
||||
pIn += 2;
|
||||
pMatch += 2;
|
||||
}
|
||||
|
||||
if ((pIn < pInLimit) && (*pMatch == *pIn))
|
||||
pIn++;
|
||||
|
||||
return (unsigned int)(pIn - pStart);
|
||||
}
|
||||
|
||||
typedef enum { noLimit = 0, limitedOutput = 1 } limitedOutput_directive;
|
||||
typedef enum { byPtr, byU32, byU16 } tableType_t;
|
||||
|
||||
typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
|
||||
typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
|
||||
|
||||
typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
|
||||
typedef enum { full = 0, partial = 1 } earlyEnd_directive;
|
||||
|
||||
#endif
|
||||
2798
lib/lz4/lz4hc.c
Normal file
2798
lib/lz4/lz4hc.c
Normal file
File diff suppressed because it is too large
Load Diff
451
lib/lz4/lz4hc.h
Normal file
451
lib/lz4/lz4hc.h
Normal file
@@ -0,0 +1,451 @@
|
||||
/*
|
||||
LZ4 HC - High Compression Mode of LZ4
|
||||
Header File
|
||||
Copyright (C) 2011-2020, Yann Collet.
|
||||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
You can contact the author at :
|
||||
- LZ4 source repository : https://github.com/lz4/lz4
|
||||
- LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
|
||||
*/
|
||||
#ifndef LZ4_HC_H_19834876238432
|
||||
#define LZ4_HC_H_19834876238432
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* --- Dependency --- */
|
||||
/* note : lz4hc requires lz4.h/lz4.c for compilation */
|
||||
#include "lz4.h" /* stddef, LZ4LIB_API, LZ4_DEPRECATED */
|
||||
|
||||
/* --- Useful constants --- */
|
||||
#define LZ4HC_CLEVEL_MIN 2
|
||||
#define LZ4HC_CLEVEL_DEFAULT 9
|
||||
#define LZ4HC_CLEVEL_OPT_MIN 10
|
||||
#define LZ4HC_CLEVEL_MAX 12
|
||||
|
||||
/*-************************************
|
||||
* Block Compression
|
||||
**************************************/
|
||||
/*! LZ4_compress_HC() :
|
||||
* Compress data from `src` into `dst`, using the powerful but slower "HC" algorithm.
|
||||
* `dst` must be already allocated.
|
||||
* Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
|
||||
* Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
|
||||
* `compressionLevel` : any value between 1 and LZ4HC_CLEVEL_MAX will work.
|
||||
* Values > LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX.
|
||||
* @return : the number of bytes written into 'dst'
|
||||
* or 0 if compression fails.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_HC(const char *src, char *dst, int srcSize,
|
||||
int dstCapacity, int compressionLevel,
|
||||
void *wrkmem);
|
||||
|
||||
/* Note :
|
||||
* Decompression functions are provided within "lz4.h" (BSD license)
|
||||
*/
|
||||
|
||||
/*! LZ4_compress_HC_extStateHC() :
|
||||
* Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`.
|
||||
* `state` size is provided by LZ4_sizeofStateHC().
|
||||
* Memory segment must be aligned on 8-bytes boundaries (which a normal malloc() should do properly).
|
||||
*/
|
||||
LZ4LIB_API int LZ4_sizeofStateHC(void);
|
||||
LZ4LIB_API int LZ4_compress_HC_extStateHC(void *stateHC, const char *src,
|
||||
char *dst, int srcSize,
|
||||
int maxDstSize, int compressionLevel);
|
||||
|
||||
/*! LZ4_compress_HC_destSize() : v1.9.0+
|
||||
* Will compress as much data as possible from `src`
|
||||
* to fit into `targetDstSize` budget.
|
||||
* Result is provided in 2 parts :
|
||||
* @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
|
||||
* or 0 if compression fails.
|
||||
* `srcSizePtr` : on success, *srcSizePtr is updated to indicate how much bytes were read from `src`
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_HC_destSize(void *stateHC, const char *src,
|
||||
char *dst, int *srcSizePtr,
|
||||
int targetDstSize,
|
||||
int compressionLevel);
|
||||
|
||||
/*-************************************
|
||||
* Streaming Compression
|
||||
* Bufferless synchronous API
|
||||
**************************************/
|
||||
typedef union LZ4_streamHC_u LZ4_streamHC_t; /* incomplete type (defined later) */
|
||||
|
||||
/*! LZ4_createStreamHC() and LZ4_freeStreamHC() :
|
||||
* These functions create and release memory for LZ4 HC streaming state.
|
||||
* Newly created states are automatically initialized.
|
||||
* A same state can be used multiple times consecutively,
|
||||
* starting with LZ4_resetStreamHC_fast() to start a new stream of blocks.
|
||||
*/
|
||||
LZ4LIB_API LZ4_streamHC_t *LZ4_createStreamHC(void);
|
||||
LZ4LIB_API int LZ4_freeStreamHC(LZ4_streamHC_t *streamHCPtr);
|
||||
|
||||
/*
|
||||
These functions compress data in successive blocks of any size,
|
||||
using previous blocks as dictionary, to improve compression ratio.
|
||||
One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
|
||||
There is an exception for ring buffers, which can be smaller than 64 KB.
|
||||
Ring-buffer scenario is automatically detected and handled within LZ4_compress_HC_continue().
|
||||
|
||||
Before starting compression, state must be allocated and properly initialized.
|
||||
LZ4_createStreamHC() does both, though compression level is set to LZ4HC_CLEVEL_DEFAULT.
|
||||
|
||||
Selecting the compression level can be done with LZ4_resetStreamHC_fast() (starts a new stream)
|
||||
or LZ4_setCompressionLevel() (anytime, between blocks in the same stream) (experimental).
|
||||
LZ4_resetStreamHC_fast() only works on states which have been properly initialized at least once,
|
||||
which is automatically the case when state is created using LZ4_createStreamHC().
|
||||
|
||||
After reset, a first "fictional block" can be designated as initial dictionary,
|
||||
using LZ4_loadDictHC() (Optional).
|
||||
Note: In order for LZ4_loadDictHC() to create the correct data structure,
|
||||
it is essential to set the compression level _before_ loading the dictionary.
|
||||
|
||||
Invoke LZ4_compress_HC_continue() to compress each successive block.
|
||||
The number of blocks is unlimited.
|
||||
Previous input blocks, including initial dictionary when present,
|
||||
must remain accessible and unmodified during compression.
|
||||
|
||||
It's allowed to update compression level anytime between blocks,
|
||||
using LZ4_setCompressionLevel() (experimental).
|
||||
|
||||
@dst buffer should be sized to handle worst case scenarios
|
||||
(see LZ4_compressBound(), it ensures compression success).
|
||||
In case of failure, the API does not guarantee recovery,
|
||||
so the state _must_ be reset.
|
||||
To ensure compression success
|
||||
whenever @dst buffer size cannot be made >= LZ4_compressBound(),
|
||||
consider using LZ4_compress_HC_continue_destSize().
|
||||
|
||||
Whenever previous input blocks can't be preserved unmodified in-place during compression of next blocks,
|
||||
it's possible to copy the last blocks into a more stable memory space, using LZ4_saveDictHC().
|
||||
Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer' (<= 64 KB)
|
||||
|
||||
After completing a streaming compression,
|
||||
it's possible to start a new stream of blocks, using the same LZ4_streamHC_t state,
|
||||
just by resetting it, using LZ4_resetStreamHC_fast().
|
||||
*/
|
||||
|
||||
LZ4LIB_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t *streamHCPtr,
|
||||
int compressionLevel); /* v1.9.0+ */
|
||||
LZ4LIB_API int LZ4_loadDictHC(LZ4_streamHC_t *streamHCPtr,
|
||||
const char *dictionary, int dictSize);
|
||||
|
||||
LZ4LIB_API int LZ4_compress_HC_continue(LZ4_streamHC_t *streamHCPtr,
|
||||
const char *src, char *dst, int srcSize,
|
||||
int maxDstSize);
|
||||
|
||||
/*! LZ4_compress_HC_continue_destSize() : v1.9.0+
|
||||
* Similar to LZ4_compress_HC_continue(),
|
||||
* but will read as much data as possible from `src`
|
||||
* to fit into `targetDstSize` budget.
|
||||
* Result is provided into 2 parts :
|
||||
* @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
|
||||
* or 0 if compression fails.
|
||||
* `srcSizePtr` : on success, *srcSizePtr will be updated to indicate how much bytes were read from `src`.
|
||||
* Note that this function may not consume the entire input.
|
||||
*/
|
||||
LZ4LIB_API int
|
||||
LZ4_compress_HC_continue_destSize(LZ4_streamHC_t *LZ4_streamHCPtr,
|
||||
const char *src, char *dst, int *srcSizePtr,
|
||||
int targetDstSize);
|
||||
|
||||
LZ4LIB_API int LZ4_saveDictHC(LZ4_streamHC_t *streamHCPtr, char *safeBuffer,
|
||||
int maxDictSize);
|
||||
|
||||
/*! LZ4_attach_HC_dictionary() : stable since v1.10.0
|
||||
* This API allows for the efficient re-use of a static dictionary many times.
|
||||
*
|
||||
* Rather than re-loading the dictionary buffer into a working context before
|
||||
* each compression, or copying a pre-loaded dictionary's LZ4_streamHC_t into a
|
||||
* working LZ4_streamHC_t, this function introduces a no-copy setup mechanism,
|
||||
* in which the working stream references the dictionary stream in-place.
|
||||
*
|
||||
* Several assumptions are made about the state of the dictionary stream.
|
||||
* Currently, only streams which have been prepared by LZ4_loadDictHC() should
|
||||
* be expected to work.
|
||||
*
|
||||
* Alternatively, the provided dictionary stream pointer may be NULL, in which
|
||||
* case any existing dictionary stream is unset.
|
||||
*
|
||||
* A dictionary should only be attached to a stream without any history (i.e.,
|
||||
* a stream that has just been reset).
|
||||
*
|
||||
* The dictionary will remain attached to the working stream only for the
|
||||
* current stream session. Calls to LZ4_resetStreamHC(_fast) will remove the
|
||||
* dictionary context association from the working stream. The dictionary
|
||||
* stream (and source buffer) must remain in-place / accessible / unchanged
|
||||
* through the lifetime of the stream session.
|
||||
*/
|
||||
LZ4LIB_API void
|
||||
LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream,
|
||||
const LZ4_streamHC_t *dictionary_stream);
|
||||
|
||||
/*^**********************************************
|
||||
* !!!!!! STATIC LINKING ONLY !!!!!!
|
||||
***********************************************/
|
||||
|
||||
/*-******************************************************************
|
||||
* PRIVATE DEFINITIONS :
|
||||
* Do not use these definitions directly.
|
||||
* They are merely exposed to allow static allocation of `LZ4_streamHC_t`.
|
||||
* Declare an `LZ4_streamHC_t` directly, rather than any type below.
|
||||
* Even then, only do so in the context of static linking, as definitions may change between versions.
|
||||
********************************************************************/
|
||||
|
||||
#define LZ4HC_DICTIONARY_LOGSIZE 16
|
||||
#define LZ4HC_MAXD (1 << LZ4HC_DICTIONARY_LOGSIZE)
|
||||
#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
|
||||
|
||||
#define LZ4HC_HASH_LOG 15
|
||||
#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
|
||||
#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
|
||||
|
||||
/* Never ever use these definitions directly !
|
||||
* Declare or allocate an LZ4_streamHC_t instead.
|
||||
**/
|
||||
typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
|
||||
struct LZ4HC_CCtx_internal {
|
||||
LZ4_u32 hashTable[LZ4HC_HASHTABLESIZE];
|
||||
LZ4_u16 chainTable[LZ4HC_MAXD];
|
||||
const LZ4_byte *end; /* next block here to continue on current prefix */
|
||||
const LZ4_byte *prefixStart; /* Indexes relative to this position */
|
||||
const LZ4_byte *dictStart; /* alternate reference for extDict */
|
||||
LZ4_u32 dictLimit; /* below that point, need extDict */
|
||||
LZ4_u32 lowLimit; /* below that point, no more history */
|
||||
LZ4_u32 nextToUpdate; /* index from which to continue dictionary update */
|
||||
short compressionLevel;
|
||||
LZ4_i8 favorDecSpeed; /* favor decompression speed if this flag set,
|
||||
otherwise, favor compression ratio */
|
||||
LZ4_i8 dirty; /* stream has to be fully reset if this flag is set */
|
||||
const LZ4HC_CCtx_internal *dictCtx;
|
||||
};
|
||||
|
||||
#define LZ4_STREAMHC_MINSIZE \
|
||||
262200 /* static size, for inter-version compatibility */
|
||||
union LZ4_streamHC_u {
|
||||
char minStateSize[LZ4_STREAMHC_MINSIZE];
|
||||
LZ4HC_CCtx_internal internal_donotuse;
|
||||
}; /* previously typedef'd to LZ4_streamHC_t */
|
||||
|
||||
/* LZ4_streamHC_t :
|
||||
* This structure allows static allocation of LZ4 HC streaming state.
|
||||
* This can be used to allocate statically on stack, or as part of a larger structure.
|
||||
*
|
||||
* Such state **must** be initialized using LZ4_initStreamHC() before first use.
|
||||
*
|
||||
* Note that invoking LZ4_initStreamHC() is not required when
|
||||
* the state was created using LZ4_createStreamHC() (which is recommended).
|
||||
* Using the normal builder, a newly created state is automatically initialized.
|
||||
*
|
||||
* Static allocation shall only be used in combination with static linking.
|
||||
*/
|
||||
|
||||
/* LZ4_initStreamHC() : v1.9.0+
|
||||
* Required before first use of a statically allocated LZ4_streamHC_t.
|
||||
* Before v1.9.0 : use LZ4_resetStreamHC() instead
|
||||
*/
|
||||
LZ4LIB_API LZ4_streamHC_t *LZ4_initStreamHC(void *buffer, size_t size);
|
||||
|
||||
/*-************************************
|
||||
* Deprecated Functions
|
||||
**************************************/
|
||||
/* see lz4.h LZ4_DISABLE_DEPRECATE_WARNINGS to turn off deprecation warnings */
|
||||
|
||||
/* deprecated compression functions */
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC() instead")
|
||||
LZ4LIB_API int LZ4_compressHC(const char *source, char *dest, int inputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC() instead")
|
||||
LZ4LIB_API int LZ4_compressHC_limitedOutput(const char *source, char *dest,
|
||||
int inputSize, int maxOutputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC() instead")
|
||||
LZ4LIB_API int LZ4_compressHC2(const char *source, char *dest, int inputSize,
|
||||
int compressionLevel);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC() instead")
|
||||
LZ4LIB_API int LZ4_compressHC2_limitedOutput(const char *source, char *dest,
|
||||
int inputSize, int maxOutputSize,
|
||||
int compressionLevel);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead")
|
||||
LZ4LIB_API int LZ4_compressHC_withStateHC(void *state, const char *source,
|
||||
char *dest, int inputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead")
|
||||
LZ4LIB_API
|
||||
int LZ4_compressHC_limitedOutput_withStateHC(void *state, const char *source,
|
||||
char *dest, int inputSize,
|
||||
int maxOutputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead")
|
||||
LZ4LIB_API int LZ4_compressHC2_withStateHC(void *state, const char *source,
|
||||
char *dest, int inputSize,
|
||||
int compressionLevel);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_extStateHC() instead")
|
||||
LZ4LIB_API
|
||||
int LZ4_compressHC2_limitedOutput_withStateHC(void *state, const char *source,
|
||||
char *dest, int inputSize,
|
||||
int maxOutputSize,
|
||||
int compressionLevel);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead")
|
||||
LZ4LIB_API int LZ4_compressHC_continue(LZ4_streamHC_t *LZ4_streamHCPtr,
|
||||
const char *source, char *dest,
|
||||
int inputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead")
|
||||
LZ4LIB_API int
|
||||
LZ4_compressHC_limitedOutput_continue(LZ4_streamHC_t *LZ4_streamHCPtr,
|
||||
const char *source, char *dest,
|
||||
int inputSize, int maxOutputSize);
|
||||
|
||||
/* Obsolete streaming functions; degraded functionality; do not use!
|
||||
*
|
||||
* In order to perform streaming compression, these functions depended on data
|
||||
* that is no longer tracked in the state. They have been preserved as well as
|
||||
* possible: using them will still produce a correct output. However, use of
|
||||
* LZ4_slideInputBufferHC() will truncate the history of the stream, rather
|
||||
* than preserve a window-sized chunk of history.
|
||||
*/
|
||||
#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
|
||||
LZ4_DEPRECATED("use LZ4_createStreamHC() instead")
|
||||
LZ4LIB_API void *LZ4_createHC(const char *inputBuffer);
|
||||
LZ4_DEPRECATED("use LZ4_freeStreamHC() instead")
|
||||
LZ4LIB_API int LZ4_freeHC(void *LZ4HC_Data);
|
||||
#endif
|
||||
LZ4_DEPRECATED("use LZ4_saveDictHC() instead")
|
||||
LZ4LIB_API char *LZ4_slideInputBufferHC(void *LZ4HC_Data);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead")
|
||||
LZ4LIB_API int LZ4_compressHC2_continue(void *LZ4HC_Data, const char *source,
|
||||
char *dest, int inputSize,
|
||||
int compressionLevel);
|
||||
LZ4_DEPRECATED("use LZ4_compress_HC_continue() instead")
|
||||
LZ4LIB_API int LZ4_compressHC2_limitedOutput_continue(void *LZ4HC_Data,
|
||||
const char *source,
|
||||
char *dest, int inputSize,
|
||||
int maxOutputSize,
|
||||
int compressionLevel);
|
||||
LZ4_DEPRECATED("use LZ4_createStreamHC() instead")
|
||||
LZ4LIB_API int LZ4_sizeofStreamStateHC(void);
|
||||
LZ4_DEPRECATED("use LZ4_initStreamHC() instead")
|
||||
LZ4LIB_API int LZ4_resetStreamStateHC(void *state, char *inputBuffer);
|
||||
|
||||
/* LZ4_resetStreamHC() is now replaced by LZ4_initStreamHC().
|
||||
* The intention is to emphasize the difference with LZ4_resetStreamHC_fast(),
|
||||
* which is now the recommended function to start a new stream of blocks,
|
||||
* but cannot be used to initialize a memory segment containing arbitrary garbage data.
|
||||
*
|
||||
* It is recommended to switch to LZ4_initStreamHC().
|
||||
* LZ4_resetStreamHC() will generate deprecation warnings in a future version.
|
||||
*/
|
||||
LZ4LIB_API void LZ4_resetStreamHC(LZ4_streamHC_t *streamHCPtr,
|
||||
int compressionLevel);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* LZ4_HC_H_19834876238432 */
|
||||
|
||||
/*-**************************************************
|
||||
* !!!!! STATIC LINKING ONLY !!!!!
|
||||
* Following definitions are considered experimental.
|
||||
* They should not be linked from DLL,
|
||||
* as there is no guarantee of API stability yet.
|
||||
* Prototypes will be promoted to "stable" status
|
||||
* after successful usage in real-life scenarios.
|
||||
***************************************************/
|
||||
#ifdef LZ4_HC_STATIC_LINKING_ONLY /* protection macro */
|
||||
#ifndef LZ4_HC_SLO_098092834
|
||||
#define LZ4_HC_SLO_098092834
|
||||
|
||||
#define LZ4_STATIC_LINKING_ONLY /* LZ4LIB_STATIC_API */
|
||||
#include "lz4.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*! LZ4_setCompressionLevel() : v1.8.0+ (experimental)
|
||||
* It's possible to change compression level
|
||||
* between successive invocations of LZ4_compress_HC_continue*()
|
||||
* for dynamic adaptation.
|
||||
*/
|
||||
LZ4LIB_STATIC_API void LZ4_setCompressionLevel(LZ4_streamHC_t *LZ4_streamHCPtr,
|
||||
int compressionLevel);
|
||||
|
||||
/*! LZ4_favorDecompressionSpeed() : v1.8.2+ (experimental)
|
||||
* Opt. Parser will favor decompression speed over compression ratio.
|
||||
* Only applicable to levels >= LZ4HC_CLEVEL_OPT_MIN.
|
||||
*/
|
||||
LZ4LIB_STATIC_API void
|
||||
LZ4_favorDecompressionSpeed(LZ4_streamHC_t *LZ4_streamHCPtr, int favor);
|
||||
|
||||
/*! LZ4_resetStreamHC_fast() : v1.9.0+
|
||||
* When an LZ4_streamHC_t is known to be in a internally coherent state,
|
||||
* it can often be prepared for a new compression with almost no work, only
|
||||
* sometimes falling back to the full, expensive reset that is always required
|
||||
* when the stream is in an indeterminate state (i.e., the reset performed by
|
||||
* LZ4_resetStreamHC()).
|
||||
*
|
||||
* LZ4_streamHCs are guaranteed to be in a valid state when:
|
||||
* - returned from LZ4_createStreamHC()
|
||||
* - reset by LZ4_resetStreamHC()
|
||||
* - memset(stream, 0, sizeof(LZ4_streamHC_t))
|
||||
* - the stream was in a valid state and was reset by LZ4_resetStreamHC_fast()
|
||||
* - the stream was in a valid state and was then used in any compression call
|
||||
* that returned success
|
||||
* - the stream was in an indeterminate state and was used in a compression
|
||||
* call that fully reset the state (LZ4_compress_HC_extStateHC()) and that
|
||||
* returned success
|
||||
*
|
||||
* Note:
|
||||
* A stream that was last used in a compression call that returned an error
|
||||
* may be passed to this function. However, it will be fully reset, which will
|
||||
* clear any existing history and settings from the context.
|
||||
*/
|
||||
LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t *LZ4_streamHCPtr,
|
||||
int compressionLevel);
|
||||
|
||||
/*! LZ4_compress_HC_extStateHC_fastReset() :
|
||||
* A variant of LZ4_compress_HC_extStateHC().
|
||||
*
|
||||
* Using this variant avoids an expensive initialization step. It is only safe
|
||||
* to call if the state buffer is known to be correctly initialized already
|
||||
* (see above comment on LZ4_resetStreamHC_fast() for a definition of
|
||||
* "correctly initialized"). From a high level, the difference is that this
|
||||
* function initializes the provided state with a call to
|
||||
* LZ4_resetStreamHC_fast() while LZ4_compress_HC_extStateHC() starts with a
|
||||
* call to LZ4_resetStreamHC().
|
||||
*/
|
||||
LZ4LIB_STATIC_API int
|
||||
LZ4_compress_HC_extStateHC_fastReset(void *state, const char *src, char *dst,
|
||||
int srcSize, int dstCapacity,
|
||||
int compressionLevel);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* LZ4_HC_SLO_098092834 */
|
||||
#endif /* LZ4_HC_STATIC_LINKING_ONLY */
|
||||
@@ -1,769 +0,0 @@
|
||||
/*
|
||||
* LZ4 HC - High Compression Mode of LZ4
|
||||
* Copyright (C) 2011-2015, Yann Collet.
|
||||
*
|
||||
* BSD 2 - Clause License (http://www.opensource.org/licenses/bsd - license.php)
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* You can contact the author at :
|
||||
* - LZ4 homepage : http://www.lz4.org
|
||||
* - LZ4 source repository : https://github.com/lz4/lz4
|
||||
*
|
||||
* Changed for kernel usage by:
|
||||
* Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
|
||||
*/
|
||||
|
||||
/*-************************************
|
||||
* Dependencies
|
||||
**************************************/
|
||||
#include <linux/lz4.h>
|
||||
#include "lz4defs.h"
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/string.h> /* memset */
|
||||
|
||||
/* *************************************
|
||||
* Local Constants and types
|
||||
***************************************/
|
||||
|
||||
#define OPTIMAL_ML (int)((ML_MASK - 1) + MINMATCH)
|
||||
|
||||
#define HASH_FUNCTION(i) (((i) * 2654435761U) \
|
||||
>> ((MINMATCH*8) - LZ4HC_HASH_LOG))
|
||||
#define DELTANEXTU16(p) chainTable[(U16)(p)] /* faster */
|
||||
|
||||
static U32 LZ4HC_hashPtr(const void *ptr)
|
||||
{
|
||||
return HASH_FUNCTION(LZ4_read32(ptr));
|
||||
}
|
||||
|
||||
/**************************************
|
||||
* HC Compression
|
||||
**************************************/
|
||||
/*
 * Prime an HC compression context so that 'start' appears to be preceded
 * by 64 KB of (empty) history, keeping all table indices >= 64 KB and
 * the index arithmetic in-bounds from the first byte.
 */
static void LZ4HC_init(LZ4HC_CCtx_internal *hc4, const BYTE *start)
{
	const BYTE * const virtualBase = start - 64 * KB;

	memset((void *)hc4->hashTable, 0, sizeof(hc4->hashTable));
	memset(hc4->chainTable, 0xFF, sizeof(hc4->chainTable));
	hc4->base = virtualBase;
	hc4->dictBase = virtualBase;
	hc4->end = start;
	hc4->nextToUpdate = 64 * KB;
	hc4->dictLimit = 64 * KB;
	hc4->lowLimit = 64 * KB;
}
|
||||
|
||||
/* Update chains up to ip (excluded) */
|
||||
static FORCE_INLINE void LZ4HC_Insert(LZ4HC_CCtx_internal *hc4,
|
||||
const BYTE *ip)
|
||||
{
|
||||
U16 * const chainTable = hc4->chainTable;
|
||||
U32 * const hashTable = hc4->hashTable;
|
||||
const BYTE * const base = hc4->base;
|
||||
U32 const target = (U32)(ip - base);
|
||||
U32 idx = hc4->nextToUpdate;
|
||||
|
||||
while (idx < target) {
|
||||
U32 const h = LZ4HC_hashPtr(base + idx);
|
||||
size_t delta = idx - hashTable[h];
|
||||
|
||||
if (delta > MAX_DISTANCE)
|
||||
delta = MAX_DISTANCE;
|
||||
|
||||
DELTANEXTU16(idx) = (U16)delta;
|
||||
|
||||
hashTable[h] = idx;
|
||||
idx++;
|
||||
}
|
||||
|
||||
hc4->nextToUpdate = target;
|
||||
}
|
||||
|
||||
/*
 * Walk the hash chain at ip and return the length of the longest match
 * found (0 if none), writing its position to *matchpos.  Searches both
 * the current prefix and the external dictionary segment, giving up
 * after maxNbAttempts chain links.
 */
static FORCE_INLINE int LZ4HC_InsertAndFindBestMatch(
	LZ4HC_CCtx_internal *hc4, /* Index table will be updated */
	const BYTE *ip,
	const BYTE * const iLimit,
	const BYTE **matchpos,
	const int maxNbAttempts)
{
	U16 * const chainTable = hc4->chainTable;
	U32 * const HashTable = hc4->hashTable;
	const BYTE * const base = hc4->base;
	const BYTE * const dictBase = hc4->dictBase;
	const U32 dictLimit = hc4->dictLimit;
	/* Lowest index still inside the 64 KB back-reference window. */
	const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
		? hc4->lowLimit
		: (U32)(ip - base) - (64 * KB - 1);
	U32 matchIndex;
	int nbAttempts = maxNbAttempts;
	size_t ml = 0;

	/* HC4 match finder */
	LZ4HC_Insert(hc4, ip);
	matchIndex = HashTable[LZ4HC_hashPtr(ip)];

	while ((matchIndex >= lowLimit)
		&& (nbAttempts)) {
		nbAttempts--;
		if (matchIndex >= dictLimit) {
			/* Candidate lies in the current prefix. */
			const BYTE * const match = base + matchIndex;

			/* Cheap probe at offset ml before the full compare. */
			if (*(match + ml) == *(ip + ml)
				&& (LZ4_read32(match) == LZ4_read32(ip))) {
				size_t const mlt = LZ4_count(ip + MINMATCH,
					match + MINMATCH, iLimit) + MINMATCH;

				if (mlt > ml) {
					ml = mlt;
					*matchpos = match;
				}
			}
		} else {
			/* Candidate lies in the external dictionary. */
			const BYTE * const match = dictBase + matchIndex;

			if (LZ4_read32(match) == LZ4_read32(ip)) {
				size_t mlt;
				const BYTE *vLimit = ip
					+ (dictLimit - matchIndex);

				if (vLimit > iLimit)
					vLimit = iLimit;
				mlt = LZ4_count(ip + MINMATCH,
					match + MINMATCH, vLimit) + MINMATCH;
				/* Match may continue across the segment
				 * boundary into the current prefix.
				 */
				if ((ip + mlt == vLimit)
					&& (vLimit < iLimit))
					mlt += LZ4_count(ip + mlt,
						base + dictLimit,
						iLimit);
				if (mlt > ml) {
					/* virtual matchpos */
					ml = mlt;
					*matchpos = base + matchIndex;
				}
			}
		}
		/* Step back along the chain to the previous candidate. */
		matchIndex -= DELTANEXTU16(matchIndex);
	}

	return (int)ml;
}
|
||||
|
||||
/*
 * Like LZ4HC_InsertAndFindBestMatch(), but also tries to extend
 * candidate matches *backwards* (down to iLowLimit), looking for any
 * match strictly longer than 'longest'.  On improvement, updates
 * *matchpos / *startpos and returns the new length; otherwise returns
 * 'longest' unchanged.
 */
static FORCE_INLINE int LZ4HC_InsertAndGetWiderMatch(
	LZ4HC_CCtx_internal *hc4,
	const BYTE * const ip,
	const BYTE * const iLowLimit,
	const BYTE * const iHighLimit,
	int longest,
	const BYTE **matchpos,
	const BYTE **startpos,
	const int maxNbAttempts)
{
	U16 * const chainTable = hc4->chainTable;
	U32 * const HashTable = hc4->hashTable;
	const BYTE * const base = hc4->base;
	const U32 dictLimit = hc4->dictLimit;
	const BYTE * const lowPrefixPtr = base + dictLimit;
	/* Lowest index still inside the 64 KB back-reference window. */
	const U32 lowLimit = (hc4->lowLimit + 64 * KB > (U32)(ip - base))
		? hc4->lowLimit
		: (U32)(ip - base) - (64 * KB - 1);
	const BYTE * const dictBase = hc4->dictBase;
	U32 matchIndex;
	int nbAttempts = maxNbAttempts;
	int delta = (int)(ip - iLowLimit);

	/* First Match */
	LZ4HC_Insert(hc4, ip);
	matchIndex = HashTable[LZ4HC_hashPtr(ip)];

	while ((matchIndex >= lowLimit)
		&& (nbAttempts)) {
		nbAttempts--;
		if (matchIndex >= dictLimit) {
			/* Candidate lies in the current prefix. */
			const BYTE *matchPtr = base + matchIndex;

			/* Quick filter: byte at offset 'longest' must agree
			 * or the candidate cannot beat the current best.
			 */
			if (*(iLowLimit + longest)
				== *(matchPtr - delta + longest)) {
				if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
					int mlt = MINMATCH + LZ4_count(
						ip + MINMATCH,
						matchPtr + MINMATCH,
						iHighLimit);
					int back = 0;

					/* Extend backwards (back <= 0). */
					while ((ip + back > iLowLimit)
						&& (matchPtr + back > lowPrefixPtr)
						&& (ip[back - 1] == matchPtr[back - 1]))
						back--;

					mlt -= back;

					if (mlt > longest) {
						longest = (int)mlt;
						*matchpos = matchPtr + back;
						*startpos = ip + back;
					}
				}
			}
		} else {
			/* Candidate lies in the external dictionary. */
			const BYTE * const matchPtr = dictBase + matchIndex;

			if (LZ4_read32(matchPtr) == LZ4_read32(ip)) {
				size_t mlt;
				int back = 0;
				const BYTE *vLimit = ip + (dictLimit - matchIndex);

				if (vLimit > iHighLimit)
					vLimit = iHighLimit;

				mlt = LZ4_count(ip + MINMATCH,
					matchPtr + MINMATCH, vLimit) + MINMATCH;

				/* Match may continue across the segment
				 * boundary into the current prefix.
				 */
				if ((ip + mlt == vLimit) && (vLimit < iHighLimit))
					mlt += LZ4_count(ip + mlt, base + dictLimit,
						iHighLimit);
				/* Extend backwards (back <= 0). */
				while ((ip + back > iLowLimit)
					&& (matchIndex + back > lowLimit)
					&& (ip[back - 1] == matchPtr[back - 1]))
					back--;

				mlt -= back;

				if ((int)mlt > longest) {
					longest = (int)mlt;
					*matchpos = base + matchIndex + back;
					*startpos = ip + back;
				}
			}
		}

		/* Step back along the chain to the previous candidate. */
		matchIndex -= DELTANEXTU16(matchIndex);
	}

	return longest;
}
|
||||
|
||||
/*
 * Emit one LZ4 sequence (literal run + match) at *op, consuming the
 * literals in [*anchor, *ip) and the match at 'match'.  Advances *ip,
 * *op and *anchor.  Returns 0 on success, 1 if the limited output
 * buffer would overflow.
 */
static FORCE_INLINE int LZ4HC_encodeSequence(
	const BYTE **ip,
	BYTE **op,
	const BYTE **anchor,
	int matchLength,
	const BYTE * const match,
	limitedOutput_directive limitedOutputBuffer,
	BYTE *oend)
{
	int length;
	BYTE *token;

	/* Encode Literal length */
	length = (int)(*ip - *anchor);
	token = (*op)++;

	if ((limitedOutputBuffer)
		&& ((*op + (length>>8)
			+ length + (2 + 1 + LASTLITERALS)) > oend)) {
		/* Check output limit */
		return 1;
	}
	if (length >= (int)RUN_MASK) {
		/* Long run: RUN_MASK in the token, remainder in 255-byte
		 * continuation bytes.
		 */
		int len;

		*token = (RUN_MASK<<ML_BITS);
		len = length - RUN_MASK;
		for (; len > 254 ; len -= 255)
			*(*op)++ = 255;
		*(*op)++ = (BYTE)len;
	} else
		*token = (BYTE)(length<<ML_BITS);

	/* Copy Literals */
	LZ4_wildCopy(*op, *anchor, (*op) + length);
	*op += length;

	/* Encode Offset (little-endian 16-bit back-reference) */
	LZ4_writeLE16(*op, (U16)(*ip - match));
	*op += 2;

	/* Encode MatchLength */
	length = (int)(matchLength - MINMATCH);

	if ((limitedOutputBuffer)
		&& (*op + (length>>8)
			+ (1 + LASTLITERALS) > oend)) {
		/* Check output limit */
		return 1;
	}

	if (length >= (int)ML_MASK) {
		/* Long match: ML_MASK in the token, remainder in 255-byte
		 * continuation bytes (written in pairs, then singly).
		 */
		*token += ML_MASK;
		length -= ML_MASK;

		for (; length > 509 ; length -= 510) {
			*(*op)++ = 255;
			*(*op)++ = 255;
		}

		if (length > 254) {
			length -= 255;
			*(*op)++ = 255;
		}

		*(*op)++ = (BYTE)length;
	} else
		*token += (BYTE)(length);

	/* Prepare next loop */
	*ip += matchLength;
	*anchor = *ip;

	return 0;
}
|
||||
|
||||
/*
 * Core HC compression loop: for each position, find the best match and
 * then lazily look for overlapping wider matches (up to three pending
 * candidates: ip/ref/ml, start2/ref2/ml2, start3/ref3/ml3) before
 * committing sequences.  Returns the compressed size in bytes, or 0 on
 * output-buffer overflow when 'limit' is set.
 */
static int LZ4HC_compress_generic(
	LZ4HC_CCtx_internal *const ctx,
	const char * const source,
	char * const dest,
	int const inputSize,
	int const maxOutputSize,
	int compressionLevel,
	limitedOutput_directive limit
	)
{
	const BYTE *ip = (const BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE * const iend = ip + inputSize;
	const BYTE * const mflimit = iend - MFLIMIT;
	const BYTE * const matchlimit = (iend - LASTLITERALS);

	BYTE *op = (BYTE *) dest;
	BYTE * const oend = op + maxOutputSize;

	unsigned int maxNbAttempts;
	int ml, ml2, ml3, ml0;
	const BYTE *ref = NULL;
	const BYTE *start2 = NULL;
	const BYTE *ref2 = NULL;
	const BYTE *start3 = NULL;
	const BYTE *ref3 = NULL;
	const BYTE *start0;
	const BYTE *ref0;

	/* init - clamp level, then search effort doubles per level */
	if (compressionLevel > LZ4HC_MAX_CLEVEL)
		compressionLevel = LZ4HC_MAX_CLEVEL;
	if (compressionLevel < 1)
		compressionLevel = LZ4HC_DEFAULT_CLEVEL;
	maxNbAttempts = 1 << (compressionLevel - 1);
	ctx->end += inputSize;

	ip++;

	/* Main Loop */
	while (ip < mflimit) {
		ml = LZ4HC_InsertAndFindBestMatch(ctx, ip,
			matchlimit, (&ref), maxNbAttempts);
		if (!ml) {
			ip++;
			continue;
		}

		/* saved, in case we would skip too much */
		start0 = ip;
		ref0 = ref;
		ml0 = ml;

_Search2:
		/* Look for a wider match overlapping the current one. */
		if (ip + ml < mflimit)
			ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
				ip + ml - 2, ip + 0,
				matchlimit, ml, &ref2,
				&start2, maxNbAttempts);
		else
			ml2 = ml;

		if (ml2 == ml) {
			/* No better match */
			if (LZ4HC_encodeSequence(&ip, &op,
				&anchor, ml, ref, limit, oend))
				return 0;
			continue;
		}

		if (start0 < ip) {
			if (start2 < ip + ml0) {
				/* empirical */
				ip = start0;
				ref = ref0;
				ml = ml0;
			}
		}

		/* Here, start0 == ip */
		if ((start2 - ip) < 3) {
			/* First Match too small : removed */
			ml = ml2;
			ip = start2;
			ref = ref2;
			goto _Search2;
		}

_Search3:
		/*
		 * Currently we have :
		 * ml2 > ml1, and
		 * ip1 + 3 <= ip2 (usually < ip1 + ml1)
		 */
		if ((start2 - ip) < OPTIMAL_ML) {
			/* Shrink match 1 so match 2 starts right after it. */
			int correction;
			int new_ml = ml;

			if (new_ml > OPTIMAL_ML)
				new_ml = OPTIMAL_ML;
			if (ip + new_ml > start2 + ml2 - MINMATCH)
				new_ml = (int)(start2 - ip) + ml2 - MINMATCH;

			correction = new_ml - (int)(start2 - ip);

			if (correction > 0) {
				start2 += correction;
				ref2 += correction;
				ml2 -= correction;
			}
		}
		/*
		 * Now, we have start2 = ip + new_ml,
		 * with new_ml = min(ml, OPTIMAL_ML = 18)
		 */

		if (start2 + ml2 < mflimit)
			ml3 = LZ4HC_InsertAndGetWiderMatch(ctx,
				start2 + ml2 - 3, start2,
				matchlimit, ml2, &ref3, &start3,
				maxNbAttempts);
		else
			ml3 = ml2;

		if (ml3 == ml2) {
			/* No better match : 2 sequences to encode */
			/* ip & ref are known; Now for ml */
			if (start2 < ip + ml)
				ml = (int)(start2 - ip);
			/* Now, encode 2 sequences */
			if (LZ4HC_encodeSequence(&ip, &op, &anchor,
				ml, ref, limit, oend))
				return 0;
			ip = start2;
			if (LZ4HC_encodeSequence(&ip, &op, &anchor,
				ml2, ref2, limit, oend))
				return 0;
			continue;
		}

		if (start3 < ip + ml + 3) {
			/* Not enough space for match 2 : remove it */
			if (start3 >= (ip + ml)) {
				/* can write Seq1 immediately
				 * ==> Seq2 is removed,
				 * so Seq3 becomes Seq1
				 */
				if (start2 < ip + ml) {
					int correction = (int)(ip + ml - start2);

					start2 += correction;
					ref2 += correction;
					ml2 -= correction;
					if (ml2 < MINMATCH) {
						start2 = start3;
						ref2 = ref3;
						ml2 = ml3;
					}
				}

				if (LZ4HC_encodeSequence(&ip, &op, &anchor,
					ml, ref, limit, oend))
					return 0;
				ip = start3;
				ref = ref3;
				ml = ml3;

				start0 = start2;
				ref0 = ref2;
				ml0 = ml2;
				goto _Search2;
			}

			start2 = start3;
			ref2 = ref3;
			ml2 = ml3;
			goto _Search3;
		}

		/*
		 * OK, now we have 3 ascending matches;
		 * let's write at least the first one
		 * ip & ref are known; Now for ml
		 */
		if (start2 < ip + ml) {
			if ((start2 - ip) < (int)ML_MASK) {
				int correction;

				if (ml > OPTIMAL_ML)
					ml = OPTIMAL_ML;
				if (ip + ml > start2 + ml2 - MINMATCH)
					ml = (int)(start2 - ip) + ml2 - MINMATCH;
				correction = ml - (int)(start2 - ip);
				if (correction > 0) {
					start2 += correction;
					ref2 += correction;
					ml2 -= correction;
				}
			} else
				ml = (int)(start2 - ip);
		}
		if (LZ4HC_encodeSequence(&ip, &op, &anchor, ml,
			ref, limit, oend))
			return 0;

		/* Promote match 2 -> 1 and match 3 -> 2, then keep looking. */
		ip = start2;
		ref = ref2;
		ml = ml2;

		start2 = start3;
		ref2 = ref3;
		ml2 = ml3;

		goto _Search3;
	}

	/* Encode Last Literals */
	{
		int lastRun = (int)(iend - anchor);

		if ((limit)
			&& (((char *)op - dest) + lastRun + 1
				+ ((lastRun + 255 - RUN_MASK)/255)
					> (U32)maxOutputSize)) {
			/* Check output limit */
			return 0;
		}
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK<<ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254 ; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE) lastRun;
		} else
			*op++ = (BYTE)(lastRun<<ML_BITS);
		memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int) (((char *)op) - dest);
}
|
||||
|
||||
static int LZ4_compress_HC_extStateHC(
|
||||
void *state,
|
||||
const char *src,
|
||||
char *dst,
|
||||
int srcSize,
|
||||
int maxDstSize,
|
||||
int compressionLevel)
|
||||
{
|
||||
LZ4HC_CCtx_internal *ctx = &((LZ4_streamHC_t *)state)->internal_donotuse;
|
||||
|
||||
if (((size_t)(state)&(sizeof(void *) - 1)) != 0) {
|
||||
/* Error : state is not aligned
|
||||
* for pointers (32 or 64 bits)
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
LZ4HC_init(ctx, (const BYTE *)src);
|
||||
|
||||
if (maxDstSize < LZ4_compressBound(srcSize))
|
||||
return LZ4HC_compress_generic(ctx, src, dst,
|
||||
srcSize, maxDstSize, compressionLevel, limitedOutput);
|
||||
else
|
||||
return LZ4HC_compress_generic(ctx, src, dst,
|
||||
srcSize, maxDstSize, compressionLevel, noLimit);
|
||||
}
|
||||
|
||||
/*
 * Public one-shot HC entry point; @wrkmem supplies the working state
 * (an LZ4_streamHC_t-sized, pointer-aligned buffer).
 * Returns the compressed size, or 0 on error.
 */
int LZ4_compress_HC(const char *src, char *dst, int srcSize,
	int maxDstSize, int compressionLevel, void *wrkmem)
{
	/* Thin wrapper: wrkmem becomes the external state. */
	return LZ4_compress_HC_extStateHC(wrkmem, src, dst, srcSize,
		maxDstSize, compressionLevel);
}
EXPORT_SYMBOL(LZ4_compress_HC);
|
||||
|
||||
/**************************************
|
||||
* Streaming Functions
|
||||
**************************************/
|
||||
/*
 * Reset a streaming HC context: clearing 'base' forces a full
 * re-initialisation on the next compression call.
 */
void LZ4_resetStreamHC(LZ4_streamHC_t *LZ4_streamHCPtr, int compressionLevel)
{
	LZ4HC_CCtx_internal * const ctx = &LZ4_streamHCPtr->internal_donotuse;

	ctx->base = NULL;
	ctx->compressionLevel = (unsigned int)compressionLevel;
}
|
||||
|
||||
/*
 * Load a dictionary into a streaming HC context.  Only the trailing
 * 64 KB is usable (the match window size).  Returns the number of
 * dictionary bytes actually retained.
 */
int LZ4_loadDictHC(LZ4_streamHC_t *LZ4_streamHCPtr,
	const char *dictionary,
	int dictSize)
{
	LZ4HC_CCtx_internal * const ctxPtr =
		&LZ4_streamHCPtr->internal_donotuse;

	/* Keep only the last 64 KB of an oversized dictionary. */
	if (dictSize > 64 * KB) {
		dictionary += dictSize - 64 * KB;
		dictSize = 64 * KB;
	}

	LZ4HC_init(ctxPtr, (const BYTE *)dictionary);

	/* Index the dictionary; positions need 4 readable bytes. */
	if (dictSize >= 4)
		LZ4HC_Insert(ctxPtr,
			(const BYTE *)dictionary + (dictSize - 3));

	ctxPtr->end = (const BYTE *)dictionary + dictSize;
	return dictSize;
}
EXPORT_SYMBOL(LZ4_loadDictHC);
|
||||
|
||||
/* compression */
|
||||
|
||||
/*
 * Slide the context to a new, non-contiguous input block: everything
 * compressed so far becomes the external dictionary for @newBlock.
 * Statement order matters here — dictLimit/dictBase must capture the
 * old segment before 'base' is rebased onto the new block.
 */
static void LZ4HC_setExternalDict(
	LZ4HC_CCtx_internal *ctxPtr,
	const BYTE *newBlock)
{
	if (ctxPtr->end >= ctxPtr->base + 4) {
		/* Referencing remaining dictionary content */
		LZ4HC_Insert(ctxPtr, ctxPtr->end - 3);
	}

	/*
	 * Only one memory segment for extDict,
	 * so any previous extDict is lost at this stage
	 */
	ctxPtr->lowLimit = ctxPtr->dictLimit;
	ctxPtr->dictLimit = (U32)(ctxPtr->end - ctxPtr->base);
	ctxPtr->dictBase = ctxPtr->base;
	/* Rebase so indices stay monotonically increasing across blocks. */
	ctxPtr->base = newBlock - ctxPtr->dictLimit;
	ctxPtr->end = newBlock;
	/* match referencing will resume from there */
	ctxPtr->nextToUpdate = ctxPtr->dictLimit;
}
/*
 * NOTE(review): EXPORT_SYMBOL on a static function looks wrong — static
 * symbols cannot be exported to modules; verify this builds/was intended.
 */
EXPORT_SYMBOL(LZ4HC_setExternalDict);
|
||||
|
||||
/*
 * Streaming-compression worker: prepares the context for 'source'
 * (auto-init, 2 GB index overflow handling, external-dictionary
 * transition, dict/input overlap trimming), then runs the generic
 * compressor.  Returns compressed size, or 0 on overflow when limited.
 */
static int LZ4_compressHC_continue_generic(
	LZ4_streamHC_t *LZ4_streamHCPtr,
	const char *source,
	char *dest,
	int inputSize,
	int maxOutputSize,
	limitedOutput_directive limit)
{
	LZ4HC_CCtx_internal *ctxPtr = &LZ4_streamHCPtr->internal_donotuse;

	/* auto - init if forgotten */
	if (ctxPtr->base == NULL)
		LZ4HC_init(ctxPtr, (const BYTE *) source);

	/* Check overflow: indices are 32-bit, so rebase before 2 GB by
	 * reloading the last <= 64 KB as a fresh dictionary.
	 */
	if ((size_t)(ctxPtr->end - ctxPtr->base) > 2 * GB) {
		size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->base)
			- ctxPtr->dictLimit;
		if (dictSize > 64 * KB)
			dictSize = 64 * KB;
		LZ4_loadDictHC(LZ4_streamHCPtr,
			(const char *)(ctxPtr->end) - dictSize, (int)dictSize);
	}

	/* Check if blocks follow each other */
	if ((const BYTE *)source != ctxPtr->end)
		LZ4HC_setExternalDict(ctxPtr, (const BYTE *)source);

	/* Check overlapping input/dictionary space: shrink the usable
	 * dictionary so matches never read bytes the new input overwrites.
	 */
	{
		const BYTE *sourceEnd = (const BYTE *) source + inputSize;
		const BYTE * const dictBegin = ctxPtr->dictBase + ctxPtr->lowLimit;
		const BYTE * const dictEnd = ctxPtr->dictBase + ctxPtr->dictLimit;

		if ((sourceEnd > dictBegin)
			&& ((const BYTE *)source < dictEnd)) {
			if (sourceEnd > dictEnd)
				sourceEnd = dictEnd;
			ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase);

			/* A dictionary under 4 bytes is useless; drop it. */
			if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4)
				ctxPtr->lowLimit = ctxPtr->dictLimit;
		}
	}

	return LZ4HC_compress_generic(ctxPtr, source, dest,
		inputSize, maxOutputSize, ctxPtr->compressionLevel, limit);
}
|
||||
|
||||
int LZ4_compress_HC_continue(
|
||||
LZ4_streamHC_t *LZ4_streamHCPtr,
|
||||
const char *source,
|
||||
char *dest,
|
||||
int inputSize,
|
||||
int maxOutputSize)
|
||||
{
|
||||
if (maxOutputSize < LZ4_compressBound(inputSize))
|
||||
return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
|
||||
source, dest, inputSize, maxOutputSize, limitedOutput);
|
||||
else
|
||||
return LZ4_compressHC_continue_generic(LZ4_streamHCPtr,
|
||||
source, dest, inputSize, maxOutputSize, noLimit);
|
||||
}
|
||||
EXPORT_SYMBOL(LZ4_compress_HC_continue);
|
||||
|
||||
/* dictionary saving */
|
||||
|
||||
/*
 * Copy the last 'dictSize' bytes of stream history into @safeBuffer
 * (caller-owned), then re-point the context at that copy so the next
 * block may overwrite the old input.  Returns the number of bytes
 * actually saved (clamped to 64 KB, available prefix, and >= 4 or 0).
 */
int LZ4_saveDictHC(
	LZ4_streamHC_t *LZ4_streamHCPtr,
	char *safeBuffer,
	int dictSize)
{
	LZ4HC_CCtx_internal *const streamPtr = &LZ4_streamHCPtr->internal_donotuse;
	int const prefixSize = (int)(streamPtr->end
		- (streamPtr->base + streamPtr->dictLimit));

	if (dictSize > 64 * KB)
		dictSize = 64 * KB;
	if (dictSize < 4)
		dictSize = 0;
	if (dictSize > prefixSize)
		dictSize = prefixSize;

	/* Regions may overlap, hence memmove rather than memcpy. */
	memmove(safeBuffer, streamPtr->end - dictSize, dictSize);

	{
		U32 const endIndex = (U32)(streamPtr->end - streamPtr->base);

		/* Rebase onto safeBuffer while preserving absolute indices. */
		streamPtr->end = (const BYTE *)safeBuffer + dictSize;
		streamPtr->base = streamPtr->end - endIndex;
		streamPtr->dictLimit = endIndex - dictSize;
		streamPtr->lowLimit = endIndex - dictSize;

		if (streamPtr->nextToUpdate < streamPtr->dictLimit)
			streamPtr->nextToUpdate = streamPtr->dictLimit;
	}
	return dictSize;
}
EXPORT_SYMBOL(LZ4_saveDictHC);
|
||||
|
||||
MODULE_LICENSE("Dual BSD/GPL");
|
||||
MODULE_DESCRIPTION("LZ4 HC compressor");
|
||||
@@ -23,9 +23,9 @@ KCOV_INSTRUMENT_vmstat.o := n
|
||||
|
||||
mmu-y := nommu.o
|
||||
mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \
|
||||
mlock.o mmap.o mprotect.o mremap.o msync.o \
|
||||
page_vma_mapped.o pagewalk.o pgtable-generic.o \
|
||||
rmap.o vmalloc.o
|
||||
mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \
|
||||
msync.o page_vma_mapped.o pagewalk.o \
|
||||
pgtable-generic.o rmap.o vmalloc.o
|
||||
|
||||
|
||||
ifdef CONFIG_CROSS_MEMORY_ATTACH
|
||||
|
||||
265
mm/memory.c
265
mm/memory.c
@@ -199,271 +199,6 @@ static void check_sync_rss_stat(struct task_struct *task)
|
||||
|
||||
#endif /* SPLIT_RSS_COUNTING */
|
||||
|
||||
#ifdef HAVE_GENERIC_MMU_GATHER
|
||||
|
||||
/*
 * Advance tlb->active to the next page batch, reusing an already-linked
 * batch or allocating a fresh page for one.  Returns false when no
 * batch is available (allocation failed or MAX_GATHER_BATCH_COUNT hit),
 * in which case the caller must flush instead.
 */
static bool tlb_next_batch(struct mmu_gather *tlb)
{
	struct mmu_gather_batch *batch;

	batch = tlb->active;
	if (batch->next) {
		/* A spare batch from a previous flush cycle is reusable. */
		tlb->active = batch->next;
		return true;
	}

	/* Cap memory pinned by gathered pages. */
	if (tlb->batch_count == MAX_GATHER_BATCH_COUNT)
		return false;

	/* GFP_NOWAIT: may run under locks; failure is tolerated. */
	batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
	if (!batch)
		return false;

	tlb->batch_count++;
	batch->next = NULL;
	batch->nr = 0;
	batch->max = MAX_GATHER_BATCH;

	tlb->active->next = batch;
	tlb->active = batch;

	return true;
}
|
||||
|
||||
/*
 * Initialise a mmu_gather for tearing down mappings in [start, end).
 * A 0..~0 range marks a full-mm teardown (fullmm), which allows lazier
 * TLB flushing since no other thread can be using the mm.
 */
void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
	unsigned long start, unsigned long end)
{
	tlb->mm = mm;

	/* Is it from 0 to ~0? */
	tlb->fullmm = !(start | (end+1));
	tlb->need_flush_all = 0;
	/* Start gathering into the embedded local batch. */
	tlb->local.next = NULL;
	tlb->local.nr = 0;
	tlb->local.max = ARRAY_SIZE(tlb->__pages);
	tlb->active = &tlb->local;
	tlb->batch_count = 0;

#ifdef CONFIG_HAVE_RCU_TABLE_FREE
	tlb->batch = NULL;
#endif
	/* 0 = page size not yet established by tlb_remove_page_size(). */
	tlb->page_size = 0;

	__tlb_reset_range(tlb);
}
|
||||
|
||||
/*
 * Flush the TLB for the accumulated range and notify secondary MMUs,
 * without freeing any gathered pages.  No-op if nothing was gathered.
 */
static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
{
	/* Empty range: __tlb_reset_range() left end == 0. */
	if (!tlb->end)
		return;

	tlb_flush(tlb);
	mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
	__tlb_reset_range(tlb);
}
|
||||
|
||||
/*
 * Release everything gathered since the last flush: RCU-deferred page
 * tables (if configured) and all batched pages.  Must run after the
 * TLB flush so no CPU can still reference the pages being freed.
 */
static void tlb_flush_mmu_free(struct mmu_gather *tlb)
{
	struct mmu_gather_batch *batch;

#ifdef CONFIG_HAVE_RCU_TABLE_FREE
	tlb_table_flush(tlb);
#endif
	for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
		free_pages_and_swap_cache(batch->pages, batch->nr);
		batch->nr = 0;
	}
	/* Batches stay linked for reuse; restart filling at the first. */
	tlb->active = &tlb->local;
}
|
||||
|
||||
void tlb_flush_mmu(struct mmu_gather *tlb)
|
||||
{
|
||||
tlb_flush_mmu_tlbonly(tlb);
|
||||
tlb_flush_mmu_free(tlb);
|
||||
}
|
||||
|
||||
/* tlb_finish_mmu
|
||||
* Called at the end of the shootdown operation to free up any resources
|
||||
* that were required.
|
||||
*/
|
||||
void arch_tlb_finish_mmu(struct mmu_gather *tlb,
|
||||
unsigned long start, unsigned long end, bool force)
|
||||
{
|
||||
struct mmu_gather_batch *batch, *next;
|
||||
|
||||
if (force)
|
||||
__tlb_adjust_range(tlb, start, end - start);
|
||||
|
||||
tlb_flush_mmu(tlb);
|
||||
|
||||
/* keep the page table cache within bounds */
|
||||
check_pgt_cache();
|
||||
|
||||
for (batch = tlb->local.next; batch; batch = next) {
|
||||
next = batch->next;
|
||||
free_pages((unsigned long)batch, 0);
|
||||
}
|
||||
tlb->local.next = NULL;
|
||||
}
|
||||
|
||||
/* __tlb_remove_page
|
||||
* Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while
|
||||
* handling the additional races in SMP caused by other CPUs caching valid
|
||||
* mappings in their TLBs. Returns the number of free page slots left.
|
||||
* When out of page slots we must call tlb_flush_mmu().
|
||||
*returns true if the caller should flush.
|
||||
*/
|
||||
bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size)
|
||||
{
|
||||
struct mmu_gather_batch *batch;
|
||||
|
||||
VM_BUG_ON(!tlb->end);
|
||||
VM_WARN_ON(tlb->page_size != page_size);
|
||||
|
||||
batch = tlb->active;
|
||||
/*
|
||||
* Add the page and check if we are full. If so
|
||||
* force a flush.
|
||||
*/
|
||||
batch->pages[batch->nr++] = page;
|
||||
if (batch->nr == batch->max) {
|
||||
if (!tlb_next_batch(tlb))
|
||||
return true;
|
||||
batch = tlb->active;
|
||||
}
|
||||
VM_BUG_ON_PAGE(batch->nr > batch->max, page);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
|
||||
unsigned long size)
|
||||
{
|
||||
if (tlb->page_size != 0 && tlb->page_size != PMD_SIZE)
|
||||
tlb_flush_mmu(tlb);
|
||||
|
||||
tlb->page_size = PMD_SIZE;
|
||||
tlb->start = min(tlb->start, address);
|
||||
tlb->end = max(tlb->end, address + size);
|
||||
}
|
||||
#endif /* HAVE_GENERIC_MMU_GATHER */
|
||||
|
||||
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
|
||||
|
||||
/*
|
||||
* See the comment near struct mmu_table_batch.
|
||||
*/
|
||||
|
||||
/*
|
||||
* If we want tlb_remove_table() to imply TLB invalidates.
|
||||
*/
|
||||
static inline void tlb_table_invalidate(struct mmu_gather *tlb)
|
||||
{
|
||||
#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE
|
||||
/*
|
||||
* Invalidate page-table caches used by hardware walkers. Then we still
|
||||
* need to RCU-sched wait while freeing the pages because software
|
||||
* walkers can still be in-flight.
|
||||
*/
|
||||
tlb_flush_mmu_tlbonly(tlb);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void tlb_remove_table_smp_sync(void *arg)
|
||||
{
|
||||
/* Simply deliver the interrupt */
|
||||
}
|
||||
|
||||
void tlb_remove_table_sync_one(void)
|
||||
{
|
||||
smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
|
||||
}
|
||||
|
||||
static void tlb_remove_table_one(void *table)
|
||||
{
|
||||
/*
|
||||
* This isn't an RCU grace period and hence the page-tables cannot be
|
||||
* assumed to be actually RCU-freed.
|
||||
*
|
||||
* It is however sufficient for software page-table walkers that rely on
|
||||
* IRQ disabling. See the comment near struct mmu_table_batch.
|
||||
*/
|
||||
smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
|
||||
__tlb_remove_table(table);
|
||||
}
|
||||
|
||||
static void tlb_remove_table_rcu(struct rcu_head *head)
|
||||
{
|
||||
struct mmu_table_batch *batch;
|
||||
int i;
|
||||
|
||||
batch = container_of(head, struct mmu_table_batch, rcu);
|
||||
|
||||
for (i = 0; i < batch->nr; i++)
|
||||
__tlb_remove_table(batch->tables[i]);
|
||||
|
||||
free_page((unsigned long)batch);
|
||||
}
|
||||
|
||||
void tlb_table_flush(struct mmu_gather *tlb)
|
||||
{
|
||||
struct mmu_table_batch **batch = &tlb->batch;
|
||||
|
||||
if (*batch) {
|
||||
tlb_table_invalidate(tlb);
|
||||
call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
|
||||
*batch = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void tlb_remove_table(struct mmu_gather *tlb, void *table)
|
||||
{
|
||||
struct mmu_table_batch **batch = &tlb->batch;
|
||||
|
||||
if (*batch == NULL) {
|
||||
*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
|
||||
if (*batch == NULL) {
|
||||
tlb_table_invalidate(tlb);
|
||||
tlb_remove_table_one(table);
|
||||
return;
|
||||
}
|
||||
(*batch)->nr = 0;
|
||||
}
|
||||
|
||||
(*batch)->tables[(*batch)->nr++] = table;
|
||||
if ((*batch)->nr == MAX_TABLE_BATCH)
|
||||
tlb_table_flush(tlb);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
|
||||
|
||||
/* tlb_gather_mmu
|
||||
* Called to initialize an (on-stack) mmu_gather structure for page-table
|
||||
* tear-down from @mm. The @fullmm argument is used when @mm is without
|
||||
* users and we're going to destroy the full address space (exit/execve).
|
||||
*/
|
||||
void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
|
||||
unsigned long start, unsigned long end)
|
||||
{
|
||||
arch_tlb_gather_mmu(tlb, mm, start, end);
|
||||
inc_tlb_flush_pending(tlb->mm);
|
||||
}
|
||||
|
||||
void tlb_finish_mmu(struct mmu_gather *tlb,
|
||||
unsigned long start, unsigned long end)
|
||||
{
|
||||
/*
|
||||
* If there are parallel threads are doing PTE changes on same range
|
||||
* under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB
|
||||
* flush by batching, a thread has stable TLB entry can fail to flush
|
||||
* the TLB by observing pte_none|!pte_dirty, for example so flush TLB
|
||||
* forcefully if we detect parallel PTE batching threads.
|
||||
*/
|
||||
bool force = mm_tlb_flush_nested(tlb->mm);
|
||||
|
||||
arch_tlb_finish_mmu(tlb, start, end, force);
|
||||
dec_tlb_flush_pending(tlb->mm);
|
||||
}
|
||||
|
||||
/*
|
||||
* Note: this doesn't free the actual pages themselves. That
|
||||
* has been handled earlier when unmapping all the memory regions.
|
||||
|
||||
276
mm/mmu_gather.c
Normal file
276
mm/mmu_gather.c
Normal file
@@ -0,0 +1,276 @@
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/mmdebug.h>
|
||||
#include <linux/mm_types.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/swap.h>
|
||||
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/tlb.h>
|
||||
|
||||
#ifdef HAVE_GENERIC_MMU_GATHER
|
||||
|
||||
static bool tlb_next_batch(struct mmu_gather *tlb)
|
||||
{
|
||||
struct mmu_gather_batch *batch;
|
||||
|
||||
batch = tlb->active;
|
||||
if (batch->next) {
|
||||
tlb->active = batch->next;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (tlb->batch_count == MAX_GATHER_BATCH_COUNT)
|
||||
return false;
|
||||
|
||||
batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
|
||||
if (!batch)
|
||||
return false;
|
||||
|
||||
tlb->batch_count++;
|
||||
batch->next = NULL;
|
||||
batch->nr = 0;
|
||||
batch->max = MAX_GATHER_BATCH;
|
||||
|
||||
tlb->active->next = batch;
|
||||
tlb->active = batch;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
|
||||
unsigned long start, unsigned long end)
|
||||
{
|
||||
tlb->mm = mm;
|
||||
|
||||
/* Is it from 0 to ~0? */
|
||||
tlb->fullmm = !(start | (end+1));
|
||||
tlb->need_flush_all = 0;
|
||||
tlb->local.next = NULL;
|
||||
tlb->local.nr = 0;
|
||||
tlb->local.max = ARRAY_SIZE(tlb->__pages);
|
||||
tlb->active = &tlb->local;
|
||||
tlb->batch_count = 0;
|
||||
|
||||
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
|
||||
tlb->batch = NULL;
|
||||
#endif
|
||||
tlb->page_size = 0;
|
||||
|
||||
__tlb_reset_range(tlb);
|
||||
}
|
||||
|
||||
void tlb_flush_mmu_free(struct mmu_gather *tlb)
|
||||
{
|
||||
struct mmu_gather_batch *batch;
|
||||
|
||||
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
|
||||
tlb_table_flush(tlb);
|
||||
#endif
|
||||
for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
|
||||
free_pages_and_swap_cache(batch->pages, batch->nr);
|
||||
batch->nr = 0;
|
||||
}
|
||||
tlb->active = &tlb->local;
|
||||
}
|
||||
|
||||
void tlb_flush_mmu(struct mmu_gather *tlb)
|
||||
{
|
||||
tlb_flush_mmu_tlbonly(tlb);
|
||||
tlb_flush_mmu_free(tlb);
|
||||
}
|
||||
|
||||
/* tlb_finish_mmu
|
||||
* Called at the end of the shootdown operation to free up any resources
|
||||
* that were required.
|
||||
*/
|
||||
void arch_tlb_finish_mmu(struct mmu_gather *tlb,
|
||||
unsigned long start, unsigned long end, bool force)
|
||||
{
|
||||
struct mmu_gather_batch *batch, *next;
|
||||
|
||||
if (force) {
|
||||
__tlb_reset_range(tlb);
|
||||
__tlb_adjust_range(tlb, start, end - start);
|
||||
}
|
||||
|
||||
tlb_flush_mmu(tlb);
|
||||
|
||||
/* keep the page table cache within bounds */
|
||||
check_pgt_cache();
|
||||
|
||||
for (batch = tlb->local.next; batch; batch = next) {
|
||||
next = batch->next;
|
||||
free_pages((unsigned long)batch, 0);
|
||||
}
|
||||
tlb->local.next = NULL;
|
||||
}
|
||||
|
||||
/* __tlb_remove_page
|
||||
* Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while
|
||||
* handling the additional races in SMP caused by other CPUs caching valid
|
||||
* mappings in their TLBs. Returns the number of free page slots left.
|
||||
* When out of page slots we must call tlb_flush_mmu().
|
||||
*returns true if the caller should flush.
|
||||
*/
|
||||
bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size)
|
||||
{
|
||||
struct mmu_gather_batch *batch;
|
||||
|
||||
VM_BUG_ON(!tlb->end);
|
||||
VM_WARN_ON(tlb->page_size != page_size);
|
||||
|
||||
batch = tlb->active;
|
||||
/*
|
||||
* Add the page and check if we are full. If so
|
||||
* force a flush.
|
||||
*/
|
||||
batch->pages[batch->nr++] = page;
|
||||
if (batch->nr == batch->max) {
|
||||
if (!tlb_next_batch(tlb))
|
||||
return true;
|
||||
batch = tlb->active;
|
||||
}
|
||||
VM_BUG_ON_PAGE(batch->nr > batch->max, page);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address,
|
||||
unsigned long size)
|
||||
{
|
||||
if (tlb->page_size != 0 && tlb->page_size != PMD_SIZE)
|
||||
tlb_flush_mmu(tlb);
|
||||
|
||||
tlb->page_size = PMD_SIZE;
|
||||
tlb->start = min(tlb->start, address);
|
||||
tlb->end = max(tlb->end, address + size);
|
||||
}
|
||||
#endif /* HAVE_GENERIC_MMU_GATHER */
|
||||
|
||||
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
|
||||
|
||||
/*
|
||||
* See the comment near struct mmu_table_batch.
|
||||
*/
|
||||
|
||||
/*
|
||||
* If we want tlb_remove_table() to imply TLB invalidates.
|
||||
*/
|
||||
static inline void tlb_table_invalidate(struct mmu_gather *tlb)
|
||||
{
|
||||
#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE
|
||||
/*
|
||||
* Invalidate page-table caches used by hardware walkers. Then we still
|
||||
* need to RCU-sched wait while freeing the pages because software
|
||||
* walkers can still be in-flight.
|
||||
*/
|
||||
tlb_flush_mmu_tlbonly(tlb);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void tlb_remove_table_smp_sync(void *arg)
|
||||
{
|
||||
/* Simply deliver the interrupt */
|
||||
}
|
||||
|
||||
void tlb_remove_table_sync_one(void)
|
||||
{
|
||||
smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
|
||||
}
|
||||
|
||||
static void tlb_remove_table_one(void *table)
|
||||
{
|
||||
/*
|
||||
* This isn't an RCU grace period and hence the page-tables cannot be
|
||||
* assumed to be actually RCU-freed.
|
||||
*
|
||||
* It is however sufficient for software page-table walkers that rely on
|
||||
* IRQ disabling. See the comment near struct mmu_table_batch.
|
||||
*/
|
||||
smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
|
||||
__tlb_remove_table(table);
|
||||
}
|
||||
|
||||
static void tlb_remove_table_rcu(struct rcu_head *head)
|
||||
{
|
||||
struct mmu_table_batch *batch;
|
||||
int i;
|
||||
|
||||
batch = container_of(head, struct mmu_table_batch, rcu);
|
||||
|
||||
for (i = 0; i < batch->nr; i++)
|
||||
__tlb_remove_table(batch->tables[i]);
|
||||
|
||||
free_page((unsigned long)batch);
|
||||
}
|
||||
|
||||
void tlb_table_flush(struct mmu_gather *tlb)
|
||||
{
|
||||
struct mmu_table_batch **batch = &tlb->batch;
|
||||
|
||||
if (*batch) {
|
||||
tlb_table_invalidate(tlb);
|
||||
call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
|
||||
*batch = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void tlb_remove_table(struct mmu_gather *tlb, void *table)
|
||||
{
|
||||
struct mmu_table_batch **batch = &tlb->batch;
|
||||
|
||||
if (*batch == NULL) {
|
||||
*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
|
||||
if (*batch == NULL) {
|
||||
tlb_table_invalidate(tlb);
|
||||
tlb_remove_table_one(table);
|
||||
return;
|
||||
}
|
||||
(*batch)->nr = 0;
|
||||
}
|
||||
|
||||
(*batch)->tables[(*batch)->nr++] = table;
|
||||
if ((*batch)->nr == MAX_TABLE_BATCH)
|
||||
tlb_table_flush(tlb);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
|
||||
|
||||
/**
|
||||
* tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down
|
||||
* @tlb: the mmu_gather structure to initialize
|
||||
* @mm: the mm_struct of the target address space
|
||||
* @start: start of the region that will be removed from the page-table
|
||||
* @end: end of the region that will be removed from the page-table
|
||||
*
|
||||
* Called to initialize an (on-stack) mmu_gather structure for page-table
|
||||
* tear-down from @mm. The @start and @end are set to 0 and -1
|
||||
* respectively when @mm is without users and we're going to destroy
|
||||
* the full address space (exit/execve).
|
||||
*/
|
||||
void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
|
||||
unsigned long start, unsigned long end)
|
||||
{
|
||||
arch_tlb_gather_mmu(tlb, mm, start, end);
|
||||
inc_tlb_flush_pending(tlb->mm);
|
||||
}
|
||||
|
||||
void tlb_finish_mmu(struct mmu_gather *tlb,
|
||||
unsigned long start, unsigned long end)
|
||||
{
|
||||
/*
|
||||
* If there are parallel threads are doing PTE changes on same range
|
||||
* under non-exclusive lock(e.g., mmap_sem read-side) but defer TLB
|
||||
* flush by batching, a thread has stable TLB entry can fail to flush
|
||||
* the TLB by observing pte_none|!pte_dirty, for example so flush TLB
|
||||
* forcefully if we detect parallel PTE batching threads.
|
||||
*/
|
||||
bool force = mm_tlb_flush_nested(tlb->mm);
|
||||
|
||||
arch_tlb_finish_mmu(tlb, start, end, force);
|
||||
dec_tlb_flush_pending(tlb->mm);
|
||||
}
|
||||
@@ -582,6 +582,7 @@ int snd_usb_create_quirk(struct snd_usb_audio *chip,
|
||||
static int snd_usb_extigy_boot_quirk(struct usb_device *dev, struct usb_interface *intf)
|
||||
{
|
||||
struct usb_host_config *config = dev->actconfig;
|
||||
struct usb_device_descriptor *new_device_descriptor = NULL;
|
||||
int err;
|
||||
|
||||
if (le16_to_cpu(get_cfg_desc(config)->wTotalLength) == EXTIGY_FIRMWARE_SIZE_OLD ||
|
||||
@@ -592,11 +593,20 @@ static int snd_usb_extigy_boot_quirk(struct usb_device *dev, struct usb_interfac
|
||||
0x10, 0x43, 0x0001, 0x000a, NULL, 0);
|
||||
if (err < 0)
|
||||
dev_dbg(&dev->dev, "error sending boot message: %d\n", err);
|
||||
|
||||
new_device_descriptor = kmalloc(sizeof(*new_device_descriptor), GFP_KERNEL);
|
||||
if (!new_device_descriptor)
|
||||
return -ENOMEM;
|
||||
err = usb_get_descriptor(dev, USB_DT_DEVICE, 0,
|
||||
&dev->descriptor, sizeof(dev->descriptor));
|
||||
config = dev->actconfig;
|
||||
new_device_descriptor, sizeof(*new_device_descriptor));
|
||||
if (err < 0)
|
||||
dev_dbg(&dev->dev, "error usb_get_descriptor: %d\n", err);
|
||||
if (new_device_descriptor->bNumConfigurations > dev->descriptor.bNumConfigurations)
|
||||
dev_dbg(&dev->dev, "error too large bNumConfigurations: %d\n",
|
||||
new_device_descriptor->bNumConfigurations);
|
||||
else
|
||||
memcpy(&dev->descriptor, new_device_descriptor, sizeof(dev->descriptor));
|
||||
kfree(new_device_descriptor);
|
||||
err = usb_reset_configuration(dev);
|
||||
if (err < 0)
|
||||
dev_dbg(&dev->dev, "error usb_reset_configuration: %d\n", err);
|
||||
@@ -812,6 +822,7 @@ static void mbox2_setup_48_24_magic(struct usb_device *dev)
|
||||
static int snd_usb_mbox2_boot_quirk(struct usb_device *dev)
|
||||
{
|
||||
struct usb_host_config *config = dev->actconfig;
|
||||
struct usb_device_descriptor *new_device_descriptor = NULL;
|
||||
int err;
|
||||
u8 bootresponse[0x12];
|
||||
int fwsize;
|
||||
@@ -846,11 +857,21 @@ static int snd_usb_mbox2_boot_quirk(struct usb_device *dev)
|
||||
|
||||
dev_dbg(&dev->dev, "device initialised!\n");
|
||||
|
||||
new_device_descriptor = kmalloc(sizeof(*new_device_descriptor), GFP_KERNEL);
|
||||
if (!new_device_descriptor)
|
||||
return -ENOMEM;
|
||||
|
||||
err = usb_get_descriptor(dev, USB_DT_DEVICE, 0,
|
||||
&dev->descriptor, sizeof(dev->descriptor));
|
||||
config = dev->actconfig;
|
||||
new_device_descriptor, sizeof(*new_device_descriptor));
|
||||
if (err < 0)
|
||||
dev_dbg(&dev->dev, "error usb_get_descriptor: %d\n", err);
|
||||
if (new_device_descriptor->bNumConfigurations > dev->descriptor.bNumConfigurations)
|
||||
dev_dbg(&dev->dev, "error too large bNumConfigurations: %d\n",
|
||||
new_device_descriptor->bNumConfigurations);
|
||||
else
|
||||
memcpy(&dev->descriptor, new_device_descriptor, sizeof(dev->descriptor));
|
||||
|
||||
kfree(new_device_descriptor);
|
||||
|
||||
err = usb_reset_configuration(dev);
|
||||
if (err < 0)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
|
||||
* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
@@ -226,6 +227,10 @@ static int wcd937x_parse_port_mapping(struct device *dev,
|
||||
|
||||
for (i = 0; i < map_length; i++) {
|
||||
port_num = dt_array[NUM_SWRS_DT_PARAMS * i];
|
||||
if (port_num >= MAX_PORT || ch_iter >= MAX_CH_PER_PORT) {
|
||||
dev_err(dev, "%s: Invalid port or channel number\n", __func__);
|
||||
goto err_pdata_fail;
|
||||
}
|
||||
slave_port_type = dt_array[NUM_SWRS_DT_PARAMS * i + 1];
|
||||
ch_mask = dt_array[NUM_SWRS_DT_PARAMS * i + 2];
|
||||
ch_rate = dt_array[NUM_SWRS_DT_PARAMS * i + 3];
|
||||
|
||||
@@ -377,6 +377,12 @@ static int wcd938x_parse_port_mapping(struct device *dev,
|
||||
|
||||
for (i = 0; i < map_length; i++) {
|
||||
port_num = dt_array[NUM_SWRS_DT_PARAMS * i];
|
||||
|
||||
if (port_num >= MAX_PORT || ch_iter >= MAX_CH_PER_PORT) {
|
||||
dev_err(dev, "%s: Invalid port or channel number\n", __func__);
|
||||
goto err_pdata_fail;
|
||||
}
|
||||
|
||||
slave_port_type = dt_array[NUM_SWRS_DT_PARAMS * i + 1];
|
||||
ch_mask = dt_array[NUM_SWRS_DT_PARAMS * i + 2];
|
||||
ch_rate = dt_array[NUM_SWRS_DT_PARAMS * i + 3];
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
|
||||
* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 and
|
||||
@@ -227,6 +228,10 @@ static int wcd937x_parse_port_mapping(struct device *dev,
|
||||
|
||||
for (i = 0; i < map_length; i++) {
|
||||
port_num = dt_array[NUM_SWRS_DT_PARAMS * i];
|
||||
if (port_num >= MAX_PORT || ch_iter >= MAX_CH_PER_PORT) {
|
||||
dev_err(dev, "%s: Invalid port or channel number\n", __func__);
|
||||
goto err_pdata_fail;
|
||||
}
|
||||
slave_port_type = dt_array[NUM_SWRS_DT_PARAMS * i + 1];
|
||||
ch_mask = dt_array[NUM_SWRS_DT_PARAMS * i + 2];
|
||||
ch_rate = dt_array[NUM_SWRS_DT_PARAMS * i + 3];
|
||||
|
||||
@@ -404,24 +404,49 @@ static int32_t sp_make_afe_callback(uint32_t opcode, uint32_t *payload,
|
||||
switch (param_hdr.param_id) {
|
||||
case AFE_PARAM_ID_CALIB_RES_CFG_V2:
|
||||
expected_size += sizeof(struct asm_calib_res_cfg);
|
||||
if (param_hdr.param_size != sizeof(struct asm_calib_res_cfg)) {
|
||||
pr_err("%s: Error: param_size %d is greater than expected\n",
|
||||
__func__,param_hdr.param_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
data_dest = (u32 *) &this_afe.calib_data;
|
||||
break;
|
||||
case AFE_PARAM_ID_SP_V2_TH_VI_FTM_PARAMS:
|
||||
expected_size += sizeof(struct afe_sp_th_vi_ftm_params);
|
||||
if (param_hdr.param_size != sizeof(struct afe_sp_th_vi_ftm_params)) {
|
||||
pr_err("%s: Error: param_size %d is greater than expected\n",
|
||||
__func__,param_hdr.param_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
data_dest = (u32 *) &this_afe.th_vi_resp;
|
||||
break;
|
||||
case AFE_PARAM_ID_SP_V2_TH_VI_V_VALI_PARAMS:
|
||||
pr_err("%s: got response pkt\n", __func__);
|
||||
expected_size += sizeof(struct afe_sp_th_vi_v_vali_params);
|
||||
if (param_hdr.param_size != sizeof(struct afe_sp_th_vi_v_vali_params)) {
|
||||
pr_err("%s: Error: param_size %d is greater than expected\n",
|
||||
__func__,param_hdr.param_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
data_dest = (u32 *) &this_afe.th_vi_v_vali_resp;
|
||||
break;
|
||||
case AFE_PARAM_ID_SP_V2_EX_VI_FTM_PARAMS:
|
||||
expected_size += sizeof(struct afe_sp_ex_vi_ftm_params);
|
||||
if (param_hdr.param_size != sizeof(struct afe_sp_ex_vi_ftm_params)) {
|
||||
pr_err("%s: Error: param_size %d is greater than expected\n",
|
||||
__func__,param_hdr.param_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
data_dest = (u32 *) &this_afe.ex_vi_resp;
|
||||
break;
|
||||
case AFE_PARAM_ID_SP_RX_TMAX_XMAX_LOGGING:
|
||||
expected_size += sizeof(
|
||||
struct afe_sp_rx_tmax_xmax_logging_param);
|
||||
if (param_hdr.param_size != sizeof(struct afe_sp_rx_tmax_xmax_logging_param)) {
|
||||
pr_err("%s: Error: param_size %d is greater than expected\n",
|
||||
__func__,param_hdr.param_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
data_dest = (u32 *) &this_afe.xt_logging_resp;
|
||||
break;
|
||||
default:
|
||||
|
||||
Reference in New Issue
Block a user