63 Commits
bka ... vic

Author SHA1 Message Date
claxten10
910fd8062c arch: arm64: configs: Disable AEE features
Signed-off-by: claxten10 <claxten10@gmail.com>
2025-10-18 10:51:12 +00:00
bengris32
692339219d arch: arm64: Build connectivity modules inline
Change-Id: I08f90939ca3d5c4e0c6c65a60c31c2cda4f9915d
Signed-off-by: bengris32 <bengris32@protonmail.ch>
2025-10-18 10:51:12 +00:00
Eric Biggers
2460f2826c arch: arm64: configs: enable BLAKE2b support
Bug: 178411248
[adelva: patched around missing XCBC option on 4.19]
Change-Id: Iec497954d29adcf7193da9ca4b27d61eac7615d9
Signed-off-by: Eric Biggers <ebiggers@google.com>
2025-10-18 10:51:12 +00:00
Rachel Tseng
8243f2f10e [ALPS08338404] Don't use PMKID if auth type is SAE
If the STA puts a PMKID in the Assoc Req when the auth type is SAE,
some APs will reject the association with an invalid-PMKID status even
though the PMKID is correct. Therefore, don't use the PMKID if the
auth type is SAE.

MTK-Commit-Id: 5f44cb7b7a5067da4bf426c33500abcb7770d729

Change-Id: Ie3c3aea6801a9f1b8ef513a544f48bf3364b835a
CR-Id: ALPS08338404
Feature: Wi-Fi Driver CONNAC
Signed-off-by: junjiang.yu <ot_junjiang.yu@mediatek.com>
Reviewed-on: https://gerrit.mediatek.inc/c/neptune/wlan_driver/gen4m/+/7730698
Test: srv_pf_nep_sanity
Coverity-Review-Label: srv_neptune_adm <srv_neptune_adm@mediatek.com>
Test: srv_preflight_a001 <srv_preflight_a001@mediatek.com>
Reviewed-by: sticky.chen <sticky.chen@mediatek.com>
Reviewed-on: https://gerrit.mediatek.inc/c/neptune/wlan_driver/gen4m/+/7981197
Build: srv_preflight_a001 <srv_preflight_a001@mediatek.com>
AutoUT-Review-Label: srv_neptune_adm <srv_neptune_adm@mediatek.com>
Reviewed-by: rachel.tseng <rachel.tseng@mediatek.com>
Commit-Check: srv_check_service <srv_check_service@mediatek.com>
Reviewed-by: ben.lai <ben.lai@mediatek.com>
2025-10-18 10:51:12 +00:00
sunyue
f4cc4c6b28 wlan: Fix a NULL pointer issue in wlan host driver
In a rare case, a PMF Wi-Fi router sends an SA Query request to us
before the connection is established (aisUpdateBssInfoForJOIN), which
causes a kernel panic because prStaRecOfAP has not been set.

Solution:
Return directly from rsnSaQueryRequest without updating the
MSDU_INFO_T or sending an SA Query response.
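A minimal sketch of that early return (the struct and parameter names are assumptions modelled on typical gen4m code; only rsnSaQueryRequest and prStaRecOfAP come from the message above):

```diff
 void rsnSaQueryRequest(struct ADAPTER *prAdapter, struct SW_RFB *prSwRfb)
 {
+	/* The AP's station record is not created yet (we are still in
+	 * aisUpdateBssInfoForJOIN), so drop the SA Query silently. */
+	if (!prAisBssInfo->prStaRecOfAP)
+		return;
```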

Change-Id: Ieb643f13dd1203e382881517af6cc7fb8e95c354
Reviewed-on: https://gerrit.mot.com/2060858
SME-Granted: SME Approvals Granted
SLTApproved: Slta Waiver
Tested-by: Jira Key
Reviewed-by: Yue Sun <sunyue5@lenovo.com>
Reviewed-by: Bin Liu <liubin7@motorola.com>
Submit-Approved: Jira Key
2025-10-18 10:51:12 +00:00
claxten10
5a71cacdac misc: mtk/connectivity: Build gps driver
Signed-off-by: claxten10 <claxten10@gmail.com>
2025-10-18 10:51:12 +00:00
bengris32
c9ca5a6ef5 conninfra: Suppress spammy verbose logging
Change-Id: I4dcf1ecea571a48f023a992f8a9799df219b75b8
Signed-off-by: bengris32 <bengris32@protonmail.ch>
2025-10-18 10:51:12 +00:00
bengris32
504a64fa46 connectivity: Disable WLAN boost if !CONFIG_MTK_CPU_CTRL
Change-Id: I4bf1df6b600e2a3c3495e1a149a993cf029c57fa
Signed-off-by: bengris32 <bengris32@protonmail.ch>
2025-10-18 10:51:12 +00:00
bengris32
be5746d4b9 drivers: connectivity: {connfem,gps}: Build modules into kernel
Change-Id: Ib72fa5910b9e43efa266cd0bd0abaabb223a3b1e
Signed-off-by: bengris32 <bengris32@protonmail.ch>
2025-10-18 10:51:12 +00:00
bengris32
890a019265 drivers: connectivity: gen4m: Silence more debug logging
Change-Id: Ic176c9b20b909b233bf07eb613fb04f842fe2e38
Signed-off-by: bengris32 <bengris32@protonmail.ch>
2025-10-18 10:51:12 +00:00
Vaisakh Murali
d97026317e drivers: misc/mtk: connectivity-wlan: Queue delayed work on power efficient wq
Power efficient workqueues will help reduce the overall power overhead
incurred by this driver on certain workqueues.
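Mechanically, this kind of change is just retargeting the queue call, roughly as below (the work item and delay names are illustrative, not taken from the driver):

```diff
-	schedule_delayed_work(&prGlueInfo->rRxWork, ulDelay);
+	queue_delayed_work(system_power_efficient_wq,
+			   &prGlueInfo->rRxWork, ulDelay);
```

system_power_efficient_wq behaves like the regular system workqueue when power-efficient workqueues are disabled, and becomes unbound when they are enabled, letting the scheduler place the work on an already-awake CPU.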

Signed-off-by: Vaisakh Murali <mvaisakh@statixos.com>
Signed-off-by: LinkBoi00 <linkdevel@protonmail.com>
2025-10-18 10:51:12 +00:00
Vaisakh Murali
795e8d9cc6 drivers: connectivity: gen4m: Change logging levels
* The logs in this driver horrendously hog CPU power,
  affecting performance.
* Show only errors.
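A change like this typically just lowers the driver's default debug level so that only errors reach printk; a hypothetical sketch (the macro names are not the driver's actual ones):

```diff
-#define DBG_LOG_LEVEL_DEFAULT	DBG_LOG_LEVEL_EXTREME
+#define DBG_LOG_LEVEL_DEFAULT	DBG_LOG_LEVEL_ERROR
```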

Change-Id: I8259933219afb13037606fbb51f09cab505f5bbc
Signed-off-by: Vaisakh Murali <mvaisakh@statixos.com>
2025-10-18 10:51:12 +00:00
bengris32
75d42635a7 connectivity: Clean-up Makefile
* Clean up Makefile for inline compiling of connectivity modules.
2025-10-18 10:51:12 +00:00
bengris32
83db65b407 drivers: connectivity: bt: Don't define module init/exit if built-in to kernel
* The way MediaTek intended the connectivity modules to work when
  built into the kernel is to have conninfra initialise all of the
  connectivity modules by itself (Wi-Fi, BT, GPS, FM Radio, etc.).

* This initialisation is done once conninfra is fully initialised and
  ready to communicate with the other drivers. However, MediaTek
  forgot to guard the module_init and module_exit definitions with the
  macro used to compile the driver for built-in usage. This causes a
  race condition where the Bluetooth driver tries to initialise before
  conninfra is ready, leading to an early kernel panic due to a null
  pointer dereference.
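The missing guard amounts to something like the following (the macro and function names are illustrative, not MediaTek's actual symbols):

```diff
+/* When built-in, conninfra calls the init function itself once it is
+ * ready; only register module entry points for the modular build. */
+#ifndef MTK_BT_BUILT_IN
 module_init(btmtk_init);
 module_exit(btmtk_exit);
+#endif
```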

Change-Id: I77f831b2aed913865b5d77f117fdab9038e956b2
Signed-off-by: bengris32 <bengris32@protonmail.ch>
2025-10-18 10:51:11 +00:00
bengris32
3f033c08a4 drivers: connectivity: gen4m: Fix built-in config detection
Change-Id: I5e3eaf3d405cf90af1fb98f7a0281bd7a7dc298d
Signed-off-by: bengris32 <bengris32@protonmail.ch>
2025-10-18 10:51:11 +00:00
Erfan Abdi
3ad57d6e74 mediatek: Port connectivity modules for in-kernel building 2025-10-18 10:51:11 +00:00
bengris32
d54ffc163d connectivity: Fix function prototype warnings
Change-Id: Ie9f0bb34161a0fbda3202dce0deb1e94215a38c5
Signed-off-by: bengris32 <bengris32@protonmail.ch>
2025-10-18 10:51:11 +00:00
claxten10
a0a1372b9e misc: mtk/connectivity: Build BT driver inline
Signed-off-by: claxten10 <claxten10@gmail.com>
2025-10-18 10:51:11 +00:00
Vaisakh Murali
85e9ebb458 drivers: connectivity: Add an option to build wlan driver in kernel
* This is the way MediaTek prefers it, so be it

Signed-off-by: Vaisakh Murali <mvaisakh@statixos.com>
Change-Id: Ie02d6e887a0febad4515162d126abea2014eecf7
2025-10-18 10:51:11 +00:00
bengris32
2d5fcddd7a gen4m: Add NL80211_WPA_VERSION_3 enumeration
Signed-off-by: bengris32 <bengris32@protonmail.ch>
Change-Id: I9fe0aa9d6420380b727532ae054d75097bacd07f
2025-10-18 10:51:11 +00:00
Woomymy
09fc0929ae drivers: connectivity: common: Force-disable WMT debugging
Signed-off-by: Woomymy <woomy@woomy.be>
Change-Id: Ia4f6b799fc7858e77e05f50c285f6c0151d5c3f5
2025-10-18 10:51:11 +00:00
Woomymy
1ca87c7aac drivers: connectivity: bt-mt66xx: Disable debugging logs on all variants
Change-Id: I296bf4fdac66bc27ffbbe1dd04b3b6d4e4a7ff92
Signed-off-by: Woomymy <woomy@woomy.be>
2025-10-18 10:51:11 +00:00
zainarbani
f815d27834 connectivity: gen4m: Silence logspam
- Same behaviour on stock, shut it up.

Signed-off-by: zainarbani <zaintsyariev@gmail.com>
2025-10-18 10:51:11 +00:00
claxten10
25ad8858ce misc: mtk/connectivity: Remove redefinitions
Signed-off-by: claxten10 <claxten10@gmail.com>
2025-10-18 10:51:11 +00:00
bengris32
1fa6f4752a drivers: connectivity: gen4m: Use PM notifier to control WLAN suspend
Change-Id: Iaa8df18c147b9dc6c940e90de6d98ee2f1cb7f51
Signed-off-by: bengris32 <bengris32@protonmail.ch>
2025-10-18 10:51:11 +00:00
Erfan Abdi
5d5955b876 drivers: misc/mtk: connectivity: wlan: Fix wifi random disconnections
Change-Id: Id00b452996363a14d127f6f720bf0a00a8c167ee
Signed-off-by: LinkBoi00 <linkdevel@protonmail.com>
2025-10-18 10:51:11 +00:00
bengris32
042c42db1f drivers: connectivity: gen4m: Disable WLAN wakelocks
Change-Id: Ia30adf5adbb2b1b2de001b28a05cfef6186d25d2
Signed-off-by: bengris32 <bengris32@protonmail.ch>
2025-10-18 10:51:10 +00:00
Erfan Abdi
66c7b39315 connectivity: Import from BSP modules 2025-10-18 10:51:10 +00:00
Georg Veichtlbauer
25e5497273 arch: arm64: configs: Enable memory stats
This is actually read by BatteryStats in Android

Change-Id: Ie5717ebf33a2cab5a4f6ab1846b291931477dd95
2025-10-18 10:51:09 +00:00
Hemant Kumar
6edf38d15b arch: arm64: configs: Enable NCM function driver
Enables the configfs-supported NCM function driver

Change-Id: Ic23796c5a1388c41d533ca0f4fad04d01fe9e965
Signed-off-by: Hemant Kumar <hemantk@codeaurora.org>
2025-10-18 10:51:09 +00:00
Dan Vacura
8bd1c67f52 ANDROID: defconfig: enable CONFIG_USB_CONFIGFS_F_UVC
Enable the UVC function driver to allow USB gadgets
to connect as a standard video device to a host.
2025-10-18 10:51:09 +00:00
geeny
e09f6903db arch: arm64: configs: Enable WireGuard support
Change-Id: I143bab359f49ae4f7e1b560e39a68ddf56fc0400
2025-10-18 10:51:09 +00:00
Woomymy
5a9adedd3f arch: arm64: configs: Disable SLUB debugging completely
Change-Id: I7a5977c3fb97a546f3e402bedad1f77ff49ece3e
Signed-off-by: Woomymy <woomy@woomy.be>
2025-10-18 10:51:09 +00:00
Woomymy
061fa055f7 staging: mtk_ion: Silence IONMSG logspam
Change-Id: I7d932a56a6d1fb2eca6a76ed966566b000ae24b8
Signed-off-by: Woomymy <woomy@woomy.be>
2025-10-18 10:51:09 +00:00
Woomymy
6c6ad89f18 Revert "[ALPS05269737] USB: Enhance RNDIS Performance"
Reason for revert: Mediatek "optimized" RNDIS so well that they
literally broke NCM

This reverts commit 3f2cec825b.

Change-Id: Idf19e3761a9ce31f9a38c357ae758c87afdc0d78
Signed-off-by: Woomymy <woomy@woomy.be>
2025-10-18 10:51:09 +00:00
Woomymy
3b0970a18a Revert "[ALPS05333045] cert: fix 10466153"
This reverts commit 8145844a13.
2025-10-18 10:51:09 +00:00
Woomymy
0dfb7011c6 Revert "[ALPS05130667] usb: fix flag logic error"
This reverts commit 639af33ffc.
2025-10-18 10:51:09 +00:00
rogercl.yang
d0e6ae6bad ANDROID: adding __nocfi to cpuidle_enter_state
Background:
  When a CPU is going into an idle state, it informs RCU that it is
entering idle through rcu_idle_enter(), and RCU will then ignore
read-side critical sections on this CPU.
However, there is a CFI check mechanism inside the idle flow that
calls rcu_read_lock(), so "rcu_read_lock() used illegally while idle"
will be triggered because rcu_idle_enter() was already called.

  Besides, the pointer returned by rcu_dereference() might be invalid
because RCU read-side critical sections are ignored on the CPU going
idle; this can cause problems such as accessing the wrong data/address
or a kernel exception.

Based on the above description:
  We add __nocfi to cpuidle_enter_state to avoid
"rcu_read_lock() used illegally while idle!"
and to avoid using an invalid pointer from rcu_dereference()
in this situation.
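The fix itself is a one-word annotation on the function definition, roughly:

```diff
-int cpuidle_enter_state(struct cpuidle_device *dev,
-			struct cpuidle_driver *drv, int index)
+int __nocfi cpuidle_enter_state(struct cpuidle_device *dev,
+				struct cpuidle_driver *drv, int index)
```

__nocfi disables CFI instrumentation for this one function, so the indirect-call check (and the rcu_read_lock() it takes) is skipped inside the idle window.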

Bug: 169017431
Change-Id: I8bbe25704e18cfde351a8f4277dd4b44b07421f5
Signed-off-by: rogercl.yang <rogercl.yang@mediatek.com>
Signed-off-by: Chun-Hung Wu <chun-hung.wu@mediatek.com>
2025-10-18 10:51:09 +00:00
Sami Tolvanen
c1a3730327 ANDROID: arm64: add __va_function
With CFI, the compiler replaces function references with pointers
to the CFI jump table. This breaks passing these addresses to
code running at EL2, where the jump tables are not valid. Add a
__va_function macro similarly to the earlier __pa_function to take
address of the actual function in inline assembly and use that in
kvm_ksym_ref instead.
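The macro reads the raw symbol address with inline assembly so the compiler cannot substitute the CFI jump-table entry; a sketch close to the upstream patch (details may differ on this branch):

```c
/* Resolve the real function address, bypassing the CFI jump table. */
#define __va_function(x) ({						\
	void *addr;							\
	asm("adrp	%0, " __stringify(x) "\n\t"			\
	    "add	%0, %0, :lo12:" __stringify(x)			\
	    : "=r" (addr));						\
	addr;								\
})
```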

Bug: 163385976
Change-Id: I097b99409995512c00786300e7d18fe42c720a1b
(cherry picked from commit 2f4d6c9fd77c88ad0500aad4bf1f64aaf2654c49)
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
2025-10-18 10:51:09 +00:00
bengris32
0a86bf8ec6 mali_valhall: Remove MediaTek memtrack support
* We'll be using gs101 memtrack from now on.

Change-Id: I2d91e0d57e59549e3f5bf915f428bc9c14136478
Signed-off-by: bengris32 <bengris32@protonmail.ch>
2025-10-18 10:51:09 +00:00
Ankit Goyal
595d1d3195 mali_kbase: platform: Add per-process and global sysfs nodes for GPU mem usage
Bug: 191966412
Signed-off-by: Ankit Goyal <layog@google.com>
Change-Id: Id47feadaf9da7ef8e22494ab64e6263d7f87213c
2025-10-18 10:51:09 +00:00
Ankit Goyal
0bfff46aa0 mali_kbase: platform: Add per-process and global accounting for dma-buf pages
This adds dma_buf_pages alongside total_gpu_pages to track
GPU-addressable dmabuf pages for each process and for the complete
device.

Bug: 191966412
Signed-off-by: Ankit Goyal <layog@google.com>
Change-Id: I29da69e469395d30e784ea9c2ffddcf6fab688fd
2025-10-18 10:51:09 +00:00
Kimberly Brown
865fd36486 kobject: Add support for default attribute groups to kobj_type
kobj_type currently uses a list of individual attributes to store
default attributes. Attribute groups are more flexible than a list of
attributes because groups provide support for attribute visibility. So,
add support for default attribute groups to kobj_type.

In future patches, the existing uses of kobj_type’s attribute list will
be converted to attribute groups. When that is complete, kobj_type’s
attribute list, “default_attrs”, will be removed.
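The conversion this enables follows a simple pattern (the names here are illustrative):

```diff
 static struct attribute *foo_attrs[] = {
 	&foo_attr.attr,
 	NULL,
 };
+ATTRIBUTE_GROUPS(foo);	/* generates foo_groups from foo_attrs */

 static struct kobj_type foo_ktype = {
-	.default_attrs	= foo_attrs,
+	.default_groups	= foo_groups,
 };
```

Groups also support an is_visible() callback, which is what provides the attribute-visibility flexibility mentioned above.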

Signed-off-by: Kimberly Brown <kimbrownkd@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Change-Id: Id6e67b4b7311ee0ced3653220d4d5e86e3f2ede0
2025-10-18 10:51:09 +00:00
Vaisakh Murali
6dc14d7c93 drivers: mtk-perf: Shut up with the spam
* fpsgo is a proprietary kernel driver (yes, these exist in mtk land).
  This line keeps spamming the log, masking what I actually want from
  the logs.

Signed-off-by: Vaisakh Murali <vaisakhmurali@gmail.com>
Signed-off-by: zainarbani <zaintsyariev@gmail.com>
2025-10-18 10:51:08 +00:00
TheMalachite
1a0cefc094 arch: arm64: Remove console args from cmdline 2025-10-18 10:51:08 +00:00
kdrag0n
ab7245b494 arch: arm64: dts: Suppress verbose output during boot
This should make the kernel initialization faster as it suppresses any
potential serial console output.

Signed-off-by: kdrag0n <dragon@khronodragon.com>
2025-10-18 10:51:08 +00:00
Gagan Malvi
230d60f2d9 arch: arm64: dts: Remove cmdline argument for SLUB debugging.
Signed-off-by: Gagan Malvi <malvigagan@gmail.com>
2025-10-18 10:51:08 +00:00
Arian
23e70813f2 cpufreq: Ensure the minimal frequency is lower than the maximal frequency
* Libperfmgr raises the minimum frequency to 9999999 in order to boost
  the CPU to the maximum frequency. This usually works because it also
  raises the maximum frequency to 9999999 at init. However, if the
  maximum frequency is lowered afterwards, which mi_thermald does,
  setting the minimum frequency to 9999999 fails because it exceeds
  the maximum frequency.

* Allow setting a minimum frequency higher than the maximum frequency
  (and a maximum lower than the minimum) by clamping the minimum
  frequency to the maximum whenever it would exceed it.

Change-Id: I25b7ccde714aac14c8fdb9910857c3bd38c0aa05
2025-10-18 10:51:08 +00:00
Sultan Alsawaf
e2dec40cfa sched/fair: Compile out NUMA code entirely when NUMA is disabled
Scheduler code is very hot and every little optimization counts. Instead
of constantly checking sched_numa_balancing when NUMA is disabled,
compile it out.

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
Change-Id: I7334594fbe835f615a199cfe02ee526135abab06
2025-10-18 10:51:08 +00:00
bengris32
3747ca0b54 arch: arm64: configs: Disable kernel AAL support
* Not only is AAL broken with our blobs (constantly spamming
  that CONFIG_MTK_AAL_SUPPORT is disabled, even though it isn't),
  this will cause the brightness levels to be forcefully remapped
  to the 0-1024 range.

* Since AAL is broken anyway, just disable it.

Change-Id: Icbb402c435d7af1512d381a0a136d181f064771a
Signed-off-by: bengris32 <bengris32@protonmail.ch>
2025-10-18 10:51:08 +00:00
claxten10
e7f8031a28 misc: mtk/flashlight: Import minimal Xiaomi changes
Signed-off-by: claxten10 <claxten10@gmail.com>
2025-10-18 10:51:08 +00:00
huangsh4
45e45c274f misc: mtk/flashlight: optimize for flash light
camera:
optimize for flash light

Change-Id: Ia5c8614944c1554a1bb1771dcbd2d37bc56cfcbf
Signed-off-by: huangsh4 <huangsh4@lenovo.com>
Reviewed-on: https://gerrit.mot.com/1924674
Reviewed-by: Darong Huang <huangdra@motorola.com>
Reviewed-by: Heng Chen <chenheng3@lenovo.com>
Reviewed-by: Shanghui Zhang <zhangsh@motorola.com>
Reviewed-by: Shenhuai Huang <huangsh4@motorola.com>
Reviewed-by: Zhilong Wang <wangzl30@motorola.com>
Reviewed-by: Xu Ji <jixu@motorola.com>
Reviewed-by: Long Cheng <chengl1@motorola.com>
SME-Granted: SME Approvals Granted
SLTApproved: Slta Waiver
Tested-by: Jira Key
Reviewed-by: Zhuoran Xu <xuzr3@motorola.com>
Reviewed-by: Jian Zhang <zhangjo@motorola.com>
Reviewed-by: Zhichao Chen <chenzc2@motorola.com>
Submit-Approved: Jira Key
2025-10-18 10:51:08 +00:00
huangsh4
622de06aab misc: mtk/flashlight: enable flashlight feature
enable flashlight feature.

Change-Id: I59e570d68d49a48a0bf70ab45f4ecd4d74f4636c
Signed-off-by: huangsh4 <huangsh4@lenovo.com>
Reviewed-on: https://gerrit.mot.com/1906966
SLTApproved: Slta Waiver
SME-Granted: SME Approvals Granted
Submit-Approved: Jira Key
Tested-by: Jira Key
Reviewed-by: Jian Zhang <zhangjo@motorola.com>
Reviewed-by: Zhuoran Xu <xuzr3@motorola.com>
Reviewed-by: Zhilong Wang <wangzl30@motorola.com>
Reviewed-by: Shanghui Zhang <zhangsh@motorola.com>
Reviewed-by: Qiang Guo <guoq8@motorola.com>
Reviewed-by: Long Cheng <chengl1@motorola.com>
Reviewed-by: Zhichao Chen <chenzc2@motorola.com>
2025-10-18 10:51:08 +00:00
claxten10
9fc8e19f0a Revert "misc: mtk/flashlight: Import Xiaomi changes"
* Will move to Motorola's newer driver.

This reverts commit d83925de53b5da625bef2c41b95265eed31ccaa9.
2025-10-18 10:51:08 +00:00
Onelots
f17c956310 dts/mt6781: remove duplicate of vdec_gcon
Signed-off-by: Onelots <onelots@onelots.fr>
2025-10-18 10:51:08 +00:00
Onelots
756766bfb0 dts/mt6781: remove duplicate of venc@17000000
Signed-off-by: Onelots <onelots@onelots.fr>
Co-authored-by: Edrick Sinsuan <evcsinsuan@gmail.com>
2025-10-18 10:51:08 +00:00
Onelots
d78e733bea dts/mt6781: uart: disable all useless uart nodes
Signed-off-by: Onelots <onelots@onelots.fr>
2025-10-18 10:51:08 +00:00
Sultan Alsawaf
1cd79cdca9 binder: Stub out debug prints by default
Binder code is very hot, so checking frequently to see if a debug
message should be printed is a waste of cycles. We're not debugging
binder, so just stub out the debug prints to compile them out entirely.

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
2025-10-18 10:51:08 +00:00
kdrag0n
2980f5e379 arm64: debug: disable self-hosted debug by default
Signed-off-by: kdrag0n <dragon@khronodragon.com>
Signed-off-by: celtare21 <celtare21@gmail.com>
2025-10-18 10:51:07 +00:00
John Dias
592ea90818 binder: set binder_debug_mask=0 to suppress logging
Excessive logging -- not present on angler -- is affecting
performance, contributing to missed audio deadlines and likely other
latency-dependent tasks.
Bug: 30375418

Change-Id: I88b9c7fa4540ad46e564f44a0e589b5215e8487d
2025-10-18 10:51:07 +00:00
Pzqqt
d47beea86d drivers: scsi: Reduce logspam 2025-10-18 10:51:07 +00:00
claxten10
f829f8aad1 arch: arm64: configs: Enable SIA81XX driver
* Used in Indonesian models of fleur.

Signed-off-by: claxten10 <claxten10@gmail.com>
2025-10-18 10:51:07 +00:00
wulan17
cc00c447d2 arch: arm64: configs: Enable ThinLTO
Signed-off-by: wulan17 <wulan17@nusantararom.org>
2025-10-18 10:51:07 +00:00
85 changed files with 2568 additions and 2431 deletions

View File

@@ -485,7 +485,7 @@ section that the grace period must wait on.
noted by <tt>rcu_node_context_switch()</tt> on the left.
On the other hand, if the CPU takes a scheduler-clock interrupt
while executing in usermode, a quiescent state will be noted by
-<tt>rcu_sched_clock_irq()</tt> on the right.
+<tt>rcu_check_callbacks()</tt> on the right.
Either way, the passage through a quiescent state will be noted
in a per-CPU variable.
@@ -651,7 +651,7 @@ to end.
These callbacks are identified by <tt>rcu_advance_cbs()</tt>,
which is usually invoked by <tt>__note_gp_changes()</tt>.
As shown in the diagram below, this invocation can be triggered by
-the scheduling-clock interrupt (<tt>rcu_sched_clock_irq()</tt> on
+the scheduling-clock interrupt (<tt>rcu_check_callbacks()</tt> on
the left) or by idle entry (<tt>rcu_cleanup_after_idle()</tt> on
the right, but only for kernels build with
<tt>CONFIG_RCU_FAST_NO_HZ=y</tt>).

View File

@@ -349,7 +349,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5"
-style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_sched_clock_irq()</text>
+style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_check_callbacks()</text>
<rect
x="7069.6187"
y="5087.4678"


View File

@@ -3902,7 +3902,7 @@
font-style="normal"
y="-4418.6582"
x="3745.7725"
-xml:space="preserve">rcu_sched_clock_irq()</text>
+xml:space="preserve">rcu_check_callbacks()</text>
</g>
<g
transform="translate(-850.30204,55463.106)"
@@ -4968,7 +4968,7 @@
font-weight="bold"
font-size="192"
id="text202-7-5-19"
-style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_sched_clock_irq()</text>
+style="font-size:192px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;stroke-width:0.025in;font-family:Courier">rcu_check_callbacks()</text>
<rect
x="5314.2671"
y="82817.688"


View File

@@ -775,7 +775,7 @@
font-style="normal"
y="-4418.6582"
x="3745.7725"
-xml:space="preserve">rcu_sched_clock_irq()</text>
+xml:space="preserve">rcu_check_callbacks()</text>
</g>
<g
transform="translate(399.7744,828.86448)"


View File

@@ -3771,9 +3771,7 @@
see CONFIG_RAS_CEC help text.
rcu_nocbs= [KNL]
-The argument is a cpu list, as described above,
-except that the string "all" can be used to
-specify every CPU on the system.
+The argument is a cpu list, as described above.
In kernels built with CONFIG_RCU_NOCB_CPU=y, set
the specified list of CPUs to be no-callback CPUs.

View File

@@ -30,6 +30,7 @@
chosen: chosen {
bootargs = "root=/dev/ram \
vmalloc=400M swiotlb=noforce \
initcall_debug=1 \
firmware_class.path=/vendor/firmware \
page_owner=on quiet loop.max_part=7";
kaslr-seed = <0 0>;

View File

@@ -29,7 +29,6 @@ CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES="mediatek/mt6781"
CONFIG_BUILD_ARM64_DTB_OVERLAY_IMAGE=y
CONFIG_BUILD_ARM64_DTB_OVERLAY_IMAGE_NAMES="mediatek/fleur"
CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y
# CONFIG_PD_DBG_INFO is not set
CONFIG_ENERGY_MODEL=y
CONFIG_CPU_FREQ=y
CONFIG_CPU_FREQ_STAT=y
@@ -42,7 +41,6 @@ CONFIG_MODULE_SRCVERSION_ALL=y
# CONFIG_BLK_DEV_BSG is not set
CONFIG_BLK_INLINE_ENCRYPTION=y
CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y
CONFIG_MQ_IOSCHED_KYBER=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_ZSMALLOC=y
CONFIG_XFRM_MIGRATE=y
@@ -57,7 +55,6 @@ CONFIG_FW_LOADER_USER_HELPER=y
CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y
# CONFIG_FW_CACHE is not set
CONFIG_ZRAM=y
CONFIG_ZRAM_WRITEBACK=y
CONFIG_BLK_DEV_LOOP_MIN_COUNT=16
CONFIG_ANDROID_DEFAULT_SETTING=y
CONFIG_MTK_ANDROID_DEFAULT_SETTING=y
@@ -70,8 +67,6 @@ CONFIG_MTK_MUSB_DUAL_ROLE=y
CONFIG_MTK_PLATFORM="mt6785"
CONFIG_ARCH_MTK_PROJECT="fleur"
CONFIG_BLK_CGROUP=y
# CONFIG_MEMCG is not set
# CONFIG_MEMCG_SWAP is not set
CONFIG_MTK_BATTERY_OC_POWER_THROTTLING=y
CONFIG_MTK_BATTERY_PERCENTAGE_POWER_THROTTLING=y
CONFIG_MTK_LOW_BATTERY_POWER_THROTTLING=y
@@ -231,7 +226,6 @@ CONFIG_PPP_MULTILINK=y
CONFIG_PPPOE=y
CONFIG_PPP_ASYNC=y
CONFIG_PPP_SYNC_TTY=y
CONFIG_POWERCAP=y
CONFIG_USB_USBNET=y
# CONFIG_KEYBOARD_ATKBD is not set
CONFIG_KEYBOARD_MTK=y
@@ -407,7 +401,7 @@ CONFIG_CRYPTO_TWOFISH=y
CONFIG_CRYPTO_BLAKE2B=y
# CONFIG_CRYPTO_HW is not set
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO_REDUCED=y
CONFIG_DEBUG_INFO=y
CONFIG_FRAME_WARN=2800
CONFIG_DETECT_HUNG_TASK=y
CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y

View File

@@ -1,131 +1,258 @@
/*
* Copyright (c) 2017 ARM Ltd
* All rights reserved.
* Copyright (C) 2013 ARM Ltd.
* Copyright (C) 2013 Linaro.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the company may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
* This code is based on glibc cortex strings work originally authored by Linaro
* and re-licensed under GPLv2 for the Linux kernel. The original code can
* be found @
*
* THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/
* files/head:/src/aarch64/
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* Assumptions:
*
* ARMv8-a, AArch64, unaligned accesses.
*/
/* includes here */
#include <linux/linkage.h>
#include <asm/assembler.h>
/*
* compare memory areas(when two memory areas' offset are different,
* alignment handled by the hardware)
*
* Parameters:
* x0 - const memory area 1 pointer
* x1 - const memory area 2 pointer
* x2 - the maximal compare byte length
* Returns:
* x0 - a compare result, maybe less than, equal to, or greater than ZERO
*/
/* Parameters and result. */
#define src1 x0
#define src2 x1
#define limit x2
#define result w0
src1 .req x0
src2 .req x1
limit .req x2
result .req x0
/* Internal variables. */
#define data1 x3
#define data1w w3
#define data2 x4
#define data2w w4
#define tmp1 x5
data1 .req x3
data1w .req w3
data2 .req x4
data2w .req w4
has_nul .req x5
diff .req x6
endloop .req x7
tmp1 .req x8
tmp2 .req x9
tmp3 .req x10
pos .req x11
limit_wd .req x12
mask .req x13
/* Small inputs of less than 8 bytes are handled separately. This allows the
main code to be sped up using unaligned loads since there are now at least
8 bytes to be compared. If the first 8 bytes are equal, align src1.
This ensures each iteration does at most one unaligned access even if both
src1 and src2 are unaligned, and mutually aligned inputs behave as if
aligned. After the main loop, process the last 8 bytes using unaligned
accesses. */
.p2align 6
WEAK(memcmp)
subs limit, limit, 8
b.lo .Lless8
cbz limit, .Lret0
eor tmp1, src1, src2
tst tmp1, #7
b.ne .Lmisaligned8
ands tmp1, src1, #7
b.ne .Lmutual_align
sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
/*
* The input source addresses are at alignment boundary.
* Directly compare eight bytes each time.
*/
.Lloop_aligned:
ldr data1, [src1], #8
ldr data2, [src2], #8
.Lstart_realigned:
subs limit_wd, limit_wd, #1
eor diff, data1, data2 /* Non-zero if differences found. */
csinv endloop, diff, xzr, cs /* Last Dword or differences. */
cbz endloop, .Lloop_aligned
/* Limit >= 8, so check first 8 bytes using unaligned loads. */
ldr data1, [src1], 8
ldr data2, [src2], 8
and tmp1, src1, 7
add limit, limit, tmp1
cmp data1, data2
bne .Lreturn
/* Not reached the limit, must have found a diff. */
tbz limit_wd, #63, .Lnot_limit
/* Align src1 and adjust src2 with bytes not yet done. */
sub src1, src1, tmp1
sub src2, src2, tmp1
/* Limit % 8 == 0 => the diff is in the last 8 bytes. */
ands limit, limit, #7
b.eq .Lnot_limit
/*
* The remained bytes less than 8. It is needed to extract valid data
* from last eight bytes of the intended memory range.
*/
lsl limit, limit, #3 /* bytes-> bits. */
mov mask, #~0
CPU_BE( lsr mask, mask, limit )
CPU_LE( lsl mask, mask, limit )
bic data1, data1, mask
bic data2, data2, mask
subs limit, limit, 8
b.ls .Llast_bytes
orr diff, diff, mask
b .Lnot_limit
/* Loop performing 8 bytes per iteration using aligned src1.
Limit is pre-decremented by 8 and must be larger than zero.
Exit if <= 8 bytes left to do or if the data is not equal. */
.p2align 4
.Lloop8:
ldr data1, [src1], 8
ldr data2, [src2], 8
subs limit, limit, 8
ccmp data1, data2, 0, hi /* NZCV = 0b0000. */
b.eq .Lloop8
.Lmutual_align:
/*
* Sources are mutually aligned, but are not currently at an
* alignment boundary. Round down the addresses and then mask off
* the bytes that precede the start point.
*/
bic src1, src1, #7
bic src2, src2, #7
ldr data1, [src1], #8
ldr data2, [src2], #8
/*
* We can not add limit with alignment offset(tmp1) here. Since the
* addition probably make the limit overflown.
*/
sub limit_wd, limit, #1/*limit != 0, so no underflow.*/
and tmp3, limit_wd, #7
lsr limit_wd, limit_wd, #3
add tmp3, tmp3, tmp1
add limit_wd, limit_wd, tmp3, lsr #3
add limit, limit, tmp1/* Adjust the limit for the extra. */
cmp data1, data2
bne .Lreturn
lsl tmp1, tmp1, #3/* Bytes beyond alignment -> bits.*/
neg tmp1, tmp1/* Bits to alignment -64. */
mov tmp2, #~0
/*mask off the non-intended bytes before the start address.*/
CPU_BE( lsl tmp2, tmp2, tmp1 )/*Big-endian.Early bytes are at MSB*/
/* Little-endian. Early bytes are at LSB. */
CPU_LE( lsr tmp2, tmp2, tmp1 )
/* Compare last 1-8 bytes using unaligned access. */
.Llast_bytes:
ldr data1, [src1, limit]
ldr data2, [src2, limit]
orr data1, data1, tmp2
orr data2, data2, tmp2
b .Lstart_realigned
/* Compare data bytes and set return value to 0, -1 or 1. */
.Lreturn:
#ifndef __AARCH64EB__
rev data1, data1
rev data2, data2
#endif
cmp data1, data2
.Lret_eq:
cset result, ne
cneg result, result, lo
ret
/*src1 and src2 have different alignment offset.*/
.Lmisaligned8:
cmp limit, #8
b.lo .Ltiny8proc /*limit < 8: compare byte by byte*/
.p2align 4
/* Compare up to 8 bytes. Limit is [-8..-1]. */
.Lless8:
adds limit, limit, 4
b.lo .Lless4
ldr data1w, [src1], 4
ldr data2w, [src2], 4
and tmp1, src1, #7
neg tmp1, tmp1
add tmp1, tmp1, #8/*valid length in the first 8 bytes of src1*/
and tmp2, src2, #7
neg tmp2, tmp2
add tmp2, tmp2, #8/*valid length in the first 8 bytes of src2*/
subs tmp3, tmp1, tmp2
csel pos, tmp1, tmp2, hi /*Choose the maximum.*/
sub limit, limit, pos
/*compare the proceeding bytes in the first 8 byte segment.*/
.Ltinycmp:
ldrb data1w, [src1], #1
ldrb data2w, [src2], #1
subs pos, pos, #1
ccmp data1w, data2w, #0, ne /* NZCV = 0b0000. */
b.eq .Ltinycmp
cbnz pos, 1f /*diff occurred before the last byte.*/
cmp data1w, data2w
b.ne .Lreturn
sub limit, limit, 4
.Lless4:
adds limit, limit, 4
beq .Lret_eq
.Lbyte_loop:
ldrb data1w, [src1], 1
ldrb data2w, [src2], 1
subs limit, limit, 1
ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */
b.eq .Lbyte_loop
sub result, data1w, data2w
b.eq .Lstart_align
1:
sub result, data1, data2
ret
.Lstart_align:
lsr limit_wd, limit, #3
cbz limit_wd, .Lremain8
ands xzr, src1, #7
b.eq .Lrecal_offset
/*process more leading bytes to make src1 aligned...*/
add src1, src1, tmp3 /*backwards src1 to alignment boundary*/
add src2, src2, tmp3
sub limit, limit, tmp3
lsr limit_wd, limit, #3
cbz limit_wd, .Lremain8
/*load 8 bytes from aligned SRC1..*/
ldr data1, [src1], #8
ldr data2, [src2], #8
subs limit_wd, limit_wd, #1
eor diff, data1, data2 /*Non-zero if differences found.*/
csinv endloop, diff, xzr, ne
cbnz endloop, .Lunequal_proc
/* How far is the current SRC2 from the alignment boundary? */
and tmp3, tmp3, #7
.Lrecal_offset: /* src1 is aligned now. */
neg pos, tmp3
.Lloopcmp_proc:
/*
 * Divide the eight bytes into two parts. First, step src2 back to an
 * alignment boundary, load eight bytes, and compare from the src2
 * alignment boundary. If all 8 bytes are equal, start the second
 * part's comparison; otherwise finish the comparison. This special
 * handling guarantees that all accesses stay within the thread/task
 * address space, avoiding out-of-range accesses.
 */
ldr data1, [src1,pos]
ldr data2, [src2,pos]
eor diff, data1, data2 /* Non-zero if differences found. */
cbnz diff, .Lnot_limit
/* Process the second part. */
ldr data1, [src1], #8
ldr data2, [src2], #8
eor diff, data1, data2 /* Non-zero if differences found. */
subs limit_wd, limit_wd, #1
csinv endloop, diff, xzr, ne /* If limit_wd is 0, the comparison will finish. */
cbz endloop, .Lloopcmp_proc
.Lunequal_proc:
cbz diff, .Lremain8
/* There is difference occurred in the latest comparison. */
.Lnot_limit:
/*
 * For little-endian, reverse the least significant equal bits into the
 * MSB so that the following CLZ can count how many equal bits exist.
 */
CPU_LE( rev diff, diff )
CPU_LE( rev data1, data1 )
CPU_LE( rev data2, data2 )
/*
* The MS-non-zero bit of DIFF marks either the first bit
* that is different, or the end of the significant data.
* Shifting left now will bring the critical information into the
* top bits.
*/
clz pos, diff
lsl data1, data1, pos
lsl data2, data2, pos
/*
* We need to zero-extend (char is unsigned) the value and then
* perform a signed subtraction.
*/
lsr data1, data1, #56
sub result, data1, data2, lsr #56
ret
.Lremain8:
/* Limit % 8 == 0 => all data are equal. */
ands limit, limit, #7
b.eq .Lret0
.Ltiny8proc:
ldrb data1w, [src1], #1
ldrb data2w, [src2], #1
subs limit, limit, #1
ccmp data1w, data2w, #0, ne /* NZCV = 0b0000. */
b.eq .Ltiny8proc
sub result, data1, data2
ret
.Lret0:
mov result, #0
ret
ENDPIPROC(memcmp)


@@ -60,7 +60,6 @@ tmp3 .req x9
zeroones .req x10
pos .req x11
.p2align 6
WEAK(strcmp)
eor tmp1, src1, src2
mov zeroones, #REP8_01


@@ -10,7 +10,8 @@
#include "blk.h"
struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp)
static struct bio *next_bio(struct bio *bio, unsigned int nr_pages,
gfp_t gfp)
{
struct bio *new = bio_alloc(gfp, nr_pages);
@@ -60,7 +61,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
WARN_ON_ONCE((req_sects << 9) > UINT_MAX);
bio = blk_next_bio(bio, 0, gfp_mask);
bio = next_bio(bio, 0, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, op, 0);
@@ -154,7 +155,7 @@ static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
max_write_same_sectors = bio_allowed_max_sectors(q);
while (nr_sects) {
bio = blk_next_bio(bio, 1, gfp_mask);
bio = next_bio(bio, 1, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
bio->bi_vcnt = 1;
@@ -230,7 +231,7 @@ static int __blkdev_issue_write_zeroes(struct block_device *bdev,
return -EOPNOTSUPP;
while (nr_sects) {
bio = blk_next_bio(bio, 0, gfp_mask);
bio = next_bio(bio, 0, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE_ZEROES;
@@ -281,8 +282,8 @@ static int __blkdev_issue_zero_pages(struct block_device *bdev,
return -EPERM;
while (nr_sects != 0) {
bio = blk_next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
gfp_mask);
bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
gfp_mask);
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);


@@ -50,12 +50,12 @@ blk_mq_sched_allow_merge(struct request_queue *q, struct request *rq,
return true;
}
static inline void blk_mq_sched_completed_request(struct request *rq, u64 now)
static inline void blk_mq_sched_completed_request(struct request *rq)
{
struct elevator_queue *e = rq->q->elevator;
if (e && e->type->ops.mq.completed_request)
e->type->ops.mq.completed_request(rq, now);
e->type->ops.mq.completed_request(rq);
}
static inline void blk_mq_sched_started_request(struct request *rq)


@@ -527,9 +527,6 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
blk_stat_add(rq, now);
}
if (rq->internal_tag != -1)
blk_mq_sched_completed_request(rq, now);
blk_account_io_done(rq, now);
if (rq->end_io) {
@@ -566,6 +563,8 @@ static void __blk_mq_complete_request(struct request *rq)
if (!blk_mq_mark_complete(rq))
return;
if (rq->internal_tag != -1)
blk_mq_sched_completed_request(rq);
if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) {
rq->q->softirq_done_fn(rq);


@@ -300,11 +300,6 @@ static ssize_t queue_zoned_show(struct request_queue *q, char *page)
}
}
static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
{
return queue_var_show(blk_queue_nr_zones(q), page);
}
static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
{
return queue_var_show((blk_queue_nomerges(q) << 1) |
@@ -642,11 +637,6 @@ static struct queue_sysfs_entry queue_zoned_entry = {
.show = queue_zoned_show,
};
static struct queue_sysfs_entry queue_nr_zones_entry = {
.attr = {.name = "nr_zones", .mode = 0444 },
.show = queue_nr_zones_show,
};
static struct queue_sysfs_entry queue_nomerges_entry = {
.attr = {.name = "nomerges", .mode = 0644 },
.show = queue_nomerges_show,
@@ -737,7 +727,6 @@ static struct attribute *default_attrs[] = {
&queue_write_zeroes_max_entry.attr,
&queue_nonrot_entry.attr,
&queue_zoned_entry.attr,
&queue_nr_zones_entry.attr,
&queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr,
&queue_iostats_entry.attr,


@@ -13,8 +13,6 @@
#include <linux/rbtree.h>
#include <linux/blkdev.h>
#include "blk.h"
static inline sector_t blk_zone_start(struct request_queue *q,
sector_t sector)
{
@@ -65,33 +63,6 @@ void __blk_req_zone_write_unlock(struct request *rq)
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
static inline unsigned int __blkdev_nr_zones(struct request_queue *q,
sector_t nr_sectors)
{
unsigned long zone_sectors = blk_queue_zone_sectors(q);
return (nr_sectors + zone_sectors - 1) >> ilog2(zone_sectors);
}
/**
* blkdev_nr_zones - Get number of zones
* @bdev: Target block device
*
* Description:
* Return the total number of zones of a zoned block device.
* For a regular block device, the number of zones is always 0.
*/
unsigned int blkdev_nr_zones(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
if (!blk_queue_is_zoned(q))
return 0;
return __blkdev_nr_zones(q, bdev->bd_part->nr_sects);
}
EXPORT_SYMBOL_GPL(blkdev_nr_zones);
/*
* Check that a zone report belongs to the partition.
* If yes, fix its start sector and write pointer, copy it in the
@@ -282,13 +253,13 @@ int blkdev_reset_zones(struct block_device *bdev,
struct bio *bio;
int ret;
if (!q)
return -ENXIO;
if (!blk_queue_is_zoned(q))
return -EOPNOTSUPP;
if (bdev_read_only(bdev))
return -EPERM;
if (!nr_sectors || end_sector > bdev->bd_part->nr_sects)
if (end_sector > bdev->bd_part->nr_sects)
/* Out of range */
return -EINVAL;
@@ -301,14 +272,19 @@ int blkdev_reset_zones(struct block_device *bdev,
end_sector != bdev->bd_part->nr_sects)
return -EINVAL;
blk_start_plug(&plug);
while (sector < end_sector) {
bio = blk_next_bio(bio, 0, gfp_mask);
bio = bio_alloc(gfp_mask, 0);
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
ret = submit_bio_wait(bio);
bio_put(bio);
if (ret)
return ret;
sector += zone_sectors;
/* This may take a while, so be nice to others */
@@ -316,12 +292,7 @@ int blkdev_reset_zones(struct block_device *bdev,
}
ret = submit_bio_wait(bio);
bio_put(bio);
blk_finish_plug(&plug);
return ret;
return 0;
}
EXPORT_SYMBOL_GPL(blkdev_reset_zones);
@@ -354,7 +325,8 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
if (!rep.nr_zones)
return -EINVAL;
rep.nr_zones = min(blkdev_nr_zones(bdev), rep.nr_zones);
if (rep.nr_zones > INT_MAX / sizeof(struct blk_zone))
return -ERANGE;
zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone),
GFP_KERNEL | __GFP_ZERO);


@@ -438,6 +438,4 @@ extern int blk_iolatency_init(struct request_queue *q);
static inline int blk_iolatency_init(struct request_queue *q) { return 0; }
#endif
struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp);
#endif /* BLK_INTERNAL_H */


@@ -537,10 +537,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
case BLKRESETZONE:
return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg);
case BLKGETZONESZ:
return put_uint(arg, bdev_zone_sectors(bdev));
case BLKGETNRZONES:
return put_uint(arg, blkdev_nr_zones(bdev));
case HDIO_GETGEO:
return blkdev_getgeo(bdev, argp);
case BLKRAGET:


@@ -29,30 +29,19 @@
#include "blk-mq-debugfs.h"
#include "blk-mq-sched.h"
#include "blk-mq-tag.h"
#include "blk-stat.h"
#define CREATE_TRACE_POINTS
#include <trace/events/kyber.h>
/*
* Scheduling domains: the device is divided into multiple domains based on the
* request type.
*/
/* Scheduling domains. */
enum {
KYBER_READ,
KYBER_WRITE,
KYBER_DISCARD,
KYBER_OTHER,
KYBER_SYNC_WRITE,
KYBER_OTHER, /* Async writes, discard, etc. */
KYBER_NUM_DOMAINS,
};
static const char *kyber_domain_names[] = {
[KYBER_READ] = "READ",
[KYBER_WRITE] = "WRITE",
[KYBER_DISCARD] = "DISCARD",
[KYBER_OTHER] = "OTHER",
};
enum {
KYBER_MIN_DEPTH = 256,
/*
* In order to prevent starvation of synchronous requests by a flood of
* asynchronous requests, we reserve 25% of requests for synchronous
@@ -62,87 +51,25 @@ enum {
};
/*
* Maximum device-wide depth for each scheduling domain.
* Initial device-wide depths for each scheduling domain.
*
* Even for fast devices with lots of tags like NVMe, you can saturate the
* device with only a fraction of the maximum possible queue depth. So, we cap
* these to a reasonable value.
* Even for fast devices with lots of tags like NVMe, you can saturate
* the device with only a fraction of the maximum possible queue depth.
* So, we cap these to a reasonable value.
*/
static const unsigned int kyber_depth[] = {
[KYBER_READ] = 256,
[KYBER_WRITE] = 128,
[KYBER_DISCARD] = 64,
[KYBER_OTHER] = 16,
[KYBER_SYNC_WRITE] = 128,
[KYBER_OTHER] = 64,
};
/*
* Default latency targets for each scheduling domain.
*/
static const u64 kyber_latency_targets[] = {
[KYBER_READ] = 2ULL * NSEC_PER_MSEC,
[KYBER_WRITE] = 10ULL * NSEC_PER_MSEC,
[KYBER_DISCARD] = 5ULL * NSEC_PER_SEC,
};
/*
* Batch size (number of requests we'll dispatch in a row) for each scheduling
* domain.
* Scheduling domain batch sizes. We favor reads.
*/
static const unsigned int kyber_batch_size[] = {
[KYBER_READ] = 16,
[KYBER_WRITE] = 8,
[KYBER_DISCARD] = 1,
[KYBER_OTHER] = 1,
};
/*
* Requests latencies are recorded in a histogram with buckets defined relative
* to the target latency:
*
* <= 1/4 * target latency
* <= 1/2 * target latency
* <= 3/4 * target latency
* <= target latency
* <= 1 1/4 * target latency
* <= 1 1/2 * target latency
* <= 1 3/4 * target latency
* > 1 3/4 * target latency
*/
enum {
/*
* The width of the latency histogram buckets is
* 1 / (1 << KYBER_LATENCY_SHIFT) * target latency.
*/
KYBER_LATENCY_SHIFT = 2,
/*
* The first (1 << KYBER_LATENCY_SHIFT) buckets are <= target latency,
* thus, "good".
*/
KYBER_GOOD_BUCKETS = 1 << KYBER_LATENCY_SHIFT,
/* There are also (1 << KYBER_LATENCY_SHIFT) "bad" buckets. */
KYBER_LATENCY_BUCKETS = 2 << KYBER_LATENCY_SHIFT,
};
/*
* We measure both the total latency and the I/O latency (i.e., latency after
* submitting to the device).
*/
enum {
KYBER_TOTAL_LATENCY,
KYBER_IO_LATENCY,
};
static const char *kyber_latency_type_names[] = {
[KYBER_TOTAL_LATENCY] = "total",
[KYBER_IO_LATENCY] = "I/O",
};
/*
* Per-cpu latency histograms: total latency and I/O latency for each scheduling
* domain except for KYBER_OTHER.
*/
struct kyber_cpu_latency {
atomic_t buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];
[KYBER_SYNC_WRITE] = 8,
[KYBER_OTHER] = 8,
};
/*
@@ -161,9 +88,12 @@ struct kyber_ctx_queue {
struct kyber_queue_data {
struct request_queue *q;
struct blk_stat_callback *cb;
/*
* Each scheduling domain has a limited number of in-flight requests
* device-wide, limited by these tokens.
* The device is divided into multiple scheduling domains based on the
* request type. Each domain has a fixed number of in-flight requests of
* that type device-wide, limited by these tokens.
*/
struct sbitmap_queue domain_tokens[KYBER_NUM_DOMAINS];
@@ -173,19 +103,8 @@ struct kyber_queue_data {
*/
unsigned int async_depth;
struct kyber_cpu_latency __percpu *cpu_latency;
/* Timer for stats aggregation and adjusting domain tokens. */
struct timer_list timer;
unsigned int latency_buckets[KYBER_OTHER][2][KYBER_LATENCY_BUCKETS];
unsigned long latency_timeout[KYBER_OTHER];
int domain_p99[KYBER_OTHER];
/* Target latencies in nanoseconds. */
u64 latency_targets[KYBER_OTHER];
u64 read_lat_nsec, write_lat_nsec;
};
struct kyber_hctx_data {
@@ -205,219 +124,233 @@ static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
static unsigned int kyber_sched_domain(unsigned int op)
{
switch (op & REQ_OP_MASK) {
case REQ_OP_READ:
if ((op & REQ_OP_MASK) == REQ_OP_READ)
return KYBER_READ;
case REQ_OP_WRITE:
return KYBER_WRITE;
case REQ_OP_DISCARD:
return KYBER_DISCARD;
default:
else if ((op & REQ_OP_MASK) == REQ_OP_WRITE && op_is_sync(op))
return KYBER_SYNC_WRITE;
else
return KYBER_OTHER;
}
}
static void flush_latency_buckets(struct kyber_queue_data *kqd,
struct kyber_cpu_latency *cpu_latency,
unsigned int sched_domain, unsigned int type)
{
unsigned int *buckets = kqd->latency_buckets[sched_domain][type];
atomic_t *cpu_buckets = cpu_latency->buckets[sched_domain][type];
unsigned int bucket;
enum {
NONE = 0,
GOOD = 1,
GREAT = 2,
BAD = -1,
AWFUL = -2,
};
for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++)
buckets[bucket] += atomic_xchg(&cpu_buckets[bucket], 0);
#define IS_GOOD(status) ((status) > 0)
#define IS_BAD(status) ((status) < 0)
static int kyber_lat_status(struct blk_stat_callback *cb,
unsigned int sched_domain, u64 target)
{
u64 latency;
if (!cb->stat[sched_domain].nr_samples)
return NONE;
latency = cb->stat[sched_domain].mean;
if (latency >= 2 * target)
return AWFUL;
else if (latency > target)
return BAD;
else if (latency <= target / 2)
return GREAT;
else /* (latency <= target) */
return GOOD;
}
/*
* Calculate the histogram bucket with the given percentile rank, or -1 if there
* aren't enough samples yet.
* Adjust the read or synchronous write depth given the status of reads and
* writes. The goal is that the latencies of the two domains are fair (i.e., if
* one is good, then the other is good).
*/
static int calculate_percentile(struct kyber_queue_data *kqd,
unsigned int sched_domain, unsigned int type,
unsigned int percentile)
static void kyber_adjust_rw_depth(struct kyber_queue_data *kqd,
unsigned int sched_domain, int this_status,
int other_status)
{
unsigned int *buckets = kqd->latency_buckets[sched_domain][type];
unsigned int bucket, samples = 0, percentile_samples;
for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS; bucket++)
samples += buckets[bucket];
if (!samples)
return -1;
unsigned int orig_depth, depth;
/*
* We do the calculation once we have 500 samples or one second passes
* since the first sample was recorded, whichever comes first.
* If this domain had no samples, or reads and writes are both good or
* both bad, don't adjust the depth.
*/
if (!kqd->latency_timeout[sched_domain])
kqd->latency_timeout[sched_domain] = max(jiffies + HZ, 1UL);
if (samples < 500 &&
time_is_after_jiffies(kqd->latency_timeout[sched_domain])) {
return -1;
}
kqd->latency_timeout[sched_domain] = 0;
if (this_status == NONE ||
(IS_GOOD(this_status) && IS_GOOD(other_status)) ||
(IS_BAD(this_status) && IS_BAD(other_status)))
return;
percentile_samples = DIV_ROUND_UP(samples * percentile, 100);
for (bucket = 0; bucket < KYBER_LATENCY_BUCKETS - 1; bucket++) {
if (buckets[bucket] >= percentile_samples)
orig_depth = depth = kqd->domain_tokens[sched_domain].sb.depth;
if (other_status == NONE) {
depth++;
} else {
switch (this_status) {
case GOOD:
if (other_status == AWFUL)
depth -= max(depth / 4, 1U);
else
depth -= max(depth / 8, 1U);
break;
percentile_samples -= buckets[bucket];
case GREAT:
if (other_status == AWFUL)
depth /= 2;
else
depth -= max(depth / 4, 1U);
break;
case BAD:
depth++;
break;
case AWFUL:
if (other_status == GREAT)
depth += 2;
else
depth++;
break;
}
}
memset(buckets, 0, sizeof(kqd->latency_buckets[sched_domain][type]));
trace_kyber_latency(kqd->q, kyber_domain_names[sched_domain],
kyber_latency_type_names[type], percentile,
bucket + 1, 1 << KYBER_LATENCY_SHIFT, samples);
return bucket;
}
static void kyber_resize_domain(struct kyber_queue_data *kqd,
unsigned int sched_domain, unsigned int depth)
{
depth = clamp(depth, 1U, kyber_depth[sched_domain]);
if (depth != kqd->domain_tokens[sched_domain].sb.depth) {
if (depth != orig_depth)
sbitmap_queue_resize(&kqd->domain_tokens[sched_domain], depth);
trace_kyber_adjust(kqd->q, kyber_domain_names[sched_domain],
depth);
}
}
static void kyber_timer_fn(struct timer_list *t)
/*
* Adjust the depth of other requests given the status of reads and synchronous
* writes. As long as either domain is doing fine, we don't throttle, but if
* both domains are doing badly, we throttle heavily.
*/
static void kyber_adjust_other_depth(struct kyber_queue_data *kqd,
int read_status, int write_status,
bool have_samples)
{
struct kyber_queue_data *kqd = from_timer(kqd, t, timer);
unsigned int sched_domain;
int cpu;
bool bad = false;
unsigned int orig_depth, depth;
int status;
/* Sum all of the per-cpu latency histograms. */
for_each_online_cpu(cpu) {
struct kyber_cpu_latency *cpu_latency;
orig_depth = depth = kqd->domain_tokens[KYBER_OTHER].sb.depth;
cpu_latency = per_cpu_ptr(kqd->cpu_latency, cpu);
for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
flush_latency_buckets(kqd, cpu_latency, sched_domain,
KYBER_TOTAL_LATENCY);
flush_latency_buckets(kqd, cpu_latency, sched_domain,
KYBER_IO_LATENCY);
if (read_status == NONE && write_status == NONE) {
depth += 2;
} else if (have_samples) {
if (read_status == NONE)
status = write_status;
else if (write_status == NONE)
status = read_status;
else
status = max(read_status, write_status);
switch (status) {
case GREAT:
depth += 2;
break;
case GOOD:
depth++;
break;
case BAD:
depth -= max(depth / 4, 1U);
break;
case AWFUL:
depth /= 2;
break;
}
}
/*
* Check if any domains have a high I/O latency, which might indicate
* congestion in the device. Note that we use the p90; we don't want to
* be too sensitive to outliers here.
*/
for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
int p90;
p90 = calculate_percentile(kqd, sched_domain, KYBER_IO_LATENCY,
90);
if (p90 >= KYBER_GOOD_BUCKETS)
bad = true;
}
/*
* Adjust the scheduling domain depths. If we determined that there was
* congestion, we throttle all domains with good latencies. Either way,
* we ease up on throttling domains with bad latencies.
*/
for (sched_domain = 0; sched_domain < KYBER_OTHER; sched_domain++) {
unsigned int orig_depth, depth;
int p99;
p99 = calculate_percentile(kqd, sched_domain,
KYBER_TOTAL_LATENCY, 99);
/*
* This is kind of subtle: different domains will not
* necessarily have enough samples to calculate the latency
* percentiles during the same window, so we have to remember
* the p99 for the next time we observe congestion; once we do,
* we don't want to throttle again until we get more data, so we
* reset it to -1.
*/
if (bad) {
if (p99 < 0)
p99 = kqd->domain_p99[sched_domain];
kqd->domain_p99[sched_domain] = -1;
} else if (p99 >= 0) {
kqd->domain_p99[sched_domain] = p99;
}
if (p99 < 0)
continue;
/*
* If this domain has bad latency, throttle less. Otherwise,
* throttle more iff we determined that there is congestion.
*
* The new depth is scaled linearly with the p99 latency vs the
* latency target. E.g., if the p99 is 3/4 of the target, then
* we throttle down to 3/4 of the current depth, and if the p99
* is 2x the target, then we double the depth.
*/
if (bad || p99 >= KYBER_GOOD_BUCKETS) {
orig_depth = kqd->domain_tokens[sched_domain].sb.depth;
depth = (orig_depth * (p99 + 1)) >> KYBER_LATENCY_SHIFT;
kyber_resize_domain(kqd, sched_domain, depth);
}
}
depth = clamp(depth, 1U, kyber_depth[KYBER_OTHER]);
if (depth != orig_depth)
sbitmap_queue_resize(&kqd->domain_tokens[KYBER_OTHER], depth);
}
static unsigned int kyber_sched_tags_shift(struct request_queue *q)
/*
* Apply heuristics for limiting queue depths based on gathered latency
* statistics.
*/
static void kyber_stat_timer_fn(struct blk_stat_callback *cb)
{
struct kyber_queue_data *kqd = cb->data;
int read_status, write_status;
read_status = kyber_lat_status(cb, KYBER_READ, kqd->read_lat_nsec);
write_status = kyber_lat_status(cb, KYBER_SYNC_WRITE, kqd->write_lat_nsec);
kyber_adjust_rw_depth(kqd, KYBER_READ, read_status, write_status);
kyber_adjust_rw_depth(kqd, KYBER_SYNC_WRITE, write_status, read_status);
kyber_adjust_other_depth(kqd, read_status, write_status,
cb->stat[KYBER_OTHER].nr_samples != 0);
/*
* Continue monitoring latencies if we aren't hitting the targets or
* we're still throttling other requests.
*/
if (!blk_stat_is_active(kqd->cb) &&
((IS_BAD(read_status) || IS_BAD(write_status) ||
kqd->domain_tokens[KYBER_OTHER].sb.depth < kyber_depth[KYBER_OTHER])))
blk_stat_activate_msecs(kqd->cb, 100);
}
static unsigned int kyber_sched_tags_shift(struct kyber_queue_data *kqd)
{
/*
* All of the hardware queues have the same depth, so we can just grab
* the shift of the first one.
*/
return q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
return kqd->q->queue_hw_ctx[0]->sched_tags->bitmap_tags.sb.shift;
}
static int kyber_bucket_fn(const struct request *rq)
{
return kyber_sched_domain(rq->cmd_flags);
}
static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
{
struct kyber_queue_data *kqd;
unsigned int max_tokens;
unsigned int shift;
int ret = -ENOMEM;
int i;
kqd = kzalloc_node(sizeof(*kqd), GFP_KERNEL, q->node);
kqd = kmalloc_node(sizeof(*kqd), GFP_KERNEL, q->node);
if (!kqd)
goto err;
kqd->q = q;
kqd->cpu_latency = alloc_percpu_gfp(struct kyber_cpu_latency,
GFP_KERNEL | __GFP_ZERO);
if (!kqd->cpu_latency)
kqd->cb = blk_stat_alloc_callback(kyber_stat_timer_fn, kyber_bucket_fn,
KYBER_NUM_DOMAINS, kqd);
if (!kqd->cb)
goto err_kqd;
timer_setup(&kqd->timer, kyber_timer_fn, 0);
/*
* The maximum number of tokens for any scheduling domain is at least
* the queue depth of a single hardware queue. If the hardware doesn't
* have many tags, still provide a reasonable number.
*/
max_tokens = max_t(unsigned int, q->tag_set->queue_depth,
KYBER_MIN_DEPTH);
for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
WARN_ON(!kyber_depth[i]);
WARN_ON(!kyber_batch_size[i]);
ret = sbitmap_queue_init_node(&kqd->domain_tokens[i],
kyber_depth[i], -1, false,
GFP_KERNEL, q->node);
max_tokens, -1, false, GFP_KERNEL,
q->node);
if (ret) {
while (--i >= 0)
sbitmap_queue_free(&kqd->domain_tokens[i]);
goto err_buckets;
goto err_cb;
}
sbitmap_queue_resize(&kqd->domain_tokens[i], kyber_depth[i]);
}
for (i = 0; i < KYBER_OTHER; i++) {
kqd->domain_p99[i] = -1;
kqd->latency_targets[i] = kyber_latency_targets[i];
}
shift = kyber_sched_tags_shift(q);
shift = kyber_sched_tags_shift(kqd);
kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
kqd->read_lat_nsec = 2000000ULL;
kqd->write_lat_nsec = 10000000ULL;
return kqd;
err_buckets:
free_percpu(kqd->cpu_latency);
err_cb:
blk_stat_free_callback(kqd->cb);
err_kqd:
kfree(kqd);
err:
@@ -439,24 +372,25 @@ static int kyber_init_sched(struct request_queue *q, struct elevator_type *e)
return PTR_ERR(kqd);
}
blk_stat_enable_accounting(q);
eq->elevator_data = kqd;
q->elevator = eq;
blk_stat_add_callback(q, kqd->cb);
return 0;
}
static void kyber_exit_sched(struct elevator_queue *e)
{
struct kyber_queue_data *kqd = e->elevator_data;
struct request_queue *q = kqd->q;
int i;
del_timer_sync(&kqd->timer);
blk_stat_remove_callback(q, kqd->cb);
for (i = 0; i < KYBER_NUM_DOMAINS; i++)
sbitmap_queue_free(&kqd->domain_tokens[i]);
free_percpu(kqd->cpu_latency);
blk_stat_free_callback(kqd->cb);
kfree(kqd);
}
@@ -624,44 +558,41 @@ static void kyber_finish_request(struct request *rq)
rq_clear_domain_token(kqd, rq);
}
static void add_latency_sample(struct kyber_cpu_latency *cpu_latency,
unsigned int sched_domain, unsigned int type,
u64 target, u64 latency)
static void kyber_completed_request(struct request *rq)
{
unsigned int bucket;
u64 divisor;
struct request_queue *q = rq->q;
struct kyber_queue_data *kqd = q->elevator->elevator_data;
unsigned int sched_domain;
u64 now, latency, target;
if (latency > 0) {
divisor = max_t(u64, target >> KYBER_LATENCY_SHIFT, 1);
bucket = min_t(unsigned int, div64_u64(latency - 1, divisor),
KYBER_LATENCY_BUCKETS - 1);
} else {
bucket = 0;
/*
* Check if this request met our latency goal. If not, quickly gather
* some statistics and start throttling.
*/
sched_domain = kyber_sched_domain(rq->cmd_flags);
switch (sched_domain) {
case KYBER_READ:
target = kqd->read_lat_nsec;
break;
case KYBER_SYNC_WRITE:
target = kqd->write_lat_nsec;
break;
default:
return;
}
atomic_inc(&cpu_latency->buckets[sched_domain][type][bucket]);
}
static void kyber_completed_request(struct request *rq, u64 now)
{
struct kyber_queue_data *kqd = rq->q->elevator->elevator_data;
struct kyber_cpu_latency *cpu_latency;
unsigned int sched_domain;
u64 target;
sched_domain = kyber_sched_domain(rq->cmd_flags);
if (sched_domain == KYBER_OTHER)
/* If we are already monitoring latencies, don't check again. */
if (blk_stat_is_active(kqd->cb))
return;
cpu_latency = get_cpu_ptr(kqd->cpu_latency);
target = kqd->latency_targets[sched_domain];
add_latency_sample(cpu_latency, sched_domain, KYBER_TOTAL_LATENCY,
target, now - rq->start_time_ns);
add_latency_sample(cpu_latency, sched_domain, KYBER_IO_LATENCY, target,
now - rq->io_start_time_ns);
put_cpu_ptr(kqd->cpu_latency);
now = ktime_get_ns();
if (now < rq->io_start_time_ns)
return;
timer_reduce(&kqd->timer, jiffies + HZ / 10);
latency = now - rq->io_start_time_ns;
if (latency > target)
blk_stat_activate_msecs(kqd->cb, 10);
}
struct flush_kcq_data {
@@ -782,9 +713,6 @@ kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
rq_set_domain_token(rq, nr);
list_del_init(&rq->queuelist);
return rq;
} else {
trace_kyber_throttled(kqd->q,
kyber_domain_names[khd->cur_domain]);
}
} else if (sbitmap_any_bit_set(&khd->kcq_map[khd->cur_domain])) {
nr = kyber_get_domain_token(kqd, khd, hctx);
@@ -795,9 +723,6 @@ kyber_dispatch_cur_domain(struct kyber_queue_data *kqd,
rq_set_domain_token(rq, nr);
list_del_init(&rq->queuelist);
return rq;
} else {
trace_kyber_throttled(kqd->q,
kyber_domain_names[khd->cur_domain]);
}
}
@@ -865,17 +790,17 @@ static bool kyber_has_work(struct blk_mq_hw_ctx *hctx)
return false;
}
#define KYBER_LAT_SHOW_STORE(domain, name) \
static ssize_t kyber_##name##_lat_show(struct elevator_queue *e, \
char *page) \
#define KYBER_LAT_SHOW_STORE(op) \
static ssize_t kyber_##op##_lat_show(struct elevator_queue *e, \
char *page) \
{ \
struct kyber_queue_data *kqd = e->elevator_data; \
\
return sprintf(page, "%llu\n", kqd->latency_targets[domain]); \
return sprintf(page, "%llu\n", kqd->op##_lat_nsec); \
} \
\
static ssize_t kyber_##name##_lat_store(struct elevator_queue *e, \
const char *page, size_t count) \
static ssize_t kyber_##op##_lat_store(struct elevator_queue *e, \
const char *page, size_t count) \
{ \
struct kyber_queue_data *kqd = e->elevator_data; \
unsigned long long nsec; \
@@ -885,12 +810,12 @@ static ssize_t kyber_##name##_lat_store(struct elevator_queue *e, \
if (ret) \
return ret; \
\
kqd->latency_targets[domain] = nsec; \
kqd->op##_lat_nsec = nsec; \
\
return count; \
}
KYBER_LAT_SHOW_STORE(KYBER_READ, read);
KYBER_LAT_SHOW_STORE(KYBER_WRITE, write);
KYBER_LAT_SHOW_STORE(read);
KYBER_LAT_SHOW_STORE(write);
#undef KYBER_LAT_SHOW_STORE
#define KYBER_LAT_ATTR(op) __ATTR(op##_lat_nsec, 0644, kyber_##op##_lat_show, kyber_##op##_lat_store)
@@ -957,8 +882,7 @@ static int kyber_##name##_waiting_show(void *data, struct seq_file *m) \
return 0; \
}
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_READ, read)
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_WRITE, write)
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_DISCARD, discard)
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_SYNC_WRITE, sync_write)
KYBER_DEBUGFS_DOMAIN_ATTRS(KYBER_OTHER, other)
#undef KYBER_DEBUGFS_DOMAIN_ATTRS
@@ -976,7 +900,20 @@ static int kyber_cur_domain_show(void *data, struct seq_file *m)
struct blk_mq_hw_ctx *hctx = data;
struct kyber_hctx_data *khd = hctx->sched_data;
seq_printf(m, "%s\n", kyber_domain_names[khd->cur_domain]);
switch (khd->cur_domain) {
case KYBER_READ:
seq_puts(m, "READ\n");
break;
case KYBER_SYNC_WRITE:
seq_puts(m, "SYNC_WRITE\n");
break;
case KYBER_OTHER:
seq_puts(m, "OTHER\n");
break;
default:
seq_printf(m, "%u\n", khd->cur_domain);
break;
}
return 0;
}
@@ -993,8 +930,7 @@ static int kyber_batching_show(void *data, struct seq_file *m)
{#name "_tokens", 0400, kyber_##name##_tokens_show}
static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = {
KYBER_QUEUE_DOMAIN_ATTRS(read),
KYBER_QUEUE_DOMAIN_ATTRS(write),
KYBER_QUEUE_DOMAIN_ATTRS(discard),
KYBER_QUEUE_DOMAIN_ATTRS(sync_write),
KYBER_QUEUE_DOMAIN_ATTRS(other),
{"async_depth", 0400, kyber_async_depth_show},
{},
@@ -1006,8 +942,7 @@ static const struct blk_mq_debugfs_attr kyber_queue_debugfs_attrs[] = {
{#name "_waiting", 0400, kyber_##name##_waiting_show}
static const struct blk_mq_debugfs_attr kyber_hctx_debugfs_attrs[] = {
KYBER_HCTX_DOMAIN_ATTRS(read),
KYBER_HCTX_DOMAIN_ATTRS(write),
KYBER_HCTX_DOMAIN_ATTRS(discard),
KYBER_HCTX_DOMAIN_ATTRS(sync_write),
KYBER_HCTX_DOMAIN_ATTRS(other),
{"cur_domain", 0400, kyber_cur_domain_show},
{"batching", 0400, kyber_batching_show},


@@ -2762,33 +2762,6 @@ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req,
}
}
static void order_job_irq_clear_with_iface_mem_read(void)
{
/* Ensure that the write to JOB_IRQ_CLEAR is ordered with respect to the
 * read from interface memory. The ordering is needed considering the way
 * FW & Kbase write to the JOB_IRQ_RAWSTAT and JOB_IRQ_CLEAR registers
 * without any synchronization. Without the barrier there is no guarantee
 * about the ordering: the write to IRQ_CLEAR can take effect after the read
 * from interface memory, which could cause a problem in the scenario where
 * FW sends back-to-back notifications for the same CSG for events like
 * SYNC_UPDATE and IDLE, but Kbase gets a single IRQ and observes only the
 * first event. A similar problem can occur with glb events like CFG_ALLOC_EN
*
* MCU CPU
* --------------- ----------------
* Update interface memory Write to IRQ_CLEAR to clear current IRQ
* <barrier> <barrier>
* Write to IRQ_RAWSTAT to raise new IRQ Read interface memory
*/
#if KERNEL_VERSION(5, 10, 0) <= LINUX_VERSION_CODE
__iomb();
#else
/* CPU and GPU would be in the same Outer shareable domain */
dmb(osh);
#endif
}
void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
{
unsigned long flags;
@@ -2798,7 +2771,6 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT, NULL, val);
kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
order_job_irq_clear_with_iface_mem_read();
if (val & JOB_IRQ_GLOBAL_IF) {
const struct kbase_csf_global_iface *const global_iface =


@@ -539,8 +539,6 @@ struct kbase_csf_cpu_queue_context {
/**
* struct kbase_csf_heap_context_allocator - Allocator of heap contexts
*
* @heap_context_size_aligned: Size of a heap context structure, in bytes,
* aligned to GPU cacheline size.
* Heap context structures are allocated by the kernel for use by the firmware.
* The current implementation subdivides a single GPU memory region for use as
* a sparse array.
@@ -562,7 +560,6 @@ struct kbase_csf_heap_context_allocator {
u64 gpu_va;
struct mutex lock;
DECLARE_BITMAP(in_use, MAX_TILER_HEAPS);
u32 heap_context_size_aligned;
};
/**


@@ -23,7 +23,10 @@
#include "mali_kbase_csf_heap_context_alloc.h"
/* Size of one heap context structure, in bytes. */
#define HEAP_CTX_SIZE ((u32)32)
#define HEAP_CTX_SIZE ((size_t)32)
/* Total size of the GPU memory region allocated for heap contexts, in bytes. */
#define HEAP_CTX_REGION_SIZE (MAX_TILER_HEAPS * HEAP_CTX_SIZE)
/**
* sub_alloc - Sub-allocate a heap context from a GPU memory region
@@ -35,8 +38,8 @@
static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc)
{
struct kbase_context *const kctx = ctx_alloc->kctx;
unsigned long heap_nr = 0;
u32 ctx_offset = 0;
int heap_nr = 0;
size_t ctx_offset = 0;
u64 heap_gpu_va = 0;
struct kbase_vmap_struct mapping;
void *ctx_ptr = NULL;
@@ -52,24 +55,24 @@ static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc)
return 0;
}
ctx_offset = heap_nr * ctx_alloc->heap_context_size_aligned;
ctx_offset = heap_nr * HEAP_CTX_SIZE;
heap_gpu_va = ctx_alloc->gpu_va + ctx_offset;
ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va,
ctx_alloc->heap_context_size_aligned, KBASE_REG_CPU_WR, &mapping);
HEAP_CTX_SIZE, KBASE_REG_CPU_WR, &mapping);
if (unlikely(!ctx_ptr)) {
dev_err(kctx->kbdev->dev,
"Failed to map tiler heap context %lu (0x%llX)\n",
"Failed to map tiler heap context %d (0x%llX)\n",
heap_nr, heap_gpu_va);
return 0;
}
memset(ctx_ptr, 0, ctx_alloc->heap_context_size_aligned);
memset(ctx_ptr, 0, HEAP_CTX_SIZE);
kbase_vunmap(ctx_ptr, &mapping);
bitmap_set(ctx_alloc->in_use, heap_nr, 1);
dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %lu (0x%llX)\n",
dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %d (0x%llX)\n",
heap_nr, heap_gpu_va);
return heap_gpu_va;
@@ -85,7 +88,7 @@ static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc,
u64 const heap_gpu_va)
{
struct kbase_context *const kctx = ctx_alloc->kctx;
u32 ctx_offset = 0;
u64 ctx_offset = 0;
unsigned int heap_nr = 0;
lockdep_assert_held(&ctx_alloc->lock);
@@ -96,15 +99,15 @@ static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc,
if (WARN_ON(heap_gpu_va < ctx_alloc->gpu_va))
return;
ctx_offset = (u32)(heap_gpu_va - ctx_alloc->gpu_va);
ctx_offset = heap_gpu_va - ctx_alloc->gpu_va;
if (WARN_ON(ctx_offset >= (ctx_alloc->region->nr_pages << PAGE_SHIFT)) ||
WARN_ON(ctx_offset % ctx_alloc->heap_context_size_aligned))
if (WARN_ON(ctx_offset >= HEAP_CTX_REGION_SIZE) ||
WARN_ON(ctx_offset % HEAP_CTX_SIZE))
return;
heap_nr = ctx_offset / ctx_alloc->heap_context_size_aligned;
heap_nr = ctx_offset / HEAP_CTX_SIZE;
dev_dbg(kctx->kbdev->dev,
"Freed tiler heap context %lu (0x%llX)\n", heap_nr, heap_gpu_va);
"Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va);
bitmap_clear(ctx_alloc->in_use, heap_nr, 1);
}
@@ -113,17 +116,12 @@ int kbase_csf_heap_context_allocator_init(
struct kbase_csf_heap_context_allocator *const ctx_alloc,
struct kbase_context *const kctx)
{
const u32 gpu_cache_line_size =
(1U << kctx->kbdev->gpu_props.props.l2_props.log2_line_size);
/* We cannot pre-allocate GPU memory here because the
* custom VA zone may not have been created yet.
*/
ctx_alloc->kctx = kctx;
ctx_alloc->region = NULL;
ctx_alloc->gpu_va = 0;
ctx_alloc->heap_context_size_aligned =
(HEAP_CTX_SIZE + gpu_cache_line_size - 1) & ~(gpu_cache_line_size - 1);
mutex_init(&ctx_alloc->lock);
bitmap_zero(ctx_alloc->in_use, MAX_TILER_HEAPS);
@@ -158,7 +156,7 @@ u64 kbase_csf_heap_context_allocator_alloc(
struct kbase_context *const kctx = ctx_alloc->kctx;
u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR |
BASE_MEM_PROT_CPU_WR | BASEP_MEM_NO_USER_FREE;
u64 nr_pages = PFN_UP(MAX_TILER_HEAPS * ctx_alloc->heap_context_size_aligned);
u64 nr_pages = PFN_UP(HEAP_CTX_REGION_SIZE);
u64 heap_gpu_va = 0;
#ifdef CONFIG_MALI_VECTOR_DUMP


@@ -1488,8 +1488,8 @@ static int kbasep_cs_tiler_heap_init(struct kbase_context *kctx,
{
if (heap_init->in.group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)
return -EINVAL;
else
kctx->jit_group_id = heap_init->in.group_id;
kctx->jit_group_id = heap_init->in.group_id;
return kbase_csf_tiler_heap_init(kctx, heap_init->in.chunk_size,
heap_init->in.initial_chunks, heap_init->in.max_chunks,


@@ -725,7 +725,8 @@ static int dmz_get_zoned_device(struct dm_target *ti, char *path)
dev->zone_nr_blocks = dmz_sect2blk(dev->zone_nr_sectors);
dev->zone_nr_blocks_shift = ilog2(dev->zone_nr_blocks);
dev->nr_zones = blkdev_nr_zones(dev->bdev);
dev->nr_zones = (dev->capacity + dev->zone_nr_sectors - 1)
>> dev->zone_nr_sectors_shift;
dmz->dev = dev;

File diff suppressed because it is too large.


@@ -1,42 +1,93 @@
#ifndef _AW8622_HAPTIC_H_
#define _AW8622_HAPTIC_H_
#include <linux/hrtimer.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include <linux/pinctrl/consumer.h>
#define AW_GPIO_MODE_LED_DEFAULT (0)
#define HAPTIC_GPIO_AW8622_DEFAULT (0)
#define HAPTIC_GPIO_AW8622_SET (1)
#define HAPTIC_PWM_MEMORY_MODE_CLOCK (26000000)
#define HAPTIC_PWM_OLD_MODE_CLOCK (26000000)
#define DEFAULT_FREQUENCY (208)
#define MIN_FREQUENCY (203)
#define MAX_FREQUENCY (212)
struct aw8622_haptic {
struct device *dev;
struct hrtimer timer;
struct work_struct play_work;
struct work_struct stop_play_work;
struct delayed_work hw_off_work;
struct workqueue_struct *aw8622_wq;
struct mutex mutex_lock;
struct pinctrl *ppinctrl_pwm;
int hwen_gpio;
unsigned int pwm_ch;
unsigned int duration;
unsigned int frequency;
unsigned int center_freq;
unsigned int default_pwm_freq;
unsigned int wave_sample_period;
bool is_power_on;
bool is_actived;
bool is_hwen;
struct aw8622_effect_state {
int effect_idx;
int duration;
int secs;
unsigned long nsces;
bool is_shock_stop;
};
#endif /* _AW8622_HAPTIC_H_ */
struct waveform_data_info {
bool is_loaded;
const char *waveform_name;
unsigned int waveform_period; // The time of the whole waveform unit is ms
unsigned int sample_freq;
unsigned int sample_nums;
unsigned int us_time_len; //unit us
unsigned int max_sample_val;
unsigned int len;
unsigned char *data;
};
struct aw8622_haptic {
/* Hardware info */
unsigned int pwm_ch;
struct device *dev;
int hwen_gpio;
struct pinctrl *ppinctrl_pwm;
unsigned int default_pwm_freq;
unsigned int h_l_period;
/* Vibration waveform data field */
struct delayed_work load_waveform_work;
struct delayed_work hw_off_work;
unsigned int wave_sample_period; //wave sample period is ns
struct waveform_data_info *p_waveform_data;
int waveform_data_nums;
unsigned int wave_max_len;
bool is_malloc_wavedata_info;
int cur_load_idx;
unsigned int load_idx_offset;
bool is_malloc_dma_memory;
dma_addr_t wave_phy;
void *wave_vir;
unsigned dma_len;
spinlock_t spin_lock;
/* Vibration control field */
bool is_actived;
bool is_real_play;
bool is_power_on;
bool is_wavefrom_ready;
bool is_hwen;
int effect_idx;
unsigned int duration;
unsigned int interval;
unsigned int center_freq;
struct workqueue_struct *aw8622_wq;
struct work_struct play_work;
struct work_struct stop_play_work;
struct work_struct test_work;
unsigned int test_cnt;
struct mutex mutex_lock;
struct hrtimer timer;
struct aw8622_effect_state effect_state;
};
#define LONG_SHOCK_BIT_NUMS_PER_SAMPLED_VALE (80)
#define WAVEFORM_DATA_OFFSET (12)
#define BIT_NUMS_PER_SAMPLED_VALE (250)
#define BIT_NUMS_PER_BYTE (8)
#define WAVEFORM_MAX_SAMPLE_VAL (127)
#define WAVEFORM_MIN_SAMPLE_VAL (-127)
#define MAX_NUMS_NONNEGATIVE_SIGNEC_8BIT (128) //The number of non-negative integers that a signed 8bit of data can represent
#define MAX_NUMS_POSITIVE_SIGNEC_8BIT (128)
#define MAX_COUNT_SIGNEC_8BIT (255)
#endif


@@ -7552,7 +7552,7 @@ void wlanInitFeatureOption(IN struct ADAPTER *prAdapter)
prWifiVar->ucApBandwidth = (uint8_t) wlanCfgGetUint32(
prAdapter, "ApBw", MAX_BW_160MHZ);
prWifiVar->ucAp2gBandwidth = (uint8_t) wlanCfgGetUint32(
prAdapter, "Ap2gBw", MAX_BW_40MHZ);
prAdapter, "Ap2gBw", MAX_BW_20MHZ);
prWifiVar->ucAp5gBandwidth = (uint8_t) wlanCfgGetUint32(
prAdapter, "Ap5gBw", MAX_BW_80MHZ);
prWifiVar->ucAp6gBandwidth = (uint8_t) wlanCfgGetUint32(


@@ -1663,7 +1663,7 @@
/* 1(default): Run on big core when tput over threshold
* 0: Disable (Let system scheduler decide)
*/
#define CFG_SUPPORT_TPUT_ON_BIG_CORE 0
#define CFG_SUPPORT_TPUT_ON_BIG_CORE 1
#define CFG_SUPPORT_LITTLE_CPU_BOOST 0


@@ -1392,7 +1392,7 @@ struct CMD_ACCESS_RX_STAT {
struct EVENT_ACCESS_RX_STAT {
uint32_t u4SeqNum;
uint32_t u4TotalNum;
uint32_t au4Buffer[];
uint32_t au4Buffer[1];
};
#if CFG_SUPPORT_TX_BF


@@ -323,11 +323,7 @@ void statsParseARPInfo(struct sk_buff *skb,
IPV4TOSTR(&pucEthBody[ARP_SENDER_IP_OFFSET]));
break;
case EVENT_TX:
#if BUILD_QA_DBG
DBGLOG(TX, INFO,
#else
DBGLOG_LIMITED(TX, INFO,
#endif
"ARP %s SRC MAC/IP["
MACSTR "]/[" IPV4STR "], TAR MAC/IP["
MACSTR "]/[" IPV4STR "], SeqNo: %d\n",


@@ -1443,10 +1443,6 @@ int mtk_cfg80211_connect(struct wiphy *wiphy,
prWpaInfo->u4CipherPairwise =
IW_AUTH_CIPHER_CCMP;
break;
#if CFG_SUPPORT_WAPI
case WLAN_CIPHER_SUITE_SMS4:
break;
#endif
case WLAN_CIPHER_SUITE_BIP_GMAC_256:
prWpaInfo->u4CipherPairwise =
IW_AUTH_CIPHER_GCMP256;
@@ -1505,10 +1501,6 @@ int mtk_cfg80211_connect(struct wiphy *wiphy,
prWpaInfo->u4CipherGroup =
IW_AUTH_CIPHER_GCMP128;
break;
#if CFG_SUPPORT_WAPI
case WLAN_CIPHER_SUITE_SMS4:
break;
#endif
case WLAN_CIPHER_SUITE_NO_GROUP_ADDR:
break;
default:


@@ -655,9 +655,6 @@ const uint32_t mtk_cipher_suites[] = {
WLAN_CIPHER_SUITE_WEP104,
WLAN_CIPHER_SUITE_TKIP,
WLAN_CIPHER_SUITE_CCMP,
#if CFG_SUPPORT_WAPI
WLAN_CIPHER_SUITE_SMS4,
#endif
/* keep last -- depends on hw flags! */
WLAN_CIPHER_SUITE_AES_CMAC,
@@ -1482,8 +1479,6 @@ static const struct wiphy_wowlan_support mtk_wlan_wowlan_support = {
*******************************************************************************
*/
static void wlanRemove(void);
/*******************************************************************************
* F U N C T I O N S
*******************************************************************************
@@ -5672,8 +5667,6 @@ static int32_t wlanOnAtReset(void)
* If WMT being removed in the future, you should invoke
* wlanRemove directly from here
*/
kalSendAeeWarning("WFSYS", "wlanOnAtReset fail\n");
wlanRemove();
#if 0
switch (eFailReason) {
case ADAPTER_START_FAIL:
@@ -6064,7 +6057,7 @@ wlanOffNotifyCfg80211Disconnect(IN struct GLUE_INFO *prGlueInfo)
* \return (none)
*/
/*----------------------------------------------------------------------------*/
void wlanRemove(void)
static void wlanRemove(void)
{
struct net_device *prDev = NULL;
struct WLANDEV_INFO *prWlandevInfo = NULL;


@@ -321,7 +321,7 @@ struct iw_p2p_version {
extern struct ieee80211_supported_band mtk_band_2ghz;
extern struct ieee80211_supported_band mtk_band_5ghz;
extern const uint32_t mtk_cipher_suites[];
extern const uint32_t mtk_cipher_suites[9];
/******************************************************************************


@@ -5,8 +5,6 @@
#define pr_fmt(fmt) "pob_qos: " fmt
#include <linux/notifier.h>
#include <mt-plat/mtk_perfobserver.h>
#include <linux/pm.h>
#include <linux/suspend.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
@@ -29,7 +27,7 @@
#include "obpfm_qos_bound.h"
#define MS_TO_NS 1000000
#define ADJUST_INTERVAL_MS 64
#define ADJUST_INTERVAL_MS 32
enum POBQOS_NTF_PUSH_TYPE {
POBQOS_NTF_TIMER = 0x00,
@@ -43,24 +41,6 @@ struct POBQOS_NTF_PUSH_TAG {
struct work_struct sWork;
};
static DEFINE_MUTEX(pob_timer_lock);
static bool pob_timer_active;
#ifdef CONFIG_PM_SLEEP
static bool pob_timer_needs_restart;
static int pob_pm_notifier_cb(struct notifier_block *nb,
unsigned long action, void *data);
static struct notifier_block pob_pm_nb = {
.notifier_call = pob_pm_notifier_cb,
};
#endif
static inline ktime_t pob_timer_interval(void)
{
return ms_to_ktime(ADJUST_INTERVAL_MS);
}
static void pob_enable_timer(void);
static void pob_disable_timer(void);
@@ -107,63 +87,17 @@ static struct hrtimer _pobqos_hrt;
static void pob_enable_timer(void)
{
ktime_t interval = pob_timer_interval();
ktime_t ktime;
mutex_lock(&pob_timer_lock);
if (pob_timer_active) {
mutex_unlock(&pob_timer_lock);
return;
}
pob_timer_active = true;
mutex_unlock(&pob_timer_lock);
hrtimer_start(&_pobqos_hrt, interval, HRTIMER_MODE_REL);
ktime = ktime_set(0, ADJUST_INTERVAL_MS * MS_TO_NS);
hrtimer_start(&_pobqos_hrt, ktime, HRTIMER_MODE_REL);
}
static void pob_disable_timer(void)
{
bool should_cancel = false;
mutex_lock(&pob_timer_lock);
if (pob_timer_active) {
pob_timer_active = false;
should_cancel = true;
}
mutex_unlock(&pob_timer_lock);
if (should_cancel)
hrtimer_cancel(&_pobqos_hrt);
hrtimer_cancel(&_pobqos_hrt);
}
#ifdef CONFIG_PM_SLEEP
static int pob_pm_notifier_cb(struct notifier_block *nb,
unsigned long action, void *data)
{
switch (action) {
case PM_SUSPEND_PREPARE:
case PM_HIBERNATION_PREPARE:
mutex_lock(&pob_timer_lock);
pob_timer_needs_restart = pob_timer_active;
mutex_unlock(&pob_timer_lock);
if (pob_timer_needs_restart)
pob_disable_timer();
break;
case PM_POST_SUSPEND:
case PM_POST_HIBERNATION:
if (pob_timer_needs_restart) {
pob_enable_timer();
pob_timer_needs_restart = false;
}
break;
default:
break;
}
return NOTIFY_OK;
}
#endif
static void pobqos_hrt_wq_cb(struct work_struct *psWork)
{
struct POBQOS_NTF_PUSH_TAG *vpPush =
@@ -255,13 +189,11 @@ final:
static enum hrtimer_restart pobqos_hrt_cb(struct hrtimer *timer)
{
struct POBQOS_NTF_PUSH_TAG *vpPush = NULL;
ktime_t interval;
if (!READ_ONCE(pob_timer_active))
return HRTIMER_NORESTART;
ktime_t ktime;
interval = pob_timer_interval();
hrtimer_forward_now(timer, interval);
ktime = ktime_set(0, ADJUST_INTERVAL_MS * MS_TO_NS);
hrtimer_add_expires(timer, ktime);
if (_gpPOBQoSNtfWQ)
vpPush =
@@ -348,19 +280,12 @@ int __init pob_qos_pfm_init(void)
void __exit pob_qos_pfm_exit(void)
{
unregister_qos_notifier(&pob_pfm_qos_notifier);
#ifdef CONFIG_PM_SLEEP
register_pm_notifier(&pob_pm_nb);
#endif
}
int pob_qos_pfm_enable(void)
{
pob_qos_set_last_time_ms(0);
pob_enable_timer();
#ifdef CONFIG_PM_SLEEP
unregister_pm_notifier(&pob_pm_nb);
#endif
return 0;
}
@@ -368,9 +293,6 @@ int pob_qos_pfm_enable(void)
int pob_qos_pfm_disable(void)
{
pob_disable_timer();
#ifdef CONFIG_PM_SLEEP
pob_timer_needs_restart = false;
#endif
pob_qos_set_last_time_ms(1);
return 0;


@@ -5,8 +5,6 @@
#define pr_fmt(fmt) "pob_qos: " fmt
#include <linux/notifier.h>
#include <mt-plat/mtk_perfobserver.h>
#include <linux/pm.h>
#include <linux/suspend.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
@@ -31,7 +29,7 @@
#endif
#define MS_TO_NS 1000000
#define ADJUST_INTERVAL_MS 64
#define ADJUST_INTERVAL_MS 32
enum POBQOS_NTF_PUSH_TYPE {
POBQOS_NTF_TIMER = 0x00,
@@ -46,24 +44,6 @@ struct POBQOS_NTF_PUSH_TAG {
};
#ifdef CONFIG_MTK_QOS_FRAMEWORK
static DEFINE_MUTEX(pob_timer_lock);
static bool pob_timer_active;
#ifdef CONFIG_PM_SLEEP
static bool pob_timer_needs_restart;
static int pob_pm_notifier_cb(struct notifier_block *nb,
unsigned long action, void *data);
static struct notifier_block pob_pm_nb = {
.notifier_call = pob_pm_notifier_cb,
};
#endif
static inline ktime_t pob_timer_interval(void)
{
return ms_to_ktime(ADJUST_INTERVAL_MS);
}
static void pob_enable_timer(void);
static void pob_disable_timer(void);
@@ -110,63 +90,17 @@ static struct hrtimer _pobqos_hrt;
static void pob_enable_timer(void)
{
ktime_t interval = pob_timer_interval();
ktime_t ktime;
mutex_lock(&pob_timer_lock);
if (pob_timer_active) {
mutex_unlock(&pob_timer_lock);
return;
}
pob_timer_active = true;
mutex_unlock(&pob_timer_lock);
hrtimer_start(&_pobqos_hrt, interval, HRTIMER_MODE_REL);
ktime = ktime_set(0, ADJUST_INTERVAL_MS * MS_TO_NS);
hrtimer_start(&_pobqos_hrt, ktime, HRTIMER_MODE_REL);
}
static void pob_disable_timer(void)
{
bool should_cancel = false;
mutex_lock(&pob_timer_lock);
if (pob_timer_active) {
pob_timer_active = false;
should_cancel = true;
}
mutex_unlock(&pob_timer_lock);
if (should_cancel)
hrtimer_cancel(&_pobqos_hrt);
hrtimer_cancel(&_pobqos_hrt);
}
#ifdef CONFIG_PM_SLEEP
static int pob_pm_notifier_cb(struct notifier_block *nb,
unsigned long action, void *data)
{
switch (action) {
case PM_SUSPEND_PREPARE:
case PM_HIBERNATION_PREPARE:
mutex_lock(&pob_timer_lock);
pob_timer_needs_restart = pob_timer_active;
mutex_unlock(&pob_timer_lock);
if (pob_timer_needs_restart)
pob_disable_timer();
break;
case PM_POST_SUSPEND:
case PM_POST_HIBERNATION:
if (pob_timer_needs_restart) {
pob_enable_timer();
pob_timer_needs_restart = false;
}
break;
default:
break;
}
return NOTIFY_OK;
}
#endif
static void pobqos_hrt_wq_cb(struct work_struct *psWork)
{
struct POBQOS_NTF_PUSH_TAG *vpPush =
@@ -258,13 +192,11 @@ final:
static enum hrtimer_restart pobqos_hrt_cb(struct hrtimer *timer)
{
struct POBQOS_NTF_PUSH_TAG *vpPush = NULL;
ktime_t interval;
if (!READ_ONCE(pob_timer_active))
return HRTIMER_NORESTART;
ktime_t ktime;
interval = pob_timer_interval();
hrtimer_forward_now(timer, interval);
ktime = ktime_set(0, ADJUST_INTERVAL_MS * MS_TO_NS);
hrtimer_add_expires(timer, ktime);
if (_gpPOBQoSNtfWQ)
vpPush =
@@ -343,18 +275,11 @@ int __init pob_qos_pfm_init(void)
register_qos_notifier(&pob_pfm_qos_notifier);
#ifdef CONFIG_PM_SLEEP
register_pm_notifier(&pob_pm_nb);
#endif
return 0;
}
void __exit pob_qos_pfm_exit(void)
{
#ifdef CONFIG_PM_SLEEP
unregister_pm_notifier(&pob_pm_nb);
#endif
unregister_qos_notifier(&pob_pfm_qos_notifier);
}
@@ -369,9 +294,6 @@ int pob_qos_pfm_enable(void)
int pob_qos_pfm_disable(void)
{
pob_disable_timer();
#ifdef CONFIG_PM_SLEEP
pob_timer_needs_restart = false;
#endif
pob_qos_set_last_time_ms(1);
return 0;


@@ -5,8 +5,6 @@
#define pr_fmt(fmt) "pob_qos: " fmt
#include <linux/notifier.h>
#include <mt-plat/mtk_perfobserver.h>
#include <linux/pm.h>
#include <linux/suspend.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
@@ -31,7 +29,7 @@
#endif
#define MS_TO_NS 1000000
#define ADJUST_INTERVAL_MS 64
#define ADJUST_INTERVAL_MS 32
enum POBQOS_NTF_PUSH_TYPE {
POBQOS_NTF_TIMER = 0x00,
@@ -46,24 +44,6 @@ struct POBQOS_NTF_PUSH_TAG {
};
#ifdef CONFIG_MTK_QOS_FRAMEWORK
static DEFINE_MUTEX(pob_timer_lock);
static bool pob_timer_active;
#ifdef CONFIG_PM_SLEEP
static bool pob_timer_needs_restart;
static int pob_pm_notifier_cb(struct notifier_block *nb,
unsigned long action, void *data);
static struct notifier_block pob_pm_nb = {
.notifier_call = pob_pm_notifier_cb,
};
#endif
static inline ktime_t pob_timer_interval(void)
{
return ms_to_ktime(ADJUST_INTERVAL_MS);
}
static void pob_enable_timer(void);
static void pob_disable_timer(void);
@@ -110,63 +90,17 @@ static struct hrtimer _pobqos_hrt;
static void pob_enable_timer(void)
{
ktime_t interval = pob_timer_interval();
ktime_t ktime;
mutex_lock(&pob_timer_lock);
if (pob_timer_active) {
mutex_unlock(&pob_timer_lock);
return;
}
pob_timer_active = true;
mutex_unlock(&pob_timer_lock);
hrtimer_start(&_pobqos_hrt, interval, HRTIMER_MODE_REL);
ktime = ktime_set(0, ADJUST_INTERVAL_MS * MS_TO_NS);
hrtimer_start(&_pobqos_hrt, ktime, HRTIMER_MODE_REL);
}
static void pob_disable_timer(void)
{
bool should_cancel = false;
mutex_lock(&pob_timer_lock);
if (pob_timer_active) {
pob_timer_active = false;
should_cancel = true;
}
mutex_unlock(&pob_timer_lock);
if (should_cancel)
hrtimer_cancel(&_pobqos_hrt);
hrtimer_cancel(&_pobqos_hrt);
}
#ifdef CONFIG_PM_SLEEP
static int pob_pm_notifier_cb(struct notifier_block *nb,
unsigned long action, void *data)
{
switch (action) {
case PM_SUSPEND_PREPARE:
case PM_HIBERNATION_PREPARE:
mutex_lock(&pob_timer_lock);
pob_timer_needs_restart = pob_timer_active;
mutex_unlock(&pob_timer_lock);
if (pob_timer_needs_restart)
pob_disable_timer();
break;
case PM_POST_SUSPEND:
case PM_POST_HIBERNATION:
if (pob_timer_needs_restart) {
pob_enable_timer();
pob_timer_needs_restart = false;
}
break;
default:
break;
}
return NOTIFY_OK;
}
#endif
static void pobqos_hrt_wq_cb(struct work_struct *psWork)
{
struct POBQOS_NTF_PUSH_TAG *vpPush =
@@ -258,13 +192,11 @@ final:
static enum hrtimer_restart pobqos_hrt_cb(struct hrtimer *timer)
{
struct POBQOS_NTF_PUSH_TAG *vpPush = NULL;
ktime_t interval;
if (!READ_ONCE(pob_timer_active))
return HRTIMER_NORESTART;
ktime_t ktime;
interval = pob_timer_interval();
hrtimer_forward_now(timer, interval);
ktime = ktime_set(0, ADJUST_INTERVAL_MS * MS_TO_NS);
hrtimer_add_expires(timer, ktime);
if (_gpPOBQoSNtfWQ)
vpPush =
@@ -343,18 +275,11 @@ int __init pob_qos_pfm_init(void)
register_qos_notifier(&pob_pfm_qos_notifier);
#ifdef CONFIG_PM_SLEEP
register_pm_notifier(&pob_pm_nb);
#endif
return 0;
}
void __exit pob_qos_pfm_exit(void)
{
#ifdef CONFIG_PM_SLEEP
unregister_pm_notifier(&pob_pm_nb);
#endif
unregister_qos_notifier(&pob_pfm_qos_notifier);
}
@@ -369,9 +294,6 @@ int pob_qos_pfm_enable(void)
int pob_qos_pfm_disable(void)
{
pob_disable_timer();
#ifdef CONFIG_PM_SLEEP
pob_timer_needs_restart = false;
#endif
pob_qos_set_last_time_ms(1);
return 0;


@@ -5,8 +5,6 @@
#define pr_fmt(fmt) "pob_qos: " fmt
#include <linux/notifier.h>
#include <mt-plat/mtk_perfobserver.h>
#include <linux/pm.h>
#include <linux/suspend.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
@@ -31,7 +29,7 @@
#endif
#define MS_TO_NS 1000000
#define ADJUST_INTERVAL_MS 64
#define ADJUST_INTERVAL_MS 32
enum POBQOS_NTF_PUSH_TYPE {
POBQOS_NTF_TIMER = 0x00,
@@ -46,24 +44,6 @@ struct POBQOS_NTF_PUSH_TAG {
};
#ifdef CONFIG_MTK_QOS_FRAMEWORK
static DEFINE_MUTEX(pob_timer_lock);
static bool pob_timer_active;
#ifdef CONFIG_PM_SLEEP
static bool pob_timer_needs_restart;
static int pob_pm_notifier_cb(struct notifier_block *nb,
unsigned long action, void *data);
static struct notifier_block pob_pm_nb = {
.notifier_call = pob_pm_notifier_cb,
};
#endif
static inline ktime_t pob_timer_interval(void)
{
return ms_to_ktime(ADJUST_INTERVAL_MS);
}
static void pob_enable_timer(void);
static void pob_disable_timer(void);
@@ -110,63 +90,17 @@ static struct hrtimer _pobqos_hrt;
static void pob_enable_timer(void)
{
ktime_t interval = pob_timer_interval();
ktime_t ktime;
mutex_lock(&pob_timer_lock);
if (pob_timer_active) {
mutex_unlock(&pob_timer_lock);
return;
}
pob_timer_active = true;
mutex_unlock(&pob_timer_lock);
hrtimer_start(&_pobqos_hrt, interval, HRTIMER_MODE_REL);
ktime = ktime_set(0, ADJUST_INTERVAL_MS * MS_TO_NS);
hrtimer_start(&_pobqos_hrt, ktime, HRTIMER_MODE_REL);
}
static void pob_disable_timer(void)
{
bool should_cancel = false;
mutex_lock(&pob_timer_lock);
if (pob_timer_active) {
pob_timer_active = false;
should_cancel = true;
}
mutex_unlock(&pob_timer_lock);
if (should_cancel)
hrtimer_cancel(&_pobqos_hrt);
hrtimer_cancel(&_pobqos_hrt);
}
#ifdef CONFIG_PM_SLEEP
static int pob_pm_notifier_cb(struct notifier_block *nb,
unsigned long action, void *data)
{
switch (action) {
case PM_SUSPEND_PREPARE:
case PM_HIBERNATION_PREPARE:
mutex_lock(&pob_timer_lock);
pob_timer_needs_restart = pob_timer_active;
mutex_unlock(&pob_timer_lock);
if (pob_timer_needs_restart)
pob_disable_timer();
break;
case PM_POST_SUSPEND:
case PM_POST_HIBERNATION:
if (pob_timer_needs_restart) {
pob_enable_timer();
pob_timer_needs_restart = false;
}
break;
default:
break;
}
return NOTIFY_OK;
}
#endif
static void pobqos_hrt_wq_cb(struct work_struct *psWork)
{
struct POBQOS_NTF_PUSH_TAG *vpPush =
@@ -258,13 +192,11 @@ final:
static enum hrtimer_restart pobqos_hrt_cb(struct hrtimer *timer)
{
struct POBQOS_NTF_PUSH_TAG *vpPush = NULL;
ktime_t interval;
if (!READ_ONCE(pob_timer_active))
return HRTIMER_NORESTART;
ktime_t ktime;
interval = pob_timer_interval();
hrtimer_forward_now(timer, interval);
ktime = ktime_set(0, ADJUST_INTERVAL_MS * MS_TO_NS);
hrtimer_add_expires(timer, ktime);
if (_gpPOBQoSNtfWQ)
vpPush =
@@ -343,18 +275,11 @@ int __init pob_qos_pfm_init(void)
register_qos_notifier(&pob_pfm_qos_notifier);
#ifdef CONFIG_PM_SLEEP
register_pm_notifier(&pob_pm_nb);
#endif
return 0;
}
void __exit pob_qos_pfm_exit(void)
{
#ifdef CONFIG_PM_SLEEP
unregister_pm_notifier(&pob_pm_nb);
#endif
unregister_qos_notifier(&pob_pfm_qos_notifier);
}
@@ -369,9 +294,6 @@ int pob_qos_pfm_enable(void)
int pob_qos_pfm_disable(void)
{
pob_disable_timer();
#ifdef CONFIG_PM_SLEEP
pob_timer_needs_restart = false;
#endif
pob_qos_set_last_time_ms(1);
return 0;


@@ -5,8 +5,6 @@
#define pr_fmt(fmt) "pob_qos: " fmt
#include <linux/notifier.h>
#include <mt-plat/mtk_perfobserver.h>
#include <linux/pm.h>
#include <linux/suspend.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
@@ -31,7 +29,7 @@
#endif
#define MS_TO_NS 1000000
#define ADJUST_INTERVAL_MS 64
#define ADJUST_INTERVAL_MS 32
enum POBQOS_NTF_PUSH_TYPE {
POBQOS_NTF_TIMER = 0x00,
@@ -46,24 +44,6 @@ struct POBQOS_NTF_PUSH_TAG {
};
#ifdef CONFIG_MTK_QOS_FRAMEWORK
static DEFINE_MUTEX(pob_timer_lock);
static bool pob_timer_active;
#ifdef CONFIG_PM_SLEEP
static bool pob_timer_needs_restart;
static int pob_pm_notifier_cb(struct notifier_block *nb,
unsigned long action, void *data);
static struct notifier_block pob_pm_nb = {
.notifier_call = pob_pm_notifier_cb,
};
#endif
static inline ktime_t pob_timer_interval(void)
{
return ms_to_ktime(ADJUST_INTERVAL_MS);
}
static void pob_enable_timer(void);
static void pob_disable_timer(void);
@@ -110,63 +90,17 @@ static struct hrtimer _pobqos_hrt;
static void pob_enable_timer(void)
{
ktime_t interval = pob_timer_interval();
ktime_t ktime;
mutex_lock(&pob_timer_lock);
if (pob_timer_active) {
mutex_unlock(&pob_timer_lock);
return;
}
pob_timer_active = true;
mutex_unlock(&pob_timer_lock);
hrtimer_start(&_pobqos_hrt, interval, HRTIMER_MODE_REL);
ktime = ktime_set(0, ADJUST_INTERVAL_MS * MS_TO_NS);
hrtimer_start(&_pobqos_hrt, ktime, HRTIMER_MODE_REL);
}
static void pob_disable_timer(void)
{
bool should_cancel = false;
mutex_lock(&pob_timer_lock);
if (pob_timer_active) {
pob_timer_active = false;
should_cancel = true;
}
mutex_unlock(&pob_timer_lock);
if (should_cancel)
hrtimer_cancel(&_pobqos_hrt);
hrtimer_cancel(&_pobqos_hrt);
}
#ifdef CONFIG_PM_SLEEP
static int pob_pm_notifier_cb(struct notifier_block *nb,
unsigned long action, void *data)
{
switch (action) {
case PM_SUSPEND_PREPARE:
case PM_HIBERNATION_PREPARE:
mutex_lock(&pob_timer_lock);
pob_timer_needs_restart = pob_timer_active;
mutex_unlock(&pob_timer_lock);
if (pob_timer_needs_restart)
pob_disable_timer();
break;
case PM_POST_SUSPEND:
case PM_POST_HIBERNATION:
if (pob_timer_needs_restart) {
pob_enable_timer();
pob_timer_needs_restart = false;
}
break;
default:
break;
}
return NOTIFY_OK;
}
#endif
static void pobqos_hrt_wq_cb(struct work_struct *psWork)
{
struct POBQOS_NTF_PUSH_TAG *vpPush =
@@ -258,13 +192,11 @@ final:
static enum hrtimer_restart pobqos_hrt_cb(struct hrtimer *timer)
{
struct POBQOS_NTF_PUSH_TAG *vpPush = NULL;
ktime_t interval;
if (!READ_ONCE(pob_timer_active))
return HRTIMER_NORESTART;
ktime_t ktime;
interval = pob_timer_interval();
hrtimer_forward_now(timer, interval);
ktime = ktime_set(0, ADJUST_INTERVAL_MS * MS_TO_NS);
hrtimer_add_expires(timer, ktime);
if (_gpPOBQoSNtfWQ)
vpPush =
@@ -343,18 +275,11 @@ int __init pob_qos_pfm_init(void)
register_qos_notifier(&pob_pfm_qos_notifier);
#ifdef CONFIG_PM_SLEEP
register_pm_notifier(&pob_pm_nb);
#endif
return 0;
}
void __exit pob_qos_pfm_exit(void)
{
#ifdef CONFIG_PM_SLEEP
unregister_pm_notifier(&pob_pm_nb);
#endif
unregister_qos_notifier(&pob_pfm_qos_notifier);
}
@@ -369,9 +294,6 @@ int pob_qos_pfm_enable(void)
int pob_qos_pfm_disable(void)
{
pob_disable_timer();
#ifdef CONFIG_PM_SLEEP
pob_timer_needs_restart = false;
#endif
pob_qos_set_last_time_ms(1);
return 0;


@@ -5,8 +5,6 @@
#define pr_fmt(fmt) "pob_qos: " fmt
#include <linux/notifier.h>
#include <mt-plat/mtk_perfobserver.h>
#include <linux/pm.h>
#include <linux/suspend.h>
#include <linux/ktime.h>
#include <linux/hrtimer.h>
@@ -31,7 +29,7 @@
#endif
#define MS_TO_NS 1000000
#define ADJUST_INTERVAL_MS 64
#define ADJUST_INTERVAL_MS 32
enum POBQOS_NTF_PUSH_TYPE {
POBQOS_NTF_TIMER = 0x00,
@@ -46,24 +44,6 @@ struct POBQOS_NTF_PUSH_TAG {
};
#ifdef CONFIG_MTK_QOS_FRAMEWORK
static DEFINE_MUTEX(pob_timer_lock);
static bool pob_timer_active;
#ifdef CONFIG_PM_SLEEP
static bool pob_timer_needs_restart;
static int pob_pm_notifier_cb(struct notifier_block *nb,
unsigned long action, void *data);
static struct notifier_block pob_pm_nb = {
.notifier_call = pob_pm_notifier_cb,
};
#endif
static inline ktime_t pob_timer_interval(void)
{
return ms_to_ktime(ADJUST_INTERVAL_MS);
}
static void pob_enable_timer(void);
static void pob_disable_timer(void);
@@ -110,63 +90,17 @@ static struct hrtimer _pobqos_hrt;
static void pob_enable_timer(void)
{
ktime_t interval = pob_timer_interval();
ktime_t ktime;
mutex_lock(&pob_timer_lock);
if (pob_timer_active) {
mutex_unlock(&pob_timer_lock);
return;
}
pob_timer_active = true;
mutex_unlock(&pob_timer_lock);
hrtimer_start(&_pobqos_hrt, interval, HRTIMER_MODE_REL);
ktime = ktime_set(0, ADJUST_INTERVAL_MS * MS_TO_NS);
hrtimer_start(&_pobqos_hrt, ktime, HRTIMER_MODE_REL);
}
static void pob_disable_timer(void)
{
bool should_cancel = false;
mutex_lock(&pob_timer_lock);
if (pob_timer_active) {
pob_timer_active = false;
should_cancel = true;
}
mutex_unlock(&pob_timer_lock);
if (should_cancel)
hrtimer_cancel(&_pobqos_hrt);
hrtimer_cancel(&_pobqos_hrt);
}
#ifdef CONFIG_PM_SLEEP
static int pob_pm_notifier_cb(struct notifier_block *nb,
unsigned long action, void *data)
{
switch (action) {
case PM_SUSPEND_PREPARE:
case PM_HIBERNATION_PREPARE:
mutex_lock(&pob_timer_lock);
pob_timer_needs_restart = pob_timer_active;
mutex_unlock(&pob_timer_lock);
if (pob_timer_needs_restart)
pob_disable_timer();
break;
case PM_POST_SUSPEND:
case PM_POST_HIBERNATION:
if (pob_timer_needs_restart) {
pob_enable_timer();
pob_timer_needs_restart = false;
}
break;
default:
break;
}
return NOTIFY_OK;
}
#endif
static void pobqos_hrt_wq_cb(struct work_struct *psWork)
{
struct POBQOS_NTF_PUSH_TAG *vpPush =
@@ -258,13 +192,11 @@ final:
static enum hrtimer_restart pobqos_hrt_cb(struct hrtimer *timer)
{
struct POBQOS_NTF_PUSH_TAG *vpPush = NULL;
ktime_t interval;
if (!READ_ONCE(pob_timer_active))
return HRTIMER_NORESTART;
ktime_t ktime;
interval = pob_timer_interval();
hrtimer_forward_now(timer, interval);
ktime = ktime_set(0, ADJUST_INTERVAL_MS * MS_TO_NS);
hrtimer_add_expires(timer, ktime);
if (_gpPOBQoSNtfWQ)
vpPush =
@@ -343,18 +275,11 @@ int __init pob_qos_pfm_init(void)
register_qos_notifier(&pob_pfm_qos_notifier);
#ifdef CONFIG_PM_SLEEP
register_pm_notifier(&pob_pm_nb);
#endif
return 0;
}
void __exit pob_qos_pfm_exit(void)
{
#ifdef CONFIG_PM_SLEEP
unregister_pm_notifier(&pob_pm_nb);
#endif
unregister_qos_notifier(&pob_pfm_qos_notifier);
}
@@ -369,9 +294,6 @@ int pob_qos_pfm_enable(void)
int pob_qos_pfm_disable(void)
{
pob_disable_timer();
#ifdef CONFIG_PM_SLEEP
pob_timer_needs_restart = false;
#endif
pob_qos_set_last_time_ms(1);
return 0;




@@ -2121,9 +2121,9 @@ static int mt6360_set_bist_carrier_mode(struct tcpc_device *tcpc, u8 pattern)
return 0;
}
/* transmit count (1byte) + message header (2byte) + data object (7*4) */
/* message header (2byte) + data object (7*4) */
#define MT6360_TRANSMIT_MAX_SIZE \
(1 + sizeof(u16) + sizeof(u32) * 7)
(sizeof(u16) + sizeof(u32) * 7)
#ifdef CONFIG_USB_PD_RETRY_CRC_DISCARD
static int mt6360_retransmit(struct tcpc_device *tcpc)

View File

@@ -1576,8 +1576,8 @@ static int mt6362_get_message(struct tcpc_device *tcpc, u32 *payload,
return ret;
}
/* transmit count (1byte) + message header (2byte) + data object (7*4) */
#define MT6362_TRANSMIT_MAX_SIZE (1 + sizeof(u16) + sizeof(u32) * 7)
/* message header (2byte) + data object (7*4) */
#define MT6362_TRANSMIT_MAX_SIZE (sizeof(u16) + sizeof(u32) * 7)
static int mt6362_transmit(struct tcpc_device *tcpc,
enum tcpm_transmit_type type, u16 header,

View File

@@ -26,7 +26,6 @@
* datablocks and metadata blocks.
*/
#include <linux/blkdev.h>
#include <linux/fs.h>
#include <linux/vfs.h>
#include <linux/slab.h>
@@ -41,104 +40,45 @@
#include "page_actor.h"
/*
* Returns the amount of bytes copied to the page actor.
* Read the metadata block length, this is stored in the first two
* bytes of the metadata block.
*/
static int copy_bio_to_actor(struct bio *bio,
struct squashfs_page_actor *actor,
int offset, int req_length)
{
void *actor_addr = squashfs_first_page(actor);
struct bvec_iter_all iter_all = {};
struct bio_vec *bvec = bvec_init_iter_all(&iter_all);
int copied_bytes = 0;
int actor_offset = 0;
if (WARN_ON_ONCE(!bio_next_segment(bio, &iter_all)))
return 0;
while (copied_bytes < req_length) {
int bytes_to_copy = min_t(int, bvec->bv_len - offset,
PAGE_SIZE - actor_offset);
bytes_to_copy = min_t(int, bytes_to_copy,
req_length - copied_bytes);
memcpy(actor_addr + actor_offset,
page_address(bvec->bv_page) + bvec->bv_offset + offset,
bytes_to_copy);
actor_offset += bytes_to_copy;
copied_bytes += bytes_to_copy;
offset += bytes_to_copy;
if (actor_offset >= PAGE_SIZE) {
actor_addr = squashfs_next_page(actor);
if (!actor_addr)
break;
actor_offset = 0;
}
if (offset >= bvec->bv_len) {
if (!bio_next_segment(bio, &iter_all))
break;
offset = 0;
}
}
squashfs_finish_page(actor);
return copied_bytes;
}
static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
struct bio **biop, int *block_offset)
static struct buffer_head *get_block_length(struct super_block *sb,
u64 *cur_index, int *offset, int *length)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
const u64 read_start = round_down(index, msblk->devblksize);
const sector_t block = read_start >> msblk->devblksize_log2;
const u64 read_end = round_up(index + length, msblk->devblksize);
const sector_t block_end = read_end >> msblk->devblksize_log2;
int offset = read_start - round_down(index, PAGE_SIZE);
int total_len = (block_end - block) << msblk->devblksize_log2;
const int page_count = DIV_ROUND_UP(total_len + offset, PAGE_SIZE);
int error, i;
struct bio *bio;
struct buffer_head *bh;
bio = bio_alloc(GFP_NOIO, page_count);
if (!bio)
return -ENOMEM;
bh = sb_bread(sb, *cur_index);
if (bh == NULL)
return NULL;
bio_set_dev(bio, sb->s_bdev);
bio->bi_opf = READ;
bio->bi_iter.bi_sector = block * (msblk->devblksize >> SECTOR_SHIFT);
if (msblk->devblksize - *offset == 1) {
*length = (unsigned char) bh->b_data[*offset];
put_bh(bh);
bh = sb_bread(sb, ++(*cur_index));
if (bh == NULL)
return NULL;
*length |= (unsigned char) bh->b_data[0] << 8;
*offset = 1;
} else {
*length = (unsigned char) bh->b_data[*offset] |
(unsigned char) bh->b_data[*offset + 1] << 8;
*offset += 2;
for (i = 0; i < page_count; ++i) {
unsigned int len =
min_t(unsigned int, PAGE_SIZE - offset, total_len);
struct page *page = alloc_page(GFP_NOIO);
if (!page) {
error = -ENOMEM;
goto out_free_bio;
if (*offset == msblk->devblksize) {
put_bh(bh);
bh = sb_bread(sb, ++(*cur_index));
if (bh == NULL)
return NULL;
*offset = 0;
}
if (!bio_add_page(bio, page, len, offset)) {
error = -EIO;
goto out_free_bio;
}
offset = 0;
total_len -= len;
}
error = submit_bio_wait(bio);
if (error)
goto out_free_bio;
*biop = bio;
*block_offset = index & ((1 << msblk->devblksize_log2) - 1);
return 0;
out_free_bio:
bio_free_pages(bio);
bio_put(bio);
return error;
return bh;
}
/*
* Read and decompress a metadata block or datablock. Length is non-zero
* if a datablock is being read (the size is stored elsewhere in the
@@ -149,88 +89,129 @@ out_free_bio:
* algorithms).
*/
int squashfs_read_data(struct super_block *sb, u64 index, int length,
u64 *next_index, struct squashfs_page_actor *output)
u64 *next_index, struct squashfs_page_actor *output)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
struct bio *bio = NULL;
int compressed;
int res;
int offset;
struct buffer_head **bh;
int offset = index & ((1 << msblk->devblksize_log2) - 1);
u64 cur_index = index >> msblk->devblksize_log2;
int bytes, compressed, b = 0, k = 0, avail, i;
bh = kcalloc(((output->length + msblk->devblksize - 1)
>> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL);
if (bh == NULL)
return -ENOMEM;
if (length) {
/*
* Datablock.
*/
bytes = -offset;
compressed = SQUASHFS_COMPRESSED_BLOCK(length);
length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length);
if (next_index)
*next_index = index + length;
TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n",
index, compressed ? "" : "un", length, output->length);
if (length < 0 || length > output->length ||
(index + length) > msblk->bytes_used)
goto read_failure;
for (b = 0; bytes < length; b++, cur_index++) {
bh[b] = sb_getblk(sb, cur_index);
if (bh[b] == NULL)
goto block_release;
bytes += msblk->devblksize;
}
ll_rw_block(REQ_OP_READ, 0, b, bh);
} else {
/*
* Metadata block.
*/
const u8 *data;
struct bvec_iter_all iter_all = {};
struct bio_vec *bvec = bvec_init_iter_all(&iter_all);
if ((index + 2) > msblk->bytes_used)
goto read_failure;
if (index + 2 > msblk->bytes_used) {
res = -EIO;
goto out;
}
res = squashfs_bio_read(sb, index, 2, &bio, &offset);
if (res)
goto out;
if (WARN_ON_ONCE(!bio_next_segment(bio, &iter_all))) {
res = -EIO;
goto out_free_bio;
}
/* Extract the length of the metadata block */
data = page_address(bvec->bv_page) + bvec->bv_offset;
length = data[offset];
if (offset <= bvec->bv_len - 1) {
length |= data[offset + 1] << 8;
} else {
if (WARN_ON_ONCE(!bio_next_segment(bio, &iter_all))) {
res = -EIO;
goto out_free_bio;
}
data = page_address(bvec->bv_page) + bvec->bv_offset;
length |= data[0] << 8;
}
bio_free_pages(bio);
bio_put(bio);
bh[0] = get_block_length(sb, &cur_index, &offset, &length);
if (bh[0] == NULL)
goto read_failure;
b = 1;
bytes = msblk->devblksize - offset;
compressed = SQUASHFS_COMPRESSED(length);
length = SQUASHFS_COMPRESSED_SIZE(length);
index += 2;
if (next_index)
*next_index = index + length + 2;
TRACE("Block @ 0x%llx, %scompressed size %d\n", index,
compressed ? "" : "un", length);
}
if (next_index)
*next_index = index + length;
compressed ? "" : "un", length);
res = squashfs_bio_read(sb, index, length, &bio, &offset);
if (res)
goto out;
if (length < 0 || length > output->length ||
(index + length) > msblk->bytes_used)
goto block_release;
for (; bytes < length; b++) {
bh[b] = sb_getblk(sb, ++cur_index);
if (bh[b] == NULL)
goto block_release;
bytes += msblk->devblksize;
}
ll_rw_block(REQ_OP_READ, 0, b - 1, bh + 1);
}
for (i = 0; i < b; i++) {
wait_on_buffer(bh[i]);
if (!buffer_uptodate(bh[i]))
goto block_release;
}
if (compressed) {
if (!msblk->stream) {
res = -EIO;
goto out_free_bio;
}
res = squashfs_decompress(msblk, bio, offset, length, output);
if (!msblk->stream)
goto read_failure;
length = squashfs_decompress(msblk, bh, b, offset, length,
output);
if (length < 0)
goto read_failure;
} else {
res = copy_bio_to_actor(bio, output, offset, length);
/*
* Block is uncompressed.
*/
int in, pg_offset = 0;
void *data = squashfs_first_page(output);
for (bytes = length; k < b; k++) {
in = min(bytes, msblk->devblksize - offset);
bytes -= in;
while (in) {
if (pg_offset == PAGE_SIZE) {
data = squashfs_next_page(output);
pg_offset = 0;
}
avail = min_t(int, in, PAGE_SIZE -
pg_offset);
memcpy(data + pg_offset, bh[k]->b_data + offset,
avail);
in -= avail;
pg_offset += avail;
offset += avail;
}
offset = 0;
put_bh(bh[k]);
}
squashfs_finish_page(output);
}
out_free_bio:
bio_free_pages(bio);
bio_put(bio);
out:
if (res < 0)
ERROR("Failed to read block 0x%llx: %d\n", index, res);
kfree(bh);
return length;
return res;
block_release:
for (; k < b; k++)
put_bh(bh[k]);
read_failure:
ERROR("squashfs_read_data failed to read block 0x%llx\n",
(unsigned long long) index);
kfree(bh);
return -EIO;
}

View File

@@ -23,14 +23,13 @@
* decompressor.h
*/
#include <linux/bio.h>
struct squashfs_decompressor {
void *(*init)(struct squashfs_sb_info *, void *);
void *(*comp_opts)(struct squashfs_sb_info *, void *, int);
void (*free)(void *);
int (*decompress)(struct squashfs_sb_info *, void *,
struct bio *, int, int, struct squashfs_page_actor *);
struct buffer_head **, int, int, int,
struct squashfs_page_actor *);
int id;
char *name;
int supported;

View File

@@ -8,7 +8,7 @@
#include <linux/types.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/cpumask.h>
@@ -182,15 +182,14 @@ wait:
}
int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
int offset, int length,
struct squashfs_page_actor *output)
int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
int b, int offset, int length, struct squashfs_page_actor *output)
{
int res;
struct squashfs_stream *stream = msblk->stream;
struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream);
res = msblk->decompressor->decompress(msblk, decomp_stream->stream,
bio, offset, length, output);
bh, b, offset, length, output);
put_decomp_stream(decomp_stream, stream);
if (res < 0)
ERROR("%s decompression failed, data probably corrupt\n",

View File

@@ -74,17 +74,14 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
}
}
int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
int offset, int length, struct squashfs_page_actor *output)
int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
int b, int offset, int length, struct squashfs_page_actor *output)
{
struct squashfs_stream __percpu *percpu;
struct squashfs_stream *stream;
int res;
percpu = (struct squashfs_stream __percpu *)msblk->stream;
stream = get_cpu_ptr(percpu);
res = msblk->decompressor->decompress(msblk, stream->stream, bio,
offset, length, output);
struct squashfs_stream __percpu *percpu =
(struct squashfs_stream __percpu *) msblk->stream;
struct squashfs_stream *stream = get_cpu_ptr(percpu);
int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
offset, length, output);
put_cpu_ptr(stream);
if (res < 0)

View File

@@ -9,7 +9,7 @@
#include <linux/types.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include "squashfs_fs.h"
#include "squashfs_fs_sb.h"
@@ -61,15 +61,14 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk)
}
}
int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio,
int offset, int length,
struct squashfs_page_actor *output)
int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh,
int b, int offset, int length, struct squashfs_page_actor *output)
{
int res;
struct squashfs_stream *stream = msblk->stream;
mutex_lock(&stream->mutex);
res = msblk->decompressor->decompress(msblk, stream->stream, bio,
res = msblk->decompressor->decompress(msblk, stream->stream, bh, b,
offset, length, output);
mutex_unlock(&stream->mutex);

View File

@@ -6,7 +6,7 @@
* the COPYING file in the top-level directory.
*/
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -91,23 +91,20 @@ static void lz4_free(void *strm)
static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
struct bio *bio, int offset, int length,
struct buffer_head **bh, int b, int offset, int length,
struct squashfs_page_actor *output)
{
struct bvec_iter_all iter_all = {};
struct bio_vec *bvec = bvec_init_iter_all(&iter_all);
struct squashfs_lz4 *stream = strm;
void *buff = stream->input, *data;
int bytes = length, res;
int avail, i, bytes = length, res;
while (bio_next_segment(bio, &iter_all)) {
int avail = min(bytes, ((int)bvec->bv_len) - offset);
data = page_address(bvec->bv_page) + bvec->bv_offset;
memcpy(buff, data + offset, avail);
for (i = 0; i < b; i++) {
avail = min(bytes, msblk->devblksize - offset);
memcpy(buff, bh[i]->b_data + offset, avail);
buff += avail;
bytes -= avail;
offset = 0;
put_bh(bh[i]);
}
res = LZ4_decompress_safe(stream->input, stream->output,

View File

@@ -22,7 +22,7 @@
*/
#include <linux/mutex.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/lzo.h>
@@ -76,24 +76,21 @@ static void lzo_free(void *strm)
static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
struct bio *bio, int offset, int length,
struct buffer_head **bh, int b, int offset, int length,
struct squashfs_page_actor *output)
{
struct bvec_iter_all iter_all = {};
struct bio_vec *bvec = bvec_init_iter_all(&iter_all);
struct squashfs_lzo *stream = strm;
void *buff = stream->input, *data;
int bytes = length, res;
int avail, i, bytes = length, res;
size_t out_len = output->length;
while (bio_next_segment(bio, &iter_all)) {
int avail = min(bytes, ((int)bvec->bv_len) - offset);
data = page_address(bvec->bv_page) + bvec->bv_offset;
memcpy(buff, data + offset, avail);
for (i = 0; i < b; i++) {
avail = min(bytes, msblk->devblksize - offset);
memcpy(buff, bh[i]->b_data + offset, avail);
buff += avail;
bytes -= avail;
offset = 0;
put_bh(bh[i]);
}
res = lzo1x_decompress_safe(stream->input, (size_t)length,

View File

@@ -53,8 +53,8 @@ extern void *squashfs_decompressor_setup(struct super_block *, unsigned short);
/* decompressor_xxx.c */
extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *);
extern void squashfs_decompressor_destroy(struct squashfs_sb_info *);
extern int squashfs_decompress(struct squashfs_sb_info *, struct bio *,
int, int, struct squashfs_page_actor *);
extern int squashfs_decompress(struct squashfs_sb_info *, struct buffer_head **,
int, int, int, struct squashfs_page_actor *);
extern int squashfs_max_decompressors(void);
/* export.c */

View File

@@ -23,7 +23,7 @@
#include <linux/mutex.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
#include <linux/xz.h>
#include <linux/bitops.h>
@@ -130,12 +130,11 @@ static void squashfs_xz_free(void *strm)
static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
struct bio *bio, int offset, int length,
struct buffer_head **bh, int b, int offset, int length,
struct squashfs_page_actor *output)
{
struct bvec_iter_all iter_all = {};
struct bio_vec *bvec = bvec_init_iter_all(&iter_all);
int total = 0, error = 0;
enum xz_ret xz_err;
int avail, total = 0, k = 0;
struct squashfs_xz *stream = strm;
xz_dec_reset(stream->state);
@@ -145,23 +144,11 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
stream->buf.out_size = PAGE_SIZE;
stream->buf.out = squashfs_first_page(output);
for (;;) {
enum xz_ret xz_err;
if (stream->buf.in_pos == stream->buf.in_size) {
const void *data;
int avail;
if (!bio_next_segment(bio, &iter_all)) {
/* XZ_STREAM_END must be reached. */
error = -EIO;
break;
}
avail = min(length, ((int)bvec->bv_len) - offset);
data = page_address(bvec->bv_page) + bvec->bv_offset;
do {
if (stream->buf.in_pos == stream->buf.in_size && k < b) {
avail = min(length, msblk->devblksize - offset);
length -= avail;
stream->buf.in = data + offset;
stream->buf.in = bh[k]->b_data + offset;
stream->buf.in_size = avail;
stream->buf.in_pos = 0;
offset = 0;
@@ -176,17 +163,23 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
}
xz_err = xz_dec_run(stream->state, &stream->buf);
if (xz_err == XZ_STREAM_END)
break;
if (xz_err != XZ_OK) {
error = -EIO;
break;
}
}
if (stream->buf.in_pos == stream->buf.in_size && k < b)
put_bh(bh[k++]);
} while (xz_err == XZ_OK);
squashfs_finish_page(output);
return error ? error : total + stream->buf.out_pos;
if (xz_err != XZ_STREAM_END || k < b)
goto out;
return total + stream->buf.out_pos;
out:
for (; k < b; k++)
put_bh(bh[k]);
return -EIO;
}
const struct squashfs_decompressor squashfs_xz_comp_ops = {

View File

@@ -23,7 +23,7 @@
#include <linux/mutex.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
#include <linux/zlib.h>
#include <linux/vmalloc.h>
@@ -63,35 +63,21 @@ static void zlib_free(void *strm)
static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
struct bio *bio, int offset, int length,
struct buffer_head **bh, int b, int offset, int length,
struct squashfs_page_actor *output)
{
struct bvec_iter_all iter_all = {};
struct bio_vec *bvec = bvec_init_iter_all(&iter_all);
int zlib_init = 0, error = 0;
int zlib_err, zlib_init = 0, k = 0;
z_stream *stream = strm;
stream->avail_out = PAGE_SIZE;
stream->next_out = squashfs_first_page(output);
stream->avail_in = 0;
for (;;) {
int zlib_err;
if (stream->avail_in == 0) {
const void *data;
int avail;
if (!bio_next_segment(bio, &iter_all)) {
/* Z_STREAM_END must be reached. */
error = -EIO;
break;
}
avail = min(length, ((int)bvec->bv_len) - offset);
data = page_address(bvec->bv_page) + bvec->bv_offset;
do {
if (stream->avail_in == 0 && k < b) {
int avail = min(length, msblk->devblksize - offset);
length -= avail;
stream->next_in = data + offset;
stream->next_in = bh[k]->b_data + offset;
stream->avail_in = avail;
offset = 0;
}
@@ -105,28 +91,37 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
if (!zlib_init) {
zlib_err = zlib_inflateInit(stream);
if (zlib_err != Z_OK) {
error = -EIO;
break;
squashfs_finish_page(output);
goto out;
}
zlib_init = 1;
}
zlib_err = zlib_inflate(stream, Z_SYNC_FLUSH);
if (zlib_err == Z_STREAM_END)
break;
if (zlib_err != Z_OK) {
error = -EIO;
break;
}
}
if (stream->avail_in == 0 && k < b)
put_bh(bh[k++]);
} while (zlib_err == Z_OK);
squashfs_finish_page(output);
if (!error)
if (zlib_inflateEnd(stream) != Z_OK)
error = -EIO;
if (zlib_err != Z_STREAM_END)
goto out;
return error ? error : stream->total_out;
zlib_err = zlib_inflateEnd(stream);
if (zlib_err != Z_OK)
goto out;
if (k < b)
goto out;
return stream->total_out;
out:
for (; k < b; k++)
put_bh(bh[k]);
return -EIO;
}
const struct squashfs_decompressor squashfs_zlib_comp_ops = {

View File

@@ -18,7 +18,7 @@
*/
#include <linux/mutex.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
#include <linux/zstd.h>
#include <linux/vmalloc.h>
@@ -68,44 +68,33 @@ static void zstd_free(void *strm)
static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm,
struct bio *bio, int offset, int length,
struct buffer_head **bh, int b, int offset, int length,
struct squashfs_page_actor *output)
{
struct workspace *wksp = strm;
ZSTD_DStream *stream;
size_t total_out = 0;
int error = 0;
size_t zstd_err;
int k = 0;
ZSTD_inBuffer in_buf = { NULL, 0, 0 };
ZSTD_outBuffer out_buf = { NULL, 0, 0 };
struct bvec_iter_all iter_all = {};
struct bio_vec *bvec = bvec_init_iter_all(&iter_all);
stream = ZSTD_initDStream(wksp->window_size, wksp->mem, wksp->mem_size);
if (!stream) {
ERROR("Failed to initialize zstd decompressor\n");
return -EIO;
goto out;
}
out_buf.size = PAGE_SIZE;
out_buf.dst = squashfs_first_page(output);
for (;;) {
size_t zstd_err;
do {
if (in_buf.pos == in_buf.size && k < b) {
int avail = min(length, msblk->devblksize - offset);
if (in_buf.pos == in_buf.size) {
const void *data;
int avail;
if (!bio_next_segment(bio, &iter_all)) {
error = -EIO;
break;
}
avail = min(length, ((int)bvec->bv_len) - offset);
data = page_address(bvec->bv_page) + bvec->bv_offset;
length -= avail;
in_buf.src = data + offset;
in_buf.src = bh[k]->b_data + offset;
in_buf.size = avail;
in_buf.pos = 0;
offset = 0;
@@ -117,8 +106,8 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm,
/* Shouldn't run out of pages
* before stream is done.
*/
error = -EIO;
break;
squashfs_finish_page(output);
goto out;
}
out_buf.pos = 0;
out_buf.size = PAGE_SIZE;
@@ -127,20 +116,29 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm,
total_out -= out_buf.pos;
zstd_err = ZSTD_decompressStream(stream, &out_buf, &in_buf);
total_out += out_buf.pos; /* add the additional data produced */
if (zstd_err == 0)
break;
if (ZSTD_isError(zstd_err)) {
ERROR("zstd decompression error: %d\n",
(int)ZSTD_getErrorCode(zstd_err));
error = -EIO;
break;
}
}
if (in_buf.pos == in_buf.size && k < b)
put_bh(bh[k++]);
} while (zstd_err != 0 && !ZSTD_isError(zstd_err));
squashfs_finish_page(output);
return error ? error : total_out;
if (ZSTD_isError(zstd_err)) {
ERROR("zstd decompression error: %d\n",
(int)ZSTD_getErrorCode(zstd_err));
goto out;
}
if (k < b)
goto out;
return (int)total_out;
out:
for (; k < b; k++)
put_bh(bh[k]);
return -EIO;
}
const struct squashfs_decompressor squashfs_zstd_comp_ops = {

View File

@@ -410,7 +410,6 @@ struct blk_zone_report_hdr {
u8 padding[60];
};
extern unsigned int blkdev_nr_zones(struct block_device *bdev);
extern int blkdev_report_zones(struct block_device *bdev,
sector_t sector, struct blk_zone *zones,
unsigned int *nr_zones, gfp_t gfp_mask);
@@ -424,10 +423,6 @@ extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
#else /* CONFIG_BLK_DEV_ZONED */
static inline unsigned int blkdev_nr_zones(struct block_device *bdev)
{
return 0;
}
static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
fmode_t mode, unsigned int cmd,
unsigned long arg)
@@ -823,11 +818,6 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
}
#ifdef CONFIG_BLK_DEV_ZONED
static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
{
return blk_queue_is_zoned(q) ? q->nr_zones : 0;
}
static inline unsigned int blk_queue_zone_no(struct request_queue *q,
sector_t sector)
{
@@ -843,11 +833,6 @@ static inline bool blk_queue_zone_is_seq(struct request_queue *q,
return false;
return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap);
}
#else /* CONFIG_BLK_DEV_ZONED */
static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
{
return 0;
}
#endif /* CONFIG_BLK_DEV_ZONED */
static inline bool rq_is_sync(struct request *rq)

View File

@@ -113,7 +113,7 @@ struct elevator_mq_ops {
void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool);
struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
bool (*has_work)(struct blk_mq_hw_ctx *);
void (*completed_request)(struct request *, u64);
void (*completed_request)(struct request *);
void (*started_request)(struct request *);
void (*requeue_request)(struct request *);
struct request *(*former_request)(struct request_queue *, struct request *);

View File

@@ -49,24 +49,22 @@
/* Exported common interfaces */
#ifndef CONFIG_TINY_RCU
void synchronize_sched(void);
void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
#endif
#ifdef CONFIG_PREEMPT_RCU
void call_rcu(struct rcu_head *head, rcu_callback_t func);
void rcu_barrier_tasks(void);
void synchronize_rcu(void);
#else /* #ifdef CONFIG_PREEMPT_RCU */
#define call_rcu call_rcu_sched
#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
static inline void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
{
call_rcu(head, func);
}
void call_rcu_bh(struct rcu_head *head, rcu_callback_t func);
void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
void synchronize_sched(void);
void rcu_barrier_tasks(void);
#ifdef CONFIG_PREEMPT_RCU
void __rcu_read_lock(void);
void __rcu_read_unlock(void);
void synchronize_rcu(void);
/*
* Defined as a macro as it is a very low level header included from
@@ -88,6 +86,11 @@ static inline void __rcu_read_unlock(void)
preempt_enable();
}
static inline void synchronize_rcu(void)
{
synchronize_sched();
}
static inline int rcu_preempt_depth(void)
{
return 0;
@@ -98,7 +101,9 @@ static inline int rcu_preempt_depth(void)
/* Internal to kernel */
void rcu_init(void);
extern int rcu_scheduler_active __read_mostly;
void rcu_sched_clock_irq(int user);
void rcu_sched_qs(void);
void rcu_bh_qs(void);
void rcu_check_callbacks(int user);
void rcu_report_dead(unsigned int cpu);
void rcutree_migrate_callbacks(int cpu);
@@ -357,7 +362,8 @@ static inline void rcu_preempt_sleep_check(void) { }
* and rcu_assign_pointer(). Some of these could be folded into their
* callers, but they are left separate in order to ease introduction of
* multiple flavors of pointers to match the multiple flavors of RCU
* (e.g., __rcu_sched, and __srcu), should this make sense in the future.
* (e.g., __rcu_bh, __rcu_sched, and __srcu), should this make sense in
* the future.
*/
#ifdef __CHECKER__

View File

@@ -36,11 +36,6 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
/* Never flag non-existent other CPUs! */
static inline bool rcu_eqs_special_set(int cpu) { return false; }
static inline void synchronize_sched(void)
{
synchronize_rcu();
}
static inline unsigned long get_state_synchronize_rcu(void)
{
return 0;
@@ -61,16 +56,17 @@ static inline void cond_synchronize_sched(unsigned long oldstate)
might_sleep();
}
extern void rcu_barrier(void);
extern void rcu_barrier_bh(void);
extern void rcu_barrier_sched(void);
static inline void rcu_barrier_sched(void)
static inline void synchronize_rcu_expedited(void)
{
rcu_barrier(); /* Only one CPU, so only one list of callbacks! */
synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */
}
static inline void rcu_barrier_bh(void)
static inline void rcu_barrier(void)
{
rcu_barrier();
rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */
}
static inline void synchronize_rcu_bh(void)
@@ -83,36 +79,25 @@ static inline void synchronize_rcu_bh_expedited(void)
synchronize_sched();
}
static inline void synchronize_rcu_expedited(void)
{
synchronize_sched();
}
static inline void synchronize_sched_expedited(void)
{
synchronize_sched();
}
static inline void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
static inline void kfree_call_rcu(struct rcu_head *head,
rcu_callback_t func)
{
call_rcu(head, func);
}
static inline void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
{
call_rcu(head, func);
}
void rcu_qs(void);
static inline void rcu_softirq_qs(void)
{
rcu_qs();
rcu_sched_qs();
}
#define rcu_note_context_switch(preempt) \
do { \
rcu_qs(); \
rcu_sched_qs(); \
rcu_tasks_qs(current); \
} while (0)


@@ -45,17 +45,10 @@ static inline void rcu_virt_note_context_switch(int cpu)
rcu_note_context_switch(false);
}
static inline void synchronize_rcu_bh(void)
{
synchronize_rcu();
}
void synchronize_rcu_bh(void);
void synchronize_sched_expedited(void);
void synchronize_rcu_expedited(void);
static inline void synchronize_sched_expedited(void)
{
synchronize_rcu_expedited();
}
void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
/**
@@ -76,7 +69,7 @@ void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func);
*/
static inline void synchronize_rcu_bh_expedited(void)
{
synchronize_rcu_expedited();
synchronize_sched_expedited();
}
void rcu_barrier(void);


@@ -36,7 +36,6 @@ struct srcu_struct {
struct rcu_head *srcu_cb_head; /* Pending callbacks: Head. */
struct rcu_head **srcu_cb_tail; /* Pending callbacks: Tail. */
struct work_struct srcu_work; /* For driving grace periods. */
struct list_head srcu_boot_entry; /* Early-boot callbacks. */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -49,7 +48,6 @@ void srcu_drive_gp(struct work_struct *wp);
.srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \
.srcu_cb_tail = &name.srcu_cb_head, \
.srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp), \
.srcu_boot_entry = LIST_HEAD_INIT(name.srcu_boot_entry), \
__SRCU_DEP_MAP_INIT(name) \
}


@@ -94,7 +94,6 @@ struct srcu_struct {
/* callback for the barrier */
/* operation. */
struct delayed_work work;
struct list_head srcu_boot_entry; /* Early-boot callbacks. */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -106,13 +105,12 @@ struct srcu_struct {
#define SRCU_STATE_SCAN2 2
#define __SRCU_STRUCT_INIT(name, pcpu_name) \
{ \
.sda = &pcpu_name, \
.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
.srcu_gp_seq_needed = -1UL, \
.srcu_boot_entry = LIST_HEAD_INIT(name.srcu_boot_entry), \
__SRCU_DEP_MAP_INIT(name) \
}
{ \
.sda = &pcpu_name, \
.lock = __SPIN_LOCK_UNLOCKED(name.lock), \
.srcu_gp_seq_needed = 0 - 1, \
__SRCU_DEP_MAP_INIT(name) \
}
/*
* Define and initialize a srcu struct at build time.


@@ -1,96 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kyber
#if !defined(_TRACE_KYBER_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_KYBER_H
#include <linux/blkdev.h>
#include <linux/tracepoint.h>
#define DOMAIN_LEN 16
#define LATENCY_TYPE_LEN 8
TRACE_EVENT(kyber_latency,
TP_PROTO(struct request_queue *q, const char *domain, const char *type,
unsigned int percentile, unsigned int numerator,
unsigned int denominator, unsigned int samples),
TP_ARGS(q, domain, type, percentile, numerator, denominator, samples),
TP_STRUCT__entry(
__field( dev_t, dev )
__array( char, domain, DOMAIN_LEN )
__array( char, type, LATENCY_TYPE_LEN )
__field( u8, percentile )
__field( u8, numerator )
__field( u8, denominator )
__field( unsigned int, samples )
),
TP_fast_assign(
__entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent)));
strlcpy(__entry->domain, domain, DOMAIN_LEN);
strlcpy(__entry->type, type, LATENCY_TYPE_LEN);
__entry->percentile = percentile;
__entry->numerator = numerator;
__entry->denominator = denominator;
__entry->samples = samples;
),
TP_printk("%d,%d %s %s p%u %u/%u samples=%u",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->domain,
__entry->type, __entry->percentile, __entry->numerator,
__entry->denominator, __entry->samples)
);
TRACE_EVENT(kyber_adjust,
TP_PROTO(struct request_queue *q, const char *domain,
unsigned int depth),
TP_ARGS(q, domain, depth),
TP_STRUCT__entry(
__field( dev_t, dev )
__array( char, domain, DOMAIN_LEN )
__field( unsigned int, depth )
),
TP_fast_assign(
__entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent)));
strlcpy(__entry->domain, domain, DOMAIN_LEN);
__entry->depth = depth;
),
TP_printk("%d,%d %s %u",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->domain,
__entry->depth)
);
TRACE_EVENT(kyber_throttled,
TP_PROTO(struct request_queue *q, const char *domain),
TP_ARGS(q, domain),
TP_STRUCT__entry(
__field( dev_t, dev )
__array( char, domain, DOMAIN_LEN )
),
TP_fast_assign(
__entry->dev = disk_devt(dev_to_disk(kobj_to_dev(q->kobj.parent)));
strlcpy(__entry->domain, domain, DOMAIN_LEN);
),
TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->domain)
);
#endif /* _TRACE_KYBER_H */
/* This part must be outside protection */
#include <trace/define_trace.h>


@@ -137,11 +137,8 @@ struct blk_zone_range {
* sector specified in the report request structure.
* @BLKRESETZONE: Reset the write pointer of the zones in the specified
* sector range. The sector range must be zone aligned.
* @BLKGETZONESZ: Get the device zone size in number of 512 B sectors.
*/
#define BLKREPORTZONE _IOWR(0x12, 130, struct blk_zone_report)
#define BLKRESETZONE _IOW(0x12, 131, struct blk_zone_range)
#define BLKGETZONESZ _IOW(0x12, 132, __u32)
#define BLKGETNRZONES _IOW(0x12, 133, __u32)
#endif /* _UAPI_BLKZONED_H */


@@ -816,6 +816,7 @@ config MEMCG_SWAP
config MEMCG_SWAP_ENABLED
bool "Swap controller enabled by default"
depends on MEMCG_SWAP
default y
help
Memory Resource Controller Swap Extension comes with its price in
a bigger memory consumption. General purpose distribution kernels
@@ -829,6 +830,7 @@ config MEMCG_SWAP_ENABLED
config MEMCG_KMEM
bool
depends on MEMCG && !SLOB
default y
config BLK_CGROUP
bool "IO controller"
@@ -1883,6 +1885,7 @@ config SLAB_FREELIST_HARDENED
freelist exploit methods.
config SLUB_CPU_PARTIAL
default y
depends on SLUB && SMP
bool "SLUB per cpu partial cache"
help


@@ -148,7 +148,7 @@ static ssize_t write_irq_affinity(int type, struct file *file,
if (!irq_can_set_affinity_usr(irq) || no_irq_affinity)
return -EIO;
if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
return -ENOMEM;
if (type)
@@ -247,7 +247,7 @@ static ssize_t default_affinity_write(struct file *file,
cpumask_var_t new_value;
int err;
if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
return -ENOMEM;
err = cpumask_parse_user(buffer, count, new_value);


@@ -17,6 +17,7 @@
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <asm/processor.h>
@@ -162,7 +163,9 @@ static void irq_work_run_list(struct llist_head *list)
flags = atomic_read(&work->flags) & ~IRQ_WORK_PENDING;
atomic_xchg(&work->flags, flags);
check_start_time(ts);
work->func(work);
check_process_time("irq_work %ps", ts, work->func);
/*
* Clear the BUSY bit and return to the free state if
* no-one else claimed it meanwhile.


@@ -343,7 +343,7 @@ static int profile_dead_cpu(unsigned int cpu)
struct page *page;
int i;
if (cpumask_available(prof_cpu_mask))
if (prof_cpu_mask != NULL)
cpumask_clear_cpu(cpu, prof_cpu_mask);
for (i = 0; i < 2; i++) {
@@ -380,7 +380,7 @@ static int profile_prepare_cpu(unsigned int cpu)
static int profile_online_cpu(unsigned int cpu)
{
if (cpumask_available(prof_cpu_mask))
if (prof_cpu_mask != NULL)
cpumask_set_cpu(cpu, prof_cpu_mask);
return 0;
@@ -410,7 +410,7 @@ void profile_tick(int type)
{
struct pt_regs *regs = get_irq_regs();
if (!user_mode(regs) && cpumask_available(prof_cpu_mask) &&
if (!user_mode(regs) && prof_cpu_mask != NULL &&
cpumask_test_cpu(smp_processor_id(), prof_cpu_mask))
profile_hit(type, (void *)profile_pc(regs));
}
@@ -437,7 +437,7 @@ static ssize_t prof_cpu_mask_proc_write(struct file *file,
cpumask_var_t new_value;
int err;
if (!zalloc_cpumask_var(&new_value, GFP_KERNEL))
if (!alloc_cpumask_var(&new_value, GFP_KERNEL))
return -ENOMEM;
err = cpumask_parse_user(buffer, count, new_value);


@@ -443,12 +443,6 @@ do { \
#endif /* #if defined(SRCU) || !defined(TINY_RCU) */
#ifdef CONFIG_SRCU
void srcu_init(void);
#else /* #ifdef CONFIG_SRCU */
static inline void srcu_init(void) { }
#endif /* #else #ifdef CONFIG_SRCU */
#ifdef CONFIG_TINY_RCU
/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
static inline bool rcu_gp_is_normal(void) { return true; }


@@ -34,8 +34,6 @@
#include "rcu.h"
int rcu_scheduler_active __read_mostly;
static LIST_HEAD(srcu_boot_list);
static bool srcu_init_done;
static int init_srcu_struct_fields(struct srcu_struct *sp)
{
@@ -48,7 +46,6 @@ static int init_srcu_struct_fields(struct srcu_struct *sp)
sp->srcu_gp_waiting = false;
sp->srcu_idx = 0;
INIT_WORK(&sp->srcu_work, srcu_drive_gp);
INIT_LIST_HEAD(&sp->srcu_boot_entry);
return 0;
}
@@ -182,12 +179,8 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
*sp->srcu_cb_tail = rhp;
sp->srcu_cb_tail = &rhp->next;
local_irq_restore(flags);
if (!READ_ONCE(sp->srcu_gp_running)) {
if (likely(srcu_init_done))
schedule_work(&sp->srcu_work);
else if (list_empty(&sp->srcu_boot_entry))
list_add(&sp->srcu_boot_entry, &srcu_boot_list);
}
if (!READ_ONCE(sp->srcu_gp_running))
schedule_work(&sp->srcu_work);
}
EXPORT_SYMBOL_GPL(call_srcu);
@@ -211,21 +204,3 @@ void __init rcu_scheduler_starting(void)
{
rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
}
/*
* Queue work for srcu_struct structures with early boot callbacks.
* The work won't actually execute until the workqueue initialization
* phase that takes place after the scheduler starts.
*/
void __init srcu_init(void)
{
struct srcu_struct *sp;
srcu_init_done = true;
while (!list_empty(&srcu_boot_list)) {
sp = list_first_entry(&srcu_boot_list,
struct srcu_struct, srcu_boot_entry);
list_del_init(&sp->srcu_boot_entry);
schedule_work(&sp->srcu_work);
}
}


@@ -51,10 +51,6 @@ module_param(exp_holdoff, ulong, 0444);
static ulong counter_wrap_check = (ULONG_MAX >> 2);
module_param(counter_wrap_check, ulong, 0444);
/* Early-boot callback-management, so early that no lock is required! */
static LIST_HEAD(srcu_boot_list);
static bool __read_mostly srcu_init_done;
static void srcu_invoke_callbacks(struct work_struct *work);
static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
static void process_srcu(struct work_struct *work);
@@ -186,7 +182,6 @@ static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static)
mutex_init(&sp->srcu_barrier_mutex);
atomic_set(&sp->srcu_barrier_cpu_cnt, 0);
INIT_DELAYED_WORK(&sp->work, process_srcu);
INIT_LIST_HEAD(&sp->srcu_boot_entry);
if (!is_static)
sp->sda = alloc_percpu(struct srcu_data);
init_srcu_struct_nodes(sp, is_static);
@@ -240,6 +235,7 @@ static void check_init_srcu_struct(struct srcu_struct *sp)
{
unsigned long flags;
WARN_ON_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INIT);
/* The smp_load_acquire() pairs with the smp_store_release(). */
if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/
return; /* Already initialized. */
@@ -707,11 +703,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) {
WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
srcu_gp_start(sp);
if (likely(srcu_init_done))
queue_delayed_work(rcu_gp_wq, &sp->work,
srcu_get_delay(sp));
else if (list_empty(&sp->srcu_boot_entry))
list_add(&sp->srcu_boot_entry, &srcu_boot_list);
queue_delayed_work(rcu_gp_wq, &sp->work, srcu_get_delay(sp));
}
spin_unlock_irqrestore_rcu_node(sp, flags);
}
@@ -1318,17 +1310,3 @@ static int __init srcu_bootup_announce(void)
return 0;
}
early_initcall(srcu_bootup_announce);
void __init srcu_init(void)
{
struct srcu_struct *sp;
srcu_init_done = true;
while (!list_empty(&srcu_boot_list)) {
sp = list_first_entry(&srcu_boot_list,
struct srcu_struct, srcu_boot_entry);
check_init_srcu_struct(sp);
list_del_init(&sp->srcu_boot_entry);
queue_work(rcu_gp_wq, &sp->work.work);
}
}


@@ -1,10 +1,23 @@
// SPDX-License-Identifier: GPL-2.0+
/*
* Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, you can access it online at
* http://www.gnu.org/licenses/gpl-2.0.html.
*
* Copyright IBM Corporation, 2008
*
* Author: Paul E. McKenney <paulmck@linux.ibm.com>
* Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
*
* For detailed explanation of Read-Copy Update mechanism see -
* Documentation/RCU
@@ -33,28 +46,69 @@ struct rcu_ctrlblk {
};
/* Definition for rcupdate control block. */
static struct rcu_ctrlblk rcu_ctrlblk = {
.donetail = &rcu_ctrlblk.rcucblist,
.curtail = &rcu_ctrlblk.rcucblist,
static struct rcu_ctrlblk rcu_sched_ctrlblk = {
.donetail = &rcu_sched_ctrlblk.rcucblist,
.curtail = &rcu_sched_ctrlblk.rcucblist,
};
void rcu_barrier(void)
{
wait_rcu_gp(call_rcu);
}
EXPORT_SYMBOL(rcu_barrier);
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
.donetail = &rcu_bh_ctrlblk.rcucblist,
.curtail = &rcu_bh_ctrlblk.rcucblist,
};
/* Record an rcu quiescent state. */
void rcu_qs(void)
void rcu_barrier_bh(void)
{
wait_rcu_gp(call_rcu_bh);
}
EXPORT_SYMBOL(rcu_barrier_bh);
void rcu_barrier_sched(void)
{
wait_rcu_gp(call_rcu_sched);
}
EXPORT_SYMBOL(rcu_barrier_sched);
/*
* Helper function for rcu_sched_qs() and rcu_bh_qs().
* Also irqs are disabled to avoid confusion due to interrupt handlers
* invoking call_rcu().
*/
static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
{
if (rcp->donetail != rcp->curtail) {
rcp->donetail = rcp->curtail;
return 1;
}
return 0;
}
/*
* Record an rcu quiescent state. And an rcu_bh quiescent state while we
* are at it, given that any rcu quiescent state is also an rcu_bh
* quiescent state. Use "+" instead of "||" to defeat short circuiting.
*/
void rcu_sched_qs(void)
{
unsigned long flags;
local_irq_save(flags);
if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
rcu_qsctr_help(&rcu_bh_ctrlblk))
raise_softirq(RCU_SOFTIRQ);
local_irq_restore(flags);
}
if (rcu_ctrlblk.donetail != rcu_ctrlblk.curtail) {
rcu_ctrlblk.donetail = rcu_ctrlblk.curtail;
raise_softirq_irqoff(RCU_SOFTIRQ);
}
/*
* Record an rcu_bh quiescent state.
*/
void rcu_bh_qs(void)
{
unsigned long flags;
local_irq_save(flags);
if (rcu_qsctr_help(&rcu_bh_ctrlblk))
raise_softirq(RCU_SOFTIRQ);
local_irq_restore(flags);
}
@@ -64,35 +118,36 @@ void rcu_qs(void)
* be called from hardirq context. It is normally called from the
* scheduling-clock interrupt.
*/
void rcu_sched_clock_irq(int user)
void rcu_check_callbacks(int user)
{
if (user) {
rcu_qs();
} else if (rcu_ctrlblk.donetail != rcu_ctrlblk.curtail) {
set_tsk_need_resched(current);
set_preempt_need_resched();
}
if (user)
rcu_sched_qs();
if (user || !in_softirq())
rcu_bh_qs();
}
/* Invoke the RCU callbacks whose grace period has elapsed. */
static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
/*
* Invoke the RCU callbacks on the specified rcu_ctrlblk structure
* whose grace period has elapsed.
*/
static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
{
struct rcu_head *next, *list;
unsigned long flags;
/* Move the ready-to-invoke callbacks to a local list. */
local_irq_save(flags);
if (rcu_ctrlblk.donetail == &rcu_ctrlblk.rcucblist) {
if (rcp->donetail == &rcp->rcucblist) {
/* No callbacks ready, so just leave. */
local_irq_restore(flags);
return;
}
list = rcu_ctrlblk.rcucblist;
rcu_ctrlblk.rcucblist = *rcu_ctrlblk.donetail;
*rcu_ctrlblk.donetail = NULL;
if (rcu_ctrlblk.curtail == rcu_ctrlblk.donetail)
rcu_ctrlblk.curtail = &rcu_ctrlblk.rcucblist;
rcu_ctrlblk.donetail = &rcu_ctrlblk.rcucblist;
list = rcp->rcucblist;
rcp->rcucblist = *rcp->donetail;
*rcp->donetail = NULL;
if (rcp->curtail == rcp->donetail)
rcp->curtail = &rcp->rcucblist;
rcp->donetail = &rcp->rcucblist;
local_irq_restore(flags);
/* Invoke the callbacks on the local list. */
@@ -107,31 +162,37 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused
}
}
static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
{
__rcu_process_callbacks(&rcu_sched_ctrlblk);
__rcu_process_callbacks(&rcu_bh_ctrlblk);
}
/*
* Wait for a grace period to elapse. But it is illegal to invoke
* synchronize_rcu() from within an RCU read-side critical section.
* Therefore, any legal call to synchronize_rcu() is a quiescent
* state, and so on a UP system, synchronize_rcu() need do nothing.
* (But Lai Jiangshan points out the benefits of doing might_sleep()
* to reduce latency.)
* synchronize_sched() from within an RCU read-side critical section.
* Therefore, any legal call to synchronize_sched() is a quiescent
* state, and so on a UP system, synchronize_sched() need do nothing.
* Ditto for synchronize_rcu_bh(). (But Lai Jiangshan points out the
* benefits of doing might_sleep() to reduce latency.)
*
* Cool, huh? (Due to Josh Triplett.)
*/
void synchronize_rcu(void)
void synchronize_sched(void)
{
RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
lock_is_held(&rcu_lock_map) ||
lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_rcu() in RCU read-side critical section");
"Illegal synchronize_sched() in RCU read-side critical section");
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
EXPORT_SYMBOL_GPL(synchronize_sched);
/*
* Post an RCU callback to be invoked after the end of an RCU grace
* period. But since we have but one CPU, that would be after any
* quiescent state.
* Helper function for call_rcu() and call_rcu_bh().
*/
void call_rcu(struct rcu_head *head, rcu_callback_t func)
static void __call_rcu(struct rcu_head *head,
rcu_callback_t func,
struct rcu_ctrlblk *rcp)
{
unsigned long flags;
@@ -140,20 +201,39 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
head->next = NULL;
local_irq_save(flags);
*rcu_ctrlblk.curtail = head;
rcu_ctrlblk.curtail = &head->next;
*rcp->curtail = head;
rcp->curtail = &head->next;
local_irq_restore(flags);
if (unlikely(is_idle_task(current))) {
/* force scheduling for rcu_qs() */
/* force scheduling for rcu_sched_qs() */
resched_cpu(0);
}
}
EXPORT_SYMBOL_GPL(call_rcu);
/*
* Post an RCU callback to be invoked after the end of an RCU-sched grace
* period. But since we have but one CPU, that would be after any
* quiescent state.
*/
void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
{
__call_rcu(head, func, &rcu_sched_ctrlblk);
}
EXPORT_SYMBOL_GPL(call_rcu_sched);
/*
* Post an RCU bottom-half callback to be invoked after any subsequent
* quiescent state.
*/
void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
{
__call_rcu(head, func, &rcu_bh_ctrlblk);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);
void __init rcu_init(void)
{
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
rcu_early_boot_tests();
srcu_init();
}


@@ -92,29 +92,25 @@ static const char *tp_##sname##_varname __used __tracepoint_string = sname##_var
#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
DEFINE_RCU_TPS(sname) \
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data); \
struct rcu_state rcu_state = { \
.level = { &rcu_state.node[0] }, \
.rda = &rcu_data, \
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \
struct rcu_state sname##_state = { \
.level = { &sname##_state.node[0] }, \
.rda = &sname##_data, \
.call = cr, \
.gp_state = RCU_GP_IDLE, \
.gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT, \
.barrier_mutex = __MUTEX_INITIALIZER(rcu_state.barrier_mutex), \
.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
.name = RCU_STATE_NAME(sname), \
.abbr = sabbr, \
.exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex), \
.exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex), \
.ofl_lock = __SPIN_LOCK_UNLOCKED(rcu_state.ofl_lock), \
.exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \
.exp_wake_mutex = __MUTEX_INITIALIZER(sname##_state.exp_wake_mutex), \
.ofl_lock = __SPIN_LOCK_UNLOCKED(sname##_state.ofl_lock), \
}
#ifdef CONFIG_PREEMPT_RCU
RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
#else
RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu);
#endif
RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
static struct rcu_state *const rcu_state_p = &rcu_state;
static struct rcu_data __percpu *const rcu_data_p = &rcu_data;
static struct rcu_state *const rcu_state_p;
LIST_HEAD(rcu_struct_flavors);
/* Dump rcu_node combining tree at boot to verify correct setup. */
@@ -226,9 +222,43 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
return rcu_seq_state(rcu_seq_current(&rsp->gp_seq));
}
/*
* Note a quiescent state. Because we do not need to know
* how many quiescent states passed, just if there was at least
* one since the start of the grace period, this just sets a flag.
* The caller must have disabled preemption.
*/
void rcu_sched_qs(void)
{
RCU_LOCKDEP_WARN(preemptible(), "rcu_sched_qs() invoked with preemption enabled!!!");
if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s))
return;
trace_rcu_grace_period(TPS("rcu_sched"),
__this_cpu_read(rcu_sched_data.gp_seq),
TPS("cpuqs"));
__this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false);
if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
return;
__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false);
rcu_report_exp_rdp(&rcu_sched_state,
this_cpu_ptr(&rcu_sched_data), true);
}
void rcu_bh_qs(void)
{
RCU_LOCKDEP_WARN(preemptible(), "rcu_bh_qs() invoked with preemption enabled!!!");
if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) {
trace_rcu_grace_period(TPS("rcu_bh"),
__this_cpu_read(rcu_bh_data.gp_seq),
TPS("cpuqs"));
__this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false);
}
}
void rcu_softirq_qs(void)
{
rcu_qs();
rcu_sched_qs();
rcu_preempt_qs();
rcu_preempt_deferred_qs(current);
}
@@ -402,18 +432,31 @@ static void rcu_momentary_dyntick_idle(void)
rcu_preempt_deferred_qs(current);
}
/**
* rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
*
* If the current CPU is idle or running at a first-level (not nested)
* interrupt from idle, return true. The caller must have at least
* disabled preemption.
/*
* Note a context switch. This is a quiescent state for RCU-sched,
* and requires special handling for preemptible RCU.
* The caller must have disabled interrupts.
*/
static int rcu_is_cpu_rrupt_from_idle(void)
void rcu_note_context_switch(bool preempt)
{
return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 0 &&
__this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1;
barrier(); /* Avoid RCU read-side critical sections leaking down. */
trace_rcu_utilization(TPS("Start context switch"));
rcu_sched_qs();
rcu_preempt_note_context_switch(preempt);
/* Load rcu_urgent_qs before other flags. */
if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs)))
goto out;
this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))
rcu_momentary_dyntick_idle();
this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
if (!preempt)
rcu_tasks_qs(current);
out:
trace_rcu_utilization(TPS("End context switch"));
barrier(); /* Avoid RCU read-side critical sections leaking up. */
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
/*
* Register a quiescent state for all RCU flavors. If there is an
@@ -447,8 +490,8 @@ void rcu_all_qs(void)
rcu_momentary_dyntick_idle();
local_irq_restore(flags);
}
if (unlikely(raw_cpu_read(rcu_data.cpu_no_qs.b.exp)))
rcu_qs();
if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)))
rcu_sched_qs();
this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
barrier(); /* Avoid RCU read-side critical sections leaking up. */
preempt_enable();
@@ -529,7 +572,7 @@ EXPORT_SYMBOL_GPL(rcu_get_gp_seq);
*/
unsigned long rcu_sched_get_gp_seq(void)
{
return rcu_get_gp_seq();
return READ_ONCE(rcu_sched_state.gp_seq);
}
EXPORT_SYMBOL_GPL(rcu_sched_get_gp_seq);
@@ -538,7 +581,7 @@ EXPORT_SYMBOL_GPL(rcu_sched_get_gp_seq);
*/
unsigned long rcu_bh_get_gp_seq(void)
{
return READ_ONCE(rcu_state_p->gp_seq);
return READ_ONCE(rcu_bh_state.gp_seq);
}
EXPORT_SYMBOL_GPL(rcu_bh_get_gp_seq);
@@ -560,7 +603,7 @@ EXPORT_SYMBOL_GPL(rcu_exp_batches_completed);
*/
unsigned long rcu_exp_batches_completed_sched(void)
{
return rcu_state.expedited_sequence;
return rcu_sched_state.expedited_sequence;
}
EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched);
@@ -578,7 +621,7 @@ EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
*/
void rcu_bh_force_quiescent_state(void)
{
force_quiescent_state(rcu_state_p);
force_quiescent_state(&rcu_bh_state);
}
EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
@@ -587,7 +630,7 @@ EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
*/
void rcu_sched_force_quiescent_state(void)
{
rcu_force_quiescent_state();
force_quiescent_state(&rcu_sched_state);
}
EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
@@ -637,10 +680,14 @@ void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
switch (test_type) {
case RCU_FLAVOR:
case RCU_BH_FLAVOR:
case RCU_SCHED_FLAVOR:
rsp = rcu_state_p;
break;
case RCU_BH_FLAVOR:
rsp = &rcu_bh_state;
break;
case RCU_SCHED_FLAVOR:
rsp = &rcu_sched_state;
break;
default:
break;
}
@@ -1051,6 +1098,19 @@ EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
/**
* rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
*
* If the current CPU is idle or running at a first-level (not nested)
* interrupt from idle, return true. The caller must have at least
* disabled preemption.
*/
static int rcu_is_cpu_rrupt_from_idle(void)
{
return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 0 &&
__this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1;
}
/*
* We are reporting a quiescent state on behalf of some other CPU, so
* it is our responsibility to check for and handle potential overflow
@@ -2305,7 +2365,7 @@ rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
struct rcu_node *rnp_p;
raw_lockdep_assert_held_rcu_node(rnp);
if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT)) ||
if (WARN_ON_ONCE(rcu_state_p == &rcu_sched_state) ||
WARN_ON_ONCE(rsp != rcu_state_p) ||
WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)) ||
rnp->qsmask != 0) {
@@ -2580,18 +2640,47 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
}
/*
* This function is invoked from each scheduling-clock interrupt,
* and checks to see if this CPU is in a non-context-switch quiescent
* state, for example, user mode or idle loop. It also schedules RCU
* core processing. If the current grace period has gone on too long,
* it will ask the scheduler to manufacture a context switch for the sole
* purpose of providing the needed quiescent state.
* Check to see if this CPU is in a non-context-switch quiescent state
* (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
* Also schedule RCU core processing.
*
* This function must be called from hardirq context. It is normally
* invoked from the scheduling-clock interrupt.
*/
void rcu_sched_clock_irq(int user)
void rcu_check_callbacks(int user)
{
trace_rcu_utilization(TPS("Start scheduler-tick"));
increment_cpu_stall_ticks();
rcu_flavor_sched_clock_irq(user);
if (user || rcu_is_cpu_rrupt_from_idle()) {
/*
* Get here if this CPU took its interrupt from user
* mode or from the idle loop, and if this is not a
* nested interrupt. In this case, the CPU is in
* a quiescent state, so note it.
*
* No memory barrier is required here because both
* rcu_sched_qs() and rcu_bh_qs() reference only CPU-local
* variables that other CPUs neither access nor modify,
* at least not while the corresponding CPU is online.
*/
rcu_sched_qs();
rcu_bh_qs();
rcu_note_voluntary_context_switch(current);
} else if (!in_softirq()) {
/*
* Get here if this CPU did not take its interrupt from
* softirq, in other words, if it is not interrupting
* a rcu_bh read-side critical section. This is an _bh
* critical section, so note it.
*/
rcu_bh_qs();
}
rcu_preempt_check_callbacks();
/* The load-acquire pairs with the store-release setting to true. */
if (smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) {
/* Idle and userspace execution already are quiescent states. */
@@ -2626,7 +2715,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))
mask = 0;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (rnp->qsmask == 0) {
if (!IS_ENABLED(CONFIG_PREEMPT) ||
if (rcu_state_p == &rcu_sched_state ||
rsp != rcu_state_p ||
rcu_preempt_blocked_readers_cgp(rnp)) {
/*
@@ -2959,60 +3048,60 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
local_irq_restore(flags);
}
/**
* call_rcu() - Queue an RCU callback for invocation after a grace period.
* @head: structure to be used for queueing the RCU updates.
* @func: actual callback function to be invoked after the grace period
*
* The callback function will be invoked some time after a full grace
* period elapses, in other words after all pre-existing RCU read-side
* critical sections have completed. However, the callback function
* might well execute concurrently with RCU read-side critical sections
* that started after call_rcu() was invoked. RCU read-side critical
* sections are delimited by rcu_read_lock() and rcu_read_unlock(), and
* may be nested. In addition, regions of code across which interrupts,
* preemption, or softirqs have been disabled also serve as RCU read-side
* critical sections. This includes hardware interrupt handlers, softirq
* handlers, and NMI handlers.
*
* Note that all CPUs must agree that the grace period extended beyond
* all pre-existing RCU read-side critical sections. On systems with more
* than one CPU, this means that when "func()" is invoked, each CPU is
* guaranteed to have executed a full memory barrier since the end of its
* last RCU read-side critical section whose beginning preceded the call
* to call_rcu(). It also means that each CPU executing an RCU read-side
* critical section that continues beyond the start of "func()" must have
* executed a memory barrier after the call_rcu() but before the beginning
* of that RCU read-side critical section. Note that these guarantees
* include CPUs that are offline, idle, or executing in user mode, as
* well as CPUs that are executing in the kernel.
*
* Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
* resulting RCU callback function "func()", then both CPU A and CPU B are
* guaranteed to execute a full memory barrier during the time interval
* between the call to call_rcu() and the invocation of "func()" -- even
* if CPU A and CPU B are the same CPU (but again only if the system has
* more than one CPU).
*/
void call_rcu(struct rcu_head *head, rcu_callback_t func)
{
__call_rcu(head, func, rcu_state_p, -1, 0);
}
EXPORT_SYMBOL_GPL(call_rcu);
/**
* call_rcu_sched() - Queue an RCU for invocation after sched grace period.
* @head: structure to be used for queueing the RCU updates.
* @func: actual callback function to be invoked after the grace period
*
* This is transitional.
* The callback function will be invoked some time after a full grace
* period elapses, in other words after all currently executing RCU
* read-side critical sections have completed. call_rcu_sched() assumes
* that the read-side critical sections end on enabling of preemption
* or on voluntary preemption.
* RCU read-side critical sections are delimited by:
*
* - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR
* - anything that disables preemption.
*
* These may be nested.
*
* See the description of call_rcu() for more detailed information on
* memory ordering guarantees.
*/
void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
{
call_rcu(head, func);
__call_rcu(head, func, &rcu_sched_state, -1, 0);
}
EXPORT_SYMBOL_GPL(call_rcu_sched);
/**
 * call_rcu_bh() - Queue an RCU callback for invocation after a quicker grace period.
* @head: structure to be used for queueing the RCU updates.
* @func: actual callback function to be invoked after the grace period
*
* The callback function will be invoked some time after a full grace
* period elapses, in other words after all currently executing RCU
* read-side critical sections have completed. call_rcu_bh() assumes
* that the read-side critical sections end on completion of a softirq
* handler. This means that read-side critical sections in process
* context must not be interrupted by softirqs. This interface is to be
* used when most of the read-side critical sections are in softirq context.
* RCU read-side critical sections are delimited by:
*
* - rcu_read_lock() and rcu_read_unlock(), if in interrupt context, OR
* - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
*
* These may be nested.
*
* See the description of call_rcu() for more detailed information on
* memory ordering guarantees.
*/
void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
{
__call_rcu(head, func, &rcu_bh_state, -1, 0);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);
/*
* Queue an RCU callback for lazy invocation after a grace period.
* This will likely be later named something like "call_rcu_lazy()",
@@ -3027,17 +3116,103 @@ void kfree_call_rcu(struct rcu_head *head,
}
EXPORT_SYMBOL_GPL(kfree_call_rcu);
/*
* Because a context switch is a grace period for RCU-sched and RCU-bh,
* any blocking grace-period wait automatically implies a grace period
 * if there is only one CPU online at any point in time during execution
* of either synchronize_sched() or synchronize_rcu_bh(). It is OK to
* occasionally incorrectly indicate that there are multiple CPUs online
* when there was in fact only one the whole time, as this just adds
* some overhead: RCU still operates correctly.
*/
static int rcu_blocking_is_gp(void)
{
int ret;
might_sleep(); /* Check for RCU read-side critical section. */
preempt_disable();
ret = num_online_cpus() <= 1;
preempt_enable();
return ret;
}
/**
* synchronize_sched - wait until an rcu-sched grace period has elapsed.
*
* This is transitional.
* Control will return to the caller some time after a full rcu-sched
* grace period has elapsed, in other words after all currently executing
* rcu-sched read-side critical sections have completed. These read-side
* critical sections are delimited by rcu_read_lock_sched() and
* rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(),
* local_irq_disable(), and so on may be used in place of
* rcu_read_lock_sched().
*
* This means that all preempt_disable code sequences, including NMI and
* non-threaded hardware-interrupt handlers, in progress on entry will
* have completed before this primitive returns. However, this does not
* guarantee that softirq handlers will have completed, since in some
* kernels, these handlers can run in process context, and can block.
*
* Note that this guarantee implies further memory-ordering guarantees.
* On systems with more than one CPU, when synchronize_sched() returns,
* each CPU is guaranteed to have executed a full memory barrier since the
* end of its last RCU-sched read-side critical section whose beginning
* preceded the call to synchronize_sched(). In addition, each CPU having
* an RCU read-side critical section that extends beyond the return from
* synchronize_sched() is guaranteed to have executed a full memory barrier
* after the beginning of synchronize_sched() and before the beginning of
* that RCU read-side critical section. Note that these guarantees include
* CPUs that are offline, idle, or executing in user mode, as well as CPUs
* that are executing in the kernel.
*
* Furthermore, if CPU A invoked synchronize_sched(), which returned
* to its caller on CPU B, then both CPU A and CPU B are guaranteed
* to have executed a full memory barrier during the execution of
* synchronize_sched() -- even if CPU A and CPU B are the same CPU (but
* again only if the system has more than one CPU).
*/
void synchronize_sched(void)
{
synchronize_rcu();
RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
lock_is_held(&rcu_lock_map) ||
lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_sched() in RCU-sched read-side critical section");
if (rcu_blocking_is_gp())
return;
if (rcu_gp_is_expedited())
synchronize_sched_expedited();
else
wait_rcu_gp(call_rcu_sched);
}
EXPORT_SYMBOL_GPL(synchronize_sched);
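The wait_rcu_gp() call above builds the synchronous wait out of the asynchronous callback API: queue a callback whose only job is to flag completion, then block until it has run. A hedged single-threaded sketch of that pattern (hypothetical names; the kernel uses a real completion and the grace period ends asynchronously):

```c
#include <stddef.h>

struct wait_head {
	int done;
};

static struct wait_head *pending_waiter;

/* Analogue of call_rcu(&rcu.head, wakeme_after_rcu): queue a callback
 * whose only job is to flag completion. */
static void queue_wakeme(struct wait_head *w)
{
	w->done = 0;
	pending_waiter = w;
}

/* Stand-in for the grace-period machinery invoking queued callbacks. */
static void grace_period_end(void)
{
	if (pending_waiter) {
		pending_waiter->done = 1;	/* wakeme_after_rcu() analogue */
		pending_waiter = NULL;
	}
}

/* synchronize_*() analogue: queue, then wait until the callback ran. */
static int synchronize_sketch(void)
{
	struct wait_head w;

	queue_wakeme(&w);
	grace_period_end();	/* in the kernel this happens asynchronously */
	return w.done;		/* 1 once the grace period has elapsed */
}
```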
/**
* synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
*
* Control will return to the caller some time after a full rcu_bh grace
* period has elapsed, in other words after all currently executing rcu_bh
* read-side critical sections have completed. RCU read-side critical
* sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
* and may be nested.
*
* See the description of synchronize_sched() for more detailed information
* on memory ordering guarantees.
*/
void synchronize_rcu_bh(void)
{
RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
lock_is_held(&rcu_lock_map) ||
lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
if (rcu_blocking_is_gp())
return;
if (rcu_gp_is_expedited())
synchronize_rcu_bh_expedited();
else
wait_rcu_gp(call_rcu_bh);
}
EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
/**
* get_state_synchronize_rcu - Snapshot current RCU state
*
@@ -3082,23 +3257,41 @@ EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
/**
* get_state_synchronize_sched - Snapshot current RCU-sched state
*
* This is transitional, and only used by rcutorture.
* Returns a cookie that is used by a later call to cond_synchronize_sched()
* to determine whether or not a full grace period has elapsed in the
* meantime.
*/
unsigned long get_state_synchronize_sched(void)
{
return get_state_synchronize_rcu();
/*
* Any prior manipulation of RCU-protected data must happen
* before the load from ->gp_seq.
*/
smp_mb(); /* ^^^ */
return rcu_seq_snap(&rcu_sched_state.gp_seq);
}
EXPORT_SYMBOL_GPL(get_state_synchronize_sched);
/**
* cond_synchronize_sched - Conditionally wait for an RCU-sched grace period
*
* @oldstate: return value from earlier call to get_state_synchronize_sched()
*
* This is transitional and only used by rcutorture.
* If a full RCU-sched grace period has elapsed since the earlier call to
* get_state_synchronize_sched(), just return. Otherwise, invoke
* synchronize_sched() to wait for a full grace period.
*
* Yes, this function does not take counter wrap into account. But
* counter wrap is harmless. If the counter wraps, we have waited for
* more than 2 billion grace periods (and way more on a 64-bit system!),
* so waiting for one additional grace period should be just fine.
*/
void cond_synchronize_sched(unsigned long oldstate)
{
cond_synchronize_rcu(oldstate);
if (!rcu_seq_done(&rcu_sched_state.gp_seq, oldstate))
synchronize_sched();
else
smp_mb(); /* Ensure GP ends before subsequent accesses. */
}
EXPORT_SYMBOL_GPL(cond_synchronize_sched);
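The cookie returned by get_state_synchronize_sched() and consumed by cond_synchronize_sched() follows the kernel's rcu_seq scheme (kernel/rcu/rcu.h): the low bits of ->gp_seq record whether a grace period is in flight, and rcu_seq_snap() rounds up past any in-flight grace period plus one full future one. A simplified rendering, with the wrap-safe comparison reduced to a plain compare:

```c
#define SEQ_STATE_MASK 3UL	/* low bits != 0: a grace period is in progress */

/* Cookie for "a full grace period after now": skip past any GP already
 * in flight, then past one more complete GP. */
static unsigned long seq_snap(unsigned long seq)
{
	return (seq + 2 * SEQ_STATE_MASK + 1) & ~SEQ_STATE_MASK;
}

/* Has the counter reached the snapshotted cookie?  The kernel uses the
 * wrap-safe ULONG_CMP_GE() here; counter wrap is ignored in this sketch. */
static int seq_done(unsigned long seq, unsigned long cookie)
{
	return seq >= cookie;
}
```

With the counter idle at 0, seq_snap() returns 4 (the value after one complete grace period); if a grace period is already in progress (seq == 1), it returns 8, since the in-flight grace period may not cover the caller's prior accesses.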
@@ -3331,32 +3524,16 @@ static void _rcu_barrier(struct rcu_state *rsp)
*/
void rcu_barrier_bh(void)
{
_rcu_barrier(rcu_state_p);
_rcu_barrier(&rcu_bh_state);
}
EXPORT_SYMBOL_GPL(rcu_barrier_bh);
/**
* rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
*
* Note that this primitive does not necessarily wait for an RCU grace period
* to complete. For example, if there are no RCU callbacks queued anywhere
* in the system, then rcu_barrier() is within its rights to return
* immediately, without waiting for anything, much less an RCU grace period.
*/
void rcu_barrier(void)
{
_rcu_barrier(rcu_state_p);
}
EXPORT_SYMBOL_GPL(rcu_barrier);
/**
* rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
*
* This is transitional.
*/
void rcu_barrier_sched(void)
{
rcu_barrier();
_rcu_barrier(&rcu_sched_state);
}
EXPORT_SYMBOL_GPL(rcu_barrier_sched);
@@ -3655,7 +3832,8 @@ void rcu_report_dead(unsigned int cpu)
/* QS for any half-done expedited RCU-sched GP. */
preempt_disable();
rcu_report_exp_rdp(&rcu_state, this_cpu_ptr(rcu_state.rda), true);
rcu_report_exp_rdp(&rcu_sched_state,
this_cpu_ptr(rcu_sched_state.rda), true);
preempt_enable();
rcu_preempt_deferred_qs(current);
for_each_rcu_flavor(rsp)
@@ -3991,15 +4169,17 @@ struct workqueue_struct *rcu_par_gp_wq;
void __init rcu_init(void)
{
int cpu = smp_processor_id();
int cpu;
rcu_early_boot_tests();
rcu_bootup_announce();
rcu_init_geometry();
rcu_init_one(&rcu_state);
rcu_init_one(&rcu_bh_state);
rcu_init_one(&rcu_sched_state);
if (dump_tree)
rcu_dump_rcu_node_tree(&rcu_state);
rcu_dump_rcu_node_tree(&rcu_sched_state);
__rcu_init_preempt();
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
/*
@@ -4008,17 +4188,17 @@ void __init rcu_init(void)
* or the scheduler are operational.
*/
pm_notifier(rcu_pm_notify, 0);
WARN_ON(num_online_cpus() > 1); // Only one CPU this early in boot.
rcutree_prepare_cpu(cpu);
rcu_cpu_starting(cpu);
rcutree_online_cpu(cpu);
for_each_online_cpu(cpu) {
rcutree_prepare_cpu(cpu);
rcu_cpu_starting(cpu);
rcutree_online_cpu(cpu);
}
/* Create workqueue for expedited GPs and for Tree SRCU. */
rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_POWER_EFFICIENT | WQ_MEM_RECLAIM, 0);
WARN_ON(!rcu_gp_wq);
rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
WARN_ON(!rcu_par_gp_wq);
srcu_init();
}
#include "tree_exp.h"


@@ -225,6 +225,9 @@ struct rcu_data {
/* 5) _rcu_barrier(), OOM callbacks, and expediting. */
struct rcu_head barrier_head;
#ifdef CONFIG_RCU_FAST_NO_HZ
struct rcu_head oom_head;
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
int exp_dynticks_snap; /* Double-check need for IPI. */
/* 6) Callback offloading. */
@@ -430,7 +433,8 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
/* Forward declarations for rcutree_plugin.h */
static void rcu_bootup_announce(void);
static void rcu_qs(void);
static void rcu_preempt_qs(void);
static void rcu_preempt_note_context_switch(bool preempt);
static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
#ifdef CONFIG_HOTPLUG_CPU
static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
@@ -440,8 +444,9 @@ static int rcu_print_task_stall(struct rcu_node *rnp);
static int rcu_print_task_exp_stall(struct rcu_node *rnp);
static void rcu_preempt_check_blocked_tasks(struct rcu_state *rsp,
struct rcu_node *rnp);
static void rcu_flavor_sched_clock_irq(int user);
static void rcu_preempt_check_callbacks(void);
void call_rcu(struct rcu_head *head, rcu_callback_t func);
static void __init __rcu_init_preempt(void);
static void dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp,
int ncheck);
static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);


@@ -266,7 +266,7 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, wake);
}
/* Common code for work-done checking. */
/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
static bool sync_exp_work_done(struct rcu_state *rsp, unsigned long s)
{
if (rcu_exp_gp_seq_done(rsp, s)) {
@@ -338,6 +338,45 @@ fastpath:
return false;
}
/* Invoked on each online non-idle CPU for expedited quiescent state. */
static void sync_sched_exp_handler(void *data)
{
struct rcu_data *rdp;
struct rcu_node *rnp;
struct rcu_state *rsp = data;
rdp = this_cpu_ptr(rsp->rda);
rnp = rdp->mynode;
if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
return;
if (rcu_is_cpu_rrupt_from_idle()) {
rcu_report_exp_rdp(&rcu_sched_state,
this_cpu_ptr(&rcu_sched_data), true);
return;
}
__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
/* Store .exp before .rcu_urgent_qs. */
smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
resched_cpu(smp_processor_id());
}
/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
static void sync_sched_exp_online_cleanup(int cpu)
{
struct rcu_data *rdp;
int ret;
struct rcu_node *rnp;
struct rcu_state *rsp = &rcu_sched_state;
rdp = per_cpu_ptr(rsp->rda, cpu);
rnp = rdp->mynode;
if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
return;
ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
WARN_ON_ONCE(ret);
}
/*
* Select the CPUs within the specified rcu_node that the upcoming
* expedited grace period needs to wait for.
@@ -654,6 +693,39 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
mutex_unlock(&rsp->exp_mutex);
}
/**
* synchronize_sched_expedited - Brute-force RCU-sched grace period
*
* Wait for an RCU-sched grace period to elapse, but use a "big hammer"
* approach to force the grace period to end quickly. This consumes
* significant time on all CPUs and is unfriendly to real-time workloads,
 * and is thus not recommended for any sort of common-case code. In fact,
* if you are using synchronize_sched_expedited() in a loop, please
* restructure your code to batch your updates, and then use a single
* synchronize_sched() instead.
*
* This implementation can be thought of as an application of sequence
* locking to expedited grace periods, but using the sequence counter to
* determine when someone else has already done the work instead of for
* retrying readers.
*/
void synchronize_sched_expedited(void)
{
struct rcu_state *rsp = &rcu_sched_state;
RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
lock_is_held(&rcu_lock_map) ||
lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_sched_expedited() in RCU read-side critical section");
/* If only one CPU, this is automatically a grace period. */
if (rcu_blocking_is_gp())
return;
_synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
}
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
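The "sequence locking" analogy in the comment above amounts to: snapshot the counter, and before doing the expensive work, check whether the counter has already passed the snapshot, meaning someone else's expedited grace period covered this request. A hedged sketch of that check (hypothetical names, no locking; the kernel's funnel locking and in-progress encoding are considerably more elaborate):

```c
static unsigned long exp_seq;	/* completed-work sequence counter */

/* Snapshot: the count that, once reached, covers this request. */
static unsigned long exp_snap(void)
{
	return exp_seq + 1;
}

/* Try to satisfy the request: return 1 if already-completed work covers
 * snapshot @s, else do the expedited work ourselves and record it. */
static int exp_grace_period(unsigned long s)
{
	if (exp_seq >= s)
		return 1;	/* someone else did the work; fast path */
	exp_seq = s;		/* ... expedited grace period runs here ... */
	return 0;
}
```

Two concurrent callers taking the same snapshot thus collapse into one expedited grace period, which is the point of the sequence-counter check.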
#ifdef CONFIG_PREEMPT_RCU
/*
@@ -731,11 +803,6 @@ static void sync_rcu_exp_handler(void *info)
resched_cpu(rdp->cpu);
}
/* PREEMPT=y, so no RCU-sched to clean up after. */
static void sync_sched_exp_online_cleanup(int cpu)
{
}
/**
* synchronize_rcu_expedited - Brute-force RCU grace period
*
@@ -753,8 +820,6 @@ static void sync_sched_exp_online_cleanup(int cpu)
* you are using synchronize_rcu_expedited() in a loop, please restructure
 * your code to batch your updates, and then use a single synchronize_rcu()
* instead.
*
* This has the same semantics as (but is more brutal than) synchronize_rcu().
*/
void synchronize_rcu_expedited(void)
{
@@ -773,79 +838,13 @@ EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
#else /* #ifdef CONFIG_PREEMPT_RCU */
/* Invoked on each online non-idle CPU for expedited quiescent state. */
static void sync_sched_exp_handler(void *data)
{
struct rcu_data *rdp;
struct rcu_node *rnp;
struct rcu_state *rsp = data;
rdp = this_cpu_ptr(rsp->rda);
rnp = rdp->mynode;
if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
return;
if (rcu_is_cpu_rrupt_from_idle()) {
rcu_report_exp_rdp(&rcu_state, this_cpu_ptr(&rcu_data));
return;
}
__this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
/* Store .exp before .rcu_urgent_qs. */
smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
resched_cpu(smp_processor_id());
}
/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
static void sync_sched_exp_online_cleanup(int cpu)
{
struct rcu_data *rdp;
int ret;
struct rcu_node *rnp;
struct rcu_state *rsp = &rcu_state;
rdp = per_cpu_ptr(rsp->rda, cpu);
rnp = rdp->mynode;
if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
return;
ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
WARN_ON_ONCE(ret);
}
/*
* Because a context switch is a grace period for RCU-sched, any blocking
* grace-period wait automatically implies a grace period if there
 * is only one CPU online at any point in time during execution of either
* synchronize_sched() or synchronize_rcu_bh(). It is OK to occasionally
* incorrectly indicate that there are multiple CPUs online when there
* was in fact only one the whole time, as this just adds some overhead:
* RCU still operates correctly.
* Wait for an rcu-preempt grace period, but make it happen quickly.
* But because preemptible RCU does not exist, map to rcu-sched.
*/
static int rcu_blocking_is_gp(void)
{
int ret;
might_sleep(); /* Check for RCU read-side critical section. */
preempt_disable();
ret = num_online_cpus() <= 1;
preempt_enable();
return ret;
}
/* PREEMPT=n implementation of synchronize_rcu_expedited(). */
void synchronize_rcu_expedited(void)
{
struct rcu_state *rsp = &rcu_state;
RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
lock_is_held(&rcu_lock_map) ||
lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_sched_expedited() in RCU read-side critical section");
/* If only one CPU, this is automatically a grace period. */
if (rcu_blocking_is_gp())
return;
_synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
synchronize_sched_expedited();
}
EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);


@@ -123,6 +123,10 @@ static void __init rcu_bootup_announce_oddness(void)
#ifdef CONFIG_PREEMPT_RCU
RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
static struct rcu_state *const rcu_state_p = &rcu_preempt_state;
static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data;
static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
bool wake);
static void rcu_read_unlock_special(struct task_struct *t);
@@ -302,15 +306,15 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
*
* Callers to this function must disable preemption.
*/
static void rcu_qs(void)
static void rcu_preempt_qs(void)
{
RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n");
RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_qs() invoked with preemption enabled!!!\n");
if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) {
trace_rcu_grace_period(TPS("rcu_preempt"),
__this_cpu_read(rcu_data_p->gp_seq),
TPS("cpuqs"));
__this_cpu_write(rcu_data_p->cpu_no_qs.b.norm, false);
barrier(); /* Coordinate with rcu_flavor_sched_clock_irq(). */
barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
current->rcu_read_unlock_special.b.need_qs = false;
}
}
@@ -328,14 +332,12 @@ static void rcu_qs(void)
*
* Caller must disable interrupts.
*/
void rcu_note_context_switch(bool preempt)
static void rcu_preempt_note_context_switch(bool preempt)
{
struct task_struct *t = current;
struct rcu_data *rdp;
struct rcu_node *rnp;
barrier(); /* Avoid RCU read-side critical sections leaking down. */
trace_rcu_utilization(TPS("Start context switch"));
lockdep_assert_irqs_disabled();
WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
if (t->rcu_read_lock_nesting > 0 &&
@@ -383,11 +385,8 @@ void rcu_note_context_switch(bool preempt)
* grace period, then the fact that the task has been enqueued
* means that we continue to block the current grace period.
*/
rcu_qs();
trace_rcu_utilization(TPS("End context switch"));
barrier(); /* Avoid RCU read-side critical sections leaking up. */
rcu_preempt_qs();
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
/*
* Check for preempted RCU readers blocking the current grace period
@@ -496,7 +495,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
return;
}
if (special.b.need_qs) {
rcu_qs();
rcu_preempt_qs();
t->rcu_read_unlock_special.b.need_qs = false;
if (!t->rcu_read_unlock_special.s && !rdp->deferred_qs) {
local_irq_restore(flags);
@@ -601,7 +600,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
*/
static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
return (this_cpu_ptr(&rcu_data)->deferred_qs ||
return (this_cpu_ptr(&rcu_preempt_data)->deferred_qs ||
READ_ONCE(t->rcu_read_unlock_special.s)) &&
!t->rcu_read_lock_nesting;
}
@@ -780,21 +779,17 @@ rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
}
/*
* Check for a quiescent state from the current CPU, including voluntary
* context switches for Tasks RCU. When a task blocks, the task is
* recorded in the corresponding CPU's rcu_node structure, which is checked
* elsewhere, hence this function need only check for quiescent states
* related to the current CPU, not to those related to tasks.
* Check for a quiescent state from the current CPU. When a task blocks,
* the task is recorded in the corresponding CPU's rcu_node structure,
* which is checked elsewhere.
*
* Caller must disable hard irqs.
*/
static void rcu_flavor_sched_clock_irq(int user)
static void rcu_preempt_check_callbacks(void)
{
struct rcu_state *rsp = &rcu_state;
struct rcu_state *rsp = &rcu_preempt_state;
struct task_struct *t = current;
if (user || rcu_is_cpu_rrupt_from_idle()) {
rcu_note_voluntary_context_switch(current);
}
if (t->rcu_read_lock_nesting > 0 ||
(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
/* No QS, force context switch if deferred. */
@@ -804,7 +799,7 @@ static void rcu_flavor_sched_clock_irq(int user)
rcu_preempt_deferred_qs(t); /* Report deferred QS. */
return;
} else if (!t->rcu_read_lock_nesting) {
rcu_qs(); /* Report immediate QS. */
rcu_preempt_qs(); /* Report immediate QS. */
return;
}
@@ -817,6 +812,44 @@ static void rcu_flavor_sched_clock_irq(int user)
t->rcu_read_unlock_special.b.need_qs = true;
}
/**
* call_rcu() - Queue an RCU callback for invocation after a grace period.
* @head: structure to be used for queueing the RCU updates.
* @func: actual callback function to be invoked after the grace period
*
* The callback function will be invoked some time after a full grace
* period elapses, in other words after all pre-existing RCU read-side
* critical sections have completed. However, the callback function
* might well execute concurrently with RCU read-side critical sections
* that started after call_rcu() was invoked. RCU read-side critical
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
* and may be nested.
*
* Note that all CPUs must agree that the grace period extended beyond
 * all pre-existing RCU read-side critical sections. On systems with more
* than one CPU, this means that when "func()" is invoked, each CPU is
* guaranteed to have executed a full memory barrier since the end of its
* last RCU read-side critical section whose beginning preceded the call
* to call_rcu(). It also means that each CPU executing an RCU read-side
* critical section that continues beyond the start of "func()" must have
* executed a memory barrier after the call_rcu() but before the beginning
* of that RCU read-side critical section. Note that these guarantees
* include CPUs that are offline, idle, or executing in user mode, as
* well as CPUs that are executing in the kernel.
*
* Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
* resulting RCU callback function "func()", then both CPU A and CPU B are
* guaranteed to execute a full memory barrier during the time interval
* between the call to call_rcu() and the invocation of "func()" -- even
* if CPU A and CPU B are the same CPU (but again only if the system has
* more than one CPU).
*/
void call_rcu(struct rcu_head *head, rcu_callback_t func)
{
__call_rcu(head, func, rcu_state_p, -1, 0);
}
EXPORT_SYMBOL_GPL(call_rcu);
/**
* synchronize_rcu - wait until a grace period has elapsed.
*
@@ -827,28 +860,14 @@ static void rcu_flavor_sched_clock_irq(int user)
* concurrently with new RCU read-side critical sections that began while
* synchronize_rcu() was waiting. RCU read-side critical sections are
* delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
* In addition, regions of code across which interrupts, preemption, or
* softirqs have been disabled also serve as RCU read-side critical
* sections. This includes hardware interrupt handlers, softirq handlers,
* and NMI handlers.
*
* Note that this guarantee implies further memory-ordering guarantees.
* On systems with more than one CPU, when synchronize_rcu() returns,
* each CPU is guaranteed to have executed a full memory barrier since the
* end of its last RCU-sched read-side critical section whose beginning
* preceded the call to synchronize_rcu(). In addition, each CPU having
* an RCU read-side critical section that extends beyond the return from
* synchronize_rcu() is guaranteed to have executed a full memory barrier
* after the beginning of synchronize_rcu() and before the beginning of
* that RCU read-side critical section. Note that these guarantees include
* CPUs that are offline, idle, or executing in user mode, as well as CPUs
* that are executing in the kernel.
*
* Furthermore, if CPU A invoked synchronize_rcu(), which returned
* to its caller on CPU B, then both CPU A and CPU B are guaranteed
* to have executed a full memory barrier during the execution of
* synchronize_rcu() -- even if CPU A and CPU B are the same CPU (but
* again only if the system has more than one CPU).
* See the description of synchronize_sched() for more detailed
* information on memory-ordering guarantees. However, please note
* that -only- the memory-ordering guarantees apply. For example,
* synchronize_rcu() is -not- guaranteed to wait on things like code
* protected by preempt_disable(), instead, synchronize_rcu() is -only-
* guaranteed to wait on RCU read-side critical sections, that is, sections
* of code protected by rcu_read_lock().
*/
void synchronize_rcu(void)
{
@@ -865,6 +884,28 @@ void synchronize_rcu(void)
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
/**
* rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
*
* Note that this primitive does not necessarily wait for an RCU grace period
* to complete. For example, if there are no RCU callbacks queued anywhere
* in the system, then rcu_barrier() is within its rights to return
* immediately, without waiting for anything, much less an RCU grace period.
*/
void rcu_barrier(void)
{
_rcu_barrier(rcu_state_p);
}
EXPORT_SYMBOL_GPL(rcu_barrier);
/*
* Initialize preemptible RCU's state structures.
*/
static void __init __rcu_init_preempt(void)
{
rcu_init_one(rcu_state_p);
}
/*
* Check for a task exiting while in a preemptible-RCU read-side
* critical section, clean up if so. No need to issue warnings,
@@ -928,6 +969,8 @@ dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, int ncheck)
#else /* #ifdef CONFIG_PREEMPT_RCU */
static struct rcu_state *const rcu_state_p = &rcu_sched_state;
/*
* Tell them what RCU they are running.
*/
@@ -937,48 +980,18 @@ static void __init rcu_bootup_announce(void)
rcu_bootup_announce_oddness();
}
/*
* Note a quiescent state for PREEMPT=n. Because we do not need to know
* how many quiescent states passed, just if there was at least one since
* the start of the grace period, this just sets a flag. The caller must
* have disabled preemption.
*/
static void rcu_qs(void)
/* Because preemptible RCU does not exist, we can ignore its QSes. */
static void rcu_preempt_qs(void)
{
RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!");
if (!__this_cpu_read(rcu_data.cpu_no_qs.s))
return;
trace_rcu_grace_period(TPS("rcu_sched"),
__this_cpu_read(rcu_data.gp_seq), TPS("cpuqs"));
__this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
return;
__this_cpu_write(rcu_data.cpu_no_qs.b.exp, false);
rcu_report_exp_rdp(&rcu_state, this_cpu_ptr(&rcu_data));
}
/*
* Note a PREEMPT=n context switch. The caller must have disabled interrupts.
* Because preemptible RCU does not exist, we never have to check for
* CPUs being in quiescent states.
*/
void rcu_note_context_switch(bool preempt)
static void rcu_preempt_note_context_switch(bool preempt)
{
barrier(); /* Avoid RCU read-side critical sections leaking down. */
trace_rcu_utilization(TPS("Start context switch"));
rcu_qs();
/* Load rcu_urgent_qs before other flags. */
if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs)))
goto out;
this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))
rcu_momentary_dyntick_idle();
this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
if (!preempt)
rcu_tasks_qs(current);
out:
trace_rcu_utilization(TPS("End context switch"));
barrier(); /* Avoid RCU read-side critical sections leaking up. */
}
EXPORT_SYMBOL_GPL(rcu_note_context_switch);
/*
* Because preemptible RCU does not exist, there are never any preempted
@@ -1046,44 +1059,29 @@ rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
}
/*
* Check to see if this CPU is in a non-context-switch quiescent state,
* namely user mode and idle loop.
* Because preemptible RCU does not exist, it never has any callbacks
* to check.
*/
static void rcu_flavor_sched_clock_irq(int user)
static void rcu_preempt_check_callbacks(void)
{
if (user || rcu_is_cpu_rrupt_from_idle()) {
/*
* Get here if this CPU took its interrupt from user
* mode or from the idle loop, and if this is not a
* nested interrupt. In this case, the CPU is in
* a quiescent state, so note it.
*
* No memory barrier is required here because rcu_qs()
* references only CPU-local variables that other CPUs
* neither access nor modify, at least not while the
* corresponding CPU is online.
*/
rcu_qs();
}
}
/* PREEMPT=n implementation of synchronize_rcu(). */
void synchronize_rcu(void)
/*
* Because preemptible RCU does not exist, rcu_barrier() is just
* another name for rcu_barrier_sched().
*/
void rcu_barrier(void)
{
rcu_barrier_sched();
}
EXPORT_SYMBOL_GPL(rcu_barrier);
/*
* Because preemptible RCU does not exist, it need not be initialized.
*/
static void __init __rcu_init_preempt(void)
{
RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
lock_is_held(&rcu_lock_map) ||
lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_rcu() in RCU-sched read-side critical section");
if (rcu_blocking_is_gp())
return;
if (rcu_gp_is_expedited())
synchronize_rcu_expedited();
else
wait_rcu_gp(call_rcu);
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
/*
* Because preemptible RCU does not exist, tasks cannot possibly exit
@@ -1326,7 +1324,9 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
static void rcu_kthread_do_work(void)
{
rcu_do_batch(&rcu_state, this_cpu_ptr(&rcu_data));
rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
}
static void rcu_cpu_kthread_setup(unsigned int cpu)
@@ -1733,6 +1733,87 @@ static void rcu_idle_count_callbacks_posted(void)
__this_cpu_add(rcu_dynticks.nonlazy_posted, 1);
}
/*
* Data for flushing lazy RCU callbacks at OOM time.
*/
static atomic_t oom_callback_count;
static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq);
/*
* RCU OOM callback -- decrement the outstanding count and deliver the
* wake-up if we are the last one.
*/
static void rcu_oom_callback(struct rcu_head *rhp)
{
if (atomic_dec_and_test(&oom_callback_count))
wake_up(&oom_callback_wq);
}
/*
* Post an rcu_oom_notify callback on the current CPU if it has at
* least one lazy callback. This will unnecessarily post callbacks
* to CPUs that already have a non-lazy callback at the end of their
* callback list, but this is an infrequent operation, so accept some
* extra overhead to keep things simple.
*/
static void rcu_oom_notify_cpu(void *unused)
{
struct rcu_state *rsp;
struct rcu_data *rdp;
for_each_rcu_flavor(rsp) {
rdp = raw_cpu_ptr(rsp->rda);
if (rcu_segcblist_n_lazy_cbs(&rdp->cblist)) {
atomic_inc(&oom_callback_count);
rsp->call(&rdp->oom_head, rcu_oom_callback);
}
}
}
/*
* If low on memory, ensure that each CPU has a non-lazy callback.
* This will wake up CPUs that have only lazy callbacks, in turn
* ensuring that they free up the corresponding memory in a timely manner.
* Because an uncertain amount of memory will be freed in some uncertain
* timeframe, we do not claim to have freed anything.
*/
static int rcu_oom_notify(struct notifier_block *self,
unsigned long notused, void *nfreed)
{
int cpu;
/* Wait for callbacks from earlier instance to complete. */
wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);
smp_mb(); /* Ensure callback reuse happens after callback invocation. */
/*
* Prevent premature wakeup: ensure that all increments happen
* before there is a chance of the counter reaching zero.
*/
atomic_set(&oom_callback_count, 1);
for_each_online_cpu(cpu) {
smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
cond_resched_tasks_rcu_qs();
}
/* Unconditionally decrement: no need to wake ourselves up. */
atomic_dec(&oom_callback_count);
return NOTIFY_OK;
}
static struct notifier_block rcu_oom_nb = {
.notifier_call = rcu_oom_notify
};
static int __init rcu_register_oom_notifier(void)
{
register_oom_notifier(&rcu_oom_nb);
return 0;
}
early_initcall(rcu_register_oom_notifier);
#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
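The rcu_oom_notify() path above uses a classic completion-count pattern: oom_callback_count is seeded at 1 so the waiter cannot be woken before all per-CPU callbacks have been posted; each posted callback increments the count, each finished callback decrements it, and the final unconditional decrement drops the initial reference. A minimal user-space model of the same pattern (Python threading, hypothetical names, not the kernel implementation):

```python
import threading

class CompletionCount:
    """Count-to-zero completion, seeded at 1 like oom_callback_count."""
    def __init__(self):
        self.count = 1          # initial reference held by the poster
        self.cond = threading.Condition()

    def post(self):             # analogous to atomic_inc before rsp->call()
        with self.cond:
            self.count += 1

    def done(self):             # analogous to rcu_oom_callback()
        with self.cond:
            self.count -= 1
            if self.count == 0:
                self.cond.notify_all()

    def finish_posting_and_wait(self):
        # Drop the initial reference, then wait for outstanding callbacks,
        # mirroring the atomic_dec()/wait_event() pair in rcu_oom_notify().
        self.done()
        with self.cond:
            while self.count:
                self.cond.wait()

cc = CompletionCount()
workers = []
for _ in range(4):
    cc.post()                   # post a "callback" before starting it
    t = threading.Thread(target=cc.done)
    workers.append(t)
    t.start()
cc.finish_posting_and_wait()
for t in workers:
    t.join()
print(cc.count)  # 0 once all "callbacks" have run
```

The seed-at-1 trick is what prevents a premature wakeup: the count can only hit zero after the posting loop has finished, no matter how quickly the callbacks run.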
#ifdef CONFIG_RCU_FAST_NO_HZ
@@ -1861,22 +1942,11 @@ static void increment_cpu_stall_ticks(void)
*/
/*
* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters.
* The string after the "rcu_nocbs=" is either "all" for all CPUs, or a
* comma-separated list of CPUs and/or CPU ranges. If an invalid list is
* given, a warning is emitted and all CPUs are offloaded.
*/
/* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. */
static int __init rcu_nocb_setup(char *str)
{
alloc_bootmem_cpumask_var(&rcu_nocb_mask);
if (!strcasecmp(str, "all"))
cpumask_setall(rcu_nocb_mask);
else
if (cpulist_parse(str, rcu_nocb_mask)) {
pr_warn("rcu_nocbs= bad CPU range, all CPUs set\n");
cpumask_setall(rcu_nocb_mask);
}
cpulist_parse(str, rcu_nocb_mask);
return 1;
}
__setup("rcu_nocbs=", rcu_nocb_setup);
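The comment above describes the rcu_nocbs= syntax handled by cpulist_parse(): either the literal "all", or a comma-separated list of CPUs and inclusive ranges. A hypothetical user-space model of that parsing (Python, illustrative names only; the kernel's cpulist_parse() also handles strides and other forms not shown here):

```python
def parse_cpulist(s, nr_cpus=8):
    """Model of rcu_nocbs= parsing: "all" selects every CPU; otherwise
    a comma-separated list of CPU numbers and/or inclusive ranges,
    e.g. "0-3,5" -> {0, 1, 2, 3, 5}."""
    if s.strip().lower() == "all":
        return set(range(nr_cpus))
    cpus = set()
    for part in s.split(","):
        part = part.strip()
        if "-" in part:
            lo, hi = part.split("-", 1)
            cpus.update(range(int(lo), int(hi) + 1))  # ranges are inclusive
        else:
            cpus.add(int(part))
    return cpus

print(sorted(parse_cpulist("0-2,5")))           # [0, 1, 2, 5]
print(sorted(parse_cpulist("all", nr_cpus=4)))  # [0, 1, 2, 3]
```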


@@ -553,16 +553,11 @@ static void test_callback(struct rcu_head *r)
pr_info("RCU test callback executed %d\n", rcu_self_test_counter);
}
DEFINE_STATIC_SRCU(early_srcu);
static void early_boot_test_call_rcu(void)
{
static struct rcu_head head;
static struct rcu_head shead;
call_rcu(&head, test_callback);
if (IS_ENABLED(CONFIG_SRCU))
call_srcu(&early_srcu, &shead, test_callback);
}
static void early_boot_test_call_rcu_bh(void)
@@ -600,10 +595,6 @@ static int rcu_verify_early_boot_tests(void)
if (rcu_self_test) {
early_boot_test_counter++;
rcu_barrier();
if (IS_ENABLED(CONFIG_SRCU)) {
early_boot_test_counter++;
srcu_barrier(&early_srcu);
}
}
if (rcu_self_test_bh) {
early_boot_test_counter++;


@@ -423,7 +423,7 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task)
* state, even in the failed case, an explicit smp_mb() must be used.
*/
smp_mb__before_atomic();
if (unlikely(cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)))
if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))
return;
head->count++;


@@ -307,6 +307,7 @@ restart:
pending >>= softirq_bit;
}
rcu_bh_qs();
if (__this_cpu_read(ksoftirqd) == current)
rcu_softirq_qs();
local_irq_disable();


@@ -726,7 +726,7 @@ static int alarm_timer_create(struct k_itimer *new_timer)
static enum alarmtimer_restart alarmtimer_nsleep_wakeup(struct alarm *alarm,
ktime_t now)
{
struct task_struct *task = alarm->data;
struct task_struct *task = (struct task_struct *)alarm->data;
alarm->data = NULL;
if (task)
@@ -822,7 +822,7 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
struct restart_block *restart = &current->restart_block;
struct alarm alarm;
ktime_t exp;
int ret;
int ret = 0;
if (!alarmtimer_get_rtcdev())
return -EOPNOTSUPP;


@@ -1773,7 +1773,7 @@ void update_process_times(int user_tick)
/* Note: this timer irq context must be accounted for as well. */
account_process_tick(p, user_tick);
run_local_timers();
rcu_sched_clock_irq(user_tick);
rcu_check_callbacks(user_tick);
#ifdef CONFIG_IRQ_WORK
if (in_irq())
irq_work_tick();


@@ -4345,7 +4345,7 @@ tracing_cpumask_write(struct file *filp, const char __user *ubuf,
cpumask_var_t tracing_cpumask_new;
int err, cpu;
if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
return -ENOMEM;
err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);


@@ -1982,11 +1982,11 @@ static bool check_new_pcp(struct page *page)
return check_new_page(page);
}
#else
static inline bool check_pcp_refill(struct page *page)
static bool check_pcp_refill(struct page *page)
{
return false;
return check_new_page(page);
}
static inline bool check_new_pcp(struct page *page)
static bool check_new_pcp(struct page *page)
{
return false;
}


@@ -2324,10 +2324,6 @@ void __kmemcg_cache_deactivate(struct kmem_cache *cachep)
{
__kmem_cache_shrink(cachep);
}
void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s)
{
}
#endif
int __kmem_cache_shutdown(struct kmem_cache *cachep)


@@ -172,7 +172,6 @@ int __kmem_cache_shutdown(struct kmem_cache *);
void __kmem_cache_release(struct kmem_cache *);
int __kmem_cache_shrink(struct kmem_cache *);
void __kmemcg_cache_deactivate(struct kmem_cache *s);
void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s);
void slab_kmem_cache_release(struct kmem_cache *);
struct seq_file;
@@ -295,6 +294,8 @@ static __always_inline void memcg_uncharge_slab(struct page *page, int order,
extern void slab_init_memcg_params(struct kmem_cache *);
extern void memcg_link_cache(struct kmem_cache *s);
extern void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
void (*deact_fn)(struct kmem_cache *));
#else /* CONFIG_MEMCG_KMEM */


@@ -682,7 +682,7 @@ static void kmemcg_deactivate_workfn(struct work_struct *work)
put_online_mems();
put_online_cpus();
/* done, put the ref from kmemcg_cache_deactivate() */
/* done, put the ref from slab_deactivate_memcg_cache_rcu_sched() */
css_put(&s->memcg_params.memcg->css);
}
@@ -700,7 +700,19 @@ static void kmemcg_deactivate_rcufn(struct rcu_head *head)
queue_work(memcg_kmem_cache_wq, &s->memcg_params.deact_work);
}
static void kmemcg_cache_deactivate(struct kmem_cache *s)
/**
* slab_deactivate_memcg_cache_rcu_sched - schedule deactivation after a
* sched RCU grace period
* @s: target kmem_cache
* @deact_fn: deactivation function to call
*
* Schedule @deact_fn to be invoked with online cpus, mems and slab_mutex
* held after a sched RCU grace period. The slab is guaranteed to stay
* alive until @deact_fn is finished. This is to be used from
* __kmemcg_cache_deactivate().
*/
void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
void (*deact_fn)(struct kmem_cache *))
{
if (WARN_ON_ONCE(is_root_cache(s)) ||
WARN_ON_ONCE(s->memcg_params.deact_fn))
@@ -715,8 +727,6 @@ static void kmemcg_cache_deactivate(struct kmem_cache *s)
if (s->memcg_params.root_cache->memcg_params.dying)
goto unlock;
__kmemcg_cache_deactivate(s);
/* pin memcg so that @s doesn't get destroyed in the middle */
css_get(&s->memcg_params.memcg->css);
@@ -745,7 +755,7 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
if (!c)
continue;
kmemcg_cache_deactivate(c);
__kmemcg_cache_deactivate(c);
arr->entries[idx] = NULL;
}
mutex_unlock(&slab_mutex);
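The kerneldoc above for slab_deactivate_memcg_cache_rcu_sched() describes a deferred-deactivation pattern: take a reference to pin the object, wait out a grace period, then invoke @deact_fn with the object guaranteed alive, and finally drop the pin. A toy user-space model of that pattern (Python, simulated grace period via a timer; all names here are illustrative, not kernel API):

```python
import threading

class Cache:
    """Stand-in for a kmem_cache with a simple reference count."""
    def __init__(self, name):
        self.name = name
        self.refs = 1
        self.deact_fn = None
        self.deactivated = False

    def get(self):
        self.refs += 1

    def put(self):
        self.refs -= 1

def deactivate_after_grace_period(cache, deact_fn, grace_period):
    if cache.deact_fn is not None:   # mirrors WARN_ON_ONCE(s->memcg_params.deact_fn)
        return None
    cache.deact_fn = deact_fn
    cache.get()                      # pin: object stays alive until deact_fn finishes
    def after_gp():
        cache.deact_fn(cache)
        cache.put()                  # drop the pin taken above
    t = threading.Timer(grace_period, after_gp)  # simulated grace period
    t.start()
    return t

def mark_deactivated(cache):
    cache.deactivated = True

c = Cache("test-cache")
timer = deactivate_after_grace_period(c, mark_deactivated, 0.01)
timer.join()
print(c.deactivated, c.refs)  # True 1
```

The pin-before-defer ordering is the point: the reference is taken synchronously, before the caller returns, so no window exists in which the object could be freed ahead of the deferred callback.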


@@ -4087,7 +4087,7 @@ int __kmem_cache_shrink(struct kmem_cache *s)
}
#ifdef CONFIG_MEMCG
void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s)
static void kmemcg_cache_deact_after_rcu(struct kmem_cache *s)
{
/*
* Called with all the locks held after a sched RCU grace period.
@@ -4113,6 +4113,12 @@ void __kmemcg_cache_deactivate(struct kmem_cache *s)
*/
slub_set_cpu_partial(s, 0);
s->min_partial = 0;
/*
* s->cpu_partial is checked locklessly (see put_cpu_partial), so
* we have to make sure the change is visible before shrinking.
*/
slab_deactivate_memcg_cache_rcu_sched(s, kmemcg_cache_deact_after_rcu);
}
#endif


@@ -177,6 +177,7 @@ config CFG80211_DEBUGFS
config CFG80211_CRDA_SUPPORT
bool "support CRDA" if EXPERT
default y
help
You should enable this option unless you know for sure you have no
need for it, for example when using internal regdb (above) or the