Files
kernel_google_redbull/mm/oom_kill.c
Wilson Sung 7418288acf Merge android-4.19-stable (4.19.220) into android-msm-pixel-4.19-lts
Merge 4.19.220 into android-4.19-stable
Linux 4.19.220
    ipmi: msghandler: Make symbol 'remove_work_wq' static
    parisc: Mark cr16 CPU clocksource unstable on all SMP machines
  * serial: core: fix transmit-buffer reset and memleak
      drivers/tty/serial/serial_core.c
    serial: pl011: Add ACPI SBSA UART match id
    tty: serial: msm_serial: Deactivate RX DMA for polling support
    x86/64/mm: Map all kernel memory into trampoline_pgd
    usb: typec: tcpm: Wait in SNK_DEBOUNCED until disconnect
  * USB: NO_LPM quirk Lenovo Powered USB-C Travel Hub
      drivers/usb/core/quirks.c
  * xhci: Fix commad ring abort, write all 64 bits to CRCR register.
      drivers/usb/host/xhci-ring.c
    vgacon: Propagate console boot parameters before calling `vc_resize'
    parisc: Fix "make install" on newer debian releases
    parisc: Fix KBUILD_IMAGE for self-extracting kernel
    drm/msm: Do hw_init() before capturing GPU state
    net/smc: Keep smc_close_final rc during active close
    net/rds: correct socket tunable error in rds_tcp_tune()
  * net: annotate data-races on txq->xmit_lock_owner
      include/linux/netdevice.h
      net/core/dev.c
    net: usb: lan78xx: lan78xx_phy_init(): use PHY_POLL instead of "0" if no IRQ is available
    rxrpc: Fix rxrpc_local leak in rxrpc_lookup_peer()
    net/mlx4_en: Fix an use-after-free bug in mlx4_en_try_alloc_resources()
  * siphash: use _unaligned version by default
      include/linux/siphash.h
      lib/siphash.c
    net: mpls: Fix notifications when deleting a device
    net: qlogic: qlcnic: Fix a NULL pointer dereference in qlcnic_83xx_add_rings()
    natsemi: xtensa: fix section mismatch warnings
    i2c: stm32f7: stop dma transfer in case of NACK
    i2c: stm32f7: recover the bus on access timeout
  * fget: check that the fd still exists after getting a ref to it
      fs/file.c
  * fs: add fget_many() and fput_many()
      fs/file.c
      fs/file_table.c
      include/linux/file.h
      include/linux/fs.h
    sata_fsl: fix warning in remove_proc_entry when rmmod sata_fsl
    sata_fsl: fix UAF in sata_fsl_port_stop when rmmod sata_fsl
    ipmi: Move remove_work to dedicated workqueue
  * kprobes: Limit max data_size of the kretprobe instances
      include/linux/kprobes.h
    vrf: Reset IPCB/IP6CB when processing outbound pkts in vrf dev xmit
    perf hist: Fix memory leak of a perf_hpp_fmt
    net: ethernet: dec: tulip: de4x5: fix possible array overflows in type3_infoblock()
    net: tulip: de4x5: fix the problem that the array 'lp->phy[8]' may be out of bound
    ethernet: hisilicon: hns: hns_dsaf_misc: fix a possible array overflow in hns_dsaf_ge_srst_by_port()
    ata: ahci: Add Green Sardine vendor ID as board_ahci_mobile
    scsi: iscsi: Unblock session then wake up error handler
  * thermal: core: Reset previous low and high trip during thermal zone init
      drivers/thermal/thermal_core.c
    btrfs: check-integrity: fix a warning on write caching disabled disk
    s390/setup: avoid using memblock_enforce_memory_limit
    platform/x86: thinkpad_acpi: Fix WWAN device disabled issue after S3 deep
  * net: return correct error code
      net/ipv4/devinet.c
    atlantic: Fix OOB read and write in hw_atl_utils_fw_rpc_wait
    gfs2: Fix length of holes reported at end-of-file
  * of: clk: Make <linux/of_clk.h> self-contained
      include/linux/of_clk.h
    NFSv42: Fix pagecache invalidation after COPY/CLONE
  * shm: extend forced shm destroy to support objects from several IPC nses
      include/linux/ipc_namespace.h
      include/linux/sched/task.h
  * BACKPORT: arm64: vdso32: suppress error message for 'make mrproper'
      arch/arm64/kernel/vdso32/Makefile
    Merge 4.19.219 into android-4.19-stable
Linux 4.19.219
    tty: hvc: replace BUG_ON() with negative return value
    xen/netfront: don't trust the backend response data blindly
    xen/netfront: disentangle tx_skb_freelist
    xen/netfront: don't read data from request on the ring page
    xen/netfront: read response from backend only once
    xen/blkfront: don't trust the backend response data blindly
    xen/blkfront: don't take local copy of a request from the ring page
    xen/blkfront: read response from backend only once
  * xen: sync include/xen/interface/io/ring.h with Xen's newest version
      include/xen/interface/io/ring.h
  * fuse: release pipe buf after last use
      fs/fuse/dev.c
  * NFC: add NCI_UNREG flag to eliminate the race
      include/net/nfc/nci_core.h
  * hugetlbfs: flush TLBs correctly after huge_pmd_unshare
      include/asm-generic/tlb.h
      mm/memory.c
    s390/mm: validate VMA in PGSTE manipulation functions
  * tracing: Check pid filtering when creating events
      kernel/trace/trace_events.c
    vhost/vsock: fix incorrect used length reported to the guest
    net: hns3: fix VF RSS failed problem after PF enable multi-TCs
    net/smc: Don't call clcsock shutdown twice when smc shutdown
    MIPS: use 3-level pgtable for 64KB page size on MIPS_VA_BITS_48
  * tcp_cubic: fix spurious Hystart ACK train detections for not-cwnd-limited flows
      net/ipv4/tcp_cubic.c
    PM: hibernate: use correct mode for swsusp_close()
    net/smc: Ensure the active closing peer first closes clcsock
  * ipv6: fix typos in __ip6_finish_output()
      net/ipv6/ip6_output.c
    drm/vc4: fix error code in vc4_create_object()
    scsi: mpt3sas: Fix kernel panic during drive powercycle test
    ARM: socfpga: Fix crash with CONFIG_FORTIRY_SOURCE
    NFSv42: Don't fail clone() unless the OP_CLONE operation failed
    firmware: arm_scmi: pm: Propagate return value to caller
  * net: ieee802154: handle iftypes as u32
      include/net/nl802154.h
    ASoC: topology: Add missing rwsem around snd_ctl_remove() calls
    ASoC: qdsp6: q6routing: Conditionally reset FrontEnd Mixer
    ARM: dts: BCM5301X: Add interrupt properties to GPIO node
    ARM: dts: BCM5301X: Fix I2C controller interrupt
    netfilter: ipvs: Fix reuse connection if RS weight is 0
    arm64: dts: marvell: armada-37xx: Set pcie_reset_pin to gpio function
    arm64: dts: marvell: armada-37xx: declare PCIe reset pin
    pinctrl: armada-37xx: Correct PWM pins definitions
    pinctrl: armada-37xx: add missing pin: PCIe1 Wakeup
    pinctrl: armada-37xx: Correct mpp definitions
    PCI: aardvark: Fix checking for link up via LTSSM state
    PCI: aardvark: Fix link training
    PCI: aardvark: Fix PCIe Max Payload Size setting
    PCI: aardvark: Configure PCIe resources from 'ranges' DT property
    PCI: aardvark: Update comment about disabling link training
    PCI: aardvark: Move PCIe reset card code to advk_pcie_train_link()
    PCI: aardvark: Fix compilation on s390
    PCI: aardvark: Don't touch PCIe registers if no card connected
    PCI: aardvark: Indicate error in 'val' when config read fails
    PCI: aardvark: Replace custom macros by standard linux/pci_regs.h macros
    PCI: aardvark: Issue PERST via GPIO
    PCI: aardvark: Improve link training
    PCI: aardvark: Train link immediately after enabling training
    PCI: aardvark: Wait for endpoint to be ready before training link
    PCI: aardvark: Fix a leaked reference by adding missing of_node_put()
    proc/vmcore: fix clearing user buffer by properly using clear_user()
    xtensa: use CONFIG_USE_OF instead of CONFIG_OF
  * tracing: Fix pid filtering when triggers are attached
      kernel/trace/trace.h
    xen: detect uninitialized xenbus in xenbus_init
    xen: don't continue xenstore initialization in case of errors
  * fuse: fix page stealing
      fs/fuse/dev.c
    staging: rtl8192e: Fix use after free in _rtl92e_pci_disconnect()
    HID: wacom: Use "Confidence" flag to prevent reporting invalid contacts
    media: cec: copy sequence field for the reply
    ALSA: ctxfi: Fix out-of-range access
  * binder: fix test regression due to sender_euid change
      drivers/android/binder.c
  * usb: hub: Fix locking issues with address0_mutex
      drivers/usb/core/hub.c
  * usb: hub: Fix usb enumeration issue due to address0 race
      drivers/usb/core/hub.c
    usb: dwc2: hcd_queue: Fix use of floating point literal
    USB: serial: option: add Fibocom FM101-GL variants
    USB: serial: option: add Telit LE910S1 0x9200 composition
  * Revert "net: sched: update default qdisc visibility after Tx queue cnt changes"
      include/net/sch_generic.h
      net/core/dev.c
      net/sched/sch_generic.c
      net/sched/sch_mq.c
  * Revert "serial: core: Fix initializing and restoring termios speed"
      drivers/tty/serial/serial_core.c
      include/linux/console.h
    ANDROID: GKI: disable CONFIG_FORTIFY_SOURCE
    Merge 4.19.218 into android-4.19-stable
Linux 4.19.218
    soc/tegra: pmc: Fix imbalanced clock disabling in error code path
    usb: max-3421: Use driver data instead of maintaining a list of bound devices
  * ASoC: DAPM: Cover regression by kctl change notification fix
      sound/soc/soc-dapm.c
  * RDMA/netlink: Add __maybe_unused to static inline in C file
      include/rdma/rdma_netlink.h
    batman-adv: Don't always reallocate the fragmentation skb head
    batman-adv: Reserve needed_*room for fragments
    batman-adv: Consider fragmentation for needed_headroom
    batman-adv: mcast: fix duplicate mcast packets in BLA backbone from LAN
  * perf/core: Avoid put_page() when GUP fails
      kernel/events/core.c
    drm/amdgpu: fix set scaling mode Full/Full aspect/Center not works on vga and dvi connectors
    drm/udl: fix control-message timeout
  * cfg80211: call cfg80211_stop_ap when switch from P2P_GO type
      net/wireless/util.c
    parisc/sticon: fix reverse colors
    btrfs: fix memory ordering between normal and ordered work functions
    udf: Fix crash after seekdir
    x86/hyperv: Fix NULL deref in set_hv_tscchange_cb() if Hyper-V setup fails
  * mm: kmemleak: slob: respect SLAB_NOLEAKTRACE flag
      mm/slab.h
    ipc: WARN if trying to remove ipc object which is absent
    hexagon: export raw I/O routines for modules
  * tun: fix bonding active backup with arp monitoring
      drivers/net/tun.c
    perf/x86/intel/uncore: Fix IIO event constraints for Skylake Server
    perf/x86/intel/uncore: Fix filter_tid mask for CHA events on Skylake Server
    NFC: reorder the logic in nfc_{un,}register_device
    NFC: reorganize the functions in nci_request
    i40e: Fix display error code in dmesg
    i40e: Fix changing previously set num_queue_pairs for PFs
    i40e: Fix NULL ptr dereference on VSI filter sync
    i40e: Fix correct max_pkt_size on VF RX queue
  * net: virtio_net_hdr_to_skb: count transport header in UFO
      include/linux/virtio_net.h
    platform/x86: hp_accel: Fix an error handling path in 'lis3lv02d_probe()'
    mips: lantiq: add support for clk_get_parent()
    mips: bcm63xx: add support for clk_get_parent()
    MIPS: generic/yamon-dt: fix uninitialized variable error
    iavf: Fix for the false positive ASQ/ARQ errors while issuing VF reset
    iavf: check for null in iavf_fix_features
    net: bnx2x: fix variable dereferenced before check
    drm/nouveau: hdmigv100.c: fix corrupted HDMI Vendor InfoFrame
  * sched/core: Mitigate race cpus_share_cache()/update_top_cache_domain()
      kernel/sched/core.c
    mips: BCM63XX: ensure that CPU_SUPPORTS_32BIT_KERNEL is set
    sh: define __BIG_ENDIAN for math-emu
    sh: fix kconfig unmet dependency warning for FRAME_POINTER
  * f2fs: fix up f2fs_lookup tracepoints
      include/trace/events/f2fs.h
    maple: fix wrong return value of maple_bus_init().
    sh: check return code of request_irq
    powerpc/dcr: Use cmplwi instead of 3-argument cmpli
    ALSA: gus: fix null pointer dereference on pointer block
    powerpc/5200: dts: fix memory node unit name
    scsi: target: Fix alua_tg_pt_gps_count tracking
  * scsi: target: Fix ordered tag handling
      include/target/target_core_base.h
    MIPS: sni: Fix the build
  * tty: tty_buffer: Fix the softlockup issue in flush_to_ldisc
      drivers/tty/tty_buffer.c
  * ALSA: ISA: not for M68K
      sound/core/Makefile
      sound/isa/Kconfig
      sound/pci/Kconfig
    usb: host: ohci-tmio: check return value after calling platform_get_resource()
    ARM: dts: omap: fix gpmc,mux-add-data type
  * firmware_loader: fix pre-allocated buf built-in firmware use
      drivers/base/firmware_loader/main.c
    scsi: advansys: Fix kernel pointer leak
    ASoC: nau8824: Add DMI quirk mechanism for active-high jack-detect
    arm64: dts: freescale: fix arm,sp805 compatible string
    usb: typec: tipd: Remove WARN_ON in tps6598x_block_read
    usb: musb: tusb6010: check return value after calling platform_get_resource()
    arm64: dts: hisilicon: fix arm,sp805 compatible string
    scsi: lpfc: Fix list_add() corruption in lpfc_drain_txq()
    arm64: zynqmp: Fix serial compatible string
    arm64: zynqmp: Do not duplicate flash partition label property
    erofs: fix unsafe pagevec reuse of hooked pclusters
    erofs: remove the occupied parameter from z_erofs_pagevec_enqueue()
  * PCI: Add MSI masking quirk for Nvidia ION AHCI
      drivers/pci/quirks.c
  * PCI/MSI: Deal with devices lying about their MSI mask capability
      drivers/pci/msi.c
      include/linux/pci.h
  * PCI/MSI: Destroy sysfs before freeing entries
      drivers/pci/msi.c
    parisc/entry: fix trace test in syscall exit path
  * fortify: Explicitly disable Clang support
      security/Kconfig
  * ext4: fix lazy initialization next schedule time computation in more granular unit
      fs/ext4/super.c
    x86/cpu: Fix migration safety with X86_BUG_NULL_SEL
  * fuse: truncate pagecache on atomic_o_trunc
      fs/fuse/file.c
  * PCI: Add PCI_EXP_DEVCTL_PAYLOAD_* macros
      include/uapi/linux/pci_regs.h
    s390/tape: fix timer initialization in tape_std_assign()
    s390/cio: check the subchannel validity for dev_busid
  * video: backlight: Drop maximum brightness override for brightness zero
      drivers/video/backlight/backlight.c
    backlight: gpio-backlight: Correct initial power state handling
  * mm, oom: do not trigger out_of_memory from the #PF
      mm/oom_kill.c
  * mm, oom: pagefault_out_of_memory: don't force global OOM for dying tasks
      mm/oom_kill.c
    powerpc/bpf: Emit stf barrier instruction sequences for BPF_NOSPEC
    powerpc/security: Add a helper to query stf_barrier type
    powerpc/bpf: Fix BPF_SUB when imm == 0x80000000
    powerpc/bpf: Validate branch ranges
    powerpc/lib: Add helper to check if offset is within conditional branch range
    9p/net: fix missing error check in p9_check_errors
  * f2fs: should use GFP_NOFS for directory inodes
      fs/f2fs/inode.c
      fs/f2fs/namei.c
    ARM: 9156/1: drop cc-option fallbacks for architecture selection
    ARM: 9155/1: fix early early_iounmap()
    USB: chipidea: fix interrupt deadlock
    cxgb4: fix eeprom len when diagnostics not implemented
    vsock: prevent unnecessary refcnt inc for nonblocking connect
  * arm64: pgtable: make __pte_to_phys/__phys_to_pte_val inline functions
      arch/arm64/include/asm/pgtable.h
    nfc: pn533: Fix double free when pn533_fill_fragment_skbs() fails
  * llc: fix out-of-bound array index in llc_sk_dev_hash()
      include/net/llc.h
  * zram: off by one in read_block_state()
      drivers/block/zram/zram_drv.c
  * mm/zsmalloc.c: close race window between zs_pool_dec_isolated() and zs_unregister_migration()
      mm/zsmalloc.c
  * bonding: Fix a use-after-free problem when bond_sysfs_slave_add() failed
      drivers/net/bonding/bond_sysfs_slave.c
    ACPI: PMIC: Fix intel_pmic_regs_handler() read accesses
    net: davinci_emac: Fix interrupt pacing disable
    xen-pciback: Fix return in pm_ctrl_init()
    i2c: xlr: Fix a resource leak in the error handling path of 'xlr_i2c_probe()'
    scsi: qla2xxx: Turn off target reset during issue_lip
    scsi: qla2xxx: Fix gnl list corruption
  * ar7: fix kernel builds for compiler test
      drivers/watchdog/Kconfig
    watchdog: f71808e_wdt: fix inaccurate report in WDIOC_GETTIMEOUT
    m68k: set a default value for MEMORY_RESERVE
  * dmaengine: dmaengine_desc_callback_valid(): Check for `callback_result`
      drivers/dma/dmaengine.h
  * netfilter: nfnetlink_queue: fix OOB when mac header was cleared
      net/netfilter/nfnetlink_queue.c
    auxdisplay: ht16k33: Fix frame buffer device blanking
    auxdisplay: ht16k33: Connect backlight to fbdev
    auxdisplay: img-ascii-lcd: Fix lock-up when displaying empty string
    dmaengine: at_xdmac: fix AT_XDMAC_CC_PERID() macro
    mtd: spi-nor: hisi-sfc: Remove excessive clk_disable_unprepare()
    fs: orangefs: fix error return code of orangefs_revalidate_lookup()
    NFS: Fix deadlocks in nfs_scan_commit_list()
    PCI: aardvark: Don't spam about PIO Response Status
  * drm/plane-helper: fix uninitialized variable reference
      drivers/gpu/drm/drm_plane_helper.c
    pnfs/flexfiles: Fix misplaced barrier in nfs4_ff_layout_prepare_ds
  * rpmsg: Fix rpmsg_create_ept return when RPMSG config is not defined
      include/linux/rpmsg.h
    apparmor: fix error check
    power: supply: bq27xxx: Fix kernel crash on IRQ handler register error
    mips: cm: Convert to bitfield API to fix out-of-bounds access
    serial: xilinx_uartps: Fix race condition causing stuck TX
    phy: qcom-qusb2: Fix a memory leak on probe
    ASoC: cs42l42: Defer probe if request_threaded_irq() returns EPROBE_DEFER
    ASoC: cs42l42: Correct some register default values
    RDMA/mlx4: Return missed an error if device doesn't support steering
    scsi: csiostor: Uninitialized data in csio_ln_vnp_read_cbfn()
    power: supply: rt5033_battery: Change voltage values to µV
    usb: gadget: hid: fix error code in do_config()
    serial: 8250_dw: Drop wrong use of ACPI_PTR()
    video: fbdev: chipsfb: use memset_io() instead of memset()
    memory: fsl_ifc: fix leak of irq and nand_irq in fsl_ifc_ctrl_probe
    soc/tegra: Fix an error handling path in tegra_powergate_power_up()
    arm: dts: omap3-gta04a4: accelerometer irq fix
    ALSA: hda: Reduce udelay() at SKL+ position reporting
    JFS: fix memleak in jfs_mount
    MIPS: loongson64: make CPU_LOONGSON64 depends on MIPS_FP_SUPPORT
    scsi: dc395: Fix error case unwinding
    ARM: dts: at91: tse850: the emac<->phy interface is rmii
    RDMA/bnxt_re: Fix query SRQ failure
    arm64: dts: rockchip: Fix GPU register width for RK3328
    ARM: s3c: irq-s3c24xx: Fix return value check for s3c24xx_init_intc()
    RDMA/rxe: Fix wrong port_cap_flags
    ibmvnic: Process crqs after enabling interrupts
    selftests/bpf: Fix fclose/pclose mismatch in test_progs
    crypto: pcrypt - Delay write to padata->info
    net: phylink: avoid mvneta warning when setting pause parameters
    net: amd-xgbe: Toggle PLL settings during rate change
    wcn36xx: add proper DMA memory barriers in rx path
    libertas: Fix possible memory leak in probe and disconnect
    libertas_tf: Fix possible memory leak in probe and disconnect
    KVM: s390: Fix handle_sske page fault handling
    samples/kretprobes: Fix return value if register_kretprobe() failed
  * tcp: don't free a FIN sk_buff in tcp_remove_empty_skb()
      net/ipv4/tcp.c
    irq: mips: avoid nested irq_enter()
    s390/gmap: don't unconditionally call pte_unmap_unlock() in __gmap_zap()
    smackfs: use netlbl_cfg_cipsov4_del() for deleting cipso_v4_doi
    drm/msm: Fix potential NULL dereference in DPU SSPP
  * clocksource/drivers/timer-ti-dm: Select TIMER_OF
      drivers/clocksource/Kconfig
    PM: hibernate: fix sparse warnings
    nvme-rdma: fix error code in nvme_rdma_setup_ctrl
    phy: micrel: ksz8041nl: do not use power down mode
    mwifiex: Send DELBA requests according to spec
    rsi: stop thread firstly in rsi_91x_init() error handling
    platform/x86: thinkpad_acpi: Fix bitwise vs. logical warning
    mmc: mxs-mmc: disable regulator on error and in the remove function
  * net: stream: don't purge sk_error_queue in sk_stream_kill_queues()
      net/core/stream.c
    drm/msm: uninitialized variable in msm_gem_import()
    ath10k: fix max antenna gain unit
    hwmon: (pmbus/lm25066) Let compiler determine outer dimension of lm25066_coeff
  * hwmon: Fix possible memleak in __hwmon_device_register()
      drivers/hwmon/hwmon.c
    memstick: jmb38x_ms: use appropriate free function in jmb38x_ms_alloc_host()
    memstick: avoid out-of-range warning
    mmc: sdhci-omap: Fix NULL pointer exception if regulator is not configured
    b43: fix a lower bounds test
    b43legacy: fix a lower bounds test
    hwrng: mtk - Force runtime pm ops for sleep ops
    crypto: qat - disregard spurious PFVF interrupts
    crypto: qat - detect PFVF collision after ACK
    media: dvb-frontends: mn88443x: Handle errors of clk_prepare_enable()
    ath9k: Fix potential interrupt storm on queue reset
    media: em28xx: Don't use ops->suspend if it is NULL
  * cpuidle: Fix kobject memory leaks in error paths
      drivers/cpuidle/sysfs.c
    media: cx23885: Fix snd_card_free call on null card pointer
    media: si470x: Avoid card name truncation
    media: mtk-vpu: Fix a resource leak in the error handling path of 'mtk_vpu_probe()'
    media: dvb-usb: fix ununit-value in az6027_rc_query
    media: em28xx: add missing em28xx_close_extension
    drm/amdgpu: fix warning for overflow check
    net: dsa: rtl8366rb: Fix off-by-one bug
  * cgroup: Make rebind_subsystems() disable v2 controllers all at once
      kernel/cgroup/cgroup.c
  * Bluetooth: fix init and cleanup of sco_conn.timeout_work
      net/bluetooth/sco.c
    parisc/kgdb: add kgdb_roundup() to make kgdb work with idle polling
    parisc/unwind: fix unwinder when CONFIG_64BIT is enabled
  * task_stack: Fix end_of_stack() for architectures with upwards-growing stack
      include/linux/sched/task_stack.h
    parisc: fix warning in flush_tlb_all
    x86/hyperv: Protect set_hv_tscchange_cb() against getting preempted
    spi: bcm-qspi: Fix missing clk_disable_unprepare() on error in bcm_qspi_probe()
    ARM: 9136/1: ARMv7-M uses BE-8, not BE-32
  * gre/sit: Don't generate link-local addr if addr_gen_mode is IN6_ADDR_GEN_MODE_NONE
      net/ipv6/addrconf.c
    ARM: clang: Do not rely on lr register for stacktrace
    smackfs: use __GFP_NOFAIL for smk_cipso_doi()
    iwlwifi: mvm: disable RX-diversity in powersave
    PM: hibernate: Get block device exclusively in swsusp_check()
    mwl8k: Fix use-after-free in mwl8k_fw_state_machine()
    tracing/cfi: Fix cmp_entries_* functions signature mismatch
  * workqueue: make sysfs of unbound kworker cpumask more clever
      kernel/workqueue.c
  * lib/xz: Validate the value before assigning it to an enum variable
      lib/xz/xz_dec_stream.c
  * lib/xz: Avoid overlapping memcpy() with invalid input with in-place decompression
      lib/xz/xz_dec_lzma2.c
    memstick: r592: Fix a UAF bug when removing the driver
    leaking_addresses: Always print a trailing newline
    ACPI: battery: Accept charges over the design capacity as full
    ath: dfs_pattern_detector: Fix possible null-pointer dereference in channel_detector_create()
  * tracefs: Have tracefs directories not set OTH permission bits by default
      fs/tracefs/inode.c
    media: usb: dvd-usb: fix uninit-value bug in dibusb_read_eeprom_byte()
    ACPICA: Avoid evaluating methods too early during system resume
    media: rcar-csi2: Add checking to rcsi2_start_receiver()
    ia64: don't do IA64_CMPXCHG_DEBUG without CONFIG_PRINTK
    media: mceusb: return without resubmitting URB in case of -EPROTO error.
    media: s5p-mfc: Add checking to s5p_mfc_probe().
    media: s5p-mfc: fix possible null-pointer dereference in s5p_mfc_probe()
    media: uvcvideo: Return -EIO for control errors
    media: uvcvideo: Set capability in s_param
    media: netup_unidvb: handle interrupt properly according to the firmware
    media: mt9p031: Fix corrupted frame after restarting stream
    mwifiex: Properly initialize private structure on interface type changes
    mwifiex: Run SET_BSS_MODE when changing from P2P to STATION vif-type
    x86: Increase exception stack sizes
    smackfs: Fix use-after-free in netlbl_catmap_walk()
  * net: sched: update default qdisc visibility after Tx queue cnt changes
      include/net/sch_generic.h
      net/core/dev.c
      net/sched/sch_generic.c
      net/sched/sch_mq.c
    locking/lockdep: Avoid RCU-induced noinstr fail
    MIPS: lantiq: dma: reset correct number of channel
    MIPS: lantiq: dma: add small delay after reset
    platform/x86: wmi: do not fail if disabling fails
  * Bluetooth: fix use-after-free error in lock_sock_nested()
      net/bluetooth/l2cap_sock.c
  * Bluetooth: sco: Fix lock_sock() blockage by memcpy_from_msg()
      net/bluetooth/sco.c
  * drm: panel-orientation-quirks: Add quirk for KD Kurio Smart C15200 2-in-1
      drivers/gpu/drm/drm_panel_orientation_quirks.c
    USB: iowarrior: fix control-message timeouts
    USB: serial: keyspan: fix memleak on probe errors
    iio: dac: ad5446: Fix ad5622_write() return value
  * pinctrl: core: fix possible memory leak in pinctrl_enable()
      drivers/pinctrl/core.c
  * quota: correct error number in free_dqentry()
      fs/quota/quota_tree.c
  * quota: check block number when reading the block in quota file
      fs/quota/quota_tree.c
    PCI: aardvark: Read all 16-bits from PCIE_MSI_PAYLOAD_REG
    PCI: aardvark: Fix return value of MSI domain .alloc() method
    PCI: aardvark: Do not unmask unused interrupts
    PCI: aardvark: Do not clear status bits of masked interrupts
    xen/balloon: add late_initcall_sync() for initial ballooning done
    ALSA: mixer: fix deadlock in snd_mixer_oss_set_volume
    ALSA: mixer: oss: Fix racy access to slots
  * serial: core: Fix initializing and restoring termios speed
      drivers/tty/serial/serial_core.c
      include/linux/console.h
    powerpc/85xx: Fix oops when mpc85xx_smp_guts_ids node cannot be found
    power: supply: max17042_battery: use VFSOC for capacity when no rsns
    power: supply: max17042_battery: Prevent int underflow in set_soc_threshold
    signal/mips: Update (_save|_restore)_fp_context to fail with -EFAULT
  * signal: Remove the bogus sigkill_pending in ptrace_stop
      kernel/signal.c
    RDMA/qedr: Fix NULL deref for query_qp on the GSI QP
    rsi: Fix module dev_oper_mode parameter description
    rsi: fix rate mask set leading to P2P failure
    rsi: fix key enabled check causing unwanted encryption for vap_id > 0
    rsi: fix occasional initialisation failure with BT coex
    wcn36xx: handle connection loss indication
    libata: fix checking of DMA state
    mwifiex: Read a PCI register after writing the TX ring write pointer
    wcn36xx: Fix HT40 capability for 2Ghz band
    evm: mark evm_fixmode as __ro_after_init
    rtl8187: fix control-message timeouts
  * PCI: Mark Atheros QCA6174 to avoid bus reset
      drivers/pci/quirks.c
    ath10k: fix division by zero in send path
    ath10k: fix control-message timeout
    ath6kl: fix control-message timeout
    ath6kl: fix division by zero in send path
    mwifiex: fix division by zero in fw download path
    EDAC/sb_edac: Fix top-of-high-memory value for Broadwell/Haswell
    regulator: dt-bindings: samsung,s5m8767: correct s5m8767,pmic-buck-default-dvs-idx property
    regulator: s5m8767: do not use reset value as DVS voltage if GPIO DVS is disabled
    hwmon: (pmbus/lm25066) Add offset coefficients
    ia64: kprobes: Fix to pass correct trampoline address to the handler
    btrfs: call btrfs_check_rw_degradable only if there is a missing device
    btrfs: fix lost error handling when replaying directory deletes
    btrfs: clear MISSING device status bit in btrfs_close_one_device
    vmxnet3: do not stop tx queues after netif_device_detach()
    watchdog: Fix OMAP watchdog early handling
    spi: spl022: fix Microwire full duplex mode
    xen/netfront: stop tx queues during live migration
  * bpf: Prevent increasing bpf_jit_limit above max
      include/linux/filter.h
      kernel/bpf/core.c
      net/core/sysctl_net_core.c
  * drm: panel-orientation-quirks: Add quirk for Aya Neo 2021
      drivers/gpu/drm/drm_panel_orientation_quirks.c
  * mmc: winbond: don't build on M68K
      drivers/mmc/host/Kconfig
    hyperv/vmbus: include linux/bitops.h
    sfc: Don't use netif_info before net_device setup
    cavium: Fix return values of the probe function
    scsi: qla2xxx: Fix unmap of already freed sgl
    cavium: Return negative value when pci_alloc_irq_vectors() fails
    x86/irq: Ensure PI wakeup handler is unregistered before module unload
    x86/sme: Use #define USE_EARLY_PGTABLE_L5 in mem_encrypt_identity.c
  * ALSA: timer: Unconditionally unlink slave instances, too
      sound/core/timer.c
  * ALSA: timer: Fix use-after-free problem
      sound/core/timer.c
    ALSA: synth: missing check for possible NULL after the call to kstrdup
  * ALSA: usb-audio: Add registration quirk for JBL Quantum 400
      sound/usb/quirks.c
    ALSA: line6: fix control and interrupt message timeouts
    ALSA: 6fire: fix control and bulk message timeouts
    ALSA: ua101: fix division by zero at probe
    ALSA: hda/realtek: Add quirk for Clevo PC70HS
    media: ir-kbd-i2c: improve responsiveness of hauppauge zilog receivers
    media: ite-cir: IR receiver stop working after receive overflow
    crypto: s5p-sss - Add error handling in s5p_aes_probe()
    firmware/psci: fix application of sizeof to pointer
    tpm: Check for integer overflow in tpm2_map_response_body()
    parisc: Fix ptrace check on syscall return
    mmc: dw_mmc: Dont wait for DRTO on Write RSP error
    ocfs2: fix data corruption on truncate
  * libata: fix read log timeout value
      include/linux/libata.h
    Input: i8042 - Add quirk for Fujitsu Lifebook T725
    Input: elantench - fix misreporting trackpoint coordinates
  * binder: use cred instead of task for selinux checks
      drivers/android/binder.c
      include/linux/lsm_hooks.h
      include/linux/security.h
      security/security.c
      security/selinux/hooks.c
  * binder: use euid from cred instead of using task
      drivers/android/binder.c
  * xhci: Fix USB 3.1 enumeration issues by increasing roothub power-on-good delay
      drivers/usb/host/xhci-hub.c
  * ANDROID: usb: gadget: f_accessory: Mitgate handling of non-existent USB request
      drivers/usb/gadget/function/f_accessory.c
  * UPSTREAM: binder: use cred instead of task for getsecid
      drivers/android/binder.c
      include/linux/security.h
  * FROMGIT: binder: fix test regression due to sender_euid change
      drivers/android/binder.c
  * BACKPORT: binder: use cred instead of task for selinux checks
      drivers/android/binder.c
      include/linux/lsm_hooks.h
      include/linux/security.h
      security/security.c
      security/selinux/hooks.c
  * UPSTREAM: binder: use euid from cred instead of using task
      drivers/android/binder.c
  * ANDROID: setlocalversion: make KMI_GENERATION optional
      scripts/setlocalversion
    Merge 4.19.217 into android-4.19-stable
Linux 4.19.217
    rsi: fix control-message timeout
    staging: rtl8192u: fix control-message timeouts
    staging: r8712u: fix control-message timeout
    comedi: vmk80xx: fix bulk and interrupt message timeouts
    comedi: vmk80xx: fix bulk-buffer overflow
    comedi: vmk80xx: fix transfer-buffer overflows
    comedi: ni_usb6501: fix NULL-deref in command paths
    comedi: dt9812: fix DMA buffers on stack
    isofs: Fix out of bound access for corrupted isofs image
  * printk/console: Allow to disable console output by using console="" or console=null
      kernel/printk/printk.c
  * usb-storage: Add compatibility quirk flags for iODD 2531/2541
      drivers/usb/storage/unusual_devs.h
    usb: musb: Balance list entry in musb_gadget_queue
  * usb: gadget: Mark USB_FSL_QE broken on 64-bit
      drivers/usb/gadget/udc/Kconfig
  * usb: ehci: handshake CMD_RUN instead of STS_HALT
      drivers/usb/host/ehci-hcd.c
      drivers/usb/host/ehci-platform.c
      drivers/usb/host/ehci.h
    Revert "x86/kvm: fix vcpu-id indexed array sizes"
    Merge 4.19.216 into android-4.19-stable
Linux 4.19.216
  * ARM: 9120/1: Revert "amba: make use of -1 IRQs warn"
      drivers/amba/bus.c
  * arch: pgtable: define MAX_POSSIBLE_PHYSMEM_BITS where needed
      include/asm-generic/pgtable.h
    sfc: Fix reading non-legacy supported link modes
    IB/qib: Protect from buffer overflow in struct qib_user_sdma_pkt fields
    IB/qib: Use struct_size() helper
    media: firewire: firedtv-avc: fix a buffer overflow in avc_ca_pmt()
  * scsi: core: Put LLD module refcnt after SCSI device is released
      drivers/scsi/scsi.c
      drivers/scsi/scsi_sysfs.c
  * UPSTREAM: security: selinux: allow per-file labeling for bpffs
      security/selinux/hooks.c

Bug: 210364486
Change-Id: I6232c6c7fde1bf54c16a32dd632456dc41e01e6e
Signed-off-by: JohnnLee <johnnlee@google.com>
2021-12-29 06:52:25 +00:00

1168 lines
31 KiB
C

/*
* linux/mm/oom_kill.c
*
* Copyright (C) 1998,2000 Rik van Riel
* Thanks go out to Claus Fischer for some serious inspiration and
* for goading me into coding this file...
* Copyright (C) 2010 Google, Inc.
* Rewritten by David Rientjes
*
* The routines in this file are used to kill a process when
* we're seriously out of memory. This gets called from __alloc_pages()
* in mm/page_alloc.c when we really run out of memory.
*
* Since we won't call these routines often (on a well-configured
* machine) this file will double as a 'coding guide' and a signpost
* for newbie kernel hackers. It features several pointers to major
* kernel subsystems and hints as to where to find out what things do.
*/
#include <linux/oom.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/sched/task.h>
#include <linux/swap.h>
#include <linux/timex.h>
#include <linux/jiffies.h>
#include <linux/cpuset.h>
#include <linux/export.h>
#include <linux/notifier.h>
#include <linux/memcontrol.h>
#include <linux/mempolicy.h>
#include <linux/security.h>
#include <linux/ptrace.h>
#include <linux/freezer.h>
#include <linux/ftrace.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <linux/init.h>
#include <linux/mmu_notifier.h>
#include <linux/memory_hotplug.h>
#include <linux/show_mem_notifier.h>
#include <asm/tlb.h>
#include "internal.h"
#include "slab.h"
#define CREATE_TRACE_POINTS
#include <trace/events/oom.h>
int sysctl_panic_on_oom;
int sysctl_oom_kill_allocating_task;
int sysctl_oom_dump_tasks = 1;
/*
* Serializes oom killer invocations (out_of_memory()) from all contexts to
* prevent from over eager oom killing (e.g. when the oom killer is invoked
* from different domains).
*
* oom_killer_disable() relies on this lock to stabilize oom_killer_disabled
* and mark_oom_victim
*/
DEFINE_MUTEX(oom_lock);
/* Serializes oom_score_adj and oom_score_adj_min updates */
DEFINE_MUTEX(oom_adj_mutex);
#ifdef CONFIG_NUMA
/**
* has_intersects_mems_allowed() - check task eligiblity for kill
* @start: task struct of which task to consider
* @mask: nodemask passed to page allocator for mempolicy ooms
*
* Task eligibility is determined by whether or not a candidate task, @tsk,
* shares the same mempolicy nodes as current if it is bound by such a policy
* and whether or not it has the same set of allowed cpuset nodes.
*/
static bool has_intersects_mems_allowed(struct task_struct *start,
const nodemask_t *mask)
{
struct task_struct *tsk;
bool ret = false;
rcu_read_lock();
for_each_thread(start, tsk) {
if (mask) {
/*
* If this is a mempolicy constrained oom, tsk's
* cpuset is irrelevant. Only return true if its
* mempolicy intersects current, otherwise it may be
* needlessly killed.
*/
ret = mempolicy_nodemask_intersects(tsk, mask);
} else {
/*
* This is not a mempolicy constrained oom, so only
* check the mems of tsk's cpuset.
*/
ret = cpuset_mems_allowed_intersects(current, tsk);
}
if (ret)
break;
}
rcu_read_unlock();
return ret;
}
#else
static bool has_intersects_mems_allowed(struct task_struct *tsk,
const nodemask_t *mask)
{
return true;
}
#endif /* CONFIG_NUMA */
/*
* The process p may have detached its own ->mm while exiting or through
* use_mm(), but one or more of its subthreads may still have a valid
* pointer. Return p, or any of its subthreads with a valid ->mm, with
* task_lock() held.
*/
struct task_struct *find_lock_task_mm(struct task_struct *p)
{
struct task_struct *t;
rcu_read_lock();
for_each_thread(p, t) {
task_lock(t);
if (likely(t->mm))
goto found;
task_unlock(t);
}
t = NULL;
found:
rcu_read_unlock();
return t;
}
/*
* order == -1 means the oom kill is required by sysrq, otherwise only
* for display purposes.
*/
static inline bool is_sysrq_oom(struct oom_control *oc)
{
return oc->order == -1;
}
static inline bool is_memcg_oom(struct oom_control *oc)
{
return oc->memcg != NULL;
}
/* return true if the task is not adequate as candidate victim task. */
static bool oom_unkillable_task(struct task_struct *p,
struct mem_cgroup *memcg, const nodemask_t *nodemask)
{
if (is_global_init(p))
return true;
if (p->flags & PF_KTHREAD)
return true;
/* When mem_cgroup_out_of_memory() and p is not member of the group */
if (memcg && !task_in_mem_cgroup(p, memcg))
return true;
/* p may not have freeable memory in nodemask */
if (!has_intersects_mems_allowed(p, nodemask))
return true;
return false;
}
/*
* Print out unreclaimble slabs info when unreclaimable slabs amount is greater
* than all user memory (LRU pages)
*/
static bool is_dump_unreclaim_slabs(void)
{
unsigned long nr_lru;
nr_lru = global_node_page_state(NR_ACTIVE_ANON) +
global_node_page_state(NR_INACTIVE_ANON) +
global_node_page_state(NR_ACTIVE_FILE) +
global_node_page_state(NR_INACTIVE_FILE) +
global_node_page_state(NR_ISOLATED_ANON) +
global_node_page_state(NR_ISOLATED_FILE) +
global_node_page_state(NR_UNEVICTABLE);
return (global_node_page_state(NR_SLAB_UNRECLAIMABLE) > nr_lru);
}
/**
* oom_badness - heuristic function to determine which candidate task to kill
* @p: task struct of which task we should calculate
* @totalpages: total present RAM allowed for page allocation
* @memcg: task's memory controller, if constrained
* @nodemask: nodemask passed to page allocator for mempolicy ooms
*
* The heuristic for determining which task to kill is made to be as simple and
* predictable as possible. The goal is to return the highest value for the
* task consuming the most memory to avoid subsequent oom failures.
*/
unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg,
const nodemask_t *nodemask, unsigned long totalpages)
{
long points;
long adj;
if (oom_unkillable_task(p, memcg, nodemask))
return 0;
p = find_lock_task_mm(p);
if (!p)
return 0;
/*
* Do not even consider tasks which are explicitly marked oom
* unkillable or have been already oom reaped or the are in
* the middle of vfork
*/
adj = (long)p->signal->oom_score_adj;
if (adj == OOM_SCORE_ADJ_MIN ||
test_bit(MMF_OOM_SKIP, &p->mm->flags) ||
in_vfork(p)) {
task_unlock(p);
return 0;
}
/*
* The baseline for the badness score is the proportion of RAM that each
* task's rss, pagetable and swap space use.
*/
points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) +
mm_pgtables_bytes(p->mm) / PAGE_SIZE;
task_unlock(p);
/* Normalize to oom_score_adj units */
adj *= totalpages / 1000;
points += adj;
/*
* Never return 0 for an eligible task regardless of the root bonus and
* oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here).
*/
return points > 0 ? points : 1;
}
enum oom_constraint {
CONSTRAINT_NONE,
CONSTRAINT_CPUSET,
CONSTRAINT_MEMORY_POLICY,
CONSTRAINT_MEMCG,
};
/*
* Determine the type of allocation constraint.
*/
static enum oom_constraint constrained_alloc(struct oom_control *oc)
{
struct zone *zone;
struct zoneref *z;
enum zone_type high_zoneidx = gfp_zone(oc->gfp_mask);
bool cpuset_limited = false;
int nid;
if (is_memcg_oom(oc)) {
oc->totalpages = mem_cgroup_get_max(oc->memcg) ?: 1;
return CONSTRAINT_MEMCG;
}
/* Default to all available memory */
oc->totalpages = totalram_pages + total_swap_pages;
if (!IS_ENABLED(CONFIG_NUMA))
return CONSTRAINT_NONE;
if (!oc->zonelist)
return CONSTRAINT_NONE;
/*
* Reach here only when __GFP_NOFAIL is used. So, we should avoid
* to kill current.We have to random task kill in this case.
* Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now.
*/
if (oc->gfp_mask & __GFP_THISNODE)
return CONSTRAINT_NONE;
/*
* This is not a __GFP_THISNODE allocation, so a truncated nodemask in
* the page allocator means a mempolicy is in effect. Cpuset policy
* is enforced in get_page_from_freelist().
*/
if (oc->nodemask &&
!nodes_subset(node_states[N_MEMORY], *oc->nodemask)) {
oc->totalpages = total_swap_pages;
for_each_node_mask(nid, *oc->nodemask)
oc->totalpages += node_spanned_pages(nid);
return CONSTRAINT_MEMORY_POLICY;
}
/* Check this allocation failure is caused by cpuset's wall function */
for_each_zone_zonelist_nodemask(zone, z, oc->zonelist,
high_zoneidx, oc->nodemask)
if (!cpuset_zone_allowed(zone, oc->gfp_mask))
cpuset_limited = true;
if (cpuset_limited) {
oc->totalpages = total_swap_pages;
for_each_node_mask(nid, cpuset_current_mems_allowed)
oc->totalpages += node_spanned_pages(nid);
return CONSTRAINT_CPUSET;
}
return CONSTRAINT_NONE;
}
static int oom_evaluate_task(struct task_struct *task, void *arg)
{
struct oom_control *oc = arg;
unsigned long points;
if (oom_unkillable_task(task, NULL, oc->nodemask))
goto next;
/*
* This task already has access to memory reserves and is being killed.
* Don't allow any other task to have access to the reserves unless
* the task has MMF_OOM_SKIP because chances that it would release
* any memory is quite low.
*/
if (!is_sysrq_oom(oc) && tsk_is_oom_victim(task)) {
if (test_bit(MMF_OOM_SKIP, &task->signal->oom_mm->flags))
goto next;
goto abort;
}
/*
* If task is allocating a lot of memory and has been marked to be
* killed first if it triggers an oom, then select it.
*/
if (oom_task_origin(task)) {
points = ULONG_MAX;
goto select;
}
points = oom_badness(task, NULL, oc->nodemask, oc->totalpages);
if (!points || points < oc->chosen_points)
goto next;
/* Prefer thread group leaders for display purposes */
if (points == oc->chosen_points && thread_group_leader(oc->chosen))
goto next;
select:
if (oc->chosen)
put_task_struct(oc->chosen);
get_task_struct(task);
oc->chosen = task;
oc->chosen_points = points;
next:
return 0;
abort:
if (oc->chosen)
put_task_struct(oc->chosen);
oc->chosen = (void *)-1UL;
return 1;
}
/*
* Simple selection loop. We choose the process with the highest number of
* 'points'. In case scan was aborted, oc->chosen is set to -1.
*/
static void select_bad_process(struct oom_control *oc)
{
if (is_memcg_oom(oc))
mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
else {
struct task_struct *p;
rcu_read_lock();
for_each_process(p)
if (oom_evaluate_task(p, oc))
break;
rcu_read_unlock();
}
oc->chosen_points = oc->chosen_points * 1000 / oc->totalpages;
}
/**
* dump_tasks - dump current memory state of all system tasks
* @memcg: current's memory controller, if constrained
* @nodemask: nodemask passed to page allocator for mempolicy ooms
*
* Dumps the current memory state of all eligible tasks. Tasks not in the same
* memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes
* are not shown.
* State information includes task's pid, uid, tgid, vm size, rss,
* pgtables_bytes, swapents, oom_score_adj value, and name.
*/
void dump_tasks(struct mem_cgroup *memcg, const nodemask_t *nodemask)
{
struct task_struct *p;
struct task_struct *task;
pr_info("Tasks state (memory values in pages):\n");
pr_info("[ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name\n");
rcu_read_lock();
for_each_process(p) {
if (oom_unkillable_task(p, memcg, nodemask))
continue;
task = find_lock_task_mm(p);
if (!task) {
/*
* This is a kthread or all of p's threads have already
* detached their mm's. There's no need to report
* them; they can't be oom killed anyway.
*/
continue;
}
pr_info("[%7d] %5d %5d %8lu %8lu %8ld %8lu %5hd %s\n",
task->pid, from_kuid(&init_user_ns, task_uid(task)),
task->tgid, task->mm->total_vm, get_mm_rss(task->mm),
mm_pgtables_bytes(task->mm),
get_mm_counter(task->mm, MM_SWAPENTS),
task->signal->oom_score_adj, task->comm);
task_unlock(task);
}
rcu_read_unlock();
}
static void dump_header(struct oom_control *oc, struct task_struct *p)
{
pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), nodemask=%*pbl, order=%d, oom_score_adj=%hd\n",
current->comm, oc->gfp_mask, &oc->gfp_mask,
nodemask_pr_args(oc->nodemask), oc->order,
current->signal->oom_score_adj);
if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order)
pr_warn("COMPACTION is disabled!!!\n");
cpuset_print_current_mems_allowed();
dump_stack();
if (is_memcg_oom(oc))
mem_cgroup_print_oom_info(oc->memcg, p);
else {
show_mem(SHOW_MEM_FILTER_NODES, oc->nodemask);
if (is_dump_unreclaim_slabs())
dump_unreclaimable_slab();
show_mem_call_notifiers();
}
if (sysctl_oom_dump_tasks)
dump_tasks(oc->memcg, oc->nodemask);
}
/*
* Number of OOM victims in flight
*/
static atomic_t oom_victims = ATOMIC_INIT(0);
static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait);
static bool oom_killer_disabled __read_mostly;
#define K(x) ((x) << (PAGE_SHIFT-10))
/*
* task->mm can be NULL if the task is the exited group leader. So to
* determine whether the task is using a particular mm, we examine all the
* task's threads: if one of those is using this mm then this task was also
* using it.
*/
bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
{
struct task_struct *t;
for_each_thread(p, t) {
struct mm_struct *t_mm = READ_ONCE(t->mm);
if (t_mm)
return t_mm == mm;
}
return false;
}
#ifdef CONFIG_MMU
/*
* OOM Reaper kernel thread which tries to reap the memory used by the OOM
* victim (if that is possible) to help the OOM killer to move on.
*/
static struct task_struct *oom_reaper_th;
static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
static struct task_struct *oom_reaper_list;
static DEFINE_SPINLOCK(oom_reaper_lock);
bool __oom_reap_task_mm(struct mm_struct *mm)
{
struct vm_area_struct *vma;
bool ret = true;
/*
* Tell all users of get_user/copy_from_user etc... that the content
* is no longer stable. No barriers really needed because unmapping
* should imply barriers already and the reader would hit a page fault
* if it stumbled over a reaped memory.
*/
set_bit(MMF_UNSTABLE, &mm->flags);
for (vma = mm->mmap ; vma; vma = vma->vm_next) {
if (!can_madv_dontneed_vma(vma))
continue;
/*
* Only anonymous pages have a good chance to be dropped
* without additional steps which we cannot afford as we
* are OOM already.
*
* We do not even care about fs backed pages because all
* which are reclaimable have already been reclaimed and
* we do not want to block exit_mmap by keeping mm ref
* count elevated without a good reason.
*/
if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
const unsigned long start = vma->vm_start;
const unsigned long end = vma->vm_end;
struct mmu_gather tlb;
tlb_gather_mmu(&tlb, mm, start, end);
if (mmu_notifier_invalidate_range_start_nonblock(mm, start, end)) {
tlb_finish_mmu(&tlb, start, end);
ret = false;
continue;
}
unmap_page_range(&tlb, vma, start, end, NULL);
mmu_notifier_invalidate_range_end(mm, start, end);
tlb_finish_mmu(&tlb, start, end);
}
}
return ret;
}
/*
* Reaps the address space of the give task.
*
* Returns true on success and false if none or part of the address space
* has been reclaimed and the caller should retry later.
*/
static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
{
bool ret = true;
if (!down_read_trylock(&mm->mmap_sem)) {
trace_skip_task_reaping(tsk->pid);
return false;
}
/*
* MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't
* work on the mm anymore. The check for MMF_OOM_SKIP must run
* under mmap_sem for reading because it serializes against the
* down_write();up_write() cycle in exit_mmap().
*/
if (test_bit(MMF_OOM_SKIP, &mm->flags)) {
trace_skip_task_reaping(tsk->pid);
goto out_unlock;
}
trace_start_task_reaping(tsk->pid);
/* failed to reap part of the address space. Try again later */
ret = __oom_reap_task_mm(mm);
if (!ret)
goto out_finish;
pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
task_pid_nr(tsk), tsk->comm,
K(get_mm_counter(mm, MM_ANONPAGES)),
K(get_mm_counter(mm, MM_FILEPAGES)),
K(get_mm_counter(mm, MM_SHMEMPAGES)));
out_finish:
trace_finish_task_reaping(tsk->pid);
out_unlock:
up_read(&mm->mmap_sem);
return ret;
}
#define MAX_OOM_REAP_RETRIES 10
static void oom_reap_task(struct task_struct *tsk)
{
int attempts = 0;
struct mm_struct *mm = tsk->signal->oom_mm;
/* Retry the down_read_trylock(mmap_sem) a few times */
while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
schedule_timeout_idle(HZ/10);
if (attempts <= MAX_OOM_REAP_RETRIES ||
test_bit(MMF_OOM_SKIP, &mm->flags))
goto done;
pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
task_pid_nr(tsk), tsk->comm);
debug_show_all_locks();
done:
tsk->oom_reaper_list = NULL;
/*
* Hide this mm from OOM killer because it has been either reaped or
* somebody can't call up_write(mmap_sem).
*/
set_bit(MMF_OOM_SKIP, &mm->flags);
/* Drop a reference taken by wake_oom_reaper */
put_task_struct(tsk);
}
static int oom_reaper(void *unused)
{
while (true) {
struct task_struct *tsk = NULL;
wait_event_freezable(oom_reaper_wait, oom_reaper_list != NULL);
spin_lock(&oom_reaper_lock);
if (oom_reaper_list != NULL) {
tsk = oom_reaper_list;
oom_reaper_list = tsk->oom_reaper_list;
}
spin_unlock(&oom_reaper_lock);
if (tsk)
oom_reap_task(tsk);
}
return 0;
}
static void wake_oom_reaper(struct task_struct *tsk)
{
/* mm is already queued? */
if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags))
return;
get_task_struct(tsk);
spin_lock(&oom_reaper_lock);
tsk->oom_reaper_list = oom_reaper_list;
oom_reaper_list = tsk;
spin_unlock(&oom_reaper_lock);
trace_wake_reaper(tsk->pid);
wake_up(&oom_reaper_wait);
}
static int __init oom_init(void)
{
oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
return 0;
}
subsys_initcall(oom_init)
#else
static inline void wake_oom_reaper(struct task_struct *tsk)
{
}
#endif /* CONFIG_MMU */
/**
* mark_oom_victim - mark the given task as OOM victim
* @tsk: task to mark
*
* Has to be called with oom_lock held and never after
* oom has been disabled already.
*
* tsk->mm has to be non NULL and caller has to guarantee it is stable (either
* under task_lock or operate on the current).
*/
static void mark_oom_victim(struct task_struct *tsk)
{
struct mm_struct *mm = tsk->mm;
WARN_ON(oom_killer_disabled);
/* OOM killer might race with memcg OOM */
if (test_and_set_tsk_thread_flag(tsk, TIF_MEMDIE))
return;
/* oom_mm is bound to the signal struct life time. */
if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) {
mmgrab(tsk->signal->oom_mm);
set_bit(MMF_OOM_VICTIM, &mm->flags);
}
/*
* Make sure that the task is woken up from uninterruptible sleep
* if it is frozen because OOM killer wouldn't be able to free
* any memory and livelock. freezing_slow_path will tell the freezer
* that TIF_MEMDIE tasks should be ignored.
*/
__thaw_task(tsk);
atomic_inc(&oom_victims);
trace_mark_victim(tsk->pid);
}
/**
* exit_oom_victim - note the exit of an OOM victim
*/
void exit_oom_victim(void)
{
clear_thread_flag(TIF_MEMDIE);
if (!atomic_dec_return(&oom_victims))
wake_up_all(&oom_victims_wait);
}
/**
* oom_killer_enable - enable OOM killer
*/
void oom_killer_enable(void)
{
oom_killer_disabled = false;
pr_info("OOM killer enabled.\n");
}
/**
* oom_killer_disable - disable OOM killer
* @timeout: maximum timeout to wait for oom victims in jiffies
*
* Forces all page allocations to fail rather than trigger OOM killer.
* Will block and wait until all OOM victims are killed or the given
* timeout expires.
*
* The function cannot be called when there are runnable user tasks because
* the userspace would see unexpected allocation failures as a result. Any
* new usage of this function should be consulted with MM people.
*
* Returns true if successful and false if the OOM killer cannot be
* disabled.
*/
bool oom_killer_disable(signed long timeout)
{
signed long ret;
/*
* Make sure to not race with an ongoing OOM killer. Check that the
* current is not killed (possibly due to sharing the victim's memory).
*/
if (mutex_lock_killable(&oom_lock))
return false;
oom_killer_disabled = true;
mutex_unlock(&oom_lock);
ret = wait_event_interruptible_timeout(oom_victims_wait,
!atomic_read(&oom_victims), timeout);
if (ret <= 0) {
oom_killer_enable();
return false;
}
pr_info("OOM killer disabled.\n");
return true;
}
static inline bool __task_will_free_mem(struct task_struct *task)
{
struct signal_struct *sig = task->signal;
/*
* A coredumping process may sleep for an extended period in exit_mm(),
* so the oom killer cannot assume that the process will promptly exit
* and release memory.
*/
if (sig->flags & SIGNAL_GROUP_COREDUMP)
return false;
if (sig->flags & SIGNAL_GROUP_EXIT)
return true;
if (thread_group_empty(task) && (task->flags & PF_EXITING))
return true;
return false;
}
/*
* Checks whether the given task is dying or exiting and likely to
* release its address space. This means that all threads and processes
* sharing the same mm have to be killed or exiting.
* Caller has to make sure that task->mm is stable (hold task_lock or
* it operates on the current).
*/
static bool task_will_free_mem(struct task_struct *task)
{
struct mm_struct *mm = task->mm;
struct task_struct *p;
bool ret = true;
/*
* Skip tasks without mm because it might have passed its exit_mm and
* exit_oom_victim. oom_reaper could have rescued that but do not rely
* on that for now. We can consider find_lock_task_mm in future.
*/
if (!mm)
return false;
if (!__task_will_free_mem(task))
return false;
/*
* This task has already been drained by the oom reaper so there are
* only small chances it will free some more
*/
if (test_bit(MMF_OOM_SKIP, &mm->flags))
return false;
if (atomic_read(&mm->mm_users) <= 1)
return true;
/*
* Make sure that all tasks which share the mm with the given tasks
* are dying as well to make sure that a) nobody pins its mm and
* b) the task is also reapable by the oom reaper.
*/
rcu_read_lock();
for_each_process(p) {
if (!process_shares_mm(p, mm))
continue;
if (same_thread_group(task, p))
continue;
ret = __task_will_free_mem(p);
if (!ret)
break;
}
rcu_read_unlock();
return ret;
}
static void __oom_kill_process(struct task_struct *victim)
{
struct task_struct *p;
struct mm_struct *mm;
bool can_oom_reap = true;
p = find_lock_task_mm(victim);
if (!p) {
put_task_struct(victim);
return;
} else if (victim != p) {
get_task_struct(p);
put_task_struct(victim);
victim = p;
}
/* Get a reference to safely compare mm after task_unlock(victim) */
mm = victim->mm;
mmgrab(mm);
/* Raise event before sending signal: task reaper must see this */
count_vm_event(OOM_KILL);
memcg_memory_event_mm(mm, MEMCG_OOM_KILL);
/*
* We should send SIGKILL before granting access to memory reserves
* in order to prevent the OOM victim from depleting the memory
* reserves from the user space under its control.
*/
do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, PIDTYPE_TGID);
mark_oom_victim(victim);
pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
K(get_mm_counter(victim->mm, MM_ANONPAGES)),
K(get_mm_counter(victim->mm, MM_FILEPAGES)),
K(get_mm_counter(victim->mm, MM_SHMEMPAGES)));
task_unlock(victim);
/*
* Kill all user processes sharing victim->mm in other thread groups, if
* any. They don't get access to memory reserves, though, to avoid
* depletion of all memory. This prevents mm->mmap_sem livelock when an
* oom killed thread cannot exit because it requires the semaphore and
* its contended by another thread trying to allocate memory itself.
* That thread will now get access to memory reserves since it has a
* pending fatal signal.
*/
rcu_read_lock();
for_each_process(p) {
if (!process_shares_mm(p, mm))
continue;
if (same_thread_group(p, victim))
continue;
if (is_global_init(p)) {
can_oom_reap = false;
set_bit(MMF_OOM_SKIP, &mm->flags);
pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n",
task_pid_nr(victim), victim->comm,
task_pid_nr(p), p->comm);
continue;
}
/*
* No use_mm() user needs to read from the userspace so we are
* ok to reap it.
*/
if (unlikely(p->flags & PF_KTHREAD))
continue;
do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, PIDTYPE_TGID);
}
rcu_read_unlock();
if (can_oom_reap)
wake_oom_reaper(victim);
mmdrop(mm);
put_task_struct(victim);
}
#undef K
/*
* Kill provided task unless it's secured by setting
* oom_score_adj to OOM_SCORE_ADJ_MIN.
*/
static int oom_kill_memcg_member(struct task_struct *task, void *unused)
{
if (task->signal->oom_score_adj != OOM_SCORE_ADJ_MIN &&
!is_global_init(task)) {
get_task_struct(task);
__oom_kill_process(task);
}
return 0;
}
static void oom_kill_process(struct oom_control *oc, const char *message)
{
struct task_struct *p = oc->chosen;
unsigned int points = oc->chosen_points;
struct task_struct *victim = p;
struct task_struct *child;
struct task_struct *t;
struct mem_cgroup *oom_group;
unsigned int victim_points = 0;
static DEFINE_RATELIMIT_STATE(oom_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
/*
* If the task is already exiting, don't alarm the sysadmin or kill
* its children or threads, just give it access to memory reserves
* so it can die quickly
*/
task_lock(p);
if (task_will_free_mem(p)) {
mark_oom_victim(p);
wake_oom_reaper(p);
task_unlock(p);
put_task_struct(p);
return;
}
task_unlock(p);
if (__ratelimit(&oom_rs))
dump_header(oc, p);
pr_err("%s: Kill process %d (%s) score %u or sacrifice child\n",
message, task_pid_nr(p), p->comm, points);
/*
* If any of p's children has a different mm and is eligible for kill,
* the one with the highest oom_badness() score is sacrificed for its
* parent. This attempts to lose the minimal amount of work done while
* still freeing memory.
*/
read_lock(&tasklist_lock);
/*
* The task 'p' might have already exited before reaching here. The
* put_task_struct() will free task_struct 'p' while the loop still try
* to access the field of 'p', so, get an extra reference.
*/
get_task_struct(p);
for_each_thread(p, t) {
list_for_each_entry(child, &t->children, sibling) {
unsigned int child_points;
if (process_shares_mm(child, p->mm))
continue;
/*
* oom_badness() returns 0 if the thread is unkillable
*/
child_points = oom_badness(child,
oc->memcg, oc->nodemask, oc->totalpages);
if (child_points > victim_points) {
put_task_struct(victim);
victim = child;
victim_points = child_points;
get_task_struct(victim);
}
}
}
put_task_struct(p);
read_unlock(&tasklist_lock);
/*
* Do we need to kill the entire memory cgroup?
* Or even one of the ancestor memory cgroups?
* Check this out before killing the victim task.
*/
oom_group = mem_cgroup_get_oom_group(victim, oc->memcg);
__oom_kill_process(victim);
/*
* If necessary, kill all tasks in the selected memory cgroup.
*/
if (oom_group) {
mem_cgroup_print_oom_group(oom_group);
mem_cgroup_scan_tasks(oom_group, oom_kill_memcg_member, NULL);
mem_cgroup_put(oom_group);
}
}
/*
* Determines whether the kernel must panic because of the panic_on_oom sysctl.
*/
static void check_panic_on_oom(struct oom_control *oc,
enum oom_constraint constraint)
{
if (likely(!sysctl_panic_on_oom))
return;
if (sysctl_panic_on_oom != 2) {
/*
* panic_on_oom == 1 only affects CONSTRAINT_NONE, the kernel
* does not panic for cpuset, mempolicy, or memcg allocation
* failures.
*/
if (constraint != CONSTRAINT_NONE)
return;
}
/* Do not panic for oom kills triggered by sysrq */
if (is_sysrq_oom(oc))
return;
dump_header(oc, NULL);
panic("Out of memory: %s panic_on_oom is enabled\n",
sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
}
static BLOCKING_NOTIFIER_HEAD(oom_notify_list);
int register_oom_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_register(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(register_oom_notifier);
int unregister_oom_notifier(struct notifier_block *nb)
{
return blocking_notifier_chain_unregister(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(unregister_oom_notifier);
/**
* out_of_memory - kill the "best" process when we run out of memory
* @oc: pointer to struct oom_control
*
* If we run out of memory, we have the choice between either
* killing a random task (bad), letting the system crash (worse)
* OR try to be smart about which process to kill. Note that we
* don't have to be perfect here, we just have to be good.
*/
bool out_of_memory(struct oom_control *oc)
{
unsigned long freed = 0;
enum oom_constraint constraint = CONSTRAINT_NONE;
if (oom_killer_disabled)
return false;
if (try_online_one_block(numa_node_id())) {
/* Got some memory back */
WARN(1, "OOM killer had to online a memory block\n");
return true;
}
if (!is_memcg_oom(oc)) {
blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
if (freed > 0)
/* Got some memory back in the last second. */
return true;
}
/*
* If current has a pending SIGKILL or is exiting, then automatically
* select it. The goal is to allow it to allocate so that it may
* quickly exit and free its memory.
*/
if (task_will_free_mem(current)) {
mark_oom_victim(current);
wake_oom_reaper(current);
return true;
}
/*
* The OOM killer does not compensate for IO-less reclaim.
* pagefault_out_of_memory lost its gfp context so we have to
* make sure exclude 0 mask - all other users should have at least
* ___GFP_DIRECT_RECLAIM to get here. But mem_cgroup_oom() has to
* invoke the OOM killer even if it is a GFP_NOFS allocation.
*/
if (oc->gfp_mask && !(oc->gfp_mask & __GFP_FS) && !is_memcg_oom(oc))
return true;
/*
* Check if there were limitations on the allocation (only relevant for
* NUMA and memcg) that may require different handling.
*/
constraint = constrained_alloc(oc);
if (constraint != CONSTRAINT_MEMORY_POLICY)
oc->nodemask = NULL;
check_panic_on_oom(oc, constraint);
if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) &&
current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
get_task_struct(current);
oc->chosen = current;
oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
return true;
}
select_bad_process(oc);
/* Found nothing?!?! */
if (!oc->chosen) {
dump_header(oc, NULL);
pr_warn("Out of memory and no killable processes...\n");
/*
* If we got here due to an actual allocation at the
* system level, we cannot survive this and will enter
* an endless loop in the allocator. Bail out now.
*/
if (!is_sysrq_oom(oc) && !is_memcg_oom(oc))
panic("System is deadlocked on memory\n");
}
if (oc->chosen && oc->chosen != (void *)-1UL)
oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
"Memory cgroup out of memory");
return !!oc->chosen;
}
/*
* The pagefault handler calls here because some allocation has failed. We have
* to take care of the memcg OOM here because this is the only safe context without
* any locks held but let the oom killer triggered from the allocation context care
* about the global OOM.
*/
void pagefault_out_of_memory(void)
{
static DEFINE_RATELIMIT_STATE(pfoom_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
if (mem_cgroup_oom_synchronize(true))
return;
if (fatal_signal_pending(current))
return;
if (__ratelimit(&pfoom_rs))
pr_warn("Huh VM_FAULT_OOM leaked out to the #PF handler. Retrying PF\n");
}