Files
Thierry Strudel 75c8bc7183 Merged linux-4.4.80 into android-msm-wahoo-4.4
Linux 4.4.80
    ASoC: dpcm: Avoid putting stream state to STOP when FE stream is paused
    scsi: snic: Return error code on memory allocation failure
    scsi: fnic: Avoid sending reset to firmware when another reset is in progress
    HID: ignore Petzl USB headlamp
    ALSA: usb-audio: test EP_FLAG_RUNNING at urb completion
    sh_eth: enable RX descriptor word 0 shift on SH7734
    nvmem: imx-ocotp: Fix wrong register size
    arm64: mm: fix show_pte KERN_CONT fallout
    vfio-pci: Handle error from pci_iomap
    video: fbdev: cobalt_lcdfb: Handle return NULL error from devm_ioremap
    perf symbols: Robustify reading of build-id from sysfs
    perf tools: Install tools/lib/traceevent plugins with install-bin
    xfrm: Don't use sk_family for socket policy lookups
    tools lib traceevent: Fix prev/next_prio for deadline tasks
    Btrfs: adjust outstanding_extents counter properly when dio write is split
    usb: gadget: Fix copy/pasted error message
    ACPI / scan: Prefer devices without _HID/_CID for _ADR matching
    ARM: s3c2410_defconfig: Fix invalid values for NF_CT_PROTO_*
    ARM64: zynqmp: Fix i2c node's compatible string
    ARM64: zynqmp: Fix W=1 dtc 1.4 warnings
    dmaengine: ti-dma-crossbar: Add some 'of_node_put()' in error path.
    dmaengine: ioatdma: workaround SKX ioatdma version
    dmaengine: ioatdma: Add Skylake PCI Dev ID
    openrisc: Add _text symbol to fix ksym build error
    irqchip/mxs: Enable SKIP_SET_WAKE and MASK_ON_SUSPEND
    ASoC: nau8825: fix invalid configuration in Pre-Scalar of FLL
    spi: dw: Make debugfs name unique between instances
    ASoC: tlv320aic3x: Mark the RESET register as volatile
    irqchip/keystone: Fix "scheduling while atomic" on rt
    vfio-pci: use 32-bit comparisons for register address for gcc-4.5
    drm/msm: Verify that MSM_SUBMIT_BO_FLAGS are set
    drm/msm: Ensure that the hardware write pointer is valid
    net/mlx4: Remove BUG_ON from ICM allocation routine
    ipv6: Should use consistent conditional judgement for ip6 fragment between __ip6_append_data and ip6_finish_output
    ARM: dts: n900: Mark eMMC slot with no-sdio and no-sd flags
    r8169: add support for RTL8168 series add-on card.
    x86/mce/AMD: Make the init code more robust
    tpm: Replace device number bitmap with IDR
    tpm: fix a kernel memory leak in tpm-sysfs.c
    xen/blkback: don't use xen_blkif_get() in xen-blkback kthread
    xen/blkback: don't free be structure too early
    sched/cputime: Fix prev steal time accounting during CPU hotplug
    net: skb_needs_check() accepts CHECKSUM_NONE for tx
    pstore: Use dynamic spinlock initializer
    pstore: Correctly initialize spinlock and flags
    pstore: Allow prz to control need for locking
    vlan: Propagate MAC address to VLANs
    /proc/iomem: only expose physical resource addresses to privileged users
    Make file credentials available to the seqfile interfaces
    v4l: s5c73m3: fix negation operator
    dentry name snapshots
    ipmi/watchdog: fix watchdog timeout set on reboot
    libnvdimm, btt: fix btt_rw_page not returning errors
    RDMA/uverbs: Fix the check for port number
    PM / Domains: defer dev_pm_domain_set() until genpd->attach_dev succeeds if present
    sched/cgroup: Move sched_online_group() back into css_online() to fix crash
    kaweth: fix oops upon failed memory allocation
    kaweth: fix firmware download
    mpt3sas: Don't overreach ioc->reply_post[] during initialization
    mailbox: handle empty message in tx_tick
    mailbox: skip complete wait event if timer expired
    mailbox: always wait in mbox_send_message for blocking Tx mode
    wil6210: fix deadlock when using fw_no_recovery option
    ath10k: fix null deref on wmi-tlv when trying spectral scan
    isdn/i4l: fix buffer overflow
    isdn: Fix a sleep-in-atomic bug
    net: phy: Do not perform software reset for Generic PHY
    nfc: fdp: fix NULL pointer dereference
    xfs: don't BUG() on mixed direct and mapped I/O
    perf intel-pt: Ensure never to set 'last_ip' when packet 'count' is zero
    perf intel-pt: Use FUP always when scanning for an IP
    perf intel-pt: Fix last_ip usage
    perf intel-pt: Fix ip compression
    drm: rcar-du: Simplify and fix probe error handling
    drm: rcar-du: Perform initialization/cleanup at probe/remove time
    drm/rcar: Nuke preclose hook
    Staging: comedi: comedi_fops: Avoid orphaned proc entry
    Revert "powerpc/numa: Fix percpu allocations to be NUMA aware"
    KVM: PPC: Book3S HV: Save/restore host values of debug registers
    KVM: PPC: Book3S HV: Reload HTM registers explicitly
    KVM: PPC: Book3S HV: Restore critical SPRs to host values on guest exit
    KVM: PPC: Book3S HV: Context-switch EBB registers properly
    drm/nouveau/bar/gf100: fix access to upper half of BAR2
    drm/vmwgfx: Fix gcc-7.1.1 warning
    md/raid5: add thread_group worker async_tx_issue_pending_all
    crypto: authencesn - Fix digest_null crash
    powerpc/pseries: Fix of_node_put() underflow during reconfig remove
    net: reduce skb_warn_bad_offload() noise
    pstore: Make spinlock per zone instead of global
    af_key: Add lock to key dump
Linux 4.4.79
    alarmtimer: don't rate limit one-shot timers
    tracing: Fix kmemleak in instance_rmdir
    spmi: Include OF based modalias in device uevent
    of: device: Export of_device_{get_modalias, uevent_modalias} to modules
    drm/mst: Avoid processing partially received up/down message transactions
    drm/mst: Avoid dereferencing a NULL mstb in drm_dp_mst_handle_up_req()
    drm/mst: Fix error handling during MST sideband message reception
    RDMA/core: Initialize port_num in qp_attr
    ceph: fix race in concurrent readdir
    staging: rtl8188eu: add TL-WN722N v2 support
    Revert "perf/core: Drop kernel samples even though :u is specified"
    perf annotate: Fix broken arrow at row 0 connecting jmp instruction to its target
    target: Fix COMPARE_AND_WRITE caw_sem leak during se_cmd quiesce
    udf: Fix deadlock between writeback and udf_setsize()
    NFS: only invalidate dentries that are clearly invalid.
    Input: i8042 - fix crash at boot time
    MIPS: Fix a typo: s/preset/present/ in r2-to-r6 emulation error message
    MIPS: Send SIGILL for linked branches in `__compute_return_epc_for_insn'
    MIPS: Rename `sigill_r6' to `sigill_r2r6' in `__compute_return_epc_for_insn'
    MIPS: Send SIGILL for BPOSGE32 in `__compute_return_epc_for_insn'
    MIPS: math-emu: Prevent wrong ISA mode instruction emulation
    MIPS: Fix unaligned PC interpretation in `compute_return_epc'
    MIPS: Actually decode JALX in `__compute_return_epc_for_insn'
    MIPS: Save static registers before sysmips
    MIPS: Fix MIPS I ISA /proc/cpuinfo reporting
    x86/ioapic: Pass the correct data to unmask_ioapic_irq()
    x86/acpi: Prevent out of bound access caused by broken ACPI tables
    MIPS: Negate error syscall return in trace
    MIPS: Fix mips_atomic_set() with EVA
    MIPS: Fix mips_atomic_set() retry condition
    ftrace: Fix uninitialized variable in match_records()
    vfio: New external user group/file match
    vfio: Fix group release deadlock
    f2fs: Don't clear SGID when inheriting ACLs
    ipmi:ssif: Add missing unlock in error branch
    ipmi: use rcu lock around call to intf->handlers->sender()
    drm/radeon: Fix eDP for single-display iMac10,1 (v2)
    drm/radeon/ci: disable mclk switching for high refresh rates (v2)
    drm/amd/amdgpu: Return error if initiating read out of range on vram
    s390/syscalls: Fix out of bounds arguments access
    Raid5 should update rdev->sectors after reshape
    cx88: Fix regression in initial video standard setting
    x86/xen: allow userspace access during hypercalls
    md: don't use flush_signals in userspace processes
    usb: renesas_usbhs: gadget: disable all eps when the driver stops
    usb: renesas_usbhs: fix usbhsc_resume() for !USBHSF_RUNTIME_PWCTRL
    USB: cdc-acm: add device-id for quirky printer
    usb: storage: return on error to avoid a null pointer dereference
    xhci: Fix NULL pointer dereference when cleaning up streams for removed host
    xhci: fix 20000ms port resume timeout
    ipvs: SNAT packet replies only for NATed connections
    PCI/PM: Restore the status of PCI devices across hibernation
    af_key: Fix sadb_x_ipsecrequest parsing
    powerpc/asm: Mark cr0 as clobbered in mftb()
    powerpc: Fix emulation of mfocrf in emulate_step()
    powerpc: Fix emulation of mcrf in emulate_step()
    powerpc/64: Fix atomic64_inc_not_zero() to return an int
    iscsi-target: Add login_keys_workaround attribute for non RFC initiators
    scsi: ses: do not add a device to an enclosure if enclosure_add_links() fails.
    PM / Domains: Fix unsafe iteration over modified list of domain providers
    PM / Domains: Fix unsafe iteration over modified list of device links
    ASoC: compress: Derive substream from stream based on direction
    wlcore: fix 64K page support
    Bluetooth: use constant time memory comparison for secret values
    perf intel-pt: Clear FUP flag on error
    perf intel-pt: Ensure IP is zero when state is INTEL_PT_STATE_NO_IP
    perf intel-pt: Fix missing stack clear
    perf intel-pt: Improve sample timestamp
    perf intel-pt: Move decoder error setting into one condition
    NFC: Add sockaddr length checks before accessing sa_family in bind handlers
    nfc: Fix the sockaddr length sanitization in llcp_sock_connect
    nfc: Ensure presence of required attributes in the activate_target handler
    NFC: nfcmrvl: fix firmware-management initialisation
    NFC: nfcmrvl: use nfc-device for firmware download
    NFC: nfcmrvl: do not use device-managed resources
    NFC: nfcmrvl_uart: add missing tty-device sanity check
    NFC: fix broken device allocation
    ath9k: fix tx99 bus error
    ath9k: fix tx99 use after free
    thermal: cpu_cooling: Avoid accessing potentially freed structures
    s5p-jpeg: don't return a random width/height
    ir-core: fix gcc-7 warning on bool arithmetic
    disable new gcc-7.1.1 warnings for now
Linux 4.4.78
    kvm: vmx: allow host to access guest MSR_IA32_BNDCFGS
    kvm: vmx: Check value written to IA32_BNDCFGS
    kvm: x86: Guest BNDCFGS requires guest MPX support
    kvm: vmx: Do not disable intercepts for BNDCFGS
    KVM: x86: disable MPX if host did not enable MPX XSAVE features
    tracing: Use SOFTIRQ_OFFSET for softirq detection for more accurate results
    PM / QoS: return -EINVAL for bogus strings
    PM / wakeirq: Convert to SRCU
    sched/topology: Optimize build_group_mask()
    sched/topology: Fix overlapping sched_group_mask
    crypto: caam - fix signals handling
    crypto: sha1-ssse3 - Disable avx2
    crypto: atmel - only treat EBUSY as transient if backlog
    crypto: talitos - Extend max key length for SHA384/512-HMAC and AEAD
    mm: fix overflow check in expand_upwards()
    tpm: Issue a TPM2_Shutdown for TPM2 devices.
    Add "shutdown" to "struct class".
    tpm: Provide strong locking for device removal
    tpm: Get rid of chip->pdev
    selftests/capabilities: Fix the test_execve test
    mnt: Make propagate_umount less slow for overlapping mount propagation trees
    mnt: In propagate_umount handle visiting mounts in any order
    mnt: In umount propagation reparent in a separate pass
    vt: fix unchecked __put_user() in tioclinux ioctls
    exec: Limit arg stack to at most 75% of _STK_LIM
    s390: reduce ELF_ET_DYN_BASE
    powerpc: move ELF_ET_DYN_BASE to 4GB / 4MB
    arm64: move ELF_ET_DYN_BASE to 4GB / 4MB
    arm: move ELF_ET_DYN_BASE to 4MB
    binfmt_elf: use ELF_ET_DYN_BASE only for PIE
    checkpatch: silence perl 5.26.0 unescaped left brace warnings
    fs/dcache.c: fix spin lockup issue on nlru->lock
    mm/list_lru.c: fix list_lru_count_node() to be race free
    kernel/extable.c: mark core_kernel_text notrace
    tools/lib/lockdep: Reduce MAX_LOCK_DEPTH to avoid overflowing lock_chain/: Depth
    parisc/mm: Ensure IRQs are off in switch_mm()
    parisc: DMA API: return error instead of BUG_ON for dma ops on non dma devs
    parisc: use compat_sys_keyctl()
    parisc: Report SIGSEGV instead of SIGBUS when running out of stack
    irqchip/gic-v3: Fix out-of-bound access in gic_set_affinity
    cfg80211: Check if PMKID attribute is of expected size
    cfg80211: Validate frequencies nested in NL80211_ATTR_SCAN_FREQUENCIES
    cfg80211: Define nla_policy for NL80211_ATTR_LOCAL_MESH_POWER_MODE
    brcmfmac: fix possible buffer overflow in brcmf_cfg80211_mgmt_tx()
    rds: tcp: use sock_create_lite() to create the accept socket
    vrf: fix bug_on triggered by rx when destroying a vrf
    net: ipv6: Compare lwstate in detecting duplicate nexthops
    ipv6: dad: don't remove dynamic addresses if link is down
    net: handle NAPI_GRO_FREE_STOLEN_HEAD case also in napi_frags_finish()
    bpf: prevent leaking pointer via xadd on unprivileged
    net: prevent sign extension in dev_get_stats()
    tcp: reset sk_rx_dst in tcp_disconnect()
    net: dp83640: Avoid NULL pointer dereference.
    ipv6: avoid unregistering inet6_dev for loopback
    net/phy: micrel: configure interrupts after autoneg workaround
    net: sched: Fix one possible panic when no destroy callback
    net_sched: fix error recovery at qdisc creation
Linux 4.4.77
    saa7134: fix warm Medion 7134 EEPROM read
    x86/mm/pat: Don't report PAT on CPUs that don't support it
    ext4: check return value of kstrtoull correctly in reserved_clusters_store
    staging: comedi: fix clean-up of comedi_class in comedi_init()
    staging: vt6556: vnt_start Fix missing call to vnt_key_init_table.
    tcp: fix tcp_mark_head_lost to check skb len before fragmenting
    md: fix super_offset endianness in super_1_rdev_size_change
    md: fix incorrect use of lexx_to_cpu in does_sb_need_changing
    perf tools: Use readdir() instead of deprecated readdir_r() again
    perf tests: Remove wrong semicolon in while loop in CQM test
    perf trace: Do not process PERF_RECORD_LOST twice
    perf dwarf: Guard !x86_64 definitions under #ifdef else clause
    perf pmu: Fix misleadingly indented assignment (whitespace)
    perf annotate browser: Fix behaviour of Shift-Tab with nothing focussed
    perf tools: Remove duplicate const qualifier
    perf script: Use readdir() instead of deprecated readdir_r()
    perf thread_map: Use readdir() instead of deprecated readdir_r()
    perf tools: Use readdir() instead of deprecated readdir_r()
    perf bench numa: Avoid possible truncation when using snprintf()
    perf tests: Avoid possible truncation with dirent->d_name + snprintf
    perf scripting perl: Fix compile error with some perl5 versions
    perf thread_map: Correctly size buffer used with dirent->dt_name
    perf intel-pt: Use __fallthrough
    perf top: Use __fallthrough
    tools strfilter: Use __fallthrough
    tools string: Use __fallthrough in perf_atoll()
    tools include: Add a __fallthrough statement
    mqueue: fix a use-after-free in sys_mq_notify()
    RDMA/uverbs: Check port number supplied by user verbs cmds
    KEYS: Fix an error code in request_master_key()
    ath10k: override CE5 config for QCA9377
    x86/uaccess: Optimize copy_user_enhanced_fast_string() for short strings
    x86/tools: Fix gcc-7 warning in relocs.c
    gfs2: Fix glock rhashtable rcu bug
    USB: serial: qcserial: new Sierra Wireless EM7305 device ID
    USB: serial: option: add two Longcheer device ids
    pinctrl: sh-pfc: Update info pointer after SoC-specific init
    pinctrl: mxs: atomically switch mux and drive strength config
    pinctrl: sunxi: Fix SPDIF function name for A83T
    pinctrl: meson: meson8b: fix the NAND DQS pins
    pinctrl: sh-pfc: r8a7791: Fix SCIF2 pinmux data
    sysctl: report EINVAL if value is larger than UINT_MAX for proc_douintvec
    sysctl: don't print negative flag for proc_douintvec
    mac80211_hwsim: Replace bogus hrtimer clockid
    usb: Fix typo in the definition of Endpoint[out]Request
    usb: usbip: set buffer pointers to NULL after free
    Add USB quirk for HVR-950q to avoid intermittent device resets
    USB: serial: cp210x: add ID for CEL EM3588 USB ZigBee stick
    usb: dwc3: replace %p with %pK
    drm/virtio: don't leak bo on drm_gem_object_init failure
    tracing/kprobes: Allow to create probe with a module name starting with a digit
    mm: fix classzone_idx underflow in shrink_zones()
    bgmac: reset & enable Ethernet core before using it
    driver core: platform: fix race condition with driver_override
    fs: completely ignore unknown open flags
    fs: add a VALID_OPEN_FLAGS
Linux 4.4.76
    KVM: nVMX: Fix exception injection
    KVM: x86: zero base3 of unusable segments
    KVM: x86/vPMU: fix undefined shift in intel_pmu_refresh()
    KVM: x86: fix emulation of RSM and IRET instructions
    cpufreq: s3c2416: double free on driver init error path
    iommu/amd: Fix incorrect error handling in amd_iommu_bind_pasid()
    iommu: Handle default domain attach failure
    iommu/vt-d: Don't over-free page table directories
    ocfs2: o2hb: revert hb threshold to keep compatible
    x86/mm: Fix flush_tlb_page() on Xen
    x86/mpx: Correctly report do_mpx_bt_fault() failures to user-space
    ARM: 8685/1: ensure memblock-limit is pmd-aligned
    ARM64/ACPI: Fix BAD_MADT_GICC_ENTRY() macro implementation
    sched/loadavg: Avoid loadavg spikes caused by delayed NO_HZ accounting
    watchdog: bcm281xx: Fix use of uninitialized spinlock.
    xfrm: Oops on error in pfkey_msg2xfrm_state()
    xfrm: NULL dereference on allocation failure
    xfrm: fix stack access out of bounds with CONFIG_XFRM_SUB_POLICY
    jump label: fix passing kbuild_cflags when checking for asm goto support
    ravb: Fix use-after-free on `ifconfig eth0 down`
    sctp: check af before verify address in sctp_addr_id2transport
    net/mlx4_core: Eliminate warning messages for SRQ_LIMIT under SRIOV
    perf probe: Fix to show correct locations for events on modules
    be2net: fix status check in be_cmd_pmac_add()
    s390/ctl_reg: make __ctl_load a full memory barrier
    swiotlb: ensure that page-sized mappings are page-aligned
    coredump: Ensure proper size of sparse core files
    x86/mpx: Use compatible types in comparison to fix sparse error
    mac80211: initialize SMPS field in HT capabilities
    spi: davinci: use dma_mapping_error()
    scsi: lpfc: avoid double free of resource identifiers
    HID: i2c-hid: Add sleep between POWER ON and RESET
    kernel/panic.c: add missing \n
    ibmveth: Add a proper check for the availability of the checksum features
    vxlan: do not age static remote mac entries
    virtio_net: fix PAGE_SIZE > 64k
    vfio/spapr: fail tce_iommu_attach_group() when iommu_data is null
    drm/amdgpu: check ring being ready before using
    net: dsa: Check return value of phy_connect_direct()
    amd-xgbe: Check xgbe_init() return code
    platform/x86: ideapad-laptop: handle ACPI event 1
    scsi: virtio_scsi: Reject commands when virtqueue is broken
    xen-netfront: Fix Rx stall during network stress and OOM
    swiotlb-xen: update dev_addr after swapping pages
    virtio_console: fix a crash in config_work_handler
    Btrfs: fix truncate down when no_holes feature is enabled
    gianfar: Do not reuse pages from emergency reserve
    powerpc/eeh: Enable IO path on permanent error
    net: bgmac: Remove superfluous netif_carrier_on()
    net: bgmac: Start transmit queue in bgmac_open
    net: bgmac: Fix SOF bit checking
    bgmac: Fix reversed test of build_skb() return value.
    mtd: bcm47xxpart: don't fail because of bit-flips
    bgmac: fix a missing check for build_skb
    mtd: bcm47xxpart: limit scanned flash area on BCM47XX (MIPS) only
    MIPS: ralink: fix MT7628 wled_an pinmux gpio
    MIPS: ralink: fix MT7628 pinmux typos
    MIPS: ralink: Fix invalid assignment of SoC type
    MIPS: ralink: fix USB frequency scaling
    MIPS: ralink: MT7688 pinmux fixes
    net: korina: Fix NAPI versus resources freeing
    MIPS: ath79: fix regression in PCI window initialization
    net: mvneta: Fix for_each_present_cpu usage
    ARM: dts: BCM5301X: Correct GIC_PPI interrupt flags
    qla2xxx: Fix erroneous invalid handle message
    scsi: lpfc: Set elsiocb contexts to NULL after freeing it
    scsi: sd: Fix wrong DPOFUA disable in sd_read_cache_type
    KVM: x86: fix fixing of hypercalls
    mm: numa: avoid waiting on freed migrated pages
    block: fix module reference leak on put_disk() call for cgroups throttle
    sysctl: enable strict writes
    usb: gadget: f_fs: Fix possible deadlock
    drm/vmwgfx: Free hash table allocated by cmdbuf managed res mgr
    ALSA: hda - set input_path bitmap to zero after moving it to new place
    ALSA: hda - Fix endless loop of codec configure
    MIPS: Fix IRQ tracing & lockdep when rescheduling
    MIPS: pm-cps: Drop manual cache-line alignment of ready_count
    MIPS: Avoid accidental raw backtrace
    mm, swap_cgroup: reschedule when needed in swap_cgroup_swapoff()
    drm/ast: Handle configuration without P2A bridge
    NFSv4: fix a reference leak caused WARNING messages
    netfilter: synproxy: fix conntrackd interaction
    netfilter: xt_TCPMSS: add more sanity tests on tcph->doff
    rtnetlink: add IFLA_GROUP to ifla_policy
    ipv6: Do not leak throw route references
    sfc: provide dummy definitions of vswitch functions
    net: 8021q: Fix one possible panic caused by BUG_ON in free_netdev
    decnet: always not take dst->__refcnt when inserting dst into hash table
    net/mlx5: Wait for FW readiness before initializing command interface
    ipv6: fix calling in6_ifa_hold incorrectly for dad work
    igmp: add a missing spin_lock_init()
    igmp: acquire pmc lock for ip_mc_clear_src()
    net: caif: Fix a sleep-in-atomic bug in cfpkt_create_pfx
    Fix an intermittent pr_emerg warning about lo becoming free.
    af_unix: Add sockaddr length checks before accessing sa_family in bind and connect handlers
    net: Zero ifla_vf_info in rtnl_fill_vfinfo()
    decnet: dn_rtmsg: Improve input length sanitization in dnrmg_receive_user_skb
    net: don't call strlen on non-terminated string in dev_set_alias()
    ipv6: release dst on error in ip6_dst_lookup_tail
Linux 4.4.75
    nvme: apply DELAY_BEFORE_CHK_RDY quirk at probe time too
    nvme/quirk: Add a delay before checking for adapter readiness
    net: phy: fix marvell phy status reading
    net: phy: Initialize mdio clock at probe function
    usb: gadget: f_fs: avoid out of bounds access on comp_desc
    powerpc/slb: Force a full SLB flush when we insert for a bad EA
    mtd: spi-nor: fix spansion quad enable
    of: Add check to of_scan_flat_dt() before accessing initial_boot_params
    rxrpc: Fix several cases where a padded len isn't checked in ticket decode
    USB: usbip: fix nonconforming hub descriptor
    drm/amdgpu: adjust default display clock
    drm/amdgpu/atom: fix ps allocation size for EnableDispPowerGating
    drm/radeon: add a quirk for Toshiba Satellite L20-183
    drm/radeon: add a PX quirk for another K53TK variant
    iscsi-target: Reject immediate data underflow larger than SCSI transfer length
    target: Fix kref->refcount underflow in transport_cmd_finish_abort
    time: Fix clock->read(clock) race around clocksource changes
    Input: i8042 - add Fujitsu Lifebook AH544 to notimeout list
    powerpc/kprobes: Pause function_graph tracing during jprobes handling
    signal: Only reschedule timers on signals timers have sent
    HID: Add quirk for Dell PIXART OEM mouse
    CIFS: Improve readdir verbosity
    KVM: PPC: Book3S HV: Preserve userspace HTM state properly
    lib/cmdline.c: fix get_options() overflow while parsing ranges
    autofs: sanity check status reported with AUTOFS_DEV_IOCTL_FAIL
    fs/exec.c: account for argv/envp pointers
Linux 4.4.74
    mm: fix new crash in unmapped_area_topdown()
    Allow stack to grow up to address space limit
    mm: larger stack guard gap, between vmas
    alarmtimer: Rate limit periodic intervals
    MIPS: Fix bnezc/jialc return address calculation
    usb: dwc3: exynos fix axius clock error path to do cleanup
    alarmtimer: Prevent overflow of relative timers
    genirq: Release resources in __setup_irq() error path
    swap: cond_resched in swap_cgroup_prepare()
    mm/memory-failure.c: use compound_head() flags for huge pages
    USB: gadgetfs, dummy-hcd, net2280: fix locking for callbacks
    usb: xhci: ASMedia ASM1042A chipset need shorts TX quirk
    drivers/misc/c2port/c2port-duramar2150.c: checking for NULL instead of IS_ERR()
    usb: r8a66597-hcd: decrease timeout
    usb: r8a66597-hcd: select a different endpoint on timeout
    USB: gadget: dummy_hcd: fix hub-descriptor removable fields
    pvrusb2: reduce stack usage pvr2_eeprom_analyze()
    usb: core: fix potential memory leak in error path during hcd creation
    USB: hub: fix SS max number of ports
    iio: proximity: as3935: recalibrate RCO after resume
    staging: rtl8188eu: prevent an underflow in rtw_check_beacon_data()
    mfd: omap-usb-tll: Fix inverted bit use for USB TLL mode
    x86/mm/32: Set the '__vmalloc_start_set' flag in initmem_init()
    serial: efm32: Fix parity management in 'efm32_uart_console_get_options()'
    mac80211: fix IBSS presp allocation size
    mac80211: fix CSA in IBSS mode
    mac80211/wpa: use constant time memory comparison for MACs
    mac80211: don't look at the PM bit of BAR frames
    vb2: Fix an off by one error in 'vb2_plane_vaddr'
    cpufreq: conservative: Allow down_threshold to take values from 1 to 10
    can: gs_usb: fix memory leak in gs_cmd_reset()
    configfs: Fix race between create_link and configfs_rmdir
Linux 4.4.73
    sparc64: make string buffers large enough
    s390/kvm: do not rely on the ILC on kvm host protection faults
    xtensa: don't use linux IRQ #0
    tipc: ignore requests when the connection state is not CONNECTED
    proc: add a schedule point in proc_pid_readdir()
    romfs: use different way to generate fsid for BLOCK or MTD
    sctp: sctp_addr_id2transport should verify the addr before looking up assoc
    r8152: avoid start_xmit to schedule napi when napi is disabled
    r8152: fix rtl8152_post_reset function
    r8152: re-schedule napi for tx
    nfs: Fix "Don't increment lock sequence ID after NFS4ERR_MOVED"
    ravb: unmap descriptors when freeing rings
    drm/ast: Fixed system hanged if disable P2A
    drm/nouveau: Don't enable polling twice on runtime resume
    parisc, parport_gsc: Fixes for printk continuation lines
    net: adaptec: starfire: add checks for dma mapping errors
    pinctrl: berlin-bg4ct: fix the value for "sd1a" of pin SCRD0_CRD_PRES
    gianfar: synchronize DMA API usage by free_skb_rx_queue w/ gfar_new_page
    net/mlx4_core: Avoid command timeouts during VF driver device shutdown
    drm/nouveau/fence/g84-: protect against concurrent access to semaphore buffers
    drm/nouveau: prevent userspace from deleting client object
    ipv6: fix flow labels when the traffic class is non-0
    FS-Cache: Initialise stores_lock in netfs cookie
    fscache: Clear outstanding writes when disabling a cookie
    fscache: Fix dead object requeue
    ethtool: do not vzalloc(0) on registers dump
    log2: make order_base_2() behave correctly on const input value zero
    kasan: respect /proc/sys/kernel/traceoff_on_warning
    jump label: pass kbuild_cflags when checking for asm goto support
    PM / runtime: Avoid false-positive warnings from might_sleep_if()
    ipv6: Fix IPv6 packet loss in scenarios involving roaming + snooping switches
    i2c: piix4: Fix request_region size
    sierra_net: Add support for IPv6 and Dual-Stack Link Sense Indications
    sierra_net: Skip validating irrelevant fields for IDLE LSIs
    net: hns: Fix the device being used for dma mapping during TX
    NET: mkiss: Fix panic
    NET: Fix /proc/net/arp for AX.25
    ipv6: Inhibit IPv4-mapped src address on the wire.
    ipv6: Handle IPv4-mapped src to in6addr_any dst.
    net: xilinx_emaclite: fix receive buffer overflow
    net: xilinx_emaclite: fix freezes due to unordered I/O
    Call echo service immediately after socket reconnect
    staging: rtl8192e: rtl92e_fill_tx_desc fix write to mapped out memory.
    ARM: dts: imx6dl: Fix the VDD_ARM_CAP voltage for 396MHz operation
    partitions/msdos: FreeBSD UFS2 file systems are not recognized
    s390/vmem: fix identity mapping
Linux 4.4.72
    arm64: ensure extension of smp_store_release value
    arm64: armv8_deprecated: ensure extension of addr
    usercopy: Adjust tests to deal with SMAP/PAN
    RDMA/qib,hfi1: Fix MR reference count leak on write with immediate
    arm64: entry: improve data abort handling of tagged pointers
    arm64: hw_breakpoint: fix watchpoint matching for tagged pointers
    Make __xfs_xattr_put_listen properly report errors.
    NFSv4: Don't perform cached access checks before we've OPENed the file
    NFS: Ensure we revalidate attributes before using execute_ok()
    mm: consider memblock reservations for deferred memory initialization sizing
    net: better skb->sender_cpu and skb->napi_id cohabitation
    serial: sh-sci: Fix panic when serial console and DMA are enabled
    tty: Drop krefs for interrupted tty lock
    drivers: char: mem: Fix wraparound check to allow mappings up to the end
    ASoC: Fix use-after-free at card unregistration
    ALSA: timer: Fix missing queue indices reset at SNDRV_TIMER_IOCTL_SELECT
    ALSA: timer: Fix race between read and ioctl
    drm/nouveau/tmr: fully separate alarm execution/pending lists
    drm/vmwgfx: Make sure backup_handle is always valid
    drm/vmwgfx: limit the number of mip levels in vmw_gb_surface_define_ioctl()
    drm/vmwgfx: Handle vmalloc() failure in vmw_local_fifo_reserve()
    perf/core: Drop kernel samples even though :u is specified
    powerpc/hotplug-mem: Fix missing endian conversion of aa_index
    powerpc/numa: Fix percpu allocations to be NUMA aware
    powerpc/eeh: Avoid use after free in eeh_handle_special_event()
    scsi: qla2xxx: don't disable a not previously enabled PCI device
    KVM: arm/arm64: Handle possible NULL stage2 pud when ageing pages
    btrfs: fix memory leak in update_space_info failure path
    btrfs: use correct types for page indices in btrfs_page_exists_in_range
    cxl: Fix error path on bad ioctl
    ufs_getfrag_block(): we only grab ->truncate_mutex on block creation path
    ufs_extend_tail(): fix the braino in calling conventions of ufs_new_fragments()
    ufs: set correct ->s_maxsize
    ufs: restore maintaining ->i_blocks
    fix ufs_isblockset()
    ufs: restore proper tail allocation
    fs: add i_blocksize()
    cpuset: consider dying css as offline
    Input: elantech - add Fujitsu Lifebook E546/E557 to force crc_enabled
    drm/msm: Expose our reservation object when exporting a dmabuf.
    target: Re-add check to reject control WRITEs with overflow data
    cpufreq: cpufreq_register_driver() should return -ENODEV if init fails
    stackprotector: Increase the per-task stack canary's random range from 32 bits to 64 bits on 64-bit platforms
    random: properly align get_random_int_hash
    drivers: char: random: add get_random_long()
    iio: proximity: as3935: fix AS3935_INT mask
    iio: light: ltr501 Fix interchanged als/ps register field
    staging/lustre/lov: remove set_fs() call from lov_getstripe()
    usb: chipidea: debug: check before accessing ci_role
    usb: chipidea: udc: fix NULL pointer dereference if udc_start failed
    usb: gadget: f_mass_storage: Serialize wake and sleep execution
    ext4: fix fdatasync(2) after extent manipulation operations
    ext4: keep existing extra fields when inode expands
    ext4: fix SEEK_HOLE
    xen-netfront: cast grant table reference first to type int
    xen-netfront: do not cast grant table reference to signed short
    xen/privcmd: Support correctly 64KB page granularity when mapping memory
    dmaengine: ep93xx: Always start from BASE0
    dmaengine: usb-dmac: Fix DMAOR AE bit definition
    KVM: async_pf: avoid async pf injection when in guest mode
    arm: KVM: Allow unaligned accesses at HYP
    KVM: cpuid: Fix read/write out-of-bounds vulnerability in cpuid emulation
    kvm: async_pf: fix rcu_irq_enter() with irqs enabled
    nfsd: Fix up the "supattr_exclcreat" attributes
    nfsd4: fix null dereference on replay
    drm/amdgpu/ci: disable mclk switching for high refresh rates (v2)
    crypto: gcm - wait for crypto op not signal safe
    KEYS: fix freeing uninitialized memory in key_update()
    KEYS: fix dereferencing NULL payload with nonzero length
    ptrace: Properly initialize ptracer_cred on fork
    serial: ifx6x60: fix use-after-free on module unload
    arch/sparc: support NR_CPUS = 4096
    sparc64: delete old wrap code
    sparc64: new context wrap
    sparc64: add per-cpu mm of secondary contexts
    sparc64: redefine first version
    sparc64: combine activate_mm and switch_mm
    sparc64: reset mm cpumask after wrap
    sparc: Machine description indices can vary
    sparc64: mm: fix copy_tsb to correctly copy huge page TSBs
    net: bridge: start hello timer only if device is up
    net: ethoc: enable NAPI before poll may be scheduled
    net: ping: do not abuse udp_poll()
    ipv6: Fix leak in ipv6_gso_segment().
    vxlan: fix use-after-free on deletion
    tcp: disallow cwnd undo when switching congestion control
    cxgb4: avoid enabling napi twice to the same queue
    ipv6: xfrm: Handle errors reported by xfrm6_find_1stfragopt()
    bnx2x: Fix Multi-Cos
Linux 4.4.71
    xfs: only return -errno or success from attr ->put_listent
    xfs: in _attrlist_by_handle, copy the cursor back to userspace
    xfs: fix unaligned access in xfs_btree_visit_blocks
    xfs: bad assertion for delalloc an extent that start at i_size
    xfs: fix indlen accounting error on partial delalloc conversion
    xfs: wait on new inodes during quotaoff dquot release
    xfs: update ag iterator to support wait on new inodes
    xfs: support ability to wait on new inodes
    xfs: fix up quotacheck buffer list error handling
    xfs: prevent multi-fsb dir readahead from reading random blocks
    xfs: handle array index overrun in xfs_dir2_leaf_readbuf()
    xfs: fix over-copying of getbmap parameters from userspace
    xfs: fix off-by-one on max nr_pages in xfs_find_get_desired_pgoff()
    xfs: Fix missed holes in SEEK_HOLE implementation
    mlock: fix mlock count can not decrease in race condition
    mm/migrate: fix refcount handling when !hugepage_migration_supported()
    drm/gma500/psb: Actually use VBT mode when it is found
    slub/memcg: cure the brainless abuse of sysfs attributes
    ALSA: hda - apply STAC_9200_DELL_M22 quirk for Dell Latitude D430
    pcmcia: remove left-over %Z format
    drm/radeon: Unbreak HPD handling for r600+
    drm/radeon/ci: disable mclk switching for high refresh rates (v2)
    scsi: mpt3sas: Force request partial completion alignment
    HID: wacom: Have wacom_tpc_irq guard against possible NULL dereference
    mmc: sdhci-iproc: suppress spurious interrupt with Multiblock read
    i2c: i2c-tiny-usb: fix buffer not being DMA capable
    vlan: Fix tcp checksum offloads in Q-in-Q vlans
    net: phy: marvell: Limit errata to 88m1101
    netem: fix skb_orphan_partial()
    ipv4: add reference counting to metrics
    sctp: fix ICMP processing if skb is non-linear
    tcp: avoid fastopen API to be used on AF_UNSPEC
    virtio-net: enable TSO/checksum offloads for Q-in-Q vlans
    be2net: Fix offload features for Q-in-Q packets
    ipv6: fix out of bound writes in __ip6_append_data()
    bridge: start hello_timer when enabling KERNEL_STP in br_stp_start
    qmi_wwan: add another Lenovo EM74xx device ID
    bridge: netlink: check vlan_default_pvid range
    ipv6: Check ip6_find_1stfragopt() return value properly.
    ipv6: Prevent overrun when parsing v6 header options
    net: Improve handling of failures on link and route dumps
    tcp: eliminate negative reordering in tcp_clean_rtx_queue
    sctp: do not inherit ipv6_{mc|ac|fl}_list from parent
    sctp: fix src address selection if using secondary addresses for ipv6
    tcp: avoid fragmenting peculiar skbs in SACK
    s390/qeth: avoid null pointer dereference on OSN
    s390/qeth: unbreak OSM and OSN support
    s390/qeth: handle sysfs error during initialization
    ipv6/dccp: do not inherit ipv6_mc_list from parent
    dccp/tcp: do not inherit mc_list from parent
    sparc: Fix -Wstringop-overflow warning

Bug: 62730977
Change-Id: Ifca755d82f9e4b11016f6660298c2c1b073bfb3a
Signed-off-by: Thierry Strudel <tstrudel@google.com>
2017-09-20 16:42:37 -07:00

767 lines
21 KiB
C

/*
* fs/mpage.c
*
* Copyright (C) 2002, Linus Torvalds.
*
* Contains functions related to preparing and submitting BIOs which contain
* multiple pagecache pages.
*
* 15May2002 Andrew Morton
* Initial version
* 27Jun2002 axboe@suse.de
* use bio_add_page() to build bio's just the right size
*/
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/kdev_t.h>
#include <linux/gfp.h>
#include <linux/bio.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/highmem.h>
#include <linux/prefetch.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/cleancache.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
#include <trace/events/android_fs.h>
/*
 * Export the android_fs data read/write start/end tracepoint symbols that
 * come from the <trace/events/android_fs.h> include above.
 */
EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_start);
EXPORT_TRACEPOINT_SYMBOL(android_fs_datawrite_end);
EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_start);
EXPORT_TRACEPOINT_SYMBOL(android_fs_dataread_end);
/*
* I/O completion handler for multipage BIOs.
*
* The mpage code never puts partial pages into a BIO (except for end-of-file).
* If a page does not map to a contiguous run of blocks then it simply falls
* back to block_read_full_page().
*
* Why is this? If a page's completion depends on a number of different BIOs
* which can complete in any order (or at the same time) then determining the
* status of that page is hard. See end_buffer_async_read() for the details.
* There is no point in duplicating all that complexity.
*/
static void mpage_end_io(struct bio *bio)
{
struct bio_vec *bv;
int i;
if (trace_android_fs_dataread_end_enabled() &&
(bio_data_dir(bio) == READ)) {
struct page *first_page = bio->bi_io_vec[0].bv_page;
if (first_page != NULL)
trace_android_fs_dataread_end(first_page->mapping->host,
page_offset(first_page),
bio->bi_iter.bi_size);
}
bio_for_each_segment_all(bv, bio, i) {
struct page *page = bv->bv_page;
page_endio(page, bio_data_dir(bio), bio->bi_error);
}
bio_put(bio);
}
/*
 * Install mpage_end_io() as the completion handler and submit @bio in
 * direction @rw.
 *
 * Always returns NULL, so a caller can write "bio = mpage_bio_submit(...)"
 * to forget the bio it has just handed off.
 */
static struct bio *mpage_bio_submit(int rw, struct bio *bio)
{
	/*
	 * For reads, emit the android_fs "dataread start" trace event, keyed
	 * on the first page of the bio and tagged with the current task.
	 */
	if (trace_android_fs_dataread_start_enabled() && (rw == READ)) {
		struct page *head_page = bio->bi_io_vec[0].bv_page;

		if (head_page != NULL) {
			struct inode *host = head_page->mapping->host;
			char pathbuf[MAX_TRACE_PATHBUF_LEN];
			char *name;

			name = android_fstrace_get_pathname(pathbuf,
							    MAX_TRACE_PATHBUF_LEN,
							    host);
			trace_android_fs_dataread_start(host,
							page_offset(head_page),
							bio->bi_iter.bi_size,
							current->pid,
							name,
							current->comm);
		}
	}

	bio->bi_end_io = mpage_end_io;
	guard_bio_eod(rw, bio);
	submit_bio(rw, bio);

	return NULL;
}
/*
 * Allocate a bio with room for @nr_vecs pages, targeting @bdev at
 * @first_sector.
 *
 * If the first attempt fails and the current task has PF_MEMALLOC set, the
 * allocation is retried with the vector count halved each time until it
 * succeeds or the count reaches zero.
 *
 * Returns the initialised bio, or NULL if no allocation succeeded.
 */
static struct bio *
mpage_alloc(struct block_device *bdev,
		sector_t first_sector, int nr_vecs,
		gfp_t gfp_flags)
{
	struct bio *new_bio = bio_alloc(gfp_flags, nr_vecs);

	if (new_bio == NULL && (current->flags & PF_MEMALLOC)) {
		for (nr_vecs /= 2; nr_vecs; nr_vecs /= 2) {
			new_bio = bio_alloc(gfp_flags, nr_vecs);
			if (new_bio)
				break;
		}
	}

	if (!new_bio)
		return NULL;

	new_bio->bi_bdev = bdev;
	new_bio->bi_iter.bi_sector = first_sector;
	return new_bio;
}
/*
* support function for mpage_readpages. The fs supplied get_block might
* return an up to date buffer. This is used to map that buffer into
* the page, which allows readpage to avoid triggering a duplicate call
* to get_block.
*
* The idea is to avoid adding buffers to pages that don't already have
* them. So when the buffer is up to date and the page size == block size,
* this marks the page up to date instead of adding new buffers.
*/
static void
map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
{
	struct inode *host = page->mapping->host;
	struct buffer_head *first, *cur;
	int idx;

	if (!page_has_buffers(page)) {
		/*
		 * A page made of exactly one block whose data is already up
		 * to date needs no buffers: flag the page itself and stop.
		 */
		if (host->i_blkbits == PAGE_CACHE_SHIFT &&
		    buffer_uptodate(bh)) {
			SetPageUptodate(page);
			return;
		}
		create_empty_buffers(page, i_blocksize(host), 0);
	}

	/*
	 * Step around the page's buffer ring to slot @page_block. If the walk
	 * wraps back to the first buffer before reaching that slot, there is
	 * nothing to copy into.
	 */
	first = page_buffers(page);
	cur = first;
	for (idx = 0; idx != page_block; idx++) {
		cur = cur->b_this_page;
		if (cur == first)
			return;
	}

	/* Copy state, device and block number from @bh into that buffer. */
	cur->b_state = bh->b_state;
	cur->b_bdev = bh->b_bdev;
	cur->b_blocknr = bh->b_blocknr;
}
/*
 * This is the worker routine which does all the work of mapping the disk
 * blocks and constructs largest possible bios, submits them for IO if the
 * blocks are not contiguous on the disk.
 *
 * We pass a buffer_head back and forth and use its buffer_mapped() flag to
 * represent the validity of its disk mapping and to decide when to do the next
 * get_block() call.
 *
 * @bio: bio accumulated by earlier calls, or NULL when none is pending
 * @page: page to read; a page that already has buffers takes the fallback
 * @nr_pages: pages left in the caller's batch including this one; bounds the
 *	mapping request passed to get_block() and sizes a newly allocated bio
 * @last_block_in_bio: in/out; block number of the last block placed in @bio,
 *	compared against this page's first block to detect discontiguity
 * @map_bh: in/out; buffer_head holding the most recent get_block() result
 * @first_logical_block: in/out; file block at which @map_bh's mapping begins
 * @get_block: the filesystem's block mapper
 * @gfp: allocation flags used when a new bio is needed
 *
 * Returns the bio the caller should keep accumulating into, or NULL when
 * nothing is pending (for example because the bio was just submitted).
 */
static struct bio *
do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
sector_t *last_block_in_bio, struct buffer_head *map_bh,
unsigned long *first_logical_block, get_block_t get_block,
gfp_t gfp)
{
struct inode *inode = page->mapping->host;
const unsigned blkbits = inode->i_blkbits;
const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
const unsigned blocksize = 1 << blkbits;
sector_t block_in_file;
sector_t last_block;
sector_t last_block_in_file;
/* Disk block number for each block of this page, filled in page order. */
sector_t blocks[MAX_BUF_PER_PAGE];
unsigned page_block;
/* Index of the first unmapped block; blocks_per_page means "no hole". */
unsigned first_hole = blocks_per_page;
struct block_device *bdev = NULL;
int length;
int fully_mapped = 1;
unsigned nblocks;
unsigned relative_block;
/* Pages that already carry buffers are handled by the fallback path. */
if (page_has_buffers(page))
goto confused;
/*
 * Work out the file block range of interest: from this page's first block
 * up to the end of the remaining batch, clamped to the block that holds
 * i_size.
 */
block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
last_block = block_in_file + nr_pages * blocks_per_page;
last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
if (last_block > last_block_in_file)
last_block = last_block_in_file;
page_block = 0;
/*
* Map blocks using the result from the previous get_blocks call first.
*/
nblocks = map_bh->b_size >> blkbits;
if (buffer_mapped(map_bh) && block_in_file > *first_logical_block &&
block_in_file < (*first_logical_block + nblocks)) {
/* Offset of this page's first block inside the cached mapping. */
unsigned map_offset = block_in_file - *first_logical_block;
/* Number of blocks of the cached mapping still unused. */
unsigned last = nblocks - map_offset;
for (relative_block = 0; ; relative_block++) {
if (relative_block == last) {
/* Cached mapping used up: force a fresh get_block() later. */
clear_buffer_mapped(map_bh);
break;
}
if (page_block == blocks_per_page)
break;
blocks[page_block] = map_bh->b_blocknr + map_offset +
relative_block;
page_block++;
block_in_file++;
}
bdev = map_bh->b_bdev;
}
/*
* Then do more get_blocks calls until we are done with this page.
*/
map_bh->b_page = page;
while (page_block < blocks_per_page) {
map_bh->b_state = 0;
map_bh->b_size = 0;
/* Only ask the filesystem about blocks inside the clamped range. */
if (block_in_file < last_block) {
map_bh->b_size = (last_block-block_in_file) << blkbits;
if (get_block(inode, block_in_file, map_bh, 0))
goto confused;
*first_logical_block = block_in_file;
}
if (!buffer_mapped(map_bh)) {
/* Unmapped block: remember where the first hole starts and move on. */
fully_mapped = 0;
if (first_hole == blocks_per_page)
first_hole = page_block;
page_block++;
block_in_file++;
continue;
}
/* some filesystems will copy data into the page during
* the get_block call, in which case we don't want to
* read it again. map_buffer_to_page copies the data
* we just collected from get_block into the page's buffers
* so readpage doesn't have to repeat the get_block call
*/
if (buffer_uptodate(map_bh)) {
map_buffer_to_page(page, map_bh, page_block);
goto confused;
}
if (first_hole != blocks_per_page)
goto confused; /* hole -> non-hole */
/* Contiguous blocks? */
if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1)
goto confused;
nblocks = map_bh->b_size >> blkbits;
/*
 * Record consecutive disk blocks from this mapping until either the
 * mapping or the page is exhausted.
 */
for (relative_block = 0; ; relative_block++) {
if (relative_block == nblocks) {
clear_buffer_mapped(map_bh);
break;
} else if (page_block == blocks_per_page)
break;
blocks[page_block] = map_bh->b_blocknr+relative_block;
page_block++;
block_in_file++;
}
bdev = map_bh->b_bdev;
}
if (first_hole != blocks_per_page) {
/* Zero the page from the first hole to the end of the page. */
zero_user_segment(page, first_hole << blkbits, PAGE_CACHE_SIZE);
if (first_hole == 0) {
/* The whole page is a hole: it is complete without any I/O. */
SetPageUptodate(page);
unlock_page(page);
goto out;
}
} else if (fully_mapped) {
SetPageMappedToDisk(page);
}
/*
 * For a fully mapped single-block page that is not yet uptodate, try
 * cleancache. On a hit (return 0) the page is marked uptodate, and the
 * "confused" path then submits any pending bio and unlocks the page
 * without reading it.
 */
if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) &&
cleancache_get_page(page) == 0) {
SetPageUptodate(page);
goto confused;
}
/*
* This page will go to BIO. Do we need to send this BIO off first?
*/
if (bio && (*last_block_in_bio != blocks[0] - 1))
bio = mpage_bio_submit(READ, bio);
alloc_new:
if (bio == NULL) {
/*
 * No bio pending and the page has no hole: try bdev_read_page() first.
 * A zero return means this page needs nothing further here.
 */
if (first_hole == blocks_per_page) {
if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9),
page))
goto out;
}
/* blocks[0] << (blkbits - 9) converts the block number to 512-byte sectors. */
bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
min_t(int, nr_pages, BIO_MAX_PAGES), gfp);
if (bio == NULL)
goto confused;
}
/* Only the mapped prefix of the page (up to the first hole) is read. */
length = first_hole << blkbits;
if (bio_add_page(bio, page, length, 0) < length) {
/* The page did not fit: submit what we have and retry with a new bio. */
bio = mpage_bio_submit(READ, bio);
goto alloc_new;
}
relative_block = block_in_file - *first_logical_block;
nblocks = map_bh->b_size >> blkbits;
/*
 * Submit now if the mapping is flagged as a boundary and has been fully
 * consumed, or if this page ends in a hole. Otherwise remember the last
 * block so the next page can be checked for contiguity.
 */
if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
(first_hole != blocks_per_page))
bio = mpage_bio_submit(READ, bio);
else
*last_block_in_bio = blocks[blocks_per_page - 1];
out:
return bio;
confused:
/*
 * Fallback: submit any pending bio, then read the page through
 * block_read_full_page() unless it is already uptodate, in which case it
 * only needs unlocking.
 */
if (bio)
bio = mpage_bio_submit(READ, bio);
if (!PageUptodate(page))
block_read_full_page(page, get_block);
else
unlock_page(page);
goto out;
}
/**
* mpage_readpages - populate an address space with some pages & start reads against them
* @mapping: the address_space
* @pages: The address of a list_head which contains the target pages. These
* pages have their ->index populated and are otherwise uninitialised.
* The page at @pages->prev has the lowest file offset, and reads should be
* issued in @pages->prev to @pages->next order.
* @nr_pages: The number of pages at *@pages
* @get_block: The filesystem's block mapper function.
*
* This function walks the pages and the blocks within each page, building and
* emitting large BIOs.
*
* If anything unusual happens, such as:
*
* - encountering a page which has buffers
* - encountering a page which has a non-hole after a hole
* - encountering a page with non-contiguous blocks
*
* then this code just gives up and calls the buffer_head-based read function.
* It does handle a page which has holes at the end - that is a common case:
* the end-of-file on blocksize < PAGE_CACHE_SIZE setups.
*
* BH_Boundary explanation:
*
* There is a problem. The mpage read code assembles several pages, gets all
* their disk mappings, and then submits them all. That's fine, but obtaining
* the disk mappings may require I/O. Reads of indirect blocks, for example.
*
* So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be
* submitted in the following order:
* 12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16
*
* because the indirect block has to be read to get the mappings of blocks
* 13,14,15,16. Obviously, this impacts performance.
*
* So what we do is to allow the filesystem's get_block() function to set
* BH_Boundary when it maps block 11. BH_Boundary says: mapping of the block
* after this one will require I/O against a block which is probably close to
* this one. So you should push what I/O you have currently accumulated.
*
* This all causes the disk requests to be issued in the correct order.
*/
int
mpage_readpages(struct address_space *mapping, struct list_head *pages,
				unsigned nr_pages, get_block_t get_block)
{
	struct buffer_head map_bh;
	struct bio *bio = NULL;
	sector_t last_block_in_bio = 0;
	unsigned long first_logical_block = 0;
	gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL);
	unsigned remaining;

	/* Start with an empty mapping so the worker calls get_block(). */
	map_bh.b_state = 0;
	map_bh.b_size = 0;

	/*
	 * Take pages from the tail of the list one at a time. @remaining
	 * counts the pages not yet processed, including the current one.
	 */
	for (remaining = nr_pages; remaining; remaining--) {
		struct page *page = list_entry(pages->prev, struct page, lru);

		prefetchw(&page->flags);
		list_del(&page->lru);

		/* Only pages successfully added to the page cache are read. */
		if (add_to_page_cache_lru(page, mapping,
					  page->index, gfp) == 0)
			bio = do_mpage_readpage(bio, page, remaining,
						&last_block_in_bio, &map_bh,
						&first_logical_block,
						get_block, gfp);

		page_cache_release(page);
	}
	BUG_ON(!list_empty(pages));

	/* Flush whatever is still accumulated in the last bio. */
	if (bio)
		mpage_bio_submit(READ, bio);

	return 0;
}
EXPORT_SYMBOL(mpage_readpages);
/*
* This isn't called much at all
*/
int mpage_readpage(struct page *page, get_block_t get_block)
{
	struct buffer_head map_bh;
	struct bio *pending;
	sector_t last_block_in_bio = 0;
	unsigned long first_logical_block = 0;
	gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);

	/* Start with an empty mapping so the worker calls get_block(). */
	map_bh.b_state = 0;
	map_bh.b_size = 0;

	/* Single-page read: no bio to continue from, batch size of one. */
	pending = do_mpage_readpage(NULL, page, 1, &last_block_in_bio,
				    &map_bh, &first_logical_block,
				    get_block, gfp);

	/* Submit the bio if the worker left one pending. */
	if (pending)
		mpage_bio_submit(READ, pending);

	return 0;
}
EXPORT_SYMBOL(mpage_readpage);
/*
* Writing is not so simple.
*
* If the page has buffers then they will be used for obtaining the disk
* mapping. We only support pages which are fully mapped-and-dirty, with a
* special case for pages which are unmapped at the end: end-of-file.
*
* If the page has no buffers (preferred) then the page is mapped here.
*
* If all blocks are found to be contiguous then the page can go into the
* BIO. Otherwise fall back to the mapping's writepage().
*
* FIXME: This code wants an estimate of how many pages are still to be
* written, so it can intelligently allocate a suitably-sized BIO. For now,
* just allocate full-size (16-page) BIOs.
*/
/* State passed to __mpage_writepage() through its void *data argument. */
struct mpage_data {
/* bio being accumulated; read on entry and stored back on exit */
struct bio *bio;
/* last disk block placed in @bio, used to check the next page is contiguous */
sector_t last_block_in_bio;
/* filesystem block mapper, called for pages that have no buffers */
get_block_t *get_block;
/* non-zero: fall back to a_ops->writepage(); zero: return -EAGAIN instead */
unsigned use_writepage;
};
/*
* We have our BIO, so we can now mark the buffers clean. Make
* sure to only clean buffers which we know we'll be writing.
*/
static void clean_buffers(struct page *page, unsigned first_unmapped)
{
	struct buffer_head *first, *cur;
	unsigned nr_cleaned;

	if (!page_has_buffers(page))
		return;

	/*
	 * Clear the dirty bit on the leading @first_unmapped buffers, stopping
	 * early if the walk wraps around the page's buffer ring.
	 */
	first = page_buffers(page);
	cur = first;
	for (nr_cleaned = 0; nr_cleaned != first_unmapped; nr_cleaned++) {
		clear_buffer_dirty(cur);
		cur = cur->b_this_page;
		if (cur == first)
			break;
	}

	/*
	 * The buffers may only be dropped when the page is uptodate.
	 * Otherwise a concurrent readpage would fail to serialize with the
	 * bh and would read from disk before our write reaches the platter.
	 */
	if (buffer_heads_over_limit && PageUptodate(page))
		try_to_free_buffers(page);
}
/*
 * Write one page, either by adding it to the bio carried in @data or by
 * falling back when the page cannot be described as one contiguous run of
 * disk blocks.
 *
 * @page: page to write; the no-buffers path asserts that it is uptodate
 * @wbc: writeback control; sync_mode selects WRITE_SYNC or WRITE
 * @data: struct mpage_data holding the pending bio, the last block in that
 *	bio, the get_block callback and the fallback policy
 *
 * Returns 0 on the direct-to-bio and bdev_write_page() paths. On the
 * "confused" path it returns the result of a_ops->writepage() when
 * use_writepage is set, and -EAGAIN otherwise. The (possibly NULL) pending
 * bio is always stored back into the mpage_data before returning.
 */
static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
void *data)
{
struct mpage_data *mpd = data;
struct bio *bio = mpd->bio;
struct address_space *mapping = page->mapping;
struct inode *inode = page->mapping->host;
const unsigned blkbits = inode->i_blkbits;
unsigned long end_index;
const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
sector_t last_block;
sector_t block_in_file;
/* Disk block number for each mapped block of this page, in page order. */
sector_t blocks[MAX_BUF_PER_PAGE];
unsigned page_block;
/* Index of the first unmapped block; blocks_per_page means "none". */
unsigned first_unmapped = blocks_per_page;
struct block_device *bdev = NULL;
int boundary = 0;
sector_t boundary_block = 0;
struct block_device *boundary_bdev = NULL;
int length;
struct buffer_head map_bh;
loff_t i_size = i_size_read(inode);
int ret = 0;
int wr = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
if (page_has_buffers(page)) {
struct buffer_head *head = page_buffers(page);
struct buffer_head *bh = head;
/* If they're all mapped and dirty, do it */
page_block = 0;
do {
BUG_ON(buffer_locked(bh));
if (!buffer_mapped(bh)) {
/*
* unmapped dirty buffers are created by
* __set_page_dirty_buffers -> mmapped data
*/
if (buffer_dirty(bh))
goto confused;
if (first_unmapped == blocks_per_page)
first_unmapped = page_block;
/*
 * continue jumps to the loop condition, which advances bh;
 * page_block is not advanced for an unmapped buffer.
 */
continue;
}
if (first_unmapped != blocks_per_page)
goto confused; /* hole -> non-hole */
if (!buffer_dirty(bh) || !buffer_uptodate(bh))
goto confused;
/* Every mapped buffer must directly follow the previous one on disk. */
if (page_block) {
if (bh->b_blocknr != blocks[page_block-1] + 1)
goto confused;
}
blocks[page_block++] = bh->b_blocknr;
boundary = buffer_boundary(bh);
if (boundary) {
boundary_block = bh->b_blocknr;
boundary_bdev = bh->b_bdev;
}
bdev = bh->b_bdev;
} while ((bh = bh->b_this_page) != head);
/* Non-zero first_unmapped: at least the first buffer was mapped. */
if (first_unmapped)
goto page_is_mapped;
/*
* Page has buffers, but they are all unmapped. The page was
* created by pagein or read over a hole which was handled by
* block_read_full_page(). If this address_space is also
* using mpage_readpages then this can rarely happen.
*/
goto confused;
}
/*
* The page has no buffers: map it to disk
*/
BUG_ON(!PageUptodate(page));
block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
/* File block that holds the last byte of the file. */
last_block = (i_size - 1) >> blkbits;
map_bh.b_page = page;
/* Map one block per iteration; page_block advances inside the body. */
for (page_block = 0; page_block < blocks_per_page; ) {
map_bh.b_state = 0;
map_bh.b_size = 1 << blkbits;
if (mpd->get_block(inode, block_in_file, &map_bh, 1))
goto confused;
if (buffer_new(&map_bh))
unmap_underlying_metadata(map_bh.b_bdev,
map_bh.b_blocknr);
if (buffer_boundary(&map_bh)) {
boundary_block = map_bh.b_blocknr;
boundary_bdev = map_bh.b_bdev;
}
if (page_block) {
if (map_bh.b_blocknr != blocks[page_block-1] + 1)
goto confused;
}
blocks[page_block++] = map_bh.b_blocknr;
boundary = buffer_boundary(&map_bh);
bdev = map_bh.b_bdev;
/* Stop after mapping the block that contains end-of-file. */
if (block_in_file == last_block)
break;
block_in_file++;
}
BUG_ON(page_block == 0);
first_unmapped = page_block;
page_is_mapped:
end_index = i_size >> PAGE_CACHE_SHIFT;
if (page->index >= end_index) {
/*
* The page straddles i_size. It must be zeroed out on each
* and every writepage invocation because it may be mmapped.
* "A file is mapped in multiples of the page size. For a file
* that is not a multiple of the page size, the remaining memory
* is zeroed when mapped, and writes to that region are not
* written out to the file."
*/
unsigned offset = i_size & (PAGE_CACHE_SIZE - 1);
/* A page wholly beyond i_size takes the fallback path. */
if (page->index > end_index || !offset)
goto confused;
zero_user_segment(page, offset, PAGE_CACHE_SIZE);
}
/*
* This page will go to BIO. Do we need to send this BIO off first?
*/
if (bio && mpd->last_block_in_bio != blocks[0] - 1)
bio = mpage_bio_submit(wr, bio);
alloc_new:
if (bio == NULL) {
/*
 * No bio pending and every block of the page is mapped: try
 * bdev_write_page() first. A zero return means the page needs no bio,
 * so only its buffers remain to be cleaned.
 */
if (first_unmapped == blocks_per_page) {
if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
page, wbc)) {
clean_buffers(page, first_unmapped);
goto out;
}
}
/* blocks[0] << (blkbits - 9) converts the block number to 512-byte sectors. */
bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH);
if (bio == NULL)
goto confused;
wbc_init_bio(wbc, bio);
}
/*
* Must try to add the page before marking the buffer clean or
* the confused fail path above (OOM) will be very confused when
* it finds all bh marked clean (i.e. it will not write anything)
*/
wbc_account_io(wbc, page, PAGE_SIZE);
/* Only the mapped prefix of the page is written. */
length = first_unmapped << blkbits;
if (bio_add_page(bio, page, length, 0) < length) {
/* The page did not fit: submit what we have and retry with a new bio. */
bio = mpage_bio_submit(wr, bio);
goto alloc_new;
}
/* The page is in the bio: clean its buffers and mark it under writeback. */
clean_buffers(page, first_unmapped);
BUG_ON(PageWriteback(page));
set_page_writeback(page);
unlock_page(page);
/*
 * Submit immediately if the last mapped block was a boundary block or the
 * page is only partially mapped. Otherwise remember the last block so the
 * next page can be checked for contiguity.
 */
if (boundary || (first_unmapped != blocks_per_page)) {
bio = mpage_bio_submit(wr, bio);
if (boundary_block) {
write_boundary_block(boundary_bdev,
boundary_block, 1 << blkbits);
}
} else {
mpd->last_block_in_bio = blocks[blocks_per_page - 1];
}
goto out;
confused:
/* Fallback: submit any pending bio before handing the page elsewhere. */
if (bio)
bio = mpage_bio_submit(wr, bio);
if (mpd->use_writepage) {
ret = mapping->a_ops->writepage(page, wbc);
} else {
/* The caller asked for no ->writepage() fallback: report -EAGAIN. */
ret = -EAGAIN;
goto out;
}
/*
* The caller has a ref on the inode, so *mapping is stable
*/
mapping_set_error(mapping, ret);
out:
/* Hand the (possibly NULL) pending bio back to the caller's state. */
mpd->bio = bio;
return ret;
}
/**
* mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them
* @mapping: address space structure to write
* @wbc: subtract the number of written pages from *@wbc->nr_to_write
* @get_block: the filesystem's block mapper function.
* If this is NULL then use a_ops->writepage. Otherwise, go
* direct-to-BIO.
*
* This is a library function, which implements the writepages()
* address_space_operation.
*
* If a page is already under I/O, generic_writepages() skips it, even
* if it's dirty. This is desirable behaviour for memory-cleaning writeback,
* but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
* and msync() need to guarantee that all the data which was dirty at the time
* the call was made get new I/O started against them. If wbc->sync_mode is
* WB_SYNC_ALL then we were called for data integrity and we must wait for
* existing IO to complete.
*/
int
mpage_writepages(struct address_space *mapping,
		struct writeback_control *wbc, get_block_t get_block)
{
	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);

	if (get_block) {
		/* Direct-to-BIO path, falling back to ->writepage() per page. */
		struct mpage_data mpd = {
			.get_block = get_block,
			.use_writepage = 1,
			.bio = NULL,
			.last_block_in_bio = 0,
		};

		ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);

		/* Flush the bio left over from the last page, if any. */
		if (mpd.bio) {
			int wr = WRITE;

			if (wbc->sync_mode == WB_SYNC_ALL)
				wr = WRITE_SYNC;
			mpage_bio_submit(wr, mpd.bio);
		}
	} else {
		/* No block mapper supplied: use the generic writepages path. */
		ret = generic_writepages(mapping, wbc);
	}

	blk_finish_plug(&plug);
	return ret;
}
EXPORT_SYMBOL(mpage_writepages);
/*
 * Write a single page through __mpage_writepage() without the ->writepage()
 * fallback (use_writepage is 0), then submit any bio it left pending.
 *
 * Returns whatever __mpage_writepage() returned for the page.
 */
int mpage_writepage(struct page *page, get_block_t get_block,
	struct writeback_control *wbc)
{
	struct mpage_data mpd = {
		.get_block = get_block,
		.use_writepage = 0,
		.bio = NULL,
		.last_block_in_bio = 0,
	};
	int ret;

	ret = __mpage_writepage(page, wbc, &mpd);

	if (mpd.bio) {
		int wr = WRITE;

		if (wbc->sync_mode == WB_SYNC_ALL)
			wr = WRITE_SYNC;
		mpage_bio_submit(wr, mpd.bio);
	}

	return ret;
}
EXPORT_SYMBOL(mpage_writepage);