diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc index 4fe677ed1305..ab49b9ac3bcb 100644 --- a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc +++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc @@ -83,3 +83,11 @@ KernelVersion: 4.7 Contact: Mathieu Poirier Description: (R) Indicates the capabilities of the Coresight TMC. The value is read directly from the DEVID register, 0xFC8, + +What: /sys/bus/coresight/devices/.tmc/buffer_size +Date: December 2018 +KernelVersion: 4.19 +Contact: Mathieu Poirier +Description: (RW) Size of the trace buffer for TMC-ETR when used in SYSFS + mode. Writable only for TMC-ETR configurations. The value + should be aligned to the kernel pagesize. diff --git a/Documentation/ABI/testing/sysfs-bus-mei b/Documentation/ABI/testing/sysfs-bus-mei index 6bd45346ac7e..3f8701e8fa24 100644 --- a/Documentation/ABI/testing/sysfs-bus-mei +++ b/Documentation/ABI/testing/sysfs-bus-mei @@ -4,7 +4,7 @@ KernelVersion: 3.10 Contact: Samuel Ortiz linux-mei@linux.intel.com Description: Stores the same MODALIAS value emitted by uevent - Format: mei::: + Format: mei::: What: /sys/bus/mei/devices/.../name Date: May 2015 diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index ee39acacf6f8..335595a79866 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -7,6 +7,13 @@ Description: The name of devfreq object denoted as ... is same as the name of device using devfreq. +What: /sys/class/devfreq/.../name +Date: November 2019 +Contact: Chanwoo Choi +Description: + The /sys/class/devfreq/.../name shows the name of device + of the corresponding devfreq object. + What: /sys/class/devfreq/.../governor Date: September 2011 Contact: MyungJoo Ham diff --git a/Documentation/ABI/testing/sysfs-class-gnss b/Documentation/ABI/testing/sysfs-class-gnss new file mode 100644 index 000000000000..2467b6900eae --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-gnss @@ -0,0 +1,15 @@ +What: /sys/class/gnss/gnssN/type +Date: May 2018 +KernelVersion: 4.18 +Contact: Johan Hovold +Description: + The GNSS receiver type. The currently identified types reflect + the protocol(s) supported by the receiver: + + "NMEA" NMEA 0183 + "SiRF" SiRF Binary + "UBX" UBX + + Note that also non-"NMEA" type receivers typically support a + subset of NMEA 0183 with vendor extensions (e.g. to allow + switching to a vendor protocol). diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 645687b1870d..9ebca6a750f3 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -381,6 +381,8 @@ What: /sys/devices/system/cpu/vulnerabilities /sys/devices/system/cpu/vulnerabilities/spec_store_bypass /sys/devices/system/cpu/vulnerabilities/l1tf /sys/devices/system/cpu/vulnerabilities/mds + /sys/devices/system/cpu/vulnerabilities/tsx_async_abort + /sys/devices/system/cpu/vulnerabilities/itlb_multihit Date: January 2018 Contact: Linux kernel mailing list Description: Information about CPU vulnerabilities diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 12edef14db63..e0a8a2c2c0c1 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -1,260 +1,320 @@ What: /sys/fs/f2fs//gc_max_sleep_time Date: July 2013 Contact: "Namjae Jeon" -Description: - Controls the maximun sleep time for gc_thread. Time - is in milliseconds. +Description: Controls the maximum sleep time for gc_thread. Time + is in milliseconds. What: /sys/fs/f2fs//gc_min_sleep_time Date: July 2013 Contact: "Namjae Jeon" -Description: - Controls the minimum sleep time for gc_thread. Time - is in milliseconds. +Description: Controls the minimum sleep time for gc_thread. Time + is in milliseconds. What: /sys/fs/f2fs//gc_no_gc_sleep_time Date: July 2013 Contact: "Namjae Jeon" -Description: - Controls the default sleep time for gc_thread. Time - is in milliseconds. +Description: Controls the default sleep time for gc_thread. Time + is in milliseconds. What: /sys/fs/f2fs//gc_idle Date: July 2013 Contact: "Namjae Jeon" -Description: - Controls the victim selection policy for garbage collection. +Description: Controls the victim selection policy for garbage collection. + Setting gc_idle = 0(default) will disable this option. Setting + gc_idle = 1 will select the Cost Benefit approach & setting + gc_idle = 2 will select the greedy approach. What: /sys/fs/f2fs//reclaim_segments Date: October 2013 Contact: "Jaegeuk Kim" +Description: This parameter controls the number of prefree segments to be + reclaimed. If the number of prefree segments is larger than + the number of segments in the proportion to the percentage + over total volume size, f2fs tries to conduct checkpoint to + reclaim the prefree segments to free segments. + By default, 5% over total # of segments. + +What: /sys/fs/f2fs//main_blkaddr +Date: November 2019 +Contact: "Ramon Pantin" Description: - Controls the issue rate of segment discard commands. + Shows first block address of MAIN area. What: /sys/fs/f2fs//ipu_policy Date: November 2013 Contact: "Jaegeuk Kim" -Description: - Controls the in-place-update policy. +Description: Controls the in-place-update policy. + updates in f2fs. User can set: + 0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR, + 0x04: F2FS_IPU_UTIL, 0x08: F2FS_IPU_SSR_UTIL, + 0x10: F2FS_IPU_FSYNC, 0x20: F2FS_IPU_ASYNC, + 0x40: F2FS_IPU_NOCACHE. + Refer segment.h for details. What: /sys/fs/f2fs//min_ipu_util Date: November 2013 Contact: "Jaegeuk Kim" -Description: - Controls the FS utilization condition for the in-place-update - policies. +Description: Controls the FS utilization condition for the in-place-update + policies. It is used by F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies. What: /sys/fs/f2fs//min_fsync_blocks Date: September 2014 Contact: "Jaegeuk Kim" -Description: - Controls the dirty page count condition for the in-place-update - policies. +Description: Controls the dirty page count condition for the in-place-update + policies. What: /sys/fs/f2fs//min_seq_blocks Date: August 2018 Contact: "Jaegeuk Kim" -Description: - Controls the dirty page count condition for batched sequential - writes in ->writepages. - +Description: Controls the dirty page count condition for batched sequential + writes in writepages. What: /sys/fs/f2fs//min_hot_blocks Date: March 2017 Contact: "Jaegeuk Kim" -Description: - Controls the dirty page count condition for redefining hot data. +Description: Controls the dirty page count condition for redefining hot data. What: /sys/fs/f2fs//min_ssr_sections Date: October 2017 Contact: "Chao Yu" -Description: - Controls the fee section threshold to trigger SSR allocation. +Description: Controls the free section threshold to trigger SSR allocation. + If this is large, SSR mode will be enabled early. What: /sys/fs/f2fs//max_small_discards Date: November 2013 Contact: "Jaegeuk Kim" -Description: - Controls the issue rate of small discard commands. +Description: Controls the issue rate of discard commands that consist of small + blocks less than 2MB. The candidates to be discarded are cached until + checkpoint is triggered, and issued during the checkpoint. + By default, it is disabled with 0. -What: /sys/fs/f2fs//discard_granularity -Date: July 2017 -Contact: "Chao Yu" -Description: - Controls discard granularity of inner discard thread, inner thread +What: /sys/fs/f2fs//discard_granularity +Date: July 2017 +Contact: "Chao Yu" +Description: Controls discard granularity of inner discard thread. Inner thread will not issue discards with size that is smaller than granularity. - The unit size is one block, now only support configuring in range - of [1, 512]. + The unit size is one block(4KB), now only support configuring + in range of [1, 512]. Default value is 4(=16KB). -What: /sys/fs/f2fs//umount_discard_timeout -Date: January 2019 -Contact: "Jaegeuk Kim" -Description: - Set timeout to issue discard commands during umount. - Default: 5 secs +What: /sys/fs/f2fs//umount_discard_timeout +Date: January 2019 +Contact: "Jaegeuk Kim" +Description: Set timeout to issue discard commands during umount. + Default: 5 secs What: /sys/fs/f2fs//max_victim_search Date: January 2014 Contact: "Jaegeuk Kim" -Description: - Controls the number of trials to find a victim segment. +Description: Controls the number of trials to find a victim segment + when conducting SSR and cleaning operations. The default value + is 4096 which covers 8GB block address range. What: /sys/fs/f2fs//migration_granularity Date: October 2018 Contact: "Chao Yu" -Description: - Controls migration granularity of garbage collection on large - section, it can let GC move partial segment{s} of one section - in one GC cycle, so that dispersing heavy overhead GC to - multiple lightweight one. +Description: Controls migration granularity of garbage collection on large + section, it can let GC move partial segment{s} of one section + in one GC cycle, so that dispersing heavy overhead GC to + multiple lightweight one. What: /sys/fs/f2fs//dir_level Date: March 2014 Contact: "Jaegeuk Kim" -Description: - Controls the directory level for large directory. +Description: Controls the directory level for large directory. If a + directory has a number of files, it can reduce the file lookup + latency by increasing this dir_level value. Otherwise, it + needs to decrease this value to reduce the space overhead. + The default value is 0. What: /sys/fs/f2fs//ram_thresh Date: March 2014 Contact: "Jaegeuk Kim" -Description: - Controls the memory footprint used by f2fs. +Description: Controls the memory footprint used by free nids and cached + nat entries. By default, 1 is set, which indicates + 10 MB / 1 GB RAM. What: /sys/fs/f2fs//batched_trim_sections Date: February 2015 Contact: "Jaegeuk Kim" -Description: - Controls the trimming rate in batch mode. - +Description: Controls the trimming rate in batch mode. + What: /sys/fs/f2fs//cp_interval Date: October 2015 Contact: "Jaegeuk Kim" -Description: - Controls the checkpoint timing. +Description: Controls the checkpoint timing, set to 60 seconds by default. What: /sys/fs/f2fs//idle_interval Date: January 2016 Contact: "Jaegeuk Kim" -Description: - Controls the idle timing for all paths other than - discard and gc path. +Description: Controls the idle timing of system, if there is no FS operation + during given interval. + Set to 5 seconds by default. What: /sys/fs/f2fs//discard_idle_interval Date: September 2018 Contact: "Chao Yu" Contact: "Sahitya Tummala" -Description: - Controls the idle timing for discard path. +Description: Controls the idle timing of discard thread given + this time interval. + Default is 5 secs. What: /sys/fs/f2fs//gc_idle_interval Date: September 2018 Contact: "Chao Yu" Contact: "Sahitya Tummala" -Description: - Controls the idle timing for gc path. +Description: Controls the idle timing for gc path. Set to 5 seconds by default. What: /sys/fs/f2fs//iostat_enable Date: August 2017 Contact: "Chao Yu" -Description: - Controls to enable/disable IO stat. +Description: Controls to enable/disable IO stat. What: /sys/fs/f2fs//ra_nid_pages Date: October 2015 Contact: "Chao Yu" -Description: - Controls the count of nid pages to be readaheaded. +Description: Controls the count of nid pages to be readaheaded. + When building free nids, F2FS reads NAT blocks ahead for + speed up. Default is 0. What: /sys/fs/f2fs//dirty_nats_ratio Date: January 2016 Contact: "Chao Yu" -Description: - Controls dirty nat entries ratio threshold, if current - ratio exceeds configured threshold, checkpoint will - be triggered for flushing dirty nat entries. +Description: Controls dirty nat entries ratio threshold, if current + ratio exceeds configured threshold, checkpoint will + be triggered for flushing dirty nat entries. What: /sys/fs/f2fs//lifetime_write_kbytes Date: January 2016 Contact: "Shuoran Liu" -Description: - Shows total written kbytes issued to disk. +Description: Shows total written kbytes issued to disk. What: /sys/fs/f2fs//feature Date: July 2017 Contact: "Jaegeuk Kim" -Description: - Shows all enabled features in current device. +Description: Shows all enabled features in current device. What: /sys/fs/f2fs//inject_rate Date: May 2016 Contact: "Sheng Yong" -Description: - Controls the injection rate. +Description: Controls the injection rate of arbitrary faults. What: /sys/fs/f2fs//inject_type Date: May 2016 Contact: "Sheng Yong" -Description: - Controls the injection type. +Description: Controls the injection type of arbitrary faults. + +What: /sys/fs/f2fs//dirty_segments +Date: October 2017 +Contact: "Jaegeuk Kim" +Description: Shows the number of dirty segments. What: /sys/fs/f2fs//reserved_blocks Date: June 2017 Contact: "Chao Yu" -Description: - Controls target reserved blocks in system, the threshold - is soft, it could exceed current available user space. +Description: Controls target reserved blocks in system, the threshold + is soft, it could exceed current available user space. What: /sys/fs/f2fs//current_reserved_blocks Date: October 2017 Contact: "Yunlong Song" Contact: "Chao Yu" -Description: - Shows current reserved blocks in system, it may be temporarily - smaller than target_reserved_blocks, but will gradually - increase to target_reserved_blocks when more free blocks are - freed by user later. +Description: Shows current reserved blocks in system, it may be temporarily + smaller than target_reserved_blocks, but will gradually + increase to target_reserved_blocks when more free blocks are + freed by user later. What: /sys/fs/f2fs//gc_urgent Date: August 2017 Contact: "Jaegeuk Kim" -Description: - Do background GC agressively +Description: Do background GC agressively when set. When gc_urgent = 1, + background thread starts to do GC by given gc_urgent_sleep_time + interval. It is set to 0 by default. What: /sys/fs/f2fs//gc_urgent_sleep_time Date: August 2017 Contact: "Jaegeuk Kim" -Description: - Controls sleep time of GC urgent mode +Description: Controls sleep time of GC urgent mode. Set to 500ms by default. What: /sys/fs/f2fs//readdir_ra Date: November 2017 Contact: "Sheng Yong" -Description: - Controls readahead inode block in readdir. +Description: Controls readahead inode block in readdir. Enabled by default. + +What: /sys/fs/f2fs//gc_pin_file_thresh +Date: January 2018 +Contact: Jaegeuk Kim +Description: This indicates how many GC can be failed for the pinned + file. If it exceeds this, F2FS doesn't guarantee its pinning + state. 2048 trials is set by default. What: /sys/fs/f2fs//extension_list Date: Feburary 2018 Contact: "Chao Yu" -Description: - Used to control configure extension list: - - Query: cat /sys/fs/f2fs//extension_list - - Add: echo '[h/c]extension' > /sys/fs/f2fs//extension_list - - Del: echo '[h/c]!extension' > /sys/fs/f2fs//extension_list - - [h] means add/del hot file extension - - [c] means add/del cold file extension +Description: Used to control configure extension list: + - Query: cat /sys/fs/f2fs//extension_list + - Add: echo '[h/c]extension' > /sys/fs/f2fs//extension_list + - Del: echo '[h/c]!extension' > /sys/fs/f2fs//extension_list + - [h] means add/del hot file extension + - [c] means add/del cold file extension What: /sys/fs/f2fs//unusable Date April 2019 Contact: "Daniel Rosenberg" -Description: - If checkpoint=disable, it displays the number of blocks that are unusable. - If checkpoint=enable it displays the enumber of blocks that would be unusable - if checkpoint=disable were to be set. +Description: If checkpoint=disable, it displays the number of blocks that + are unusable. + If checkpoint=enable it displays the enumber of blocks that + would be unusable if checkpoint=disable were to be set. What: /sys/fs/f2fs//encoding Date July 2019 Contact: "Daniel Rosenberg" -Description: - Displays name and version of the encoding set for the filesystem. - If no encoding is set, displays (none) +Description: Displays name and version of the encoding set for the filesystem. + If no encoding is set, displays (none) + +What: /sys/fs/f2fs//free_segments +Date: September 2019 +Contact: "Hridya Valsaraju" +Description: Number of free segments in disk. + +What: /sys/fs/f2fs//cp_foreground_calls +Date: September 2019 +Contact: "Hridya Valsaraju" +Description: Number of checkpoint operations performed on demand. Available when + CONFIG_F2FS_STAT_FS=y. + +What: /sys/fs/f2fs//cp_background_calls +Date: September 2019 +Contact: "Hridya Valsaraju" +Description: Number of checkpoint operations performed in the background to + free segments. Available when CONFIG_F2FS_STAT_FS=y. + +What: /sys/fs/f2fs//gc_foreground_calls +Date: September 2019 +Contact: "Hridya Valsaraju" +Description: Number of garbage collection operations performed on demand. + Available when CONFIG_F2FS_STAT_FS=y. + +What: /sys/fs/f2fs//gc_background_calls +Date: September 2019 +Contact: "Hridya Valsaraju" +Description: Number of garbage collection operations triggered in background. + Available when CONFIG_F2FS_STAT_FS=y. + +What: /sys/fs/f2fs//moved_blocks_foreground +Date: September 2019 +Contact: "Hridya Valsaraju" +Description: Number of blocks moved by garbage collection in foreground. + Available when CONFIG_F2FS_STAT_FS=y. + +What: /sys/fs/f2fs//moved_blocks_background +Date: September 2019 +Contact: "Hridya Valsaraju" +Description: Number of blocks moved by garbage collection in background. + Available when CONFIG_F2FS_STAT_FS=y. + +What: /sys/fs/f2fs//avg_vblocks +Date: September 2019 +Contact: "Hridya Valsaraju" +Description: Average number of valid blocks. + Available when CONFIG_F2FS_STAT_FS=y. diff --git a/Documentation/ABI/testing/sysfs-kernel-ion b/Documentation/ABI/testing/sysfs-kernel-ion new file mode 100644 index 000000000000..f57f970574ae --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-ion @@ -0,0 +1,27 @@ +What: /sys/kernel/ion +Date: Dec 2019 +KernelVersion: 4.14.158 +Contact: Suren Baghdasaryan , + Sandeep Patil +Description: + The /sys/kernel/ion directory contains a snapshot of the + internal state of ION memory heaps and pools. +Users: kernel memory tuning tools + +What: /sys/kernel/ion/total_heaps_kb +Date: Dec 2019 +KernelVersion: 4.14.158 +Contact: Suren Baghdasaryan , + Sandeep Patil +Description: + The total_heaps_kb file is read-only and specifies how much + memory in Kb is allocated to ION heaps. + +What: /sys/kernel/ion/total_pools_kb +Date: Dec 2019 +KernelVersion: 4.14.158 +Contact: Suren Baghdasaryan , + Sandeep Patil +Description: + The total_pools_kb file is read-only and specifies how much + memory in Kb is allocated to ION pools. diff --git a/Documentation/admin-guide/dynamic-debug-howto.rst b/Documentation/admin-guide/dynamic-debug-howto.rst index 12278a926370..36b7e740558f 100644 --- a/Documentation/admin-guide/dynamic-debug-howto.rst +++ b/Documentation/admin-guide/dynamic-debug-howto.rst @@ -54,6 +54,9 @@ If you make a mistake with the syntax, the write will fail thus:: /dynamic_debug/control -bash: echo: write error: Invalid argument +Note, for systems without 'debugfs' enabled, the control file can be +found in ``/proc/dynamic_debug/control``. + Viewing Dynamic Debug Behaviour =============================== diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst index 49311f3da6f2..0795e3c2643f 100644 --- a/Documentation/admin-guide/hw-vuln/index.rst +++ b/Documentation/admin-guide/hw-vuln/index.rst @@ -12,3 +12,5 @@ are configurable at compile, boot or run time. spectre l1tf mds + tsx_async_abort + multihit.rst diff --git a/Documentation/admin-guide/hw-vuln/mds.rst b/Documentation/admin-guide/hw-vuln/mds.rst index e3a796c0d3a2..2d19c9f4c1fe 100644 --- a/Documentation/admin-guide/hw-vuln/mds.rst +++ b/Documentation/admin-guide/hw-vuln/mds.rst @@ -265,8 +265,11 @@ time with the option "mds=". The valid arguments for this option are: ============ ============================================================= -Not specifying this option is equivalent to "mds=full". - +Not specifying this option is equivalent to "mds=full". For processors +that are affected by both TAA (TSX Asynchronous Abort) and MDS, +specifying just "mds=off" without an accompanying "tsx_async_abort=off" +will have no effect as the same mitigation is used for both +vulnerabilities. Mitigation selection guide -------------------------- diff --git a/Documentation/admin-guide/hw-vuln/multihit.rst b/Documentation/admin-guide/hw-vuln/multihit.rst new file mode 100644 index 000000000000..ba9988d8bce5 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/multihit.rst @@ -0,0 +1,163 @@ +iTLB multihit +============= + +iTLB multihit is an erratum where some processors may incur a machine check +error, possibly resulting in an unrecoverable CPU lockup, when an +instruction fetch hits multiple entries in the instruction TLB. This can +occur when the page size is changed along with either the physical address +or cache type. A malicious guest running on a virtualized system can +exploit this erratum to perform a denial of service attack. + + +Affected processors +------------------- + +Variations of this erratum are present on most Intel Core and Xeon processor +models. The erratum is not present on: + + - non-Intel processors + + - Some Atoms (Airmont, Bonnell, Goldmont, GoldmontPlus, Saltwell, Silvermont) + + - Intel processors that have the PSCHANGE_MC_NO bit set in the + IA32_ARCH_CAPABILITIES MSR. + + +Related CVEs +------------ + +The following CVE entry is related to this issue: + + ============== ================================================= + CVE-2018-12207 Machine Check Error Avoidance on Page Size Change + ============== ================================================= + + +Problem +------- + +Privileged software, including OS and virtual machine managers (VMM), are in +charge of memory management. A key component in memory management is the control +of the page tables. Modern processors use virtual memory, a technique that creates +the illusion of a very large memory for processors. This virtual space is split +into pages of a given size. Page tables translate virtual addresses to physical +addresses. + +To reduce latency when performing a virtual to physical address translation, +processors include a structure, called TLB, that caches recent translations. +There are separate TLBs for instruction (iTLB) and data (dTLB). + +Under this errata, instructions are fetched from a linear address translated +using a 4 KB translation cached in the iTLB. Privileged software modifies the +paging structure so that the same linear address using large page size (2 MB, 4 +MB, 1 GB) with a different physical address or memory type. After the page +structure modification but before the software invalidates any iTLB entries for +the linear address, a code fetch that happens on the same linear address may +cause a machine-check error which can result in a system hang or shutdown. + + +Attack scenarios +---------------- + +Attacks against the iTLB multihit erratum can be mounted from malicious +guests in a virtualized system. + + +iTLB multihit system information +-------------------------------- + +The Linux kernel provides a sysfs interface to enumerate the current iTLB +multihit status of the system:whether the system is vulnerable and which +mitigations are active. The relevant sysfs file is: + +/sys/devices/system/cpu/vulnerabilities/itlb_multihit + +The possible values in this file are: + +.. list-table:: + + * - Not affected + - The processor is not vulnerable. + * - KVM: Mitigation: Split huge pages + - Software changes mitigate this issue. + * - KVM: Vulnerable + - The processor is vulnerable, but no mitigation enabled + + +Enumeration of the erratum +-------------------------------- + +A new bit has been allocated in the IA32_ARCH_CAPABILITIES (PSCHANGE_MC_NO) msr +and will be set on CPU's which are mitigated against this issue. + + ======================================= =========== =============================== + IA32_ARCH_CAPABILITIES MSR Not present Possibly vulnerable,check model + IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '0' Likely vulnerable,check model + IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '1' Not vulnerable + ======================================= =========== =============================== + + +Mitigation mechanism +------------------------- + +This erratum can be mitigated by restricting the use of large page sizes to +non-executable pages. This forces all iTLB entries to be 4K, and removes +the possibility of multiple hits. + +In order to mitigate the vulnerability, KVM initially marks all huge pages +as non-executable. If the guest attempts to execute in one of those pages, +the page is broken down into 4K pages, which are then marked executable. + +If EPT is disabled or not available on the host, KVM is in control of TLB +flushes and the problematic situation cannot happen. However, the shadow +EPT paging mechanism used by nested virtualization is vulnerable, because +the nested guest can trigger multiple iTLB hits by modifying its own +(non-nested) page tables. For simplicity, KVM will make large pages +non-executable in all shadow paging modes. + +Mitigation control on the kernel command line and KVM - module parameter +------------------------------------------------------------------------ + +The KVM hypervisor mitigation mechanism for marking huge pages as +non-executable can be controlled with a module parameter "nx_huge_pages=". +The kernel command line allows to control the iTLB multihit mitigations at +boot time with the option "kvm.nx_huge_pages=". + +The valid arguments for these options are: + + ========== ================================================================ + force Mitigation is enabled. In this case, the mitigation implements + non-executable huge pages in Linux kernel KVM module. All huge + pages in the EPT are marked as non-executable. + If a guest attempts to execute in one of those pages, the page is + broken down into 4K pages, which are then marked executable. + + off Mitigation is disabled. + + auto Enable mitigation only if the platform is affected and the kernel + was not booted with the "mitigations=off" command line parameter. + This is the default option. + ========== ================================================================ + + +Mitigation selection guide +-------------------------- + +1. No virtualization in use +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + The system is protected by the kernel unconditionally and no further + action is required. + +2. Virtualization with trusted guests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + If the guest comes from a trusted source, you may assume that the guest will + not attempt to maliciously exploit these errata and no further action is + required. + +3. Virtualization with untrusted guests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + If the guest comes from an untrusted source, the guest host kernel will need + to apply iTLB multihit mitigation via the kernel command line or kvm + module parameter. diff --git a/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst new file mode 100644 index 000000000000..af6865b822d2 --- /dev/null +++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst @@ -0,0 +1,279 @@ +.. SPDX-License-Identifier: GPL-2.0 + +TAA - TSX Asynchronous Abort +====================================== + +TAA is a hardware vulnerability that allows unprivileged speculative access to +data which is available in various CPU internal buffers by using asynchronous +aborts within an Intel TSX transactional region. + +Affected processors +------------------- + +This vulnerability only affects Intel processors that support Intel +Transactional Synchronization Extensions (TSX) when the TAA_NO bit (bit 8) +is 0 in the IA32_ARCH_CAPABILITIES MSR. On processors where the MDS_NO bit +(bit 5) is 0 in the IA32_ARCH_CAPABILITIES MSR, the existing MDS mitigations +also mitigate against TAA. + +Whether a processor is affected or not can be read out from the TAA +vulnerability file in sysfs. See :ref:`tsx_async_abort_sys_info`. + +Related CVEs +------------ + +The following CVE entry is related to this TAA issue: + + ============== ===== =================================================== + CVE-2019-11135 TAA TSX Asynchronous Abort (TAA) condition on some + microprocessors utilizing speculative execution may + allow an authenticated user to potentially enable + information disclosure via a side channel with + local access. + ============== ===== =================================================== + +Problem +------- + +When performing store, load or L1 refill operations, processors write +data into temporary microarchitectural structures (buffers). The data in +those buffers can be forwarded to load operations as an optimization. + +Intel TSX is an extension to the x86 instruction set architecture that adds +hardware transactional memory support to improve performance of multi-threaded +software. TSX lets the processor expose and exploit concurrency hidden in an +application due to dynamically avoiding unnecessary synchronization. + +TSX supports atomic memory transactions that are either committed (success) or +aborted. During an abort, operations that happened within the transactional region +are rolled back. An asynchronous abort takes place, among other options, when a +different thread accesses a cache line that is also used within the transactional +region when that access might lead to a data race. + +Immediately after an uncompleted asynchronous abort, certain speculatively +executed loads may read data from those internal buffers and pass it to dependent +operations. This can be then used to infer the value via a cache side channel +attack. + +Because the buffers are potentially shared between Hyper-Threads cross +Hyper-Thread attacks are possible. + +The victim of a malicious actor does not need to make use of TSX. Only the +attacker needs to begin a TSX transaction and raise an asynchronous abort +which in turn potenitally leaks data stored in the buffers. + +More detailed technical information is available in the TAA specific x86 +architecture section: :ref:`Documentation/x86/tsx_async_abort.rst `. + + +Attack scenarios +---------------- + +Attacks against the TAA vulnerability can be implemented from unprivileged +applications running on hosts or guests. + +As for MDS, the attacker has no control over the memory addresses that can +be leaked. Only the victim is responsible for bringing data to the CPU. As +a result, the malicious actor has to sample as much data as possible and +then postprocess it to try to infer any useful information from it. + +A potential attacker only has read access to the data. Also, there is no direct +privilege escalation by using this technique. + + +.. _tsx_async_abort_sys_info: + +TAA system information +----------------------- + +The Linux kernel provides a sysfs interface to enumerate the current TAA status +of mitigated systems. The relevant sysfs file is: + +/sys/devices/system/cpu/vulnerabilities/tsx_async_abort + +The possible values in this file are: + +.. list-table:: + + * - 'Vulnerable' + - The CPU is affected by this vulnerability and the microcode and kernel mitigation are not applied. + * - 'Vulnerable: Clear CPU buffers attempted, no microcode' + - The system tries to clear the buffers but the microcode might not support the operation. + * - 'Mitigation: Clear CPU buffers' + - The microcode has been updated to clear the buffers. TSX is still enabled. + * - 'Mitigation: TSX disabled' + - TSX is disabled. + * - 'Not affected' + - The CPU is not affected by this issue. + +.. _ucode_needed: + +Best effort mitigation mode +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If the processor is vulnerable, but the availability of the microcode-based +mitigation mechanism is not advertised via CPUID the kernel selects a best +effort mitigation mode. This mode invokes the mitigation instructions +without a guarantee that they clear the CPU buffers. + +This is done to address virtualization scenarios where the host has the +microcode update applied, but the hypervisor is not yet updated to expose the +CPUID to the guest. If the host has updated microcode the protection takes +effect; otherwise a few CPU cycles are wasted pointlessly. + +The state in the tsx_async_abort sysfs file reflects this situation +accordingly. + + +Mitigation mechanism +-------------------- + +The kernel detects the affected CPUs and the presence of the microcode which is +required. If a CPU is affected and the microcode is available, then the kernel +enables the mitigation by default. + + +The mitigation can be controlled at boot time via a kernel command line option. +See :ref:`taa_mitigation_control_command_line`. + +.. _virt_mechanism: + +Virtualization mitigation +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Affected systems where the host has TAA microcode and TAA is mitigated by +having disabled TSX previously, are not vulnerable regardless of the status +of the VMs. + +In all other cases, if the host either does not have the TAA microcode or +the kernel is not mitigated, the system might be vulnerable. + + +.. _taa_mitigation_control_command_line: + +Mitigation control on the kernel command line +--------------------------------------------- + +The kernel command line allows to control the TAA mitigations at boot time with +the option "tsx_async_abort=". The valid arguments for this option are: + + ============ ============================================================= + off This option disables the TAA mitigation on affected platforms. + If the system has TSX enabled (see next parameter) and the CPU + is affected, the system is vulnerable. + + full TAA mitigation is enabled. If TSX is enabled, on an affected + system it will clear CPU buffers on ring transitions. On + systems which are MDS-affected and deploy MDS mitigation, + TAA is also mitigated. Specifying this option on those + systems will have no effect. + + full,nosmt The same as tsx_async_abort=full, with SMT disabled on + vulnerable CPUs that have TSX enabled. This is the complete + mitigation. When TSX is disabled, SMT is not disabled because + CPU is not vulnerable to cross-thread TAA attacks. + ============ ============================================================= + +Not specifying this option is equivalent to "tsx_async_abort=full". For +processors that are affected by both TAA and MDS, specifying just +"tsx_async_abort=off" without an accompanying "mds=off" will have no +effect as the same mitigation is used for both vulnerabilities. + +The kernel command line also allows to control the TSX feature using the +parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used +to control the TSX feature and the enumeration of the TSX feature bits (RTM +and HLE) in CPUID. + +The valid options are: + + ============ ============================================================= + off Disables TSX on the system. + + Note that this option takes effect only on newer CPUs which are + not vulnerable to MDS, i.e., have MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 + and which get the new IA32_TSX_CTRL MSR through a microcode + update. This new MSR allows for the reliable deactivation of + the TSX functionality. + + on Enables TSX. + + Although there are mitigations for all known security + vulnerabilities, TSX has been known to be an accelerator for + several previous speculation-related CVEs, and so there may be + unknown security risks associated with leaving it enabled. + + auto Disables TSX if X86_BUG_TAA is present, otherwise enables TSX + on the system. + ============ ============================================================= + +Not specifying this option is equivalent to "tsx=off". + +The following combinations of the "tsx_async_abort" and "tsx" are possible. For +affected platforms tsx=auto is equivalent to tsx=off and the result will be: + + ========= ========================== ========================================= + tsx=on tsx_async_abort=full The system will use VERW to clear CPU + buffers. Cross-thread attacks are still + possible on SMT machines. + tsx=on tsx_async_abort=full,nosmt As above, cross-thread attacks on SMT + mitigated. + tsx=on tsx_async_abort=off The system is vulnerable. + tsx=off tsx_async_abort=full TSX might be disabled if microcode + provides a TSX control MSR. If so, + system is not vulnerable. + tsx=off tsx_async_abort=full,nosmt Ditto + tsx=off tsx_async_abort=off ditto + ========= ========================== ========================================= + + +For unaffected platforms "tsx=on" and "tsx_async_abort=full" does not clear CPU +buffers. For platforms without TSX control (MSR_IA32_ARCH_CAPABILITIES.MDS_NO=0) +"tsx" command line argument has no effect. + +For the affected platforms below table indicates the mitigation status for the +combinations of CPUID bit MD_CLEAR and IA32_ARCH_CAPABILITIES MSR bits MDS_NO +and TSX_CTRL_MSR. + + ======= ========= ============= ======================================== + MDS_NO MD_CLEAR TSX_CTRL_MSR Status + ======= ========= ============= ======================================== + 0 0 0 Vulnerable (needs microcode) + 0 1 0 MDS and TAA mitigated via VERW + 1 1 0 MDS fixed, TAA vulnerable if TSX enabled + because MD_CLEAR has no meaning and + VERW is not guaranteed to clear buffers + 1 X 1 MDS fixed, TAA can be mitigated by + VERW or TSX_CTRL_MSR + ======= ========= ============= ======================================== + +Mitigation selection guide +-------------------------- + +1. Trusted userspace and guests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If all user space applications are from a trusted source and do not execute +untrusted code which is supplied externally, then the mitigation can be +disabled. The same applies to virtualized environments with trusted guests. + + +2. Untrusted userspace and guests +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If there are untrusted applications or guests on the system, enabling TSX +might allow a malicious actor to leak data from the host or from other +processes running on the same physical core. + +If the microcode is available and the TSX is disabled on the host, attacks +are prevented in a virtualized environment as well, even if the VMs do not +explicitly enable the mitigation. + + +.. _taa_default_mitigations: + +Default mitigations +------------------- + +The kernel's default action for vulnerable processors is: + + - Deploy TSX disable mitigation (tsx_async_abort=full tsx=off). diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 551b4cc8fc5c..20ff4fc30c71 100755 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -137,6 +137,10 @@ dynamic table installation which will install SSDT tables to /sys/firmware/acpi/tables/dynamic. + acpi_no_watchdog [HW,ACPI,WDT] + Ignore the ACPI-based watchdog interface (WDAT) and let + a native driver control the watchdog device instead. + acpi_rsdp= [ACPI,EFI,KEXEC] Pass the RSDP address to the kernel, mostly used on machines running EFI runtime service to boot the @@ -1854,6 +1858,12 @@ Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y, the default is off. + kpti= [ARM64] Control page table isolation of user + and kernel address spaces. + Default: enabled on cores which need mitigation. + 0: force disabled + 1: force enabled + kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs. Default is 0 (don't ignore, but inject #GP) @@ -1861,6 +1871,25 @@ KVM MMU at runtime. Default is 0 (off) + kvm.nx_huge_pages= + [KVM] Controls the software workaround for the + X86_BUG_ITLB_MULTIHIT bug. + force : Always deploy workaround. + off : Never deploy workaround. + auto : Deploy workaround based on the presence of + X86_BUG_ITLB_MULTIHIT. + + Default is 'auto'. + + If the software workaround is enabled for the host, + guests do need not to enable it for nested guests. + + kvm.nx_huge_pages_recovery_ratio= + [KVM] Controls how many 4KiB pages are periodically zapped + back to huge pages. 0 disables the recovery, otherwise if + the value is N KVM will zap 1/Nth of the 4KiB pages every + minute. The default is 60. + kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM. Default is 1 (enabled) @@ -2223,6 +2252,38 @@ Format: , Specifies range of consoles to be captured by the MDA. + mds= [X86,INTEL] + Control mitigation for the Micro-architectural Data + Sampling (MDS) vulnerability. + + Certain CPUs are vulnerable to an exploit against CPU + internal buffers which can forward information to a + disclosure gadget under certain conditions. + + In vulnerable processors, the speculatively + forwarded data can be used in a cache side channel + attack, to access data to which the attacker does + not have direct access. + + This parameter controls the MDS mitigation. The + options are: + + full - Enable MDS mitigation on vulnerable CPUs + full,nosmt - Enable MDS mitigation and disable + SMT on vulnerable CPUs + off - Unconditionally disable MDS mitigation + + On TAA-affected machines, mds=off can be prevented by + an active TAA mitigation as both vulnerabilities are + mitigated with the same mechanism so in order to disable + this mitigation, you need to specify tsx_async_abort=off + too. + + Not specifying this option is equivalent to + mds=full. + + For details see: Documentation/admin-guide/hw-vuln/mds.rst + mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory Amount of memory to be used when the kernel is not able to see the whole system memory or for test. @@ -2372,8 +2433,8 @@ http://repo.or.cz/w/linux-2.6/mini2440.git mitigations= - [X86,PPC,S390] Control optional mitigations for CPU - vulnerabilities. This is a set of curated, + [X86,PPC,S390,ARM64] Control optional mitigations for + CPU vulnerabilities. This is a set of curated, arch-independent options, each of which is an aggregation of existing arch-specific options. @@ -2382,14 +2443,23 @@ improves system performance, but it may also expose users to several CPU vulnerabilities. Equivalent to: nopti [X86,PPC] + kpti=0 [ARM64] nospectre_v1 [PPC] nobp=0 [S390] nospectre_v1 [X86] - nospectre_v2 [X86,PPC,S390] + nospectre_v2 [X86,PPC,S390,ARM64] spectre_v2_user=off [X86] spec_store_bypass_disable=off [X86,PPC] + ssbd=force-off [ARM64] l1tf=off [X86] mds=off [X86] + tsx_async_abort=off [X86] + kvm.nx_huge_pages=off [X86] + + Exceptions: + This does not have any effect on + kvm.nx_huge_pages when + kvm.nx_huge_pages=force. auto (default) Mitigate all CPU vulnerabilities, but leave SMT @@ -2405,6 +2475,7 @@ be fully mitigated, even if it means losing SMT. Equivalent to: l1tf=flush,nosmt [X86] mds=full,nosmt [X86] + tsx_async_abort=full,nosmt [X86] mminit_loglevel= [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this @@ -2728,10 +2799,10 @@ (bounds check bypass). With this option data leaks are possible in the system. - nospectre_v2 [X86,PPC_FSL_BOOK3E] Disable all mitigations for the Spectre variant 2 - (indirect branch prediction) vulnerability. System may - allow data leaks with this option, which is equivalent - to spectre_v2=off. + nospectre_v2 [X86,PPC_FSL_BOOK3E,ARM64] Disable all mitigations for + the Spectre variant 2 (indirect branch prediction) + vulnerability. System may allow data leaks with this + option. nospec_store_bypass_disable [HW] Disable all mitigations for the Speculative Store Bypass vulnerability @@ -4490,6 +4561,76 @@ platforms where RDTSC is slow and this accounting can add overhead. + tsx= [X86] Control Transactional Synchronization + Extensions (TSX) feature in Intel processors that + support TSX control. + + This parameter controls the TSX feature. The options are: + + on - Enable TSX on the system. Although there are + mitigations for all known security vulnerabilities, + TSX has been known to be an accelerator for + several previous speculation-related CVEs, and + so there may be unknown security risks associated + with leaving it enabled. + + off - Disable TSX on the system. (Note that this + option takes effect only on newer CPUs which are + not vulnerable to MDS, i.e., have + MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 and which get + the new IA32_TSX_CTRL MSR through a microcode + update. This new MSR allows for the reliable + deactivation of the TSX functionality.) + + auto - Disable TSX if X86_BUG_TAA is present, + otherwise enable TSX on the system. + + Not specifying this option is equivalent to tsx=off. + + See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst + for more details. + + tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async + Abort (TAA) vulnerability. + + Similar to Micro-architectural Data Sampling (MDS) + certain CPUs that support Transactional + Synchronization Extensions (TSX) are vulnerable to an + exploit against CPU internal buffers which can forward + information to a disclosure gadget under certain + conditions. + + In vulnerable processors, the speculatively forwarded + data can be used in a cache side channel attack, to + access data to which the attacker does not have direct + access. + + This parameter controls the TAA mitigation. The + options are: + + full - Enable TAA mitigation on vulnerable CPUs + if TSX is enabled. + + full,nosmt - Enable TAA mitigation and disable SMT on + vulnerable CPUs. If TSX is disabled, SMT + is not disabled because CPU is not + vulnerable to cross-thread TAA attacks. + off - Unconditionally disable TAA mitigation + + On MDS-affected machines, tsx_async_abort=off can be + prevented by an active MDS mitigation as both vulnerabilities + are mitigated with the same mechanism so in order to disable + this mitigation, you need to specify mds=off too. + + Not specifying this option is equivalent to + tsx_async_abort=full. On CPUs which are MDS affected + and deploy MDS mitigation, TAA mitigation is not + required and doesn't provide any additional + mitigation. + + For details see: + Documentation/admin-guide/hw-vuln/tsx_async_abort.rst + turbografx.map[2|3]= [HW,JOY] TurboGraFX parallel port interface Format: @@ -4575,13 +4716,13 @@ Flags is a set of characters, each corresponding to a common usb-storage quirk flag as follows: a = SANE_SENSE (collect more than 18 bytes - of sense data); + of sense data, not on uas); b = BAD_SENSE (don't collect more than 18 - bytes of sense data); + bytes of sense data, not on uas); c = FIX_CAPACITY (decrease the reported device capacity by one sector); d = NO_READ_DISC_INFO (don't use - READ_DISC_INFO command); + READ_DISC_INFO command, not on uas); e = NO_READ_CAPACITY_16 (don't use READ_CAPACITY_16 command); f = NO_REPORT_OPCODES (don't use report opcodes @@ -4596,17 +4737,18 @@ j = NO_REPORT_LUNS (don't use report luns command, uas only); l = NOT_LOCKABLE (don't try to lock and - unlock ejectable media); + unlock ejectable media, not on uas); m = MAX_SECTORS_64 (don't transfer more - than 64 sectors = 32 KB at a time); + than 64 sectors = 32 KB at a time, + not on uas); n = INITIAL_READ10 (force a retry of the - initial READ(10) command); + initial READ(10) command, not on uas); o = CAPACITY_OK (accept the capacity - reported by the device); + reported by the device, not on uas); p = WRITE_CACHE (the device cache is ON - by default); + by default, not on uas); r = IGNORE_RESIDUE (the device reports - bogus residue values); + bogus residue values, not on uas); s = SINGLE_LUN (the device has only one Logical Unit); t = NO_ATA_1X (don't allow ATA(12) and ATA(16) @@ -4615,7 +4757,8 @@ w = NO_WP_DETECT (don't test whether the medium is write-protected). y = ALWAYS_SYNC (issue a SYNCHRONIZE_CACHE - even if the device claims no cache) + even if the device claims no cache, + not on uas) Example: quirks=0419:aaf5:rl,0421:0433:rc user_debug= [KNL,ARM] @@ -4860,6 +5003,10 @@ the unplug protocol never -- do not unplug even if version check succeeds + xen_legacy_crash [X86,XEN] + Crash from Xen panic notifier, without executing late + panic() code such as dumping handler. + xen_nopvspin [X86,XEN] Disables the ticketlock slowpath using Xen PV optimizations. diff --git a/Documentation/arm64/cpu-feature-registers.txt b/Documentation/arm64/cpu-feature-registers.txt index 011ddfc1e570..6aa3558163c5 100644 --- a/Documentation/arm64/cpu-feature-registers.txt +++ b/Documentation/arm64/cpu-feature-registers.txt @@ -111,6 +111,9 @@ infrastructure: | Name | bits | visible | |--------------------------------------------------| | RES0 | [63-48] | n | + | TS | [55-52] | y | + |--------------------------------------------------| + | FHM | [51-48] | y | |--------------------------------------------------| | DP | [47-44] | y | |--------------------------------------------------| @@ -133,8 +136,6 @@ infrastructure: | SHA1 | [11-8] | y | |--------------------------------------------------| | AES | [7-4] | y | - |--------------------------------------------------| - | RES0 | [3-0] | n | x--------------------------------------------------x @@ -142,7 +143,9 @@ infrastructure: x--------------------------------------------------x | Name | bits | visible | |--------------------------------------------------| - | RES0 | [63-28] | n | + | DIT | [51-48] | y | + |--------------------------------------------------| + | SVE | [35-32] | y | |--------------------------------------------------| | GIC | [27-24] | n | |--------------------------------------------------| @@ -193,6 +196,14 @@ infrastructure: | DPB | [3-0] | y | x--------------------------------------------------x + 5) ID_AA64MMFR2_EL1 - Memory model feature register 2 + + x--------------------------------------------------x + | Name | bits | visible | + |--------------------------------------------------| + | AT | [35-32] | y | + x--------------------------------------------------x + Appendix I: Example --------------------------- diff --git a/Documentation/arm64/tagged-address-abi.rst b/Documentation/arm64/tagged-address-abi.rst index d4a85d535bf9..4a9d9c794ee5 100644 --- a/Documentation/arm64/tagged-address-abi.rst +++ b/Documentation/arm64/tagged-address-abi.rst @@ -44,8 +44,15 @@ The AArch64 Tagged Address ABI has two stages of relaxation depending how the user addresses are used by the kernel: 1. User addresses not accessed by the kernel but used for address space - management (e.g. ``mmap()``, ``mprotect()``, ``madvise()``). The use - of valid tagged pointers in this context is always allowed. + management (e.g. ``mprotect()``, ``madvise()``). The use of valid + tagged pointers in this context is allowed with the exception of + ``brk()``, ``mmap()`` and the ``new_address`` argument to + ``mremap()`` as these have the potential to alias with existing + user addresses. + + NOTE: This behaviour changed in v5.6 and so some earlier kernels may + incorrectly accept valid tagged pointers for the ``brk()``, + ``mmap()`` and ``mremap()`` system calls. 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI relaxation is disabled by default and the application thread needs to diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX index 8d55b4bbb5e2..7300403c5a0d 100644 --- a/Documentation/block/00-INDEX +++ b/Documentation/block/00-INDEX @@ -16,6 +16,8 @@ data-integrity.txt - Block data integrity deadline-iosched.txt - Deadline IO scheduler tunables +inline-encryption.rst + - Blk-crypto internals and inline encryption ioprio.txt - Block io priorities (in CFQ scheduler) pr.txt diff --git a/Documentation/block/index.rst b/Documentation/block/index.rst new file mode 100644 index 000000000000..026addfc69bc --- /dev/null +++ b/Documentation/block/index.rst @@ -0,0 +1,26 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===== +Block +===== + +.. toctree:: + :maxdepth: 1 + + bfq-iosched + biodoc + biovecs + capability + cmdline-partition + data-integrity + deadline-iosched + inline-encryption + ioprio + kyber-iosched + null_blk + pr + queue-sysfs + request + stat + switching-sched + writeback_cache_control diff --git a/Documentation/block/inline-encryption.rst b/Documentation/block/inline-encryption.rst new file mode 100644 index 000000000000..330106b23c09 --- /dev/null +++ b/Documentation/block/inline-encryption.rst @@ -0,0 +1,183 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================= +Inline Encryption +================= + +Objective +========= + +We want to support inline encryption (IE) in the kernel. +To allow for testing, we also want a crypto API fallback when actual +IE hardware is absent. We also want IE to work with layered devices +like dm and loopback (i.e. we want to be able to use the IE hardware +of the underlying devices if present, or else fall back to crypto API +en/decryption). + + +Constraints and notes +===================== + +- IE hardware have a limited number of "keyslots" that can be programmed + with an encryption context (key, algorithm, data unit size, etc.) at any time. + One can specify a keyslot in a data request made to the device, and the + device will en/decrypt the data using the encryption context programmed into + that specified keyslot. When possible, we want to make multiple requests with + the same encryption context share the same keyslot. + +- We need a way for filesystems to specify an encryption context to use for + en/decrypting a struct bio, and a device driver (like UFS) needs to be able + to use that encryption context when it processes the bio. + +- We need a way for device drivers to expose their capabilities in a unified + way to the upper layers. + + +Design +====== + +We add a struct bio_crypt_ctx to struct bio that can represent an +encryption context, because we need to be able to pass this encryption +context from the FS layer to the device driver to act upon. + +While IE hardware works on the notion of keyslots, the FS layer has no +knowledge of keyslots - it simply wants to specify an encryption context to +use while en/decrypting a bio. + +We introduce a keyslot manager (KSM) that handles the translation from +encryption contexts specified by the FS to keyslots on the IE hardware. +This KSM also serves as the way IE hardware can expose their capabilities to +upper layers. The generic mode of operation is: each device driver that wants +to support IE will construct a KSM and set it up in its struct request_queue. +Upper layers that want to use IE on this device can then use this KSM in +the device's struct request_queue to translate an encryption context into +a keyslot. The presence of the KSM in the request queue shall be used to mean +that the device supports IE. + +On the device driver end of the interface, the device driver needs to tell the +KSM how to actually manipulate the IE hardware in the device to do things like +programming the crypto key into the IE hardware into a particular keyslot. All +this is achieved through the :c:type:`struct keyslot_mgmt_ll_ops` that the +device driver passes to the KSM when creating it. + +It uses refcounts to track which keyslots are idle (either they have no +encryption context programmed, or there are no in-flight struct bios +referencing that keyslot). When a new encryption context needs a keyslot, it +tries to find a keyslot that has already been programmed with the same +encryption context, and if there is no such keyslot, it evicts the least +recently used idle keyslot and programs the new encryption context into that +one. If no idle keyslots are available, then the caller will sleep until there +is at least one. + + +Blk-crypto +========== + +The above is sufficient for simple cases, but does not work if there is a +need for a crypto API fallback, or if we are want to use IE with layered +devices. To these ends, we introduce blk-crypto. Blk-crypto allows us to +present a unified view of encryption to the FS (so FS only needs to specify +an encryption context and not worry about keyslots at all), and blk-crypto +can decide whether to delegate the en/decryption to IE hardware or to the +crypto API. Blk-crypto maintains an internal KSM that serves as the crypto +API fallback. + +Blk-crypto needs to ensure that the encryption context is programmed into the +"correct" keyslot manager for IE. If a bio is submitted to a layered device +that eventually passes the bio down to a device that really does support IE, we +want the encryption context to be programmed into a keyslot for the KSM of the +device with IE support. However, blk-crypto does not know a priori whether a +particular device is the final device in the layering structure for a bio or +not. So in the case that a particular device does not support IE, since it is +possibly the final destination device for the bio, if the bio requires +encryption (i.e. the bio is doing a write operation), blk-crypto must fallback +to the crypto API *before* sending the bio to the device. + +Blk-crypto ensures that: + +- The bio's encryption context is programmed into a keyslot in the KSM of the + request queue that the bio is being submitted to (or the crypto API fallback + KSM if the request queue doesn't have a KSM), and that the ``bc_ksm`` + in the ``bi_crypt_context`` is set to this KSM + +- That the bio has its own individual reference to the keyslot in this KSM. + Once the bio passes through blk-crypto, its encryption context is programmed + in some KSM. The "its own individual reference to the keyslot" ensures that + keyslots can be released by each bio independently of other bios while + ensuring that the bio has a valid reference to the keyslot when, for e.g., the + crypto API fallback KSM in blk-crypto performs crypto on the device's behalf. + The individual references are ensured by increasing the refcount for the + keyslot in the ``bc_ksm`` when a bio with a programmed encryption + context is cloned. + + +What blk-crypto does on bio submission +-------------------------------------- + +**Case 1:** blk-crypto is given a bio with only an encryption context that hasn't +been programmed into any keyslot in any KSM (for e.g. a bio from the FS). + In this case, blk-crypto will program the encryption context into the KSM of the + request queue the bio is being submitted to (and if this KSM does not exist, + then it will program it into blk-crypto's internal KSM for crypto API + fallback). The KSM that this encryption context was programmed into is stored + as the ``bc_ksm`` in the bio's ``bi_crypt_context``. + +**Case 2:** blk-crypto is given a bio whose encryption context has already been +programmed into a keyslot in the *crypto API fallback* KSM. + In this case, blk-crypto does nothing; it treats the bio as not having + specified an encryption context. Note that we cannot do here what we will do + in Case 3 because we would have already encrypted the bio via the crypto API + by this point. + +**Case 3:** blk-crypto is given a bio whose encryption context has already been +programmed into a keyslot in some KSM (that is *not* the crypto API fallback +KSM). + In this case, blk-crypto first releases that keyslot from that KSM and then + treats the bio as in Case 1. + +This way, when a device driver is processing a bio, it can be sure that +the bio's encryption context has been programmed into some KSM (either the +device driver's request queue's KSM, or blk-crypto's crypto API fallback KSM). +It then simply needs to check if the bio's ``bc_ksm`` is the device's +request queue's KSM. If so, then it should proceed with IE. If not, it should +simply do nothing with respect to crypto, because some other KSM (perhaps the +blk-crypto crypto API fallback KSM) is handling the en/decryption. + +Blk-crypto will release the keyslot that is being held by the bio (and also +decrypt it if the bio is using the crypto API fallback KSM) once +``bio_remaining_done`` returns true for the bio. + + +Layered Devices +=============== + +Layered devices that wish to support IE need to create their own keyslot +manager for their request queue, and expose whatever functionality they choose. +When a layered device wants to pass a bio to another layer (either by +resubmitting the same bio, or by submitting a clone), it doesn't need to do +anything special because the bio (or the clone) will once again pass through +blk-crypto, which will work as described in Case 3. If a layered device wants +for some reason to do the IO by itself instead of passing it on to a child +device, but it also chose to expose IE capabilities by setting up a KSM in its +request queue, it is then responsible for en/decrypting the data itself. In +such cases, the device can choose to call the blk-crypto function +``blk_crypto_fallback_to_kernel_crypto_api`` (TODO: Not yet implemented), which will +cause the en/decryption to be done via the crypto API fallback. + + +Future Optimizations for layered devices +======================================== + +Creating a keyslot manager for the layered device uses up memory for each +keyslot, and in general, a layered device (like dm-linear) merely passes the +request on to a "child" device, so the keyslots in the layered device itself +might be completely unused. We can instead define a new type of KSM; the +"passthrough KSM", that layered devices can use to let blk-crypto know that +this layered device *will* pass the bio to some child device (and hence +through blk-crypto again, at which point blk-crypto can program the encryption +context, instead of programming it into the layered device's KSM). Again, if +the device "lies" and decides to do the IO itself instead of passing it on to +a child device, it is responsible for doing the en/decryption (and can choose +to call ``blk_crypto_fallback_to_kernel_crypto_api``). Another use case for the +"passthrough KSM" is for IE devices that want to manage their own keyslots/do +not have a limited number of keyslots. diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst index 44886c91e112..f254173b180f 100644 --- a/Documentation/dev-tools/kcov.rst +++ b/Documentation/dev-tools/kcov.rst @@ -12,19 +12,31 @@ To achieve this goal it does not collect coverage in soft/hard interrupts and instrumentation of some inherently non-deterministic parts of kernel is disabled (e.g. scheduler, locking). -Usage ------ +kcov is also able to collect comparison operands from the instrumented code +(this feature currently requires that the kernel is compiled with clang). + +Prerequisites +------------- Configure the kernel with:: CONFIG_KCOV=y CONFIG_KCOV requires gcc built on revision 231296 or later. + +If the comparison operands need to be collected, set:: + + CONFIG_KCOV_ENABLE_COMPARISONS=y + Profiling data will only become accessible once debugfs has been mounted:: mount -t debugfs none /sys/kernel/debug -The following program demonstrates kcov usage from within a test program: +Coverage collection +------------------- + +The following program demonstrates coverage collection from within a test +program using kcov: .. code-block:: c @@ -44,6 +56,9 @@ The following program demonstrates kcov usage from within a test program: #define KCOV_DISABLE _IO('c', 101) #define COVER_SIZE (64<<10) + #define KCOV_TRACE_PC 0 + #define KCOV_TRACE_CMP 1 + int main(int argc, char **argv) { int fd; @@ -64,7 +79,7 @@ The following program demonstrates kcov usage from within a test program: if ((void*)cover == MAP_FAILED) perror("mmap"), exit(1); /* Enable coverage collection on the current thread. */ - if (ioctl(fd, KCOV_ENABLE, 0)) + if (ioctl(fd, KCOV_ENABLE, KCOV_TRACE_PC)) perror("ioctl"), exit(1); /* Reset coverage from the tail of the ioctl() call. */ __atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED); @@ -111,3 +126,208 @@ The interface is fine-grained to allow efficient forking of test processes. That is, a parent process opens /sys/kernel/debug/kcov, enables trace mode, mmaps coverage buffer and then forks child processes in a loop. Child processes only need to enable coverage (disable happens automatically on thread end). + +Comparison operands collection +------------------------------ + +Comparison operands collection is similar to coverage collection: + +.. code-block:: c + + /* Same includes and defines as above. */ + + /* Number of 64-bit words per record. */ + #define KCOV_WORDS_PER_CMP 4 + + /* + * The format for the types of collected comparisons. + * + * Bit 0 shows whether one of the arguments is a compile-time constant. + * Bits 1 & 2 contain log2 of the argument size, up to 8 bytes. + */ + + #define KCOV_CMP_CONST (1 << 0) + #define KCOV_CMP_SIZE(n) ((n) << 1) + #define KCOV_CMP_MASK KCOV_CMP_SIZE(3) + + int main(int argc, char **argv) + { + int fd; + uint64_t *cover, type, arg1, arg2, is_const, size; + unsigned long n, i; + + fd = open("/sys/kernel/debug/kcov", O_RDWR); + if (fd == -1) + perror("open"), exit(1); + if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE)) + perror("ioctl"), exit(1); + /* + * Note that the buffer pointer is of type uint64_t*, because all + * the comparison operands are promoted to uint64_t. + */ + cover = (uint64_t *)mmap(NULL, COVER_SIZE * sizeof(unsigned long), + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if ((void*)cover == MAP_FAILED) + perror("mmap"), exit(1); + /* Note KCOV_TRACE_CMP instead of KCOV_TRACE_PC. */ + if (ioctl(fd, KCOV_ENABLE, KCOV_TRACE_CMP)) + perror("ioctl"), exit(1); + __atomic_store_n(&cover[0], 0, __ATOMIC_RELAXED); + read(-1, NULL, 0); + /* Read number of comparisons collected. */ + n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED); + for (i = 0; i < n; i++) { + type = cover[i * KCOV_WORDS_PER_CMP + 1]; + /* arg1 and arg2 - operands of the comparison. */ + arg1 = cover[i * KCOV_WORDS_PER_CMP + 2]; + arg2 = cover[i * KCOV_WORDS_PER_CMP + 3]; + /* ip - caller address. */ + ip = cover[i * KCOV_WORDS_PER_CMP + 4]; + /* size of the operands. */ + size = 1 << ((type & KCOV_CMP_MASK) >> 1); + /* is_const - true if either operand is a compile-time constant.*/ + is_const = type & KCOV_CMP_CONST; + printf("ip: 0x%lx type: 0x%lx, arg1: 0x%lx, arg2: 0x%lx, " + "size: %lu, %s\n", + ip, type, arg1, arg2, size, + is_const ? "const" : "non-const"); + } + if (ioctl(fd, KCOV_DISABLE, 0)) + perror("ioctl"), exit(1); + /* Free resources. */ + if (munmap(cover, COVER_SIZE * sizeof(unsigned long))) + perror("munmap"), exit(1); + if (close(fd)) + perror("close"), exit(1); + return 0; + } + +Note that the kcov modes (coverage collection or comparison operands) are +mutually exclusive. + +Remote coverage collection +-------------------------- + +With KCOV_ENABLE coverage is collected only for syscalls that are issued +from the current process. With KCOV_REMOTE_ENABLE it's possible to collect +coverage for arbitrary parts of the kernel code, provided that those parts +are annotated with kcov_remote_start()/kcov_remote_stop(). + +This allows to collect coverage from two types of kernel background +threads: the global ones, that are spawned during kernel boot in a limited +number of instances (e.g. one USB hub_event() worker thread is spawned per +USB HCD); and the local ones, that are spawned when a user interacts with +some kernel interface (e.g. vhost workers). + +To enable collecting coverage from a global background thread, a unique +global handle must be assigned and passed to the corresponding +kcov_remote_start() call. Then a userspace process can pass a list of such +handles to the KCOV_REMOTE_ENABLE ioctl in the handles array field of the +kcov_remote_arg struct. This will attach the used kcov device to the code +sections, that are referenced by those handles. + +Since there might be many local background threads spawned from different +userspace processes, we can't use a single global handle per annotation. +Instead, the userspace process passes a non-zero handle through the +common_handle field of the kcov_remote_arg struct. This common handle gets +saved to the kcov_handle field in the current task_struct and needs to be +passed to the newly spawned threads via custom annotations. Those threads +should in turn be annotated with kcov_remote_start()/kcov_remote_stop(). + +Internally kcov stores handles as u64 integers. The top byte of a handle +is used to denote the id of a subsystem that this handle belongs to, and +the lower 4 bytes are used to denote the id of a thread instance within +that subsystem. A reserved value 0 is used as a subsystem id for common +handles as they don't belong to a particular subsystem. The bytes 4-7 are +currently reserved and must be zero. In the future the number of bytes +used for the subsystem or handle ids might be increased. + +When a particular userspace proccess collects coverage by via a common +handle, kcov will collect coverage for each code section that is annotated +to use the common handle obtained as kcov_handle from the current +task_struct. However non common handles allow to collect coverage +selectively from different subsystems. + +.. code-block:: c + + struct kcov_remote_arg { + __u32 trace_mode; + __u32 area_size; + __u32 num_handles; + __aligned_u64 common_handle; + __aligned_u64 handles[0]; + }; + + #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long) + #define KCOV_DISABLE _IO('c', 101) + #define KCOV_REMOTE_ENABLE _IOW('c', 102, struct kcov_remote_arg) + + #define COVER_SIZE (64 << 10) + + #define KCOV_TRACE_PC 0 + + #define KCOV_SUBSYSTEM_COMMON (0x00ull << 56) + #define KCOV_SUBSYSTEM_USB (0x01ull << 56) + + #define KCOV_SUBSYSTEM_MASK (0xffull << 56) + #define KCOV_INSTANCE_MASK (0xffffffffull) + + static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst) + { + if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK) + return 0; + return subsys | inst; + } + + #define KCOV_COMMON_ID 0x42 + #define KCOV_USB_BUS_NUM 1 + + int main(int argc, char **argv) + { + int fd; + unsigned long *cover, n, i; + struct kcov_remote_arg *arg; + + fd = open("/sys/kernel/debug/kcov", O_RDWR); + if (fd == -1) + perror("open"), exit(1); + if (ioctl(fd, KCOV_INIT_TRACE, COVER_SIZE)) + perror("ioctl"), exit(1); + cover = (unsigned long*)mmap(NULL, COVER_SIZE * sizeof(unsigned long), + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if ((void*)cover == MAP_FAILED) + perror("mmap"), exit(1); + + /* Enable coverage collection via common handle and from USB bus #1. */ + arg = calloc(1, sizeof(*arg) + sizeof(uint64_t)); + if (!arg) + perror("calloc"), exit(1); + arg->trace_mode = KCOV_TRACE_PC; + arg->area_size = COVER_SIZE; + arg->num_handles = 1; + arg->common_handle = kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, + KCOV_COMMON_ID); + arg->handles[0] = kcov_remote_handle(KCOV_SUBSYSTEM_USB, + KCOV_USB_BUS_NUM); + if (ioctl(fd, KCOV_REMOTE_ENABLE, arg)) + perror("ioctl"), free(arg), exit(1); + free(arg); + + /* + * Here the user needs to trigger execution of a kernel code section + * that is either annotated with the common handle, or to trigger some + * activity on USB bus #1. + */ + sleep(2); + + n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED); + for (i = 0; i < n; i++) + printf("0x%lx\n", cover[i + 1]); + if (ioctl(fd, KCOV_DISABLE, 0)) + perror("ioctl"), exit(1); + if (munmap(cover, COVER_SIZE * sizeof(unsigned long))) + perror("munmap"), exit(1); + if (close(fd)) + perror("close"), exit(1); + return 0; + } diff --git a/Documentation/devicetree/bindings/arm/coresight.txt b/Documentation/devicetree/bindings/arm/coresight.txt index 15ac8e8dcfdf..63173479e8c3 100644 --- a/Documentation/devicetree/bindings/arm/coresight.txt +++ b/Documentation/devicetree/bindings/arm/coresight.txt @@ -73,6 +73,15 @@ its hardware characteristcs. * port or ports: same as above. +* Optional properties for all components: + + * arm,coresight-loses-context-with-cpu : boolean. Indicates that the + hardware will lose register context on CPU power down (e.g. CPUIdle). + An example of where this may be needed are systems which contain a + coresight component and CPU in the same power domain. When the CPU + powers down the coresight component also powers down and loses its + context. This property is currently only used for the ETM 4.x driver. + * Optional properties for ETM/PTMs: * arm,cp14: must be present if the system accesses ETM/PTM management @@ -84,8 +93,11 @@ its hardware characteristcs. * Optional property for TMC: * arm,buffer-size: size of contiguous buffer space for TMC ETR - (embedded trace router) + (embedded trace router). This property is obsolete. The buffer size + can be configured dynamically via buffer_size property in sysfs. + * arm,scatter-gather: boolean. Indicates that the TMC-ETR can safely + use the SG mode on this system. Example: diff --git a/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt b/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt index e96e085271c1..83f6c6a7c41c 100644 --- a/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt +++ b/Documentation/devicetree/bindings/clock/renesas,rcar-usb2-clock-sel.txt @@ -46,7 +46,7 @@ Required properties: Example (R-Car H3): usb2_clksel: clock-controller@e6590630 { - compatible = "renesas,r8a77950-rcar-usb2-clock-sel", + compatible = "renesas,r8a7795-rcar-usb2-clock-sel", "renesas,rcar-gen3-usb2-clock-sel"; reg = <0 0xe6590630 0 0x02>; clocks = <&cpg CPG_MOD 703>, <&usb_extal>, <&usb_xtal>; diff --git a/Documentation/devicetree/bindings/gnss/gnss.txt b/Documentation/devicetree/bindings/gnss/gnss.txt new file mode 100644 index 000000000000..f1e4a2ff47c5 --- /dev/null +++ b/Documentation/devicetree/bindings/gnss/gnss.txt @@ -0,0 +1,36 @@ +GNSS Receiver DT binding + +This documents the binding structure and common properties for GNSS receiver +devices. + +A GNSS receiver node is a node named "gnss" and typically resides on a serial +bus (e.g. UART, I2C or SPI). + +Please refer to the following documents for generic properties: + + Documentation/devicetree/bindings/serial/slave-device.txt + Documentation/devicetree/bindings/spi/spi-bus.txt + +Required properties: + +- compatible : A string reflecting the vendor and specific device the node + represents + +Optional properties: +- enable-gpios : GPIO used to enable the device +- timepulse-gpios : Time pulse GPIO + +Example: + +serial@1234 { + compatible = "ns16550a"; + + gnss { + compatible = "u-blox,neo-8"; + + vcc-supply = <&gnss_reg>; + timepulse-gpios = <&gpio0 16 GPIO_ACTIVE_HIGH>; + + current-speed = <4800>; + }; +}; diff --git a/Documentation/devicetree/bindings/media/i2c/adv748x.txt b/Documentation/devicetree/bindings/media/i2c/adv748x.txt index 21ffb5ed8183..54d1d3bc1869 100644 --- a/Documentation/devicetree/bindings/media/i2c/adv748x.txt +++ b/Documentation/devicetree/bindings/media/i2c/adv748x.txt @@ -73,7 +73,7 @@ Example: }; }; - port@10 { + port@a { reg = <10>; adv7482_txa: endpoint { @@ -83,7 +83,7 @@ Example: }; }; - port@11 { + port@b { reg = <11>; adv7482_txb: endpoint { diff --git a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt index 4648948f7c3b..e15589f47787 100644 --- a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt +++ b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt @@ -19,6 +19,9 @@ Optional properties: - interrupt-names: must be "mdio_done_error" when there is a share interrupt fed to this hardware block, or must be "mdio_done" for the first interrupt and "mdio_error" for the second when there are separate interrupts +- clocks: A reference to the clock supplying the MDIO bus controller +- clock-frequency: the MDIO bus clock that must be output by the MDIO bus + hardware, if absent, the default hardware values are used Child nodes of this MDIO bus controller node are standard Ethernet PHY device nodes as described in Documentation/devicetree/bindings/net/phy.txt diff --git a/Documentation/devicetree/bindings/rtc/abracon,abx80x.txt b/Documentation/devicetree/bindings/rtc/abracon,abx80x.txt index be789685a1c2..18b892d010d8 100644 --- a/Documentation/devicetree/bindings/rtc/abracon,abx80x.txt +++ b/Documentation/devicetree/bindings/rtc/abracon,abx80x.txt @@ -27,4 +27,4 @@ and valid to enable charging: - "abracon,tc-diode": should be "standard" (0.6V) or "schottky" (0.3V) - "abracon,tc-resistor": should be <0>, <3>, <6> or <11>. 0 disables the output - resistor, the other values are in ohm. + resistor, the other values are in kOhm. diff --git a/Documentation/devicetree/bindings/usb/dwc3.txt b/Documentation/devicetree/bindings/usb/dwc3.txt index 52fb41046b34..44e8bab159ad 100644 --- a/Documentation/devicetree/bindings/usb/dwc3.txt +++ b/Documentation/devicetree/bindings/usb/dwc3.txt @@ -47,6 +47,8 @@ Optional properties: from P0 to P1/P2/P3 without delay. - snps,dis-tx-ipgap-linecheck-quirk: when set, disable u2mac linestate check during HS transmit. + - snps,dis_metastability_quirk: when set, disable metastability workaround. + CAUTION: use only if you are absolutely sure of it. - snps,is-utmi-l1-suspend: true when DWC3 asserts output signal utmi_l1_suspend_n, false when asserts utmi_sleep_n - snps,hird-threshold: HIRD threshold diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 93f49f2786ca..b4de21867b96 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -235,6 +235,17 @@ checkpoint=%s[:%u[%]] Set to "disable" to turn off checkpointing. Set to "en hide up to all remaining free space. The actual space that would be unusable can be viewed at /sys/fs/f2fs//unusable This space is reclaimed once checkpoint=enable. +compress_algorithm=%s Control compress algorithm, currently f2fs supports "lzo" + and "lz4" algorithm. +compress_log_size=%u Support configuring compress cluster size, the size will + be 4KB * (1 << %u), 16KB is minimum size, also it's + default size. +compress_extension=%s Support adding specified extension, so that f2fs can enable + compression on those corresponding files, e.g. if all files + with '.ext' has high compression rate, we can set the '.ext' + on compression extension list and enable compression on + these file by default rather than to enable it via ioctl. + For other files, we can still enable compression via ioctl. ================================================================================ DEBUGFS ENTRIES @@ -259,167 +270,6 @@ The files in each per-device directory are shown in table below. Files in /sys/fs/f2fs/ (see also Documentation/ABI/testing/sysfs-fs-f2fs) -.............................................................................. - File Content - - gc_urgent_sleep_time This parameter controls sleep time for gc_urgent. - 500 ms is set by default. See above gc_urgent. - - gc_min_sleep_time This tuning parameter controls the minimum sleep - time for the garbage collection thread. Time is - in milliseconds. - - gc_max_sleep_time This tuning parameter controls the maximum sleep - time for the garbage collection thread. Time is - in milliseconds. - - gc_no_gc_sleep_time This tuning parameter controls the default sleep - time for the garbage collection thread. Time is - in milliseconds. - - gc_idle This parameter controls the selection of victim - policy for garbage collection. Setting gc_idle = 0 - (default) will disable this option. Setting - gc_idle = 1 will select the Cost Benefit approach - & setting gc_idle = 2 will select the greedy approach. - - gc_urgent This parameter controls triggering background GCs - urgently or not. Setting gc_urgent = 0 [default] - makes back to default behavior, while if it is set - to 1, background thread starts to do GC by given - gc_urgent_sleep_time interval. - - reclaim_segments This parameter controls the number of prefree - segments to be reclaimed. If the number of prefree - segments is larger than the number of segments - in the proportion to the percentage over total - volume size, f2fs tries to conduct checkpoint to - reclaim the prefree segments to free segments. - By default, 5% over total # of segments. - - max_small_discards This parameter controls the number of discard - commands that consist small blocks less than 2MB. - The candidates to be discarded are cached until - checkpoint is triggered, and issued during the - checkpoint. By default, it is disabled with 0. - - discard_granularity This parameter controls the granularity of discard - command size. It will issue discard commands iif - the size is larger than given granularity. Its - unit size is 4KB, and 4 (=16KB) is set by default. - The maximum value is 128 (=512KB). - - reserved_blocks This parameter indicates the number of blocks that - f2fs reserves internally for root. - - batched_trim_sections This parameter controls the number of sections - to be trimmed out in batch mode when FITRIM - conducts. 32 sections is set by default. - - ipu_policy This parameter controls the policy of in-place - updates in f2fs. There are five policies: - 0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR, - 0x04: F2FS_IPU_UTIL, 0x08: F2FS_IPU_SSR_UTIL, - 0x10: F2FS_IPU_FSYNC. - - min_ipu_util This parameter controls the threshold to trigger - in-place-updates. The number indicates percentage - of the filesystem utilization, and used by - F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies. - - min_fsync_blocks This parameter controls the threshold to trigger - in-place-updates when F2FS_IPU_FSYNC mode is set. - The number indicates the number of dirty pages - when fsync needs to flush on its call path. If - the number is less than this value, it triggers - in-place-updates. - - min_seq_blocks This parameter controls the threshold to serialize - write IOs issued by multiple threads in parallel. - - min_hot_blocks This parameter controls the threshold to allocate - a hot data log for pending data blocks to write. - - min_ssr_sections This parameter adds the threshold when deciding - SSR block allocation. If this is large, SSR mode - will be enabled early. - - ram_thresh This parameter controls the memory footprint used - by free nids and cached nat entries. By default, - 10 is set, which indicates 10 MB / 1 GB RAM. - - ra_nid_pages When building free nids, F2FS reads NAT blocks - ahead for speed up. Default is 0. - - dirty_nats_ratio Given dirty ratio of cached nat entries, F2FS - determines flushing them in background. - - max_victim_search This parameter controls the number of trials to - find a victim segment when conducting SSR and - cleaning operations. The default value is 4096 - which covers 8GB block address range. - - migration_granularity For large-sized sections, F2FS can stop GC given - this granularity instead of reclaiming entire - section. - - dir_level This parameter controls the directory level to - support large directory. If a directory has a - number of files, it can reduce the file lookup - latency by increasing this dir_level value. - Otherwise, it needs to decrease this value to - reduce the space overhead. The default value is 0. - - cp_interval F2FS tries to do checkpoint periodically, 60 secs - by default. - - idle_interval F2FS detects system is idle, if there's no F2FS - operations during given interval, 5 secs by - default. - - discard_idle_interval F2FS detects the discard thread is idle, given - time interval. Default is 5 secs. - - gc_idle_interval F2FS detects the GC thread is idle, given time - interval. Default is 5 secs. - - umount_discard_timeout When unmounting the disk, F2FS waits for finishing - queued discard commands which can take huge time. - This gives time out for it, 5 secs by default. - - iostat_enable This controls to enable/disable iostat in F2FS. - - readdir_ra This enables/disabled readahead of inode blocks - in readdir, and default is enabled. - - gc_pin_file_thresh This indicates how many GC can be failed for the - pinned file. If it exceeds this, F2FS doesn't - guarantee its pinning state. 2048 trials is set - by default. - - extension_list This enables to change extension_list for hot/cold - files in runtime. - - inject_rate This controls injection rate of arbitrary faults. - - inject_type This controls injection type of arbitrary faults. - - dirty_segments This shows # of dirty segments. - - lifetime_write_kbytes This shows # of data written to the disk. - - features This shows current features enabled on F2FS. - - current_reserved_blocks This shows # of blocks currently reserved. - - unusable If checkpoint=disable, this shows the number of - blocks that are unusable. - If checkpoint=enable it shows the number of blocks - that would be unusable if checkpoint=disable were - to be set. - -encoding This shows the encoding used for casefolding. - If casefolding is not enabled, returns (none) ================================================================================ USAGE @@ -777,3 +627,44 @@ zero or random data, which is useful to the below scenario where: 4. address = fibmap(fd, offset) 5. open(blkdev) 6. write(blkdev, address) + +Compression implementation +-------------------------- + +- New term named cluster is defined as basic unit of compression, file can +be divided into multiple clusters logically. One cluster includes 4 << n +(n >= 0) logical pages, compression size is also cluster size, each of +cluster can be compressed or not. + +- In cluster metadata layout, one special block address is used to indicate +cluster is compressed one or normal one, for compressed cluster, following +metadata maps cluster to [1, 4 << n - 1] physical blocks, in where f2fs +stores data including compress header and compressed data. + +- In order to eliminate write amplification during overwrite, F2FS only +support compression on write-once file, data can be compressed only when +all logical blocks in file are valid and cluster compress ratio is lower +than specified threshold. + +- To enable compression on regular inode, there are three ways: +* chattr +c file +* chattr +c dir; touch dir/file +* mount w/ -o compress_extension=ext; touch file.ext + +Compress metadata layout: + [Dnode Structure] + +-----------------------------------------------+ + | cluster 1 | cluster 2 | ......... | cluster N | + +-----------------------------------------------+ + . . . . + . . . . + . Compressed Cluster . . Normal Cluster . ++----------+---------+---------+---------+ +---------+---------+---------+---------+ +|compr flag| block 1 | block 2 | block 3 | | block 1 | block 2 | block 3 | block 4 | ++----------+---------+---------+---------+ +---------+---------+---------+---------+ + . . + . . + . . + +-------------+-------------+----------+----------------------------+ + | data length | data chksum | reserved | compressed data | + +-------------+-------------+----------+----------------------------+ diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index 8a0700af9596..fbcd185d15da 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -234,8 +234,8 @@ HKDF is more flexible, is nonreversible, and evenly distributes entropy from the master key. HKDF is also standardized and widely used by other software, whereas the AES-128-ECB based KDF is ad-hoc. -Per-file keys -------------- +Per-file encryption keys +------------------------ Since each master key can protect many files, it is necessary to "tweak" the encryption of each file so that the same plaintext in two @@ -256,13 +256,8 @@ alternative master keys or to support rotating master keys. Instead, the master keys may be wrapped in userspace, e.g. as is done by the `fscrypt `_ tool. -Including the inode number in the IVs was considered. However, it was -rejected as it would have prevented ext4 filesystems from being -resized, and by itself still wouldn't have been sufficient to prevent -the same key from being directly reused for both XTS and CTS-CBC. - -DIRECT_KEY and per-mode keys ----------------------------- +DIRECT_KEY policies +------------------- The Adiantum encryption mode (see `Encryption modes and usage`_) is suitable for both contents and filenames encryption, and it accepts @@ -273,9 +268,9 @@ is greater than that of an AES-256-XTS key. Therefore, to improve performance and save memory, for Adiantum a "direct key" configuration is supported. When the user has enabled this by setting FSCRYPT_POLICY_FLAG_DIRECT_KEY in the fscrypt policy, -per-file keys are not used. Instead, whenever any data (contents or -filenames) is encrypted, the file's 16-byte nonce is included in the -IV. Moreover: +per-file encryption keys are not used. Instead, whenever any data +(contents or filenames) is encrypted, the file's 16-byte nonce is +included in the IV. Moreover: - For v1 encryption policies, the encryption is done directly with the master key. Because of this, users **must not** use the same master @@ -285,6 +280,21 @@ IV. Moreover: key derived using the KDF. Users may use the same master key for other v2 encryption policies. +IV_INO_LBLK_64 policies +----------------------- + +When FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 is set in the fscrypt policy, +the encryption keys are derived from the master key, encryption mode +number, and filesystem UUID. This normally results in all files +protected by the same master key sharing a single contents encryption +key and a single filenames encryption key. To still encrypt different +files' data differently, inode numbers are included in the IVs. +Consequently, shrinking the filesystem may not be allowed. + +This format is optimized for use with inline encryption hardware +compliant with the UFS or eMMC standards, which support only 64 IV +bits per I/O request and may have only a small number of keyslots. + Key identifiers --------------- @@ -292,6 +302,16 @@ For master keys used for v2 encryption policies, a unique 16-byte "key identifier" is also derived using the KDF. This value is stored in the clear, since it is needed to reliably identify the key itself. +Dirhash keys +------------ + +For directories that are indexed using a secret-keyed dirhash over the +plaintext filenames, the KDF is also used to derive a 128-bit +SipHash-2-4 key per directory in order to hash filenames. This works +just like deriving a per-file encryption key, except that a different +KDF context is used. Currently, only casefolded ("case-insensitive") +encrypted directories use this style of hashing. + Encryption modes and usage ========================== @@ -308,17 +328,18 @@ If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair. AES-128-CBC was added only for low-powered embedded devices with crypto accelerators such as CAAM or CESA that do not support XTS. To -use AES-128-CBC, CONFIG_CRYPTO_SHA256 (or another SHA-256 -implementation) must be enabled so that ESSIV can be used. +use AES-128-CBC, CONFIG_CRYPTO_ESSIV and CONFIG_CRYPTO_SHA256 (or +another SHA-256 implementation) must be enabled so that ESSIV can be +used. Adiantum is a (primarily) stream cipher-based mode that is fast even on CPUs without dedicated crypto instructions. It's also a true wide-block mode, unlike XTS. It can also eliminate the need to derive -per-file keys. However, it depends on the security of two primitives, -XChaCha12 and AES-256, rather than just one. See the paper -"Adiantum: length-preserving encryption for entry-level processors" -(https://eprint.iacr.org/2018/720.pdf) for more details. To use -Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled. Also, fast +per-file encryption keys. However, it depends on the security of two +primitives, XChaCha12 and AES-256, rather than just one. See the +paper "Adiantum: length-preserving encryption for entry-level +processors" (https://eprint.iacr.org/2018/720.pdf) for more details. +To use Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled. Also, fast implementations of ChaCha and NHPoly1305 should be enabled, e.g. CONFIG_CRYPTO_CHACHA20_NEON and CONFIG_CRYPTO_NHPOLY1305_NEON for ARM. @@ -341,10 +362,16 @@ a little endian number, except that: is encrypted with AES-256 where the AES-256 key is the SHA-256 hash of the file's data encryption key. -- In the "direct key" configuration (FSCRYPT_POLICY_FLAG_DIRECT_KEY - set in the fscrypt_policy), the file's nonce is also appended to the - IV. Currently this is only allowed with the Adiantum encryption - mode. +- With `DIRECT_KEY policies`_, the file's nonce is appended to the IV. + Currently this is only allowed with the Adiantum encryption mode. + +- With `IV_INO_LBLK_64 policies`_, the logical block number is limited + to 32 bits and is placed in bits 0-31 of the IV. The inode number + (which is also limited to 32 bits) is placed in bits 32-63. + +Note that because file logical block numbers are included in the IVs, +filesystems must enforce that blocks are never shifted around within +encrypted files, e.g. via "collapse range" or "insert range". Filenames encryption -------------------- @@ -354,10 +381,10 @@ the requirements to retain support for efficient directory lookups and filenames of up to 255 bytes, the same IV is used for every filename in a directory. -However, each encrypted directory still uses a unique key; or -alternatively (for the "direct key" configuration) has the file's -nonce included in the IVs. Thus, IV reuse is limited to within a -single directory. +However, each encrypted directory still uses a unique key, or +alternatively has the file's nonce (for `DIRECT_KEY policies`_) or +inode number (for `IV_INO_LBLK_64 policies`_) included in the IVs. +Thus, IV reuse is limited to within a single directory. With CTS-CBC, the IV reuse means that when the plaintext filenames share a common prefix at least as long as the cipher block size (16 @@ -431,12 +458,15 @@ This structure must be initialized as follows: (1) for ``contents_encryption_mode`` and FSCRYPT_MODE_AES_256_CTS (4) for ``filenames_encryption_mode``. -- ``flags`` must contain a value from ```` which - identifies the amount of NUL-padding to use when encrypting - filenames. If unsure, use FSCRYPT_POLICY_FLAGS_PAD_32 (0x3). - Additionally, if the encryption modes are both - FSCRYPT_MODE_ADIANTUM, this can contain - FSCRYPT_POLICY_FLAG_DIRECT_KEY; see `DIRECT_KEY and per-mode keys`_. +- ``flags`` contains optional flags from ````: + + - FSCRYPT_POLICY_FLAGS_PAD_*: The amount of NUL padding to use when + encrypting filenames. If unsure, use FSCRYPT_POLICY_FLAGS_PAD_32 + (0x3). + - FSCRYPT_POLICY_FLAG_DIRECT_KEY: See `DIRECT_KEY policies`_. + - FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64: See `IV_INO_LBLK_64 + policies`_. This is mutually exclusive with DIRECT_KEY and is not + supported on v1 policies. - For v2 encryption policies, ``__reserved`` must be zeroed. @@ -493,7 +523,9 @@ FS_IOC_SET_ENCRYPTION_POLICY can fail with the following errors: - ``EEXIST``: the file is already encrypted with an encryption policy different from the one specified - ``EINVAL``: an invalid encryption policy was specified (invalid - version, mode(s), or flags; or reserved bits were set) + version, mode(s), or flags; or reserved bits were set); or a v1 + encryption policy was specified but the directory has the casefold + flag enabled (casefolding is incompatible with v1 policies). - ``ENOKEY``: a v2 encryption policy was specified, but the key with the specified ``master_key_identifier`` has not been added, nor does the process have the CAP_FOWNER capability in the initial user @@ -618,7 +650,8 @@ follows:: struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; - __u32 __reserved[9]; + __u32 key_id; + __u32 __reserved[8]; __u8 raw[]; }; @@ -635,6 +668,12 @@ follows:: } u; }; + struct fscrypt_provisioning_key_payload { + __u32 type; + __u32 __reserved; + __u8 raw[]; + }; + :c:type:`struct fscrypt_add_key_arg` must be zeroed, then initialized as follows: @@ -657,9 +696,26 @@ as follows: ``Documentation/security/keys/core.rst``). - ``raw_size`` must be the size of the ``raw`` key provided, in bytes. + Alternatively, if ``key_id`` is nonzero, this field must be 0, since + in that case the size is implied by the specified Linux keyring key. + +- ``key_id`` is 0 if the raw key is given directly in the ``raw`` + field. Otherwise ``key_id`` is the ID of a Linux keyring key of + type "fscrypt-provisioning" whose payload is a :c:type:`struct + fscrypt_provisioning_key_payload` whose ``raw`` field contains the + raw key and whose ``type`` field matches ``key_spec.type``. Since + ``raw`` is variable-length, the total size of this key's payload + must be ``sizeof(struct fscrypt_provisioning_key_payload)`` plus the + raw key size. The process must have Search permission on this key. + + Most users should leave this 0 and specify the raw key directly. + The support for specifying a Linux keyring key is intended mainly to + allow re-adding keys after a filesystem is unmounted and re-mounted, + without having to store the raw keys in userspace memory. - ``raw`` is a variable-length field which must contain the actual - key, ``raw_size`` bytes long. + key, ``raw_size`` bytes long. Alternatively, if ``key_id`` is + nonzero, then this field is unused. For v2 policy keys, the kernel keeps track of which user (identified by effective user ID) added the key, and only allows the key to be @@ -681,11 +737,16 @@ FS_IOC_ADD_ENCRYPTION_KEY can fail with the following errors: - ``EACCES``: FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR was specified, but the caller does not have the CAP_SYS_ADMIN capability in the initial - user namespace + user namespace; or the raw key was specified by Linux key ID but the + process lacks Search permission on the key. - ``EDQUOT``: the key quota for this user would be exceeded by adding the key - ``EINVAL``: invalid key size or key specifier type, or reserved bits were set +- ``EKEYREJECTED``: the raw key was specified by Linux key ID, but the + key has the wrong type +- ``ENOKEY``: the raw key was specified by Linux key ID, but no key + exists with that ID - ``ENOTTY``: this type of filesystem does not implement encryption - ``EOPNOTSUPP``: the kernel was not configured with encryption support for this filesystem, or the filesystem superblock has not @@ -1088,8 +1149,8 @@ The context structs contain the same information as the corresponding policy structs (see `Setting an encryption policy`_), except that the context structs also contain a nonce. The nonce is randomly generated by the kernel and is used as KDF input or as a tweak to cause -different files to be encrypted differently; see `Per-file keys`_ and -`DIRECT_KEY and per-mode keys`_. +different files to be encrypted differently; see `Per-file encryption +keys`_ and `DIRECT_KEY policies`_. Data path changes ----------------- @@ -1141,7 +1202,7 @@ filesystem-specific hash(es) needed for directory lookups. This allows the filesystem to still, with a high degree of confidence, map the filename given in ->lookup() back to a particular directory entry that was previously listed by readdir(). See :c:type:`struct -fscrypt_digested_name` in the source for more details. +fscrypt_nokey_name` in the source for more details. Note that the precise way that filenames are presented to userspace without the key is subject to change in the future. It is only meant diff --git a/Documentation/filesystems/fsverity.rst b/Documentation/filesystems/fsverity.rst index 42a0b6dd9e0b..a95536b6443c 100644 --- a/Documentation/filesystems/fsverity.rst +++ b/Documentation/filesystems/fsverity.rst @@ -226,6 +226,14 @@ To do so, check for FS_VERITY_FL (0x00100000) in the returned flags. The verity flag is not settable via FS_IOC_SETFLAGS. You must use FS_IOC_ENABLE_VERITY instead, since parameters must be provided. +statx +----- + +Since Linux v5.5, the statx() system call sets STATX_ATTR_VERITY if +the file has fs-verity enabled. This can perform better than +FS_IOC_GETFLAGS and FS_IOC_MEASURE_VERITY because it doesn't require +opening the file, and opening verity files can be expensive. + Accessing verity files ====================== @@ -398,7 +406,7 @@ pages have been read into the pagecache. (See `Verifying data`_.) ext4 ---- -ext4 supports fs-verity since Linux TODO and e2fsprogs v1.45.2. +ext4 supports fs-verity since Linux v5.4 and e2fsprogs v1.45.2. To create verity files on an ext4 filesystem, the filesystem must have been formatted with ``-O verity`` or had ``tune2fs -O verity`` run on @@ -434,7 +442,7 @@ also only supports extent-based files. f2fs ---- -f2fs supports fs-verity since Linux TODO and f2fs-tools v1.11.0. +f2fs supports fs-verity since Linux v5.4 and f2fs-tools v1.11.0. To create verity files on an f2fs filesystem, the filesystem must have been formatted with ``-O verity``. diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 93e0a2404532..c757c1c3cb81 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -606,3 +606,10 @@ in your dentry operations instead. dentry separately, and it now has request_mask and query_flags arguments to specify the fields and sync type requested by statx. Filesystems not supporting any statx-specific features may ignore the new arguments. +-- +[mandatory] + + [should've been added in 2016] stale comment in finish_open() + nonwithstanding, failure exits in ->atomic_open() instances should + *NOT* fput() the file, no matter what. Everything is handled by the + caller. diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 09bc35fbe66b..76bfa25151bf 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -862,6 +862,7 @@ Writeback: 0 kB AnonPages: 861800 kB Mapped: 280372 kB Shmem: 644 kB +KReclaimable: 168048 kB Slab: 284364 kB SReclaimable: 159856 kB SUnreclaim: 124508 kB @@ -925,6 +926,9 @@ AnonHugePages: Non-file backed huge pages mapped into userspace page tables ShmemHugePages: Memory used by shared memory (shmem) and tmpfs allocated with huge pages ShmemPmdMapped: Shared memory mapped into userspace with huge pages +KReclaimable: Kernel allocations that the kernel will attempt to reclaim + under memory pressure. Includes SReclaimable (below), and other + direct allocations with a shrinker. Slab: in-kernel data structures cache SReclaimable: Part of Slab, that might be reclaimed, such as caches SUnreclaim: Part of Slab, that cannot be reclaimed on memory pressure diff --git a/Documentation/hid/uhid.txt b/Documentation/hid/uhid.txt index c8656dd029a9..958fff945304 100644 --- a/Documentation/hid/uhid.txt +++ b/Documentation/hid/uhid.txt @@ -160,7 +160,7 @@ them but you should handle them according to your needs. UHID_OUTPUT: This is sent if the HID device driver wants to send raw data to the I/O device on the interrupt channel. You should read the payload and forward it to - the device. The payload is of type "struct uhid_data_req". + the device. The payload is of type "struct uhid_output_req". This may be received even though you haven't received UHID_OPEN, yet. UHID_GET_REPORT: diff --git a/Documentation/scheduler/sched-bwc.txt b/Documentation/scheduler/sched-bwc.txt index f6b1873f68ab..de583fbbfe42 100644 --- a/Documentation/scheduler/sched-bwc.txt +++ b/Documentation/scheduler/sched-bwc.txt @@ -90,6 +90,51 @@ There are two ways in which a group may become throttled: In case b) above, even though the child may have runtime remaining it will not be allowed to until the parent's runtime is refreshed. +CFS Bandwidth Quota Caveats +--------------------------- +Once a slice is assigned to a cpu it does not expire. However all but 1ms of +the slice may be returned to the global pool if all threads on that cpu become +unrunnable. This is configured at compile time by the min_cfs_rq_runtime +variable. This is a performance tweak that helps prevent added contention on +the global lock. + +The fact that cpu-local slices do not expire results in some interesting corner +cases that should be understood. + +For cgroup cpu constrained applications that are cpu limited this is a +relatively moot point because they will naturally consume the entirety of their +quota as well as the entirety of each cpu-local slice in each period. As a +result it is expected that nr_periods roughly equal nr_throttled, and that +cpuacct.usage will increase roughly equal to cfs_quota_us in each period. + +For highly-threaded, non-cpu bound applications this non-expiration nuance +allows applications to briefly burst past their quota limits by the amount of +unused slice on each cpu that the task group is running on (typically at most +1ms per cpu or as defined by min_cfs_rq_runtime). This slight burst only +applies if quota had been assigned to a cpu and then not fully used or returned +in previous periods. This burst amount will not be transferred between cores. +As a result, this mechanism still strictly limits the task group to quota +average usage, albeit over a longer time window than a single period. This +also limits the burst ability to no more than 1ms per cpu. This provides +better more predictable user experience for highly threaded applications with +small quota limits on high core count machines. It also eliminates the +propensity to throttle these applications while simultanously using less than +quota amounts of cpu. Another way to say this, is that by allowing the unused +portion of a slice to remain valid across periods we have decreased the +possibility of wastefully expiring quota on cpu-local silos that don't need a +full slice's amount of cpu time. + +The interaction between cpu-bound and non-cpu-bound-interactive applications +should also be considered, especially when single core usage hits 100%. If you +gave each of these applications half of a cpu-core and they both got scheduled +on the same CPU it is theoretically possible that the non-cpu bound application +will use up to 1ms additional quota in some periods, thereby preventing the +cpu-bound application from fully using its quota by that same amount. In these +instances it will be up to the CFS algorithm (see sched-design-CFS.rst) to +decide which application is chosen to run, as they will both be runnable and +have remaining quota. This runtime discrepancy will be made up in the following +periods when the interactive application idles. + Examples -------- 1. Limit a group to 1 CPU worth of runtime. diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index b757d6eb365b..694968c7523c 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -653,8 +653,7 @@ allowed to execute. perf_event_paranoid: Controls use of the performance events system by unprivileged -users (without CAP_SYS_ADMIN). The default value is 3 if -CONFIG_SECURITY_PERF_EVENTS_RESTRICT is set, or 2 otherwise. +users (without CAP_SYS_ADMIN). The default value is 2. -1: Allow use of (almost) all events by all users Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK @@ -662,7 +661,6 @@ CONFIG_SECURITY_PERF_EVENTS_RESTRICT is set, or 2 otherwise. Disallow raw tracepoint access by users without CAP_SYS_ADMIN >=1: Disallow CPU event access by users without CAP_SYS_ADMIN >=2: Disallow kernel profiling by users without CAP_SYS_ADMIN ->=3: Disallow all event access by users without CAP_SYS_ADMIN ============================================================== diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index 1bb8bcaf8497..635cd6eaf714 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -15,8 +15,6 @@ The acquisition orders for mutexes are as follows: On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock. -For spinlocks, kvm_lock is taken outside kvm->mmu_lock. - Everything else is a leaf: no other lock is taken inside the critical sections. @@ -169,7 +167,7 @@ which time it will be set using the Dirty tracking mechanism described above. ------------ Name: kvm_lock -Type: spinlock_t +Type: mutex Arch: any Protects: - vm_list diff --git a/Documentation/x86/index.rst b/Documentation/x86/index.rst index ef389dcf1b1d..0780d55c5aa8 100644 --- a/Documentation/x86/index.rst +++ b/Documentation/x86/index.rst @@ -6,3 +6,4 @@ x86 architecture specifics :maxdepth: 1 mds + tsx_async_abort diff --git a/Documentation/x86/tsx_async_abort.rst b/Documentation/x86/tsx_async_abort.rst new file mode 100644 index 000000000000..583ddc185ba2 --- /dev/null +++ b/Documentation/x86/tsx_async_abort.rst @@ -0,0 +1,117 @@ +.. SPDX-License-Identifier: GPL-2.0 + +TSX Async Abort (TAA) mitigation +================================ + +.. _tsx_async_abort: + +Overview +-------- + +TSX Async Abort (TAA) is a side channel attack on internal buffers in some +Intel processors similar to Microachitectural Data Sampling (MDS). In this +case certain loads may speculatively pass invalid data to dependent operations +when an asynchronous abort condition is pending in a Transactional +Synchronization Extensions (TSX) transaction. This includes loads with no +fault or assist condition. Such loads may speculatively expose stale data from +the same uarch data structures as in MDS, with same scope of exposure i.e. +same-thread and cross-thread. This issue affects all current processors that +support TSX. + +Mitigation strategy +------------------- + +a) TSX disable - one of the mitigations is to disable TSX. A new MSR +IA32_TSX_CTRL will be available in future and current processors after +microcode update which can be used to disable TSX. In addition, it +controls the enumeration of the TSX feature bits (RTM and HLE) in CPUID. + +b) Clear CPU buffers - similar to MDS, clearing the CPU buffers mitigates this +vulnerability. More details on this approach can be found in +:ref:`Documentation/admin-guide/hw-vuln/mds.rst `. + +Kernel internal mitigation modes +-------------------------------- + + ============= ============================================================ + off Mitigation is disabled. Either the CPU is not affected or + tsx_async_abort=off is supplied on the kernel command line. + + tsx disabled Mitigation is enabled. TSX feature is disabled by default at + bootup on processors that support TSX control. + + verw Mitigation is enabled. CPU is affected and MD_CLEAR is + advertised in CPUID. + + ucode needed Mitigation is enabled. CPU is affected and MD_CLEAR is not + advertised in CPUID. That is mainly for virtualization + scenarios where the host has the updated microcode but the + hypervisor does not expose MD_CLEAR in CPUID. It's a best + effort approach without guarantee. + ============= ============================================================ + +If the CPU is affected and the "tsx_async_abort" kernel command line parameter is +not provided then the kernel selects an appropriate mitigation depending on the +status of RTM and MD_CLEAR CPUID bits. + +Below tables indicate the impact of tsx=on|off|auto cmdline options on state of +TAA mitigation, VERW behavior and TSX feature for various combinations of +MSR_IA32_ARCH_CAPABILITIES bits. + +1. "tsx=off" + +========= ========= ============ ============ ============== =================== ====================== +MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=off +---------------------------------- ------------------------------------------------------------------------- +TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation + after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full +========= ========= ============ ============ ============== =================== ====================== + 0 0 0 HW default Yes Same as MDS Same as MDS + 0 0 1 Invalid case Invalid case Invalid case Invalid case + 0 1 0 HW default No Need ucode update Need ucode update + 0 1 1 Disabled Yes TSX disabled TSX disabled + 1 X 1 Disabled X None needed None needed +========= ========= ============ ============ ============== =================== ====================== + +2. "tsx=on" + +========= ========= ============ ============ ============== =================== ====================== +MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=on +---------------------------------- ------------------------------------------------------------------------- +TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation + after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full +========= ========= ============ ============ ============== =================== ====================== + 0 0 0 HW default Yes Same as MDS Same as MDS + 0 0 1 Invalid case Invalid case Invalid case Invalid case + 0 1 0 HW default No Need ucode update Need ucode update + 0 1 1 Enabled Yes None Same as MDS + 1 X 1 Enabled X None needed None needed +========= ========= ============ ============ ============== =================== ====================== + +3. "tsx=auto" + +========= ========= ============ ============ ============== =================== ====================== +MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=auto +---------------------------------- ------------------------------------------------------------------------- +TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation + after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full +========= ========= ============ ============ ============== =================== ====================== + 0 0 0 HW default Yes Same as MDS Same as MDS + 0 0 1 Invalid case Invalid case Invalid case Invalid case + 0 1 0 HW default No Need ucode update Need ucode update + 0 1 1 Disabled Yes TSX disabled TSX disabled + 1 X 1 Enabled X None needed None needed +========= ========= ============ ============ ============== =================== ====================== + +In the tables, TSX_CTRL_MSR is a new bit in MSR_IA32_ARCH_CAPABILITIES that +indicates whether MSR_IA32_TSX_CTRL is supported. + +There are two control bits in IA32_TSX_CTRL MSR: + + Bit 0: When set it disables the Restricted Transactional Memory (RTM) + sub-feature of TSX (will force all transactions to abort on the + XBEGIN instruction). + + Bit 1: When set it disables the enumeration of the RTM and HLE feature + (i.e. it will make CPUID(EAX=7).EBX{bit4} and + CPUID(EAX=7).EBX{bit11} read as 0). diff --git a/MAINTAINERS b/MAINTAINERS index 740e82b50c39..858c3a81d063 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5847,6 +5847,14 @@ F: Documentation/isdn/README.gigaset F: drivers/isdn/gigaset/ F: include/uapi/linux/gigaset_dev.h +GNSS SUBSYSTEM +M: Johan Hovold +S: Maintained +F: Documentation/ABI/testing/sysfs-class-gnss +F: Documentation/devicetree/bindings/gnss/ +F: drivers/gnss/ +F: include/linux/gnss.h + GO7007 MPEG CODEC M: Hans Verkuil L: linux-media@vger.kernel.org @@ -6891,7 +6899,7 @@ M: Joonas Lahtinen M: Rodrigo Vivi L: intel-gfx@lists.freedesktop.org W: https://01.org/linuxgraphics/ -B: https://01.org/linuxgraphics/documentation/how-report-bugs +B: https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs C: irc://chat.freenode.net/intel-gfx Q: http://patchwork.freedesktop.org/project/intel-gfx/ T: git git://anongit.freedesktop.org/drm-intel @@ -7878,6 +7886,13 @@ Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ S: Supported F: drivers/nvdimm/pmem* +LIBNVDIMM: DEVICETREE BINDINGS +M: Oliver O'Halloran +L: linux-nvdimm@lists.01.org +Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ +S: Supported +F: drivers/nvdimm/of_pmem.c + LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM M: Dan Williams L: linux-nvdimm@lists.01.org @@ -9576,6 +9591,12 @@ S: Maintained F: Documentation/scsi/NinjaSCSI.txt F: drivers/scsi/nsp32* +NINTENDO HID DRIVER +M: Daniel J. Ogorchock +L: linux-input@vger.kernel.org +S: Maintained +F: drivers/hid/hid-nintendo* + NIOS2 ARCHITECTURE M: Ley Foon Tan L: nios2-dev@lists.rocketboards.org (moderated for non-subscribers) diff --git a/Makefile b/Makefile index c0644af6fcad..8fbdabf46291 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 150 +SUBLEVEL = 174 EXTRAVERSION = NAME = Petit Gorille @@ -499,6 +499,10 @@ CLANG_FLAGS += -Werror=unknown-warning-option KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_AFLAGS += $(CLANG_FLAGS) export CLANG_FLAGS +ifeq ($(ld-name),lld) +KBUILD_CFLAGS += -fuse-ld=lld +endif +KBUILD_CPPFLAGS += -Qunused-arguments endif RETPOLINE_CFLAGS_GCC := -mindirect-branch=thunk-extern -mindirect-branch-register @@ -593,6 +597,8 @@ ifeq ($(dot-config),1) -include include/config/auto.conf ifeq ($(KBUILD_EXTMOD),) +include/config/auto.conf.cmd: check-clang-specific-options + # Read in dependencies to all Kconfig* files, make sure to run # oldconfig if changes are detected. -include include/config/auto.conf.cmd @@ -645,8 +651,7 @@ KBUILD_AFLAGS += $(call cc-option,-fno-PIE) CFLAGS_GCOV := -fprofile-arcs -ftest-coverage \ $(call cc-option,-fno-tree-loop-im) \ $(call cc-disable-warning,maybe-uninitialized,) -CFLAGS_KCOV := $(call cc-option,-fsanitize-coverage=trace-pc,) -export CFLAGS_GCOV CFLAGS_KCOV +export CFLAGS_GCOV # Make toolchain changes before including arch/$(SRCARCH)/Makefile to ensure # ar/cc/ld-* macros return correct values. @@ -660,8 +665,8 @@ endif # use llvm-ar for building symbol tables from IR files, and llvm-dis instead # of objdump for processing symbol versions and exports LLVM_AR := llvm-ar -LLVM_DIS := llvm-dis -export LLVM_AR LLVM_DIS +LLVM_NM := llvm-nm +export LLVM_AR LLVM_NM endif # The arch Makefile can set ARCH_{CPP,A,C}FLAGS to override the default @@ -701,6 +706,7 @@ ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLA KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO endif +include scripts/Makefile.kcov include scripts/Makefile.gcc-plugins ifdef CONFIG_READABLE_ASM @@ -740,7 +746,6 @@ endif KBUILD_CFLAGS += $(stackp-flag) ifeq ($(cc-name),clang) -KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,) KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier) KBUILD_CFLAGS += $(call cc-disable-warning, gnu) KBUILD_CFLAGS += $(call cc-disable-warning, duplicate-decl-specifier) @@ -758,6 +763,10 @@ else KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable) endif +ifeq ($(ld-name),lld) +LDFLAGS += -O2 +endif + KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable) ifdef CONFIG_FRAME_POINTER KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls @@ -825,10 +834,24 @@ KBUILD_CFLAGS += $(call cc-option,-fdata-sections,) endif ifdef CONFIG_LTO_CLANG -lto-clang-flags := -flto -fvisibility=hidden +ifdef CONFIG_THINLTO +lto-clang-flags := -flto=thin +LDFLAGS += --thinlto-cache-dir=.thinlto-cache +else +lto-clang-flags := -flto +endif +lto-clang-flags += -fvisibility=default $(call cc-option, -fsplit-lto-unit) + +# Limit inlining across translation units to reduce binary size +LD_FLAGS_LTO_CLANG := -mllvm -import-instr-limit=5 + +KBUILD_LDFLAGS += $(LD_FLAGS_LTO_CLANG) +KBUILD_LDFLAGS_MODULE += $(LD_FLAGS_LTO_CLANG) + +KBUILD_LDS_MODULE += $(srctree)/scripts/module-lto.lds # allow disabling only clang LTO where needed -DISABLE_LTO_CLANG := -fno-lto -fvisibility=default +DISABLE_LTO_CLANG := -fno-lto export DISABLE_LTO_CLANG endif @@ -845,7 +868,7 @@ export LDFINAL_vmlinux LDFLAGS_FINAL_vmlinux endif ifdef CONFIG_CFI_CLANG -cfi-clang-flags += -fsanitize=cfi $(call cc-option, -fsplit-lto-unit) +cfi-clang-flags += -fsanitize=cfi -fno-sanitize-cfi-canonical-jump-tables DISABLE_CFI_CLANG := -fno-sanitize=cfi ifdef CONFIG_MODULES cfi-clang-flags += -fsanitize-cfi-cross-dso @@ -871,6 +894,12 @@ DISABLE_LTO += $(DISABLE_CFI) export DISABLE_CFI endif +ifdef CONFIG_SHADOW_CALL_STACK +CC_FLAGS_SCS := -fsanitize=shadow-call-stack +KBUILD_CFLAGS += $(CC_FLAGS_SCS) +export CC_FLAGS_SCS +endif + # arch Makefile may override CC so keep this after arch Makefile is included NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include) CHECKFLAGS += $(NOSTDINC_FLAGS) @@ -917,6 +946,15 @@ KBUILD_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types) # Require designated initializers for all marked structures KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init) +# change __FILE__ to the relative path from the srctree +KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=) + +# ensure -fcf-protection is disabled when using retpoline as it is +# incompatible with -mindirect-branch=thunk-extern +ifdef CONFIG_RETPOLINE +KBUILD_CFLAGS += $(call cc-option,-fcf-protection=none) +endif + # use the deterministic mode of AR if available KBUILD_ARFLAGS := $(call ar-option,D) @@ -1043,6 +1081,7 @@ ifdef CONFIG_STACK_VALIDATION endif endif +PHONY += prepare0 ifeq ($(KBUILD_EXTMOD),) core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ @@ -1137,8 +1176,7 @@ include/config/kernel.release: include/config/auto.conf FORCE # archprepare is used in arch Makefiles and when processed asm symlink, # version.h and scripts_basic is processed / created. -# Listed in dependency order -PHONY += prepare archprepare prepare0 prepare1 prepare2 prepare3 +PHONY += prepare archprepare prepare1 prepare2 prepare3 # prepare3 is used to check if we are building in a separate output directory, # and if so do: @@ -1190,6 +1228,22 @@ else endif endif +# Disable clang-specific config options when using a different compiler +clang-specific-configs := LTO_CLANG CFI_CLANG SHADOW_CALL_STACK INIT_STACK_ALL + +PHONY += check-clang-specific-options +check-clang-specific-options: $(KCONFIG_CONFIG) FORCE +ifneq ($(cc-name),clang) +ifneq ($(findstring y,$(shell $(CONFIG_SHELL) \ + $(srctree)/scripts/config --file $(KCONFIG_CONFIG) \ + $(foreach c,$(clang-specific-configs),-s $(c)))),) + @echo WARNING: Disabling clang-specific options with $(cc-name) >&2 + $(Q)$(srctree)/scripts/config --file $(KCONFIG_CONFIG) \ + $(foreach c,$(clang-specific-configs),-d $(c)) && \ + $(MAKE) -f $(srctree)/Makefile olddefconfig +endif +endif + # Check for CONFIG flags that require compiler support. Abort the build # after .config has been processed, but before the kernel build starts. # @@ -1207,7 +1261,7 @@ ifdef CONFIG_LTO_CLANG endif ifneq ($(ld-name),lld) ifneq ($(call gold-ifversion, -ge, 112000000, y), y) - @echo Cannot use CONFIG_LTO_CLANG: requires GNU gold 1.12 or later >&2 && exit 1 + @echo Cannot use CONFIG_LTO_CLANG: requires GNU gold 1.12 or later >&2 && exit 1 endif endif endif @@ -1624,9 +1678,6 @@ else # KBUILD_EXTMOD # We are always building modules KBUILD_MODULES := 1 -PHONY += crmodverdir -crmodverdir: - $(cmd_crmodverdir) PHONY += $(objtree)/Module.symvers $(objtree)/Module.symvers: @@ -1638,7 +1689,7 @@ $(objtree)/Module.symvers: module-dirs := $(addprefix _module_,$(KBUILD_EXTMOD)) PHONY += $(module-dirs) modules -$(module-dirs): crmodverdir $(objtree)/Module.symvers +$(module-dirs): prepare $(objtree)/Module.symvers $(Q)$(MAKE) $(build)=$(patsubst _module_%,%,$@) modules: $(module-dirs) @@ -1679,7 +1730,8 @@ help: # Dummies... PHONY += prepare scripts -prepare: ; +prepare: + $(cmd_crmodverdir) scripts: ; endif # KBUILD_EXTMOD @@ -1805,17 +1857,14 @@ endif # Modules /: prepare scripts FORCE - $(cmd_crmodverdir) $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ $(build)=$(build-dir) # Make sure the latest headers are built for Documentation Documentation/ samples/: headers_install %/: prepare scripts FORCE - $(cmd_crmodverdir) $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ $(build)=$(build-dir) %.ko: prepare scripts FORCE - $(cmd_crmodverdir) $(Q)$(MAKE) KBUILD_MODULES=$(if $(CONFIG_MODULES),1) \ $(build)=$(build-dir) $(@:.ko=.o) $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modpost diff --git a/arch/Kconfig b/arch/Kconfig index 784c1fe35583..b27eac589618 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -622,6 +622,18 @@ config ARCH_SUPPORTS_LTO_CLANG - compiling inline assembly with clang's integrated assembler, - and linking with either lld or GNU gold w/ LLVMgold. +config ARCH_SUPPORTS_THINLTO + bool + help + An architecture should select this if it supports clang's ThinLTO. + +config THINLTO + bool "Use clang ThinLTO (EXPERIMENTAL)" + depends on LTO_CLANG && ARCH_SUPPORTS_THINLTO + default y + help + Use ThinLTO to speed up Link Time Optimization. + choice prompt "Link-Time Optimization (LTO) (EXPERIMENTAL)" default LTO_NONE @@ -680,6 +692,52 @@ config CFI_CLANG_SHADOW If you select this option, the kernel builds a fast look-up table of CFI check functions in loaded modules to reduce overhead. +config ARCH_SUPPORTS_SHADOW_CALL_STACK + bool + help + An architecture should select this if it supports Clang's Shadow + Call Stack, has asm/scs.h, and implements runtime support for shadow + stack switching. + +choice + prompt "Return-oriented programming (ROP) protection" + default ROP_PROTECTION_NONE + help + This option controls kernel protections against return-oriented + programming (ROP) attacks, which involve overwriting function return + addresses. + +config ROP_PROTECTION_NONE + bool "None" + +config SHADOW_CALL_STACK + bool "Clang Shadow Call Stack" + depends on ARCH_SUPPORTS_SHADOW_CALL_STACK + help + This option enables Clang's Shadow Call Stack, which uses a + shadow stack to protect function return addresses from being + overwritten by an attacker. More information can be found from + Clang's documentation: + + https://clang.llvm.org/docs/ShadowCallStack.html + + Note that security guarantees in the kernel differ from the ones + documented for user space. The kernel must store addresses of shadow + stacks used by other tasks and interrupt handlers in memory, which + means an attacker capable reading and writing arbitrary memory may + be able to locate them and hijack control flow by modifying shadow + stacks that are not currently in use. + +endchoice + +config SHADOW_CALL_STACK_VMAP + bool "Use virtually mapped shadow call stacks" + depends on SHADOW_CALL_STACK + help + Use virtually mapped shadow call stacks. Selecting this option + provides better stack exhaustion protection, but increases per-thread + memory consumption as a full page is allocated for each shadow stack. + config HAVE_ARCH_WITHIN_STACK_FRAMES bool help diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index e114000a84f5..d825b9dbae5d 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -70,6 +70,7 @@ interrupt-names = "macirq"; phy-mode = "rgmii"; snps,pbl = < 32 >; + snps,multicast-filter-bins = <256>; clocks = <&apbclk>; clock-names = "stmmaceth"; max-speed = <100>; diff --git a/arch/arc/include/asm/linkage.h b/arch/arc/include/asm/linkage.h index b29f1a9fd6f7..07c8e1a6c56e 100644 --- a/arch/arc/include/asm/linkage.h +++ b/arch/arc/include/asm/linkage.h @@ -14,6 +14,8 @@ #ifdef __ASSEMBLY__ #define ASM_NL ` /* use '`' to mark new line in macro */ +#define __ALIGN .align 4 +#define __ALIGN_STR __stringify(__ALIGN) /* annotation for data we want in DCCM - if enabled in .config */ .macro ARCFP_DATA nm diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c index 2ce24e74f879..a509b77ef80d 100644 --- a/arch/arc/kernel/perf_event.c +++ b/arch/arc/kernel/perf_event.c @@ -488,8 +488,8 @@ static int arc_pmu_device_probe(struct platform_device *pdev) /* loop thru all available h/w condition indexes */ for (j = 0; j < cc_bcr.c; j++) { write_aux_reg(ARC_REG_CC_INDEX, j); - cc_name.indiv.word0 = read_aux_reg(ARC_REG_CC_NAME0); - cc_name.indiv.word1 = read_aux_reg(ARC_REG_CC_NAME1); + cc_name.indiv.word0 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME0)); + cc_name.indiv.word1 = le32_to_cpu(read_aux_reg(ARC_REG_CC_NAME1)); /* See if it has been mapped to a perf event_id */ for (i = 0; i < ARRAY_SIZE(arc_pmu_ev_hw_map); i++) { diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig index 8eff057efcae..ce908e2c5282 100644 --- a/arch/arc/plat-eznps/Kconfig +++ b/arch/arc/plat-eznps/Kconfig @@ -7,7 +7,7 @@ menuconfig ARC_PLAT_EZNPS bool "\"EZchip\" ARC dev platform" select CPU_BIG_ENDIAN - select CLKSRC_NPS + select CLKSRC_NPS if !PHYS_ADDR_T_64BIT select EZNPS_GIC select EZCHIP_NPS_MANAGEMENT_ENET if ETHERNET help diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index bc9d81142cec..3ac1bb58b74f 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1535,12 +1535,10 @@ config THUMB2_KERNEL bool "Compile the kernel in Thumb-2 mode" if !CPU_THUMBONLY depends on (CPU_V7 || CPU_V7M) && !CPU_V6 && !CPU_V6K default y if CPU_THUMBONLY - select ARM_ASM_UNIFIED select ARM_UNWIND help By enabling this option, the kernel will be compiled in - Thumb-2 mode. A compiler/assembler that understand the unified - ARM-Thumb syntax is needed. + Thumb-2 mode. If unsure, say N. @@ -1575,9 +1573,6 @@ config THUMB2_AVOID_R_ARM_THM_JUMP11 Unless you are sure your tools don't have this problem, say Y. -config ARM_ASM_UNIFIED - bool - config ARM_PATCH_IDIV bool "Runtime patch udiv/sdiv instructions into __aeabi_{u}idiv()" depends on CPU_32v7 && !XIP_KERNEL @@ -2052,7 +2047,7 @@ config XIP_PHYS_ADDR config KEXEC bool "Kexec system call (EXPERIMENTAL)" depends on (!SMP || PM_SLEEP_SMP) - depends on !CPU_V7M + depends on MMU select KEXEC_CORE help kexec is a system call that implements the ability to shutdown your diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 954ba8b81052..b14f154919a5 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -1023,14 +1023,21 @@ choice Say Y here if you want kernel low-level debugging support on SOCFPGA(Cyclone 5 and Arria 5) based platforms. - config DEBUG_SOCFPGA_UART1 + config DEBUG_SOCFPGA_ARRIA10_UART1 depends on ARCH_SOCFPGA - bool "Use SOCFPGA UART1 for low-level debug" + bool "Use SOCFPGA Arria10 UART1 for low-level debug" select DEBUG_UART_8250 help Say Y here if you want kernel low-level debugging support on SOCFPGA(Arria 10) based platforms. + config DEBUG_SOCFPGA_CYCLONE5_UART1 + depends on ARCH_SOCFPGA + bool "Use SOCFPGA Cyclone 5 UART1 for low-level debug" + select DEBUG_UART_8250 + help + Say Y here if you want kernel low-level debugging support + on SOCFPGA(Cyclone 5 and Arria 5) based platforms. config DEBUG_SUN9I_UART0 bool "Kernel low-level debugging messages via sun9i UART0" @@ -1376,21 +1383,21 @@ config DEBUG_OMAP2PLUS_UART depends on ARCH_OMAP2PLUS config DEBUG_IMX_UART_PORT - int "i.MX Debug UART Port Selection" if DEBUG_IMX1_UART || \ - DEBUG_IMX25_UART || \ - DEBUG_IMX21_IMX27_UART || \ - DEBUG_IMX31_UART || \ - DEBUG_IMX35_UART || \ - DEBUG_IMX50_UART || \ - DEBUG_IMX51_UART || \ - DEBUG_IMX53_UART || \ - DEBUG_IMX6Q_UART || \ - DEBUG_IMX6SL_UART || \ - DEBUG_IMX6SX_UART || \ - DEBUG_IMX6UL_UART || \ - DEBUG_IMX7D_UART + int "i.MX Debug UART Port Selection" + depends on DEBUG_IMX1_UART || \ + DEBUG_IMX25_UART || \ + DEBUG_IMX21_IMX27_UART || \ + DEBUG_IMX31_UART || \ + DEBUG_IMX35_UART || \ + DEBUG_IMX50_UART || \ + DEBUG_IMX51_UART || \ + DEBUG_IMX53_UART || \ + DEBUG_IMX6Q_UART || \ + DEBUG_IMX6SL_UART || \ + DEBUG_IMX6SX_UART || \ + DEBUG_IMX6UL_UART || \ + DEBUG_IMX7D_UART default 1 - depends on ARCH_MXC help Choose UART port on which kernel low-level debug messages should be output. @@ -1585,7 +1592,8 @@ config DEBUG_UART_PHYS default 0xfe800000 if ARCH_IOP32X default 0xff690000 if DEBUG_RK32_UART2 default 0xffc02000 if DEBUG_SOCFPGA_UART0 - default 0xffc02100 if DEBUG_SOCFPGA_UART1 + default 0xffc02100 if DEBUG_SOCFPGA_ARRIA10_UART1 + default 0xffc03000 if DEBUG_SOCFPGA_CYCLONE5_UART1 default 0xffd82340 if ARCH_IOP13XX default 0xffe40000 if DEBUG_RCAR_GEN1_SCIF0 default 0xffe42000 if DEBUG_RCAR_GEN1_SCIF2 @@ -1689,7 +1697,8 @@ config DEBUG_UART_VIRT default 0xfeb30c00 if DEBUG_KEYSTONE_UART0 default 0xfeb31000 if DEBUG_KEYSTONE_UART1 default 0xfec02000 if DEBUG_SOCFPGA_UART0 - default 0xfec02100 if DEBUG_SOCFPGA_UART1 + default 0xfec02100 if DEBUG_SOCFPGA_ARRIA10_UART1 + default 0xfec03000 if DEBUG_SOCFPGA_CYCLONE5_UART1 default 0xfec12000 if (DEBUG_MVEBU_UART0 || DEBUG_MVEBU_UART0_ALTERNATE) && ARCH_MVEBU default 0xfec12100 if DEBUG_MVEBU_UART1_ALTERNATE default 0xfec10000 if DEBUG_SIRFATLAS7_UART0 @@ -1737,9 +1746,9 @@ config DEBUG_UART_8250_WORD depends on DEBUG_LL_UART_8250 || DEBUG_UART_8250 depends on DEBUG_UART_8250_SHIFT >= 2 default y if DEBUG_PICOXCELL_UART || \ - DEBUG_SOCFPGA_UART0 || DEBUG_SOCFPGA_UART1 || \ - DEBUG_KEYSTONE_UART0 || DEBUG_KEYSTONE_UART1 || \ - DEBUG_ALPINE_UART0 || \ + DEBUG_SOCFPGA_UART0 || DEBUG_SOCFPGA_ARRIA10_UART1 || \ + DEBUG_SOCFPGA_CYCLONE5_UART1 || DEBUG_KEYSTONE_UART0 || \ + DEBUG_KEYSTONE_UART1 || DEBUG_ALPINE_UART0 || \ DEBUG_DAVINCI_DMx_UART0 || DEBUG_DAVINCI_DA8XX_UART1 || \ DEBUG_DAVINCI_DA8XX_UART2 || \ DEBUG_BCM_KONA_UART || DEBUG_RK32_UART2 diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 2a35d7324efd..c1185cbc7f54 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -39,7 +39,10 @@ KBUILD_CFLAGS += $(call cc-option,-mno-unaligned-access) endif ifeq ($(CONFIG_FRAME_POINTER),y) -KBUILD_CFLAGS +=-fno-omit-frame-pointer -mapcs -mno-sched-prolog +KBUILD_CFLAGS +=-fno-omit-frame-pointer +ifeq ($(cc-name),gcc) +KBUILD_CFLAGS += -mapcs -mno-sched-prolog +endif endif ifeq ($(CONFIG_CPU_BIG_ENDIAN),y) @@ -115,9 +118,15 @@ ifeq ($(CONFIG_ARM_UNWIND),y) CFLAGS_ABI +=-funwind-tables endif +ifeq ($(cc-name),clang) +CFLAGS_ABI += -meabi gnu +endif + +# Accept old syntax despite ".syntax unified" +AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W) + ifeq ($(CONFIG_THUMB2_KERNEL),y) AFLAGS_AUTOIT :=$(call as-option,-Wa$(comma)-mimplicit-it=always,-Wa$(comma)-mauto-it) -AFLAGS_NOWARN :=$(call as-option,-Wa$(comma)-mno-warn-deprecated,-Wa$(comma)-W) CFLAGS_ISA :=-mthumb $(AFLAGS_AUTOIT) $(AFLAGS_NOWARN) AFLAGS_ISA :=$(CFLAGS_ISA) -Wa$(comma)-mthumb # Work around buggy relocation from gas if requested: @@ -125,7 +134,7 @@ ifeq ($(CONFIG_THUMB2_AVOID_R_ARM_THM_JUMP11),y) CFLAGS_MODULE +=-fno-optimize-sibling-calls endif else -CFLAGS_ISA :=$(call cc-option,-marm,) +CFLAGS_ISA :=$(call cc-option,-marm,) $(AFLAGS_NOWARN) AFLAGS_ISA :=$(CFLAGS_ISA) endif diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h index 07437816e098..6a0f1f524466 100644 --- a/arch/arm/boot/compressed/libfdt_env.h +++ b/arch/arm/boot/compressed/libfdt_env.h @@ -2,10 +2,14 @@ #ifndef _ARM_LIBFDT_ENV_H #define _ARM_LIBFDT_ENV_H +#include #include #include #include +#define INT32_MAX S32_MAX +#define UINT32_MAX U32_MAX + typedef __be16 fdt16_t; typedef __be32 fdt32_t; typedef __be64 fdt64_t; diff --git a/arch/arm/boot/dts/am335x-boneblack-common.dtsi b/arch/arm/boot/dts/am335x-boneblack-common.dtsi index 325daae40278..485c27f039f5 100644 --- a/arch/arm/boot/dts/am335x-boneblack-common.dtsi +++ b/arch/arm/boot/dts/am335x-boneblack-common.dtsi @@ -131,6 +131,11 @@ }; / { + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0x20000000>; /* 512 MB */ + }; + clk_mcasp0_fixed: clk_mcasp0_fixed { #clock-cells = <0>; compatible = "fixed-clock"; diff --git a/arch/arm/boot/dts/am335x-evm.dts b/arch/arm/boot/dts/am335x-evm.dts index 478434ebff92..27ff3e689e96 100644 --- a/arch/arm/boot/dts/am335x-evm.dts +++ b/arch/arm/boot/dts/am335x-evm.dts @@ -724,6 +724,7 @@ pinctrl-0 = <&cpsw_default>; pinctrl-1 = <&cpsw_sleep>; status = "okay"; + slaves = <1>; }; &davinci_mdio { @@ -731,15 +732,14 @@ pinctrl-0 = <&davinci_mdio_default>; pinctrl-1 = <&davinci_mdio_sleep>; status = "okay"; + + ethphy0: ethernet-phy@0 { + reg = <0>; + }; }; &cpsw_emac0 { - phy_id = <&davinci_mdio>, <0>; - phy-mode = "rgmii-txid"; -}; - -&cpsw_emac1 { - phy_id = <&davinci_mdio>, <1>; + phy-handle = <ðphy0>; phy-mode = "rgmii-txid"; }; diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index 4714a59fd86d..345c117bd5ef 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -1118,6 +1118,8 @@ ti,hwmods = "dss_dispc"; clocks = <&disp_clk>; clock-names = "fck"; + + max-memory-bandwidth = <230000000>; }; rfbi: rfbi@4832a800 { diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts b/arch/arm/boot/dts/am437x-gp-evm.dts index afb8eb0a0a16..051823b7e5a1 100644 --- a/arch/arm/boot/dts/am437x-gp-evm.dts +++ b/arch/arm/boot/dts/am437x-gp-evm.dts @@ -83,7 +83,7 @@ }; lcd0: display { - compatible = "osddisplays,osd057T0559-34ts", "panel-dpi"; + compatible = "osddisplays,osd070t1718-19ts", "panel-dpi"; label = "lcd"; panel-timing { diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts index 081fa68b6f98..c4279b0b9f12 100644 --- a/arch/arm/boot/dts/am43x-epos-evm.dts +++ b/arch/arm/boot/dts/am43x-epos-evm.dts @@ -45,7 +45,7 @@ }; lcd0: display { - compatible = "osddisplays,osd057T0559-34ts", "panel-dpi"; + compatible = "osddisplays,osd070t1718-19ts", "panel-dpi"; label = "lcd"; panel-timing { diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts index debf9464403e..96a4df4109d7 100644 --- a/arch/arm/boot/dts/am571x-idk.dts +++ b/arch/arm/boot/dts/am571x-idk.dts @@ -93,7 +93,7 @@ &pcie1_rc { status = "okay"; - gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>; + gpios = <&gpio5 18 GPIO_ACTIVE_HIGH>; }; &pcie1_ep { diff --git a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi index 49aeecd312b4..d578a9f7e1a0 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi +++ b/arch/arm/boot/dts/am57xx-beagle-x15-common.dtsi @@ -32,6 +32,27 @@ reg = <0x0 0x80000000 0x0 0x80000000>; }; + main_12v0: fixedregulator-main_12v0 { + /* main supply */ + compatible = "regulator-fixed"; + regulator-name = "main_12v0"; + regulator-min-microvolt = <12000000>; + regulator-max-microvolt = <12000000>; + regulator-always-on; + regulator-boot-on; + }; + + evm_5v0: fixedregulator-evm_5v0 { + /* Output of TPS54531D */ + compatible = "regulator-fixed"; + regulator-name = "evm_5v0"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + vin-supply = <&main_12v0>; + regulator-always-on; + regulator-boot-on; + }; + vdd_3v3: fixedregulator-vdd_3v3 { compatible = "regulator-fixed"; regulator-name = "vdd_3v3"; diff --git a/arch/arm/boot/dts/arm-realview-eb.dtsi b/arch/arm/boot/dts/arm-realview-eb.dtsi index e2e9599596e2..05379b6c1c13 100644 --- a/arch/arm/boot/dts/arm-realview-eb.dtsi +++ b/arch/arm/boot/dts/arm-realview-eb.dtsi @@ -334,7 +334,7 @@ clock-names = "uartclk", "apb_pclk"; }; - ssp: ssp@1000d000 { + ssp: spi@1000d000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x1000d000 0x1000>; clocks = <&sspclk>, <&pclk>; diff --git a/arch/arm/boot/dts/arm-realview-pb1176.dts b/arch/arm/boot/dts/arm-realview-pb1176.dts index c789564f2803..939c108c24a6 100644 --- a/arch/arm/boot/dts/arm-realview-pb1176.dts +++ b/arch/arm/boot/dts/arm-realview-pb1176.dts @@ -45,7 +45,7 @@ }; /* The voltage to the MMC card is hardwired at 3.3V */ - vmmc: fixedregulator@0 { + vmmc: regulator-vmmc { compatible = "regulator-fixed"; regulator-name = "vmmc"; regulator-min-microvolt = <3300000>; @@ -53,7 +53,7 @@ regulator-boot-on; }; - veth: fixedregulator@0 { + veth: regulator-veth { compatible = "regulator-fixed"; regulator-name = "veth"; regulator-min-microvolt = <3300000>; @@ -343,7 +343,7 @@ clock-names = "apb_pclk"; }; - pb1176_ssp: ssp@1010b000 { + pb1176_ssp: spi@1010b000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x1010b000 0x1000>; interrupt-parent = <&intc_dc1176>; diff --git a/arch/arm/boot/dts/arm-realview-pb11mp.dts b/arch/arm/boot/dts/arm-realview-pb11mp.dts index 3944765ac4b0..95037c48182d 100644 --- a/arch/arm/boot/dts/arm-realview-pb11mp.dts +++ b/arch/arm/boot/dts/arm-realview-pb11mp.dts @@ -145,7 +145,7 @@ }; /* The voltage to the MMC card is hardwired at 3.3V */ - vmmc: fixedregulator@0 { + vmmc: regulator-vmmc { compatible = "regulator-fixed"; regulator-name = "vmmc"; regulator-min-microvolt = <3300000>; @@ -153,7 +153,7 @@ regulator-boot-on; }; - veth: fixedregulator@0 { + veth: regulator-veth { compatible = "regulator-fixed"; regulator-name = "veth"; regulator-min-microvolt = <3300000>; @@ -480,7 +480,7 @@ clock-names = "uartclk", "apb_pclk"; }; - ssp@1000d000 { + spi@1000d000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x1000d000 0x1000>; interrupt-parent = <&intc_pb11mp>; diff --git a/arch/arm/boot/dts/arm-realview-pbx.dtsi b/arch/arm/boot/dts/arm-realview-pbx.dtsi index aeb49c4bd773..068293254fbb 100644 --- a/arch/arm/boot/dts/arm-realview-pbx.dtsi +++ b/arch/arm/boot/dts/arm-realview-pbx.dtsi @@ -43,7 +43,7 @@ }; /* The voltage to the MMC card is hardwired at 3.3V */ - vmmc: fixedregulator@0 { + vmmc: regulator-vmmc { compatible = "regulator-fixed"; regulator-name = "vmmc"; regulator-min-microvolt = <3300000>; @@ -51,7 +51,7 @@ regulator-boot-on; }; - veth: fixedregulator@0 { + veth: regulator-veth { compatible = "regulator-fixed"; regulator-name = "veth"; regulator-min-microvolt = <3300000>; @@ -318,7 +318,7 @@ clock-names = "uartclk", "apb_pclk"; }; - ssp: ssp@1000d000 { + ssp: spi@1000d000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x1000d000 0x1000>; clocks = <&sspclk>, <&pclk>; @@ -539,4 +539,3 @@ }; }; }; - diff --git a/arch/arm/boot/dts/armada-388-clearfog.dtsi b/arch/arm/boot/dts/armada-388-clearfog.dtsi index 68acfc968706..8a3bbb7d6cc1 100644 --- a/arch/arm/boot/dts/armada-388-clearfog.dtsi +++ b/arch/arm/boot/dts/armada-388-clearfog.dtsi @@ -89,7 +89,7 @@ &clearfog_sdhci_cd_pins>; pinctrl-names = "default"; status = "okay"; - vmmc = <®_3p3v>; + vmmc-supply = <®_3p3v>; wp-inverted; }; diff --git a/arch/arm/boot/dts/at91-sama5d4_xplained.dts b/arch/arm/boot/dts/at91-sama5d4_xplained.dts index cf712444b2c2..10f2fb9e0ea6 100644 --- a/arch/arm/boot/dts/at91-sama5d4_xplained.dts +++ b/arch/arm/boot/dts/at91-sama5d4_xplained.dts @@ -240,7 +240,7 @@ rootfs@800000 { label = "rootfs"; - reg = <0x800000 0x0f800000>; + reg = <0x800000 0x1f800000>; }; }; }; diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi index 64fa3f9a39d3..db0921e7a613 100644 --- a/arch/arm/boot/dts/at91sam9g45.dtsi +++ b/arch/arm/boot/dts/at91sam9g45.dtsi @@ -566,7 +566,7 @@ }; }; - uart1 { + usart1 { pinctrl_usart1: usart1-0 { atmel,pins = ; + reg = <0x800000 0x0f800000>; }; }; }; diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi index 8b2c65cd61a2..b822952c29f8 100644 --- a/arch/arm/boot/dts/bcm-cygnus.dtsi +++ b/arch/arm/boot/dts/bcm-cygnus.dtsi @@ -165,8 +165,8 @@ mdio: mdio@18002000 { compatible = "brcm,iproc-mdio"; reg = <0x18002000 0x8>; - #size-cells = <1>; - #address-cells = <0>; + #size-cells = <0>; + #address-cells = <1>; status = "disabled"; gphy0: ethernet-phy@0 { diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 4745e3c7806b..fdb018e1278f 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -38,7 +38,7 @@ trips { cpu-crit { - temperature = <80000>; + temperature = <90000>; hysteresis = <0>; type = "critical"; }; diff --git a/arch/arm/boot/dts/dove-cubox.dts b/arch/arm/boot/dts/dove-cubox.dts index 580e3cbcfbf7..3e1584e787ae 100644 --- a/arch/arm/boot/dts/dove-cubox.dts +++ b/arch/arm/boot/dts/dove-cubox.dts @@ -87,7 +87,7 @@ status = "okay"; clock-frequency = <100000>; - si5351: clock-generator { + si5351: clock-generator@60 { compatible = "silabs,si5351a-msop"; reg = <0x60>; #address-cells = <1>; diff --git a/arch/arm/boot/dts/dove.dtsi b/arch/arm/boot/dts/dove.dtsi index f4a07bb7c3a2..c78471b05ab4 100644 --- a/arch/arm/boot/dts/dove.dtsi +++ b/arch/arm/boot/dts/dove.dtsi @@ -155,7 +155,7 @@ 0xffffe000 MBUS_ID(0x03, 0x01) 0 0x0000800 /* CESA SRAM 2k */ 0xfffff000 MBUS_ID(0x0d, 0x00) 0 0x0000800>; /* PMU SRAM 2k */ - spi0: spi-ctrl@10600 { + spi0: spi@10600 { compatible = "marvell,orion-spi"; #address-cells = <1>; #size-cells = <0>; @@ -168,7 +168,7 @@ status = "disabled"; }; - i2c: i2c-ctrl@11000 { + i2c: i2c@11000 { compatible = "marvell,mv64xxx-i2c"; reg = <0x11000 0x20>; #address-cells = <1>; @@ -218,7 +218,7 @@ status = "disabled"; }; - spi1: spi-ctrl@14600 { + spi1: spi@14600 { compatible = "marvell,orion-spi"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 0bf354024ef5..fec965009b9f 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -314,6 +314,7 @@ <0 0 0 2 &pcie1_intc 2>, <0 0 0 3 &pcie1_intc 3>, <0 0 0 4 &pcie1_intc 4>; + ti,syscon-unaligned-access = <&scm_conf1 0x14 1>; status = "disabled"; pcie1_intc: interrupt-controller { interrupt-controller; @@ -367,6 +368,7 @@ <0 0 0 2 &pcie2_intc 2>, <0 0 0 3 &pcie2_intc 3>, <0 0 0 4 &pcie2_intc 4>; + ti,syscon-unaligned-access = <&scm_conf1 0x14 2>; pcie2_intc: interrupt-controller { interrupt-controller; #address-cells = <0>; @@ -1540,6 +1542,7 @@ dr_mode = "otg"; snps,dis_u3_susphy_quirk; snps,dis_u2_susphy_quirk; + snps,dis_metastability_quirk; }; }; diff --git a/arch/arm/boot/dts/dra76x.dtsi b/arch/arm/boot/dts/dra76x.dtsi index 1c88c581ff18..78d58b8af67e 100644 --- a/arch/arm/boot/dts/dra76x.dtsi +++ b/arch/arm/boot/dts/dra76x.dtsi @@ -17,3 +17,8 @@ &crossbar_mpu { ti,irqs-skip = <10 67 68 133 139 140>; }; + +&mmc3 { + /* dra76x is not affected by i887 */ + max-frequency = <96000000>; +}; diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi index aa06a02c3ff5..5ba662254909 100644 --- a/arch/arm/boot/dts/exynos3250.dtsi +++ b/arch/arm/boot/dts/exynos3250.dtsi @@ -359,7 +359,7 @@ }; hsotg: hsotg@12480000 { - compatible = "snps,dwc2"; + compatible = "samsung,s3c6400-hsotg", "snps,dwc2"; reg = <0x12480000 0x20000>; interrupts = ; clocks = <&cmu CLK_USBOTG>; diff --git a/arch/arm/boot/dts/exynos5250-arndale.dts b/arch/arm/boot/dts/exynos5250-arndale.dts index 18a7f396ac5f..abd1705635f9 100644 --- a/arch/arm/boot/dts/exynos5250-arndale.dts +++ b/arch/arm/boot/dts/exynos5250-arndale.dts @@ -169,6 +169,8 @@ reg = <0x66>; interrupt-parent = <&gpx3>; interrupts = <2 IRQ_TYPE_LEVEL_LOW>; + pinctrl-names = "default"; + pinctrl-0 = <&s5m8767_irq>; vinb1-supply = <&main_dc_reg>; vinb2-supply = <&main_dc_reg>; @@ -544,6 +546,13 @@ cap-sd-highspeed; }; +&pinctrl_0 { + s5m8767_irq: s5m8767-irq { + samsung,pins = "gpx3-2"; + samsung,pin-pud = ; + }; +}; + &rtc { status = "okay"; }; diff --git a/arch/arm/boot/dts/exynos5250-snow-rev5.dts b/arch/arm/boot/dts/exynos5250-snow-rev5.dts index 90560c316f64..cb986175b69b 100644 --- a/arch/arm/boot/dts/exynos5250-snow-rev5.dts +++ b/arch/arm/boot/dts/exynos5250-snow-rev5.dts @@ -23,6 +23,14 @@ samsung,model = "Snow-I2S-MAX98090"; samsung,audio-codec = <&max98090>; + + cpu { + sound-dai = <&i2s0 0>; + }; + + codec { + sound-dai = <&max98090 0>, <&hdmi>; + }; }; }; @@ -34,6 +42,9 @@ interrupt-parent = <&gpx0>; pinctrl-names = "default"; pinctrl-0 = <&max98090_irq>; + clocks = <&pmu_system_controller 0>; + clock-names = "mclk"; + #sound-dai-cells = <1>; }; }; diff --git a/arch/arm/boot/dts/exynos5420-peach-pit.dts b/arch/arm/boot/dts/exynos5420-peach-pit.dts index 7ccee2cfe481..442161d2acd5 100644 --- a/arch/arm/boot/dts/exynos5420-peach-pit.dts +++ b/arch/arm/boot/dts/exynos5420-peach-pit.dts @@ -301,6 +301,7 @@ regulator-name = "vdd_1v35"; regulator-min-microvolt = <1350000>; regulator-max-microvolt = <1350000>; + regulator-always-on; regulator-boot-on; regulator-state-mem { regulator-on-in-suspend; @@ -322,6 +323,7 @@ regulator-name = "vdd_2v"; regulator-min-microvolt = <2000000>; regulator-max-microvolt = <2000000>; + regulator-always-on; regulator-boot-on; regulator-state-mem { regulator-on-in-suspend; @@ -332,6 +334,7 @@ regulator-name = "vdd_1v8"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + regulator-always-on; regulator-boot-on; regulator-state-mem { regulator-on-in-suspend; diff --git a/arch/arm/boot/dts/exynos5800-peach-pi.dts b/arch/arm/boot/dts/exynos5800-peach-pi.dts index 0900b38f60b4..58af2254e521 100644 --- a/arch/arm/boot/dts/exynos5800-peach-pi.dts +++ b/arch/arm/boot/dts/exynos5800-peach-pi.dts @@ -301,6 +301,7 @@ regulator-name = "vdd_1v35"; regulator-min-microvolt = <1350000>; regulator-max-microvolt = <1350000>; + regulator-always-on; regulator-boot-on; regulator-state-mem { regulator-on-in-suspend; @@ -322,6 +323,7 @@ regulator-name = "vdd_2v"; regulator-min-microvolt = <2000000>; regulator-max-microvolt = <2000000>; + regulator-always-on; regulator-boot-on; regulator-state-mem { regulator-on-in-suspend; @@ -332,6 +334,7 @@ regulator-name = "vdd_1v8"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + regulator-always-on; regulator-boot-on; regulator-state-mem { regulator-on-in-suspend; diff --git a/arch/arm/boot/dts/gemini-sq201.dts b/arch/arm/boot/dts/gemini-sq201.dts index 63c02ca9513c..e9e2f6ff0c58 100644 --- a/arch/arm/boot/dts/gemini-sq201.dts +++ b/arch/arm/boot/dts/gemini-sq201.dts @@ -20,7 +20,7 @@ }; chosen { - bootargs = "console=ttyS0,115200n8"; + bootargs = "console=ttyS0,115200n8 root=/dev/mtdblock2 rw rootfstype=squashfs,jffs2 rootwait"; stdout-path = &uart0; }; @@ -71,37 +71,10 @@ /* 16MB of flash */ reg = <0x30000000 0x01000000>; - partition@0 { - label = "RedBoot"; - reg = <0x00000000 0x00120000>; - read-only; - }; - partition@120000 { - label = "Kernel"; - reg = <0x00120000 0x00200000>; - }; - partition@320000 { - label = "Ramdisk"; - reg = <0x00320000 0x00600000>; - }; - partition@920000 { - label = "Application"; - reg = <0x00920000 0x00600000>; - }; - partition@f20000 { - label = "VCTL"; - reg = <0x00f20000 0x00020000>; - read-only; - }; - partition@f40000 { - label = "CurConf"; - reg = <0x00f40000 0x000a0000>; - read-only; - }; - partition@fe0000 { - label = "FIS directory"; - reg = <0x00fe0000 0x00020000>; - read-only; + partitions { + compatible = "redboot-fis"; + /* Eraseblock at 0xfe0000 */ + fis-index-block = <0x1fc>; }; }; diff --git a/arch/arm/boot/dts/imx53-voipac-dmm-668.dtsi b/arch/arm/boot/dts/imx53-voipac-dmm-668.dtsi index df8dafe2564d..2297ed90ee89 100644 --- a/arch/arm/boot/dts/imx53-voipac-dmm-668.dtsi +++ b/arch/arm/boot/dts/imx53-voipac-dmm-668.dtsi @@ -17,12 +17,8 @@ memory@70000000 { device_type = "memory"; - reg = <0x70000000 0x20000000>; - }; - - memory@b0000000 { - device_type = "memory"; - reg = <0xb0000000 0x20000000>; + reg = <0x70000000 0x20000000>, + <0xb0000000 0x20000000>; }; regulators { diff --git a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi index 849eb3443cde..719e63092c2e 100644 --- a/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi +++ b/arch/arm/boot/dts/imx6qdl-zii-rdu2.dtsi @@ -587,7 +587,7 @@ pinctrl-0 = <&pinctrl_usdhc2>; bus-width = <4>; cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 3 GPIO_ACTIVE_HIGH>; + disable-wp; vmmc-supply = <®_3p3v_sd>; vqmmc-supply = <®_3p3v>; status = "okay"; @@ -598,7 +598,7 @@ pinctrl-0 = <&pinctrl_usdhc3>; bus-width = <4>; cd-gpios = <&gpio2 0 GPIO_ACTIVE_LOW>; - wp-gpios = <&gpio2 1 GPIO_ACTIVE_HIGH>; + disable-wp; vmmc-supply = <®_3p3v_sd>; vqmmc-supply = <®_3p3v>; status = "okay"; @@ -1001,7 +1001,6 @@ MX6QDL_PAD_SD2_DAT1__SD2_DATA1 0x17059 MX6QDL_PAD_SD2_DAT2__SD2_DATA2 0x17059 MX6QDL_PAD_SD2_DAT3__SD2_DATA3 0x17059 - MX6QDL_PAD_NANDF_D3__GPIO2_IO03 0x40010040 MX6QDL_PAD_NANDF_D2__GPIO2_IO02 0x40010040 >; }; @@ -1014,7 +1013,6 @@ MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17059 MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17059 MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17059 - MX6QDL_PAD_NANDF_D1__GPIO2_IO01 0x40010040 MX6QDL_PAD_NANDF_D0__GPIO2_IO00 0x40010040 >; diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index bf15efbe8a71..836550f2297a 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -450,7 +450,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x302d0000 0x10000>; interrupts = ; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT1_ROOT_CLK>, <&clks IMX7D_GPT1_ROOT_CLK>; clock-names = "ipg", "per"; }; @@ -459,7 +459,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x302e0000 0x10000>; interrupts = ; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT2_ROOT_CLK>, <&clks IMX7D_GPT2_ROOT_CLK>; clock-names = "ipg", "per"; status = "disabled"; @@ -469,7 +469,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x302f0000 0x10000>; interrupts = ; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT3_ROOT_CLK>, <&clks IMX7D_GPT3_ROOT_CLK>; clock-names = "ipg", "per"; status = "disabled"; @@ -479,7 +479,7 @@ compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; reg = <0x30300000 0x10000>; interrupts = ; - clocks = <&clks IMX7D_CLK_DUMMY>, + clocks = <&clks IMX7D_GPT4_ROOT_CLK>, <&clks IMX7D_GPT4_ROOT_CLK>; clock-names = "ipg", "per"; status = "disabled"; diff --git a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi index fe4cbdc72359..7265d7072b5c 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-som.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-som.dtsi @@ -270,3 +270,7 @@ &twl_gpio { ti,use-leds; }; + +&twl_keypad { + status = "disabled"; +}; diff --git a/arch/arm/boot/dts/lpc3250-phy3250.dts b/arch/arm/boot/dts/lpc3250-phy3250.dts index b7bd3a110a8d..dd0bdf765599 100644 --- a/arch/arm/boot/dts/lpc3250-phy3250.dts +++ b/arch/arm/boot/dts/lpc3250-phy3250.dts @@ -49,8 +49,8 @@ sd_reg: regulator@2 { compatible = "regulator-fixed"; regulator-name = "sd_reg"; - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <1800000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; gpio = <&gpio 5 5 0>; enable-active-high; }; diff --git a/arch/arm/boot/dts/lpc32xx.dtsi b/arch/arm/boot/dts/lpc32xx.dtsi index f22a33a01819..c5b119ddb70b 100644 --- a/arch/arm/boot/dts/lpc32xx.dtsi +++ b/arch/arm/boot/dts/lpc32xx.dtsi @@ -139,11 +139,11 @@ }; clcd: clcd@31040000 { - compatible = "arm,pl110", "arm,primecell"; + compatible = "arm,pl111", "arm,primecell"; reg = <0x31040000 0x1000>; interrupts = <14 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clk LPC32XX_CLK_LCD>; - clock-names = "apb_pclk"; + clocks = <&clk LPC32XX_CLK_LCD>, <&clk LPC32XX_CLK_LCD>; + clock-names = "clcdclk", "apb_pclk"; status = "disabled"; }; @@ -179,7 +179,7 @@ * ssp0 and spi1 are shared pins; * enable one in your board dts, as needed. */ - ssp0: ssp@20084000 { + ssp0: spi@20084000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x20084000 0x1000>; interrupts = <20 IRQ_TYPE_LEVEL_HIGH>; @@ -199,7 +199,7 @@ * ssp1 and spi2 are shared pins; * enable one in your board dts, as needed. */ - ssp1: ssp@2008c000 { + ssp1: spi@2008c000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x2008c000 0x1000>; interrupts = <21 IRQ_TYPE_LEVEL_HIGH>; @@ -462,7 +462,9 @@ key: key@40050000 { compatible = "nxp,lpc3220-key"; reg = <0x40050000 0x1000>; - interrupts = <54 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk LPC32XX_CLK_KEY>; + interrupt-parent = <&sic1>; + interrupts = <22 IRQ_TYPE_LEVEL_HIGH>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/ls1021a-twr.dts b/arch/arm/boot/dts/ls1021a-twr.dts index 44715c8ef756..72a3fc63d0ec 100644 --- a/arch/arm/boot/dts/ls1021a-twr.dts +++ b/arch/arm/boot/dts/ls1021a-twr.dts @@ -143,7 +143,7 @@ }; &enet0 { - tbi-handle = <&tbi1>; + tbi-handle = <&tbi0>; phy-handle = <&sgmii_phy2>; phy-connection-type = "sgmii"; status = "okay"; @@ -222,6 +222,13 @@ sgmii_phy2: ethernet-phy@2 { reg = <0x2>; }; + tbi0: tbi-phy@1f { + reg = <0x1f>; + device_type = "tbi-phy"; + }; +}; + +&mdio1 { tbi1: tbi-phy@1f { reg = <0x1f>; device_type = "tbi-phy"; diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi index 2d20f60947b9..68f4482c35e2 100644 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@ -569,6 +569,15 @@ reg = <0x0 0x2d24000 0x0 0x4000>; }; + mdio1: mdio@2d64000 { + compatible = "gianfar"; + device_type = "mdio"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x2d64000 0x0 0x4000>, + <0x0 0x2d50030 0x0 0x4>; + }; + ptp_clock@2d10e00 { compatible = "fsl,etsec-ptp"; reg = <0x0 0x2d10e00 0x0 0xb0>; diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi index b98d44fde6b6..e3ae85d65b39 100644 --- a/arch/arm/boot/dts/meson8.dtsi +++ b/arch/arm/boot/dts/meson8.dtsi @@ -170,7 +170,7 @@ #clock-cells = <1>; #reset-cells = <1>; compatible = "amlogic,meson8-clkc"; - reg = <0x8000 0x4>, <0x4000 0x460>; + reg = <0x8000 0x4>, <0x4000 0x400>; }; pwm_ef: pwm@86c0 { diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi index bc278da7df0d..0f76da280ee7 100644 --- a/arch/arm/boot/dts/meson8b.dtsi +++ b/arch/arm/boot/dts/meson8b.dtsi @@ -121,7 +121,7 @@ #clock-cells = <1>; #reset-cells = <1>; compatible = "amlogic,meson8b-clkc"; - reg = <0x8000 0x4>, <0x4000 0x460>; + reg = <0x8000 0x4>, <0x4000 0x400>; }; reset: reset-controller@4404 { diff --git a/arch/arm/boot/dts/mmp2.dtsi b/arch/arm/boot/dts/mmp2.dtsi index 47e5b63339d1..e95deed6a797 100644 --- a/arch/arm/boot/dts/mmp2.dtsi +++ b/arch/arm/boot/dts/mmp2.dtsi @@ -180,7 +180,7 @@ clocks = <&soc_clocks MMP2_CLK_GPIO>; resets = <&soc_clocks MMP2_CLK_GPIO>; interrupt-controller; - #interrupt-cells = <1>; + #interrupt-cells = <2>; ranges; gcb0: gpio@d4019000 { diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index 4504908c23fe..e83d0619b3b7 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -28,6 +28,7 @@ aliases { display0 = &lcd; + display1 = &tv0; }; gpio-keys { @@ -71,7 +72,7 @@ #sound-dai-cells = <0>; }; - spi_lcd { + spi_lcd: spi_lcd { compatible = "spi-gpio"; #address-cells = <0x1>; #size-cells = <0x0>; @@ -123,7 +124,7 @@ }; tv0: connector { - compatible = "svideo-connector"; + compatible = "composite-video-connector"; label = "tv"; port { @@ -135,7 +136,7 @@ tv_amp: opa362 { compatible = "ti,opa362"; - enable-gpios = <&gpio1 23 GPIO_ACTIVE_HIGH>; + enable-gpios = <&gpio1 23 GPIO_ACTIVE_HIGH>; /* GPIO_23 to enable video out amplifier */ ports { #address-cells = <1>; @@ -274,6 +275,13 @@ OMAP3_CORE1_IOPAD(0x2134, PIN_INPUT_PULLUP | MUX_MODE4) /* gpio112 */ >; }; + + penirq_pins: pinmux_penirq_pins { + pinctrl-single,pins = < + /* here we could enable to wakeup the cpu from suspend by a pen touch */ + OMAP3_CORE1_IOPAD(0x2194, PIN_INPUT_PULLUP | MUX_MODE4) /* gpio160 */ + >; + }; }; &omap3_pmx_core2 { @@ -411,10 +419,19 @@ tsc2007@48 { compatible = "ti,tsc2007"; reg = <0x48>; + pinctrl-names = "default"; + pinctrl-0 = <&penirq_pins>; interrupt-parent = <&gpio6>; interrupts = <0 IRQ_TYPE_EDGE_FALLING>; /* GPIO_160 */ - gpios = <&gpio6 0 GPIO_ACTIVE_LOW>; + gpios = <&gpio6 0 GPIO_ACTIVE_LOW>; /* GPIO_160 */ ti,x-plate-ohms = <600>; + touchscreen-size-x = <480>; + touchscreen-size-y = <640>; + touchscreen-max-pressure = <1000>; + touchscreen-fuzz-x = <3>; + touchscreen-fuzz-y = <8>; + touchscreen-fuzz-pressure = <10>; + touchscreen-inverted-y; }; /* RFID EEPROM */ @@ -520,6 +537,12 @@ regulator-max-microvolt = <3150000>; }; +/* Needed to power the DPI pins */ + +&vpll2 { + regulator-always-on; +}; + &dss { pinctrl-names = "default"; pinctrl-0 = < &dss_dpi_pins >; @@ -540,10 +563,14 @@ vdda-supply = <&vdac>; + #address-cells = <1>; + #size-cells = <0>; + port { + reg = <0>; venc_out: endpoint { remote-endpoint = <&opa_in>; - ti,channels = <2>; + ti,channels = <1>; ti,invert-polarity; }; }; @@ -587,22 +614,22 @@ bootloaders@80000 { label = "U-Boot"; - reg = <0x80000 0x1e0000>; + reg = <0x80000 0x1c0000>; }; - bootloaders_env@260000 { + bootloaders_env@240000 { label = "U-Boot Env"; - reg = <0x260000 0x20000>; + reg = <0x240000 0x40000>; }; kernel@280000 { label = "Kernel"; - reg = <0x280000 0x400000>; + reg = <0x280000 0x600000>; }; - filesystem@680000 { + filesystem@880000 { label = "File System"; - reg = <0x680000 0xf980000>; + reg = <0x880000 0>; /* 0 = MTDPART_SIZ_FULL */ }; }; }; diff --git a/arch/arm/boot/dts/omap3-pandora-common.dtsi b/arch/arm/boot/dts/omap3-pandora-common.dtsi index 53e007abdc71..964240a0f4a9 100644 --- a/arch/arm/boot/dts/omap3-pandora-common.dtsi +++ b/arch/arm/boot/dts/omap3-pandora-common.dtsi @@ -221,6 +221,17 @@ gpio = <&gpio6 4 GPIO_ACTIVE_HIGH>; /* GPIO_164 */ }; + /* wl1251 wifi+bt module */ + wlan_en: fixed-regulator-wg7210_en { + compatible = "regulator-fixed"; + regulator-name = "vwlan"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + startup-delay-us = <50000>; + enable-active-high; + gpio = <&gpio1 23 GPIO_ACTIVE_HIGH>; + }; + /* wg7210 (wifi+bt module) 32k clock buffer */ wg7210_32k: fixed-regulator-wg7210_32k { compatible = "regulator-fixed"; @@ -514,9 +525,30 @@ /*wp-gpios = <&gpio4 31 GPIO_ACTIVE_HIGH>;*/ /* GPIO_127 */ }; -/* mmc3 is probed using pdata-quirks to pass wl1251 card data */ &mmc3 { - status = "disabled"; + vmmc-supply = <&wlan_en>; + + bus-width = <4>; + non-removable; + ti,non-removable; + cap-power-off-card; + + pinctrl-names = "default"; + pinctrl-0 = <&mmc3_pins>; + + #address-cells = <1>; + #size-cells = <0>; + + wlan: wifi@1 { + compatible = "ti,wl1251"; + + reg = <1>; + + interrupt-parent = <&gpio1>; + interrupts = <21 IRQ_TYPE_LEVEL_HIGH>; /* GPIO_21 */ + + ti,wl1251-has-eeprom; + }; }; /* bluetooth*/ diff --git a/arch/arm/boot/dts/omap3-tao3530.dtsi b/arch/arm/boot/dts/omap3-tao3530.dtsi index 9a601d15247b..5b7bda74752b 100644 --- a/arch/arm/boot/dts/omap3-tao3530.dtsi +++ b/arch/arm/boot/dts/omap3-tao3530.dtsi @@ -224,7 +224,7 @@ pinctrl-0 = <&mmc1_pins>; vmmc-supply = <&vmmc1>; vqmmc-supply = <&vsim>; - cd-gpios = <&twl_gpio 0 GPIO_ACTIVE_HIGH>; + cd-gpios = <&twl_gpio 0 GPIO_ACTIVE_LOW>; bus-width = <8>; }; diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index 7824b2631cb6..c58f14de0145 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -694,6 +694,11 @@ vbus-supply = <&smps10_out1_reg>; }; +&dwc3 { + extcon = <&extcon_usb3>; + dr_mode = "otg"; +}; + &mcspi1 { }; diff --git a/arch/arm/boot/dts/orion5x-linkstation.dtsi b/arch/arm/boot/dts/orion5x-linkstation.dtsi index e9991c83d7b7..117d71546ed0 100644 --- a/arch/arm/boot/dts/orion5x-linkstation.dtsi +++ b/arch/arm/boot/dts/orion5x-linkstation.dtsi @@ -156,7 +156,7 @@ &i2c { status = "okay"; - rtc { + rtc@32 { compatible = "ricoh,rs5c372a"; reg = <0x32>; }; diff --git a/arch/arm/boot/dts/pxa25x.dtsi b/arch/arm/boot/dts/pxa25x.dtsi index 95d59be97213..8494b5787170 100644 --- a/arch/arm/boot/dts/pxa25x.dtsi +++ b/arch/arm/boot/dts/pxa25x.dtsi @@ -80,6 +80,10 @@ #pwm-cells = <1>; clocks = <&clks CLK_PWM1>; }; + + rtc@40900000 { + clocks = <&clks CLK_OSC32k768>; + }; }; timer@40a00000 { diff --git a/arch/arm/boot/dts/pxa27x.dtsi b/arch/arm/boot/dts/pxa27x.dtsi index 747f750f675d..ccbecad9c5c7 100644 --- a/arch/arm/boot/dts/pxa27x.dtsi +++ b/arch/arm/boot/dts/pxa27x.dtsi @@ -35,7 +35,7 @@ clocks = <&clks CLK_NONE>; }; - pxa27x_ohci: usb@4c000000 { + usb0: usb@4c000000 { compatible = "marvell,pxa-ohci"; reg = <0x4c000000 0x10000>; interrupts = <3>; @@ -71,7 +71,7 @@ clocks = <&clks CLK_PWM1>; }; - pwri2c: i2c@40f000180 { + pwri2c: i2c@40f00180 { compatible = "mrvl,pxa-i2c"; reg = <0x40f00180 0x24>; interrupts = <6>; @@ -113,6 +113,10 @@ status = "disabled"; }; + + rtc@40900000 { + clocks = <&clks CLK_OSC32k768>; + }; }; clocks { diff --git a/arch/arm/boot/dts/pxa2xx.dtsi b/arch/arm/boot/dts/pxa2xx.dtsi index e4ebcde17837..a03bca81ae8a 100644 --- a/arch/arm/boot/dts/pxa2xx.dtsi +++ b/arch/arm/boot/dts/pxa2xx.dtsi @@ -117,13 +117,6 @@ status = "disabled"; }; - usb0: ohci@4c000000 { - compatible = "marvell,pxa-ohci"; - reg = <0x4c000000 0x10000>; - interrupts = <3>; - status = "disabled"; - }; - mmc0: mmc@41100000 { compatible = "marvell,pxa-mmc"; reg = <0x41100000 0x1000>; diff --git a/arch/arm/boot/dts/pxa3xx.dtsi b/arch/arm/boot/dts/pxa3xx.dtsi index 55c75b67351c..affa5b6f6da1 100644 --- a/arch/arm/boot/dts/pxa3xx.dtsi +++ b/arch/arm/boot/dts/pxa3xx.dtsi @@ -189,7 +189,7 @@ status = "disabled"; }; - pxa3xx_ohci: usb@4c000000 { + usb0: usb@4c000000 { compatible = "marvell,pxa-ohci"; reg = <0x4c000000 0x10000>; interrupts = <3>; diff --git a/arch/arm/boot/dts/qcom-ipq4019.dtsi b/arch/arm/boot/dts/qcom-ipq4019.dtsi index 10d112a4078e..19156cbb6003 100644 --- a/arch/arm/boot/dts/qcom-ipq4019.dtsi +++ b/arch/arm/boot/dts/qcom-ipq4019.dtsi @@ -234,7 +234,7 @@ saw0: regulator@b089000 { compatible = "qcom,saw2"; - reg = <0x02089000 0x1000>, <0x0b009000 0x1000>; + reg = <0x0b089000 0x1000>, <0x0b009000 0x1000>; regulator; }; diff --git a/arch/arm/boot/dts/r8a7779.dtsi b/arch/arm/boot/dts/r8a7779.dtsi index 8ee0b2ca5d39..2face089d65b 100644 --- a/arch/arm/boot/dts/r8a7779.dtsi +++ b/arch/arm/boot/dts/r8a7779.dtsi @@ -67,6 +67,14 @@ <0xf0000100 0x100>; }; + timer@f0000200 { + compatible = "arm,cortex-a9-global-timer"; + reg = <0xf0000200 0x100>; + interrupts = ; + clocks = <&cpg_clocks R8A7779_CLK_ZS>; + }; + timer@f0000600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0xf0000600 0x20>; diff --git a/arch/arm/boot/dts/rk3036.dtsi b/arch/arm/boot/dts/rk3036.dtsi index 5c0a76493d22..03cf0c84ac0a 100644 --- a/arch/arm/boot/dts/rk3036.dtsi +++ b/arch/arm/boot/dts/rk3036.dtsi @@ -750,7 +750,7 @@ /* no rts / cts for uart2 */ }; - spi { + spi-pins { spi_txd:spi-txd { rockchip,pins = <1 29 RK_FUNC_3 &pcfg_pull_default>; }; diff --git a/arch/arm/boot/dts/rk3188-radxarock.dts b/arch/arm/boot/dts/rk3188-radxarock.dts index 53d6fc2fdbce..541a798d3d20 100644 --- a/arch/arm/boot/dts/rk3188-radxarock.dts +++ b/arch/arm/boot/dts/rk3188-radxarock.dts @@ -130,6 +130,8 @@ regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; gpio = <&gpio3 RK_PA1 GPIO_ACTIVE_LOW>; + pinctrl-names = "default"; + pinctrl-0 = <&sdmmc_pwr>; startup-delay-us = <100000>; vin-supply = <&vcc_io>; }; @@ -348,6 +350,12 @@ }; }; + sd0 { + sdmmc_pwr: sdmmc-pwr { + rockchip,pins = ; + }; + }; + usb { host_vbus_drv: host-vbus-drv { rockchip,pins = <0 3 RK_FUNC_GPIO &pcfg_pull_none>; diff --git a/arch/arm/boot/dts/rk3288-rock2-som.dtsi b/arch/arm/boot/dts/rk3288-rock2-som.dtsi index b9c471fcbd42..862c2248fcb6 100644 --- a/arch/arm/boot/dts/rk3288-rock2-som.dtsi +++ b/arch/arm/boot/dts/rk3288-rock2-som.dtsi @@ -63,7 +63,7 @@ vcc_flash: flash-regulator { compatible = "regulator-fixed"; - regulator-name = "vcc_sys"; + regulator-name = "vcc_flash"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; startup-delay-us = <150>; diff --git a/arch/arm/boot/dts/rv1108.dtsi b/arch/arm/boot/dts/rv1108.dtsi index e7cd1315db1b..aa4119eaea98 100644 --- a/arch/arm/boot/dts/rv1108.dtsi +++ b/arch/arm/boot/dts/rv1108.dtsi @@ -101,7 +101,7 @@ arm-pmu { compatible = "arm,cortex-a7-pmu"; - interrupts = ; + interrupts = ; }; timer { @@ -522,7 +522,7 @@ compatible = "rockchip,gpio-bank"; reg = <0x20030000 0x100>; interrupts = ; - clocks = <&xin24m>; + clocks = <&cru PCLK_GPIO0_PMU>; gpio-controller; #gpio-cells = <2>; @@ -535,7 +535,7 @@ compatible = "rockchip,gpio-bank"; reg = <0x10310000 0x100>; interrupts = ; - clocks = <&xin24m>; + clocks = <&cru PCLK_GPIO1>; gpio-controller; #gpio-cells = <2>; @@ -548,7 +548,7 @@ compatible = "rockchip,gpio-bank"; reg = <0x10320000 0x100>; interrupts = ; - clocks = <&xin24m>; + clocks = <&cru PCLK_GPIO2>; gpio-controller; #gpio-cells = <2>; @@ -561,7 +561,7 @@ compatible = "rockchip,gpio-bank"; reg = <0x10330000 0x100>; interrupts = ; - clocks = <&xin24m>; + clocks = <&cru PCLK_GPIO3>; gpio-controller; #gpio-cells = <2>; diff --git a/arch/arm/boot/dts/s3c6410-mini6410.dts b/arch/arm/boot/dts/s3c6410-mini6410.dts index f4afda3594f8..de04d8764b0f 100644 --- a/arch/arm/boot/dts/s3c6410-mini6410.dts +++ b/arch/arm/boot/dts/s3c6410-mini6410.dts @@ -167,6 +167,10 @@ }; }; +&clocks { + clocks = <&fin_pll>; +}; + &sdhci0 { pinctrl-names = "default"; pinctrl-0 = <&sd0_clk>, <&sd0_cmd>, <&sd0_cd>, <&sd0_bus4>; diff --git a/arch/arm/boot/dts/s3c6410-smdk6410.dts b/arch/arm/boot/dts/s3c6410-smdk6410.dts index ecf35ec466f7..7ade1a0686d2 100644 --- a/arch/arm/boot/dts/s3c6410-smdk6410.dts +++ b/arch/arm/boot/dts/s3c6410-smdk6410.dts @@ -71,6 +71,10 @@ }; }; +&clocks { + clocks = <&fin_pll>; +}; + &sdhci0 { pinctrl-names = "default"; pinctrl-0 = <&sd0_clk>, <&sd0_cmd>, <&sd0_cd>, <&sd0_bus4>; diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi index 554d0bdedc7a..f96b41ed5b96 100644 --- a/arch/arm/boot/dts/sama5d3.dtsi +++ b/arch/arm/boot/dts/sama5d3.dtsi @@ -1185,49 +1185,49 @@ usart0_clk: usart0_clk { #clock-cells = <0>; reg = <12>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; usart1_clk: usart1_clk { #clock-cells = <0>; reg = <13>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; usart2_clk: usart2_clk { #clock-cells = <0>; reg = <14>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; usart3_clk: usart3_clk { #clock-cells = <0>; reg = <15>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; uart0_clk: uart0_clk { #clock-cells = <0>; reg = <16>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; twi0_clk: twi0_clk { reg = <18>; #clock-cells = <0>; - atmel,clk-output-range = <0 16625000>; + atmel,clk-output-range = <0 41500000>; }; twi1_clk: twi1_clk { #clock-cells = <0>; reg = <19>; - atmel,clk-output-range = <0 16625000>; + atmel,clk-output-range = <0 41500000>; }; twi2_clk: twi2_clk { #clock-cells = <0>; reg = <20>; - atmel,clk-output-range = <0 16625000>; + atmel,clk-output-range = <0 41500000>; }; mci0_clk: mci0_clk { @@ -1243,19 +1243,19 @@ spi0_clk: spi0_clk { #clock-cells = <0>; reg = <24>; - atmel,clk-output-range = <0 133000000>; + atmel,clk-output-range = <0 166000000>; }; spi1_clk: spi1_clk { #clock-cells = <0>; reg = <25>; - atmel,clk-output-range = <0 133000000>; + atmel,clk-output-range = <0 166000000>; }; tcb0_clk: tcb0_clk { #clock-cells = <0>; reg = <26>; - atmel,clk-output-range = <0 133000000>; + atmel,clk-output-range = <0 166000000>; }; pwm_clk: pwm_clk { @@ -1266,7 +1266,7 @@ adc_clk: adc_clk { #clock-cells = <0>; reg = <29>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; dma0_clk: dma0_clk { @@ -1297,13 +1297,13 @@ ssc0_clk: ssc0_clk { #clock-cells = <0>; reg = <38>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; ssc1_clk: ssc1_clk { #clock-cells = <0>; reg = <39>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; sha_clk: sha_clk { diff --git a/arch/arm/boot/dts/sama5d3_can.dtsi b/arch/arm/boot/dts/sama5d3_can.dtsi index c5a3772741bf..0fac79f75c06 100644 --- a/arch/arm/boot/dts/sama5d3_can.dtsi +++ b/arch/arm/boot/dts/sama5d3_can.dtsi @@ -37,13 +37,13 @@ can0_clk: can0_clk { #clock-cells = <0>; reg = <40>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; can1_clk: can1_clk { #clock-cells = <0>; reg = <41>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; }; }; diff --git a/arch/arm/boot/dts/sama5d3_tcb1.dtsi b/arch/arm/boot/dts/sama5d3_tcb1.dtsi index 801f9745e82f..b80dbc45a3c2 100644 --- a/arch/arm/boot/dts/sama5d3_tcb1.dtsi +++ b/arch/arm/boot/dts/sama5d3_tcb1.dtsi @@ -23,6 +23,7 @@ tcb1_clk: tcb1_clk { #clock-cells = <0>; reg = <27>; + atmel,clk-output-range = <0 166000000>; }; }; }; diff --git a/arch/arm/boot/dts/sama5d3_uart.dtsi b/arch/arm/boot/dts/sama5d3_uart.dtsi index 186377d41c91..48e23d18e5e3 100644 --- a/arch/arm/boot/dts/sama5d3_uart.dtsi +++ b/arch/arm/boot/dts/sama5d3_uart.dtsi @@ -42,13 +42,13 @@ uart0_clk: uart0_clk { #clock-cells = <0>; reg = <16>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; uart1_clk: uart1_clk { #clock-cells = <0>; reg = <17>; - atmel,clk-output-range = <0 66000000>; + atmel,clk-output-range = <0 83000000>; }; }; }; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_de0_sockit.dts b/arch/arm/boot/dts/socfpga_cyclone5_de0_sockit.dts index b280e6494193..31b01a998b2e 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_de0_sockit.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_de0_sockit.dts @@ -88,7 +88,7 @@ status = "okay"; clock-frequency = <100000>; - adxl345: adxl345@0 { + adxl345: adxl345@53 { compatible = "adi,adxl345"; reg = <0x53>; diff --git a/arch/arm/boot/dts/ste-dbx5x0.dtsi b/arch/arm/boot/dts/ste-dbx5x0.dtsi index 2310a4e97768..986767735e24 100644 --- a/arch/arm/boot/dts/ste-dbx5x0.dtsi +++ b/arch/arm/boot/dts/ste-dbx5x0.dtsi @@ -197,7 +197,7 @@ <0xa0410100 0x100>; }; - scu@a04100000 { + scu@a0410000 { compatible = "arm,cortex-a9-scu"; reg = <0xa0410000 0x100>; }; @@ -878,7 +878,7 @@ power-domains = <&pm_domains DOMAIN_VAPE>; }; - ssp@80002000 { + spi@80002000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x80002000 0x1000>; interrupts = ; @@ -892,7 +892,7 @@ power-domains = <&pm_domains DOMAIN_VAPE>; }; - ssp@80003000 { + spi@80003000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x80003000 0x1000>; interrupts = ; diff --git a/arch/arm/boot/dts/ste-href-family-pinctrl.dtsi b/arch/arm/boot/dts/ste-href-family-pinctrl.dtsi index 5c5cea232743..1ec193b0c506 100644 --- a/arch/arm/boot/dts/ste-href-family-pinctrl.dtsi +++ b/arch/arm/boot/dts/ste-href-family-pinctrl.dtsi @@ -607,16 +607,20 @@ mcde { lcd_default_mode: lcd_default { - default_mux { + default_mux1 { /* Mux in VSI0 and all the data lines */ function = "lcd"; groups = "lcdvsi0_a_1", /* VSI0 for LCD */ "lcd_d0_d7_a_1", /* Data lines */ "lcd_d8_d11_a_1", /* TV-out */ - "lcdaclk_b_1", /* Clock line for TV-out */ "lcdvsi1_a_1"; /* VSI1 for HDMI */ }; + default_mux2 { + function = "lcda"; + groups = + "lcdaclk_b_1"; /* Clock line for TV-out */ + }; default_cfg1 { pins = "GPIO68_E1", /* VSI0 */ diff --git a/arch/arm/boot/dts/ste-hrefprev60.dtsi b/arch/arm/boot/dts/ste-hrefprev60.dtsi index 3f14b4df69b4..94eeb7f1c947 100644 --- a/arch/arm/boot/dts/ste-hrefprev60.dtsi +++ b/arch/arm/boot/dts/ste-hrefprev60.dtsi @@ -57,7 +57,7 @@ }; }; - ssp@80002000 { + spi@80002000 { /* * On the first generation boards, this SSP/SPI port was connected * to the AB8500. diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts index ade1d0d4e5f4..1bf4358f8fa7 100644 --- a/arch/arm/boot/dts/ste-snowball.dts +++ b/arch/arm/boot/dts/ste-snowball.dts @@ -376,7 +376,7 @@ pinctrl-1 = <&i2c3_sleep_mode>; }; - ssp@80002000 { + spi@80002000 { pinctrl-names = "default"; pinctrl-0 = <&ssp0_snowball_mode>; }; diff --git a/arch/arm/boot/dts/ste-u300.dts b/arch/arm/boot/dts/ste-u300.dts index 62ecb6a2fa39..1bd1aba3322f 100644 --- a/arch/arm/boot/dts/ste-u300.dts +++ b/arch/arm/boot/dts/ste-u300.dts @@ -442,7 +442,7 @@ dma-names = "rx"; }; - spi: ssp@c0006000 { + spi: spi@c0006000 { compatible = "arm,pl022", "arm,primecell"; reg = <0xc0006000 0x1000>; interrupt-parent = <&vica>; diff --git a/arch/arm/boot/dts/stm32h743i-eval.dts b/arch/arm/boot/dts/stm32h743i-eval.dts index 6c07786e7ddb..0d98b2865bd7 100644 --- a/arch/arm/boot/dts/stm32h743i-eval.dts +++ b/arch/arm/boot/dts/stm32h743i-eval.dts @@ -71,6 +71,7 @@ }; &adc_12 { + vdda-supply = <&vdda>; vref-supply = <&vdda>; status = "okay"; adc1: adc@0 { diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi index 18f25c5e75ae..396fb6632bf0 100644 --- a/arch/arm/boot/dts/sun5i-a10s.dtsi +++ b/arch/arm/boot/dts/sun5i-a10s.dtsi @@ -104,8 +104,6 @@ }; hdmi_out: port@1 { - #address-cells = <1>; - #size-cells = <0>; reg = <1>; }; }; diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi index eef072a21acc..0bb82d0442a5 100644 --- a/arch/arm/boot/dts/sun6i-a31.dtsi +++ b/arch/arm/boot/dts/sun6i-a31.dtsi @@ -173,7 +173,7 @@ }; pmu { - compatible = "arm,cortex-a7-pmu", "arm,cortex-a15-pmu"; + compatible = "arm,cortex-a7-pmu"; interrupts = , , , diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 96bee776e145..77f04dbdf996 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -171,7 +171,7 @@ }; pmu { - compatible = "arm,cortex-a7-pmu", "arm,cortex-a15-pmu"; + compatible = "arm,cortex-a7-pmu"; interrupts = , ; }; diff --git a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts index 716a205c6dbb..1fed3231f5c1 100644 --- a/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts +++ b/arch/arm/boot/dts/sun8i-a83t-cubietruck-plus.dts @@ -90,7 +90,7 @@ initial-mode = <1>; /* initialize in HUB mode */ disabled-ports = <1>; intn-gpios = <&pio 7 5 GPIO_ACTIVE_HIGH>; /* PH5 */ - reset-gpios = <&pio 4 16 GPIO_ACTIVE_HIGH>; /* PE16 */ + reset-gpios = <&pio 4 16 GPIO_ACTIVE_LOW>; /* PE16 */ connect-gpios = <&pio 4 17 GPIO_ACTIVE_HIGH>; /* PE17 */ refclk-frequency = <19200000>; }; diff --git a/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts b/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts index 10da56e86ab8..21b38c386f1b 100644 --- a/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts +++ b/arch/arm/boot/dts/sun8i-h3-beelink-x2.dts @@ -79,6 +79,8 @@ wifi_pwrseq: wifi_pwrseq { compatible = "mmc-pwrseq-simple"; reset-gpios = <&r_pio 0 7 GPIO_ACTIVE_LOW>; /* PL7 */ + clocks = <&rtc 1>; + clock-names = "ext_clock"; }; sound_spdif { @@ -128,6 +130,8 @@ pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins_a>; vmmc-supply = <®_vcc3v3>; + vqmmc-supply = <®_vcc3v3>; + mmc-pwrseq = <&wifi_pwrseq>; bus-width = <4>; non-removable; status = "okay"; diff --git a/arch/arm/boot/dts/sun8i-v3s-licheepi-zero.dts b/arch/arm/boot/dts/sun8i-v3s-licheepi-zero.dts index 387fc2aa546d..333df90e8037 100644 --- a/arch/arm/boot/dts/sun8i-v3s-licheepi-zero.dts +++ b/arch/arm/boot/dts/sun8i-v3s-licheepi-zero.dts @@ -78,7 +78,7 @@ }; &mmc0 { - pinctrl-0 = <&mmc0_pins_a>; + pinctrl-0 = <&mmc0_pins>; pinctrl-names = "default"; broken-cd; bus-width = <4>; @@ -87,7 +87,7 @@ }; &uart0 { - pinctrl-0 = <&uart0_pins_a>; + pinctrl-0 = <&uart0_pb_pins>; pinctrl-names = "default"; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun8i-v3s.dtsi b/arch/arm/boot/dts/sun8i-v3s.dtsi index 3a06dc5b3746..da5823c6fa3e 100644 --- a/arch/arm/boot/dts/sun8i-v3s.dtsi +++ b/arch/arm/boot/dts/sun8i-v3s.dtsi @@ -292,17 +292,17 @@ interrupt-controller; #interrupt-cells = <3>; - i2c0_pins: i2c0 { + i2c0_pins: i2c0-pins { pins = "PB6", "PB7"; function = "i2c0"; }; - uart0_pins_a: uart0@0 { + uart0_pb_pins: uart0-pb-pins { pins = "PB8", "PB9"; function = "uart0"; }; - mmc0_pins_a: mmc0@0 { + mmc0_pins: mmc0-pins { pins = "PF0", "PF1", "PF2", "PF3", "PF4", "PF5"; function = "mmc0"; @@ -310,7 +310,7 @@ bias-pull-up; }; - mmc1_pins: mmc1 { + mmc1_pins: mmc1-pins { pins = "PG0", "PG1", "PG2", "PG3", "PG4", "PG5"; function = "mmc1"; @@ -318,7 +318,7 @@ bias-pull-up; }; - spi0_pins: spi0 { + spi0_pins: spi0-pins { pins = "PC0", "PC1", "PC2", "PC3"; function = "spi0"; }; diff --git a/arch/arm/boot/dts/sunxi-h3-h5.dtsi b/arch/arm/boot/dts/sunxi-h3-h5.dtsi index 11240a8313c2..03f37081fc64 100644 --- a/arch/arm/boot/dts/sunxi-h3-h5.dtsi +++ b/arch/arm/boot/dts/sunxi-h3-h5.dtsi @@ -594,7 +594,7 @@ clock-names = "apb", "ir"; resets = <&r_ccu RST_APB0_IR>; interrupts = ; - reg = <0x01f02000 0x40>; + reg = <0x01f02000 0x400>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/tegra20-paz00.dts b/arch/arm/boot/dts/tegra20-paz00.dts index 30436969adc0..1b8db91277b1 100644 --- a/arch/arm/boot/dts/tegra20-paz00.dts +++ b/arch/arm/boot/dts/tegra20-paz00.dts @@ -524,10 +524,10 @@ gpio-keys { compatible = "gpio-keys"; - power { - label = "Power"; + wakeup { + label = "Wakeup"; gpios = <&gpio TEGRA_GPIO(J, 7) GPIO_ACTIVE_LOW>; - linux,code = ; + linux,code = ; wakeup-source; }; }; diff --git a/arch/arm/boot/dts/tegra30-apalis.dtsi b/arch/arm/boot/dts/tegra30-apalis.dtsi index faa8cd2914e8..b9368d40bc6f 100644 --- a/arch/arm/boot/dts/tegra30-apalis.dtsi +++ b/arch/arm/boot/dts/tegra30-apalis.dtsi @@ -166,14 +166,14 @@ /* Apalis MMC1 */ sdmmc3_clk_pa6 { - nvidia,pins = "sdmmc3_clk_pa6", - "sdmmc3_cmd_pa7"; + nvidia,pins = "sdmmc3_clk_pa6"; nvidia,function = "sdmmc3"; nvidia,pull = ; nvidia,tristate = ; }; sdmmc3_dat0_pb7 { - nvidia,pins = "sdmmc3_dat0_pb7", + nvidia,pins = "sdmmc3_cmd_pa7", + "sdmmc3_dat0_pb7", "sdmmc3_dat1_pb6", "sdmmc3_dat2_pb5", "sdmmc3_dat3_pb4", diff --git a/arch/arm/boot/dts/tegra30.dtsi b/arch/arm/boot/dts/tegra30.dtsi index c3e9f1e847db..cb5b76e95813 100644 --- a/arch/arm/boot/dts/tegra30.dtsi +++ b/arch/arm/boot/dts/tegra30.dtsi @@ -840,7 +840,7 @@ nvidia,elastic-limit = <16>; nvidia,term-range-adj = <6>; nvidia,xcvr-setup = <51>; - nvidia.xcvr-setup-use-fuses; + nvidia,xcvr-setup-use-fuses; nvidia,xcvr-lsfslew = <1>; nvidia,xcvr-lsrslew = <1>; nvidia,xcvr-hsslew = <32>; @@ -877,7 +877,7 @@ nvidia,elastic-limit = <16>; nvidia,term-range-adj = <6>; nvidia,xcvr-setup = <51>; - nvidia.xcvr-setup-use-fuses; + nvidia,xcvr-setup-use-fuses; nvidia,xcvr-lsfslew = <2>; nvidia,xcvr-lsrslew = <2>; nvidia,xcvr-hsslew = <32>; @@ -913,7 +913,7 @@ nvidia,elastic-limit = <16>; nvidia,term-range-adj = <6>; nvidia,xcvr-setup = <51>; - nvidia.xcvr-setup-use-fuses; + nvidia,xcvr-setup-use-fuses; nvidia,xcvr-lsfslew = <2>; nvidia,xcvr-lsrslew = <2>; nvidia,xcvr-hsslew = <32>; diff --git a/arch/arm/boot/dts/versatile-ab.dts b/arch/arm/boot/dts/versatile-ab.dts index 4a51612996bc..a9000d22b2c0 100644 --- a/arch/arm/boot/dts/versatile-ab.dts +++ b/arch/arm/boot/dts/versatile-ab.dts @@ -304,7 +304,7 @@ clock-names = "apb_pclk"; }; - ssp@101f4000 { + spi@101f4000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x101f4000 0x1000>; interrupts = <11>; diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index 2b913f17d50f..c24a55b0deac 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -379,7 +379,7 @@ static int __init nocache_trampoline(unsigned long _arg) unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); phys_reset_t phys_reset; - mcpm_set_entry_vector(cpu, cluster, cpu_resume); + mcpm_set_entry_vector(cpu, cluster, cpu_resume_no_hyp); setup_mm_for_reboot(); __mcpm_cpu_going_down(cpu, cluster); diff --git a/arch/arm/configs/k39tv1_bsp_1g_defconfig b/arch/arm/configs/k39tv1_bsp_1g_defconfig index ec1e8821e789..80ec70d21fbe 100644 --- a/arch/arm/configs/k39tv1_bsp_1g_defconfig +++ b/arch/arm/configs/k39tv1_bsp_1g_defconfig @@ -329,7 +329,6 @@ CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_SW_SYNC=y CONFIG_ANDROID_INTF_ALARM_DEV=y CONFIG_MTK_GMO_RAM_OPTIMIZE=y CONFIG_ION=y diff --git a/arch/arm/configs/k39tv1_bsp_512_defconfig b/arch/arm/configs/k39tv1_bsp_512_defconfig index cee76d4b7f8b..5130db278b7c 100644 --- a/arch/arm/configs/k39tv1_bsp_512_defconfig +++ b/arch/arm/configs/k39tv1_bsp_512_defconfig @@ -333,7 +333,6 @@ CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_SW_SYNC=y CONFIG_ANDROID_INTF_ALARM_DEV=y CONFIG_MTK_GMO_RAM_OPTIMIZE=y CONFIG_ION=y diff --git a/arch/arm/configs/k39tv1_bsp_defconfig b/arch/arm/configs/k39tv1_bsp_defconfig index eac4031f4d62..6a46a64eee14 100644 --- a/arch/arm/configs/k39tv1_bsp_defconfig +++ b/arch/arm/configs/k39tv1_bsp_defconfig @@ -333,7 +333,6 @@ CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_SW_SYNC=y CONFIG_ANDROID_INTF_ALARM_DEV=y CONFIG_ION=y CONFIG_MTK_ION=y diff --git a/arch/arm/configs/k68v1_2g_hdp_defconfig b/arch/arm/configs/k68v1_2g_hdp_defconfig index 06327cb11705..b2dd7b33c06b 100644 --- a/arch/arm/configs/k68v1_2g_hdp_defconfig +++ b/arch/arm/configs/k68v1_2g_hdp_defconfig @@ -358,7 +358,6 @@ CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_RTC_DRV_MT6358=y CONFIG_DMADEVICES=y CONFIG_DMA_MTK_UART=y -CONFIG_SW_SYNC=y CONFIG_ANDROID_INTF_ALARM_DEV=y CONFIG_ION=y CONFIG_MTK_ION=y diff --git a/arch/arm/configs/k69v1_2g_hdp_defconfig b/arch/arm/configs/k69v1_2g_hdp_defconfig index 228026727c39..22d765328381 100644 --- a/arch/arm/configs/k69v1_2g_hdp_defconfig +++ b/arch/arm/configs/k69v1_2g_hdp_defconfig @@ -359,7 +359,6 @@ CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_RTC_DRV_MT6358=y CONFIG_DMADEVICES=y CONFIG_DMA_MTK_UART=y -CONFIG_SW_SYNC=y CONFIG_ANDROID_INTF_ALARM_DEV=y CONFIG_ION=y CONFIG_MTK_ION=y diff --git a/arch/arm/configs/k71v1_bsp_2g_defconfig b/arch/arm/configs/k71v1_bsp_2g_defconfig index d84722ba1799..00d76fed715d 100644 --- a/arch/arm/configs/k71v1_bsp_2g_defconfig +++ b/arch/arm/configs/k71v1_bsp_2g_defconfig @@ -350,7 +350,6 @@ CONFIG_LEDS_TRIGGER_TIMER=y # CONFIG_RTC_SYSTOHC is not set CONFIG_DMADEVICES=y CONFIG_DMA_MTK_UART=y -CONFIG_SW_SYNC=y CONFIG_ANDROID_INTF_ALARM_DEV=y CONFIG_ION=y CONFIG_MTK_ION=y diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c index 96e62ec105d0..cd9e93b46c2d 100644 --- a/arch/arm/crypto/crc32-ce-glue.c +++ b/arch/arm/crypto/crc32-ce-glue.c @@ -236,7 +236,7 @@ static void __exit crc32_pmull_mod_exit(void) ARRAY_SIZE(crc32_pmull_algs)); } -static const struct cpu_feature crc32_cpu_feature[] = { +static const struct cpu_feature __maybe_unused crc32_cpu_feature[] = { { cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { } }; MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature); diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index 98089ffd91bb..078dbd25cca4 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -144,6 +144,11 @@ static inline bool kvm_vcpu_dabt_issext(struct kvm_vcpu *vcpu) return kvm_vcpu_get_hsr(vcpu) & HSR_SSE; } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return false; +} + static inline int kvm_vcpu_dabt_get_rd(struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & HSR_SRT_MASK) >> HSR_SRT_SHIFT; diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h index f3a7de71f515..848339d76f9a 100644 --- a/arch/arm/include/asm/kvm_mmio.h +++ b/arch/arm/include/asm/kvm_mmio.h @@ -26,6 +26,8 @@ struct kvm_decode { unsigned long rt; bool sign_extend; + /* Not used on 32-bit arm */ + bool sixty_four; }; void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); diff --git a/arch/arm/include/asm/suspend.h b/arch/arm/include/asm/suspend.h index 452bbdcbcc83..506314265c6f 100644 --- a/arch/arm/include/asm/suspend.h +++ b/arch/arm/include/asm/suspend.h @@ -10,6 +10,7 @@ struct sleep_save_sp { }; extern void cpu_resume(void); +extern void cpu_resume_no_hyp(void); extern void cpu_resume_arm(void); extern int cpu_suspend(unsigned long, int (*)(unsigned long)); diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index a5807b67ca8a..fe47d24955ea 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -349,6 +349,13 @@ do { \ #define __get_user_asm_byte(x, addr, err) \ __get_user_asm(x, addr, err, ldrb) +#if __LINUX_ARM_ARCH__ >= 6 + +#define __get_user_asm_half(x, addr, err) \ + __get_user_asm(x, addr, err, ldrh) + +#else + #ifndef __ARMEB__ #define __get_user_asm_half(x, __gu_addr, err) \ ({ \ @@ -367,6 +374,8 @@ do { \ }) #endif +#endif /* __LINUX_ARM_ARCH__ >= 6 */ + #define __get_user_asm_word(x, addr, err) \ __get_user_asm(x, addr, err, ldr) #endif @@ -442,6 +451,13 @@ do { \ #define __put_user_asm_byte(x, __pu_addr, err) \ __put_user_asm(x, __pu_addr, err, strb) +#if __LINUX_ARM_ARCH__ >= 6 + +#define __put_user_asm_half(x, __pu_addr, err) \ + __put_user_asm(x, __pu_addr, err, strh) + +#else + #ifndef __ARMEB__ #define __put_user_asm_half(x, __pu_addr, err) \ ({ \ @@ -458,6 +474,8 @@ do { \ }) #endif +#endif /* __LINUX_ARM_ARCH__ >= 6 */ + #define __put_user_asm_word(x, __pu_addr, err) \ __put_user_asm(x, __pu_addr, err, str) diff --git a/arch/arm/include/asm/unified.h b/arch/arm/include/asm/unified.h index a91ae499614c..2c3b952be63e 100644 --- a/arch/arm/include/asm/unified.h +++ b/arch/arm/include/asm/unified.h @@ -20,8 +20,10 @@ #ifndef __ASM_UNIFIED_H #define __ASM_UNIFIED_H -#if defined(__ASSEMBLY__) && defined(CONFIG_ARM_ASM_UNIFIED) +#if defined(__ASSEMBLY__) .syntax unified +#else +__asm__(".syntax unified"); #endif #ifdef CONFIG_CPU_V7M @@ -64,77 +66,4 @@ #endif /* CONFIG_THUMB2_KERNEL */ -#ifndef CONFIG_ARM_ASM_UNIFIED - -/* - * If the unified assembly syntax isn't used (in ARM mode), these - * macros expand to an empty string - */ -#ifdef __ASSEMBLY__ - .macro it, cond - .endm - .macro itt, cond - .endm - .macro ite, cond - .endm - .macro ittt, cond - .endm - .macro itte, cond - .endm - .macro itet, cond - .endm - .macro itee, cond - .endm - .macro itttt, cond - .endm - .macro ittte, cond - .endm - .macro ittet, cond - .endm - .macro ittee, cond - .endm - .macro itett, cond - .endm - .macro itete, cond - .endm - .macro iteet, cond - .endm - .macro iteee, cond - .endm -#else /* !__ASSEMBLY__ */ -__asm__( -" .macro it, cond\n" -" .endm\n" -" .macro itt, cond\n" -" .endm\n" -" .macro ite, cond\n" -" .endm\n" -" .macro ittt, cond\n" -" .endm\n" -" .macro itte, cond\n" -" .endm\n" -" .macro itet, cond\n" -" .endm\n" -" .macro itee, cond\n" -" .endm\n" -" .macro itttt, cond\n" -" .endm\n" -" .macro ittte, cond\n" -" .endm\n" -" .macro ittet, cond\n" -" .endm\n" -" .macro ittee, cond\n" -" .endm\n" -" .macro itett, cond\n" -" .endm\n" -" .macro itete, cond\n" -" .endm\n" -" .macro iteet, cond\n" -" .endm\n" -" .macro iteee, cond\n" -" .endm\n"); -#endif /* __ASSEMBLY__ */ - -#endif /* CONFIG_ARM_ASM_UNIFIED */ - #endif /* !__ASM_UNIFIED_H */ diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index d7dc808a3d15..08a7132f5600 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -282,16 +282,15 @@ __sys_trace: cmp scno, #-1 @ skip the syscall? bne 2b add sp, sp, #S_OFF @ restore stack - b ret_slow_syscall -__sys_trace_return: - str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 +__sys_trace_return_nosave: + enable_irq_notrace mov r0, sp bl syscall_trace_exit b ret_slow_syscall -__sys_trace_return_nosave: - enable_irq_notrace +__sys_trace_return: + str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 mov r0, sp bl syscall_trace_exit b ret_slow_syscall diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 60146e32619a..83e463c05dcd 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -159,10 +159,9 @@ ARM_BE8(orr r7, r7, #(1 << 25)) @ HSCTLR.EE #if !defined(ZIMAGE) && defined(CONFIG_ARM_ARCH_TIMER) @ make CNTP_* and CNTPCT accessible from PL1 mrc p15, 0, r7, c0, c1, 1 @ ID_PFR1 - lsr r7, #16 - and r7, #0xf - cmp r7, #1 - bne 1f + ubfx r7, r7, #16, #4 + teq r7, #0 + beq 1f mrc p15, 4, r7, c14, c1, 0 @ CNTHCTL orr r7, r7, #3 @ PL1PCEN | PL1PCTEN mcr p15, 4, r7, c14, c1, 0 @ CNTHCTL @@ -180,8 +179,8 @@ ARM_BE8(orr r7, r7, #(1 << 25)) @ HSCTLR.EE @ Check whether GICv3 system registers are available mrc p15, 0, r7, c0, c1, 1 @ ID_PFR1 ubfx r7, r7, #28, #4 - cmp r7, #1 - bne 2f + teq r7, #0 + beq 2f @ Enable system register accesses mrc p15, 4, r7, c12, c9, 5 @ ICC_HSRE diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index a8257fc9cf2a..5dc8b80bb693 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -120,6 +120,14 @@ ENDPROC(cpu_resume_after_mmu) .text .align +#ifdef CONFIG_MCPM + .arm +THUMB( .thumb ) +ENTRY(cpu_resume_no_hyp) +ARM_BE8(setend be) @ ensure we are in BE mode + b no_hyp +#endif + #ifdef CONFIG_MMU .arm ENTRY(cpu_resume_arm) @@ -135,6 +143,7 @@ ARM_BE8(setend be) @ ensure we are in BE mode bl __hyp_stub_install_secondary #endif safe_svcmode_maskall r1 +no_hyp: mov r1, #0 ALT_SMP(mrc p15, 0, r0, c0, c0, 5) ALT_UP_B(1f) @@ -163,6 +172,9 @@ ENDPROC(cpu_resume) #ifdef CONFIG_MMU ENDPROC(cpu_resume_arm) +#endif +#ifdef CONFIG_MCPM +ENDPROC(cpu_resume_no_hyp) #endif .align 2 diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index f4dd7f9663c1..0001742c131d 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -103,6 +103,8 @@ static bool __init cntvct_functional(void) * this. */ np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer"); + if (!np) + np = of_find_compatible_node(NULL, NULL, "arm,armv8-timer"); if (!np) goto out_put; diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 4cf026f3f00d..69f135069bd0 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -5,7 +5,7 @@ # Copyright (C) 1995-2000 Russell King # -lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ +lib-y := changebit.o csumipv6.o csumpartial.o \ csumpartialcopy.o csumpartialcopyuser.o clearbit.o \ delay.o delay-loop.o findbit.o memchr.o memcpy.o \ memmove.o memset.o memzero.o setbit.o \ @@ -19,6 +19,12 @@ lib-y := backtrace.o changebit.o csumipv6.o csumpartial.o \ mmu-y := clear_user.o copy_page.o getuser.o putuser.o \ copy_from_user.o copy_to_user.o +ifeq ($(cc-name),clang) + lib-y += backtrace-clang.o +else + lib-y += backtrace.o +endif + # using lib_ here won't override already available weak symbols obj-$(CONFIG_UACCESS_WITH_MEMCPY) += uaccess_with_memcpy.o diff --git a/arch/arm/lib/backtrace-clang.S b/arch/arm/lib/backtrace-clang.S new file mode 100644 index 000000000000..2ff375144b55 --- /dev/null +++ b/arch/arm/lib/backtrace-clang.S @@ -0,0 +1,217 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * linux/arch/arm/lib/backtrace-clang.S + * + * Copyright (C) 2019 Nathan Huckleberry + * + */ +#include +#include +#include + .text + +/* fp is 0 or stack frame */ + +#define frame r4 +#define sv_fp r5 +#define sv_pc r6 +#define mask r7 +#define sv_lr r8 + +ENTRY(c_backtrace) + +#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK) + ret lr +ENDPROC(c_backtrace) +#else + + +/* + * Clang does not store pc or sp in function prologues so we don't know exactly + * where the function starts. + * + * We can treat the current frame's lr as the saved pc and the preceding + * frame's lr as the current frame's lr, but we can't trace the most recent + * call. Inserting a false stack frame allows us to reference the function + * called last in the stacktrace. + * + * If the call instruction was a bl we can look at the callers branch + * instruction to calculate the saved pc. We can recover the pc in most cases, + * but in cases such as calling function pointers we cannot. In this case, + * default to using the lr. This will be some address in the function, but will + * not be the function start. + * + * Unfortunately due to the stack frame layout we can't dump r0 - r3, but these + * are less frequently saved. + * + * Stack frame layout: + * + * saved lr + * frame=> saved fp + * optionally saved caller registers (r4 - r10) + * optionally saved arguments (r0 - r3) + * + * + * + * Functions start with the following code sequence: + * corrected pc => stmfd sp!, {..., fp, lr} + * add fp, sp, #x + * stmfd sp!, {r0 - r3} (optional) + * + * + * + * + * + * + * The diagram below shows an example stack setup for dump_stack. + * + * The frame for c_backtrace has pointers to the code of dump_stack. This is + * why the frame of c_backtrace is used to for the pc calculation of + * dump_stack. This is why we must move back a frame to print dump_stack. + * + * The stored locals for dump_stack are in dump_stack's frame. This means that + * to fully print dump_stack's frame we need both the frame for dump_stack (for + * locals) and the frame that was called by dump_stack (for pc). + * + * To print locals we must know where the function start is. If we read the + * function prologue opcodes we can determine which variables are stored in the + * stack frame. + * + * To find the function start of dump_stack we can look at the stored LR of + * show_stack. It points at the instruction directly after the bl dump_stack. + * We can then read the offset from the bl opcode to determine where the branch + * takes us. The address calculated must be the start of dump_stack. + * + * c_backtrace frame dump_stack: + * {[LR] } ============| ... + * {[FP] } =======| | bl c_backtrace + * | |=> ... + * {[R4-R10]} | + * {[R0-R3] } | show_stack: + * dump_stack frame | ... + * {[LR] } =============| bl dump_stack + * {[FP] } <=======| |=> ... + * {[R4-R10]} + * {[R0-R3] } + */ + + stmfd sp!, {r4 - r9, fp, lr} @ Save an extra register + @ to ensure 8 byte alignment + movs frame, r0 @ if frame pointer is zero + beq no_frame @ we have no stack frames + tst r1, #0x10 @ 26 or 32-bit mode? + moveq mask, #0xfc000003 + movne mask, #0 @ mask for 32-bit + +/* + * Switches the current frame to be the frame for dump_stack. + */ + add frame, sp, #24 @ switch to false frame +for_each_frame: tst frame, mask @ Check for address exceptions + bne no_frame + +/* + * sv_fp is the stack frame with the locals for the current considered + * function. + * + * sv_pc is the saved lr frame the frame above. This is a pointer to a code + * address within the current considered function, but it is not the function + * start. This value gets updated to be the function start later if it is + * possible. + */ +1001: ldr sv_pc, [frame, #4] @ get saved 'pc' +1002: ldr sv_fp, [frame, #0] @ get saved fp + + teq sv_fp, mask @ make sure next frame exists + beq no_frame + +/* + * sv_lr is the lr from the function that called the current function. This is + * a pointer to a code address in the current function's caller. sv_lr-4 is + * the instruction used to call the current function. + * + * This sv_lr can be used to calculate the function start if the function was + * called using a bl instruction. If the function start can be recovered sv_pc + * is overwritten with the function start. + * + * If the current function was called using a function pointer we cannot + * recover the function start and instead continue with sv_pc as an arbitrary + * value within the current function. If this is the case we cannot print + * registers for the current function, but the stacktrace is still printed + * properly. + */ +1003: ldr sv_lr, [sv_fp, #4] @ get saved lr from next frame + + ldr r0, [sv_lr, #-4] @ get call instruction + ldr r3, .Lopcode+4 + and r2, r3, r0 @ is this a bl call + teq r2, r3 + bne finished_setup @ give up if it's not + and r0, #0xffffff @ get call offset 24-bit int + lsl r0, r0, #8 @ sign extend offset + asr r0, r0, #8 + ldr sv_pc, [sv_fp, #4] @ get lr address + add sv_pc, sv_pc, #-4 @ get call instruction address + add sv_pc, sv_pc, #8 @ take care of prefetch + add sv_pc, sv_pc, r0, lsl #2@ find function start + +finished_setup: + + bic sv_pc, sv_pc, mask @ mask PC/LR for the mode + +/* + * Print the function (sv_pc) and where it was called from (sv_lr). + */ +1004: mov r0, sv_pc + + mov r1, sv_lr + mov r2, frame + bic r1, r1, mask @ mask PC/LR for the mode + bl dump_backtrace_entry + +/* + * Test if the function start is a stmfd instruction to determine which + * registers were stored in the function prologue. + * + * If we could not recover the sv_pc because we were called through a function + * pointer the comparison will fail and no registers will print. Unwinding will + * continue as if there had been no registers stored in this frame. + */ +1005: ldr r1, [sv_pc, #0] @ if stmfd sp!, {..., fp, lr} + ldr r3, .Lopcode @ instruction exists, + teq r3, r1, lsr #11 + ldr r0, [frame] @ locals are stored in + @ the preceding frame + subeq r0, r0, #4 + bleq dump_backtrace_stm @ dump saved registers + +/* + * If we are out of frames or if the next frame is invalid. + */ + teq sv_fp, #0 @ zero saved fp means + beq no_frame @ no further frames + + cmp sv_fp, frame @ next frame must be + mov frame, sv_fp @ above the current frame + bhi for_each_frame + +1006: adr r0, .Lbad + mov r1, frame + bl printk +no_frame: ldmfd sp!, {r4 - r9, fp, pc} +ENDPROC(c_backtrace) + .pushsection __ex_table,"a" + .align 3 + .long 1001b, 1006b + .long 1002b, 1006b + .long 1003b, 1006b + .long 1004b, 1006b + .long 1005b, 1006b + .popsection + +.Lbad: .asciz "Backtrace aborted due to bad frame pointer <%p>\n" + .align +.Lopcode: .word 0xe92d4800 >> 11 @ stmfd sp!, {... fp, lr} + .word 0x0b000000 @ bl if these bits are set + +#endif diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index 746e7801dcdf..b2e4bc3a635e 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -42,6 +42,12 @@ _ASM_NOKPROBE(__get_user_1) ENTRY(__get_user_2) check_uaccess r0, 2, r1, r2, __get_user_bad +#if __LINUX_ARM_ARCH__ >= 6 + +2: TUSER(ldrh) r2, [r0] + +#else + #ifdef CONFIG_CPU_USE_DOMAINS rb .req ip 2: ldrbt r2, [r0], #1 @@ -56,6 +62,9 @@ rb .req r0 #else orr r2, rb, r2, lsl #8 #endif + +#endif /* __LINUX_ARM_ARCH__ >= 6 */ + mov r0, #0 ret lr ENDPROC(__get_user_2) @@ -145,7 +154,9 @@ _ASM_NOKPROBE(__get_user_bad8) .pushsection __ex_table, "a" .long 1b, __get_user_bad .long 2b, __get_user_bad +#if __LINUX_ARM_ARCH__ < 6 .long 3b, __get_user_bad +#endif .long 4b, __get_user_bad .long 5b, __get_user_bad8 .long 6b, __get_user_bad8 diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S index 38d660d3705f..515eeaa9975c 100644 --- a/arch/arm/lib/putuser.S +++ b/arch/arm/lib/putuser.S @@ -41,16 +41,13 @@ ENDPROC(__put_user_1) ENTRY(__put_user_2) check_uaccess r0, 2, r1, ip, __put_user_bad - mov ip, r2, lsr #8 -#ifdef CONFIG_THUMB2_KERNEL -#ifndef __ARMEB__ -2: TUSER(strb) r2, [r0] -3: TUSER(strb) ip, [r0, #1] +#if __LINUX_ARM_ARCH__ >= 6 + +2: TUSER(strh) r2, [r0] + #else -2: TUSER(strb) ip, [r0] -3: TUSER(strb) r2, [r0, #1] -#endif -#else /* !CONFIG_THUMB2_KERNEL */ + + mov ip, r2, lsr #8 #ifndef __ARMEB__ 2: TUSER(strb) r2, [r0], #1 3: TUSER(strb) ip, [r0] @@ -58,7 +55,8 @@ ENTRY(__put_user_2) 2: TUSER(strb) ip, [r0], #1 3: TUSER(strb) r2, [r0] #endif -#endif /* CONFIG_THUMB2_KERNEL */ + +#endif /* __LINUX_ARM_ARCH__ >= 6 */ mov r0, #0 ret lr ENDPROC(__put_user_2) @@ -91,7 +89,9 @@ ENDPROC(__put_user_bad) .pushsection __ex_table, "a" .long 1b, __put_user_bad .long 2b, __put_user_bad +#if __LINUX_ARM_ARCH__ < 6 .long 3b, __put_user_bad +#endif .long 4b, __put_user_bad .long 5b, __put_user_bad .long 6b, __put_user_bad diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index 8be04ec95adf..d80b2290ac2e 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -856,8 +856,8 @@ static s8 dm365_queue_priority_mapping[][2] = { }; static const struct dma_slave_map dm365_edma_map[] = { - { "davinci-mcbsp.0", "tx", EDMA_FILTER_PARAM(0, 2) }, - { "davinci-mcbsp.0", "rx", EDMA_FILTER_PARAM(0, 3) }, + { "davinci-mcbsp", "tx", EDMA_FILTER_PARAM(0, 2) }, + { "davinci-mcbsp", "rx", EDMA_FILTER_PARAM(0, 3) }, { "davinci_voicecodec", "tx", EDMA_FILTER_PARAM(0, 2) }, { "davinci_voicecodec", "rx", EDMA_FILTER_PARAM(0, 3) }, { "spi_davinci.2", "tx", EDMA_FILTER_PARAM(0, 10) }, diff --git a/arch/arm/mach-imx/Makefile b/arch/arm/mach-imx/Makefile index 8ff71058207d..8cf1a98785a5 100644 --- a/arch/arm/mach-imx/Makefile +++ b/arch/arm/mach-imx/Makefile @@ -87,6 +87,8 @@ AFLAGS_suspend-imx6.o :=-Wa,-march=armv7-a obj-$(CONFIG_SOC_IMX6) += suspend-imx6.o obj-$(CONFIG_SOC_IMX53) += suspend-imx53.o endif +AFLAGS_resume-imx6.o :=-Wa,-march=armv7-a +obj-$(CONFIG_SOC_IMX6) += resume-imx6.o obj-$(CONFIG_SOC_IMX6) += pm-imx6.o obj-$(CONFIG_SOC_IMX1) += mach-imx1.o diff --git a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h index b09a2ec19267..4b318c864446 100644 --- a/arch/arm/mach-imx/common.h +++ b/arch/arm/mach-imx/common.h @@ -111,17 +111,17 @@ void imx_cpu_die(unsigned int cpu); int imx_cpu_kill(unsigned int cpu); #ifdef CONFIG_SUSPEND -void v7_cpu_resume(void); void imx53_suspend(void __iomem *ocram_vbase); extern const u32 imx53_suspend_sz; void imx6_suspend(void __iomem *ocram_vbase); #else -static inline void v7_cpu_resume(void) {} static inline void imx53_suspend(void __iomem *ocram_vbase) {} static const u32 imx53_suspend_sz; static inline void imx6_suspend(void __iomem *ocram_vbase) {} #endif +void v7_cpu_resume(void); + void imx6_pm_ccm_init(const char *ccm_compat); void imx6q_pm_init(void); void imx6dl_pm_init(void); diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c index ecdf071653d4..6078bcc9f594 100644 --- a/arch/arm/mach-imx/pm-imx6.c +++ b/arch/arm/mach-imx/pm-imx6.c @@ -604,6 +604,28 @@ static void __init imx6_pm_common_init(const struct imx6_pm_socdata IMX6Q_GPR1_GINT); } +static void imx6_pm_stby_poweroff(void) +{ + imx6_set_lpm(STOP_POWER_OFF); + imx6q_suspend_finish(0); + + mdelay(1000); + + pr_emerg("Unable to poweroff system\n"); +} + +static int imx6_pm_stby_poweroff_probe(void) +{ + if (pm_power_off) { + pr_warn("%s: pm_power_off already claimed %p %pf!\n", + __func__, pm_power_off, pm_power_off); + return -EBUSY; + } + + pm_power_off = imx6_pm_stby_poweroff; + return 0; +} + void __init imx6_pm_ccm_init(const char *ccm_compat) { struct device_node *np; @@ -620,6 +642,9 @@ void __init imx6_pm_ccm_init(const char *ccm_compat) val = readl_relaxed(ccm_base + CLPCR); val &= ~BM_CLPCR_LPM; writel_relaxed(val, ccm_base + CLPCR); + + if (of_property_read_bool(np, "fsl,pmic-stby-poweroff")) + imx6_pm_stby_poweroff_probe(); } void __init imx6q_pm_init(void) diff --git a/arch/arm/mach-imx/resume-imx6.S b/arch/arm/mach-imx/resume-imx6.S new file mode 100644 index 000000000000..5bd1ba7ef15b --- /dev/null +++ b/arch/arm/mach-imx/resume-imx6.S @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright 2014 Freescale Semiconductor, Inc. + */ + +#include +#include +#include +#include +#include "hardware.h" + +/* + * The following code must assume it is running from physical address + * where absolute virtual addresses to the data section have to be + * turned into relative ones. + */ + +ENTRY(v7_cpu_resume) + bl v7_invalidate_l1 +#ifdef CONFIG_CACHE_L2X0 + bl l2c310_early_resume +#endif + b cpu_resume +ENDPROC(v7_cpu_resume) diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S index 76ee2ceec8d5..7d84b617af48 100644 --- a/arch/arm/mach-imx/suspend-imx6.S +++ b/arch/arm/mach-imx/suspend-imx6.S @@ -333,17 +333,3 @@ resume: ret lr ENDPROC(imx6_suspend) - -/* - * The following code must assume it is running from physical address - * where absolute virtual addresses to the data section have to be - * turned into relative ones. - */ - -ENTRY(v7_cpu_resume) - bl v7_invalidate_l1 -#ifdef CONFIG_CACHE_L2X0 - bl l2c310_early_resume -#endif - b cpu_resume -ENDPROC(v7_cpu_resume) diff --git a/arch/arm/mach-ks8695/board-acs5k.c b/arch/arm/mach-ks8695/board-acs5k.c index e4d709c8ed32..76d3083f1f63 100644 --- a/arch/arm/mach-ks8695/board-acs5k.c +++ b/arch/arm/mach-ks8695/board-acs5k.c @@ -92,7 +92,7 @@ static struct i2c_board_info acs5k_i2c_devs[] __initdata = { }, }; -static void acs5k_i2c_init(void) +static void __init acs5k_i2c_init(void) { /* The gpio interface */ platform_device_register(&acs5k_i2c_device); diff --git a/arch/arm/mach-mediatek/Kconfig b/arch/arm/mach-mediatek/Kconfig index 12abb408713a..aaf57591bc61 100644 --- a/arch/arm/mach-mediatek/Kconfig +++ b/arch/arm/mach-mediatek/Kconfig @@ -43,7 +43,7 @@ config MACH_MT8167 select CPU_V7 select HAVE_SMP select IRQ_DOMAIN - select IRQ_DOMAIN_DEBUG + select IRQ_DOMAIN_DEBUG if IRQ_DOMAIN && DEBUG_FS select GENERIC_SCHED_CLOCK select MTK_SYS_CIRQ select ARMV7_COMPAT @@ -89,7 +89,7 @@ config MACH_MT8168 select CPU_V7 select HAVE_SMP select IRQ_DOMAIN - select IRQ_DOMAIN_DEBUG + select IRQ_DOMAIN_DEBUG if IRQ_DOMAIN && DEBUG_FS select GENERIC_SCHED_CLOCK select ARMV7_COMPAT select ARMV7_COMPAT_CPUINFO @@ -136,7 +136,7 @@ config MACH_MT6768 select CPU_V7 select HAVE_SMP select IRQ_DOMAIN - select IRQ_DOMAIN_DEBUG + select IRQ_DOMAIN_DEBUG if IRQ_DOMAIN && DEBUG_FS select GENERIC_SCHED_CLOCK select ARMV7_COMPAT select ARMV7_COMPAT_CPUINFO @@ -194,7 +194,7 @@ config MACH_MT6739 select F2FS_FS_XATTR select OVERLAY_FS select IRQ_DOMAIN - select IRQ_DOMAIN_DEBUG + select IRQ_DOMAIN_DEBUG if IRQ_DOMAIN && DEBUG_FS select GENERIC_SCHED_CLOCK select MTK_SYS_CIRQ select MTK_GIC_TARGET_ALL @@ -239,7 +239,7 @@ config MACH_MT6771 select MTK_PID_MAP select MTK_IO_BOOST select IRQ_DOMAIN - select IRQ_DOMAIN_DEBUG + select IRQ_DOMAIN_DEBUG if IRQ_DOMAIN && DEBUG_FS select GENERIC_SCHED_CLOCK select ARMV7_COMPAT select ARMV7_COMPAT_CPUINFO diff --git a/arch/arm/mach-omap1/Makefile b/arch/arm/mach-omap1/Makefile index e8ccf51c6f29..ec0235899de2 100644 --- a/arch/arm/mach-omap1/Makefile +++ b/arch/arm/mach-omap1/Makefile @@ -25,7 +25,7 @@ obj-y += $(i2c-omap-m) $(i2c-omap-y) led-y := leds.o -usb-fs-$(CONFIG_USB) := usb.o +usb-fs-$(CONFIG_USB_SUPPORT) := usb.o obj-y += $(usb-fs-m) $(usb-fs-y) # Specific board support diff --git a/arch/arm/mach-omap1/id.c b/arch/arm/mach-omap1/id.c index 52de382fc804..7e49dfda3d2f 100644 --- a/arch/arm/mach-omap1/id.c +++ b/arch/arm/mach-omap1/id.c @@ -200,10 +200,10 @@ void __init omap_check_revision(void) printk(KERN_INFO "Unknown OMAP cpu type: 0x%02x\n", cpu_type); } - printk(KERN_INFO "OMAP%04x", omap_revision >> 16); + pr_info("OMAP%04x", omap_revision >> 16); if ((omap_revision >> 8) & 0xff) - printk(KERN_INFO "%x", (omap_revision >> 8) & 0xff); - printk(KERN_INFO " revision %i handled as %02xxx id: %08x%08x\n", + pr_cont("%x", (omap_revision >> 8) & 0xff); + pr_cont(" revision %i handled as %02xxx id: %08x%08x\n", die_rev, omap_revision & 0xff, system_serial_low, system_serial_high); } diff --git a/arch/arm/mach-omap1/include/mach/usb.h b/arch/arm/mach-omap1/include/mach/usb.h index 77867778d4ec..5429d86c7190 100644 --- a/arch/arm/mach-omap1/include/mach/usb.h +++ b/arch/arm/mach-omap1/include/mach/usb.h @@ -11,7 +11,7 @@ #include -#if IS_ENABLED(CONFIG_USB) +#if IS_ENABLED(CONFIG_USB_SUPPORT) void omap1_usb_init(struct omap_usb_config *pdata); #else static inline void omap1_usb_init(struct omap_usb_config *pdata) diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 16cb1c195fd8..79d71b1eae59 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -199,8 +199,8 @@ void __init omap2xxx_check_revision(void) pr_info("%s", soc_name); if ((omap_rev() >> 8) & 0x0f) - pr_info("%s", soc_rev); - pr_info("\n"); + pr_cont("%s", soc_rev); + pr_cont("\n"); } #define OMAP3_SHOW_FEATURE(feat) \ diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 45c8f2ef4e23..9274a484c6a3 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2530,7 +2530,7 @@ static void _setup_iclk_autoidle(struct omap_hwmod *oh) */ static int _setup_reset(struct omap_hwmod *oh) { - int r; + int r = 0; if (oh->_state != _HWMOD_STATE_INITIALIZED) return -EINVAL; diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c index de06a1d5ffab..e61c14f59063 100644 --- a/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_33xx_43xx_ipblock_data.c @@ -966,7 +966,8 @@ static struct omap_hwmod_class_sysconfig am33xx_timer_sysc = { .rev_offs = 0x0000, .sysc_offs = 0x0010, .syss_offs = 0x0014, - .sysc_flags = (SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET), + .sysc_flags = SYSC_HAS_SIDLEMODE | SYSC_HAS_SOFTRESET | + SYSC_HAS_RESET_STATUS, .idlemodes = (SIDLE_FORCE | SIDLE_NO | SIDLE_SMART | SIDLE_SMART_WKUP), .sysc_fields = &omap_hwmod_sysc_type2, diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 6b433fce65a5..2477f6086de4 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -307,108 +307,15 @@ static void __init omap3_logicpd_torpedo_init(void) } /* omap3pandora legacy devices */ -#define PANDORA_WIFI_IRQ_GPIO 21 -#define PANDORA_WIFI_NRESET_GPIO 23 static struct platform_device pandora_backlight = { .name = "pandora-backlight", .id = -1, }; -static struct regulator_consumer_supply pandora_vmmc3_supply[] = { - REGULATOR_SUPPLY("vmmc", "omap_hsmmc.2"), -}; - -static struct regulator_init_data pandora_vmmc3 = { - .constraints = { - .valid_ops_mask = REGULATOR_CHANGE_STATUS, - }, - .num_consumer_supplies = ARRAY_SIZE(pandora_vmmc3_supply), - .consumer_supplies = pandora_vmmc3_supply, -}; - -static struct fixed_voltage_config pandora_vwlan = { - .supply_name = "vwlan", - .microvolts = 1800000, /* 1.8V */ - .gpio = PANDORA_WIFI_NRESET_GPIO, - .startup_delay = 50000, /* 50ms */ - .enable_high = 1, - .init_data = &pandora_vmmc3, -}; - -static struct platform_device pandora_vwlan_device = { - .name = "reg-fixed-voltage", - .id = 1, - .dev = { - .platform_data = &pandora_vwlan, - }, -}; - -static void pandora_wl1251_init_card(struct mmc_card *card) -{ - /* - * We have TI wl1251 attached to MMC3. Pass this information to - * SDIO core because it can't be probed by normal methods. - */ - if (card->type == MMC_TYPE_SDIO || card->type == MMC_TYPE_SD_COMBO) { - card->quirks |= MMC_QUIRK_NONSTD_SDIO; - card->cccr.wide_bus = 1; - card->cis.vendor = 0x104c; - card->cis.device = 0x9066; - card->cis.blksize = 512; - card->cis.max_dtr = 24000000; - card->ocr = 0x80; - } -} - -static struct omap2_hsmmc_info pandora_mmc3[] = { - { - .mmc = 3, - .caps = MMC_CAP_4_BIT_DATA | MMC_CAP_POWER_OFF_CARD, - .gpio_cd = -EINVAL, - .gpio_wp = -EINVAL, - .init_card = pandora_wl1251_init_card, - }, - {} /* Terminator */ -}; - -static void __init pandora_wl1251_init(void) -{ - struct wl1251_platform_data pandora_wl1251_pdata; - int ret; - - memset(&pandora_wl1251_pdata, 0, sizeof(pandora_wl1251_pdata)); - - pandora_wl1251_pdata.power_gpio = -1; - - ret = gpio_request_one(PANDORA_WIFI_IRQ_GPIO, GPIOF_IN, "wl1251 irq"); - if (ret < 0) - goto fail; - - pandora_wl1251_pdata.irq = gpio_to_irq(PANDORA_WIFI_IRQ_GPIO); - if (pandora_wl1251_pdata.irq < 0) - goto fail_irq; - - pandora_wl1251_pdata.use_eeprom = true; - ret = wl1251_set_platform_data(&pandora_wl1251_pdata); - if (ret < 0) - goto fail_irq; - - return; - -fail_irq: - gpio_free(PANDORA_WIFI_IRQ_GPIO); -fail: - pr_err("wl1251 board initialisation failed\n"); -} - static void __init omap3_pandora_legacy_init(void) { platform_device_register(&pandora_backlight); - platform_device_register(&pandora_vwlan_device); - omap_hsmmc_init(pandora_mmc3); - omap_hsmmc_late_init(pandora_mmc3); - pandora_wl1251_init(); } #endif /* CONFIG_ARCH_OMAP3 */ diff --git a/arch/arm/mach-rpc/irq.c b/arch/arm/mach-rpc/irq.c index b8a61cb11207..7f0f40178634 100644 --- a/arch/arm/mach-rpc/irq.c +++ b/arch/arm/mach-rpc/irq.c @@ -118,7 +118,7 @@ extern unsigned char rpc_default_fiq_start, rpc_default_fiq_end; void __init rpc_init_irq(void) { - unsigned int irq, clr, set = 0; + unsigned int irq, clr, set; iomd_writeb(0, IOMD_IRQMASKA); iomd_writeb(0, IOMD_IRQMASKB); @@ -130,6 +130,7 @@ void __init rpc_init_irq(void) for (irq = 0; irq < NR_IRQS; irq++) { clr = IRQ_NOREQUEST; + set = 0; if (irq <= 6 || (irq >= 9 && irq <= 15)) clr |= IRQ_NOPROBE; diff --git a/arch/arm/mach-tegra/reset-handler.S b/arch/arm/mach-tegra/reset-handler.S index 805f306fa6f7..e31f167a8199 100644 --- a/arch/arm/mach-tegra/reset-handler.S +++ b/arch/arm/mach-tegra/reset-handler.S @@ -56,16 +56,16 @@ ENTRY(tegra_resume) cmp r6, #TEGRA20 beq 1f @ Yes /* Clear the flow controller flags for this CPU. */ - cpu_to_csr_reg r1, r0 + cpu_to_csr_reg r3, r0 mov32 r2, TEGRA_FLOW_CTRL_BASE - ldr r1, [r2, r1] + ldr r1, [r2, r3] /* Clear event & intr flag */ orr r1, r1, \ #FLOW_CTRL_CSR_INTR_FLAG | FLOW_CTRL_CSR_EVENT_FLAG movw r0, #0x3FFD @ enable, cluster_switch, immed, bitmaps @ & ext flags for CPU power mgnt bic r1, r1, r0 - str r1, [r2] + str r1, [r2, r3] 1: mov32 r9, 0xc09 diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S index dd4a67dabd91..b7cd41461e7d 100644 --- a/arch/arm/mach-tegra/sleep-tegra30.S +++ b/arch/arm/mach-tegra/sleep-tegra30.S @@ -382,6 +382,14 @@ _pll_m_c_x_done: pll_locked r1, r0, CLK_RESET_PLLC_BASE pll_locked r1, r0, CLK_RESET_PLLX_BASE + tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 + cmp r1, #TEGRA30 + beq 1f + ldr r1, [r0, #CLK_RESET_PLLP_BASE] + bic r1, r1, #(1<<31) @ disable PllP bypass + str r1, [r0, #CLK_RESET_PLLP_BASE] +1: + mov32 r7, TEGRA_TMRUS_BASE ldr r1, [r7] add r1, r1, #LOCK_DELAY @@ -641,7 +649,10 @@ tegra30_switch_cpu_to_clk32k: str r0, [r4, #PMC_PLLP_WB0_OVERRIDE] /* disable PLLP, PLLA, PLLC and PLLX */ + tegra_get_soc_id TEGRA_APB_MISC_BASE, r1 + cmp r1, #TEGRA30 ldr r0, [r5, #CLK_RESET_PLLP_BASE] + orrne r0, r0, #(1 << 31) @ enable PllP bypass on fast cluster bic r0, r0, #(1 << 30) str r0, [r5, #CLK_RESET_PLLP_BASE] ldr r0, [r5, #CLK_RESET_PLLA_BASE] diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c index fe488523694c..635b0d549487 100644 --- a/arch/arm/mach-vexpress/spc.c +++ b/arch/arm/mach-vexpress/spc.c @@ -555,8 +555,9 @@ static struct clk *ve_spc_clk_register(struct device *cpu_dev) static int __init ve_spc_clk_init(void) { - int cpu; + int cpu, cluster; struct clk *clk; + bool init_opp_table[MAX_CLUSTERS] = { false }; if (!info) return 0; /* Continue only if SPC is initialised */ @@ -582,8 +583,17 @@ static int __init ve_spc_clk_init(void) continue; } + cluster = topology_physical_package_id(cpu_dev->id); + if (init_opp_table[cluster]) + continue; + if (ve_init_opp_table(cpu_dev)) pr_warn("failed to initialise cpu%d opp table\n", cpu); + else if (dev_pm_opp_set_sharing_cpus(cpu_dev, + topology_core_cpumask(cpu_dev->id))) + pr_warn("failed to mark OPPs shared for cpu%d\n", cpu); + else + init_opp_table[cluster] = true; } platform_device_register_simple("vexpress-spc-cpufreq", -1, NULL, 0); diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 2c96190e018b..96b17a870b91 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -768,6 +768,36 @@ do_alignment_t32_to_handler(unsigned long *pinstr, struct pt_regs *regs, return NULL; } +static int alignment_get_arm(struct pt_regs *regs, u32 *ip, unsigned long *inst) +{ + u32 instr = 0; + int fault; + + if (user_mode(regs)) + fault = get_user(instr, ip); + else + fault = probe_kernel_address(ip, instr); + + *inst = __mem_to_opcode_arm(instr); + + return fault; +} + +static int alignment_get_thumb(struct pt_regs *regs, u16 *ip, u16 *inst) +{ + u16 instr = 0; + int fault; + + if (user_mode(regs)) + fault = get_user(instr, ip); + else + fault = probe_kernel_address(ip, instr); + + *inst = __mem_to_opcode_thumb16(instr); + + return fault; +} + static int do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { @@ -775,10 +805,10 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) unsigned long instr = 0, instrptr; int (*handler)(unsigned long addr, unsigned long instr, struct pt_regs *regs); unsigned int type; - unsigned int fault; u16 tinstr = 0; int isize = 4; int thumb2_32b = 0; + int fault; if (interrupts_enabled(regs)) local_irq_enable(); @@ -787,15 +817,14 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (thumb_mode(regs)) { u16 *ptr = (u16 *)(instrptr & ~1); - fault = probe_kernel_address(ptr, tinstr); - tinstr = __mem_to_opcode_thumb16(tinstr); + + fault = alignment_get_thumb(regs, ptr, &tinstr); if (!fault) { if (cpu_architecture() >= CPU_ARCH_ARMv7 && IS_T32(tinstr)) { /* Thumb-2 32-bit */ - u16 tinst2 = 0; - fault = probe_kernel_address(ptr + 1, tinst2); - tinst2 = __mem_to_opcode_thumb16(tinst2); + u16 tinst2; + fault = alignment_get_thumb(regs, ptr + 1, &tinst2); instr = __opcode_thumb32_compose(tinstr, tinst2); thumb2_32b = 1; } else { @@ -804,8 +833,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) } } } else { - fault = probe_kernel_address((void *)instrptr, instr); - instr = __mem_to_opcode_arm(instr); + fault = alignment_get_arm(regs, (void *)instrptr, &instr); } if (fault) { diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 27a40101dd3a..fd26b5c92b44 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -356,7 +356,7 @@ static inline void poison_init_mem(void *s, size_t count) *p++ = 0xe7fddef0; } -static inline void +static inline void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn) { struct page *start_pg, *end_pg; diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 70e560cf8ca0..d8cbe772f690 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1195,6 +1195,9 @@ void __init adjust_lowmem_bounds(void) phys_addr_t block_start = reg->base; phys_addr_t block_end = reg->base + reg->size; + if (memblock_is_nomap(reg)) + continue; + if (reg->base < vmalloc_limit) { if (block_end > lowmem_limit) /* diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 92e84181933a..c68408d51c4b 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -135,7 +135,6 @@ __v7m_setup_cont: dsb mov r6, lr @ save LR ldr sp, =init_thread_union + THREAD_START_SP - stmia sp, {r0-r3, r12} cpsie i svc #0 1: cpsid i diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c index b92673efffff..97bd43c16cd8 100644 --- a/arch/arm/plat-pxa/ssp.c +++ b/arch/arm/plat-pxa/ssp.c @@ -230,18 +230,12 @@ static int pxa_ssp_probe(struct platform_device *pdev) static int pxa_ssp_remove(struct platform_device *pdev) { - struct resource *res; struct ssp_device *ssp; ssp = platform_get_drvdata(pdev); if (ssp == NULL) return -ENODEV; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(res->start, resource_size(res)); - - clk_put(ssp->clk); - mutex_lock(&ssp_lock); list_del(&ssp->node); mutex_unlock(&ssp_lock); diff --git a/arch/arm/xen/efi.c b/arch/arm/xen/efi.c index b4d78959cadf..bc9a37b3cecd 100644 --- a/arch/arm/xen/efi.c +++ b/arch/arm/xen/efi.c @@ -31,7 +31,9 @@ void __init xen_efi_runtime_setup(void) efi.get_variable = xen_efi_get_variable; efi.get_next_variable = xen_efi_get_next_variable; efi.set_variable = xen_efi_set_variable; + efi.set_variable_nonblocking = xen_efi_set_variable; efi.query_variable_info = xen_efi_query_variable_info; + efi.query_variable_info_nonblocking = xen_efi_query_variable_info; efi.update_capsule = xen_efi_update_capsule; efi.query_capsule_caps = xen_efi_query_capsule_caps; efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9e94f192bfa5..10dfbee55014 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -26,6 +26,8 @@ config ARM64 select ARCH_USE_CMPXCHG_LOCKREF select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_SUPPORTS_LTO_CLANG + select ARCH_SUPPORTS_THINLTO + select ARCH_SUPPORTS_SHADOW_CALL_STACK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_COMPAT_IPC_PARSE_VERSION @@ -51,6 +53,7 @@ config ARM64 select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_PROBE @@ -94,7 +97,7 @@ config ARM64 select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_TRACER - select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_GRAPH_TRACER if !SHADOW_CALL_STACK select HAVE_GCC_PLUGINS select HAVE_GENERIC_DMA_COHERENT select HAVE_HW_BREAKPOINT if PERF_EVENTS @@ -438,7 +441,6 @@ config ARM64_ERRATUM_845719 config ARM64_ERRATUM_843419 bool "Cortex-A53: 843419: A load or store might access an incorrect address" - default y if !LTO_CLANG select ARM64_MODULE_CMODEL_LARGE if MODULES help This option links the kernel with '--fix-cortex-a53-843419' and diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 64ae05d576aa..16520bf6d825 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -162,7 +162,7 @@ config MACH_MT6765 select HAVE_SMP select NEED_MACH_MEMORY_H select IRQ_DOMAIN - select IRQ_DOMAIN_DEBUG + select IRQ_DOMAIN_DEBUG if IRQ_DOMAIN && DEBUG_FS select GENERIC_SCHED_CLOCK select ARMV7_COMPAT select ARMV7_COMPAT_CPUINFO @@ -208,7 +208,7 @@ config MACH_MT6771 select MTK_IO_BOOST select NEED_MACH_MEMORY_H select IRQ_DOMAIN - select IRQ_DOMAIN_DEBUG + select IRQ_DOMAIN_DEBUG if IRQ_DOMAIN && DEBUG_FS select GENERIC_SCHED_CLOCK select ARMV7_COMPAT select ARMV7_COMPAT_CPUINFO @@ -258,7 +258,7 @@ config MACH_MT6768 select HAVE_SMP select NEED_MACH_MEMORY_H select IRQ_DOMAIN - select IRQ_DOMAIN_DEBUG + select IRQ_DOMAIN_DEBUG if IRQ_DOMAIN && DEBUG_FS select GENERIC_SCHED_CLOCK select ARMV7_COMPAT select ARMV7_COMPAT_CPUINFO @@ -323,7 +323,7 @@ config MACH_MT6739 select F2FS_FS_XATTR select NEED_MACH_MEMORY_H select IRQ_DOMAIN - select IRQ_DOMAIN_DEBUG + select IRQ_DOMAIN_DEBUG if IRQ_DOMAIN && DEBUG_FS select GENERIC_SCHED_CLOCK select MTK_SYS_CIRQ select MTK_GIC_TARGET_ALL @@ -368,7 +368,7 @@ config MACH_MT8167 select HAVE_SMP select NEED_MACH_MEMORY_H select IRQ_DOMAIN - select IRQ_DOMAIN_DEBUG + select IRQ_DOMAIN_DEBUG if IRQ_DOMAIN && DEBUG_FS select GENERIC_SCHED_CLOCK select ARMV7_COMPAT select ARMV7_COMPAT_CPUINFO @@ -399,7 +399,7 @@ config MACH_MT8168 select GENERIC_CLOCKEVENTS select ARM_AMBA select IRQ_DOMAIN - select IRQ_DOMAIN_DEBUG + select IRQ_DOMAIN_DEBUG if IRQ_DOMAIN && DEBUG_FS select GENERIC_SCHED_CLOCK select PINCTRL select PINCTRL_MT8168 @@ -430,7 +430,7 @@ config MACH_MT6885 select HIE select NEED_MACH_MEMORY_H select IRQ_DOMAIN - select IRQ_DOMAIN_DEBUG + select IRQ_DOMAIN_DEBUG if IRQ_DOMAIN && DEBUG_FS select GENERIC_SCHED_CLOCK select ARMV7_COMPAT select ARMV7_COMPAT_CPUINFO @@ -496,7 +496,7 @@ config MACH_MT6873 select HIE select NEED_MACH_MEMORY_H select IRQ_DOMAIN - select IRQ_DOMAIN_DEBUG + select IRQ_DOMAIN_DEBUG if IRQ_DOMAIN && DEBUG_FS select ARM_MTK_NEW_ARCH_CPUIDLE select MTK_LPM select MTK_LOW_POWER_MODULE diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 38276937aef9..49b6fc689156 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -74,6 +74,10 @@ KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) KBUILD_AFLAGS += $(call cc-option,-mabi=lp64) +ifeq ($(CONFIG_SHADOW_CALL_STACK), y) +KBUILD_CFLAGS += -ffixed-x18 +endif + ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__AARCH64EB__ @@ -209,6 +213,7 @@ archclean: $(Q)$(MAKE) $(clean)=$(boot) $(Q)$(MAKE) $(clean)=$(boot)/dts +ifeq ($(KBUILD_EXTMOD),) # We need to generate vdso-offsets.h before compiling certain files in kernel/. # In order to do that, we should use the archprepare target, but we can't since # asm-offsets.h is included in some files used to generate vdso-offsets.h, and @@ -218,6 +223,7 @@ archclean: prepare: vdso_prepare vdso_prepare: prepare0 $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso include/generated/vdso-offsets.h +endif define archhelp echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)' diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index ac16790c2940..142538f3c698 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -18,7 +18,7 @@ include $(srctree)/arch/arm64/boot/dts/Makefile OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S -targets := Image Image.gz +targets := Image Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo DTB_NAMES := $(subst $\",,$(CONFIG_BUILD_ARM64_APPENDED_DTB_IMAGE_NAMES)) ifneq ($(DTB_NAMES),) diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-nanopi-a64.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-nanopi-a64.dts index 2beef9e6cb88..aa0b3844ad63 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-nanopi-a64.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-nanopi-a64.dts @@ -126,9 +126,9 @@ ®_dcdc1 { regulator-always-on; - regulator-min-microvolt = <3000000>; - regulator-max-microvolt = <3000000>; - regulator-name = "vcc-3v"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-name = "vcc-3v3"; }; ®_dcdc2 { diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts index 338e786155b1..2ef779b02757 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-olinuxino.dts @@ -120,10 +120,14 @@ /* DCDC3 is polyphased with DCDC2 */ +/* + * The board uses DDR3L DRAM chips. 1.36V is the closest to the nominal + * 1.35V that the PMIC can drive. + */ ®_dcdc5 { regulator-always-on; - regulator-min-microvolt = <1500000>; - regulator-max-microvolt = <1500000>; + regulator-min-microvolt = <1360000>; + regulator-max-microvolt = <1360000>; regulator-name = "vcc-ddr3"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi index 8c8db1b057df..788a6f8c5994 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi @@ -274,7 +274,8 @@ interrupts = , , ; - clocks = <&ccu 58>; + clocks = <&ccu 58>, <&osc24M>, <&rtc 0>; + clock-names = "apb", "hosc", "losc"; gpio-controller; #gpio-cells = <3>; interrupt-controller; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index e79f3defe002..c2ad4f97cef0 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -56,10 +56,10 @@ pmu { compatible = "arm,armv8-pmuv3"; - interrupts = <0 120 8>, - <0 121 8>, - <0 122 8>, - <0 123 8>; + interrupts = <0 170 4>, + <0 171 4>, + <0 172 4>, + <0 173 4>; interrupt-affinity = <&cpu0>, <&cpu1>, <&cpu2>, diff --git a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi index 125f4deb52fe..b664e7af74eb 100644 --- a/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi +++ b/arch/arm64/boot/dts/amd/amd-seattle-soc.dtsi @@ -107,7 +107,7 @@ clock-names = "uartclk", "apb_pclk"; }; - spi0: ssp@e1020000 { + spi0: spi@e1020000 { status = "disabled"; compatible = "arm,pl022", "arm,primecell"; reg = <0 0xe1020000 0 0x1000>; @@ -117,7 +117,7 @@ clock-names = "apb_pclk"; }; - spi1: ssp@e1030000 { + spi1: spi@e1030000 { status = "disabled"; compatible = "arm,pl022", "arm,primecell"; reg = <0 0xe1030000 0 0x1000>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts index 4b17a76959b2..c83c028e95af 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-nanopi-k2.dts @@ -178,7 +178,7 @@ pinctrl-names = "default"; }; -&pinctrl_aobus { +&gpio_ao { gpio-line-names = "UART TX", "UART RX", "Power Control", "Power Key In", "VCCK En", "CON1 Header Pin31", "I2S Header Pin6", "IR In", "I2S Header Pin7", @@ -186,7 +186,7 @@ "I2S Header Pin5", "HDMI CEC", "SYS LED"; }; -&pinctrl_periphs { +&gpio { gpio-line-names = /* Bank GPIOZ */ "Eth MDIO", "Eth MDC", "Eth RGMII RX Clk", "Eth RX DV", "Eth RX D0", "Eth RX D1", "Eth RX D2", diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts index c3c65b06ba76..5da604e5cf28 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-odroidc2.dts @@ -189,7 +189,7 @@ pinctrl-names = "default"; }; -&pinctrl_aobus { +&gpio_ao { gpio-line-names = "UART TX", "UART RX", "VCCK En", "TF 3V3/1V8 En", "USB HUB nRESET", "USB OTG Power En", "J7 Header Pin2", "IR In", "J7 Header Pin4", @@ -197,7 +197,7 @@ "HDMI CEC", "SYS LED"; }; -&pinctrl_periphs { +&gpio { gpio-line-names = /* Bank GPIOZ */ "Eth MDIO", "Eth MDC", "Eth RGMII RX Clk", "Eth RX DV", "Eth RX D0", "Eth RX D1", "Eth RX D2", @@ -295,7 +295,7 @@ }; &usb0_phy { - status = "okay"; + status = "disabled"; phy-supply = <&usb_otg_pwr>; }; @@ -305,7 +305,7 @@ }; &usb0 { - status = "okay"; + status = "disabled"; }; &usb1 { diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi index af834cdbba79..250b5c11c0e2 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb.dtsi @@ -413,7 +413,7 @@ }; }; - spi_pins: spi { + spi_pins: spi-pins { mux { groups = "spi_miso", "spi_mosi", diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts index edc512ad0bac..ce4a116382bf 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-khadas-vim.dts @@ -33,11 +33,9 @@ gpio-keys-polled { compatible = "gpio-keys-polled"; - #address-cells = <1>; - #size-cells = <0>; poll-interval = <100>; - button@0 { + power-button { label = "power"; linux,code = ; gpios = <&gpio_ao GPIOAO_2 GPIO_ACTIVE_LOW>; @@ -112,7 +110,7 @@ linux,rc-map-name = "rc-geekbox"; }; -&pinctrl_aobus { +&gpio_ao { gpio-line-names = "UART TX", "UART RX", "Power Key In", @@ -125,7 +123,7 @@ "SYS LED"; }; -&pinctrl_periphs { +&gpio { gpio-line-names = /* Bank GPIOZ */ "", "", "", "", "", "", "", "", "", "", "", "", "", "", diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts index d71cbf596d1f..407d32f4fe73 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts +++ b/arch/arm64/boot/dts/amlogic/meson-gxl-s905x-libretech-cc.dts @@ -14,7 +14,7 @@ / { compatible = "libretech,cc", "amlogic,s905x", "amlogic,meson-gxl"; - model = "Libre Technology CC"; + model = "Libre Computer Board AML-S905X-CC"; aliases { serial0 = &uart_AO; @@ -139,7 +139,7 @@ }; }; -&pinctrl_aobus { +&gpio_ao { gpio-line-names = "UART TX", "UART RX", "Blue LED", @@ -152,7 +152,7 @@ "7J1 Header Pin13"; }; -&pinctrl_periphs { +&gpio { gpio-line-names = /* Bank GPIOZ */ "", "", "", "", "", "", "", "", "", "", "", "", "", "", @@ -226,7 +226,6 @@ cap-mmc-highspeed; mmc-ddr-3_3v; max-frequency = <50000000>; - non-removable; disable-wp; mmc-pwrseq = <&emmc_pwrseq>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi index fb8d76a17bc5..3c3057944960 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxl.dtsi @@ -310,7 +310,7 @@ }; }; - spi_pins: spi { + spi_pins: spi-pins { mux { groups = "spi_miso", "spi_mosi", diff --git a/arch/arm64/boot/dts/arm/juno-base.dtsi b/arch/arm64/boot/dts/arm/juno-base.dtsi index f165f04db0c9..13ee8ffa9bbf 100644 --- a/arch/arm64/boot/dts/arm/juno-base.dtsi +++ b/arch/arm64/boot/dts/arm/juno-base.dtsi @@ -5,7 +5,6 @@ /* * Devices shared by all Juno boards */ - dma-ranges = <0 0 0 0 0x100 0>; memtimer: timer@2a810000 { compatible = "arm,armv7-timer-mem"; diff --git a/arch/arm64/boot/dts/arm/juno-clocks.dtsi b/arch/arm64/boot/dts/arm/juno-clocks.dtsi index e5e265dfa902..2870b5eeb198 100644 --- a/arch/arm64/boot/dts/arm/juno-clocks.dtsi +++ b/arch/arm64/boot/dts/arm/juno-clocks.dtsi @@ -8,10 +8,10 @@ */ / { /* SoC fixed clocks */ - soc_uartclk: refclk7273800hz { + soc_uartclk: refclk7372800hz { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <7273800>; + clock-frequency = <7372800>; clock-output-names = "juno:uartclk"; }; diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi index 15214d05fec1..8c20d4a0cb4e 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray-pinctrl.dtsi @@ -42,13 +42,14 @@ pinmux: pinmux@0014029c { compatible = "pinctrl-single"; - reg = <0x0014029c 0x250>; + reg = <0x0014029c 0x26c>; #address-cells = <1>; #size-cells = <1>; pinctrl-single,register-width = <32>; pinctrl-single,function-mask = <0xf>; pinctrl-single,gpio-range = < - &range 0 154 MODE_GPIO + &range 0 91 MODE_GPIO + &range 95 60 MODE_GPIO >; range: gpio-range { #pinctrl-single,gpio-range-cells = <3>; diff --git a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi index 2b76293b51c8..3d2921ef2935 100644 --- a/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi +++ b/arch/arm64/boot/dts/broadcom/stingray/stingray.dtsi @@ -444,8 +444,7 @@ <&pinmux 108 16 27>, <&pinmux 135 77 6>, <&pinmux 141 67 4>, - <&pinmux 145 149 6>, - <&pinmux 151 91 4>; + <&pinmux 145 149 6>; }; i2c1: i2c@000e0000 { diff --git a/arch/arm64/boot/dts/lg/lg1312.dtsi b/arch/arm64/boot/dts/lg/lg1312.dtsi index 860c8fb10795..4bde7b6f2b11 100644 --- a/arch/arm64/boot/dts/lg/lg1312.dtsi +++ b/arch/arm64/boot/dts/lg/lg1312.dtsi @@ -168,14 +168,14 @@ clock-names = "apb_pclk"; status="disabled"; }; - spi0: ssp@fe800000 { + spi0: spi@fe800000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x0 0xfe800000 0x1000>; interrupts = ; clocks = <&clk_bus>; clock-names = "apb_pclk"; }; - spi1: ssp@fe900000 { + spi1: spi@fe900000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x0 0xfe900000 0x1000>; interrupts = ; diff --git a/arch/arm64/boot/dts/lg/lg1313.dtsi b/arch/arm64/boot/dts/lg/lg1313.dtsi index 1887af654a7d..16ced1ff1ad3 100644 --- a/arch/arm64/boot/dts/lg/lg1313.dtsi +++ b/arch/arm64/boot/dts/lg/lg1313.dtsi @@ -168,14 +168,14 @@ clock-names = "apb_pclk"; status="disabled"; }; - spi0: ssp@fe800000 { + spi0: spi@fe800000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x0 0xfe800000 0x1000>; interrupts = ; clocks = <&clk_bus>; clock-names = "apb_pclk"; }; - spi1: ssp@fe900000 { + spi1: spi@fe900000 { compatible = "arm,pl022", "arm,primecell"; reg = <0x0 0xfe900000 0x1000>; interrupts = ; diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi index f6e6f1e83ba8..be91873c0878 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210-p2180.dtsi @@ -282,6 +282,7 @@ status = "okay"; bus-width = <8>; non-removable; + vqmmc-supply = <&vdd_1v8>; }; clocks { diff --git a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi index d67ef4319f3b..97f31bc4fa1e 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210-p2597.dtsi @@ -1584,7 +1584,7 @@ regulator-name = "VDD_HDMI_5V0"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; - gpio = <&exp1 12 GPIO_ACTIVE_LOW>; + gpio = <&exp1 12 GPIO_ACTIVE_HIGH>; enable-active-high; vin-supply = <&vdd_5v0_sys>; }; diff --git a/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi b/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi index b6b44fdf7fac..c1028b47edde 100644 --- a/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi @@ -458,6 +458,8 @@ l11 { regulator-min-microvolt = <1750000>; regulator-max-microvolt = <3337000>; + regulator-allow-set-load; + regulator-system-load = <200000>; }; l12 { diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi index 789f3e87321e..7a510505e0c2 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi @@ -262,6 +262,8 @@ l21 { regulator-min-microvolt = <2950000>; regulator-max-microvolt = <2950000>; + regulator-allow-set-load; + regulator-system-load = <200000>; }; l22 { regulator-min-microvolt = <3300000>; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 6f372ec055dd..da2949586c7a 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -788,6 +788,8 @@ interrupts = <0 138 0>; phys = <&hsusb_phy2>; phy-names = "usb2-phy"; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; }; }; @@ -817,6 +819,8 @@ interrupts = <0 131 0>; phys = <&hsusb_phy1>, <&ssusb_phy_0>; phy-names = "usb2-phy", "usb3-phy"; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; }; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi index ce592a4c0c4c..075659847791 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi @@ -113,6 +113,19 @@ vin-supply = <&vcc_1v8>; }; + vcc3v0_sd: vcc3v0-sd { + compatible = "regulator-fixed"; + enable-active-high; + gpio = <&gpio0 RK_PA1 GPIO_ACTIVE_HIGH>; + pinctrl-names = "default"; + pinctrl-0 = <&sdmmc0_pwr_h>; + regulator-always-on; + regulator-max-microvolt = <3000000>; + regulator-min-microvolt = <3000000>; + regulator-name = "vcc3v0_sd"; + vin-supply = <&vcc3v3_sys>; + }; + vcc3v3_sys: vcc3v3-sys { compatible = "regulator-fixed"; regulator-name = "vcc3v3_sys"; @@ -136,7 +149,7 @@ vcc5v0_host: vcc5v0-host-regulator { compatible = "regulator-fixed"; enable-active-high; - gpio = <&gpio1 RK_PD1 GPIO_ACTIVE_HIGH>; + gpio = <&gpio4 RK_PD1 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&vcc5v0_host_en>; regulator-name = "vcc5v0_host"; @@ -315,7 +328,7 @@ regulator-always-on; regulator-boot-on; regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <3000000>; + regulator-max-microvolt = <3300000>; regulator-state-mem { regulator-on-in-suspend; regulator-suspend-microvolt = <3000000>; @@ -490,6 +503,13 @@ }; }; + sd { + sdmmc0_pwr_h: sdmmc0-pwr-h { + rockchip,pins = + ; + }; + }; + usb2 { vcc5v0_host_en: vcc5v0-host-en { rockchip,pins = @@ -537,6 +557,7 @@ }; &sdmmc { + broken-cd; bus-width = <4>; cap-mmc-highspeed; cap-sd-highspeed; @@ -545,6 +566,7 @@ max-frequency = <150000000>; pinctrl-names = "default"; pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>; + vmmc-supply = <&vcc3v0_sd>; vqmmc-supply = <&vcc_sdio>; status = "okay"; }; diff --git a/arch/arm64/configs/cuttlefish_defconfig b/arch/arm64/configs/cuttlefish_defconfig index 36e51257f589..5aefc3f8ac60 100644 --- a/arch/arm64/configs/cuttlefish_defconfig +++ b/arch/arm64/configs/cuttlefish_defconfig @@ -8,14 +8,17 @@ CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_IKHEADERS=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y -CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_FREEZER=y CONFIG_CPUSETS=y # CONFIG_PROC_PID_CPUSET is not set CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_BPF=y +CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set CONFIG_SCHED_AUTOGROUP=y CONFIG_SCHED_TUNE=y CONFIG_DEFAULT_USE_ENERGY_AWARE=y @@ -24,7 +27,6 @@ CONFIG_BLK_DEV_INITRD=y # CONFIG_RD_LZMA is not set # CONFIG_RD_XZ is not set # CONFIG_RD_LZO is not set -# CONFIG_RD_LZ4 is not set CONFIG_SGETMASK_SYSCALL=y # CONFIG_SYSFS_SYSCALL is not set CONFIG_KALLSYMS_ALL=y @@ -35,20 +37,23 @@ CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set # CONFIG_SLAB_MERGE_DEFAULT is not set CONFIG_PROFILING=y -CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_CC_STACKPROTECTOR_STRONG=y +CONFIG_LTO_CLANG=y +CONFIG_CFI_CLANG=y +CONFIG_SHADOW_CALL_STACK=y CONFIG_REFCOUNT_FULL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y +CONFIG_BLK_INLINE_ENCRYPTION=y +CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y CONFIG_PCI=y CONFIG_PCI_HOST_GENERIC=y CONFIG_PREEMPT=y CONFIG_HZ_100=y # CONFIG_SPARSEMEM_VMEMMAP is not set CONFIG_KSM=y -CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_ZSMALLOC=y CONFIG_SECCOMP=y CONFIG_PARAVIRT=y @@ -61,6 +66,7 @@ CONFIG_ARM64_LSE_ATOMICS=y CONFIG_RANDOMIZE_BASE=y # CONFIG_EFI is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +CONFIG_BINFMT_MISC=y CONFIG_COMPAT=y CONFIG_PM_WAKELOCKS=y CONFIG_PM_WAKELOCKS_LIMIT=0 @@ -74,6 +80,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=y CONFIG_CPU_FREQ_GOV_POWERSAVE=y CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y CONFIG_CPUFREQ_DT=y +CONFIG_CPUFREQ_DUMMY=y CONFIG_ARM_BIG_LITTLE_CPUFREQ=y CONFIG_ARM_DT_BL_CPUFREQ=y CONFIG_ARM_SCPI_CPUFREQ=y @@ -146,6 +153,7 @@ CONFIG_NETFILTER_XT_MATCH_POLICY=y CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y CONFIG_NETFILTER_XT_MATCH_QUOTA=y CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG=y CONFIG_NETFILTER_XT_MATCH_SOCKET=y CONFIG_NETFILTER_XT_MATCH_STATE=y CONFIG_NETFILTER_XT_MATCH_STATISTIC=y @@ -189,10 +197,6 @@ CONFIG_NET_CLS_ACT=y CONFIG_VSOCKETS=y CONFIG_VIRTIO_VSOCKETS=y CONFIG_BPF_JIT=y -CONFIG_CAN=y -# CONFIG_CAN_BCM is not set -# CONFIG_CAN_GW is not set -CONFIG_CAN_VCAN=y CONFIG_CFG80211=y # CONFIG_CFG80211_DEFAULT_PS is not set CONFIG_MAC80211=y @@ -201,6 +205,8 @@ CONFIG_RFKILL=y # CONFIG_UEVENT_HELPER is not set # CONFIG_ALLOW_DEV_COREDUMP is not set CONFIG_DEBUG_DEVRES=y +CONFIG_GNSS=y +CONFIG_GNSS_CMDLINE_SERIAL=m CONFIG_OF_UNITTEST=y CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=y @@ -211,16 +217,17 @@ CONFIG_UID_SYS_STATS=y CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y -CONFIG_SCSI_VIRTIO=y CONFIG_MD=y CONFIG_BLK_DEV_DM=y CONFIG_DM_CRYPT=y +CONFIG_DM_DEFAULT_KEY=y CONFIG_DM_SNAPSHOT=y CONFIG_DM_UEVENT=y CONFIG_DM_VERITY=y CONFIG_DM_VERITY_FEC=y CONFIG_DM_VERITY_AVB=y CONFIG_NETDEVICES=y +CONFIG_DUMMY=y CONFIG_NETCONSOLE=y CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_TUN=y @@ -289,10 +296,12 @@ CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y -CONFIG_VIRTIO_CONSOLE=y +CONFIG_SERIAL_DEV_BUS=y CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y # CONFIG_HW_RANDOM_CAVIUM is not set +CONFIG_TCG_TPM=y +CONFIG_TCG_VTPM_PROXY=y # CONFIG_DEVPORT is not set # CONFIG_I2C_COMPAT is not set # CONFIG_I2C_HELPER_AUTO is not set @@ -340,6 +349,7 @@ CONFIG_HID_MAGICMOUSE=y CONFIG_HID_MICROSOFT=y CONFIG_HID_MONTEREY=y CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NINTENDO=y CONFIG_HID_NTRIG=y CONFIG_HID_ORTEK=y CONFIG_HID_PANTHERLORD=y @@ -352,6 +362,7 @@ CONFIG_HID_SAITEK=y CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SPEEDLINK=y +CONFIG_HID_STEAM=y CONFIG_HID_SUNPLUS=y CONFIG_HID_GREENASIA=y CONFIG_GREENASIA_FF=y @@ -385,7 +396,7 @@ CONFIG_RTC_DRV_PL030=y CONFIG_RTC_DRV_PL031=y CONFIG_VIRTIO_PCI=y # CONFIG_VIRTIO_PCI_LEGACY is not set -CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_PMEM=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y @@ -393,13 +404,15 @@ CONFIG_STAGING=y CONFIG_ASHMEM=y CONFIG_ANDROID_VSOC=y CONFIG_ION=y -CONFIG_ION_SYSTEM_HEAP=y CONFIG_COMMON_CLK_SCPI=y # CONFIG_COMMON_CLK_XGENE is not set CONFIG_MAILBOX=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_ANDROID=y CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ANDROID_BINDERFS=y +CONFIG_LIBNVDIMM=y +# CONFIG_ND_BLK is not set CONFIG_ARM_SCPI_PROTOCOL=y # CONFIG_ARM_SCPI_POWER_DOMAIN is not set CONFIG_EXT4_FS=y @@ -408,11 +421,15 @@ CONFIG_EXT4_ENCRYPTION=y CONFIG_F2FS_FS=y CONFIG_F2FS_FS_SECURITY=y CONFIG_F2FS_FS_ENCRYPTION=y +CONFIG_FS_ENCRYPTION_INLINE_CRYPT=y +CONFIG_FS_VERITY=y +CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y # CONFIG_DNOTIFY is not set CONFIG_QUOTA=y CONFIG_QFMT_V2=y CONFIG_FUSE_FS=y CONFIG_OVERLAY_FS=y +CONFIG_INCREMENTAL_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_TMPFS=y @@ -421,6 +438,7 @@ CONFIG_SDCARD_FS=y CONFIG_PSTORE=y CONFIG_PSTORE_CONSOLE=y CONFIG_PSTORE_RAM=y +CONFIG_UNICODE=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y # CONFIG_ENABLE_MUST_CHECK is not set @@ -435,15 +453,18 @@ CONFIG_PANIC_TIMEOUT=5 CONFIG_SCHEDSTATS=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_ENABLE_DEFAULT_TRACERS=y -CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y +CONFIG_TEST_MEMINIT=y +CONFIG_TEST_STACKINIT=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_LSM_MMAP_MIN_ADDR=65536 CONFIG_HARDENED_USERCOPY=y CONFIG_SECURITY_SELINUX=y +CONFIG_INIT_STACK_ALL=y +CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y CONFIG_CRYPTO_ADIANTUM=y CONFIG_CRYPTO_LZ4=y CONFIG_CRYPTO_ZSTD=y CONFIG_CRYPTO_ANSI_CPRNG=y -CONFIG_CRYPTO_DEV_VIRTIO=y +# CONFIG_CRYPTO_DEV_VIRTIO is not set CONFIG_XZ_DEC=y diff --git a/arch/arm64/configs/k39tv1_64_bsp_defconfig b/arch/arm64/configs/k39tv1_64_bsp_defconfig index ef0762157a4e..6f62c7ff1c30 100644 --- a/arch/arm64/configs/k39tv1_64_bsp_defconfig +++ b/arch/arm64/configs/k39tv1_64_bsp_defconfig @@ -337,7 +337,6 @@ CONFIG_NEW_LEDS=y CONFIG_LEDS_CLASS=y CONFIG_LEDS_TRIGGERS=y CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_SW_SYNC=y CONFIG_ANDROID_INTF_ALARM_DEV=y CONFIG_ION=y CONFIG_MTK_ION=y diff --git a/arch/arm64/configs/k6885v1_64_defconfig b/arch/arm64/configs/k6885v1_64_defconfig index f2753da35110..8f21bf90d72d 100644 --- a/arch/arm64/configs/k6885v1_64_defconfig +++ b/arch/arm64/configs/k6885v1_64_defconfig @@ -356,7 +356,6 @@ CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_RTC_DRV_MT6358=y CONFIG_DMADEVICES=y CONFIG_DMA_MTK_UART=y -CONFIG_SW_SYNC=y CONFIG_ANDROID_INTF_ALARM_DEV=y CONFIG_ION=y CONFIG_MTK_ION=y diff --git a/arch/arm64/configs/k68v1_64_defconfig b/arch/arm64/configs/k68v1_64_defconfig index f9641b41a6f5..7a0e57f79a47 100644 --- a/arch/arm64/configs/k68v1_64_defconfig +++ b/arch/arm64/configs/k68v1_64_defconfig @@ -363,7 +363,6 @@ CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_RTC_DRV_MT6358=y CONFIG_DMADEVICES=y CONFIG_DMA_MTK_UART=y -CONFIG_SW_SYNC=y CONFIG_ANDROID_INTF_ALARM_DEV=y CONFIG_ION=y CONFIG_MTK_ION=y diff --git a/arch/arm64/configs/k69v1_64_defconfig b/arch/arm64/configs/k69v1_64_defconfig index ab3e5aaed6c5..b9f1c86b478b 100644 --- a/arch/arm64/configs/k69v1_64_defconfig +++ b/arch/arm64/configs/k69v1_64_defconfig @@ -361,7 +361,6 @@ CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_RTC_DRV_MT6358=y CONFIG_DMADEVICES=y CONFIG_DMA_MTK_UART=y -CONFIG_SW_SYNC=y CONFIG_ANDROID_INTF_ALARM_DEV=y CONFIG_ION=y CONFIG_MTK_ION=y diff --git a/arch/arm64/configs/k71v1_64_bsp_defconfig b/arch/arm64/configs/k71v1_64_bsp_defconfig index 5f86913108c6..0d1b895e0d9d 100644 --- a/arch/arm64/configs/k71v1_64_bsp_defconfig +++ b/arch/arm64/configs/k71v1_64_bsp_defconfig @@ -360,7 +360,6 @@ CONFIG_LEDS_TRIGGER_TIMER=y # CONFIG_RTC_SYSTOHC is not set CONFIG_DMADEVICES=y CONFIG_DMA_MTK_UART=y -CONFIG_SW_SYNC=y CONFIG_ANDROID_INTF_ALARM_DEV=y CONFIG_ION=y CONFIG_MTK_ION=y diff --git a/arch/arm64/configs/k85v1_64_defconfig b/arch/arm64/configs/k85v1_64_defconfig index ad0c60947886..8c196539ffe7 100644 --- a/arch/arm64/configs/k85v1_64_defconfig +++ b/arch/arm64/configs/k85v1_64_defconfig @@ -366,7 +366,6 @@ CONFIG_LEDS_TRIGGER_TIMER=y CONFIG_RTC_DRV_MT6358=y CONFIG_DMADEVICES=y CONFIG_DMA_MTK_UART=y -CONFIG_SW_SYNC=y CONFIG_ANDROID_INTF_ALARM_DEV=y CONFIG_ION=y CONFIG_MTK_ION=y diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index a91933b1e2e6..4cd4a793dc32 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -30,13 +30,16 @@ typedef void (*alternative_cb_t)(struct alt_instr *alt, void __init apply_alternatives_all(void); void apply_alternatives(void *start, size_t length); -#define ALTINSTR_ENTRY(feature,cb) \ +#define ALTINSTR_ENTRY(feature) \ " .word 661b - .\n" /* label */ \ - " .if " __stringify(cb) " == 0\n" \ " .word 663f - .\n" /* new instruction */ \ - " .else\n" \ + " .hword " __stringify(feature) "\n" /* feature bit */ \ + " .byte 662b-661b\n" /* source len */ \ + " .byte 664f-663f\n" /* replacement len */ + +#define ALTINSTR_ENTRY_CB(feature, cb) \ + " .word 661b - .\n" /* label */ \ " .word " __stringify(cb) "- .\n" /* callback */ \ - " .endif\n" \ " .hword " __stringify(feature) "\n" /* feature bit */ \ " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ @@ -57,15 +60,14 @@ void apply_alternatives(void *start, size_t length); * * Alternatives with callbacks do not generate replacement instructions. */ -#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(feature,cb) \ + ALTINSTR_ENTRY(feature) \ ".popsection\n" \ - " .if " __stringify(cb) " == 0\n" \ ".pushsection .altinstr_replacement, \"a\"\n" \ "663:\n\t" \ newinstr "\n" \ @@ -73,17 +75,25 @@ void apply_alternatives(void *start, size_t length); ".popsection\n\t" \ ".org . - (664b-663b) + (662b-661b)\n\t" \ ".org . - (662b-661b) + (664b-663b)\n" \ - ".else\n\t" \ + ".endif\n" + +#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb) \ + ".if "__stringify(cfg_enabled)" == 1\n" \ + "661:\n\t" \ + oldinstr "\n" \ + "662:\n" \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY_CB(feature, cb) \ + ".popsection\n" \ "663:\n\t" \ "664:\n\t" \ - ".endif\n" \ ".endif\n" #define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ - __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0) + __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) #define ALTERNATIVE_CB(oldinstr, cb) \ - __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_CB_PATCH, 1, cb) + __ALTERNATIVE_CFG_CB(oldinstr, ARM64_CB_PATCH, 1, cb) #else #include diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index dd49c3567f20..f9af19fe4a02 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -78,10 +78,9 @@ alternative_else_nop_endif /* * Remove the address tag from a virtual address, if present. */ - .macro clear_address_tag, dst, addr - tst \addr, #(1 << 55) - bic \dst, \addr, #(0xff << 56) - csel \dst, \dst, \addr, eq + .macro untagged_addr, dst, addr + sbfx \dst, \addr, #0, #56 + and \dst, \dst, \addr .endm #endif diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index d7403610ea1e..e20ad2498f98 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -47,6 +47,7 @@ #define ARM64_MISMATCHED_CACHE_TYPE 27 #define ARM64_SSBS 28 + #define ARM64_NCAPS 29 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 5048c7a55eef..166f81b7afee 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -10,6 +10,7 @@ #define __ASM_CPUFEATURE_H #include +#include #include #include @@ -85,24 +86,227 @@ struct arm64_ftr_reg { extern struct arm64_ftr_reg arm64_ftr_reg_ctrel0; -/* scope of capability check */ -enum { - SCOPE_SYSTEM, - SCOPE_LOCAL_CPU, -}; +/* + * CPU capabilities: + * + * We use arm64_cpu_capabilities to represent system features, errata work + * arounds (both used internally by kernel and tracked in cpu_hwcaps) and + * ELF HWCAPs (which are exposed to user). + * + * To support systems with heterogeneous CPUs, we need to make sure that we + * detect the capabilities correctly on the system and take appropriate + * measures to ensure there are no incompatibilities. + * + * This comment tries to explain how we treat the capabilities. + * Each capability has the following list of attributes : + * + * 1) Scope of Detection : The system detects a given capability by + * performing some checks at runtime. This could be, e.g, checking the + * value of a field in CPU ID feature register or checking the cpu + * model. The capability provides a call back ( @matches() ) to + * perform the check. Scope defines how the checks should be performed. + * There are three cases: + * + * a) SCOPE_LOCAL_CPU: check all the CPUs and "detect" if at least one + * matches. This implies, we have to run the check on all the + * booting CPUs, until the system decides that state of the + * capability is finalised. (See section 2 below) + * Or + * b) SCOPE_SYSTEM: check all the CPUs and "detect" if all the CPUs + * matches. This implies, we run the check only once, when the + * system decides to finalise the state of the capability. If the + * capability relies on a field in one of the CPU ID feature + * registers, we use the sanitised value of the register from the + * CPU feature infrastructure to make the decision. + * Or + * c) SCOPE_BOOT_CPU: Check only on the primary boot CPU to detect the + * feature. This category is for features that are "finalised" + * (or used) by the kernel very early even before the SMP cpus + * are brought up. + * + * The process of detection is usually denoted by "update" capability + * state in the code. + * + * 2) Finalise the state : The kernel should finalise the state of a + * capability at some point during its execution and take necessary + * actions if any. Usually, this is done, after all the boot-time + * enabled CPUs are brought up by the kernel, so that it can make + * better decision based on the available set of CPUs. However, there + * are some special cases, where the action is taken during the early + * boot by the primary boot CPU. (e.g, running the kernel at EL2 with + * Virtualisation Host Extensions). The kernel usually disallows any + * changes to the state of a capability once it finalises the capability + * and takes any action, as it may be impossible to execute the actions + * safely. A CPU brought up after a capability is "finalised" is + * referred to as "Late CPU" w.r.t the capability. e.g, all secondary + * CPUs are treated "late CPUs" for capabilities determined by the boot + * CPU. + * + * At the moment there are two passes of finalising the capabilities. + * a) Boot CPU scope capabilities - Finalised by primary boot CPU via + * setup_boot_cpu_capabilities(). + * b) Everything except (a) - Run via setup_system_capabilities(). + * + * 3) Verification: When a CPU is brought online (e.g, by user or by the + * kernel), the kernel should make sure that it is safe to use the CPU, + * by verifying that the CPU is compliant with the state of the + * capabilities finalised already. This happens via : + * + * secondary_start_kernel()-> check_local_cpu_capabilities() + * + * As explained in (2) above, capabilities could be finalised at + * different points in the execution. Each newly booted CPU is verified + * against the capabilities that have been finalised by the time it + * boots. + * + * a) SCOPE_BOOT_CPU : All CPUs are verified against the capability + * except for the primary boot CPU. + * + * b) SCOPE_LOCAL_CPU, SCOPE_SYSTEM: All CPUs hotplugged on by the + * user after the kernel boot are verified against the capability. + * + * If there is a conflict, the kernel takes an action, based on the + * severity (e.g, a CPU could be prevented from booting or cause a + * kernel panic). The CPU is allowed to "affect" the state of the + * capability, if it has not been finalised already. See section 5 + * for more details on conflicts. + * + * 4) Action: As mentioned in (2), the kernel can take an action for each + * detected capability, on all CPUs on the system. Appropriate actions + * include, turning on an architectural feature, modifying the control + * registers (e.g, SCTLR, TCR etc.) or patching the kernel via + * alternatives. The kernel patching is batched and performed at later + * point. The actions are always initiated only after the capability + * is finalised. This is usally denoted by "enabling" the capability. + * The actions are initiated as follows : + * a) Action is triggered on all online CPUs, after the capability is + * finalised, invoked within the stop_machine() context from + * enable_cpu_capabilitie(). + * + * b) Any late CPU, brought up after (1), the action is triggered via: + * + * check_local_cpu_capabilities() -> verify_local_cpu_capabilities() + * + * 5) Conflicts: Based on the state of the capability on a late CPU vs. + * the system state, we could have the following combinations : + * + * x-----------------------------x + * | Type | System | Late CPU | + * |-----------------------------| + * | a | y | n | + * |-----------------------------| + * | b | n | y | + * x-----------------------------x + * + * Two separate flag bits are defined to indicate whether each kind of + * conflict can be allowed: + * ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU - Case(a) is allowed + * ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU - Case(b) is allowed + * + * Case (a) is not permitted for a capability that the system requires + * all CPUs to have in order for the capability to be enabled. This is + * typical for capabilities that represent enhanced functionality. + * + * Case (b) is not permitted for a capability that must be enabled + * during boot if any CPU in the system requires it in order to run + * safely. This is typical for erratum work arounds that cannot be + * enabled after the corresponding capability is finalised. + * + * In some non-typical cases either both (a) and (b), or neither, + * should be permitted. This can be described by including neither + * or both flags in the capability's type field. + */ + + +/* + * Decide how the capability is detected. + * On any local CPU vs System wide vs the primary boot CPU + */ +#define ARM64_CPUCAP_SCOPE_LOCAL_CPU ((u16)BIT(0)) +#define ARM64_CPUCAP_SCOPE_SYSTEM ((u16)BIT(1)) +/* + * The capabilitiy is detected on the Boot CPU and is used by kernel + * during early boot. i.e, the capability should be "detected" and + * "enabled" as early as possibly on all booting CPUs. + */ +#define ARM64_CPUCAP_SCOPE_BOOT_CPU ((u16)BIT(2)) +#define ARM64_CPUCAP_SCOPE_MASK \ + (ARM64_CPUCAP_SCOPE_SYSTEM | \ + ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ + ARM64_CPUCAP_SCOPE_BOOT_CPU) + +#define SCOPE_SYSTEM ARM64_CPUCAP_SCOPE_SYSTEM +#define SCOPE_LOCAL_CPU ARM64_CPUCAP_SCOPE_LOCAL_CPU +#define SCOPE_BOOT_CPU ARM64_CPUCAP_SCOPE_BOOT_CPU +#define SCOPE_ALL ARM64_CPUCAP_SCOPE_MASK + +/* + * Is it permitted for a late CPU to have this capability when system + * hasn't already enabled it ? + */ +#define ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU ((u16)BIT(4)) +/* Is it safe for a late CPU to miss this capability when system has it */ +#define ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU ((u16)BIT(5)) + +/* + * CPU errata workarounds that need to be enabled at boot time if one or + * more CPUs in the system requires it. When one of these capabilities + * has been enabled, it is safe to allow any CPU to boot that doesn't + * require the workaround. However, it is not safe if a "late" CPU + * requires a workaround and the system hasn't enabled it already. + */ +#define ARM64_CPUCAP_LOCAL_CPU_ERRATUM \ + (ARM64_CPUCAP_SCOPE_LOCAL_CPU | ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU) +/* + * CPU feature detected at boot time based on system-wide value of a + * feature. It is safe for a late CPU to have this feature even though + * the system hasn't enabled it, although the featuer will not be used + * by Linux in this case. If the system has enabled this feature already, + * then every late CPU must have it. + */ +#define ARM64_CPUCAP_SYSTEM_FEATURE \ + (ARM64_CPUCAP_SCOPE_SYSTEM | ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU) +/* + * CPU feature detected at boot time based on feature of one or more CPUs. + * All possible conflicts for a late CPU are ignored. + */ +#define ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE \ + (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ + ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU | \ + ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU) + +/* + * CPU feature detected at boot time, on one or more CPUs. A late CPU + * is not allowed to have the capability when the system doesn't have it. + * It is Ok for a late CPU to miss the feature. + */ +#define ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE \ + (ARM64_CPUCAP_SCOPE_LOCAL_CPU | \ + ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU) + +/* + * CPU feature used early in the boot based on the boot CPU. All secondary + * CPUs must match the state of the capability as detected by the boot CPU. + */ +#define ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE ARM64_CPUCAP_SCOPE_BOOT_CPU struct arm64_cpu_capabilities { const char *desc; u16 capability; - int def_scope; /* default scope */ + u16 type; bool (*matches)(const struct arm64_cpu_capabilities *caps, int scope); - int (*enable)(void *); /* Called on all active CPUs */ + /* + * Take the appropriate actions to enable this capability for this CPU. + * For each successfully booted CPU, this method is called for each + * globally detected capability. + */ + void (*cpu_enable)(const struct arm64_cpu_capabilities *cap); union { struct { /* To be used for erratum handling only */ - u32 midr_model; - u32 midr_range_min, midr_range_max; + struct midr_range midr_range; }; + const struct midr_range *midr_range_list; struct { /* Feature register checking */ u32 sys_reg; u8 field_pos; @@ -114,6 +318,23 @@ struct arm64_cpu_capabilities { }; }; +static inline int cpucap_default_scope(const struct arm64_cpu_capabilities *cap) +{ + return cap->type & ARM64_CPUCAP_SCOPE_MASK; +} + +static inline bool +cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) +{ + return !!(cap->type & ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU); +} + +static inline bool +cpucap_late_cpu_permitted(const struct arm64_cpu_capabilities *cap) +{ + return !!(cap->type & ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU); +} + extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; extern struct static_key_false arm64_const_caps_ready; @@ -225,15 +446,8 @@ static inline bool id_aa64pfr0_32bit_el0(u64 pfr0) } void __init setup_cpu_features(void); - -void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, - const char *info); -void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps); void check_local_cpu_capabilities(void); -void update_cpu_errata_workarounds(void); -void __init enable_errata_workarounds(void); -void verify_local_cpu_errata_workarounds(void); u64 read_sanitised_ftr_reg(u32 id); @@ -279,11 +493,7 @@ static inline int arm64_get_ssbd_state(void) #endif } -#ifdef CONFIG_ARM64_SSBD void arm64_set_ssbd_mitigation(bool state); -#else -static inline void arm64_set_ssbd_mitigation(bool state) {} -#endif #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index a968b499f826..43ca286a07f8 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,6 +86,8 @@ #define ARM_CPU_PART_CORTEX_A53 0xD03 #define ARM_CPU_PART_CORTEX_A73 0xD09 #define ARM_CPU_PART_CORTEX_A75 0xD0A +#define ARM_CPU_PART_CORTEX_A35 0xD04 +#define ARM_CPU_PART_CORTEX_A55 0xD05 #define APM_CPU_PART_POTENZA 0x000 @@ -110,6 +112,8 @@ #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) #define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73) #define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) +#define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35) +#define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) @@ -141,19 +145,19 @@ struct midr_range { u32 rv_max; }; -#define MIDR_IN_RANGE(m, v_min, r_min, v_max, r_max) \ +#define MIDR_RANGE(m, v_min, r_min, v_max, r_max) \ { \ .model = m, \ .rv_min = MIDR_CPU_VAR_REV(v_min, r_min), \ .rv_max = MIDR_CPU_VAR_REV(v_max, r_max), \ } -#define _MIDR_ALL_VERSIONS(m) MIDR_IN_RANGE(m, 0, 0, 0xf, 0xf) +#define MIDR_ALL_VERSIONS(m) MIDR_RANGE(m, 0, 0, 0xf, 0xf) static inline bool is_midr_in_range(u32 midr, struct midr_range const *range) { return MIDR_IS_CPU_MODEL_RANGE(midr, range->model, - range->rv_min, range->rv_max); + range->rv_min, range->rv_max); } static inline bool diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 1a6d02350fc6..c59e81b65132 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -70,8 +70,6 @@ extern u32 __kvm_get_mdcr_el2(void); extern u32 __init_stage2_translation(void); -extern void __qcom_hyp_sanitize_btac_predictors(void); - /* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */ #define __hyp_this_cpu_ptr(sym) \ ({ \ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 2b55aee7c051..92f70a34c5e6 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -188,6 +188,11 @@ static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); } +static inline bool kvm_vcpu_dabt_issf(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SF); +} + static inline int kvm_vcpu_dabt_get_rd(const struct kvm_vcpu *vcpu) { return (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h index 75ea42079757..0240290cf764 100644 --- a/arch/arm64/include/asm/kvm_mmio.h +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -21,13 +21,11 @@ #include #include -/* - * This is annoying. The mmio code requires this, even if we don't - * need any decoding. To be fixed. - */ struct kvm_decode { unsigned long rt; bool sign_extend; + /* Witdth of the register accessed by the faulting instruction is 64-bits */ + bool sixty_four; }; void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index e42c1f0ae6cf..47ba6a57dc45 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -296,6 +296,11 @@ static inline bool __kvm_cpu_uses_extended_idmap(void) return __cpu_uses_extended_idmap(); } +/* + * Can't use pgd_populate here, because the extended idmap adds an extra level + * above CONFIG_PGTABLE_LEVELS (which is 2 or 3 if we're using the extended + * idmap), and pgd_populate is only available if CONFIG_PGTABLE_LEVELS = 4. + */ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, pgd_t *hyp_pgd, pgd_t *merged_hyp_pgd, diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 5d5903a4f108..9d230ea44b66 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -215,21 +215,31 @@ static inline unsigned long kaslr_offset(void) * up with a tagged userland pointer. Clear the tag to get a sane pointer to * pass on to access_ok(), for instance. */ -#define untagged_addr(addr) \ +#define __untagged_addr(addr) \ ((__force __typeof__(addr))sign_extend64((__force u64)(addr), 55)) +#define untagged_addr(addr) ({ \ + u64 __addr = (__force u64)(addr); \ + __addr &= __untagged_addr(__addr); \ + (__force __typeof__(addr))__addr; \ +}) + #ifdef CONFIG_KASAN_SW_TAGS #define __tag_shifted(tag) ((u64)(tag) << 56) -#define __tag_set(addr, tag) (__typeof__(addr))( \ - ((u64)(addr) & ~__tag_shifted(0xff)) | __tag_shifted(tag)) -#define __tag_reset(addr) untagged_addr(addr) +#define __tag_reset(addr) __untagged_addr(addr) #define __tag_get(addr) (__u8)((u64)(addr) >> 56) #else -#define __tag_set(addr, tag) (addr) +#define __tag_shifted(tag) 0UL #define __tag_reset(addr) (addr) #define __tag_get(addr) 0 #endif +static inline const void *__tag_set(const void *addr, u8 tag) +{ + u64 __addr = (u64)addr & ~__tag_shifted(0xff); + return (const void *)(__addr | __tag_shifted(tag)); +} + /* * Physical vs virtual RAM address space conversion. These are * private definitions which should NOT be used outside memory.h @@ -300,6 +310,22 @@ static inline void *phys_to_virt(phys_addr_t x) #define virt_to_pfn(x) __phys_to_pfn(__virt_to_phys((unsigned long)(x))) #define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x)) +/* + * With non-canonical CFI jump tables, the compiler replaces function + * address references with the address of the function's CFI jump + * table entry. This results in __pa_symbol(function) returning the + * physical address of the jump table entry, which can lead to address + * space confusion since the jump table points to the function's + * virtual address. Therefore, use inline assembly to ensure we are + * always taking the address of the actual function. + */ +#define __pa_function(x) ({ \ + unsigned long addr; \ + asm("adrp %0, " __stringify(x) "\n\t" \ + "add %0, %0, :lo12:" __stringify(x) : "=r" (addr)); \ + __pa_symbol(addr); \ +}) + /* * virt_to_page(k) convert a _valid_ virtual address to struct page * * virt_addr_valid(k) indicates whether a virtual address is valid @@ -316,8 +342,9 @@ static inline void *phys_to_virt(phys_addr_t x) #define page_to_virt(page) ({ \ unsigned long __addr = \ ((__page_to_voff(page)) | PAGE_OFFSET); \ - __addr = __tag_set(__addr, page_kasan_tag(page)); \ - ((void *)__addr); \ + const void *__addr_tag = \ + __tag_set((void *)__addr, page_kasan_tag(page)); \ + ((void *)__addr_tag); \ }) #define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START)) diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index f7ff06580721..ecb5a1115455 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -140,7 +140,7 @@ static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgd) phys_addr_t pgd_phys = virt_to_phys(pgd); - replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1); + replace_phys = (void *)__pa_function(idmap_cpu_replace_ttbr1); cpu_install_idmap(); replace_phys(pgd_phys); diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 2db84df5eb42..26efe251f076 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -43,11 +43,11 @@ #define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) #define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) -#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) -#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) -#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) -#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) +#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) +#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) +#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) #define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) @@ -71,17 +71,17 @@ #define PAGE_S2_DEVICE __pgprot(_PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) #define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) -#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) -#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) +/* shared+writable pages are clean by default, hence PTE_RDONLY|PTE_WRITE */ +#define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) +#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN) #define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN) -#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN) #define __P000 PAGE_NONE #define __P001 PAGE_READONLY #define __P010 PAGE_READONLY #define __P011 PAGE_READONLY -#define __P100 PAGE_EXECONLY +#define __P100 PAGE_READONLY_EXEC #define __P101 PAGE_READONLY_EXEC #define __P110 PAGE_READONLY_EXEC #define __P111 PAGE_READONLY_EXEC @@ -90,7 +90,7 @@ #define __S001 PAGE_READONLY #define __S010 PAGE_SHARED #define __S011 PAGE_SHARED -#define __S100 PAGE_EXECONLY +#define __S100 PAGE_READONLY_EXEC #define __S101 PAGE_READONLY_EXEC #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 4cf248185e6f..324db23b37de 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -90,12 +90,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) -/* - * Execute-only user mappings do not have the PTE_USER bit set. All valid - * kernel mappings have the PTE_UXN bit set. - */ #define pte_valid_not_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN)) + ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) #define pte_valid_young(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF)) #define pte_valid_user(pte) \ @@ -111,8 +107,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; /* * p??_access_permitted() is true for valid user mappings (subject to the - * write permission check) other than user execute-only which do not have the - * PTE_USER bit set. PROT_NONE mappings do not have the PTE_VALID bit set. + * write permission check). PROT_NONE mappings do not have the PTE_VALID bit + * set. */ #define pte_access_permitted(pte, write) \ (pte_valid_user(pte) && (!(write) || pte_write(pte))) @@ -258,23 +254,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, set_pte(ptep, pte); } -#define __HAVE_ARCH_PTE_SAME -static inline int pte_same(pte_t pte_a, pte_t pte_b) -{ - pteval_t lhs, rhs; - - lhs = pte_val(pte_a); - rhs = pte_val(pte_b); - - if (pte_present(pte_a)) - lhs &= ~PTE_RDONLY; - - if (pte_present(pte_b)) - rhs &= ~PTE_RDONLY; - - return (lhs == rhs); -} - /* * Huge pte definitions. */ @@ -364,6 +343,7 @@ static inline int pmd_protnone(pmd_t pmd) #define pud_write(pud) pte_write(pud_pte(pud)) #define pud_pfn(pud) (((pud_val(pud) & PUD_MASK) & PHYS_MASK) >> PAGE_SHIFT) +#define pfn_pud(pfn,prot) (__pud(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) #define set_pmd_at(mm, addr, pmdp, pmd) set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 4945363fad25..245cb12de526 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -37,6 +37,7 @@ #include #include +#include #include #include #include @@ -239,8 +240,8 @@ static inline void spin_lock_prefetch(const void *ptr) #endif -int cpu_enable_pan(void *__unused); -int cpu_enable_cache_maint_trap(void *__unused); +void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused); +void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused); #ifdef CONFIG_ARM64_TAGGED_ADDR_ABI /* PR_{SET,GET}_TAGGED_ADDR_CTRL prctl */ diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 8c2816b5daaa..76162e4fafdc 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -35,7 +35,38 @@ #define COMPAT_PTRACE_GETHBPREGS 29 #define COMPAT_PTRACE_SETHBPREGS 30 -/* AArch32 CPSR bits */ +/* SPSR_ELx bits for exceptions taken from AArch32 */ +#define PSR_AA32_MODE_MASK 0x0000001f +#define PSR_AA32_MODE_USR 0x00000010 +#define PSR_AA32_MODE_FIQ 0x00000011 +#define PSR_AA32_MODE_IRQ 0x00000012 +#define PSR_AA32_MODE_SVC 0x00000013 +#define PSR_AA32_MODE_ABT 0x00000017 +#define PSR_AA32_MODE_HYP 0x0000001a +#define PSR_AA32_MODE_UND 0x0000001b +#define PSR_AA32_MODE_SYS 0x0000001f +#define PSR_AA32_T_BIT 0x00000020 +#define PSR_AA32_F_BIT 0x00000040 +#define PSR_AA32_I_BIT 0x00000080 +#define PSR_AA32_A_BIT 0x00000100 +#define PSR_AA32_E_BIT 0x00000200 +#define PSR_AA32_SSBS_BIT 0x00800000 +#define PSR_AA32_DIT_BIT 0x01000000 +#define PSR_AA32_Q_BIT 0x08000000 +#define PSR_AA32_V_BIT 0x10000000 +#define PSR_AA32_C_BIT 0x20000000 +#define PSR_AA32_Z_BIT 0x40000000 +#define PSR_AA32_N_BIT 0x80000000 +#define PSR_AA32_IT_MASK 0x0600fc00 /* If-Then execution state mask */ +#define PSR_AA32_GE_MASK 0x000f0000 + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define PSR_AA32_ENDSTATE PSR_AA32_E_BIT +#else +#define PSR_AA32_ENDSTATE 0 +#endif + +/* AArch32 CPSR bits, as seen in AArch32 */ #define COMPAT_PSR_MODE_MASK 0x0000001f #define COMPAT_PSR_MODE_USR 0x00000010 #define COMPAT_PSR_MODE_FIQ 0x00000011 @@ -51,6 +82,7 @@ #define COMPAT_PSR_A_BIT 0x00000100 #define COMPAT_PSR_E_BIT 0x00000200 #define PSR_AA32_SSBS_BIT 0x00800000 +#define COMPAT_PSR_DIT_BIT 0x00200000 #define COMPAT_PSR_J_BIT 0x01000000 #define COMPAT_PSR_Q_BIT 0x08000000 #define COMPAT_PSR_V_BIT 0x10000000 @@ -112,6 +144,30 @@ #define compat_sp_fiq regs[29] #define compat_lr_fiq regs[30] +static inline unsigned long compat_psr_to_pstate(const unsigned long psr) +{ + unsigned long pstate; + + pstate = psr & ~COMPAT_PSR_DIT_BIT; + + if (psr & COMPAT_PSR_DIT_BIT) + pstate |= PSR_AA32_DIT_BIT; + + return pstate; +} + +static inline unsigned long pstate_to_compat_psr(const unsigned long pstate) +{ + unsigned long psr; + + psr = pstate & ~PSR_AA32_DIT_BIT; + + if (pstate & PSR_AA32_DIT_BIT) + psr |= COMPAT_PSR_DIT_BIT; + + return psr; +} + /* * This struct defines the way the registers are stored on the stack during an * exception. Note that sizeof(struct pt_regs) has to be a multiple of 16 (for diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h new file mode 100644 index 000000000000..c50d2b0c6c5f --- /dev/null +++ b/arch/arm64/include/asm/scs.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SCS_H +#define _ASM_SCS_H + +#ifndef __ASSEMBLY__ + +#include + +#ifdef CONFIG_SHADOW_CALL_STACK + +extern void scs_init_irq(void); + +static __always_inline void scs_save(struct task_struct *tsk) +{ + void *s; + + asm volatile("mov %0, x18" : "=r" (s)); + task_set_scs(tsk, s); +} + +static inline void scs_overflow_check(struct task_struct *tsk) +{ + if (unlikely(scs_corrupted(tsk))) + panic("corrupted shadow stack detected inside scheduler\n"); +} + +#else /* CONFIG_SHADOW_CALL_STACK */ + +static inline void scs_init_irq(void) {} +static inline void scs_save(struct task_struct *tsk) {} +static inline void scs_overflow_check(struct task_struct *tsk) {} + +#endif /* CONFIG_SHADOW_CALL_STACK */ + +#endif /* __ASSEMBLY __ */ + +#endif /* _ASM_SCS_H */ diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 99390755c0c4..b9da0ad58d33 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -38,6 +38,10 @@ extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk); DECLARE_PER_CPU(unsigned long *, irq_stack_ptr); +#ifdef CONFIG_SHADOW_CALL_STACK +DECLARE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr); +#endif + static inline bool on_irq_stack(unsigned long sp) { unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr); diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index 8939c87c4dce..0cde2f473971 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -2,7 +2,7 @@ #ifndef __ASM_SUSPEND_H #define __ASM_SUSPEND_H -#define NR_CTX_REGS 12 +#define NR_CTX_REGS 13 #define NR_CALLEE_SAVED_REGS 12 /* diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 806225017037..748b6a701730 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,6 +20,7 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H +#include #include /* @@ -301,27 +302,88 @@ /* Common SCTLR_ELx flags. */ #define SCTLR_ELx_DSSBS (1UL << 44) #define SCTLR_ELx_EE (1 << 25) +#define SCTLR_ELx_WXN (1 << 19) #define SCTLR_ELx_I (1 << 12) #define SCTLR_ELx_SA (1 << 3) #define SCTLR_ELx_C (1 << 2) #define SCTLR_ELx_A (1 << 1) #define SCTLR_ELx_M 1 -#define SCTLR_EL2_RES1 ((1 << 4) | (1 << 5) | (1 << 11) | (1 << 16) | \ - (1 << 18) | (1 << 22) | (1 << 23) | (1 << 28) | \ - (1 << 29)) - #define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ SCTLR_ELx_SA | SCTLR_ELx_I) +/* SCTLR_EL2 specific flags. */ +#define SCTLR_EL2_RES1 ((1 << 4) | (1 << 5) | (1 << 11) | (1 << 16) | \ + (1 << 18) | (1 << 22) | (1 << 23) | (1 << 28) | \ + (1 << 29)) +#define SCTLR_EL2_RES0 ((1 << 6) | (1 << 7) | (1 << 8) | (1 << 9) | \ + (1 << 10) | (1 << 13) | (1 << 14) | (1 << 15) | \ + (1 << 17) | (1 << 20) | (1 << 21) | (1 << 24) | \ + (1 << 26) | (1 << 27) | (1 << 30) | (1 << 31) | \ + (0xffffefffUL << 32)) + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define ENDIAN_SET_EL2 SCTLR_ELx_EE +#define ENDIAN_CLEAR_EL2 0 +#else +#define ENDIAN_SET_EL2 0 +#define ENDIAN_CLEAR_EL2 SCTLR_ELx_EE +#endif + +/* SCTLR_EL2 value used for the hyp-stub */ +#define SCTLR_EL2_SET (ENDIAN_SET_EL2 | SCTLR_EL2_RES1) +#define SCTLR_EL2_CLEAR (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \ + SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \ + SCTLR_ELx_DSSBS | ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) + +#if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffff +#error "Inconsistent SCTLR_EL2 set/clear bits" +#endif + /* SCTLR_EL1 specific flags. */ #define SCTLR_EL1_UCI (1 << 26) +#define SCTLR_EL1_E0E (1 << 24) #define SCTLR_EL1_SPAN (1 << 23) +#define SCTLR_EL1_NTWE (1 << 18) +#define SCTLR_EL1_NTWI (1 << 16) #define SCTLR_EL1_UCT (1 << 15) +#define SCTLR_EL1_DZE (1 << 14) +#define SCTLR_EL1_UMA (1 << 9) #define SCTLR_EL1_SED (1 << 8) +#define SCTLR_EL1_ITD (1 << 7) #define SCTLR_EL1_CP15BEN (1 << 5) +#define SCTLR_EL1_SA0 (1 << 4) + +#define SCTLR_EL1_RES1 ((1 << 11) | (1 << 20) | (1 << 22) | (1 << 28) | \ + (1 << 29)) +#define SCTLR_EL1_RES0 ((1 << 6) | (1 << 10) | (1 << 13) | (1 << 17) | \ + (1 << 21) | (1 << 27) | (1 << 30) | (1 << 31) | \ + (0xffffefffUL << 32)) + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define ENDIAN_SET_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) +#define ENDIAN_CLEAR_EL1 0 +#else +#define ENDIAN_SET_EL1 0 +#define ENDIAN_CLEAR_EL1 (SCTLR_EL1_E0E | SCTLR_ELx_EE) +#endif + +#define SCTLR_EL1_SET (SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA |\ + SCTLR_EL1_SA0 | SCTLR_EL1_SED | SCTLR_ELx_I |\ + SCTLR_EL1_DZE | SCTLR_EL1_UCT | SCTLR_EL1_NTWI |\ + SCTLR_EL1_NTWE | SCTLR_EL1_SPAN | ENDIAN_SET_EL1 |\ + SCTLR_EL1_UCI | SCTLR_EL1_RES1) +#define SCTLR_EL1_CLEAR (SCTLR_ELx_A | SCTLR_EL1_CP15BEN | SCTLR_EL1_ITD |\ + SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\ + SCTLR_ELx_DSSBS | SCTLR_EL1_RES0) + +#if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff +#error "Inconsistent SCTLR_EL1 set/clear bits" +#endif /* id_aa64isar0 */ +#define ID_AA64ISAR0_TS_SHIFT 52 +#define ID_AA64ISAR0_FHM_SHIFT 48 #define ID_AA64ISAR0_DP_SHIFT 44 #define ID_AA64ISAR0_SM4_SHIFT 40 #define ID_AA64ISAR0_SM3_SHIFT 36 @@ -342,6 +404,7 @@ /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_CSV2_SHIFT 56 +#define ID_AA64PFR0_DIT_SHIFT 48 #define ID_AA64PFR0_GIC_SHIFT 24 #define ID_AA64PFR0_ASIMD_SHIFT 20 #define ID_AA64PFR0_FP_SHIFT 16 @@ -394,6 +457,7 @@ #define ID_AA64MMFR1_VMIDBITS_16 2 /* id_aa64mmfr2 */ +#define ID_AA64MMFR2_AT_SHIFT 32 #define ID_AA64MMFR2_LVA_SHIFT 16 #define ID_AA64MMFR2_IESB_SHIFT 12 #define ID_AA64MMFR2_LSM_SHIFT 8 @@ -478,6 +542,7 @@ #else +#include #include #define __DEFINE_MRS_MSR_S_REGNUM \ diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index c5b459ba2b6d..bce4ad822c0d 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -43,6 +43,9 @@ struct thread_info { u64 ttbr0; /* saved TTBR0_EL1 */ #endif int preempt_count; /* 0 => preemptable, <0 => bug */ +#ifdef CONFIG_SHADOW_CALL_STACK + void *shadow_call_stack; +#endif void *regs_on_excp; /* aee */ int cpu_excp; /* aee */ }; diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index c5f89442785c..9d1e24e030b3 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -102,12 +102,6 @@ static inline bool has_vhe(void) return false; } -#ifdef CONFIG_ARM64_VHE -extern void verify_cpu_run_el(void); -#else -static inline void verify_cpu_run_el(void) {} -#endif - #endif /* __ASSEMBLY__ */ #endif /* ! __ASM__VIRT_H */ diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index eed4600efdad..2bcd6e4f3474 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -42,6 +42,12 @@ #define HWCAP_SM4 (1 << 19) #define HWCAP_ASIMDDP (1 << 20) #define HWCAP_SHA512 (1 << 21) -#define HWCAP_SSBS (1 << 22) +#define HWCAP_SVE (1 << 22) +#define HWCAP_ASIMDFHM (1 << 23) +#define HWCAP_DIT (1 << 24) +#define HWCAP_USCAT (1 << 25) +#define HWCAP_ILRCPC (1 << 26) +#define HWCAP_FLAGM (1 << 27) +#define HWCAP_SSBS (1 << 28) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index f19e95917234..e32e134b3346 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -57,6 +57,7 @@ arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o arm64-obj-$(CONFIG_ARM64_SSBD) += ssbd.o +arm64-obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o ifeq ($(CONFIG_KVM),y) arm64-obj-$(CONFIG_HARDEN_BRANCH_PREDICTOR) += bpi.o diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 5c4bce4ac381..77b36c88eee5 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -122,7 +122,7 @@ static void patch_alternative(struct alt_instr *alt, } } -static void __apply_alternatives(void *alt_region, bool use_linear_alias) +static void __nocfi __apply_alternatives(void *alt_region, bool use_linear_alias) { struct alt_instr *alt; struct alt_region *region = alt_region; diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 5a73ef423139..0ec3aef7ea2a 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -42,6 +42,9 @@ int main(void) DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit)); #ifdef CONFIG_ARM64_SW_TTBR0_PAN DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); +#endif +#ifdef CONFIG_SHADOW_CALL_STACK + DEFINE(TSK_TI_SCS, offsetof(struct task_struct, thread_info.shadow_call_stack)); #endif DEFINE(TSK_STACK, offsetof(struct task_struct, stack)); DEFINE(TI_CPU_EXCP, offsetof(struct thread_info, cpu_excp)); diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S index e5de33513b5d..4cae34e5a24e 100644 --- a/arch/arm64/kernel/bpi.S +++ b/arch/arm64/kernel/bpi.S @@ -55,29 +55,14 @@ ENTRY(__bp_harden_hyp_vecs_start) .endr ENTRY(__bp_harden_hyp_vecs_end) -ENTRY(__qcom_hyp_sanitize_link_stack_start) - stp x29, x30, [sp, #-16]! - .rept 16 - bl . + 4 - .endr - ldp x29, x30, [sp], #16 -ENTRY(__qcom_hyp_sanitize_link_stack_end) -.macro smccc_workaround_1 inst +ENTRY(__smccc_workaround_1_smc_start) sub sp, sp, #(8 * 4) stp x2, x3, [sp, #(8 * 0)] stp x0, x1, [sp, #(8 * 2)] mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 - \inst #0 + smc #0 ldp x2, x3, [sp, #(8 * 0)] ldp x0, x1, [sp, #(8 * 2)] add sp, sp, #(8 * 4) -.endm - -ENTRY(__smccc_workaround_1_smc_start) - smccc_workaround_1 smc ENTRY(__smccc_workaround_1_smc_end) - -ENTRY(__smccc_workaround_1_hvc_start) - smccc_workaround_1 hvc -ENTRY(__smccc_workaround_1_hvc_end) diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index 8021b46c9743..63ccd07d994b 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -45,11 +45,11 @@ ENTRY(__cpu_soft_restart) mov x0, #HVC_SOFT_RESTART hvc #0 // no return -1: mov x18, x1 // entry +1: mov x8, x1 // entry mov x0, x2 // arg0 mov x1, x3 // arg1 mov x2, x4 // arg2 - br x18 + br x8 ENDPROC(__cpu_soft_restart) .popsection diff --git a/arch/arm64/kernel/cpu-reset.h b/arch/arm64/kernel/cpu-reset.h index 6c2b1b4f57c9..d3c60c0c121b 100644 --- a/arch/arm64/kernel/cpu-reset.h +++ b/arch/arm64/kernel/cpu-reset.h @@ -24,7 +24,7 @@ static inline void __noreturn cpu_soft_restart(unsigned long el2_switch, el2_switch = el2_switch && !is_kernel_in_hyp_mode() && is_hyp_mode_available(); - restart = (void *)__pa_symbol(__cpu_soft_restart); + restart = (void *)__pa_function(__cpu_soft_restart); cpu_install_idmap(); restart(el2_switch, entry, arg0, arg1, arg2); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9e8bdaa9a6ad..30c6ff610f14 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -26,10 +27,18 @@ static bool __maybe_unused is_affected_midr_range(const struct arm64_cpu_capabilities *entry, int scope) { + u32 midr = read_cpuid_id(); + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - return MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(), entry->midr_model, - entry->midr_range_min, - entry->midr_range_max); + return is_midr_in_range(midr, &entry->midr_range); +} + +static bool __maybe_unused +is_affected_midr_range_list(const struct arm64_cpu_capabilities *entry, + int scope) +{ + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + return is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list); } static bool __maybe_unused @@ -43,7 +52,7 @@ is_kryo_midr(const struct arm64_cpu_capabilities *entry, int scope) model &= MIDR_IMPLEMENTOR_MASK | (0xf00 << MIDR_PARTNUM_SHIFT) | MIDR_ARCHITECTURE_MASK; - return model == entry->midr_model; + return model == entry->midr_range.model; } static bool @@ -61,26 +70,21 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, (arm64_ftr_reg_ctrel0.sys_val & mask); } -static int cpu_enable_trap_ctr_access(void *__unused) +static void +cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) { /* Clear SCTLR_EL1.UCT */ config_sctlr_el1(SCTLR_EL1_UCT, 0); - return 0; } -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR #include #include DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); #ifdef CONFIG_KVM -extern char __qcom_hyp_sanitize_link_stack_start[]; -extern char __qcom_hyp_sanitize_link_stack_end[]; extern char __smccc_workaround_1_smc_start[]; extern char __smccc_workaround_1_smc_end[]; -extern char __smccc_workaround_1_hvc_start[]; -extern char __smccc_workaround_1_hvc_end[]; static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, const char *hyp_vecs_end) @@ -94,9 +98,9 @@ static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); } -static void __install_bp_hardening_cb(bp_hardening_cb_t fn, - const char *hyp_vecs_start, - const char *hyp_vecs_end) +static void install_bp_hardening_cb(bp_hardening_cb_t fn, + const char *hyp_vecs_start, + const char *hyp_vecs_end) { static int last_slot = -1; static DEFINE_SPINLOCK(bp_lock); @@ -123,14 +127,10 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, spin_unlock(&bp_lock); } #else -#define __qcom_hyp_sanitize_link_stack_start NULL -#define __qcom_hyp_sanitize_link_stack_end NULL #define __smccc_workaround_1_smc_start NULL #define __smccc_workaround_1_smc_end NULL -#define __smccc_workaround_1_hvc_start NULL -#define __smccc_workaround_1_hvc_end NULL -static void __install_bp_hardening_cb(bp_hardening_cb_t fn, +static void install_bp_hardening_cb(bp_hardening_cb_t fn, const char *hyp_vecs_start, const char *hyp_vecs_end) { @@ -138,23 +138,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, } #endif /* CONFIG_KVM */ -static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry, - bp_hardening_cb_t fn, - const char *hyp_vecs_start, - const char *hyp_vecs_end) -{ - u64 pfr0; - - if (!entry->matches(entry, SCOPE_LOCAL_CPU)) - return; - - pfr0 = read_cpuid(ID_AA64PFR0_EL1); - if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT)) - return; - - __install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end); -} - #include #include #include @@ -169,49 +152,6 @@ static void call_hvc_arch_workaround_1(void) arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static int enable_smccc_arch_workaround_1(void *data) -{ - const struct arm64_cpu_capabilities *entry = data; - bp_hardening_cb_t cb; - void *smccc_start, *smccc_end; - struct arm_smccc_res res; - - if (!entry->matches(entry, SCOPE_LOCAL_CPU)) - return 0; - - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) - return 0; - - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: - arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 < 0) - return 0; - cb = call_hvc_arch_workaround_1; - smccc_start = __smccc_workaround_1_hvc_start; - smccc_end = __smccc_workaround_1_hvc_end; - break; - - case PSCI_CONDUIT_SMC: - arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 < 0) - return 0; - cb = call_smc_arch_workaround_1; - smccc_start = __smccc_workaround_1_smc_start; - smccc_end = __smccc_workaround_1_smc_end; - break; - - default: - return 0; - } - - install_bp_hardening_cb(entry, cb, smccc_start, smccc_end); - - return 0; -} - static void qcom_link_stack_sanitization(void) { u64 tmp; @@ -224,22 +164,83 @@ static void qcom_link_stack_sanitization(void) : "=&r" (tmp)); } -static int qcom_enable_link_stack_sanitization(void *data) +static bool __nospectre_v2; +static int __init parse_nospectre_v2(char *str) { - const struct arm64_cpu_capabilities *entry = data; - - install_bp_hardening_cb(entry, qcom_link_stack_sanitization, - __qcom_hyp_sanitize_link_stack_start, - __qcom_hyp_sanitize_link_stack_end); - + __nospectre_v2 = true; return 0; } -#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ +early_param("nospectre_v2", parse_nospectre_v2); + +/* + * -1: No workaround + * 0: No workaround required + * 1: Workaround installed + */ +static int detect_harden_bp_fw(void) +{ + bp_hardening_cb_t cb; + void *smccc_start, *smccc_end; + struct arm_smccc_res res; + u32 midr = read_cpuid_id(); + + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) + return -1; + + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + switch ((int)res.a0) { + case 1: + /* Firmware says we're just fine */ + return 0; + case 0: + cb = call_hvc_arch_workaround_1; + /* This is a guest, no need to patch KVM vectors */ + smccc_start = NULL; + smccc_end = NULL; + break; + default: + return -1; + } + break; + + case PSCI_CONDUIT_SMC: + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + switch ((int)res.a0) { + case 1: + /* Firmware says we're just fine */ + return 0; + case 0: + cb = call_smc_arch_workaround_1; + smccc_start = __smccc_workaround_1_smc_start; + smccc_end = __smccc_workaround_1_smc_end; + break; + default: + return -1; + } + break; + + default: + return -1; + } + + if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) || + ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) + cb = qcom_link_stack_sanitization; + + if (IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) + install_bp_hardening_cb(cb, smccc_start, smccc_end); + + return 1; +} -#ifdef CONFIG_ARM64_SSBD DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); int ssbd_state __read_mostly = ARM64_SSBD_KERNEL; +static bool __ssb_safe = true; static const struct ssbd_options { const char *str; @@ -309,6 +310,11 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt, void arm64_set_ssbd_mitigation(bool state) { + if (!IS_ENABLED(CONFIG_ARM64_SSBD)) { + pr_info_once("SSBD disabled by kernel configuration\n"); + return; + } + if (this_cpu_has_cap(ARM64_SSBS)) { if (state) asm volatile(SET_PSTATE_SSBS(0)); @@ -338,16 +344,28 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, struct arm_smccc_res res; bool required = true; s32 val; + bool this_cpu_safe = false; WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + if (cpu_mitigations_off()) + ssbd_state = ARM64_SSBD_FORCE_DISABLE; + + /* delay setting __ssb_safe until we get a firmware response */ + if (is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list)) + this_cpu_safe = true; + if (this_cpu_has_cap(ARM64_SSBS)) { + if (!this_cpu_safe) + __ssb_safe = false; required = false; goto out_printmsg; } if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { ssbd_state = ARM64_SSBD_UNKNOWN; + if (!this_cpu_safe) + __ssb_safe = false; return false; } @@ -364,6 +382,8 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, default: ssbd_state = ARM64_SSBD_UNKNOWN; + if (!this_cpu_safe) + __ssb_safe = false; return false; } @@ -372,14 +392,18 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, switch (val) { case SMCCC_RET_NOT_SUPPORTED: ssbd_state = ARM64_SSBD_UNKNOWN; + if (!this_cpu_safe) + __ssb_safe = false; return false; + /* machines with mixed mitigation requirements must not return this */ case SMCCC_RET_NOT_REQUIRED: pr_info_once("%s mitigation not required\n", entry->desc); ssbd_state = ARM64_SSBD_MITIGATED; return false; case SMCCC_RET_SUCCESS: + __ssb_safe = false; required = true; break; @@ -389,6 +413,8 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, default: WARN_ON(1); + if (!this_cpu_safe) + __ssb_safe = false; return false; } @@ -427,21 +453,113 @@ out_printmsg: return required; } -#endif /* CONFIG_ARM64_SSBD */ -#define MIDR_RANGE(model, min, max) \ - .def_scope = SCOPE_LOCAL_CPU, \ - .matches = is_affected_midr_range, \ - .midr_model = model, \ - .midr_range_min = min, \ - .midr_range_max = max +/* known invulnerable cores */ +static const struct midr_range arm64_ssb_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + {}, +}; -#define MIDR_ALL_VERSIONS(model) \ - .def_scope = SCOPE_LOCAL_CPU, \ - .matches = is_affected_midr_range, \ - .midr_model = model, \ - .midr_range_min = 0, \ - .midr_range_max = (MIDR_VARIANT_MASK | MIDR_REVISION_MASK) +#define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ + .matches = is_affected_midr_range, \ + .midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max) + +#define CAP_MIDR_ALL_VERSIONS(model) \ + .matches = is_affected_midr_range, \ + .midr_range = MIDR_ALL_VERSIONS(model) + +#define MIDR_FIXED(rev, revidr_mask) \ + .fixed_revs = (struct arm64_midr_revidr[]){{ (rev), (revidr_mask) }, {}} + +#define ERRATA_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ + CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) + +#define CAP_MIDR_RANGE_LIST(list) \ + .matches = is_affected_midr_range_list, \ + .midr_range_list = list + +/* Errata affecting a range of revisions of given model variant */ +#define ERRATA_MIDR_REV_RANGE(m, var, r_min, r_max) \ + ERRATA_MIDR_RANGE(m, var, r_min, var, r_max) + +/* Errata affecting a single variant/revision of a model */ +#define ERRATA_MIDR_REV(model, var, rev) \ + ERRATA_MIDR_RANGE(model, var, rev, var, rev) + +/* Errata affecting all variants/revisions of a given a model */ +#define ERRATA_MIDR_ALL_VERSIONS(model) \ + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ + CAP_MIDR_ALL_VERSIONS(model) + +/* Errata affecting a list of midr ranges, with same work around */ +#define ERRATA_MIDR_RANGE_LIST(midr_list) \ + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \ + CAP_MIDR_RANGE_LIST(midr_list) + +/* Track overall mitigation state. We are only mitigated if all cores are ok */ +static bool __hardenbp_enab = true; +static bool __spectrev2_safe = true; + +/* + * List of CPUs that do not need any Spectre-v2 mitigation at all. + */ +static const struct midr_range spectre_v2_safe_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + { /* sentinel */ } +}; + +/* + * Track overall bp hardening for all heterogeneous cores in the machine. + * We are only considered "safe" if all booted cores are known safe. + */ +static bool __maybe_unused +check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) +{ + int need_wa; + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + /* If the CPU has CSV2 set, we're safe */ + if (cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64PFR0_EL1), + ID_AA64PFR0_CSV2_SHIFT)) + return false; + + /* Alternatively, we have a list of unaffected CPUs */ + if (is_midr_in_range_list(read_cpuid_id(), spectre_v2_safe_list)) + return false; + + /* Fallback to firmware detection */ + need_wa = detect_harden_bp_fw(); + if (!need_wa) + return false; + + __spectrev2_safe = false; + + if (!IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) { + pr_warn_once("spectrev2 mitigation disabled by kernel configuration\n"); + __hardenbp_enab = false; + return false; + } + + /* forced off */ + if (__nospectre_v2 || cpu_mitigations_off()) { + pr_info_once("spectrev2 mitigation disabled by command line option\n"); + __hardenbp_enab = false; + return false; + } + + if (need_wa < 0) { + pr_warn_once("ARM_SMCCC_ARCH_WORKAROUND_1 missing from firmware\n"); + __hardenbp_enab = false; + } + + return (need_wa > 0); +} const struct arm64_cpu_capabilities arm64_errata[] = { #if defined(CONFIG_ARM64_ERRATUM_826319) || \ @@ -451,8 +569,8 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A53 r0p[012] */ .desc = "ARM errata 826319, 827319, 824069", .capability = ARM64_WORKAROUND_CLEAN_CACHE, - MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x02), - .enable = cpu_enable_cache_maint_trap, + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 2), + .cpu_enable = cpu_enable_cache_maint_trap, }, #endif #ifdef CONFIG_ARM64_ERRATUM_819472 @@ -460,8 +578,8 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A53 r0p[01] */ .desc = "ARM errata 819472", .capability = ARM64_WORKAROUND_CLEAN_CACHE, - MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x01), - .enable = cpu_enable_cache_maint_trap, + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 1), + .cpu_enable = cpu_enable_cache_maint_trap, }, #endif #ifdef CONFIG_ARM64_ERRATUM_832075 @@ -469,9 +587,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A57 r0p0 - r1p2 */ .desc = "ARM erratum 832075", .capability = ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE, - MIDR_RANGE(MIDR_CORTEX_A57, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(1, 2)), + ERRATA_MIDR_RANGE(MIDR_CORTEX_A57, + 0, 0, + 1, 2), }, #endif #ifdef CONFIG_ARM64_ERRATUM_834220 @@ -479,9 +597,9 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A57 r0p0 - r1p2 */ .desc = "ARM erratum 834220", .capability = ARM64_WORKAROUND_834220, - MIDR_RANGE(MIDR_CORTEX_A57, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(1, 2)), + ERRATA_MIDR_RANGE(MIDR_CORTEX_A57, + 0, 0, + 1, 2), }, #endif #ifdef CONFIG_ARM64_ERRATUM_845719 @@ -489,7 +607,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A53 r0p[01234] */ .desc = "ARM erratum 845719", .capability = ARM64_WORKAROUND_845719, - MIDR_RANGE(MIDR_CORTEX_A53, 0x00, 0x04), + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 4), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_23154 @@ -497,7 +615,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cavium ThunderX, pass 1.x */ .desc = "Cavium erratum 23154", .capability = ARM64_WORKAROUND_CAVIUM_23154, - MIDR_RANGE(MIDR_THUNDERX, 0x00, 0x01), + ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_27456 @@ -505,15 +623,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cavium ThunderX, T88 pass 1.x - 2.1 */ .desc = "Cavium erratum 27456", .capability = ARM64_WORKAROUND_CAVIUM_27456, - MIDR_RANGE(MIDR_THUNDERX, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(1, 1)), + ERRATA_MIDR_RANGE(MIDR_THUNDERX, + 0, 0, + 1, 1), }, { /* Cavium ThunderX, T81 pass 1.0 */ .desc = "Cavium erratum 27456", .capability = ARM64_WORKAROUND_CAVIUM_27456, - MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00), + ERRATA_MIDR_REV(MIDR_THUNDERX_81XX, 0, 0), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_30115 @@ -521,49 +639,48 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cavium ThunderX, T88 pass 1.x - 2.2 */ .desc = "Cavium erratum 30115", .capability = ARM64_WORKAROUND_CAVIUM_30115, - MIDR_RANGE(MIDR_THUNDERX, 0x00, - (1 << MIDR_VARIANT_SHIFT) | 2), + ERRATA_MIDR_RANGE(MIDR_THUNDERX, + 0, 0, + 1, 2), }, { /* Cavium ThunderX, T81 pass 1.0 - 1.2 */ .desc = "Cavium erratum 30115", .capability = ARM64_WORKAROUND_CAVIUM_30115, - MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x02), + ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX_81XX, 0, 0, 2), }, { /* Cavium ThunderX, T83 pass 1.0 */ .desc = "Cavium erratum 30115", .capability = ARM64_WORKAROUND_CAVIUM_30115, - MIDR_RANGE(MIDR_THUNDERX_83XX, 0x00, 0x00), + ERRATA_MIDR_REV(MIDR_THUNDERX_83XX, 0, 0), }, #endif { .desc = "Mismatched cache line size", .capability = ARM64_MISMATCHED_CACHE_LINE_SIZE, .matches = has_mismatched_cache_type, - .def_scope = SCOPE_LOCAL_CPU, - .enable = cpu_enable_trap_ctr_access, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .cpu_enable = cpu_enable_trap_ctr_access, }, { .desc = "Mismatched cache type", .capability = ARM64_MISMATCHED_CACHE_TYPE, .matches = has_mismatched_cache_type, - .def_scope = SCOPE_LOCAL_CPU, - .enable = cpu_enable_trap_ctr_access, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .cpu_enable = cpu_enable_trap_ctr_access, }, #ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003 { .desc = "Qualcomm Technologies Falkor erratum 1003", .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, - MIDR_RANGE(MIDR_QCOM_FALKOR_V1, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(0, 0)), + ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0), }, { .desc = "Qualcomm Technologies Kryo erratum 1003", .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003, - .def_scope = SCOPE_LOCAL_CPU, - .midr_model = MIDR_QCOM_KRYO, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .midr_range.model = MIDR_QCOM_KRYO, .matches = is_kryo_midr, }, #endif @@ -571,9 +688,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "Qualcomm Technologies Falkor erratum 1009", .capability = ARM64_WORKAROUND_REPEAT_TLBI, - MIDR_RANGE(MIDR_QCOM_FALKOR_V1, - MIDR_CPU_VAR_REV(0, 0), - MIDR_CPU_VAR_REV(0, 0)), + ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0), }, #endif #ifdef CONFIG_ARM64_ERRATUM_858921 @@ -581,100 +696,56 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A73 all versions */ .desc = "ARM erratum 858921", .capability = ARM64_WORKAROUND_858921, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), }, #endif -#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - .enable = enable_smccc_arch_workaround_1, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = check_branch_predictor, }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - .enable = enable_smccc_arch_workaround_1, - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), - .enable = enable_smccc_arch_workaround_1, - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), - .enable = enable_smccc_arch_workaround_1, - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), - .enable = qcom_enable_link_stack_sanitization, - }, - { - .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), - .enable = qcom_enable_link_stack_sanitization, - }, - { - .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, - MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), - .enable = enable_smccc_arch_workaround_1, - }, - { - .capability = ARM64_HARDEN_BRANCH_PREDICTOR, - MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), - .enable = enable_smccc_arch_workaround_1, - }, -#endif -#ifdef CONFIG_ARM64_SSBD { .desc = "Speculative Store Bypass Disable", - .def_scope = SCOPE_LOCAL_CPU, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .capability = ARM64_SSBD, .matches = has_ssbd_mitigation, + .midr_range_list = arm64_ssb_cpus, }, -#endif { } }; -/* - * The CPU Errata work arounds are detected and applied at boot time - * and the related information is freed soon after. If the new CPU requires - * an errata not detected at boot, fail this CPU. - */ -void verify_local_cpu_errata_workarounds(void) +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, + char *buf) { - const struct arm64_cpu_capabilities *caps = arm64_errata; + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} - for (; caps->matches; caps++) { - if (cpus_have_cap(caps->capability)) { - if (caps->enable) - caps->enable((void *)caps); - } else if (caps->matches(caps, SCOPE_LOCAL_CPU)) { - pr_crit("CPU%d: Requires work around for %s, not detected" - " at boot time\n", - smp_processor_id(), - caps->desc ? : "an erratum"); - cpu_die_early(); - } +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, + char *buf) +{ + if (__spectrev2_safe) + return sprintf(buf, "Not affected\n"); + + if (__hardenbp_enab) + return sprintf(buf, "Mitigation: Branch predictor hardening\n"); + + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spec_store_bypass(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (__ssb_safe) + return sprintf(buf, "Not affected\n"); + + switch (ssbd_state) { + case ARM64_SSBD_KERNEL: + case ARM64_SSBD_FORCE_ENABLE: + if (IS_ENABLED(CONFIG_ARM64_SSBD)) + return sprintf(buf, + "Mitigation: Speculative Store Bypass disabled via prctl\n"); } -} -void update_cpu_errata_workarounds(void) -{ - update_cpu_capabilities(arm64_errata, "enabling workaround for"); -} - -void __init enable_errata_workarounds(void) -{ - enable_cpu_capabilities(arm64_errata); + return sprintf(buf, "Vulnerable\n"); } diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index c871a69461ad..af2b280e13b6 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -40,9 +41,7 @@ EXPORT_SYMBOL_GPL(elf_hwcap); #define COMPAT_ELF_HWCAP_DEFAULT \ (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ - COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ - COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ - COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_IDIV|\ COMPAT_HWCAP_LPAE) unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; @@ -112,6 +111,13 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_SM3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_SHA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR0_RDM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM4_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SM3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RDM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_ATOMICS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_CRC32_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_SHA2_SHIFT, 4, 0), @@ -121,24 +127,25 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_FCMA_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_DPB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_DPB_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_GIC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_DIT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), /* Linux doesn't care about the EL3 */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64PFR0_EL3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), ARM64_FTR_END, }; @@ -148,14 +155,14 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN16_SHIFT, 4, ID_AA64MMFR0_TGRAN16_NI), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_BIGENDEL0_SHIFT, 4, 0), /* Linux shouldn't care about secure memory */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR0_ASID_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_SNSMEM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_BIGENDEL_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ASID_SHIFT, 4, 0), /* * Differing PARange is fine as long as all peripherals and memory are mapped * within the minimum PARange of all CPUs @@ -166,20 +173,21 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_PAN_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_LOR_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HPD_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VHE_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR1_HADBS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_LOR_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HPD_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_VHE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_VMIDBITS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_HADBS_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LVA_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_IESB_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_LSM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_UAO_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64MMFR2_CNP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_UAO_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CNP_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -206,14 +214,14 @@ struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = { }; static const struct arm64_ftr_bits ftr_id_mmfr0[] = { - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 28, 4, 0xf), /* InnerShr */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 24, 4, 0), /* FCSE */ + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0xf), /* InnerShr */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0), /* FCSE */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, 20, 4, 0), /* AuxReg */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 16, 4, 0), /* TCM */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 12, 4, 0), /* ShareLvl */ - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 8, 4, 0xf), /* OuterShr */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* PMSA */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0), /* VMSA */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), /* TCM */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* ShareLvl */ + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0xf), /* OuterShr */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* PMSA */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* VMSA */ ARM64_FTR_END, }; @@ -234,8 +242,8 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { }; static const struct arm64_ftr_bits ftr_mvfr2[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* FPMisc */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0), /* SIMDMisc */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* FPMisc */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* SIMDMisc */ ARM64_FTR_END, }; @@ -247,25 +255,25 @@ static const struct arm64_ftr_bits ftr_dczid[] = { static const struct arm64_ftr_bits ftr_id_isar5[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_RDM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_CRC32_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SHA1_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_AES_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_ISAR5_SEVL_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_RDM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_CRC32_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SHA1_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_AES_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_SEVL_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_mmfr4[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* ac2 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* ac2 */ ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_pfr0[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 12, 4, 0), /* State3 */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 8, 4, 0), /* State2 */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 4, 4, 0), /* State1 */ - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 4, 0), /* State0 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* State3 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), /* State2 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* State1 */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), /* State0 */ ARM64_FTR_END, }; @@ -485,6 +493,9 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) reg->user_mask = user_mask; } +extern const struct arm64_cpu_capabilities arm64_errata[]; +static void __init setup_boot_cpu_capabilities(void); + void __init init_cpu_features(struct cpuinfo_arm64 *info) { /* Before we start using the tables, make sure it is sorted */ @@ -522,6 +533,11 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); } + /* + * Detect and enable early CPU capabilities based on the boot CPU, + * after we have initialised the CPU feature infrastructure. + */ + setup_boot_cpu_capabilities(); } static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) @@ -786,11 +802,6 @@ static bool has_no_hw_prefetch(const struct arm64_cpu_capabilities *entry, int _ MIDR_CPU_VAR_REV(1, MIDR_REVISION_MASK)); } -static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) -{ - return is_kernel_in_hyp_mode(); -} - static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry, int __unused) { @@ -812,26 +823,35 @@ static bool has_no_fpsimd(const struct arm64_cpu_capabilities *entry, int __unus ID_AA64PFR0_FP_SHIFT) < 0; } -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +static bool __meltdown_safe = true; static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, - int __unused) + int scope) { - char const *str = "command line option"; - u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); /* List of CPUs that are not vulnerable and don't need KPTI */ static const struct midr_range kpti_safe_list[] = { - _MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), - _MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), - _MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), - _MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), - _MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), - _MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - _MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), - _MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), + MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), { /* sentinel */ } }; + char const *str = "kpti command line option"; + bool meltdown_safe; + + meltdown_safe = is_midr_in_range_list(read_cpuid_id(), kpti_safe_list); + + /* Defer to CPU feature registers */ + if (has_cpuid_feature(entry, scope)) + meltdown_safe = true; + + if (!meltdown_safe) + __meltdown_safe = false; /* * For reasons that aren't entirely clear, enabling KPTI on Cavium @@ -843,6 +863,24 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, __kpti_forced = -1; } + /* Useful for KASLR robustness */ + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0) { + if (!__kpti_forced) { + str = "KASLR"; + __kpti_forced = 1; + } + } + + if (cpu_mitigations_off() && !__kpti_forced) { + str = "mitigations=off"; + __kpti_forced = -1; + } + + if (!IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) { + pr_info_once("kernel page table isolation disabled by kernel configuration\n"); + return false; + } + /* Forced? */ if (__kpti_forced) { pr_info_once("kernel page table isolation forced %s by %s\n", @@ -850,28 +888,12 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, return __kpti_forced > 0; } - /* Useful for KASLR robustness */ - if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) - return true; - - /* Don't force KPTI for CPUs that are not vulnerable */ - switch (read_cpuid_id() & MIDR_CPU_MODEL_MASK) { - case MIDR_CAVIUM_THUNDERX2: - case MIDR_BRCM_VULCAN: - case MIDR_CORTEX_A53: - case MIDR_CORTEX_A55: - case MIDR_CORTEX_A57: - case MIDR_CORTEX_A72: - case MIDR_CORTEX_A73: - return false; - } - - /* Defer to CPU feature registers */ - return !cpuid_feature_extract_unsigned_field(pfr0, - ID_AA64PFR0_CSV3_SHIFT); + return !meltdown_safe; } -static int __nocfi kpti_install_ng_mappings(void *__unused) +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +static void __nocfi +kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) { typedef void (kpti_remap_fn)(int, int, phys_addr_t); extern kpti_remap_fn idmap_kpti_install_ng_mappings; @@ -881,9 +903,9 @@ static int __nocfi kpti_install_ng_mappings(void *__unused) int cpu = smp_processor_id(); if (kpti_applied) - return 0; + return; - remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings); + remap_fn = (void *)__pa_function(idmap_kpti_install_ng_mappings); cpu_install_idmap(); remap_fn(cpu, num_online_cpus(), __pa_symbol(swapper_pg_dir)); @@ -892,8 +914,14 @@ static int __nocfi kpti_install_ng_mappings(void *__unused) if (!cpu) kpti_applied = true; - return 0; + return; } +#else +static void +kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) +{ +} +#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ static int __init parse_kpti(char *str) { @@ -907,9 +935,14 @@ static int __init parse_kpti(char *str) return 0; } early_param("kpti", parse_kpti); -#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ -static int cpu_copy_el2regs(void *__unused) +#ifdef CONFIG_ARM64_VHE +static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) +{ + return is_kernel_in_hyp_mode(); +} + +static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) { /* * Copy register values that aren't redirected by hardware. @@ -921,9 +954,9 @@ static int cpu_copy_el2regs(void *__unused) */ if (!alternatives_applied) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); - - return 0; } +#endif + #ifdef CONFIG_ARM64_SSBD static int ssbs_emulation_handler(struct pt_regs *regs, u32 instr) { @@ -945,7 +978,7 @@ static struct undef_hook ssbs_emulation_hook = { .fn = ssbs_emulation_handler, }; -static int cpu_enable_ssbs(void *__unused) +static void cpu_enable_ssbs(const struct arm64_cpu_capabilities *__unused) { static bool undef_hook_registered = false; static DEFINE_SPINLOCK(hook_lock); @@ -963,7 +996,6 @@ static int cpu_enable_ssbs(void *__unused) } else { arm64_set_ssbd_mitigation(true); } - return 0; } #endif /* CONFIG_ARM64_SSBD */ @@ -971,7 +1003,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", .capability = ARM64_HAS_SYSREG_GIC_CPUIF, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_useable_gicv3_cpuif, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, @@ -982,20 +1014,20 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Privileged Access Never", .capability = ARM64_HAS_PAN, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, .sign = FTR_UNSIGNED, .min_field_value = 1, - .enable = cpu_enable_pan, + .cpu_enable = cpu_enable_pan, }, #endif /* CONFIG_ARM64_PAN */ #if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) { .desc = "LSE atomic instructions", .capability = ARM64_HAS_LSE_ATOMICS, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT, @@ -1006,14 +1038,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Software prefetching using PRFM", .capability = ARM64_HAS_NO_HW_PREFETCH, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, .matches = has_no_hw_prefetch, }, #ifdef CONFIG_ARM64_UAO { .desc = "User Access Override", .capability = ARM64_HAS_UAO, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR2_EL1, .field_pos = ID_AA64MMFR2_UAO_SHIFT, @@ -1027,21 +1059,23 @@ static const struct arm64_cpu_capabilities arm64_features[] = { #ifdef CONFIG_ARM64_PAN { .capability = ARM64_ALT_PAN_NOT_UAO, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = cpufeature_pan_not_uao, }, #endif /* CONFIG_ARM64_PAN */ +#ifdef CONFIG_ARM64_VHE { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = runs_at_el2, - .enable = cpu_copy_el2regs, + .cpu_enable = cpu_copy_el2regs, }, +#endif /* CONFIG_ARM64_VHE */ { .desc = "32-bit EL0 Support", .capability = ARM64_HAS_32BIT_EL0, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, @@ -1051,22 +1085,28 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Reduced HYP mapping offset", .capability = ARM64_HYP_OFFSET_LOW, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = hyp_offset_low, }, -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 { .desc = "Kernel page table isolation (KPTI)", .capability = ARM64_UNMAP_KERNEL_AT_EL0, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE, + /* + * The ID feature fields below are used to indicate that + * the CPU doesn't need KPTI. See unmap_kernel_at_el0 for + * more details. + */ + .sys_reg = SYS_ID_AA64PFR0_EL1, + .field_pos = ID_AA64PFR0_CSV3_SHIFT, + .min_field_value = 1, .matches = unmap_kernel_at_el0, - .enable = kpti_install_ng_mappings, + .cpu_enable = kpti_install_ng_mappings, }, -#endif { /* FP/SIMD is not implemented */ .capability = ARM64_HAS_NO_FPSIMD, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE, .min_field_value = 0, .matches = has_no_fpsimd, }, @@ -1074,7 +1114,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Data cache clean to Point of Persistence", .capability = ARM64_HAS_DCPOP, - .def_scope = SCOPE_SYSTEM, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, .field_pos = ID_AA64ISAR1_DPB_SHIFT, @@ -1086,31 +1126,43 @@ static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "Speculative Store Bypassing Safe (SSBS)", .capability = ARM64_SSBS, - .def_scope = SCOPE_LOCAL_CPU, - //.type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, + .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_SSBS_SHIFT, .sign = FTR_UNSIGNED, .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, - .enable = cpu_enable_ssbs, + .cpu_enable = cpu_enable_ssbs, }, #endif #endif {}, }; -#define HWCAP_CAP(reg, field, s, min_value, type, cap) \ - { \ - .desc = #cap, \ - .def_scope = SCOPE_SYSTEM, \ + +#define HWCAP_CPUID_MATCH(reg, field, s, min_value) \ .matches = has_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ .sign = s, \ .min_field_value = min_value, \ - .hwcap_type = type, \ + +#define __HWCAP_CAP(name, cap_type, cap) \ + .desc = name, \ + .type = ARM64_CPUCAP_SYSTEM_FEATURE, \ + .hwcap_type = cap_type, \ .hwcap = cap, \ + +#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \ + { \ + __HWCAP_CAP(#cap, cap_type, cap) \ + HWCAP_CPUID_MATCH(reg, field, s, min_value) \ + } + +#define HWCAP_CAP_MATCH(match, cap_type, cap) \ + { \ + __HWCAP_CAP(#cap, cap_type, cap) \ + .matches = match, \ } static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { @@ -1126,20 +1178,52 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4), HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDFHM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FLAGM), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD), HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_DIT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_DCPOP), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA), HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ILRCPC), + HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_USCAT), HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, HWCAP_SSBS), {}, }; +#ifdef CONFIG_COMPAT +static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope) +{ + /* + * Check that all of MVFR1_EL1.{SIMDSP, SIMDInt, SIMDLS} are available, + * in line with that of arm32 as in vfp_init(). We make sure that the + * check is future proof, by making sure value is non-zero. + */ + u32 mvfr1; + + WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); + if (scope == SCOPE_SYSTEM) + mvfr1 = read_sanitised_ftr_reg(SYS_MVFR1_EL1); + else + mvfr1 = read_sysreg_s(SYS_MVFR1_EL1); + + return cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDSP_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDINT_SHIFT) && + cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_SIMDLS_SHIFT); +} +#endif + static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { #ifdef CONFIG_COMPAT + HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON), + HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), + /* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */ + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), @@ -1199,7 +1283,7 @@ static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps) /* We support emulation of accesses to CPU ID feature registers */ elf_hwcap |= HWCAP_CPUID; for (; hwcaps->matches; hwcaps++) - if (hwcaps->matches(hwcaps, hwcaps->def_scope)) + if (hwcaps->matches(hwcaps, cpucap_default_scope(hwcaps))) cap_set_elf_hwcap(hwcaps); } @@ -1222,11 +1306,13 @@ static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array, return false; } -void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, - const char *info) +static void __update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, + u16 scope_mask, const char *info) { + scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (; caps->matches; caps++) { - if (!caps->matches(caps, caps->def_scope)) + if (!(caps->type & scope_mask) || + !caps->matches(caps, cpucap_default_scope(caps))) continue; if (!cpus_have_cap(caps->capability) && caps->desc) @@ -1235,33 +1321,69 @@ void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, } } +static void update_cpu_capabilities(u16 scope_mask) +{ + __update_cpu_capabilities(arm64_errata, scope_mask, + "enabling workaround for"); + __update_cpu_capabilities(arm64_features, scope_mask, "detected:"); +} + +static int __enable_cpu_capability(void *arg) +{ + const struct arm64_cpu_capabilities *cap = arg; + + cap->cpu_enable(cap); + return 0; +} + /* * Run through the enabled capabilities and enable() it on all active * CPUs */ -void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) +static void __init +__enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps, + u16 scope_mask) { + scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (; caps->matches; caps++) { unsigned int num = caps->capability; - if (!cpus_have_cap(num)) + if (!(caps->type & scope_mask) || !cpus_have_cap(num)) continue; /* Ensure cpus_have_const_cap(num) works */ static_branch_enable(&cpu_hwcap_keys[num]); - if (caps->enable) { + if (caps->cpu_enable) { /* - * Use stop_machine() as it schedules the work allowing - * us to modify PSTATE, instead of on_each_cpu() which - * uses an IPI, giving us a PSTATE that disappears when - * we return. + * Capabilities with SCOPE_BOOT_CPU scope are finalised + * before any secondary CPU boots. Thus, each secondary + * will enable the capability as appropriate via + * check_local_cpu_capabilities(). The only exception is + * the boot CPU, for which the capability must be + * enabled here. This approach avoids costly + * stop_machine() calls for this case. + * + * Otherwise, use stop_machine() as it schedules the + * work allowing us to modify PSTATE, instead of + * on_each_cpu() which uses an IPI, giving us a PSTATE + * that disappears when we return. */ - stop_machine(caps->enable, (void *)caps, cpu_online_mask); + if (scope_mask & SCOPE_BOOT_CPU) + caps->cpu_enable(caps); + else + stop_machine(__enable_cpu_capability, + (void *)caps, cpu_online_mask); } } } +static void __init enable_cpu_capabilities(u16 scope_mask) +{ + __enable_cpu_capabilities(arm64_errata, scope_mask); + __enable_cpu_capabilities(arm64_features, scope_mask); +} + /* * Flag to indicate if we have computed the system wide * capabilities based on the boot time active CPUs. This @@ -1277,14 +1399,83 @@ static inline void set_sys_caps_initialised(void) sys_caps_initialised = true; } +/* + * Run through the list of capabilities to check for conflicts. + * If the system has already detected a capability, take necessary + * action on this CPU. + * + * Returns "false" on conflicts. + */ +static bool +__verify_local_cpu_caps(const struct arm64_cpu_capabilities *caps_list, + u16 scope_mask) +{ + bool cpu_has_cap, system_has_cap; + const struct arm64_cpu_capabilities *caps; + + scope_mask &= ARM64_CPUCAP_SCOPE_MASK; + + for (caps = caps_list; caps->matches; caps++) { + if (!(caps->type & scope_mask)) + continue; + + cpu_has_cap = __this_cpu_has_cap(caps_list, caps->capability); + system_has_cap = cpus_have_cap(caps->capability); + + if (system_has_cap) { + /* + * Check if the new CPU misses an advertised feature, + * which is not safe to miss. + */ + if (!cpu_has_cap && !cpucap_late_cpu_optional(caps)) + break; + /* + * We have to issue cpu_enable() irrespective of + * whether the CPU has it or not, as it is enabeld + * system wide. It is upto the call back to take + * appropriate action on this CPU. + */ + if (caps->cpu_enable) + caps->cpu_enable(caps); + } else { + /* + * Check if the CPU has this capability if it isn't + * safe to have when the system doesn't. + */ + if (cpu_has_cap && !cpucap_late_cpu_permitted(caps)) + break; + } + } + + if (caps->matches) { + pr_crit("CPU%d: Detected conflict for capability %d (%s), System: %d, CPU: %d\n", + smp_processor_id(), caps->capability, + caps->desc, system_has_cap, cpu_has_cap); + return false; + } + + return true; +} + +static bool verify_local_cpu_caps(u16 scope_mask) +{ + return __verify_local_cpu_caps(arm64_errata, scope_mask) && + __verify_local_cpu_caps(arm64_features, scope_mask); +} + /* * Check for CPU features that are used in early boot * based on the Boot CPU value. */ static void check_early_cpu_features(void) { - verify_cpu_run_el(); verify_cpu_asid_bits(); + /* + * Early features are used by the kernel already. If there + * is a conflict, we cannot proceed further. + */ + if (!verify_local_cpu_caps(SCOPE_BOOT_CPU)) + cpu_panic_kernel(); } static void @@ -1299,26 +1490,6 @@ verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) } } -static void -verify_local_cpu_features(const struct arm64_cpu_capabilities *caps_list) -{ - const struct arm64_cpu_capabilities *caps = caps_list; - for (; caps->matches; caps++) { - if (!cpus_have_cap(caps->capability)) - continue; - /* - * If the new CPU misses an advertised feature, we cannot proceed - * further, park the cpu. - */ - if (!__this_cpu_has_cap(caps_list, caps->capability)) { - pr_crit("CPU%d: missing feature: %s\n", - smp_processor_id(), caps->desc); - cpu_die_early(); - } - if (caps->enable) - caps->enable((void *)caps); - } -} /* * Run through the enabled system capabilities and enable() it on this CPU. @@ -1330,8 +1501,14 @@ verify_local_cpu_features(const struct arm64_cpu_capabilities *caps_list) */ static void verify_local_cpu_capabilities(void) { - verify_local_cpu_errata_workarounds(); - verify_local_cpu_features(arm64_features); + /* + * The capabilities with SCOPE_BOOT_CPU are checked from + * check_early_cpu_features(), as they need to be verified + * on all secondary CPUs. + */ + if (!verify_local_cpu_caps(SCOPE_ALL & ~SCOPE_BOOT_CPU)) + cpu_die_early(); + verify_local_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) verify_local_elf_hwcaps(compat_elf_hwcaps); @@ -1347,20 +1524,22 @@ void check_local_cpu_capabilities(void) /* * If we haven't finalised the system capabilities, this CPU gets - * a chance to update the errata work arounds. + * a chance to update the errata work arounds and local features. * Otherwise, this CPU should verify that it has all the system * advertised capabilities. */ if (!sys_caps_initialised) - update_cpu_errata_workarounds(); + update_cpu_capabilities(SCOPE_LOCAL_CPU); else verify_local_cpu_capabilities(); } -static void __init setup_feature_capabilities(void) +static void __init setup_boot_cpu_capabilities(void) { - update_cpu_capabilities(arm64_features, "detected feature:"); - enable_cpu_capabilities(arm64_features); + /* Detect capabilities with either SCOPE_BOOT_CPU or SCOPE_LOCAL_CPU */ + update_cpu_capabilities(SCOPE_BOOT_CPU | SCOPE_LOCAL_CPU); + /* Enable the SCOPE_BOOT_CPU capabilities alone right away */ + enable_cpu_capabilities(SCOPE_BOOT_CPU); } DEFINE_STATIC_KEY_FALSE(arm64_const_caps_ready); @@ -1379,14 +1558,24 @@ bool this_cpu_has_cap(unsigned int cap) __this_cpu_has_cap(arm64_errata, cap)); } +static void __init setup_system_capabilities(void) +{ + /* + * We have finalised the system-wide safe feature + * registers, finalise the capabilities that depend + * on it. Also enable all the available capabilities, + * that are not enabled already. + */ + update_cpu_capabilities(SCOPE_SYSTEM); + enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU); +} + void __init setup_cpu_features(void) { u32 cwg; int cls; - /* Set the CPU feature capabilies */ - setup_feature_capabilities(); - enable_errata_workarounds(); + setup_system_capabilities(); mark_const_caps_ready(); setup_elf_hwcaps(arm64_elf_hwcaps); @@ -1512,3 +1701,15 @@ static int __init enable_mrs_emulation(void) } core_initcall(enable_mrs_emulation); + +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, + char *buf) +{ + if (__meltdown_safe) + return sprintf(buf, "Not affected\n"); + + if (arm64_kernel_unmapped_at_el0()) + return sprintf(buf, "Mitigation: PTI\n"); + + return sprintf(buf, "Vulnerable\n"); +} diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 5e50c699c997..efd8ae1b3e47 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -77,6 +77,12 @@ static const char *const hwcap_str[] = { "sm4", "asimddp", "sha512", + "sve", + "asimdfhm", + "dit", + "uscat", + "ilrcpc", + "flagm", "ssbs", NULL }; diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 5fc9661db6c4..be668e8b80b6 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -192,6 +192,11 @@ alternative_cb_end 1: mov x29, xzr // fp pointed to user-space + +#ifdef CONFIG_SHADOW_CALL_STACK + ldr x18, [tsk, #TSK_TI_SCS] // Restore shadow call stack + str xzr, [tsk, #TSK_TI_SCS] // Limit visibility of saved SCS +#endif .else add x21, sp, #S_FRAME_SIZE get_thread_info tsk @@ -281,6 +286,12 @@ alternative_else_nop_endif ct_user_enter .endif +#ifdef CONFIG_SHADOW_CALL_STACK + .if \el == 0 + str x18, [tsk, #TSK_TI_SCS] // Save shadow call stack + .endif +#endif + #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR @@ -371,6 +382,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 .macro irq_stack_entry mov x19, sp // preserve the original sp +#ifdef CONFIG_SHADOW_CALL_STACK + mov x20, x18 // preserve the original shadow stack +#endif /* * Compare sp with the base of the task stack. @@ -388,15 +402,24 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 /* switch to the irq stack */ mov sp, x26 + +#ifdef CONFIG_SHADOW_CALL_STACK + /* also switch to the irq shadow stack */ + ldr_this_cpu x18, irq_shadow_call_stack_ptr, x26 +#endif + 9998: .endm /* - * x19 should be preserved between irq_stack_entry and - * irq_stack_exit. + * The callee-saved regs (x19-x29) should be preserved between + * irq_stack_entry and irq_stack_exit. */ .macro irq_stack_exit mov sp, x19 +#ifdef CONFIG_SHADOW_CALL_STACK + mov x18, x20 +#endif .endm /* @@ -602,7 +625,7 @@ el1_da: tbnz x23, #7, 1f // PSR_I_BIT enable_irq 1: - clear_address_tag x0, x3 + untagged_addr x0, x3 mov x2, sp // struct pt_regs bl do_mem_abort cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1 @@ -782,7 +805,7 @@ el0_da: // enable interrupts before calling the main handler enable_dbg_and_irq ct_user_exit - clear_address_tag x0, x26 + untagged_addr x0, x26 mov x1, x25 mov x2, sp bl do_mem_abort @@ -1158,6 +1181,11 @@ ENTRY(cpu_switch_to) ldr lr, [x8] mov sp, x9 msr sp_el0, x1 +#ifdef CONFIG_SHADOW_CALL_STACK + str x18, [x0, #TSK_TI_SCS] + ldr x18, [x1, #TSK_TI_SCS] + str xzr, [x1, #TSK_TI_SCS] // limit visibility of saved SCS +#endif ret ENDPROC(cpu_switch_to) NOKPROBE(cpu_switch_to) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 5d547deb6996..4cd962f6c430 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -28,6 +28,7 @@ #include #include +#include #include #include @@ -205,8 +206,19 @@ void fpsimd_preserve_current_state(void) */ void fpsimd_restore_current_state(void) { - if (!system_supports_fpsimd()) + /* + * For the tasks that were created before we detected the absence of + * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(), + * e.g, init. This could be then inherited by the children processes. + * If we later detect that the system doesn't support FP/SIMD, + * we must clear the flag for all the tasks to indicate that the + * FPSTATE is clean (as we can't have one) to avoid looping for ever in + * do_notify_resume(). + */ + if (!system_supports_fpsimd()) { + clear_thread_flag(TIF_FOREIGN_FPSTATE); return; + } local_bh_disable(); @@ -228,7 +240,7 @@ void fpsimd_restore_current_state(void) */ void fpsimd_update_current_state(struct fpsimd_state *state) { - if (!system_supports_fpsimd()) + if (WARN_ON(!system_supports_fpsimd())) return; local_bh_disable(); diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index fac79d75d1d9..6eefd5873aef 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -119,7 +119,13 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) /* * Ensure updated trampoline is visible to instruction - * fetch before we patch in the branch. + * fetch before we patch in the branch. Although the + * architecture doesn't require an IPI in this case, + * Neoverse-N1 erratum #1542419 does require one + * if the TLB maintenance in module_enable_ro() is + * skipped due to rodata_enabled. It doesn't seem worth + * it to make it conditional given that this is + * certainly not a fast-path. */ flush_icache_range((unsigned long)&dst[0], (unsigned long)&dst[1]); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 2fde78082d45..a69e4cfd7394 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -335,6 +336,10 @@ __primary_switched: stp xzr, x30, [sp, #-16]! mov x29, sp +#ifdef CONFIG_SHADOW_CALL_STACK + adr_l x18, init_shadow_call_stack // Set shadow call stack +#endif + str_l x21, __fdt_pointer, x5 // Save FDT pointer ldr_l x4, kimage_vaddr // Save the offset between @@ -390,17 +395,13 @@ ENTRY(el2_setup) mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 b.eq 1f - mrs x0, sctlr_el1 -CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1 -CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 + mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) msr sctlr_el1, x0 mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 isb ret -1: mrs x0, sctlr_el2 -CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 -CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 +1: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) msr sctlr_el2, x0 #ifdef CONFIG_ARM64_VHE @@ -507,10 +508,7 @@ install_el2_stub: * requires no configuration, and all non-hyp-specific EL2 setup * will be done via the _EL1 system register aliases in __cpu_setup. */ - /* sctlr_el1 */ - mov x0, #0x0800 // Set/clear RES{1,0} bits -CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems -CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems + mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) msr sctlr_el1, x0 /* Coprocessor traps. */ @@ -603,6 +601,7 @@ secondary_startup: /* * Common entry point for secondary CPUs. */ + bl __cpu_secondary_check52bitva bl __cpu_setup // initialise processor bl __enable_mmu ldr x8, =__secondary_switched @@ -619,6 +618,10 @@ __secondary_switched: mov sp, x1 ldr x2, [x0, #CPU_BOOT_TASK] msr sp_el0, x2 +#ifdef CONFIG_SHADOW_CALL_STACK + ldr x18, [x2, #TSK_TI_SCS] // set shadow call stack + str xzr, [x2, #TSK_TI_SCS] // limit visibility of saved SCS +#endif mov x29, #0 mov x30, #0 b secondary_start_kernel @@ -677,6 +680,31 @@ ENTRY(__enable_mmu) ret ENDPROC(__enable_mmu) +ENTRY(__cpu_secondary_check52bitva) +#ifdef CONFIG_ARM64_52BIT_VA + ldr_l x0, vabits_user + cmp x0, #52 + b.ne 2f + + mrs_s x0, SYS_ID_AA64MMFR2_EL1 + and x0, x0, #(0xf << ID_AA64MMFR2_LVA_SHIFT) + cbnz x0, 2f + + adr_l x0, va52mismatch + mov w1, #1 + strb w1, [x0] + dmb sy + dc ivac, x0 // Invalidate potentially stale cache line + + update_early_cpu_boot_status CPU_STUCK_IN_KERNEL, x0, x1 +1: wfe + wfi + b 1b + +#endif +2: ret +ENDPROC(__cpu_secondary_check52bitva) + __no_granule_support: /* Indicate that this CPU can't boot and is stuck in the kernel */ update_early_cpu_boot_status CPU_STUCK_IN_KERNEL, x1, x2 diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index bb444c693796..49f543ebd6cb 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -246,8 +246,7 @@ static int create_safe_exec_page(void *src_start, size_t length, } pte = pte_offset_kernel(pmd, dst_addr); - set_pte(pte, __pte(virt_to_phys((void *)dst) | - pgprot_val(PAGE_KERNEL_EXEC))); + set_pte(pte, pfn_pte(virt_to_pfn(dst), PAGE_KERNEL_EXEC)); /* * Load our new page tables. A strict BBM approach requires that we diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index b91abb8f7cd4..706a8d4e0004 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -29,6 +29,7 @@ #include #include #include +#include unsigned long irq_err_count; @@ -91,6 +92,7 @@ static void init_irq_stacks(void) void __init init_IRQ(void) { init_irq_stacks(); + scs_init_irq(); irqchip_init(); if (!handle_arch_irq) panic("No interrupt controller found."); diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 36628393a66e..07e71bf9b402 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -393,7 +394,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, childregs->pstate |= PSR_UAO_BIT; if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) - childregs->pstate |= PSR_SSBS_BIT; + set_ssbs_bit(childregs); p->thread.cpu_context.x19 = stack_start; p->thread.cpu_context.x20 = stk_sz; @@ -448,9 +449,16 @@ static void ssbs_thread_switch(struct task_struct *next) if (unlikely(next->flags & PF_KTHREAD)) return; + /* + * If all CPUs implement the SSBS extension, then we just need to + * context-switch the PSTATE field. + */ + if (cpu_have_feature(cpu_feature(SSBS))) + return; + /* If the mitigation is enabled, then we leave SSBS clear. */ if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) || - test_tsk_thread_flag(next, TIF_SSBD)) + test_tsk_thread_flag(next, TIF_SSBD)) return; if (compat_user_mode(regs)) @@ -458,6 +466,7 @@ static void ssbs_thread_switch(struct task_struct *next) else if (user_mode(regs)) set_ssbs_bit(regs); } + /* * We store our current task in sp_el0, which is clobbered by userspace. Keep a * shadow copy so that we can restore this upon entry from userspace. @@ -487,6 +496,8 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, entry_task_switch(next); uao_thread_switch(next); ssbs_thread_switch(next); + scs_overflow_check(next); + /* * Complete any pending TLB or cache maintenance on this CPU in case * the thread migrates to a different CPU. diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index e8edbf13302a..73d5ac36803d 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -46,7 +46,8 @@ static int __init cpu_psci_cpu_prepare(unsigned int cpu) static int cpu_psci_cpu_boot(unsigned int cpu) { - int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa_symbol(secondary_entry)); + int err = psci_ops.cpu_on(cpu_logical_map(cpu), + __pa_function(secondary_entry)); if (err) pr_err("failed to boot CPU%d (%d)\n", cpu, err); @@ -84,7 +85,8 @@ static void cpu_psci_cpu_die(unsigned int cpu) static int cpu_psci_cpu_kill(unsigned int cpu) { - int err, i; + int err; + unsigned long start, end; if (!psci_ops.affinity_info) return 0; @@ -94,16 +96,18 @@ static int cpu_psci_cpu_kill(unsigned int cpu) * while it is dying. So, try again a few times. */ - for (i = 0; i < 10; i++) { + start = jiffies; + end = start + msecs_to_jiffies(100); + do { err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { - pr_info("CPU%d killed.\n", cpu); + pr_info("CPU%d killed (polled %d ms)\n", cpu, + jiffies_to_msecs(jiffies - start)); return 0; } - msleep(10); - pr_info("Retrying again to check for CPU kill\n"); - } + usleep_range(100, 1000); + } while (time_before(jiffies, end)); pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", cpu, err); diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 34d915b6974b..e230b4dff960 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -624,6 +624,13 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, return 0; } +static int fpr_active(struct task_struct *target, const struct user_regset *regset) +{ + if (!system_supports_fpsimd()) + return -ENODEV; + return regset->n; +} + /* * TODO: update fp accessors for lazy context switching (sync/flush hwstate) */ @@ -634,6 +641,9 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, struct user_fpsimd_state *uregs; uregs = &target->thread.fpsimd_state.user_fpsimd; + if (!system_supports_fpsimd()) + return -EINVAL; + if (target == current) fpsimd_preserve_current_state(); @@ -648,6 +658,9 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, struct user_fpsimd_state newstate = target->thread.fpsimd_state.user_fpsimd; + if (!system_supports_fpsimd()) + return -EINVAL; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1); if (ret) return ret; @@ -740,6 +753,7 @@ static const struct user_regset aarch64_regsets[] = { */ .size = sizeof(u32), .align = sizeof(u32), + .active = fpr_active, .get = fpr_get, .set = fpr_set }, @@ -914,6 +928,9 @@ static int compat_vfp_get(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; + if (!system_supports_fpsimd()) + return -EINVAL; + uregs = &target->thread.fpsimd_state.user_fpsimd; if (target == current) @@ -947,6 +964,9 @@ static int compat_vfp_set(struct task_struct *target, compat_ulong_t fpscr; int ret, vregs_end_pos; + if (!system_supports_fpsimd()) + return -EINVAL; + uregs = &target->thread.fpsimd_state.user_fpsimd; vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t); @@ -1004,6 +1024,7 @@ static const struct user_regset aarch32_regsets[] = { .n = VFP_STATE_SIZE / sizeof(compat_ulong_t), .size = sizeof(compat_ulong_t), .align = sizeof(compat_ulong_t), + .active = fpr_active, .get = compat_vfp_get, .set = compat_vfp_set }, @@ -1402,15 +1423,20 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs) } /* - * Bits which are always architecturally RES0 per ARM DDI 0487A.h + * SPSR_ELx bits which are always architecturally RES0 per ARM DDI 0487D.a. + * We permit userspace to set SSBS (AArch64 bit 12, AArch32 bit 23) which is + * not described in ARM DDI 0487D.a. + * We treat PAN and UAO as RES0 bits, as they are meaningless at EL0, and may + * be allocated an EL0 meaning in future. * Userspace cannot use these until they have an architectural meaning. + * Note that this follows the SPSR_ELx format, not the AArch32 PSR format. * We also reserve IL for the kernel; SS is handled dynamically. */ #define SPSR_EL1_AARCH64_RES0_BITS \ - (GENMASK_ULL(63,32) | GENMASK_ULL(27, 22) | GENMASK_ULL(20, 10) | \ - GENMASK_ULL(5, 5)) + (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \ + GENMASK_ULL(20, 13) | GENMASK_ULL(11, 10) | GENMASK_ULL(5, 5)) #define SPSR_EL1_AARCH32_RES0_BITS \ - (GENMASK_ULL(63,32) | GENMASK_ULL(24, 22) | GENMASK_ULL(20,20)) + (GENMASK_ULL(63, 32) | GENMASK_ULL(22, 22) | GENMASK_ULL(20, 20)) static int valid_compat_regs(struct user_pt_regs *regs) { diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c new file mode 100644 index 000000000000..eaadf5430baa --- /dev/null +++ b/arch/arm64/kernel/scs.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Shadow Call Stack support. + * + * Copyright (C) 2019 Google LLC + */ + +#include +#include +#include +#include + +DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr); + +#ifndef CONFIG_SHADOW_CALL_STACK_VMAP +DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack) + __aligned(SCS_SIZE); +#endif + +void scs_init_irq(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { +#ifdef CONFIG_SHADOW_CALL_STACK_VMAP + unsigned long *p; + + p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE, + VMALLOC_START, VMALLOC_END, + GFP_SCS, PAGE_KERNEL, + 0, cpu_to_node(cpu), + __builtin_return_address(0)); + + per_cpu(irq_shadow_call_stack_ptr, cpu) = p; +#else + per_cpu(irq_shadow_call_stack_ptr, cpu) = + per_cpu(irq_shadow_call_stack, cpu); +#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */ + } +} diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 4a58974a81aa..fb1d9d36120c 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #include #include @@ -83,43 +84,6 @@ enum ipi_msg_type { IPI_WAKEUP }; -#ifdef CONFIG_ARM64_VHE - -/* Whether the boot CPU is running in HYP mode or not*/ -static bool boot_cpu_hyp_mode; - -static inline void save_boot_cpu_run_el(void) -{ - boot_cpu_hyp_mode = is_kernel_in_hyp_mode(); -} - -static inline bool is_boot_cpu_in_hyp_mode(void) -{ - return boot_cpu_hyp_mode; -} - -/* - * Verify that a secondary CPU is running the kernel at the same - * EL as that of the boot CPU. - */ -void verify_cpu_run_el(void) -{ - bool in_el2 = is_kernel_in_hyp_mode(); - bool boot_cpu_el2 = is_boot_cpu_in_hyp_mode(); - - if (in_el2 ^ boot_cpu_el2) { - pr_crit("CPU%d: mismatched Exception Level(EL%d) with boot CPU(EL%d)\n", - smp_processor_id(), - in_el2 ? 2 : 1, - boot_cpu_el2 ? 2 : 1); - cpu_panic_kernel(); - } -} - -#else -static inline void save_boot_cpu_run_el(void) {} -#endif - #ifdef CONFIG_HOTPLUG_CPU static int op_cpu_kill(unsigned int cpu); #else @@ -143,6 +107,7 @@ static int boot_secondary(unsigned int cpu, struct task_struct *idle) } static DECLARE_COMPLETION(cpu_running); +bool va52mismatch __ro_after_init; int __cpu_up(unsigned int cpu, struct task_struct *idle) { @@ -172,10 +137,15 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) if (!cpu_online(cpu)) { pr_crit("CPU%u: failed to come online\n", cpu); + + if (IS_ENABLED(CONFIG_ARM64_52BIT_VA) && va52mismatch) + pr_crit("CPU%u: does not support 52-bit VAs\n", cpu); + ret = -EIO; } } else { pr_err("CPU%u: failed to boot: %d\n", cpu, ret); + return ret; } secondary_data.task = NULL; @@ -388,6 +358,9 @@ void cpu_die(void) { unsigned int cpu = smp_processor_id(); + /* Save the shadow stack pointer before exiting the idle task */ + scs_save(current); + idle_task_exit(); local_irq_disable(); @@ -454,13 +427,6 @@ void __init smp_prepare_boot_cpu(void) { set_my_cpu_offset(per_cpu_offset(smp_processor_id())); cpuinfo_store_boot_cpu(); - save_boot_cpu_run_el(); - /* - * Run the errata work around checks on the boot CPU, once we have - * initialised the cpu feature infrastructure from - * cpuinfo_store_boot_cpu() above. - */ - update_cpu_errata_workarounds(); } static u64 __init of_get_cpu_mpidr(struct device_node *dn) diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c index 93034651c87e..1ba281dc4fae 100644 --- a/arch/arm64/kernel/smp_spin_table.c +++ b/arch/arm64/kernel/smp_spin_table.c @@ -99,7 +99,7 @@ static int smp_spin_table_cpu_prepare(unsigned int cpu) * boot-loader's endianess before jumping. This is mandated by * the boot protocol. */ - writeq_relaxed(__pa_symbol(secondary_holding_pen), release_addr); + writeq_relaxed(__pa_function(secondary_holding_pen), release_addr); __flush_dcache_area((__force void *)release_addr, sizeof(*release_addr)); diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c index d102270f461f..5f5d329587d6 100644 --- a/arch/arm64/kernel/ssbd.c +++ b/arch/arm64/kernel/ssbd.c @@ -2,6 +2,7 @@ /* * Copyright (C) 2018 ARM Ltd, All Rights Reserved. */ + #include #include #include @@ -9,6 +10,7 @@ #include #include +#include #include static void ssbd_ssbs_enable(struct task_struct *task) { diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 34a4dc504ae9..00b110e5d530 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -39,6 +39,7 @@ #include #include +#include #include #include #include @@ -475,10 +476,9 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); } -int cpu_enable_cache_maint_trap(void *__unused) +void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused) { config_sctlr_el1(SCTLR_EL1_UCI, 0); - return 0; } #define __user_cache_maint(insn, address, res) \ @@ -710,7 +710,6 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) handler[reason], smp_processor_id(), esr, esr_get_class_string(esr)); - die("Oops - bad mode", regs, 0); local_irq_disable(); panic("bad mode"); } diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index ef3f9d9d4062..f7620d315bc5 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -17,6 +17,8 @@ ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \ $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) ccflags-y += $(DISABLE_LTO) +CFLAGS_REMOVE_vgettimeofday.o += $(CC_FLAGS_SCS) + # Disable gcov profiling for VDSO code GCOV_PROFILE := n diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index bfa00a9e161d..e41392ce3808 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -31,3 +31,6 @@ GCOV_PROFILE := n KASAN_SANITIZE := n UBSAN_SANITIZE := n KCOV_INSTRUMENT := n + +# remove the SCS flags from all objects in this directory +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS)) diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index a7b3c198d4de..2c2b588a73b7 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -31,7 +31,12 @@ .text .pushsection .hyp.text, "ax" +/* + * We treat x18 as callee-saved as the host may use it as a platform + * register (e.g. for shadow call stack). + */ .macro save_callee_saved_regs ctxt + str x18, [\ctxt, #CPU_XREG_OFFSET(18)] stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] @@ -41,6 +46,8 @@ .endm .macro restore_callee_saved_regs ctxt + // We require \ctxt is not x18-x28 + ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)] ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] @@ -57,29 +64,26 @@ ENTRY(__guest_enter) // x0: vcpu // x1: host context // x2-x17: clobbered by macros - // x18: guest context + // x29: guest context // Store the host regs save_callee_saved_regs x1 - add x18, x0, #VCPU_CONTEXT + add x29, x0, #VCPU_CONTEXT // Restore guest regs x0-x17 - ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)] - ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)] - ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)] - ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)] - ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)] - ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)] - ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)] - ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)] - ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)] + ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)] + ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)] + ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)] + ldp x6, x7, [x29, #CPU_XREG_OFFSET(6)] + ldp x8, x9, [x29, #CPU_XREG_OFFSET(8)] + ldp x10, x11, [x29, #CPU_XREG_OFFSET(10)] + ldp x12, x13, [x29, #CPU_XREG_OFFSET(12)] + ldp x14, x15, [x29, #CPU_XREG_OFFSET(14)] + ldp x16, x17, [x29, #CPU_XREG_OFFSET(16)] - // Restore guest regs x19-x29, lr - restore_callee_saved_regs x18 - - // Restore guest reg x18 - ldr x18, [x18, #CPU_XREG_OFFSET(18)] + // Restore guest regs x18-x29, lr + restore_callee_saved_regs x29 // Do not touch any register after this! eret @@ -101,7 +105,7 @@ ENTRY(__guest_exit) // Retrieve the guest regs x0-x1 from the stack ldp x2, x3, [sp], #16 // x0, x1 - // Store the guest regs x0-x1 and x4-x18 + // Store the guest regs x0-x1 and x4-x17 stp x2, x3, [x1, #CPU_XREG_OFFSET(0)] stp x4, x5, [x1, #CPU_XREG_OFFSET(4)] stp x6, x7, [x1, #CPU_XREG_OFFSET(6)] @@ -110,9 +114,8 @@ ENTRY(__guest_exit) stp x12, x13, [x1, #CPU_XREG_OFFSET(12)] stp x14, x15, [x1, #CPU_XREG_OFFSET(14)] stp x16, x17, [x1, #CPU_XREG_OFFSET(16)] - str x18, [x1, #CPU_XREG_OFFSET(18)] - // Store the guest regs x19-x29, lr + // Store the guest regs x18-x29, lr save_callee_saved_regs x1 get_host_ctxt x2, x3 @@ -196,15 +199,3 @@ alternative_endif eret ENDPROC(__fpsimd_guest_restore) - -ENTRY(__qcom_hyp_sanitize_btac_predictors) - /** - * Call SMC64 with Silicon provider serviceID 23<<8 (0xc2001700) - * 0xC2000000-0xC200FFFF: assigned to SiP Service Calls - * b15-b0: contains SiP functionID - */ - movz x0, #0x1700 - movk x0, #0xc200, lsl #16 - smc #0 - ret -ENDPROC(__qcom_hyp_sanitize_btac_predictors) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 44845996b554..4a8fdbb29286 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -405,16 +405,6 @@ again: __set_host_arch_workaround_state(vcpu); - if (cpus_have_const_cap(ARM64_HARDEN_BP_POST_GUEST_EXIT)) { - u32 midr = read_cpuid_id(); - - /* Apply BTAC predictors mitigation to all Falkor chips */ - if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) || - ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) { - __qcom_hyp_sanitize_btac_predictors(); - } - } - fp_enabled = __fpsimd_enabled(); __sysreg_save_guest_state(guest_ctxt); diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 4f135ec85e55..3773311ffcd0 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -199,4 +199,3 @@ void __hyp_text __kvm_enable_ssbs(void) "msr sctlr_el2, %0" : "=&r" (tmp) : "L" (SCTLR_ELx_DSSBS)); } - diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index cfbf7bd0dfba..32ae5c9daac4 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1785,8 +1785,11 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu, if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG) return NULL; + if (!index_to_params(id, ¶ms)) + return NULL; + table = get_target_table(vcpu->arch.target, true, &num); - r = find_reg_by_id(id, ¶ms, table, num); + r = find_reg(¶ms, table, num); if (!r) r = find_reg(¶ms, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 9a947afaf74c..c6a638a1bb3f 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -11,7 +11,12 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \ # patching of the bl instruction in the caller with an atomic instruction # when supported by the CPU. Result and argument registers are handled # correctly, based on the function prototype. +ifeq ($(ld-name),lld) +# https://bugs.llvm.org/show_bug.cgi?id=35841 +obj-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o +else lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o +endif CFLAGS_atomic_ll_sc.o := -ffixed-x1 -ffixed-x2 \ -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \ -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \ diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index 21ba0b29621b..4374020c824a 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -57,5 +57,6 @@ ENDPROC(__arch_clear_user) .section .fixup,"ax" .align 2 9: mov x0, x2 // return the original size + uaccess_disable_not_uao x2, x3 ret .previous diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 20305d485046..96b22c0fa343 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -75,5 +75,6 @@ ENDPROC(__arch_copy_from_user) .section .fixup,"ax" .align 2 9998: sub x0, end, dst // bytes not copied + uaccess_disable_not_uao x3, x4 ret .previous diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 54b75deb1d16..e56c705f1f23 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -77,5 +77,6 @@ ENDPROC(__arch_copy_in_user) .section .fixup,"ax" .align 2 9998: sub x0, end, dst // bytes not copied + uaccess_disable_not_uao x3, x4 ret .previous diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index 076c43715e64..30f93161190f 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -45,45 +45,45 @@ alternative_else_nop_endif ldp x14, x15, [x1, #96] ldp x16, x17, [x1, #112] - mov x18, #(PAGE_SIZE - 128) + add x0, x0, #256 add x1, x1, #128 1: - subs x18, x18, #128 + tst x0, #(PAGE_SIZE - 1) alternative_if ARM64_HAS_NO_HW_PREFETCH prfm pldl1strm, [x1, #384] alternative_else_nop_endif - stnp x2, x3, [x0] + stnp x2, x3, [x0, #-256] ldp x2, x3, [x1] - stnp x4, x5, [x0, #16] + stnp x4, x5, [x0, #16 - 256] ldp x4, x5, [x1, #16] - stnp x6, x7, [x0, #32] + stnp x6, x7, [x0, #32 - 256] ldp x6, x7, [x1, #32] - stnp x8, x9, [x0, #48] + stnp x8, x9, [x0, #48 - 256] ldp x8, x9, [x1, #48] - stnp x10, x11, [x0, #64] + stnp x10, x11, [x0, #64 - 256] ldp x10, x11, [x1, #64] - stnp x12, x13, [x0, #80] + stnp x12, x13, [x0, #80 - 256] ldp x12, x13, [x1, #80] - stnp x14, x15, [x0, #96] + stnp x14, x15, [x0, #96 - 256] ldp x14, x15, [x1, #96] - stnp x16, x17, [x0, #112] + stnp x16, x17, [x0, #112 - 256] ldp x16, x17, [x1, #112] add x0, x0, #128 add x1, x1, #128 - b.gt 1b + b.ne 1b - stnp x2, x3, [x0] - stnp x4, x5, [x0, #16] - stnp x6, x7, [x0, #32] - stnp x8, x9, [x0, #48] - stnp x10, x11, [x0, #64] - stnp x12, x13, [x0, #80] - stnp x14, x15, [x0, #96] - stnp x16, x17, [x0, #112] + stnp x2, x3, [x0, #-256] + stnp x4, x5, [x0, #16 - 256] + stnp x6, x7, [x0, #32 - 256] + stnp x8, x9, [x0, #48 - 256] + stnp x10, x11, [x0, #64 - 256] + stnp x12, x13, [x0, #80 - 256] + stnp x14, x15, [x0, #96 - 256] + stnp x16, x17, [x0, #112 - 256] ret ENDPROC(copy_page) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index fda6172d6b88..6b99b939c50f 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -74,5 +74,6 @@ ENDPROC(__arch_copy_to_user) .section .fixup,"ax" .align 2 9998: sub x0, end, dst // bytes not copied + uaccess_disable_not_uao x3, x4 ret .previous diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c9854f3ad371..b69a826c9c33 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -413,7 +413,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, struct task_struct *tsk; struct mm_struct *mm; int fault, sig, code, major = 0; - unsigned long vm_flags = VM_READ | VM_WRITE; + unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; if (notify_page_fault(regs, esr)) @@ -912,7 +912,7 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, NOKPROBE_SYMBOL(do_debug_exception); #ifdef CONFIG_ARM64_PAN -int cpu_enable_pan(void *__unused) +void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) { /* * We modify PSTATE. This won't work from irq context as the PSTATE @@ -922,6 +922,5 @@ int cpu_enable_pan(void *__unused) config_sctlr_el1(SCTLR_EL1_SPAN, 0); asm(SET_PSTATE_PAN(1)); - return 0; } #endif /* CONFIG_ARM64_PAN */ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index abb9d2ecc675..e02a6326c800 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -605,8 +605,8 @@ static void __init map_kernel(pgd_t *pgd) * entry instead. */ BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES)); - set_pud(pud_set_fixmap_offset(pgd, FIXADDR_START), - __pud(__pa_symbol(bm_pmd) | PUD_TYPE_TABLE)); + pud_populate(&init_mm, pud_set_fixmap_offset(pgd, FIXADDR_START), + lm_alias(bm_pmd)); pud_clear_fixmap(); } else { BUG(); @@ -721,7 +721,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) if (!p) return -ENOMEM; - set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL)); + pmd_set_huge(pmd, __pa(p), __pgprot(PROT_SECT_NORMAL)); } else vmemmap_verify((pte_t *)pmd, node, addr, next); } while (addr = next, addr != end); @@ -913,17 +913,35 @@ int __init arch_ioremap_pmd_supported(void) return !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); } -int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot) +int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { + pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | + pgprot_val(mk_sect_prot(prot))); + pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot); + + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)), + pud_val(new_pud))) + return 0; + BUG_ON(phys & ~PUD_MASK); - set_pud(pud, __pud(phys | PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot)))); + set_pud(pudp, new_pud); return 1; } -int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot) +int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { + pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | + pgprot_val(mk_sect_prot(prot))); + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot); + + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), + pmd_val(new_pmd))) + return 0; + BUG_ON(phys & ~PMD_MASK); - set_pmd(pmd, __pmd(phys | PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot)))); + set_pmd(pmdp, new_pmd); return 1; } diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index dad128ba98bf..e9c843e0c172 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -419,7 +419,7 @@ static int __init dummy_numa_init(void) if (numa_off) pr_info("NUMA disabled\n"); /* Forced off on command line. */ pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", - 0LLU, PFN_PHYS(max_pfn) - 1); + memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1); for_each_memblock(memory, mblk) { ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size); diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 8e077b25b243..118597813da7 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -65,6 +65,8 @@ ENDPROC(cpu_do_idle) * cpu_do_suspend - save CPU registers context * * x0: virtual address of context pointer + * + * This must be kept in sync with struct cpu_suspend_ctx in . */ ENTRY(cpu_do_suspend) mrs x2, tpidr_el0 @@ -89,6 +91,11 @@ alternative_endif stp x8, x9, [x0, #48] stp x10, x11, [x0, #64] stp x12, x13, [x0, #80] + /* + * Save x18 as it may be used as a platform register, e.g. by shadow + * call stack. + */ + str x18, [x0, #96] ret ENDPROC(cpu_do_suspend) @@ -105,6 +112,13 @@ ENTRY(cpu_do_resume) ldp x9, x10, [x0, #48] ldp x11, x12, [x0, #64] ldp x13, x14, [x0, #80] + /* + * Restore x18, as it may be used as a platform register, and clear + * the buffer to minimize the risk of exposure when used for shadow + * call stack. + */ + ldr x18, [x0, #96] + str xzr, [x0, #96] msr tpidr_el0, x2 msr tpidrro_el0, x3 msr contextidr_el1, x4 @@ -251,13 +265,13 @@ ENTRY(idmap_kpti_install_ng_mappings) /* We're the boot CPU. Wait for the others to catch up */ sevl 1: wfe - ldaxr w18, [flag_ptr] - eor w18, w18, num_cpus - cbnz w18, 1b + ldaxr w17, [flag_ptr] + eor w17, w17, num_cpus + cbnz w17, 1b /* We need to walk swapper, so turn off the MMU. */ - mrs x18, sctlr_el1 - bic x18, x18, #SCTLR_ELx_M + mrs x17, sctlr_el1 + bic x17, x17, #SCTLR_ELx_M msr sctlr_el1, x18 isb @@ -281,9 +295,9 @@ skip_pgd: isb /* We're done: fire up the MMU again */ - mrs x18, sctlr_el1 - orr x18, x18, #SCTLR_ELx_M - msr sctlr_el1, x18 + mrs x17, sctlr_el1 + orr x17, x17, #SCTLR_ELx_M + msr sctlr_el1, x17 isb /* @@ -353,33 +367,9 @@ skip_pte: b.ne do_pte b next_pmd - /* Secondary CPUs end up here */ -__idmap_kpti_secondary: - /* Uninstall swapper before surgery begins */ - __idmap_cpu_set_reserved_ttbr1 x18, x17 - - /* Increment the flag to let the boot CPU we're ready */ -1: ldxr w18, [flag_ptr] - add w18, w18, #1 - stxr w17, w18, [flag_ptr] - cbnz w17, 1b - - /* Wait for the boot CPU to finish messing around with swapper */ - sevl -1: wfe - ldxr w18, [flag_ptr] - cbnz w18, 1b - - /* All done, act like nothing happened */ - msr ttbr1_el1, swapper_ttb - isb - ret - .unreq cpu .unreq num_cpus .unreq swapper_pa - .unreq swapper_ttb - .unreq flag_ptr .unreq cur_pgdp .unreq end_pgdp .unreq pgd @@ -392,6 +382,31 @@ __idmap_kpti_secondary: .unreq cur_ptep .unreq end_ptep .unreq pte + + /* Secondary CPUs end up here */ +__idmap_kpti_secondary: + /* Uninstall swapper before surgery begins */ + __idmap_cpu_set_reserved_ttbr1 x16, x17 + + /* Increment the flag to let the boot CPU we're ready */ +1: ldxr w16, [flag_ptr] + add w16, w16, #1 + stxr w17, w16, [flag_ptr] + cbnz w17, 1b + + /* Wait for the boot CPU to finish messing around with swapper */ + sevl +1: wfe + ldxr w16, [flag_ptr] + cbnz w16, 1b + + /* All done, act like nothing happened */ + msr ttbr1_el1, swapper_ttb + isb + ret + + .unreq swapper_ttb + .unreq flag_ptr ENDPROC(idmap_kpti_install_ng_mappings) .popsection #endif @@ -436,11 +451,7 @@ ENTRY(__cpu_setup) /* * Prepare SCTLR */ - adr x5, crval - ldp w5, w6, [x5] - mrs x0, sctlr_el1 - bic x0, x0, x5 // clear bits - orr x0, x0, x6 // set bits + mov_q x0, SCTLR_EL1_SET /* * Set/prepare TCR and TTBR. We use 512GB (39-bit) address range for * both user and kernel. @@ -477,21 +488,3 @@ ENTRY(__cpu_setup) msr tcr_el1, x10 ret // return to head.S ENDPROC(__cpu_setup) - - /* - * We set the desired value explicitly, including those of the - * reserved bits. The values of bits EE & E0E were set early in - * el2_setup, which are left untouched below. - * - * n n T - * U E WT T UD US IHBS - * CE0 XWHW CZ ME TEEA S - * .... .IEE .... NEAI TE.I ..AD DEN0 ACAM - * 0011 0... 1101 ..0. ..0. 10.. .0.. .... < hardware reserved - * .... .1.. .... 01.1 11.1 ..01 0.01 1101 < software settings - */ - .type crval, #object -crval: - .word 0xfcffffff // clear - .word 0x34d5d91d // set - .popsection diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h index fb3dfb2a667e..d4e283b4f335 100644 --- a/arch/hexagon/include/asm/atomic.h +++ b/arch/hexagon/include/asm/atomic.h @@ -105,7 +105,7 @@ static inline void atomic_##op(int i, atomic_t *v) \ "1: %0 = memw_locked(%1);\n" \ " %0 = "#op "(%0,%2);\n" \ " memw_locked(%1,P3)=%0;\n" \ - " if !P3 jump 1b;\n" \ + " if (!P3) jump 1b;\n" \ : "=&r" (output) \ : "r" (&v->counter), "r" (i) \ : "memory", "p3" \ @@ -121,7 +121,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ "1: %0 = memw_locked(%1);\n" \ " %0 = "#op "(%0,%2);\n" \ " memw_locked(%1,P3)=%0;\n" \ - " if !P3 jump 1b;\n" \ + " if (!P3) jump 1b;\n" \ : "=&r" (output) \ : "r" (&v->counter), "r" (i) \ : "memory", "p3" \ @@ -138,7 +138,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \ "1: %0 = memw_locked(%2);\n" \ " %1 = "#op "(%0,%3);\n" \ " memw_locked(%2,P3)=%1;\n" \ - " if !P3 jump 1b;\n" \ + " if (!P3) jump 1b;\n" \ : "=&r" (output), "=&r" (val) \ : "r" (&v->counter), "r" (i) \ : "memory", "p3" \ @@ -187,7 +187,7 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) " }" " memw_locked(%2, p3) = %1;" " {" - " if !p3 jump 1b;" + " if (!p3) jump 1b;" " }" "2:" : "=&r" (__oldval), "=&r" (tmp) diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h index 2691a1857d20..634306cda006 100644 --- a/arch/hexagon/include/asm/bitops.h +++ b/arch/hexagon/include/asm/bitops.h @@ -52,7 +52,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr) "1: R12 = memw_locked(R10);\n" " { P0 = tstbit(R12,R11); R12 = clrbit(R12,R11); }\n" " memw_locked(R10,P1) = R12;\n" - " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n" + " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n" : "=&r" (oldval) : "r" (addr), "r" (nr) : "r10", "r11", "r12", "p0", "p1", "memory" @@ -76,7 +76,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr) "1: R12 = memw_locked(R10);\n" " { P0 = tstbit(R12,R11); R12 = setbit(R12,R11); }\n" " memw_locked(R10,P1) = R12;\n" - " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n" + " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n" : "=&r" (oldval) : "r" (addr), "r" (nr) : "r10", "r11", "r12", "p0", "p1", "memory" @@ -102,7 +102,7 @@ static inline int test_and_change_bit(int nr, volatile void *addr) "1: R12 = memw_locked(R10);\n" " { P0 = tstbit(R12,R11); R12 = togglebit(R12,R11); }\n" " memw_locked(R10,P1) = R12;\n" - " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n" + " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n" : "=&r" (oldval) : "r" (addr), "r" (nr) : "r10", "r11", "r12", "p0", "p1", "memory" @@ -237,7 +237,7 @@ static inline int ffs(int x) int r; asm("{ P0 = cmp.eq(%1,#0); %0 = ct0(%1);}\n" - "{ if P0 %0 = #0; if !P0 %0 = add(%0,#1);}\n" + "{ if (P0) %0 = #0; if (!P0) %0 = add(%0,#1);}\n" : "=&r" (r) : "r" (x) : "p0"); diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h index a6e34e2acbba..db258424059f 100644 --- a/arch/hexagon/include/asm/cmpxchg.h +++ b/arch/hexagon/include/asm/cmpxchg.h @@ -44,7 +44,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, __asm__ __volatile__ ( "1: %0 = memw_locked(%1);\n" /* load into retval */ " memw_locked(%1,P0) = %2;\n" /* store into memory */ - " if !P0 jump 1b;\n" + " if (!P0) jump 1b;\n" : "=&r" (retval) : "r" (ptr), "r" (x) : "memory", "p0" diff --git a/arch/hexagon/include/asm/futex.h b/arch/hexagon/include/asm/futex.h index c889f5993ecd..e8e5e47afb37 100644 --- a/arch/hexagon/include/asm/futex.h +++ b/arch/hexagon/include/asm/futex.h @@ -16,7 +16,7 @@ /* For example: %1 = %4 */ \ insn \ "2: memw_locked(%3,p2) = %1;\n" \ - " if !p2 jump 1b;\n" \ + " if (!p2) jump 1b;\n" \ " %1 = #0;\n" \ "3:\n" \ ".section .fixup,\"ax\"\n" \ @@ -84,10 +84,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, "1: %1 = memw_locked(%3)\n" " {\n" " p2 = cmp.eq(%1,%4)\n" - " if !p2.new jump:NT 3f\n" + " if (!p2.new) jump:NT 3f\n" " }\n" "2: memw_locked(%3,p2) = %5\n" - " if !p2 jump 1b\n" + " if (!p2) jump 1b\n" "3:\n" ".section .fixup,\"ax\"\n" "4: %0 = #%6\n" diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h index 53a8d5885887..007056263b8e 100644 --- a/arch/hexagon/include/asm/spinlock.h +++ b/arch/hexagon/include/asm/spinlock.h @@ -44,9 +44,9 @@ static inline void arch_read_lock(arch_rwlock_t *lock) __asm__ __volatile__( "1: R6 = memw_locked(%0);\n" " { P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" " memw_locked(%0,P3) = R6;\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -60,7 +60,7 @@ static inline void arch_read_unlock(arch_rwlock_t *lock) "1: R6 = memw_locked(%0);\n" " R6 = add(R6,#-1);\n" " memw_locked(%0,P3) = R6\n" - " if !P3 jump 1b;\n" + " if (!P3) jump 1b;\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -75,7 +75,7 @@ static inline int arch_read_trylock(arch_rwlock_t *lock) __asm__ __volatile__( " R6 = memw_locked(%1);\n" " { %0 = #0; P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n" - " { if !P3 jump 1f; }\n" + " { if (!P3) jump 1f; }\n" " memw_locked(%1,P3) = R6;\n" " { %0 = P3 }\n" "1:\n" @@ -102,9 +102,9 @@ static inline void arch_write_lock(arch_rwlock_t *lock) __asm__ __volatile__( "1: R6 = memw_locked(%0)\n" " { P3 = cmp.eq(R6,#0); R6 = #-1;}\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" " memw_locked(%0,P3) = R6;\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -118,7 +118,7 @@ static inline int arch_write_trylock(arch_rwlock_t *lock) __asm__ __volatile__( " R6 = memw_locked(%1)\n" " { %0 = #0; P3 = cmp.eq(R6,#0); R6 = #-1;}\n" - " { if !P3 jump 1f; }\n" + " { if (!P3) jump 1f; }\n" " memw_locked(%1,P3) = R6;\n" " %0 = P3;\n" "1:\n" @@ -141,9 +141,9 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) __asm__ __volatile__( "1: R6 = memw_locked(%0);\n" " P3 = cmp.eq(R6,#0);\n" - " { if !P3 jump 1b; R6 = #1; }\n" + " { if (!P3) jump 1b; R6 = #1; }\n" " memw_locked(%0,P3) = R6;\n" - " { if !P3 jump 1b; }\n" + " { if (!P3) jump 1b; }\n" : : "r" (&lock->lock) : "memory", "r6", "p3" @@ -163,7 +163,7 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) __asm__ __volatile__( " R6 = memw_locked(%1);\n" " P3 = cmp.eq(R6,#0);\n" - " { if !P3 jump 1f; R6 = #1; %0 = #0; }\n" + " { if (!P3) jump 1f; R6 = #1; %0 = #0; }\n" " memw_locked(%1,P3) = R6;\n" " %0 = P3;\n" "1:\n" diff --git a/arch/hexagon/kernel/stacktrace.c b/arch/hexagon/kernel/stacktrace.c index 41866a06adf7..ec4ef682923d 100644 --- a/arch/hexagon/kernel/stacktrace.c +++ b/arch/hexagon/kernel/stacktrace.c @@ -24,8 +24,6 @@ #include #include -register unsigned long current_frame_pointer asm("r30"); - struct stackframe { unsigned long fp; unsigned long rets; @@ -43,7 +41,7 @@ void save_stack_trace(struct stack_trace *trace) low = (unsigned long)task_stack_page(current); high = low + THREAD_SIZE; - fp = current_frame_pointer; + fp = (unsigned long)__builtin_frame_address(0); while (fp >= low && fp <= (high - sizeof(*frame))) { frame = (struct stackframe *)fp; diff --git a/arch/hexagon/kernel/vm_entry.S b/arch/hexagon/kernel/vm_entry.S index 67c6ccc14770..9f4a73ff7203 100644 --- a/arch/hexagon/kernel/vm_entry.S +++ b/arch/hexagon/kernel/vm_entry.S @@ -382,7 +382,7 @@ ret_from_fork: R26.L = #LO(do_work_pending); R0 = #VM_INT_DISABLE; } - if P0 jump check_work_pending + if (P0) jump check_work_pending { R0 = R25; callr R24 diff --git a/arch/m68k/amiga/cia.c b/arch/m68k/amiga/cia.c index 2081b8cd5591..b9aee983e6f4 100644 --- a/arch/m68k/amiga/cia.c +++ b/arch/m68k/amiga/cia.c @@ -88,10 +88,19 @@ static irqreturn_t cia_handler(int irq, void *dev_id) struct ciabase *base = dev_id; int mach_irq; unsigned char ints; + unsigned long flags; + /* Interrupts get disabled while the timer irq flag is cleared and + * the timer interrupt serviced. + */ mach_irq = base->cia_irq; + local_irq_save(flags); ints = cia_set_irq(base, CIA_ICR_ALL); amiga_custom.intreq = base->int_mask; + if (ints & 1) + generic_handle_irq(mach_irq); + local_irq_restore(flags); + mach_irq++, ints >>= 1; for (; ints; mach_irq++, ints >>= 1) { if (ints & 1) generic_handle_irq(mach_irq); diff --git a/arch/m68k/atari/ataints.c b/arch/m68k/atari/ataints.c index 3d2b63bedf05..56f02ea2c248 100644 --- a/arch/m68k/atari/ataints.c +++ b/arch/m68k/atari/ataints.c @@ -142,7 +142,7 @@ struct mfptimerbase { .name = "MFP Timer D" }; -static irqreturn_t mfptimer_handler(int irq, void *dev_id) +static irqreturn_t mfp_timer_d_handler(int irq, void *dev_id) { struct mfptimerbase *base = dev_id; int mach_irq; @@ -344,7 +344,7 @@ void __init atari_init_IRQ(void) st_mfp.tim_ct_cd = (st_mfp.tim_ct_cd & 0xf0) | 0x6; /* request timer D dispatch handler */ - if (request_irq(IRQ_MFP_TIMD, mfptimer_handler, IRQF_SHARED, + if (request_irq(IRQ_MFP_TIMD, mfp_timer_d_handler, IRQF_SHARED, stmfp_base.name, &stmfp_base)) pr_err("Couldn't register %s interrupt\n", stmfp_base.name); diff --git a/arch/m68k/atari/time.c b/arch/m68k/atari/time.c index c549b48174ec..972181c1fe4b 100644 --- a/arch/m68k/atari/time.c +++ b/arch/m68k/atari/time.c @@ -24,6 +24,18 @@ DEFINE_SPINLOCK(rtc_lock); EXPORT_SYMBOL_GPL(rtc_lock); +static irqreturn_t mfp_timer_c_handler(int irq, void *dev_id) +{ + irq_handler_t timer_routine = dev_id; + unsigned long flags; + + local_irq_save(flags); + timer_routine(0, NULL); + local_irq_restore(flags); + + return IRQ_HANDLED; +} + void __init atari_sched_init(irq_handler_t timer_routine) { @@ -32,7 +44,8 @@ atari_sched_init(irq_handler_t timer_routine) /* start timer C, div = 1:100 */ st_mfp.tim_ct_cd = (st_mfp.tim_ct_cd & 15) | 0x60; /* install interrupt service routine for MFP Timer C */ - if (request_irq(IRQ_MFP_TIMC, timer_routine, 0, "timer", timer_routine)) + if (request_irq(IRQ_MFP_TIMC, mfp_timer_c_handler, 0, "timer", + timer_routine)) pr_err("Couldn't register timer interrupt\n"); } diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c index 2cfff4765040..0e602c32b246 100644 --- a/arch/m68k/bvme6000/config.c +++ b/arch/m68k/bvme6000/config.c @@ -45,11 +45,6 @@ extern int bvme6000_set_clock_mmss (unsigned long); extern void bvme6000_reset (void); void bvme6000_set_vectors (void); -/* Save tick handler routine pointer, will point to xtime_update() in - * kernel/timer/timekeeping.c, called via bvme6000_process_int() */ - -static irq_handler_t tick_handler; - int __init bvme6000_parse_bootinfo(const struct bi_record *bi) { @@ -159,12 +154,18 @@ irqreturn_t bvme6000_abort_int (int irq, void *dev_id) static irqreturn_t bvme6000_timer_int (int irq, void *dev_id) { + irq_handler_t timer_routine = dev_id; + unsigned long flags; volatile RtcPtr_t rtc = (RtcPtr_t)BVME_RTC_BASE; - unsigned char msr = rtc->msr & 0xc0; + unsigned char msr; + local_irq_save(flags); + msr = rtc->msr & 0xc0; rtc->msr = msr | 0x20; /* Ack the interrupt */ + timer_routine(0, NULL); + local_irq_restore(flags); - return tick_handler(irq, dev_id); + return IRQ_HANDLED; } /* @@ -183,9 +184,8 @@ void bvme6000_sched_init (irq_handler_t timer_routine) rtc->msr = 0; /* Ensure timer registers accessible */ - tick_handler = timer_routine; - if (request_irq(BVME_IRQ_RTC, bvme6000_timer_int, 0, - "timer", bvme6000_timer_int)) + if (request_irq(BVME_IRQ_RTC, bvme6000_timer_int, 0, "timer", + timer_routine)) panic ("Couldn't register timer int"); rtc->t1cr_omr = 0x04; /* Mode 2, ext clk */ diff --git a/arch/m68k/hp300/time.c b/arch/m68k/hp300/time.c index 289d928a46cb..d30b03ea93a2 100644 --- a/arch/m68k/hp300/time.c +++ b/arch/m68k/hp300/time.c @@ -38,13 +38,19 @@ static irqreturn_t hp300_tick(int irq, void *dev_id) { + irq_handler_t timer_routine = dev_id; + unsigned long flags; unsigned long tmp; - irq_handler_t vector = dev_id; + + local_irq_save(flags); in_8(CLOCKBASE + CLKSR); asm volatile ("movpw %1@(5),%0" : "=d" (tmp) : "a" (CLOCKBASE)); + timer_routine(0, NULL); + local_irq_restore(flags); + /* Turn off the network and SCSI leds */ blinken_leds(0, 0xe0); - return vector(irq, NULL); + return IRQ_HANDLED; } u32 hp300_gettimeoffset(void) diff --git a/arch/m68k/kernel/uboot.c b/arch/m68k/kernel/uboot.c index b29c3b241e1b..107082877064 100644 --- a/arch/m68k/kernel/uboot.c +++ b/arch/m68k/kernel/uboot.c @@ -102,5 +102,5 @@ __init void process_uboot_commandline(char *commandp, int size) } parse_uboot_commandline(commandp, len); - commandp[size - 1] = 0; + commandp[len - 1] = 0; } diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index 9f59a662ace5..863806e6775a 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -54,16 +54,6 @@ static __u8 rbv_clear; static int gIER,gIFR,gBufA,gBufB; -/* - * Timer defs. - */ - -#define TICK_SIZE 10000 -#define MAC_CLOCK_TICK (783300/HZ) /* ticks per HZ */ -#define MAC_CLOCK_LOW (MAC_CLOCK_TICK&0xFF) -#define MAC_CLOCK_HIGH (MAC_CLOCK_TICK>>8) - - /* * On Macs with a genuine VIA chip there is no way to mask an individual slot * interrupt. This limitation also seems to apply to VIA clone logic cores in @@ -278,22 +268,6 @@ void __init via_init(void) } } -/* - * Start the 100 Hz clock - */ - -void __init via_init_clock(irq_handler_t func) -{ - via1[vACR] |= 0x40; - via1[vT1LL] = MAC_CLOCK_LOW; - via1[vT1LH] = MAC_CLOCK_HIGH; - via1[vT1CL] = MAC_CLOCK_LOW; - via1[vT1CH] = MAC_CLOCK_HIGH; - - if (request_irq(IRQ_MAC_TIMER_1, func, 0, "timer", func)) - pr_err("Couldn't register %s interrupt\n", "timer"); -} - /* * Debugging dump, used in various places to see what's going on. */ @@ -321,29 +295,6 @@ void via_debug_dump(void) } } -/* - * This is always executed with interrupts disabled. - * - * TBI: get time offset between scheduling timer ticks - */ - -u32 mac_gettimeoffset(void) -{ - unsigned long ticks, offset = 0; - - /* read VIA1 timer 2 current value */ - ticks = via1[vT1CL] | (via1[vT1CH] << 8); - /* The probability of underflow is less than 2% */ - if (ticks > MAC_CLOCK_TICK - MAC_CLOCK_TICK / 50) - /* Check for pending timer interrupt in VIA1 IFR */ - if (via1[vIFR] & 0x40) offset = TICK_SIZE; - - ticks = MAC_CLOCK_TICK - ticks; - ticks = ticks * 10000L / MAC_CLOCK_TICK; - - return (ticks + offset) * 1000; -} - /* * Flush the L2 cache on Macs that have it by flipping * the system into 24-bit mode for an instant. @@ -447,6 +398,8 @@ void via_nubus_irq_shutdown(int irq) * via6522.c :-), disable/pending masks added. */ +#define VIA_TIMER_1_INT BIT(6) + void via1_irq(struct irq_desc *desc) { int irq_num; @@ -456,6 +409,21 @@ void via1_irq(struct irq_desc *desc) if (!events) return; + irq_num = IRQ_MAC_TIMER_1; + irq_bit = VIA_TIMER_1_INT; + if (events & irq_bit) { + unsigned long flags; + + local_irq_save(flags); + via1[vIFR] = irq_bit; + generic_handle_irq(irq_num); + local_irq_restore(flags); + + events &= ~irq_bit; + if (!events) + return; + } + irq_num = VIA1_SOURCE_BASE; irq_bit = 1; do { @@ -612,3 +580,56 @@ int via2_scsi_drq_pending(void) return via2[gIFR] & (1 << IRQ_IDX(IRQ_MAC_SCSIDRQ)); } EXPORT_SYMBOL(via2_scsi_drq_pending); + +/* timer and clock source */ + +#define VIA_CLOCK_FREQ 783360 /* VIA "phase 2" clock in Hz */ +#define VIA_TIMER_INTERVAL (1000000 / HZ) /* microseconds per jiffy */ +#define VIA_TIMER_CYCLES (VIA_CLOCK_FREQ / HZ) /* clock cycles per jiffy */ + +#define VIA_TC (VIA_TIMER_CYCLES - 2) /* including 0 and -1 */ +#define VIA_TC_LOW (VIA_TC & 0xFF) +#define VIA_TC_HIGH (VIA_TC >> 8) + +void __init via_init_clock(irq_handler_t timer_routine) +{ + if (request_irq(IRQ_MAC_TIMER_1, timer_routine, 0, "timer", NULL)) { + pr_err("Couldn't register %s interrupt\n", "timer"); + return; + } + + via1[vT1LL] = VIA_TC_LOW; + via1[vT1LH] = VIA_TC_HIGH; + via1[vT1CL] = VIA_TC_LOW; + via1[vT1CH] = VIA_TC_HIGH; + via1[vACR] |= 0x40; +} + +u32 mac_gettimeoffset(void) +{ + unsigned long flags; + u8 count_high; + u16 count, offset = 0; + + /* + * Timer counter wrap-around is detected with the timer interrupt flag + * but reading the counter low byte (vT1CL) would reset the flag. + * Also, accessing both counter registers is essentially a data race. + * These problems are avoided by ignoring the low byte. Clock accuracy + * is 256 times worse (error can reach 0.327 ms) but CPU overhead is + * reduced by avoiding slow VIA register accesses. + */ + + local_irq_save(flags); + count_high = via1[vT1CH]; + if (count_high == 0xFF) + count_high = 0; + if (count_high > 0 && (via1[vIFR] & VIA_TIMER_1_INT)) + offset = VIA_TIMER_CYCLES; + local_irq_restore(flags); + + count = count_high << 8; + count = VIA_TIMER_CYCLES - count + offset; + + return ((count * VIA_TIMER_INTERVAL) / VIA_TIMER_CYCLES) * 1000; +} diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index 8778612d1f31..78ae803c833e 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -46,11 +46,6 @@ extern void mvme147_reset (void); static int bcd2int (unsigned char b); -/* Save tick handler routine pointer, will point to xtime_update() in - * kernel/time/timekeeping.c, called via mvme147_process_int() */ - -irq_handler_t tick_handler; - int __init mvme147_parse_bootinfo(const struct bi_record *bi) { @@ -106,16 +101,23 @@ void __init config_mvme147(void) static irqreturn_t mvme147_timer_int (int irq, void *dev_id) { + irq_handler_t timer_routine = dev_id; + unsigned long flags; + + local_irq_save(flags); m147_pcc->t1_int_cntrl = PCC_TIMER_INT_CLR; m147_pcc->t1_int_cntrl = PCC_INT_ENAB|PCC_LEVEL_TIMER1; - return tick_handler(irq, dev_id); + timer_routine(0, NULL); + local_irq_restore(flags); + + return IRQ_HANDLED; } void mvme147_sched_init (irq_handler_t timer_routine) { - tick_handler = timer_routine; - if (request_irq(PCC_IRQ_TIMER1, mvme147_timer_int, 0, "timer 1", NULL)) + if (request_irq(PCC_IRQ_TIMER1, mvme147_timer_int, 0, "timer 1", + timer_routine)) pr_err("Couldn't register timer interrupt\n"); /* Init the clock with a value */ diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index 6fa06d4d16bf..3116dd576bb3 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -51,11 +51,6 @@ extern void mvme16x_reset (void); int bcd2int (unsigned char b); -/* Save tick handler routine pointer, will point to xtime_update() in - * kernel/time/timekeeping.c, called via mvme16x_process_int() */ - -static irq_handler_t tick_handler; - unsigned short mvme16x_config; EXPORT_SYMBOL(mvme16x_config); @@ -354,8 +349,15 @@ static irqreturn_t mvme16x_abort_int (int irq, void *dev_id) static irqreturn_t mvme16x_timer_int (int irq, void *dev_id) { - *(volatile unsigned char *)0xfff4201b |= 8; - return tick_handler(irq, dev_id); + irq_handler_t timer_routine = dev_id; + unsigned long flags; + + local_irq_save(flags); + *(volatile unsigned char *)0xfff4201b |= 8; + timer_routine(0, NULL); + local_irq_restore(flags); + + return IRQ_HANDLED; } void mvme16x_sched_init (irq_handler_t timer_routine) @@ -363,14 +365,13 @@ void mvme16x_sched_init (irq_handler_t timer_routine) uint16_t brdno = be16_to_cpu(mvme_bdid.brdno); int irq; - tick_handler = timer_routine; /* Using PCCchip2 or MC2 chip tick timer 1 */ *(volatile unsigned long *)0xfff42008 = 0; *(volatile unsigned long *)0xfff42004 = 10000; /* 10ms */ *(volatile unsigned char *)0xfff42017 |= 3; *(volatile unsigned char *)0xfff4201b = 0x16; - if (request_irq(MVME16x_IRQ_TIMER, mvme16x_timer_int, 0, - "timer", mvme16x_timer_int)) + if (request_irq(MVME16x_IRQ_TIMER, mvme16x_timer_int, 0, "timer", + timer_routine)) panic ("Couldn't register timer int"); if (brdno == 0x0162 || brdno == 0x172) diff --git a/arch/m68k/q40/q40ints.c b/arch/m68k/q40/q40ints.c index 3e7603202977..1c696906c159 100644 --- a/arch/m68k/q40/q40ints.c +++ b/arch/m68k/q40/q40ints.c @@ -127,10 +127,10 @@ void q40_mksound(unsigned int hz, unsigned int ticks) sound_ticks = ticks << 1; } -static irq_handler_t q40_timer_routine; - -static irqreturn_t q40_timer_int (int irq, void * dev) +static irqreturn_t q40_timer_int(int irq, void *dev_id) { + irq_handler_t timer_routine = dev_id; + ql_ticks = ql_ticks ? 0 : 1; if (sound_ticks) { unsigned char sval=(sound_ticks & 1) ? 128-SVOL : 128+SVOL; @@ -139,8 +139,13 @@ static irqreturn_t q40_timer_int (int irq, void * dev) *DAC_RIGHT=sval; } - if (!ql_ticks) - q40_timer_routine(irq, dev); + if (!ql_ticks) { + unsigned long flags; + + local_irq_save(flags); + timer_routine(0, NULL); + local_irq_restore(flags); + } return IRQ_HANDLED; } @@ -148,11 +153,9 @@ void q40_sched_init (irq_handler_t timer_routine) { int timer_irq; - q40_timer_routine = timer_routine; timer_irq = Q40_IRQ_FRAME; - if (request_irq(timer_irq, q40_timer_int, 0, - "timer", q40_timer_int)) + if (request_irq(timer_irq, q40_timer_int, 0, "timer", timer_routine)) panic("Couldn't register timer int"); master_outb(-1, FRAME_CLEAR_REG); diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c index 6bbca30c9188..a5824abb4a39 100644 --- a/arch/m68k/sun3/sun3ints.c +++ b/arch/m68k/sun3/sun3ints.c @@ -61,8 +61,10 @@ static irqreturn_t sun3_int7(int irq, void *dev_id) static irqreturn_t sun3_int5(int irq, void *dev_id) { + unsigned long flags; unsigned int cnt; + local_irq_save(flags); #ifdef CONFIG_SUN3 intersil_clear(); #endif @@ -76,6 +78,7 @@ static irqreturn_t sun3_int5(int irq, void *dev_id) cnt = kstat_irqs_cpu(irq, 0); if (!(cnt % 20)) sun3_leds(led_pattern[cnt % 160 / 20]); + local_irq_restore(flags); return IRQ_HANDLED; } diff --git a/arch/m68k/sun3x/time.c b/arch/m68k/sun3x/time.c index 7a2c53d9f779..48b43903253e 100644 --- a/arch/m68k/sun3x/time.c +++ b/arch/m68k/sun3x/time.c @@ -78,15 +78,19 @@ u32 sun3x_gettimeoffset(void) } #if 0 -static void sun3x_timer_tick(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t sun3x_timer_tick(int irq, void *dev_id) { - void (*vector)(int, void *, struct pt_regs *) = dev_id; + irq_handler_t timer_routine = dev_id; + unsigned long flags; - /* Clear the pending interrupt - pulse the enable line low */ - disable_irq(5); - enable_irq(5); + local_irq_save(flags); + /* Clear the pending interrupt - pulse the enable line low */ + disable_irq(5); + enable_irq(5); + timer_routine(0, NULL); + local_irq_restore(flags); - vector(irq, NULL, regs); + return IRQ_HANDLED; } #endif diff --git a/arch/microblaze/Makefile b/arch/microblaze/Makefile index d269dd4b8279..fe5e48184c3c 100644 --- a/arch/microblaze/Makefile +++ b/arch/microblaze/Makefile @@ -83,19 +83,21 @@ archclean: linux.bin linux.bin.gz linux.bin.ub: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + @echo 'Kernel: $(boot)/$@ is ready' ' (#'`cat .version`')' simpleImage.%: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + @echo 'Kernel: $(boot)/$@ is ready' ' (#'`cat .version`')' define archhelp echo '* linux.bin - Create raw binary' echo ' linux.bin.gz - Create compressed raw binary' echo ' linux.bin.ub - Create U-Boot wrapped raw binary' - echo ' simpleImage.
- ELF image with $(arch)/boot/dts/
.dts linked in' - echo ' - stripped elf with fdt blob' - echo ' simpleImage.
.unstrip - full ELF image with fdt blob' - echo ' *_defconfig - Select default config from arch/microblaze/configs' - echo '' + echo ' simpleImage.
- Create the following images with
.dtb linked in' + echo ' simpleImage.
: raw image' + echo ' simpleImage.
.ub : raw image with U-Boot header' + echo ' simpleImage.
.unstrip: ELF (identical to vmlinux)' + echo ' simpleImage.
.strip : stripped ELF' echo ' Targets with
embed a device tree blob inside the image' echo ' These targets support board with firmware that does not' echo ' support passing a device tree directly. Replace
with the' diff --git a/arch/microblaze/boot/Makefile b/arch/microblaze/boot/Makefile index 7c2f52d4a0e4..49dbd1063d71 100644 --- a/arch/microblaze/boot/Makefile +++ b/arch/microblaze/boot/Makefile @@ -9,15 +9,12 @@ OBJCOPYFLAGS := -R .note -R .comment -R .note.gnu.build-id -O binary $(obj)/linux.bin: vmlinux FORCE $(call if_changed,objcopy) - @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' $(obj)/linux.bin.ub: $(obj)/linux.bin FORCE $(call if_changed,uimage) - @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' $(obj)/linux.bin.gz: $(obj)/linux.bin FORCE $(call if_changed,gzip) - @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' quiet_cmd_cp = CP $< $@$2 cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false) @@ -35,6 +32,5 @@ $(obj)/simpleImage.%: vmlinux FORCE $(call if_changed,objcopy) $(call if_changed,uimage) $(call if_changed,strip,.strip) - @echo 'Kernel: $(UIMAGE_OUT) is ready' ' (#'`cat .version`')' clean-files += simpleImage.*.unstrip linux.bin.ub dts/*.dtb diff --git a/arch/microblaze/kernel/cpu/cache.c b/arch/microblaze/kernel/cpu/cache.c index 0bde47e4fa69..dcba53803fa5 100644 --- a/arch/microblaze/kernel/cpu/cache.c +++ b/arch/microblaze/kernel/cpu/cache.c @@ -92,7 +92,8 @@ static inline void __disable_dcache_nomsr(void) #define CACHE_LOOP_LIMITS(start, end, cache_line_length, cache_size) \ do { \ int align = ~(cache_line_length - 1); \ - end = min(start + cache_size, end); \ + if (start < UINT_MAX - cache_size) \ + end = min(start + cache_size, end); \ start &= align; \ } while (0) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index ae4450e891ab..7e267d657c56 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -812,6 +812,7 @@ config SIBYTE_LITTLESUR select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_HIGHMEM select SYS_SUPPORTS_LITTLE_ENDIAN + select ZONE_DMA32 if 64BIT config SIBYTE_SENTOSA bool "Sibyte BCM91250E-Sentosa" diff --git a/arch/mips/Makefile.postlink b/arch/mips/Makefile.postlink index 4eea4188cb20..13e0beb9eee3 100644 --- a/arch/mips/Makefile.postlink +++ b/arch/mips/Makefile.postlink @@ -12,7 +12,7 @@ __archpost: include scripts/Kbuild.include CMD_RELOCS = arch/mips/boot/tools/relocs -quiet_cmd_relocs = RELOCS $@ +quiet_cmd_relocs = RELOCS $@ cmd_relocs = $(CMD_RELOCS) $@ # `@true` prevents complaint when there is nothing to be done diff --git a/arch/mips/bcm47xx/workarounds.c b/arch/mips/bcm47xx/workarounds.c index 1a8a07e7a563..46eddbec8d9f 100644 --- a/arch/mips/bcm47xx/workarounds.c +++ b/arch/mips/bcm47xx/workarounds.c @@ -5,9 +5,8 @@ #include #include -static void __init bcm47xx_workarounds_netgear_wnr3500l(void) +static void __init bcm47xx_workarounds_enable_usb_power(int usb_power) { - const int usb_power = 12; int err; err = gpio_request_one(usb_power, GPIOF_OUT_INIT_HIGH, "usb_power"); @@ -23,7 +22,10 @@ void __init bcm47xx_workarounds(void) switch (board) { case BCM47XX_BOARD_NETGEAR_WNR3500L: - bcm47xx_workarounds_netgear_wnr3500l(); + bcm47xx_workarounds_enable_usb_power(12); + break; + case BCM47XX_BOARD_NETGEAR_WNDR3400_V3: + bcm47xx_workarounds_enable_usb_power(21); break; default: /* No workaround(s) needed */ diff --git a/arch/mips/bcm63xx/Makefile b/arch/mips/bcm63xx/Makefile index c69f297fc1df..d89651e538f6 100644 --- a/arch/mips/bcm63xx/Makefile +++ b/arch/mips/bcm63xx/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 obj-y += clk.o cpu.o cs.o gpio.o irq.o nvram.o prom.o reset.o \ - setup.o timer.o dev-dsp.o dev-enet.o dev-flash.o \ - dev-pcmcia.o dev-rng.o dev-spi.o dev-hsspi.o dev-uart.o \ - dev-wdt.o dev-usb-usbd.o + setup.o timer.o dev-enet.o dev-flash.o dev-pcmcia.o \ + dev-rng.o dev-spi.o dev-hsspi.o dev-uart.o dev-wdt.o \ + dev-usb-usbd.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-y += boards/ diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c index b2097c0d2ed7..36ec3dc2c999 100644 --- a/arch/mips/bcm63xx/boards/board_bcm963xx.c +++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -289,14 +288,6 @@ static struct board_info __initdata board_96348gw_10 = { .has_pccard = 1, .has_ehci0 = 1, - .has_dsp = 1, - .dsp = { - .gpio_rst = 6, - .gpio_int = 34, - .cs = 2, - .ext_irq = 2, - }, - .leds = { { .name = "adsl-fail", @@ -401,14 +392,6 @@ static struct board_info __initdata board_96348gw = { .has_ohci0 = 1, - .has_dsp = 1, - .dsp = { - .gpio_rst = 6, - .gpio_int = 34, - .ext_irq = 2, - .cs = 2, - }, - .leds = { { .name = "adsl-fail", @@ -898,9 +881,6 @@ int __init board_register_devices(void) if (board.has_usbd) bcm63xx_usbd_register(&board.usbd); - if (board.has_dsp) - bcm63xx_dsp_register(&board.dsp); - /* Generate MAC address for WLAN and register our SPROM, * do this after registering enet devices */ diff --git a/arch/mips/bcm63xx/dev-dsp.c b/arch/mips/bcm63xx/dev-dsp.c deleted file mode 100644 index 5bb5b154c9bd..000000000000 --- a/arch/mips/bcm63xx/dev-dsp.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Broadcom BCM63xx VoIP DSP registration - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2009 Florian Fainelli - */ - -#include -#include -#include - -#include -#include -#include -#include - -static struct resource voip_dsp_resources[] = { - { - .start = -1, /* filled at runtime */ - .end = -1, /* filled at runtime */ - .flags = IORESOURCE_MEM, - }, - { - .start = -1, /* filled at runtime */ - .flags = IORESOURCE_IRQ, - }, -}; - -static struct platform_device bcm63xx_voip_dsp_device = { - .name = "bcm63xx-voip-dsp", - .id = -1, - .num_resources = ARRAY_SIZE(voip_dsp_resources), - .resource = voip_dsp_resources, -}; - -int __init bcm63xx_dsp_register(const struct bcm63xx_dsp_platform_data *pd) -{ - struct bcm63xx_dsp_platform_data *dpd; - u32 val; - - /* Get the memory window */ - val = bcm_mpi_readl(MPI_CSBASE_REG(pd->cs - 1)); - val &= MPI_CSBASE_BASE_MASK; - voip_dsp_resources[0].start = val; - voip_dsp_resources[0].end = val + 0xFFFFFFF; - voip_dsp_resources[1].start = pd->ext_irq; - - /* copy given platform data */ - dpd = bcm63xx_voip_dsp_device.dev.platform_data; - memcpy(dpd, pd, sizeof (*pd)); - - return platform_device_register(&bcm63xx_voip_dsp_device); -} diff --git a/arch/mips/bcm63xx/prom.c b/arch/mips/bcm63xx/prom.c index 7019e2967009..bbbf8057565b 100644 --- a/arch/mips/bcm63xx/prom.c +++ b/arch/mips/bcm63xx/prom.c @@ -84,7 +84,7 @@ void __init prom_init(void) * Here we will start up CPU1 in the background and ask it to * reconfigure itself then go back to sleep. */ - memcpy((void *)0xa0000200, &bmips_smp_movevec, 0x20); + memcpy((void *)0xa0000200, bmips_smp_movevec, 0x20); __sync(); set_c0_cause(C_SW0); cpumask_set_cpu(1, &bmips_booted_mask); diff --git a/arch/mips/bcm63xx/reset.c b/arch/mips/bcm63xx/reset.c index a2af38cf28a7..64574e74cb23 100644 --- a/arch/mips/bcm63xx/reset.c +++ b/arch/mips/bcm63xx/reset.c @@ -120,7 +120,7 @@ #define BCM6368_RESET_DSL 0 #define BCM6368_RESET_SAR SOFTRESET_6368_SAR_MASK #define BCM6368_RESET_EPHY SOFTRESET_6368_EPHY_MASK -#define BCM6368_RESET_ENETSW 0 +#define BCM6368_RESET_ENETSW SOFTRESET_6368_ENETSW_MASK #define BCM6368_RESET_PCM SOFTRESET_6368_PCM_MASK #define BCM6368_RESET_MPI SOFTRESET_6368_MPI_MASK #define BCM6368_RESET_PCIE 0 diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 331b9e0a8072..baa34e4deb78 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -29,6 +29,9 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \ -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \ -DKERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS) +# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. +KCOV_INSTRUMENT := n + # decompressor objects (linked with vmlinuz) vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi index efd5f0722206..39b6269610d4 100644 --- a/arch/mips/boot/dts/qca/ar9331.dtsi +++ b/arch/mips/boot/dts/qca/ar9331.dtsi @@ -99,7 +99,7 @@ miscintc: interrupt-controller@18060010 { compatible = "qca,ar7240-misc-intc"; - reg = <0x18060010 0x4>; + reg = <0x18060010 0x8>; interrupt-parent = <&cpuintc>; interrupts = <6>; diff --git a/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c b/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c index 8241fc6aa17d..3839feba68f2 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c +++ b/arch/mips/cavium-octeon/executive/cvmx-cmd-queue.c @@ -266,7 +266,7 @@ int cvmx_cmd_queue_length(cvmx_cmd_queue_id_t queue_id) } else { union cvmx_pko_mem_debug8 debug8; debug8.u64 = cvmx_read_csr(CVMX_PKO_MEM_DEBUG8); - return debug8.cn58xx.doorbell; + return debug8.cn50xx.doorbell; } case CVMX_CMD_QUEUE_ZIP: case CVMX_CMD_QUEUE_DFA: diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c index 1d92efb82c37..e1e24118c169 100644 --- a/arch/mips/cavium-octeon/octeon-platform.c +++ b/arch/mips/cavium-octeon/octeon-platform.c @@ -501,7 +501,7 @@ static void __init octeon_fdt_set_phy(int eth, int phy_addr) if (phy_addr >= 256 && alt_phy > 0) { const struct fdt_property *phy_prop; struct fdt_property *alt_prop; - u32 phy_handle_name; + fdt32_t phy_handle_name; /* Use the alt phy node instead.*/ phy_prop = fdt_get_property(initial_boot_params, eth, "phy-handle", NULL); diff --git a/arch/mips/fw/sni/sniprom.c b/arch/mips/fw/sni/sniprom.c index 6aa264b9856a..7c6151d412bd 100644 --- a/arch/mips/fw/sni/sniprom.c +++ b/arch/mips/fw/sni/sniprom.c @@ -42,7 +42,7 @@ /* O32 stack has to be 8-byte aligned. */ static u64 o32_stk[4096]; -#define O32_STK &o32_stk[sizeof(o32_stk)] +#define O32_STK (&o32_stk[ARRAY_SIZE(o32_stk)]) #define __PROM_O32(fun, arg) fun arg __asm__(#fun); \ __asm__(#fun " = call_o32") diff --git a/arch/mips/include/asm/bmips.h b/arch/mips/include/asm/bmips.h index b3e2975f83d3..a564915fddc4 100644 --- a/arch/mips/include/asm/bmips.h +++ b/arch/mips/include/asm/bmips.h @@ -75,11 +75,11 @@ static inline int register_bmips_smp_ops(void) #endif } -extern char bmips_reset_nmi_vec; -extern char bmips_reset_nmi_vec_end; -extern char bmips_smp_movevec; -extern char bmips_smp_int_vec; -extern char bmips_smp_int_vec_end; +extern char bmips_reset_nmi_vec[]; +extern char bmips_reset_nmi_vec_end[]; +extern char bmips_smp_movevec[]; +extern char bmips_smp_int_vec[]; +extern char bmips_smp_int_vec_end[]; extern int bmips_smp_enabled; extern int bmips_cpu_offset; diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index 89e9fb7976fe..520ca166cbed 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -73,8 +73,8 @@ extern unsigned long __xchg_called_with_bad_pointer(void) extern unsigned long __xchg_small(volatile void *ptr, unsigned long val, unsigned int size); -static inline unsigned long __xchg(volatile void *ptr, unsigned long x, - int size) +static __always_inline +unsigned long __xchg(volatile void *ptr, unsigned long x, int size) { switch (size) { case 1: @@ -146,8 +146,9 @@ static inline unsigned long __xchg(volatile void *ptr, unsigned long x, extern unsigned long __cmpxchg_small(volatile void *ptr, unsigned long old, unsigned long new, unsigned int size); -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, unsigned int size) +static __always_inline +unsigned long __cmpxchg(volatile void *ptr, unsigned long old, + unsigned long new, unsigned int size) { switch (size) { case 1: diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index 57b34257be2b..98eb15b0524c 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -60,21 +60,11 @@ * instruction, so the lower 16 bits must be zero. Should be true on * on any sane architecture; generic code does not use this assumption. */ -extern const unsigned long mips_io_port_base; +extern unsigned long mips_io_port_base; -/* - * Gcc will generate code to load the value of mips_io_port_base after each - * function call which may be fairly wasteful in some cases. So we don't - * play quite by the book. We tell gcc mips_io_port_base is a long variable - * which solves the code generation issue. Now we need to violate the - * aliasing rules a little to make initialization possible and finally we - * will need the barrier() to fight side effects of the aliasing chat. - * This trickery will eventually collapse under gcc's optimizer. Oh well. - */ static inline void set_io_port_base(unsigned long base) { - * (unsigned long *) &mips_io_port_base = base; - barrier(); + mips_io_port_base = base; } /* diff --git a/arch/mips/include/asm/kexec.h b/arch/mips/include/asm/kexec.h index 493a3cc7c39a..cfdbe66575f4 100644 --- a/arch/mips/include/asm/kexec.h +++ b/arch/mips/include/asm/kexec.h @@ -12,11 +12,11 @@ #include /* Maximum physical address we can use pages from */ -#define KEXEC_SOURCE_MEMORY_LIMIT (0x20000000) +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) /* Maximum address we can reach in physical address mode */ -#define KEXEC_DESTINATION_MEMORY_LIMIT (0x20000000) +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) /* Maximum address we can use for the control code buffer */ -#define KEXEC_CONTROL_MEMORY_LIMIT (0x20000000) +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) /* Reserve 3*4096 bytes for board-specific info */ #define KEXEC_CONTROL_PAGE_SIZE (4096 + 3*4096) diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_dsp.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_dsp.h deleted file mode 100644 index 4e4970787371..000000000000 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_dsp.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __BCM63XX_DSP_H -#define __BCM63XX_DSP_H - -struct bcm63xx_dsp_platform_data { - unsigned gpio_rst; - unsigned gpio_int; - unsigned cs; - unsigned ext_irq; -}; - -int __init bcm63xx_dsp_register(const struct bcm63xx_dsp_platform_data *pd); - -#endif /* __BCM63XX_DSP_H */ diff --git a/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h b/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h index 5e5b1bc4a324..830f53f28e3f 100644 --- a/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h +++ b/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h @@ -7,7 +7,6 @@ #include #include #include -#include /* * flash mapping @@ -31,7 +30,6 @@ struct board_info { unsigned int has_ohci0:1; unsigned int has_ehci0:1; unsigned int has_usbd:1; - unsigned int has_dsp:1; unsigned int has_uart0:1; unsigned int has_uart1:1; @@ -43,9 +41,6 @@ struct board_info { /* USB config */ struct bcm63xx_usbd_platform_data usbd; - /* DSP config */ - struct bcm63xx_dsp_platform_data dsp; - /* GPIO LEDs */ struct gpio_led leds[5]; diff --git a/arch/mips/include/asm/octeon/cvmx-pko.h b/arch/mips/include/asm/octeon/cvmx-pko.h index 5f47f76ed510..20eb9c46a75a 100644 --- a/arch/mips/include/asm/octeon/cvmx-pko.h +++ b/arch/mips/include/asm/octeon/cvmx-pko.h @@ -611,7 +611,7 @@ static inline void cvmx_pko_get_port_status(uint64_t port_num, uint64_t clear, pko_reg_read_idx.s.index = cvmx_pko_get_base_queue(port_num); cvmx_write_csr(CVMX_PKO_REG_READ_IDX, pko_reg_read_idx.u64); debug8.u64 = cvmx_read_csr(CVMX_PKO_MEM_DEBUG8); - status->doorbell = debug8.cn58xx.doorbell; + status->doorbell = debug8.cn50xx.doorbell; } } diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index a2252c2a9ded..d0b9912fb63f 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -18,10 +18,12 @@ #include #define __ARCH_USE_5LEVEL_HACK -#if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48) +#if CONFIG_PGTABLE_LEVELS == 2 #include -#elif !(defined(CONFIG_PAGE_SIZE_4KB) && defined(CONFIG_MIPS_VA_BITS_48)) +#elif CONFIG_PGTABLE_LEVELS == 3 #include +#else +#include #endif /* @@ -222,6 +224,9 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) return pgd_val(pgd); } +#define pgd_phys(pgd) virt_to_phys((void *)pgd_val(pgd)) +#define pgd_page(pgd) (pfn_to_page(pgd_phys(pgd) >> PAGE_SHIFT)) + static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) { return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address); diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 5e8927f99a76..a0338dbabeaa 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -52,8 +52,26 @@ struct thread_info { #define init_thread_info (init_thread_union.thread_info) #define init_stack (init_thread_union.stack) -/* How to get the thread information struct from C. */ +/* + * A pointer to the struct thread_info for the currently executing thread is + * held in register $28/$gp. + * + * We declare __current_thread_info as a global register variable rather than a + * local register variable within current_thread_info() because clang doesn't + * support explicit local register variables. + * + * When building the VDSO we take care not to declare the global register + * variable because this causes GCC to not preserve the value of $28/$gp in + * functions that change its value (which is common in the PIC VDSO when + * accessing the GOT). Since the VDSO shouldn't be accessing + * __current_thread_info anyway we declare it extern in order to cause a link + * failure if it's referenced. + */ +#ifdef __VDSO__ +extern struct thread_info *__current_thread_info; +#else register struct thread_info *__current_thread_info __asm__("$28"); +#endif static inline struct thread_info *current_thread_info(void) { diff --git a/arch/mips/kernel/cacheinfo.c b/arch/mips/kernel/cacheinfo.c index 428ef2189203..3ea95568ece4 100644 --- a/arch/mips/kernel/cacheinfo.c +++ b/arch/mips/kernel/cacheinfo.c @@ -61,6 +61,25 @@ static int __init_cache_level(unsigned int cpu) return 0; } +static void fill_cpumask_siblings(int cpu, cpumask_t *cpu_map) +{ + int cpu1; + + for_each_possible_cpu(cpu1) + if (cpus_are_siblings(cpu, cpu1)) + cpumask_set_cpu(cpu1, cpu_map); +} + +static void fill_cpumask_cluster(int cpu, cpumask_t *cpu_map) +{ + int cpu1; + int cluster = cpu_cluster(&cpu_data[cpu]); + + for_each_possible_cpu(cpu1) + if (cpu_cluster(&cpu_data[cpu1]) == cluster) + cpumask_set_cpu(cpu1, cpu_map); +} + static int __populate_cache_leaves(unsigned int cpu) { struct cpuinfo_mips *c = ¤t_cpu_data; @@ -68,14 +87,20 @@ static int __populate_cache_leaves(unsigned int cpu) struct cacheinfo *this_leaf = this_cpu_ci->info_list; if (c->icache.waysize) { + /* L1 caches are per core */ + fill_cpumask_siblings(cpu, &this_leaf->shared_cpu_map); populate_cache(dcache, this_leaf, 1, CACHE_TYPE_DATA); + fill_cpumask_siblings(cpu, &this_leaf->shared_cpu_map); populate_cache(icache, this_leaf, 1, CACHE_TYPE_INST); } else { populate_cache(dcache, this_leaf, 1, CACHE_TYPE_UNIFIED); } - if (c->scache.waysize) + if (c->scache.waysize) { + /* L2 cache is per cluster */ + fill_cpumask_cluster(cpu, &this_leaf->shared_cpu_map); populate_cache(scache, this_leaf, 2, CACHE_TYPE_UNIFIED); + } if (c->tcache.waysize) populate_cache(tcache, this_leaf, 3, CACHE_TYPE_UNIFIED); diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 795caa763da3..05ed4ed411c7 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -75,7 +75,7 @@ static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; * mips_io_port_base is the begin of the address space to which x86 style * I/O ports are mapped. */ -const unsigned long mips_io_port_base = -1; +unsigned long mips_io_port_base = -1; EXPORT_SYMBOL(mips_io_port_base); static struct resource code_resource = { .name = "Kernel code", }; diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 382d12eb88f0..45fbcbbf2504 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -457,10 +457,10 @@ static void bmips_wr_vec(unsigned long dst, char *start, char *end) static inline void bmips_nmi_handler_setup(void) { - bmips_wr_vec(BMIPS_NMI_RESET_VEC, &bmips_reset_nmi_vec, - &bmips_reset_nmi_vec_end); - bmips_wr_vec(BMIPS_WARM_RESTART_VEC, &bmips_smp_int_vec, - &bmips_smp_int_vec_end); + bmips_wr_vec(BMIPS_NMI_RESET_VEC, bmips_reset_nmi_vec, + bmips_reset_nmi_vec_end); + bmips_wr_vec(BMIPS_WARM_RESTART_VEC, bmips_smp_int_vec, + bmips_smp_int_vec_end); } struct reset_vec_info { diff --git a/arch/mips/kernel/vpe.c b/arch/mips/kernel/vpe.c index 544ea21bfef9..b2683aca401f 100644 --- a/arch/mips/kernel/vpe.c +++ b/arch/mips/kernel/vpe.c @@ -134,7 +134,7 @@ void release_vpe(struct vpe *v) { list_del(&v->list); if (v->load_addr) - release_progmem(v); + release_progmem(v->load_addr); kfree(v); } diff --git a/arch/mips/loongson64/common/serial.c b/arch/mips/loongson64/common/serial.c index ffefc1cb2612..98c3a7feb10f 100644 --- a/arch/mips/loongson64/common/serial.c +++ b/arch/mips/loongson64/common/serial.c @@ -110,7 +110,7 @@ static int __init serial_init(void) } module_init(serial_init); -static void __init serial_exit(void) +static void __exit serial_exit(void) { platform_device_unregister(&uart8250_device); } diff --git a/arch/mips/loongson64/loongson-3/platform.c b/arch/mips/loongson64/loongson-3/platform.c index 25a97cc0ee33..0db4cc3196eb 100644 --- a/arch/mips/loongson64/loongson-3/platform.c +++ b/arch/mips/loongson64/loongson-3/platform.c @@ -31,6 +31,9 @@ static int __init loongson3_platform_init(void) continue; pdev = kzalloc(sizeof(struct platform_device), GFP_KERNEL); + if (!pdev) + return -ENOMEM; + pdev->name = loongson_sysconf.sensors[i].name; pdev->id = loongson_sysconf.sensors[i].id; pdev->dev.platform_data = &loongson_sysconf.sensors[i]; diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index dc495578d44d..b55c74a7f7a4 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -658,6 +658,13 @@ static void build_restore_pagemask(u32 **p, struct uasm_reloc **r, int restore_scratch) { if (restore_scratch) { + /* + * Ensure the MFC0 below observes the value written to the + * KScratch register by the prior MTC0. + */ + if (scratch_reg >= 0) + uasm_i_ehb(p); + /* Reset default page size */ if (PM_DEFAULT_MASK >> 16) { uasm_i_lui(p, tmp, PM_DEFAULT_MASK >> 16); @@ -672,12 +679,10 @@ static void build_restore_pagemask(u32 **p, struct uasm_reloc **r, uasm_i_mtc0(p, 0, C0_PAGEMASK); uasm_il_b(p, r, lid); } - if (scratch_reg >= 0) { - uasm_i_ehb(p); + if (scratch_reg >= 0) UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); - } else { + else UASM_i_LW(p, 1, scratchpad_offset(0), 0); - } } else { /* Reset default page size */ if (PM_DEFAULT_MASK >> 16) { @@ -926,6 +931,10 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, } if (mode != not_refill && check_for_high_segbits) { uasm_l_large_segbits_fault(l, *p); + + if (mode == refill_scratch && scratch_reg >= 0) + uasm_i_ehb(p); + /* * We get here if we are an xsseg address, or if we are * an xuseg address above (PGDIR_SHIFT+PGDIR_BITS) boundary. @@ -942,12 +951,10 @@ build_get_pgd_vmalloc64(u32 **p, struct uasm_label **l, struct uasm_reloc **r, uasm_i_jr(p, ptr); if (mode == refill_scratch) { - if (scratch_reg >= 0) { - uasm_i_ehb(p); + if (scratch_reg >= 0) UASM_i_MFC0(p, 1, c0_kscratch(), scratch_reg); - } else { + else UASM_i_LW(p, 1, scratchpad_offset(0), 0); - } } else { uasm_i_nop(p); } diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 42faa95ce664..57a7a9d68475 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -612,6 +612,7 @@ static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value) static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) { int off, b_off; + int tcc_reg; ctx->flags |= EBPF_SEEN_TC; /* @@ -624,14 +625,14 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) b_off = b_imm(this_idx + 1, ctx); emit_instr(ctx, bne, MIPS_R_AT, MIPS_R_ZERO, b_off); /* - * if (--TCC < 0) + * if (TCC-- < 0) * goto out; */ /* Delay slot */ - emit_instr(ctx, daddiu, MIPS_R_T5, - (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4, -1); + tcc_reg = (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4; + emit_instr(ctx, daddiu, MIPS_R_T5, tcc_reg, -1); b_off = b_imm(this_idx + 1, ctx); - emit_instr(ctx, bltz, MIPS_R_T5, b_off); + emit_instr(ctx, bltz, tcc_reg, b_off); /* * prog = array->ptrs[index]; * if (prog == NULL) diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c index 1791a44ee570..20aaf77166e8 100644 --- a/arch/mips/txx9/generic/setup.c +++ b/arch/mips/txx9/generic/setup.c @@ -959,12 +959,11 @@ void __init txx9_sramc_init(struct resource *r) goto exit_put; err = sysfs_create_bin_file(&dev->dev.kobj, &dev->bindata_attr); if (err) { - device_unregister(&dev->dev); iounmap(dev->base); - kfree(dev); + device_unregister(&dev->dev); } return; exit_put: + iounmap(dev->base); put_device(&dev->dev); - return; } diff --git a/arch/nios2/kernel/nios2_ksyms.c b/arch/nios2/kernel/nios2_ksyms.c index bf2f55d10a4d..4e704046a150 100644 --- a/arch/nios2/kernel/nios2_ksyms.c +++ b/arch/nios2/kernel/nios2_ksyms.c @@ -9,12 +9,20 @@ #include #include +#include +#include + /* string functions */ EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memmove); +/* memory management */ + +EXPORT_SYMBOL(empty_zero_page); +EXPORT_SYMBOL(flush_icache_range); + /* * libgcc functions - functions that are used internally by the * compiler... (prototypes are not correct though, but that @@ -31,3 +39,7 @@ DECLARE_EXPORT(__udivsi3); DECLARE_EXPORT(__umoddi3); DECLARE_EXPORT(__umodsi3); DECLARE_EXPORT(__muldi3); +DECLARE_EXPORT(__ucmpdi2); +DECLARE_EXPORT(__lshrdi3); +DECLARE_EXPORT(__ashldi3); +DECLARE_EXPORT(__ashrdi3); diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index b16e95a4e875..1107d34e45bf 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -184,7 +184,7 @@ handler: ;\ * occured. in fact they never do. if you need them use * values saved on stack (for SPR_EPC, SPR_ESR) or content * of r4 (for SPR_EEAR). for details look at EXCEPTION_HANDLE() - * in 'arch/or32/kernel/head.S' + * in 'arch/openrisc/kernel/head.S' */ /* =====================================================[ exceptions] === */ diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index 90979acdf165..4d878d13b860 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -1551,7 +1551,7 @@ _string_nl: /* * .data section should be page aligned - * (look into arch/or32/kernel/vmlinux.lds) + * (look into arch/openrisc/kernel/vmlinux.lds.S) */ .section .data,"aw" .align 8192 diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h index f627c37dad9c..ab5c215cf46c 100644 --- a/arch/parisc/include/asm/cmpxchg.h +++ b/arch/parisc/include/asm/cmpxchg.h @@ -44,8 +44,14 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size) ** if (((unsigned long)p & 0xf) == 0) ** return __ldcw(p); */ -#define xchg(ptr, x) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)))) +#define xchg(ptr, x) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __typeof__(*(ptr)) _x_ = (x); \ + __ret = (__typeof__(*(ptr))) \ + __xchg((unsigned long)_x_, (ptr), sizeof(*(ptr))); \ + __ret; \ +}) /* bug catcher for when unsupported size is used - won't link */ extern void __cmpxchg_called_with_bad_pointer(void); diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c index 92a9b5f12f98..f29f682352f0 100644 --- a/arch/parisc/mm/ioremap.c +++ b/arch/parisc/mm/ioremap.c @@ -3,7 +3,7 @@ * arch/parisc/mm/ioremap.c * * (C) Copyright 1995 1996 Linus Torvalds - * (C) Copyright 2001-2006 Helge Deller + * (C) Copyright 2001-2019 Helge Deller * (C) Copyright 2005 Kyle McMartin */ @@ -84,7 +84,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l addr = (void __iomem *) area->addr; if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size, phys_addr, pgprot)) { - vfree(addr); + vunmap(addr); return NULL; } @@ -92,9 +92,11 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l } EXPORT_SYMBOL(__ioremap); -void iounmap(const volatile void __iomem *addr) +void iounmap(const volatile void __iomem *io_addr) { - if (addr > high_memory) - return vfree((void *) (PAGE_MASK & (unsigned long __force) addr)); + unsigned long addr = (unsigned long)io_addr & PAGE_MASK; + + if (is_vmalloc_addr((void *)addr)) + vunmap((void *)addr); } EXPORT_SYMBOL(iounmap); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index de3b07c7be30..277e4ffb928b 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -225,6 +225,7 @@ config PPC select MODULES_USE_ELF_RELA select NO_BOOTMEM select OF + select OF_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE select OF_EARLY_FLATTREE select OF_RESERVED_MEM select OLD_SIGACTION if PPC32 diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 0f04c878113e..9c78ef298257 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -385,7 +385,9 @@ vdso_install: ifeq ($(CONFIG_PPC64),y) $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@ endif +ifdef CONFIG_VDSO32 $(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@ +endif archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/powerpc/boot/4xx.c b/arch/powerpc/boot/4xx.c index f7da65169124..3c8774163c7e 100644 --- a/arch/powerpc/boot/4xx.c +++ b/arch/powerpc/boot/4xx.c @@ -232,7 +232,7 @@ void ibm4xx_denali_fixup_memsize(void) dpath = 8; /* 64 bits */ /* get address pins (rows) */ - val = SDRAM0_READ(DDR0_42); + val = SDRAM0_READ(DDR0_42); row = DDR_GET_VAL(val, DDR_APIN, DDR_APIN_SHIFT); if (row > max_row) diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index e2a5a932c24a..5807c9d8e56d 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -24,8 +24,8 @@ compress-$(CONFIG_KERNEL_GZIP) := CONFIG_KERNEL_GZIP compress-$(CONFIG_KERNEL_XZ) := CONFIG_KERNEL_XZ BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \ - -fno-strict-aliasing -Os -msoft-float -pipe \ - -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ + -fno-strict-aliasing -Os -msoft-float -mno-altivec -mno-vsx \ + -pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \ -D$(compress-y) BOOTCC := $(CC) diff --git a/arch/powerpc/boot/dts/bamboo.dts b/arch/powerpc/boot/dts/bamboo.dts index aa68911f6560..084b82ba7493 100644 --- a/arch/powerpc/boot/dts/bamboo.dts +++ b/arch/powerpc/boot/dts/bamboo.dts @@ -268,8 +268,10 @@ /* Outbound ranges, one memory and one IO, * later cannot be changed. Chip supports a second * IO range but we don't use it for now + * The chip also supports a larger memory range but + * it's not naturally aligned, so our code will break */ - ranges = <0x02000000 0x00000000 0xa0000000 0x00000000 0xa0000000 0x00000000 0x40000000 + ranges = <0x02000000 0x00000000 0xa0000000 0x00000000 0xa0000000 0x00000000 0x20000000 0x02000000 0x00000000 0x00000000 0x00000000 0xe0000000 0x00000000 0x00100000 0x01000000 0x00000000 0x00000000 0x00000000 0xe8000000 0x00000000 0x00010000>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi index e1a961f05dcd..baa0c503e741 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0-best-effort.dtsi @@ -63,6 +63,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy0: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi index c288f3c6c637..93095600e808 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi @@ -60,6 +60,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy6: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi index 94f3e7175012..ff4bd38f0645 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1-best-effort.dtsi @@ -63,6 +63,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy1: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi index 94a76982d214..1fa38ed6f59e 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi @@ -60,6 +60,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy7: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi index b5ff5f71c6b8..a8cc9780c0c4 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy0: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi index ee44182c6348..8b8bd70c9382 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy1: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi index f05f0d775039..619c880b54d8 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe5000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy2: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi index a9114ec51075..d7ebb73a400d 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe7000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy3: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi index 44dd00ac7367..b151d696a069 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe9000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy4: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi index 5b1b84b58602..adc0ae0013a3 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi @@ -59,6 +59,7 @@ fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xeb000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy5: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi index 0e1daaef9e74..435047e0e250 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi @@ -60,6 +60,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy14: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi index 68c5ef779266..c098657cca0a 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi @@ -60,6 +60,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xf3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy15: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi index 605363cc1117..9d06824815f3 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe1000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy8: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi index 1955dfa13634..70e947730c4b 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe3000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy9: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi index 2c1476454ee0..ad96e6529595 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe5000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy10: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi index b8b541ff5fb0..034bc4b71f7a 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe7000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy11: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi index 4b2cfddd1b15..93ca23d82b39 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xe9000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy12: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi index 0a52ddf7cc17..23b3117a2fd2 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3-1-1g-5.dtsi @@ -59,6 +59,7 @@ fman@500000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xeb000 0x1000>; + fsl,erratum-a011043; /* must ignore read errors */ pcsphy13: ethernet-phy@0 { reg = <0x0>; diff --git a/arch/powerpc/boot/libfdt_env.h b/arch/powerpc/boot/libfdt_env.h index f52c31b1f48f..ac5d3c947e04 100644 --- a/arch/powerpc/boot/libfdt_env.h +++ b/arch/powerpc/boot/libfdt_env.h @@ -5,6 +5,10 @@ #include #include +#define INT_MAX ((int)(~0U>>1)) +#define UINT32_MAX ((u32)~0U) +#define INT32_MAX ((s32)(UINT32_MAX >> 1)) + #include "of.h" typedef u32 uint32_t; diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h index 9c63b596e6ce..a09595f00cab 100644 --- a/arch/powerpc/include/asm/archrandom.h +++ b/arch/powerpc/include/asm/archrandom.h @@ -28,7 +28,7 @@ static inline int arch_get_random_seed_int(unsigned int *v) unsigned long val; int rc; - rc = arch_get_random_long(&val); + rc = arch_get_random_seed_long(&val); if (rc) *v = val; diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index ba4c75062d49..2d4444981c2c 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -129,7 +129,10 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip); /* Patch sites */ extern s32 patch__call_flush_count_cache; extern s32 patch__flush_count_cache_return; +extern s32 patch__flush_link_stack_return; +extern s32 patch__call_kvm_flush_link_stack; extern long flush_count_cache; +extern long kvm_flush_link_stack; #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */ diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 53b31c2bcdf4..e4451b30d7e3 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -45,6 +45,7 @@ extern int machine_check_e500(struct pt_regs *regs); extern int machine_check_e200(struct pt_regs *regs); extern int machine_check_47x(struct pt_regs *regs); int machine_check_8xx(struct pt_regs *regs); +int machine_check_83xx(struct pt_regs *regs); extern void cpu_down_flush_e500v2(void); extern void cpu_down_flush_e500mc(void); @@ -215,7 +216,9 @@ enum { #define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000) #define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000) #define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) +#define CPU_FTR_P9_TLBIE_STQ_BUG LONG_ASM_CONST(0x0000400000000000) #define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000) +#define CPU_FTR_P9_TLBIE_ERAT_BUG LONG_ASM_CONST(0x0001000000000000) #ifndef __ASSEMBLY__ @@ -475,7 +478,8 @@ enum { CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \ CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ - CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300) + CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | \ + CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG) #define CPU_FTRS_POWER9_DD1 ((CPU_FTRS_POWER9 | CPU_FTR_POWER9_DD1) & \ (~CPU_FTR_SAO)) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index 7f74c282710f..fad0e6ff460f 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -20,11 +20,6 @@ #include #include -/* - * For static allocation of some of the structures. - */ -#define IMC_MAX_PMUS 32 - /* * Compatibility macros for IMC devices */ @@ -125,4 +120,5 @@ enum { extern int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id); extern void thread_imc_disable(void); +extern int get_max_nest_dev(void); #endif /* __ASM_POWERPC_IMC_PMU_H */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index b779f3ccd412..05f3c2b3aa0e 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -733,6 +733,8 @@ #define SRR1_PROGTRAP 0x00020000 /* Trap */ #define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */ +#define SRR1_MCE_MCP 0x00080000 /* Machine check signal caused interrupt */ + #define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */ #define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */ #define HSRR1_DENORM 0x00100000 /* Denorm exception */ diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index 759597bf0fd8..ccf44c135389 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -81,6 +81,9 @@ static inline bool security_ftr_enabled(unsigned long feature) // Software required to flush count cache on context switch #define SEC_FTR_FLUSH_COUNT_CACHE 0x0000000000000400ull +// Software required to flush link stack on context switch +#define SEC_FTR_FLUSH_LINK_STACK 0x0000000000001000ull + // Features enabled by default #define SEC_FTR_DEFAULT \ diff --git a/arch/powerpc/include/asm/sfp-machine.h b/arch/powerpc/include/asm/sfp-machine.h index d89beaba26ff..8b957aabb826 100644 --- a/arch/powerpc/include/asm/sfp-machine.h +++ b/arch/powerpc/include/asm/sfp-machine.h @@ -213,30 +213,18 @@ * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow * (i.e. carry out) is not stored anywhere, and is lost. */ -#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ do { \ if (__builtin_constant_p (bh) && (bh) == 0) \ - __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "%r" ((USItype)(ah)), \ - "%r" ((USItype)(al)), \ - "rI" ((USItype)(bl))); \ - else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ - __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "%r" ((USItype)(ah)), \ - "%r" ((USItype)(al)), \ - "rI" ((USItype)(bl))); \ + __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ + __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ else \ - __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "%r" ((USItype)(ah)), \ - "r" ((USItype)(bh)), \ - "%r" ((USItype)(al)), \ - "rI" ((USItype)(bl))); \ + __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \ + : "=r" (sh), "=&r" (sl) \ + : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ } while (0) /* sub_ddmmss is used in op-2.h and udivmodti4.c and should be equivalent to @@ -248,44 +236,24 @@ * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, * and is lost. */ -#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ do { \ if (__builtin_constant_p (ah) && (ah) == 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "r" ((USItype)(bh)), \ - "rI" ((USItype)(al)), \ - "r" ((USItype)(bl))); \ - else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "r" ((USItype)(bh)), \ - "rI" ((USItype)(al)), \ - "r" ((USItype)(bl))); \ + __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ else if (__builtin_constant_p (bh) && (bh) == 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "r" ((USItype)(ah)), \ - "rI" ((USItype)(al)), \ - "r" ((USItype)(bl))); \ - else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0) \ - __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "r" ((USItype)(ah)), \ - "rI" ((USItype)(al)), \ - "r" ((USItype)(bl))); \ + __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ + else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ + __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \ + : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ else \ - __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \ - : "=r" ((USItype)(sh)), \ - "=&r" ((USItype)(sl)) \ - : "r" ((USItype)(ah)), \ - "r" ((USItype)(bh)), \ - "rI" ((USItype)(al)), \ - "r" ((USItype)(bl))); \ + __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ } while (0) /* asm fragments for mul and div */ @@ -294,13 +262,10 @@ * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype * word product in HIGH_PROD and LOW_PROD. */ -#define umul_ppmm(ph, pl, m0, m1) \ +#define umul_ppmm(ph, pl, m0, m1) \ do { \ USItype __m0 = (m0), __m1 = (m1); \ - __asm__ ("mulhwu %0,%1,%2" \ - : "=r" ((USItype)(ph)) \ - : "%r" (__m0), \ - "r" (__m1)); \ + __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \ (pl) = __m0 * __m1; \ } while (0) @@ -312,9 +277,10 @@ * significant bit of DENOMINATOR must be 1, then the pre-processor symbol * UDIV_NEEDS_NORMALIZATION is defined to 1. */ -#define udiv_qrnnd(q, r, n1, n0, d) \ +#define udiv_qrnnd(q, r, n1, n0, d) \ do { \ - UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \ + UWtype __d1, __d0, __q1, __q0; \ + UWtype __r1, __r0, __m; \ __d1 = __ll_highpart (d); \ __d0 = __ll_lowpart (d); \ \ @@ -325,7 +291,7 @@ if (__r1 < __m) \ { \ __q1--, __r1 += (d); \ - if (__r1 >= (d)) /* we didn't get carry when adding to __r1 */ \ + if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\ if (__r1 < __m) \ __q1--, __r1 += (d); \ } \ diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 51f00c00d7e4..3865d1d23597 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -234,7 +234,7 @@ do { \ ({ \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ - const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ if (!is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ @@ -248,7 +248,7 @@ do { \ ({ \ long __gu_err = -EFAULT; \ __long_type(*(ptr)) __gu_val = 0; \ - const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ might_fault(); \ if (access_ok(VERIFY_READ, __gu_addr, (size))) { \ barrier_nospec(); \ @@ -262,7 +262,7 @@ do { \ ({ \ long __gu_err; \ __long_type(*(ptr)) __gu_val; \ - const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ __chk_user_ptr(ptr); \ barrier_nospec(); \ __get_user_size(__gu_val, __gu_addr, (size), __gu_err); \ diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index 1afe90ade595..674c03350cd1 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -86,6 +86,7 @@ struct vdso_data { __s32 wtom_clock_nsec; struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */ __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */ + __u32 hrtimer_res; /* hrtimer resolution */ __u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */ __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ }; @@ -107,6 +108,7 @@ struct vdso_data { __s32 wtom_clock_nsec; struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */ __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */ + __u32 hrtimer_res; /* hrtimer resolution */ __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ __u32 dcache_block_size; /* L1 d-cache block size */ __u32 icache_block_size; /* L1 i-cache block size */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 142b08d40642..5607ce67d178 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -5,8 +5,8 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' -# Disable clang warning for using setjmp without setjmp.h header -CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header) +# Avoid clang warnings around longjmp/setjmp declarations +CFLAGS_crash.o += -ffreestanding subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 2e5ea300258a..1bc761e537a9 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -373,6 +373,7 @@ int main(void) OFFSET(WTOM_CLOCK_NSEC, vdso_data, wtom_clock_nsec); OFFSET(STAMP_XTIME, vdso_data, stamp_xtime); OFFSET(STAMP_SEC_FRAC, vdso_data, stamp_sec_fraction); + OFFSET(CLOCK_HRTIMER_RES, vdso_data, hrtimer_res); OFFSET(CFG_ICACHE_BLOCKSZ, vdso_data, icache_block_size); OFFSET(CFG_DCACHE_BLOCKSZ, vdso_data, dcache_block_size); OFFSET(CFG_ICACHE_LOGBLOCKSZ, vdso_data, icache_log_block_size); @@ -401,7 +402,6 @@ int main(void) DEFINE(CLOCK_REALTIME, CLOCK_REALTIME); DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC); DEFINE(NSEC_PER_SEC, NSEC_PER_SEC); - DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC); #ifdef CONFIG_BUG DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index a8f20e5928e1..9edb45430133 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -865,4 +865,25 @@ void cacheinfo_cpu_offline(unsigned int cpu_id) if (cache) cache_cpu_clear(cache, cpu_id); } + +void cacheinfo_teardown(void) +{ + unsigned int cpu; + + lockdep_assert_cpus_held(); + + for_each_online_cpu(cpu) + cacheinfo_cpu_offline(cpu); +} + +void cacheinfo_rebuild(void) +{ + unsigned int cpu; + + lockdep_assert_cpus_held(); + + for_each_online_cpu(cpu) + cacheinfo_cpu_online(cpu); +} + #endif /* (CONFIG_PPC_PSERIES && CONFIG_SUSPEND) || CONFIG_HOTPLUG_CPU */ diff --git a/arch/powerpc/kernel/cacheinfo.h b/arch/powerpc/kernel/cacheinfo.h index 955f5e999f1b..52bd3fc6642d 100644 --- a/arch/powerpc/kernel/cacheinfo.h +++ b/arch/powerpc/kernel/cacheinfo.h @@ -6,4 +6,8 @@ extern void cacheinfo_cpu_online(unsigned int cpu_id); extern void cacheinfo_cpu_offline(unsigned int cpu_id); +/* Allow migration/suspend to tear down and rebuild the hierarchy. */ +extern void cacheinfo_teardown(void); +extern void cacheinfo_rebuild(void); + #endif /* _PPC_CACHEINFO_H */ diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 760872916013..6ef41e823013 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1185,6 +1185,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_generic, .platform = "ppc603", }, +#ifdef CONFIG_PPC_83xx { /* e300c1 (a 603e core, plus some) on 83xx */ .pvr_mask = 0x7fff0000, .pvr_value = 0x00830000, @@ -1195,7 +1196,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, + .machine_check = machine_check_83xx, .platform = "ppc603", }, { /* e300c2 (an e300c1 core, plus some, minus FPU) on 83xx */ @@ -1209,7 +1210,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, + .machine_check = machine_check_83xx, .platform = "ppc603", }, { /* e300c3 (e300c1, plus one IU, half cache size) on 83xx */ @@ -1223,7 +1224,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, + .machine_check = machine_check_83xx, .num_pmcs = 4, .oprofile_cpu_type = "ppc/e300", .oprofile_type = PPC_OPROFILE_FSL_EMB, @@ -1240,12 +1241,13 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 32, .dcache_bsize = 32, .cpu_setup = __setup_cpu_603, - .machine_check = machine_check_generic, + .machine_check = machine_check_83xx, .num_pmcs = 4, .oprofile_cpu_type = "ppc/e300", .oprofile_type = PPC_OPROFILE_FSL_EMB, .platform = "ppc603", }, +#endif { /* default match, we assume split I/D cache & TB (non-601)... */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, @@ -2230,11 +2232,13 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, * oprofile_cpu_type already has a value, then we are * possibly overriding a real PVR with a logical one, * and, in that case, keep the current value for - * oprofile_cpu_type. + * oprofile_cpu_type. Futhermore, let's ensure that the + * fix for the PMAO bug is enabled on compatibility mode. */ if (old.oprofile_cpu_type != NULL) { t->oprofile_cpu_type = old.oprofile_cpu_type; t->oprofile_type = old.oprofile_type; + t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG; } } diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 2dba206b065a..7ed2b1b6643c 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -705,8 +705,10 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f) m = &dt_cpu_feature_match_table[i]; if (!strcmp(f->name, m->name)) { known = true; - if (m->enable(f)) + if (m->enable(f)) { + cur_cpu_spec->cpu_features |= m->cpu_ftr_bit_mask; break; + } pr_info("not enabling: %s (disabled or unsupported by kernel)\n", f->name); @@ -714,17 +716,12 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f) } } - if (!known && enable_unknown) { - if (!feat_try_enable_unknown(f)) { - pr_info("not enabling: %s (unknown and unsupported by kernel)\n", - f->name); - return false; - } + if (!known && (!enable_unknown || !feat_try_enable_unknown(f))) { + pr_info("not enabling: %s (unknown and unsupported by kernel)\n", + f->name); + return false; } - if (m->cpu_ftr_bit_mask) - cur_cpu_spec->cpu_features |= m->cpu_ftr_bit_mask; - if (known) pr_debug("enabling: %s\n", f->name); else @@ -733,15 +730,45 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f) return true; } +/* + * Handle POWER9 broadcast tlbie invalidation issue using + * cpu feature flag. + */ +static __init void update_tlbie_feature_flag(unsigned long pvr) +{ + if (PVR_VER(pvr) == PVR_POWER9) { + /* + * Set the tlbie feature flag for anything below + * Nimbus DD 2.3 and Cumulus DD 1.3 + */ + if ((pvr & 0xe000) == 0) { + /* Nimbus */ + if ((pvr & 0xfff) < 0x203) + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG; + } else if ((pvr & 0xc000) == 0) { + /* Cumulus */ + if ((pvr & 0xfff) < 0x103) + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG; + } else { + WARN_ONCE(1, "Unknown PVR"); + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG; + } + + cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG; + } +} + static __init void cpufeatures_cpu_quirks(void) { - int version = mfspr(SPRN_PVR); + unsigned long version = mfspr(SPRN_PVR); /* * Not all quirks can be derived from the cpufeatures device tree. */ if ((version & 0xffffff00) == 0x004e0100) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1; + + update_tlbie_feature_flag(version); } static void __init cpufeatures_setup_finished(void) diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 470284f9e4f6..5a48c93aaa1b 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -520,12 +520,6 @@ static void *eeh_rmv_device(void *data, void *userdata) pci_iov_remove_virtfn(edev->physfn, pdn->vf_index, 0); edev->pdev = NULL; - - /* - * We have to set the VF PE number to invalid one, which is - * required to plug the VF successfully. - */ - pdn->pe_number = IODA_INVALID_PE; #endif if (rmv_data) list_add(&edev->rmv_list, &rmv_data->edev_list); diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 8545a9523b9b..7339ca4fdc19 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -381,7 +381,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev) while (parent) { if (!(parent->type & EEH_PE_INVALID)) break; - parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP); + parent->type &= ~EEH_PE_INVALID; parent = parent->parent; } diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 12395895b9aa..02a0bf52aec0 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -524,6 +524,7 @@ flush_count_cache: /* Save LR into r9 */ mflr r9 + // Flush the link stack .rept 64 bl .+4 .endr @@ -533,6 +534,11 @@ flush_count_cache: .balign 32 /* Restore LR */ 1: mtlr r9 + + // If we're just flushing the link stack, return here +3: nop + patch_site 3b patch__flush_link_stack_return + li r9,0x7fff mtctr r9 diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index af7a20dc6e09..80b6caaa9b92 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -785,9 +785,9 @@ dma_addr_t iommu_map_page(struct device *dev, struct iommu_table *tbl, vaddr = page_address(page) + offset; uaddr = (unsigned long)vaddr; - npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl)); if (tbl) { + npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl)); align = 0; if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE && ((unsigned long)vaddr & ~PAGE_MASK) == 0) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 0ce8b0e5d7ba..207ba53a500b 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -561,8 +561,6 @@ void __do_irq(struct pt_regs *regs) trace_irq_entry(regs); - check_stack_overflow(); - /* * Query the platform PIC for the interrupt & ack it. * @@ -594,6 +592,8 @@ void do_IRQ(struct pt_regs *regs) irqtp = hardirq_ctx[raw_smp_processor_id()]; sirqtp = softirq_ctx[raw_smp_processor_id()]; + check_stack_overflow(); + /* Already there ? */ if (unlikely(curtp == irqtp || curtp == sirqtp)) { __do_irq(regs); diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 09af857ca099..afe086f48e7c 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -86,7 +86,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ lwz r9,DCACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of cache block size */ - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mtctr r8 1: dcbst 0,r6 @@ -102,7 +102,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,ICACHEL1LOGBLOCKSIZE(r10) /* Get log-2 of Icache block size */ - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ beqlr /* nothing to do? */ mtctr r8 2: icbi 0,r6 diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index 0e395afbf0f4..0e45a446a8c7 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -261,9 +261,22 @@ void remove_dev_pci_data(struct pci_dev *pdev) continue; #ifdef CONFIG_EEH - /* Release EEH device for the VF */ + /* + * Release EEH state for this VF. The PCI core + * has already torn down the pci_dev for this VF, but + * we're responsible to removing the eeh_dev since it + * has the same lifetime as the pci_dn that spawned it. + */ edev = pdn_to_eeh_dev(pdn); if (edev) { + /* + * We allocate pci_dn's for the totalvfs count, + * but only only the vfs that were activated + * have a configured PE. + */ + if (edev->pe) + eeh_rmv_from_parent_pe(edev); + pdn->edev = NULL; kfree(edev); } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 5e5da2073fdf..ba0d4f9a99ba 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -567,12 +567,11 @@ void flush_all_to_thread(struct task_struct *tsk) if (tsk->thread.regs) { preempt_disable(); BUG_ON(tsk != current); - save_all(tsk); - #ifdef CONFIG_SPE if (tsk->thread.regs->msr & MSR_SPE) tsk->thread.spefscr = mfspr(SPRN_SPEFSCR); #endif + save_all(tsk); preempt_enable(); } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index f83056297441..d96b28415090 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -128,7 +128,7 @@ static void __init move_device_tree(void) p = __va(memblock_alloc(size, PAGE_SIZE)); memcpy(p, initial_boot_params, size); initial_boot_params = p; - DBG("Moved device tree to 0x%p\n", p); + DBG("Moved device tree to 0x%px\n", p); } DBG("<- move_device_tree\n"); @@ -662,7 +662,7 @@ void __init early_init_devtree(void *params) { phys_addr_t limit; - DBG(" -> early_init_devtree(%p)\n", params); + DBG(" -> early_init_devtree(%px)\n", params); /* Too early to BUG_ON(), do it by hand */ if (!early_init_dt_verify(params)) @@ -722,7 +722,7 @@ void __init early_init_devtree(void *params) memblock_allow_resize(); memblock_dump_all(); - DBG("Phys. mem: %llx\n", memblock_phys_mem_size()); + DBG("Phys. mem: %llx\n", (unsigned long long)memblock_phys_mem_size()); /* We may need to relocate the flat tree, do it now. * FIXME .. and the initrd too? */ diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 141d192c6953..a01f83ba739e 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -984,6 +984,7 @@ int rtas_ibm_suspend_me(u64 handle) goto out; } + cpu_hotplug_disable(); stop_topology_update(); /* Call function on all CPUs. One of us will make the @@ -998,6 +999,7 @@ int rtas_ibm_suspend_me(u64 handle) printk(KERN_ERR "Error doing global join\n"); start_topology_update(); + cpu_hotplug_enable(); /* Take down CPUs not online prior to suspend */ cpuret = rtas_offline_cpus_mask(offline_mask); diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 68d4ec373cfc..b3f540c9f410 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -24,11 +24,12 @@ enum count_cache_flush_type { COUNT_CACHE_FLUSH_HW = 0x4, }; static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; +static bool link_stack_flush_enabled; bool barrier_nospec_enabled; static bool no_nospec; static bool btb_flush_enabled; -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64) static bool no_spectrev2; #endif @@ -106,7 +107,7 @@ static __init int barrier_nospec_debugfs_init(void) device_initcall(barrier_nospec_debugfs_init); #endif /* CONFIG_DEBUG_FS */ -#ifdef CONFIG_PPC_FSL_BOOK3E +#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64) static int __init handle_nospectre_v2(char *p) { no_spectrev2 = true; @@ -114,6 +115,9 @@ static int __init handle_nospectre_v2(char *p) return 0; } early_param("nospectre_v2", handle_nospectre_v2); +#endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */ + +#ifdef CONFIG_PPC_FSL_BOOK3E void setup_spectre_v2(void) { if (no_spectrev2 || cpu_mitigations_off()) @@ -130,32 +134,33 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV); - if (rfi_flush || thread_priv) { + if (rfi_flush) { struct seq_buf s; seq_buf_init(&s, buf, PAGE_SIZE - 1); - seq_buf_printf(&s, "Mitigation: "); - - if (rfi_flush) - seq_buf_printf(&s, "RFI Flush"); - - if (rfi_flush && thread_priv) - seq_buf_printf(&s, ", "); - + seq_buf_printf(&s, "Mitigation: RFI Flush"); if (thread_priv) - seq_buf_printf(&s, "L1D private per thread"); + seq_buf_printf(&s, ", L1D private per thread"); seq_buf_printf(&s, "\n"); return s.len; } + if (thread_priv) + return sprintf(buf, "Vulnerable: L1D private per thread\n"); + if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)) return sprintf(buf, "Not affected\n"); return sprintf(buf, "Vulnerable\n"); } + +ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_meltdown(dev, attr, buf); +} #endif ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) @@ -201,11 +206,19 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c if (ccd) seq_buf_printf(&s, "Indirect branch cache disabled"); + + if (link_stack_flush_enabled) + seq_buf_printf(&s, ", Software link stack flush"); + } else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) { seq_buf_printf(&s, "Mitigation: Software count cache flush"); if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW) seq_buf_printf(&s, " (hardware accelerated)"); + + if (link_stack_flush_enabled) + seq_buf_printf(&s, ", Software link stack flush"); + } else if (btb_flush_enabled) { seq_buf_printf(&s, "Mitigation: Branch predictor state flush"); } else { @@ -366,18 +379,49 @@ static __init int stf_barrier_debugfs_init(void) device_initcall(stf_barrier_debugfs_init); #endif /* CONFIG_DEBUG_FS */ +static void no_count_cache_flush(void) +{ + count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; + pr_info("count-cache-flush: software flush disabled.\n"); +} + static void toggle_count_cache_flush(bool enable) { - if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { + if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE) && + !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK)) + enable = false; + + if (!enable) { patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP); - count_cache_flush_type = COUNT_CACHE_FLUSH_NONE; - pr_info("count-cache-flush: software flush disabled.\n"); +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + patch_instruction_site(&patch__call_kvm_flush_link_stack, PPC_INST_NOP); +#endif + pr_info("link-stack-flush: software flush disabled.\n"); + link_stack_flush_enabled = false; + no_count_cache_flush(); return; } + // This enables the branch from _switch to flush_count_cache patch_branch_site(&patch__call_flush_count_cache, (u64)&flush_count_cache, BRANCH_SET_LINK); +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + // This enables the branch from guest_exit_cont to kvm_flush_link_stack + patch_branch_site(&patch__call_kvm_flush_link_stack, + (u64)&kvm_flush_link_stack, BRANCH_SET_LINK); +#endif + + pr_info("link-stack-flush: software flush enabled.\n"); + link_stack_flush_enabled = true; + + // If we just need to flush the link stack, patch an early return + if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) { + patch_instruction_site(&patch__flush_link_stack_return, PPC_INST_BLR); + no_count_cache_flush(); + return; + } + if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) { count_cache_flush_type = COUNT_CACHE_FLUSH_SW; pr_info("count-cache-flush: full software flush sequence enabled.\n"); @@ -391,7 +435,26 @@ static void toggle_count_cache_flush(bool enable) void setup_count_cache_flush(void) { - toggle_count_cache_flush(true); + bool enable = true; + + if (no_spectrev2 || cpu_mitigations_off()) { + if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED) || + security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED)) + pr_warn("Spectre v2 mitigations not fully under software control, can't disable\n"); + + enable = false; + } + + /* + * There's no firmware feature flag/hypervisor bit to tell us we need to + * flush the link stack on context switch. So we set it here if we see + * either of the Spectre v2 mitigations that aim to protect userspace. + */ + if (security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED) || + security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) + security_ftr_set(SEC_FTR_FLUSH_LINK_STACK); + + toggle_count_cache_flush(enable); } #ifdef CONFIG_DEBUG_FS diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index fe6f3a285455..66a9987dc0f8 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -241,7 +241,7 @@ static u64 scan_dispatch_log(u64 stop_tb) * Accumulate stolen time by scanning the dispatch trace log. * Called on entry from user mode. */ -void accumulate_stolen_time(void) +void notrace accumulate_stolen_time(void) { u64 sst, ust; u8 save_soft_enabled = local_paca->soft_enabled; @@ -920,6 +920,7 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->wtom_clock_nsec = tk->wall_to_monotonic.tv_nsec; vdso_data->stamp_xtime = xt; vdso_data->stamp_sec_fraction = frac_sec; + vdso_data->hrtimer_res = hrtimer_resolution; smp_wmb(); ++(vdso_data->tb_update_count); } @@ -984,10 +985,14 @@ static void register_decrementer_clockevent(int cpu) *dec = decrementer_clockevent; dec->cpumask = cpumask_of(cpu); + clockevents_config_and_register(dec, ppc_tb_freq, 2, decrementer_max); + printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n", dec->name, dec->mult, dec->shift, cpu); - clockevents_register_device(dec); + /* Set values for KVM, see kvm_emulate_dec() */ + decrementer_clockevent.mult = dec->mult; + decrementer_clockevent.shift = dec->shift; } static void enable_large_decrementer(void) @@ -1035,18 +1040,7 @@ static void __init set_decrementer_max(void) static void __init init_decrementer_clockevent(void) { - int cpu = smp_processor_id(); - - clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4); - - decrementer_clockevent.max_delta_ns = - clockevent_delta2ns(decrementer_max, &decrementer_clockevent); - decrementer_clockevent.max_delta_ticks = decrementer_max; - decrementer_clockevent.min_delta_ns = - clockevent_delta2ns(2, &decrementer_clockevent); - decrementer_clockevent.min_delta_ticks = 2; - - register_decrementer_clockevent(cpu); + register_decrementer_clockevent(smp_processor_id()); } void secondary_cpu_time_init(void) diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S index 3745113fcc65..2a7eb5452aba 100644 --- a/arch/powerpc/kernel/vdso32/datapage.S +++ b/arch/powerpc/kernel/vdso32/datapage.S @@ -37,6 +37,7 @@ data_page_branch: mtlr r0 addi r3, r3, __kernel_datapage_offset-data_page_branch lwz r0,0(r3) + .cfi_restore lr add r3,r0,r3 blr .cfi_endproc diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index 769c2624e0a6..03a65fee8020 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -139,6 +139,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) */ 99: li r0,__NR_clock_gettime + .cfi_restore lr sc blr .cfi_endproc @@ -159,12 +160,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) cror cr0*4+eq,cr0*4+eq,cr1*4+eq bne cr0,99f + mflr r12 + .cfi_register lr,r12 + bl __get_datapage@local /* get data page */ + lwz r5, CLOCK_HRTIMER_RES(r3) + mtlr r12 li r3,0 cmpli cr0,r4,0 crclr cr0*4+so beqlr - lis r5,CLOCK_REALTIME_RES@h - ori r5,r5,CLOCK_REALTIME_RES@l stw r3,TSPC32_TV_SEC(r4) stw r5,TSPC32_TV_NSEC(r4) blr diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S index 69c5af2b3c96..228a4a2383d6 100644 --- a/arch/powerpc/kernel/vdso64/cacheflush.S +++ b/arch/powerpc/kernel/vdso64/cacheflush.S @@ -39,7 +39,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) subf r8,r6,r4 /* compute length */ add r8,r8,r5 /* ensure we get enough */ lwz r9,CFG_DCACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ crclr cr0*4+so beqlr /* nothing to do? */ mtctr r8 @@ -56,7 +56,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache) subf r8,r6,r4 /* compute length */ add r8,r8,r5 lwz r9,CFG_ICACHE_LOGBLOCKSZ(r10) - srw. r8,r8,r9 /* compute line count */ + srd. r8,r8,r9 /* compute line count */ crclr cr0*4+so beqlr /* nothing to do? */ mtctr r8 diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S index abf17feffe40..bf9668691511 100644 --- a/arch/powerpc/kernel/vdso64/datapage.S +++ b/arch/powerpc/kernel/vdso64/datapage.S @@ -37,6 +37,7 @@ data_page_branch: mtlr r0 addi r3, r3, __kernel_datapage_offset-data_page_branch lwz r0,0(r3) + .cfi_restore lr add r3,r0,r3 blr .cfi_endproc diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 382021324883..c973378e1f2b 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -124,6 +124,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) */ 99: li r0,__NR_clock_gettime + .cfi_restore lr sc blr .cfi_endproc @@ -144,12 +145,15 @@ V_FUNCTION_BEGIN(__kernel_clock_getres) cror cr0*4+eq,cr0*4+eq,cr1*4+eq bne cr0,99f + mflr r12 + .cfi_register lr,r12 + bl V_LOCAL_FUNC(__get_datapage) + lwz r5, CLOCK_HRTIMER_RES(r3) + mtlr r12 li r3,0 cmpldi cr0,r4,0 crclr cr0*4+so beqlr - lis r5,CLOCK_REALTIME_RES@h - ori r5,r5,CLOCK_REALTIME_RES@l std r3,TSPC64_TV_SEC(r4) std r5,TSPC64_TV_NSEC(r4) blr diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index d38280b01ef0..1eda81249937 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -79,8 +79,11 @@ void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) { if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { ulong pc = kvmppc_get_pc(vcpu); + ulong lr = kvmppc_get_lr(vcpu); if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); + if ((lr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) + kvmppc_set_lr(vcpu, lr & ~SPLIT_HACK_MASK); vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; } } diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 27a41695fcfd..7f8f2a0189df 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -160,6 +160,9 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, asm volatile("ptesync": : :"memory"); asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) + asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) + : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); asm volatile("ptesync": : :"memory"); } diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index 2c6cce8e7cfd..ef6a58838e7c 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -134,7 +134,6 @@ extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm, continue; kref_put(&stit->kref, kvm_spapr_tce_liobn_put); - return; } } } @@ -404,7 +403,7 @@ static long kvmppc_tce_iommu_unmap(struct kvm *kvm, long ret; if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir))) - return H_HARDWARE; + return H_TOO_HARD; if (dir == DMA_NONE) return H_SUCCESS; @@ -434,15 +433,15 @@ long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, return H_TOO_HARD; if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, tbl->it_page_shift, &hpa))) - return H_HARDWARE; + return H_TOO_HARD; if (mm_iommu_mapped_inc(mem)) - return H_CLOSED; + return H_TOO_HARD; ret = iommu_tce_xchg(tbl, entry, &hpa, &dir); if (WARN_ON_ONCE(ret)) { mm_iommu_mapped_dec(mem); - return H_HARDWARE; + return H_TOO_HARD; } if (dir != DMA_NONE) diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 23d6d1592f11..c75e5664fe3d 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -264,14 +264,14 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl, if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, tbl->it_page_shift, &hpa))) - return H_HARDWARE; + return H_TOO_HARD; pua = (void *) vmalloc_to_phys(pua); if (WARN_ON_ONCE_RM(!pua)) return H_HARDWARE; if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem))) - return H_CLOSED; + return H_TOO_HARD; ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir); if (ret) { @@ -448,7 +448,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, rmap = (void *) vmalloc_to_phys(rmap); if (WARN_ON_ONCE_RM(!rmap)) - return H_HARDWARE; + return H_TOO_HARD; /* * Synchronize with the MMU notifier callbacks in diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7de26809340a..e4f81f014206 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1997,7 +1997,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, mutex_unlock(&kvm->lock); if (!vcore) - goto free_vcpu; + goto uninit_vcpu; spin_lock(&vcore->lock); ++vcore->num_threads; @@ -2014,6 +2014,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, return vcpu; +uninit_vcpu: + kvm_vcpu_uninit(vcpu); free_vcpu: kmem_cache_free(kvm_vcpu_cache, vcpu); out: diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 4962d537c186..669b547385f3 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -429,6 +429,37 @@ static inline int try_lock_tlbie(unsigned int *lock) return old == 0; } +static inline void fixup_tlbie_lpid(unsigned long rb_value, unsigned long lpid) +{ + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { + /* Radix flush for a hash guest */ + + unsigned long rb,rs,prs,r,ric; + + rb = PPC_BIT(52); /* IS = 2 */ + rs = 0; /* lpid = 0 */ + prs = 0; /* partition scoped */ + r = 1; /* radix format */ + ric = 0; /* RIC_FLSUH_TLB */ + + /* + * Need the extra ptesync to make sure we don't + * re-order the tlbie + */ + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), + "i"(ric), "r"(rs) : "memory"); + } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : + "r" (rb_value), "r" (lpid)); + } +} + static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, long npages, int global, bool need_sync) { @@ -448,6 +479,8 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues, asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : : "r" (rbvalues[i]), "r" (kvm->arch.lpid)); } + + fixup_tlbie_lpid(rbvalues[i - 1], kvm->arch.lpid); asm volatile("eieio; tlbsync; ptesync" : : : "memory"); kvm->arch.tlbie_lock = 0; } else { diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 663a398449b7..46ea42f40334 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -18,6 +18,7 @@ */ #include +#include #include #include #include @@ -1445,6 +1446,10 @@ mc_cont: 1: #endif /* CONFIG_KVM_XICS */ + /* Possibly flush the link stack here. */ +1: nop + patch_site 1b patch__call_kvm_flush_link_stack + stw r12, STACK_SLOT_TRAP(r1) mr r3, r12 /* Increment exit count, poke other threads to exit */ @@ -1957,6 +1962,28 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) mtlr r0 blr +.balign 32 +.global kvm_flush_link_stack +kvm_flush_link_stack: + /* Save LR into r0 */ + mflr r0 + + /* Flush the link stack. On Power8 it's up to 32 entries in size. */ + .rept 32 + bl .+4 + .endr + + /* And on Power9 it's up to 64. */ +BEGIN_FTR_SECTION + .rept 32 + bl .+4 + .endr +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + + /* Restore LR */ + mtlr r0 + blr + /* * Check whether an HDSI is an HPTE not found fault or something else. * If it is an HPTE not found fault that is due to the guest accessing diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e2ef16198456..f5bbb188f18d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1482,10 +1482,12 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, err = kvmppc_mmu_init(vcpu); if (err < 0) - goto uninit_vcpu; + goto free_shared_page; return vcpu; +free_shared_page: + free_page((unsigned long)vcpu->arch.shared); uninit_vcpu: kvm_vcpu_uninit(vcpu); free_shadow_vcpu: diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c index 5c4c93dcff19..f666d74f05f5 100644 --- a/arch/powerpc/mm/dump_hashpagetable.c +++ b/arch/powerpc/mm/dump_hashpagetable.c @@ -343,7 +343,7 @@ static unsigned long hpte_find(struct pg_state *st, unsigned long ea, int psize) /* Look in secondary table */ if (slot == -1) - slot = base_hpte_find(ea, psize, true, &v, &r); + slot = base_hpte_find(ea, psize, false, &v, &r); /* No entry found */ if (slot == -1) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 52863deed65d..5fc8a010fdf0 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -581,21 +581,22 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) switch (regs->trap) { case 0x300: case 0x380: - printk(KERN_ALERT "Unable to handle kernel paging request for " - "data at address 0x%08lx\n", regs->dar); + pr_alert("BUG: %s at 0x%08lx\n", + regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" : + "Unable to handle kernel data access", regs->dar); break; case 0x400: case 0x480: - printk(KERN_ALERT "Unable to handle kernel paging request for " - "instruction fetch\n"); + pr_alert("BUG: Unable to handle kernel instruction fetch%s", + regs->nip < PAGE_SIZE ? " (NULL pointer?)\n" : "\n"); break; case 0x600: - printk(KERN_ALERT "Unable to handle kernel paging request for " - "unaligned access at address 0x%08lx\n", regs->dar); + pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n", + regs->dar); break; default: - printk(KERN_ALERT "Unable to handle kernel paging request for " - "unknown fault\n"); + pr_alert("BUG: Unable to handle unknown paging fault at 0x%08lx\n", + regs->dar); break; } printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n", diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 640cf566e986..a4b6efbf667b 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -104,6 +104,37 @@ static inline unsigned long ___tlbie(unsigned long vpn, int psize, return va; } +static inline void fixup_tlbie_vpn(unsigned long vpn, int psize, + int apsize, int ssize) +{ + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { + /* Radix flush for a hash guest */ + + unsigned long rb,rs,prs,r,ric; + + rb = PPC_BIT(52); /* IS = 2 */ + rs = 0; /* lpid = 0 */ + prs = 0; /* partition scoped */ + r = 1; /* radix format */ + ric = 0; /* RIC_FLSUH_TLB */ + + /* + * Need the extra ptesync to make sure we don't + * re-order the tlbie + */ + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), + "i"(ric), "r"(rs) : "memory"); + } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + /* Need the extra ptesync to ensure we don't reorder tlbie*/ + asm volatile("ptesync": : :"memory"); + ___tlbie(vpn, psize, apsize, ssize); + } +} + static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) { unsigned long rb; @@ -181,6 +212,7 @@ static inline void tlbie(unsigned long vpn, int psize, int apsize, asm volatile("ptesync": : :"memory"); } else { __tlbie(vpn, psize, apsize, ssize); + fixup_tlbie_vpn(vpn, psize, apsize, ssize); asm volatile("eieio; tlbsync; ptesync": : :"memory"); } if (lock_tlbie && !use_local) @@ -674,7 +706,7 @@ static void native_hpte_clear(void) */ static void native_flush_hash_range(unsigned long number, int local) { - unsigned long vpn; + unsigned long vpn = 0; unsigned long hash, index, hidx, shift, slot; struct hash_pte *hptep; unsigned long hpte_v; @@ -746,6 +778,10 @@ static void native_flush_hash_range(unsigned long number, int local) __tlbie(vpn, psize, psize, ssize); } pte_iterate_hashed_end(); } + /* + * Just do one more with the last used values. + */ + fixup_tlbie_vpn(vpn, psize, psize, ssize); asm volatile("eieio; tlbsync; ptesync":::"memory"); if (lock_tlbie) diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 58c14749bb0c..387600ecea60 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -292,10 +292,18 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend, ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot, HPTE_V_BOLTED, psize, psize, ssize); - + if (ret == -1) { + /* Try to remove a non bolted entry */ + ret = mmu_hash_ops.hpte_remove(hpteg); + if (ret != -1) + ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot, + HPTE_V_BOLTED, psize, psize, + ssize); + } if (ret < 0) break; + cond_resched(); #ifdef CONFIG_DEBUG_PAGEALLOC if (debug_pagealloc_enabled() && (paddr >> PAGE_SHIFT) < linear_map_hash_count) diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 30bf13b72e5e..3c5abfbbe60e 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -353,6 +353,14 @@ void __init mem_init(void) BUILD_BUG_ON(MMU_PAGE_COUNT > 16); #ifdef CONFIG_SWIOTLB + /* + * Some platforms (e.g. 85xx) limit DMA-able memory way below + * 4G. We force memblock to bottom-up mode to ensure that the + * memory allocated in swiotlb_init() is DMA-able. + * As it's the last memblock allocation, no need to reset it + * back to to-down. + */ + memblock_set_bottom_up(true); swiotlb_init(0); #endif diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 12f95b1f7d07..48ed34d52ffd 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -491,6 +491,7 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0); } + /* do we need fixup here ?*/ asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/ppc_mmu_32.c index 2a049fb8523d..96c52271e9c2 100644 --- a/arch/powerpc/mm/ppc_mmu_32.c +++ b/arch/powerpc/mm/ppc_mmu_32.c @@ -52,7 +52,7 @@ struct batrange { /* stores address ranges mapped by BATs */ phys_addr_t v_block_mapped(unsigned long va) { int b; - for (b = 0; b < 4; ++b) + for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b) if (va >= bat_addrs[b].start && va < bat_addrs[b].limit) return bat_addrs[b].phys + (va - bat_addrs[b].start); return 0; @@ -64,7 +64,7 @@ phys_addr_t v_block_mapped(unsigned long va) unsigned long p_block_mapped(phys_addr_t pa) { int b; - for (b = 0; b < 4; ++b) + for (b = 0; b < ARRAY_SIZE(bat_addrs); ++b) if (pa >= bat_addrs[b].phys && pa < (bat_addrs[b].limit-bat_addrs[b].start) +bat_addrs[b].phys) diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 6d9bf014b3e7..2502fe3bfb54 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -315,7 +315,7 @@ void slb_initialize(void) #endif } - get_paca()->stab_rr = SLB_NUM_BOLTED; + get_paca()->stab_rr = SLB_NUM_BOLTED - 1; lflags = SLB_VSID_KERNEL | linear_llp; vflags = SLB_VSID_KERNEL | vmalloc_llp; diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 4b295cfd5f7e..41e782f126d6 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -23,6 +23,37 @@ #define RIC_FLUSH_PWC 1 #define RIC_FLUSH_ALL 2 +static inline void __tlbie_va(unsigned long va, unsigned long pid, + unsigned long ap, unsigned long ric) +{ + unsigned long rb,rs,prs,r; + + rb = va & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = pid << PPC_BITLSHIFT(31); + prs = 1; /* process scoped */ + r = 1; /* raidx format */ + + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); +} + + +static inline void fixup_tlbie_va(unsigned long va, unsigned long pid, + unsigned long ap) +{ + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { + asm volatile("ptesync": : :"memory"); + __tlbie_va(va, 0, ap, RIC_FLUSH_TLB); + } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + asm volatile("ptesync": : :"memory"); + __tlbie_va(va, pid, ap, RIC_FLUSH_TLB); + } +} + static inline void __tlbiel_pid(unsigned long pid, int set, unsigned long ric) { @@ -68,22 +99,64 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); } -static inline void _tlbie_pid(unsigned long pid, unsigned long ric) +static inline void __tlbie_pid(unsigned long pid, unsigned long ric) { unsigned long rb,rs,prs,r; rb = PPC_BIT(53); /* IS = 1 */ rs = pid << PPC_BITLSHIFT(31); prs = 1; /* process scoped */ - r = 1; /* raidx format */ + r = 1; /* radix format */ - asm volatile("ptesync": : :"memory"); asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - asm volatile("eieio; tlbsync; ptesync": : :"memory"); trace_tlbie(0, 0, rb, rs, ric, prs, r); } +static inline void fixup_tlbie_pid(unsigned long pid) +{ + /* + * We can use any address for the invalidation, pick one which is + * probably unused as an optimisation. + */ + unsigned long va = ((1UL << 52) - 1); + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { + asm volatile("ptesync": : :"memory"); + __tlbie_pid(0, RIC_FLUSH_TLB); + } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + asm volatile("ptesync": : :"memory"); + __tlbie_va(va, pid, mmu_get_ap(MMU_PAGE_64K), RIC_FLUSH_TLB); + } +} + +static inline void _tlbie_pid(unsigned long pid, unsigned long ric) +{ + asm volatile("ptesync": : :"memory"); + + /* + * Workaround the fact that the "ric" argument to __tlbie_pid + * must be a compile-time contraint to match the "i" constraint + * in the asm statement. + */ + switch (ric) { + case RIC_FLUSH_TLB: + __tlbie_pid(pid, RIC_FLUSH_TLB); + fixup_tlbie_pid(pid); + break; + case RIC_FLUSH_PWC: + __tlbie_pid(pid, RIC_FLUSH_PWC); + break; + case RIC_FLUSH_ALL: + default: + __tlbie_pid(pid, RIC_FLUSH_ALL); + fixup_tlbie_pid(pid); + } + asm volatile("eieio; tlbsync; ptesync": : :"memory"); +} + static inline void _tlbiel_va(unsigned long va, unsigned long pid, unsigned long ap, unsigned long ric) { @@ -105,19 +178,10 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid, static inline void _tlbie_va(unsigned long va, unsigned long pid, unsigned long ap, unsigned long ric) { - unsigned long rb,rs,prs,r; - - rb = va & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = pid << PPC_BITLSHIFT(31); - prs = 1; /* process scoped */ - r = 1; /* raidx format */ - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + __tlbie_va(va, pid, ap, ric); + fixup_tlbie_va(va, pid, ap); asm volatile("eieio; tlbsync; ptesync": : :"memory"); - trace_tlbie(0, 0, rb, rs, ric, prs, r); } /* diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 3188040022c4..1c37f08bcddd 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -95,7 +95,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs) { return 0; } -static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { } +static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { } static inline u32 perf_get_misc_flags(struct pt_regs *regs) { return 0; @@ -126,7 +126,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw) static inline void power_pmu_bhrb_enable(struct perf_event *event) {} static inline void power_pmu_bhrb_disable(struct perf_event *event) {} static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {} -static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} +static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {} static void pmao_restore_workaround(bool ebb) { } static bool use_ic(u64 event) { @@ -174,7 +174,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs) * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC, the * [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA, or the SDAR_VALID bit in SIER. */ -static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) +static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { unsigned long mmcra = regs->dsisr; bool sdar_valid; @@ -435,7 +435,7 @@ static __u64 power_pmu_bhrb_to(u64 addr) } /* Processing BHRB entries */ -static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) +static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) { u64 val; u64 addr; @@ -463,8 +463,7 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) * exporting it to userspace (avoid exposure of regions * where we could have speculative execution) */ - if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) && - is_kernel_addr(addr)) + if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0) continue; /* Branches are read most recent first (ie. mfbhrb 0 is @@ -2077,12 +2076,12 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) - perf_get_data_addr(regs, &data.addr); + perf_get_data_addr(event, regs, &data.addr); if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) { struct cpu_hw_events *cpuhw; cpuhw = this_cpu_ptr(&cpu_hw_events); - power_pmu_bhrb_read(cpuhw); + power_pmu_bhrb_read(event, cpuhw); data.br_stack = &cpuhw->bhrb_stack; } diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 994e4392cac5..a0b4c22d963a 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -26,7 +26,7 @@ */ static DEFINE_MUTEX(nest_init_lock); static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc); -static struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS]; +static struct imc_pmu **per_nest_pmu_arr; static cpumask_t nest_imc_cpumask; struct imc_pmu_ref *nest_imc_refc; static int nest_pmus; @@ -286,13 +286,14 @@ static struct imc_pmu_ref *get_nest_pmu_ref(int cpu) static void nest_change_cpu_context(int old_cpu, int new_cpu) { struct imc_pmu **pn = per_nest_pmu_arr; - int i; if (old_cpu < 0 || new_cpu < 0) return; - for (i = 0; *pn && i < IMC_MAX_PMUS; i++, pn++) + while (*pn) { perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu); + pn++; + } } static int ppc_nest_imc_cpu_offline(unsigned int cpu) @@ -1188,6 +1189,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) if (nest_pmus == 1) { cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); kfree(nest_imc_refc); + kfree(per_nest_pmu_arr); } if (nest_pmus > 0) @@ -1236,6 +1238,13 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent, return -ENOMEM; /* Needed for hotplug/migration */ + if (!per_nest_pmu_arr) { + per_nest_pmu_arr = kcalloc(get_max_nest_dev() + 1, + sizeof(struct imc_pmu *), + GFP_KERNEL); + if (!per_nest_pmu_arr) + return -ENOMEM; + } per_nest_pmu_arr[pmu_index] = pmu_ptr; break; case IMC_DOMAIN_CORE: @@ -1318,6 +1327,8 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id ret = nest_pmu_cpumask_init(); if (ret) { mutex_unlock(&nest_init_lock); + kfree(nest_imc_refc); + kfree(per_nest_pmu_arr); goto err_free; } } diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index cf9c35aa0cf4..7ecea7143e58 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -150,6 +150,14 @@ static bool is_thresh_cmp_valid(u64 event) return true; } +static unsigned int dc_ic_rld_quad_l1_sel(u64 event) +{ + unsigned int cache; + + cache = (event >> EVENT_CACHE_SEL_SHIFT) & MMCR1_DC_IC_QUAL_MASK; + return cache; +} + static inline u64 isa207_find_source(u64 idx, u32 sub_idx) { u64 ret = PERF_MEM_NA; @@ -290,10 +298,10 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp) * have a cache selector of zero. The bank selector (bit 3) is * irrelevant, as long as the rest of the value is 0. */ - if (cache & 0x7) + if (!cpu_has_feature(CPU_FTR_ARCH_300) && (cache & 0x7)) return -1; - } else if (event & EVENT_IS_L1) { + } else if (cpu_has_feature(CPU_FTR_ARCH_300) || (event & EVENT_IS_L1)) { mask |= CNST_L1_QUAL_MASK; value |= CNST_L1_QUAL_VAL(cache); } @@ -396,11 +404,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev, /* In continuous sampling mode, update SDAR on TLB miss */ mmcra_sdar_mode(event[i], &mmcra); - if (event[i] & EVENT_IS_L1) { - cache = event[i] >> EVENT_CACHE_SEL_SHIFT; - mmcr1 |= (cache & 1) << MMCR1_IC_QUAL_SHIFT; - cache >>= 1; - mmcr1 |= (cache & 1) << MMCR1_DC_QUAL_SHIFT; + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + cache = dc_ic_rld_quad_l1_sel(event[i]); + mmcr1 |= (cache) << MMCR1_DC_IC_QUAL_SHIFT; + } else { + if (event[i] & EVENT_IS_L1) { + cache = dc_ic_rld_quad_l1_sel(event[i]); + mmcr1 |= (cache) << MMCR1_DC_IC_QUAL_SHIFT; + } } if (is_event_marked(event[i])) { diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 6c737d675792..493e5cc5fa8a 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -232,8 +232,8 @@ #define MMCR1_COMBINE_SHIFT(pmc) (35 - ((pmc) - 1)) #define MMCR1_PMCSEL_SHIFT(pmc) (24 - (((pmc) - 1)) * 8) #define MMCR1_FAB_SHIFT 36 -#define MMCR1_DC_QUAL_SHIFT 47 -#define MMCR1_IC_QUAL_SHIFT 46 +#define MMCR1_DC_IC_QUAL_MASK 0x3 +#define MMCR1_DC_IC_QUAL_SHIFT 46 /* MMCR1 Combine bits macro for power9 */ #define p9_MMCR1_COMBINE_SHIFT(pmc) (38 - ((pmc - 1) * 2)) diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c index d75c9816a5c9..2b6589fe812d 100644 --- a/arch/powerpc/platforms/83xx/misc.c +++ b/arch/powerpc/platforms/83xx/misc.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include @@ -150,3 +151,19 @@ void __init mpc83xx_setup_arch(void) mpc83xx_setup_pci(); } + +int machine_check_83xx(struct pt_regs *regs) +{ + u32 mask = 1 << (31 - IPIC_MCP_WDT); + + if (!(regs->msr & SRR1_MCE_MCP) || !(ipic_get_mcp_status() & mask)) + return machine_check_generic(regs); + ipic_clear_mcp_status(mask); + + if (debugger_fault_handler(regs)) + return 1; + + die("Watchdog NMI Reset", regs, 0); + + return 1; +} diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index 8864065eba22..fa2965c96155 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -548,8 +548,8 @@ static void pnv_eeh_get_phb_diag(struct eeh_pe *pe) static int pnv_eeh_get_phb_state(struct eeh_pe *pe) { struct pnv_phb *phb = pe->phb->private_data; - u8 fstate; - __be16 pcierr; + u8 fstate = 0; + __be16 pcierr = 0; s64 rc; int result = 0; @@ -587,8 +587,8 @@ static int pnv_eeh_get_phb_state(struct eeh_pe *pe) static int pnv_eeh_get_pe_state(struct eeh_pe *pe) { struct pnv_phb *phb = pe->phb->private_data; - u8 fstate; - __be16 pcierr; + u8 fstate = 0; + __be16 pcierr = 0; s64 rc; int result; diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c index c9a6d4f3403c..cfbd242c3e01 100644 --- a/arch/powerpc/platforms/powernv/memtrace.c +++ b/arch/powerpc/platforms/powernv/memtrace.c @@ -99,6 +99,7 @@ static int change_memblock_state(struct memory_block *mem, void *arg) return 0; } +/* called with device_hotplug_lock held */ static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages) { u64 end_pfn = start_pfn + nr_pages - 1; @@ -139,6 +140,7 @@ static u64 memtrace_alloc_node(u32 nid, u64 size) /* Trace memory needs to be aligned to the size */ end_pfn = round_down(end_pfn - nr_pages, nr_pages); + lock_device_hotplug(); for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) { if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) { /* @@ -147,7 +149,6 @@ static u64 memtrace_alloc_node(u32 nid, u64 size) * we never try to remove memory that spans two iomem * resources. */ - lock_device_hotplug(); end_pfn = base_pfn + nr_pages; for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) { remove_memory(nid, pfn << PAGE_SHIFT, bytes); @@ -156,6 +157,7 @@ static u64 memtrace_alloc_node(u32 nid, u64 size) return base_pfn << PAGE_SHIFT; } } + unlock_device_hotplug(); return 0; } diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 7b93191dc2e3..dad23cd8a1de 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -159,6 +159,22 @@ static void disable_core_pmu_counters(void) put_online_cpus(); } +int get_max_nest_dev(void) +{ + struct device_node *node; + u32 pmu_units = 0, type; + + for_each_compatible_node(node, NULL, IMC_DTB_UNIT_COMPAT) { + if (of_property_read_u32(node, "type", &type)) + continue; + + if (type == IMC_TYPE_CHIP) + pmu_units++; + } + + return pmu_units; +} + static int opal_imc_counters_probe(struct platform_device *pdev) { struct device_node *imc_dev = pdev->dev.of_node; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index ddef22e00ddd..36ef504eeab3 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -598,8 +598,8 @@ static int pnv_ioda_unfreeze_pe(struct pnv_phb *phb, int pe_no, int opt) static int pnv_ioda_get_pe_state(struct pnv_phb *phb, int pe_no) { struct pnv_ioda_pe *slave, *pe; - u8 fstate, state; - __be16 pcierr; + u8 fstate = 0, state; + __be16 pcierr = 0; s64 rc; /* Sanity check on PE number */ @@ -1523,6 +1523,10 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) /* Reserve PE for each VF */ for (vf_index = 0; vf_index < num_vfs; vf_index++) { + int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index); + int vf_bus = pci_iov_virtfn_bus(pdev, vf_index); + struct pci_dn *vf_pdn; + if (pdn->m64_single_mode) pe_num = pdn->pe_num_map[vf_index]; else @@ -1535,13 +1539,11 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) pe->pbus = NULL; pe->parent_dev = pdev; pe->mve_number = -1; - pe->rid = (pci_iov_virtfn_bus(pdev, vf_index) << 8) | - pci_iov_virtfn_devfn(pdev, vf_index); + pe->rid = (vf_bus << 8) | vf_devfn; pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n", hose->global_number, pdev->bus->number, - PCI_SLOT(pci_iov_virtfn_devfn(pdev, vf_index)), - PCI_FUNC(pci_iov_virtfn_devfn(pdev, vf_index)), pe_num); + PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num); if (pnv_ioda_configure_pe(phb, pe)) { /* XXX What do we do here ? */ @@ -1555,6 +1557,15 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs) list_add_tail(&pe->list, &phb->ioda.pe_list); mutex_unlock(&phb->ioda.pe_list_mutex); + /* associate this pe to it's pdn */ + list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) { + if (vf_pdn->busno == vf_bus && + vf_pdn->devfn == vf_devfn) { + vf_pdn->pe_number = pe_num; + break; + } + } + pnv_pci_ioda2_setup_dma_pe(phb, pe); } } diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 5422f4a6317c..844ca1886063 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -600,8 +600,8 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no) static void pnv_pci_config_check_eeh(struct pci_dn *pdn) { struct pnv_phb *phb = pdn->phb->private_data; - u8 fstate; - __be16 pcierr; + u8 fstate = 0; + __be16 pcierr = 0; unsigned int pe_no; s64 rc; @@ -978,16 +978,12 @@ void pnv_pci_dma_dev_setup(struct pci_dev *pdev) struct pnv_phb *phb = hose->private_data; #ifdef CONFIG_PCI_IOV struct pnv_ioda_pe *pe; - struct pci_dn *pdn; /* Fix the VF pdn PE number */ if (pdev->is_virtfn) { - pdn = pci_get_pdn(pdev); - WARN_ON(pdn->pe_number != IODA_INVALID_PE); list_for_each_entry(pe, &phb->ioda.pe_list, list) { if (pe->rid == ((pdev->bus->number << 8) | (pdev->devfn & 0xff))) { - pdn->pe_number = pe->pe_number; pe->pdev = pdev; break; } @@ -1118,6 +1114,23 @@ void __init pnv_pci_init(void) if (!firmware_has_feature(FW_FEATURE_OPAL)) return; +#ifdef CONFIG_PCIEPORTBUS + /* + * On PowerNV PCIe devices are (currently) managed in cooperation + * with firmware. This isn't *strictly* required, but there's enough + * assumptions baked into both firmware and the platform code that + * it's unwise to allow the portbus services to be used. + * + * We need to fix this eventually, but for now set this flag to disable + * the portbus driver. The AER service isn't required since that AER + * events are handled via EEH. The pciehp hotplug driver can't work + * without kernel changes (and portbus binding breaks pnv_php). The + * other services also require some thinking about how we're going + * to integrate them. + */ + pcie_ports_disabled = true; +#endif + /* Look for IODA IO-Hubs. */ for_each_compatible_node(np, NULL, "ibm,ioda-hub") { pnv_pci_init_ioda_hub(np); diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c index 3db53e8aff92..9b2ef76578f0 100644 --- a/arch/powerpc/platforms/ps3/os-area.c +++ b/arch/powerpc/platforms/ps3/os-area.c @@ -664,7 +664,7 @@ static int update_flash_db(void) db_set_64(db, &os_area_db_id_rtc_diff, saved_params.rtc_diff); count = os_area_flash_write(db, sizeof(struct os_area_db), pos); - if (count < sizeof(struct os_area_db)) { + if (count < 0 || count < sizeof(struct os_area_db)) { pr_debug("%s: os_area_flash_write failed %zd\n", __func__, count); error = count < 0 ? count : -EIO; diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index 560aefde06c0..5a8e4caac369 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c @@ -425,6 +425,10 @@ static struct bus_type cmm_subsys = { .dev_name = "cmm", }; +static void cmm_release_device(struct device *dev) +{ +} + /** * cmm_sysfs_register - Register with sysfs * @@ -440,6 +444,7 @@ static int cmm_sysfs_register(struct device *dev) dev->id = 0; dev->bus = &cmm_subsys; + dev->release = cmm_release_device; if ((rc = device_register(dev))) goto subsys_unregister; diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index f4e6565dd7a9..fb2876a84fbe 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -63,6 +63,10 @@ static struct property *dlpar_parse_cc_property(struct cc_workarea *ccwa) name = (char *)ccwa + be32_to_cpu(ccwa->name_offset); prop->name = kstrdup(name, GFP_KERNEL); + if (!prop->name) { + dlpar_free_cc_property(prop); + return NULL; + } prop->length = be32_to_cpu(ccwa->prop_length); value = (char *)ccwa + be32_to_cpu(ccwa->prop_offset); diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 18014cdeb590..ef6595153642 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -149,7 +149,7 @@ static int dtl_start(struct dtl *dtl) /* Register our dtl buffer with the hypervisor. The HV expects the * buffer size to be passed in the second word of the buffer */ - ((u32 *)dtl->buf)[1] = DISPATCH_LOG_BYTES; + ((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES); hwcpu = get_hard_smp_processor_id(dtl->cpu); addr = __pa(dtl->buf); @@ -184,7 +184,7 @@ static void dtl_stop(struct dtl *dtl) static u64 dtl_current_index(struct dtl *dtl) { - return lppaca_of(dtl->cpu).dtl_idx; + return be64_to_cpu(lppaca_of(dtl->cpu).dtl_idx); } #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 93e09f108ca1..a0847be0b035 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -295,6 +295,7 @@ static u32 lookup_lmb_associativity_index(struct of_drconf_cell *lmb) aa_index = find_aa_index(dr_node, ala_prop, lmb_assoc); + of_node_put(dr_node); dlpar_free_cc_nodes(lmb_node); return aa_index; } @@ -451,8 +452,10 @@ static bool lmb_is_removable(struct of_drconf_cell *lmb) for (i = 0; i < scns_per_block; i++) { pfn = PFN_DOWN(phys_addr); - if (!pfn_present(pfn)) + if (!pfn_present(pfn)) { + phys_addr += MIN_MEMORY_BLOCK_SIZE; continue; + } rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION); phys_addr += MIN_MEMORY_BLOCK_SIZE; @@ -787,7 +790,7 @@ static int dlpar_add_lmb(struct of_drconf_cell *lmb) nid = memory_add_physaddr_to_nid(lmb->base_addr); /* Add the memory */ - rc = add_memory(nid, lmb->base_addr, block_sz); + rc = __add_memory(nid, lmb->base_addr, block_sz); if (rc) { dlpar_remove_device_tree_lmb(lmb); return rc; diff --git a/arch/powerpc/platforms/pseries/hvconsole.c b/arch/powerpc/platforms/pseries/hvconsole.c index 74da18de853a..73ec15cd2708 100644 --- a/arch/powerpc/platforms/pseries/hvconsole.c +++ b/arch/powerpc/platforms/pseries/hvconsole.c @@ -62,7 +62,7 @@ EXPORT_SYMBOL(hvc_get_chars); * @vtermno: The vtermno or unit_address of the adapter from which the data * originated. * @buf: The character buffer that contains the character data to send to - * firmware. + * firmware. Must be at least 16 bytes, even if count is less than 16. * @count: Send this number of characters. */ int hvc_put_chars(uint32_t vtermno, const char *buf, int count) diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 7c181467d0ad..0e4e22dfa6b5 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -168,10 +168,10 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) return be64_to_cpu(*tcep); } -static void tce_free_pSeriesLP(struct iommu_table*, long, long); +static void tce_free_pSeriesLP(unsigned long liobn, long, long); static void tce_freemulti_pSeriesLP(struct iommu_table*, long, long); -static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, +static int tce_build_pSeriesLP(unsigned long liobn, long tcenum, long tceshift, long npages, unsigned long uaddr, enum dma_data_direction direction, unsigned long attrs) @@ -182,25 +182,25 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, int ret = 0; long tcenum_start = tcenum, npages_start = npages; - rpn = __pa(uaddr) >> TCE_SHIFT; + rpn = __pa(uaddr) >> tceshift; proto_tce = TCE_PCI_READ; if (direction != DMA_TO_DEVICE) proto_tce |= TCE_PCI_WRITE; while (npages--) { - tce = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; - rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, tce); + tce = proto_tce | (rpn & TCE_RPN_MASK) << tceshift; + rc = plpar_tce_put((u64)liobn, (u64)tcenum << tceshift, tce); if (unlikely(rc == H_NOT_ENOUGH_RESOURCES)) { ret = (int)rc; - tce_free_pSeriesLP(tbl, tcenum_start, + tce_free_pSeriesLP(liobn, tcenum_start, (npages_start - (npages + 1))); break; } if (rc && printk_ratelimit()) { printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); - printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\tindex = 0x%llx\n", (u64)liobn); printk("\ttcenum = 0x%llx\n", (u64)tcenum); printk("\ttce val = 0x%llx\n", tce ); dump_stack(); @@ -229,7 +229,8 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, unsigned long flags; if ((npages == 1) || !firmware_has_feature(FW_FEATURE_MULTITCE)) { - return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, + return tce_build_pSeriesLP(tbl->it_index, tcenum, + tbl->it_page_shift, npages, uaddr, direction, attrs); } @@ -245,8 +246,9 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, /* If allocation fails, fall back to the loop implementation */ if (!tcep) { local_irq_restore(flags); - return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, - direction, attrs); + return tce_build_pSeriesLP(tbl->it_index, tcenum, + tbl->it_page_shift, + npages, uaddr, direction, attrs); } __this_cpu_write(tce_page, tcep); } @@ -297,16 +299,16 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, return ret; } -static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages) +static void tce_free_pSeriesLP(unsigned long liobn, long tcenum, long npages) { u64 rc; while (npages--) { - rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0); + rc = plpar_tce_put((u64)liobn, (u64)tcenum << 12, 0); if (rc && printk_ratelimit()) { printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); - printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\tindex = 0x%llx\n", (u64)liobn); printk("\ttcenum = 0x%llx\n", (u64)tcenum); dump_stack(); } @@ -321,7 +323,7 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n u64 rc; if (!firmware_has_feature(FW_FEATURE_MULTITCE)) - return tce_free_pSeriesLP(tbl, tcenum, npages); + return tce_free_pSeriesLP(tbl->it_index, tcenum, npages); rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages); @@ -436,6 +438,19 @@ static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn, u64 rc = 0; long l, limit; + if (!firmware_has_feature(FW_FEATURE_MULTITCE)) { + unsigned long tceshift = be32_to_cpu(maprange->tce_shift); + unsigned long dmastart = (start_pfn << PAGE_SHIFT) + + be64_to_cpu(maprange->dma_base); + unsigned long tcenum = dmastart >> tceshift; + unsigned long npages = num_pfn << PAGE_SHIFT >> tceshift; + void *uaddr = __va(start_pfn << PAGE_SHIFT); + + return tce_build_pSeriesLP(be32_to_cpu(maprange->liobn), + tcenum, tceshift, npages, (unsigned long) uaddr, + DMA_BIDIRECTIONAL, 0); + } + local_irq_disable(); /* to protect tcep and the page behind it */ tcep = __this_cpu_read(tce_page); diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index eb738ef57792..c0ae3847b8db 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -48,6 +48,7 @@ #include #include #include +#include #include "pseries.h" @@ -1036,3 +1037,56 @@ static int __init reserve_vrma_context_id(void) return 0; } machine_device_initcall(pseries, reserve_vrma_context_id); + +#ifdef CONFIG_DEBUG_FS +/* debugfs file interface for vpa data */ +static ssize_t vpa_file_read(struct file *filp, char __user *buf, size_t len, + loff_t *pos) +{ + int cpu = (long)filp->private_data; + struct lppaca *lppaca = &lppaca_of(cpu); + + return simple_read_from_buffer(buf, len, pos, lppaca, + sizeof(struct lppaca)); +} + +static const struct file_operations vpa_fops = { + .open = simple_open, + .read = vpa_file_read, + .llseek = default_llseek, +}; + +static int __init vpa_debugfs_init(void) +{ + char name[16]; + long i; + static struct dentry *vpa_dir; + + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) + return 0; + + vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root); + if (!vpa_dir) { + pr_warn("%s: can't create vpa root dir\n", __func__); + return -ENOMEM; + } + + /* set up the per-cpu vpa file*/ + for_each_possible_cpu(i) { + struct dentry *d; + + sprintf(name, "cpu-%ld", i); + + d = debugfs_create_file(name, 0400, vpa_dir, (void *)i, + &vpa_fops); + if (!d) { + pr_warn("%s: can't create per-cpu vpa file\n", + __func__); + return -ENOMEM; + } + } + + return 0; +} +machine_arch_initcall(pseries, vpa_debugfs_init); +#endif /* CONFIG_DEBUG_FS */ diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index 9739a055e5f7..2d3668acb6ef 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -23,6 +23,7 @@ #include #include #include "pseries.h" +#include "../../kernel/cacheinfo.h" static struct kobject *mobility_kobj; @@ -359,11 +360,20 @@ void post_mobility_fixup(void) */ cpus_read_lock(); + /* + * It's common for the destination firmware to replace cache + * nodes. Release all of the cacheinfo hierarchy's references + * before updating the device tree. + */ + cacheinfo_teardown(); + rc = pseries_devicetree_update(MIGRATION_SCOPE); if (rc) printk(KERN_ERR "Post-mobility device tree update " "failed: %d\n", rc); + cacheinfo_rebuild(); + cpus_read_unlock(); /* Possibly switch to a new RFI flush type */ diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index d86938260a86..fc778865a412 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -1195,6 +1195,8 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) if (tbl == NULL) return NULL; + kref_init(&tbl->it_kref); + of_parse_dma_window(dev->dev.of_node, dma_window, &tbl->it_index, &offset, &size); diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 818fc5351591..a820370883d9 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -967,6 +967,15 @@ static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw) xd->target = XIVE_INVALID_TARGET; irq_set_handler_data(virq, xd); + /* + * Turn OFF by default the interrupt being mapped. A side + * effect of this check is the mapping the ESB page of the + * interrupt in the Linux address space. This prevents page + * fault issues in the crash handler which masks all + * interrupts. + */ + xive_esb_read(xd, XIVE_ESB_SET_PQ_01); + return 0; } @@ -1008,12 +1017,13 @@ static void xive_ipi_eoi(struct irq_data *d) { struct xive_cpu *xc = __this_cpu_read(xive_cpu); - DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n", - d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio); - /* Handle possible race with unplug and drop stale IPIs */ if (!xc) return; + + DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n", + d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio); + xive_do_source_eoi(xc->hw_ipi, &xc->ipi_data); xive_do_queue_eoi(xc); } diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 091f1d0d0af1..7fc41bf30fd5 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -293,20 +293,28 @@ static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data) data->esb_shift = esb_shift; data->trig_page = trig_page; + data->hw_irq = hw_irq; + /* * No chip-id for the sPAPR backend. This has an impact how we * pick a target. See xive_pick_irq_target(). */ data->src_chip = XIVE_INVALID_CHIP_ID; + /* + * When the H_INT_ESB flag is set, the H_INT_ESB hcall should + * be used for interrupt management. Skip the remapping of the + * ESB pages which are not available. + */ + if (data->flags & XIVE_IRQ_FLAG_H_INT_ESB) + return 0; + data->eoi_mmio = ioremap(data->eoi_page, 1u << data->esb_shift); if (!data->eoi_mmio) { pr_err("Failed to map EOI page for irq 0x%x\n", hw_irq); return -ENOMEM; } - data->hw_irq = hw_irq; - /* Full function page supports trigger */ if (flags & XIVE_SRC_TRIGGER) { data->trig_mmio = data->eoi_mmio; diff --git a/arch/powerpc/tools/relocs_check.sh b/arch/powerpc/tools/relocs_check.sh index ec2d5c835170..d6c16e7faa38 100755 --- a/arch/powerpc/tools/relocs_check.sh +++ b/arch/powerpc/tools/relocs_check.sh @@ -23,7 +23,7 @@ objdump="$1" vmlinux="$2" bad_relocs=$( -"$objdump" -R "$vmlinux" | +$objdump -R "$vmlinux" | # Only look at relocation lines. grep -E '\:' | awk '{print $1}' ) BRANCHES=$( -"$objdump" -R "$vmlinux" -D --start-address=0xc000000000000000 \ +$objdump -R "$vmlinux" -D --start-address=0xc000000000000000 \ --stop-address=${end_intr} | grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]*b" | grep -v '\<__start_initialization_multiplatform>' | diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index 549e99e71112..a60c44b4a3e5 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for xmon -# Disable clang warning for using setjmp without setjmp.h header -subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header) +# Avoid clang warnings around longjmp/setjmp declarations +subdir-ccflags-y := -ffreestanding subdir-ccflags-$(CONFIG_PPC_WERROR) += -Werror @@ -13,6 +13,12 @@ UBSAN_SANITIZE := n ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst -mno-sched-epilog,,$(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))) +ifdef CONFIG_CC_IS_CLANG +# clang stores addresses on the stack causing the frame size to blow +# out. See https://github.com/ClangBuiltLinux/linux/issues/252 +KBUILD_CFLAGS += -Wframe-larger-than=4096 +endif + ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y += xmon.o nonstdio.o spr_access.o diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 5a739588aa50..0885993b2fb4 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1830,15 +1830,14 @@ static void dump_300_sprs(void) printf("pidr = %.16lx tidr = %.16lx\n", mfspr(SPRN_PID), mfspr(SPRN_TIDR)); - printf("asdr = %.16lx psscr = %.16lx\n", - mfspr(SPRN_ASDR), hv ? mfspr(SPRN_PSSCR) - : mfspr(SPRN_PSSCR_PR)); + printf("psscr = %.16lx\n", + hv ? mfspr(SPRN_PSSCR) : mfspr(SPRN_PSSCR_PR)); if (!hv) return; - printf("ptcr = %.16lx\n", - mfspr(SPRN_PTCR)); + printf("ptcr = %.16lx asdr = %.16lx\n", + mfspr(SPRN_PTCR), mfspr(SPRN_ASDR)); #endif } @@ -3293,7 +3292,7 @@ void dump_segments(void) printf("sr0-15 ="); for (i = 0; i < 16; ++i) - printf(" %x", mfsrin(i)); + printf(" %x", mfsrin(i << 28)); printf("\n"); } #endif diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 41e3908b397f..5f2e272895ff 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -33,6 +33,8 @@ #define ARCH_HAS_PREPARE_HUGEPAGE #define ARCH_HAS_HUGEPAGE_CLEAR_FLUSH +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA + #include #ifndef __ASSEMBLY__ @@ -40,7 +42,7 @@ void __storage_key_init_range(unsigned long start, unsigned long end); static inline void storage_key_init_range(unsigned long start, unsigned long end) { - if (PAGE_DEFAULT_KEY) + if (PAGE_DEFAULT_KEY != 0) __storage_key_init_range(start, end); } diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index bbe99cb8219d..11857fea993c 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -70,7 +70,12 @@ static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long address) crst_table_init(table, _REGION2_ENTRY_EMPTY); return (p4d_t *) table; } -#define p4d_free(mm, p4d) crst_table_free(mm, (unsigned long *) p4d) + +static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) +{ + if (!mm_p4d_folded(mm)) + crst_table_free(mm, (unsigned long *) p4d); +} static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) { @@ -79,7 +84,12 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) crst_table_init(table, _REGION3_ENTRY_EMPTY); return (pud_t *) table; } -#define pud_free(mm, pud) crst_table_free(mm, (unsigned long *) pud) + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + if (!mm_pud_folded(mm)) + crst_table_free(mm, (unsigned long *) pud); +} static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) { @@ -97,6 +107,8 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long vmaddr) static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) { + if (mm_pmd_folded(mm)) + return; pgtable_pmd_page_dtor(virt_to_page(pmd)); crst_table_free(mm, (unsigned long *) pmd); } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index d7fe9838084d..328710b386e3 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1126,8 +1126,6 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { - if (!MACHINE_HAS_NX) - pte_val(entry) &= ~_PAGE_NOEXEC; if (pte_present(entry)) pte_val(entry) &= ~_PAGE_UNUSED; if (mm_has_pgste(mm)) @@ -1144,6 +1142,8 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); + if (!MACHINE_HAS_NX) + pte_val(__pte) &= ~_PAGE_NOEXEC; return pte_mkyoung(__pte); } diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h index 64539c221672..b6a4ce9dafaf 100644 --- a/arch/s390/include/asm/timex.h +++ b/arch/s390/include/asm/timex.h @@ -10,8 +10,9 @@ #ifndef _ASM_S390_TIMEX_H #define _ASM_S390_TIMEX_H -#include +#include #include +#include /* The value of the TOD clock for 1.1.1970. */ #define TOD_UNIX_EPOCH 0x7d91048bca000000ULL @@ -154,7 +155,7 @@ static inline void get_tod_clock_ext(char *clk) static inline unsigned long long get_tod_clock(void) { - unsigned char clk[STORE_CLOCK_EXT_SIZE]; + char clk[STORE_CLOCK_EXT_SIZE]; get_tod_clock_ext(clk); return *((unsigned long long *)&clk[1]); @@ -186,15 +187,18 @@ extern unsigned char tod_clock_base[16] __aligned(8); /** * get_clock_monotonic - returns current time in clock rate units * - * The caller must ensure that preemption is disabled. * The clock and tod_clock_base get changed via stop_machine. - * Therefore preemption must be disabled when calling this - * function, otherwise the returned value is not guaranteed to - * be monotonic. + * Therefore preemption must be disabled, otherwise the returned + * value is not guaranteed to be monotonic. */ static inline unsigned long long get_tod_clock_monotonic(void) { - return get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; + unsigned long long tod; + + preempt_disable_notrace(); + tod = get_tod_clock() - *(unsigned long long *) &tod_clock_base[1]; + preempt_enable_notrace(); + return tod; } /** diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 689eae8d3859..bd7a19a0aecf 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -95,7 +95,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); __rc; \ }) -static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) +static __always_inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { unsigned long spec = 0x810000UL; int rc; @@ -125,7 +125,7 @@ static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) return rc; } -static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) +static __always_inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) { unsigned long spec = 0x81UL; int rc; diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 2394557653d5..6d154069c962 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1930,10 +1930,11 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) ptr += sprintf(ptr, "%%c%i", value); else if (operand->flags & OPERAND_VR) ptr += sprintf(ptr, "%%v%i", value); - else if (operand->flags & OPERAND_PCREL) - ptr += sprintf(ptr, "%lx", (signed int) value - + addr); - else if (operand->flags & OPERAND_SIGNED) + else if (operand->flags & OPERAND_PCREL) { + void *pcrel = (void *)((int)value + addr); + + ptr += sprintf(ptr, "%px", pcrel); + } else if (operand->flags & OPERAND_SIGNED) ptr += sprintf(ptr, "%i", value); else ptr += sprintf(ptr, "%u", value); @@ -2005,7 +2006,7 @@ void show_code(struct pt_regs *regs) else *ptr++ = ' '; addr = regs->psw.addr + start - 32; - ptr += sprintf(ptr, "%016lx: ", addr); + ptr += sprintf(ptr, "%px: ", (void *)addr); if (start + opsize >= end) break; for (i = 0; i < opsize; i++) @@ -2033,7 +2034,7 @@ void print_fn_code(unsigned char *code, unsigned long len) opsize = insn_length(*code); if (opsize > len) break; - ptr += sprintf(ptr, "%p: ", code); + ptr += sprintf(ptr, "%px: ", code); for (i = 0; i < opsize; i++) ptr += sprintf(ptr, "%02x", code[i]); *ptr++ = '\t'; diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index b9d8fe45737a..8f8456816d83 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -69,18 +69,26 @@ DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); static ssize_t show_idle_time(struct device *dev, struct device_attribute *attr, char *buf) { + unsigned long long now, idle_time, idle_enter, idle_exit, in_idle; struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long long now, idle_time, idle_enter, idle_exit; unsigned int seq; do { - now = get_tod_clock(); seq = read_seqcount_begin(&idle->seqcount); idle_time = READ_ONCE(idle->idle_time); idle_enter = READ_ONCE(idle->clock_idle_enter); idle_exit = READ_ONCE(idle->clock_idle_exit); } while (read_seqcount_retry(&idle->seqcount, seq)); - idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; + in_idle = 0; + now = get_tod_clock(); + if (idle_enter) { + if (idle_exit) { + in_idle = idle_exit - idle_enter; + } else if (now > idle_enter) { + in_idle = now - idle_enter; + } + } + idle_time += in_idle; return sprintf(buf, "%llu\n", idle_time >> 12); } DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); @@ -88,17 +96,24 @@ DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); u64 arch_cpu_idle_time(int cpu) { struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); - unsigned long long now, idle_enter, idle_exit; + unsigned long long now, idle_enter, idle_exit, in_idle; unsigned int seq; do { - now = get_tod_clock(); seq = read_seqcount_begin(&idle->seqcount); idle_enter = READ_ONCE(idle->clock_idle_enter); idle_exit = READ_ONCE(idle->clock_idle_exit); } while (read_seqcount_retry(&idle->seqcount, seq)); - - return cputime_to_nsecs(idle_enter ? ((idle_exit ?: now) - idle_enter) : 0); + in_idle = 0; + now = get_tod_clock(); + if (idle_enter) { + if (idle_exit) { + in_idle = idle_exit - idle_enter; + } else if (now > idle_enter) { + in_idle = now - idle_enter; + } + } + return cputime_to_nsecs(in_idle); } void arch_cpu_idle_enter(void) diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 27110f3294ed..0cfd5a83a1da 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -25,6 +25,12 @@ ENTRY(ftrace_stub) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) #define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) +#ifdef __PACK_STACK +/* allocate just enough for r14, r15 and backchain */ +#define TRACED_FUNC_FRAME_SIZE 24 +#else +#define TRACED_FUNC_FRAME_SIZE STACK_FRAME_OVERHEAD +#endif ENTRY(_mcount) BR_EX %r14 @@ -38,9 +44,16 @@ ENTRY(ftrace_caller) #ifndef CC_USING_HOTPATCH aghi %r0,MCOUNT_RETURN_FIXUP #endif - aghi %r15,-STACK_FRAME_SIZE + # allocate stack frame for ftrace_caller to contain traced function + aghi %r15,-TRACED_FUNC_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) + stg %r0,(__SF_GPRS+8*8)(%r15) + stg %r15,(__SF_GPRS+9*8)(%r15) + # allocate pt_regs and stack frame for ftrace_trace_function + aghi %r15,-STACK_FRAME_SIZE stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) + aghi %r1,-TRACED_FUNC_FRAME_SIZE + stg %r1,__SF_BACKCHAIN(%r15) stg %r0,(STACK_PTREGS_PSW+8)(%r15) stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index d99155793c26..b652593d7de6 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -185,7 +185,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb, gfp_t gfp_flags) { int i, rc; - unsigned long *new, *tail; + unsigned long *new, *tail, *tail_prev = NULL; if (!sfb->sdbt || !sfb->tail) return -EINVAL; @@ -224,6 +224,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, sfb->num_sdbt++; /* Link current page to tail of chain */ *tail = (unsigned long)(void *) new + 1; + tail_prev = tail; tail = new; } @@ -233,10 +234,22 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, * issue, a new realloc call (if required) might succeed. */ rc = alloc_sample_data_block(tail, gfp_flags); - if (rc) + if (rc) { + /* Undo last SDBT. An SDBT with no SDB at its first + * entry but with an SDBT entry instead can not be + * handled by the interrupt handler code. + * Avoid this situation. + */ + if (tail_prev) { + sfb->num_sdbt--; + free_page((unsigned long) new); + tail = tail_prev; + } break; + } sfb->num_sdb++; tail++; + tail_prev = new = NULL; /* Allocated at least one SBD */ } /* Link sampling buffer to its origin */ @@ -1281,18 +1294,28 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) */ if (flush_all && done) break; - - /* If an event overflow happened, discard samples by - * processing any remaining sample-data-blocks. - */ - if (event_overflow) - flush_all = 1; } /* Account sample overflows in the event hardware structure */ if (sampl_overflow) OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) + sampl_overflow, 1 + num_sdb); + + /* Perf_event_overflow() and perf_event_account_interrupt() limit + * the interrupt rate to an upper limit. Roughly 1000 samples per + * task tick. + * Hitting this limit results in a large number + * of throttled REF_REPORT_THROTTLE entries and the samples + * are dropped. + * Slightly increase the interval to avoid hitting this limit. + */ + if (event_overflow) { + SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10); + debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n", + __func__, + DIV_ROUND_UP(SAMPL_RATE(hwc), 10)); + } + if (sampl_overflow || event_overflow) debug_sprintf_event(sfdbg, 4, "hw_perf_event_update: " "overflow stats: sample=%llu event=%llu\n", @@ -1610,14 +1633,17 @@ static int __init init_cpum_sampling_pmu(void) } sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); - if (!sfdbg) + if (!sfdbg) { pr_err("Registering for s390dbf failed\n"); + return -ENOMEM; + } debug_register_view(sfdbg, &debug_sprintf_view); err = register_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert); if (err) { pr_cpumsf_err(RS_INIT_FAILURE_ALRT); + debug_unregister(sfdbg); goto out; } @@ -1626,6 +1652,7 @@ static int __init init_cpum_sampling_pmu(void) pr_cpumsf_err(RS_INIT_FAILURE_PERF); unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert); + debug_unregister(sfdbg); goto out; } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 27258db640d7..b649a6538350 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -725,39 +725,67 @@ static void __ref smp_get_core_info(struct sclp_core_info *info, int early) static int smp_add_present_cpu(int cpu); -static int __smp_rescan_cpus(struct sclp_core_info *info, int sysfs_add) +static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, + bool configured, bool early) { struct pcpu *pcpu; - cpumask_t avail; - int cpu, nr, i, j; + int cpu, nr, i; u16 address; nr = 0; - cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); - cpu = cpumask_first(&avail); - for (i = 0; (i < info->combined) && (cpu < nr_cpu_ids); i++) { - if (sclp.has_core_type && info->core[i].type != boot_core_type) + if (sclp.has_core_type && core->type != boot_core_type) + return nr; + cpu = cpumask_first(avail); + address = core->core_id << smp_cpu_mt_shift; + for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) { + if (pcpu_find_address(cpu_present_mask, address + i)) continue; - address = info->core[i].core_id << smp_cpu_mt_shift; - for (j = 0; j <= smp_cpu_mtid; j++) { - if (pcpu_find_address(cpu_present_mask, address + j)) - continue; - pcpu = pcpu_devices + cpu; - pcpu->address = address + j; - pcpu->state = - (cpu >= info->configured*(smp_cpu_mtid + 1)) ? - CPU_STATE_STANDBY : CPU_STATE_CONFIGURED; - smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); - set_cpu_present(cpu, true); - if (sysfs_add && smp_add_present_cpu(cpu) != 0) - set_cpu_present(cpu, false); - else - nr++; - cpu = cpumask_next(cpu, &avail); - if (cpu >= nr_cpu_ids) + pcpu = pcpu_devices + cpu; + pcpu->address = address + i; + if (configured) + pcpu->state = CPU_STATE_CONFIGURED; + else + pcpu->state = CPU_STATE_STANDBY; + smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN); + set_cpu_present(cpu, true); + if (!early && smp_add_present_cpu(cpu) != 0) + set_cpu_present(cpu, false); + else + nr++; + cpumask_clear_cpu(cpu, avail); + cpu = cpumask_next(cpu, avail); + } + return nr; +} + +static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) +{ + struct sclp_core_entry *core; + cpumask_t avail; + bool configured; + u16 core_id; + int nr, i; + + nr = 0; + cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask); + /* + * Add IPL core first (which got logical CPU number 0) to make sure + * that all SMT threads get subsequent logical CPU numbers. + */ + if (early) { + core_id = pcpu_devices[0].address >> smp_cpu_mt_shift; + for (i = 0; i < info->configured; i++) { + core = &info->core[i]; + if (core->core_id == core_id) { + nr += smp_add_core(core, &avail, true, early); break; + } } } + for (i = 0; i < info->combined; i++) { + configured = i < info->configured; + nr += smp_add_core(&info->core[i], &avail, configured, early); + } return nr; } @@ -803,7 +831,7 @@ void __init smp_detect_cpus(void) /* Add CPUs present at boot */ get_online_cpus(); - __smp_rescan_cpus(info, 0); + __smp_rescan_cpus(info, true); put_online_cpus(); memblock_free_early((unsigned long)info, sizeof(*info)); } @@ -1156,7 +1184,7 @@ int __ref smp_rescan_cpus(void) smp_get_core_info(info, 0); get_online_cpus(); mutex_lock(&smp_cpu_state_mutex); - nr = __smp_rescan_cpus(info, 1); + nr = __smp_rescan_cpus(info, false); mutex_unlock(&smp_cpu_state_mutex); put_online_cpus(); kfree(info); diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile index 101cadabfc89..6d87f800b4f2 100644 --- a/arch/s390/kernel/vdso32/Makefile +++ b/arch/s390/kernel/vdso32/Makefile @@ -25,9 +25,10 @@ obj-y += vdso32_wrapper.o extra-y += vdso32.lds CPPFLAGS_vdso32.lds += -P -C -U$(ARCH) -# Disable gcov profiling and ubsan for VDSO code +# Disable gcov profiling, ubsan and kasan for VDSO code GCOV_PROFILE := n UBSAN_SANITIZE := n +KASAN_SANITIZE := n # Force dependency (incbin is bad) $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index 36bbafcf4a77..4bc166b8c0cb 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -25,9 +25,10 @@ obj-y += vdso64_wrapper.o extra-y += vdso64.lds CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) -# Disable gcov profiling and ubsan for VDSO code +# Disable gcov profiling, ubsan and kasan for VDSO code GCOV_PROFILE := n UBSAN_SANITIZE := n +KASAN_SANITIZE := n # Force dependency (incbin is bad) $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 28f3796d23c8..61d25e2c82ef 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1913,7 +1913,7 @@ static int flic_ais_mode_get_all(struct kvm *kvm, struct kvm_device_attr *attr) return -EINVAL; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; mutex_lock(&fi->ais_lock); ais.simm = fi->simm; @@ -2214,7 +2214,7 @@ static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr) int ret = 0; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) return -EFAULT; @@ -2294,7 +2294,7 @@ static int flic_ais_mode_set_all(struct kvm *kvm, struct kvm_device_attr *attr) struct kvm_s390_ais_all ais; if (!test_kvm_facility(kvm, 72)) - return -ENOTSUPP; + return -EOPNOTSUPP; if (copy_from_user(&ais, (void __user *)attr->addr, sizeof(ais))) return -EFAULT; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index cab41bc2572f..46fee3f4dedd 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -361,19 +361,30 @@ static void kvm_s390_cpu_feat_init(void) int kvm_arch_init(void *opaque) { + int rc; + kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); if (!kvm_s390_dbf) return -ENOMEM; if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) { - debug_unregister(kvm_s390_dbf); - return -ENOMEM; + rc = -ENOMEM; + goto out_debug_unreg; } kvm_s390_cpu_feat_init(); /* Register floating interrupt controller interface. */ - return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); + rc = kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); + if (rc) { + pr_err("Failed to register FLIC rc=%d\n", rc); + goto out_debug_unreg; + } + return 0; + +out_debug_unreg: + debug_unregister(kvm_s390_dbf); + return rc; } void kvm_arch_exit(void) @@ -1926,13 +1937,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); if (!kvm->arch.sca) goto out_err; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); sca_offset += 16; if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) sca_offset = 0; kvm->arch.sca = (struct bsca_block *) ((char *) kvm->arch.sca + sca_offset); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); sprintf(debug_name, "kvm-%u", current->pid); @@ -2373,9 +2384,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); vcpu->arch.sie_block->gcr[0] = 0xE0UL; vcpu->arch.sie_block->gcr[14] = 0xC2000000UL; - /* make sure the new fpc will be lazily loaded */ - save_fpu_regs(); - current->thread.fpu.fpc = 0; + vcpu->run->s.regs.fpc = 0; vcpu->arch.sie_block->gbea = 1; vcpu->arch.sie_block->pp = 0; vcpu->arch.sie_block->fpf &= ~FPF_BPBC; @@ -3742,7 +3751,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, } case KVM_S390_STORE_STATUS: idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvm_s390_vcpu_store_status(vcpu, arg); + r = kvm_s390_store_status_unloaded(vcpu, arg); srcu_read_unlock(&vcpu->kvm->srcu, idx); break; case KVM_S390_SET_INITIAL_PSW: { diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 829c63dbc81a..c0e96bdac80a 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -307,16 +307,16 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write, } if (write) { - len = *lenp; - if (copy_from_user(buf, buffer, - len > sizeof(buf) ? sizeof(buf) : len)) + len = min(*lenp, sizeof(buf)); + if (copy_from_user(buf, buffer, len)) return -EFAULT; - buf[sizeof(buf) - 1] = '\0'; + buf[len - 1] = '\0'; cmm_skip_blanks(buf, &p); nr = simple_strtoul(p, &p, 0); cmm_skip_blanks(p, &p); seconds = simple_strtoul(p, &p, 0); cmm_set_timeout(nr, seconds); + *ppos += *lenp; } else { len = sprintf(buf, "%ld %ld\n", cmm_timeout_pages, cmm_timeout_seconds); @@ -324,9 +324,9 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write, len = *lenp; if (copy_to_user(buffer, buf, len)) return -EFAULT; + *lenp = len; + *ppos += len; } - *lenp = len; - *ppos += len; return 0; } diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index 05c8abd864f1..9bce54eac0b0 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -39,7 +39,8 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr, VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); head = compound_head(page); - if (!page_cache_get_speculative(head)) + if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0) + || !page_cache_get_speculative(head))) return 0; if (unlikely(pte_val(pte) != pte_val(*ptep))) { put_page(head); @@ -77,7 +78,8 @@ static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end); - if (!page_cache_add_speculative(head, refs)) { + if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0) + || !page_cache_add_speculative(head, refs))) { *nr -= refs; return 0; } @@ -151,7 +153,8 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, refs++; } while (addr += PAGE_SIZE, addr != end); - if (!page_cache_add_speculative(head, refs)) { + if (unlikely(WARN_ON_ONCE(page_ref_count(head) < 0) + || !page_cache_add_speculative(head, refs))) { *nr -= refs; return 0; } diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index e804090f4470..e19ea9ebe960 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -2,7 +2,7 @@ /* * IBM System z Huge TLB Page Support for Kernel. * - * Copyright IBM Corp. 2007,2016 + * Copyright IBM Corp. 2007,2020 * Author(s): Gerald Schaefer */ @@ -11,6 +11,9 @@ #include #include +#include +#include +#include /* * If the bit selected by single-bit bitmask "a" is set within "x", move @@ -243,3 +246,98 @@ static __init int setup_hugepagesz(char *opt) return 1; } __setup("hugepagesz=", setup_hugepagesz); + +static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; + + info.flags = 0; + info.length = len; + info.low_limit = current->mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + return vm_unmapped_area(&info); +} + +static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, + unsigned long addr0, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct vm_unmapped_area_info info; + unsigned long addr; + + info.flags = VM_UNMAPPED_AREA_TOPDOWN; + info.length = len; + info.low_limit = max(PAGE_SIZE, mmap_min_addr); + info.high_limit = current->mm->mmap_base; + info.align_mask = PAGE_MASK & ~huge_page_mask(h); + info.align_offset = 0; + addr = vm_unmapped_area(&info); + + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + if (addr & ~PAGE_MASK) { + VM_BUG_ON(addr != -ENOMEM); + info.flags = 0; + info.low_limit = TASK_UNMAPPED_BASE; + info.high_limit = TASK_SIZE; + addr = vm_unmapped_area(&info); + } + + return addr; +} + +unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct hstate *h = hstate_file(file); + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc; + + if (len & ~huge_page_mask(h)) + return -EINVAL; + if (len > TASK_SIZE - mmap_min_addr) + return -ENOMEM; + + if (flags & MAP_FIXED) { + if (prepare_hugepage_range(file, addr, len)) + return -EINVAL; + goto check_asce_limit; + } + + if (addr) { + addr = ALIGN(addr, huge_page_size(h)); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && + (!vma || addr + len <= vm_start_gap(vma))) + goto check_asce_limit; + } + + if (mm->get_unmapped_area == arch_get_unmapped_area) + addr = hugetlb_get_unmapped_area_bottomup(file, addr, len, + pgoff, flags); + else + addr = hugetlb_get_unmapped_area_topdown(file, addr, len, + pgoff, flags); + if (addr & ~PAGE_MASK) + return addr; + +check_asce_limit: + if (addr + len > current->mm->context.asce_limit && + addr + len <= TASK_SIZE) { + rc = crst_table_upgrade(mm, addr + len); + if (rc) + return (unsigned long) rc; + } + return addr; +} diff --git a/arch/sh/include/cpu-sh2a/cpu/sh7269.h b/arch/sh/include/cpu-sh2a/cpu/sh7269.h index d516e5d48818..b887cc402b71 100644 --- a/arch/sh/include/cpu-sh2a/cpu/sh7269.h +++ b/arch/sh/include/cpu-sh2a/cpu/sh7269.h @@ -78,8 +78,15 @@ enum { GPIO_FN_WDTOVF, /* CAN */ - GPIO_FN_CTX1, GPIO_FN_CRX1, GPIO_FN_CTX0, GPIO_FN_CTX0_CTX1, - GPIO_FN_CRX0, GPIO_FN_CRX0_CRX1, GPIO_FN_CRX0_CRX1_CRX2, + GPIO_FN_CTX2, GPIO_FN_CRX2, + GPIO_FN_CTX1, GPIO_FN_CRX1, + GPIO_FN_CTX0, GPIO_FN_CRX0, + GPIO_FN_CTX0_CTX1, GPIO_FN_CRX0_CRX1, + GPIO_FN_CTX0_CTX1_CTX2, GPIO_FN_CRX0_CRX1_CRX2, + GPIO_FN_CTX2_PJ21, GPIO_FN_CRX2_PJ20, + GPIO_FN_CTX1_PJ23, GPIO_FN_CRX1_PJ22, + GPIO_FN_CTX0_CTX1_PJ23, GPIO_FN_CRX0_CRX1_PJ22, + GPIO_FN_CTX0_CTX1_CTX2_PJ21, GPIO_FN_CRX0_CRX1_CRX2_PJ20, /* DMAC */ GPIO_FN_TEND0, GPIO_FN_DACK0, GPIO_FN_DREQ0, diff --git a/arch/sh/include/cpu-sh4/cpu/sh7734.h b/arch/sh/include/cpu-sh4/cpu/sh7734.h index 96f0246ad2f2..82b63208135a 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7734.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7734.h @@ -134,7 +134,7 @@ enum { GPIO_FN_EX_WAIT1, GPIO_FN_SD1_DAT0_A, GPIO_FN_DREQ2, GPIO_FN_CAN1_TX_C, GPIO_FN_ET0_LINK_C, GPIO_FN_ET0_ETXD5_A, GPIO_FN_EX_WAIT0, GPIO_FN_TCLK1_B, - GPIO_FN_RD_WR, GPIO_FN_TCLK0, + GPIO_FN_RD_WR, GPIO_FN_TCLK0, GPIO_FN_CAN_CLK_B, GPIO_FN_ET0_ETXD4, GPIO_FN_EX_CS5, GPIO_FN_SD1_CMD_A, GPIO_FN_ATADIR, GPIO_FN_QSSL_B, GPIO_FN_ET0_ETXD3_A, GPIO_FN_EX_CS4, GPIO_FN_SD1_WP_A, GPIO_FN_ATAWR, GPIO_FN_QMI_QIO1_B, diff --git a/arch/sparc/include/asm/cmpxchg_64.h b/arch/sparc/include/asm/cmpxchg_64.h index f71ef3729888..316faa0130ba 100644 --- a/arch/sparc/include/asm/cmpxchg_64.h +++ b/arch/sparc/include/asm/cmpxchg_64.h @@ -52,7 +52,12 @@ static inline unsigned long xchg64(__volatile__ unsigned long *m, unsigned long return val; } -#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) +#define xchg(ptr,x) \ +({ __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \ + __ret; \ +}) void __xchg_called_with_bad_pointer(void); diff --git a/arch/sparc/include/asm/parport.h b/arch/sparc/include/asm/parport.h index 05df5f043053..3c5a1c620f0f 100644 --- a/arch/sparc/include/asm/parport.h +++ b/arch/sparc/include/asm/parport.h @@ -21,6 +21,7 @@ */ #define HAS_DMA +#ifdef CONFIG_PARPORT_PC_FIFO static DEFINE_SPINLOCK(dma_spin_lock); #define claim_dma_lock() \ @@ -31,6 +32,7 @@ static DEFINE_SPINLOCK(dma_spin_lock); #define release_dma_lock(__flags) \ spin_unlock_irqrestore(&dma_spin_lock, __flags); +#endif static struct sparc_ebus_info { struct ebus_dma_info info; diff --git a/arch/sparc/include/uapi/asm/ipcbuf.h b/arch/sparc/include/uapi/asm/ipcbuf.h index 9d0d125500e2..084b8949ddff 100644 --- a/arch/sparc/include/uapi/asm/ipcbuf.h +++ b/arch/sparc/include/uapi/asm/ipcbuf.h @@ -15,19 +15,19 @@ struct ipc64_perm { - __kernel_key_t key; - __kernel_uid_t uid; - __kernel_gid_t gid; - __kernel_uid_t cuid; - __kernel_gid_t cgid; + __kernel_key_t key; + __kernel_uid32_t uid; + __kernel_gid32_t gid; + __kernel_uid32_t cuid; + __kernel_gid32_t cgid; #ifndef __arch64__ - unsigned short __pad0; + unsigned short __pad0; #endif - __kernel_mode_t mode; - unsigned short __pad1; - unsigned short seq; - unsigned long long __unused1; - unsigned long long __unused2; + __kernel_mode_t mode; + unsigned short __pad1; + unsigned short seq; + unsigned long long __unused1; + unsigned long long __unused2; }; #endif /* __SPARC_IPCBUF_H */ diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 5a2344574f39..4323dc4ae4c7 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -167,12 +167,14 @@ SECTIONS } PERCPU_SECTION(SMP_CACHE_BYTES) -#ifdef CONFIG_JUMP_LABEL . = ALIGN(PAGE_SIZE); .exit.text : { EXIT_TEXT } -#endif + + .exit.data : { + EXIT_DATA + } . = ALIGN(PAGE_SIZE); __init_end = .; diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index adfb4581bd80..dfb1a62abe93 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1326,6 +1326,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const u8 tmp2 = bpf2sparc[TMP_REG_2]; u32 opcode = 0, rs2; + if (insn->dst_reg == BPF_REG_FP) + ctx->saw_frame_pointer = true; + ctx->tmp_2_used = true; emit_loadimm(imm, tmp2, ctx); @@ -1364,6 +1367,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const u8 tmp = bpf2sparc[TMP_REG_1]; u32 opcode = 0, rs2; + if (insn->dst_reg == BPF_REG_FP) + ctx->saw_frame_pointer = true; + switch (BPF_SIZE(code)) { case BPF_W: opcode = ST32; @@ -1396,6 +1402,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const u8 tmp2 = bpf2sparc[TMP_REG_2]; const u8 tmp3 = bpf2sparc[TMP_REG_3]; + if (insn->dst_reg == BPF_REG_FP) + ctx->saw_frame_pointer = true; + ctx->tmp_1_used = true; ctx->tmp_2_used = true; ctx->tmp_3_used = true; @@ -1416,6 +1425,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const u8 tmp2 = bpf2sparc[TMP_REG_2]; const u8 tmp3 = bpf2sparc[TMP_REG_3]; + if (insn->dst_reg == BPF_REG_FP) + ctx->saw_frame_pointer = true; + ctx->tmp_1_used = true; ctx->tmp_2_used = true; ctx->tmp_3_used = true; diff --git a/arch/um/Kconfig.debug b/arch/um/Kconfig.debug index 967d3109689f..39d44bfb241d 100644 --- a/arch/um/Kconfig.debug +++ b/arch/um/Kconfig.debug @@ -19,6 +19,7 @@ config GPROF config GCOV bool "Enable gcov support" depends on DEBUG_INFO + depends on !KCOV help This option allows developers to retrieve coverage data from a UML session. diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 366e57f5e8d6..7e524efed584 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -261,7 +261,7 @@ static irqreturn_t line_write_interrupt(int irq, void *data) if (err == 0) { spin_unlock(&line->lock); return IRQ_NONE; - } else if (err < 0) { + } else if ((err < 0) && (err != -EAGAIN)) { line->head = line->buffer; line->tail = line->buffer; } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c885051a708b..fdc42d2bf7d9 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1852,6 +1852,51 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS If unsure, say y. +choice + prompt "TSX enable mode" + depends on CPU_SUP_INTEL + default X86_INTEL_TSX_MODE_OFF + help + Intel's TSX (Transactional Synchronization Extensions) feature + allows to optimize locking protocols through lock elision which + can lead to a noticeable performance boost. + + On the other hand it has been shown that TSX can be exploited + to form side channel attacks (e.g. TAA) and chances are there + will be more of those attacks discovered in the future. + + Therefore TSX is not enabled by default (aka tsx=off). An admin + might override this decision by tsx=on the command line parameter. + Even with TSX enabled, the kernel will attempt to enable the best + possible TAA mitigation setting depending on the microcode available + for the particular machine. + + This option allows to set the default tsx mode between tsx=on, =off + and =auto. See Documentation/admin-guide/kernel-parameters.txt for more + details. + + Say off if not sure, auto if TSX is in use but it should be used on safe + platforms or on if TSX is in use and the security aspect of tsx is not + relevant. + +config X86_INTEL_TSX_MODE_OFF + bool "off" + help + TSX is disabled if possible - equals to tsx=off command line parameter. + +config X86_INTEL_TSX_MODE_ON + bool "on" + help + TSX is always enabled on TSX capable HW - equals the tsx=on command + line parameter. + +config X86_INTEL_TSX_MODE_AUTO + bool "auto" + help + TSX is enabled on TSX capable HW that is believed to be safe against + side channel attacks- equals the tsx=auto command line parameter. +endchoice + config EFI bool "EFI runtime service support" depends on ACPI @@ -2670,8 +2715,7 @@ config OLPC config OLPC_XO1_PM bool "OLPC XO-1 Power Management" - depends on OLPC && MFD_CS5535 && PM_SLEEP - select MFD_CORE + depends on OLPC && MFD_CS5535=y && PM_SLEEP ---help--- Add support for poweroff and suspend of the OLPC XO-1 laptop. diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 6293a8768a91..bec0952c5595 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -189,7 +189,7 @@ config HAVE_MMIOTRACE_SUPPORT config X86_DECODER_SELFTEST bool "x86 instruction decoder selftest" - depends on DEBUG_KERNEL && KPROBES + depends on DEBUG_KERNEL && INSTRUCTION_DECODER depends on !COMPILE_TEST ---help--- Perform x86 instruction decoder selftests at build time. diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 4b3d92a37c80..39fdede523f2 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -227,6 +227,11 @@ ENTRY(efi32_stub_entry) leal efi32_config(%ebp), %eax movl %eax, efi_config(%ebp) + /* Disable paging */ + movl %cr0, %eax + btrl $X86_CR0_PG_BIT, %eax + movl %eax, %cr0 + jmp startup_32 ENDPROC(efi32_stub_entry) #endif diff --git a/arch/x86/boot/compressed/pagetable.c b/arch/x86/boot/compressed/pagetable.c index e691ff734cb5..46573842d8c3 100644 --- a/arch/x86/boot/compressed/pagetable.c +++ b/arch/x86/boot/compressed/pagetable.c @@ -36,9 +36,6 @@ #define __PAGE_OFFSET __PAGE_OFFSET_BASE #include "../../mm/ident_map.c" -/* Used by pgtable.h asm code to force instruction serialization. */ -unsigned long __force_order; - /* Used to track our page table allocation area. */ struct alloc_pgt_data { unsigned char *pgt_buf; diff --git a/arch/x86/configs/x86_64_cuttlefish_defconfig b/arch/x86/configs/x86_64_cuttlefish_defconfig index 3d8854b6aa48..9d2d0f051085 100644 --- a/arch/x86/configs/x86_64_cuttlefish_defconfig +++ b/arch/x86/configs/x86_64_cuttlefish_defconfig @@ -12,19 +12,24 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_PSI=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y +CONFIG_IKHEADERS=y CONFIG_CGROUPS=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y -CONFIG_RT_GROUP_SCHED=y CONFIG_CGROUP_FREEZER=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_CGROUP_BPF=y CONFIG_NAMESPACES=y +# CONFIG_UTS_NS is not set +# CONFIG_PID_NS is not set CONFIG_SCHED_TUNE=y CONFIG_BLK_DEV_INITRD=y -# CONFIG_RD_LZ4 is not set +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_XZ is not set +# CONFIG_RD_LZO is not set CONFIG_KALLSYMS_ALL=y # CONFIG_PCSPKR_PLATFORM is not set CONFIG_BPF_SYSCALL=y @@ -33,13 +38,14 @@ CONFIG_EMBEDDED=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_OPROFILE=y -CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_CC_STACKPROTECTOR_STRONG=y CONFIG_REFCOUNT_FULL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y +CONFIG_BLK_INLINE_ENCRYPTION=y +CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y CONFIG_PARTITION_ADVANCED=y CONFIG_SMP=y CONFIG_HYPERVISOR_GUEST=y @@ -55,7 +61,6 @@ CONFIG_X86_MSR=y CONFIG_X86_CPUID=y CONFIG_KSM=y CONFIG_DEFAULT_MMAP_MIN_ADDR=65536 -CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_ZSMALLOC=y # CONFIG_MTRR is not set CONFIG_HZ_100=y @@ -75,6 +80,7 @@ CONFIG_ACPI_PROCFS_POWER=y # CONFIG_X86_PM_TIMER is not set CONFIG_CPU_FREQ_TIMES=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPUFREQ_DUMMY=y CONFIG_X86_ACPI_CPUFREQ=y CONFIG_PCI_MMCONFIG=y CONFIG_PCI_MSI=y @@ -162,6 +168,7 @@ CONFIG_NETFILTER_XT_MATCH_POLICY=y CONFIG_NETFILTER_XT_MATCH_PKTTYPE=y CONFIG_NETFILTER_XT_MATCH_QUOTA=y CONFIG_NETFILTER_XT_MATCH_QUOTA2=y +CONFIG_NETFILTER_XT_MATCH_QUOTA2_LOG=y CONFIG_NETFILTER_XT_MATCH_SOCKET=y CONFIG_NETFILTER_XT_MATCH_STATE=y CONFIG_NETFILTER_XT_MATCH_STATISTIC=y @@ -208,15 +215,13 @@ CONFIG_NET_CLS_ACT=y CONFIG_VSOCKETS=y CONFIG_VIRTIO_VSOCKETS=y CONFIG_BPF_JIT=y -CONFIG_CAN=y -# CONFIG_CAN_BCM is not set -# CONFIG_CAN_GW is not set -CONFIG_CAN_VCAN=y CONFIG_CFG80211=y CONFIG_MAC80211=y CONFIG_RFKILL=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEBUG_DEVRES=y +CONFIG_GNSS=y +CONFIG_GNSS_CMDLINE_SERIAL=m CONFIG_OF=y CONFIG_OF_UNITTEST=y # CONFIG_PNP_DEBUG_MESSAGES is not set @@ -233,10 +238,10 @@ CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=y CONFIG_SCSI_CONSTANTS=y CONFIG_SCSI_SPI_ATTRS=y -CONFIG_SCSI_VIRTIO=y CONFIG_MD=y CONFIG_BLK_DEV_DM=y CONFIG_DM_CRYPT=y +CONFIG_DM_DEFAULT_KEY=y CONFIG_DM_SNAPSHOT=y CONFIG_DM_MIRROR=y CONFIG_DM_ZERO=y @@ -246,6 +251,7 @@ CONFIG_DM_VERITY_FEC=y CONFIG_DM_ANDROID_VERITY=y CONFIG_DM_BOW=y CONFIG_NETDEVICES=y +CONFIG_DUMMY=y CONFIG_NETCONSOLE=y CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_TUN=y @@ -282,7 +288,6 @@ CONFIG_USB_USBNET=y # CONFIG_WLAN_VENDOR_TI is not set # CONFIG_WLAN_VENDOR_ZYDAS is not set # CONFIG_WLAN_VENDOR_QUANTENNA is not set -CONFIG_MAC80211_HWSIM=y CONFIG_VIRT_WIFI=y CONFIG_INPUT_MOUSEDEV=y CONFIG_INPUT_EVDEV=y @@ -314,7 +319,7 @@ CONFIG_SERIAL_8250_NR_UARTS=48 CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y -CONFIG_VIRTIO_CONSOLE=y +CONFIG_SERIAL_DEV_BUS=y CONFIG_HW_RANDOM=y # CONFIG_HW_RANDOM_INTEL is not set # CONFIG_HW_RANDOM_AMD is not set @@ -322,6 +327,8 @@ CONFIG_HW_RANDOM=y CONFIG_HW_RANDOM_VIRTIO=y CONFIG_HPET=y # CONFIG_HPET_MMAP_DEFAULT is not set +CONFIG_TCG_TPM=y +CONFIG_TCG_VTPM_PROXY=y # CONFIG_DEVPORT is not set # CONFIG_ACPI_I2C_OPREGION is not set # CONFIG_I2C_COMPAT is not set @@ -378,6 +385,7 @@ CONFIG_HID_MAGICMOUSE=y CONFIG_HID_MICROSOFT=y CONFIG_HID_MONTEREY=y CONFIG_HID_MULTITOUCH=y +CONFIG_HID_NINTENDO=y CONFIG_HID_NTRIG=y CONFIG_HID_ORTEK=y CONFIG_HID_PANTHERLORD=y @@ -390,6 +398,7 @@ CONFIG_HID_SAITEK=y CONFIG_HID_SAMSUNG=y CONFIG_HID_SONY=y CONFIG_HID_SPEEDLINK=y +CONFIG_HID_STEAM=y CONFIG_HID_SUNPLUS=y CONFIG_HID_GREENASIA=y CONFIG_GREENASIA_FF=y @@ -415,9 +424,8 @@ CONFIG_USB_CONFIGFS_UEVENT=y CONFIG_USB_CONFIGFS_F_MIDI=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_TEST=y -CONFIG_SW_SYNC=y CONFIG_VIRTIO_PCI=y -CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_PMEM=y CONFIG_VIRTIO_INPUT=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y @@ -430,6 +438,9 @@ CONFIG_ION_SYSTEM_HEAP=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_ANDROID=y CONFIG_ANDROID_BINDER_IPC=y +CONFIG_ANDROID_BINDERFS=y +CONFIG_LIBNVDIMM=y +# CONFIG_ND_BLK is not set # CONFIG_FIRMWARE_MEMMAP is not set CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y @@ -438,6 +449,9 @@ CONFIG_EXT4_ENCRYPTION=y CONFIG_F2FS_FS=y CONFIG_F2FS_FS_SECURITY=y CONFIG_F2FS_FS_ENCRYPTION=y +CONFIG_FS_ENCRYPTION_INLINE_CRYPT=y +CONFIG_FS_VERITY=y +CONFIG_FS_VERITY_BUILTIN_SIGNATURES=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set @@ -445,6 +459,7 @@ CONFIG_QFMT_V2=y CONFIG_AUTOFS4_FS=y CONFIG_FUSE_FS=y CONFIG_OVERLAY_FS=y +CONFIG_INCREMENTAL_FS=y CONFIG_MSDOS_FS=y CONFIG_VFAT_FS=y CONFIG_PROC_KCORE=y @@ -460,6 +475,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y +CONFIG_UNICODE=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y # CONFIG_ENABLE_WARN_DEPRECATED is not set @@ -475,26 +491,23 @@ CONFIG_PANIC_TIMEOUT=5 CONFIG_SCHEDSTATS=y CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_ENABLE_DEFAULT_TRACERS=y +CONFIG_TEST_MEMINIT=y +CONFIG_TEST_STACKINIT=y CONFIG_IO_DELAY_NONE=y -CONFIG_DEBUG_BOOT_PARAMS=y CONFIG_OPTIMIZE_INLINING=y CONFIG_UNWINDER_FRAME_POINTER=y -CONFIG_SECURITY_PERF_EVENTS_RESTRICT=y CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_PATH=y CONFIG_HARDENED_USERCOPY=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 -CONFIG_CRYPTO_RSA=y +CONFIG_INIT_STACK_ALL=y +CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set CONFIG_CRYPTO_ADIANTUM=y CONFIG_CRYPTO_AES_NI_INTEL=y CONFIG_CRYPTO_LZ4=y CONFIG_CRYPTO_ZSTD=y -CONFIG_CRYPTO_DEV_VIRTIO=y -CONFIG_ASYMMETRIC_KEY_TYPE=y -CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=y -CONFIG_X509_CERTIFICATE_PARSER=y -CONFIG_SYSTEM_TRUSTED_KEYRING=y +# CONFIG_CRYPTO_DEV_VIRTIO is not set CONFIG_SYSTEM_TRUSTED_KEYS="verity_dev_keys.x509" diff --git a/arch/x86/entry/vdso/vdso32-setup.c b/arch/x86/entry/vdso/vdso32-setup.c index 42d4c89f990e..ddff0ca6f509 100644 --- a/arch/x86/entry/vdso/vdso32-setup.c +++ b/arch/x86/entry/vdso/vdso32-setup.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 27ade3cb6482..c3ec535fd36b 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -4,12 +4,14 @@ #include #include #include +#include #include #include #include "../perf_event.h" -static DEFINE_PER_CPU(unsigned int, perf_nmi_counter); +static DEFINE_PER_CPU(unsigned long, perf_nmi_tstamp); +static unsigned long perf_nmi_window; static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] @@ -243,6 +245,7 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287, @@ -640,11 +643,12 @@ static void amd_pmu_disable_event(struct perf_event *event) * handler when multiple PMCs are active or PMC overflow while handling some * other source of an NMI. * - * Attempt to mitigate this by using the number of active PMCs to determine - * whether to return NMI_HANDLED if the perf NMI handler did not handle/reset - * any PMCs. The per-CPU perf_nmi_counter variable is set to a minimum of the - * number of active PMCs or 2. The value of 2 is used in case an NMI does not - * arrive at the LAPIC in time to be collapsed into an already pending NMI. + * Attempt to mitigate this by creating an NMI window in which un-handled NMIs + * received during this window will be claimed. This prevents extending the + * window past when it is possible that latent NMIs should be received. The + * per-CPU perf_nmi_tstamp will be set to the window end time whenever perf has + * handled a counter. When an un-handled NMI is received, it will be claimed + * only if arriving within that window. */ static int amd_pmu_handle_irq(struct pt_regs *regs) { @@ -662,21 +666,19 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) handled = x86_pmu_handle_irq(regs); /* - * If a counter was handled, record the number of possible remaining - * NMIs that can occur. + * If a counter was handled, record a timestamp such that un-handled + * NMIs will be claimed if arriving within that window. */ if (handled) { - this_cpu_write(perf_nmi_counter, - min_t(unsigned int, 2, active)); + this_cpu_write(perf_nmi_tstamp, + jiffies + perf_nmi_window); return handled; } - if (!this_cpu_read(perf_nmi_counter)) + if (time_after(jiffies, this_cpu_read(perf_nmi_tstamp))) return NMI_DONE; - this_cpu_dec(perf_nmi_counter); - return NMI_HANDLED; } @@ -908,6 +910,9 @@ static int __init amd_core_pmu_init(void) if (!boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) return 0; + /* Avoid calulating the value each time in the NMI handler */ + perf_nmi_window = msecs_to_jiffies(100); + switch (boot_cpu_data.x86) { case 0x15: pr_cont("Fam15h "); diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 7a86fbc07ddc..f24e9adaa316 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -389,7 +389,8 @@ static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs, struct hw_perf_event *hwc, u64 config) { config &= ~perf_ibs->cnt_mask; - wrmsrl(hwc->config_base, config); + if (boot_cpu_data.x86 == 0x10) + wrmsrl(hwc->config_base, config); config &= ~perf_ibs->enable_mask; wrmsrl(hwc->config_base, config); } @@ -564,7 +565,8 @@ static struct perf_ibs perf_ibs_op = { }, .msr = MSR_AMD64_IBSOPCTL, .config_mask = IBS_OP_CONFIG_MASK, - .cnt_mask = IBS_OP_MAX_CNT, + .cnt_mask = IBS_OP_MAX_CNT | IBS_OP_CUR_CNT | + IBS_OP_CUR_CNT_RAND, .enable_mask = IBS_OP_ENABLE, .valid_mask = IBS_OP_VAL, .max_period = IBS_OP_MAX_CNT << 4, @@ -625,7 +627,7 @@ fail: if (event->attr.sample_type & PERF_SAMPLE_RAW) offset_max = perf_ibs->offset_max; else if (check_rip) - offset_max = 2; + offset_max = 3; else offset_max = 1; do { diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index baa7e36073f9..604a8558752d 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -193,20 +193,18 @@ static int amd_uncore_event_init(struct perf_event *event) /* * NB and Last level cache counters (MSRs) are shared across all cores - * that share the same NB / Last level cache. Interrupts can be directed - * to a single target core, however, event counts generated by processes - * running on other cores cannot be masked out. So we do not support - * sampling and per-thread events. + * that share the same NB / Last level cache. On family 16h and below, + * Interrupts can be directed to a single target core, however, event + * counts generated by processes running on other cores cannot be masked + * out. So we do not support sampling and per-thread events via + * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts: */ - if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) - return -EINVAL; /* NB and Last level cache counters do not have usr/os/guest/host bits */ if (event->attr.exclude_user || event->attr.exclude_kernel || event->attr.exclude_host || event->attr.exclude_guest) return -EINVAL; - /* and we do not enable counter overflow interrupts */ hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; hwc->idx = -1; @@ -314,6 +312,7 @@ static struct pmu amd_nb_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, }; static struct pmu amd_llc_pmu = { @@ -324,6 +323,7 @@ static struct pmu amd_llc_pmu = { .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, }; static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 6ed99de2ddf5..c1f7b3cb84a9 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -375,7 +375,7 @@ int x86_add_exclusive(unsigned int what) * LBR and BTS are still mutually exclusive. */ if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) - return 0; + goto out; if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) { mutex_lock(&pmc_reserve_mutex); @@ -387,6 +387,7 @@ int x86_add_exclusive(unsigned int what) mutex_unlock(&pmc_reserve_mutex); } +out: atomic_inc(&active_events); return 0; @@ -397,11 +398,15 @@ fail_unlock: void x86_del_exclusive(unsigned int what) { + atomic_dec(&active_events); + + /* + * See the comment in x86_add_exclusive(). + */ if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt) return; atomic_dec(&x86_pmu.lbr_exclusive[what]); - atomic_dec(&active_events); } int x86_setup_perfctr(struct perf_event *event) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 24ffa1e88cf9..5a1cd9c3addf 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -71,16 +71,26 @@ struct bts_buffer { static struct pmu bts_pmu; +static int buf_nr_pages(struct page *page) +{ + if (!PagePrivate(page)) + return 1; + + return 1 << page_private(page); +} + static size_t buf_size(struct page *page) { - return 1 << (PAGE_SHIFT + page_private(page)); + return buf_nr_pages(page) * PAGE_SIZE; } static void * -bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) +bts_buffer_setup_aux(struct perf_event *event, void **pages, + int nr_pages, bool overwrite) { struct bts_buffer *buf; struct page *page; + int cpu = event->cpu; int node = (cpu == -1) ? cpu : cpu_to_node(cpu); unsigned long offset; size_t size = nr_pages << PAGE_SHIFT; @@ -89,9 +99,7 @@ bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) /* count all the high order buffers */ for (pg = 0, nbuf = 0; pg < nr_pages;) { page = virt_to_page(pages[pg]); - if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1)) - return NULL; - pg += 1 << page_private(page); + pg += buf_nr_pages(page); nbuf++; } @@ -115,7 +123,7 @@ bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite) unsigned int __nr_pages; page = virt_to_page(pages[pg]); - __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1; + __nr_pages = buf_nr_pages(page); buf->buf[nbuf].page = page; buf->buf[nbuf].offset = offset; buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0); @@ -555,9 +563,11 @@ static int bts_event_init(struct perf_event *event) * Note that the default paranoia setting permits unprivileged * users to profile the kernel. */ - if (event->attr.exclude_kernel && perf_paranoid_kernel() && - !capable(CAP_SYS_ADMIN)) - return -EACCES; + if (event->attr.exclude_kernel) { + ret = perf_allow_kernel(&event->attr); + if (ret) + return ret; + } if (x86_add_exclusive(x86_lbr_exclusive_bts)) return -EBUSY; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 4a60ed8c4413..0307e34d2272 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3087,8 +3087,9 @@ static int intel_pmu_hw_config(struct perf_event *event) if (x86_pmu.version < 3) return -EINVAL; - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + ret = perf_allow_cpu(&event->attr); + if (ret) + return ret; event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 3310f9f6c3e1..550b7814ef92 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1368,6 +1368,8 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count) old = ((s64)(prev_raw_count << shift) >> shift); local64_add(new - old + count * period, &event->count); + local64_set(&hwc->period_left, -new); + perf_event_update_userpage(event); return 0; diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c index d32c0eed38ca..4f9ac72968db 100644 --- a/arch/x86/events/intel/p4.c +++ b/arch/x86/events/intel/p4.c @@ -776,8 +776,9 @@ static int p4_validate_raw_event(struct perf_event *event) * the user needs special permissions to be able to use it */ if (p4_ht_active() && p4_event_bind_map[v].shared) { - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + v = perf_allow_cpu(&event->attr); + if (v) + return v; } /* ESCR EventMask bits may be invalid */ diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 81fd41d5a0d9..5af568cc4c8f 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1104,10 +1104,11 @@ static int pt_buffer_init_topa(struct pt_buffer *buf, unsigned long nr_pages, * Return: Our private PT buffer structure. */ static void * -pt_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool snapshot) +pt_buffer_setup_aux(struct perf_event *event, void **pages, + int nr_pages, bool snapshot) { struct pt_buffer *buf; - int node, ret; + int node, ret, cpu = event->cpu; if (!nr_pages) return NULL; @@ -1190,7 +1191,7 @@ static int pt_event_addr_filters_validate(struct list_head *filters) if (!filter->range || !filter->size) return -EOPNOTSUPP; - if (!filter->inode) { + if (!filter->path.dentry) { if (!valid_kernel_ip(filter->offset)) return -EINVAL; @@ -1208,7 +1209,8 @@ static int pt_event_addr_filters_validate(struct list_head *filters) static void pt_event_addr_filters_sync(struct perf_event *event) { struct perf_addr_filters_head *head = perf_event_addr_filters(event); - unsigned long msr_a, msr_b, *offs = event->addr_filters_offs; + unsigned long msr_a, msr_b; + struct perf_addr_filter_range *fr = event->addr_filter_ranges; struct pt_filters *filters = event->hw.addr_filters; struct perf_addr_filter *filter; int range = 0; @@ -1217,12 +1219,12 @@ static void pt_event_addr_filters_sync(struct perf_event *event) return; list_for_each_entry(filter, &head->list, entry) { - if (filter->inode && !offs[range]) { + if (filter->path.dentry && !fr[range].start) { msr_a = msr_b = 0; } else { /* apply the offset */ - msr_a = filter->offset + offs[range]; - msr_b = filter->size + msr_a - 1; + msr_a = fr[range].start; + msr_b = msr_a + fr[range].size - 1; } filters->filter[range].msr_a = msr_a; diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 2e9d58cc371e..924fa9c07368 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -17,6 +17,7 @@ * */ +#include #include #include #include @@ -101,6 +102,22 @@ static int hv_cpu_init(unsigned int cpu) return 0; } +static int __init hv_pci_init(void) +{ + int gen2vm = efi_enabled(EFI_BOOT); + + /* + * For Generation-2 VM, we exit from pci_arch_init() by returning 0. + * The purpose is to suppress the harmless warning: + * "PCI: Fatal: No config space access function found" + */ + if (gen2vm) + return 0; + + /* For Generation-1 VM, we'll proceed in pci_arch_init(). */ + return 1; +} + /* * This function is to be invoked early in the boot sequence after the * hypervisor has been detected. @@ -108,7 +125,7 @@ static int hv_cpu_init(unsigned int cpu) * 1. Setup the hypercall page. * 2. Register Hyper-V specific clocksource. */ -void hyperv_init(void) +void __init hyperv_init(void) { u64 guest_id, required_msrs; union hv_x64_msr_hypercall_contents hypercall_msr; @@ -154,6 +171,8 @@ void hyperv_init(void) hyper_alloc_mmu(); + x86_init.pci.arch_init = hv_pci_init; + /* * Register Hyper-V specific clocksource. */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 14357354cd28..b4bef819d5d5 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -388,5 +388,7 @@ #define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ #define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ +#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ +#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h index a7adb2bfbf0b..6b8ad6fa3979 100644 --- a/arch/x86/include/asm/crash.h +++ b/arch/x86/include/asm/crash.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_CRASH_H #define _ASM_X86_CRASH_H +struct kimage; + int crash_load_segments(struct kimage *image); int crash_copy_backup_region(struct kimage *image); int crash_setup_memmap_entries(struct kimage *image, diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 6390bd8c141b..5e12b2319d7a 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -159,7 +159,7 @@ extern pte_t *kmap_pte; extern pte_t *pkmap_page_table; void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); -void native_set_fixmap(enum fixed_addresses idx, +void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, phys_addr_t phys, pgprot_t flags); #ifndef CONFIG_PARAVIRT diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 038e4b63b56b..5cd7d4e1579d 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -6,7 +6,7 @@ * "Big Core" Processors (Branded as Core, Xeon, etc...) * * The "_X" parts are generally the EP and EX Xeons, or the - * "Extreme" ones, like Broadwell-E. + * "Extreme" ones, like Broadwell-E, or Atom microserver. * * Things ending in "2" are usually because we have no better * name for them. There's no processor called "SILVERMONT2". @@ -68,6 +68,7 @@ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */ #define INTEL_FAM6_ATOM_GOLDMONT_X 0x5F /* Denverton */ #define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */ +#define INTEL_FAM6_ATOM_TREMONT_X 0x86 /* Jacobsville */ /* Xeon Phi */ diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index f327236f0fa7..5125fca472bb 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -67,7 +67,7 @@ struct kimage; /* Memory to backup during crash kdump */ #define KEXEC_BACKUP_SRC_START (0UL) -#define KEXEC_BACKUP_SRC_END (640 * 1024UL) /* 640K */ +#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */ /* * CPU does not save ss and sp on stack if execution is already diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 94af073476ce..2cdf654ed132 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -277,6 +277,7 @@ struct kvm_rmap_head { struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; + struct list_head lpage_disallowed_link; /* * The following two entries are used to key the shadow page in the @@ -289,6 +290,7 @@ struct kvm_mmu_page { /* hold the gfn of each spte inside spt */ gfn_t *gfns; bool unsync; + bool lpage_disallowed; /* Can't be replaced by an equiv large page */ int root_count; /* Currently serving as active root */ unsigned int unsync_children; struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ @@ -779,6 +781,7 @@ struct kvm_arch { */ struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; + struct list_head lpage_disallowed_mmu_pages; struct kvm_page_track_notifier_node mmu_sp_tracker; struct kvm_page_track_notifier_head track_notifier_head; @@ -854,6 +857,8 @@ struct kvm_arch { bool x2apic_format; bool x2apic_broadcast_quirk_disabled; + + struct task_struct *nx_lpage_recovery_thread; }; struct kvm_vm_stat { @@ -867,6 +872,7 @@ struct kvm_vm_stat { ulong mmu_unsync; ulong remote_tlb_flush; ulong lpages; + ulong nx_lpage_splits; ulong max_mmu_page_hash_collisions; }; @@ -973,7 +979,7 @@ struct kvm_x86_ops { unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); - void (*tlb_flush)(struct kvm_vcpu *vcpu); + void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa); void (*run)(struct kvm_vcpu *vcpu); int (*handle_exit)(struct kvm_vcpu *vcpu); @@ -998,9 +1004,9 @@ struct kvm_x86_ops { void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); - void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); + void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); - void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); + int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index fda3bf75de6c..5761a86b88e0 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -84,6 +84,18 @@ * Microarchitectural Data * Sampling (MDS) vulnerabilities. */ +#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /* + * The processor is not susceptible to a + * machine check error due to modifying the + * code page size along with either the + * physical address or cache type + * without TLB invalidation. + */ +#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */ +#define ARCH_CAP_TAA_NO BIT(8) /* + * Not susceptible to + * TSX Async Abort (TAA) vulnerabilities. + */ #define MSR_IA32_FLUSH_CMD 0x0000010b #define L1D_FLUSH BIT(0) /* @@ -94,6 +106,10 @@ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e +#define MSR_IA32_TSX_CTRL 0x00000122 +#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ +#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ + #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 #define MSR_IA32_SYSENTER_EIP 0x00000176 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index f6b496d11097..b73a16a56e4f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -323,7 +323,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear); #include /** - * mds_clear_cpu_buffers - Mitigation for MDS vulnerability + * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability * * This uses the otherwise unused and obsolete VERW instruction in * combination with microcode which triggers a CPU buffer flush when the @@ -346,7 +346,7 @@ static inline void mds_clear_cpu_buffers(void) } /** - * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability + * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability * * Clear CPU buffers if the corresponding static key is enabled */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d55a0adbcf27..6a87eda9691e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -994,4 +994,11 @@ enum mds_mitigations { MDS_MITIGATION_VMWERV, }; +enum taa_mitigations { + TAA_MITIGATION_OFF, + TAA_MITIGATION_UCODE_NEEDED, + TAA_MITIGATION_VERW, + TAA_MITIGATION_TSX_DISABLED, +}; + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 14131dd06b29..8603d127f73c 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -231,24 +231,52 @@ static inline int regs_within_kernel_stack(struct pt_regs *regs, (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))); } +/** + * regs_get_kernel_stack_nth_addr() - get the address of the Nth entry on stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns the address of the @n th entry of the + * kernel stack which is specified by @regs. If the @n th entry is NOT in + * the kernel stack, this returns NULL. + */ +static inline unsigned long *regs_get_kernel_stack_nth_addr(struct pt_regs *regs, unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return addr; + else + return NULL; +} + +/* To avoid include hell, we can't include uaccess.h */ +extern long probe_kernel_read(void *dst, const void *src, size_t size); + /** * regs_get_kernel_stack_nth() - get Nth entry of the stack * @regs: pt_regs which contains kernel stack pointer. * @n: stack entry number. * * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which - * is specified by @regs. If the @n th entry is NOT in the kernel stack, + * is specified by @regs. If the @n th entry is NOT in the kernel stack * this returns 0. */ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) { - unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); - addr += n; - if (regs_within_kernel_stack(regs, (unsigned long)addr)) - return *addr; - else - return 0; + unsigned long *addr; + unsigned long val; + long ret; + + addr = regs_get_kernel_stack_nth_addr(regs, n); + if (addr) { + ret = probe_kernel_read(&val, addr, sizeof(val)); + if (!ret) + return val; + } + return 0; } #define arch_has_single_step() (1) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index fe2ee61880a8..a14730ca9d1e 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -175,16 +175,6 @@ static inline int wbinvd_on_all_cpus(void) extern unsigned disabled_cpus; #ifdef CONFIG_X86_LOCAL_APIC - -#ifndef CONFIG_X86_64 -static inline int logical_smp_processor_id(void) -{ - /* we don't want to mark this access volatile - bad code generation */ - return GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); -} - -#endif - extern int hard_smp_processor_id(void); #else /* CONFIG_X86_LOCAL_APIC */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ea2de324ab02..6415b4aead54 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1362,81 +1362,15 @@ static void lapic_setup_esr(void) oldvalue, value); } -/** - * setup_local_APIC - setup the local APIC - * - * Used to setup local APIC while initializing BSP or bringing up APs. - * Always called with preemption disabled. - */ -void setup_local_APIC(void) +static void apic_pending_intr_clear(void) { - int cpu = smp_processor_id(); + long long max_loops = cpu_khz ? cpu_khz : 1000000; + unsigned long long tsc = 0, ntsc; unsigned int value, queued; int i, j, acked = 0; - unsigned long long tsc = 0, ntsc; - long long max_loops = cpu_khz ? cpu_khz : 1000000; if (boot_cpu_has(X86_FEATURE_TSC)) tsc = rdtsc(); - - if (disable_apic) { - disable_ioapic_support(); - return; - } - - /* - * If this comes from kexec/kcrash the APIC might be enabled in - * SPIV. Soft disable it before doing further initialization. - */ - value = apic_read(APIC_SPIV); - value &= ~APIC_SPIV_APIC_ENABLED; - apic_write(APIC_SPIV, value); - -#ifdef CONFIG_X86_32 - /* Pound the ESR really hard over the head with a big hammer - mbligh */ - if (lapic_is_integrated() && apic->disable_esr) { - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - apic_write(APIC_ESR, 0); - } -#endif - perf_events_lapic_init(); - - /* - * Double-check whether this APIC is really registered. - * This is meaningless in clustered apic mode, so we skip it. - */ - BUG_ON(!apic->apic_id_registered()); - - /* - * Intel recommends to set DFR, LDR and TPR before enabling - * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel - * document number 292116). So here it goes... - */ - apic->init_apic_ldr(); - -#ifdef CONFIG_X86_32 - /* - * APIC LDR is initialized. If logical_apicid mapping was - * initialized during get_smp_config(), make sure it matches the - * actual value. - */ - i = early_per_cpu(x86_cpu_to_logical_apicid, cpu); - WARN_ON(i != BAD_APICID && i != logical_smp_processor_id()); - /* always use the value from LDR */ - early_per_cpu(x86_cpu_to_logical_apicid, cpu) = - logical_smp_processor_id(); -#endif - - /* - * Set Task Priority to 'accept all'. We never change this - * later on. - */ - value = apic_read(APIC_TASKPRI); - value &= ~APIC_TPRI_MASK; - apic_write(APIC_TASKPRI, value); - /* * After a crash, we no longer service the interrupts and a pending * interrupt from previous kernel might still have ISR bit set. @@ -1476,6 +1410,84 @@ void setup_local_APIC(void) } } while (queued && max_loops > 0); WARN_ON(max_loops <= 0); +} + +/** + * setup_local_APIC - setup the local APIC + * + * Used to setup local APIC while initializing BSP or bringing up APs. + * Always called with preemption disabled. + */ +void setup_local_APIC(void) +{ + int cpu = smp_processor_id(); + unsigned int value; + + + if (disable_apic) { + disable_ioapic_support(); + return; + } + + /* + * If this comes from kexec/kcrash the APIC might be enabled in + * SPIV. Soft disable it before doing further initialization. + */ + value = apic_read(APIC_SPIV); + value &= ~APIC_SPIV_APIC_ENABLED; + apic_write(APIC_SPIV, value); + +#ifdef CONFIG_X86_32 + /* Pound the ESR really hard over the head with a big hammer - mbligh */ + if (lapic_is_integrated() && apic->disable_esr) { + apic_write(APIC_ESR, 0); + apic_write(APIC_ESR, 0); + apic_write(APIC_ESR, 0); + apic_write(APIC_ESR, 0); + } +#endif + perf_events_lapic_init(); + + /* + * Double-check whether this APIC is really registered. + * This is meaningless in clustered apic mode, so we skip it. + */ + BUG_ON(!apic->apic_id_registered()); + + /* + * Intel recommends to set DFR, LDR and TPR before enabling + * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel + * document number 292116). So here it goes... + */ + apic->init_apic_ldr(); + +#ifdef CONFIG_X86_32 + if (apic->dest_logical) { + int logical_apicid, ldr_apicid; + + /* + * APIC LDR is initialized. If logical_apicid mapping was + * initialized during get_smp_config(), make sure it matches + * the actual value. + */ + logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); + ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); + if (logical_apicid != BAD_APICID) + WARN_ON(logical_apicid != ldr_apicid); + /* Always use the value from LDR. */ + early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid; + } +#endif + + /* + * Set Task Priority to 'accept all'. We never change this + * later on. + */ + value = apic_read(APIC_TASKPRI); + value &= ~APIC_TPRI_MASK; + apic_write(APIC_TASKPRI, value); + + apic_pending_intr_clear(); /* * Now that we are all set up, enable the APIC diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 566b7bc5deaa..2271adbc3c42 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1690,9 +1690,10 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data) static inline bool ioapic_irqd_mask(struct irq_data *data) { - /* If we are moving the irq we need to mask it */ + /* If we are moving the IRQ we need to mask it */ if (unlikely(irqd_is_setaffinity_pending(data))) { - mask_ioapic_irq(data); + if (!irqd_irq_masked(data)) + mask_ioapic_irq(data); return true; } return false; @@ -1729,7 +1730,9 @@ static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked) */ if (!io_apic_level_ack_pending(data->chip_data)) irq_move_masked_irq(data); - unmask_ioapic_irq(data); + /* If the IRQ is masked in the core, leave it: */ + if (!irqd_irq_masked(data)) + unmask_ioapic_irq(data); } } #else diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 570e8bb1f386..e13ddd19a76c 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -28,7 +28,7 @@ obj-y += cpuid-deps.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o -obj-$(CONFIG_CPU_SUP_INTEL) += intel.o +obj-$(CONFIG_CPU_SUP_INTEL) += intel.o tsx.o obj-$(CONFIG_CPU_SUP_AMD) += amd.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 8bf21bc7a190..7896a34f53b5 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -32,11 +32,15 @@ #include #include +#include "cpu.h" + static void __init spectre_v1_select_mitigation(void); static void __init spectre_v2_select_mitigation(void); static void __init ssb_select_mitigation(void); static void __init l1tf_select_mitigation(void); static void __init mds_select_mitigation(void); +static void __init mds_print_mitigation(void); +static void __init taa_select_mitigation(void); /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ u64 x86_spec_ctrl_base; @@ -103,6 +107,13 @@ void __init check_bugs(void) ssb_select_mitigation(); l1tf_select_mitigation(); mds_select_mitigation(); + taa_select_mitigation(); + + /* + * As MDS and TAA mitigations are inter-related, print MDS + * mitigation until after TAA mitigation selection is done. + */ + mds_print_mitigation(); arch_smt_update(); @@ -241,6 +252,12 @@ static void __init mds_select_mitigation(void) (mds_nosmt || cpu_mitigations_auto_nosmt())) cpu_smt_disable(false); } +} + +static void __init mds_print_mitigation(void) +{ + if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off()) + return; pr_info("%s\n", mds_strings[mds_mitigation]); } @@ -266,6 +283,113 @@ static int __init mds_cmdline(char *str) } early_param("mds", mds_cmdline); +#undef pr_fmt +#define pr_fmt(fmt) "TAA: " fmt + +/* Default mitigation for TAA-affected CPUs */ +static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW; +static bool taa_nosmt __ro_after_init; + +static const char * const taa_strings[] = { + [TAA_MITIGATION_OFF] = "Vulnerable", + [TAA_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode", + [TAA_MITIGATION_VERW] = "Mitigation: Clear CPU buffers", + [TAA_MITIGATION_TSX_DISABLED] = "Mitigation: TSX disabled", +}; + +static void __init taa_select_mitigation(void) +{ + u64 ia32_cap; + + if (!boot_cpu_has_bug(X86_BUG_TAA)) { + taa_mitigation = TAA_MITIGATION_OFF; + return; + } + + /* TSX previously disabled by tsx=off */ + if (!boot_cpu_has(X86_FEATURE_RTM)) { + taa_mitigation = TAA_MITIGATION_TSX_DISABLED; + goto out; + } + + if (cpu_mitigations_off()) { + taa_mitigation = TAA_MITIGATION_OFF; + return; + } + + /* + * TAA mitigation via VERW is turned off if both + * tsx_async_abort=off and mds=off are specified. + */ + if (taa_mitigation == TAA_MITIGATION_OFF && + mds_mitigation == MDS_MITIGATION_OFF) + goto out; + + if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) + taa_mitigation = TAA_MITIGATION_VERW; + else + taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; + + /* + * VERW doesn't clear the CPU buffers when MD_CLEAR=1 and MDS_NO=1. + * A microcode update fixes this behavior to clear CPU buffers. It also + * adds support for MSR_IA32_TSX_CTRL which is enumerated by the + * ARCH_CAP_TSX_CTRL_MSR bit. + * + * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode + * update is required. + */ + ia32_cap = x86_read_arch_cap_msr(); + if ( (ia32_cap & ARCH_CAP_MDS_NO) && + !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) + taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; + + /* + * TSX is enabled, select alternate mitigation for TAA which is + * the same as MDS. Enable MDS static branch to clear CPU buffers. + * + * For guests that can't determine whether the correct microcode is + * present on host, enable the mitigation for UCODE_NEEDED as well. + */ + static_branch_enable(&mds_user_clear); + + if (taa_nosmt || cpu_mitigations_auto_nosmt()) + cpu_smt_disable(false); + + /* + * Update MDS mitigation, if necessary, as the mds_user_clear is + * now enabled for TAA mitigation. + */ + if (mds_mitigation == MDS_MITIGATION_OFF && + boot_cpu_has_bug(X86_BUG_MDS)) { + mds_mitigation = MDS_MITIGATION_FULL; + mds_select_mitigation(); + } +out: + pr_info("%s\n", taa_strings[taa_mitigation]); +} + +static int __init tsx_async_abort_parse_cmdline(char *str) +{ + if (!boot_cpu_has_bug(X86_BUG_TAA)) + return 0; + + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) { + taa_mitigation = TAA_MITIGATION_OFF; + } else if (!strcmp(str, "full")) { + taa_mitigation = TAA_MITIGATION_VERW; + } else if (!strcmp(str, "full,nosmt")) { + taa_mitigation = TAA_MITIGATION_VERW; + taa_nosmt = true; + } + + return 0; +} +early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); + #undef pr_fmt #define pr_fmt(fmt) "Spectre V1 : " fmt @@ -772,13 +896,10 @@ static void update_mds_branch_idle(void) } #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" +#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" void arch_smt_update(void) { - /* Enhanced IBRS implies STIBP. No update required. */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) - return; - mutex_lock(&spec_ctrl_mutex); switch (spectre_v2_user) { @@ -804,6 +925,17 @@ void arch_smt_update(void) break; } + switch (taa_mitigation) { + case TAA_MITIGATION_VERW: + case TAA_MITIGATION_UCODE_NEEDED: + if (sched_smt_active()) + pr_warn_once(TAA_MSG_SMT); + break; + case TAA_MITIGATION_TSX_DISABLED: + case TAA_MITIGATION_OFF: + break; + } + mutex_unlock(&spec_ctrl_mutex); } @@ -1119,6 +1251,9 @@ void x86_spec_ctrl_setup_ap(void) x86_amd_ssb_disable(); } +bool itlb_multihit_kvm_mitigation; +EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation); + #undef pr_fmt #define pr_fmt(fmt) "L1TF: " fmt @@ -1274,11 +1409,24 @@ static ssize_t l1tf_show_state(char *buf) l1tf_vmx_states[l1tf_vmx_mitigation], sched_smt_active() ? "vulnerable" : "disabled"); } + +static ssize_t itlb_multihit_show_state(char *buf) +{ + if (itlb_multihit_kvm_mitigation) + return sprintf(buf, "KVM: Mitigation: Split huge pages\n"); + else + return sprintf(buf, "KVM: Vulnerable\n"); +} #else static ssize_t l1tf_show_state(char *buf) { return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); } + +static ssize_t itlb_multihit_show_state(char *buf) +{ + return sprintf(buf, "Processor vulnerable\n"); +} #endif static ssize_t mds_show_state(char *buf) @@ -1298,6 +1446,21 @@ static ssize_t mds_show_state(char *buf) sched_smt_active() ? "vulnerable" : "disabled"); } +static ssize_t tsx_async_abort_show_state(char *buf) +{ + if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) || + (taa_mitigation == TAA_MITIGATION_OFF)) + return sprintf(buf, "%s\n", taa_strings[taa_mitigation]); + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + return sprintf(buf, "%s; SMT Host state unknown\n", + taa_strings[taa_mitigation]); + } + + return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation], + sched_smt_active() ? "vulnerable" : "disabled"); +} + static char *stibp_state(void) { if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) @@ -1363,6 +1526,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_MDS: return mds_show_state(buf); + case X86_BUG_TAA: + return tsx_async_abort_show_state(buf); + + case X86_BUG_ITLB_MULTIHIT: + return itlb_multihit_show_state(buf); + default: break; } @@ -1399,4 +1568,14 @@ ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *bu { return cpu_show_common(dev, attr, buf, X86_BUG_MDS); } + +ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_TAA); +} + +ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 551c6bed7c8c..7b4141889919 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -360,7 +360,7 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c) * cpuid bit to be set. We need to ensure that we * update that bit in this CPU's "cpu_info". */ - get_cpu_cap(c); + set_cpu_cap(c, X86_FEATURE_OSPKE); } #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS @@ -897,13 +897,14 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) c->x86_cache_bits = c->x86_phys_bits; } -#define NO_SPECULATION BIT(0) -#define NO_MELTDOWN BIT(1) -#define NO_SSB BIT(2) -#define NO_L1TF BIT(3) -#define NO_MDS BIT(4) -#define MSBDS_ONLY BIT(5) -#define NO_SWAPGS BIT(6) +#define NO_SPECULATION BIT(0) +#define NO_MELTDOWN BIT(1) +#define NO_SSB BIT(2) +#define NO_L1TF BIT(3) +#define NO_MDS BIT(4) +#define MSBDS_ONLY BIT(5) +#define NO_SWAPGS BIT(6) +#define NO_ITLB_MULTIHIT BIT(7) #define VULNWL(_vendor, _family, _model, _whitelist) \ { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } @@ -921,26 +922,26 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), /* Intel Family 6 */ - VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION), - VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION), - VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION), - VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), - VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), + VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), - VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), VULNWL_INTEL(CORE_YONAH, NO_SSB), - VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS), + VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), - VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS), - VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS), - VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS), + VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), /* * Technically, swapgs isn't serializing on AMD (despite it previously @@ -950,14 +951,16 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { * good enough for our purposes. */ + VULNWL_INTEL(ATOM_TREMONT_X, NO_ITLB_MULTIHIT), + /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), {} }; @@ -968,19 +971,30 @@ static bool __init cpu_matches(unsigned long which) return m && !!(m->driver_data & which); } -static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) +u64 x86_read_arch_cap_msr(void) { u64 ia32_cap = 0; + if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + + return ia32_cap; +} + +static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) +{ + u64 ia32_cap = x86_read_arch_cap_msr(); + + /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ + if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); + if (cpu_matches(NO_SPECULATION)) return; setup_force_cpu_bug(X86_BUG_SPECTRE_V1); setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); - if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); @@ -997,6 +1011,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (!cpu_matches(NO_SWAPGS)) setup_force_cpu_bug(X86_BUG_SWAPGS); + /* + * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when: + * - TSX is supported or + * - TSX_CTRL is present + * + * TSX_CTRL check is needed for cases when TSX could be disabled before + * the kernel boot e.g. kexec. + * TSX_CTRL check alone is not sufficient for cases when the microcode + * update is not present or running as guest that don't get TSX_CTRL. + */ + if (!(ia32_cap & ARCH_CAP_TAA_NO) && + (cpu_has(c, X86_FEATURE_RTM) || + (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) + setup_force_cpu_bug(X86_BUG_TAA); + if (cpu_matches(NO_MELTDOWN)) return; @@ -1037,6 +1066,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) memset(&c->x86_capability, 0, sizeof c->x86_capability); c->extended_cpuid_level = 0; + if (!have_cpuid_p()) + identify_cpu_without_cpuid(c); + /* cyrix could have cpuid enabled via c_identify()*/ if (have_cpuid_p()) { cpu_detect(c); @@ -1053,7 +1085,6 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_bsp_init) this_cpu->c_bsp_init(c); } else { - identify_cpu_without_cpuid(c); setup_clear_cpu_cap(X86_FEATURE_CPUID); } @@ -1407,6 +1438,7 @@ void __init identify_boot_cpu(void) enable_sep_cpu(); #endif cpu_detect_tlb(&boot_cpu_data); + tsx_init(); } void identify_secondary_cpu(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index cca588407dca..db10a63687d3 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -45,6 +45,22 @@ struct _tlb_table { extern const struct cpu_dev *const __x86_cpu_dev_start[], *const __x86_cpu_dev_end[]; +#ifdef CONFIG_CPU_SUP_INTEL +enum tsx_ctrl_states { + TSX_CTRL_ENABLE, + TSX_CTRL_DISABLE, + TSX_CTRL_NOT_SUPPORTED, +}; + +extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state; + +extern void __init tsx_init(void); +extern void tsx_enable(void); +extern void tsx_disable(void); +#else +static inline void tsx_init(void) { } +#endif /* CONFIG_CPU_SUP_INTEL */ + extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); extern int detect_extended_topology_early(struct cpuinfo_x86 *c); @@ -54,4 +70,6 @@ unsigned int aperfmperf_get_khz(int cpu); extern void x86_spec_ctrl_setup_ap(void); +extern u64 x86_read_arch_cap_msr(void); + #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index fa61c870ada9..1d9b8aaea06c 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -437,7 +437,7 @@ static void cyrix_identify(struct cpuinfo_x86 *c) /* enable MAPEN */ setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable cpuid */ - setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); + setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* disable MAPEN */ setCx86(CX86_CCR3, ccr3); local_irq_restore(flags); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 574dcdc092ab..3a5ea741701b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -695,6 +695,11 @@ static void init_intel(struct cpuinfo_x86 *c) init_intel_energy_perf(c); init_intel_misc_features(c); + + if (tsx_ctrl_state == TSX_CTRL_ENABLE) + tsx_enable(); + if (tsx_ctrl_state == TSX_CTRL_DISABLE) + tsx_disable(); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 665d0f6cd62f..3f731d7f04bf 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -526,7 +526,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) if (static_branch_unlikely(&rdt_mon_enable_key)) rmdir_mondata_subdir_allrdtgrp(r, d->id); list_del(&d->list); - if (is_mbm_enabled()) + if (r->mon_capable && is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) { /* diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c index f6ea94f8954a..f892cb0b485e 100644 --- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c +++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c @@ -313,6 +313,10 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) int ret = 0; rdtgrp = rdtgroup_kn_lock_live(of->kn); + if (!rdtgrp) { + ret = -ENOENT; + goto out; + } md.priv = of->kn->priv; resid = md.u.rid; diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 2dae1b3c42fc..0ec30b2384c0 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1107,7 +1107,7 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, if (rdt_mon_capable) { ret = mongroup_create_dir(rdtgroup_default.kn, - NULL, "mon_groups", + &rdtgroup_default, "mon_groups", &kn_mongrp); if (ret) { dentry = ERR_PTR(ret); @@ -1260,7 +1260,11 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp) list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) { free_rmid(sentry->mon.rmid); list_del(&sentry->mon.crdtgrp_list); - kfree(sentry); + + if (atomic_read(&sentry->waitcount) != 0) + sentry->flags = RDT_DELETED; + else + kfree(sentry); } } @@ -1294,7 +1298,11 @@ static void rmdir_all_sub(void) kernfs_remove(rdtgrp->kn); list_del(&rdtgrp->rdtgroup_list); - kfree(rdtgrp); + + if (atomic_read(&rdtgrp->waitcount) != 0) + rdtgrp->flags = RDT_DELETED; + else + kfree(rdtgrp); } /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ update_closid_rmid(cpu_online_mask, &rdtgroup_default); @@ -1491,7 +1499,7 @@ static int mkdir_mondata_all(struct kernfs_node *parent_kn, /* * Create the mon_data directory first. */ - ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn); + ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn); if (ret) return ret; @@ -1525,7 +1533,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, uint files = 0; int ret; - prdtgrp = rdtgroup_kn_lock_live(prgrp_kn); + prdtgrp = rdtgroup_kn_lock_live(parent_kn); if (!prdtgrp) { ret = -ENODEV; goto out_unlock; @@ -1581,7 +1589,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, kernfs_activate(kn); /* - * The caller unlocks the prgrp_kn upon success. + * The caller unlocks the parent_kn upon success. */ return 0; @@ -1592,7 +1600,7 @@ out_destroy: out_free_rgrp: kfree(rdtgrp); out_unlock: - rdtgroup_kn_unlock(prgrp_kn); + rdtgroup_kn_unlock(parent_kn); return ret; } @@ -1630,7 +1638,7 @@ static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn, */ list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list); - rdtgroup_kn_unlock(prgrp_kn); + rdtgroup_kn_unlock(parent_kn); return ret; } @@ -1667,7 +1675,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, * Create an empty mon_groups directory to hold the subset * of tasks and cpus to monitor. */ - ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL); + ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL); if (ret) goto out_id_free; } @@ -1680,7 +1688,7 @@ out_id_free: out_common_fail: mkdir_rdt_prepare_clean(rdtgrp); out_unlock: - rdtgroup_kn_unlock(prgrp_kn); + rdtgroup_kn_unlock(parent_kn); return ret; } @@ -1792,11 +1800,6 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp, closid_free(rdtgrp->closid); free_rmid(rdtgrp->mon.rmid); - /* - * Free all the child monitor group rmids. - */ - free_all_child_rdtgrp(rdtgrp); - list_del(&rdtgrp->rdtgroup_list); /* @@ -1806,6 +1809,11 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp, kernfs_get(kn); kernfs_remove(rdtgrp->kn); + /* + * Free all the child monitor group rmids. + */ + free_all_child_rdtgrp(rdtgrp); + return 0; } diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 8fec687b3e44..e57b59762f9f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -46,8 +46,6 @@ static struct mce i_mce; static struct dentry *dfs_inj; -static u8 n_banks; - #define MAX_FLAG_OPT_SIZE 4 #define NBCFG 0x44 @@ -108,6 +106,9 @@ static void setup_inj_struct(struct mce *m) memset(m, 0, sizeof(struct mce)); m->cpuvendor = boot_cpu_data.x86_vendor; + m->time = ktime_get_real_seconds(); + m->cpuid = cpuid_eax(1); + m->microcode = boot_cpu_data.microcode; } /* Update fake mce registers on current CPU. */ @@ -567,15 +568,24 @@ err: static int inj_bank_set(void *data, u64 val) { struct mce *m = (struct mce *)data; + u8 n_banks; + u64 cap; + + /* Get bank count on target CPU so we can handle non-uniform values. */ + rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap); + n_banks = cap & MCG_BANKCNT_MASK; if (val >= n_banks) { - pr_err("Non-existent MCE bank: %llu\n", val); + pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu); return -EINVAL; } m->bank = val; do_inject(); + /* Reset injection struct */ + setup_inj_struct(&i_mce); + return 0; } @@ -659,10 +669,6 @@ static struct dfs_node { static int __init debugfs_init(void) { unsigned int i; - u64 cap; - - rdmsrl(MSR_IA32_MCG_CAP, cap); - n_banks = cap & MCG_BANKCNT_MASK; dfs_inj = debugfs_create_dir("mce-inject", NULL); if (!dfs_inj) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 4f3be91f0b0b..95c09db1bba2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -802,8 +802,8 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, if (quirk_no_way_out) quirk_no_way_out(i, m, regs); + m->bank = i; if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) { - m->bank = i; mce_read_aux(m, i); *msg = tmp; return 1; @@ -1499,13 +1499,12 @@ EXPORT_SYMBOL_GPL(mce_notify_irq); static int __mcheck_cpu_mce_banks_init(void) { int i; - u8 num_banks = mca_cfg.banks; - mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL); + mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL); if (!mce_banks) return -ENOMEM; - for (i = 0; i < num_banks; i++) { + for (i = 0; i < MAX_NR_BANKS; i++) { struct mce_bank *b = &mce_banks[i]; b->ctl = -1ULL; @@ -1519,28 +1518,19 @@ static int __mcheck_cpu_mce_banks_init(void) */ static int __mcheck_cpu_cap_init(void) { - unsigned b; u64 cap; + u8 b; rdmsrl(MSR_IA32_MCG_CAP, cap); b = cap & MCG_BANKCNT_MASK; - if (!mca_cfg.banks) - pr_info("CPU supports %d MCE banks\n", b); - - if (b > MAX_NR_BANKS) { - pr_warn("Using only %u machine check banks out of %u\n", - MAX_NR_BANKS, b); + if (WARN_ON_ONCE(b > MAX_NR_BANKS)) b = MAX_NR_BANKS; - } - /* Don't support asymmetric configurations today */ - WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks); - mca_cfg.banks = b; + mca_cfg.banks = max(mca_cfg.banks, b); if (!mce_banks) { int err = __mcheck_cpu_mce_banks_init(); - if (err) return err; } @@ -1660,36 +1650,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (c->x86 == 0x15 && c->x86_model <= 0xf) mce_flags.overflow_recov = 1; - /* - * Turn off MC4_MISC thresholding banks on those models since - * they're not supported there. - */ - if (c->x86 == 0x15 && - (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) { - int i; - u64 hwcr; - bool need_toggle; - u32 msrs[] = { - 0x00000413, /* MC4_MISC0 */ - 0xc0000408, /* MC4_MISC1 */ - }; - - rdmsrl(MSR_K7_HWCR, hwcr); - - /* McStatusWrEn has to be set */ - need_toggle = !(hwcr & BIT(18)); - - if (need_toggle) - wrmsrl(MSR_K7_HWCR, hwcr | BIT(18)); - - /* Clear CntP bit safely */ - for (i = 0; i < ARRAY_SIZE(msrs); i++) - msr_clear_bit(msrs[i], 62); - - /* restore old settings */ - if (need_toggle) - wrmsrl(MSR_K7_HWCR, hwcr); - } } if (c->x86_vendor == X86_VENDOR_INTEL) { @@ -2500,6 +2460,8 @@ EXPORT_SYMBOL_GPL(mcsafe_key); static int __init mcheck_late_init(void) { + pr_info("Using %d MCE banks\n", mca_cfg.banks); + if (mca_cfg.recovery) static_branch_inc(&mcsafe_key); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 4fa97a44e73f..bbe94b682119 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -228,10 +228,10 @@ static void smca_configure(unsigned int bank, unsigned int cpu) } /* Return early if this bank was already initialized. */ - if (smca_banks[bank].hwid) + if (smca_banks[bank].hwid && smca_banks[bank].hwid->hwid_mcatype != 0) return; - if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { + if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { pr_warn("Failed to read MCA_IPID for bank %d\n", bank); return; } @@ -544,6 +544,40 @@ out: return offset; } +/* + * Turn off MC4_MISC thresholding banks on all family 0x15 models since + * they're not supported there. + */ +void disable_err_thresholding(struct cpuinfo_x86 *c) +{ + int i; + u64 hwcr; + bool need_toggle; + u32 msrs[] = { + 0x00000413, /* MC4_MISC0 */ + 0xc0000408, /* MC4_MISC1 */ + }; + + if (c->x86 != 0x15) + return; + + rdmsrl(MSR_K7_HWCR, hwcr); + + /* McStatusWrEn has to be set */ + need_toggle = !(hwcr & BIT(18)); + + if (need_toggle) + wrmsrl(MSR_K7_HWCR, hwcr | BIT(18)); + + /* Clear CntP bit safely */ + for (i = 0; i < ARRAY_SIZE(msrs); i++) + msr_clear_bit(msrs[i], 62); + + /* restore old settings */ + if (need_toggle) + wrmsrl(MSR_K7_HWCR, hwcr); +} + /* cpu init entry point, called from mce.c with preempt off */ void mce_amd_feature_init(struct cpuinfo_x86 *c) { @@ -551,6 +585,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) unsigned int bank, block, cpu = smp_processor_id(); int offset = -1; + disable_err_thresholding(c); + for (bank = 0; bank < mca_cfg.banks; ++bank) { if (mce_flags.smca) smca_configure(bank, cpu); @@ -1080,9 +1116,12 @@ static const struct sysfs_ops threshold_ops = { .store = store, }; +static void threshold_block_release(struct kobject *kobj); + static struct kobj_type threshold_ktype = { .sysfs_ops = &threshold_ops, .default_attrs = default_attrs, + .release = threshold_block_release, }; static const char *get_name(unsigned int bank, struct threshold_block *b) @@ -1115,8 +1154,9 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return buf_mcatype; } -static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, - unsigned int block, u32 address) +static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb, + unsigned int bank, unsigned int block, + u32 address) { struct threshold_block *b = NULL; u32 low, high; @@ -1160,16 +1200,12 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, INIT_LIST_HEAD(&b->miscj); - if (per_cpu(threshold_banks, cpu)[bank]->blocks) { - list_add(&b->miscj, - &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); - } else { - per_cpu(threshold_banks, cpu)[bank]->blocks = b; - } + if (tb->blocks) + list_add(&b->miscj, &tb->blocks->miscj); + else + tb->blocks = b; - err = kobject_init_and_add(&b->kobj, &threshold_ktype, - per_cpu(threshold_banks, cpu)[bank]->kobj, - get_name(bank, b)); + err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b)); if (err) goto out_free; recurse: @@ -1177,7 +1213,7 @@ recurse: if (!address) return 0; - err = allocate_threshold_blocks(cpu, bank, block, address); + err = allocate_threshold_blocks(cpu, tb, bank, block, address); if (err) goto out_free; @@ -1262,8 +1298,6 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) goto out_free; } - per_cpu(threshold_banks, cpu)[bank] = b; - if (is_shared_bank(bank)) { refcount_set(&b->cpus, 1); @@ -1274,9 +1308,13 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) } } - err = allocate_threshold_blocks(cpu, bank, 0, msr_ops.misc(bank)); - if (!err) - goto out; + err = allocate_threshold_blocks(cpu, b, bank, 0, msr_ops.misc(bank)); + if (err) + goto out_free; + + per_cpu(threshold_banks, cpu)[bank] = b; + + return 0; out_free: kfree(b); @@ -1285,8 +1323,12 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) return err; } -static void deallocate_threshold_block(unsigned int cpu, - unsigned int bank) +static void threshold_block_release(struct kobject *kobj) +{ + kfree(to_block(kobj)); +} + +static void deallocate_threshold_block(unsigned int cpu, unsigned int bank) { struct threshold_block *pos = NULL; struct threshold_block *tmp = NULL; @@ -1296,13 +1338,11 @@ static void deallocate_threshold_block(unsigned int cpu, return; list_for_each_entry_safe(pos, tmp, &head->blocks->miscj, miscj) { - kobject_put(&pos->kobj); list_del(&pos->miscj); - kfree(pos); + kobject_put(&pos->kobj); } - kfree(per_cpu(threshold_banks, cpu)[bank]->blocks); - per_cpu(threshold_banks, cpu)[bank]->blocks = NULL; + kobject_put(&head->blocks->kobj); } static void __threshold_remove_blocks(struct threshold_bank *b) diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index d05be307d081..1d87b85150db 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -489,17 +489,18 @@ static void intel_ppin_init(struct cpuinfo_x86 *c) return; if ((val & 3UL) == 1UL) { - /* PPIN available but disabled: */ + /* PPIN locked in disabled mode */ return; } - /* If PPIN is disabled, but not locked, try to enable: */ - if (!(val & 3UL)) { + /* If PPIN is disabled, try to enable */ + if (!(val & 2UL)) { wrmsrl_safe(MSR_PPIN_CTL, val | 2UL); rdmsrl_safe(MSR_PPIN_CTL, &val); } - if ((val & 3UL) == 2UL) + /* Is the enable bit set? */ + if (val & 2UL) set_cpu_cap(c, X86_FEATURE_INTEL_PPIN); } } diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index ee229ceee745..ec6a07b04fdb 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -185,7 +185,7 @@ static void therm_throt_process(bool new_event, int event, int level) /* if we just entered the thermal event */ if (new_event) { if (event == THERMAL_THROTTLING_EVENT) - pr_crit("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", + pr_warn("CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, level == CORE_LEVEL ? "Core" : "Package", state->count); diff --git a/arch/x86/kernel/cpu/tsx.c b/arch/x86/kernel/cpu/tsx.c new file mode 100644 index 000000000000..032509adf9de --- /dev/null +++ b/arch/x86/kernel/cpu/tsx.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Transactional Synchronization Extensions (TSX) control. + * + * Copyright (C) 2019 Intel Corporation + * + * Author: + * Pawan Gupta + */ + +#include + +#include + +#include "cpu.h" + +enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED; + +void tsx_disable(void) +{ + u64 tsx; + + rdmsrl(MSR_IA32_TSX_CTRL, tsx); + + /* Force all transactions to immediately abort */ + tsx |= TSX_CTRL_RTM_DISABLE; + + /* + * Ensure TSX support is not enumerated in CPUID. + * This is visible to userspace and will ensure they + * do not waste resources trying TSX transactions that + * will always abort. + */ + tsx |= TSX_CTRL_CPUID_CLEAR; + + wrmsrl(MSR_IA32_TSX_CTRL, tsx); +} + +void tsx_enable(void) +{ + u64 tsx; + + rdmsrl(MSR_IA32_TSX_CTRL, tsx); + + /* Enable the RTM feature in the cpu */ + tsx &= ~TSX_CTRL_RTM_DISABLE; + + /* + * Ensure TSX support is enumerated in CPUID. + * This is visible to userspace and will ensure they + * can enumerate and use the TSX feature. + */ + tsx &= ~TSX_CTRL_CPUID_CLEAR; + + wrmsrl(MSR_IA32_TSX_CTRL, tsx); +} + +static bool __init tsx_ctrl_is_supported(void) +{ + u64 ia32_cap = x86_read_arch_cap_msr(); + + /* + * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this + * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. + * + * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a + * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES + * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get + * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, + * tsx= cmdline requests will do nothing on CPUs without + * MSR_IA32_TSX_CTRL support. + */ + return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); +} + +static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) +{ + if (boot_cpu_has_bug(X86_BUG_TAA)) + return TSX_CTRL_DISABLE; + + return TSX_CTRL_ENABLE; +} + +void __init tsx_init(void) +{ + char arg[5] = {}; + int ret; + + if (!tsx_ctrl_is_supported()) + return; + + ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg)); + if (ret >= 0) { + if (!strcmp(arg, "on")) { + tsx_ctrl_state = TSX_CTRL_ENABLE; + } else if (!strcmp(arg, "off")) { + tsx_ctrl_state = TSX_CTRL_DISABLE; + } else if (!strcmp(arg, "auto")) { + tsx_ctrl_state = x86_get_tsx_auto_mode(); + } else { + tsx_ctrl_state = TSX_CTRL_DISABLE; + pr_err("tsx: invalid option, defaulting to off\n"); + } + } else { + /* tsx= not provided */ + if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO)) + tsx_ctrl_state = x86_get_tsx_auto_mode(); + else if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF)) + tsx_ctrl_state = TSX_CTRL_DISABLE; + else + tsx_ctrl_state = TSX_CTRL_ENABLE; + } + + if (tsx_ctrl_state == TSX_CTRL_DISABLE) { + tsx_disable(); + + /* + * tsx_disable() will change the state of the RTM and HLE CPUID + * bits. Clear them here since they are now expected to be not + * set. + */ + setup_clear_cpu_cap(X86_FEATURE_RTM); + setup_clear_cpu_cap(X86_FEATURE_HLE); + } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) { + + /* + * HW defaults TSX to be enabled at bootup. + * We may still need the TSX enable support + * during init for special cases like + * kexec after TSX is disabled. + */ + tsx_enable(); + + /* + * tsx_enable() will change the state of the RTM and HLE CPUID + * bits. Force them here since they are now expected to be set. + */ + setup_force_cpu_cap(X86_FEATURE_RTM); + setup_force_cpu_cap(X86_FEATURE_HLE); + } +} diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 7c67d8939f3e..e00ccbcc2913 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -145,13 +145,31 @@ unsigned long __head __startup_64(unsigned long physaddr, * we might write invalid pmds, when the kernel is relocated * cleanup_highmap() fixes this up along with the mappings * beyond _end. + * + * Only the region occupied by the kernel image has so far + * been checked against the table of usable memory regions + * provided by the firmware, so invalidate pages outside that + * region. A page table entry that maps to a reserved area of + * memory would allow processor speculation into that area, + * and on some hardware (particularly the UV platform) even + * speculative access to some reserved areas is caught as an + * error, causing the BIOS to halt the system. */ pmd = fixup_pointer(level2_kernel_pgt, physaddr); - for (i = 0; i < PTRS_PER_PMD; i++) { + + /* invalidate pages before the kernel image */ + for (i = 0; i < pmd_index((unsigned long)_text); i++) + pmd[i] &= ~_PAGE_PRESENT; + + /* fixup pages that are part of the kernel image */ + for (; i <= pmd_index((unsigned long)_end); i++) if (pmd[i] & _PAGE_PRESENT) pmd[i] += load_delta; - } + + /* invalidate pages after the kernel image */ + for (; i < PTRS_PER_PMD; i++) + pmd[i] &= ~_PAGE_PRESENT; /* * Fixup phys_base - remove the memory encryption mask to obtain diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8e36f249646e..904e18bb38c5 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -438,7 +438,7 @@ static void kgdb_disable_hw_debug(struct pt_regs *regs) */ void kgdb_roundup_cpus(unsigned long flags) { - apic->send_IPI_allbutself(APIC_DM_NMI); + apic->send_IPI_allbutself(NMI_VECTOR); } #endif diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 584cdd475bb3..734549492a18 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "tls.h" @@ -343,6 +344,49 @@ static int set_segment_reg(struct task_struct *task, return 0; } +static unsigned long task_seg_base(struct task_struct *task, + unsigned short selector) +{ + unsigned short idx = selector >> 3; + unsigned long base; + + if (likely((selector & SEGMENT_TI_MASK) == 0)) { + if (unlikely(idx >= GDT_ENTRIES)) + return 0; + + /* + * There are no user segments in the GDT with nonzero bases + * other than the TLS segments. + */ + if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX) + return 0; + + idx -= GDT_ENTRY_TLS_MIN; + base = get_desc_base(&task->thread.tls_array[idx]); + } else { +#ifdef CONFIG_MODIFY_LDT_SYSCALL + struct ldt_struct *ldt; + + /* + * If performance here mattered, we could protect the LDT + * with RCU. This is a slow path, though, so we can just + * take the mutex. + */ + mutex_lock(&task->mm->context.lock); + ldt = task->mm->context.ldt; + if (unlikely(idx >= ldt->nr_entries)) + base = 0; + else + base = get_desc_base(ldt->entries + idx); + mutex_unlock(&task->mm->context.lock); +#else + base = 0; +#endif + } + + return base; +} + #endif /* CONFIG_X86_32 */ static unsigned long get_flags(struct task_struct *task) @@ -436,18 +480,16 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset) #ifdef CONFIG_X86_64 case offsetof(struct user_regs_struct, fs_base): { - /* - * XXX: This will not behave as expected if called on - * current or if fsindex != 0. - */ - return task->thread.fsbase; + if (task->thread.fsindex == 0) + return task->thread.fsbase; + else + return task_seg_base(task, task->thread.fsindex); } case offsetof(struct user_regs_struct, gs_base): { - /* - * XXX: This will not behave as expected if called on - * current or if fsindex != 0. - */ - return task->thread.gsbase; + if (task->thread.gsindex == 0) + return task->thread.gsbase; + else + return task_seg_base(task, task->thread.gsindex); } #endif } diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c index 85195d447a92..f3215346e47f 100644 --- a/arch/x86/kernel/sysfb_simplefb.c +++ b/arch/x86/kernel/sysfb_simplefb.c @@ -94,11 +94,11 @@ __init int create_simplefb(const struct screen_info *si, if (si->orig_video_isVGA == VIDEO_TYPE_VLFB) size <<= 16; length = mode->height * mode->stride; - length = PAGE_ALIGN(length); if (length > size) { printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); return -EINVAL; } + length = PAGE_ALIGN(length); /* setup IORESOURCE_MEM as framebuffer memory */ memset(&res, 0, sizeof(res)); diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 7a87ef1f5b5e..73391c1bd2a9 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -987,7 +987,7 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, " "%%ip=%#lx\n", current->pid, regs->sp, regs->ip); - force_sig_info(SIGSEGV, SEND_SIG_FORCED, current); + force_sig(SIGSEGV, current); } return -1; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 5c82b4bc4a68..1152afad524f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -291,13 +291,18 @@ static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, { switch (func) { case 0: - entry->eax = 1; /* only one leaf currently */ + entry->eax = 7; ++*nent; break; case 1: entry->ecx = F(MOVBE); ++*nent; break; + case 7: + entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + if (index == 0) + entry->ecx = F(RDPID); + ++*nent; default: break; } @@ -404,7 +409,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, r = -E2BIG; - if (*nent >= maxnent) + if (WARN_ON(*nent >= maxnent)) goto out; do_cpuid_1_ent(entry, function, index); @@ -481,8 +486,16 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* PKU is not yet implemented for shadow paging. */ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) entry->ecx &= ~F(PKU); + entry->edx &= kvm_cpuid_7_0_edx_x86_features; cpuid_mask(&entry->edx, CPUID_7_EDX); + if (boot_cpu_has(X86_FEATURE_IBPB) && + boot_cpu_has(X86_FEATURE_IBRS)) + entry->edx |= F(SPEC_CTRL); + if (boot_cpu_has(X86_FEATURE_STIBP)) + entry->edx |= F(INTEL_STIBP); + if (boot_cpu_has(X86_FEATURE_SSBD)) + entry->edx |= F(SPEC_CTRL_SSBD); /* * We emulate ARCH_CAPABILITIES in software even * if the host doesn't support it. @@ -699,6 +712,9 @@ out: static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func, u32 idx, int *nent, int maxnent, unsigned int type) { + if (*nent >= maxnent) + return -E2BIG; + if (type == KVM_GET_EMULATED_CPUID) return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index eb8b843325f4..4cc8a4a6f1d0 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3539,6 +3539,16 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_rdpid(struct x86_emulate_ctxt *ctxt) +{ + u64 tsc_aux = 0; + + if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux)) + return emulate_gp(ctxt, 0); + ctxt->dst.val = tsc_aux; + return X86EMUL_CONTINUE; +} + static int em_rdtsc(struct x86_emulate_ctxt *ctxt) { u64 tsc = 0; @@ -4431,10 +4441,20 @@ static const struct opcode group8[] = { F(DstMem | SrcImmByte | Lock | PageTable, em_btc), }; +/* + * The "memory" destination is actually always a register, since we come + * from the register case of group9. + */ +static const struct gprefix pfx_0f_c7_7 = { + N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp), +}; + + static const struct group_dual group9 = { { N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N, }, { - N, N, N, N, N, N, N, N, + N, N, N, N, N, N, N, + GP(0, &pfx_0f_c7_7), } }; static const struct opcode group11[] = { @@ -5042,6 +5062,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) ctxt->fetch.ptr = ctxt->fetch.data; ctxt->fetch.end = ctxt->fetch.data + insn_len; ctxt->opcode_len = 1; + ctxt->intercept = x86_intercept_none; if (insn_len > 0) memcpy(ctxt->fetch.data, insn, insn_len); else { @@ -5094,16 +5115,28 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) ctxt->ad_bytes = def_ad_bytes ^ 6; break; case 0x26: /* ES override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_ES; + break; case 0x2e: /* CS override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_CS; + break; case 0x36: /* SS override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_SS; + break; case 0x3e: /* DS override */ has_seg_override = true; - ctxt->seg_override = (ctxt->b >> 3) & 3; + ctxt->seg_override = VCPU_SREG_DS; break; case 0x64: /* FS override */ + has_seg_override = true; + ctxt->seg_override = VCPU_SREG_FS; + break; case 0x65: /* GS override */ has_seg_override = true; - ctxt->seg_override = ctxt->b & 7; + ctxt->seg_override = VCPU_SREG_GS; break; case 0x40 ... 0x4f: /* REX */ if (mode != X86EMUL_MODE_PROT64) @@ -5187,10 +5220,15 @@ done_prefixes: } break; case Escape: - if (ctxt->modrm > 0xbf) - opcode = opcode.u.esc->high[ctxt->modrm - 0xc0]; - else + if (ctxt->modrm > 0xbf) { + size_t size = ARRAY_SIZE(opcode.u.esc->high); + u32 index = array_index_nospec( + ctxt->modrm - 0xc0, size); + + opcode = opcode.u.esc->high[index]; + } else { opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7]; + } break; case InstrDual: if ((ctxt->modrm >> 6) == 3) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 5d13abecb384..2fba82b06c2d 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -747,11 +747,12 @@ static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, u32 index, u64 *pdata) { struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + size_t size = ARRAY_SIZE(hv->hv_crash_param); - if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) + if (WARN_ON_ONCE(index >= size)) return -EINVAL; - *pdata = hv->hv_crash_param[index]; + *pdata = hv->hv_crash_param[array_index_nospec(index, size)]; return 0; } @@ -790,11 +791,12 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 index, u64 data) { struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; + size_t size = ARRAY_SIZE(hv->hv_crash_param); - if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) + if (WARN_ON_ONCE(index >= size)) return -EINVAL; - hv->hv_crash_param[index] = data; + hv->hv_crash_param[array_index_nospec(index, size)] = data; return 0; } diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index bdcd4139eca9..38a36a1cc87f 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -460,10 +460,14 @@ static int picdev_write(struct kvm_pic *s, switch (addr) { case 0x20: case 0x21: + pic_lock(s); + pic_ioport_write(&s->pics[0], addr, data); + pic_unlock(s); + break; case 0xa0: case 0xa1: pic_lock(s); - pic_ioport_write(&s->pics[addr >> 7], addr, data); + pic_ioport_write(&s->pics[1], addr, data); pic_unlock(s); break; case 0x4d0: diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 9d270ba9643c..dab6940ea99c 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -73,13 +74,14 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, default: { u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; - u64 redir_content; + u64 redir_content = ~0ULL; - if (redir_index < IOAPIC_NUM_PINS) - redir_content = - ioapic->redirtbl[redir_index].bits; - else - redir_content = ~0ULL; + if (redir_index < IOAPIC_NUM_PINS) { + u32 index = array_index_nospec( + redir_index, IOAPIC_NUM_PINS); + + redir_content = ioapic->redirtbl[index].bits; + } result = (ioapic->ioregsel & 0x1) ? (redir_content >> 32) & 0xffffffff : @@ -297,6 +299,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) ioapic_debug("change redir index %x val %x\n", index, val); if (index >= IOAPIC_NUM_PINS) return; + index = array_index_nospec(index, IOAPIC_NUM_PINS); e = &ioapic->redirtbl[index]; mask_before = e->fields.mask; /* Preserve read-only fields */ diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 3cc3b2d130a0..4d000aea05e0 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -427,7 +427,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, kvm_set_msi_irq(vcpu->kvm, entry, &irq); - if (irq.level && kvm_apic_match_dest(vcpu, NULL, 0, + if (irq.trig_mode && kvm_apic_match_dest(vcpu, NULL, 0, irq.dest_id, irq.dest_mode)) __set_bit(irq.vector, ioapic_handled_vectors); } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 7b9ad9de4f37..537c36b55b5d 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -566,9 +566,11 @@ static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) { u8 val; - if (pv_eoi_get_user(vcpu, &val) < 0) + if (pv_eoi_get_user(vcpu, &val) < 0) { apic_debug("Can't read EOI MSR value: 0x%llx\n", (unsigned long long)vcpu->arch.pv_eoi.msr_val); + return false; + } return val & 0x1; } @@ -993,11 +995,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, apic_clear_vector(vector, apic->regs + APIC_TMR); } - if (vcpu->arch.apicv_active) - kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); - else { + if (kvm_x86_ops->deliver_posted_interrupt(vcpu, vector)) { kvm_lapic_set_irr(vector, apic); - kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_vcpu_kick(vcpu); } @@ -1754,15 +1753,20 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_LVTTHMR: case APIC_LVTPC: case APIC_LVT1: - case APIC_LVTERR: + case APIC_LVTERR: { /* TODO: Check vector */ + size_t size; + u32 index; + if (!kvm_apic_sw_enabled(apic)) val |= APIC_LVT_MASKED; - - val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4]; + size = ARRAY_SIZE(apic_lvt_mask); + index = array_index_nospec( + (reg - APIC_LVTT) >> 4, size); + val &= apic_lvt_mask[index]; kvm_lapic_set_reg(apic, reg, val); - break; + } case APIC_LVTT: if (!kvm_apic_sw_enabled(apic)) @@ -1967,13 +1971,11 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) } } - if ((old_value ^ value) & X2APIC_ENABLE) { - if (value & X2APIC_ENABLE) { - kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id); - kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true); - } else - kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false); - } + if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE)) + kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id); + + if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) + kvm_x86_ops->set_virtual_apic_mode(vcpu); apic->base_address = apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_BASE; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 4b9935a38347..bc3446d3cfdf 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -16,6 +16,13 @@ #define APIC_BUS_CYCLE_NS 1 #define APIC_BUS_FREQUENCY (1000000000ULL / APIC_BUS_CYCLE_NS) +enum lapic_mode { + LAPIC_MODE_DISABLED = 0, + LAPIC_MODE_INVALID = X2APIC_ENABLE, + LAPIC_MODE_XAPIC = MSR_IA32_APICBASE_ENABLE, + LAPIC_MODE_X2APIC = MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE, +}; + struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ @@ -89,6 +96,7 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info); int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); +enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); @@ -220,4 +228,10 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu); void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu); bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu); void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu); + +static inline enum lapic_mode kvm_apic_mode(u64 apic_base) +{ + return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); +} + #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 87a0601b1c20..e5af08b58132 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -48,6 +49,30 @@ #include #include "trace.h" +extern bool itlb_multihit_kvm_mitigation; + +static int __read_mostly nx_huge_pages = -1; +static uint __read_mostly nx_huge_pages_recovery_ratio = 60; + +static int set_nx_huge_pages(const char *val, const struct kernel_param *kp); +static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp); + +static struct kernel_param_ops nx_huge_pages_ops = { + .set = set_nx_huge_pages, + .get = param_get_bool, +}; + +static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = { + .set = set_nx_huge_pages_recovery_ratio, + .get = param_get_uint, +}; + +module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644); +__MODULE_PARM_TYPE(nx_huge_pages, "bool"); +module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops, + &nx_huge_pages_recovery_ratio, 0644); +__MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint"); + /* * When setting this variable to true it enables Two-Dimensional-Paging * where the hardware walks 2 page tables: @@ -139,9 +164,6 @@ module_param(dbg, bool, 0644); #include -#define CREATE_TRACE_POINTS -#include "mmutrace.h" - #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) #define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1)) @@ -244,6 +266,11 @@ static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; static void mmu_spte_set(u64 *sptep, u64 spte); static void mmu_free_roots(struct kvm_vcpu *vcpu); +static bool is_executable_pte(u64 spte); + +#define CREATE_TRACE_POINTS +#include "mmutrace.h" + void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value) { @@ -264,6 +291,11 @@ static inline bool spte_ad_enabled(u64 spte) return !(spte & shadow_acc_track_value); } +static bool is_nx_huge_page_enabled(void) +{ + return READ_ONCE(nx_huge_pages); +} + static inline u64 spte_shadow_accessed_mask(u64 spte) { MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value); @@ -1008,10 +1040,16 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn) { - if (sp->role.direct) - BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index)); - else + if (!sp->role.direct) { sp->gfns[index] = gfn; + return; + } + + if (WARN_ON(gfn != kvm_mmu_page_get_gfn(sp, index))) + pr_err_ratelimited("gfn mismatch under direct page %llx " + "(expected %llx, got %llx)\n", + sp->gfn, + kvm_mmu_page_get_gfn(sp, index), gfn); } /* @@ -1070,6 +1108,17 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_mmu_gfn_disallow_lpage(slot, gfn); } +static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + if (sp->lpage_disallowed) + return; + + ++kvm->stat.nx_lpage_splits; + list_add_tail(&sp->lpage_disallowed_link, + &kvm->arch.lpage_disallowed_mmu_pages); + sp->lpage_disallowed = true; +} + static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) { struct kvm_memslots *slots; @@ -1087,6 +1136,13 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_mmu_gfn_allow_lpage(slot, gfn); } +static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + --kvm->stat.nx_lpage_splits; + sp->lpage_disallowed = false; + list_del(&sp->lpage_disallowed_link); +} + static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, struct kvm_memory_slot *slot) { @@ -1109,12 +1165,12 @@ static bool mmu_gfn_lpage_is_disallowed(struct kvm_vcpu *vcpu, gfn_t gfn, return __mmu_gfn_lpage_is_disallowed(gfn, level, slot); } -static int host_mapping_level(struct kvm *kvm, gfn_t gfn) +static int host_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn) { unsigned long page_size; int i, ret = 0; - page_size = kvm_host_page_size(kvm, gfn); + page_size = kvm_host_page_size(vcpu, gfn); for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { if (page_size >= KVM_HPAGE_SIZE(i)) @@ -1164,7 +1220,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, if (unlikely(*force_pt_level)) return PT_PAGE_TABLE_LEVEL; - host_level = host_mapping_level(vcpu->kvm, large_gfn); + host_level = host_mapping_level(vcpu, large_gfn); if (host_level == PT_PAGE_TABLE_LEVEL) return host_level; @@ -2634,6 +2690,9 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, kvm_reload_remote_mmus(kvm); } + if (sp->lpage_disallowed) + unaccount_huge_nx_page(kvm, sp); + sp->role.invalid = 1; return ret; } @@ -2788,6 +2847,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (!speculative) spte |= spte_shadow_accessed_mask(spte); + if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) && + is_nx_huge_page_enabled()) { + pte_access &= ~ACC_EXEC_MASK; + } + if (pte_access & ACC_EXEC_MASK) spte |= shadow_x_mask; else @@ -2903,10 +2967,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, ret = RET_PF_EMULATE; pgprintk("%s: setting spte %llx\n", __func__, *sptep); - pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n", - is_large_pte(*sptep)? "2MB" : "4kB", - *sptep & PT_WRITABLE_MASK ? "RW" : "R", gfn, - *sptep, sptep); + trace_kvm_mmu_set_spte(level, gfn, sptep); if (!was_rmapped && is_large_pte(*sptep)) ++vcpu->kvm->stat.lpages; @@ -2918,8 +2979,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, } } - kvm_release_pfn_clean(pfn); - return ret; } @@ -2954,9 +3013,11 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, if (ret <= 0) return -1; - for (i = 0; i < ret; i++, gfn++, start++) + for (i = 0; i < ret; i++, gfn++, start++) { mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn, page_to_pfn(pages[i]), true, true); + put_page(pages[i]); + } return 0; } @@ -3004,40 +3065,71 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) __direct_pte_prefetch(vcpu, sp, sptep); } -static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, - int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault) +static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it, + gfn_t gfn, kvm_pfn_t *pfnp, int *levelp) { - struct kvm_shadow_walk_iterator iterator; + int level = *levelp; + u64 spte = *it.sptep; + + if (it.level == level && level > PT_PAGE_TABLE_LEVEL && + is_nx_huge_page_enabled() && + is_shadow_present_pte(spte) && + !is_large_pte(spte)) { + /* + * A small SPTE exists for this pfn, but FNAME(fetch) + * and __direct_map would like to create a large PTE + * instead: just force them to go down another level, + * patching back for them into pfn the next 9 bits of + * the address. + */ + u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1); + *pfnp |= gfn & page_mask; + (*levelp)--; + } +} + +static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, + int map_writable, int level, kvm_pfn_t pfn, + bool prefault, bool lpage_disallowed) +{ + struct kvm_shadow_walk_iterator it; struct kvm_mmu_page *sp; - int emulate = 0; - gfn_t pseudo_gfn; + int ret; + gfn_t gfn = gpa >> PAGE_SHIFT; + gfn_t base_gfn = gfn; if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) - return 0; + return RET_PF_RETRY; - for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { - if (iterator.level == level) { - emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, - write, level, gfn, pfn, prefault, - map_writable); - direct_pte_prefetch(vcpu, iterator.sptep); - ++vcpu->stat.pf_fixed; + trace_kvm_mmu_spte_requested(gpa, level, pfn); + for_each_shadow_entry(vcpu, gpa, it) { + /* + * We cannot overwrite existing page tables with an NX + * large page, as the leaf could be executable. + */ + disallowed_hugepage_adjust(it, gfn, &pfn, &level); + + base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + if (it.level == level) break; - } - drop_large_spte(vcpu, iterator.sptep); - if (!is_shadow_present_pte(*iterator.sptep)) { - u64 base_addr = iterator.addr; + drop_large_spte(vcpu, it.sptep); + if (!is_shadow_present_pte(*it.sptep)) { + sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr, + it.level - 1, true, ACC_ALL); - base_addr &= PT64_LVL_ADDR_MASK(iterator.level); - pseudo_gfn = base_addr >> PAGE_SHIFT; - sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, - iterator.level - 1, 1, ACC_ALL); - - link_shadow_page(vcpu, iterator.sptep, sp); + link_shadow_page(vcpu, it.sptep, sp); + if (lpage_disallowed) + account_huge_nx_page(vcpu->kvm, sp); } } - return emulate; + + ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL, + write, level, base_gfn, pfn, prefault, + map_writable); + direct_pte_prefetch(vcpu, it.sptep); + ++vcpu->stat.pf_fixed; + return ret; } static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk) @@ -3072,11 +3164,10 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) } static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, - gfn_t *gfnp, kvm_pfn_t *pfnp, + gfn_t gfn, kvm_pfn_t *pfnp, int *levelp) { kvm_pfn_t pfn = *pfnp; - gfn_t gfn = *gfnp; int level = *levelp; /* @@ -3086,7 +3177,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, * here. */ if (!is_error_noslot_pfn(pfn) && !kvm_is_reserved_pfn(pfn) && - level == PT_PAGE_TABLE_LEVEL && + !kvm_is_zone_device_pfn(pfn) && level == PT_PAGE_TABLE_LEVEL && PageTransCompoundMap(pfn_to_page(pfn)) && !mmu_gfn_lpage_is_disallowed(vcpu, gfn, PT_DIRECTORY_LEVEL)) { unsigned long mask; @@ -3103,8 +3194,6 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, mask = KVM_PAGES_PER_HPAGE(level) - 1; VM_BUG_ON((gfn & mask) != (pfn & mask)); if (pfn & mask) { - gfn &= ~mask; - *gfnp = gfn; kvm_release_pfn_clean(pfn); pfn &= ~mask; kvm_get_pfn(pfn); @@ -3331,11 +3420,14 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, { int r; int level; - bool force_pt_level = false; + bool force_pt_level; kvm_pfn_t pfn; unsigned long mmu_seq; bool map_writable, write = error_code & PFERR_WRITE_MASK; + bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && + is_nx_huge_page_enabled(); + force_pt_level = lpage_disallowed; level = mapping_level(vcpu, gfn, &force_pt_level); if (likely(!force_pt_level)) { /* @@ -3361,22 +3453,20 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) return r; + r = RET_PF_RETRY; spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; if (make_mmu_pages_available(vcpu) < 0) goto out_unlock; if (likely(!force_pt_level)) - transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); - r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); - spin_unlock(&vcpu->kvm->mmu_lock); - - return r; - + transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); + r = __direct_map(vcpu, v, write, map_writable, level, pfn, + prefault, false); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); - return RET_PF_RETRY; + return r; } @@ -3922,6 +4012,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, unsigned long mmu_seq; int write = error_code & PFERR_WRITE_MASK; bool map_writable; + bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && + is_nx_huge_page_enabled(); MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); @@ -3932,8 +4024,9 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, if (r) return r; - force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn, - PT_DIRECTORY_LEVEL); + force_pt_level = + lpage_disallowed || + !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL); level = mapping_level(vcpu, gfn, &force_pt_level); if (likely(!force_pt_level)) { if (level > PT_DIRECTORY_LEVEL && @@ -3954,22 +4047,20 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r)) return r; + r = RET_PF_RETRY; spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; if (make_mmu_pages_available(vcpu) < 0) goto out_unlock; if (likely(!force_pt_level)) - transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); - r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); - spin_unlock(&vcpu->kvm->mmu_lock); - - return r; - + transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); + r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, + prefault, lpage_disallowed); out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); - return RET_PF_RETRY; + return r; } static void nonpaging_init_context(struct kvm_vcpu *vcpu, @@ -5253,9 +5344,9 @@ restart: * the guest, and the guest page table is using 4K page size * mapping if the indirect sp has level = 1. */ - if (sp->role.direct && - !kvm_is_reserved_pfn(pfn) && - PageTransCompoundMap(pfn_to_page(pfn))) { + if (sp->role.direct && !kvm_is_reserved_pfn(pfn) && + !kvm_is_zone_device_pfn(pfn) && + PageTransCompoundMap(pfn_to_page(pfn))) { drop_spte(kvm, sptep); need_tlb_flush = 1; goto restart; @@ -5454,7 +5545,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) int nr_to_scan = sc->nr_to_scan; unsigned long freed = 0; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { int idx; @@ -5504,7 +5595,7 @@ unlock: break; } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return freed; } @@ -5528,8 +5619,58 @@ static void mmu_destroy_caches(void) kmem_cache_destroy(mmu_page_header_cache); } +static bool get_nx_auto_mode(void) +{ + /* Return true when CPU has the bug, and mitigations are ON */ + return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off(); +} + +static void __set_nx_huge_pages(bool val) +{ + nx_huge_pages = itlb_multihit_kvm_mitigation = val; +} + +static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) +{ + bool old_val = nx_huge_pages; + bool new_val; + + /* In "auto" mode deploy workaround only if CPU has the bug. */ + if (sysfs_streq(val, "off")) + new_val = 0; + else if (sysfs_streq(val, "force")) + new_val = 1; + else if (sysfs_streq(val, "auto")) + new_val = get_nx_auto_mode(); + else if (strtobool(val, &new_val) < 0) + return -EINVAL; + + __set_nx_huge_pages(new_val); + + if (new_val != old_val) { + struct kvm *kvm; + int idx; + + mutex_lock(&kvm_lock); + + list_for_each_entry(kvm, &vm_list, vm_list) { + idx = srcu_read_lock(&kvm->srcu); + kvm_mmu_invalidate_zap_all_pages(kvm); + srcu_read_unlock(&kvm->srcu, idx); + + wake_up_process(kvm->arch.nx_lpage_recovery_thread); + } + mutex_unlock(&kvm_lock); + } + + return 0; +} + int kvm_mmu_module_init(void) { + if (nx_huge_pages == -1) + __set_nx_huge_pages(get_nx_auto_mode()); + kvm_mmu_reset_all_pte_masks(); pte_list_desc_cache = kmem_cache_create("pte_list_desc", @@ -5595,3 +5736,116 @@ void kvm_mmu_module_exit(void) unregister_shrinker(&mmu_shrinker); mmu_audit_disable(); } + +static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp) +{ + unsigned int old_val; + int err; + + old_val = nx_huge_pages_recovery_ratio; + err = param_set_uint(val, kp); + if (err) + return err; + + if (READ_ONCE(nx_huge_pages) && + !old_val && nx_huge_pages_recovery_ratio) { + struct kvm *kvm; + + mutex_lock(&kvm_lock); + + list_for_each_entry(kvm, &vm_list, vm_list) + wake_up_process(kvm->arch.nx_lpage_recovery_thread); + + mutex_unlock(&kvm_lock); + } + + return err; +} + +static void kvm_recover_nx_lpages(struct kvm *kvm) +{ + int rcu_idx; + struct kvm_mmu_page *sp; + unsigned int ratio; + LIST_HEAD(invalid_list); + ulong to_zap; + + rcu_idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + + ratio = READ_ONCE(nx_huge_pages_recovery_ratio); + to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0; + while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) { + /* + * We use a separate list instead of just using active_mmu_pages + * because the number of lpage_disallowed pages is expected to + * be relatively small compared to the total. + */ + sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages, + struct kvm_mmu_page, + lpage_disallowed_link); + WARN_ON_ONCE(!sp->lpage_disallowed); + kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); + WARN_ON_ONCE(sp->lpage_disallowed); + + if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) { + kvm_mmu_commit_zap_page(kvm, &invalid_list); + if (to_zap) + cond_resched_lock(&kvm->mmu_lock); + } + } + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, rcu_idx); +} + +static long get_nx_lpage_recovery_timeout(u64 start_time) +{ + return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio) + ? start_time + 60 * HZ - get_jiffies_64() + : MAX_SCHEDULE_TIMEOUT; +} + +static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data) +{ + u64 start_time; + long remaining_time; + + while (true) { + start_time = get_jiffies_64(); + remaining_time = get_nx_lpage_recovery_timeout(start_time); + + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop() && remaining_time > 0) { + schedule_timeout(remaining_time); + remaining_time = get_nx_lpage_recovery_timeout(start_time); + set_current_state(TASK_INTERRUPTIBLE); + } + + set_current_state(TASK_RUNNING); + + if (kthread_should_stop()) + return 0; + + kvm_recover_nx_lpages(kvm); + } +} + +int kvm_mmu_post_init_vm(struct kvm *kvm) +{ + int err; + + err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0, + "kvm-nx-lpage-recovery", + &kvm->arch.nx_lpage_recovery_thread); + if (!err) + kthread_unpark(kvm->arch.nx_lpage_recovery_thread); + + return err; +} + +void kvm_mmu_pre_destroy_vm(struct kvm *kvm) +{ + if (kvm->arch.nx_lpage_recovery_thread) + kthread_stop(kvm->arch.nx_lpage_recovery_thread); +} diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index efc857615d8e..068feab64acf 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -195,4 +195,8 @@ void kvm_mmu_gfn_allow_lpage(struct kvm_memory_slot *slot, gfn_t gfn); bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, struct kvm_memory_slot *slot, u64 gfn); int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu); + +int kvm_mmu_post_init_vm(struct kvm *kvm); +void kvm_mmu_pre_destroy_vm(struct kvm *kvm); + #endif diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index c73bf4e4988c..918b0d5bf272 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -325,6 +325,65 @@ TRACE_EVENT( __entry->kvm_gen == __entry->spte_gen ) ); + +TRACE_EVENT( + kvm_mmu_set_spte, + TP_PROTO(int level, gfn_t gfn, u64 *sptep), + TP_ARGS(level, gfn, sptep), + + TP_STRUCT__entry( + __field(u64, gfn) + __field(u64, spte) + __field(u64, sptep) + __field(u8, level) + /* These depend on page entry type, so compute them now. */ + __field(bool, r) + __field(bool, x) + __field(u8, u) + ), + + TP_fast_assign( + __entry->gfn = gfn; + __entry->spte = *sptep; + __entry->sptep = virt_to_phys(sptep); + __entry->level = level; + __entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK); + __entry->x = is_executable_pte(__entry->spte); + __entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1; + ), + + TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx", + __entry->gfn, __entry->spte, + __entry->r ? "r" : "-", + __entry->spte & PT_WRITABLE_MASK ? "w" : "-", + __entry->x ? "x" : "-", + __entry->u == -1 ? "" : (__entry->u ? "u" : "-"), + __entry->level, __entry->sptep + ) +); + +TRACE_EVENT( + kvm_mmu_spte_requested, + TP_PROTO(gpa_t addr, int level, kvm_pfn_t pfn), + TP_ARGS(addr, level, pfn), + + TP_STRUCT__entry( + __field(u64, gfn) + __field(u64, pfn) + __field(u8, level) + ), + + TP_fast_assign( + __entry->gfn = addr >> PAGE_SHIFT; + __entry->pfn = pfn | (__entry->gfn & (KVM_PAGES_PER_HPAGE(level) - 1)); + __entry->level = level; + ), + + TP_printk("gfn %llx pfn %llx level %d", + __entry->gfn, __entry->pfn, __entry->level + ) +); + #endif /* _TRACE_KVMMMU_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c index e9ea2d45ae66..1209447d6014 100644 --- a/arch/x86/kvm/mtrr.c +++ b/arch/x86/kvm/mtrr.c @@ -202,11 +202,15 @@ static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit) break; case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000: *seg = 1; - *unit = msr - MSR_MTRRfix16K_80000; + *unit = array_index_nospec( + msr - MSR_MTRRfix16K_80000, + MSR_MTRRfix16K_A0000 - MSR_MTRRfix16K_80000 + 1); break; case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000: *seg = 2; - *unit = msr - MSR_MTRRfix4K_C0000; + *unit = array_index_nospec( + msr - MSR_MTRRfix4K_C0000, + MSR_MTRRfix4K_F8000 - MSR_MTRRfix4K_C0000 + 1); break; default: return false; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6288e9d7068e..8cf7a09bdd73 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -522,6 +522,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true); + kvm_release_pfn_clean(pfn); return true; } @@ -595,12 +596,14 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct guest_walker *gw, int write_fault, int hlevel, - kvm_pfn_t pfn, bool map_writable, bool prefault) + kvm_pfn_t pfn, bool map_writable, bool prefault, + bool lpage_disallowed) { struct kvm_mmu_page *sp = NULL; struct kvm_shadow_walk_iterator it; unsigned direct_access, access = gw->pt_access; int top_level, ret; + gfn_t gfn, base_gfn; direct_access = gw->pte_access; @@ -645,35 +648,48 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, link_shadow_page(vcpu, it.sptep, sp); } - for (; - shadow_walk_okay(&it) && it.level > hlevel; - shadow_walk_next(&it)) { - gfn_t direct_gfn; + /* + * FNAME(page_fault) might have clobbered the bottom bits of + * gw->gfn, restore them from the virtual address. + */ + gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT); + base_gfn = gfn; + trace_kvm_mmu_spte_requested(addr, gw->level, pfn); + + for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { clear_sp_write_flooding_count(it.sptep); + + /* + * We cannot overwrite existing page tables with an NX + * large page, as the leaf could be executable. + */ + disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel); + + base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + if (it.level == hlevel) + break; + validate_direct_spte(vcpu, it.sptep, direct_access); drop_large_spte(vcpu, it.sptep); - if (is_shadow_present_pte(*it.sptep)) - continue; - - direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); - - sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, - true, direct_access); - link_shadow_page(vcpu, it.sptep, sp); + if (!is_shadow_present_pte(*it.sptep)) { + sp = kvm_mmu_get_page(vcpu, base_gfn, addr, + it.level - 1, true, direct_access); + link_shadow_page(vcpu, it.sptep, sp); + if (lpage_disallowed) + account_huge_nx_page(vcpu->kvm, sp); + } } - clear_sp_write_flooding_count(it.sptep); ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, - it.level, gw->gfn, pfn, prefault, map_writable); + it.level, base_gfn, pfn, prefault, map_writable); FNAME(pte_prefetch)(vcpu, gw, it.sptep); - + ++vcpu->stat.pf_fixed; return ret; out_gpte_changed: - kvm_release_pfn_clean(pfn); return RET_PF_RETRY; } @@ -740,9 +756,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, int r; kvm_pfn_t pfn; int level = PT_PAGE_TABLE_LEVEL; - bool force_pt_level = false; unsigned long mmu_seq; bool map_writable, is_self_change_mapping; + bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && + is_nx_huge_page_enabled(); + bool force_pt_level = lpage_disallowed; pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); @@ -821,6 +839,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, walker.pte_access &= ~ACC_EXEC_MASK; } + r = RET_PF_RETRY; spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; @@ -829,19 +848,15 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, if (make_mmu_pages_available(vcpu) < 0) goto out_unlock; if (!force_pt_level) - transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); + transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level); r = FNAME(fetch)(vcpu, addr, &walker, write_fault, - level, pfn, map_writable, prefault); - ++vcpu->stat.pf_fixed; + level, pfn, map_writable, prefault, lpage_disallowed); kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); - spin_unlock(&vcpu->kvm->mmu_lock); - - return r; out_unlock: spin_unlock(&vcpu->kvm->mmu_lock); kvm_release_pfn_clean(pfn); - return RET_PF_RETRY; + return r; } static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index a9a62b9a73e2..c67a636b268f 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -2,6 +2,8 @@ #ifndef __KVM_X86_PMU_H #define __KVM_X86_PMU_H +#include + #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu) #define pmu_to_vcpu(pmu) (container_of((pmu), struct kvm_vcpu, arch.pmu)) #define pmc_to_pmu(pmc) (&(pmc)->vcpu->arch.pmu) @@ -81,8 +83,12 @@ static inline bool pmc_is_enabled(struct kvm_pmc *pmc) static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr, u32 base) { - if (msr >= base && msr < base + pmu->nr_arch_gp_counters) - return &pmu->gp_counters[msr - base]; + if (msr >= base && msr < base + pmu->nr_arch_gp_counters) { + u32 index = array_index_nospec(msr - base, + pmu->nr_arch_gp_counters); + + return &pmu->gp_counters[index]; + } return NULL; } @@ -92,8 +98,12 @@ static inline struct kvm_pmc *get_fixed_pmc(struct kvm_pmu *pmu, u32 msr) { int base = MSR_CORE_PERF_FIXED_CTR0; - if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) - return &pmu->fixed_counters[msr - base]; + if (msr >= base && msr < base + pmu->nr_arch_fixed_counters) { + u32 index = array_index_nospec(msr - base, + pmu->nr_arch_fixed_counters); + + return &pmu->fixed_counters[index]; + } return NULL; } diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c index 2729131fe9bf..84ae4dd261ca 100644 --- a/arch/x86/kvm/pmu_intel.c +++ b/arch/x86/kvm/pmu_intel.c @@ -87,10 +87,14 @@ static unsigned intel_find_arch_event(struct kvm_pmu *pmu, static unsigned intel_find_fixed_event(int idx) { - if (idx >= ARRAY_SIZE(fixed_pmc_events)) + u32 event; + size_t size = ARRAY_SIZE(fixed_pmc_events); + + if (idx >= size) return PERF_COUNT_HW_MAX; - return intel_arch_events[fixed_pmc_events[idx]].event_type; + event = fixed_pmc_events[array_index_nospec(idx, size)]; + return intel_arch_events[event].event_type; } /* check if a PMC is enabled by comparing it with globl_ctrl bits. */ @@ -131,15 +135,19 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); bool fixed = idx & (1u << 30); struct kvm_pmc *counters; + unsigned int num_counters; idx &= ~(3u << 30); - if (!fixed && idx >= pmu->nr_arch_gp_counters) + if (fixed) { + counters = pmu->fixed_counters; + num_counters = pmu->nr_arch_fixed_counters; + } else { + counters = pmu->gp_counters; + num_counters = pmu->nr_arch_gp_counters; + } + if (idx >= num_counters) return NULL; - if (fixed && idx >= pmu->nr_arch_fixed_counters) - return NULL; - counters = fixed ? pmu->fixed_counters : pmu->gp_counters; - - return &counters[idx]; + return &counters[array_index_nospec(idx, num_counters)]; } static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 093e7f567e69..d63621386418 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -299,7 +299,7 @@ static int vgif = true; module_param(vgif, int, 0444); static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); -static void svm_flush_tlb(struct kvm_vcpu *vcpu); +static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); static void svm_complete_interrupts(struct vcpu_svm *svm); static int nested_svm_exit_handled(struct vcpu_svm *svm); @@ -608,8 +608,14 @@ static int get_npt_level(struct kvm_vcpu *vcpu) static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) { vcpu->arch.efer = efer; - if (!npt_enabled && !(efer & EFER_LMA)) - efer &= ~EFER_LME; + + if (!npt_enabled) { + /* Shadow paging assumes NX to be available. */ + efer |= EFER_NX; + + if (!(efer & EFER_LMA)) + efer &= ~EFER_LME; + } to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); @@ -1082,6 +1088,47 @@ static int avic_ga_log_notifier(u32 ga_tag) return 0; } +/* + * The default MMIO mask is a single bit (excluding the present bit), + * which could conflict with the memory encryption bit. Check for + * memory encryption support and override the default MMIO mask if + * memory encryption is enabled. + */ +static __init void svm_adjust_mmio_mask(void) +{ + unsigned int enc_bit, mask_bit; + u64 msr, mask; + + /* If there is no memory encryption support, use existing mask */ + if (cpuid_eax(0x80000000) < 0x8000001f) + return; + + /* If memory encryption is not enabled, use existing mask */ + rdmsrl(MSR_K8_SYSCFG, msr); + if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + return; + + enc_bit = cpuid_ebx(0x8000001f) & 0x3f; + mask_bit = boot_cpu_data.x86_phys_bits; + + /* Increment the mask bit if it is the same as the encryption bit */ + if (enc_bit == mask_bit) + mask_bit++; + + /* + * If the mask bit location is below 52, then some bits above the + * physical addressing limit will always be reserved, so use the + * rsvd_bits() function to generate the mask. This mask, along with + * the present bit, will be used to generate a page fault with + * PFER.RSV = 1. + * + * If the mask bit location is 52 (or above), then clear the mask. + */ + mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0; + + kvm_mmu_set_mmio_spte_mask(mask, mask); +} + static __init int svm_hardware_setup(void) { int cpu; @@ -1117,6 +1164,8 @@ static __init int svm_hardware_setup(void) kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); } + svm_adjust_mmio_mask(); + for_each_possible_cpu(cpu) { r = svm_cpu_init(cpu); if (r) @@ -2097,7 +2146,7 @@ static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) - svm_flush_tlb(vcpu); + svm_flush_tlb(vcpu, true); vcpu->arch.cr4 = cr4; if (!npt_enabled) @@ -2438,7 +2487,7 @@ static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, svm->vmcb->control.nested_cr3 = __sme_set(root); mark_dirty(svm->vmcb, VMCB_NPT); - svm_flush_tlb(vcpu); + svm_flush_tlb(vcpu, true); } static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, @@ -3111,7 +3160,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions; svm->nested.intercept = nested_vmcb->control.intercept; - svm_flush_tlb(&svm->vcpu); + svm_flush_tlb(&svm->vcpu, true); svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) svm->vcpu.arch.hflags |= HF_VINTR_MASK; @@ -4589,7 +4638,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) set_cr_intercept(svm, INTERCEPT_CR8_WRITE); } -static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) +static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu) { return; } @@ -4625,8 +4674,11 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) return; } -static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) +static int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) { + if (!vcpu->arch.apicv_active) + return -1; + kvm_lapic_set_irr(vec, vcpu->arch.apic); smp_mb__after_atomic(); @@ -4635,6 +4687,8 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) kvm_cpu_get_apicid(vcpu->cpu)); else kvm_vcpu_wake_up(vcpu); + + return 0; } static bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) @@ -4947,7 +5001,7 @@ static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) return 0; } -static void svm_flush_tlb(struct kvm_vcpu *vcpu) +static void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) { struct vcpu_svm *svm = to_svm(vcpu); @@ -5288,7 +5342,7 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) svm->vmcb->save.cr3 = __sme_set(root); mark_dirty(svm->vmcb, VMCB_CR); - svm_flush_tlb(vcpu); + svm_flush_tlb(vcpu, true); } static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) @@ -5302,7 +5356,7 @@ static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root) svm->vmcb->save.cr3 = kvm_read_cr3(vcpu); mark_dirty(svm->vmcb, VMCB_CR); - svm_flush_tlb(vcpu); + svm_flush_tlb(vcpu, true); } static int is_disabled(void) @@ -5713,7 +5767,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, - .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, + .set_virtual_apic_mode = svm_set_virtual_apic_mode, .get_enable_apicv = svm_get_enable_apicv, .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, .load_eoi_exitmap = svm_load_eoi_exitmap, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7784b02312ca..f85680b86524 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -591,7 +591,8 @@ struct nested_vmx { */ bool sync_shadow_vmcs; - bool change_vmcs01_virtual_x2apic_mode; + bool change_vmcs01_virtual_apic_mode; + /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; @@ -1601,7 +1602,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) return -1; } -static inline void __invvpid(int ext, u16 vpid, gva_t gva) +static inline void __invvpid(unsigned long ext, u16 vpid, gva_t gva) { struct { u64 vpid : 16; @@ -1615,7 +1616,7 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva) : : "a"(&operand), "c"(ext) : "cc", "memory"); } -static inline void __invept(int ext, u64 eptp, gpa_t gpa) +static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa) { struct { u64 eptp, gpa; @@ -2258,17 +2259,9 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) u64 guest_efer = vmx->vcpu.arch.efer; u64 ignore_bits = 0; - if (!enable_ept) { - /* - * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing - * host CPUID is more efficient than testing guest CPUID - * or CR4. Host SMEP is anyway a requirement for guest SMEP. - */ - if (boot_cpu_has(X86_FEATURE_SMEP)) - guest_efer |= EFER_NX; - else if (!(guest_efer & EFER_NX)) - ignore_bits |= EFER_NX; - } + /* Shadow paging assumes NX to be available. */ + if (!enable_ept) + guest_efer |= EFER_NX; /* * LMA and LME handled by hardware; SCE meaningless outside long mode. @@ -2825,9 +2818,6 @@ static void setup_msrs(struct vcpu_vmx *vmx) index = __find_msr_index(vmx, MSR_CSTAR); if (index >= 0) move_msr_up(vmx, index, save_nmsrs++); - index = __find_msr_index(vmx, MSR_TSC_AUX); - if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) - move_msr_up(vmx, index, save_nmsrs++); /* * MSR_STAR is only needed on long mode guests, and only * if efer.sce is enabled. @@ -2840,6 +2830,9 @@ static void setup_msrs(struct vcpu_vmx *vmx) index = __find_msr_index(vmx, MSR_EFER); if (index >= 0 && update_transition_efer(vmx, index)) move_msr_up(vmx, index, save_nmsrs++); + index = __find_msr_index(vmx, MSR_TSC_AUX); + if (index >= 0 && guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) + move_msr_up(vmx, index, save_nmsrs++); vmx->save_nmsrs = save_nmsrs; @@ -4427,9 +4420,10 @@ static void exit_lmode(struct kvm_vcpu *vcpu) #endif -static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid) +static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid, + bool invalidate_gpa) { - if (enable_ept) { + if (enable_ept && (invalidate_gpa || !enable_vpid)) { if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return; ept_sync_context(construct_eptp(vcpu, vcpu->arch.mmu.root_hpa)); @@ -4438,15 +4432,9 @@ static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid) } } -static void vmx_flush_tlb(struct kvm_vcpu *vcpu) +static void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) { - __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid); -} - -static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu) -{ - if (enable_ept) - vmx_flush_tlb(vcpu); + __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa); } static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) @@ -4480,7 +4468,7 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu) (unsigned long *)&vcpu->arch.regs_dirty)) return; - if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { + if (is_pae_paging(vcpu)) { vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]); vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]); vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]); @@ -4492,7 +4480,7 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) { + if (is_pae_paging(vcpu)) { mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); @@ -4609,6 +4597,9 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) static int get_ept_level(struct kvm_vcpu *vcpu) { + /* Nested EPT currently only supports 4-level walks. */ + if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu))) + return 4; if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) return 5; return 4; @@ -4644,7 +4635,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) ept_load_pdptrs(vcpu); } - vmx_flush_tlb(vcpu); + vmx_flush_tlb(vcpu, true); vmcs_writel(GUEST_CR3, guest_cr3); } @@ -5000,6 +4991,26 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) (ss.selector & SEGMENT_RPL_MASK)); } +static bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, + unsigned int port, int size); +static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ + unsigned long exit_qualification; + unsigned short port; + int size; + + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) + return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); + + exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + + port = exit_qualification >> 16; + size = (exit_qualification & 7) + 1; + + return nested_vmx_check_io_bitmaps(vcpu, port, size); +} + /* * Check if guest state is valid. Returns true if valid, false if * not. @@ -5530,24 +5541,29 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, * 2. If target vcpu isn't running(root mode), kick it to pick up the * interrupt from PIR in next vmentry. */ -static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) +static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) { struct vcpu_vmx *vmx = to_vmx(vcpu); int r; r = vmx_deliver_nested_posted_interrupt(vcpu, vector); if (!r) - return; + return 0; + + if (!vcpu->arch.apicv_active) + return -1; if (pi_test_and_set_pir(vector, &vmx->pi_desc)) - return; + return 0; /* If a previous notification has sent the IPI, nothing to do. */ if (pi_test_and_set_on(&vmx->pi_desc)) - return; + return 0; if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) kvm_vcpu_kick(vcpu); + + return 0; } /* @@ -8026,8 +8042,10 @@ static int handle_vmread(struct kvm_vcpu *vcpu) /* _system ok, nested_vmx_check_permission has verified cpl=0 */ if (kvm_write_guest_virt_system(vcpu, gva, &field_value, (is_long_mode(vcpu) ? 8 : 4), - &e)) + &e)) { kvm_inject_page_fault(vcpu, &e); + return 1; + } } nested_vmx_succeed(vcpu); @@ -8314,7 +8332,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } - __vmx_flush_tlb(vcpu, vmx->nested.vpid02); + __vmx_flush_tlb(vcpu, vmx->nested.vpid02, true); nested_vmx_succeed(vcpu); return kvm_skip_emulated_instruction(vcpu); @@ -8528,23 +8546,17 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { static const int kvm_vmx_max_exit_handlers = ARRAY_SIZE(kvm_vmx_exit_handlers); -static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12) +/* + * Return true if an IO instruction with the specified port and size should cause + * a VM-exit into L1. + */ +bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port, + int size) { - unsigned long exit_qualification; + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); gpa_t bitmap, last_bitmap; - unsigned int port; - int size; u8 b; - if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) - return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); - - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); - - port = exit_qualification >> 16; - size = (exit_qualification & 7) + 1; - last_bitmap = (gpa_t)-1; b = -1; @@ -9295,31 +9307,43 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) vmcs_write32(TPR_THRESHOLD, irr); } -static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) +static void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) { u32 sec_exec_control; + if (!lapic_in_kernel(vcpu)) + return; + /* Postpone execution until vmcs01 is the current VMCS. */ if (is_guest_mode(vcpu)) { - to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true; + to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true; return; } - if (!cpu_has_vmx_virtualize_x2apic_mode()) - return; - if (!cpu_need_tpr_shadow(vcpu)) return; sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); - if (set) { - sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; - } else { - sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; - sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - vmx_flush_tlb_ept_only(vcpu); + switch (kvm_get_apic_mode(vcpu)) { + case LAPIC_MODE_INVALID: + WARN_ONCE(true, "Invalid local APIC state"); + case LAPIC_MODE_DISABLED: + break; + case LAPIC_MODE_XAPIC: + if (flexpriority_enabled) { + sec_exec_control |= + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmx_flush_tlb(vcpu, true); + } + break; + case LAPIC_MODE_X2APIC: + if (cpu_has_vmx_virtualize_x2apic_mode()) + sec_exec_control |= + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; + break; } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); @@ -9347,7 +9371,7 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) !nested_cpu_has2(get_vmcs12(&vmx->vcpu), SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { vmcs_write64(APIC_ACCESS_ADDR, hpa); - vmx_flush_tlb_ept_only(vcpu); + vmx_flush_tlb(vcpu, true); } } @@ -10906,8 +10930,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne * If PAE paging and EPT are both on, CR3 is not used by the CPU and * must not be dereferenced. */ - if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu) && - !nested_ept) { + if (is_pae_paging(vcpu) && !nested_ept) { if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) { *entry_failure_code = ENTRY_FAIL_PDPTE; return 1; @@ -11214,11 +11237,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02); if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) { vmx->nested.last_vpid = vmcs12->virtual_processor_id; - __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02); + __vmx_flush_tlb(vcpu, to_vmx(vcpu)->nested.vpid02, true); } } else { vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid); - vmx_flush_tlb(vcpu); + vmx_flush_tlb(vcpu, true); } } @@ -11242,7 +11265,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, } } else if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { - vmx_flush_tlb_ept_only(vcpu); + vmx_flush_tlb(vcpu, true); } /* @@ -11921,7 +11944,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, * L1's vpid. TODO: move to a more elaborate solution, giving * each L2 its own vpid and exposing the vpid feature to L1. */ - vmx_flush_tlb(vcpu); + vmx_flush_tlb(vcpu, true); } /* Restore posted intr vector. */ if (nested_cpu_has_posted_intr(vmcs12)) @@ -12190,14 +12213,13 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); - if (vmx->nested.change_vmcs01_virtual_x2apic_mode) { - vmx->nested.change_vmcs01_virtual_x2apic_mode = false; - vmx_set_virtual_x2apic_mode(vcpu, - vcpu->arch.apic_base & X2APIC_ENABLE); + if (vmx->nested.change_vmcs01_virtual_apic_mode) { + vmx->nested.change_vmcs01_virtual_apic_mode = false; + vmx_set_virtual_apic_mode(vcpu); } else if (!nested_cpu_has_ept(vmcs12) && nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { - vmx_flush_tlb_ept_only(vcpu); + vmx_flush_tlb(vcpu, true); } /* This is needed for same reason as it was needed in prepare_vmcs02 */ @@ -12318,11 +12340,86 @@ static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, to_vmx(vcpu)->nested.sync_shadow_vmcs = true; } +static int vmx_check_intercept_io(struct kvm_vcpu *vcpu, + struct x86_instruction_info *info) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + unsigned short port; + bool intercept; + int size; + + if (info->intercept == x86_intercept_in || + info->intercept == x86_intercept_ins) { + port = info->src_val; + size = info->dst_bytes; + } else { + port = info->dst_val; + size = info->src_bytes; + } + + /* + * If the 'use IO bitmaps' VM-execution control is 0, IO instruction + * VM-exits depend on the 'unconditional IO exiting' VM-execution + * control. + * + * Otherwise, IO instruction VM-exits are controlled by the IO bitmaps. + */ + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) + intercept = nested_cpu_has(vmcs12, + CPU_BASED_UNCOND_IO_EXITING); + else + intercept = nested_vmx_check_io_bitmaps(vcpu, port, size); + + /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ + return intercept ? X86EMUL_UNHANDLEABLE : X86EMUL_CONTINUE; +} + static int vmx_check_intercept(struct kvm_vcpu *vcpu, struct x86_instruction_info *info, enum x86_intercept_stage stage) { - return X86EMUL_CONTINUE; + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; + + switch (info->intercept) { + /* + * RDPID causes #UD if disabled through secondary execution controls. + * Because it is marked as EmulateOnUD, we need to intercept it here. + */ + case x86_intercept_rdtscp: + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDTSCP)) { + ctxt->exception.vector = UD_VECTOR; + ctxt->exception.error_code_valid = false; + return X86EMUL_PROPAGATE_FAULT; + } + break; + + case x86_intercept_in: + case x86_intercept_ins: + case x86_intercept_out: + case x86_intercept_outs: + return vmx_check_intercept_io(vcpu, info); + + case x86_intercept_lgdt: + case x86_intercept_lidt: + case x86_intercept_lldt: + case x86_intercept_ltr: + case x86_intercept_sgdt: + case x86_intercept_sidt: + case x86_intercept_sldt: + case x86_intercept_str: + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC)) + return X86EMUL_CONTINUE; + + /* FIXME: produce nested vmexit and return X86EMUL_INTERCEPTED. */ + break; + + /* TODO: check more intercepts... */ + default: + break; + } + + return X86EMUL_UNHANDLEABLE; } #ifdef CONFIG_X86_64 @@ -12754,7 +12851,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, - .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, + .set_virtual_apic_mode = vmx_set_virtual_apic_mode, .set_apic_access_page_addr = vmx_set_apic_access_page_addr, .get_enable_apicv = vmx_get_enable_apicv, .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 98b990f13ae0..d6851636edab 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -90,8 +90,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #endif -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ +#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) @@ -191,7 +191,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, { "mmu_unsync", VM_STAT(mmu_unsync) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, - { "largepages", VM_STAT(lpages) }, + { "largepages", VM_STAT(lpages, .mode = 0444) }, + { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) }, { "max_mmu_page_hash_collisions", VM_STAT(max_mmu_page_hash_collisions) }, { NULL } @@ -275,13 +276,14 @@ int kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); int err; - if (((value ^ smsr->values[slot].curr) & mask) == 0) + value = (value & mask) | (smsr->values[slot].host & ~mask); + if (value == smsr->values[slot].curr) return 0; - smsr->values[slot].curr = value; err = wrmsrl_safe(shared_msrs_global.msrs[slot], value); if (err) return 1; + smsr->values[slot].curr = value; if (!smsr->registered) { smsr->urn.on_user_return = kvm_on_user_return; user_return_notifier_register(&smsr->urn); @@ -306,23 +308,27 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_get_apic_base); +enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu) +{ + return kvm_apic_mode(kvm_get_apic_base(vcpu)); +} +EXPORT_SYMBOL_GPL(kvm_get_apic_mode); + int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { - u64 old_state = vcpu->arch.apic_base & - (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); - u64 new_state = msr_info->data & - (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); + enum lapic_mode old_mode = kvm_get_apic_mode(vcpu); + enum lapic_mode new_mode = kvm_apic_mode(msr_info->data); u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff | (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE); - if ((msr_info->data & reserved_bits) || new_state == X2APIC_ENABLE) - return 1; - if (!msr_info->host_initiated && - ((new_state == MSR_IA32_APICBASE_ENABLE && - old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) || - (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) && - old_state == 0))) + if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID) return 1; + if (!msr_info->host_initiated) { + if (old_mode == LAPIC_MODE_X2APIC && new_mode == LAPIC_MODE_XAPIC) + return 1; + if (old_mode == LAPIC_MODE_DISABLED && new_mode == LAPIC_MODE_X2APIC) + return 1; + } kvm_lapic_set_base(vcpu, msr_info->data); return 0; @@ -615,7 +621,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) gfn_t gfn; int r; - if (is_long_mode(vcpu) || !is_pae(vcpu)) + if (!is_pae_paging(vcpu)) return false; if (!test_bit(VCPU_EXREG_PDPTR, @@ -844,8 +850,8 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (is_long_mode(vcpu) && (cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63))) return 1; - else if (is_pae(vcpu) && is_paging(vcpu) && - !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) + else if (is_pae_paging(vcpu) && + !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) return 1; vcpu->arch.cr3 = cr3; @@ -918,9 +924,11 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) { + size_t size = ARRAY_SIZE(vcpu->arch.db); + switch (dr) { case 0 ... 3: - vcpu->arch.db[dr] = val; + vcpu->arch.db[array_index_nospec(dr, size)] = val; if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) vcpu->arch.eff_db[dr] = val; break; @@ -957,9 +965,11 @@ EXPORT_SYMBOL_GPL(kvm_set_dr); int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) { + size_t size = ARRAY_SIZE(vcpu->arch.db); + switch (dr) { case 0 ... 3: - *val = vcpu->arch.db[dr]; + *val = vcpu->arch.db[array_index_nospec(dr, size)]; break; case 4: /* fall through */ @@ -1065,6 +1075,14 @@ u64 kvm_get_arch_capabilities(void) rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data); + /* + * If nx_huge_pages is enabled, KVM's shadow paging will ensure that + * the nested hypervisor runs with NX huge pages. If it is not, + * L1 is anyway vulnerable to ITLB_MULTIHIT explots from other + * L1 guests, so it need not worry about its own (L2) guests. + */ + data |= ARCH_CAP_PSCHANGE_MC_NO; + /* * If we're doing cache flushes (either "always" or "cond") * we will do one whenever the guest does a vmlaunch/vmresume. @@ -1077,8 +1095,40 @@ u64 kvm_get_arch_capabilities(void) if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER) data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH; + if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + data |= ARCH_CAP_RDCL_NO; + if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + data |= ARCH_CAP_SSB_NO; + if (!boot_cpu_has_bug(X86_BUG_MDS)) + data |= ARCH_CAP_MDS_NO; + + /* + * On TAA affected systems, export MDS_NO=0 when: + * - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1. + * - Updated microcode is present. This is detected by + * the presence of ARCH_CAP_TSX_CTRL_MSR and ensures + * that VERW clears CPU buffers. + * + * When MDS_NO=0 is exported, guests deploy clear CPU buffer + * mitigation and don't complain: + * + * "Vulnerable: Clear CPU buffers attempted, no microcode" + * + * If TSX is disabled on the system, guests are also mitigated against + * TAA and clear CPU buffer mitigation is not required for guests. + */ + if (!boot_cpu_has(X86_FEATURE_RTM)) + data &= ~ARCH_CAP_TAA_NO; + else if (!boot_cpu_has_bug(X86_BUG_TAA)) + data |= ARCH_CAP_TAA_NO; + else if (data & ARCH_CAP_TSX_CTRL_MSR) + data &= ~ARCH_CAP_MDS_NO; + + /* KVM does not emulate MSR_IA32_TSX_CTRL. */ + data &= ~ARCH_CAP_TSX_CTRL_MSR; return data; } + EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities); static int kvm_get_msr_feature(struct kvm_msr_entry *msr) @@ -2115,7 +2165,10 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) default: if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MCx_CTL(bank_num)) { - u32 offset = msr - MSR_IA32_MC0_CTL; + u32 offset = array_index_nospec( + msr - MSR_IA32_MC0_CTL, + MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); + /* only 0 or all 1s can be written to IA32_MCi_CTL * some Linux kernels though clear bit 10 in bank 4 to * workaround a BIOS/GART TBL issue on AMD K8s, ignore @@ -2499,7 +2552,10 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) default: if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MCx_CTL(bank_num)) { - u32 offset = msr - MSR_IA32_MC0_CTL; + u32 offset = array_index_nospec( + msr - MSR_IA32_MC0_CTL, + MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); + data = vcpu->arch.mce_banks[offset]; break; } @@ -6125,17 +6181,17 @@ static int kvmclock_cpufreq_notifier(struct notifier_block *nb, unsigned long va smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { kvm_for_each_vcpu(i, vcpu, kvm) { if (vcpu->cpu != freq->cpu) continue; kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); - if (vcpu->cpu != smp_processor_id()) + if (vcpu->cpu != raw_smp_processor_id()) send_ipi = 1; } } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); if (freq->old < freq->new && send_ipi) { /* @@ -6258,7 +6314,7 @@ static void kvm_set_mmio_spte_mask(void) * If reserved bit is not supported, clear the present bit to disable * mmio page fault. */ - if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52) + if (maxphyaddr == 52) mask &= ~1ull; kvm_mmu_set_mmio_spte_mask(mask, mask); @@ -6272,12 +6328,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work) struct kvm_vcpu *vcpu; int i; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) kvm_for_each_vcpu(i, vcpu, kvm) kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); atomic_set(&kvm_guest_has_master_clock, 0); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); } static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); @@ -6943,10 +6999,10 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); } -static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) +static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) { ++vcpu->stat.tlb_flush; - kvm_x86_ops->tlb_flush(vcpu); + kvm_x86_ops->tlb_flush(vcpu, invalidate_gpa); } void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, @@ -7017,7 +7073,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_MMU_SYNC, vcpu)) kvm_mmu_sync_roots(vcpu); if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) - kvm_vcpu_flush_tlb(vcpu); + kvm_vcpu_flush_tlb(vcpu, true); if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; r = 0; @@ -7621,6 +7677,9 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { + if (kvm_mpx_supported()) + kvm_load_guest_fpu(vcpu); + kvm_apic_accept_events(vcpu); if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED && vcpu->arch.pv.pv_unhalted) @@ -7628,6 +7687,8 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, else mp_state->mp_state = vcpu->arch.mp_state; + if (kvm_mpx_supported()) + kvm_put_guest_fpu(vcpu); return 0; } @@ -7747,7 +7808,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, kvm_update_cpuid(vcpu); idx = srcu_read_lock(&vcpu->kvm->srcu); - if (!is_long_mode(vcpu) && is_pae(vcpu)) { + if (is_pae_paging(vcpu)) { load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu)); mmu_reset_needed = 1; } @@ -8007,7 +8068,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_mmu_unload(vcpu); vcpu_put(vcpu); - kvm_x86_ops->vcpu_free(vcpu); + kvm_arch_vcpu_free(vcpu); } void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) @@ -8330,6 +8391,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); + INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); atomic_set(&kvm->arch.noncoherent_dma_count, 0); @@ -8359,6 +8421,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) return 0; } +int kvm_arch_post_init_vm(struct kvm *kvm) +{ + return kvm_mmu_post_init_vm(kvm); +} + static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) { int r; @@ -8462,6 +8529,11 @@ int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) } EXPORT_SYMBOL_GPL(x86_set_memory_region); +void kvm_arch_pre_destroy_vm(struct kvm *kvm) +{ + kvm_mmu_pre_destroy_vm(kvm); +} + void kvm_arch_destroy_vm(struct kvm *kvm) { if (current->mm == kvm->mm) { diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index c88305d997b0..68eb0d03e5fc 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -94,6 +94,11 @@ static inline int is_paging(struct kvm_vcpu *vcpu) return likely(kvm_read_cr0_bits(vcpu, X86_CR0_PG)); } +static inline bool is_pae_paging(struct kvm_vcpu *vcpu) +{ + return !is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu); +} + static inline u32 bit(int bitno) { return 1 << (bitno & 31); diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index e0b85930dd77..5cb9f009f2be 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -333,7 +333,7 @@ AVXcode: 1 06: CLTS 07: SYSRET (o64) 08: INVD -09: WBINVD +09: WBINVD | WBNOINVD (F3) 0a: 0b: UD2 (1B) 0c: @@ -364,7 +364,7 @@ AVXcode: 1 # a ModR/M byte. 1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev 1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv -1c: +1c: Grp20 (1A),(1C) 1d: 1e: 1f: NOP Ev @@ -792,6 +792,8 @@ f3: Grp17 (1A) f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) +f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3) +f9: MOVDIRI My,Gy EndTable Table: 3-byte opcode 2 (0x0f 0x3a) @@ -907,7 +909,7 @@ EndTable GrpTable: Grp3_2 0: TEST Ev,Iz -1: +1: TEST Ev,Iz 2: NOT Ev 3: NEG Ev 4: MUL rAX,Ev @@ -943,9 +945,9 @@ GrpTable: Grp6 EndTable GrpTable: Grp7 -0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) -1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) -2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) +0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | PCONFIG (101),(11B) | ENCLV (000),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ENCLS (111),(11B) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B) 3: LIDT Ms 4: SMSW Mw/Rv 5: rdpkru (110),(11B) | wrpkru (111),(11B) @@ -1020,7 +1022,7 @@ GrpTable: Grp15 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) 4: XSAVE | ptwrite Ey (F3),(11B) 5: XRSTOR | lfence (11B) -6: XSAVEOPT | clwb (66) | mfence (11B) +6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) 7: clflush | clflushopt (66) | sfence (11B) EndTable @@ -1051,6 +1053,10 @@ GrpTable: Grp19 6: vscatterpf1qps/d Wx (66),(ev) EndTable +GrpTable: Grp20 +0: cldemote Mb +EndTable + # AMD's Prefetch Group GrpTable: GrpP 0: PREFETCH diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 835620ab435f..eaee1a7ed0b5 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -2077,19 +2077,13 @@ int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, .pgd = pgd, .numpages = numpages, .mask_set = __pgprot(0), - .mask_clr = __pgprot(0), + .mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)), .flags = 0, }; if (!(__supported_pte_mask & _PAGE_NX)) goto out; - if (!(page_flags & _PAGE_NX)) - cpa.mask_clr = __pgprot(_PAGE_NX); - - if (!(page_flags & _PAGE_RW)) - cpa.mask_clr = __pgprot(_PAGE_RW); - if (!(page_flags & _PAGE_ENC)) cpa.mask_clr = pgprot_encrypted(cpa.mask_clr); diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index b4fd36271f90..55338b392221 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -590,8 +590,8 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) fixmaps_set++; } -void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, - pgprot_t flags) +void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, + phys_addr_t phys, pgprot_t flags) { __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); } diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 5400a24e1a8c..c5d7b4ae17ca 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -651,9 +651,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask, * that UV should be updated so that smp_call_function_many(), * etc, are optimal on UV. */ - unsigned int cpu; - - cpu = smp_processor_id(); cpumask = uv_flush_tlb_others(cpumask, info); if (cpumask) smp_call_function_many(cpumask, flush_tlb_func_remote, diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 4210da7b44de..33e9b4f1ce20 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -588,6 +588,17 @@ static void pci_fixup_amd_ehci_pme(struct pci_dev *dev) } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x7808, pci_fixup_amd_ehci_pme); +/* + * Device [1022:7914] + * When in D0, PME# doesn't get asserted when plugging USB 2.0 device. + */ +static void pci_fixup_amd_fch_xhci_pme(struct pci_dev *dev) +{ + dev_info(&dev->dev, "PME# does not work under D0, disabling it\n"); + dev->pme_support &= ~(PCI_PM_CAP_PME_D0 >> PCI_PM_CAP_PME_SHIFT); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x7914, pci_fixup_amd_fch_xhci_pme); + /* * Apple MacBook Pro: Avoid [mem 0x7fa00000-0x7fbfffff] * diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 9061babfbc83..e7f19dec16b9 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -480,7 +480,6 @@ void __init efi_init(void) efi_char16_t *c16; char vendor[100] = "unknown"; int i = 0; - void *tmp; #ifdef CONFIG_X86_32 if (boot_params.efi_info.efi_systab_hi || @@ -505,14 +504,16 @@ void __init efi_init(void) /* * Show what we know for posterity */ - c16 = tmp = early_memremap(efi.systab->fw_vendor, 2); + c16 = early_memremap_ro(efi.systab->fw_vendor, + sizeof(vendor) * sizeof(efi_char16_t)); if (c16) { - for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i) - vendor[i] = *c16++; + for (i = 0; i < sizeof(vendor) - 1 && c16[i]; ++i) + vendor[i] = c16[i]; vendor[i] = '\0'; - } else + early_memunmap(c16, sizeof(vendor) * sizeof(efi_char16_t)); + } else { pr_err("Could not map the firmware vendor!\n"); - early_memunmap(tmp, 2); + } pr_info("EFI v%u.%.02u by %s\n", efi.systab->hdr.revision >> 16, @@ -893,9 +894,6 @@ static void __init kexec_enter_virtual_mode(void) if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) runtime_code_page_mkexec(); - - /* clean DUMMY object */ - efi_delete_dummy_variable(); #endif } @@ -932,16 +930,14 @@ static void __init __efi_enter_virtual_mode(void) if (efi_alloc_page_tables()) { pr_err("Failed to allocate EFI page tables\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } efi_merge_regions(); new_memmap = efi_map_regions(&count, &pg_shift); if (!new_memmap) { pr_err("Error reallocating memory, EFI runtime non-functional!\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } pa = __pa(new_memmap); @@ -955,8 +951,7 @@ static void __init __efi_enter_virtual_mode(void) if (efi_memmap_init_late(pa, efi.memmap.desc_size * count)) { pr_err("Failed to remap late EFI memory map\n"); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; + goto err; } if (efi_enabled(EFI_DBG)) { @@ -964,12 +959,11 @@ static void __init __efi_enter_virtual_mode(void) efi_print_memmap(); } - BUG_ON(!efi.systab); + if (WARN_ON(!efi.systab)) + goto err; - if (efi_setup_page_tables(pa, 1 << pg_shift)) { - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return; - } + if (efi_setup_page_tables(pa, 1 << pg_shift)) + goto err; efi_sync_low_kernel_mappings(); @@ -989,9 +983,9 @@ static void __init __efi_enter_virtual_mode(void) } if (status != EFI_SUCCESS) { - pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", - status); - panic("EFI call to SetVirtualAddressMap() failed!"); + pr_err("Unable to switch EFI into virtual mode (status=%lx)!\n", + status); + goto err; } /* @@ -1018,6 +1012,10 @@ static void __init __efi_enter_virtual_mode(void) /* clean DUMMY object */ efi_delete_dummy_variable(); + return; + +err: + clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); } void __init efi_enter_virtual_mode(void) diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index ae369c2bbc3e..0ebb7f94fd51 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -390,11 +390,12 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) return 0; page = alloc_page(GFP_KERNEL|__GFP_DMA32); - if (!page) - panic("Unable to allocate EFI runtime stack < 4GB\n"); + if (!page) { + pr_err("Unable to allocate EFI runtime stack < 4GB\n"); + return 1; + } - efi_scratch.phys_stack = virt_to_phys(page_address(page)); - efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */ + efi_scratch.phys_stack = page_to_phys(page + 1); /* stack grows down */ npages = (_etext - _text) >> PAGE_SHIFT; text = __pa(_text); diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 5b513ccffde4..cadd7fd290fa 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -257,10 +257,6 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) return; } - /* No need to reserve regions that will never be freed. */ - if (md.attribute & EFI_MEMORY_RUNTIME) - return; - size += addr % EFI_PAGE_SIZE; size = round_up(size, EFI_PAGE_SIZE); addr = round_down(addr, EFI_PAGE_SIZE); @@ -290,6 +286,8 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) early_memunmap(new, new_size); efi_memmap_install(new_phys, num_entries); + e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); + e820__update_table(e820_table); } /* diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 9c80966c80ba..692a179b1ba3 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -250,9 +250,9 @@ static int get_e820_md5(struct e820_table *table, void *buf) return ret; } -static void hibernation_e820_save(void *buf) +static int hibernation_e820_save(void *buf) { - get_e820_md5(e820_table_firmware, buf); + return get_e820_md5(e820_table_firmware, buf); } static bool hibernation_e820_mismatch(void *buf) @@ -272,8 +272,9 @@ static bool hibernation_e820_mismatch(void *buf) return memcmp(result, buf, MD5_DIGEST_SIZE) ? true : false; } #else -static void hibernation_e820_save(void *buf) +static int hibernation_e820_save(void *buf) { + return 0; } static bool hibernation_e820_mismatch(void *buf) @@ -318,9 +319,7 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size) rdr->magic = RESTORE_MAGIC; - hibernation_e820_save(rdr->e820_digest); - - return 0; + return hibernation_e820_save(rdr->e820_digest); } /** diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index b02a36b2c14f..a42015b305f4 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -69,7 +69,7 @@ BEGIN { lprefix1_expr = "\\((66|!F3)\\)" lprefix2_expr = "\\(F3\\)" - lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" + lprefix3_expr = "\\((F2|!F3|66&F2)\\)" lprefix_expr = "\\((66|F2|F3)\\)" max_lprefix = 4 @@ -257,7 +257,7 @@ function convert_operands(count,opnd, i,j,imm,mod) return add_flags(imm, mod) } -/^[0-9a-f]+\:/ { +/^[0-9a-f]+:/ { if (NR == 1) next # get index diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index a18703be9ead..4769a069d5bd 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -77,7 +77,9 @@ static efi_system_table_t __init *xen_efi_probe(void) efi.get_variable = xen_efi_get_variable; efi.get_next_variable = xen_efi_get_next_variable; efi.set_variable = xen_efi_set_variable; + efi.set_variable_nonblocking = xen_efi_set_variable; efi.query_variable_info = xen_efi_query_variable_info; + efi.query_variable_info_nonblocking = xen_efi_query_variable_info; efi.update_capsule = xen_efi_update_capsule; efi.query_capsule_caps = xen_efi_query_capsule_caps; efi.get_next_high_mono_count = xen_efi_get_next_high_mono_count; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 515d5e4414c2..00fc683a2011 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -259,19 +259,41 @@ void xen_reboot(int reason) BUG(); } +static int reboot_reason = SHUTDOWN_reboot; +static bool xen_legacy_crash; void xen_emergency_restart(void) { - xen_reboot(SHUTDOWN_reboot); + xen_reboot(reboot_reason); } static int xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr) { - if (!kexec_crash_loaded()) - xen_reboot(SHUTDOWN_crash); + if (!kexec_crash_loaded()) { + if (xen_legacy_crash) + xen_reboot(SHUTDOWN_crash); + + reboot_reason = SHUTDOWN_crash; + + /* + * If panic_timeout==0 then we are supposed to wait forever. + * However, to preserve original dom0 behavior we have to drop + * into hypervisor. (domU behavior is controlled by its + * config file) + */ + if (panic_timeout == 0) + panic_timeout = -1; + } return NOTIFY_DONE; } +static int __init parse_xen_legacy_crash(char *arg) +{ + xen_legacy_crash = true; + return 0; +} +early_param("xen_legacy_crash", parse_xen_legacy_crash); + static struct notifier_block xen_panic_block = { .notifier_call = xen_panic_event, .priority = INT_MIN diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index f79a0cdc6b4e..1f8175bf2a5e 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -909,14 +909,15 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err) static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) { int ret; +#ifdef CONFIG_X86_64 + unsigned int which; + u64 base; +#endif ret = 0; switch (msr) { #ifdef CONFIG_X86_64 - unsigned which; - u64 base; - case MSR_FS_BASE: which = SEGBASE_FS; goto set; case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 3a6feed76dfc..a93d8a7cef26 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -12,6 +12,7 @@ #include #include #include +#include #include @@ -24,6 +25,7 @@ ENTRY(xen_\name) pop %r11 jmp \name END(xen_\name) +_ASM_NOKPROBE(xen_\name) .endm xen_pv_trap divide_error diff --git a/arch/xtensa/kernel/xtensa_ksyms.c b/arch/xtensa/kernel/xtensa_ksyms.c index 672391003e40..dc7b470a423a 100644 --- a/arch/xtensa/kernel/xtensa_ksyms.c +++ b/arch/xtensa/kernel/xtensa_ksyms.c @@ -114,13 +114,6 @@ EXPORT_SYMBOL(__invalidate_icache_range); // FIXME EXPORT_SYMBOL(screen_info); #endif -EXPORT_SYMBOL(outsb); -EXPORT_SYMBOL(outsw); -EXPORT_SYMBOL(outsl); -EXPORT_SYMBOL(insb); -EXPORT_SYMBOL(insw); -EXPORT_SYMBOL(insl); - extern long common_exception_return; EXPORT_SYMBOL(common_exception_return); diff --git a/arch/xtensa/mm/tlb.c b/arch/xtensa/mm/tlb.c index 35c822286bbe..3ce5ccdb054d 100644 --- a/arch/xtensa/mm/tlb.c +++ b/arch/xtensa/mm/tlb.c @@ -218,6 +218,8 @@ static int check_tlb_entry(unsigned w, unsigned e, bool dtlb) unsigned tlbidx = w | (e << PAGE_SHIFT); unsigned r0 = dtlb ? read_dtlb_virtual(tlbidx) : read_itlb_virtual(tlbidx); + unsigned r1 = dtlb ? + read_dtlb_translation(tlbidx) : read_itlb_translation(tlbidx); unsigned vpn = (r0 & PAGE_MASK) | (e << PAGE_SHIFT); unsigned pte = get_pte_for_vaddr(vpn); unsigned mm_asid = (get_rasid_register() >> 8) & ASID_MASK; @@ -233,8 +235,6 @@ static int check_tlb_entry(unsigned w, unsigned e, bool dtlb) } if (tlb_asid == mm_asid) { - unsigned r1 = dtlb ? read_dtlb_translation(tlbidx) : - read_itlb_translation(tlbidx); if ((pte ^ r1) & PAGE_MASK) { pr_err("%cTLB: way: %u, entry: %u, mapping: %08x->%08x, PTE: %08x\n", dtlb ? 'D' : 'I', w, e, r0, r1, pte); diff --git a/block/Kconfig b/block/Kconfig index 28ec55752b68..4d9bcb951d83 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -184,6 +184,23 @@ config BLK_SED_OPAL Enabling this option enables users to setup/unlock/lock Locking ranges for SED devices using the Opal protocol. +config BLK_INLINE_ENCRYPTION + bool "Enable inline encryption support in block layer" + help + Build the blk-crypto subsystem. Enabling this lets the + block layer handle encryption, so users can take + advantage of inline encryption hardware if present. + +config BLK_INLINE_ENCRYPTION_FALLBACK + bool "Enable crypto API fallback for blk-crypto" + depends on BLK_INLINE_ENCRYPTION + select CRYPTO + select CRYPTO_BLKCIPHER + help + Enabling this lets the block layer handle inline encryption + by falling back to the kernel crypto API when inline + encryption hardware is not present. + menu "Partition Types" source "block/partitions/Kconfig" diff --git a/block/Makefile b/block/Makefile index a86f978bc879..7ae32c556de6 100644 --- a/block/Makefile +++ b/block/Makefile @@ -36,3 +36,6 @@ obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o obj-$(CONFIG_BLK_WBT) += blk-wbt.o obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o +obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += keyslot-manager.o bio-crypt-ctx.o \ + blk-crypto.o +obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) += blk-crypto-fallback.o \ No newline at end of file diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index e65b0da1007b..93863c6173e6 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -3314,7 +3314,12 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) * whether bfqq is being weight-raised, because * bfq_symmetric_scenario() does not take into account also * weight-raised queues (see comments on - * bfq_weights_tree_add()). + * bfq_weights_tree_add()). In particular, if bfqq is being + * weight-raised, it is important to idle only if there are + * other, non-weight-raised queues that may steal throughput + * to bfqq. Actually, we should be even more precise, and + * differentiate between interactive weight raising and + * soft real-time weight raising. * * As a side note, it is worth considering that the above * device-idling countermeasures may however fail in the @@ -3326,7 +3331,8 @@ static bool bfq_bfqq_may_idle(struct bfq_queue *bfqq) * to let requests be served in the desired order until all * the requests already queued in the device have been served. */ - asymmetric_scenario = bfqq->wr_coeff > 1 || + asymmetric_scenario = (bfqq->wr_coeff > 1 && + bfqd->wr_busy_queues < bfqd->busy_queues) || !bfq_symmetric_scenario(bfqd); /* diff --git a/block/bio-crypt-ctx.c b/block/bio-crypt-ctx.c new file mode 100644 index 000000000000..75008b2afea2 --- /dev/null +++ b/block/bio-crypt-ctx.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Google LLC + */ + +#include +#include +#include +#include +#include + +#include "blk-crypto-internal.h" + +static int num_prealloc_crypt_ctxs = 128; + +module_param(num_prealloc_crypt_ctxs, int, 0444); +MODULE_PARM_DESC(num_prealloc_crypt_ctxs, + "Number of bio crypto contexts to preallocate"); + +static struct kmem_cache *bio_crypt_ctx_cache; +static mempool_t *bio_crypt_ctx_pool; + +int __init bio_crypt_ctx_init(void) +{ + size_t i; + + bio_crypt_ctx_cache = KMEM_CACHE(bio_crypt_ctx, 0); + if (!bio_crypt_ctx_cache) + return -ENOMEM; + + bio_crypt_ctx_pool = mempool_create_slab_pool(num_prealloc_crypt_ctxs, + bio_crypt_ctx_cache); + if (!bio_crypt_ctx_pool) + return -ENOMEM; + + /* This is assumed in various places. */ + BUILD_BUG_ON(BLK_ENCRYPTION_MODE_INVALID != 0); + + /* Sanity check that no algorithm exceeds the defined limits. */ + for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++) { + BUG_ON(blk_crypto_modes[i].keysize > BLK_CRYPTO_MAX_KEY_SIZE); + BUG_ON(blk_crypto_modes[i].ivsize > BLK_CRYPTO_MAX_IV_SIZE); + } + + return 0; +} + +struct bio_crypt_ctx *bio_crypt_alloc_ctx(gfp_t gfp_mask) +{ + return mempool_alloc(bio_crypt_ctx_pool, gfp_mask); +} +EXPORT_SYMBOL_GPL(bio_crypt_alloc_ctx); + +void bio_crypt_free_ctx(struct bio *bio) +{ + mempool_free(bio->bi_crypt_context, bio_crypt_ctx_pool); + bio->bi_crypt_context = NULL; +} + +void bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask) +{ + const struct bio_crypt_ctx *src_bc = src->bi_crypt_context; + + bio_clone_skip_dm_default_key(dst, src); + + /* + * If a bio is fallback_crypted, then it will be decrypted when + * bio_endio is called. As we only want the data to be decrypted once, + * copies of the bio must not have have a crypt context. + */ + if (!src_bc || bio_crypt_fallback_crypted(src_bc)) + return; + + dst->bi_crypt_context = bio_crypt_alloc_ctx(gfp_mask); + *dst->bi_crypt_context = *src_bc; + + if (src_bc->bc_keyslot >= 0) + keyslot_manager_get_slot(src_bc->bc_ksm, src_bc->bc_keyslot); +} +EXPORT_SYMBOL_GPL(bio_crypt_clone); + +bool bio_crypt_should_process(struct request *rq) +{ + struct bio *bio = rq->bio; + + if (!bio || !bio->bi_crypt_context) + return false; + + return rq->q->ksm == bio->bi_crypt_context->bc_ksm; +} +EXPORT_SYMBOL_GPL(bio_crypt_should_process); + +/* + * Checks that two bio crypt contexts are compatible - i.e. that + * they are mergeable except for data_unit_num continuity. + */ +bool bio_crypt_ctx_compatible(struct bio *b_1, struct bio *b_2) +{ + struct bio_crypt_ctx *bc1 = b_1->bi_crypt_context; + struct bio_crypt_ctx *bc2 = b_2->bi_crypt_context; + + if (!bc1) + return !bc2; + return bc2 && bc1->bc_key == bc2->bc_key; +} + +/* + * Checks that two bio crypt contexts are compatible, and also + * that their data_unit_nums are continuous (and can hence be merged) + * in the order b_1 followed by b_2. + */ +bool bio_crypt_ctx_mergeable(struct bio *b_1, unsigned int b1_bytes, + struct bio *b_2) +{ + struct bio_crypt_ctx *bc1 = b_1->bi_crypt_context; + struct bio_crypt_ctx *bc2 = b_2->bi_crypt_context; + + if (!bio_crypt_ctx_compatible(b_1, b_2)) + return false; + + return !bc1 || bio_crypt_dun_is_contiguous(bc1, b1_bytes, bc2->bc_dun); +} + +void bio_crypt_ctx_release_keyslot(struct bio_crypt_ctx *bc) +{ + keyslot_manager_put_slot(bc->bc_ksm, bc->bc_keyslot); + bc->bc_ksm = NULL; + bc->bc_keyslot = -1; +} + +int bio_crypt_ctx_acquire_keyslot(struct bio_crypt_ctx *bc, + struct keyslot_manager *ksm) +{ + int slot = keyslot_manager_get_slot_for_key(ksm, bc->bc_key); + + if (slot < 0) + return slot; + + bc->bc_keyslot = slot; + bc->bc_ksm = ksm; + return 0; +} diff --git a/block/bio.c b/block/bio.c index ba3ef9f6102d..6ba47156b7dd 100644 --- a/block/bio.c +++ b/block/bio.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "blk.h" @@ -243,6 +244,8 @@ fallback: void bio_uninit(struct bio *bio) { bio_disassociate_task(bio); + + bio_crypt_free_ctx(bio); } EXPORT_SYMBOL(bio_uninit); @@ -641,15 +644,12 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs) __bio_clone_fast(b, bio); - if (bio_integrity(bio)) { - int ret; + bio_crypt_clone(b, bio, gfp_mask); - ret = bio_integrity_clone(b, bio, gfp_mask); - - if (ret < 0) { - bio_put(b); - return NULL; - } + if (bio_integrity(bio) && + bio_integrity_clone(b, bio, gfp_mask) < 0) { + bio_put(b); + return NULL; } return b; @@ -717,6 +717,8 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, break; } + bio_crypt_clone(bio, bio_src, gfp_mask); + if (bio_integrity(bio_src)) { int ret; @@ -1052,6 +1054,7 @@ void bio_advance(struct bio *bio, unsigned bytes) if (bio_integrity(bio)) bio_integrity_advance(bio, bytes); + bio_crypt_advance(bio, bytes); bio_advance_iter(bio, &bio->bi_iter, bytes); } EXPORT_SYMBOL(bio_advance); @@ -1909,6 +1912,10 @@ void bio_endio(struct bio *bio) again: if (!bio_remaining_done(bio)) return; + + if (!blk_crypto_endio(bio)) + return; + if (!bio_integrity_endio(bio)) return; diff --git a/block/blk-core.c b/block/blk-core.c index bde67e7a3e29..131e57fcea6c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -36,6 +36,7 @@ #include #include /* MTK PATCH */ #include +#include #define CREATE_TRACE_POINTS #include @@ -2226,7 +2227,9 @@ blk_qc_t generic_make_request(struct bio *bio) /* Create a fresh bio_list for all subordinate requests */ bio_list_on_stack[1] = bio_list_on_stack[0]; bio_list_init(&bio_list_on_stack[0]); - ret = q->make_request_fn(q, bio); + + if (!blk_crypto_submit_bio(&bio)) + ret = q->make_request_fn(q, bio); blk_queue_exit(q); @@ -3671,5 +3674,11 @@ int __init blk_dev_init(void) blk_debugfs_root = debugfs_create_dir("block", NULL); #endif + if (bio_crypt_ctx_init() < 0) + panic("Failed to allocate mem for bio crypt ctxs\n"); + + if (blk_crypto_fallback_init() < 0) + panic("Failed to init blk-crypto-fallback\n"); + return 0; } diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c new file mode 100644 index 000000000000..195b04b5df0d --- /dev/null +++ b/block/blk-crypto-fallback.c @@ -0,0 +1,656 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Google LLC + */ + +/* + * Refer to Documentation/block/inline-encryption.rst for detailed explanation. + */ + +#define pr_fmt(fmt) "blk-crypto-fallback: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "blk-crypto-internal.h" + +static unsigned int num_prealloc_bounce_pg = 32; +module_param(num_prealloc_bounce_pg, uint, 0); +MODULE_PARM_DESC(num_prealloc_bounce_pg, + "Number of preallocated bounce pages for the blk-crypto crypto API fallback"); + +static unsigned int blk_crypto_num_keyslots = 100; +module_param_named(num_keyslots, blk_crypto_num_keyslots, uint, 0); +MODULE_PARM_DESC(num_keyslots, + "Number of keyslots for the blk-crypto crypto API fallback"); + +static unsigned int num_prealloc_fallback_crypt_ctxs = 128; +module_param(num_prealloc_fallback_crypt_ctxs, uint, 0); +MODULE_PARM_DESC(num_prealloc_crypt_fallback_ctxs, + "Number of preallocated bio fallback crypto contexts for blk-crypto to use during crypto API fallback"); + +struct bio_fallback_crypt_ctx { + struct bio_crypt_ctx crypt_ctx; + /* + * Copy of the bvec_iter when this bio was submitted. + * We only want to en/decrypt the part of the bio as described by the + * bvec_iter upon submission because bio might be split before being + * resubmitted + */ + struct bvec_iter crypt_iter; + u64 fallback_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; +}; + +/* The following few vars are only used during the crypto API fallback */ +static struct kmem_cache *bio_fallback_crypt_ctx_cache; +static mempool_t *bio_fallback_crypt_ctx_pool; + +/* + * Allocating a crypto tfm during I/O can deadlock, so we have to preallocate + * all of a mode's tfms when that mode starts being used. Since each mode may + * need all the keyslots at some point, each mode needs its own tfm for each + * keyslot; thus, a keyslot may contain tfms for multiple modes. However, to + * match the behavior of real inline encryption hardware (which only supports a + * single encryption context per keyslot), we only allow one tfm per keyslot to + * be used at a time - the rest of the unused tfms have their keys cleared. + */ +static DEFINE_MUTEX(tfms_init_lock); +static bool tfms_inited[BLK_ENCRYPTION_MODE_MAX]; + +struct blk_crypto_decrypt_work { + struct work_struct work; + struct bio *bio; +}; + +static struct blk_crypto_keyslot { + struct crypto_skcipher *tfm; + enum blk_crypto_mode_num crypto_mode; + struct crypto_skcipher *tfms[BLK_ENCRYPTION_MODE_MAX]; +} *blk_crypto_keyslots; + +/* The following few vars are only used during the crypto API fallback */ +static struct keyslot_manager *blk_crypto_ksm; +static struct workqueue_struct *blk_crypto_wq; +static mempool_t *blk_crypto_bounce_page_pool; +static struct kmem_cache *blk_crypto_decrypt_work_cache; + +bool bio_crypt_fallback_crypted(const struct bio_crypt_ctx *bc) +{ + return bc && bc->bc_ksm == blk_crypto_ksm; +} + +/* + * This is the key we set when evicting a keyslot. This *should* be the all 0's + * key, but AES-XTS rejects that key, so we use some random bytes instead. + */ +static u8 blank_key[BLK_CRYPTO_MAX_KEY_SIZE]; + +static void blk_crypto_evict_keyslot(unsigned int slot) +{ + struct blk_crypto_keyslot *slotp = &blk_crypto_keyslots[slot]; + enum blk_crypto_mode_num crypto_mode = slotp->crypto_mode; + int err; + + WARN_ON(slotp->crypto_mode == BLK_ENCRYPTION_MODE_INVALID); + + /* Clear the key in the skcipher */ + err = crypto_skcipher_setkey(slotp->tfms[crypto_mode], blank_key, + blk_crypto_modes[crypto_mode].keysize); + WARN_ON(err); + slotp->crypto_mode = BLK_ENCRYPTION_MODE_INVALID; +} + +static int blk_crypto_keyslot_program(struct keyslot_manager *ksm, + const struct blk_crypto_key *key, + unsigned int slot) +{ + struct blk_crypto_keyslot *slotp = &blk_crypto_keyslots[slot]; + const enum blk_crypto_mode_num crypto_mode = key->crypto_mode; + int err; + + if (crypto_mode != slotp->crypto_mode && + slotp->crypto_mode != BLK_ENCRYPTION_MODE_INVALID) { + blk_crypto_evict_keyslot(slot); + } + + if (!slotp->tfms[crypto_mode]) + return -ENOMEM; + slotp->crypto_mode = crypto_mode; + err = crypto_skcipher_setkey(slotp->tfms[crypto_mode], key->raw, + key->size); + if (err) { + blk_crypto_evict_keyslot(slot); + return err; + } + return 0; +} + +static int blk_crypto_keyslot_evict(struct keyslot_manager *ksm, + const struct blk_crypto_key *key, + unsigned int slot) +{ + blk_crypto_evict_keyslot(slot); + return 0; +} + +/* + * The crypto API fallback KSM ops - only used for a bio when it specifies a + * blk_crypto_mode for which we failed to get a keyslot in the device's inline + * encryption hardware (which probably means the device doesn't have inline + * encryption hardware that supports that crypto mode). + */ +static const struct keyslot_mgmt_ll_ops blk_crypto_ksm_ll_ops = { + .keyslot_program = blk_crypto_keyslot_program, + .keyslot_evict = blk_crypto_keyslot_evict, +}; + +static void blk_crypto_encrypt_endio(struct bio *enc_bio) +{ + struct bio *src_bio = enc_bio->bi_private; + int i; + + for (i = 0; i < enc_bio->bi_vcnt; i++) + mempool_free(enc_bio->bi_io_vec[i].bv_page, + blk_crypto_bounce_page_pool); + + src_bio->bi_status = enc_bio->bi_status; + + bio_put(enc_bio); + bio_endio(src_bio); +} + +static struct bio *blk_crypto_clone_bio(struct bio *bio_src) +{ + struct bvec_iter iter; + struct bio_vec bv; + struct bio *bio; + + bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src), NULL); + if (!bio) + return NULL; + bio->bi_disk = bio_src->bi_disk; + bio->bi_opf = bio_src->bi_opf; + bio->bi_ioprio = bio_src->bi_ioprio; + bio->bi_write_hint = bio_src->bi_write_hint; + bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; + bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; + + bio_for_each_segment(bv, bio_src, iter) + bio->bi_io_vec[bio->bi_vcnt++] = bv; + + if (bio_integrity(bio_src) && + bio_integrity_clone(bio, bio_src, GFP_NOIO) < 0) { + bio_put(bio); + return NULL; + } + + bio_clone_blkcg_association(bio, bio_src); + + bio_clone_skip_dm_default_key(bio, bio_src); + + return bio; +} + +static int blk_crypto_alloc_cipher_req(struct bio *src_bio, + struct skcipher_request **ciph_req_ret, + struct crypto_wait *wait) +{ + struct skcipher_request *ciph_req; + const struct blk_crypto_keyslot *slotp; + + slotp = &blk_crypto_keyslots[src_bio->bi_crypt_context->bc_keyslot]; + ciph_req = skcipher_request_alloc(slotp->tfms[slotp->crypto_mode], + GFP_NOIO); + if (!ciph_req) { + src_bio->bi_status = BLK_STS_RESOURCE; + return -ENOMEM; + } + + skcipher_request_set_callback(ciph_req, + CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP, + crypto_req_done, wait); + *ciph_req_ret = ciph_req; + return 0; +} + +static int blk_crypto_split_bio_if_needed(struct bio **bio_ptr) +{ + struct bio *bio = *bio_ptr; + unsigned int i = 0; + unsigned int num_sectors = 0; + struct bio_vec bv; + struct bvec_iter iter; + + bio_for_each_segment(bv, bio, iter) { + num_sectors += bv.bv_len >> SECTOR_SHIFT; + if (++i == BIO_MAX_PAGES) + break; + } + if (num_sectors < bio_sectors(bio)) { + struct bio *split_bio; + + split_bio = bio_split(bio, num_sectors, GFP_NOIO, NULL); + if (!split_bio) { + bio->bi_status = BLK_STS_RESOURCE; + return -ENOMEM; + } + bio_chain(split_bio, bio); + generic_make_request(bio); + *bio_ptr = split_bio; + } + return 0; +} + +union blk_crypto_iv { + __le64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; + u8 bytes[BLK_CRYPTO_MAX_IV_SIZE]; +}; + +static void blk_crypto_dun_to_iv(const u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE], + union blk_crypto_iv *iv) +{ + int i; + + for (i = 0; i < BLK_CRYPTO_DUN_ARRAY_SIZE; i++) + iv->dun[i] = cpu_to_le64(dun[i]); +} + +/* + * The crypto API fallback's encryption routine. + * Allocate a bounce bio for encryption, encrypt the input bio using crypto API, + * and replace *bio_ptr with the bounce bio. May split input bio if it's too + * large. + */ +static int blk_crypto_encrypt_bio(struct bio **bio_ptr) +{ + struct bio *src_bio; + struct skcipher_request *ciph_req = NULL; + DECLARE_CRYPTO_WAIT(wait); + u64 curr_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; + union blk_crypto_iv iv; + struct scatterlist src, dst; + struct bio *enc_bio; + unsigned int i, j; + int data_unit_size; + struct bio_crypt_ctx *bc; + int err = 0; + + /* Split the bio if it's too big for single page bvec */ + err = blk_crypto_split_bio_if_needed(bio_ptr); + if (err) + return err; + + src_bio = *bio_ptr; + bc = src_bio->bi_crypt_context; + data_unit_size = bc->bc_key->data_unit_size; + + /* Allocate bounce bio for encryption */ + enc_bio = blk_crypto_clone_bio(src_bio); + if (!enc_bio) { + src_bio->bi_status = BLK_STS_RESOURCE; + return -ENOMEM; + } + + /* + * Use the crypto API fallback keyslot manager to get a crypto_skcipher + * for the algorithm and key specified for this bio. + */ + err = bio_crypt_ctx_acquire_keyslot(bc, blk_crypto_ksm); + if (err) { + src_bio->bi_status = BLK_STS_IOERR; + goto out_put_enc_bio; + } + + /* and then allocate an skcipher_request for it */ + err = blk_crypto_alloc_cipher_req(src_bio, &ciph_req, &wait); + if (err) + goto out_release_keyslot; + + memcpy(curr_dun, bc->bc_dun, sizeof(curr_dun)); + sg_init_table(&src, 1); + sg_init_table(&dst, 1); + + skcipher_request_set_crypt(ciph_req, &src, &dst, data_unit_size, + iv.bytes); + + /* Encrypt each page in the bounce bio */ + for (i = 0; i < enc_bio->bi_vcnt; i++) { + struct bio_vec *enc_bvec = &enc_bio->bi_io_vec[i]; + struct page *plaintext_page = enc_bvec->bv_page; + struct page *ciphertext_page = + mempool_alloc(blk_crypto_bounce_page_pool, GFP_NOIO); + + enc_bvec->bv_page = ciphertext_page; + + if (!ciphertext_page) { + src_bio->bi_status = BLK_STS_RESOURCE; + err = -ENOMEM; + goto out_free_bounce_pages; + } + + sg_set_page(&src, plaintext_page, data_unit_size, + enc_bvec->bv_offset); + sg_set_page(&dst, ciphertext_page, data_unit_size, + enc_bvec->bv_offset); + + /* Encrypt each data unit in this page */ + for (j = 0; j < enc_bvec->bv_len; j += data_unit_size) { + blk_crypto_dun_to_iv(curr_dun, &iv); + err = crypto_wait_req(crypto_skcipher_encrypt(ciph_req), + &wait); + if (err) { + i++; + src_bio->bi_status = BLK_STS_RESOURCE; + goto out_free_bounce_pages; + } + bio_crypt_dun_increment(curr_dun, 1); + src.offset += data_unit_size; + dst.offset += data_unit_size; + } + } + + enc_bio->bi_private = src_bio; + enc_bio->bi_end_io = blk_crypto_encrypt_endio; + *bio_ptr = enc_bio; + + enc_bio = NULL; + err = 0; + goto out_free_ciph_req; + +out_free_bounce_pages: + while (i > 0) + mempool_free(enc_bio->bi_io_vec[--i].bv_page, + blk_crypto_bounce_page_pool); +out_free_ciph_req: + skcipher_request_free(ciph_req); +out_release_keyslot: + bio_crypt_ctx_release_keyslot(bc); +out_put_enc_bio: + if (enc_bio) + bio_put(enc_bio); + + return err; +} + +static void blk_crypto_free_fallback_crypt_ctx(struct bio *bio) +{ + mempool_free(container_of(bio->bi_crypt_context, + struct bio_fallback_crypt_ctx, + crypt_ctx), + bio_fallback_crypt_ctx_pool); + bio->bi_crypt_context = NULL; +} + +/* + * The crypto API fallback's main decryption routine. + * Decrypts input bio in place. + */ +static void blk_crypto_decrypt_bio(struct work_struct *work) +{ + struct blk_crypto_decrypt_work *decrypt_work = + container_of(work, struct blk_crypto_decrypt_work, work); + struct bio *bio = decrypt_work->bio; + struct skcipher_request *ciph_req = NULL; + DECLARE_CRYPTO_WAIT(wait); + struct bio_vec bv; + struct bvec_iter iter; + u64 curr_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; + union blk_crypto_iv iv; + struct scatterlist sg; + struct bio_crypt_ctx *bc = bio->bi_crypt_context; + struct bio_fallback_crypt_ctx *f_ctx = + container_of(bc, struct bio_fallback_crypt_ctx, crypt_ctx); + const int data_unit_size = bc->bc_key->data_unit_size; + unsigned int i; + int err; + + /* + * Use the crypto API fallback keyslot manager to get a crypto_skcipher + * for the algorithm and key specified for this bio. + */ + if (bio_crypt_ctx_acquire_keyslot(bc, blk_crypto_ksm)) { + bio->bi_status = BLK_STS_RESOURCE; + goto out_no_keyslot; + } + + /* and then allocate an skcipher_request for it */ + err = blk_crypto_alloc_cipher_req(bio, &ciph_req, &wait); + if (err) + goto out; + + memcpy(curr_dun, f_ctx->fallback_dun, sizeof(curr_dun)); + sg_init_table(&sg, 1); + skcipher_request_set_crypt(ciph_req, &sg, &sg, data_unit_size, + iv.bytes); + + /* Decrypt each segment in the bio */ + __bio_for_each_segment(bv, bio, iter, f_ctx->crypt_iter) { + struct page *page = bv.bv_page; + + sg_set_page(&sg, page, data_unit_size, bv.bv_offset); + + /* Decrypt each data unit in the segment */ + for (i = 0; i < bv.bv_len; i += data_unit_size) { + blk_crypto_dun_to_iv(curr_dun, &iv); + if (crypto_wait_req(crypto_skcipher_decrypt(ciph_req), + &wait)) { + bio->bi_status = BLK_STS_IOERR; + goto out; + } + bio_crypt_dun_increment(curr_dun, 1); + sg.offset += data_unit_size; + } + } + +out: + skcipher_request_free(ciph_req); + bio_crypt_ctx_release_keyslot(bc); +out_no_keyslot: + kmem_cache_free(blk_crypto_decrypt_work_cache, decrypt_work); + blk_crypto_free_fallback_crypt_ctx(bio); + bio_endio(bio); +} + +/* + * Queue bio for decryption. + * Returns true iff bio was queued for decryption. + */ +bool blk_crypto_queue_decrypt_bio(struct bio *bio) +{ + struct blk_crypto_decrypt_work *decrypt_work; + + /* If there was an IO error, don't queue for decrypt. */ + if (bio->bi_status) + goto out; + + decrypt_work = kmem_cache_zalloc(blk_crypto_decrypt_work_cache, + GFP_ATOMIC); + if (!decrypt_work) { + bio->bi_status = BLK_STS_RESOURCE; + goto out; + } + + INIT_WORK(&decrypt_work->work, blk_crypto_decrypt_bio); + decrypt_work->bio = bio; + queue_work(blk_crypto_wq, &decrypt_work->work); + + return true; +out: + blk_crypto_free_fallback_crypt_ctx(bio); + return false; +} + +/** + * blk_crypto_start_using_mode() - Start using a crypto algorithm on a device + * @mode_num: the blk_crypto_mode we want to allocate ciphers for. + * @data_unit_size: the data unit size that will be used + * @q: the request queue for the device + * + * Upper layers must call this function to ensure that a the crypto API fallback + * has transforms for this algorithm, if they become necessary. + * + * Return: 0 on success and -err on error. + */ +int blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, + unsigned int data_unit_size, + struct request_queue *q) +{ + struct blk_crypto_keyslot *slotp; + unsigned int i; + int err = 0; + + /* + * Fast path + * Ensure that updates to blk_crypto_keyslots[i].tfms[mode_num] + * for each i are visible before we try to access them. + */ + if (likely(smp_load_acquire(&tfms_inited[mode_num]))) + return 0; + + /* + * If the keyslot manager of the request queue supports this + * crypto mode, then we don't need to allocate this mode. + */ + if (keyslot_manager_crypto_mode_supported(q->ksm, mode_num, + data_unit_size)) + return 0; + + mutex_lock(&tfms_init_lock); + if (likely(tfms_inited[mode_num])) + goto out; + + for (i = 0; i < blk_crypto_num_keyslots; i++) { + slotp = &blk_crypto_keyslots[i]; + slotp->tfms[mode_num] = crypto_alloc_skcipher( + blk_crypto_modes[mode_num].cipher_str, + 0, 0); + if (IS_ERR(slotp->tfms[mode_num])) { + err = PTR_ERR(slotp->tfms[mode_num]); + slotp->tfms[mode_num] = NULL; + goto out_free_tfms; + } + + crypto_skcipher_set_flags(slotp->tfms[mode_num], + CRYPTO_TFM_REQ_WEAK_KEY); + } + + /* + * Ensure that updates to blk_crypto_keyslots[i].tfms[mode_num] + * for each i are visible before we set tfms_inited[mode_num]. + */ + smp_store_release(&tfms_inited[mode_num], true); + goto out; + +out_free_tfms: + for (i = 0; i < blk_crypto_num_keyslots; i++) { + slotp = &blk_crypto_keyslots[i]; + crypto_free_skcipher(slotp->tfms[mode_num]); + slotp->tfms[mode_num] = NULL; + } +out: + mutex_unlock(&tfms_init_lock); + return err; +} +EXPORT_SYMBOL_GPL(blk_crypto_start_using_mode); + +int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key) +{ + return keyslot_manager_evict_key(blk_crypto_ksm, key); +} + +int blk_crypto_fallback_submit_bio(struct bio **bio_ptr) +{ + struct bio *bio = *bio_ptr; + struct bio_crypt_ctx *bc = bio->bi_crypt_context; + struct bio_fallback_crypt_ctx *f_ctx; + + if (bc->bc_key->is_hw_wrapped) { + pr_warn_once("HW wrapped key cannot be used with fallback.\n"); + bio->bi_status = BLK_STS_NOTSUPP; + return -EOPNOTSUPP; + } + + if (!tfms_inited[bc->bc_key->crypto_mode]) { + bio->bi_status = BLK_STS_IOERR; + return -EIO; + } + + if (bio_data_dir(bio) == WRITE) + return blk_crypto_encrypt_bio(bio_ptr); + + /* + * Mark bio as fallback crypted and replace the bio_crypt_ctx with + * another one contained in a bio_fallback_crypt_ctx, so that the + * fallback has space to store the info it needs for decryption. + */ + bc->bc_ksm = blk_crypto_ksm; + f_ctx = mempool_alloc(bio_fallback_crypt_ctx_pool, GFP_NOIO); + f_ctx->crypt_ctx = *bc; + memcpy(f_ctx->fallback_dun, bc->bc_dun, sizeof(f_ctx->fallback_dun)); + f_ctx->crypt_iter = bio->bi_iter; + + bio_crypt_free_ctx(bio); + bio->bi_crypt_context = &f_ctx->crypt_ctx; + + return 0; +} + +int __init blk_crypto_fallback_init(void) +{ + int i; + unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX]; + + prandom_bytes(blank_key, BLK_CRYPTO_MAX_KEY_SIZE); + + /* All blk-crypto modes have a crypto API fallback. */ + for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++) + crypto_mode_supported[i] = 0xFFFFFFFF; + crypto_mode_supported[BLK_ENCRYPTION_MODE_INVALID] = 0; + + blk_crypto_ksm = keyslot_manager_create(NULL, blk_crypto_num_keyslots, + &blk_crypto_ksm_ll_ops, + crypto_mode_supported, NULL); + if (!blk_crypto_ksm) + return -ENOMEM; + + blk_crypto_wq = alloc_workqueue("blk_crypto_wq", + WQ_UNBOUND | WQ_HIGHPRI | + WQ_MEM_RECLAIM, num_online_cpus()); + if (!blk_crypto_wq) + return -ENOMEM; + + blk_crypto_keyslots = kcalloc(blk_crypto_num_keyslots, + sizeof(blk_crypto_keyslots[0]), + GFP_KERNEL); + if (!blk_crypto_keyslots) + return -ENOMEM; + + blk_crypto_bounce_page_pool = + mempool_create_page_pool(num_prealloc_bounce_pg, 0); + if (!blk_crypto_bounce_page_pool) + return -ENOMEM; + + blk_crypto_decrypt_work_cache = KMEM_CACHE(blk_crypto_decrypt_work, + SLAB_RECLAIM_ACCOUNT); + if (!blk_crypto_decrypt_work_cache) + return -ENOMEM; + + bio_fallback_crypt_ctx_cache = KMEM_CACHE(bio_fallback_crypt_ctx, 0); + if (!bio_fallback_crypt_ctx_cache) + return -ENOMEM; + + bio_fallback_crypt_ctx_pool = + mempool_create_slab_pool(num_prealloc_fallback_crypt_ctxs, + bio_fallback_crypt_ctx_cache); + if (!bio_fallback_crypt_ctx_pool) + return -ENOMEM; + + return 0; +} diff --git a/block/blk-crypto-internal.h b/block/blk-crypto-internal.h new file mode 100644 index 000000000000..40d826b743da --- /dev/null +++ b/block/blk-crypto-internal.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ + +#ifndef __LINUX_BLK_CRYPTO_INTERNAL_H +#define __LINUX_BLK_CRYPTO_INTERNAL_H + +#include + +/* Represents a crypto mode supported by blk-crypto */ +struct blk_crypto_mode { + const char *cipher_str; /* crypto API name (for fallback case) */ + unsigned int keysize; /* key size in bytes */ + unsigned int ivsize; /* iv size in bytes */ +}; + +extern const struct blk_crypto_mode blk_crypto_modes[]; + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK + +int blk_crypto_fallback_submit_bio(struct bio **bio_ptr); + +bool blk_crypto_queue_decrypt_bio(struct bio *bio); + +int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key); + +bool bio_crypt_fallback_crypted(const struct bio_crypt_ctx *bc); + +#else /* CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK */ + +static inline bool bio_crypt_fallback_crypted(const struct bio_crypt_ctx *bc) +{ + return false; +} + +static inline int blk_crypto_fallback_submit_bio(struct bio **bio_ptr) +{ + pr_warn_once("crypto API fallback disabled; failing request\n"); + (*bio_ptr)->bi_status = BLK_STS_NOTSUPP; + return -EIO; +} + +static inline bool blk_crypto_queue_decrypt_bio(struct bio *bio) +{ + WARN_ON(1); + return false; +} + +static inline int +blk_crypto_fallback_evict_key(const struct blk_crypto_key *key) +{ + return 0; +} + +#endif /* CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK */ + +#endif /* __LINUX_BLK_CRYPTO_INTERNAL_H */ diff --git a/block/blk-crypto.c b/block/blk-crypto.c new file mode 100644 index 000000000000..88df1c0e7e5f --- /dev/null +++ b/block/blk-crypto.c @@ -0,0 +1,260 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Google LLC + */ + +/* + * Refer to Documentation/block/inline-encryption.rst for detailed explanation. + */ + +#define pr_fmt(fmt) "blk-crypto: " fmt + +#include +#include +#include +#include +#include + +#include "blk-crypto-internal.h" + +const struct blk_crypto_mode blk_crypto_modes[] = { + [BLK_ENCRYPTION_MODE_AES_256_XTS] = { + .cipher_str = "xts(aes)", + .keysize = 64, + .ivsize = 16, + }, + [BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV] = { + .cipher_str = "essiv(cbc(aes),sha256)", + .keysize = 16, + .ivsize = 16, + }, + [BLK_ENCRYPTION_MODE_ADIANTUM] = { + .cipher_str = "adiantum(xchacha12,aes)", + .keysize = 32, + .ivsize = 32, + }, +}; + +/* Check that all I/O segments are data unit aligned */ +static int bio_crypt_check_alignment(struct bio *bio) +{ + const unsigned int data_unit_size = + bio->bi_crypt_context->bc_key->data_unit_size; + struct bvec_iter iter; + struct bio_vec bv; + + bio_for_each_segment(bv, bio, iter) { + if (!IS_ALIGNED(bv.bv_len | bv.bv_offset, data_unit_size)) + return -EIO; + } + return 0; +} + +/** + * blk_crypto_submit_bio - handle submitting bio for inline encryption + * + * @bio_ptr: pointer to original bio pointer + * + * If the bio doesn't have inline encryption enabled or the submitter already + * specified a keyslot for the target device, do nothing. Else, a raw key must + * have been provided, so acquire a device keyslot for it if supported. Else, + * use the crypto API fallback. + * + * When the crypto API fallback is used for encryption, blk-crypto may choose to + * split the bio into 2 - the first one that will continue to be processed and + * the second one that will be resubmitted via generic_make_request. + * A bounce bio will be allocated to encrypt the contents of the aforementioned + * "first one", and *bio_ptr will be updated to this bounce bio. + * + * Return: 0 if bio submission should continue; nonzero if bio_endio() was + * already called so bio submission should abort. + */ +int blk_crypto_submit_bio(struct bio **bio_ptr) +{ + struct bio *bio = *bio_ptr; + struct request_queue *q; + struct bio_crypt_ctx *bc = bio->bi_crypt_context; + int err; + + if (!bc || !bio_has_data(bio)) + return 0; + + /* + * When a read bio is marked for fallback decryption, its bi_iter is + * saved so that when we decrypt the bio later, we know what part of it + * was marked for fallback decryption (when the bio is passed down after + * blk_crypto_submit bio, it may be split or advanced so we cannot rely + * on the bi_iter while decrypting in blk_crypto_endio) + */ + if (bio_crypt_fallback_crypted(bc)) + return 0; + + err = bio_crypt_check_alignment(bio); + if (err) { + bio->bi_status = BLK_STS_IOERR; + goto out; + } + + q = bio->bi_disk->queue; + + if (bc->bc_ksm) { + /* Key already programmed into device? */ + if (q->ksm == bc->bc_ksm) + return 0; + + /* Nope, release the existing keyslot. */ + bio_crypt_ctx_release_keyslot(bc); + } + + /* Get device keyslot if supported */ + if (keyslot_manager_crypto_mode_supported(q->ksm, + bc->bc_key->crypto_mode, + bc->bc_key->data_unit_size)) { + err = bio_crypt_ctx_acquire_keyslot(bc, q->ksm); + if (!err) + return 0; + + pr_warn_once("Failed to acquire keyslot for %s (err=%d). Falling back to crypto API.\n", + bio->bi_disk->disk_name, err); + } + + /* Fallback to crypto API */ + err = blk_crypto_fallback_submit_bio(bio_ptr); + if (err) + goto out; + + return 0; +out: + bio_endio(*bio_ptr); + return err; +} + +/** + * blk_crypto_endio - clean up bio w.r.t inline encryption during bio_endio + * + * @bio: the bio to clean up + * + * If blk_crypto_submit_bio decided to fallback to crypto API for this bio, + * we queue the bio for decryption into a workqueue and return false, + * and call bio_endio(bio) at a later time (after the bio has been decrypted). + * + * If the bio is not to be decrypted by the crypto API, this function releases + * the reference to the keyslot that blk_crypto_submit_bio got. + * + * Return: true if bio_endio should continue; false otherwise (bio_endio will + * be called again when bio has been decrypted). + */ +bool blk_crypto_endio(struct bio *bio) +{ + struct bio_crypt_ctx *bc = bio->bi_crypt_context; + + if (!bc) + return true; + + if (bio_crypt_fallback_crypted(bc)) { + /* + * The only bios who's crypto is handled by the blk-crypto + * fallback when they reach here are those with + * bio_data_dir(bio) == READ, since WRITE bios that are + * encrypted by the crypto API fallback are handled by + * blk_crypto_encrypt_endio(). + */ + return !blk_crypto_queue_decrypt_bio(bio); + } + + if (bc->bc_keyslot >= 0) + bio_crypt_ctx_release_keyslot(bc); + + return true; +} + +/** + * blk_crypto_init_key() - Prepare a key for use with blk-crypto + * @blk_key: Pointer to the blk_crypto_key to initialize. + * @raw_key: Pointer to the raw key. + * @raw_key_size: Size of raw key. Must be at least the required size for the + * chosen @crypto_mode; see blk_crypto_modes[]. (It's allowed + * to be longer than the mode's actual key size, in order to + * support inline encryption hardware that accepts wrapped keys. + * @is_hw_wrapped has to be set for such keys) + * @is_hw_wrapped: Denotes @raw_key is wrapped. + * @crypto_mode: identifier for the encryption algorithm to use + * @data_unit_size: the data unit size to use for en/decryption + * + * Return: The blk_crypto_key that was prepared, or an ERR_PTR() on error. When + * done using the key, it must be freed with blk_crypto_free_key(). + */ +int blk_crypto_init_key(struct blk_crypto_key *blk_key, + const u8 *raw_key, unsigned int raw_key_size, + bool is_hw_wrapped, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size) +{ + const struct blk_crypto_mode *mode; + static siphash_key_t hash_key; + + memset(blk_key, 0, sizeof(*blk_key)); + + if (crypto_mode >= ARRAY_SIZE(blk_crypto_modes)) + return -EINVAL; + + BUILD_BUG_ON(BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE < BLK_CRYPTO_MAX_KEY_SIZE); + + mode = &blk_crypto_modes[crypto_mode]; + if (is_hw_wrapped) { + if (raw_key_size < mode->keysize || + raw_key_size > BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE) + return -EINVAL; + } else { + if (raw_key_size != mode->keysize) + return -EINVAL; + } + + if (!is_power_of_2(data_unit_size)) + return -EINVAL; + + blk_key->crypto_mode = crypto_mode; + blk_key->data_unit_size = data_unit_size; + blk_key->data_unit_size_bits = ilog2(data_unit_size); + blk_key->size = raw_key_size; + blk_key->is_hw_wrapped = is_hw_wrapped; + memcpy(blk_key->raw, raw_key, raw_key_size); + + /* + * The keyslot manager uses the SipHash of the key to implement O(1) key + * lookups while avoiding leaking information about the keys. It's + * precomputed here so that it only needs to be computed once per key. + */ + get_random_once(&hash_key, sizeof(hash_key)); + blk_key->hash = siphash(raw_key, raw_key_size, &hash_key); + + return 0; +} +EXPORT_SYMBOL_GPL(blk_crypto_init_key); + +/** + * blk_crypto_evict_key() - Evict a key from any inline encryption hardware + * it may have been programmed into + * @q: The request queue who's keyslot manager this key might have been + * programmed into + * @key: The key to evict + * + * Upper layers (filesystems) should call this function to ensure that a key + * is evicted from hardware that it might have been programmed into. This + * will call keyslot_manager_evict_key on the queue's keyslot manager, if one + * exists, and supports the crypto algorithm with the specified data unit size. + * Otherwise, it will evict the key from the blk-crypto-fallback's ksm. + * + * Return: 0 on success, -err on error. + */ +int blk_crypto_evict_key(struct request_queue *q, + const struct blk_crypto_key *key) +{ + if (q->ksm && + keyslot_manager_crypto_mode_supported(q->ksm, key->crypto_mode, + key->data_unit_size)) + return keyslot_manager_evict_key(q->ksm, key); + + return blk_crypto_fallback_evict_key(key); +} +EXPORT_SYMBOL_GPL(blk_crypto_evict_key); diff --git a/block/blk-map.c b/block/blk-map.c index e31be14da8ea..f72a3af689b6 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -152,7 +152,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, return 0; unmap_rq: - __blk_rq_unmap_user(bio); + blk_rq_unmap_user(bio); fail: rq->bio = NULL; return ret; diff --git a/block/blk-merge.c b/block/blk-merge.c index 878fc52f1d78..9f3123181116 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -300,13 +300,7 @@ void blk_recalc_rq_segments(struct request *rq) void blk_recount_segments(struct request_queue *q, struct bio *bio) { - unsigned short seg_cnt; - - /* estimate segment number by bi_vcnt for non-cloned bio */ - if (bio_flagged(bio, BIO_CLONED)) - seg_cnt = bio_segments(bio); - else - seg_cnt = bio->bi_vcnt; + unsigned short seg_cnt = bio_segments(bio); if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) && (seg_cnt < queue_max_segments(q))) @@ -515,6 +509,8 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, req_set_nomerge(q, req); return 0; } + if (!bio_crypt_ctx_mergeable(req->bio, blk_rq_bytes(req), bio)) + return 0; if (!bio_flagged(req->biotail, BIO_SEG_VALID)) blk_recount_segments(q, req->biotail); if (!bio_flagged(bio, BIO_SEG_VALID)) @@ -537,6 +533,8 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, req_set_nomerge(q, req); return 0; } + if (!bio_crypt_ctx_mergeable(bio, bio->bi_iter.bi_size, req->bio)) + return 0; if (!bio_flagged(bio, BIO_SEG_VALID)) blk_recount_segments(q, bio); if (!bio_flagged(req->bio, BIO_SEG_VALID)) @@ -613,6 +611,9 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, if (blk_integrity_merge_rq(q, req, next) == false) return 0; + if (!bio_crypt_ctx_mergeable(req->bio, blk_rq_bytes(req), next->bio)) + return 0; + /* Merge is OK... */ req->nr_phys_segments = total_phys_segments; return 1; @@ -664,6 +665,31 @@ static void blk_account_io_merge(struct request *req) part_stat_unlock(); } } +/* + * Two cases of handling DISCARD merge: + * If max_discard_segments > 1, the driver takes every bio + * as a range and send them to controller together. The ranges + * needn't to be contiguous. + * Otherwise, the bios/requests will be handled as same as + * others which should be contiguous. + */ +static inline bool blk_discard_mergable(struct request *req) +{ + if (req_op(req) == REQ_OP_DISCARD && + queue_max_discard_segments(req->q) > 1) + return true; + return false; +} + +enum elv_merge blk_try_req_merge(struct request *req, struct request *next) +{ + if (blk_discard_mergable(req)) + return ELEVATOR_DISCARD_MERGE; + else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next)) + return ELEVATOR_BACK_MERGE; + + return ELEVATOR_NO_MERGE; +} /* * For non-mq, this has to be called with the request spinlock acquired. @@ -681,12 +707,6 @@ static struct request *attempt_merge(struct request_queue *q, if (req_op(req) != req_op(next)) return NULL; - /* - * not contiguous - */ - if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next)) - return NULL; - if (rq_data_dir(req) != rq_data_dir(next) || req->rq_disk != next->rq_disk || req_no_special_merge(next)) @@ -710,11 +730,19 @@ static struct request *attempt_merge(struct request_queue *q, * counts here. Handle DISCARDs separately, as they * have separate settings. */ - if (req_op(req) == REQ_OP_DISCARD) { + + switch (blk_try_req_merge(req, next)) { + case ELEVATOR_DISCARD_MERGE: if (!req_attempt_discard_merge(q, req, next)) return NULL; - } else if (!ll_merge_requests_fn(q, req, next)) + break; + case ELEVATOR_BACK_MERGE: + if (!ll_merge_requests_fn(q, req, next)) + return NULL; + break; + default: return NULL; + } /* * If failfast settings disagree or any of the two is already @@ -743,7 +771,7 @@ static struct request *attempt_merge(struct request_queue *q, req->__data_len += blk_rq_bytes(next); - if (req_op(req) != REQ_OP_DISCARD) + if (!blk_discard_mergable(req)) elv_merge_requests(q, req, next); /* @@ -834,13 +862,16 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) if (rq->write_hint != bio->bi_write_hint) return false; + /* Only merge if the crypt contexts are compatible */ + if (!bio_crypt_ctx_compatible(bio, rq->bio)) + return false; + return true; } enum elv_merge blk_try_merge(struct request *rq, struct bio *bio) { - if (req_op(rq) == REQ_OP_DISCARD && - queue_max_discard_segments(rq->q) > 1) + if (blk_discard_mergable(rq)) return ELEVATOR_DISCARD_MERGE; else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector) return ELEVATOR_BACK_MERGE; diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 79969c3c234f..c97fafa1b206 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -145,20 +145,25 @@ static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx, static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) { + const size_t size = PAGE_SIZE - 1; unsigned int i, first = 1; - ssize_t ret = 0; + int ret = 0, pos = 0; for_each_cpu(i, hctx->cpumask) { if (first) - ret += sprintf(ret + page, "%u", i); + ret = snprintf(pos + page, size - pos, "%u", i); else - ret += sprintf(ret + page, ", %u", i); + ret = snprintf(pos + page, size - pos, ", %u", i); + + if (ret >= size - pos) + break; first = 0; + pos += ret; } - ret += sprintf(ret + page, "\n"); - return ret; + ret = snprintf(pos + page, size + 1 - pos, "\n"); + return pos + ret; } static struct attribute *default_ctx_attrs[] = { diff --git a/block/blk-settings.c b/block/blk-settings.c index 474b0b95fcd1..6c2faaa38cc1 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -379,7 +379,7 @@ EXPORT_SYMBOL(blk_queue_max_segment_size); * storage device can address. The default of 512 covers most * hardware. **/ -void blk_queue_logical_block_size(struct request_queue *q, unsigned short size) +void blk_queue_logical_block_size(struct request_queue *q, unsigned int size) { q->limits.logical_block_size = size; diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 6ca015f92766..6490b2759bcb 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -354,6 +355,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) * but we call blkdev_ioctl, which gets the lock for us */ case BLKRRPART: + case BLKREPORTZONE: + case BLKRESETZONE: return blkdev_ioctl(bdev, mode, cmd, (unsigned long)compat_ptr(arg)); case BLKBSZSET_32: @@ -401,6 +404,14 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKTRACETEARDOWN: /* compatible */ ret = blk_trace_ioctl(bdev, cmd, compat_ptr(arg)); return ret; + case IOC_PR_REGISTER: + case IOC_PR_RESERVE: + case IOC_PR_RELEASE: + case IOC_PR_PREEMPT: + case IOC_PR_PREEMPT_ABORT: + case IOC_PR_CLEAR: + return blkdev_ioctl(bdev, mode, cmd, + (unsigned long)compat_ptr(arg)); default: if (disk->fops->compat_ioctl) ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg); diff --git a/block/keyslot-manager.c b/block/keyslot-manager.c new file mode 100644 index 000000000000..0b6dd460645e --- /dev/null +++ b/block/keyslot-manager.c @@ -0,0 +1,626 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Google LLC + */ + +/** + * DOC: The Keyslot Manager + * + * Many devices with inline encryption support have a limited number of "slots" + * into which encryption contexts may be programmed, and requests can be tagged + * with a slot number to specify the key to use for en/decryption. + * + * As the number of slots are limited, and programming keys is expensive on + * many inline encryption hardware, we don't want to program the same key into + * multiple slots - if multiple requests are using the same key, we want to + * program just one slot with that key and use that slot for all requests. + * + * The keyslot manager manages these keyslots appropriately, and also acts as + * an abstraction between the inline encryption hardware and the upper layers. + * + * Lower layer devices will set up a keyslot manager in their request queue + * and tell it how to perform device specific operations like programming/ + * evicting keys from keyslots. + * + * Upper layers will call keyslot_manager_get_slot_for_key() to program a + * key into some slot in the inline encryption hardware. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +struct keyslot { + atomic_t slot_refs; + struct list_head idle_slot_node; + struct hlist_node hash_node; + struct blk_crypto_key key; +}; + +struct keyslot_manager { + unsigned int num_slots; + struct keyslot_mgmt_ll_ops ksm_ll_ops; + unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX]; + void *ll_priv_data; + +#ifdef CONFIG_PM + /* Device for runtime power management (NULL if none) */ + struct device *dev; +#endif + + /* Protects programming and evicting keys from the device */ + struct rw_semaphore lock; + + /* List of idle slots, with least recently used slot at front */ + wait_queue_head_t idle_slots_wait_queue; + struct list_head idle_slots; + spinlock_t idle_slots_lock; + + /* + * Hash table which maps key hashes to keyslots, so that we can find a + * key's keyslot in O(1) time rather than O(num_slots). Protected by + * 'lock'. A cryptographic hash function is used so that timing attacks + * can't leak information about the raw keys. + */ + struct hlist_head *slot_hashtable; + unsigned int slot_hashtable_size; + + /* Per-keyslot data */ + struct keyslot slots[]; +}; + +static inline bool keyslot_manager_is_passthrough(struct keyslot_manager *ksm) +{ + return ksm->num_slots == 0; +} + +#ifdef CONFIG_PM +static inline void keyslot_manager_set_dev(struct keyslot_manager *ksm, + struct device *dev) +{ + ksm->dev = dev; +} + +/* If there's an underlying device and it's suspended, resume it. */ +static inline void keyslot_manager_pm_get(struct keyslot_manager *ksm) +{ + if (ksm->dev) + pm_runtime_get_sync(ksm->dev); +} + +static inline void keyslot_manager_pm_put(struct keyslot_manager *ksm) +{ + if (ksm->dev) + pm_runtime_put_sync(ksm->dev); +} +#else /* CONFIG_PM */ +static inline void keyslot_manager_set_dev(struct keyslot_manager *ksm, + struct device *dev) +{ +} + +static inline void keyslot_manager_pm_get(struct keyslot_manager *ksm) +{ +} + +static inline void keyslot_manager_pm_put(struct keyslot_manager *ksm) +{ +} +#endif /* !CONFIG_PM */ + +static inline void keyslot_manager_hw_enter(struct keyslot_manager *ksm) +{ + /* + * Calling into the driver requires ksm->lock held and the device + * resumed. But we must resume the device first, since that can acquire + * and release ksm->lock via keyslot_manager_reprogram_all_keys(). + */ + keyslot_manager_pm_get(ksm); + down_write(&ksm->lock); +} + +static inline void keyslot_manager_hw_exit(struct keyslot_manager *ksm) +{ + up_write(&ksm->lock); + keyslot_manager_pm_put(ksm); +} + +/** + * keyslot_manager_create() - Create a keyslot manager + * @dev: Device for runtime power management (NULL if none) + * @num_slots: The number of key slots to manage. + * @ksm_ll_ops: The struct keyslot_mgmt_ll_ops for the device that this keyslot + * manager will use to perform operations like programming and + * evicting keys. + * @crypto_mode_supported: Array of size BLK_ENCRYPTION_MODE_MAX of + * bitmasks that represents whether a crypto mode + * and data unit size are supported. The i'th bit + * of crypto_mode_supported[crypto_mode] is set iff + * a data unit size of (1 << i) is supported. We + * only support data unit sizes that are powers of + * 2. + * @ll_priv_data: Private data passed as is to the functions in ksm_ll_ops. + * + * Allocate memory for and initialize a keyslot manager. Called by e.g. + * storage drivers to set up a keyslot manager in their request_queue. + * + * Context: May sleep + * Return: Pointer to constructed keyslot manager or NULL on error. + */ +struct keyslot_manager *keyslot_manager_create( + struct device *dev, + unsigned int num_slots, + const struct keyslot_mgmt_ll_ops *ksm_ll_ops, + const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], + void *ll_priv_data) +{ + struct keyslot_manager *ksm; + unsigned int slot; + unsigned int i; + + if (num_slots == 0) + return NULL; + + /* Check that all ops are specified */ + if (ksm_ll_ops->keyslot_program == NULL || + ksm_ll_ops->keyslot_evict == NULL) + return NULL; + + ksm = kvzalloc(struct_size(ksm, slots, num_slots), GFP_KERNEL); + if (!ksm) + return NULL; + + ksm->num_slots = num_slots; + ksm->ksm_ll_ops = *ksm_ll_ops; + memcpy(ksm->crypto_mode_supported, crypto_mode_supported, + sizeof(ksm->crypto_mode_supported)); + ksm->ll_priv_data = ll_priv_data; + keyslot_manager_set_dev(ksm, dev); + + init_rwsem(&ksm->lock); + + init_waitqueue_head(&ksm->idle_slots_wait_queue); + INIT_LIST_HEAD(&ksm->idle_slots); + + for (slot = 0; slot < num_slots; slot++) { + list_add_tail(&ksm->slots[slot].idle_slot_node, + &ksm->idle_slots); + } + + spin_lock_init(&ksm->idle_slots_lock); + + ksm->slot_hashtable_size = roundup_pow_of_two(num_slots); + ksm->slot_hashtable = kvmalloc_array(ksm->slot_hashtable_size, + sizeof(ksm->slot_hashtable[0]), + GFP_KERNEL); + if (!ksm->slot_hashtable) + goto err_free_ksm; + for (i = 0; i < ksm->slot_hashtable_size; i++) + INIT_HLIST_HEAD(&ksm->slot_hashtable[i]); + + return ksm; + +err_free_ksm: + keyslot_manager_destroy(ksm); + return NULL; +} +EXPORT_SYMBOL_GPL(keyslot_manager_create); + +static inline struct hlist_head * +hash_bucket_for_key(struct keyslot_manager *ksm, + const struct blk_crypto_key *key) +{ + return &ksm->slot_hashtable[key->hash & (ksm->slot_hashtable_size - 1)]; +} + +static void remove_slot_from_lru_list(struct keyslot_manager *ksm, int slot) +{ + unsigned long flags; + + spin_lock_irqsave(&ksm->idle_slots_lock, flags); + list_del(&ksm->slots[slot].idle_slot_node); + spin_unlock_irqrestore(&ksm->idle_slots_lock, flags); +} + +static int find_keyslot(struct keyslot_manager *ksm, + const struct blk_crypto_key *key) +{ + const struct hlist_head *head = hash_bucket_for_key(ksm, key); + const struct keyslot *slotp; + + hlist_for_each_entry(slotp, head, hash_node) { + if (slotp->key.hash == key->hash && + slotp->key.crypto_mode == key->crypto_mode && + slotp->key.size == key->size && + slotp->key.data_unit_size == key->data_unit_size && + !crypto_memneq(slotp->key.raw, key->raw, key->size)) + return slotp - ksm->slots; + } + return -ENOKEY; +} + +static int find_and_grab_keyslot(struct keyslot_manager *ksm, + const struct blk_crypto_key *key) +{ + int slot; + + slot = find_keyslot(ksm, key); + if (slot < 0) + return slot; + if (atomic_inc_return(&ksm->slots[slot].slot_refs) == 1) { + /* Took first reference to this slot; remove it from LRU list */ + remove_slot_from_lru_list(ksm, slot); + } + return slot; +} + +/** + * keyslot_manager_get_slot_for_key() - Program a key into a keyslot. + * @ksm: The keyslot manager to program the key into. + * @key: Pointer to the key object to program, including the raw key, crypto + * mode, and data unit size. + * + * Get a keyslot that's been programmed with the specified key. If one already + * exists, return it with incremented refcount. Otherwise, wait for a keyslot + * to become idle and program it. + * + * Context: Process context. Takes and releases ksm->lock. + * Return: The keyslot on success, else a -errno value. + */ +int keyslot_manager_get_slot_for_key(struct keyslot_manager *ksm, + const struct blk_crypto_key *key) +{ + int slot; + int err; + struct keyslot *idle_slot; + + if (keyslot_manager_is_passthrough(ksm)) + return 0; + + down_read(&ksm->lock); + slot = find_and_grab_keyslot(ksm, key); + up_read(&ksm->lock); + if (slot != -ENOKEY) + return slot; + + for (;;) { + keyslot_manager_hw_enter(ksm); + slot = find_and_grab_keyslot(ksm, key); + if (slot != -ENOKEY) { + keyslot_manager_hw_exit(ksm); + return slot; + } + + /* + * If we're here, that means there wasn't a slot that was + * already programmed with the key. So try to program it. + */ + if (!list_empty(&ksm->idle_slots)) + break; + + keyslot_manager_hw_exit(ksm); + wait_event(ksm->idle_slots_wait_queue, + !list_empty(&ksm->idle_slots)); + } + + idle_slot = list_first_entry(&ksm->idle_slots, struct keyslot, + idle_slot_node); + slot = idle_slot - ksm->slots; + + err = ksm->ksm_ll_ops.keyslot_program(ksm, key, slot); + if (err) { + wake_up(&ksm->idle_slots_wait_queue); + keyslot_manager_hw_exit(ksm); + return err; + } + + /* Move this slot to the hash list for the new key. */ + if (idle_slot->key.crypto_mode != BLK_ENCRYPTION_MODE_INVALID) + hlist_del(&idle_slot->hash_node); + hlist_add_head(&idle_slot->hash_node, hash_bucket_for_key(ksm, key)); + + atomic_set(&idle_slot->slot_refs, 1); + idle_slot->key = *key; + + remove_slot_from_lru_list(ksm, slot); + + keyslot_manager_hw_exit(ksm); + return slot; +} + +/** + * keyslot_manager_get_slot() - Increment the refcount on the specified slot. + * @ksm: The keyslot manager that we want to modify. + * @slot: The slot to increment the refcount of. + * + * This function assumes that there is already an active reference to that slot + * and simply increments the refcount. This is useful when cloning a bio that + * already has a reference to a keyslot, and we want the cloned bio to also have + * its own reference. + * + * Context: Any context. + */ +void keyslot_manager_get_slot(struct keyslot_manager *ksm, unsigned int slot) +{ + if (keyslot_manager_is_passthrough(ksm)) + return; + + if (WARN_ON(slot >= ksm->num_slots)) + return; + + WARN_ON(atomic_inc_return(&ksm->slots[slot].slot_refs) < 2); +} + +/** + * keyslot_manager_put_slot() - Release a reference to a slot + * @ksm: The keyslot manager to release the reference from. + * @slot: The slot to release the reference from. + * + * Context: Any context. + */ +void keyslot_manager_put_slot(struct keyslot_manager *ksm, unsigned int slot) +{ + unsigned long flags; + + if (keyslot_manager_is_passthrough(ksm)) + return; + + if (WARN_ON(slot >= ksm->num_slots)) + return; + + if (atomic_dec_and_lock_irqsave(&ksm->slots[slot].slot_refs, + &ksm->idle_slots_lock, flags)) { + list_add_tail(&ksm->slots[slot].idle_slot_node, + &ksm->idle_slots); + spin_unlock_irqrestore(&ksm->idle_slots_lock, flags); + wake_up(&ksm->idle_slots_wait_queue); + } +} + +/** + * keyslot_manager_crypto_mode_supported() - Find out if a crypto_mode/data + * unit size combination is supported + * by a ksm. + * @ksm: The keyslot manager to check + * @crypto_mode: The crypto mode to check for. + * @data_unit_size: The data_unit_size for the mode. + * + * Calls and returns the result of the crypto_mode_supported function specified + * by the ksm. + * + * Context: Process context. + * Return: Whether or not this ksm supports the specified crypto_mode/ + * data_unit_size combo. + */ +bool keyslot_manager_crypto_mode_supported(struct keyslot_manager *ksm, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size) +{ + if (!ksm) + return false; + if (WARN_ON(crypto_mode >= BLK_ENCRYPTION_MODE_MAX)) + return false; + if (WARN_ON(!is_power_of_2(data_unit_size))) + return false; + return ksm->crypto_mode_supported[crypto_mode] & data_unit_size; +} + +/** + * keyslot_manager_evict_key() - Evict a key from the lower layer device. + * @ksm: The keyslot manager to evict from + * @key: The key to evict + * + * Find the keyslot that the specified key was programmed into, and evict that + * slot from the lower layer device if that slot is not currently in use. + * + * Context: Process context. Takes and releases ksm->lock. + * Return: 0 on success, -EBUSY if the key is still in use, or another + * -errno value on other error. + */ +int keyslot_manager_evict_key(struct keyslot_manager *ksm, + const struct blk_crypto_key *key) +{ + int slot; + int err; + struct keyslot *slotp; + + if (keyslot_manager_is_passthrough(ksm)) { + if (ksm->ksm_ll_ops.keyslot_evict) { + keyslot_manager_hw_enter(ksm); + err = ksm->ksm_ll_ops.keyslot_evict(ksm, key, -1); + keyslot_manager_hw_exit(ksm); + return err; + } + return 0; + } + + keyslot_manager_hw_enter(ksm); + + slot = find_keyslot(ksm, key); + if (slot < 0) { + err = slot; + goto out_unlock; + } + slotp = &ksm->slots[slot]; + + if (atomic_read(&slotp->slot_refs) != 0) { + err = -EBUSY; + goto out_unlock; + } + err = ksm->ksm_ll_ops.keyslot_evict(ksm, key, slot); + if (err) + goto out_unlock; + + hlist_del(&slotp->hash_node); + memzero_explicit(&slotp->key, sizeof(slotp->key)); + err = 0; +out_unlock: + keyslot_manager_hw_exit(ksm); + return err; +} + +/** + * keyslot_manager_reprogram_all_keys() - Re-program all keyslots. + * @ksm: The keyslot manager + * + * Re-program all keyslots that are supposed to have a key programmed. This is + * intended only for use by drivers for hardware that loses its keys on reset. + * + * Context: Process context. Takes and releases ksm->lock. + */ +void keyslot_manager_reprogram_all_keys(struct keyslot_manager *ksm) +{ + unsigned int slot; + + if (WARN_ON(keyslot_manager_is_passthrough(ksm))) + return; + + /* This is for device initialization, so don't resume the device */ + down_write(&ksm->lock); + for (slot = 0; slot < ksm->num_slots; slot++) { + const struct keyslot *slotp = &ksm->slots[slot]; + int err; + + if (slotp->key.crypto_mode == BLK_ENCRYPTION_MODE_INVALID) + continue; + + err = ksm->ksm_ll_ops.keyslot_program(ksm, &slotp->key, slot); + WARN_ON(err); + } + up_write(&ksm->lock); +} +EXPORT_SYMBOL_GPL(keyslot_manager_reprogram_all_keys); + +/** + * keyslot_manager_private() - return the private data stored with ksm + * @ksm: The keyslot manager + * + * Returns the private data passed to the ksm when it was created. + */ +void *keyslot_manager_private(struct keyslot_manager *ksm) +{ + return ksm->ll_priv_data; +} +EXPORT_SYMBOL_GPL(keyslot_manager_private); + +void keyslot_manager_destroy(struct keyslot_manager *ksm) +{ + if (ksm) { + kvfree(ksm->slot_hashtable); + memzero_explicit(ksm, struct_size(ksm, slots, ksm->num_slots)); + kvfree(ksm); + } +} +EXPORT_SYMBOL_GPL(keyslot_manager_destroy); + +/** + * keyslot_manager_create_passthrough() - Create a passthrough keyslot manager + * @dev: Device for runtime power management (NULL if none) + * @ksm_ll_ops: The struct keyslot_mgmt_ll_ops + * @crypto_mode_supported: Bitmasks for supported encryption modes + * @ll_priv_data: Private data passed as is to the functions in ksm_ll_ops. + * + * Allocate memory for and initialize a passthrough keyslot manager. + * Called by e.g. storage drivers to set up a keyslot manager in their + * request_queue, when the storage driver wants to manage its keys by itself. + * This is useful for inline encryption hardware that don't have a small fixed + * number of keyslots, and for layered devices. + * + * See keyslot_manager_create() for more details about the parameters. + * + * Context: This function may sleep + * Return: Pointer to constructed keyslot manager or NULL on error. + */ +struct keyslot_manager *keyslot_manager_create_passthrough( + struct device *dev, + const struct keyslot_mgmt_ll_ops *ksm_ll_ops, + const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], + void *ll_priv_data) +{ + struct keyslot_manager *ksm; + + ksm = kzalloc(sizeof(*ksm), GFP_KERNEL); + if (!ksm) + return NULL; + + ksm->ksm_ll_ops = *ksm_ll_ops; + memcpy(ksm->crypto_mode_supported, crypto_mode_supported, + sizeof(ksm->crypto_mode_supported)); + ksm->ll_priv_data = ll_priv_data; + keyslot_manager_set_dev(ksm, dev); + + init_rwsem(&ksm->lock); + + return ksm; +} +EXPORT_SYMBOL_GPL(keyslot_manager_create_passthrough); + +/** + * keyslot_manager_intersect_modes() - restrict supported modes by child device + * @parent: The keyslot manager for parent device + * @child: The keyslot manager for child device, or NULL + * + * Clear any crypto mode support bits in @parent that aren't set in @child. + * If @child is NULL, then all parent bits are cleared. + * + * Only use this when setting up the keyslot manager for a layered device, + * before it's been exposed yet. + */ +void keyslot_manager_intersect_modes(struct keyslot_manager *parent, + const struct keyslot_manager *child) +{ + if (child) { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(child->crypto_mode_supported); i++) { + parent->crypto_mode_supported[i] &= + child->crypto_mode_supported[i]; + } + } else { + memset(parent->crypto_mode_supported, 0, + sizeof(parent->crypto_mode_supported)); + } +} +EXPORT_SYMBOL_GPL(keyslot_manager_intersect_modes); + +/** + * keyslot_manager_derive_raw_secret() - Derive software secret from wrapped key + * @ksm: The keyslot manager + * @wrapped_key: The wrapped key + * @wrapped_key_size: Size of the wrapped key in bytes + * @secret: (output) the software secret + * @secret_size: (output) the number of secret bytes to derive + * + * Given a hardware-wrapped key, ask the hardware to derive a secret which + * software can use for cryptographic tasks other than inline encryption. The + * derived secret is guaranteed to be cryptographically isolated from the key + * with which any inline encryption with this wrapped key would actually be + * done. I.e., both will be derived from the unwrapped key. + * + * Return: 0 on success, -EOPNOTSUPP if hardware-wrapped keys are unsupported, + * or another -errno code. + */ +int keyslot_manager_derive_raw_secret(struct keyslot_manager *ksm, + const u8 *wrapped_key, + unsigned int wrapped_key_size, + u8 *secret, unsigned int secret_size) +{ + int err; + + if (ksm->ksm_ll_ops.derive_raw_secret) { + keyslot_manager_hw_enter(ksm); + err = ksm->ksm_ll_ops.derive_raw_secret(ksm, wrapped_key, + wrapped_key_size, + secret, secret_size); + keyslot_manager_hw_exit(ksm); + } else { + err = -EOPNOTSUPP; + } + + return err; +} +EXPORT_SYMBOL_GPL(keyslot_manager_derive_raw_secret); diff --git a/build.config.aarch64 b/build.config.aarch64 new file mode 100644 index 000000000000..523bbc0449f7 --- /dev/null +++ b/build.config.aarch64 @@ -0,0 +1,11 @@ +ARCH=arm64 + +CLANG_TRIPLE=aarch64-linux-gnu- +CROSS_COMPILE=aarch64-linux-androidkernel- +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin + +FILES=" +arch/arm64/boot/Image.gz +vmlinux +System.map +" diff --git a/build.config.common b/build.config.common new file mode 100644 index 000000000000..be3afbc1c6b5 --- /dev/null +++ b/build.config.common @@ -0,0 +1,10 @@ +BRANCH=android-4.14 +KERNEL_DIR=common + +CC=clang +LD=ld.lld +CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r377782b/bin +BUILDTOOLS_PREBUILT_BIN=build/build-tools/path/linux-x86 + +EXTRA_CMDS='' +STOP_SHIP_TRACEPRINTK=1 diff --git a/build.config.cuttlefish.aarch64 b/build.config.cuttlefish.aarch64 index 2cbcdf340485..0cb601958972 100644 --- a/build.config.cuttlefish.aarch64 +++ b/build.config.cuttlefish.aarch64 @@ -1,16 +1,5 @@ -ARCH=arm64 -BRANCH=android-4.14 -CLANG_TRIPLE=aarch64-linux-gnu- -CROSS_COMPILE=aarch64-linux-androidkernel- +. ${ROOT_DIR}/common/build.config.common +. ${ROOT_DIR}/common/build.config.aarch64 + DEFCONFIG=cuttlefish_defconfig -EXTRA_CMDS='' -KERNEL_DIR=common POST_DEFCONFIG_CMDS="check_defconfig" -CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r353983c/bin -LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/aarch64/aarch64-linux-android-4.9/bin -FILES=" -arch/arm64/boot/Image.gz -vmlinux -System.map -" -STOP_SHIP_TRACEPRINTK=1 diff --git a/build.config.cuttlefish.x86_64 b/build.config.cuttlefish.x86_64 index 0b1a15b1fe3b..fed773ccc64a 100644 --- a/build.config.cuttlefish.x86_64 +++ b/build.config.cuttlefish.x86_64 @@ -1,16 +1,5 @@ -ARCH=x86_64 -BRANCH=android-4.14 -CLANG_TRIPLE=x86_64-linux-gnu- -CROSS_COMPILE=x86_64-linux-androidkernel- +. ${ROOT_DIR}/common/build.config.common +. ${ROOT_DIR}/common/build.config.x86_64 + DEFCONFIG=x86_64_cuttlefish_defconfig -EXTRA_CMDS='' -KERNEL_DIR=common POST_DEFCONFIG_CMDS="check_defconfig" -CLANG_PREBUILT_BIN=prebuilts-master/clang/host/linux-x86/clang-r353983c/bin -LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin -FILES=" -arch/x86/boot/bzImage -vmlinux -System.map -" -STOP_SHIP_TRACEPRINTK=1 diff --git a/build.config.x86_64 b/build.config.x86_64 new file mode 100644 index 000000000000..df73a47e7220 --- /dev/null +++ b/build.config.x86_64 @@ -0,0 +1,11 @@ +ARCH=x86_64 + +CLANG_TRIPLE=x86_64-linux-gnu- +CROSS_COMPILE=x86_64-linux-androidkernel- +LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gcc/linux-x86/x86/x86_64-linux-android-4.9/bin + +FILES=" +arch/x86/boot/bzImage +vmlinux +System.map +" diff --git a/crypto/af_alg.c b/crypto/af_alg.c index f816a7289104..0679c35adf55 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -139,11 +139,13 @@ void af_alg_release_parent(struct sock *sk) sk = ask->parent; ask = alg_sk(sk); - lock_sock(sk); + local_bh_disable(); + bh_lock_sock(sk); ask->nokey_refcnt -= nokey; if (!last) last = !--ask->refcnt; - release_sock(sk); + bh_unlock_sock(sk); + local_bh_enable(); if (last) sock_put(sk); @@ -1086,7 +1088,7 @@ void af_alg_async_cb(struct crypto_async_request *_req, int err) af_alg_free_resources(areq); sock_put(sk); - iocb->ki_complete(iocb, err ? err : resultlen, 0); + iocb->ki_complete(iocb, err ? err : (int)resultlen, 0); } EXPORT_SYMBOL_GPL(af_alg_async_cb); diff --git a/crypto/algapi.c b/crypto/algapi.c index 50eb828db767..603d2d637209 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -652,11 +652,9 @@ EXPORT_SYMBOL_GPL(crypto_grab_spawn); void crypto_drop_spawn(struct crypto_spawn *spawn) { - if (!spawn->alg) - return; - down_write(&crypto_alg_sem); - list_del(&spawn->list); + if (spawn->alg) + list_del(&spawn->list); up_write(&crypto_alg_sem); } EXPORT_SYMBOL_GPL(crypto_drop_spawn); @@ -664,22 +662,16 @@ EXPORT_SYMBOL_GPL(crypto_drop_spawn); static struct crypto_alg *crypto_spawn_alg(struct crypto_spawn *spawn) { struct crypto_alg *alg; - struct crypto_alg *alg2; down_read(&crypto_alg_sem); alg = spawn->alg; - alg2 = alg; - if (alg2) - alg2 = crypto_mod_get(alg2); + if (alg && !crypto_mod_get(alg)) { + alg->cra_flags |= CRYPTO_ALG_DYING; + alg = NULL; + } up_read(&crypto_alg_sem); - if (!alg2) { - if (alg) - crypto_shoot_alg(alg); - return ERR_PTR(-EAGAIN); - } - - return alg; + return alg ?: ERR_PTR(-EAGAIN); } struct crypto_tfm *crypto_spawn_tfm(struct crypto_spawn *spawn, u32 type, diff --git a/crypto/api.c b/crypto/api.c index ff7a7852bb17..6832b599ec4c 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -340,13 +340,12 @@ static unsigned int crypto_ctxsize(struct crypto_alg *alg, u32 type, u32 mask) return len; } -void crypto_shoot_alg(struct crypto_alg *alg) +static void crypto_shoot_alg(struct crypto_alg *alg) { down_write(&crypto_alg_sem); alg->cra_flags |= CRYPTO_ALG_DYING; up_write(&crypto_alg_sem); } -EXPORT_SYMBOL_GPL(crypto_shoot_alg); struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, u32 mask) diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 2b8fb8f1391e..b6899be8065d 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -288,38 +288,43 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, drop_alg: crypto_mod_put(alg); - if (err) + if (err) { + kfree_skb(skb); return err; + } return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid); } static int crypto_dump_report(struct sk_buff *skb, struct netlink_callback *cb) { - struct crypto_alg *alg; + const size_t start_pos = cb->args[0]; + size_t pos = 0; struct crypto_dump_info info; - int err; - - if (cb->args[0]) - goto out; - - cb->args[0] = 1; + struct crypto_alg *alg; + int res; info.in_skb = cb->skb; info.out_skb = skb; info.nlmsg_seq = cb->nlh->nlmsg_seq; info.nlmsg_flags = NLM_F_MULTI; + down_read(&crypto_alg_sem); list_for_each_entry(alg, &crypto_alg_list, cra_list) { - err = crypto_report_alg(alg, &info); - if (err) - goto out_err; + if (pos >= start_pos) { + res = crypto_report_alg(alg, &info); + if (res == -EMSGSIZE) + break; + if (res) + goto out; + } + pos++; } - + cb->args[0] = pos; + res = skb->len; out: - return skb->len; -out_err: - return err; + up_read(&crypto_alg_sem); + return res; } static int crypto_dump_report_done(struct netlink_callback *cb) @@ -503,7 +508,7 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, if ((type == (CRYPTO_MSG_GETALG - CRYPTO_MSG_BASE) && (nlh->nlmsg_flags & NLM_F_DUMP))) { struct crypto_alg *alg; - u16 dump_alloc = 0; + unsigned long dump_alloc = 0; if (link->dump == NULL) return -EINVAL; @@ -511,16 +516,16 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, down_read(&crypto_alg_sem); list_for_each_entry(alg, &crypto_alg_list, cra_list) dump_alloc += CRYPTO_REPORT_MAXSIZE; + up_read(&crypto_alg_sem); { struct netlink_dump_control c = { .dump = link->dump, .done = link->done, - .min_dump_alloc = dump_alloc, + .min_dump_alloc = min(dump_alloc, 65535UL), }; err = netlink_dump_start(crypto_nlsk, skb, nlh, &c); } - up_read(&crypto_alg_sem); return err; } diff --git a/crypto/ecc.c b/crypto/ecc.c index 18f32f2a5e1c..65ee29dda063 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -898,10 +898,34 @@ static void ecc_point_mult(struct ecc_point *result, static inline void ecc_swap_digits(const u64 *in, u64 *out, unsigned int ndigits) { + const __be64 *src = (__force __be64 *)in; int i; for (i = 0; i < ndigits; i++) - out[i] = __swab64(in[ndigits - 1 - i]); + out[i] = be64_to_cpu(src[ndigits - 1 - i]); +} + +static int __ecc_is_key_valid(const struct ecc_curve *curve, + const u64 *private_key, unsigned int ndigits) +{ + u64 one[ECC_MAX_DIGITS] = { 1, }; + u64 res[ECC_MAX_DIGITS]; + + if (!private_key) + return -EINVAL; + + if (curve->g.ndigits != ndigits) + return -EINVAL; + + /* Make sure the private key is in the range [2, n-3]. */ + if (vli_cmp(one, private_key, ndigits) != -1) + return -EINVAL; + vli_sub(res, curve->n, one, ndigits); + vli_sub(res, res, one, ndigits); + if (vli_cmp(res, private_key, ndigits) != 1) + return -EINVAL; + + return 0; } int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits, @@ -910,22 +934,12 @@ int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits, int nbytes; const struct ecc_curve *curve = ecc_get_curve(curve_id); - if (!private_key) - return -EINVAL; - nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT; if (private_key_len != nbytes) return -EINVAL; - if (vli_is_zero(private_key, ndigits)) - return -EINVAL; - - /* Make sure the private key is in the range [1, n-1]. */ - if (vli_cmp(curve->n, private_key, ndigits) != 1) - return -EINVAL; - - return 0; + return __ecc_is_key_valid(curve, private_key, ndigits); } /* @@ -971,11 +985,8 @@ int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey) if (err) return err; - if (vli_is_zero(priv, ndigits)) - return -EINVAL; - - /* Make sure the private key is in the range [1, n-1]. */ - if (vli_cmp(curve->n, priv, ndigits) != 1) + /* Make sure the private key is in the valid range. */ + if (__ecc_is_key_valid(curve, priv, ndigits)) return -EINVAL; ecc_swap_digits(priv, privkey, ndigits); diff --git a/crypto/internal.h b/crypto/internal.h index f07320423191..6262ec0435b4 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -84,7 +84,6 @@ void crypto_alg_tested(const char *name, int err); void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list, struct crypto_alg *nalg); void crypto_remove_final(struct list_head *list); -void crypto_shoot_alg(struct crypto_alg *alg); struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, u32 mask); void *crypto_create_tfm(struct crypto_alg *alg, diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index f8ec3d4ba4a8..85082574c515 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -130,7 +130,6 @@ static void pcrypt_aead_done(struct crypto_async_request *areq, int err) struct padata_priv *padata = pcrypt_request_padata(preq); padata->info = err; - req->base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; padata_do_serial(padata); } @@ -394,7 +393,7 @@ static int pcrypt_sysfs_add(struct padata_instance *pinst, const char *name) int ret; pinst->kobj.kset = pcrypt_kset; - ret = kobject_add(&pinst->kobj, NULL, name); + ret = kobject_add(&pinst->kobj, NULL, "%s", name); if (!ret) kobject_uevent(&pinst->kobj, KOBJ_ADD); @@ -505,11 +504,12 @@ err: static void __exit pcrypt_exit(void) { + crypto_unregister_template(&pcrypt_tmpl); + pcrypt_fini_padata(&pencrypt); pcrypt_fini_padata(&pdecrypt); kset_unregister(pcrypt_kset); - crypto_unregister_template(&pcrypt_tmpl); } module_init(pcrypt_init); diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index 407c64bdcdd9..3279b457c4ed 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -261,15 +261,6 @@ static int pkcs1pad_encrypt(struct akcipher_request *req) pkcs1pad_sg_set_buf(req_ctx->in_sg, req_ctx->in_buf, ctx->key_size - 1 - req->src_len, req->src); - req_ctx->out_buf = kmalloc(ctx->key_size, GFP_KERNEL); - if (!req_ctx->out_buf) { - kfree(req_ctx->in_buf); - return -ENOMEM; - } - - pkcs1pad_sg_set_buf(req_ctx->out_sg, req_ctx->out_buf, - ctx->key_size, NULL); - akcipher_request_set_tfm(&req_ctx->child_req, ctx->child); akcipher_request_set_callback(&req_ctx->child_req, req->base.flags, pkcs1pad_encrypt_sign_complete_cb, req); diff --git a/crypto/tgr192.c b/crypto/tgr192.c index 321bc6ff2a9d..904c8444aa0a 100644 --- a/crypto/tgr192.c +++ b/crypto/tgr192.c @@ -25,8 +25,9 @@ #include #include #include -#include #include +#include +#include #define TGR192_DIGEST_SIZE 24 #define TGR160_DIGEST_SIZE 20 @@ -468,10 +469,9 @@ static void tgr192_transform(struct tgr192_ctx *tctx, const u8 * data) u64 a, b, c, aa, bb, cc; u64 x[8]; int i; - const __le64 *ptr = (const __le64 *)data; for (i = 0; i < 8; i++) - x[i] = le64_to_cpu(ptr[i]); + x[i] = get_unaligned_le64(data + i * sizeof(__le64)); /* save */ a = aa = tctx->a; diff --git a/drivers/Kconfig b/drivers/Kconfig index 842a1aa167c8..0fa05ca41ae3 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -9,6 +9,8 @@ source "drivers/bus/Kconfig" source "drivers/connector/Kconfig" +source "drivers/gnss/Kconfig" + source "drivers/mtd/Kconfig" source "drivers/of/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index 31b9ed5c87f0..bf3c54bb2b40 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -184,3 +184,4 @@ obj-$(CONFIG_FSI) += fsi/ obj-$(CONFIG_TEE) += tee/ obj-$(CONFIG_MULTIPLEXER) += mux/ obj-$(CONFIG_TRUSTY) += trusty/ +obj-$(CONFIG_GNSS) += gnss/ diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 51592dd45b06..84b1d30f699c 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -98,6 +98,9 @@ struct lpss_private_data { u32 prv_reg_ctx[LPSS_PRV_REG_COUNT]; }; +/* Devices which need to be in D3 before lpss_iosf_enter_d3_state() proceeds */ +static u32 pmc_atom_d3_mask = 0xfe000ffe; + /* LPSS run time quirks */ static unsigned int lpss_quirks; @@ -174,6 +177,21 @@ static void byt_pwm_setup(struct lpss_private_data *pdata) static void byt_i2c_setup(struct lpss_private_data *pdata) { + const char *uid_str = acpi_device_uid(pdata->adev); + acpi_handle handle = pdata->adev->handle; + unsigned long long shared_host = 0; + acpi_status status; + long uid = 0; + + /* Expected to always be true, but better safe then sorry */ + if (uid_str) + uid = simple_strtol(uid_str, NULL, 10); + + /* Detect I2C bus shared with PUNIT and ignore its d3 status */ + status = acpi_evaluate_integer(handle, "_SEM", NULL, &shared_host); + if (ACPI_SUCCESS(status) && shared_host && uid) + pmc_atom_d3_mask &= ~(BIT_LPSS2_F1_I2C1 << (uid - 1)); + lpss_deassert_reset(pdata); if (readl(pdata->mmio_base + pdata->dev_desc->prv_offset)) @@ -500,12 +518,7 @@ static int acpi_lpss_create_device(struct acpi_device *adev, * have _PS0 and _PS3 without _PSC (and no power resources), so * acpi_bus_init_power() will assume that the BIOS has put them into D0. */ - ret = acpi_device_fix_up_power(adev); - if (ret) { - /* Skip the device, but continue the namespace scan. */ - ret = 0; - goto err_out; - } + acpi_device_fix_up_power(adev); adev->driver_data = pdata; pdev = acpi_create_platform_device(adev, dev_desc->properties); @@ -789,7 +802,7 @@ static void lpss_iosf_enter_d3_state(void) * Here we read the values related to LPSS power island, i.e. LPSS * devices, excluding both LPSS DMA controllers, along with SCC domain. */ - u32 func_dis, d3_sts_0, pmc_status, pmc_mask = 0xfe000ffe; + u32 func_dis, d3_sts_0, pmc_status; int ret; ret = pmc_atom_read(PMC_FUNC_DIS, &func_dis); @@ -807,7 +820,7 @@ static void lpss_iosf_enter_d3_state(void) * Shutdown both LPSS DMA controllers if and only if all other devices * are already in D3hot. */ - pmc_status = (~(d3_sts_0 | func_dis)) & pmc_mask; + pmc_status = (~(d3_sts_0 | func_dis)) & pmc_atom_d3_mask; if (pmc_status) goto exit; diff --git a/drivers/acpi/acpi_memhotplug.c b/drivers/acpi/acpi_memhotplug.c index 6b0d3ef7309c..2ccfbb61ca89 100644 --- a/drivers/acpi/acpi_memhotplug.c +++ b/drivers/acpi/acpi_memhotplug.c @@ -228,7 +228,7 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device) if (node < 0) node = memory_add_physaddr_to_nid(info->start_addr); - result = add_memory(node, info->start_addr, info->length); + result = __add_memory(node, info->start_addr, info->length); /* * If the memory block has been used by the kernel, add_memory() diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c index 95600309ce42..0bd1899a287f 100644 --- a/drivers/acpi/acpi_watchdog.c +++ b/drivers/acpi/acpi_watchdog.c @@ -58,12 +58,14 @@ static bool acpi_watchdog_uses_rtc(const struct acpi_table_wdat *wdat) } #endif +static bool acpi_no_watchdog; + static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void) { const struct acpi_table_wdat *wdat = NULL; acpi_status status; - if (acpi_disabled) + if (acpi_disabled || acpi_no_watchdog) return NULL; status = acpi_get_table(ACPI_SIG_WDAT, 0, @@ -91,6 +93,14 @@ bool acpi_has_watchdog(void) } EXPORT_SYMBOL_GPL(acpi_has_watchdog); +/* ACPI watchdog can be disabled on boot command line */ +static int __init disable_acpi_watchdog(char *str) +{ + acpi_no_watchdog = true; + return 1; +} +__setup("acpi_no_watchdog", disable_acpi_watchdog); + void __init acpi_watchdog_init(void) { const struct acpi_wdat_entry *entries; @@ -129,12 +139,11 @@ void __init acpi_watchdog_init(void) gas = &entries[i].register_region; res.start = gas->address; + res.end = res.start + ACPI_ACCESS_BYTE_WIDTH(gas->access_width) - 1; if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { res.flags = IORESOURCE_MEM; - res.end = res.start + ALIGN(gas->access_width, 4) - 1; } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { res.flags = IORESOURCE_IO; - res.end = res.start + gas->access_width - 1; } else { pr_warn("Unsupported address space: %u\n", gas->space_id); diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h index a2adfd42f85c..bfddcd989974 100644 --- a/drivers/acpi/acpica/acevents.h +++ b/drivers/acpi/acpica/acevents.h @@ -245,6 +245,8 @@ acpi_ev_default_region_setup(acpi_handle handle, acpi_status acpi_ev_initialize_region(union acpi_operand_object *region_obj); +u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node); + /* * evsci - SCI (System Control Interrupt) handling/dispatch */ diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index 0d45b8bb1678..b10e92de7dd8 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -429,9 +429,9 @@ struct acpi_simple_repair_info { /* Info for running the _REG methods */ struct acpi_reg_walk_info { - acpi_adr_space_type space_id; u32 function; u32 reg_run_count; + acpi_adr_space_type space_id; }; /***************************************************************************** diff --git a/drivers/acpi/acpica/dsfield.c b/drivers/acpi/acpica/dsfield.c index 7bcf5f5ea029..8df4a49a99a6 100644 --- a/drivers/acpi/acpica/dsfield.c +++ b/drivers/acpi/acpica/dsfield.c @@ -273,7 +273,7 @@ cleanup: * FUNCTION: acpi_ds_get_field_names * * PARAMETERS: info - create_field info structure - * ` walk_state - Current method state + * walk_state - Current method state * arg - First parser arg for the field name list * * RETURN: Status diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c index eaa859a89702..1d82e1419397 100644 --- a/drivers/acpi/acpica/dswload.c +++ b/drivers/acpi/acpica/dswload.c @@ -444,6 +444,27 @@ acpi_status acpi_ds_load1_end_op(struct acpi_walk_state *walk_state) ACPI_DEBUG_PRINT((ACPI_DB_DISPATCH, "Op=%p State=%p\n", op, walk_state)); + /* + * Disassembler: handle create field operators here. + * + * create_buffer_field is a deferred op that is typically processed in load + * pass 2. However, disassembly of control method contents walk the parse + * tree with ACPI_PARSE_LOAD_PASS1 and AML_CREATE operators are processed + * in a later walk. This is a problem when there is a control method that + * has the same name as the AML_CREATE object. In this case, any use of the + * name segment will be detected as a method call rather than a reference + * to a buffer field. + * + * This earlier creation during disassembly solves this issue by inserting + * the named object in the ACPI namespace so that references to this name + * would be a name string rather than a method call. + */ + if ((walk_state->parse_flags & ACPI_PARSE_DISASSEMBLE) && + (walk_state->op_info->flags & AML_CREATE)) { + status = acpi_ds_create_buffer_field(op, walk_state); + return_ACPI_STATUS(status); + } + /* We are only interested in opcodes that have an associated name */ if (!(walk_state->op_info->flags & (AML_NAMED | AML_FIELD))) { diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index 28b447ff92df..3a3277f98292 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -677,6 +677,19 @@ acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, ACPI_FUNCTION_TRACE(ev_execute_reg_methods); + /* + * These address spaces do not need a call to _REG, since the ACPI + * specification defines them as: "must always be accessible". Since + * they never change state (never become unavailable), no need to ever + * call _REG on them. Also, a data_table is not a "real" address space, + * so do not call _REG. September 2018. + */ + if ((space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) || + (space_id == ACPI_ADR_SPACE_SYSTEM_IO) || + (space_id == ACPI_ADR_SPACE_DATA_TABLE)) { + return_VOID; + } + info.space_id = space_id; info.function = function; info.reg_run_count = 0; @@ -738,8 +751,8 @@ acpi_ev_reg_run(acpi_handle obj_handle, } /* - * We only care about regions.and objects that are allowed to have address - * space handlers + * We only care about regions and objects that are allowed to have + * address space handlers */ if ((node->type != ACPI_TYPE_REGION) && (node != acpi_gbl_root_node)) { return (AE_OK); diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c index 93ec528bcd9a..3b48f1ecb55b 100644 --- a/drivers/acpi/acpica/evrgnini.c +++ b/drivers/acpi/acpica/evrgnini.c @@ -50,9 +50,6 @@ #define _COMPONENT ACPI_EVENTS ACPI_MODULE_NAME("evrgnini") -/* Local prototypes */ -static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node); - /******************************************************************************* * * FUNCTION: acpi_ev_system_memory_region_setup @@ -67,7 +64,6 @@ static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node); * DESCRIPTION: Setup a system_memory operation region * ******************************************************************************/ - acpi_status acpi_ev_system_memory_region_setup(acpi_handle handle, u32 function, @@ -347,7 +343,7 @@ acpi_ev_pci_config_region_setup(acpi_handle handle, * ******************************************************************************/ -static u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node) +u8 acpi_ev_is_pci_root_bridge(struct acpi_namespace_node *node) { acpi_status status; struct acpi_pnp_device_id *hid; diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c index beba9d56a0d8..742a9fe6e235 100644 --- a/drivers/acpi/acpica/evxfregn.c +++ b/drivers/acpi/acpica/evxfregn.c @@ -227,7 +227,6 @@ acpi_remove_address_space_handler(acpi_handle device, */ region_obj = handler_obj->address_space.region_list; - } /* Remove this Handler object from the list */ diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 5889f6407fea..cd6fae6ad4c2 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -171,40 +170,40 @@ static int ghes_estatus_pool_init(void) return 0; } -static void ghes_estatus_pool_free_chunk_page(struct gen_pool *pool, +static void ghes_estatus_pool_free_chunk(struct gen_pool *pool, struct gen_pool_chunk *chunk, void *data) { - free_page(chunk->start_addr); + vfree((void *)chunk->start_addr); } static void ghes_estatus_pool_exit(void) { gen_pool_for_each_chunk(ghes_estatus_pool, - ghes_estatus_pool_free_chunk_page, NULL); + ghes_estatus_pool_free_chunk, NULL); gen_pool_destroy(ghes_estatus_pool); } static int ghes_estatus_pool_expand(unsigned long len) { - unsigned long i, pages, size, addr; - int ret; + unsigned long size, addr; ghes_estatus_pool_size_request += PAGE_ALIGN(len); size = gen_pool_size(ghes_estatus_pool); if (size >= ghes_estatus_pool_size_request) return 0; - pages = (ghes_estatus_pool_size_request - size) / PAGE_SIZE; - for (i = 0; i < pages; i++) { - addr = __get_free_page(GFP_KERNEL); - if (!addr) - return -ENOMEM; - ret = gen_pool_add(ghes_estatus_pool, addr, PAGE_SIZE, -1); - if (ret) - return ret; - } - return 0; + addr = (unsigned long)vmalloc(PAGE_ALIGN(len)); + if (!addr) + return -ENOMEM; + + /* + * New allocation must be visible in all pgd before it can be found by + * an NMI allocating from the pool. + */ + vmalloc_sync_all(); + + return gen_pool_add(ghes_estatus_pool, addr, PAGE_ALIGN(len), -1); } static int map_gen_v2(struct ghes *ghes) @@ -936,7 +935,6 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) sev = ghes_severity(ghes->estatus->error_severity); if (sev >= GHES_SEV_PANIC) { - oops_begin(); ghes_print_queued_estatus(); __ghes_panic(ghes); } diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index f0348e388d01..1cb7c6a52f61 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -196,7 +196,7 @@ int acpi_bus_get_private_data(acpi_handle handle, void **data) { acpi_status status; - if (!*data) + if (!data) return -EINVAL; status = acpi_get_data(handle, acpi_bus_private_data_handler, data); diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 033441f1d4c4..d21ee85ab260 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -1155,9 +1155,19 @@ static void acpi_dev_pm_detach(struct device *dev, bool power_off) */ int acpi_dev_pm_attach(struct device *dev, bool power_on) { + /* + * Skip devices whose ACPI companions match the device IDs below, + * because they require special power management handling incompatible + * with the generic ACPI PM domain. + */ + static const struct acpi_device_id special_pm_ids[] = { + {"PNP0C0B", }, /* Generic ACPI fan */ + {"INT3404", }, /* Fan */ + {} + }; struct acpi_device *adev = ACPI_COMPANION(dev); - if (!adev) + if (!adev || !acpi_match_device_ids(adev, special_pm_ids)) return -ENODEV; if (dev->pm_domain) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 05fb821c2558..7e91575496b6 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1980,7 +1980,7 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, offset = to_interleave_offset(offset, mmio); writeq(cmd, mmio->addr.base + offset); - nvdimm_flush(nfit_blk->nd_region); + nvdimm_flush(nfit_blk->nd_region, NULL); if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH) readq(mmio->addr.base + offset); @@ -2029,7 +2029,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, } if (rw) - nvdimm_flush(nfit_blk->nd_region); + nvdimm_flush(nfit_blk->nd_region, NULL); rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0; return rc; diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 191e86c62037..ff36b0101ff0 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -371,19 +371,21 @@ void *__ref acpi_os_map_memory(acpi_physical_address phys, acpi_size size) } EXPORT_SYMBOL_GPL(acpi_os_map_memory); -static void acpi_os_drop_map_ref(struct acpi_ioremap *map) +/* Must be called with mutex_lock(&acpi_ioremap_lock) */ +static unsigned long acpi_os_drop_map_ref(struct acpi_ioremap *map) { - if (!--map->refcount) + unsigned long refcount = --map->refcount; + + if (!refcount) list_del_rcu(&map->list); + return refcount; } static void acpi_os_map_cleanup(struct acpi_ioremap *map) { - if (!map->refcount) { - synchronize_rcu_expedited(); - acpi_unmap(map->phys, map->virt); - kfree(map); - } + synchronize_rcu_expedited(); + acpi_unmap(map->phys, map->virt); + kfree(map); } /** @@ -403,6 +405,7 @@ static void acpi_os_map_cleanup(struct acpi_ioremap *map) void __ref acpi_os_unmap_iomem(void __iomem *virt, acpi_size size) { struct acpi_ioremap *map; + unsigned long refcount; if (!acpi_permanent_mmap) { __acpi_unmap_table(virt, size); @@ -416,10 +419,11 @@ void __ref acpi_os_unmap_iomem(void __iomem *virt, acpi_size size) WARN(true, PREFIX "%s: bad address %p\n", __func__, virt); return; } - acpi_os_drop_map_ref(map); + refcount = acpi_os_drop_map_ref(map); mutex_unlock(&acpi_ioremap_lock); - acpi_os_map_cleanup(map); + if (!refcount) + acpi_os_map_cleanup(map); } EXPORT_SYMBOL_GPL(acpi_os_unmap_iomem); @@ -454,6 +458,7 @@ void acpi_os_unmap_generic_address(struct acpi_generic_address *gas) { u64 addr; struct acpi_ioremap *map; + unsigned long refcount; if (gas->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) return; @@ -469,10 +474,11 @@ void acpi_os_unmap_generic_address(struct acpi_generic_address *gas) mutex_unlock(&acpi_ioremap_lock); return; } - acpi_os_drop_map_ref(map); + refcount = acpi_os_drop_map_ref(map); mutex_unlock(&acpi_ioremap_lock); - acpi_os_map_cleanup(map); + if (!refcount) + acpi_os_map_cleanup(map); } EXPORT_SYMBOL(acpi_os_unmap_generic_address); @@ -1116,6 +1122,7 @@ void acpi_os_wait_events_complete(void) flush_workqueue(kacpid_wq); flush_workqueue(kacpi_notify_wq); } +EXPORT_SYMBOL(acpi_os_wait_events_complete); struct acpi_hp_work { struct work_struct work; diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index eb857d6ea1fe..96911360a28e 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -454,8 +454,9 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) decode_osc_support(root, "OS supports", support); status = acpi_pci_osc_support(root, support); if (ACPI_FAILURE(status)) { - dev_info(&device->dev, "_OSC failed (%s); disabling ASPM\n", - acpi_format_exception(status)); + dev_info(&device->dev, "_OSC failed (%s)%s\n", + acpi_format_exception(status), + pcie_aspm_support_enabled() ? "; disabling ASPM" : ""); *no_aspm = 1; return; } diff --git a/drivers/acpi/sbshc.c b/drivers/acpi/sbshc.c index 7a3431018e0a..5008ead4609a 100644 --- a/drivers/acpi/sbshc.c +++ b/drivers/acpi/sbshc.c @@ -196,6 +196,7 @@ int acpi_smbus_unregister_callback(struct acpi_smb_hc *hc) hc->callback = NULL; hc->context = NULL; mutex_unlock(&hc->lock); + acpi_os_wait_events_complete(); return 0; } @@ -292,6 +293,7 @@ static int acpi_smbus_hc_remove(struct acpi_device *device) hc = acpi_driver_data(device); acpi_ec_remove_query_handler(hc->ec, hc->query_bit); + acpi_os_wait_events_complete(); kfree(hc); device->driver_data = NULL; return 0; diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 43587ac680e4..214c4e2e8ade 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -328,6 +328,11 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Precision 7510"), }, }, + + /* + * Desktops which falsely report a backlight and which our heuristics + * for this do not catch. + */ { .callback = video_detect_force_none, .ident = "Dell OptiPlex 9020M", @@ -336,6 +341,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 9020M"), }, }, + { + .callback = video_detect_force_none, + .ident = "MSI MS-7721", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "MSI"), + DMI_MATCH(DMI_PRODUCT_NAME, "MS-7721"), + }, + }, { }, }; diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig index ee4880bfdcdc..3a90d51d8419 100644 --- a/drivers/android/Kconfig +++ b/drivers/android/Kconfig @@ -20,6 +20,18 @@ config ANDROID_BINDER_IPC Android process, using Binder to identify, invoke and pass arguments between said processes. +config ANDROID_BINDERFS + bool "Android Binderfs filesystem" + depends on ANDROID_BINDER_IPC + default n + ---help--- + Binderfs is a pseudo-filesystem for the Android Binder IPC driver + which can be mounted per-ipc namespace allowing to run multiple + instances of Android. + Each binderfs mount initially only contains a binder-control device. + It can be used to dynamically allocate new binder IPC devices via + ioctls. + config ANDROID_BINDER_DEVICES string "Android Binder devices" depends on ANDROID_BINDER_IPC diff --git a/drivers/android/Makefile b/drivers/android/Makefile index a01254c43ee3..c7856e3200da 100644 --- a/drivers/android/Makefile +++ b/drivers/android/Makefile @@ -1,4 +1,5 @@ ccflags-y += -I$(src) # needed for trace events +obj-$(CONFIG_ANDROID_BINDERFS) += binderfs.o obj-$(CONFIG_ANDROID_BINDER_IPC) += binder.o binder_alloc.o obj-$(CONFIG_ANDROID_BINDER_IPC_SELFTEST) += binder_alloc_selftest.o diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 9882a79f7344..62ceafdb684c 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -75,55 +75,16 @@ #include #include #include "binder_alloc.h" +#include "binder_internal.h" #include "binder_trace.h" #ifdef CONFIG_MTK_TASK_TURBO #include #endif -#ifdef CONFIG_MTK_ENG_BUILD -#define BINDER_WATCHDOG "v0.1" -#endif -#define BINDER_USER_TRACKING 1 - -#ifdef BINDER_USER_TRACKING -#include -#include -#endif - #ifdef BINDER_WATCHDOG -#include -#include -#include -#include -#include - static DEFINE_MUTEX(mtk_binder_main_lock); static pid_t system_server_pid; -/*****************************************************************************/ -/* Payload layout of addService(): */ -/* */ -/* Parcel header | IServiceManager.descriptor | Parcel header | Service name */ -/* (Please refer ServiceManagerNative.java:addService()) */ -/* IServiceManager.descriptor is 'android.os.IServiceManager' interleaved */ -/* with character '\0'. */ -/* that is, 'a', '\0', 'n', '\0', 'd', '\0', 'r', '\0', 'o', ... */ -/* */ -/* so the offset of Service name */ -/* = Parcel header x2 + strlen(android.os.IServiceManager) x2 */ -/* = 8x2 + 26x2 = 68 */ -/*****************************************************************************/ -#define MAX_SERVICE_NAME_LEN 32 -#define MAGIC_SERVICE_NAME_OFFSET 68 -#define MAX_ENG_TRANS_LOG_BUFF_LEN 10240 - -enum wait_on_reason { - WAIT_ON_NONE = 0U, - WAIT_ON_READ = 1U, - WAIT_ON_EXEC = 2U, - WAIT_ON_REPLY_READ = 3U, -}; - /* work should be done within how many secs */ #define WAIT_BUDGET_READ 2 #define WAIT_BUDGET_EXEC 4 @@ -153,22 +114,8 @@ static struct dentry *binder_debugfs_dir_entry_root; static struct dentry *binder_debugfs_dir_entry_proc; static atomic_t binder_last_id; -#define BINDER_DEBUG_ENTRY(name) \ -static int binder_##name##_open(struct inode *inode, struct file *file) \ -{ \ - return single_open(file, binder_##name##_show, inode->i_private); \ -} \ -\ -static const struct file_operations binder_##name##_fops = { \ - .owner = THIS_MODULE, \ - .open = binder_##name##_open, \ - .read = seq_read, \ - .llseek = seq_lseek, \ - .release = single_release, \ -} - -static int binder_proc_show(struct seq_file *m, void *unused); -BINDER_DEBUG_ENTRY(proc); +static int proc_show(struct seq_file *m, void *unused); +DEFINE_SHOW_ATTRIBUTE(proc); /* This is only defined in include/asm-arm/sizes.h */ #ifndef SZ_1K @@ -202,7 +149,7 @@ static uint32_t binder_debug_mask = BINDER_DEBUG_USER_ERROR | BINDER_DEBUG_FAILED_TRANSACTION | BINDER_DEBUG_DEAD_TRANSACTION; module_param_named(debug_mask, binder_debug_mask, uint, 0644); -static char *binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES; +char *binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES; module_param_named(devices, binder_devices_param, charp, 0444); static DECLARE_WAIT_QUEUE_HEAD(binder_user_error_wait); @@ -276,63 +223,8 @@ static inline void binder_stats_created(enum binder_stat_types type) atomic_inc(&binder_stats.obj_created[type]); } -struct binder_transaction_log_entry { - int debug_id; - int debug_id_done; - int call_type; - int from_proc; - int from_thread; - int target_handle; - int to_proc; - int to_thread; - int to_node; - int data_size; - int offsets_size; - int return_error_line; - uint32_t return_error; - uint32_t return_error_param; - const char *context_name; -#ifdef BINDER_WATCHDOG - unsigned int code; - char service[MAX_SERVICE_NAME_LEN]; - int fd; - struct timespec readstamp; - struct timespec endstamp; - unsigned int cur; -#endif -#ifdef BINDER_USER_TRACKING - struct timespec timestamp; - struct timeval tv; -#endif -}; -#ifdef BINDER_WATCHDOG -struct binder_timeout_log_entry { - enum wait_on_reason r; - pid_t from_proc; - pid_t from_thrd; - pid_t to_proc; - pid_t to_thrd; - unsigned int over_sec; - struct timespec ts; - struct timeval tv; - unsigned int code; - char service[MAX_SERVICE_NAME_LEN]; - int debug_id; -}; -#endif - -struct binder_transaction_log { - atomic_t cur; - bool full; -#ifdef BINDER_WATCHDOG - unsigned int size; - struct binder_transaction_log_entry *entry; -#else - struct binder_transaction_log_entry entry[32]; -#endif -}; -static struct binder_transaction_log binder_transaction_log; -static struct binder_transaction_log binder_transaction_log_failed; +struct binder_transaction_log binder_transaction_log; +struct binder_transaction_log binder_transaction_log_failed; static struct binder_transaction_log_entry *binder_transaction_log_add( struct binder_transaction_log *log) @@ -363,41 +255,6 @@ static struct binder_transaction_log_entry *binder_transaction_log_add( return e; } -#ifdef BINDER_WATCHDOG -static struct binder_transaction_log_entry entry_failed[32]; - -#define BINDER_LOG_RESUME 0x2 -#define BINDER_BUF_WARN 0x4 - -#ifdef CONFIG_MTK_EXTMEM -#include -#else -struct binder_transaction_log_entry entry_t[MAX_ENG_TRANS_LOG_BUFF_LEN]; -#endif -#endif - -struct binder_context { - struct binder_node *binder_context_mgr_node; - struct mutex context_mgr_node_lock; - - kuid_t binder_context_mgr_uid; - const char *name; -}; - -struct binder_device { - struct hlist_node hlist; - struct miscdevice miscdev; - struct binder_context context; -}; - -#ifdef BINDER_WATCHDOG -#ifdef CONFIG_MTK_EXTMEM -#include -#else -struct binder_transaction_log_entry entry_t[MAX_ENG_TRANS_LOG_BUFF_LEN]; -#endif -#endif - /** * struct binder_work - work enqueued on a worklist * @entry: node enqueued on list @@ -665,6 +522,7 @@ struct binder_priority { * @inner_lock: can nest under outer_lock and/or node lock * @outer_lock: no nesting under innor or node lock * Lock order: 1) outer, 2) node, 3) inner + * @binderfs_entry: process-specific binderfs log file * * Bookkeeping structure for binder processes */ @@ -696,6 +554,7 @@ struct binder_proc { struct binder_context *context; spinlock_t inner_lock; spinlock_t outer_lock; + struct dentry *binderfs_entry; }; enum { @@ -6061,6 +5920,8 @@ static int binder_open(struct inode *nodp, struct file *filp) { struct binder_proc *proc; struct binder_device *binder_dev; + struct binderfs_info *info; + struct dentry *binder_binderfs_dir_entry_proc = NULL; binder_debug(BINDER_DEBUG_OPEN_CLOSE, "%s: %d:%d\n", __func__, current->group_leader->pid, current->pid); @@ -6082,8 +5943,16 @@ static int binder_open(struct inode *nodp, struct file *filp) proc->default_priority.prio = NICE_TO_PRIO(0); } - binder_dev = container_of(filp->private_data, struct binder_device, - miscdev); + /* binderfs stashes devices in i_private */ + if (is_binderfs_device(nodp)) { + binder_dev = nodp->i_private; + info = nodp->i_sb->s_fs_info; + binder_binderfs_dir_entry_proc = info->proc_log_dir; + } else { + binder_dev = container_of(filp->private_data, + struct binder_device, miscdev); + } + refcount_inc(&binder_dev->ref); proc->context = &binder_dev->context; binder_alloc_init(&proc->alloc); @@ -6111,7 +5980,36 @@ static int binder_open(struct inode *nodp, struct file *filp) proc->debugfs_entry = debugfs_create_file(strbuf, 0444, binder_debugfs_dir_entry_proc, (void *)(unsigned long)proc->pid, - &binder_proc_fops); + &proc_fops); + } + + if (binder_binderfs_dir_entry_proc) { + char strbuf[11]; + struct dentry *binderfs_entry; + + snprintf(strbuf, sizeof(strbuf), "%u", proc->pid); + /* + * Similar to debugfs, the process specific log file is shared + * between contexts. If the file has already been created for a + * process, the following binderfs_create_file() call will + * fail with error code EEXIST if another context of the same + * process invoked binder_open(). This is ok since same as + * debugfs, the log file will contain information on all + * contexts of a given PID. + */ + binderfs_entry = binderfs_create_file(binder_binderfs_dir_entry_proc, + strbuf, &proc_fops, (void *)(unsigned long)proc->pid); + if (!IS_ERR(binderfs_entry)) { + proc->binderfs_entry = binderfs_entry; + } else { + int error; + + error = PTR_ERR(binderfs_entry); + if (error != -EEXIST) { + pr_warn("Unable to create file %s in binderfs (error %d)\n", + strbuf, error); + } + } } return 0; @@ -6153,6 +6051,12 @@ static int binder_release(struct inode *nodp, struct file *filp) struct binder_proc *proc = filp->private_data; debugfs_remove(proc->debugfs_entry); + + if (proc->binderfs_entry) { + binderfs_remove_file(proc->binderfs_entry); + proc->binderfs_entry = NULL; + } + binder_defer_work(proc, BINDER_DEFERRED_RELEASE); return 0; @@ -6226,6 +6130,7 @@ static int binder_node_release(struct binder_node *node, int refs) static void binder_deferred_release(struct binder_proc *proc) { struct binder_context *context = proc->context; + struct binder_device *device; struct rb_node *n; int threads, nodes, incoming_refs, outgoing_refs, active_transactions; @@ -6244,6 +6149,12 @@ static void binder_deferred_release(struct binder_proc *proc) context->binder_context_mgr_node = NULL; } mutex_unlock(&context->context_mgr_node_lock); + device = container_of(proc->context, struct binder_device, context); + if (refcount_dec_and_test(&device->ref)) { + kfree(context->name); + kfree(device); + } + proc->context = NULL; binder_inner_proc_lock(proc); /* * Make sure proc stays alive after we @@ -6773,7 +6684,7 @@ static void print_binder_proc_stats(struct seq_file *m, } -static int binder_state_show(struct seq_file *m, void *unused) +int binder_state_show(struct seq_file *m, void *unused) { struct binder_proc *proc; struct binder_node *node; @@ -6812,7 +6723,7 @@ static int binder_state_show(struct seq_file *m, void *unused) return 0; } -static int binder_stats_show(struct seq_file *m, void *unused) +int binder_stats_show(struct seq_file *m, void *unused) { struct binder_proc *proc; @@ -6828,7 +6739,7 @@ static int binder_stats_show(struct seq_file *m, void *unused) return 0; } -static int binder_transactions_show(struct seq_file *m, void *unused) +int binder_transactions_show(struct seq_file *m, void *unused) { struct binder_proc *proc; @@ -6841,7 +6752,7 @@ static int binder_transactions_show(struct seq_file *m, void *unused) return 0; } -static int binder_proc_show(struct seq_file *m, void *unused) +static int proc_show(struct seq_file *m, void *unused) { struct binder_proc *itr; int pid = (unsigned long)m->private; @@ -6942,7 +6853,7 @@ static void print_binder_transaction_log_entry(struct seq_file *m, "\n" : " (incomplete)\n"); } -static int binder_transaction_log_show(struct seq_file *m, void *unused) +int binder_transaction_log_show(struct seq_file *m, void *unused) { struct binder_transaction_log *log = m->private; unsigned int log_cur = atomic_read(&log->cur); @@ -6978,7 +6889,7 @@ static int binder_transaction_log_show(struct seq_file *m, void *unused) BINDER_DEBUG_ENTRY(timeout_log); #endif -static const struct file_operations binder_fops = { +const struct file_operations binder_fops = { .owner = THIS_MODULE, .poll = binder_poll, .unlocked_ioctl = binder_ioctl, @@ -6989,11 +6900,6 @@ static const struct file_operations binder_fops = { .release = binder_release, }; -BINDER_DEBUG_ENTRY(state); -BINDER_DEBUG_ENTRY(stats); -BINDER_DEBUG_ENTRY(transactions); -BINDER_DEBUG_ENTRY(transaction_log); - static int __init init_binder_device(const char *name) { int ret; @@ -7007,6 +6913,7 @@ static int __init init_binder_device(const char *name) binder_device->miscdev.minor = MISC_DYNAMIC_MINOR; binder_device->miscdev.name = name; + refcount_set(&binder_device->ref, 1); binder_device->context.binder_context_mgr_uid = INVALID_UID; binder_device->context.name = name; mutex_init(&binder_device->context.context_mgr_node_lock); @@ -7025,9 +6932,10 @@ static int __init init_binder_device(const char *name) static int __init binder_init(void) { int ret; - char *device_name, *device_names, *device_tmp; + char *device_name, *device_tmp; struct binder_device *device; struct hlist_node *tmp; + char *device_names = NULL; ret = binder_alloc_shrinker_init(); if (ret) @@ -7079,28 +6987,36 @@ static int __init binder_init(void) #endif } - /* - * Copy the module_parameter string, because we don't want to - * tokenize it in-place. - */ - device_names = kzalloc(strlen(binder_devices_param) + 1, GFP_KERNEL); - if (!device_names) { - ret = -ENOMEM; - goto err_alloc_device_names_failed; - } - strcpy(device_names, binder_devices_param); + if (!IS_ENABLED(CONFIG_ANDROID_BINDERFS) && + strcmp(binder_devices_param, "") != 0) { + /* + * Copy the module_parameter string, because we don't want to + * tokenize it in-place. + */ + device_names = kstrdup(binder_devices_param, GFP_KERNEL); + if (!device_names) { + ret = -ENOMEM; + goto err_alloc_device_names_failed; + } - device_tmp = device_names; - while ((device_name = strsep(&device_tmp, ","))) { - ret = init_binder_device(device_name); - if (ret) - goto err_init_binder_device_failed; + device_tmp = device_names; + while ((device_name = strsep(&device_tmp, ","))) { + ret = init_binder_device(device_name); + if (ret) + goto err_init_binder_device_failed; + } } + + ret = init_binderfs(); + if (ret) + goto err_init_binder_device_failed; + #ifdef BINDER_WATCHDOG init_binder_wtdog(); init_binder_transaction_log( &binder_transaction_log, &binder_transaction_log_failed); #endif + return ret; err_init_binder_device_failed: diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 7b8a4bea8650..ad2d54d2023d 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -282,8 +282,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, return 0; free_range: - for (page_addr = end - PAGE_SIZE; page_addr >= start; - page_addr -= PAGE_SIZE) { + for (page_addr = end - PAGE_SIZE; 1; page_addr -= PAGE_SIZE) { bool ret; size_t index; @@ -296,6 +295,8 @@ free_range: WARN_ON(!ret); trace_binder_free_lru_end(alloc, index); + if (page_addr == start) + break; continue; err_vm_insert_page_failed: @@ -303,7 +304,8 @@ err_vm_insert_page_failed: page->page_ptr = NULL; err_alloc_page_failed: err_page_ptr_cleared: - ; + if (page_addr == start) + break; } err_no_vma: if (mm) { diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h new file mode 100644 index 000000000000..6de682f8b02e --- /dev/null +++ b/drivers/android/binder_internal.h @@ -0,0 +1,243 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_BINDER_INTERNAL_H +#define _LINUX_BINDER_INTERNAL_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_MTK_ENG_BUILD +#define BINDER_WATCHDOG "v0.1" +#endif +#define BINDER_USER_TRACKING 1 + +#ifdef BINDER_USER_TRACKING +#include +#include +#endif + +#ifdef BINDER_WATCHDOG +#include +#include +#include +#include +#include + +/*****************************************************************************/ +/* Payload layout of addService(): */ +/* */ +/* Parcel header | IServiceManager.descriptor | Parcel header | Service name */ +/* (Please refer ServiceManagerNative.java:addService()) */ +/* IServiceManager.descriptor is 'android.os.IServiceManager' interleaved */ +/* with character '\0'. */ +/* that is, 'a', '\0', 'n', '\0', 'd', '\0', 'r', '\0', 'o', ... */ +/* */ +/* so the offset of Service name */ +/* = Parcel header x2 + strlen(android.os.IServiceManager) x2 */ +/* = 8x2 + 26x2 = 68 */ +/*****************************************************************************/ +#define MAX_SERVICE_NAME_LEN 32 +#define MAGIC_SERVICE_NAME_OFFSET 68 +#define MAX_ENG_TRANS_LOG_BUFF_LEN 10240 + +enum wait_on_reason { + WAIT_ON_NONE = 0U, + WAIT_ON_READ = 1U, + WAIT_ON_EXEC = 2U, + WAIT_ON_REPLY_READ = 3U, +}; +#endif + +struct binder_context { + struct binder_node *binder_context_mgr_node; + struct mutex context_mgr_node_lock; + kuid_t binder_context_mgr_uid; + const char *name; +}; + +/** + * struct binder_device - information about a binder device node + * @hlist: list of binder devices (only used for devices requested via + * CONFIG_ANDROID_BINDER_DEVICES) + * @miscdev: information about a binder character device node + * @context: binder context information + * @binderfs_inode: This is the inode of the root dentry of the super block + * belonging to a binderfs mount. + */ +struct binder_device { + struct hlist_node hlist; + struct miscdevice miscdev; + struct binder_context context; + struct inode *binderfs_inode; + refcount_t ref; +}; + +/** + * binderfs_mount_opts - mount options for binderfs + * @max: maximum number of allocatable binderfs binder devices + * @stats_mode: enable binder stats in binderfs. + */ +struct binderfs_mount_opts { + int max; + int stats_mode; +}; + +/** + * binderfs_info - information about a binderfs mount + * @ipc_ns: The ipc namespace the binderfs mount belongs to. + * @control_dentry: This records the dentry of this binderfs mount + * binder-control device. + * @root_uid: uid that needs to be used when a new binder device is + * created. + * @root_gid: gid that needs to be used when a new binder device is + * created. + * @mount_opts: The mount options in use. + * @device_count: The current number of allocated binder devices. + * @proc_log_dir: Pointer to the directory dentry containing process-specific + * logs. + */ +struct binderfs_info { + struct ipc_namespace *ipc_ns; + struct dentry *control_dentry; + kuid_t root_uid; + kgid_t root_gid; + struct binderfs_mount_opts mount_opts; + int device_count; + struct dentry *proc_log_dir; +}; + +extern const struct file_operations binder_fops; + +extern char *binder_devices_param; + +#ifdef CONFIG_ANDROID_BINDERFS +extern bool is_binderfs_device(const struct inode *inode); +extern struct dentry *binderfs_create_file(struct dentry *dir, const char *name, + const struct file_operations *fops, + void *data); +extern void binderfs_remove_file(struct dentry *dentry); +#else +static inline bool is_binderfs_device(const struct inode *inode) +{ + return false; +} +static inline struct dentry *binderfs_create_file(struct dentry *dir, + const char *name, + const struct file_operations *fops, + void *data) +{ + return NULL; +} +static inline void binderfs_remove_file(struct dentry *dentry) {} +#endif + +#ifdef CONFIG_ANDROID_BINDERFS +extern int __init init_binderfs(void); +#else +static inline int __init init_binderfs(void) +{ + return 0; +} +#endif + +int binder_stats_show(struct seq_file *m, void *unused); +DEFINE_SHOW_ATTRIBUTE(binder_stats); + +int binder_state_show(struct seq_file *m, void *unused); +DEFINE_SHOW_ATTRIBUTE(binder_state); + +int binder_transactions_show(struct seq_file *m, void *unused); +DEFINE_SHOW_ATTRIBUTE(binder_transactions); + +int binder_transaction_log_show(struct seq_file *m, void *unused); +DEFINE_SHOW_ATTRIBUTE(binder_transaction_log); + +struct binder_transaction_log_entry { + int debug_id; + int debug_id_done; + int call_type; + int from_proc; + int from_thread; + int target_handle; + int to_proc; + int to_thread; + int to_node; + int data_size; + int offsets_size; + int return_error_line; + uint32_t return_error; + uint32_t return_error_param; + const char *context_name; +#ifdef BINDER_WATCHDOG + unsigned int code; + char service[MAX_SERVICE_NAME_LEN]; + int fd; + struct timespec readstamp; + struct timespec endstamp; + unsigned int cur; +#endif +#ifdef BINDER_USER_TRACKING + struct timespec timestamp; + struct timeval tv; +#endif +}; +#ifdef BINDER_WATCHDOG +struct binder_timeout_log_entry { + enum wait_on_reason r; + pid_t from_proc; + pid_t from_thrd; + pid_t to_proc; + pid_t to_thrd; + unsigned int over_sec; + struct timespec ts; + struct timeval tv; + unsigned int code; + char service[MAX_SERVICE_NAME_LEN]; + int debug_id; +}; +#endif + +struct binder_transaction_log { + atomic_t cur; + bool full; +#ifdef BINDER_WATCHDOG + unsigned int size; + struct binder_transaction_log_entry *entry; +#else + struct binder_transaction_log_entry entry[32]; +#endif +}; + +extern struct binder_transaction_log binder_transaction_log; +extern struct binder_transaction_log binder_transaction_log_failed; + +#ifdef BINDER_WATCHDOG +static struct binder_transaction_log_entry entry_failed[32]; + +#define BINDER_LOG_RESUME 0x2 +#define BINDER_BUF_WARN 0x4 + +#ifdef CONFIG_MTK_EXTMEM +#include +#else +struct binder_transaction_log_entry entry_t[MAX_ENG_TRANS_LOG_BUFF_LEN]; +#endif +#endif + +#ifdef BINDER_WATCHDOG +#ifdef CONFIG_MTK_EXTMEM +#include +#else +struct binder_transaction_log_entry entry_t[MAX_ENG_TRANS_LOG_BUFF_LEN]; +#endif +#endif + + +#endif /* _LINUX_BINDER_INTERNAL_H */ diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c new file mode 100644 index 000000000000..b0bad544052f --- /dev/null +++ b/drivers/android/binderfs.c @@ -0,0 +1,793 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +//#include +#include +#include +#include + +#include "binder_internal.h" + +#define FIRST_INODE 1 +#define SECOND_INODE 2 +#define INODE_OFFSET 3 +#define INTSTRLEN 21 +#define BINDERFS_MAX_MINOR (1U << MINORBITS) +/* Ensure that the initial ipc namespace always has devices available. */ +#define BINDERFS_MAX_MINOR_CAPPED (BINDERFS_MAX_MINOR - 4) + +static dev_t binderfs_dev; +static DEFINE_MUTEX(binderfs_minors_mutex); +static DEFINE_IDA(binderfs_minors); + +enum { + Opt_max, + Opt_stats_mode, + Opt_err +}; + +enum binderfs_stats_mode { + STATS_NONE, + STATS_GLOBAL, +}; + +static const match_table_t tokens = { + { Opt_max, "max=%d" }, + { Opt_stats_mode, "stats=%s" }, + { Opt_err, NULL } +}; + +static inline struct binderfs_info *BINDERFS_I(const struct inode *inode) +{ + return inode->i_sb->s_fs_info; +} + +bool is_binderfs_device(const struct inode *inode) +{ + if (inode->i_sb->s_magic == BINDERFS_SUPER_MAGIC) + return true; + + return false; +} + +/** + * binderfs_binder_device_create - allocate inode from super block of a + * binderfs mount + * @ref_inode: inode from wich the super block will be taken + * @userp: buffer to copy information about new device for userspace to + * @req: struct binderfs_device as copied from userspace + * + * This function allocates a new binder_device and reserves a new minor + * number for it. + * Minor numbers are limited and tracked globally in binderfs_minors. The + * function will stash a struct binder_device for the specific binder + * device in i_private of the inode. + * It will go on to allocate a new inode from the super block of the + * filesystem mount, stash a struct binder_device in its i_private field + * and attach a dentry to that inode. + * + * Return: 0 on success, negative errno on failure + */ +static int binderfs_binder_device_create(struct inode *ref_inode, + struct binderfs_device __user *userp, + struct binderfs_device *req) +{ + int minor, ret; + struct dentry *dentry, *root; + struct binder_device *device; + char *name = NULL; + size_t name_len; + struct inode *inode = NULL; + struct super_block *sb = ref_inode->i_sb; + struct binderfs_info *info = sb->s_fs_info; +#if defined(CONFIG_IPC_NS) + bool use_reserve = (info->ipc_ns == &init_ipc_ns); +#else + bool use_reserve = true; +#endif + + /* Reserve new minor number for the new device. */ + mutex_lock(&binderfs_minors_mutex); + if (++info->device_count <= info->mount_opts.max) + minor = ida_simple_get(&binderfs_minors, 0, + use_reserve ? BINDERFS_MAX_MINOR + 1: + BINDERFS_MAX_MINOR_CAPPED + 1, + GFP_KERNEL); + else + minor = -ENOSPC; + if (minor < 0) { + --info->device_count; + mutex_unlock(&binderfs_minors_mutex); + return minor; + } + mutex_unlock(&binderfs_minors_mutex); + + ret = -ENOMEM; + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) + goto err; + + inode = new_inode(sb); + if (!inode) + goto err; + + inode->i_ino = minor + INODE_OFFSET; + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + init_special_inode(inode, S_IFCHR | 0600, + MKDEV(MAJOR(binderfs_dev), minor)); + inode->i_fop = &binder_fops; + inode->i_uid = info->root_uid; + inode->i_gid = info->root_gid; + + req->name[BINDERFS_MAX_NAME] = '\0'; /* NUL-terminate */ + name_len = strlen(req->name); + /* Make sure to include terminating NUL byte */ + name = kmemdup(req->name, name_len + 1, GFP_KERNEL); + if (!name) + goto err; + + refcount_set(&device->ref, 1); + device->binderfs_inode = inode; + device->context.binder_context_mgr_uid = INVALID_UID; + device->context.name = name; + device->miscdev.name = name; + device->miscdev.minor = minor; + mutex_init(&device->context.context_mgr_node_lock); + + req->major = MAJOR(binderfs_dev); + req->minor = minor; + + if (userp && copy_to_user(userp, req, sizeof(*req))) { + ret = -EFAULT; + goto err; + } + + root = sb->s_root; + inode_lock(d_inode(root)); + + /* look it up */ + dentry = lookup_one_len(name, root, name_len); + if (IS_ERR(dentry)) { + inode_unlock(d_inode(root)); + ret = PTR_ERR(dentry); + goto err; + } + + if (d_really_is_positive(dentry)) { + /* already exists */ + dput(dentry); + inode_unlock(d_inode(root)); + ret = -EEXIST; + goto err; + } + + inode->i_private = device; + d_instantiate(dentry, inode); + fsnotify_create(root->d_inode, dentry); + inode_unlock(d_inode(root)); + + return 0; + +err: + kfree(name); + kfree(device); + mutex_lock(&binderfs_minors_mutex); + --info->device_count; + ida_remove(&binderfs_minors, minor); + mutex_unlock(&binderfs_minors_mutex); + iput(inode); + + return ret; +} + +/** + * binderfs_ctl_ioctl - handle binder device node allocation requests + * + * The request handler for the binder-control device. All requests operate on + * the binderfs mount the binder-control device resides in: + * - BINDER_CTL_ADD + * Allocate a new binder device. + * + * Return: 0 on success, negative errno on failure + */ +static long binder_ctl_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + int ret = -EINVAL; + struct inode *inode = file_inode(file); + struct binderfs_device __user *device = (struct binderfs_device __user *)arg; + struct binderfs_device device_req; + + switch (cmd) { + case BINDER_CTL_ADD: + ret = copy_from_user(&device_req, device, sizeof(device_req)); + if (ret) { + ret = -EFAULT; + break; + } + + ret = binderfs_binder_device_create(inode, device, &device_req); + break; + default: + break; + } + + return ret; +} + +static void binderfs_evict_inode(struct inode *inode) +{ + struct binder_device *device = inode->i_private; + struct binderfs_info *info = BINDERFS_I(inode); + + clear_inode(inode); + + if (!S_ISCHR(inode->i_mode) || !device) + return; + + mutex_lock(&binderfs_minors_mutex); + --info->device_count; + ida_remove(&binderfs_minors, device->miscdev.minor); + mutex_unlock(&binderfs_minors_mutex); + + if (refcount_dec_and_test(&device->ref)) { + kfree(device->context.name); + kfree(device); + } +} + +/** + * binderfs_parse_mount_opts - parse binderfs mount options + * @data: options to set (can be NULL in which case defaults are used) + */ +static int binderfs_parse_mount_opts(char *data, + struct binderfs_mount_opts *opts) +{ + char *p, *stats; + opts->max = BINDERFS_MAX_MINOR; + opts->stats_mode = STATS_NONE; + + while ((p = strsep(&data, ",")) != NULL) { + substring_t args[MAX_OPT_ARGS]; + int token; + int max_devices; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_max: + if (match_int(&args[0], &max_devices) || + (max_devices < 0 || + (max_devices > BINDERFS_MAX_MINOR))) + return -EINVAL; + + opts->max = max_devices; + break; + case Opt_stats_mode: + if (!capable(CAP_SYS_ADMIN)) + return -EINVAL; + + stats = match_strdup(&args[0]); + if (!stats) + return -ENOMEM; + + if (strcmp(stats, "global") != 0) { + kfree(stats); + return -EINVAL; + } + + opts->stats_mode = STATS_GLOBAL; + kfree(stats); + break; + default: + pr_err("Invalid mount options\n"); + return -EINVAL; + } + } + + return 0; +} + +static int binderfs_remount(struct super_block *sb, int *flags, char *data) +{ + int prev_stats_mode, ret; + struct binderfs_info *info = sb->s_fs_info; + + prev_stats_mode = info->mount_opts.stats_mode; + ret = binderfs_parse_mount_opts(data, &info->mount_opts); + if (ret) + return ret; + + if (prev_stats_mode != info->mount_opts.stats_mode) { + pr_err("Binderfs stats mode cannot be changed during a remount\n"); + info->mount_opts.stats_mode = prev_stats_mode; + return -EINVAL; + } + + return 0; +} + +static int binderfs_show_mount_opts(struct seq_file *seq, struct dentry *root) +{ + struct binderfs_info *info; + + info = root->d_sb->s_fs_info; + if (info->mount_opts.max <= BINDERFS_MAX_MINOR) + seq_printf(seq, ",max=%d", info->mount_opts.max); + if (info->mount_opts.stats_mode == STATS_GLOBAL) + seq_printf(seq, ",stats=global"); + + return 0; +} + +static const struct super_operations binderfs_super_ops = { + .evict_inode = binderfs_evict_inode, + .remount_fs = binderfs_remount, + .show_options = binderfs_show_mount_opts, + .statfs = simple_statfs, +}; + +static inline bool is_binderfs_control_device(const struct dentry *dentry) +{ + struct binderfs_info *info = dentry->d_sb->s_fs_info; + return info->control_dentry == dentry; +} + +static int binderfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + if (is_binderfs_control_device(old_dentry) || + is_binderfs_control_device(new_dentry)) + return -EPERM; + + return simple_rename(old_dir, old_dentry, new_dir, new_dentry, flags); +} + +static int binderfs_unlink(struct inode *dir, struct dentry *dentry) +{ + if (is_binderfs_control_device(dentry)) + return -EPERM; + + return simple_unlink(dir, dentry); +} + +static const struct file_operations binder_ctl_fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .unlocked_ioctl = binder_ctl_ioctl, + .compat_ioctl = binder_ctl_ioctl, + .llseek = noop_llseek, +}; + +/** + * binderfs_binder_ctl_create - create a new binder-control device + * @sb: super block of the binderfs mount + * + * This function creates a new binder-control device node in the binderfs mount + * referred to by @sb. + * + * Return: 0 on success, negative errno on failure + */ +static int binderfs_binder_ctl_create(struct super_block *sb) +{ + int minor, ret; + struct dentry *dentry; + struct binder_device *device; + struct inode *inode = NULL; + struct dentry *root = sb->s_root; + struct binderfs_info *info = sb->s_fs_info; +#if defined(CONFIG_IPC_NS) + bool use_reserve = (info->ipc_ns == &init_ipc_ns); +#else + bool use_reserve = true; +#endif + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) + return -ENOMEM; + + /* If we have already created a binder-control node, return. */ + if (info->control_dentry) { + ret = 0; + goto out; + } + + ret = -ENOMEM; + inode = new_inode(sb); + if (!inode) + goto out; + + /* Reserve a new minor number for the new device. */ + mutex_lock(&binderfs_minors_mutex); + minor = ida_simple_get(&binderfs_minors, 0, + use_reserve ? BINDERFS_MAX_MINOR + 1: + BINDERFS_MAX_MINOR_CAPPED + 1, + GFP_KERNEL); + mutex_unlock(&binderfs_minors_mutex); + if (minor < 0) { + ret = minor; + goto out; + } + + inode->i_ino = SECOND_INODE; + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + init_special_inode(inode, S_IFCHR | 0600, + MKDEV(MAJOR(binderfs_dev), minor)); + inode->i_fop = &binder_ctl_fops; + inode->i_uid = info->root_uid; + inode->i_gid = info->root_gid; + + device->binderfs_inode = inode; + device->miscdev.minor = minor; + + dentry = d_alloc_name(root, "binder-control"); + if (!dentry) + goto out; + + inode->i_private = device; + info->control_dentry = dentry; + d_add(dentry, inode); + + return 0; + +out: + kfree(device); + iput(inode); + + return ret; +} + +static const struct inode_operations binderfs_dir_inode_operations = { + .lookup = simple_lookup, + .rename = binderfs_rename, + .unlink = binderfs_unlink, +}; + +static struct inode *binderfs_make_inode(struct super_block *sb, int mode) +{ + struct inode *ret; + + ret = new_inode(sb); + if (ret) { + ret->i_ino = iunique(sb, BINDERFS_MAX_MINOR + INODE_OFFSET); + ret->i_mode = mode; + ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret); + } + return ret; +} + +static struct dentry *binderfs_create_dentry(struct dentry *parent, + const char *name) +{ + struct dentry *dentry; + + dentry = lookup_one_len(name, parent, strlen(name)); + if (IS_ERR(dentry)) + return dentry; + + /* Return error if the file/dir already exists. */ + if (d_really_is_positive(dentry)) { + dput(dentry); + return ERR_PTR(-EEXIST); + } + + return dentry; +} + +void binderfs_remove_file(struct dentry *dentry) +{ + struct inode *parent_inode; + + parent_inode = d_inode(dentry->d_parent); + inode_lock(parent_inode); + if (simple_positive(dentry)) { + dget(dentry); + simple_unlink(parent_inode, dentry); + d_delete(dentry); + dput(dentry); + } + inode_unlock(parent_inode); +} + +struct dentry *binderfs_create_file(struct dentry *parent, const char *name, + const struct file_operations *fops, + void *data) +{ + struct dentry *dentry; + struct inode *new_inode, *parent_inode; + struct super_block *sb; + + parent_inode = d_inode(parent); + inode_lock(parent_inode); + + dentry = binderfs_create_dentry(parent, name); + if (IS_ERR(dentry)) + goto out; + + sb = parent_inode->i_sb; + new_inode = binderfs_make_inode(sb, S_IFREG | 0444); + if (!new_inode) { + dput(dentry); + dentry = ERR_PTR(-ENOMEM); + goto out; + } + + new_inode->i_fop = fops; + new_inode->i_private = data; + d_instantiate(dentry, new_inode); + fsnotify_create(parent_inode, dentry); + +out: + inode_unlock(parent_inode); + return dentry; +} + +static struct dentry *binderfs_create_dir(struct dentry *parent, + const char *name) +{ + struct dentry *dentry; + struct inode *new_inode, *parent_inode; + struct super_block *sb; + + parent_inode = d_inode(parent); + inode_lock(parent_inode); + + dentry = binderfs_create_dentry(parent, name); + if (IS_ERR(dentry)) + goto out; + + sb = parent_inode->i_sb; + new_inode = binderfs_make_inode(sb, S_IFDIR | 0755); + if (!new_inode) { + dput(dentry); + dentry = ERR_PTR(-ENOMEM); + goto out; + } + + new_inode->i_fop = &simple_dir_operations; + new_inode->i_op = &simple_dir_inode_operations; + + set_nlink(new_inode, 2); + d_instantiate(dentry, new_inode); + inc_nlink(parent_inode); + fsnotify_mkdir(parent_inode, dentry); + +out: + inode_unlock(parent_inode); + return dentry; +} + +static int init_binder_logs(struct super_block *sb) +{ + struct dentry *binder_logs_root_dir, *dentry, *proc_log_dir; + struct binderfs_info *info; + int ret = 0; + + binder_logs_root_dir = binderfs_create_dir(sb->s_root, + "binder_logs"); + if (IS_ERR(binder_logs_root_dir)) { + ret = PTR_ERR(binder_logs_root_dir); + goto out; + } + + dentry = binderfs_create_file(binder_logs_root_dir, "stats", + &binder_stats_fops, NULL); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto out; + } + + dentry = binderfs_create_file(binder_logs_root_dir, "state", + &binder_state_fops, NULL); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto out; + } + + dentry = binderfs_create_file(binder_logs_root_dir, "transactions", + &binder_transactions_fops, NULL); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto out; + } + + dentry = binderfs_create_file(binder_logs_root_dir, + "transaction_log", + &binder_transaction_log_fops, + &binder_transaction_log); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto out; + } + + dentry = binderfs_create_file(binder_logs_root_dir, + "failed_transaction_log", + &binder_transaction_log_fops, + &binder_transaction_log_failed); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto out; + } + + proc_log_dir = binderfs_create_dir(binder_logs_root_dir, "proc"); + if (IS_ERR(proc_log_dir)) { + ret = PTR_ERR(proc_log_dir); + goto out; + } + info = sb->s_fs_info; + info->proc_log_dir = proc_log_dir; + +out: + return ret; +} + +static int binderfs_fill_super(struct super_block *sb, void *data, int silent) +{ + int ret; + struct binderfs_info *info; + struct inode *inode = NULL; + struct binderfs_device device_info = { 0 }; + const char *name; + size_t len; + + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; + + /* + * The binderfs filesystem can be mounted by userns root in a + * non-initial userns. By default such mounts have the SB_I_NODEV flag + * set in s_iflags to prevent security issues where userns root can + * just create random device nodes via mknod() since it owns the + * filesystem mount. But binderfs does not allow to create any files + * including devices nodes. The only way to create binder devices nodes + * is through the binder-control device which userns root is explicitly + * allowed to do. So removing the SB_I_NODEV flag from s_iflags is both + * necessary and safe. + */ + sb->s_iflags &= ~SB_I_NODEV; + sb->s_iflags |= SB_I_NOEXEC; + sb->s_magic = BINDERFS_SUPER_MAGIC; + sb->s_op = &binderfs_super_ops; + sb->s_time_gran = 1; + + sb->s_fs_info = kzalloc(sizeof(struct binderfs_info), GFP_KERNEL); + if (!sb->s_fs_info) + return -ENOMEM; + info = sb->s_fs_info; + + info->ipc_ns = get_ipc_ns(current->nsproxy->ipc_ns); + + ret = binderfs_parse_mount_opts(data, &info->mount_opts); + if (ret) + return ret; + + info->root_gid = make_kgid(sb->s_user_ns, 0); + if (!gid_valid(info->root_gid)) + info->root_gid = GLOBAL_ROOT_GID; + info->root_uid = make_kuid(sb->s_user_ns, 0); + if (!uid_valid(info->root_uid)) + info->root_uid = GLOBAL_ROOT_UID; + + inode = new_inode(sb); + if (!inode) + return -ENOMEM; + + inode->i_ino = FIRST_INODE; + inode->i_fop = &simple_dir_operations; + inode->i_mode = S_IFDIR | 0755; + inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); + inode->i_op = &binderfs_dir_inode_operations; + set_nlink(inode, 2); + + sb->s_root = d_make_root(inode); + if (!sb->s_root) + return -ENOMEM; + + ret = binderfs_binder_ctl_create(sb); + if (ret) + return ret; + + name = binder_devices_param; + for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) { + strscpy(device_info.name, name, len + 1); + ret = binderfs_binder_device_create(inode, NULL, &device_info); + if (ret) + return ret; + name += len; + if (*name == ',') + name++; + } + + if (info->mount_opts.stats_mode == STATS_GLOBAL) + return init_binder_logs(sb); + + return 0; +} + +static struct dentry *binderfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data) +{ + return mount_nodev(fs_type, flags, data, binderfs_fill_super); +} + +static void binderfs_kill_super(struct super_block *sb) +{ + struct binderfs_info *info = sb->s_fs_info; + + kill_litter_super(sb); + + if (info && info->ipc_ns) + put_ipc_ns(info->ipc_ns); + + kfree(info); +} + +static struct file_system_type binder_fs_type = { + .name = "binder", + .mount = binderfs_mount, + .kill_sb = binderfs_kill_super, + .fs_flags = FS_USERNS_MOUNT, +}; + +int __init init_binderfs(void) +{ + int ret; + const char *name; + size_t len; + + /* Verify that the default binderfs device names are valid. */ + name = binder_devices_param; + for (len = strcspn(name, ","); len > 0; len = strcspn(name, ",")) { + if (len > BINDERFS_MAX_NAME) + return -E2BIG; + name += len; + if (*name == ',') + name++; + } + + /* Allocate new major number for binderfs. */ + ret = alloc_chrdev_region(&binderfs_dev, 0, BINDERFS_MAX_MINOR, + "binder"); + if (ret) + return ret; + + ret = register_filesystem(&binder_fs_type); + if (ret) { + unregister_chrdev_region(binderfs_dev, BINDERFS_MAX_MINOR); + return ret; + } + + return ret; +} diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index cb5339166563..229a5ccd6b73 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -102,7 +102,8 @@ config SATA_AHCI_PLATFORM config AHCI_BRCM tristate "Broadcom AHCI SATA support" - depends on ARCH_BRCMSTB || BMIPS_GENERIC || ARCH_BCM_NSP + depends on ARCH_BRCMSTB || BMIPS_GENERIC || ARCH_BCM_NSP || \ + ARCH_BCM_63XX help This option enables support for the AHCI SATA3 controller found on Broadcom SoC's. diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index f003e301723a..0905c07b8c7e 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -88,6 +88,7 @@ enum board_ids { static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); static void ahci_remove_one(struct pci_dev *dev); +static void ahci_shutdown_one(struct pci_dev *dev); static int ahci_vt8251_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline); static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class, @@ -586,6 +587,7 @@ static struct pci_driver ahci_pci_driver = { .id_table = ahci_pci_tbl, .probe = ahci_init_one, .remove = ahci_remove_one, + .shutdown = ahci_shutdown_one, .driver = { .pm = &ahci_pci_pm_ops, }, @@ -1823,6 +1825,11 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; } +static void ahci_shutdown_one(struct pci_dev *pdev) +{ + ata_pci_shutdown_one(pdev); +} + static void ahci_remove_one(struct pci_dev *pdev) { pm_runtime_get_noresume(&pdev->dev); diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c index 5936d1679bf3..8beb81b24f14 100644 --- a/drivers/ata/ahci_brcm.c +++ b/drivers/ata/ahci_brcm.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "ahci.h" @@ -87,6 +88,7 @@ struct brcm_ahci_priv { u32 port_mask; u32 quirks; enum brcm_ahci_version version; + struct reset_control *rcdev; }; static const struct ata_port_info ahci_brcm_port_info = { @@ -221,19 +223,12 @@ static void brcm_sata_phys_disable(struct brcm_ahci_priv *priv) brcm_sata_phy_disable(priv, i); } -static u32 brcm_ahci_get_portmask(struct platform_device *pdev, +static u32 brcm_ahci_get_portmask(struct ahci_host_priv *hpriv, struct brcm_ahci_priv *priv) { - void __iomem *ahci; - struct resource *res; u32 impl; - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ahci"); - ahci = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(ahci)) - return 0; - - impl = readl(ahci + HOST_PORTS_IMPL); + impl = readl(hpriv->mmio + HOST_PORTS_IMPL); if (fls(impl) > SATA_TOP_MAX_PHYS) dev_warn(priv->dev, "warning: more ports than PHYs (%#x)\n", @@ -241,9 +236,6 @@ static u32 brcm_ahci_get_portmask(struct platform_device *pdev, else if (!impl) dev_info(priv->dev, "no ports found\n"); - devm_iounmap(&pdev->dev, ahci); - devm_release_mem_region(&pdev->dev, res->start, resource_size(res)); - return impl; } @@ -270,11 +262,10 @@ static int brcm_ahci_suspend(struct device *dev) struct ata_host *host = dev_get_drvdata(dev); struct ahci_host_priv *hpriv = host->private_data; struct brcm_ahci_priv *priv = hpriv->plat_data; - int ret; - ret = ahci_platform_suspend(dev); brcm_sata_phys_disable(priv); - return ret; + + return ahci_platform_suspend(dev); } static int brcm_ahci_resume(struct device *dev) @@ -282,11 +273,44 @@ static int brcm_ahci_resume(struct device *dev) struct ata_host *host = dev_get_drvdata(dev); struct ahci_host_priv *hpriv = host->private_data; struct brcm_ahci_priv *priv = hpriv->plat_data; + int ret; + + /* Make sure clocks are turned on before re-configuration */ + ret = ahci_platform_enable_clks(hpriv); + if (ret) + return ret; brcm_sata_init(priv); brcm_sata_phys_enable(priv); brcm_sata_alpm_init(hpriv); - return ahci_platform_resume(dev); + + /* Since we had to enable clocks earlier on, we cannot use + * ahci_platform_resume() as-is since a second call to + * ahci_platform_enable_resources() would bump up the resources + * (regulators, clocks, PHYs) count artificially so we copy the part + * after ahci_platform_enable_resources(). + */ + ret = ahci_platform_enable_phys(hpriv); + if (ret) + goto out_disable_phys; + + ret = ahci_platform_resume_host(dev); + if (ret) + goto out_disable_platform_phys; + + /* We resumed so update PM runtime state */ + pm_runtime_disable(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + + return 0; + +out_disable_platform_phys: + ahci_platform_disable_phys(hpriv); +out_disable_phys: + brcm_sata_phys_disable(priv); + ahci_platform_disable_clks(hpriv); + return ret; } #endif @@ -327,44 +351,74 @@ static int brcm_ahci_probe(struct platform_device *pdev) if (IS_ERR(priv->top_ctrl)) return PTR_ERR(priv->top_ctrl); + /* Reset is optional depending on platform */ + priv->rcdev = devm_reset_control_get(&pdev->dev, "ahci"); + if (!IS_ERR_OR_NULL(priv->rcdev)) + reset_control_deassert(priv->rcdev); + if ((priv->version == BRCM_SATA_BCM7425) || (priv->version == BRCM_SATA_NSP)) { priv->quirks |= BRCM_AHCI_QUIRK_NO_NCQ; priv->quirks |= BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE; } + hpriv = ahci_platform_get_resources(pdev); + if (IS_ERR(hpriv)) { + ret = PTR_ERR(hpriv); + goto out_reset; + } + + ret = ahci_platform_enable_clks(hpriv); + if (ret) + goto out_reset; + + /* Must be first so as to configure endianness including that + * of the standard AHCI register space. + */ brcm_sata_init(priv); - priv->port_mask = brcm_ahci_get_portmask(pdev, priv); - if (!priv->port_mask) - return -ENODEV; + /* Initializes priv->port_mask which is used below */ + priv->port_mask = brcm_ahci_get_portmask(hpriv, priv); + if (!priv->port_mask) { + ret = -ENODEV; + goto out_disable_clks; + } + /* Must be done before ahci_platform_enable_phys() */ brcm_sata_phys_enable(priv); - hpriv = ahci_platform_get_resources(pdev); - if (IS_ERR(hpriv)) - return PTR_ERR(hpriv); hpriv->plat_data = priv; hpriv->flags = AHCI_HFLAG_WAKE_BEFORE_STOP; brcm_sata_alpm_init(hpriv); - ret = ahci_platform_enable_resources(hpriv); - if (ret) - return ret; - if (priv->quirks & BRCM_AHCI_QUIRK_NO_NCQ) hpriv->flags |= AHCI_HFLAG_NO_NCQ; hpriv->flags |= AHCI_HFLAG_NO_WRITE_TO_RO; + ret = ahci_platform_enable_phys(hpriv); + if (ret) + goto out_disable_phys; + ret = ahci_platform_init_host(pdev, hpriv, &ahci_brcm_port_info, &ahci_platform_sht); if (ret) - return ret; + goto out_disable_platform_phys; dev_info(dev, "Broadcom AHCI SATA3 registered\n"); return 0; + +out_disable_platform_phys: + ahci_platform_disable_phys(hpriv); +out_disable_phys: + brcm_sata_phys_disable(priv); +out_disable_clks: + ahci_platform_disable_clks(hpriv); +out_reset: + if (!IS_ERR_OR_NULL(priv->rcdev)) + reset_control_assert(priv->rcdev); + return ret; } static int brcm_ahci_remove(struct platform_device *pdev) @@ -374,12 +428,12 @@ static int brcm_ahci_remove(struct platform_device *pdev) struct brcm_ahci_priv *priv = hpriv->plat_data; int ret; + brcm_sata_phys_disable(priv); + ret = ata_platform_remove_one(pdev); if (ret) return ret; - brcm_sata_phys_disable(priv); - return 0; } diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index cda9a0b5bdaa..7473ff46de66 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -191,7 +191,6 @@ struct ata_port_operations ahci_pmp_retry_srst_ops = { EXPORT_SYMBOL_GPL(ahci_pmp_retry_srst_ops); static bool ahci_em_messages __read_mostly = true; -EXPORT_SYMBOL_GPL(ahci_em_messages); module_param(ahci_em_messages, bool, 0444); /* add other LED protocol types when they become supported */ MODULE_PARM_DESC(ahci_em_messages, diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 70cdbf1b0f9a..5929672b809e 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -46,7 +46,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_ops); * RETURNS: * 0 on success otherwise a negative error code */ -static int ahci_platform_enable_phys(struct ahci_host_priv *hpriv) +int ahci_platform_enable_phys(struct ahci_host_priv *hpriv) { int rc, i; @@ -71,6 +71,7 @@ disable_phys: } return rc; } +EXPORT_SYMBOL_GPL(ahci_platform_enable_phys); /** * ahci_platform_disable_phys - Disable PHYs @@ -78,7 +79,7 @@ disable_phys: * * This function disables all PHYs found in hpriv->phys. */ -static void ahci_platform_disable_phys(struct ahci_host_priv *hpriv) +void ahci_platform_disable_phys(struct ahci_host_priv *hpriv) { int i; @@ -87,6 +88,7 @@ static void ahci_platform_disable_phys(struct ahci_host_priv *hpriv) phy_exit(hpriv->phys[i]); } } +EXPORT_SYMBOL_GPL(ahci_platform_disable_phys); /** * ahci_platform_enable_clks - Enable platform clocks diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index cbb162b683b6..33eb5e342a7a 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6676,6 +6676,9 @@ void ata_host_detach(struct ata_host *host) { int i; + /* Ensure ata_port probe has completed */ + async_synchronize_full(); + for (i = 0; i < host->n_ports; i++) ata_port_detach(host->ports[i]); @@ -6703,6 +6706,26 @@ void ata_pci_remove_one(struct pci_dev *pdev) ata_host_detach(host); } +void ata_pci_shutdown_one(struct pci_dev *pdev) +{ + struct ata_host *host = pci_get_drvdata(pdev); + int i; + + for (i = 0; i < host->n_ports; i++) { + struct ata_port *ap = host->ports[i]; + + ap->pflags |= ATA_PFLAG_FROZEN; + + /* Disable port interrupts */ + if (ap->ops->freeze) + ap->ops->freeze(ap); + + /* Stop the port DMA engines */ + if (ap->ops->port_stop) + ap->ops->port_stop(ap); + } +} + /* move to PCI subsystem */ int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits) { @@ -7323,6 +7346,7 @@ EXPORT_SYMBOL_GPL(ata_timing_cycle2mode); #ifdef CONFIG_PCI EXPORT_SYMBOL_GPL(pci_test_config_bits); +EXPORT_SYMBOL_GPL(ata_pci_shutdown_one); EXPORT_SYMBOL_GPL(ata_pci_remove_one); #ifdef CONFIG_PM EXPORT_SYMBOL_GPL(ata_pci_device_do_suspend); diff --git a/drivers/ata/pata_ep93xx.c b/drivers/ata/pata_ep93xx.c index 0a550190955a..cc6d06c1b2c7 100644 --- a/drivers/ata/pata_ep93xx.c +++ b/drivers/ata/pata_ep93xx.c @@ -659,7 +659,7 @@ static void ep93xx_pata_dma_init(struct ep93xx_pata_data *drv_data) * start of new transfer. */ drv_data->dma_rx_data.port = EP93XX_DMA_IDE; - drv_data->dma_rx_data.direction = DMA_FROM_DEVICE; + drv_data->dma_rx_data.direction = DMA_DEV_TO_MEM; drv_data->dma_rx_data.name = "ep93xx-pata-rx"; drv_data->dma_rx_channel = dma_request_channel(mask, ep93xx_pata_dma_filter, &drv_data->dma_rx_data); @@ -667,7 +667,7 @@ static void ep93xx_pata_dma_init(struct ep93xx_pata_data *drv_data) return; drv_data->dma_tx_data.port = EP93XX_DMA_IDE; - drv_data->dma_tx_data.direction = DMA_TO_DEVICE; + drv_data->dma_tx_data.direction = DMA_MEM_TO_DEV; drv_data->dma_tx_data.name = "ep93xx-pata-tx"; drv_data->dma_tx_channel = dma_request_channel(mask, ep93xx_pata_dma_filter, &drv_data->dma_tx_data); @@ -678,7 +678,7 @@ static void ep93xx_pata_dma_init(struct ep93xx_pata_data *drv_data) /* Configure receive channel direction and source address */ memset(&conf, 0, sizeof(conf)); - conf.direction = DMA_FROM_DEVICE; + conf.direction = DMA_DEV_TO_MEM; conf.src_addr = drv_data->udma_in_phys; conf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; if (dmaengine_slave_config(drv_data->dma_rx_channel, &conf)) { @@ -689,7 +689,7 @@ static void ep93xx_pata_dma_init(struct ep93xx_pata_data *drv_data) /* Configure transmit channel direction and destination address */ memset(&conf, 0, sizeof(conf)); - conf.direction = DMA_TO_DEVICE; + conf.direction = DMA_MEM_TO_DEV; conf.dst_addr = drv_data->udma_out_phys; conf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; if (dmaengine_slave_config(drv_data->dma_tx_channel, &conf)) { diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index ce47eb17901d..a106d15f6def 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -372,7 +372,7 @@ static int do_rx_dma(struct atm_vcc *vcc,struct sk_buff *skb, here = (eni_vcc->descr+skip) & (eni_vcc->words-1); dma[j++] = (here << MID_DMA_COUNT_SHIFT) | (vcc->vci << MID_DMA_VCI_SHIFT) | MID_DT_JK; - j++; + dma[j++] = 0; } here = (eni_vcc->descr+size+skip) & (eni_vcc->words-1); if (!eff) size += skip; @@ -445,7 +445,7 @@ static int do_rx_dma(struct atm_vcc *vcc,struct sk_buff *skb, if (size != eff) { dma[j++] = (here << MID_DMA_COUNT_SHIFT) | (vcc->vci << MID_DMA_VCI_SHIFT) | MID_DT_JK; - j++; + dma[j++] = 0; } if (!j || j > 2*RX_DMA_BUF) { printk(KERN_CRIT DEV_LABEL "!j or j too big!!!\n"); diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index 6b6368a56526..0e449ee11ac7 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -927,6 +927,7 @@ static int fs_open(struct atm_vcc *atm_vcc) } if (!to) { printk ("No more free channels for FS50..\n"); + kfree(vcc); return -EBUSY; } vcc->channo = dev->channo; @@ -937,6 +938,7 @@ static int fs_open(struct atm_vcc *atm_vcc) if (((DO_DIRECTION(rxtp) && dev->atm_vccs[vcc->channo])) || ( DO_DIRECTION(txtp) && test_bit (vcc->channo, dev->tx_inuse))) { printk ("Channel is in use for FS155.\n"); + kfree(vcc); return -EBUSY; } } @@ -950,6 +952,7 @@ static int fs_open(struct atm_vcc *atm_vcc) tc, sizeof (struct fs_transmit_config)); if (!tc) { fs_dprintk (FS_DEBUG_OPEN, "fs: can't alloc transmit_config.\n"); + kfree(vcc); return -ENOMEM; } diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index f8b7e86907cc..0a1ad1a1d34f 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -1496,12 +1496,14 @@ fore200e_open(struct atm_vcc *vcc) static void fore200e_close(struct atm_vcc* vcc) { - struct fore200e* fore200e = FORE200E_DEV(vcc->dev); struct fore200e_vcc* fore200e_vcc; + struct fore200e* fore200e; struct fore200e_vc_map* vc_map; unsigned long flags; ASSERT(vcc); + fore200e = FORE200E_DEV(vcc->dev); + ASSERT((vcc->vpi >= 0) && (vcc->vpi < 1<vci >= 0) && (vcc->vci < 1<dev); - struct fore200e_vcc* fore200e_vcc = FORE200E_VCC(vcc); + struct fore200e* fore200e; + struct fore200e_vcc* fore200e_vcc; struct fore200e_vc_map* vc_map; - struct host_txq* txq = &fore200e->host_txq; + struct host_txq* txq; struct host_txq_entry* entry; struct tpd* tpd; struct tpd_haddr tpd_haddr; @@ -1562,9 +1564,18 @@ fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb) unsigned char* data; unsigned long flags; - ASSERT(vcc); - ASSERT(fore200e); - ASSERT(fore200e_vcc); + if (!vcc) + return -EINVAL; + + fore200e = FORE200E_DEV(vcc->dev); + fore200e_vcc = FORE200E_VCC(vcc); + + if (!fore200e) + return -EINVAL; + + txq = &fore200e->host_txq; + if (!fore200e_vcc) + return -EINVAL; if (!test_bit(ATM_VF_READY, &vcc->flags)) { DPRINTK(1, "VC %d.%d.%d not ready for tx\n", vcc->itf, vcc->vpi, vcc->vpi); diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 2c288d1f42bb..817c7edfec0b 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -126,7 +126,7 @@ static unsigned long dummy[2] = {0,0}; #define zin_n(r) inl(zatm_dev->base+r*4) #define zin(r) inl(zatm_dev->base+uPD98401_##r*4) #define zout(v,r) outl(v,zatm_dev->base+uPD98401_##r*4) -#define zwait while (zin(CMR) & uPD98401_BUSY) +#define zwait() do {} while (zin(CMR) & uPD98401_BUSY) /* RX0, RX1, TX0, TX1 */ static const int mbx_entries[NR_MBX] = { 1024,1024,1024,1024 }; @@ -140,7 +140,7 @@ static const int mbx_esize[NR_MBX] = { 16,16,4,4 }; /* entry size in bytes */ static void zpokel(struct zatm_dev *zatm_dev,u32 value,u32 addr) { - zwait; + zwait(); zout(value,CER); zout(uPD98401_IND_ACC | uPD98401_IA_BALL | (uPD98401_IA_TGT_CM << uPD98401_IA_TGT_SHIFT) | addr,CMR); @@ -149,10 +149,10 @@ static void zpokel(struct zatm_dev *zatm_dev,u32 value,u32 addr) static u32 zpeekl(struct zatm_dev *zatm_dev,u32 addr) { - zwait; + zwait(); zout(uPD98401_IND_ACC | uPD98401_IA_BALL | uPD98401_IA_RW | (uPD98401_IA_TGT_CM << uPD98401_IA_TGT_SHIFT) | addr,CMR); - zwait; + zwait(); return zin(CER); } @@ -241,7 +241,7 @@ static void refill_pool(struct atm_dev *dev,int pool) } if (first) { spin_lock_irqsave(&zatm_dev->lock, flags); - zwait; + zwait(); zout(virt_to_bus(first),CER); zout(uPD98401_ADD_BAT | (pool << uPD98401_POOL_SHIFT) | count, CMR); @@ -508,9 +508,9 @@ static int open_rx_first(struct atm_vcc *vcc) } if (zatm_vcc->pool < 0) return -EMSGSIZE; spin_lock_irqsave(&zatm_dev->lock, flags); - zwait; + zwait(); zout(uPD98401_OPEN_CHAN,CMR); - zwait; + zwait(); DPRINTK("0x%x 0x%x\n",zin(CMR),zin(CER)); chan = (zin(CMR) & uPD98401_CHAN_ADDR) >> uPD98401_CHAN_ADDR_SHIFT; spin_unlock_irqrestore(&zatm_dev->lock, flags); @@ -571,21 +571,21 @@ static void close_rx(struct atm_vcc *vcc) pos = vcc->vci >> 1; shift = (1-(vcc->vci & 1)) << 4; zpokel(zatm_dev,zpeekl(zatm_dev,pos) & ~(0xffff << shift),pos); - zwait; + zwait(); zout(uPD98401_NOP,CMR); - zwait; + zwait(); zout(uPD98401_NOP,CMR); spin_unlock_irqrestore(&zatm_dev->lock, flags); } spin_lock_irqsave(&zatm_dev->lock, flags); - zwait; + zwait(); zout(uPD98401_DEACT_CHAN | uPD98401_CHAN_RT | (zatm_vcc->rx_chan << uPD98401_CHAN_ADDR_SHIFT),CMR); - zwait; + zwait(); udelay(10); /* why oh why ... ? */ zout(uPD98401_CLOSE_CHAN | uPD98401_CHAN_RT | (zatm_vcc->rx_chan << uPD98401_CHAN_ADDR_SHIFT),CMR); - zwait; + zwait(); if (!(zin(CMR) & uPD98401_CHAN_ADDR)) printk(KERN_CRIT DEV_LABEL "(itf %d): can't close RX channel " "%d\n",vcc->dev->number,zatm_vcc->rx_chan); @@ -699,7 +699,7 @@ printk("NONONONOO!!!!\n"); skb_queue_tail(&zatm_vcc->tx_queue,skb); DPRINTK("QRP=0x%08lx\n",zpeekl(zatm_dev,zatm_vcc->tx_chan*VC_SIZE/4+ uPD98401_TXVC_QRP)); - zwait; + zwait(); zout(uPD98401_TX_READY | (zatm_vcc->tx_chan << uPD98401_CHAN_ADDR_SHIFT),CMR); spin_unlock_irqrestore(&zatm_dev->lock, flags); @@ -891,12 +891,12 @@ static void close_tx(struct atm_vcc *vcc) } spin_lock_irqsave(&zatm_dev->lock, flags); #if 0 - zwait; + zwait(); zout(uPD98401_DEACT_CHAN | (chan << uPD98401_CHAN_ADDR_SHIFT),CMR); #endif - zwait; + zwait(); zout(uPD98401_CLOSE_CHAN | (chan << uPD98401_CHAN_ADDR_SHIFT),CMR); - zwait; + zwait(); if (!(zin(CMR) & uPD98401_CHAN_ADDR)) printk(KERN_CRIT DEV_LABEL "(itf %d): can't close TX channel " "%d\n",vcc->dev->number,chan); @@ -926,9 +926,9 @@ static int open_tx_first(struct atm_vcc *vcc) zatm_vcc->tx_chan = 0; if (vcc->qos.txtp.traffic_class == ATM_NONE) return 0; spin_lock_irqsave(&zatm_dev->lock, flags); - zwait; + zwait(); zout(uPD98401_OPEN_CHAN,CMR); - zwait; + zwait(); DPRINTK("0x%x 0x%x\n",zin(CMR),zin(CER)); chan = (zin(CMR) & uPD98401_CHAN_ADDR) >> uPD98401_CHAN_ADDR_SHIFT; spin_unlock_irqrestore(&zatm_dev->lock, flags); @@ -1559,7 +1559,7 @@ static void zatm_phy_put(struct atm_dev *dev,unsigned char value, struct zatm_dev *zatm_dev; zatm_dev = ZATM_DEV(dev); - zwait; + zwait(); zout(value,CER); zout(uPD98401_IND_ACC | uPD98401_IA_B0 | (uPD98401_IA_TGT_PHY << uPD98401_IA_TGT_SHIFT) | addr,CMR); @@ -1571,10 +1571,10 @@ static unsigned char zatm_phy_get(struct atm_dev *dev,unsigned long addr) struct zatm_dev *zatm_dev; zatm_dev = ZATM_DEV(dev); - zwait; + zwait(); zout(uPD98401_IND_ACC | uPD98401_IA_B0 | uPD98401_IA_RW | (uPD98401_IA_TGT_PHY << uPD98401_IA_TGT_SHIFT) | addr,CMR); - zwait; + zwait(); return zin(CER) & 0xff; } diff --git a/drivers/base/component.c b/drivers/base/component.c index 89b032f2ffd2..08da6160e94d 100644 --- a/drivers/base/component.c +++ b/drivers/base/component.c @@ -461,9 +461,9 @@ int component_bind_all(struct device *master_dev, void *data) } if (ret != 0) { - for (; i--; ) - if (!master->match->compare[i].duplicate) { - c = master->match->compare[i].component; + for (; i > 0; i--) + if (!master->match->compare[i - 1].duplicate) { + c = master->match->compare[i - 1].component; component_unbind(c, master, data); } } diff --git a/drivers/base/core.c b/drivers/base/core.c index 2ec9af90cd28..93c2fc58013e 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -10,6 +10,7 @@ * */ +#include #include #include #include @@ -179,11 +180,20 @@ struct device_link *device_link_add(struct device *consumer, struct device *supplier, u32 flags) { struct device_link *link; + bool rpm_put_supplier = false; if (!consumer || !supplier || ((flags & DL_FLAG_STATELESS) && (flags & DL_FLAG_AUTOREMOVE))) return NULL; + if (flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) { + if (pm_runtime_get_sync(supplier) < 0) { + pm_runtime_put_noidle(supplier); + return NULL; + } + rpm_put_supplier = true; + } + device_links_write_lock(); device_pm_lock(); @@ -208,13 +218,8 @@ struct device_link *device_link_add(struct device *consumer, if (flags & DL_FLAG_PM_RUNTIME) { if (flags & DL_FLAG_RPM_ACTIVE) { - if (pm_runtime_get_sync(supplier) < 0) { - pm_runtime_put_noidle(supplier); - kfree(link); - link = NULL; - goto out; - } link->rpm_active = true; + rpm_put_supplier = false; } pm_runtime_new_link(consumer); /* @@ -285,6 +290,10 @@ struct device_link *device_link_add(struct device *consumer, out: device_pm_unlock(); device_links_write_unlock(); + + if (rpm_put_supplier) + pm_runtime_put(supplier); + return link; } EXPORT_SYMBOL_GPL(device_link_add); @@ -2845,6 +2854,8 @@ void device_shutdown(void) wait_for_device_probe(); device_block_probing(); + cpufreq_suspend(); + spin_lock(&devices_kset->list_lock); /* * Walk the devices list backward, shutting down each in turn. diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index 62cda40f2156..38aca18fae98 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -542,12 +542,27 @@ ssize_t __weak cpu_show_mds(struct device *dev, return sprintf(buf, "Not affected\n"); } +ssize_t __weak cpu_show_tsx_async_abort(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + +ssize_t __weak cpu_show_itlb_multihit(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Not affected\n"); +} + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); +static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); +static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_meltdown.attr, @@ -556,6 +571,8 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { &dev_attr_spec_store_bypass.attr, &dev_attr_l1tf.attr, &dev_attr_mds.attr, + &dev_attr_tsx_async_abort.attr, + &dev_attr_itlb_multihit.attr, NULL }; diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 68af218a78f8..38eccf8be4e2 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -384,7 +384,10 @@ static int really_probe(struct device *dev, struct device_driver *drv) atomic_inc(&probe_count); pr_debug("bus: '%s': %s: probing driver %s with device %s\n", drv->bus->name, __func__, drv->name, dev_name(dev)); - WARN_ON(!list_empty(&dev->devres_head)); + if (!list_empty(&dev->devres_head)) { + dev_crit(dev, "Resources present before probing\n"); + return -EBUSY; + } re_probe: dev->driver = drv; diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 1d60b58a8c19..fe1557aa9b10 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -517,15 +517,20 @@ memory_probe_store(struct device *dev, struct device_attribute *attr, if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1)) return -EINVAL; + ret = lock_device_hotplug_sysfs(); + if (ret) + return ret; + nid = memory_add_physaddr_to_nid(phys_addr); - ret = add_memory(nid, phys_addr, - MIN_MEMORY_BLOCK_SIZE * sections_per_block); + ret = __add_memory(nid, phys_addr, + MIN_MEMORY_BLOCK_SIZE * sections_per_block); if (ret) goto out; ret = count; out: + unlock_device_hotplug(); return ret; } @@ -552,6 +557,9 @@ store_soft_offline_page(struct device *dev, pfn >>= PAGE_SHIFT; if (!pfn_valid(pfn)) return -ENXIO; + /* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */ + if (!pfn_to_online_page(pfn)) + return -EIO; ret = soft_offline_page(pfn_to_page(pfn), 0); return ret == 0 ? count : ret; } diff --git a/drivers/base/node.c b/drivers/base/node.c index 5c39f14d15a5..7801b94ca6f2 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -67,8 +67,11 @@ static ssize_t node_read_meminfo(struct device *dev, int nid = dev->id; struct pglist_data *pgdat = NODE_DATA(nid); struct sysinfo i; + unsigned long sreclaimable, sunreclaimable; si_meminfo_node(&i, nid); + sreclaimable = node_page_state(pgdat, NR_SLAB_RECLAIMABLE); + sunreclaimable = node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE); n = sprintf(buf, "Node %d MemTotal: %8lu kB\n" "Node %d MemFree: %8lu kB\n" @@ -114,10 +117,14 @@ static ssize_t node_read_meminfo(struct device *dev, "Node %d AnonPages: %8lu kB\n" "Node %d Shmem: %8lu kB\n" "Node %d KernelStack: %8lu kB\n" +#ifdef CONFIG_SHADOW_CALL_STACK + "Node %d ShadowCallStack:%8lu kB\n" +#endif "Node %d PageTables: %8lu kB\n" "Node %d NFS_Unstable: %8lu kB\n" "Node %d Bounce: %8lu kB\n" "Node %d WritebackTmp: %8lu kB\n" + "Node %d KReclaimable: %8lu kB\n" "Node %d Slab: %8lu kB\n" "Node %d SReclaimable: %8lu kB\n" "Node %d SUnreclaim: %8lu kB\n" @@ -134,24 +141,28 @@ static ssize_t node_read_meminfo(struct device *dev, nid, K(node_page_state(pgdat, NR_ANON_MAPPED)), nid, K(i.sharedram), nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB), +#ifdef CONFIG_SHADOW_CALL_STACK + nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_BYTES) / 1024, +#endif nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)), nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)), nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)), nid, K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), - nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE) + - node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)), - nid, K(node_page_state(pgdat, NR_SLAB_RECLAIMABLE)), + nid, K(sreclaimable + + node_page_state(pgdat, NR_KERNEL_MISC_RECLAIMABLE)), + nid, K(sreclaimable + sunreclaimable), + nid, K(sreclaimable), + nid, K(sunreclaimable) #ifdef CONFIG_TRANSPARENT_HUGEPAGE - nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE)), + , nid, K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR), nid, K(node_page_state(pgdat, NR_SHMEM_THPS) * HPAGE_PMD_NR), nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) * - HPAGE_PMD_NR)); -#else - nid, K(node_page_state(pgdat, NR_SLAB_UNRECLAIMABLE))); + HPAGE_PMD_NR) #endif + ); n += hugetlb_report_node_meminfo(nid, buf + n); return n; } diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 9398af5a5edc..1abd2d407f5f 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include "base.h" #include "power/power.h" @@ -67,7 +69,7 @@ void __weak arch_setup_pdev_archdata(struct platform_device *pdev) struct resource *platform_get_resource(struct platform_device *dev, unsigned int type, unsigned int num) { - int i; + u32 i; for (i = 0; i < dev->num_resources; i++) { struct resource *r = &dev->resource[i]; @@ -162,7 +164,7 @@ struct resource *platform_get_resource_byname(struct platform_device *dev, unsigned int type, const char *name) { - int i; + u32 i; for (i = 0; i < dev->num_resources; i++) { struct resource *r = &dev->resource[i]; @@ -359,7 +361,8 @@ EXPORT_SYMBOL_GPL(platform_device_add_properties); */ int platform_device_add(struct platform_device *pdev) { - int i, ret; + u32 i; + int ret; if (!pdev) return -EINVAL; @@ -425,7 +428,7 @@ int platform_device_add(struct platform_device *pdev) pdev->id = PLATFORM_DEVID_AUTO; } - while (--i >= 0) { + while (i--) { struct resource *r = &pdev->resource[i]; if (r->parent) release_resource(r); @@ -446,7 +449,7 @@ EXPORT_SYMBOL_GPL(platform_device_add); */ void platform_device_del(struct platform_device *pdev) { - int i; + u32 i; if (pdev) { device_remove_properties(&pdev->dev); @@ -542,6 +545,8 @@ struct platform_device *platform_device_register_full( if (!pdev->dev.dma_mask) goto err; + kmemleak_ignore(pdev->dev.dma_mask); + *pdev->dev.dma_mask = pdevinfo->dma_mask; pdev->dev.coherent_dma_mask = pdevinfo->dma_mask; } diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index c276ba1c0a19..e811f2414889 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -369,6 +369,10 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on, return -EAGAIN; } + /* Default to shallowest state. */ + if (!genpd->gov) + genpd->state_idx = 0; + if (genpd->power_off) { int ret; @@ -1598,6 +1602,8 @@ int pm_genpd_init(struct generic_pm_domain *genpd, ret = genpd_set_default_power_state(genpd); if (ret) return ret; + } else if (!gov) { + pr_warn("%s : no governor for states\n", genpd->name); } mutex_lock(&gpd_list_lock); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index b555d9153b33..76d8cbfd9f57 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -280,10 +280,38 @@ static void dpm_wait_for_suppliers(struct device *dev, bool async) device_links_read_unlock(idx); } -static void dpm_wait_for_superior(struct device *dev, bool async) +static bool dpm_wait_for_superior(struct device *dev, bool async) { - dpm_wait(dev->parent, async); + struct device *parent; + + /* + * If the device is resumed asynchronously and the parent's callback + * deletes both the device and the parent itself, the parent object may + * be freed while this function is running, so avoid that by reference + * counting the parent once more unless the device has been deleted + * already (in which case return right away). + */ + mutex_lock(&dpm_list_mtx); + + if (!device_pm_initialized(dev)) { + mutex_unlock(&dpm_list_mtx); + return false; + } + + parent = get_device(dev->parent); + + mutex_unlock(&dpm_list_mtx); + + dpm_wait(parent, async); + put_device(parent); + dpm_wait_for_suppliers(dev, async); + + /* + * If the parent's callback has deleted the device, attempting to resume + * it would be invalid, so avoid doing that then. + */ + return device_pm_initialized(dev); } static void dpm_wait_for_consumers(struct device *dev, bool async) @@ -562,7 +590,8 @@ static int device_resume_noirq(struct device *dev, pm_message_t state, bool asyn if (!dev->power.is_noirq_suspended) goto Out; - dpm_wait_for_superior(dev, async); + if (!dpm_wait_for_superior(dev, async)) + goto Out; if (dev->pm_domain) { info = "noirq power domain "; @@ -714,7 +743,8 @@ static int device_resume_early(struct device *dev, pm_message_t state, bool asyn if (!dev->power.is_late_suspended) goto Out; - dpm_wait_for_superior(dev, async); + if (!dpm_wait_for_superior(dev, async)) + goto Out; if (dev->pm_domain) { info = "early power domain "; @@ -858,7 +888,9 @@ static int device_resume(struct device *dev, pm_message_t state, bool async) goto Complete; } - dpm_wait_for_superior(dev, async); + if (!dpm_wait_for_superior(dev, async)) + goto Complete; + dpm_watchdog_set(&wd, dev); device_lock(dev); diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 7dd05a5c5006..6f7e8f19c0a2 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -953,7 +953,7 @@ EXPORT_SYMBOL_GPL(pm_system_wakeup); void pm_system_cancel_wakeup(void) { - atomic_dec(&pm_abort_suspend); + atomic_dec_if_positive(&pm_abort_suspend); } void pm_wakeup_clear(bool reset) diff --git a/drivers/bcma/driver_pci.c b/drivers/bcma/driver_pci.c index f499a469e66d..12b2cc9a3fbe 100644 --- a/drivers/bcma/driver_pci.c +++ b/drivers/bcma/driver_pci.c @@ -78,7 +78,7 @@ static u16 bcma_pcie_mdio_read(struct bcma_drv_pci *pc, u16 device, u8 address) v |= (address << BCMA_CORE_PCI_MDIODATA_REGADDR_SHF_OLD); } - v = BCMA_CORE_PCI_MDIODATA_START; + v |= BCMA_CORE_PCI_MDIODATA_START; v |= BCMA_CORE_PCI_MDIODATA_READ; v |= BCMA_CORE_PCI_MDIODATA_TA; @@ -121,7 +121,7 @@ static void bcma_pcie_mdio_write(struct bcma_drv_pci *pc, u16 device, v |= (address << BCMA_CORE_PCI_MDIODATA_REGADDR_SHF_OLD); } - v = BCMA_CORE_PCI_MDIODATA_START; + v |= BCMA_CORE_PCI_MDIODATA_START; v |= BCMA_CORE_PCI_MDIODATA_WRITE; v |= BCMA_CORE_PCI_MDIODATA_TA; v |= data; diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 49908c74bfcb..b0e8d8364876 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c @@ -1699,11 +1699,41 @@ static const struct block_device_operations floppy_fops = { .check_events = amiga_check_events, }; +static struct gendisk *fd_alloc_disk(int drive) +{ + struct gendisk *disk; + + disk = alloc_disk(1); + if (!disk) + goto out; + + disk->queue = blk_init_queue(do_fd_request, &amiflop_lock); + if (IS_ERR(disk->queue)) { + disk->queue = NULL; + goto out_put_disk; + } + + unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL); + if (!unit[drive].trackbuf) + goto out_cleanup_queue; + + return disk; + +out_cleanup_queue: + blk_cleanup_queue(disk->queue); + disk->queue = NULL; +out_put_disk: + put_disk(disk); +out: + unit[drive].type->code = FD_NODRIVE; + return NULL; +} + static int __init fd_probe_drives(void) { int drive,drives,nomem; - printk(KERN_INFO "FD: probing units\nfound "); + pr_info("FD: probing units\nfound"); drives=0; nomem=0; for(drive=0;drivecode == FD_NODRIVE) continue; - disk = alloc_disk(1); + + disk = fd_alloc_disk(drive); if (!disk) { - unit[drive].type->code = FD_NODRIVE; + pr_cont(" no mem for fd%d", drive); + nomem = 1; continue; } unit[drive].gendisk = disk; - - disk->queue = blk_init_queue(do_fd_request, &amiflop_lock); - if (!disk->queue) { - unit[drive].type->code = FD_NODRIVE; - continue; - } - drives++; - if ((unit[drive].trackbuf = kmalloc(FLOPPY_MAX_SECTORS * 512, GFP_KERNEL)) == NULL) { - printk("no mem for "); - unit[drive].type = &drive_types[num_dr_types - 1]; /* FD_NODRIVE */ - drives--; - nomem = 1; - } - printk("fd%d ",drive); + + pr_cont(" fd%d",drive); disk->major = FLOPPY_MAJOR; disk->first_minor = drive; disk->fops = &floppy_fops; @@ -1742,11 +1762,11 @@ static int __init fd_probe_drives(void) } if ((drives > 0) || (nomem == 0)) { if (drives == 0) - printk("no drives"); - printk("\n"); + pr_cont(" no drives"); + pr_cont("\n"); return drives; } - printk("\n"); + pr_cont("\n"); return -ENOMEM; } @@ -1837,30 +1857,6 @@ out_blkdev: return ret; } -#if 0 /* not safe to unload */ -static int __exit amiga_floppy_remove(struct platform_device *pdev) -{ - int i; - - for( i = 0; i < FD_MAX_UNITS; i++) { - if (unit[i].type->code != FD_NODRIVE) { - struct request_queue *q = unit[i].gendisk->queue; - del_gendisk(unit[i].gendisk); - put_disk(unit[i].gendisk); - kfree(unit[i].trackbuf); - if (q) - blk_cleanup_queue(q); - } - } - blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); - free_irq(IRQ_AMIGA_CIAA_TB, NULL); - free_irq(IRQ_AMIGA_DSKBLK, NULL); - custom.dmacon = DMAF_DISK; /* disable DMA */ - amiga_chip_free(raw_buf); - unregister_blkdev(FLOPPY_MAJOR, "fd"); -} -#endif - static struct platform_driver amiga_floppy_driver = { .driver = { .name = "amiga-floppy", diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 2d7178f7754e..0129b1921cb3 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -529,6 +529,25 @@ static struct kobject *brd_probe(dev_t dev, int *part, void *data) return kobj; } +static inline void brd_check_and_reset_par(void) +{ + if (unlikely(!max_part)) + max_part = 1; + + /* + * make sure 'max_part' can be divided exactly by (1U << MINORBITS), + * otherwise, it is possiable to get same dev_t when adding partitions. + */ + if ((1U << MINORBITS) % max_part != 0) + max_part = 1UL << fls(max_part); + + if (max_part > DISK_MAX_PARTS) { + pr_info("brd: max_part can't be larger than %d, reset max_part = %d.\n", + DISK_MAX_PARTS, DISK_MAX_PARTS); + max_part = DISK_MAX_PARTS; + } +} + static int __init brd_init(void) { struct brd_device *brd, *next; @@ -552,8 +571,7 @@ static int __init brd_init(void) if (register_blkdev(RAMDISK_MAJOR, "ramdisk")) return -EIO; - if (unlikely(!max_part)) - max_part = 1; + brd_check_and_reset_par(); for (i = 0; i < rd_nr; i++) { brd = brd_alloc(i); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 8cb3791898ae..b998e3abca7a 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -334,6 +334,8 @@ static int drbd_thread_setup(void *arg) thi->name[0], resource->name); + allow_kernel_signal(DRBD_SIGKILL); + allow_kernel_signal(SIGXCPU); restart: retval = thi->function(thi); @@ -795,7 +797,6 @@ int __drbd_send_protocol(struct drbd_connection *connection, enum drbd_packet cm if (nc->tentative && connection->agreed_pro_version < 92) { rcu_read_unlock(); - mutex_unlock(&sock->mutex); drbd_err(connection, "--dry-run is not supported by peer"); return -EOPNOTSUPP; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ad13ec66c8e4..31d7fe4480af 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1515,6 +1515,30 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis } } +static int disk_opts_check_al_size(struct drbd_device *device, struct disk_conf *dc) +{ + int err = -EBUSY; + + if (device->act_log && + device->act_log->nr_elements == dc->al_extents) + return 0; + + drbd_suspend_io(device); + /* If IO completion is currently blocked, we would likely wait + * "forever" for the activity log to become unused. So we don't. */ + if (atomic_read(&device->ap_bio_cnt)) + goto out; + + wait_event(device->al_wait, lc_try_lock(device->act_log)); + drbd_al_shrink(device); + err = drbd_check_al_size(device, dc); + lc_unlock(device->act_log); + wake_up(&device->al_wait); +out: + drbd_resume_io(device); + return err; +} + int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) { struct drbd_config_context adm_ctx; @@ -1577,15 +1601,12 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) } } - drbd_suspend_io(device); - wait_event(device->al_wait, lc_try_lock(device->act_log)); - drbd_al_shrink(device); - err = drbd_check_al_size(device, new_disk_conf); - lc_unlock(device->act_log); - wake_up(&device->al_wait); - drbd_resume_io(device); - + err = disk_opts_check_al_size(device, new_disk_conf); if (err) { + /* Could be just "busy". Ignore? + * Introduce dedicated error code? */ + drbd_msg_put_info(adm_ctx.reply_skb, + "Try again without changing current al-extents setting"); retcode = ERR_NOMEM; goto fail_unlock; } @@ -1935,9 +1956,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) } } - if (device->state.conn < C_CONNECTED && - device->state.role == R_PRIMARY && device->ed_uuid && - (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { + if (device->state.pdsk != D_UP_TO_DATE && device->ed_uuid && + (device->state.role == R_PRIMARY || device->state.peer == R_PRIMARY) && + (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { drbd_err(device, "Can only attach to data with current UUID=%016llX\n", (unsigned long long)device->ed_uuid); retcode = ERR_DATA_NOT_CURRENT; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8fbdfaacc222..a7c180426c60 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3977,6 +3977,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL; enum determine_dev_size dd = DS_UNCHANGED; sector_t p_size, p_usize, p_csize, my_usize; + sector_t new_size, cur_size; int ldsc = 0; /* local disk size changed */ enum dds_flags ddsf; @@ -3984,6 +3985,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info if (!peer_device) return config_unknown_volume(connection, pi); device = peer_device->device; + cur_size = drbd_get_capacity(device->this_bdev); p_size = be64_to_cpu(p->d_size); p_usize = be64_to_cpu(p->u_size); @@ -3994,7 +3996,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info device->p_size = p_size; if (get_ldev(device)) { - sector_t new_size, cur_size; rcu_read_lock(); my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size; rcu_read_unlock(); @@ -4012,7 +4013,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info /* Never shrink a device with usable data during connect. But allow online shrinking if we are connected. */ new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0); - cur_size = drbd_get_capacity(device->this_bdev); if (new_size < cur_size && device->state.disk >= D_OUTDATED && device->state.conn < C_CONNECTED) { @@ -4077,9 +4077,36 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info * * However, if he sends a zero current size, * take his (user-capped or) backing disk size anyways. + * + * Unless of course he does not have a disk himself. + * In which case we ignore this completely. */ + sector_t new_size = p_csize ?: p_usize ?: p_size; drbd_reconsider_queue_parameters(device, NULL, o); - drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size); + if (new_size == 0) { + /* Ignore, peer does not know nothing. */ + } else if (new_size == cur_size) { + /* nothing to do */ + } else if (cur_size != 0 && p_size == 0) { + drbd_warn(device, "Ignored diskless peer device size (peer:%llu != me:%llu sectors)!\n", + (unsigned long long)new_size, (unsigned long long)cur_size); + } else if (new_size < cur_size && device->state.role == R_PRIMARY) { + drbd_err(device, "The peer's device size is too small! (%llu < %llu sectors); demote me first!\n", + (unsigned long long)new_size, (unsigned long long)cur_size); + conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD); + return -EIO; + } else { + /* I believe the peer, if + * - I don't have a current size myself + * - we agree on the size anyways + * - I do have a current size, am Secondary, + * and he has the only disk + * - I do have a current size, am Primary, + * and he has the only disk, + * which is larger than my current size + */ + drbd_set_my_capacity(device, new_size); + } } if (get_ldev(device)) { @@ -4365,6 +4392,25 @@ static int receive_state(struct drbd_connection *connection, struct packet_info if (peer_state.conn == C_AHEAD) ns.conn = C_BEHIND; + /* TODO: + * if (primary and diskless and peer uuid != effective uuid) + * abort attach on peer; + * + * If this node does not have good data, was already connected, but + * the peer did a late attach only now, trying to "negotiate" with me, + * AND I am currently Primary, possibly frozen, with some specific + * "effective" uuid, this should never be reached, really, because + * we first send the uuids, then the current state. + * + * In this scenario, we already dropped the connection hard + * when we received the unsuitable uuids (receive_uuids(). + * + * Should we want to change this, that is: not drop the connection in + * receive_uuids() already, then we would need to add a branch here + * that aborts the attach of "unsuitable uuids" on the peer in case + * this node is currently Diskless Primary. + */ + if (device->p_uuid && peer_state.disk >= D_NEGOTIATING && get_ldev_if_state(device, D_NEGOTIATING)) { int cr; /* consider resync */ diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 0813c654c893..b452359b6aae 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -688,11 +688,9 @@ request_detach(struct drbd_device *device) CS_VERBOSE | CS_ORDERED | CS_INHIBIT_MD_IO); } -enum drbd_state_rv -drbd_request_detach_interruptible(struct drbd_device *device) +int drbd_request_detach_interruptible(struct drbd_device *device) { - enum drbd_state_rv rv; - int ret; + int ret, rv; drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */ wait_event_interruptible(device->state_wait, diff --git a/drivers/block/drbd/drbd_state.h b/drivers/block/drbd/drbd_state.h index ea58301d0895..f87371e55e68 100644 --- a/drivers/block/drbd/drbd_state.h +++ b/drivers/block/drbd/drbd_state.h @@ -131,7 +131,7 @@ extern enum drbd_state_rv _drbd_set_state(struct drbd_device *, union drbd_state enum chg_state_flags, struct completion *done); extern void print_st_err(struct drbd_device *, union drbd_state, - union drbd_state, int); + union drbd_state, enum drbd_state_rv); enum drbd_state_rv _conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val, @@ -162,8 +162,7 @@ static inline int drbd_request_state(struct drbd_device *device, } /* for use in adm_detach() (drbd_adm_detach(), drbd_adm_down()) */ -enum drbd_state_rv -drbd_request_detach_interruptible(struct drbd_device *device); +int drbd_request_detach_interruptible(struct drbd_device *device); enum drbd_role conn_highest_role(struct drbd_connection *connection); enum drbd_role conn_highest_peer(struct drbd_connection *connection); diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 5f1aa3197244..cbf74731cfce 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -848,14 +848,17 @@ static void reset_fdc_info(int mode) /* selects the fdc and drive, and enables the fdc's input/dma. */ static void set_fdc(int drive) { + unsigned int new_fdc = fdc; + if (drive >= 0 && drive < N_DRIVE) { - fdc = FDC(drive); + new_fdc = FDC(drive); current_drive = drive; } - if (fdc != 1 && fdc != 0) { + if (new_fdc >= N_FDC) { pr_info("bad fdc value\n"); return; } + fdc = new_fdc; set_dor(fdc, ~0, 8); #if N_FDC > 1 set_dor(1 - fdc, ~8, 0); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 87d7c42affbc..ca912eecc74e 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -414,18 +414,20 @@ out_free_page: return ret; } -static int lo_discard(struct loop_device *lo, struct request *rq, loff_t pos) +static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos, + int mode) { /* - * We use punch hole to reclaim the free space used by the - * image a.k.a. discard. However we do not support discard if - * encryption is enabled, because it may give an attacker - * useful information. + * We use fallocate to manipulate the space mappings used by the image + * a.k.a. discard/zerorange. However we do not support this if + * encryption is enabled, because it may give an attacker useful + * information. */ struct file *file = lo->lo_backing_file; - int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; int ret; + mode |= FALLOC_FL_KEEP_SIZE; + if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) { ret = -EOPNOTSUPP; goto out; @@ -565,9 +567,17 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) switch (req_op(rq)) { case REQ_OP_FLUSH: return lo_req_flush(lo, rq); - case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: - return lo_discard(lo, rq, pos); + /* + * If the caller doesn't want deallocation, call zeroout to + * write zeroes the range. Otherwise, punch them out. + */ + return lo_fallocate(lo, rq, pos, + (rq->cmd_flags & REQ_NOUNMAP) ? + FALLOC_FL_ZERO_RANGE : + FALLOC_FL_PUNCH_HOLE); + case REQ_OP_DISCARD: + return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE); case REQ_OP_WRITE: if (lo->transfer) return lo_write_transfer(lo, rq, pos); @@ -1605,6 +1615,8 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, arg = (unsigned long) compat_ptr(arg); case LOOP_SET_FD: case LOOP_CHANGE_FD: + case LOOP_SET_BLOCK_SIZE: + case LOOP_SET_DIRECT_IO: err = lo_ioctl(bdev, mode, cmd, arg); break; default: diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index a23460084955..8f56e6b2f114 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -228,8 +228,8 @@ static void nbd_put(struct nbd_device *nbd) if (refcount_dec_and_mutex_lock(&nbd->refs, &nbd_index_mutex)) { idr_remove(&nbd_index_idr, nbd->index); - mutex_unlock(&nbd_index_mutex); nbd_dev_remove(nbd); + mutex_unlock(&nbd_index_mutex); } } @@ -648,6 +648,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) ret = -ENOENT; goto out; } + if (cmd->status != BLK_STS_OK) { + dev_err(disk_to_dev(nbd->disk), "Command already handled %p\n", + req); + ret = -ENOENT; + goto out; + } if (test_bit(NBD_CMD_REQUEUED, &cmd->flags)) { dev_err(disk_to_dev(nbd->disk), "Raced with timeout on req %p\n", req); @@ -912,6 +918,26 @@ static blk_status_t nbd_queue_rq(struct blk_mq_hw_ctx *hctx, return ret; } +static struct socket *nbd_get_socket(struct nbd_device *nbd, unsigned long fd, + int *err) +{ + struct socket *sock; + + *err = 0; + sock = sockfd_lookup(fd, err); + if (!sock) + return NULL; + + if (sock->ops->shutdown == sock_no_shutdown) { + dev_err(disk_to_dev(nbd->disk), "Unsupported socket: shutdown callout must be supported.\n"); + *err = -EINVAL; + sockfd_put(sock); + return NULL; + } + + return sock; +} + static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, bool netlink) { @@ -921,7 +947,7 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, struct nbd_sock *nsock; int err; - sock = sockfd_lookup(arg, &err); + sock = nbd_get_socket(nbd, arg, &err); if (!sock) return err; @@ -944,14 +970,15 @@ static int nbd_add_socket(struct nbd_device *nbd, unsigned long arg, sockfd_put(sock); return -ENOMEM; } + + config->socks = socks; + nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL); if (!nsock) { sockfd_put(sock); return -ENOMEM; } - config->socks = socks; - nsock->fallback_index = -1; nsock->dead = false; mutex_init(&nsock->tx_lock); @@ -973,7 +1000,7 @@ static int nbd_reconnect_socket(struct nbd_device *nbd, unsigned long arg) int i; int err; - sock = sockfd_lookup(arg, &err); + sock = nbd_get_socket(nbd, arg, &err); if (!sock) return err; @@ -1176,6 +1203,16 @@ static int nbd_start_device(struct nbd_device *nbd) args = kzalloc(sizeof(*args), GFP_KERNEL); if (!args) { sock_shutdown(nbd); + /* + * If num_connections is m (2 < m), + * and NO.1 ~ NO.n(1 < n < m) kzallocs are successful. + * But NO.(n + 1) failed. We still have n recv threads. + * So, add flush_workqueue here to prevent recv threads + * dropping the last config_refs and trying to destroy + * the workqueue from inside the workqueue. + */ + if (i) + flush_workqueue(nbd->recv_workq); return -ENOMEM; } sk_set_memalloc(config->socks[i]->sock->sk); @@ -1207,10 +1244,10 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b mutex_unlock(&nbd->config_lock); ret = wait_event_interruptible(config->recv_wq, atomic_read(&config->recv_threads) == 0); - if (ret) { + if (ret) sock_shutdown(nbd); - flush_workqueue(nbd->recv_workq); - } + flush_workqueue(nbd->recv_workq); + mutex_lock(&nbd->config_lock); bd_set_size(bdev, 0); /* user requested, ignore socket errors */ diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 34997df132e2..6beafaa335c7 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -1025,8 +1025,10 @@ static void rsxx_pci_remove(struct pci_dev *dev) cancel_work_sync(&card->event_work); + destroy_workqueue(card->event_wq); rsxx_destroy_dev(card); rsxx_dma_destroy(card); + destroy_workqueue(card->creg_ctrl.creg_wq); spin_lock_irqsave(&card->irq_lock, flags); rsxx_disable_ier_and_isr(card, CR_INTR_ALL); diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 64d0fc17c174..95649025cde7 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -1417,7 +1417,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev, case SKD_CHECK_STATUS_BUSY_IMMINENT: skd_log_skreq(skdev, skreq, "retry(busy)"); - blk_requeue_request(skdev->queue, req); + blk_mq_requeue_request(req, true); dev_info(&skdev->pdev->dev, "drive BUSY imminent\n"); skdev->state = SKD_DRVR_STATE_BUSY_IMMINENT; skdev->timer_countdown = SKD_TIMER_MINUTES(20); @@ -1427,7 +1427,7 @@ static void skd_resolve_req_exception(struct skd_device *skdev, case SKD_CHECK_STATUS_REQUEUE_REQUEST: if ((unsigned long) ++req->special < SKD_MAX_RETRIES) { skd_log_skreq(skdev, skreq, "retry"); - blk_requeue_request(skdev->queue, req); + blk_mq_requeue_request(req, true); break; } /* fall through */ diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 8767401f75e0..19d226ff15ef 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -271,10 +271,12 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); if (err) { virtqueue_kick(vblk->vqs[qid].vq); - blk_mq_stop_hw_queue(hctx); + /* Don't stop the queue if -ENOMEM: we may have failed to + * bounce the buffer due to global resource outage. + */ + if (err == -ENOSPC) + blk_mq_stop_hw_queue(hctx); spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); - /* Out of mem doesn't actually happen, since we fall back - * to direct descriptors */ if (err == -ENOMEM || err == -ENOSPC) return BLK_STS_RESOURCE; return BLK_STS_IOERR; diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 987d665e82de..c1d1b94f71b5 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -929,6 +929,8 @@ next: out_of_memory: pr_alert("%s: out of memory\n", __func__); put_free_pages(ring, pages_to_gnt, segs_to_map); + for (i = last_map; i < num; i++) + pages[i]->handle = BLKBACK_INVALID_HANDLE; return -ENOMEM; } diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index ed4e80779124..e9fa4a1fc791 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -178,6 +178,15 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) blkif->domid = domid; atomic_set(&blkif->refcnt, 1); init_completion(&blkif->drain_complete); + + /* + * Because freeing back to the cache may be deferred, it is not + * safe to unload the module (and hence destroy the cache) until + * this has completed. To prevent premature unloading, take an + * extra module reference here and release only when the object + * has been freed back to the cache. + */ + __module_get(THIS_MODULE); INIT_WORK(&blkif->free_work, xen_blkif_deferred_free); return blkif; @@ -327,6 +336,7 @@ static void xen_blkif_free(struct xen_blkif *blkif) /* Make sure everything is drained before shutting down */ kmem_cache_free(xen_blkif_cachep, blkif); + module_put(THIS_MODULE); } int __init xen_blkif_interface_init(void) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 32ac5f551e55..e6887714fe0a 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1115,8 +1115,8 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, if (!VDEV_IS_EXTENDED(info->vdevice)) { err = xen_translate_vdev(info->vdevice, &minor, &offset); if (err) - return err; - nr_parts = PARTS_PER_DISK; + return err; + nr_parts = PARTS_PER_DISK; } else { minor = BLKIF_MINOR_EXT(info->vdevice); nr_parts = PARTS_PER_EXT_DISK; diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index fe52026d31b5..b44abb57ca7a 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -424,13 +424,14 @@ static void reset_bdev(struct zram *zram) static ssize_t backing_dev_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct file *file; struct zram *zram = dev_to_zram(dev); - struct file *file = zram->backing_dev; char *p; ssize_t ret; down_read(&zram->init_lock); - if (!zram->backing_dev) { + file = zram->backing_dev; + if (!file) { memcpy(buf, "none\n", 5); up_read(&zram->init_lock); return 5; diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 73561bfd95d4..424f399cc79b 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -1123,7 +1123,7 @@ static int btusb_open(struct hci_dev *hdev) if (data->setup_on_usb) { err = data->setup_on_usb(hdev); if (err < 0) - return err; + goto setup_fail; } data->intf->needs_remote_wakeup = 1; @@ -1155,6 +1155,7 @@ done: failed: clear_bit(BTUSB_INTR_RUNNING, &data->flags); +setup_fail: usb_autopm_put_interface(data->intf); return err; } diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 6d41b2023f09..61971ddbd231 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -50,6 +50,12 @@ #define BCM_LM_DIAG_PKT 0x07 #define BCM_LM_DIAG_SIZE 63 +#define BCM_TYPE49_PKT 0x31 +#define BCM_TYPE49_SIZE 0 + +#define BCM_TYPE52_PKT 0x34 +#define BCM_TYPE52_SIZE 0 + #define BCM_AUTOSUSPEND_DELAY 5000 /* default autosleep delay */ /* platform device driver resources */ @@ -483,12 +489,28 @@ finalize: .lsize = 0, \ .maxlen = BCM_NULL_SIZE +#define BCM_RECV_TYPE49 \ + .type = BCM_TYPE49_PKT, \ + .hlen = BCM_TYPE49_SIZE, \ + .loff = 0, \ + .lsize = 0, \ + .maxlen = BCM_TYPE49_SIZE + +#define BCM_RECV_TYPE52 \ + .type = BCM_TYPE52_PKT, \ + .hlen = BCM_TYPE52_SIZE, \ + .loff = 0, \ + .lsize = 0, \ + .maxlen = BCM_TYPE52_SIZE + static const struct h4_recv_pkt bcm_recv_pkts[] = { { H4_RECV_ACL, .recv = hci_recv_frame }, { H4_RECV_SCO, .recv = hci_recv_frame }, { H4_RECV_EVENT, .recv = hci_recv_frame }, { BCM_RECV_LM_DIAG, .recv = hci_recv_diag }, { BCM_RECV_NULL, .recv = hci_recv_diag }, + { BCM_RECV_TYPE49, .recv = hci_recv_diag }, + { BCM_RECV_TYPE52, .recv = hci_recv_diag }, }; static int bcm_recv(struct hci_uart *hu, const void *data, int count) diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c index 57a7f4255ac0..ee6c403de6af 100644 --- a/drivers/bluetooth/hci_bcsp.c +++ b/drivers/bluetooth/hci_bcsp.c @@ -605,6 +605,7 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) if (*ptr == 0xc0) { BT_ERR("Short BCSP packet"); kfree_skb(bcsp->rx_skb); + bcsp->rx_skb = NULL; bcsp->rx_state = BCSP_W4_PKT_START; bcsp->rx_count = 0; } else @@ -620,6 +621,7 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) bcsp->rx_skb->data[2])) != bcsp->rx_skb->data[3]) { BT_ERR("Error in BCSP hdr checksum"); kfree_skb(bcsp->rx_skb); + bcsp->rx_skb = NULL; bcsp->rx_state = BCSP_W4_PKT_DELIMITER; bcsp->rx_count = 0; continue; @@ -644,6 +646,7 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) bscp_get_crc(bcsp)); kfree_skb(bcsp->rx_skb); + bcsp->rx_skb = NULL; bcsp->rx_state = BCSP_W4_PKT_DELIMITER; bcsp->rx_count = 0; continue; diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index 52e6d4d1608e..69c00a3db538 100644 --- a/drivers/bluetooth/hci_serdev.c +++ b/drivers/bluetooth/hci_serdev.c @@ -360,6 +360,7 @@ void hci_uart_unregister_device(struct hci_uart *hu) { struct hci_dev *hdev = hu->hdev; + clear_bit(HCI_UART_PROTO_READY, &hu->flags); hci_unregister_dev(hdev); hci_free_dev(hdev); diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index ea6558d4864c..1c90da4af94f 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -410,10 +410,10 @@ static int cdrom_get_disc_info(struct cdrom_device_info *cdi, * hack to have the capability flags defined const, while we can still * change it here without gcc complaining at every line. */ -#define ENSURE(call, bits) \ -do { \ - if (cdo->call == NULL) \ - *change_capability &= ~(bits); \ +#define ENSURE(cdo, call, bits) \ +do { \ + if (cdo->call == NULL) \ + WARN_ON_ONCE((cdo)->capability & (bits)); \ } while (0) /* @@ -589,7 +589,6 @@ int register_cdrom(struct cdrom_device_info *cdi) { static char banner_printed; const struct cdrom_device_ops *cdo = cdi->ops; - int *change_capability = (int *)&cdo->capability; /* hack */ cd_dbg(CD_OPEN, "entering register_cdrom\n"); @@ -601,16 +600,16 @@ int register_cdrom(struct cdrom_device_info *cdi) cdrom_sysctl_register(); } - ENSURE(drive_status, CDC_DRIVE_STATUS); + ENSURE(cdo, drive_status, CDC_DRIVE_STATUS); if (cdo->check_events == NULL && cdo->media_changed == NULL) - *change_capability = ~(CDC_MEDIA_CHANGED | CDC_SELECT_DISC); - ENSURE(tray_move, CDC_CLOSE_TRAY | CDC_OPEN_TRAY); - ENSURE(lock_door, CDC_LOCK); - ENSURE(select_speed, CDC_SELECT_SPEED); - ENSURE(get_last_session, CDC_MULTI_SESSION); - ENSURE(get_mcn, CDC_MCN); - ENSURE(reset, CDC_RESET); - ENSURE(generic_packet, CDC_GENERIC_PACKET); + WARN_ON_ONCE(cdo->capability & (CDC_MEDIA_CHANGED | CDC_SELECT_DISC)); + ENSURE(cdo, tray_move, CDC_CLOSE_TRAY | CDC_OPEN_TRAY); + ENSURE(cdo, lock_door, CDC_LOCK); + ENSURE(cdo, select_speed, CDC_SELECT_SPEED); + ENSURE(cdo, get_last_session, CDC_MULTI_SESSION); + ENSURE(cdo, get_mcn, CDC_MCN); + ENSURE(cdo, reset, CDC_RESET); + ENSURE(cdo, generic_packet, CDC_GENERIC_PACKET); cdi->mc_flags = 0; cdi->options = CDO_USE_FFLAGS; @@ -996,6 +995,12 @@ static void cdrom_count_tracks(struct cdrom_device_info *cdi, tracktype *tracks) tracks->xa = 0; tracks->error = 0; cd_dbg(CD_COUNT_TRACKS, "entering cdrom_count_tracks\n"); + + if (!CDROM_CAN(CDC_PLAY_AUDIO)) { + tracks->error = CDS_NO_INFO; + return; + } + /* Grab the TOC header so we can see how many tracks there are */ ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCHDR, &header); if (ret) { @@ -1162,7 +1167,8 @@ int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev, ret = open_for_data(cdi); if (ret) goto err; - cdrom_mmc3_profile(cdi); + if (CDROM_CAN(CDC_GENERIC_PACKET)) + cdrom_mmc3_profile(cdi); if (mode & FMODE_WRITE) { ret = -EROFS; if (cdrom_open_write(cdi)) @@ -2879,6 +2885,9 @@ int cdrom_get_last_written(struct cdrom_device_info *cdi, long *last_written) it doesn't give enough information or fails. then we return the toc contents. */ use_toc: + if (!CDROM_CAN(CDC_PLAY_AUDIO)) + return -ENOSYS; + toc.cdte_format = CDROM_MSF; toc.cdte_track = CDROM_LEADOUT; if ((ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCENTRY, &toc))) diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c index 25173454efa3..091753765d99 100644 --- a/drivers/char/hw_random/omap-rng.c +++ b/drivers/char/hw_random/omap-rng.c @@ -66,6 +66,13 @@ #define OMAP4_RNG_OUTPUT_SIZE 0x8 #define EIP76_RNG_OUTPUT_SIZE 0x10 +/* + * EIP76 RNG takes approx. 700us to produce 16 bytes of output data + * as per testing results. And to account for the lack of udelay()'s + * reliability, we keep the timeout as 1000us. + */ +#define RNG_DATA_FILL_TIMEOUT 100 + enum { RNG_OUTPUT_0_REG = 0, RNG_OUTPUT_1_REG, @@ -175,7 +182,7 @@ static int omap_rng_do_read(struct hwrng *rng, void *data, size_t max, if (max < priv->pdata->data_size) return 0; - for (i = 0; i < 20; i++) { + for (i = 0; i < RNG_DATA_FILL_TIMEOUT; i++) { present = priv->pdata->data_present(priv); if (present || !wait) break; diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c index 38b719017186..648e39ce6bd9 100644 --- a/drivers/char/hw_random/omap3-rom-rng.c +++ b/drivers/char/hw_random/omap3-rom-rng.c @@ -121,7 +121,8 @@ static int omap3_rom_rng_remove(struct platform_device *pdev) { cancel_delayed_work_sync(&idle_work); hwrng_unregister(&omap3_rom_rng_ops); - clk_disable_unprepare(rng_clk); + if (!rng_idle) + clk_disable_unprepare(rng_clk); return 0; } diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c index 83c695938a2d..f53d47e3355d 100644 --- a/drivers/char/hw_random/stm32-rng.c +++ b/drivers/char/hw_random/stm32-rng.c @@ -166,6 +166,13 @@ static int stm32_rng_probe(struct platform_device *ofdev) return devm_hwrng_register(dev, &priv->rng); } +static int stm32_rng_remove(struct platform_device *ofdev) +{ + pm_runtime_disable(&ofdev->dev); + + return 0; +} + #ifdef CONFIG_PM static int stm32_rng_runtime_suspend(struct device *dev) { @@ -202,6 +209,7 @@ static struct platform_driver stm32_rng_driver = { .of_match_table = stm32_rng_match, }, .probe = stm32_rng_probe, + .remove = stm32_rng_remove, }; module_platform_driver(stm32_rng_driver); diff --git a/drivers/char/ipmi/ipmi_dmi.c b/drivers/char/ipmi/ipmi_dmi.c index c3a23ec3e76f..a37d9794170c 100644 --- a/drivers/char/ipmi/ipmi_dmi.c +++ b/drivers/char/ipmi/ipmi_dmi.c @@ -197,6 +197,10 @@ static void __init dmi_decode_ipmi(const struct dmi_header *dm) slave_addr = data[DMI_IPMI_SLAVEADDR]; memcpy(&base_addr, data + DMI_IPMI_ADDR, sizeof(unsigned long)); + if (!base_addr) { + pr_err("Base address is zero, assuming no IPMI interface\n"); + return; + } if (len >= DMI_IPMI_VER2_LENGTH) { if (type == IPMI_DMI_TYPE_SSIF) { offset = 0; diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 941bffd9b49c..0146bc3252c5 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -750,10 +750,14 @@ static void msg_done_handler(struct ssif_info *ssif_info, int result, flags = ipmi_ssif_lock_cond(ssif_info, &oflags); msg = ssif_info->curr_msg; if (msg) { + if (data) { + if (len > IPMI_MAX_MSG_LENGTH) + len = IPMI_MAX_MSG_LENGTH; + memcpy(msg->rsp, data, len); + } else { + len = 0; + } msg->rsp_size = len; - if (msg->rsp_size > IPMI_MAX_MSG_LENGTH) - msg->rsp_size = IPMI_MAX_MSG_LENGTH; - memcpy(msg->rsp, data, msg->rsp_size); ssif_info->curr_msg = NULL; } diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c index d256110ba672..0023bde4d4ff 100644 --- a/drivers/char/ppdev.c +++ b/drivers/char/ppdev.c @@ -623,20 +623,27 @@ static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (copy_from_user(time32, argp, sizeof(time32))) return -EFAULT; + if ((time32[0] < 0) || (time32[1] < 0)) + return -EINVAL; + return pp_set_timeout(pp->pdev, time32[0], time32[1]); case PPSETTIME64: if (copy_from_user(time64, argp, sizeof(time64))) return -EFAULT; + if ((time64[0] < 0) || (time64[1] < 0)) + return -EINVAL; + + if (IS_ENABLED(CONFIG_SPARC64) && !in_compat_syscall()) + time64[1] >>= 32; + return pp_set_timeout(pp->pdev, time64[0], time64[1]); case PPGETTIME32: jiffies_to_timespec64(pp->pdev->timeout, &ts); time32[0] = ts.tv_sec; time32[1] = ts.tv_nsec / NSEC_PER_USEC; - if ((time32[0] < 0) || (time32[1] < 0)) - return -EINVAL; if (copy_to_user(argp, time32, sizeof(time32))) return -EFAULT; @@ -647,8 +654,9 @@ static int pp_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg) jiffies_to_timespec64(pp->pdev->timeout, &ts); time64[0] = ts.tv_sec; time64[1] = ts.tv_nsec / NSEC_PER_USEC; - if ((time64[0] < 0) || (time64[1] < 0)) - return -EINVAL; + + if (IS_ENABLED(CONFIG_SPARC64) && !in_compat_syscall()) + time64[1] <<= 32; if (copy_to_user(argp, time64, sizeof(time64))) return -EFAULT; diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index 44a3d16231f6..dd64b3b37400 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -1029,6 +1029,10 @@ static int tpm2_get_cc_attrs_tbl(struct tpm_chip *chip) chip->cc_attrs_tbl = devm_kzalloc(&chip->dev, 4 * nr_commands, GFP_KERNEL); + if (!chip->cc_attrs_tbl) { + rc = -ENOMEM; + goto out; + } rc = tpm_buf_init(&buf, TPM2_ST_NO_SESSIONS, TPM2_CC_GET_CAPABILITY); if (rc) diff --git a/drivers/char/ttyprintk.c b/drivers/char/ttyprintk.c index 67549ce88cc9..774748497ace 100644 --- a/drivers/char/ttyprintk.c +++ b/drivers/char/ttyprintk.c @@ -18,10 +18,11 @@ #include #include #include +#include struct ttyprintk_port { struct tty_port port; - struct mutex port_write_mutex; + spinlock_t spinlock; }; static struct ttyprintk_port tpk_port; @@ -100,11 +101,12 @@ static int tpk_open(struct tty_struct *tty, struct file *filp) static void tpk_close(struct tty_struct *tty, struct file *filp) { struct ttyprintk_port *tpkp = tty->driver_data; + unsigned long flags; - mutex_lock(&tpkp->port_write_mutex); + spin_lock_irqsave(&tpkp->spinlock, flags); /* flush tpk_printk buffer */ tpk_printk(NULL, 0); - mutex_unlock(&tpkp->port_write_mutex); + spin_unlock_irqrestore(&tpkp->spinlock, flags); tty_port_close(&tpkp->port, tty, filp); } @@ -116,13 +118,14 @@ static int tpk_write(struct tty_struct *tty, const unsigned char *buf, int count) { struct ttyprintk_port *tpkp = tty->driver_data; + unsigned long flags; int ret; /* exclusive use of tpk_printk within this tty */ - mutex_lock(&tpkp->port_write_mutex); + spin_lock_irqsave(&tpkp->spinlock, flags); ret = tpk_printk(buf, count); - mutex_unlock(&tpkp->port_write_mutex); + spin_unlock_irqrestore(&tpkp->spinlock, flags); return ret; } @@ -172,7 +175,7 @@ static int __init ttyprintk_init(void) { int ret = -ENOMEM; - mutex_init(&tpk_port.port_write_mutex); + spin_lock_init(&tpk_port.spinlock); ttyprintk_driver = tty_alloc_driver(1, TTY_DRIVER_RESET_TERMIOS | diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 65454acd4b97..5200772ab0bd 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1366,24 +1366,24 @@ static void set_console_size(struct port *port, u16 rows, u16 cols) port->cons.ws.ws_col = cols; } -static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock) +static int fill_queue(struct virtqueue *vq, spinlock_t *lock) { struct port_buffer *buf; - unsigned int nr_added_bufs; + int nr_added_bufs; int ret; nr_added_bufs = 0; do { buf = alloc_buf(vq->vdev, PAGE_SIZE, 0); if (!buf) - break; + return -ENOMEM; spin_lock_irq(lock); ret = add_inbuf(vq, buf); if (ret < 0) { spin_unlock_irq(lock); free_buf(buf, true); - break; + return ret; } nr_added_bufs++; spin_unlock_irq(lock); @@ -1403,7 +1403,6 @@ static int add_port(struct ports_device *portdev, u32 id) char debugfs_name[16]; struct port *port; dev_t devt; - unsigned int nr_added_bufs; int err; port = kmalloc(sizeof(*port), GFP_KERNEL); @@ -1462,11 +1461,13 @@ static int add_port(struct ports_device *portdev, u32 id) spin_lock_init(&port->outvq_lock); init_waitqueue_head(&port->waitqueue); - /* Fill the in_vq with buffers so the host can send us data. */ - nr_added_bufs = fill_queue(port->in_vq, &port->inbuf_lock); - if (!nr_added_bufs) { + /* We can safely ignore ENOSPC because it means + * the queue already has buffers. Buffers are removed + * only by virtcons_remove(), not by unplug_port() + */ + err = fill_queue(port->in_vq, &port->inbuf_lock); + if (err < 0 && err != -ENOSPC) { dev_err(port->dev, "Error allocating inbufs\n"); - err = -ENOMEM; goto free_device; } @@ -2099,14 +2100,11 @@ static int virtcons_probe(struct virtio_device *vdev) INIT_WORK(&portdev->control_work, &control_work_handler); if (multiport) { - unsigned int nr_added_bufs; - spin_lock_init(&portdev->c_ivq_lock); spin_lock_init(&portdev->c_ovq_lock); - nr_added_bufs = fill_queue(portdev->c_ivq, - &portdev->c_ivq_lock); - if (!nr_added_bufs) { + err = fill_queue(portdev->c_ivq, &portdev->c_ivq_lock); + if (err < 0) { dev_err(&vdev->dev, "Error allocating buffers for control queue\n"); /* @@ -2117,7 +2115,7 @@ static int virtcons_probe(struct virtio_device *vdev) VIRTIO_CONSOLE_DEVICE_READY, 0); /* Device was functional: we need full cleanup. */ virtcons_remove(vdev); - return -ENOMEM; + return err; } } else { /* diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile index 2ce3e79dcb84..071102ab1d0c 100644 --- a/drivers/clk/Makefile +++ b/drivers/clk/Makefile @@ -66,6 +66,7 @@ obj-$(CONFIG_ARCH_HISI) += hisilicon/ obj-y += imgtec/ obj-$(CONFIG_ARCH_MXC) += imx/ obj-$(CONFIG_MACH_INGENIC) += ingenic/ +obj-$(CONFIG_ARCH_K3) += keystone/ obj-$(CONFIG_ARCH_KEYSTONE) += keystone/ obj-$(CONFIG_MACH_LOONGSON32) += loongson1/ obj-$(CONFIG_COMMON_CLK_MEDIATEK) += mediatek/ diff --git a/drivers/clk/at91/clk-audio-pll.c b/drivers/clk/at91/clk-audio-pll.c index da7bafcfbe70..b3eaf654fac9 100644 --- a/drivers/clk/at91/clk-audio-pll.c +++ b/drivers/clk/at91/clk-audio-pll.c @@ -509,7 +509,7 @@ static void __init of_sama5d2_clk_audio_pll_pad_setup(struct device_node *np) static void __init of_sama5d2_clk_audio_pll_pmc_setup(struct device_node *np) { - struct clk_audio_pad *apmc_ck; + struct clk_audio_pmc *apmc_ck; struct clk_init_data init = {}; apmc_ck = kzalloc(sizeof(*apmc_ck), GFP_KERNEL); diff --git a/drivers/clk/at91/clk-generated.c b/drivers/clk/at91/clk-generated.c index 113152425a95..ea23002be4de 100644 --- a/drivers/clk/at91/clk-generated.c +++ b/drivers/clk/at91/clk-generated.c @@ -284,7 +284,7 @@ static void clk_generated_startup(struct clk_generated *gck) static struct clk_hw * __init at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, const char *name, const char **parent_names, - u8 num_parents, u8 id, + u8 num_parents, u8 id, bool pll_audio, const struct clk_range *range) { struct clk_generated *gck; @@ -308,6 +308,7 @@ at91_clk_register_generated(struct regmap *regmap, spinlock_t *lock, gck->regmap = regmap; gck->lock = lock; gck->range = *range; + gck->audio_pll_allowed = pll_audio; clk_generated_startup(gck); hw = &gck->hw; @@ -333,7 +334,6 @@ static void __init of_sama5d2_clk_generated_setup(struct device_node *np) struct device_node *gcknp; struct clk_range range = CLK_RANGE(0, 0); struct regmap *regmap; - struct clk_generated *gck; num_parents = of_clk_get_parent_count(np); if (num_parents == 0 || num_parents > GENERATED_SOURCE_MAX) @@ -350,6 +350,8 @@ static void __init of_sama5d2_clk_generated_setup(struct device_node *np) return; for_each_child_of_node(np, gcknp) { + bool pll_audio = false; + if (of_property_read_u32(gcknp, "reg", &id)) continue; @@ -362,24 +364,14 @@ static void __init of_sama5d2_clk_generated_setup(struct device_node *np) of_at91_get_clk_range(gcknp, "atmel,clk-output-range", &range); + if (of_device_is_compatible(np, "atmel,sama5d2-clk-generated") && + (id == GCK_ID_I2S0 || id == GCK_ID_I2S1 || + id == GCK_ID_CLASSD)) + pll_audio = true; + hw = at91_clk_register_generated(regmap, &pmc_pcr_lock, name, parent_names, num_parents, - id, &range); - - gck = to_clk_generated(hw); - - if (of_device_is_compatible(np, - "atmel,sama5d2-clk-generated")) { - if (gck->id == GCK_ID_SSC0 || gck->id == GCK_ID_SSC1 || - gck->id == GCK_ID_I2S0 || gck->id == GCK_ID_I2S1 || - gck->id == GCK_ID_CLASSD) - gck->audio_pll_allowed = true; - else - gck->audio_pll_allowed = false; - } else { - gck->audio_pll_allowed = false; - } - + id, pll_audio, &range); if (IS_ERR(hw)) continue; diff --git a/drivers/clk/at91/clk-main.c b/drivers/clk/at91/clk-main.c index 2f97a843d6d6..90988e7a5b47 100644 --- a/drivers/clk/at91/clk-main.c +++ b/drivers/clk/at91/clk-main.c @@ -162,7 +162,7 @@ at91_clk_register_main_osc(struct regmap *regmap, if (bypass) regmap_update_bits(regmap, AT91_CKGR_MOR, MOR_KEY_MASK | - AT91_PMC_MOSCEN, + AT91_PMC_OSCBYPASS, AT91_PMC_OSCBYPASS | AT91_PMC_KEY); hw = &osc->hw; @@ -354,7 +354,10 @@ static int clk_main_probe_frequency(struct regmap *regmap) regmap_read(regmap, AT91_CKGR_MCFR, &mcfr); if (mcfr & AT91_PMC_MAINRDY) return 0; - usleep_range(MAINF_LOOP_MIN_WAIT, MAINF_LOOP_MAX_WAIT); + if (system_state < SYSTEM_RUNNING) + udelay(MAINF_LOOP_MIN_WAIT); + else + usleep_range(MAINF_LOOP_MIN_WAIT, MAINF_LOOP_MAX_WAIT); } while (time_before(prep_time, timeout)); return -ETIMEDOUT; diff --git a/drivers/clk/at91/sckc.c b/drivers/clk/at91/sckc.c index ab6ecefc49ad..43ba2a8b03fa 100644 --- a/drivers/clk/at91/sckc.c +++ b/drivers/clk/at91/sckc.c @@ -74,7 +74,10 @@ static int clk_slow_osc_prepare(struct clk_hw *hw) writel(tmp | AT91_SCKC_OSC32EN, sckcr); - usleep_range(osc->startup_usec, osc->startup_usec + 1); + if (system_state < SYSTEM_RUNNING) + udelay(osc->startup_usec); + else + usleep_range(osc->startup_usec, osc->startup_usec + 1); return 0; } @@ -197,7 +200,10 @@ static int clk_slow_rc_osc_prepare(struct clk_hw *hw) writel(readl(sckcr) | AT91_SCKC_RCEN, sckcr); - usleep_range(osc->startup_usec, osc->startup_usec + 1); + if (system_state < SYSTEM_RUNNING) + udelay(osc->startup_usec); + else + usleep_range(osc->startup_usec, osc->startup_usec + 1); return 0; } @@ -310,7 +316,10 @@ static int clk_sam9x5_slow_set_parent(struct clk_hw *hw, u8 index) writel(tmp, sckcr); - usleep_range(SLOWCK_SW_TIME_USEC, SLOWCK_SW_TIME_USEC + 1); + if (system_state < SYSTEM_RUNNING) + udelay(SLOWCK_SW_TIME_USEC); + else + usleep_range(SLOWCK_SW_TIME_USEC, SLOWCK_SW_TIME_USEC + 1); return 0; } @@ -443,7 +452,10 @@ static int clk_sama5d4_slow_osc_prepare(struct clk_hw *hw) return 0; } - usleep_range(osc->startup_usec, osc->startup_usec + 1); + if (system_state < SYSTEM_RUNNING) + udelay(osc->startup_usec); + else + usleep_range(osc->startup_usec, osc->startup_usec + 1); osc->prepared = true; return 0; diff --git a/drivers/clk/clk-highbank.c b/drivers/clk/clk-highbank.c index 727ed8e1bb72..8e4581004695 100644 --- a/drivers/clk/clk-highbank.c +++ b/drivers/clk/clk-highbank.c @@ -293,6 +293,7 @@ static __init struct clk *hb_clk_init(struct device_node *node, const struct clk /* Map system registers */ srnp = of_find_compatible_node(NULL, NULL, "calxeda,hb-sregs"); hb_clk->reg = of_iomap(srnp, 0); + of_node_put(srnp); BUG_ON(!hb_clk->reg); hb_clk->reg += reg; diff --git a/drivers/clk/clk-qoriq.c b/drivers/clk/clk-qoriq.c index 1a292519d84f..999a90a16609 100644 --- a/drivers/clk/clk-qoriq.c +++ b/drivers/clk/clk-qoriq.c @@ -1382,6 +1382,7 @@ static void __init clockgen_init(struct device_node *np) pr_err("%s: Couldn't map %pOF regs\n", __func__, guts); } + of_node_put(guts); } } diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index 6e22a564843c..4df914b79db6 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2493,11 +2493,17 @@ static int __clk_core_init(struct clk_core *core) if (core->flags & CLK_IS_CRITICAL) { unsigned long flags; - clk_core_prepare(core); + ret = clk_core_prepare(core); + if (ret) + goto out; flags = clk_enable_lock(); - clk_core_enable(core); + ret = clk_core_enable(core); clk_enable_unlock(flags); + if (ret) { + clk_core_unprepare(core); + goto out; + } } /* diff --git a/drivers/clk/imgtec/clk-boston.c b/drivers/clk/imgtec/clk-boston.c index f5d54a64d33c..dddda45127a8 100644 --- a/drivers/clk/imgtec/clk-boston.c +++ b/drivers/clk/imgtec/clk-boston.c @@ -73,31 +73,39 @@ static void __init clk_boston_setup(struct device_node *np) hw = clk_hw_register_fixed_rate(NULL, "input", NULL, 0, in_freq); if (IS_ERR(hw)) { pr_err("failed to register input clock: %ld\n", PTR_ERR(hw)); - goto error; + goto fail_input; } onecell->hws[BOSTON_CLK_INPUT] = hw; hw = clk_hw_register_fixed_rate(NULL, "sys", "input", 0, sys_freq); if (IS_ERR(hw)) { pr_err("failed to register sys clock: %ld\n", PTR_ERR(hw)); - goto error; + goto fail_sys; } onecell->hws[BOSTON_CLK_SYS] = hw; hw = clk_hw_register_fixed_rate(NULL, "cpu", "input", 0, cpu_freq); if (IS_ERR(hw)) { pr_err("failed to register cpu clock: %ld\n", PTR_ERR(hw)); - goto error; + goto fail_cpu; } onecell->hws[BOSTON_CLK_CPU] = hw; err = of_clk_add_hw_provider(np, of_clk_hw_onecell_get, onecell); - if (err) + if (err) { pr_err("failed to add DT provider: %d\n", err); + goto fail_clk_add; + } return; -error: +fail_clk_add: + clk_hw_unregister_fixed_rate(onecell->hws[BOSTON_CLK_CPU]); +fail_cpu: + clk_hw_unregister_fixed_rate(onecell->hws[BOSTON_CLK_SYS]); +fail_sys: + clk_hw_unregister_fixed_rate(onecell->hws[BOSTON_CLK_INPUT]); +fail_input: kfree(onecell); } diff --git a/drivers/clk/imx/clk-imx6q.c b/drivers/clk/imx/clk-imx6q.c index 8eb93eb2f857..e0547654cb7b 100644 --- a/drivers/clk/imx/clk-imx6q.c +++ b/drivers/clk/imx/clk-imx6q.c @@ -431,6 +431,7 @@ static void __init imx6q_clocks_init(struct device_node *ccm_node) np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-anatop"); anatop_base = base = of_iomap(np, 0); WARN_ON(!base); + of_node_put(np); /* Audio/video PLL post dividers do not work on i.MX6q revision 1.0 */ if (clk_on_imx6q() && imx_get_soc_revision() == IMX_CHIP_REVISION_1_0) { diff --git a/drivers/clk/imx/clk-imx6sx.c b/drivers/clk/imx/clk-imx6sx.c index e6d389e333d7..baa07553a0dd 100644 --- a/drivers/clk/imx/clk-imx6sx.c +++ b/drivers/clk/imx/clk-imx6sx.c @@ -164,6 +164,7 @@ static void __init imx6sx_clocks_init(struct device_node *ccm_node) np = of_find_compatible_node(NULL, NULL, "fsl,imx6sx-anatop"); base = of_iomap(np, 0); WARN_ON(!base); + of_node_put(np); clks[IMX6SX_PLL1_BYPASS_SRC] = imx_clk_mux("pll1_bypass_src", base + 0x00, 14, 1, pll_bypass_src_sels, ARRAY_SIZE(pll_bypass_src_sels)); clks[IMX6SX_PLL2_BYPASS_SRC] = imx_clk_mux("pll2_bypass_src", base + 0x30, 14, 1, pll_bypass_src_sels, ARRAY_SIZE(pll_bypass_src_sels)); diff --git a/drivers/clk/imx/clk-imx7d.c b/drivers/clk/imx/clk-imx7d.c index 0ac9b30c8b90..9f5e5b9d4a25 100644 --- a/drivers/clk/imx/clk-imx7d.c +++ b/drivers/clk/imx/clk-imx7d.c @@ -416,6 +416,7 @@ static void __init imx7d_clocks_init(struct device_node *ccm_node) np = of_find_compatible_node(NULL, NULL, "fsl,imx7d-anatop"); base = of_iomap(np, 0); WARN_ON(!base); + of_node_put(np); clks[IMX7D_PLL_ARM_MAIN_SRC] = imx_clk_mux("pll_arm_main_src", base + 0x60, 14, 2, pll_bypass_src_sel, ARRAY_SIZE(pll_bypass_src_sel)); clks[IMX7D_PLL_DRAM_MAIN_SRC] = imx_clk_mux("pll_dram_main_src", base + 0x70, 14, 2, pll_bypass_src_sel, ARRAY_SIZE(pll_bypass_src_sel)); diff --git a/drivers/clk/imx/clk-vf610.c b/drivers/clk/imx/clk-vf610.c index 6dae54325a91..a334667c450a 100644 --- a/drivers/clk/imx/clk-vf610.c +++ b/drivers/clk/imx/clk-vf610.c @@ -203,6 +203,7 @@ static void __init vf610_clocks_init(struct device_node *ccm_node) np = of_find_compatible_node(NULL, NULL, "fsl,vf610-anatop"); anatop_base = of_iomap(np, 0); BUG_ON(!anatop_base); + of_node_put(np); np = ccm_node; ccm_base = of_iomap(np, 0); diff --git a/drivers/clk/keystone/Kconfig b/drivers/clk/keystone/Kconfig index 7e9f0176578a..b04927d06cd1 100644 --- a/drivers/clk/keystone/Kconfig +++ b/drivers/clk/keystone/Kconfig @@ -7,7 +7,7 @@ config COMMON_CLK_KEYSTONE config TI_SCI_CLK tristate "TI System Control Interface clock drivers" - depends on (ARCH_KEYSTONE || COMPILE_TEST) && OF + depends on (ARCH_KEYSTONE || ARCH_K3 || COMPILE_TEST) && OF depends on TI_SCI_PROTOCOL default ARCH_KEYSTONE ---help--- diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c index 92168348ffa6..f2d27addf485 100644 --- a/drivers/clk/meson/gxbb.c +++ b/drivers/clk/meson/gxbb.c @@ -687,6 +687,7 @@ static struct clk_divider gxbb_sar_adc_clk_div = { .ops = &clk_divider_ops, .parent_names = (const char *[]){ "sar_adc_clk_sel" }, .num_parents = 1, + .flags = CLK_SET_RATE_PARENT, }, }; diff --git a/drivers/clk/mmp/clk-of-mmp2.c b/drivers/clk/mmp/clk-of-mmp2.c index 0fc75c395957..10689d8cd386 100644 --- a/drivers/clk/mmp/clk-of-mmp2.c +++ b/drivers/clk/mmp/clk-of-mmp2.c @@ -134,7 +134,7 @@ static DEFINE_SPINLOCK(ssp3_lock); static const char *ssp_parent_names[] = {"vctcxo_4", "vctcxo_2", "vctcxo", "pll1_16"}; static DEFINE_SPINLOCK(timer_lock); -static const char *timer_parent_names[] = {"clk32", "vctcxo_2", "vctcxo_4", "vctcxo"}; +static const char *timer_parent_names[] = {"clk32", "vctcxo_4", "vctcxo_2", "vctcxo"}; static DEFINE_SPINLOCK(reset_lock); @@ -227,8 +227,8 @@ static struct mmp_param_gate_clk apmu_gate_clks[] = { /* The gate clocks has mux parent. */ {MMP2_CLK_SDH0, "sdh0_clk", "sdh_mix_clk", CLK_SET_RATE_PARENT, APMU_SDH0, 0x1b, 0x1b, 0x0, 0, &sdh_lock}, {MMP2_CLK_SDH1, "sdh1_clk", "sdh_mix_clk", CLK_SET_RATE_PARENT, APMU_SDH1, 0x1b, 0x1b, 0x0, 0, &sdh_lock}, - {MMP2_CLK_SDH1, "sdh2_clk", "sdh_mix_clk", CLK_SET_RATE_PARENT, APMU_SDH2, 0x1b, 0x1b, 0x0, 0, &sdh_lock}, - {MMP2_CLK_SDH1, "sdh3_clk", "sdh_mix_clk", CLK_SET_RATE_PARENT, APMU_SDH3, 0x1b, 0x1b, 0x0, 0, &sdh_lock}, + {MMP2_CLK_SDH2, "sdh2_clk", "sdh_mix_clk", CLK_SET_RATE_PARENT, APMU_SDH2, 0x1b, 0x1b, 0x0, 0, &sdh_lock}, + {MMP2_CLK_SDH3, "sdh3_clk", "sdh_mix_clk", CLK_SET_RATE_PARENT, APMU_SDH3, 0x1b, 0x1b, 0x0, 0, &sdh_lock}, {MMP2_CLK_DISP0, "disp0_clk", "disp0_div", CLK_SET_RATE_PARENT, APMU_DISP0, 0x1b, 0x1b, 0x0, 0, &disp0_lock}, {MMP2_CLK_DISP0_SPHY, "disp0_sphy_clk", "disp0_sphy_div", CLK_SET_RATE_PARENT, APMU_DISP0, 0x1024, 0x1024, 0x0, 0, &disp0_lock}, {MMP2_CLK_DISP1, "disp1_clk", "disp1_div", CLK_SET_RATE_PARENT, APMU_DISP1, 0x1b, 0x1b, 0x0, 0, &disp1_lock}, diff --git a/drivers/clk/mvebu/armada-370.c b/drivers/clk/mvebu/armada-370.c index 2c7c1085f883..8fdfa97900cd 100644 --- a/drivers/clk/mvebu/armada-370.c +++ b/drivers/clk/mvebu/armada-370.c @@ -177,8 +177,10 @@ static void __init a370_clk_init(struct device_node *np) mvebu_coreclk_setup(np, &a370_coreclks); - if (cgnp) + if (cgnp) { mvebu_clk_gating_setup(cgnp, a370_gating_desc); + of_node_put(cgnp); + } } CLK_OF_DECLARE(a370_clk, "marvell,armada-370-core-clock", a370_clk_init); diff --git a/drivers/clk/mvebu/armada-xp.c b/drivers/clk/mvebu/armada-xp.c index 0ec44ae9a2a2..df529982adc9 100644 --- a/drivers/clk/mvebu/armada-xp.c +++ b/drivers/clk/mvebu/armada-xp.c @@ -228,7 +228,9 @@ static void __init axp_clk_init(struct device_node *np) mvebu_coreclk_setup(np, &axp_coreclks); - if (cgnp) + if (cgnp) { mvebu_clk_gating_setup(cgnp, axp_gating_desc); + of_node_put(cgnp); + } } CLK_OF_DECLARE(axp_clk, "marvell,armada-xp-core-clock", axp_clk_init); diff --git a/drivers/clk/mvebu/dove.c b/drivers/clk/mvebu/dove.c index 59fad9546c84..5f258c9bb68b 100644 --- a/drivers/clk/mvebu/dove.c +++ b/drivers/clk/mvebu/dove.c @@ -190,10 +190,14 @@ static void __init dove_clk_init(struct device_node *np) mvebu_coreclk_setup(np, &dove_coreclks); - if (ddnp) + if (ddnp) { dove_divider_clk_init(ddnp); + of_node_put(ddnp); + } - if (cgnp) + if (cgnp) { mvebu_clk_gating_setup(cgnp, dove_gating_desc); + of_node_put(cgnp); + } } CLK_OF_DECLARE(dove_clk, "marvell,dove-core-clock", dove_clk_init); diff --git a/drivers/clk/mvebu/kirkwood.c b/drivers/clk/mvebu/kirkwood.c index a2a8d614039d..890ebf623261 100644 --- a/drivers/clk/mvebu/kirkwood.c +++ b/drivers/clk/mvebu/kirkwood.c @@ -333,6 +333,8 @@ static void __init kirkwood_clk_init(struct device_node *np) if (cgnp) { mvebu_clk_gating_setup(cgnp, kirkwood_gating_desc); kirkwood_clk_muxing_setup(cgnp, kirkwood_mux_desc); + + of_node_put(cgnp); } } CLK_OF_DECLARE(kirkwood_clk, "marvell,kirkwood-core-clock", diff --git a/drivers/clk/mvebu/mv98dx3236.c b/drivers/clk/mvebu/mv98dx3236.c index 6e203af73cac..c8a0d03d2cd6 100644 --- a/drivers/clk/mvebu/mv98dx3236.c +++ b/drivers/clk/mvebu/mv98dx3236.c @@ -174,7 +174,9 @@ static void __init mv98dx3236_clk_init(struct device_node *np) mvebu_coreclk_setup(np, &mv98dx3236_core_clocks); - if (cgnp) + if (cgnp) { mvebu_clk_gating_setup(cgnp, mv98dx3236_gating_desc); + of_node_put(cgnp); + } } CLK_OF_DECLARE(mv98dx3236_clk, "marvell,mv98dx3236-core-clock", mv98dx3236_clk_init); diff --git a/drivers/clk/pxa/clk-pxa27x.c b/drivers/clk/pxa/clk-pxa27x.c index 25a30194d27a..b67ea86ff156 100644 --- a/drivers/clk/pxa/clk-pxa27x.c +++ b/drivers/clk/pxa/clk-pxa27x.c @@ -462,6 +462,7 @@ struct dummy_clk { }; static struct dummy_clk dummy_clks[] __initdata = { DUMMY_CLK(NULL, "pxa27x-gpio", "osc_32_768khz"), + DUMMY_CLK(NULL, "pxa-rtc", "osc_32_768khz"), DUMMY_CLK(NULL, "sa1100-rtc", "osc_32_768khz"), DUMMY_CLK("UARTCLK", "pxa2xx-ir", "STUART"), }; diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index 1a0985ae20d2..d3953ea69fda 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -210,8 +210,13 @@ static int _freq_tbl_determine_rate(struct clk_hw *hw, const struct freq_tbl *f, clk_flags = clk_hw_get_flags(hw); p = clk_hw_get_parent_by_index(hw, index); + if (!p) + return -EINVAL; + if (clk_flags & CLK_SET_RATE_PARENT) { if (f->pre_div) { + if (!rate) + rate = req->rate; rate /= 2; rate *= f->pre_div + 1; } diff --git a/drivers/clk/qcom/common.c b/drivers/clk/qcom/common.c index 28ceaf1e9937..ae9352f7706d 100644 --- a/drivers/clk/qcom/common.c +++ b/drivers/clk/qcom/common.c @@ -37,6 +37,9 @@ struct freq_tbl *qcom_find_freq(const struct freq_tbl *f, unsigned long rate) if (!f) return NULL; + if (!f->freq) + return f; + for (; f->freq; f++) if (rate <= f->freq) return f; diff --git a/drivers/clk/qcom/gcc-msm8996.c b/drivers/clk/qcom/gcc-msm8996.c index 7ddec886fcd3..c0b043b1bd24 100644 --- a/drivers/clk/qcom/gcc-msm8996.c +++ b/drivers/clk/qcom/gcc-msm8996.c @@ -140,22 +140,6 @@ static const char * const gcc_xo_gpll0_gpll4_gpll0_early_div[] = { "gpll0_early_div" }; -static const struct parent_map gcc_xo_gpll0_gpll2_gpll3_gpll0_early_div_map[] = { - { P_XO, 0 }, - { P_GPLL0, 1 }, - { P_GPLL2, 2 }, - { P_GPLL3, 3 }, - { P_GPLL0_EARLY_DIV, 6 } -}; - -static const char * const gcc_xo_gpll0_gpll2_gpll3_gpll0_early_div[] = { - "xo", - "gpll0", - "gpll2", - "gpll3", - "gpll0_early_div" -}; - static const struct parent_map gcc_xo_gpll0_gpll1_early_div_gpll1_gpll4_gpll0_early_div_map[] = { { P_XO, 0 }, { P_GPLL0, 1 }, @@ -194,26 +178,6 @@ static const char * const gcc_xo_gpll0_gpll2_gpll3_gpll1_gpll2_early_gpll0_early "gpll0_early_div" }; -static const struct parent_map gcc_xo_gpll0_gpll2_gpll3_gpll1_gpll4_gpll0_early_div_map[] = { - { P_XO, 0 }, - { P_GPLL0, 1 }, - { P_GPLL2, 2 }, - { P_GPLL3, 3 }, - { P_GPLL1, 4 }, - { P_GPLL4, 5 }, - { P_GPLL0_EARLY_DIV, 6 } -}; - -static const char * const gcc_xo_gpll0_gpll2_gpll3_gpll1_gpll4_gpll0_early_div[] = { - "xo", - "gpll0", - "gpll2", - "gpll3", - "gpll1", - "gpll4", - "gpll0_early_div" -}; - static struct clk_fixed_factor xo = { .mult = 1, .div = 1, diff --git a/drivers/clk/renesas/r8a77995-cpg-mssr.c b/drivers/clk/renesas/r8a77995-cpg-mssr.c index e594cf8ee63b..8434d5530fb1 100644 --- a/drivers/clk/renesas/r8a77995-cpg-mssr.c +++ b/drivers/clk/renesas/r8a77995-cpg-mssr.c @@ -141,8 +141,8 @@ static const struct mssr_mod_clk r8a77995_mod_clks[] __initconst = { DEF_MOD("vspbs", 627, R8A77995_CLK_S0D1), DEF_MOD("ehci0", 703, R8A77995_CLK_S3D2), DEF_MOD("hsusb", 704, R8A77995_CLK_S3D2), - DEF_MOD("du1", 723, R8A77995_CLK_S2D1), - DEF_MOD("du0", 724, R8A77995_CLK_S2D1), + DEF_MOD("du1", 723, R8A77995_CLK_S1D1), + DEF_MOD("du0", 724, R8A77995_CLK_S1D1), DEF_MOD("lvds", 727, R8A77995_CLK_S2D1), DEF_MOD("vin7", 804, R8A77995_CLK_S1D2), DEF_MOD("vin6", 805, R8A77995_CLK_S1D2), diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c index 2b0d772b4f43..8cdfcd77e3ad 100644 --- a/drivers/clk/rockchip/clk-rk3188.c +++ b/drivers/clk/rockchip/clk-rk3188.c @@ -362,8 +362,8 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = { RK2928_CLKGATE_CON(2), 5, GFLAGS), MUX(SCLK_MAC, "sclk_macref", mux_sclk_macref_p, CLK_SET_RATE_PARENT, RK2928_CLKSEL_CON(21), 4, 1, MFLAGS), - GATE(0, "sclk_mac_lbtest", "sclk_macref", - RK2928_CLKGATE_CON(2), 12, 0, GFLAGS), + GATE(0, "sclk_mac_lbtest", "sclk_macref", 0, + RK2928_CLKGATE_CON(2), 12, GFLAGS), COMPOSITE(0, "hsadc_src", mux_pll_src_gpll_cpll_p, 0, RK2928_CLKSEL_CON(22), 0, 1, MFLAGS, 8, 8, DFLAGS, @@ -391,8 +391,8 @@ static struct rockchip_clk_branch common_clk_branches[] __initdata = { * Clock-Architecture Diagram 4 */ - GATE(SCLK_SMC, "sclk_smc", "hclk_peri", - RK2928_CLKGATE_CON(2), 4, 0, GFLAGS), + GATE(SCLK_SMC, "sclk_smc", "hclk_peri", 0, + RK2928_CLKGATE_CON(2), 4, GFLAGS), COMPOSITE_NOMUX(SCLK_SPI0, "sclk_spi0", "pclk_peri", 0, RK2928_CLKSEL_CON(25), 0, 7, DFLAGS, diff --git a/drivers/clk/rockchip/clk-rk3328.c b/drivers/clk/rockchip/clk-rk3328.c index 33d1cf4e6d80..0e5222d1944b 100644 --- a/drivers/clk/rockchip/clk-rk3328.c +++ b/drivers/clk/rockchip/clk-rk3328.c @@ -392,7 +392,7 @@ static struct rockchip_clk_branch rk3328_clk_branches[] __initdata = { RK3328_CLKGATE_CON(1), 5, GFLAGS, &rk3328_i2s1_fracmux), GATE(SCLK_I2S1, "clk_i2s1", "i2s1_pre", CLK_SET_RATE_PARENT, - RK3328_CLKGATE_CON(0), 6, GFLAGS), + RK3328_CLKGATE_CON(1), 6, GFLAGS), COMPOSITE_NODIV(SCLK_I2S1_OUT, "i2s1_out", mux_i2s1out_p, 0, RK3328_CLKSEL_CON(8), 12, 1, MFLAGS, RK3328_CLKGATE_CON(1), 7, GFLAGS), diff --git a/drivers/clk/samsung/clk-cpu.c b/drivers/clk/samsung/clk-cpu.c index 6686e8ba61f9..82f023f29a61 100644 --- a/drivers/clk/samsung/clk-cpu.c +++ b/drivers/clk/samsung/clk-cpu.c @@ -152,7 +152,7 @@ static int exynos_cpuclk_pre_rate_change(struct clk_notifier_data *ndata, struct exynos_cpuclk *cpuclk, void __iomem *base) { const struct exynos_cpuclk_cfg_data *cfg_data = cpuclk->cfg; - unsigned long alt_prate = clk_get_rate(cpuclk->alt_parent); + unsigned long alt_prate = clk_hw_get_rate(cpuclk->alt_parent); unsigned long alt_div = 0, alt_div_mask = DIV_MASK; unsigned long div0, div1 = 0, mux_reg; unsigned long flags; @@ -280,7 +280,7 @@ static int exynos5433_cpuclk_pre_rate_change(struct clk_notifier_data *ndata, struct exynos_cpuclk *cpuclk, void __iomem *base) { const struct exynos_cpuclk_cfg_data *cfg_data = cpuclk->cfg; - unsigned long alt_prate = clk_get_rate(cpuclk->alt_parent); + unsigned long alt_prate = clk_hw_get_rate(cpuclk->alt_parent); unsigned long alt_div = 0, alt_div_mask = DIV_MASK; unsigned long div0, div1 = 0, mux_reg; unsigned long flags; @@ -432,7 +432,7 @@ int __init exynos_register_cpu_clock(struct samsung_clk_provider *ctx, else cpuclk->clk_nb.notifier_call = exynos_cpuclk_notifier_cb; - cpuclk->alt_parent = __clk_lookup(alt_parent); + cpuclk->alt_parent = __clk_get_hw(__clk_lookup(alt_parent)); if (!cpuclk->alt_parent) { pr_err("%s: could not lookup alternate parent %s\n", __func__, alt_parent); diff --git a/drivers/clk/samsung/clk-cpu.h b/drivers/clk/samsung/clk-cpu.h index d4b6b517fe1b..bd38c6aa3897 100644 --- a/drivers/clk/samsung/clk-cpu.h +++ b/drivers/clk/samsung/clk-cpu.h @@ -49,7 +49,7 @@ struct exynos_cpuclk_cfg_data { */ struct exynos_cpuclk { struct clk_hw hw; - struct clk *alt_parent; + struct clk_hw *alt_parent; void __iomem *ctrl_base; spinlock_t *lock; const struct exynos_cpuclk_cfg_data *cfg; diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c index d8d3cb67b402..3d3026221927 100644 --- a/drivers/clk/samsung/clk-exynos4.c +++ b/drivers/clk/samsung/clk-exynos4.c @@ -1240,6 +1240,7 @@ static unsigned long __init exynos4_get_xom(void) xom = readl(chipid_base + 8); iounmap(chipid_base); + of_node_put(np); } return xom; diff --git a/drivers/clk/samsung/clk-exynos5420.c b/drivers/clk/samsung/clk-exynos5420.c index 500a55415e90..2f54df5bef8e 100644 --- a/drivers/clk/samsung/clk-exynos5420.c +++ b/drivers/clk/samsung/clk-exynos5420.c @@ -170,12 +170,20 @@ static const unsigned long exynos5x_clk_regs[] __initconst = { GATE_BUS_CPU, GATE_SCLK_CPU, CLKOUT_CMU_CPU, + APLL_CON0, + KPLL_CON0, + CPLL_CON0, + DPLL_CON0, EPLL_CON0, EPLL_CON1, EPLL_CON2, RPLL_CON0, RPLL_CON1, RPLL_CON2, + IPLL_CON0, + SPLL_CON0, + VPLL_CON0, + MPLL_CON0, SRC_TOP0, SRC_TOP1, SRC_TOP2, @@ -633,6 +641,7 @@ static const struct samsung_div_clock exynos5420_div_clks[] __initconst = { }; static const struct samsung_gate_clock exynos5420_gate_clks[] __initconst = { + GATE(CLK_SECKEY, "seckey", "aclk66_psgen", GATE_BUS_PERIS1, 1, 0, 0), GATE(CLK_MAU_EPLL, "mau_epll", "mout_mau_epll_clk", SRC_MASK_TOP7, 20, CLK_SET_RATE_PARENT, 0), }; @@ -1167,8 +1176,6 @@ static const struct samsung_gate_clock exynos5x_gate_clks[] __initconst = { GATE(CLK_TMU, "tmu", "aclk66_psgen", GATE_IP_PERIS, 21, 0, 0), GATE(CLK_TMU_GPU, "tmu_gpu", "aclk66_psgen", GATE_IP_PERIS, 22, 0, 0), - GATE(CLK_SECKEY, "seckey", "aclk66_psgen", GATE_BUS_PERIS1, 1, 0, 0), - /* GEN Block */ GATE(CLK_ROTATOR, "rotator", "mout_user_aclk266", GATE_IP_GEN, 1, 0, 0), GATE(CLK_JPEG, "jpeg", "aclk300_jpeg", GATE_IP_GEN, 2, 0, 0), diff --git a/drivers/clk/socfpga/clk-pll-a10.c b/drivers/clk/socfpga/clk-pll-a10.c index 35fabe1a32c3..269467e8e07e 100644 --- a/drivers/clk/socfpga/clk-pll-a10.c +++ b/drivers/clk/socfpga/clk-pll-a10.c @@ -95,6 +95,7 @@ static struct clk * __init __socfpga_pll_init(struct device_node *node, clkmgr_np = of_find_compatible_node(NULL, NULL, "altr,clk-mgr"); clk_mgr_a10_base_addr = of_iomap(clkmgr_np, 0); + of_node_put(clkmgr_np); BUG_ON(!clk_mgr_a10_base_addr); pll_clk->hw.reg = clk_mgr_a10_base_addr + reg; diff --git a/drivers/clk/socfpga/clk-pll.c b/drivers/clk/socfpga/clk-pll.c index c7f463172e4b..b4b44e9b5901 100644 --- a/drivers/clk/socfpga/clk-pll.c +++ b/drivers/clk/socfpga/clk-pll.c @@ -100,6 +100,7 @@ static __init struct clk *__socfpga_pll_init(struct device_node *node, clkmgr_np = of_find_compatible_node(NULL, NULL, "altr,clk-mgr"); clk_mgr_base_addr = of_iomap(clkmgr_np, 0); + of_node_put(clkmgr_np); BUG_ON(!clk_mgr_base_addr); pll_clk->hw.reg = clk_mgr_base_addr + reg; diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c index 2bb4cabf802f..183985c8c9ba 100644 --- a/drivers/clk/sunxi-ng/ccu-sun50i-a64.c +++ b/drivers/clk/sunxi-ng/ccu-sun50i-a64.c @@ -158,7 +158,12 @@ static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_gpu_clk, "pll-gpu", #define SUN50I_A64_PLL_MIPI_REG 0x040 static struct ccu_nkm pll_mipi_clk = { - .enable = BIT(31), + /* + * The bit 23 and 22 are called "LDO{1,2}_EN" on the SoC's + * user manual, and by experiments the PLL doesn't work without + * these bits toggled. + */ + .enable = BIT(31) | BIT(23) | BIT(22), .lock = BIT(28), .n = _SUNXI_CCU_MULT(8, 4), .k = _SUNXI_CCU_MULT_MIN(4, 2, 2), @@ -560,7 +565,7 @@ static const char * const dsi_dphy_parents[] = { "pll-video0", "pll-periph0" }; static const u8 dsi_dphy_table[] = { 0, 2, }; static SUNXI_CCU_M_WITH_MUX_TABLE_GATE(dsi_dphy_clk, "dsi-dphy", dsi_dphy_parents, dsi_dphy_table, - 0x168, 0, 4, 8, 2, BIT(31), CLK_SET_RATE_PARENT); + 0x168, 0, 4, 8, 2, BIT(15), CLK_SET_RATE_PARENT); static SUNXI_CCU_M_WITH_GATE(gpu_clk, "gpu", "pll-gpu", 0x1a0, 0, 3, BIT(31), CLK_SET_RATE_PARENT); @@ -879,11 +884,26 @@ static const struct sunxi_ccu_desc sun50i_a64_ccu_desc = { .num_resets = ARRAY_SIZE(sun50i_a64_ccu_resets), }; +static struct ccu_pll_nb sun50i_a64_pll_cpu_nb = { + .common = &pll_cpux_clk.common, + /* copy from pll_cpux_clk */ + .enable = BIT(31), + .lock = BIT(28), +}; + +static struct ccu_mux_nb sun50i_a64_cpu_nb = { + .common = &cpux_clk.common, + .cm = &cpux_clk.mux, + .delay_us = 1, /* > 8 clock cycles at 24 MHz */ + .bypass_index = 1, /* index of 24 MHz oscillator */ +}; + static int sun50i_a64_ccu_probe(struct platform_device *pdev) { struct resource *res; void __iomem *reg; u32 val; + int ret; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); reg = devm_ioremap_resource(&pdev->dev, res); @@ -897,7 +917,18 @@ static int sun50i_a64_ccu_probe(struct platform_device *pdev) writel(0x515, reg + SUN50I_A64_PLL_MIPI_REG); - return sunxi_ccu_probe(pdev->dev.of_node, reg, &sun50i_a64_ccu_desc); + ret = sunxi_ccu_probe(pdev->dev.of_node, reg, &sun50i_a64_ccu_desc); + if (ret) + return ret; + + /* Gate then ungate PLL CPU after any rate changes */ + ccu_pll_notifier_register(&sun50i_a64_pll_cpu_nb); + + /* Reparent CPU during PLL CPU rate changes */ + ccu_mux_notifier_register(pll_cpux_clk.common.hw.clk, + &sun50i_a64_cpu_nb); + + return 0; } static const struct of_device_id sun50i_a64_ccu_ids[] = { diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-a23.c b/drivers/clk/sunxi-ng/ccu-sun8i-a23.c index d93b452f0df9..1cef040ebe82 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-a23.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-a23.c @@ -132,7 +132,7 @@ static SUNXI_CCU_NKM_WITH_GATE_LOCK(pll_mipi_clk, "pll-mipi", 8, 4, /* N */ 4, 2, /* K */ 0, 4, /* M */ - BIT(31), /* gate */ + BIT(31) | BIT(23) | BIT(22), /* gate */ BIT(28), /* lock */ CLK_SET_RATE_UNGATE); diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c index 1729ff6a5aae..b09acda71abe 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-h3.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-h3.c @@ -460,7 +460,7 @@ static const char * const csi_sclk_parents[] = { "pll-periph0", "pll-periph1" }; static SUNXI_CCU_M_WITH_MUX_GATE(csi_sclk_clk, "csi-sclk", csi_sclk_parents, 0x134, 16, 4, 24, 3, BIT(31), 0); -static const char * const csi_mclk_parents[] = { "osc24M", "pll-video", "pll-periph0" }; +static const char * const csi_mclk_parents[] = { "osc24M", "pll-video", "pll-periph1" }; static SUNXI_CCU_M_WITH_MUX_GATE(csi_mclk_clk, "csi-mclk", csi_mclk_parents, 0x134, 0, 5, 8, 3, BIT(15), 0); diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c index 9e3f4088724b..c7f9d974b10d 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c +++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.c @@ -84,7 +84,7 @@ static SUNXI_CCU_NM_WITH_FRAC_GATE_LOCK(pll_ve_clk, "pll-ve", BIT(28), /* lock */ 0); -static SUNXI_CCU_NKM_WITH_GATE_LOCK(pll_ddr_clk, "pll-ddr", +static SUNXI_CCU_NKM_WITH_GATE_LOCK(pll_ddr0_clk, "pll-ddr0", "osc24M", 0x020, 8, 5, /* N */ 4, 2, /* K */ @@ -123,6 +123,14 @@ static SUNXI_CCU_NK_WITH_GATE_LOCK_POSTDIV(pll_periph1_clk, "pll-periph1", 2, /* post-div */ 0); +static SUNXI_CCU_NM_WITH_GATE_LOCK(pll_ddr1_clk, "pll-ddr1", + "osc24M", 0x04c, + 8, 7, /* N */ + 0, 2, /* M */ + BIT(31), /* gate */ + BIT(28), /* lock */ + 0); + static const char * const cpu_parents[] = { "osc32k", "osc24M", "pll-cpu", "pll-cpu" }; static SUNXI_CCU_MUX(cpu_clk, "cpu", cpu_parents, @@ -310,7 +318,8 @@ static SUNXI_CCU_GATE(usb_phy0_clk, "usb-phy0", "osc24M", static SUNXI_CCU_GATE(usb_ohci0_clk, "usb-ohci0", "osc24M", 0x0cc, BIT(16), 0); -static const char * const dram_parents[] = { "pll-ddr", "pll-periph0-2x" }; +static const char * const dram_parents[] = { "pll-ddr0", "pll-ddr1", + "pll-periph0-2x" }; static SUNXI_CCU_M_WITH_MUX(dram_clk, "dram", dram_parents, 0x0f4, 0, 4, 20, 2, CLK_IS_CRITICAL); @@ -369,10 +378,11 @@ static struct ccu_common *sun8i_v3s_ccu_clks[] = { &pll_audio_base_clk.common, &pll_video_clk.common, &pll_ve_clk.common, - &pll_ddr_clk.common, + &pll_ddr0_clk.common, &pll_periph0_clk.common, &pll_isp_clk.common, &pll_periph1_clk.common, + &pll_ddr1_clk.common, &cpu_clk.common, &axi_clk.common, &ahb1_clk.common, @@ -457,11 +467,12 @@ static struct clk_hw_onecell_data sun8i_v3s_hw_clks = { [CLK_PLL_AUDIO_8X] = &pll_audio_8x_clk.hw, [CLK_PLL_VIDEO] = &pll_video_clk.common.hw, [CLK_PLL_VE] = &pll_ve_clk.common.hw, - [CLK_PLL_DDR] = &pll_ddr_clk.common.hw, + [CLK_PLL_DDR0] = &pll_ddr0_clk.common.hw, [CLK_PLL_PERIPH0] = &pll_periph0_clk.common.hw, [CLK_PLL_PERIPH0_2X] = &pll_periph0_2x_clk.hw, [CLK_PLL_ISP] = &pll_isp_clk.common.hw, [CLK_PLL_PERIPH1] = &pll_periph1_clk.common.hw, + [CLK_PLL_DDR1] = &pll_ddr1_clk.common.hw, [CLK_CPU] = &cpu_clk.common.hw, [CLK_AXI] = &axi_clk.common.hw, [CLK_AHB1] = &ahb1_clk.common.hw, diff --git a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h index 4a4d36fdad96..a091b7217dfd 100644 --- a/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h +++ b/drivers/clk/sunxi-ng/ccu-sun8i-v3s.h @@ -29,7 +29,7 @@ #define CLK_PLL_AUDIO_8X 5 #define CLK_PLL_VIDEO 6 #define CLK_PLL_VE 7 -#define CLK_PLL_DDR 8 +#define CLK_PLL_DDR0 8 #define CLK_PLL_PERIPH0 9 #define CLK_PLL_PERIPH0_2X 10 #define CLK_PLL_ISP 11 @@ -58,6 +58,8 @@ /* And the GPU module clock is exported */ -#define CLK_NUMBER (CLK_MIPI_CSI + 1) +#define CLK_PLL_DDR1 74 + +#define CLK_NUMBER (CLK_PLL_DDR1 + 1) #endif /* _CCU_SUN8I_H3_H_ */ diff --git a/drivers/clk/sunxi-ng/ccu-sun9i-a80.c b/drivers/clk/sunxi-ng/ccu-sun9i-a80.c index 8936ef87652c..c14bf782b2b3 100644 --- a/drivers/clk/sunxi-ng/ccu-sun9i-a80.c +++ b/drivers/clk/sunxi-ng/ccu-sun9i-a80.c @@ -1231,7 +1231,7 @@ static int sun9i_a80_ccu_probe(struct platform_device *pdev) /* Enforce d1 = 0, d2 = 0 for Audio PLL */ val = readl(reg + SUN9I_A80_PLL_AUDIO_REG); - val &= (BIT(16) & BIT(18)); + val &= ~(BIT(16) | BIT(18)); writel(val, reg + SUN9I_A80_PLL_AUDIO_REG); /* Enforce P = 1 for both CPU cluster PLLs */ diff --git a/drivers/clk/tegra/clk-tegra-periph.c b/drivers/clk/tegra/clk-tegra-periph.c index 848255cc0209..d300a256fcac 100644 --- a/drivers/clk/tegra/clk-tegra-periph.c +++ b/drivers/clk/tegra/clk-tegra-periph.c @@ -825,7 +825,11 @@ static struct tegra_periph_init_data gate_clks[] = { GATE("vcp", "clk_m", 29, 0, tegra_clk_vcp, 0), GATE("apbdma", "clk_m", 34, 0, tegra_clk_apbdma, 0), GATE("kbc", "clk_32k", 36, TEGRA_PERIPH_ON_APB | TEGRA_PERIPH_NO_RESET, tegra_clk_kbc, 0), - GATE("fuse", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse, 0), + /* + * Critical for RAM re-repair operation, which must occur on resume + * from LP1 system suspend and as part of CCPLEX cluster switching. + */ + GATE("fuse", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse, CLK_IS_CRITICAL), GATE("fuse_burn", "clk_m", 39, TEGRA_PERIPH_ON_APB, tegra_clk_fuse_burn, 0), GATE("kfuse", "clk_m", 40, TEGRA_PERIPH_ON_APB, tegra_clk_kfuse, 0), GATE("apbif", "clk_m", 107, TEGRA_PERIPH_ON_APB, tegra_clk_apbif, 0), diff --git a/drivers/clk/ti/clk-dra7-atl.c b/drivers/clk/ti/clk-dra7-atl.c index 148815470431..beb672a215b6 100644 --- a/drivers/clk/ti/clk-dra7-atl.c +++ b/drivers/clk/ti/clk-dra7-atl.c @@ -174,7 +174,6 @@ static void __init of_dra7_atl_clock_setup(struct device_node *node) struct clk_init_data init = { NULL }; const char **parent_names = NULL; struct clk *clk; - int ret; clk_hw = kzalloc(sizeof(*clk_hw), GFP_KERNEL); if (!clk_hw) { @@ -207,11 +206,6 @@ static void __init of_dra7_atl_clock_setup(struct device_node *node) clk = ti_clk_register(NULL, &clk_hw->hw, node->name); if (!IS_ERR(clk)) { - ret = ti_clk_add_alias(NULL, clk, node->name); - if (ret) { - clk_unregister(clk); - goto cleanup; - } of_clk_add_provider(node, of_clk_src_simple_get, clk); kfree(parent_names); return; diff --git a/drivers/clocksource/asm9260_timer.c b/drivers/clocksource/asm9260_timer.c index 38cd2feb87c4..0ce760776406 100644 --- a/drivers/clocksource/asm9260_timer.c +++ b/drivers/clocksource/asm9260_timer.c @@ -198,6 +198,10 @@ static int __init asm9260_timer_init(struct device_node *np) } clk = of_clk_get(np, 0); + if (IS_ERR(clk)) { + pr_err("Failed to get clk!\n"); + return PTR_ERR(clk); + } ret = clk_prepare_enable(clk); if (ret) { diff --git a/drivers/clocksource/bcm2835_timer.c b/drivers/clocksource/bcm2835_timer.c index 39e489a96ad7..8894cfc32be0 100644 --- a/drivers/clocksource/bcm2835_timer.c +++ b/drivers/clocksource/bcm2835_timer.c @@ -134,7 +134,7 @@ static int __init bcm2835_timer_init(struct device_node *node) ret = setup_irq(irq, &timer->act); if (ret) { pr_err("Can't set up timer IRQ\n"); - goto err_iounmap; + goto err_timer_free; } clockevents_config_and_register(&timer->evt, freq, 0xf, 0xffffffff); @@ -143,6 +143,9 @@ static int __init bcm2835_timer_init(struct device_node *node) return 0; +err_timer_free: + kfree(timer); + err_iounmap: iounmap(base); return ret; diff --git a/drivers/clocksource/exynos_mct.c b/drivers/clocksource/exynos_mct.c index aaf5bfa9bd9c..e3ae041ac30e 100644 --- a/drivers/clocksource/exynos_mct.c +++ b/drivers/clocksource/exynos_mct.c @@ -563,7 +563,19 @@ static int __init exynos4_timer_resources(struct device_node *np, void __iomem * return 0; out_irq: - free_percpu_irq(mct_irqs[MCT_L0_IRQ], &percpu_mct_tick); + if (mct_int_type == MCT_INT_PPI) { + free_percpu_irq(mct_irqs[MCT_L0_IRQ], &percpu_mct_tick); + } else { + for_each_possible_cpu(cpu) { + struct mct_clock_event_device *pcpu_mevt = + per_cpu_ptr(&percpu_mct_tick, cpu); + + if (pcpu_mevt->evt.irq != -1) { + free_irq(pcpu_mevt->evt.irq, pcpu_mevt); + pcpu_mevt->evt.irq = -1; + } + } + } return err; } diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index e09e8bf0bb9b..3cd62f7c33e3 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -75,18 +75,17 @@ struct sh_cmt_info { enum sh_cmt_model model; unsigned long width; /* 16 or 32 bit version of hardware block */ - unsigned long overflow_bit; - unsigned long clear_bits; + u32 overflow_bit; + u32 clear_bits; /* callbacks for CMSTR and CMCSR access */ - unsigned long (*read_control)(void __iomem *base, unsigned long offs); + u32 (*read_control)(void __iomem *base, unsigned long offs); void (*write_control)(void __iomem *base, unsigned long offs, - unsigned long value); + u32 value); /* callbacks for CMCNT and CMCOR access */ - unsigned long (*read_count)(void __iomem *base, unsigned long offs); - void (*write_count)(void __iomem *base, unsigned long offs, - unsigned long value); + u32 (*read_count)(void __iomem *base, unsigned long offs); + void (*write_count)(void __iomem *base, unsigned long offs, u32 value); }; struct sh_cmt_channel { @@ -100,13 +99,13 @@ struct sh_cmt_channel { unsigned int timer_bit; unsigned long flags; - unsigned long match_value; - unsigned long next_match_value; - unsigned long max_match_value; + u32 match_value; + u32 next_match_value; + u32 max_match_value; raw_spinlock_t lock; struct clock_event_device ced; struct clocksource cs; - unsigned long total_cycles; + u64 total_cycles; bool cs_enabled; }; @@ -157,24 +156,22 @@ struct sh_cmt_device { #define SH_CMT32_CMCSR_CKS_RCLK1 (7 << 0) #define SH_CMT32_CMCSR_CKS_MASK (7 << 0) -static unsigned long sh_cmt_read16(void __iomem *base, unsigned long offs) +static u32 sh_cmt_read16(void __iomem *base, unsigned long offs) { return ioread16(base + (offs << 1)); } -static unsigned long sh_cmt_read32(void __iomem *base, unsigned long offs) +static u32 sh_cmt_read32(void __iomem *base, unsigned long offs) { return ioread32(base + (offs << 2)); } -static void sh_cmt_write16(void __iomem *base, unsigned long offs, - unsigned long value) +static void sh_cmt_write16(void __iomem *base, unsigned long offs, u32 value) { iowrite16(value, base + (offs << 1)); } -static void sh_cmt_write32(void __iomem *base, unsigned long offs, - unsigned long value) +static void sh_cmt_write32(void __iomem *base, unsigned long offs, u32 value) { iowrite32(value, base + (offs << 2)); } @@ -236,7 +233,7 @@ static const struct sh_cmt_info sh_cmt_info[] = { #define CMCNT 1 /* channel register */ #define CMCOR 2 /* channel register */ -static inline unsigned long sh_cmt_read_cmstr(struct sh_cmt_channel *ch) +static inline u32 sh_cmt_read_cmstr(struct sh_cmt_channel *ch) { if (ch->iostart) return ch->cmt->info->read_control(ch->iostart, 0); @@ -244,8 +241,7 @@ static inline unsigned long sh_cmt_read_cmstr(struct sh_cmt_channel *ch) return ch->cmt->info->read_control(ch->cmt->mapbase, 0); } -static inline void sh_cmt_write_cmstr(struct sh_cmt_channel *ch, - unsigned long value) +static inline void sh_cmt_write_cmstr(struct sh_cmt_channel *ch, u32 value) { if (ch->iostart) ch->cmt->info->write_control(ch->iostart, 0, value); @@ -253,39 +249,35 @@ static inline void sh_cmt_write_cmstr(struct sh_cmt_channel *ch, ch->cmt->info->write_control(ch->cmt->mapbase, 0, value); } -static inline unsigned long sh_cmt_read_cmcsr(struct sh_cmt_channel *ch) +static inline u32 sh_cmt_read_cmcsr(struct sh_cmt_channel *ch) { return ch->cmt->info->read_control(ch->ioctrl, CMCSR); } -static inline void sh_cmt_write_cmcsr(struct sh_cmt_channel *ch, - unsigned long value) +static inline void sh_cmt_write_cmcsr(struct sh_cmt_channel *ch, u32 value) { ch->cmt->info->write_control(ch->ioctrl, CMCSR, value); } -static inline unsigned long sh_cmt_read_cmcnt(struct sh_cmt_channel *ch) +static inline u32 sh_cmt_read_cmcnt(struct sh_cmt_channel *ch) { return ch->cmt->info->read_count(ch->ioctrl, CMCNT); } -static inline void sh_cmt_write_cmcnt(struct sh_cmt_channel *ch, - unsigned long value) +static inline void sh_cmt_write_cmcnt(struct sh_cmt_channel *ch, u32 value) { ch->cmt->info->write_count(ch->ioctrl, CMCNT, value); } -static inline void sh_cmt_write_cmcor(struct sh_cmt_channel *ch, - unsigned long value) +static inline void sh_cmt_write_cmcor(struct sh_cmt_channel *ch, u32 value) { ch->cmt->info->write_count(ch->ioctrl, CMCOR, value); } -static unsigned long sh_cmt_get_counter(struct sh_cmt_channel *ch, - int *has_wrapped) +static u32 sh_cmt_get_counter(struct sh_cmt_channel *ch, u32 *has_wrapped) { - unsigned long v1, v2, v3; - int o1, o2; + u32 v1, v2, v3; + u32 o1, o2; o1 = sh_cmt_read_cmcsr(ch) & ch->cmt->info->overflow_bit; @@ -305,7 +297,8 @@ static unsigned long sh_cmt_get_counter(struct sh_cmt_channel *ch, static void sh_cmt_start_stop_ch(struct sh_cmt_channel *ch, int start) { - unsigned long flags, value; + unsigned long flags; + u32 value; /* start stop register shared by multiple timer channels */ raw_spin_lock_irqsave(&ch->cmt->lock, flags); @@ -412,11 +405,11 @@ static void sh_cmt_disable(struct sh_cmt_channel *ch) static void sh_cmt_clock_event_program_verify(struct sh_cmt_channel *ch, int absolute) { - unsigned long new_match; - unsigned long value = ch->next_match_value; - unsigned long delay = 0; - unsigned long now = 0; - int has_wrapped; + u32 value = ch->next_match_value; + u32 new_match; + u32 delay = 0; + u32 now = 0; + u32 has_wrapped; now = sh_cmt_get_counter(ch, &has_wrapped); ch->flags |= FLAG_REPROGRAM; /* force reprogram */ @@ -613,9 +606,10 @@ static struct sh_cmt_channel *cs_to_sh_cmt(struct clocksource *cs) static u64 sh_cmt_clocksource_read(struct clocksource *cs) { struct sh_cmt_channel *ch = cs_to_sh_cmt(cs); - unsigned long flags, raw; - unsigned long value; - int has_wrapped; + unsigned long flags; + u32 has_wrapped; + u64 value; + u32 raw; raw_spin_lock_irqsave(&ch->lock, flags); value = ch->total_cycles; @@ -688,7 +682,7 @@ static int sh_cmt_register_clocksource(struct sh_cmt_channel *ch, cs->disable = sh_cmt_clocksource_disable; cs->suspend = sh_cmt_clocksource_suspend; cs->resume = sh_cmt_clocksource_resume; - cs->mask = CLOCKSOURCE_MASK(sizeof(unsigned long) * 8); + cs->mask = CLOCKSOURCE_MASK(sizeof(u64) * 8); cs->flags = CLOCK_SOURCE_IS_CONTINUOUS; dev_info(&ch->cmt->pdev->dev, "ch%u: used as clock source\n", diff --git a/drivers/clocksource/timer-fttmr010.c b/drivers/clocksource/timer-fttmr010.c index cdfe1c82f3f0..3928f3999015 100644 --- a/drivers/clocksource/timer-fttmr010.c +++ b/drivers/clocksource/timer-fttmr010.c @@ -21,7 +21,7 @@ #include /* - * Register definitions for the timers + * Register definitions common for all the timer variants. */ #define TIMER1_COUNT (0x00) #define TIMER1_LOAD (0x04) @@ -36,9 +36,10 @@ #define TIMER3_MATCH1 (0x28) #define TIMER3_MATCH2 (0x2c) #define TIMER_CR (0x30) -#define TIMER_INTR_STATE (0x34) -#define TIMER_INTR_MASK (0x38) +/* + * Control register (TMC30) bit fields for fttmr010/gemini/moxart timers. + */ #define TIMER_1_CR_ENABLE BIT(0) #define TIMER_1_CR_CLOCK BIT(1) #define TIMER_1_CR_INT BIT(2) @@ -53,8 +54,9 @@ #define TIMER_3_CR_UPDOWN BIT(11) /* - * The Aspeed AST2400 moves bits around in the control register - * and lacks bits for setting the timer to count upwards. + * Control register (TMC30) bit fields for aspeed ast2400/ast2500 timers. + * The aspeed timers move bits around in the control register and lacks + * bits for setting the timer to count upwards. */ #define TIMER_1_CR_ASPEED_ENABLE BIT(0) #define TIMER_1_CR_ASPEED_CLOCK BIT(1) @@ -66,6 +68,18 @@ #define TIMER_3_CR_ASPEED_CLOCK BIT(9) #define TIMER_3_CR_ASPEED_INT BIT(10) +/* + * Interrupt status/mask register definitions for fttmr010/gemini/moxart + * timers. + * The registers don't exist and they are not needed on aspeed timers + * because: + * - aspeed timer overflow interrupt is controlled by bits in Control + * Register (TMC30). + * - aspeed timers always generate interrupt when either one of the + * Match registers equals to Status register. + */ +#define TIMER_INTR_STATE (0x34) +#define TIMER_INTR_MASK (0x38) #define TIMER_1_INT_MATCH1 BIT(0) #define TIMER_1_INT_MATCH2 BIT(1) #define TIMER_1_INT_OVERFLOW BIT(2) @@ -80,7 +94,7 @@ struct fttmr010 { void __iomem *base; unsigned int tick_rate; - bool count_down; + bool is_aspeed; u32 t1_enable_val; struct clock_event_device clkevt; #ifdef CONFIG_ARM @@ -130,7 +144,7 @@ static int fttmr010_timer_set_next_event(unsigned long cycles, cr &= ~fttmr010->t1_enable_val; writel(cr, fttmr010->base + TIMER_CR); - if (fttmr010->count_down) { + if (fttmr010->is_aspeed) { /* * ASPEED Timer Controller will load TIMER1_LOAD register * into TIMER1_COUNT register when the timer is re-enabled. @@ -175,16 +189,17 @@ static int fttmr010_timer_set_oneshot(struct clock_event_device *evt) /* Setup counter start from 0 or ~0 */ writel(0, fttmr010->base + TIMER1_COUNT); - if (fttmr010->count_down) + if (fttmr010->is_aspeed) { writel(~0, fttmr010->base + TIMER1_LOAD); - else + } else { writel(0, fttmr010->base + TIMER1_LOAD); - /* Enable interrupt */ - cr = readl(fttmr010->base + TIMER_INTR_MASK); - cr &= ~(TIMER_1_INT_OVERFLOW | TIMER_1_INT_MATCH2); - cr |= TIMER_1_INT_MATCH1; - writel(cr, fttmr010->base + TIMER_INTR_MASK); + /* Enable interrupt */ + cr = readl(fttmr010->base + TIMER_INTR_MASK); + cr &= ~(TIMER_1_INT_OVERFLOW | TIMER_1_INT_MATCH2); + cr |= TIMER_1_INT_MATCH1; + writel(cr, fttmr010->base + TIMER_INTR_MASK); + } return 0; } @@ -201,9 +216,8 @@ static int fttmr010_timer_set_periodic(struct clock_event_device *evt) writel(cr, fttmr010->base + TIMER_CR); /* Setup timer to fire at 1/HZ intervals. */ - if (fttmr010->count_down) { + if (fttmr010->is_aspeed) { writel(period, fttmr010->base + TIMER1_LOAD); - writel(0, fttmr010->base + TIMER1_MATCH1); } else { cr = 0xffffffff - (period - 1); writel(cr, fttmr010->base + TIMER1_COUNT); @@ -281,23 +295,21 @@ static int __init fttmr010_common_init(struct device_node *np, bool is_aspeed) } /* - * The Aspeed AST2400 moves bits around in the control register, - * otherwise it works the same. + * The Aspeed timers move bits around in the control register. */ if (is_aspeed) { fttmr010->t1_enable_val = TIMER_1_CR_ASPEED_ENABLE | TIMER_1_CR_ASPEED_INT; - /* Downward not available */ - fttmr010->count_down = true; + fttmr010->is_aspeed = true; } else { fttmr010->t1_enable_val = TIMER_1_CR_ENABLE | TIMER_1_CR_INT; - } - /* - * Reset the interrupt mask and status - */ - writel(TIMER_INT_ALL_MASK, fttmr010->base + TIMER_INTR_MASK); - writel(0, fttmr010->base + TIMER_INTR_STATE); + /* + * Reset the interrupt mask and status + */ + writel(TIMER_INT_ALL_MASK, fttmr010->base + TIMER_INTR_MASK); + writel(0, fttmr010->base + TIMER_INTR_STATE); + } /* * Enable timer 1 count up, timer 2 count up, except on Aspeed, @@ -306,9 +318,8 @@ static int __init fttmr010_common_init(struct device_node *np, bool is_aspeed) if (is_aspeed) val = TIMER_2_CR_ASPEED_ENABLE; else { - val = TIMER_2_CR_ENABLE; - if (!fttmr010->count_down) - val |= TIMER_1_CR_UPDOWN | TIMER_2_CR_UPDOWN; + val = TIMER_2_CR_ENABLE | TIMER_1_CR_UPDOWN | + TIMER_2_CR_UPDOWN; } writel(val, fttmr010->base + TIMER_CR); @@ -321,7 +332,7 @@ static int __init fttmr010_common_init(struct device_node *np, bool is_aspeed) writel(0, fttmr010->base + TIMER2_MATCH1); writel(0, fttmr010->base + TIMER2_MATCH2); - if (fttmr010->count_down) { + if (fttmr010->is_aspeed) { writel(~0, fttmr010->base + TIMER2_LOAD); clocksource_mmio_init(fttmr010->base + TIMER2_COUNT, "FTTMR010-TIMER2", @@ -371,7 +382,7 @@ static int __init fttmr010_common_init(struct device_node *np, bool is_aspeed) #ifdef CONFIG_ARM /* Also use this timer for delays */ - if (fttmr010->count_down) + if (fttmr010->is_aspeed) fttmr010->delay_timer.read_current_timer = fttmr010_read_current_timer_down; else diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index 2a3fe83ec337..6f4a9a8faccc 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -202,6 +202,11 @@ static int __init sun5i_setup_clocksource(struct device_node *node, } rate = clk_get_rate(clk); + if (!rate) { + pr_err("Couldn't get parent clock rate\n"); + ret = -EINVAL; + goto err_disable_clk; + } cs->timer.base = base; cs->timer.clk = clk; @@ -275,6 +280,11 @@ static int __init sun5i_setup_clockevent(struct device_node *node, void __iomem } rate = clk_get_rate(clk); + if (!rate) { + pr_err("Couldn't get parent clock rate\n"); + ret = -EINVAL; + goto err_disable_clk; + } ce->timer.base = base; ce->timer.ticks_per_jiffy = DIV_ROUND_UP(rate, HZ); diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 48c6d845b1cb..a3d70402605b 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -257,6 +257,15 @@ config CPUFREQ_DT_PLATDEV If in doubt, say N. +config CPUFREQ_DUMMY + tristate "Dummy CPU frequency driver" + help + This option adds a generic dummy CPUfreq driver, which sets a fake + 2-frequency table when initializing each policy and otherwise does + nothing. + + If in doubt, say N + if X86 source "drivers/cpufreq/Kconfig.x86" endif diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index d98486f8a00f..5e44c740c5cb 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -24,6 +24,8 @@ obj-$(CONFIG_CPU_FREQ_GOV_ATTR_SET) += cpufreq_governor_attr_set.o obj-$(CONFIG_CPUFREQ_DT) += cpufreq-dt.o obj-$(CONFIG_CPUFREQ_DT_PLATDEV) += cpufreq-dt-platdev.o +obj-$(CONFIG_CPUFREQ_DUMMY) += dummy-cpufreq.o + ################################################################################## # x86 drivers. # Link order matters. K8 is preferred to ACPI because of firmware bugs in early diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 7281a2c19c36..39c462711eae 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -468,12 +468,12 @@ static int brcm_avs_set_pstate(struct private_data *priv, unsigned int pstate) return __issue_avs_command(priv, AVS_CMD_SET_PSTATE, true, args); } -static unsigned long brcm_avs_get_voltage(void __iomem *base) +static u32 brcm_avs_get_voltage(void __iomem *base) { return readl(base + AVS_MBOX_VOLTAGE1); } -static unsigned long brcm_avs_get_frequency(void __iomem *base) +static u32 brcm_avs_get_frequency(void __iomem *base) { return readl(base + AVS_MBOX_FREQUENCY) * 1000; /* in kHz */ } @@ -762,8 +762,8 @@ static bool brcm_avs_is_firmware_loaded(struct private_data *priv) rc = brcm_avs_get_pmap(priv, NULL); magic = readl(priv->base + AVS_MBOX_MAGIC); - return (magic == AVS_FIRMWARE_MAGIC) && (rc != -ENOTSUPP) && - (rc != -EINVAL); + return (magic == AVS_FIRMWARE_MAGIC) && ((rc != -ENOTSUPP) || + (rc != -EINVAL)); } static unsigned int brcm_avs_cpufreq_get(unsigned int cpu) @@ -973,14 +973,14 @@ static ssize_t show_brcm_avs_voltage(struct cpufreq_policy *policy, char *buf) { struct private_data *priv = policy->driver_data; - return sprintf(buf, "0x%08lx\n", brcm_avs_get_voltage(priv->base)); + return sprintf(buf, "0x%08x\n", brcm_avs_get_voltage(priv->base)); } static ssize_t show_brcm_avs_frequency(struct cpufreq_policy *policy, char *buf) { struct private_data *priv = policy->driver_data; - return sprintf(buf, "0x%08lx\n", brcm_avs_get_frequency(priv->base)); + return sprintf(buf, "0x%08x\n", brcm_avs_get_frequency(priv->base)); } cpufreq_freq_attr_ro(brcm_avs_pstate); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index eaf59510823a..d136c503df8c 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -918,6 +918,9 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) struct freq_attr *fattr = to_attr(attr); ssize_t ret; + if (!fattr->show) + return -EIO; + down_read(&policy->rwsem); ret = fattr->show(policy, buf); up_read(&policy->rwsem); @@ -932,6 +935,9 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, struct freq_attr *fattr = to_attr(attr); ssize_t ret = -EINVAL; + if (!fattr->store) + return -EIO; + cpus_read_lock(); if (cpu_online(policy->cpu)) { @@ -1681,6 +1687,9 @@ void cpufreq_resume(void) if (!cpufreq_driver) return; + if (unlikely(!cpufreq_suspended)) + return; + cpufreq_suspended = false; if (!has_target() && !cpufreq_driver->resume) @@ -2506,6 +2515,13 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) if (cpufreq_disabled()) return -ENODEV; + /* + * The cpufreq core depends heavily on the availability of device + * structure, make sure they are available before proceeding further. + */ + if (!get_cpu_device(0)) + return -EPROBE_DEFER; + if (!driver_data || !driver_data->verify || !driver_data->init || !(driver_data->setpolicy || driver_data->target_index || driver_data->target) || @@ -2610,14 +2626,6 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) } EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); -/* - * Stop cpufreq at shutdown to make sure it isn't holding any locks - * or mutexes when secondary CPUs are halted. - */ -static struct syscore_ops cpufreq_syscore_ops = { - .shutdown = cpufreq_suspend, -}; - struct kobject *cpufreq_global_kobject; EXPORT_SYMBOL(cpufreq_global_kobject); @@ -2629,8 +2637,6 @@ static int __init cpufreq_core_init(void) cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj); BUG_ON(!cpufreq_global_kobject); - register_syscore_ops(&cpufreq_syscore_ops); - return 0; } module_param(off, int, 0444); diff --git a/drivers/cpufreq/dummy-cpufreq.c b/drivers/cpufreq/dummy-cpufreq.c new file mode 100644 index 000000000000..e614a27af11c --- /dev/null +++ b/drivers/cpufreq/dummy-cpufreq.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 Google, Inc. + */ +#include +#include + +static struct cpufreq_frequency_table freq_table[] = { + { .frequency = 1 }, + { .frequency = 2 }, + { .frequency = CPUFREQ_TABLE_END }, +}; + +static int dummy_cpufreq_target_index(struct cpufreq_policy *policy, + unsigned int index) +{ + return 0; +} + +static int dummy_cpufreq_driver_init(struct cpufreq_policy *policy) +{ + return cpufreq_table_validate_and_show(policy, freq_table); +} + +static unsigned int dummy_cpufreq_get(unsigned int cpu) +{ + return 1; +} + +static int dummy_cpufreq_verify(struct cpufreq_policy *policy) +{ + return 0; +} + +static struct cpufreq_driver dummy_cpufreq_driver = { + .name = "dummy", + .target_index = dummy_cpufreq_target_index, + .init = dummy_cpufreq_driver_init, + .get = dummy_cpufreq_get, + .verify = dummy_cpufreq_verify, + .attr = cpufreq_generic_attr, +}; + +static int __init dummy_cpufreq_init(void) +{ + return cpufreq_register_driver(&dummy_cpufreq_driver); +} + +static void __exit dummy_cpufreq_exit(void) +{ + cpufreq_unregister_driver(&dummy_cpufreq_driver); +} + +module_init(dummy_cpufreq_init); +module_exit(dummy_cpufreq_exit); + +MODULE_AUTHOR("Connor O'Brien "); +MODULE_DESCRIPTION("dummy cpufreq driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index a28bb8f3f395..33854bf127f9 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -1002,9 +1002,14 @@ static struct cpufreq_driver powernv_cpufreq_driver = { static int init_chip_info(void) { - unsigned int chip[256]; + unsigned int *chip; unsigned int cpu, i; unsigned int prev_chip_id = UINT_MAX; + int ret = 0; + + chip = kcalloc(num_possible_cpus(), sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; for_each_possible_cpu(cpu) { unsigned int id = cpu_to_chip_id(cpu); @@ -1016,8 +1021,10 @@ static int init_chip_info(void) } chips = kcalloc(nr_chips, sizeof(struct chip), GFP_KERNEL); - if (!chips) - return -ENOMEM; + if (!chips) { + ret = -ENOMEM; + goto free_and_return; + } for (i = 0; i < nr_chips; i++) { chips[i].id = chip[i]; @@ -1027,7 +1034,9 @@ static int init_chip_info(void) per_cpu(chip_info, cpu) = &chips[i]; } - return 0; +free_and_return: + kfree(chip); + return ret; } static inline void clean_chip_info(void) diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c index 4bf47de6101f..cadc324bedb4 100644 --- a/drivers/cpufreq/ti-cpufreq.c +++ b/drivers/cpufreq/ti-cpufreq.c @@ -205,6 +205,7 @@ static int ti_cpufreq_init(void) np = of_find_node_by_path("/"); match = of_match_node(ti_cpufreq_of_match, np); + of_node_put(np); if (!match) return -ENODEV; diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index dc32f34e68d9..01acd88c4193 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -62,25 +62,24 @@ static inline void __cpuidle_unset_driver(struct cpuidle_driver *drv) * __cpuidle_set_driver - set per CPU driver variables for the given driver. * @drv: a valid pointer to a struct cpuidle_driver * - * For each CPU in the driver's cpumask, unset the registered driver per CPU - * to @drv. - * - * Returns 0 on success, -EBUSY if the CPUs have driver(s) already. + * Returns 0 on success, -EBUSY if any CPU in the cpumask have a driver + * different from drv already. */ static inline int __cpuidle_set_driver(struct cpuidle_driver *drv) { int cpu; for_each_cpu(cpu, drv->cpumask) { + struct cpuidle_driver *old_drv; - if (__cpuidle_get_cpu_driver(cpu)) { - __cpuidle_unset_driver(drv); + old_drv = __cpuidle_get_cpu_driver(cpu); + if (old_drv && old_drv != drv) return -EBUSY; - } - - per_cpu(cpuidle_drivers, cpu) = drv; } + for_each_cpu(cpu, drv->cpumask) + per_cpu(cpuidle_drivers, cpu) = drv; + return 0; } diff --git a/drivers/crypto/amcc/crypto4xx_core.c b/drivers/crypto/amcc/crypto4xx_core.c index 8d4d8db244e9..d1d041de7f8a 100644 --- a/drivers/crypto/amcc/crypto4xx_core.c +++ b/drivers/crypto/amcc/crypto4xx_core.c @@ -399,12 +399,8 @@ static u32 crypto4xx_build_sdr(struct crypto4xx_device *dev) dma_alloc_coherent(dev->core_dev->device, dev->scatter_buffer_size * PPC4XX_NUM_SD, &dev->scatter_buffer_pa, GFP_ATOMIC); - if (!dev->scatter_buffer_va) { - dma_free_coherent(dev->core_dev->device, - sizeof(struct ce_sd) * PPC4XX_NUM_SD, - dev->sdr, dev->sdr_pa); + if (!dev->scatter_buffer_va) return -ENOMEM; - } sd_array = dev->sdr; diff --git a/drivers/crypto/amcc/crypto4xx_trng.h b/drivers/crypto/amcc/crypto4xx_trng.h index 931d22531f51..7bbda51b7337 100644 --- a/drivers/crypto/amcc/crypto4xx_trng.h +++ b/drivers/crypto/amcc/crypto4xx_trng.h @@ -26,9 +26,9 @@ void ppc4xx_trng_probe(struct crypto4xx_core_device *core_dev); void ppc4xx_trng_remove(struct crypto4xx_core_device *core_dev); #else static inline void ppc4xx_trng_probe( - struct crypto4xx_device *dev __maybe_unused) { } + struct crypto4xx_core_device *dev __maybe_unused) { } static inline void ppc4xx_trng_remove( - struct crypto4xx_device *dev __maybe_unused) { } + struct crypto4xx_core_device *dev __maybe_unused) { } #endif #endif diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c index 11129b796dda..b8153142bcc6 100644 --- a/drivers/crypto/atmel-aes.c +++ b/drivers/crypto/atmel-aes.c @@ -91,7 +91,6 @@ struct atmel_aes_caps { bool has_dualbuff; bool has_cfb64; - bool has_ctr32; bool has_gcm; bool has_xts; bool has_authenc; @@ -990,8 +989,9 @@ static int atmel_aes_ctr_transfer(struct atmel_aes_dev *dd) struct atmel_aes_ctr_ctx *ctx = atmel_aes_ctr_ctx_cast(dd->ctx); struct ablkcipher_request *req = ablkcipher_request_cast(dd->areq); struct scatterlist *src, *dst; - u32 ctr, blocks; size_t datalen; + u32 ctr; + u16 blocks, start, end; bool use_dma, fragmented = false; /* Check for transfer completion. */ @@ -1003,27 +1003,17 @@ static int atmel_aes_ctr_transfer(struct atmel_aes_dev *dd) datalen = req->nbytes - ctx->offset; blocks = DIV_ROUND_UP(datalen, AES_BLOCK_SIZE); ctr = be32_to_cpu(ctx->iv[3]); - if (dd->caps.has_ctr32) { - /* Check 32bit counter overflow. */ - u32 start = ctr; - u32 end = start + blocks - 1; - if (end < start) { - ctr |= 0xffffffff; - datalen = AES_BLOCK_SIZE * -start; - fragmented = true; - } - } else { - /* Check 16bit counter overflow. */ - u16 start = ctr & 0xffff; - u16 end = start + (u16)blocks - 1; + /* Check 16bit counter overflow. */ + start = ctr & 0xffff; + end = start + blocks - 1; - if (blocks >> 16 || end < start) { - ctr |= 0xffff; - datalen = AES_BLOCK_SIZE * (0x10000-start); - fragmented = true; - } + if (blocks >> 16 || end < start) { + ctr |= 0xffff; + datalen = AES_BLOCK_SIZE * (0x10000 - start); + fragmented = true; } + use_dma = (datalen >= ATMEL_AES_DMA_THRESHOLD); /* Jump to offset. */ @@ -2536,7 +2526,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) { dd->caps.has_dualbuff = 0; dd->caps.has_cfb64 = 0; - dd->caps.has_ctr32 = 0; dd->caps.has_gcm = 0; dd->caps.has_xts = 0; dd->caps.has_authenc = 0; @@ -2547,7 +2536,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) case 0x500: dd->caps.has_dualbuff = 1; dd->caps.has_cfb64 = 1; - dd->caps.has_ctr32 = 1; dd->caps.has_gcm = 1; dd->caps.has_xts = 1; dd->caps.has_authenc = 1; @@ -2556,7 +2544,6 @@ static void atmel_aes_get_cap(struct atmel_aes_dev *dd) case 0x200: dd->caps.has_dualbuff = 1; dd->caps.has_cfb64 = 1; - dd->caps.has_ctr32 = 1; dd->caps.has_gcm = 1; dd->caps.max_burst_size = 4; break; diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c index 3e2f41b3eaf3..15e68774034a 100644 --- a/drivers/crypto/atmel-sha.c +++ b/drivers/crypto/atmel-sha.c @@ -1921,12 +1921,7 @@ static int atmel_sha_hmac_setkey(struct crypto_ahash *tfm, const u8 *key, { struct atmel_sha_hmac_ctx *hmac = crypto_ahash_ctx(tfm); - if (atmel_sha_hmac_key_set(&hmac->hkey, key, keylen)) { - crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - - return 0; + return atmel_sha_hmac_key_set(&hmac->hkey, key, keylen); } static int atmel_sha_hmac_init(struct ahash_request *req) diff --git a/drivers/crypto/axis/artpec6_crypto.c b/drivers/crypto/axis/artpec6_crypto.c index 9f82e14983f6..a886245b931e 100644 --- a/drivers/crypto/axis/artpec6_crypto.c +++ b/drivers/crypto/axis/artpec6_crypto.c @@ -1256,7 +1256,7 @@ static int artpec6_crypto_aead_set_key(struct crypto_aead *tfm, const u8 *key, if (len != 16 && len != 24 && len != 32) { crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -1; + return -EINVAL; } ctx->key_length = len; diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c index b6be383a51a6..279e907590e9 100644 --- a/drivers/crypto/bcm/cipher.c +++ b/drivers/crypto/bcm/cipher.c @@ -718,7 +718,7 @@ static int handle_ahash_req(struct iproc_reqctx_s *rctx) */ unsigned int new_data_len; - unsigned int chunk_start = 0; + unsigned int __maybe_unused chunk_start = 0; u32 db_size; /* Length of data field, incl gcm and hash padding */ int pad_len = 0; /* total pad len, including gcm, hash, stat padding */ u32 data_pad_len = 0; /* length of GCM/CCM padding */ @@ -1676,8 +1676,6 @@ static void spu_rx_callback(struct mbox_client *cl, void *msg) struct spu_hw *spu = &iproc_priv.spu; struct brcm_message *mssg = msg; struct iproc_reqctx_s *rctx; - struct iproc_ctx_s *ctx; - struct crypto_async_request *areq; int err = 0; rctx = mssg->ctx; @@ -1687,8 +1685,6 @@ static void spu_rx_callback(struct mbox_client *cl, void *msg) err = -EFAULT; goto cb_finish; } - areq = rctx->parent; - ctx = rctx->ctx; /* process the SPU status */ err = spu->spu_status_process(rctx->msg_buf.rx_stat); @@ -4637,12 +4633,16 @@ static int spu_register_ahash(struct iproc_alg_s *driver_alg) hash->halg.statesize = sizeof(struct spu_hash_export_s); if (driver_alg->auth_info.mode != HASH_MODE_HMAC) { - hash->setkey = ahash_setkey; hash->init = ahash_init; hash->update = ahash_update; hash->final = ahash_final; hash->finup = ahash_finup; hash->digest = ahash_digest; + if ((driver_alg->auth_info.alg == HASH_ALG_AES) && + ((driver_alg->auth_info.mode == HASH_MODE_XCBC) || + (driver_alg->auth_info.mode == HASH_MODE_CMAC))) { + hash->setkey = ahash_setkey; + } } else { hash->setkey = ahash_hmac_setkey; hash->init = ahash_hmac_init; diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index fde07d4ff019..ff6718a11e9e 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -353,7 +353,10 @@ static int __init caam_rng_init(void) goto free_rng_ctx; dev_info(dev, "registering rng-caam\n"); - return hwrng_register(&caam_rng); + + err = hwrng_register(&caam_rng); + if (!err) + return err; free_rng_ctx: kfree(rng_ctx); diff --git a/drivers/crypto/caam/error.c b/drivers/crypto/caam/error.c index 8da88beb1abb..832ba2afdcd5 100644 --- a/drivers/crypto/caam/error.c +++ b/drivers/crypto/caam/error.c @@ -22,7 +22,7 @@ void caam_dump_sg(const char *level, const char *prefix_str, int prefix_type, size_t len; void *buf; - for (it = sg; it && tlen > 0 ; it = sg_next(sg)) { + for (it = sg; it && tlen > 0 ; it = sg_next(it)) { /* * make sure the scatterlist's page * has a valid virtual memory mapping diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c index 89291c15015c..3f768699332b 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes.c +++ b/drivers/crypto/ccp/ccp-crypto-aes.c @@ -1,7 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 /* * AMD Cryptographic Coprocessor (CCP) AES crypto API support * - * Copyright (C) 2013,2016 Advanced Micro Devices, Inc. + * Copyright (C) 2013-2019 Advanced Micro Devices, Inc. * * Author: Tom Lendacky * @@ -79,8 +80,7 @@ static int ccp_aes_crypt(struct ablkcipher_request *req, bool encrypt) return -EINVAL; if (((ctx->u.aes.mode == CCP_AES_MODE_ECB) || - (ctx->u.aes.mode == CCP_AES_MODE_CBC) || - (ctx->u.aes.mode == CCP_AES_MODE_CFB)) && + (ctx->u.aes.mode == CCP_AES_MODE_CBC)) && (req->nbytes & (AES_BLOCK_SIZE - 1))) return -EINVAL; @@ -291,7 +291,7 @@ static struct ccp_aes_def aes_algs[] = { .version = CCP_VERSION(3, 0), .name = "cfb(aes)", .driver_name = "cfb-aes-ccp", - .blocksize = AES_BLOCK_SIZE, + .blocksize = 1, .ivsize = AES_BLOCK_SIZE, .alg_defaults = &ccp_aes_defaults, }, diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c index 240bebbcb8ac..ae0cc0a4dc5c 100644 --- a/drivers/crypto/ccp/ccp-dev-v3.c +++ b/drivers/crypto/ccp/ccp-dev-v3.c @@ -590,6 +590,7 @@ const struct ccp_vdata ccpv3_platform = { .setup = NULL, .perform = &ccp3_actions, .offset = 0, + .rsamax = CCP_RSA_MAX_WIDTH, }; const struct ccp_vdata ccpv3 = { diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c index d608043c0280..df82af3dd970 100644 --- a/drivers/crypto/ccp/ccp-dmaengine.c +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -341,6 +341,7 @@ static struct ccp_dma_desc *ccp_alloc_dma_desc(struct ccp_dma_chan *chan, desc->tx_desc.flags = flags; desc->tx_desc.tx_submit = ccp_tx_submit; desc->ccp = chan->ccp; + INIT_LIST_HEAD(&desc->entry); INIT_LIST_HEAD(&desc->pending); INIT_LIST_HEAD(&desc->active); desc->status = DMA_IN_PROGRESS; diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index 1e2e42106dee..330853a2702f 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -458,8 +458,8 @@ static int ccp_copy_from_sb(struct ccp_cmd_queue *cmd_q, return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, true); } -static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, - struct ccp_cmd *cmd) +static noinline_for_stack int +ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_aes_engine *aes = &cmd->u.aes; struct ccp_dm_workarea key, ctx; @@ -614,8 +614,8 @@ e_key: return ret; } -static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, - struct ccp_cmd *cmd) +static noinline_for_stack int +ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_aes_engine *aes = &cmd->u.aes; struct ccp_dm_workarea key, ctx, final_wa, tag; @@ -897,7 +897,8 @@ e_key: return ret; } -static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +static noinline_for_stack int +ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_aes_engine *aes = &cmd->u.aes; struct ccp_dm_workarea key, ctx; @@ -907,12 +908,6 @@ static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) bool in_place = false; int ret; - if (aes->mode == CCP_AES_MODE_CMAC) - return ccp_run_aes_cmac_cmd(cmd_q, cmd); - - if (aes->mode == CCP_AES_MODE_GCM) - return ccp_run_aes_gcm_cmd(cmd_q, cmd); - if (!((aes->key_len == AES_KEYSIZE_128) || (aes->key_len == AES_KEYSIZE_192) || (aes->key_len == AES_KEYSIZE_256))) @@ -1080,8 +1075,8 @@ e_key: return ret; } -static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, - struct ccp_cmd *cmd) +static noinline_for_stack int +ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_xts_aes_engine *xts = &cmd->u.xts; struct ccp_dm_workarea key, ctx; @@ -1280,7 +1275,8 @@ e_key: return ret; } -static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +static noinline_for_stack int +ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_des3_engine *des3 = &cmd->u.des3; @@ -1293,6 +1289,9 @@ static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) int ret; /* Error checks */ + if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) + return -EINVAL; + if (!cmd_q->ccp->vdata->perform->des3) return -EINVAL; @@ -1375,8 +1374,6 @@ static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) * passthru option to convert from big endian to little endian. */ if (des3->mode != CCP_DES3_MODE_ECB) { - u32 load_mode; - op.sb_ctx = cmd_q->sb_ctx; ret = ccp_init_dm_workarea(&ctx, cmd_q, @@ -1392,12 +1389,8 @@ static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) if (ret) goto e_ctx; - if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) - load_mode = CCP_PASSTHRU_BYTESWAP_NOOP; - else - load_mode = CCP_PASSTHRU_BYTESWAP_256BIT; ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, - load_mode); + CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { cmd->engine_error = cmd_q->cmd_error; goto e_ctx; @@ -1459,10 +1452,6 @@ static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) } /* ...but we only need the last DES3_EDE_BLOCK_SIZE bytes */ - if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) - dm_offset = CCP_SB_BYTES - des3->iv_len; - else - dm_offset = 0; ccp_get_dm_area(&ctx, dm_offset, des3->iv, 0, DES3_EDE_BLOCK_SIZE); } @@ -1483,7 +1472,8 @@ e_key: return ret; } -static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +static noinline_for_stack int +ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_sha_engine *sha = &cmd->u.sha; struct ccp_dm_workarea ctx; @@ -1827,7 +1817,8 @@ e_ctx: return ret; } -static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +static noinline_for_stack int +ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_rsa_engine *rsa = &cmd->u.rsa; struct ccp_dm_workarea exp, src, dst; @@ -1958,8 +1949,8 @@ e_sb: return ret; } -static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, - struct ccp_cmd *cmd) +static noinline_for_stack int +ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_passthru_engine *pt = &cmd->u.passthru; struct ccp_dm_workarea mask; @@ -2090,7 +2081,8 @@ e_mask: return ret; } -static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q, +static noinline_for_stack int +ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_passthru_nomap_engine *pt = &cmd->u.passthru_nomap; @@ -2431,7 +2423,8 @@ e_src: return ret; } -static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +static noinline_for_stack int +ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) { struct ccp_ecc_engine *ecc = &cmd->u.ecc; @@ -2468,7 +2461,17 @@ int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) switch (cmd->engine) { case CCP_ENGINE_AES: - ret = ccp_run_aes_cmd(cmd_q, cmd); + switch (cmd->u.aes.mode) { + case CCP_AES_MODE_CMAC: + ret = ccp_run_aes_cmac_cmd(cmd_q, cmd); + break; + case CCP_AES_MODE_GCM: + ret = ccp_run_aes_gcm_cmd(cmd_q, cmd); + break; + default: + ret = ccp_run_aes_cmd(cmd_q, cmd); + break; + } break; case CCP_ENGINE_XTS_AES_128: ret = ccp_run_xts_aes_cmd(cmd_q, cmd); diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index bb7b59fc5c08..8d39f3a07bf8 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -2693,9 +2693,6 @@ static int chcr_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) aeadctx->mayverify = VERIFY_SW; break; default: - - crypto_tfm_set_flags((struct crypto_tfm *) tfm, - CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } return crypto_aead_setauthsize(aeadctx->sw_cipher, authsize); @@ -2720,8 +2717,6 @@ static int chcr_4106_4309_setauthsize(struct crypto_aead *tfm, aeadctx->mayverify = VERIFY_HW; break; default: - crypto_tfm_set_flags((struct crypto_tfm *)tfm, - CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } return crypto_aead_setauthsize(aeadctx->sw_cipher, authsize); @@ -2762,8 +2757,6 @@ static int chcr_ccm_setauthsize(struct crypto_aead *tfm, aeadctx->mayverify = VERIFY_HW; break; default: - crypto_tfm_set_flags((struct crypto_tfm *)tfm, - CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } return crypto_aead_setauthsize(aeadctx->sw_cipher, authsize); @@ -2790,8 +2783,7 @@ static int chcr_ccm_common_setkey(struct crypto_aead *aead, ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_256; } else { - crypto_tfm_set_flags((struct crypto_tfm *)aead, - CRYPTO_TFM_RES_BAD_KEY_LEN); + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); aeadctx->enckey_len = 0; return -EINVAL; } @@ -2831,8 +2823,7 @@ static int chcr_aead_rfc4309_setkey(struct crypto_aead *aead, const u8 *key, int error; if (keylen < 3) { - crypto_tfm_set_flags((struct crypto_tfm *)aead, - CRYPTO_TFM_RES_BAD_KEY_LEN); + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); aeadctx->enckey_len = 0; return -EINVAL; } @@ -2883,8 +2874,7 @@ static int chcr_gcm_setkey(struct crypto_aead *aead, const u8 *key, } else if (keylen == AES_KEYSIZE_256) { ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; } else { - crypto_tfm_set_flags((struct crypto_tfm *)aead, - CRYPTO_TFM_RES_BAD_KEY_LEN); + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); pr_err("GCM: Invalid key length %d\n", keylen); ret = -EINVAL; goto out; diff --git a/drivers/crypto/mxc-scc.c b/drivers/crypto/mxc-scc.c index e01c46387df8..519086730791 100644 --- a/drivers/crypto/mxc-scc.c +++ b/drivers/crypto/mxc-scc.c @@ -178,12 +178,12 @@ static int mxc_scc_get_data(struct mxc_scc_ctx *ctx, else from = scc->black_memory; - dev_dbg(scc->dev, "pcopy: from 0x%p %d bytes\n", from, + dev_dbg(scc->dev, "pcopy: from 0x%p %zu bytes\n", from, ctx->dst_nents * 8); len = sg_pcopy_from_buffer(ablkreq->dst, ctx->dst_nents, from, ctx->size, ctx->offset); if (!len) { - dev_err(scc->dev, "pcopy err from 0x%p (len=%d)\n", from, len); + dev_err(scc->dev, "pcopy err from 0x%p (len=%zu)\n", from, len); return -EINVAL; } @@ -274,7 +274,7 @@ static int mxc_scc_put_data(struct mxc_scc_ctx *ctx, len = sg_pcopy_to_buffer(req->src, ctx->src_nents, to, len, ctx->offset); if (!len) { - dev_err(scc->dev, "pcopy err to 0x%p (len=%d)\n", to, len); + dev_err(scc->dev, "pcopy err to 0x%p (len=%zu)\n", to, len); return -EINVAL; } @@ -335,9 +335,9 @@ static void mxc_scc_ablkcipher_next(struct mxc_scc_ctx *ctx, return; } - dev_dbg(scc->dev, "Start encryption (0x%p/0x%p)\n", - (void *)readl(scc->base + SCC_SCM_RED_START), - (void *)readl(scc->base + SCC_SCM_BLACK_START)); + dev_dbg(scc->dev, "Start encryption (0x%x/0x%x)\n", + readl(scc->base + SCC_SCM_RED_START), + readl(scc->base + SCC_SCM_BLACK_START)); /* clear interrupt control registers */ writel(SCC_SCM_INTR_CTRL_CLR_INTR, diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index a98a25733a22..e1e1e8110790 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -28,9 +28,24 @@ #define DCP_MAX_CHANS 4 #define DCP_BUF_SZ PAGE_SIZE +#define DCP_SHA_PAY_SZ 64 #define DCP_ALIGNMENT 64 +/* + * Null hashes to align with hw behavior on imx6sl and ull + * these are flipped for consistency with hw output + */ +const uint8_t sha1_null_hash[] = + "\x09\x07\xd8\xaf\x90\x18\x60\x95\xef\xbf" + "\x55\x32\x0d\x4b\x6b\x5e\xee\xa3\x39\xda"; + +const uint8_t sha256_null_hash[] = + "\x55\xb8\x52\x78\x1b\x99\x95\xa4" + "\x4c\x93\x9b\x64\xe4\x41\xae\x27" + "\x24\xb9\x6f\x99\xc8\xf4\xfb\x9a" + "\x14\x1c\xfc\x98\x42\xc4\xb0\xe3"; + /* DCP DMA descriptor. */ struct dcp_dma_desc { uint32_t next_cmd_addr; @@ -48,6 +63,7 @@ struct dcp_coherent_block { uint8_t aes_in_buf[DCP_BUF_SZ]; uint8_t aes_out_buf[DCP_BUF_SZ]; uint8_t sha_in_buf[DCP_BUF_SZ]; + uint8_t sha_out_buf[DCP_SHA_PAY_SZ]; uint8_t aes_key[2 * AES_KEYSIZE_128]; @@ -209,6 +225,12 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, dma_addr_t dst_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_out_buf, DCP_BUF_SZ, DMA_FROM_DEVICE); + if (actx->fill % AES_BLOCK_SIZE) { + dev_err(sdcp->dev, "Invalid block size!\n"); + ret = -EINVAL; + goto aes_done_run; + } + /* Fill in the DMA descriptor. */ desc->control0 = MXS_DCP_CONTROL0_DECR_SEMAPHORE | MXS_DCP_CONTROL0_INTERRUPT | @@ -238,6 +260,7 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, ret = mxs_dcp_start_dma(actx); +aes_done_run: dma_unmap_single(sdcp->dev, key_phys, 2 * AES_KEYSIZE_128, DMA_TO_DEVICE); dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE); @@ -264,13 +287,15 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) uint8_t *out_tmp, *src_buf, *dst_buf = NULL; uint32_t dst_off = 0; + uint32_t last_out_len = 0; uint8_t *key = sdcp->coh->aes_key; int ret = 0; int split = 0; - unsigned int i, len, clen, rem = 0; + unsigned int i, len, clen, rem = 0, tlen = 0; int init = 0; + bool limit_hit = false; actx->fill = 0; @@ -289,6 +314,11 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) for_each_sg(req->src, src, nents, i) { src_buf = sg_virt(src); len = sg_dma_len(src); + tlen += len; + limit_hit = tlen > req->nbytes; + + if (limit_hit) + len = req->nbytes - (tlen - len); do { if (actx->fill + len > out_off) @@ -305,13 +335,15 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) * If we filled the buffer or this is the last SG, * submit the buffer. */ - if (actx->fill == out_off || sg_is_last(src)) { + if (actx->fill == out_off || sg_is_last(src) || + limit_hit) { ret = mxs_dcp_run_aes(actx, req, init); if (ret) return ret; init = 0; out_tmp = out_buf; + last_out_len = actx->fill; while (dst && actx->fill) { if (!split) { dst_buf = sg_virt(dst); @@ -334,6 +366,19 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) } } } while (len); + + if (limit_hit) + break; + } + + /* Copy the IV for CBC for chaining */ + if (!rctx->ecb) { + if (rctx->enc) + memcpy(req->info, out_buf+(last_out_len-AES_BLOCK_SIZE), + AES_BLOCK_SIZE); + else + memcpy(req->info, in_buf+(last_out_len-AES_BLOCK_SIZE), + AES_BLOCK_SIZE); } return ret; @@ -513,8 +558,6 @@ static int mxs_dcp_run_sha(struct ahash_request *req) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm); struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req); - struct hash_alg_common *halg = crypto_hash_alg_common(tfm); - struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan]; dma_addr_t digest_phys = 0; @@ -536,10 +579,23 @@ static int mxs_dcp_run_sha(struct ahash_request *req) desc->payload = 0; desc->status = 0; + /* + * Align driver with hw behavior when generating null hashes + */ + if (rctx->init && rctx->fini && desc->size == 0) { + struct hash_alg_common *halg = crypto_hash_alg_common(tfm); + const uint8_t *sha_buf = + (actx->alg == MXS_DCP_CONTROL1_HASH_SELECT_SHA1) ? + sha1_null_hash : sha256_null_hash; + memcpy(sdcp->coh->sha_out_buf, sha_buf, halg->digestsize); + ret = 0; + goto done_run; + } + /* Set HASH_TERM bit for last transfer block. */ if (rctx->fini) { - digest_phys = dma_map_single(sdcp->dev, req->result, - halg->digestsize, DMA_FROM_DEVICE); + digest_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_out_buf, + DCP_SHA_PAY_SZ, DMA_FROM_DEVICE); desc->control0 |= MXS_DCP_CONTROL0_HASH_TERM; desc->payload = digest_phys; } @@ -547,9 +603,10 @@ static int mxs_dcp_run_sha(struct ahash_request *req) ret = mxs_dcp_start_dma(actx); if (rctx->fini) - dma_unmap_single(sdcp->dev, digest_phys, halg->digestsize, + dma_unmap_single(sdcp->dev, digest_phys, DCP_SHA_PAY_SZ, DMA_FROM_DEVICE); +done_run: dma_unmap_single(sdcp->dev, buf_phys, DCP_BUF_SZ, DMA_TO_DEVICE); return ret; @@ -567,6 +624,7 @@ static int dcp_sha_req_to_buf(struct crypto_async_request *arq) const int nents = sg_nents(req->src); uint8_t *in_buf = sdcp->coh->sha_in_buf; + uint8_t *out_buf = sdcp->coh->sha_out_buf; uint8_t *src_buf; @@ -621,11 +679,9 @@ static int dcp_sha_req_to_buf(struct crypto_async_request *arq) actx->fill = 0; - /* For some reason, the result is flipped. */ - for (i = 0; i < halg->digestsize / 2; i++) { - swap(req->result[i], - req->result[halg->digestsize - i - 1]); - } + /* For some reason the result is flipped */ + for (i = 0; i < halg->digestsize; i++) + req->result[i] = out_buf[halg->digestsize - i - 1]; } return 0; diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index b6f14844702e..7eaeb8507e06 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -1616,6 +1616,11 @@ static const struct of_device_id spacc_of_id_table[] = { MODULE_DEVICE_TABLE(of, spacc_of_id_table); #endif /* CONFIG_OF */ +static void spacc_tasklet_kill(void *data) +{ + tasklet_kill(data); +} + static int spacc_probe(struct platform_device *pdev) { int i, err, ret = -EINVAL; @@ -1659,6 +1664,14 @@ static int spacc_probe(struct platform_device *pdev) return -ENXIO; } + tasklet_init(&engine->complete, spacc_spacc_complete, + (unsigned long)engine); + + ret = devm_add_action(&pdev->dev, spacc_tasklet_kill, + &engine->complete); + if (ret) + return ret; + if (devm_request_irq(&pdev->dev, irq->start, spacc_spacc_irq, 0, engine->name, engine)) { dev_err(engine->dev, "failed to request IRQ\n"); @@ -1721,8 +1734,6 @@ static int spacc_probe(struct platform_device *pdev) INIT_LIST_HEAD(&engine->completed); INIT_LIST_HEAD(&engine->in_progress); engine->in_flight = 0; - tasklet_init(&engine->complete, spacc_spacc_complete, - (unsigned long)engine); platform_set_drvdata(pdev, engine); diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index aec66159566d..9a5213cbcbe1 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -323,7 +323,7 @@ static void s5p_unset_indata(struct s5p_aes_dev *dev) } static int s5p_make_sg_cpy(struct s5p_aes_dev *dev, struct scatterlist *src, - struct scatterlist **dst) + struct scatterlist **dst) { void *pages; int len; @@ -569,7 +569,7 @@ static int s5p_set_indata_start(struct s5p_aes_dev *dev, } static int s5p_set_outdata_start(struct s5p_aes_dev *dev, - struct ablkcipher_request *req) + struct ablkcipher_request *req) { struct scatterlist *sg; int err; diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c index 4835dd4a9e50..4909f820e953 100644 --- a/drivers/crypto/stm32/stm32-hash.c +++ b/drivers/crypto/stm32/stm32-hash.c @@ -361,7 +361,7 @@ static int stm32_hash_xmit_cpu(struct stm32_hash_dev *hdev, return -ETIMEDOUT; if ((hdev->flags & HASH_FLAGS_HMAC) && - (hdev->flags & ~HASH_FLAGS_HMAC_KEY)) { + (!(hdev->flags & HASH_FLAGS_HMAC_KEY))) { hdev->flags |= HASH_FLAGS_HMAC_KEY; stm32_hash_write_key(hdev); if (stm32_hash_wait_busy(hdev)) diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c index 5cf64746731a..22e491857925 100644 --- a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c +++ b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c @@ -81,7 +81,8 @@ static int sun4i_ss_opti_poll(struct skcipher_request *areq) oi = 0; oo = 0; do { - todo = min3(rx_cnt, ileft, (mi.length - oi) / 4); + todo = min(rx_cnt, ileft); + todo = min_t(size_t, todo, (mi.length - oi) / 4); if (todo) { ileft -= todo; writesl(ss->base + SS_RXFIFO, mi.addr + oi, todo); @@ -96,7 +97,8 @@ static int sun4i_ss_opti_poll(struct skcipher_request *areq) rx_cnt = SS_RXFIFO_SPACES(spaces); tx_cnt = SS_TXFIFO_SPACES(spaces); - todo = min3(tx_cnt, oleft, (mo.length - oo) / 4); + todo = min(tx_cnt, oleft); + todo = min_t(size_t, todo, (mo.length - oo) / 4); if (todo) { oleft -= todo; readsl(ss->base + SS_TXFIFO, mo.addr + oo, todo); @@ -220,7 +222,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) * todo is the number of consecutive 4byte word that we * can read from current SG */ - todo = min3(rx_cnt, ileft / 4, (mi.length - oi) / 4); + todo = min(rx_cnt, ileft / 4); + todo = min_t(size_t, todo, (mi.length - oi) / 4); if (todo && !ob) { writesl(ss->base + SS_RXFIFO, mi.addr + oi, todo); @@ -234,8 +237,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) * we need to be able to write all buf in one * pass, so it is why we min() with rx_cnt */ - todo = min3(rx_cnt * 4 - ob, ileft, - mi.length - oi); + todo = min(rx_cnt * 4 - ob, ileft); + todo = min_t(size_t, todo, mi.length - oi); memcpy(buf + ob, mi.addr + oi, todo); ileft -= todo; oi += todo; @@ -255,7 +258,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) spaces = readl(ss->base + SS_FCSR); rx_cnt = SS_RXFIFO_SPACES(spaces); tx_cnt = SS_TXFIFO_SPACES(spaces); - dev_dbg(ss->dev, "%x %u/%u %u/%u cnt=%u %u/%u %u/%u cnt=%u %u\n", + dev_dbg(ss->dev, + "%x %u/%zu %u/%u cnt=%u %u/%zu %u/%u cnt=%u %u\n", mode, oi, mi.length, ileft, areq->cryptlen, rx_cnt, oo, mo.length, oleft, areq->cryptlen, tx_cnt, ob); @@ -263,7 +267,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) if (!tx_cnt) continue; /* todo in 4bytes word */ - todo = min3(tx_cnt, oleft / 4, (mo.length - oo) / 4); + todo = min(tx_cnt, oleft / 4); + todo = min_t(size_t, todo, (mo.length - oo) / 4); if (todo) { readsl(ss->base + SS_TXFIFO, mo.addr + oo, todo); oleft -= todo * 4; @@ -287,7 +292,8 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) * no more than remaining buffer * no need to test against oleft */ - todo = min(mo.length - oo, obl - obo); + todo = min_t(size_t, + mo.length - oo, obl - obo); memcpy(mo.addr + oo, bufo + obo, todo); oleft -= todo; obo += todo; diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-hash.c b/drivers/crypto/sunxi-ss/sun4i-ss-hash.c index f6936bb3b7be..2d178e013535 100644 --- a/drivers/crypto/sunxi-ss/sun4i-ss-hash.c +++ b/drivers/crypto/sunxi-ss/sun4i-ss-hash.c @@ -179,7 +179,7 @@ static int sun4i_hash(struct ahash_request *areq) */ unsigned int i = 0, end, fill, min_fill, nwait, nbw = 0, j = 0, todo; unsigned int in_i = 0; - u32 spaces, rx_cnt = SS_RX_DEFAULT, bf[32] = {0}, wb = 0, v, ivmode = 0; + u32 spaces, rx_cnt = SS_RX_DEFAULT, bf[32] = {0}, v, ivmode = 0; struct sun4i_req_ctx *op = ahash_request_ctx(areq); struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct sun4i_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm); @@ -188,6 +188,7 @@ static int sun4i_hash(struct ahash_request *areq) struct sg_mapping_iter mi; int in_r, err = 0; size_t copied = 0; + __le32 wb = 0; dev_dbg(ss->dev, "%s %s bc=%llu len=%u mode=%x wl=%u h0=%0x", __func__, crypto_tfm_alg_name(areq->base.tfm), @@ -276,8 +277,8 @@ static int sun4i_hash(struct ahash_request *areq) */ while (op->len < 64 && i < end) { /* how many bytes we can read from current SG */ - in_r = min3(mi.length - in_i, end - i, - 64 - op->len); + in_r = min(end - i, 64 - op->len); + in_r = min_t(size_t, mi.length - in_i, in_r); memcpy(op->buf + op->len, mi.addr + in_i, in_r); op->len += in_r; i += in_r; @@ -297,8 +298,8 @@ static int sun4i_hash(struct ahash_request *areq) } if (mi.length - in_i > 3 && i < end) { /* how many bytes we can read from current SG */ - in_r = min3(mi.length - in_i, areq->nbytes - i, - ((mi.length - in_i) / 4) * 4); + in_r = min_t(size_t, mi.length - in_i, areq->nbytes - i); + in_r = min_t(size_t, ((mi.length - in_i) / 4) * 4, in_r); /* how many bytes we can write in the device*/ todo = min3((u32)(end - i) / 4, rx_cnt, (u32)in_r / 4); writesl(ss->base + SS_RXFIFO, mi.addr + in_i, todo); @@ -324,8 +325,8 @@ static int sun4i_hash(struct ahash_request *areq) if ((areq->nbytes - i) < 64) { while (i < areq->nbytes && in_i < mi.length && op->len < 64) { /* how many bytes we can read from current SG */ - in_r = min3(mi.length - in_i, areq->nbytes - i, - 64 - op->len); + in_r = min(areq->nbytes - i, 64 - op->len); + in_r = min_t(size_t, mi.length - in_i, in_r); memcpy(op->buf + op->len, mi.addr + in_i, in_r); op->len += in_r; i += in_r; @@ -399,7 +400,7 @@ hash_final: nbw = op->len - 4 * nwait; if (nbw) { - wb = *(u32 *)(op->buf + nwait * 4); + wb = cpu_to_le32(*(u32 *)(op->buf + nwait * 4)); wb &= GENMASK((nbw * 8) - 1, 0); op->byte_count += nbw; @@ -408,7 +409,7 @@ hash_final: /* write the remaining bytes of the nbw buffer */ wb |= ((1 << 7) << (nbw * 8)); - bf[j++] = wb; + bf[j++] = le32_to_cpu(wb); /* * number of space to pad to obtain 64o minus 8(size) minus 4 (final 1) @@ -427,13 +428,13 @@ hash_final: /* write the length of data */ if (op->mode == SS_OP_SHA1) { - __be64 bits = cpu_to_be64(op->byte_count << 3); - bf[j++] = lower_32_bits(bits); - bf[j++] = upper_32_bits(bits); + __be64 *bits = (__be64 *)&bf[j]; + *bits = cpu_to_be64(op->byte_count << 3); + j += 2; } else { - __le64 bits = op->byte_count << 3; - bf[j++] = lower_32_bits(bits); - bf[j++] = upper_32_bits(bits); + __le64 *bits = (__le64 *)&bf[j]; + *bits = cpu_to_le64(op->byte_count << 3); + j += 2; } writesl(ss->base + SS_RXFIFO, bf, j); @@ -475,7 +476,7 @@ hash_final: } } else { for (i = 0; i < 4; i++) { - v = readl(ss->base + SS_MD0 + i * 4); + v = cpu_to_le32(readl(ss->base + SS_MD0 + i * 4)); memcpy(areq->result + i * 4, &v, 4); } } diff --git a/drivers/crypto/virtio/virtio_crypto_algs.c b/drivers/crypto/virtio/virtio_crypto_algs.c index 5035b0dc1e40..e2231a1a05a1 100644 --- a/drivers/crypto/virtio/virtio_crypto_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_algs.c @@ -110,8 +110,6 @@ virtio_crypto_alg_validate_key(int key_len, uint32_t *alg) *alg = VIRTIO_CRYPTO_CIPHER_AES_CBC; break; default: - pr_err("virtio_crypto: Unsupported key length: %d\n", - key_len); return -EINVAL; } return 0; @@ -485,6 +483,11 @@ static int virtio_crypto_ablkcipher_encrypt(struct ablkcipher_request *req) /* Use the first data virtqueue as default */ struct data_queue *data_vq = &vcrypto->data_vq[0]; + if (!req->nbytes) + return 0; + if (req->nbytes % AES_BLOCK_SIZE) + return -EINVAL; + vc_req->dataq = data_vq; vc_req->alg_cb = virtio_crypto_dataq_sym_callback; vc_sym_req->ablkcipher_ctx = ctx; @@ -505,6 +508,11 @@ static int virtio_crypto_ablkcipher_decrypt(struct ablkcipher_request *req) /* Use the first data virtqueue as default */ struct data_queue *data_vq = &vcrypto->data_vq[0]; + if (!req->nbytes) + return 0; + if (req->nbytes % AES_BLOCK_SIZE) + return -EINVAL; + vc_req->dataq = data_vq; vc_req->alg_cb = virtio_crypto_dataq_sym_callback; vc_sym_req->ablkcipher_ctx = ctx; diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile index cab32cfec9c4..709670d2b553 100644 --- a/drivers/crypto/vmx/Makefile +++ b/drivers/crypto/vmx/Makefile @@ -3,13 +3,13 @@ obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) -TARGET := linux-ppc64le +override flavour := linux-ppc64le else -TARGET := linux-ppc64 +override flavour := linux-ppc64 endif quiet_cmd_perl = PERL $@ - cmd_perl = $(PERL) $(<) $(TARGET) > $(@) + cmd_perl = $(PERL) $(<) $(flavour) > $(@) targets += aesp8-ppc.S ghashp8-ppc.S diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig index e31b7caefa04..2acce28d9a73 100644 --- a/drivers/devfreq/Kconfig +++ b/drivers/devfreq/Kconfig @@ -103,7 +103,8 @@ config ARM_TEGRA_DEVFREQ config ARM_RK3399_DMC_DEVFREQ tristate "ARM RK3399 DMC DEVFREQ Driver" - depends on ARCH_ROCKCHIP + depends on (ARCH_ROCKCHIP && HAVE_ARM_SMCCC) || \ + (COMPILE_TEST && HAVE_ARM_SMCCC) select DEVFREQ_EVENT_ROCKCHIP_DFI select DEVFREQ_GOV_SIMPLE_ONDEMAND select PM_DEVFREQ_EVENT diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 8a411514a7c5..b05e6a15221c 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -133,6 +133,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) int lev, prev_lev, ret = 0; unsigned long cur_time; + lockdep_assert_held(&devfreq->lock); cur_time = jiffies; /* Immediately exit if previous_freq is not initialized yet. */ @@ -483,11 +484,6 @@ static void devfreq_dev_release(struct device *dev) struct devfreq *devfreq = to_devfreq(dev); mutex_lock(&devfreq_list_lock); - if (IS_ERR(find_device_devfreq(devfreq->dev.parent))) { - mutex_unlock(&devfreq_list_lock); - dev_warn(&devfreq->dev, "releasing devfreq which doesn't exist\n"); - return; - } list_del(&devfreq->node); mutex_unlock(&devfreq_list_lock); @@ -517,7 +513,6 @@ struct devfreq *devfreq_add_device(struct device *dev, { struct devfreq *devfreq; struct devfreq_governor *governor; - static atomic_t devfreq_no = ATOMIC_INIT(-1); int err = 0; if (!dev || !profile || !governor_name) { @@ -546,6 +541,7 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->dev.parent = dev; devfreq->dev.class = devfreq_class; devfreq->dev.release = devfreq_dev_release; + INIT_LIST_HEAD(&devfreq->node); devfreq->profile = profile; strncpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN); devfreq->previous_freq = profile->initial_freq; @@ -559,8 +555,7 @@ struct devfreq *devfreq_add_device(struct device *dev, mutex_lock(&devfreq->lock); } - dev_set_name(&devfreq->dev, "devfreq%d", - atomic_inc_return(&devfreq_no)); + dev_set_name(&devfreq->dev, "%s", dev_name(dev)); err = device_register(&devfreq->dev); if (err) { mutex_unlock(&devfreq->lock); @@ -905,6 +900,14 @@ err_out: } EXPORT_SYMBOL(devfreq_remove_governor); +static ssize_t name_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct devfreq *devfreq = to_devfreq(dev); + return sprintf(buf, "%s\n", dev_name(devfreq->dev.parent)); +} +static DEVICE_ATTR_RO(name); + static ssize_t governor_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -977,7 +980,7 @@ static ssize_t available_governors_show(struct device *d, * The devfreq with immutable governor (e.g., passive) shows * only own governor. */ - if (df->governor->immutable) { + if (df->governor && df->governor->immutable) { count = scnprintf(&buf[count], DEVFREQ_NAME_LEN, "%s ", df->governor_name); /* @@ -1161,12 +1164,17 @@ static ssize_t trans_stat_show(struct device *dev, int i, j; unsigned int max_state = devfreq->profile->max_state; - if (!devfreq->stop_polling && - devfreq_update_status(devfreq, devfreq->previous_freq)) - return 0; if (max_state == 0) return sprintf(buf, "Not Supported.\n"); + mutex_lock(&devfreq->lock); + if (!devfreq->stop_polling && + devfreq_update_status(devfreq, devfreq->previous_freq)) { + mutex_unlock(&devfreq->lock); + return 0; + } + mutex_unlock(&devfreq->lock); + len = sprintf(buf, " From : To\n"); len += sprintf(buf + len, " :"); for (i = 0; i < max_state; i++) @@ -1198,6 +1206,7 @@ static ssize_t trans_stat_show(struct device *dev, static DEVICE_ATTR_RO(trans_stat); static struct attribute *devfreq_attrs[] = { + &dev_attr_name.attr, &dev_attr_governor.attr, &dev_attr_available_governors.attr, &dev_attr_cur_freq.attr, diff --git a/drivers/devfreq/event/Kconfig b/drivers/devfreq/event/Kconfig index cd949800eed9..8851bc4e8e3e 100644 --- a/drivers/devfreq/event/Kconfig +++ b/drivers/devfreq/event/Kconfig @@ -33,7 +33,7 @@ config DEVFREQ_EVENT_EXYNOS_PPMU config DEVFREQ_EVENT_ROCKCHIP_DFI tristate "ROCKCHIP DFI DEVFREQ event Driver" - depends on ARCH_ROCKCHIP + depends on ARCH_ROCKCHIP || COMPILE_TEST help This add the devfreq-event driver for Rockchip SoC. It provides DFI (DDR Monitor Module) driver to count ddr load. diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index b19e05806bc1..4dae8341bb7d 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -230,7 +230,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a, a_fences = get_fences(a, &a_num_fences); b_fences = get_fences(b, &b_num_fences); if (a_num_fences > INT_MAX - b_num_fences) - return NULL; + goto err; num_fences = a_num_fences + b_num_fences; diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 327326bd411e..7f47201e6b8f 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -143,7 +143,7 @@ config DMA_JZ4740 config DMA_JZ4780 tristate "JZ4780 DMA support" - depends on MACH_JZ4780 || COMPILE_TEST + depends on MIPS || COMPILE_TEST select DMA_ENGINE select DMA_VIRTUAL_CHANNELS help diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index 74794c9859f6..f0932f25a9b1 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -1797,13 +1797,10 @@ static struct dma_chan *coh901318_xlate(struct of_phandle_args *dma_spec, static int coh901318_config(struct coh901318_chan *cohc, struct coh901318_params *param) { - unsigned long flags; const struct coh901318_params *p; int channel = cohc->id; void __iomem *virtbase = cohc->base->virtbase; - spin_lock_irqsave(&cohc->lock, flags); - if (param) p = param; else @@ -1823,8 +1820,6 @@ static int coh901318_config(struct coh901318_chan *cohc, coh901318_set_conf(cohc, p->config); coh901318_set_ctrl(cohc, p->ctrl_lli_last); - spin_unlock_irqrestore(&cohc->lock, flags); - return 0; } @@ -1949,8 +1944,6 @@ static void dma_tc_handle(struct coh901318_chan *cohc) return; } - spin_lock(&cohc->lock); - /* * When we reach this point, at least one queue item * should have been moved over from cohc->queue to @@ -1971,8 +1964,6 @@ static void dma_tc_handle(struct coh901318_chan *cohc) if (coh901318_queue_start(cohc) == NULL) cohc->busy = 0; - spin_unlock(&cohc->lock); - /* * This tasklet will remove items from cohc->active * and thus terminates them. diff --git a/drivers/dma/cppi41.c b/drivers/dma/cppi41.c index ddd4a3932127..cf119a8ccdd5 100644 --- a/drivers/dma/cppi41.c +++ b/drivers/dma/cppi41.c @@ -585,9 +585,22 @@ static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg( enum dma_transfer_direction dir, unsigned long tx_flags, void *context) { struct cppi41_channel *c = to_cpp41_chan(chan); + struct dma_async_tx_descriptor *txd = NULL; + struct cppi41_dd *cdd = c->cdd; struct cppi41_desc *d; struct scatterlist *sg; unsigned int i; + int error; + + error = pm_runtime_get(cdd->ddev.dev); + if (error < 0) { + pm_runtime_put_noidle(cdd->ddev.dev); + + return NULL; + } + + if (cdd->is_suspended) + goto err_out_not_ready; d = c->desc; for_each_sg(sgl, sg, sg_len, i) { @@ -610,7 +623,13 @@ static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg( d++; } - return &c->txd; + txd = &c->txd; + +err_out_not_ready: + pm_runtime_mark_last_busy(cdd->ddev.dev); + pm_runtime_put_autosuspend(cdd->ddev.dev); + + return txd; } static void cppi41_compute_td_desc(struct cppi41_desc *d) diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c index 7f0b9aa15867..9887f2a14aa9 100644 --- a/drivers/dma/dma-axi-dmac.c +++ b/drivers/dma/dma-axi-dmac.c @@ -451,7 +451,7 @@ static struct dma_async_tx_descriptor *axi_dmac_prep_interleaved( if (chan->hw_2d) { if (!axi_dmac_check_len(chan, xt->sgl[0].size) || - !axi_dmac_check_len(chan, xt->numf)) + xt->numf == 0) return NULL; if (xt->sgl[0].size + dst_icg > chan->max_length || xt->sgl[0].size + src_icg > chan->max_length) diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c index 803cfb4523b0..aca2d6fd92d5 100644 --- a/drivers/dma/dma-jz4780.c +++ b/drivers/dma/dma-jz4780.c @@ -580,7 +580,7 @@ static enum dma_status jz4780_dma_tx_status(struct dma_chan *chan, to_jz4780_dma_desc(vdesc), 0); } else if (cookie == jzchan->desc->vdesc.tx.cookie) { txstate->residue = jz4780_dma_desc_residue(jzchan, jzchan->desc, - (jzchan->curr_hwdesc + 1) % jzchan->desc->count); + jzchan->curr_hwdesc + 1); } else txstate->residue = 0; diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index b451354735d3..faaaf10311ec 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -192,7 +192,7 @@ __dma_device_satisfies_mask(struct dma_device *device, static struct module *dma_chan_to_owner(struct dma_chan *chan) { - return chan->device->dev->driver->owner; + return chan->device->owner; } /** @@ -928,6 +928,8 @@ int dma_async_device_register(struct dma_device *device) return -EIO; } + device->owner = device->dev->driver->owner; + if (dma_has_cap(DMA_MEMCPY, device->cap_mask) && !device->device_prep_dma_memcpy) { dev_err(device->dev, "Device claims capability %s, but op is not defined\n", diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 0f389e008ce6..055d83b6cb68 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -160,12 +160,14 @@ static void dwc_initialize_chan_idma32(struct dw_dma_chan *dwc) static void dwc_initialize_chan_dw(struct dw_dma_chan *dwc) { + struct dw_dma *dw = to_dw_dma(dwc->chan.device); u32 cfghi = DWC_CFGH_FIFO_MODE; u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority); bool hs_polarity = dwc->dws.hs_polarity; cfghi |= DWC_CFGH_DST_PER(dwc->dws.dst_id); cfghi |= DWC_CFGH_SRC_PER(dwc->dws.src_id); + cfghi |= DWC_CFGH_PROTCTL(dw->pdata->protctl); /* Set polarity of handshake interface */ cfglo |= hs_polarity ? DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL : 0; diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index bc31fe802061..b408c07662f5 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -87,13 +87,20 @@ static void dw_dma_acpi_controller_register(struct dw_dma *dw) dma_cap_set(DMA_SLAVE, info->dma_cap); info->filter_fn = dw_dma_acpi_filter; - ret = devm_acpi_dma_controller_register(dev, acpi_dma_simple_xlate, - info); + ret = acpi_dma_controller_register(dev, acpi_dma_simple_xlate, info); if (ret) dev_err(dev, "could not register acpi_dma_controller\n"); } + +static void dw_dma_acpi_controller_free(struct dw_dma *dw) +{ + struct device *dev = dw->dma.dev; + + acpi_dma_controller_free(dev); +} #else /* !CONFIG_ACPI */ static inline void dw_dma_acpi_controller_register(struct dw_dma *dw) {} +static inline void dw_dma_acpi_controller_free(struct dw_dma *dw) {} #endif /* !CONFIG_ACPI */ #ifdef CONFIG_OF @@ -162,6 +169,12 @@ dw_dma_parse_dt(struct platform_device *pdev) pdata->multi_block[tmp] = 1; } + if (!of_property_read_u32(np, "snps,dma-protection-control", &tmp)) { + if (tmp > CHAN_PROTCTL_MASK) + return NULL; + pdata->protctl = tmp; + } + return pdata; } #else @@ -243,6 +256,9 @@ static int dw_remove(struct platform_device *pdev) { struct dw_dma_chip *chip = platform_get_drvdata(pdev); + if (ACPI_HANDLE(&pdev->dev)) + dw_dma_acpi_controller_free(chip->dw); + if (pdev->dev.of_node) of_dma_controller_free(pdev->dev.of_node); diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index 09e7dfdbb790..646c9c960c07 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -200,6 +200,10 @@ enum dw_dma_msize { #define DWC_CFGH_FCMODE (1 << 0) #define DWC_CFGH_FIFO_MODE (1 << 1) #define DWC_CFGH_PROTCTL(x) ((x) << 2) +#define DWC_CFGH_PROTCTL_DATA (0 << 2) /* data access - always set */ +#define DWC_CFGH_PROTCTL_PRIV (1 << 2) /* privileged -> AHB HPROT[1] */ +#define DWC_CFGH_PROTCTL_BUFFER (2 << 2) /* bufferable -> AHB HPROT[2] */ +#define DWC_CFGH_PROTCTL_CACHE (4 << 2) /* cacheable -> AHB HPROT[3] */ #define DWC_CFGH_DS_UPD_EN (1 << 5) #define DWC_CFGH_SS_UPD_EN (1 << 6) #define DWC_CFGH_SRC_PER(x) ((x) << 7) diff --git a/drivers/dma/edma.c b/drivers/dma/edma.c index 519c24465dea..57a49fe713fd 100644 --- a/drivers/dma/edma.c +++ b/drivers/dma/edma.c @@ -2340,8 +2340,10 @@ static int edma_probe(struct platform_device *pdev) ecc->tc_list = devm_kcalloc(dev, ecc->num_tc, sizeof(*ecc->tc_list), GFP_KERNEL); - if (!ecc->tc_list) - return -ENOMEM; + if (!ecc->tc_list) { + ret = -ENOMEM; + goto err_reg1; + } for (i = 0;; i++) { ret = of_parse_phandle_with_fixed_args(node, "ti,tptcs", diff --git a/drivers/dma/hsu/hsu.c b/drivers/dma/hsu/hsu.c index 29d04ca71d52..15525a2b8ebd 100644 --- a/drivers/dma/hsu/hsu.c +++ b/drivers/dma/hsu/hsu.c @@ -64,10 +64,10 @@ static void hsu_dma_chan_start(struct hsu_dma_chan *hsuc) if (hsuc->direction == DMA_MEM_TO_DEV) { bsr = config->dst_maxburst; - mtsr = config->src_addr_width; + mtsr = config->dst_addr_width; } else if (hsuc->direction == DMA_DEV_TO_MEM) { bsr = config->src_maxburst; - mtsr = config->dst_addr_width; + mtsr = config->src_addr_width; } hsu_chan_disable(hsuc); diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c index 0fc12a8783e3..99f3f22ed647 100644 --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c @@ -1441,6 +1441,14 @@ static void sdma_add_scripts(struct sdma_engine *sdma, if (!sdma->script_number) sdma->script_number = SDMA_SCRIPT_ADDRS_ARRAY_SIZE_V1; + if (sdma->script_number > sizeof(struct sdma_script_start_addrs) + / sizeof(s32)) { + dev_err(sdma->dev, + "SDMA script number %d not match with firmware.\n", + sdma->script_number); + return; + } + for (i = 0; i < sdma->script_number; i++) if (addr_arr[i] > 0) saddr_arr[i] = addr_arr[i]; diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index f70cc74032ea..e3899ae429e0 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -388,10 +388,11 @@ ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags) descs->virt = dma_alloc_coherent(to_dev(ioat_chan), SZ_2M, &descs->hw, flags); - if (!descs->virt && (i > 0)) { + if (!descs->virt) { int idx; for (idx = 0; idx < i; idx++) { + descs = &ioat_chan->descs[idx]; dma_free_coherent(to_dev(ioat_chan), SZ_2M, descs->virt, descs->hw); descs->virt = NULL; diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c index 68680e4151ea..9103a0425f75 100644 --- a/drivers/dma/ioat/init.c +++ b/drivers/dma/ioat/init.c @@ -129,7 +129,7 @@ static void ioat_init_channel(struct ioatdma_device *ioat_dma, struct ioatdma_chan *ioat_chan, int idx); static void ioat_intr_quirk(struct ioatdma_device *ioat_dma); -static int ioat_enumerate_channels(struct ioatdma_device *ioat_dma); +static void ioat_enumerate_channels(struct ioatdma_device *ioat_dma); static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma); static int ioat_dca_enabled = 1; @@ -575,7 +575,7 @@ static void ioat_dma_remove(struct ioatdma_device *ioat_dma) * ioat_enumerate_channels - find and initialize the device's channels * @ioat_dma: the ioat dma device to be enumerated */ -static int ioat_enumerate_channels(struct ioatdma_device *ioat_dma) +static void ioat_enumerate_channels(struct ioatdma_device *ioat_dma) { struct ioatdma_chan *ioat_chan; struct device *dev = &ioat_dma->pdev->dev; @@ -594,7 +594,7 @@ static int ioat_enumerate_channels(struct ioatdma_device *ioat_dma) xfercap_log = readb(ioat_dma->reg_base + IOAT_XFERCAP_OFFSET); xfercap_log &= 0x1f; /* bits [4:0] valid */ if (xfercap_log == 0) - return 0; + return; dev_dbg(dev, "%s: xfercap = %d\n", __func__, 1 << xfercap_log); for (i = 0; i < dma->chancnt; i++) { @@ -611,7 +611,6 @@ static int ioat_enumerate_channels(struct ioatdma_device *ioat_dma) } } dma->chancnt = i; - return i; } /** diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c index 219ae3b545db..803045c92f3b 100644 --- a/drivers/dma/k3dma.c +++ b/drivers/dma/k3dma.c @@ -222,9 +222,11 @@ static irqreturn_t k3_dma_int_handler(int irq, void *dev_id) c = p->vchan; if (c && (tc1 & BIT(i))) { spin_lock_irqsave(&c->vc.lock, flags); - vchan_cookie_complete(&p->ds_run->vd); - p->ds_done = p->ds_run; - p->ds_run = NULL; + if (p->ds_run != NULL) { + vchan_cookie_complete(&p->ds_run->vd); + p->ds_done = p->ds_run; + p->ds_run = NULL; + } spin_unlock_irqrestore(&c->vc.lock, flags); } if (c && (tc2 & BIT(i))) { @@ -264,6 +266,10 @@ static int k3_dma_start_txd(struct k3_dma_chan *c) if (BIT(c->phy->idx) & k3_dma_get_chan_stat(d)) return -EAGAIN; + /* Avoid losing track of ds_run if a transaction is in flight */ + if (c->phy->ds_run) + return -EAGAIN; + if (vd) { struct k3_dma_desc_sw *ds = container_of(vd, struct k3_dma_desc_sw, vd); diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 1993889003fd..1c57577f49fe 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -1059,6 +1059,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev, mv_chan->op_in_desc = XOR_MODE_IN_DESC; dma_dev = &mv_chan->dmadev; + dma_dev->dev = &pdev->dev; mv_chan->xordev = xordev; /* @@ -1091,7 +1092,6 @@ mv_xor_channel_add(struct mv_xor_device *xordev, dma_dev->device_free_chan_resources = mv_xor_free_chan_resources; dma_dev->device_tx_status = mv_xor_status; dma_dev->device_issue_pending = mv_xor_issue_pending; - dma_dev->dev = &pdev->dev; /* set prep routines based on capability */ if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index 8fbf175fdcc7..57c5cc51f862 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -690,7 +690,21 @@ static int bam_dma_terminate_all(struct dma_chan *chan) /* remove all transactions, including active transaction */ spin_lock_irqsave(&bchan->vc.lock, flag); + /* + * If we have transactions queued, then some might be committed to the + * hardware in the desc fifo. The only way to reset the desc fifo is + * to do a hardware reset (either by pipe or the entire block). + * bam_chan_init_hw() will trigger a pipe reset, and also reinit the + * pipe. If the pipe is left disabled (default state after pipe reset) + * and is accessed by a connected hardware engine, a fatal error in + * the BAM will occur. There is a small window where this could happen + * with bam_chan_init_hw(), but it is assumed that the caller has + * stopped activity on any attached hardware engine. Make sure to do + * this first so that the BAM hardware doesn't cause memory corruption + * by accessing freed resources. + */ if (bchan->curr_txd) { + bam_chan_init_hw(bchan, bchan->curr_txd->dir); list_add(&bchan->curr_txd->vd.node, &bchan->vc.desc_issued); bchan->curr_txd = NULL; } diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 19c7433e8309..f7ca57125ac7 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -200,6 +200,7 @@ struct rcar_dmac { struct dma_device engine; struct device *dev; void __iomem *iomem; + struct device_dma_parameters parms; unsigned int n_channels; struct rcar_dmac_chan *channels; @@ -1764,6 +1765,8 @@ static int rcar_dmac_probe(struct platform_device *pdev) dmac->dev = &pdev->dev; platform_set_drvdata(pdev, dmac); + dmac->dev->dma_parms = &dmac->parms; + dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK); dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40)); ret = rcar_dmac_parse_of(&pdev->dev, dmac); diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c index 7db2766b5fe9..3402494cadf9 100644 --- a/drivers/dma/tegra20-apb-dma.c +++ b/drivers/dma/tegra20-apb-dma.c @@ -288,7 +288,7 @@ static struct tegra_dma_desc *tegra_dma_desc_get( /* Do not allocate if desc are waiting for ack */ list_for_each_entry(dma_desc, &tdc->free_dma_desc, node) { - if (async_tx_test_ack(&dma_desc->txd)) { + if (async_tx_test_ack(&dma_desc->txd) && !dma_desc->cb_count) { list_del(&dma_desc->node); spin_unlock_irqrestore(&tdc->lock, flags); dma_desc->txd.flags = 0; @@ -755,10 +755,6 @@ static int tegra_dma_terminate_all(struct dma_chan *dc) bool was_busy; spin_lock_irqsave(&tdc->lock, flags); - if (list_empty(&tdc->pending_sg_req)) { - spin_unlock_irqrestore(&tdc->lock, flags); - return 0; - } if (!tdc->busy) goto skip_dma_stop; diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c index 09b6756366c3..4f4733d831a1 100644 --- a/drivers/dma/tegra210-adma.c +++ b/drivers/dma/tegra210-adma.c @@ -98,6 +98,7 @@ struct tegra_adma_chan_regs { unsigned int src_addr; unsigned int trg_addr; unsigned int fifo_ctrl; + unsigned int cmd; unsigned int tc; }; @@ -127,6 +128,7 @@ struct tegra_adma_chan { enum dma_transfer_direction sreq_dir; unsigned int sreq_index; bool sreq_reserved; + struct tegra_adma_chan_regs ch_regs; /* Transfer count and position info */ unsigned int tx_buf_count; @@ -635,8 +637,30 @@ static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec, static int tegra_adma_runtime_suspend(struct device *dev) { struct tegra_adma *tdma = dev_get_drvdata(dev); + struct tegra_adma_chan_regs *ch_reg; + struct tegra_adma_chan *tdc; + int i; tdma->global_cmd = tdma_read(tdma, ADMA_GLOBAL_CMD); + if (!tdma->global_cmd) + goto clk_disable; + + for (i = 0; i < tdma->nr_channels; i++) { + tdc = &tdma->channels[i]; + ch_reg = &tdc->ch_regs; + ch_reg->cmd = tdma_ch_read(tdc, ADMA_CH_CMD); + /* skip if channel is not active */ + if (!ch_reg->cmd) + continue; + ch_reg->tc = tdma_ch_read(tdc, ADMA_CH_TC); + ch_reg->src_addr = tdma_ch_read(tdc, ADMA_CH_LOWER_SRC_ADDR); + ch_reg->trg_addr = tdma_ch_read(tdc, ADMA_CH_LOWER_TRG_ADDR); + ch_reg->ctrl = tdma_ch_read(tdc, ADMA_CH_CTRL); + ch_reg->fifo_ctrl = tdma_ch_read(tdc, ADMA_CH_FIFO_CTRL); + ch_reg->config = tdma_ch_read(tdc, ADMA_CH_CONFIG); + } + +clk_disable: clk_disable_unprepare(tdma->ahub_clk); return 0; @@ -645,7 +669,9 @@ static int tegra_adma_runtime_suspend(struct device *dev) static int tegra_adma_runtime_resume(struct device *dev) { struct tegra_adma *tdma = dev_get_drvdata(dev); - int ret; + struct tegra_adma_chan_regs *ch_reg; + struct tegra_adma_chan *tdc; + int ret, i; ret = clk_prepare_enable(tdma->ahub_clk); if (ret) { @@ -654,6 +680,24 @@ static int tegra_adma_runtime_resume(struct device *dev) } tdma_write(tdma, ADMA_GLOBAL_CMD, tdma->global_cmd); + if (!tdma->global_cmd) + return 0; + + for (i = 0; i < tdma->nr_channels; i++) { + tdc = &tdma->channels[i]; + ch_reg = &tdc->ch_regs; + /* skip if channel was not active earlier */ + if (!ch_reg->cmd) + continue; + tdma_ch_write(tdc, ADMA_CH_TC, ch_reg->tc); + tdma_ch_write(tdc, ADMA_CH_LOWER_SRC_ADDR, ch_reg->src_addr); + tdma_ch_write(tdc, ADMA_CH_LOWER_TRG_ADDR, ch_reg->trg_addr); + tdma_ch_write(tdc, ADMA_CH_CTRL, ch_reg->ctrl); + tdma_ch_write(tdc, ADMA_CH_FIFO_CTRL, ch_reg->fifo_ctrl); + tdma_ch_write(tdc, ADMA_CH_CONFIG, ch_reg->config); + tdma_ch_write(tdc, ADMA_CH_CMD, ch_reg->cmd); + } + return 0; } @@ -700,16 +744,6 @@ static int tegra_adma_probe(struct platform_device *pdev) return PTR_ERR(tdma->ahub_clk); } - pm_runtime_enable(&pdev->dev); - - ret = pm_runtime_get_sync(&pdev->dev); - if (ret < 0) - goto rpm_disable; - - ret = tegra_adma_init(tdma); - if (ret) - goto rpm_put; - INIT_LIST_HEAD(&tdma->dma_dev.channels); for (i = 0; i < tdma->nr_channels; i++) { struct tegra_adma_chan *tdc = &tdma->channels[i]; @@ -727,6 +761,16 @@ static int tegra_adma_probe(struct platform_device *pdev) tdc->tdma = tdma; } + pm_runtime_enable(&pdev->dev); + + ret = pm_runtime_get_sync(&pdev->dev); + if (ret < 0) + goto rpm_disable; + + ret = tegra_adma_init(tdma); + if (ret) + goto rpm_put; + dma_cap_set(DMA_SLAVE, tdma->dma_dev.cap_mask); dma_cap_set(DMA_PRIVATE, tdma->dma_dev.cap_mask); dma_cap_set(DMA_CYCLIC, tdma->dma_dev.cap_mask); @@ -768,13 +812,13 @@ static int tegra_adma_probe(struct platform_device *pdev) dma_remove: dma_async_device_unregister(&tdma->dma_dev); -irq_dispose: - while (--i >= 0) - irq_dispose_mapping(tdma->channels[i].irq); rpm_put: pm_runtime_put_sync(&pdev->dev); rpm_disable: pm_runtime_disable(&pdev->dev); +irq_dispose: + while (--i >= 0) + irq_dispose_mapping(tdma->channels[i].irq); return ret; } diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c index 896bafb7a532..cf6588cc3efd 100644 --- a/drivers/dma/timb_dma.c +++ b/drivers/dma/timb_dma.c @@ -545,7 +545,7 @@ static struct dma_async_tx_descriptor *td_prep_slave_sg(struct dma_chan *chan, } dma_sync_single_for_device(chan2dmadev(chan), td_desc->txd.phys, - td_desc->desc_list_len, DMA_MEM_TO_DEV); + td_desc->desc_list_len, DMA_TO_DEVICE); return &td_desc->txd; } diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c index 8722bcba489d..2db352308e5c 100644 --- a/drivers/dma/xilinx/xilinx_dma.c +++ b/drivers/dma/xilinx/xilinx_dma.c @@ -72,6 +72,9 @@ #define XILINX_DMA_DMACR_CIRC_EN BIT(1) #define XILINX_DMA_DMACR_RUNSTOP BIT(0) #define XILINX_DMA_DMACR_FSYNCSRC_MASK GENMASK(6, 5) +#define XILINX_DMA_DMACR_DELAY_MASK GENMASK(31, 24) +#define XILINX_DMA_DMACR_FRAME_COUNT_MASK GENMASK(23, 16) +#define XILINX_DMA_DMACR_MASTER_MASK GENMASK(11, 8) #define XILINX_DMA_REG_DMASR 0x0004 #define XILINX_DMA_DMASR_EOL_LATE_ERR BIT(15) @@ -2057,8 +2060,10 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan, chan->config.gen_lock = cfg->gen_lock; chan->config.master = cfg->master; + dmacr &= ~XILINX_DMA_DMACR_GENLOCK_EN; if (cfg->gen_lock && chan->genlock) { dmacr |= XILINX_DMA_DMACR_GENLOCK_EN; + dmacr &= ~XILINX_DMA_DMACR_MASTER_MASK; dmacr |= cfg->master << XILINX_DMA_DMACR_MASTER_SHIFT; } @@ -2072,11 +2077,13 @@ int xilinx_vdma_channel_set_config(struct dma_chan *dchan, chan->config.delay = cfg->delay; if (cfg->coalesc <= XILINX_DMA_DMACR_FRAME_COUNT_MAX) { + dmacr &= ~XILINX_DMA_DMACR_FRAME_COUNT_MASK; dmacr |= cfg->coalesc << XILINX_DMA_DMACR_FRAME_COUNT_SHIFT; chan->config.coalesc = cfg->coalesc; } if (cfg->delay <= XILINX_DMA_DMACR_DELAY_MAX) { + dmacr &= ~XILINX_DMA_DMACR_DELAY_MASK; dmacr |= cfg->delay << XILINX_DMA_DMACR_DELAY_SHIFT; chan->config.delay = cfg->delay; } diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 40fb0e7ff8fd..b36abd253786 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2863,6 +2863,7 @@ static int init_csrows(struct mem_ctl_info *mci) dimm = csrow->channels[j]->dimm; dimm->mtype = pvt->dram_type; dimm->edac_mode = edac_mode; + dimm->grain = 64; } } diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index f7fa05fee45a..329021189c38 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -680,22 +680,18 @@ static int del_mc_from_global_list(struct mem_ctl_info *mci) struct mem_ctl_info *edac_mc_find(int idx) { - struct mem_ctl_info *mci = NULL; + struct mem_ctl_info *mci; struct list_head *item; mutex_lock(&mem_ctls_mutex); list_for_each(item, &mc_devices) { mci = list_entry(item, struct mem_ctl_info, link); - - if (mci->mc_idx >= idx) { - if (mci->mc_idx == idx) { - goto unlock; - } - break; - } + if (mci->mc_idx == idx) + goto unlock; } + mci = NULL; unlock: mutex_unlock(&mem_ctls_mutex); return mci; diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index 6f80eb65c26c..acae39278669 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -187,6 +187,7 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev, /* Cleans the error report buffer */ memset(e, 0, sizeof (*e)); e->error_count = 1; + e->grain = 1; strcpy(e->label, "unknown label"); e->msg = pvt->msg; e->other_detail = pvt->other_detail; @@ -282,7 +283,7 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev, /* Error grain */ if (mem_err->validation_bits & CPER_MEM_VALID_PA_MASK) - e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK); + e->grain = ~mem_err->physical_addr_mask + 1; /* Memory error location, mapped on e->location */ p = e->location; @@ -389,8 +390,13 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev, if (p > pvt->other_detail) *(p - 1) = '\0'; + /* Sanity-check driver-supplied grain value. */ + if (WARN_ON_ONCE(!e->grain)) + e->grain = 1; + + grain_bits = fls_long(e->grain - 1); + /* Generate the trace event */ - grain_bits = fls_long(e->grain); snprintf(pvt->detail_location, sizeof(pvt->detail_location), "APEI location: %s %s", e->location, e->other_detail); trace_mc_event(type, e->msg, e->label, e->error_count, diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index b0b390a1da15..ddd5990211f8 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -2915,35 +2915,27 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, * cccc = channel * If the mask doesn't match, report an error to the parsing logic */ - if (! ((errcode & 0xef80) == 0x80)) { - optype = "Can't parse: it is not a mem"; - } else { - switch (optypenum) { - case 0: - optype = "generic undef request error"; - break; - case 1: - optype = "memory read error"; - break; - case 2: - optype = "memory write error"; - break; - case 3: - optype = "addr/cmd error"; - break; - case 4: - optype = "memory scrubbing error"; - break; - default: - optype = "reserved"; - break; - } + switch (optypenum) { + case 0: + optype = "generic undef request error"; + break; + case 1: + optype = "memory read error"; + break; + case 2: + optype = "memory write error"; + break; + case 3: + optype = "addr/cmd error"; + break; + case 4: + optype = "memory scrubbing error"; + break; + default: + optype = "reserved"; + break; } - /* Only decode errors with an valid address (ADDRV) */ - if (!GET_BITFIELD(m->status, 58, 58)) - return; - if (pvt->info.type == KNIGHTS_LANDING) { if (channel == 14) { edac_dbg(0, "%s%s err_code:%04x:%04x EDRAM bank %d\n", @@ -3049,17 +3041,11 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, { struct mce *mce = (struct mce *)data; struct mem_ctl_info *mci; - struct sbridge_pvt *pvt; char *type; if (edac_get_report_status() == EDAC_REPORTING_DISABLED) return NOTIFY_DONE; - mci = get_mci_for_node_id(mce->socketid, IMC0); - if (!mci) - return NOTIFY_DONE; - pvt = mci->pvt_info; - /* * Just let mcelog handle it if the error is * outside the memory controller. A memory error @@ -3069,6 +3055,22 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, if ((mce->status & 0xefff) >> 7 != 1) return NOTIFY_DONE; + /* Check ADDRV bit in STATUS */ + if (!GET_BITFIELD(mce->status, 58, 58)) + return NOTIFY_DONE; + + /* Check MISCV bit in STATUS */ + if (!GET_BITFIELD(mce->status, 59, 59)) + return NOTIFY_DONE; + + /* Check address type in MISC (physical address only) */ + if (GET_BITFIELD(mce->misc, 6, 8) != 2) + return NOTIFY_DONE; + + mci = get_mci_for_node_id(mce->socketid, IMC0); + if (!mci) + return NOTIFY_DONE; + if (mce->mcgstatus & MCG_STATUS_MCIP) type = "Exception"; else diff --git a/drivers/edac/thunderx_edac.c b/drivers/edac/thunderx_edac.c index f35d87519a3e..dfefa39e9351 100644 --- a/drivers/edac/thunderx_edac.c +++ b/drivers/edac/thunderx_edac.c @@ -1905,7 +1905,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id) default: dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n", l2c->pdev->device); - return IRQ_NONE; + goto err_free; } while (CIRC_CNT(l2c->ring_head, l2c->ring_tail, @@ -1927,7 +1927,7 @@ static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id) l2c->ring_tail++; } - return IRQ_HANDLED; + ret = IRQ_HANDLED; err_free: kfree(other); diff --git a/drivers/extcon/extcon-intel-cht-wc.c b/drivers/extcon/extcon-intel-cht-wc.c index 60baaf693103..c562d9d69ae3 100644 --- a/drivers/extcon/extcon-intel-cht-wc.c +++ b/drivers/extcon/extcon-intel-cht-wc.c @@ -155,7 +155,7 @@ static int cht_wc_extcon_get_charger(struct cht_wc_extcon_data *ext, dev_warn(ext->dev, "Unhandled charger type %d, defaulting to SDP\n", ret); - /* Fall through, treat as SDP */ + return EXTCON_CHG_USB_SDP; case CHT_WC_USBSRC_TYPE_SDP: case CHT_WC_USBSRC_TYPE_FLOAT_DP_DN: case CHT_WC_USBSRC_TYPE_OTHER: diff --git a/drivers/extcon/extcon-max8997.c b/drivers/extcon/extcon-max8997.c index 4a0612fb9c07..b9b48d45a6dc 100644 --- a/drivers/extcon/extcon-max8997.c +++ b/drivers/extcon/extcon-max8997.c @@ -321,12 +321,10 @@ static int max8997_muic_handle_usb(struct max8997_muic_info *info, { int ret = 0; - if (usb_type == MAX8997_USB_HOST) { - ret = max8997_muic_set_path(info, info->path_usb, attached); - if (ret < 0) { - dev_err(info->dev, "failed to update muic register\n"); - return ret; - } + ret = max8997_muic_set_path(info, info->path_usb, attached); + if (ret < 0) { + dev_err(info->dev, "failed to update muic register\n"); + return ret; } switch (usb_type) { diff --git a/drivers/extcon/extcon-sm5502.c b/drivers/extcon/extcon-sm5502.c index 106ef0297b53..1a1ee3db3455 100644 --- a/drivers/extcon/extcon-sm5502.c +++ b/drivers/extcon/extcon-sm5502.c @@ -69,6 +69,10 @@ struct sm5502_muic_info { /* Default value of SM5502 register to bring up MUIC device. */ static struct reg_data sm5502_reg_data[] = { { + .reg = SM5502_REG_RESET, + .val = SM5502_REG_RESET_MASK, + .invert = true, + }, { .reg = SM5502_REG_CONTROL, .val = SM5502_REG_CONTROL_MASK_INT_MASK, .invert = false, diff --git a/drivers/extcon/extcon-sm5502.h b/drivers/extcon/extcon-sm5502.h index 974b53222f56..12f8b01e5753 100644 --- a/drivers/extcon/extcon-sm5502.h +++ b/drivers/extcon/extcon-sm5502.h @@ -241,6 +241,8 @@ enum sm5502_reg { #define DM_DP_SWITCH_UART ((DM_DP_CON_SWITCH_UART <hh_data + HH_DATA_OFF(sizeof(*h))); h->h_proto = type; memcpy(h->h_dest, neigh->ha, net->addr_len); - hh->hh_len = FWNET_HLEN; + + /* Pairs with the READ_ONCE() in neigh_resolve_output(), + * neigh_hh_output() and neigh_update_hhs(). + */ + smp_store_release(&hh->hh_len, FWNET_HLEN); return 0; } diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c index 2f452f1f7c8a..53f27a6e2d76 100644 --- a/drivers/firmware/dell_rbu.c +++ b/drivers/firmware/dell_rbu.c @@ -45,6 +45,7 @@ #include #include #include +#include MODULE_AUTHOR("Abhay Salunke "); MODULE_DESCRIPTION("Driver for updating BIOS image on DELL systems"); @@ -181,6 +182,11 @@ static int create_packet(void *data, size_t length) packet_data_temp_buf = NULL; } } + /* + * set to uncachable or it may never get written back before reboot + */ + set_memory_uc((unsigned long)packet_data_temp_buf, 1 << ordernum); + spin_lock(&rbu_data.lock); newpacket->data = packet_data_temp_buf; @@ -349,6 +355,8 @@ static void packet_empty_list(void) * to make sure there are no stale RBU packets left in memory */ memset(newpacket->data, 0, rbu_data.packetsize); + set_memory_wb((unsigned long)newpacket->data, + 1 << newpacket->ordernum); free_pages((unsigned long) newpacket->data, newpacket->ordernum); kfree(newpacket); diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index db404aab82b2..209dc5aefc31 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -498,7 +498,7 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx, pcie->device_id.vendor_id, pcie->device_id.device_id); p = pcie->device_id.class_code; - printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]); + printk("%s""class_code: %02x%02x%02x\n", pfx, p[2], p[1], p[0]); } if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER) printk("%s""serial number: 0x%04x, 0x%04x\n", pfx, diff --git a/drivers/firmware/efi/efivars.c b/drivers/firmware/efi/efivars.c index 3e626fd9bd4e..1c65f5ac4368 100644 --- a/drivers/firmware/efi/efivars.c +++ b/drivers/firmware/efi/efivars.c @@ -139,13 +139,16 @@ static ssize_t efivar_attr_read(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; + unsigned long size = sizeof(var->Data); char *str = buf; + int ret; if (!entry || !buf) return -EINVAL; - var->DataSize = 1024; - if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data)) + ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data); + var->DataSize = size; + if (ret) return -EIO; if (var->Attributes & EFI_VARIABLE_NON_VOLATILE) @@ -172,13 +175,16 @@ static ssize_t efivar_size_read(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; + unsigned long size = sizeof(var->Data); char *str = buf; + int ret; if (!entry || !buf) return -EINVAL; - var->DataSize = 1024; - if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data)) + ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data); + var->DataSize = size; + if (ret) return -EIO; str += sprintf(str, "0x%lx\n", var->DataSize); @@ -189,12 +195,15 @@ static ssize_t efivar_data_read(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; + unsigned long size = sizeof(var->Data); + int ret; if (!entry || !buf) return -EINVAL; - var->DataSize = 1024; - if (efivar_entry_get(entry, &var->Attributes, &var->DataSize, var->Data)) + ret = efivar_entry_get(entry, &var->Attributes, &size, var->Data); + var->DataSize = size; + if (ret) return -EIO; memcpy(buf, var->Data, var->DataSize); @@ -263,6 +272,9 @@ efivar_store_raw(struct efivar_entry *entry, const char *buf, size_t count) u8 *data; int err; + if (!entry || !buf) + return -EINVAL; + if (is_compat()) { struct compat_efi_variable *compat; @@ -314,14 +326,16 @@ efivar_show_raw(struct efivar_entry *entry, char *buf) { struct efi_variable *var = &entry->var; struct compat_efi_variable *compat; + unsigned long datasize = sizeof(var->Data); size_t size; + int ret; if (!entry || !buf) return 0; - var->DataSize = 1024; - if (efivar_entry_get(entry, &entry->var.Attributes, - &entry->var.DataSize, entry->var.Data)) + ret = efivar_entry_get(entry, &var->Attributes, &datasize, var->Data); + var->DataSize = datasize; + if (ret) return -EIO; if (is_compat()) { diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c index 24c461dea7af..fd8053f9556e 100644 --- a/drivers/firmware/efi/libstub/gop.c +++ b/drivers/firmware/efi/libstub/gop.c @@ -85,30 +85,6 @@ setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line, } } -static efi_status_t -__gop_query32(efi_system_table_t *sys_table_arg, - struct efi_graphics_output_protocol_32 *gop32, - struct efi_graphics_output_mode_info **info, - unsigned long *size, u64 *fb_base) -{ - struct efi_graphics_output_protocol_mode_32 *mode; - efi_graphics_output_protocol_query_mode query_mode; - efi_status_t status; - unsigned long m; - - m = gop32->mode; - mode = (struct efi_graphics_output_protocol_mode_32 *)m; - query_mode = (void *)(unsigned long)gop32->query_mode; - - status = __efi_call_early(query_mode, (void *)gop32, mode->mode, size, - info); - if (status != EFI_SUCCESS) - return status; - - *fb_base = mode->frame_buffer_base; - return status; -} - static efi_status_t setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, efi_guid_t *proto, unsigned long size, void **gop_handle) @@ -121,7 +97,7 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, u64 fb_base; struct efi_pixel_bitmask pixel_info; int pixel_format; - efi_status_t status = EFI_NOT_FOUND; + efi_status_t status; u32 *handles = (u32 *)(unsigned long)gop_handle; int i; @@ -130,6 +106,7 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, nr_gops = size / sizeof(u32); for (i = 0; i < nr_gops; i++) { + struct efi_graphics_output_protocol_mode_32 *mode; struct efi_graphics_output_mode_info *info = NULL; efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; bool conout_found = false; @@ -147,9 +124,11 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, if (status == EFI_SUCCESS) conout_found = true; - status = __gop_query32(sys_table_arg, gop32, &info, &size, - ¤t_fb_base); - if (status == EFI_SUCCESS && (!first_gop || conout_found) && + mode = (void *)(unsigned long)gop32->mode; + info = (void *)(unsigned long)mode->info; + current_fb_base = mode->frame_buffer_base; + + if ((!first_gop || conout_found) && info->pixel_format != PIXEL_BLT_ONLY) { /* * Systems that use the UEFI Console Splitter may @@ -177,7 +156,7 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, /* Did we find any GOPs? */ if (!first_gop) - goto out; + return EFI_NOT_FOUND; /* EFI framebuffer */ si->orig_video_isVGA = VIDEO_TYPE_EFI; @@ -199,32 +178,8 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si, si->lfb_size = si->lfb_linelength * si->lfb_height; si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; -out: - return status; -} -static efi_status_t -__gop_query64(efi_system_table_t *sys_table_arg, - struct efi_graphics_output_protocol_64 *gop64, - struct efi_graphics_output_mode_info **info, - unsigned long *size, u64 *fb_base) -{ - struct efi_graphics_output_protocol_mode_64 *mode; - efi_graphics_output_protocol_query_mode query_mode; - efi_status_t status; - unsigned long m; - - m = gop64->mode; - mode = (struct efi_graphics_output_protocol_mode_64 *)m; - query_mode = (void *)(unsigned long)gop64->query_mode; - - status = __efi_call_early(query_mode, (void *)gop64, mode->mode, size, - info); - if (status != EFI_SUCCESS) - return status; - - *fb_base = mode->frame_buffer_base; - return status; + return EFI_SUCCESS; } static efi_status_t @@ -239,7 +194,7 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, u64 fb_base; struct efi_pixel_bitmask pixel_info; int pixel_format; - efi_status_t status = EFI_NOT_FOUND; + efi_status_t status; u64 *handles = (u64 *)(unsigned long)gop_handle; int i; @@ -248,6 +203,7 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, nr_gops = size / sizeof(u64); for (i = 0; i < nr_gops; i++) { + struct efi_graphics_output_protocol_mode_64 *mode; struct efi_graphics_output_mode_info *info = NULL; efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; bool conout_found = false; @@ -265,9 +221,11 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, if (status == EFI_SUCCESS) conout_found = true; - status = __gop_query64(sys_table_arg, gop64, &info, &size, - ¤t_fb_base); - if (status == EFI_SUCCESS && (!first_gop || conout_found) && + mode = (void *)(unsigned long)gop64->mode; + info = (void *)(unsigned long)mode->info; + current_fb_base = mode->frame_buffer_base; + + if ((!first_gop || conout_found) && info->pixel_format != PIXEL_BLT_ONLY) { /* * Systems that use the UEFI Console Splitter may @@ -295,7 +253,7 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, /* Did we find any GOPs? */ if (!first_gop) - goto out; + return EFI_NOT_FOUND; /* EFI framebuffer */ si->orig_video_isVGA = VIDEO_TYPE_EFI; @@ -317,8 +275,8 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si, si->lfb_size = si->lfb_linelength * si->lfb_height; si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; -out: - return status; + + return EFI_SUCCESS; } /* diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c index c8f169bf2e27..62337be07afc 100644 --- a/drivers/firmware/google/gsmi.c +++ b/drivers/firmware/google/gsmi.c @@ -480,11 +480,10 @@ static ssize_t eventlog_write(struct file *filp, struct kobject *kobj, if (count < sizeof(u32)) return -EINVAL; param.type = *(u32 *)buf; - count -= sizeof(u32); buf += sizeof(u32); /* The remaining buffer is the data payload */ - if (count > gsmi_dev.data_buf->length) + if ((count - sizeof(u32)) > gsmi_dev.data_buf->length) return -EINVAL; param.data_len = count - sizeof(u32); @@ -504,7 +503,7 @@ static ssize_t eventlog_write(struct file *filp, struct kobject *kobj, spin_unlock_irqrestore(&gsmi_dev.lock, flags); - return rc; + return (rc == 0) ? count : rc; } diff --git a/drivers/firmware/qcom_scm-64.c b/drivers/firmware/qcom_scm-64.c index 6e6d561708e2..e9001075f676 100644 --- a/drivers/firmware/qcom_scm-64.c +++ b/drivers/firmware/qcom_scm-64.c @@ -158,7 +158,7 @@ static int qcom_scm_call(struct device *dev, u32 svc_id, u32 cmd_id, kfree(args_virt); } - if (res->a0 < 0) + if ((long)res->a0 < 0) return qcom_scm_remap_error(res->a0); return 0; diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c index 4ea63d9bd131..8feca59c1f6b 100644 --- a/drivers/fsi/fsi-core.c +++ b/drivers/fsi/fsi-core.c @@ -419,6 +419,31 @@ static int fsi_slave_scan(struct fsi_slave *slave) return 0; } +static unsigned long aligned_access_size(size_t offset, size_t count) +{ + unsigned long offset_unit, count_unit; + + /* Criteria: + * + * 1. Access size must be less than or equal to the maximum access + * width or the highest power-of-two factor of offset + * 2. Access size must be less than or equal to the amount specified by + * count + * + * The access width is optimal if we can calculate 1 to be strictly + * equal while still satisfying 2. + */ + + /* Find 1 by the bottom bit of offset (with a 4 byte access cap) */ + offset_unit = BIT(__builtin_ctzl(offset | 4)); + + /* Find 2 by the top bit of count */ + count_unit = BIT(8 * sizeof(unsigned long) - 1 - __builtin_clzl(count)); + + /* Constrain the maximum access width to the minimum of both criteria */ + return BIT(__builtin_ctzl(offset_unit | count_unit)); +} + static ssize_t fsi_slave_sysfs_raw_read(struct file *file, struct kobject *kobj, struct bin_attribute *attr, char *buf, loff_t off, size_t count) @@ -434,8 +459,7 @@ static ssize_t fsi_slave_sysfs_raw_read(struct file *file, return -EINVAL; for (total_len = 0; total_len < count; total_len += read_len) { - read_len = min_t(size_t, count, 4); - read_len -= off & 0x3; + read_len = aligned_access_size(off, count - total_len); rc = fsi_slave_read(slave, off, buf + total_len, read_len); if (rc) @@ -462,8 +486,7 @@ static ssize_t fsi_slave_sysfs_raw_write(struct file *file, return -EINVAL; for (total_len = 0; total_len < count; total_len += write_len) { - write_len = min_t(size_t, count, 4); - write_len -= off & 0x3; + write_len = aligned_access_size(off, count - total_len); rc = fsi_slave_write(slave, off, buf + total_len, write_len); if (rc) diff --git a/drivers/gnss/Kconfig b/drivers/gnss/Kconfig new file mode 100644 index 000000000000..db79453a975d --- /dev/null +++ b/drivers/gnss/Kconfig @@ -0,0 +1,33 @@ +# +# GNSS receiver configuration +# + +menuconfig GNSS + tristate "GNSS receiver support" + ---help--- + Say Y here if you have a GNSS receiver (e.g. a GPS receiver). + + To compile this driver as a module, choose M here: the module will + be called gnss. + +if GNSS + +config GNSS_SERIAL + tristate + +config GNSS_CMDLINE_SERIAL + tristate "Command line test driver for GNSS" + depends on SERIAL_DEV_BUS + select GNSS_SERIAL + ---help--- + Say Y here if you want to test the GNSS subsystem but do not have a + way to communicate a binding through firmware such as DT or ACPI. + The correct serdev device and protocol type must be specified on + the module command line. + + To compile this driver as a module, choose M here: the module will + be called gnss-cmdline. + + If unsure, say N. + +endif # GNSS diff --git a/drivers/gnss/Makefile b/drivers/gnss/Makefile new file mode 100644 index 000000000000..f637e90527d6 --- /dev/null +++ b/drivers/gnss/Makefile @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the GNSS subsystem. +# + +obj-$(CONFIG_GNSS) += gnss.o +gnss-y := core.o + +obj-$(CONFIG_GNSS_SERIAL) += gnss-serial.o +gnss-serial-y := serial.o + +obj-$(CONFIG_GNSS_CMDLINE_SERIAL) += gnss-cmdline.o +gnss-cmdline-y := cmdline.o diff --git a/drivers/gnss/cmdline.c b/drivers/gnss/cmdline.c new file mode 100644 index 000000000000..3e1d24636b8c --- /dev/null +++ b/drivers/gnss/cmdline.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test driver for GNSS. This driver requires the serdev binding and protocol + * type to be specified on the module command line. + * + * Copyright 2019 Google LLC + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "serial.h" + +#define GNSS_CMDLINE_MODULE_NAME "gnss-cmdline" + +#define gnss_cmdline_err(...) \ + pr_err(GNSS_CMDLINE_MODULE_NAME ": " __VA_ARGS__) + +static char *serdev; +module_param(serdev, charp, 0644); +MODULE_PARM_DESC(serdev, "serial device to wrap"); + +static int type; +module_param(type, int, 0644); +MODULE_PARM_DESC(serdev, "GNSS protocol type (see 'enum gnss_type')"); + +static struct serdev_device *serdev_device; + +static int name_match(struct device *dev, void *data) +{ + return strstr(dev_name(dev), data) != NULL; +} + +static int __init gnss_cmdline_init(void) +{ + struct device *serial_dev, *port_dev, *serdev_dev; + char *driver_name, *port_name, *serdev_name; + char *serdev_dup, *serdev_dup_sep; + struct gnss_serial *gserial; + int err = -ENODEV; + + /* User did not set the serdev module parameter */ + if (!serdev) + return 0; + + if (type < 0 || type >= GNSS_TYPE_COUNT) { + gnss_cmdline_err("invalid gnss type '%d'\n", type); + return -EINVAL; + } + + serdev_dup = serdev_dup_sep = kstrdup(serdev, GFP_KERNEL); + if (!serdev_dup) + return -ENOMEM; + + driver_name = strsep(&serdev_dup_sep, "/"); + if (!driver_name) { + gnss_cmdline_err("driver name missing\n"); + goto err_free_serdev_dup; + } + + port_name = strsep(&serdev_dup_sep, "/"); + if (!port_name) { + gnss_cmdline_err("port name missing\n"); + goto err_free_serdev_dup; + } + + serdev_name = strsep(&serdev_dup_sep, "/"); + if (!serdev_name) { + gnss_cmdline_err("serdev name missing\n"); + goto err_free_serdev_dup; + } + + /* Find the driver device instance (e.g. serial8250) */ + serial_dev = bus_find_device_by_name(&platform_bus_type, + NULL, driver_name); + if (!serial_dev) { + gnss_cmdline_err("no device '%s'\n", driver_name); + goto err_free_serdev_dup; + } + + /* Find the port device instance (e.g. serial0) */ + port_dev = device_find_child(serial_dev, port_name, name_match); + if (!port_dev) { + gnss_cmdline_err("no port '%s'\n", port_name); + goto err_free_serdev_dup; + } + + /* Find the serdev device instance (e.g. serial0-0) */ + serdev_dev = device_find_child(port_dev, serdev_name, name_match); + if (!serdev_dev) { + gnss_cmdline_err("no serdev '%s'\n", serdev_name); + goto err_free_serdev_dup; + } + + gserial = gnss_serial_allocate(to_serdev_device(serdev_dev), 0); + if (IS_ERR(gserial)) { + err = PTR_ERR(gserial); + goto err_free_serdev_dup; + } + + gserial->gdev->type = type; + + err = gnss_serial_register(gserial); + if (err) { + gnss_serial_free(gserial); + goto err_free_serdev_dup; + } + + serdev_device = to_serdev_device(serdev_dev); + err = 0; +err_free_serdev_dup: + kfree(serdev_dup); + return err; +} + +static void __exit gnss_cmdline_exit(void) +{ + struct gnss_serial *gserial; + + if (!serdev_device) + return; + + gserial = serdev_device_get_drvdata(serdev_device); + + gnss_serial_deregister(gserial); + gnss_serial_free(gserial); +} + +module_init(gnss_cmdline_init); +module_exit(gnss_cmdline_exit); + +MODULE_AUTHOR("Alistair Delva "); +MODULE_DESCRIPTION("GNSS command line driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gnss/core.c b/drivers/gnss/core.c new file mode 100644 index 000000000000..0adf55e748e4 --- /dev/null +++ b/drivers/gnss/core.c @@ -0,0 +1,420 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * GNSS receiver core + * + * Copyright (C) 2018 Johan Hovold + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define GNSS_FLAG_HAS_WRITE_RAW BIT(0) + +#define GNSS_MINORS 16 + +static DEFINE_IDA(gnss_minors); +static dev_t gnss_first; + +/* FIFO size must be a power of two */ +#define GNSS_READ_FIFO_SIZE 4096 +#define GNSS_WRITE_BUF_SIZE 1024 + +#define to_gnss_device(d) container_of((d), struct gnss_device, dev) + +static int gnss_open(struct inode *inode, struct file *file) +{ + struct gnss_device *gdev; + int ret = 0; + + gdev = container_of(inode->i_cdev, struct gnss_device, cdev); + + get_device(&gdev->dev); + + nonseekable_open(inode, file); + file->private_data = gdev; + + down_write(&gdev->rwsem); + if (gdev->disconnected) { + ret = -ENODEV; + goto unlock; + } + + if (gdev->count++ == 0) { + ret = gdev->ops->open(gdev); + if (ret) + gdev->count--; + } +unlock: + up_write(&gdev->rwsem); + + if (ret) + put_device(&gdev->dev); + + return ret; +} + +static int gnss_release(struct inode *inode, struct file *file) +{ + struct gnss_device *gdev = file->private_data; + + down_write(&gdev->rwsem); + if (gdev->disconnected) + goto unlock; + + if (--gdev->count == 0) { + gdev->ops->close(gdev); + kfifo_reset(&gdev->read_fifo); + } +unlock: + up_write(&gdev->rwsem); + + put_device(&gdev->dev); + + return 0; +} + +static ssize_t gnss_read(struct file *file, char __user *buf, + size_t count, loff_t *pos) +{ + struct gnss_device *gdev = file->private_data; + unsigned int copied; + int ret; + + mutex_lock(&gdev->read_mutex); + while (kfifo_is_empty(&gdev->read_fifo)) { + mutex_unlock(&gdev->read_mutex); + + if (gdev->disconnected) + return 0; + + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + + ret = wait_event_interruptible(gdev->read_queue, + gdev->disconnected || + !kfifo_is_empty(&gdev->read_fifo)); + if (ret) + return -ERESTARTSYS; + + mutex_lock(&gdev->read_mutex); + } + + ret = kfifo_to_user(&gdev->read_fifo, buf, count, &copied); + if (ret == 0) + ret = copied; + + mutex_unlock(&gdev->read_mutex); + + return ret; +} + +static ssize_t gnss_write(struct file *file, const char __user *buf, + size_t count, loff_t *pos) +{ + struct gnss_device *gdev = file->private_data; + size_t written = 0; + int ret; + + if (gdev->disconnected) + return -EIO; + + if (!count) + return 0; + + if (!(gdev->flags & GNSS_FLAG_HAS_WRITE_RAW)) + return -EIO; + + /* Ignoring O_NONBLOCK, write_raw() is synchronous. */ + + ret = mutex_lock_interruptible(&gdev->write_mutex); + if (ret) + return -ERESTARTSYS; + + for (;;) { + size_t n = count - written; + + if (n > GNSS_WRITE_BUF_SIZE) + n = GNSS_WRITE_BUF_SIZE; + + if (copy_from_user(gdev->write_buf, buf, n)) { + ret = -EFAULT; + goto out_unlock; + } + + /* + * Assumes write_raw can always accept GNSS_WRITE_BUF_SIZE + * bytes. + * + * FIXME: revisit + */ + down_read(&gdev->rwsem); + if (!gdev->disconnected) + ret = gdev->ops->write_raw(gdev, gdev->write_buf, n); + else + ret = -EIO; + up_read(&gdev->rwsem); + + if (ret < 0) + break; + + written += ret; + buf += ret; + + if (written == count) + break; + } + + if (written) + ret = written; +out_unlock: + mutex_unlock(&gdev->write_mutex); + + return ret; +} + +static unsigned int gnss_poll(struct file *file, poll_table *wait) +{ + struct gnss_device *gdev = file->private_data; + unsigned int mask = 0; + + poll_wait(file, &gdev->read_queue, wait); + + if (!kfifo_is_empty(&gdev->read_fifo)) + mask |= POLLIN | POLLRDNORM; + if (gdev->disconnected) + mask |= POLLHUP; + + return mask; +} + +static const struct file_operations gnss_fops = { + .owner = THIS_MODULE, + .open = gnss_open, + .release = gnss_release, + .read = gnss_read, + .write = gnss_write, + .poll = gnss_poll, + .llseek = no_llseek, +}; + +static struct class *gnss_class; + +static void gnss_device_release(struct device *dev) +{ + struct gnss_device *gdev = to_gnss_device(dev); + + kfree(gdev->write_buf); + kfifo_free(&gdev->read_fifo); + ida_simple_remove(&gnss_minors, gdev->id); + kfree(gdev); +} + +struct gnss_device *gnss_allocate_device(struct device *parent) +{ + struct gnss_device *gdev; + struct device *dev; + int id; + int ret; + + gdev = kzalloc(sizeof(*gdev), GFP_KERNEL); + if (!gdev) + return NULL; + + id = ida_simple_get(&gnss_minors, 0, GNSS_MINORS, GFP_KERNEL); + if (id < 0) { + kfree(gdev); + return NULL; + } + + gdev->id = id; + + dev = &gdev->dev; + device_initialize(dev); + dev->devt = gnss_first + id; + dev->class = gnss_class; + dev->parent = parent; + dev->release = gnss_device_release; + dev_set_drvdata(dev, gdev); + dev_set_name(dev, "gnss%d", id); + + init_rwsem(&gdev->rwsem); + mutex_init(&gdev->read_mutex); + mutex_init(&gdev->write_mutex); + init_waitqueue_head(&gdev->read_queue); + + ret = kfifo_alloc(&gdev->read_fifo, GNSS_READ_FIFO_SIZE, GFP_KERNEL); + if (ret) + goto err_put_device; + + gdev->write_buf = kzalloc(GNSS_WRITE_BUF_SIZE, GFP_KERNEL); + if (!gdev->write_buf) + goto err_put_device; + + cdev_init(&gdev->cdev, &gnss_fops); + gdev->cdev.owner = THIS_MODULE; + + return gdev; + +err_put_device: + put_device(dev); + + return NULL; +} +EXPORT_SYMBOL_GPL(gnss_allocate_device); + +void gnss_put_device(struct gnss_device *gdev) +{ + put_device(&gdev->dev); +} +EXPORT_SYMBOL_GPL(gnss_put_device); + +int gnss_register_device(struct gnss_device *gdev) +{ + int ret; + + /* Set a flag which can be accessed without holding the rwsem. */ + if (gdev->ops->write_raw != NULL) + gdev->flags |= GNSS_FLAG_HAS_WRITE_RAW; + + ret = cdev_device_add(&gdev->cdev, &gdev->dev); + if (ret) { + dev_err(&gdev->dev, "failed to add device: %d\n", ret); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(gnss_register_device); + +void gnss_deregister_device(struct gnss_device *gdev) +{ + down_write(&gdev->rwsem); + gdev->disconnected = true; + if (gdev->count) { + wake_up_interruptible(&gdev->read_queue); + gdev->ops->close(gdev); + } + up_write(&gdev->rwsem); + + cdev_device_del(&gdev->cdev, &gdev->dev); +} +EXPORT_SYMBOL_GPL(gnss_deregister_device); + +/* + * Caller guarantees serialisation. + * + * Must not be called for a closed device. + */ +int gnss_insert_raw(struct gnss_device *gdev, const unsigned char *buf, + size_t count) +{ + int ret; + + ret = kfifo_in(&gdev->read_fifo, buf, count); + + wake_up_interruptible(&gdev->read_queue); + + return ret; +} +EXPORT_SYMBOL_GPL(gnss_insert_raw); + +static const char * const gnss_type_names[GNSS_TYPE_COUNT] = { + [GNSS_TYPE_NMEA] = "NMEA", + [GNSS_TYPE_SIRF] = "SiRF", + [GNSS_TYPE_UBX] = "UBX", +}; + +static const char *gnss_type_name(struct gnss_device *gdev) +{ + const char *name = NULL; + + if (gdev->type < GNSS_TYPE_COUNT) + name = gnss_type_names[gdev->type]; + + if (!name) + dev_WARN(&gdev->dev, "type name not defined\n"); + + return name; +} + +static ssize_t type_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct gnss_device *gdev = to_gnss_device(dev); + + return sprintf(buf, "%s\n", gnss_type_name(gdev)); +} +static DEVICE_ATTR_RO(type); + +static struct attribute *gnss_attrs[] = { + &dev_attr_type.attr, + NULL, +}; +ATTRIBUTE_GROUPS(gnss); + +static int gnss_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct gnss_device *gdev = to_gnss_device(dev); + int ret; + + ret = add_uevent_var(env, "GNSS_TYPE=%s", gnss_type_name(gdev)); + if (ret) + return ret; + + return 0; +} + +static int __init gnss_module_init(void) +{ + int ret; + + ret = alloc_chrdev_region(&gnss_first, 0, GNSS_MINORS, "gnss"); + if (ret < 0) { + pr_err("failed to allocate device numbers: %d\n", ret); + return ret; + } + + gnss_class = class_create(THIS_MODULE, "gnss"); + if (IS_ERR(gnss_class)) { + ret = PTR_ERR(gnss_class); + pr_err("failed to create class: %d\n", ret); + goto err_unregister_chrdev; + } + + gnss_class->dev_groups = gnss_groups; + gnss_class->dev_uevent = gnss_uevent; + + pr_info("GNSS driver registered with major %d\n", MAJOR(gnss_first)); + + return 0; + +err_unregister_chrdev: + unregister_chrdev_region(gnss_first, GNSS_MINORS); + + return ret; +} +module_init(gnss_module_init); + +static void __exit gnss_module_exit(void) +{ + class_destroy(gnss_class); + unregister_chrdev_region(gnss_first, GNSS_MINORS); + ida_destroy(&gnss_minors); +} +module_exit(gnss_module_exit); + +MODULE_AUTHOR("Johan Hovold "); +MODULE_DESCRIPTION("GNSS receiver core"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gnss/serial.c b/drivers/gnss/serial.c new file mode 100644 index 000000000000..def64b36d994 --- /dev/null +++ b/drivers/gnss/serial.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generic serial GNSS receiver driver + * + * Copyright (C) 2018 Johan Hovold + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "serial.h" + +static int gnss_serial_open(struct gnss_device *gdev) +{ + struct gnss_serial *gserial = gnss_get_drvdata(gdev); + struct serdev_device *serdev = gserial->serdev; + int ret; + + ret = serdev_device_open(serdev); + if (ret) + return ret; + + serdev_device_set_baudrate(serdev, gserial->speed); + serdev_device_set_flow_control(serdev, false); + + ret = pm_runtime_get_sync(&serdev->dev); + if (ret < 0) { + pm_runtime_put_noidle(&serdev->dev); + goto err_close; + } + + return 0; + +err_close: + serdev_device_close(serdev); + + return ret; +} + +static void gnss_serial_close(struct gnss_device *gdev) +{ + struct gnss_serial *gserial = gnss_get_drvdata(gdev); + struct serdev_device *serdev = gserial->serdev; + + serdev_device_close(serdev); + + pm_runtime_put(&serdev->dev); +} + +static int gnss_serial_write_raw(struct gnss_device *gdev, + const unsigned char *buf, size_t count) +{ + struct gnss_serial *gserial = gnss_get_drvdata(gdev); + struct serdev_device *serdev = gserial->serdev; + int ret; + + /* write is only buffered synchronously */ + ret = serdev_device_write(serdev, buf, count, MAX_SCHEDULE_TIMEOUT); + if (ret < 0 || ret < count) + return ret; + + /* FIXME: determine if interrupted? */ + serdev_device_wait_until_sent(serdev, 0); + + return count; +} + +static const struct gnss_operations gnss_serial_gnss_ops = { + .open = gnss_serial_open, + .close = gnss_serial_close, + .write_raw = gnss_serial_write_raw, +}; + +static int gnss_serial_receive_buf(struct serdev_device *serdev, + const unsigned char *buf, size_t count) +{ + struct gnss_serial *gserial = serdev_device_get_drvdata(serdev); + struct gnss_device *gdev = gserial->gdev; + + return gnss_insert_raw(gdev, buf, count); +} + +static const struct serdev_device_ops gnss_serial_serdev_ops = { + .receive_buf = gnss_serial_receive_buf, + .write_wakeup = serdev_device_write_wakeup, +}; + +static int gnss_serial_set_power(struct gnss_serial *gserial, + enum gnss_serial_pm_state state) +{ + if (!gserial->ops || !gserial->ops->set_power) + return 0; + + return gserial->ops->set_power(gserial, state); +} + +/* + * FIXME: need to provide subdriver defaults or separate dt parsing from + * allocation. + */ +static int gnss_serial_parse_dt(struct serdev_device *serdev) +{ + struct gnss_serial *gserial = serdev_device_get_drvdata(serdev); + struct device_node *node = serdev->dev.of_node; + u32 speed = 4800; + + of_property_read_u32(node, "current-speed", &speed); + + gserial->speed = speed; + + return 0; +} + +struct gnss_serial *gnss_serial_allocate(struct serdev_device *serdev, + size_t data_size) +{ + struct gnss_serial *gserial; + struct gnss_device *gdev; + int ret; + + gserial = kzalloc(sizeof(*gserial) + data_size, GFP_KERNEL); + if (!gserial) + return ERR_PTR(-ENOMEM); + + gdev = gnss_allocate_device(&serdev->dev); + if (!gdev) { + ret = -ENOMEM; + goto err_free_gserial; + } + + gdev->ops = &gnss_serial_gnss_ops; + gnss_set_drvdata(gdev, gserial); + + gserial->serdev = serdev; + gserial->gdev = gdev; + + serdev_device_set_drvdata(serdev, gserial); + serdev_device_set_client_ops(serdev, &gnss_serial_serdev_ops); + + ret = gnss_serial_parse_dt(serdev); + if (ret) + goto err_put_device; + + return gserial; + +err_put_device: + gnss_put_device(gserial->gdev); +err_free_gserial: + kfree(gserial); + + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(gnss_serial_allocate); + +void gnss_serial_free(struct gnss_serial *gserial) +{ + gnss_put_device(gserial->gdev); + kfree(gserial); +}; +EXPORT_SYMBOL_GPL(gnss_serial_free); + +int gnss_serial_register(struct gnss_serial *gserial) +{ + struct serdev_device *serdev = gserial->serdev; + int ret; + + if (IS_ENABLED(CONFIG_PM)) { + pm_runtime_enable(&serdev->dev); + } else { + ret = gnss_serial_set_power(gserial, GNSS_SERIAL_ACTIVE); + if (ret < 0) + return ret; + } + + ret = gnss_register_device(gserial->gdev); + if (ret) + goto err_disable_rpm; + + return 0; + +err_disable_rpm: + if (IS_ENABLED(CONFIG_PM)) + pm_runtime_disable(&serdev->dev); + else + gnss_serial_set_power(gserial, GNSS_SERIAL_OFF); + + return ret; +} +EXPORT_SYMBOL_GPL(gnss_serial_register); + +void gnss_serial_deregister(struct gnss_serial *gserial) +{ + struct serdev_device *serdev = gserial->serdev; + + gnss_deregister_device(gserial->gdev); + + if (IS_ENABLED(CONFIG_PM)) + pm_runtime_disable(&serdev->dev); + else + gnss_serial_set_power(gserial, GNSS_SERIAL_OFF); +} +EXPORT_SYMBOL_GPL(gnss_serial_deregister); + +#ifdef CONFIG_PM +static int gnss_serial_runtime_suspend(struct device *dev) +{ + struct gnss_serial *gserial = dev_get_drvdata(dev); + + return gnss_serial_set_power(gserial, GNSS_SERIAL_STANDBY); +} + +static int gnss_serial_runtime_resume(struct device *dev) +{ + struct gnss_serial *gserial = dev_get_drvdata(dev); + + return gnss_serial_set_power(gserial, GNSS_SERIAL_ACTIVE); +} +#endif /* CONFIG_PM */ + +static int gnss_serial_prepare(struct device *dev) +{ + if (pm_runtime_suspended(dev)) + return 1; + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int gnss_serial_suspend(struct device *dev) +{ + struct gnss_serial *gserial = dev_get_drvdata(dev); + int ret = 0; + + /* + * FIXME: serdev currently lacks support for managing the underlying + * device's wakeup settings. A workaround would be to close the serdev + * device here if it is open. + */ + + if (!pm_runtime_suspended(dev)) + ret = gnss_serial_set_power(gserial, GNSS_SERIAL_STANDBY); + + return ret; +} + +static int gnss_serial_resume(struct device *dev) +{ + struct gnss_serial *gserial = dev_get_drvdata(dev); + int ret = 0; + + if (!pm_runtime_suspended(dev)) + ret = gnss_serial_set_power(gserial, GNSS_SERIAL_ACTIVE); + + return ret; +} +#endif /* CONFIG_PM_SLEEP */ + +const struct dev_pm_ops gnss_serial_pm_ops = { + .prepare = gnss_serial_prepare, + SET_SYSTEM_SLEEP_PM_OPS(gnss_serial_suspend, gnss_serial_resume) + SET_RUNTIME_PM_OPS(gnss_serial_runtime_suspend, gnss_serial_runtime_resume, NULL) +}; +EXPORT_SYMBOL_GPL(gnss_serial_pm_ops); + +MODULE_AUTHOR("Johan Hovold "); +MODULE_DESCRIPTION("Generic serial GNSS receiver driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gnss/serial.h b/drivers/gnss/serial.h new file mode 100644 index 000000000000..980ffdc86c2a --- /dev/null +++ b/drivers/gnss/serial.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Generic serial GNSS receiver driver + * + * Copyright (C) 2018 Johan Hovold + */ + +#ifndef _LINUX_GNSS_SERIAL_H +#define _LINUX_GNSS_SERIAL_H + +#include +#include + +struct gnss_serial { + struct serdev_device *serdev; + struct gnss_device *gdev; + speed_t speed; + const struct gnss_serial_ops *ops; + unsigned long drvdata[0]; +}; + +enum gnss_serial_pm_state { + GNSS_SERIAL_OFF, + GNSS_SERIAL_ACTIVE, + GNSS_SERIAL_STANDBY, +}; + +struct gnss_serial_ops { + int (*set_power)(struct gnss_serial *gserial, + enum gnss_serial_pm_state state); +}; + +extern const struct dev_pm_ops gnss_serial_pm_ops; + +struct gnss_serial *gnss_serial_allocate(struct serdev_device *gserial, + size_t data_size); +void gnss_serial_free(struct gnss_serial *gserial); + +int gnss_serial_register(struct gnss_serial *gserial); +void gnss_serial_deregister(struct gnss_serial *gserial); + +static inline void *gnss_serial_get_drvdata(struct gnss_serial *gserial) +{ + return gserial->drvdata; +} + +#endif /* _LINUX_GNSS_SERIAL_H */ diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index b3d1ddd546a5..8f230c9dadd1 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -989,6 +989,7 @@ config GPIO_LP87565 config GPIO_MAX77620 tristate "GPIO support for PMIC MAX77620 and MAX20024" depends on MFD_MAX77620 + select GPIOLIB_IRQCHIP help GPIO driver for MAX77620 and MAX20024 PMIC from Maxim Semiconductor. MAX77620 PMIC has 8 pins that can be configured as GPIOs. The diff --git a/drivers/gpio/gpio-grgpio.c b/drivers/gpio/gpio-grgpio.c index 6544a16ab02e..7541bd327e6c 100644 --- a/drivers/gpio/gpio-grgpio.c +++ b/drivers/gpio/gpio-grgpio.c @@ -259,17 +259,16 @@ static int grgpio_irq_map(struct irq_domain *d, unsigned int irq, lirq->irq = irq; uirq = &priv->uirqs[lirq->index]; if (uirq->refcnt == 0) { + spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); ret = request_irq(uirq->uirq, grgpio_irq_handler, 0, dev_name(priv->dev), priv); if (ret) { dev_err(priv->dev, "Could not request underlying irq %d\n", uirq->uirq); - - spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); - return ret; } + spin_lock_irqsave(&priv->gc.bgpio_lock, flags); } uirq->refcnt++; @@ -315,8 +314,11 @@ static void grgpio_irq_unmap(struct irq_domain *d, unsigned int irq) if (index >= 0) { uirq = &priv->uirqs[lirq->index]; uirq->refcnt--; - if (uirq->refcnt == 0) + if (uirq->refcnt == 0) { + spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); free_irq(uirq->uirq, priv); + return; + } } spin_unlock_irqrestore(&priv->gc.bgpio_lock, flags); diff --git a/drivers/gpio/gpio-max77620.c b/drivers/gpio/gpio-max77620.c index 538bce4b5b42..78254ed93206 100644 --- a/drivers/gpio/gpio-max77620.c +++ b/drivers/gpio/gpio-max77620.c @@ -163,13 +163,13 @@ static int max77620_gpio_set_debounce(struct max77620_gpio *mgpio, case 0: val = MAX77620_CNFG_GPIO_DBNC_None; break; - case 1 ... 8: + case 1 ... 8000: val = MAX77620_CNFG_GPIO_DBNC_8ms; break; - case 9 ... 16: + case 8001 ... 16000: val = MAX77620_CNFG_GPIO_DBNC_16ms; break; - case 17 ... 32: + case 16001 ... 32000: val = MAX77620_CNFG_GPIO_DBNC_32ms; break; default: diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c index 8c93dec498fa..d5f735ce0dd4 100644 --- a/drivers/gpio/gpio-mpc8xxx.c +++ b/drivers/gpio/gpio-mpc8xxx.c @@ -306,6 +306,7 @@ static int mpc8xxx_probe(struct platform_device *pdev) return -ENOMEM; gc = &mpc8xxx_gc->gc; + gc->parent = &pdev->dev; if (of_property_read_bool(np, "little-endian")) { ret = bgpio_init(gc, &pdev->dev, 4, @@ -337,7 +338,8 @@ static int mpc8xxx_probe(struct platform_device *pdev) * It's assumed that only a single type of gpio controller is available * on the current machine, so overwriting global data is fine. */ - mpc8xxx_irq_chip.irq_set_type = devtype->irq_set_type; + if (devtype->irq_set_type) + mpc8xxx_irq_chip.irq_set_type = devtype->irq_set_type; if (devtype->gpio_dir_out) gc->direction_output = devtype->gpio_dir_out; diff --git a/drivers/gpio/gpio-syscon.c b/drivers/gpio/gpio-syscon.c index 537cec7583fc..cf88a0bfe99e 100644 --- a/drivers/gpio/gpio-syscon.c +++ b/drivers/gpio/gpio-syscon.c @@ -122,7 +122,7 @@ static int syscon_gpio_dir_out(struct gpio_chip *chip, unsigned offset, int val) BIT(offs % SYSCON_REG_BITS)); } - priv->data->set(chip, offset, val); + chip->set(chip, offset, val); return 0; } diff --git a/drivers/gpio/gpio-zynq.c b/drivers/gpio/gpio-zynq.c index b3cc948a2d8b..f1d7066b6637 100644 --- a/drivers/gpio/gpio-zynq.c +++ b/drivers/gpio/gpio-zynq.c @@ -639,6 +639,8 @@ static void zynq_gpio_restore_context(struct zynq_gpio *gpio) unsigned int bank_num; for (bank_num = 0; bank_num < gpio->p_data->max_bank; bank_num++) { + writel_relaxed(ZYNQ_GPIO_IXR_DISABLE_ALL, gpio->base_addr + + ZYNQ_GPIO_INTDIS_OFFSET(bank_num)); writel_relaxed(gpio->context.datalsw[bank_num], gpio->base_addr + ZYNQ_GPIO_DATA_LSW_OFFSET(bank_num)); @@ -648,9 +650,6 @@ static void zynq_gpio_restore_context(struct zynq_gpio *gpio) writel_relaxed(gpio->context.dirm[bank_num], gpio->base_addr + ZYNQ_GPIO_DIRM_OFFSET(bank_num)); - writel_relaxed(gpio->context.int_en[bank_num], - gpio->base_addr + - ZYNQ_GPIO_INTEN_OFFSET(bank_num)); writel_relaxed(gpio->context.int_type[bank_num], gpio->base_addr + ZYNQ_GPIO_INTTYPE_OFFSET(bank_num)); @@ -660,6 +659,9 @@ static void zynq_gpio_restore_context(struct zynq_gpio *gpio) writel_relaxed(gpio->context.int_any[bank_num], gpio->base_addr + ZYNQ_GPIO_INTANY_OFFSET(bank_num)); + writel_relaxed(~(gpio->context.int_en[bank_num]), + gpio->base_addr + + ZYNQ_GPIO_INTEN_OFFSET(bank_num)); } } diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 715babaa886a..c7b9125c8ec2 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -24,11 +24,19 @@ #include "gpiolib.h" +#define QUIRK_NO_EDGE_EVENTS_ON_BOOT 0x01l +#define QUIRK_NO_WAKEUP 0x02l + static int run_edge_events_on_boot = -1; module_param(run_edge_events_on_boot, int, 0444); MODULE_PARM_DESC(run_edge_events_on_boot, "Run edge _AEI event-handlers at boot: 0=no, 1=yes, -1=auto"); +static int honor_wakeup = -1; +module_param(honor_wakeup, int, 0444); +MODULE_PARM_DESC(honor_wakeup, + "Honor the ACPI wake-capable flag: 0=no, 1=yes, -1=auto"); + /** * struct acpi_gpio_event - ACPI GPIO event handler data * @@ -339,7 +347,7 @@ static acpi_status acpi_gpiochip_alloc_event(struct acpi_resource *ares, event->handle = evt_handle; event->handler = handler; event->irq = irq; - event->irq_is_wake = agpio->wake_capable == ACPI_WAKE_CAPABLE; + event->irq_is_wake = honor_wakeup && agpio->wake_capable == ACPI_WAKE_CAPABLE; event->pin = pin; event->desc = desc; @@ -1312,25 +1320,75 @@ static int acpi_gpio_handle_deferred_request_irqs(void) /* We must use _sync so that this runs after the first deferred_probe run */ late_initcall_sync(acpi_gpio_handle_deferred_request_irqs); -static const struct dmi_system_id run_edge_events_on_boot_blacklist[] = { +static const struct dmi_system_id gpiolib_acpi_quirks[] = { { + /* + * The Minix Neo Z83-4 has a micro-USB-B id-pin handler for + * a non existing micro-USB-B connector which puts the HDMI + * DDC pins in GPIO mode, breaking HDMI support. + */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "MINIX"), DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"), - } + }, + .driver_data = (void *)QUIRK_NO_EDGE_EVENTS_ON_BOOT, + }, + { + /* + * The Terra Pad 1061 has a micro-USB-B id-pin handler, which + * instead of controlling the actual micro-USB-B turns the 5V + * boost for its USB-A connector off. The actual micro-USB-B + * connector is wired for charging only. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Wortmann_AG"), + DMI_MATCH(DMI_PRODUCT_NAME, "TERRA_PAD_1061"), + }, + .driver_data = (void *)QUIRK_NO_EDGE_EVENTS_ON_BOOT, + }, + { + /* + * Various HP X2 10 Cherry Trail models use an external + * embedded-controller connected via I2C + an ACPI GPIO + * event handler. The embedded controller generates various + * spurious wakeup events when suspended. So disable wakeup + * for its handler (it uses the only ACPI GPIO event handler). + * This breaks wakeup when opening the lid, the user needs + * to press the power-button to wakeup the system. The + * alternative is suspend simply not working, which is worse. + */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP x2 Detachable 10-p0XX"), + }, + .driver_data = (void *)QUIRK_NO_WAKEUP, }, {} /* Terminating entry */ }; static int acpi_gpio_setup_params(void) { + const struct dmi_system_id *id; + long quirks = 0; + + id = dmi_first_match(gpiolib_acpi_quirks); + if (id) + quirks = (long)id->driver_data; + if (run_edge_events_on_boot < 0) { - if (dmi_check_system(run_edge_events_on_boot_blacklist)) + if (quirks & QUIRK_NO_EDGE_EVENTS_ON_BOOT) run_edge_events_on_boot = 0; else run_edge_events_on_boot = 1; } + if (honor_wakeup < 0) { + if (quirks & QUIRK_NO_WAKEUP) + honor_wakeup = 0; + else + honor_wakeup = 1; + } + return 0; } diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index c7f5f0be2d74..f0777a7a4305 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -206,6 +206,14 @@ int gpiod_get_direction(struct gpio_desc *desc) chip = gpiod_to_chip(desc); offset = gpio_chip_hwgpio(desc); + /* + * Open drain emulation using input mode may incorrectly report + * input here, fix that up. + */ + if (test_bit(FLAG_OPEN_DRAIN, &desc->flags) && + test_bit(FLAG_IS_OUT, &desc->flags)) + return 0; + if (!chip->get_direction) return status; @@ -3159,8 +3167,9 @@ static struct gpio_desc *gpiod_find(struct device *dev, const char *con_id, if (chip->ngpio <= p->chip_hwnum) { dev_err(dev, - "requested GPIO %d is out of range [0..%d] for chip %s\n", - idx, chip->ngpio, chip->label); + "requested GPIO %u (%u) is out of range [0..%u] for chip %s\n", + idx, p->chip_hwnum, chip->ngpio - 1, + chip->label); return ERR_PTR(-EINVAL); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index cc4e18dcd8b6..2153f19e59cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -336,17 +336,9 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * path_size += le16_to_cpu(path->usSize); if (device_support & le16_to_cpu(path->usDeviceTag)) { - uint8_t con_obj_id, con_obj_num, con_obj_type; - - con_obj_id = + uint8_t con_obj_id = (le16_to_cpu(path->usConnObjectId) & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; - con_obj_num = - (le16_to_cpu(path->usConnObjectId) & ENUM_ID_MASK) - >> ENUM_ID_SHIFT; - con_obj_type = - (le16_to_cpu(path->usConnObjectId) & - OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; /* Skip TV/CV support */ if ((le16_to_cpu(path->usDeviceTag) == @@ -371,15 +363,7 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * router.ddc_valid = false; router.cd_valid = false; for (j = 0; j < ((le16_to_cpu(path->usSize) - 8) / 2); j++) { - uint8_t grph_obj_id, grph_obj_num, grph_obj_type; - - grph_obj_id = - (le16_to_cpu(path->usGraphicObjIds[j]) & - OBJECT_ID_MASK) >> OBJECT_ID_SHIFT; - grph_obj_num = - (le16_to_cpu(path->usGraphicObjIds[j]) & - ENUM_ID_MASK) >> ENUM_ID_SHIFT; - grph_obj_type = + uint8_t grph_obj_type = (le16_to_cpu(path->usGraphicObjIds[j]) & OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 4dd68d821353..4894d8a87c04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -572,6 +572,41 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, if (ret == -EPROBE_DEFER) return ret; +#ifdef CONFIG_DRM_AMDGPU_SI + if (!amdgpu_si_support) { + switch (flags & AMD_ASIC_MASK) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_HAINAN: + dev_info(&pdev->dev, + "SI support provided by radeon.\n"); + dev_info(&pdev->dev, + "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n" + ); + return -ENODEV; + } + } +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + if (!amdgpu_cik_support) { + switch (flags & AMD_ASIC_MASK) { + case CHIP_KAVERI: + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: + dev_info(&pdev->dev, + "CIK support provided by radeon.\n"); + dev_info(&pdev->dev, + "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n" + ); + return -ENODEV; + } + } +#endif + /* Get rid of things like offb */ ret = amdgpu_kick_out_firmware_fb(pdev); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 884ed359f249..c93e72d8ac5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -87,41 +87,6 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) struct amdgpu_device *adev; int r, acpi_status; -#ifdef CONFIG_DRM_AMDGPU_SI - if (!amdgpu_si_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - case CHIP_OLAND: - case CHIP_HAINAN: - dev_info(dev->dev, - "SI support provided by radeon.\n"); - dev_info(dev->dev, - "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n" - ); - return -ENODEV; - } - } -#endif -#ifdef CONFIG_DRM_AMDGPU_CIK - if (!amdgpu_cik_support) { - switch (flags & AMD_ASIC_MASK) { - case CHIP_KAVERI: - case CHIP_BONAIRE: - case CHIP_HAWAII: - case CHIP_KABINI: - case CHIP_MULLINS: - dev_info(dev->dev, - "CIK support provided by radeon.\n"); - dev_info(dev->dev, - "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n" - ); - return -ENODEV; - } - } -#endif - adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL); if (adev == NULL) { return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index ed8c3739015b..b35b0741fd97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -125,6 +125,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) } dma_fence_put(fence); + fence = NULL; r = amdgpu_bo_kmap(vram_obj, &vram_map); if (r) { @@ -170,6 +171,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) } dma_fence_put(fence); + fence = NULL; r = amdgpu_bo_kmap(gtt_obj[i], >t_map); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index ff7d4827385e..7a2366bd1fba 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -279,7 +279,12 @@ static void soc15_init_golden_registers(struct amdgpu_device *adev) } static u32 soc15_get_xclk(struct amdgpu_device *adev) { - return adev->clock.spll.reference_freq; + u32 reference_clock = adev->clock.spll.reference_freq; + + if (adev->asic_type == CHIP_RAVEN) + return reference_clock / 4; + + return reference_clock; } diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c index d73281095fac..976109c20d49 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c @@ -79,7 +79,11 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) struct videomode vm; unsigned long prate; unsigned int cfg; - int div; + int div, ret; + + ret = clk_prepare_enable(crtc->dc->hlcdc->sys_clk); + if (ret) + return; vm.vfront_porch = adj->crtc_vsync_start - adj->crtc_vdisplay; vm.vback_porch = adj->crtc_vtotal - adj->crtc_vsync_end; @@ -138,6 +142,8 @@ static void atmel_hlcdc_crtc_mode_set_nofb(struct drm_crtc *c) ATMEL_HLCDC_VSPSU | ATMEL_HLCDC_VSPHO | ATMEL_HLCDC_GUARDTIME_MASK | ATMEL_HLCDC_MODE_MASK, cfg); + + clk_disable_unprepare(crtc->dc->hlcdc->sys_clk); } static enum drm_mode_status diff --git a/drivers/gpu/drm/bridge/analogix-anx78xx.c b/drivers/gpu/drm/bridge/analogix-anx78xx.c index 9385eb0b1ee4..cd2bfd7bf048 100644 --- a/drivers/gpu/drm/bridge/analogix-anx78xx.c +++ b/drivers/gpu/drm/bridge/analogix-anx78xx.c @@ -725,7 +725,9 @@ static int anx78xx_init_pdata(struct anx78xx *anx78xx) /* 1.0V digital core power regulator */ pdata->dvdd10 = devm_regulator_get(dev, "dvdd10"); if (IS_ERR(pdata->dvdd10)) { - DRM_ERROR("DVDD10 regulator not found\n"); + if (PTR_ERR(pdata->dvdd10) != -EPROBE_DEFER) + DRM_ERROR("DVDD10 regulator not found\n"); + return PTR_ERR(pdata->dvdd10); } @@ -1344,7 +1346,9 @@ static int anx78xx_i2c_probe(struct i2c_client *client, err = anx78xx_init_pdata(anx78xx); if (err) { - DRM_ERROR("Failed to initialize pdata: %d\n", err); + if (err != -EPROBE_DEFER) + DRM_ERROR("Failed to initialize pdata: %d\n", err); + return err; } diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 4db31b89507c..cc1094f90125 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -39,6 +39,7 @@ #include +#define DDC_CI_ADDR 0x37 #define DDC_SEGMENT_ADDR 0x30 #define HDMI_EDID_LEN 512 @@ -320,6 +321,15 @@ static int dw_hdmi_i2c_xfer(struct i2c_adapter *adap, u8 addr = msgs[0].addr; int i, ret = 0; + if (addr == DDC_CI_ADDR) + /* + * The internal I2C controller does not support the multi-byte + * read and write operations needed for DDC/CI. + * TOFIX: Blacklist the DDC/CI address until we filter out + * unsupported I2C operations. + */ + return -EOPNOTSUPP; + dev_dbg(hdmi->dev, "xfer: num: %d, addr: %#x\n", num, addr); for (i = 0; i < num; i++) { @@ -1733,7 +1743,7 @@ static int dw_hdmi_setup(struct dw_hdmi *hdmi, struct drm_display_mode *mode) /* HDMI Initialization Step E - Configure audio */ hdmi_clk_regenerator_update_pixel_clock(hdmi); - hdmi_enable_audio_clk(hdmi, true); + hdmi_enable_audio_clk(hdmi, hdmi->audio_enable); } /* not for DVI mode */ diff --git a/drivers/gpu/drm/drm_debugfs_crc.c b/drivers/gpu/drm/drm_debugfs_crc.c index 2901b7944068..6858c80d2eb5 100644 --- a/drivers/gpu/drm/drm_debugfs_crc.c +++ b/drivers/gpu/drm/drm_debugfs_crc.c @@ -101,8 +101,8 @@ static ssize_t crc_control_write(struct file *file, const char __user *ubuf, if (IS_ERR(source)) return PTR_ERR(source); - if (source[len] == '\n') - source[len] = '\0'; + if (source[len - 1] == '\n') + source[len - 1] = '\0'; spin_lock_irq(&crc->lock); diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index bb9a9852ec22..9d94c306c8ca 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -274,7 +274,7 @@ static void drm_dp_encode_sideband_req(struct drm_dp_sideband_msg_req_body *req, memcpy(&buf[idx], req->u.i2c_read.transactions[i].bytes, req->u.i2c_read.transactions[i].num_bytes); idx += req->u.i2c_read.transactions[i].num_bytes; - buf[idx] = (req->u.i2c_read.transactions[i].no_stop_bit & 0x1) << 5; + buf[idx] = (req->u.i2c_read.transactions[i].no_stop_bit & 0x1) << 4; buf[idx] |= (req->u.i2c_read.transactions[i].i2c_transaction_delay & 0xf); idx++; } @@ -982,9 +982,20 @@ static struct drm_dp_mst_port *drm_dp_mst_get_port_ref_locked(struct drm_dp_mst_ static struct drm_dp_mst_port *drm_dp_get_validated_port_ref(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port) { struct drm_dp_mst_port *rport = NULL; + mutex_lock(&mgr->lock); - if (mgr->mst_primary) - rport = drm_dp_mst_get_port_ref_locked(mgr->mst_primary, port); + /* + * Port may or may not be 'valid' but we don't care about that when + * destroying the port and we are guaranteed that the port pointer + * will be valid until we've finished + */ + if (current_work() == &mgr->destroy_connector_work) { + kref_get(&port->kref); + rport = port; + } else if (mgr->mst_primary) { + rport = drm_dp_mst_get_port_ref_locked(mgr->mst_primary, + port); + } mutex_unlock(&mgr->lock); return rport; } @@ -1540,7 +1551,11 @@ static void process_single_up_tx_qlock(struct drm_dp_mst_topology_mgr *mgr, if (ret != 1) DRM_DEBUG_KMS("failed to send msg in q %d\n", ret); - txmsg->dst->tx_slots[txmsg->seqno] = NULL; + if (txmsg->seqno != -1) { + WARN_ON((unsigned int)txmsg->seqno > + ARRAY_SIZE(txmsg->dst->tx_slots)); + txmsg->dst->tx_slots[txmsg->seqno] = NULL; + } } static void drm_dp_queue_down_tx(struct drm_dp_mst_topology_mgr *mgr, diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index ed01e3aae0e8..dfdc7d3147fb 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -164,6 +164,9 @@ static const struct edid_quirk { /* Medion MD 30217 PG */ { "MED", 0x7b8, EDID_QUIRK_PREFER_LARGE_75 }, + /* Lenovo G50 */ + { "SDC", 18514, EDID_QUIRK_FORCE_6BPC }, + /* Panel in Samsung NP700G7A-S01PL notebook reports 6bpc */ { "SEC", 0xd033, EDID_QUIRK_FORCE_8BPC }, diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index f1259a0c2883..eb6bf881c465 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1590,7 +1590,7 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, * Changes struct fb_var_screeninfo are currently not pushed back * to KMS, hence fail if different settings are requested. */ - if (var->bits_per_pixel != fb->format->cpp[0] * 8 || + if (var->bits_per_pixel > fb->format->cpp[0] * 8 || var->xres > fb->width || var->yres > fb->height || var->xres_virtual > fb->width || var->yres_virtual > fb->height) { DRM_DEBUG("fb requested width/height/bpp can't fit in current fb " @@ -1615,6 +1615,11 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var, drm_fb_helper_fill_pixel_fmt(var, fb->format->depth); } + /* + * Likewise, bits_per_pixel should be rounded up to a supported value. + */ + var->bits_per_pixel = fb->format->cpp[0] * 8; + /* * drm fbdev emulation doesn't support changing the pixel format at all, * so reject all pixel format changing requests. diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index c55f338e380b..d2c042af36b8 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1035,6 +1035,15 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma) return -EACCES; } + if (node->readonly) { + if (vma->vm_flags & VM_WRITE) { + drm_gem_object_put_unlocked(obj); + return -EINVAL; + } + + vma->vm_flags &= ~VM_MAYWRITE; + } + ret = drm_gem_mmap_obj(obj, drm_vma_node_size(node) << PAGE_SHIFT, vma); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 53f319369de5..318ebf3763ba 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -640,9 +640,9 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c index d2375cf783d3..5798c1b2bb51 100644 --- a/drivers/gpu/drm/drm_property.c +++ b/drivers/gpu/drm/drm_property.c @@ -540,7 +540,7 @@ drm_property_create_blob(struct drm_device *dev, size_t length, struct drm_property_blob *blob; int ret; - if (!length || length > ULONG_MAX - sizeof(struct drm_property_blob)) + if (!length || length > INT_MAX - sizeof(struct drm_property_blob)) return ERR_PTR(-EINVAL); blob = kvzalloc(sizeof(struct drm_property_blob)+length, GFP_KERNEL); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c index 2d955d7d7b6d..e154e6fb64da 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c @@ -207,7 +207,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu) mutex_lock(&obj->lock); pages = etnaviv_gem_get_pages(obj); mutex_unlock(&obj->lock); - if (pages) { + if (!IS_ERR(pages)) { int j; iter.hdr->data[0] = bomap - bomap_start; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c index ae884723e9b1..880b95511b98 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c @@ -26,7 +26,7 @@ struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj) int npages = obj->size >> PAGE_SHIFT; if (WARN_ON(!etnaviv_obj->pages)) /* should have already pinned! */ - return NULL; + return ERR_PTR(-EINVAL); return drm_prime_pages_to_sg(etnaviv_obj->pages, npages); } diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c index 2570c7f647a6..883fc45870dd 100644 --- a/drivers/gpu/drm/gma500/framebuffer.c +++ b/drivers/gpu/drm/gma500/framebuffer.c @@ -486,6 +486,7 @@ static int psbfb_probe(struct drm_fb_helper *helper, container_of(helper, struct psb_fbdev, psb_fb_helper); struct drm_device *dev = psb_fbdev->psb_fb_helper.dev; struct drm_psb_private *dev_priv = dev->dev_private; + unsigned int fb_size; int bytespp; bytespp = sizes->surface_bpp / 8; @@ -495,8 +496,11 @@ static int psbfb_probe(struct drm_fb_helper *helper, /* If the mode will not fit in 32bit then switch to 16bit to get a console on full resolution. The X mode setting server will allocate its own 32bit GEM framebuffer */ - if (ALIGN(sizes->fb_width * bytespp, 64) * sizes->fb_height > - dev_priv->vram_stolen_size) { + fb_size = ALIGN(sizes->surface_width * bytespp, 64) * + sizes->surface_height; + fb_size = ALIGN(fb_size, PAGE_SIZE); + + if (fb_size > dev_priv->vram_stolen_size) { sizes->surface_bpp = 16; sizes->surface_depth = 16; } diff --git a/drivers/gpu/drm/gma500/oaktrail_crtc.c b/drivers/gpu/drm/gma500/oaktrail_crtc.c index 0fff269d3fe6..42785f3df60f 100644 --- a/drivers/gpu/drm/gma500/oaktrail_crtc.c +++ b/drivers/gpu/drm/gma500/oaktrail_crtc.c @@ -139,6 +139,7 @@ static bool mrst_sdvo_find_best_pll(const struct gma_limit_t *limit, s32 freq_error, min_error = 100000; memset(best_clock, 0, sizeof(*best_clock)); + memset(&clock, 0, sizeof(clock)); for (clock.m = limit->m.min; clock.m <= limit->m.max; clock.m++) { for (clock.n = limit->n.min; clock.n <= limit->n.max; @@ -195,6 +196,7 @@ static bool mrst_lvds_find_best_pll(const struct gma_limit_t *limit, int err = target; memset(best_clock, 0, sizeof(*best_clock)); + memset(&clock, 0, sizeof(clock)); for (clock.m = limit->m.min; clock.m <= limit->m.max; clock.m++) { for (clock.p1 = limit->p1.min; clock.p1 <= limit->p1.max; diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c index 8bd29075ae4e..edcca1761500 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c @@ -71,7 +71,6 @@ static int hibmc_drm_fb_create(struct drm_fb_helper *helper, DRM_DEBUG_DRIVER("surface width(%d), height(%d) and bpp(%d)\n", sizes->surface_width, sizes->surface_height, sizes->surface_bpp); - sizes->surface_depth = 32; bytes_per_pixel = DIV_ROUND_UP(sizes->surface_bpp, 8); diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c index 576a417690d4..128d6cfb7bbb 100644 --- a/drivers/gpu/drm/i810/i810_dma.c +++ b/drivers/gpu/drm/i810/i810_dma.c @@ -721,7 +721,7 @@ static void i810_dma_dispatch_vertex(struct drm_device *dev, if (nbox > I810_NR_SAREA_CLIPRECTS) nbox = I810_NR_SAREA_CLIPRECTS; - if (used > 4 * 1024) + if (used < 0 || used > 4 * 1024) used = 0; if (sarea_priv->dirty) @@ -1041,7 +1041,7 @@ static void i810_dma_dispatch_mc(struct drm_device *dev, struct drm_buf *buf, in if (u != I810_BUF_CLIENT) DRM_DEBUG("MC found buffer that isn't mine!\n"); - if (used > 4 * 1024) + if (used < 0 || used > 4 * 1024) used = 0; sarea_priv->dirty = 0x7f; diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index 02c61a1ad56a..e9f9063dbf63 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -513,9 +513,9 @@ void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr, intel_vgpu_reset_mmio(vgpu, dmlr); populate_pvinfo_page(vgpu); - intel_vgpu_reset_display(vgpu); if (dmlr) { + intel_vgpu_reset_display(vgpu); intel_vgpu_reset_cfg_space(vgpu); /* only reset the failsafe mode when dmlr reset */ vgpu->failsafe = false; diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 8ba932b22f7c..e4b9eb1f6b60 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -26,6 +26,7 @@ */ #include "i915_drv.h" +#include "intel_ringbuffer.h" /** * DOC: batch buffer command parser @@ -50,13 +51,11 @@ * granting userspace undue privileges. There are three categories of privilege. * * First, commands which are explicitly defined as privileged or which should - * only be used by the kernel driver. The parser generally rejects such - * commands, though it may allow some from the drm master process. + * only be used by the kernel driver. The parser rejects such commands * * Second, commands which access registers. To support correct/enhanced * userspace functionality, particularly certain OpenGL extensions, the parser - * provides a whitelist of registers which userspace may safely access (for both - * normal and drm master processes). + * provides a whitelist of registers which userspace may safely access * * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc). * The parser always rejects such commands. @@ -81,9 +80,9 @@ * in the per-engine command tables. * * Other command table entries map fairly directly to high level categories - * mentioned above: rejected, master-only, register whitelist. The parser - * implements a number of checks, including the privileged memory checks, via a - * general bitmasking mechanism. + * mentioned above: rejected, register whitelist. The parser implements a number + * of checks, including the privileged memory checks, via a general bitmasking + * mechanism. */ /* @@ -101,8 +100,6 @@ struct drm_i915_cmd_descriptor { * CMD_DESC_REJECT: The command is never allowed * CMD_DESC_REGISTER: The command should be checked against the * register whitelist for the appropriate ring - * CMD_DESC_MASTER: The command is allowed if the submitting process - * is the DRM master */ u32 flags; #define CMD_DESC_FIXED (1<<0) @@ -110,7 +107,6 @@ struct drm_i915_cmd_descriptor { #define CMD_DESC_REJECT (1<<2) #define CMD_DESC_REGISTER (1<<3) #define CMD_DESC_BITMASK (1<<4) -#define CMD_DESC_MASTER (1<<5) /* * The command's unique identification bits and the bitmask to get them. @@ -191,7 +187,7 @@ struct drm_i915_cmd_table { #define CMD(op, opm, f, lm, fl, ...) \ { \ .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \ - .cmd = { (op), ~0u << (opm) }, \ + .cmd = { (op & ~0u << (opm)), ~0u << (opm) }, \ .length = { (lm) }, \ __VA_ARGS__ \ } @@ -206,14 +202,13 @@ struct drm_i915_cmd_table { #define R CMD_DESC_REJECT #define W CMD_DESC_REGISTER #define B CMD_DESC_BITMASK -#define M CMD_DESC_MASTER /* Command Mask Fixed Len Action ---------------------------------------------------------- */ -static const struct drm_i915_cmd_descriptor common_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = { CMD( MI_NOOP, SMI, F, 1, S ), CMD( MI_USER_INTERRUPT, SMI, F, 1, R ), - CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, M ), + CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, R ), CMD( MI_ARB_CHECK, SMI, F, 1, S ), CMD( MI_REPORT_HEAD, SMI, F, 1, S ), CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), @@ -243,7 +238,7 @@ static const struct drm_i915_cmd_descriptor common_cmds[] = { CMD( MI_BATCH_BUFFER_START, SMI, !F, 0xFF, S ), }; -static const struct drm_i915_cmd_descriptor render_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = { CMD( MI_FLUSH, SMI, F, 1, S ), CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_PREDICATE, SMI, F, 1, S ), @@ -310,7 +305,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { CMD( MI_URB_ATOMIC_ALLOC, SMI, F, 1, S ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_RS_CONTEXT, SMI, F, 1, S ), - CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), @@ -327,7 +322,7 @@ static const struct drm_i915_cmd_descriptor hsw_render_cmds[] = { CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS, S3D, !F, 0x1FF, S ), }; -static const struct drm_i915_cmd_descriptor video_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = { CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, @@ -371,7 +366,7 @@ static const struct drm_i915_cmd_descriptor video_cmds[] = { CMD( MFX_WAIT, SMFX, F, 1, S ), }; -static const struct drm_i915_cmd_descriptor vecs_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = { CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), CMD( MI_SET_APPID, SMI, F, 1, S ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, @@ -409,7 +404,7 @@ static const struct drm_i915_cmd_descriptor vecs_cmds[] = { }}, ), }; -static const struct drm_i915_cmd_descriptor blt_cmds[] = { +static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = { CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, B, .bits = {{ @@ -443,10 +438,64 @@ static const struct drm_i915_cmd_descriptor blt_cmds[] = { }; static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = { - CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), }; +/* + * For Gen9 we can still rely on the h/w to enforce cmd security, and only + * need to re-enforce the register access checks. We therefore only need to + * teach the cmdparser how to find the end of each command, and identify + * register accesses. The table doesn't need to reject any commands, and so + * the only commands listed here are: + * 1) Those that touch registers + * 2) Those that do not have the default 8-bit length + * + * Note that the default MI length mask chosen for this table is 0xFF, not + * the 0x3F used on older devices. This is because the vast majority of MI + * cmds on Gen9 use a standard 8-bit Length field. + * All the Gen9 blitter instructions are standard 0xFF length mask, and + * none allow access to non-general registers, so in fact no BLT cmds are + * included in the table at all. + * + */ +static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = { + CMD( MI_NOOP, SMI, F, 1, S ), + CMD( MI_USER_INTERRUPT, SMI, F, 1, S ), + CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, S ), + CMD( MI_FLUSH, SMI, F, 1, S ), + CMD( MI_ARB_CHECK, SMI, F, 1, S ), + CMD( MI_REPORT_HEAD, SMI, F, 1, S ), + CMD( MI_ARB_ON_OFF, SMI, F, 1, S ), + CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), + CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, S ), + CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, S ), + CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ), + CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ), + CMD( MI_UPDATE_GTT, SMI, !F, 0x3FF, S ), + CMD( MI_STORE_REGISTER_MEM_GEN8, SMI, F, 4, W, + .reg = { .offset = 1, .mask = 0x007FFFFC } ), + CMD( MI_FLUSH_DW, SMI, !F, 0x3F, S ), + CMD( MI_LOAD_REGISTER_MEM_GEN8, SMI, F, 4, W, + .reg = { .offset = 1, .mask = 0x007FFFFC } ), + CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), + + /* + * We allow BB_START but apply further checks. We just sanitize the + * basic fields here. + */ +#define MI_BB_START_OPERAND_MASK GENMASK(SMI-1, 0) +#define MI_BB_START_OPERAND_EXPECT (MI_BATCH_PPGTT_HSW | 1) + CMD( MI_BATCH_BUFFER_START_GEN8, SMI, !F, 0xFF, B, + .bits = {{ + .offset = 0, + .mask = MI_BB_START_OPERAND_MASK, + .expected = MI_BB_START_OPERAND_EXPECT, + }}, ), +}; + static const struct drm_i915_cmd_descriptor noop_desc = CMD(MI_NOOP, SMI, F, 1, S); @@ -460,40 +509,44 @@ static const struct drm_i915_cmd_descriptor noop_desc = #undef R #undef W #undef B -#undef M -static const struct drm_i915_cmd_table gen7_render_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { render_cmds, ARRAY_SIZE(render_cmds) }, +static const struct drm_i915_cmd_table gen7_render_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, }; -static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { render_cmds, ARRAY_SIZE(render_cmds) }, +static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, { hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) }, }; -static const struct drm_i915_cmd_table gen7_video_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { video_cmds, ARRAY_SIZE(video_cmds) }, +static const struct drm_i915_cmd_table gen7_video_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) }, }; -static const struct drm_i915_cmd_table hsw_vebox_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { vecs_cmds, ARRAY_SIZE(vecs_cmds) }, +static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) }, }; -static const struct drm_i915_cmd_table gen7_blt_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { blt_cmds, ARRAY_SIZE(blt_cmds) }, +static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, }; -static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = { - { common_cmds, ARRAY_SIZE(common_cmds) }, - { blt_cmds, ARRAY_SIZE(blt_cmds) }, +static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, + { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) }, }; +static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = { + { gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) }, +}; + + /* * Register whitelists, sorted by increasing register offset. */ @@ -609,17 +662,27 @@ static const struct drm_i915_reg_descriptor gen7_blt_regs[] = { REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), }; -static const struct drm_i915_reg_descriptor ivb_master_regs[] = { - REG32(FORCEWAKE_MT), - REG32(DERRMR), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)), - REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)), -}; - -static const struct drm_i915_reg_descriptor hsw_master_regs[] = { - REG32(FORCEWAKE_MT), - REG32(DERRMR), +static const struct drm_i915_reg_descriptor gen9_blt_regs[] = { + REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE), + REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE), + REG32(BCS_SWCTRL), + REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), + REG64_IDX(BCS_GPR, 0), + REG64_IDX(BCS_GPR, 1), + REG64_IDX(BCS_GPR, 2), + REG64_IDX(BCS_GPR, 3), + REG64_IDX(BCS_GPR, 4), + REG64_IDX(BCS_GPR, 5), + REG64_IDX(BCS_GPR, 6), + REG64_IDX(BCS_GPR, 7), + REG64_IDX(BCS_GPR, 8), + REG64_IDX(BCS_GPR, 9), + REG64_IDX(BCS_GPR, 10), + REG64_IDX(BCS_GPR, 11), + REG64_IDX(BCS_GPR, 12), + REG64_IDX(BCS_GPR, 13), + REG64_IDX(BCS_GPR, 14), + REG64_IDX(BCS_GPR, 15), }; #undef REG64 @@ -628,28 +691,27 @@ static const struct drm_i915_reg_descriptor hsw_master_regs[] = { struct drm_i915_reg_table { const struct drm_i915_reg_descriptor *regs; int num_regs; - bool master; }; static const struct drm_i915_reg_table ivb_render_reg_tables[] = { - { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false }, - { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true }, + { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) }, }; static const struct drm_i915_reg_table ivb_blt_reg_tables[] = { - { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false }, - { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true }, + { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, }; static const struct drm_i915_reg_table hsw_render_reg_tables[] = { - { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false }, - { hsw_render_regs, ARRAY_SIZE(hsw_render_regs), false }, - { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true }, + { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) }, + { hsw_render_regs, ARRAY_SIZE(hsw_render_regs) }, }; static const struct drm_i915_reg_table hsw_blt_reg_tables[] = { - { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false }, - { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true }, + { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, +}; + +static const struct drm_i915_reg_table gen9_blt_reg_tables[] = { + { gen9_blt_regs, ARRAY_SIZE(gen9_blt_regs) }, }; static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) @@ -707,6 +769,17 @@ static u32 gen7_blt_get_cmd_length_mask(u32 cmd_header) return 0; } +static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header) +{ + u32 client = cmd_header >> INSTR_CLIENT_SHIFT; + + if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT) + return 0xFF; + + DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 0x%08X\n", cmd_header); + return 0; +} + static bool validate_cmds_sorted(const struct intel_engine_cs *engine, const struct drm_i915_cmd_table *cmd_tables, int cmd_table_count) @@ -798,22 +871,15 @@ struct cmd_node { */ static inline u32 cmd_header_key(u32 x) { - u32 shift; - switch (x >> INSTR_CLIENT_SHIFT) { default: case INSTR_MI_CLIENT: - shift = STD_MI_OPCODE_SHIFT; - break; + return x >> STD_MI_OPCODE_SHIFT; case INSTR_RC_CLIENT: - shift = STD_3D_OPCODE_SHIFT; - break; + return x >> STD_3D_OPCODE_SHIFT; case INSTR_BC_CLIENT: - shift = STD_2D_OPCODE_SHIFT; - break; + return x >> STD_2D_OPCODE_SHIFT; } - - return x >> shift; } static int init_hash_table(struct intel_engine_cs *engine, @@ -871,18 +937,19 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) int cmd_table_count; int ret; - if (!IS_GEN7(engine->i915)) + if (!IS_GEN7(engine->i915) && !(IS_GEN9(engine->i915) && + engine->id == BCS)) return; switch (engine->id) { case RCS: if (IS_HASWELL(engine->i915)) { - cmd_tables = hsw_render_ring_cmds; + cmd_tables = hsw_render_ring_cmd_table; cmd_table_count = - ARRAY_SIZE(hsw_render_ring_cmds); + ARRAY_SIZE(hsw_render_ring_cmd_table); } else { - cmd_tables = gen7_render_cmds; - cmd_table_count = ARRAY_SIZE(gen7_render_cmds); + cmd_tables = gen7_render_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table); } if (IS_HASWELL(engine->i915)) { @@ -892,36 +959,46 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) engine->reg_tables = ivb_render_reg_tables; engine->reg_table_count = ARRAY_SIZE(ivb_render_reg_tables); } - engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask; break; case VCS: - cmd_tables = gen7_video_cmds; - cmd_table_count = ARRAY_SIZE(gen7_video_cmds); + cmd_tables = gen7_video_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table); engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; case BCS: - if (IS_HASWELL(engine->i915)) { - cmd_tables = hsw_blt_ring_cmds; - cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds); + engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; + if (IS_GEN9(engine->i915)) { + cmd_tables = gen9_blt_cmd_table; + cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table); + engine->get_cmd_length_mask = + gen9_blt_get_cmd_length_mask; + + /* BCS Engine unsafe without parser */ + engine->flags |= I915_ENGINE_REQUIRES_CMD_PARSER; + } else if (IS_HASWELL(engine->i915)) { + cmd_tables = hsw_blt_ring_cmd_table; + cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table); } else { - cmd_tables = gen7_blt_cmds; - cmd_table_count = ARRAY_SIZE(gen7_blt_cmds); + cmd_tables = gen7_blt_cmd_table; + cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table); } - if (IS_HASWELL(engine->i915)) { + if (IS_GEN9(engine->i915)) { + engine->reg_tables = gen9_blt_reg_tables; + engine->reg_table_count = + ARRAY_SIZE(gen9_blt_reg_tables); + } else if (IS_HASWELL(engine->i915)) { engine->reg_tables = hsw_blt_reg_tables; engine->reg_table_count = ARRAY_SIZE(hsw_blt_reg_tables); } else { engine->reg_tables = ivb_blt_reg_tables; engine->reg_table_count = ARRAY_SIZE(ivb_blt_reg_tables); } - - engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; break; case VECS: - cmd_tables = hsw_vebox_cmds; - cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds); + cmd_tables = hsw_vebox_cmd_table; + cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table); /* VECS can use the same length_mask function as VCS */ engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; break; @@ -947,7 +1024,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) return; } - engine->needs_cmd_parser = true; + engine->flags |= I915_ENGINE_USING_CMD_PARSER; } /** @@ -959,7 +1036,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) */ void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine) { - if (!engine->needs_cmd_parser) + if (!intel_engine_using_cmd_parser(engine)) return; fini_hash_table(engine); @@ -1033,22 +1110,16 @@ __find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr) } static const struct drm_i915_reg_descriptor * -find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr) +find_reg(const struct intel_engine_cs *engine, u32 addr) { const struct drm_i915_reg_table *table = engine->reg_tables; + const struct drm_i915_reg_descriptor *reg = NULL; int count = engine->reg_table_count; - do { - if (!table->master || is_master) { - const struct drm_i915_reg_descriptor *reg; + for (; !reg && (count > 0); ++table, --count) + reg = __find_reg(table->regs, table->num_regs, addr); - reg = __find_reg(table->regs, table->num_regs, addr); - if (reg != NULL) - return reg; - } - } while (table++, --count); - - return NULL; + return reg; } /* Returns a vmap'd pointer to dst_obj, which the caller must unmap */ @@ -1133,8 +1204,7 @@ unpin_src: static bool check_cmd(const struct intel_engine_cs *engine, const struct drm_i915_cmd_descriptor *desc, - const u32 *cmd, u32 length, - const bool is_master) + const u32 *cmd, u32 length) { if (desc->flags & CMD_DESC_SKIP) return true; @@ -1144,12 +1214,6 @@ static bool check_cmd(const struct intel_engine_cs *engine, return false; } - if ((desc->flags & CMD_DESC_MASTER) && !is_master) { - DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", - *cmd); - return false; - } - if (desc->flags & CMD_DESC_REGISTER) { /* * Get the distance between individual register offset @@ -1163,7 +1227,7 @@ static bool check_cmd(const struct intel_engine_cs *engine, offset += step) { const u32 reg_addr = cmd[offset] & desc->reg.mask; const struct drm_i915_reg_descriptor *reg = - find_reg(engine, is_master, reg_addr); + find_reg(engine, reg_addr); if (!reg) { DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n", @@ -1218,6 +1282,12 @@ static bool check_cmd(const struct intel_engine_cs *engine, continue; } + if (desc->bits[i].offset >= length) { + DRM_DEBUG_DRIVER("CMD: Rejected command 0x%08X, too short to check bitmask (%s)\n", + *cmd, engine->name); + return false; + } + dword = cmd[desc->bits[i].offset] & desc->bits[i].mask; @@ -1235,16 +1305,112 @@ static bool check_cmd(const struct intel_engine_cs *engine, return true; } +static int check_bbstart(const struct i915_gem_context *ctx, + u32 *cmd, u32 offset, u32 length, + u32 batch_len, + u64 batch_start, + u64 shadow_batch_start) +{ + u64 jump_offset, jump_target; + u32 target_cmd_offset, target_cmd_index; + + /* For igt compatibility on older platforms */ + if (CMDPARSER_USES_GGTT(ctx->i915)) { + DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n"); + return -EACCES; + } + + if (length != 3) { + DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n", + length); + return -EINVAL; + } + + jump_target = *(u64*)(cmd+1); + jump_offset = jump_target - batch_start; + + /* + * Any underflow of jump_target is guaranteed to be outside the range + * of a u32, so >= test catches both too large and too small + */ + if (jump_offset >= batch_len) { + DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n", + jump_target); + return -EINVAL; + } + + /* + * This cannot overflow a u32 because we already checked jump_offset + * is within the BB, and the batch_len is a u32 + */ + target_cmd_offset = lower_32_bits(jump_offset); + target_cmd_index = target_cmd_offset / sizeof(u32); + + *(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset; + + if (target_cmd_index == offset) + return 0; + + if (ctx->jump_whitelist_cmds <= target_cmd_index) { + DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n"); + return -EINVAL; + } else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) { + DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n", + jump_target); + return -EINVAL; + } + + return 0; +} + +static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len) +{ + const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32)); + const u32 exact_size = BITS_TO_LONGS(batch_cmds); + u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds)); + unsigned long *next_whitelist; + + if (CMDPARSER_USES_GGTT(ctx->i915)) + return; + + if (batch_cmds <= ctx->jump_whitelist_cmds) { + bitmap_zero(ctx->jump_whitelist, batch_cmds); + return; + } + +again: + next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL); + if (next_whitelist) { + kfree(ctx->jump_whitelist); + ctx->jump_whitelist = next_whitelist; + ctx->jump_whitelist_cmds = + next_size * BITS_PER_BYTE * sizeof(long); + return; + } + + if (next_size > exact_size) { + next_size = exact_size; + goto again; + } + + DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n"); + bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds); + + return; +} + #define LENGTH_BIAS 2 /** * i915_parse_cmds() - parse a submitted batch buffer for privilege violations + * @ctx: the context in which the batch is to execute * @engine: the engine on which the batch is to execute * @batch_obj: the batch buffer in question - * @shadow_batch_obj: copy of the batch buffer in question + * @batch_start: Canonical base address of batch * @batch_start_offset: byte offset in the batch at which execution starts * @batch_len: length of the commands in batch_obj - * @is_master: is the submitting process the drm master? + * @shadow_batch_obj: copy of the batch buffer in question + * @shadow_batch_start: Canonical base address of shadow_batch_obj * * Parses the specified batch buffer looking for privilege violations as * described in the overview. @@ -1252,14 +1418,17 @@ static bool check_cmd(const struct intel_engine_cs *engine, * Return: non-zero if the parser finds violations or otherwise fails; -EACCES * if the batch appears legal but should use hardware parsing */ -int intel_engine_cmd_parser(struct intel_engine_cs *engine, + +int intel_engine_cmd_parser(struct i915_gem_context *ctx, + struct intel_engine_cs *engine, struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, + u64 batch_start, u32 batch_start_offset, u32 batch_len, - bool is_master) + struct drm_i915_gem_object *shadow_batch_obj, + u64 shadow_batch_start) { - u32 *cmd, *batch_end; + u32 *cmd, *batch_end, offset = 0; struct drm_i915_cmd_descriptor default_desc = noop_desc; const struct drm_i915_cmd_descriptor *desc = &default_desc; bool needs_clflush_after = false; @@ -1273,6 +1442,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, return PTR_ERR(cmd); } + init_whitelist(ctx, batch_len); + /* * We use the batch length as size because the shadow object is as * large or larger and copy_batch() will write MI_NOPs to the extra @@ -1282,31 +1453,15 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, do { u32 length; - if (*cmd == MI_BATCH_BUFFER_END) { - if (needs_clflush_after) { - void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); - drm_clflush_virt_range(ptr, - (void *)(cmd + 1) - ptr); - } + if (*cmd == MI_BATCH_BUFFER_END) break; - } desc = find_cmd(engine, *cmd, desc, &default_desc); if (!desc) { DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", *cmd); ret = -EINVAL; - break; - } - - /* - * If the batch buffer contains a chained batch, return an - * error that tells the caller to abort and dispatch the - * workload as a non-secure batch. - */ - if (desc->cmd.value == MI_BATCH_BUFFER_START) { - ret = -EACCES; - break; + goto err; } if (desc->flags & CMD_DESC_FIXED) @@ -1320,22 +1475,43 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, length, batch_end - cmd); ret = -EINVAL; + goto err; + } + + if (!check_cmd(engine, desc, cmd, length)) { + ret = -EACCES; + goto err; + } + + if (desc->cmd.value == MI_BATCH_BUFFER_START) { + ret = check_bbstart(ctx, cmd, offset, length, + batch_len, batch_start, + shadow_batch_start); + + if (ret) + goto err; break; } - if (!check_cmd(engine, desc, cmd, length, is_master)) { - ret = -EACCES; - break; - } + if (ctx->jump_whitelist_cmds > offset) + set_bit(offset, ctx->jump_whitelist); cmd += length; + offset += length; if (cmd >= batch_end) { DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); ret = -EINVAL; - break; + goto err; } } while (1); + if (needs_clflush_after) { + void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); + + drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr); + } + +err: i915_gem_object_unpin_map(shadow_batch_obj); return ret; } @@ -1357,7 +1533,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) /* If the command parser is not enabled, report 0 - unsupported */ for_each_engine(engine, dev_priv, id) { - if (engine->needs_cmd_parser) { + if (intel_engine_using_cmd_parser(engine)) { active = true; break; } @@ -1382,6 +1558,7 @@ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv) * the parser enabled. * 9. Don't whitelist or handle oacontrol specially, as ownership * for oacontrol state is moving to i915-perf. + * 10. Support for Gen9 BCS Parsing */ - return 9; + return 10; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c75f4ccbcdef..02a2af7c8166 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -323,7 +323,7 @@ static int i915_getparam(struct drm_device *dev, void *data, value = i915.semaphores; break; case I915_PARAM_HAS_SECURE_BATCHES: - value = capable(CAP_SYS_ADMIN); + value = HAS_SECURE_BATCHES(dev_priv) && capable(CAP_SYS_ADMIN); break; case I915_PARAM_CMD_PARSER_VERSION: value = i915_cmd_parser_get_version(dev_priv); @@ -1564,6 +1564,7 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) disable_rpm_wakeref_asserts(dev_priv); intel_display_set_init_power(dev_priv, false); + i915_rc6_ctx_wa_suspend(dev_priv); fw_csr = !IS_GEN9_LP(dev_priv) && suspend_to_idle(dev_priv) && dev_priv->csr.dmc_payload; @@ -1800,6 +1801,7 @@ static int i915_drm_resume_early(struct drm_device *dev) intel_display_set_init_power(dev_priv, true); i915_gem_sanitize(dev_priv); + i915_rc6_ctx_wa_resume(dev_priv); enable_rpm_wakeref_asserts(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 41f51509c9e4..a5fb7404b29e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1320,6 +1320,7 @@ struct intel_gen6_power_mgmt { enum { LOW_POWER, BETWEEN, HIGH_POWER } power; bool enabled; + bool ctx_corrupted; struct delayed_work autoenable_work; atomic_t num_waiters; atomic_t boosts; @@ -2980,6 +2981,12 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && IS_LP(dev_priv)) #define IS_GEN9_BC(dev_priv) (IS_GEN9(dev_priv) && !IS_LP(dev_priv)) +/* + * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution + * All later gens can run the final buffer from the ppgtt + */ +#define CMDPARSER_USES_GGTT(dev_priv) IS_GEN7(dev_priv) + #define ENGINE_MASK(id) BIT(id) #define RENDER_RING ENGINE_MASK(RCS) #define BSD_RING ENGINE_MASK(VCS) @@ -2996,6 +3003,8 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_BLT(dev_priv) HAS_ENGINE(dev_priv, BCS) #define HAS_VEBOX(dev_priv) HAS_ENGINE(dev_priv, VECS) +#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6) + #define HAS_LLC(dev_priv) ((dev_priv)->info.has_llc) #define HAS_SNOOP(dev_priv) ((dev_priv)->info.has_snoop) #define HAS_EDRAM(dev_priv) (!!((dev_priv)->edram_cap & EDRAM_ENABLED)) @@ -3017,9 +3026,12 @@ intel_info(const struct drm_i915_private *dev_priv) /* Early gen2 have a totally busted CS tlb and require pinned batches. */ #define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv)) +#define NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv) \ + (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) == 9) + /* WaRsDisableCoarsePowerGating:skl,bxt */ #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ - (IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) + (INTEL_GEN(dev_priv) == 9) /* * dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts @@ -3391,6 +3403,14 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, u64 alignment, u64 flags); +struct i915_vma * __must_check +i915_gem_object_pin(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view, + u64 size, + u64 alignment, + u64 flags); + int i915_gem_object_unbind(struct drm_i915_gem_object *obj); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); @@ -3841,12 +3861,14 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type); int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); void intel_engine_init_cmd_parser(struct intel_engine_cs *engine); void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); -int intel_engine_cmd_parser(struct intel_engine_cs *engine, +int intel_engine_cmd_parser(struct i915_gem_context *cxt, + struct intel_engine_cs *engine, struct drm_i915_gem_object *batch_obj, - struct drm_i915_gem_object *shadow_batch_obj, + u64 user_batch_start, u32 batch_start_offset, u32 batch_len, - bool is_master); + struct drm_i915_gem_object *shadow_batch_obj, + u64 shadow_batch_start); /* i915_perf.c */ extern void i915_perf_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 727018a16cca..9263b65720bc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1834,6 +1834,10 @@ int i915_gem_fault(struct vm_fault *vmf) unsigned int flags; int ret; + /* Sanity check that we allow writing into this object */ + if (i915_gem_object_is_readonly(obj) && write) + return VM_FAULT_SIGBUS; + /* We don't use vmf->pgoff since that has the fake offset */ page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; @@ -3239,6 +3243,12 @@ i915_gem_idle_work_handler(struct work_struct *work) if (INTEL_GEN(dev_priv) >= 6) gen6_rps_idle(dev_priv); + + if (NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv)) { + i915_rc6_ctx_wa_check(dev_priv); + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + } + intel_runtime_pm_put(dev_priv); out_unlock: mutex_unlock(&dev->struct_mutex); @@ -3996,6 +4006,20 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_address_space *vm = &dev_priv->ggtt.base; + + return i915_gem_object_pin(obj, vm, view, size, alignment, + flags | PIN_GLOBAL); +} + +struct i915_vma * +i915_gem_object_pin(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view, + u64 size, + u64 alignment, + u64 flags) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct i915_vma *vma; int ret; @@ -4053,7 +4077,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, return ERR_PTR(ret); } - ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); + ret = i915_vma_pin(vma, size, alignment, flags); if (ret) return ERR_PTR(ret); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8afd2ce59b8d..cdb67889817c 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -141,6 +141,8 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) __i915_gem_object_release_unless_active(ce->state->obj); } + kfree(ctx->jump_whitelist); + kfree(ctx->name); put_pid(ctx->pid); @@ -321,6 +323,9 @@ __create_hw_context(struct drm_i915_private *dev_priv, else ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE; + ctx->jump_whitelist = NULL; + ctx->jump_whitelist_cmds = 0; + return ctx; err_pid: @@ -988,18 +993,19 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data, if (args->ctx_id == DEFAULT_CONTEXT_HANDLE) return -ENOENT; - ctx = i915_gem_context_lookup(file_priv, args->ctx_id); - if (!ctx) - return -ENOENT; - - ret = mutex_lock_interruptible(&dev->struct_mutex); + ret = i915_mutex_lock_interruptible(dev); if (ret) - goto out; + return ret; + + ctx = i915_gem_context_lookup(file_priv, args->ctx_id); + if (!ctx) { + mutex_unlock(&dev->struct_mutex); + return -ENOENT; + } __destroy_hw_context(ctx, file_priv); mutex_unlock(&dev->struct_mutex); -out: i915_gem_context_put(ctx); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 44688e22a5c2..b651c5f427b9 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -181,6 +181,12 @@ struct i915_gem_context { /** remap_slice: Bitmask of cache lines that need remapping */ u8 remap_slice; + /** jump_whitelist: Bit array for tracking cmds during cmdparsing */ + unsigned long *jump_whitelist; + + /** jump_whitelist_cmds: No of cmd slots available */ + u32 jump_whitelist_cmds; + /** handles_vma: rbtree to look up our context specific obj/vma for * the user handle. (user handles are per fd, but the binding is * per vm, which may be one per context or shared with the global GTT) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 4cc9ce4b5b16..d99d05a91032 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -268,6 +268,13 @@ static inline u64 gen8_noncanonical_addr(u64 address) return address & GENMASK_ULL(GEN8_HIGH_ADDRESS_BIT, 0); } +static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) +{ + return intel_engine_requires_cmd_parser(eb->engine) || + (intel_engine_using_cmd_parser(eb->engine) && + eb->args->batch_len); +} + static int eb_create(struct i915_execbuffer *eb) { if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) { @@ -1165,6 +1172,10 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, if (unlikely(!cache->rq)) { int err; + /* If we need to copy for the cmdparser, we will stall anyway */ + if (eb_use_cmdparser(eb)) + return ERR_PTR(-EWOULDBLOCK); + err = __reloc_gpu_alloc(eb, vma, len); if (unlikely(err)) return ERR_PTR(err); @@ -1902,10 +1913,38 @@ static int i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) return 0; } -static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) +static struct i915_vma * +shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *dev_priv = eb->i915; + struct i915_address_space *vm; + u64 flags; + + /* + * PPGTT backed shadow buffers must be mapped RO, to prevent + * post-scan tampering + */ + if (CMDPARSER_USES_GGTT(dev_priv)) { + flags = PIN_GLOBAL; + vm = &dev_priv->ggtt.base; + } else if (eb->vm->has_read_only) { + flags = PIN_USER; + vm = eb->vm; + i915_gem_object_set_readonly(obj); + } else { + DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n"); + return ERR_PTR(-EINVAL); + } + + return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags); +} + +static struct i915_vma *eb_parse(struct i915_execbuffer *eb) { struct drm_i915_gem_object *shadow_batch_obj; struct i915_vma *vma; + u64 batch_start; + u64 shadow_batch_start; int err; shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, @@ -1913,29 +1952,53 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) if (IS_ERR(shadow_batch_obj)) return ERR_CAST(shadow_batch_obj); - err = intel_engine_cmd_parser(eb->engine, + vma = shadow_batch_pin(eb, shadow_batch_obj); + if (IS_ERR(vma)) + goto out; + + batch_start = gen8_canonical_addr(eb->batch->node.start) + + eb->batch_start_offset; + + shadow_batch_start = gen8_canonical_addr(vma->node.start); + + err = intel_engine_cmd_parser(eb->ctx, + eb->engine, eb->batch->obj, - shadow_batch_obj, + batch_start, eb->batch_start_offset, eb->batch_len, - is_master); + shadow_batch_obj, + shadow_batch_start); + if (err) { - if (err == -EACCES) /* unhandled chained batch */ + i915_vma_unpin(vma); + + /* + * Unsafe GGTT-backed buffers can still be submitted safely + * as non-secure. + * For PPGTT backing however, we have no choice but to forcibly + * reject unsafe buffers + */ + if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES)) + /* Execute original buffer non-secure */ vma = NULL; else vma = ERR_PTR(err); + goto out; } - vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); - if (IS_ERR(vma)) - goto out; - eb->vma[eb->buffer_count] = i915_vma_get(vma); eb->flags[eb->buffer_count] = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; vma->exec_flags = &eb->flags[eb->buffer_count]; eb->buffer_count++; + eb->batch_start_offset = 0; + eb->batch = vma; + /* eb->batch_len unchanged */ + + if (CMDPARSER_USES_GGTT(eb->i915)) + eb->batch_flags |= I915_DISPATCH_SECURE; out: i915_gem_object_unpin_pages(shadow_batch_obj); @@ -2186,6 +2249,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, struct drm_i915_gem_exec_object2 *exec, struct drm_syncobj **fences) { + struct drm_i915_private *dev_priv = to_i915(dev); struct i915_execbuffer eb; struct dma_fence *in_fence = NULL; struct sync_file *out_fence = NULL; @@ -2195,7 +2259,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS); - eb.i915 = to_i915(dev); + eb.i915 = dev_priv; eb.file = file; eb.args = args; if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) @@ -2217,8 +2281,15 @@ i915_gem_do_execbuffer(struct drm_device *dev, eb.batch_flags = 0; if (args->flags & I915_EXEC_SECURE) { + if (INTEL_GEN(dev_priv) >= 11) + return -ENODEV; + + /* Return -EPERM to trigger fallback code on old binaries. */ + if (!HAS_SECURE_BATCHES(dev_priv)) + return -EPERM; + if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) - return -EPERM; + return -EPERM; eb.batch_flags |= I915_DISPATCH_SECURE; } @@ -2305,34 +2376,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, goto err_vma; } - if (eb.engine->needs_cmd_parser && eb.batch_len) { + if (eb.batch_len == 0) + eb.batch_len = eb.batch->size - eb.batch_start_offset; + + if (eb_use_cmdparser(&eb)) { struct i915_vma *vma; - vma = eb_parse(&eb, drm_is_current_master(file)); + vma = eb_parse(&eb); if (IS_ERR(vma)) { err = PTR_ERR(vma); goto err_vma; } - - if (vma) { - /* - * Batch parsed and accepted: - * - * Set the DISPATCH_SECURE bit to remove the NON_SECURE - * bit from MI_BATCH_BUFFER_START commands issued in - * the dispatch_execbuffer implementations. We - * specifically don't want that set on batches the - * command parser has accepted. - */ - eb.batch_flags |= I915_DISPATCH_SECURE; - eb.batch_start_offset = 0; - eb.batch = vma; - } } - if (eb.batch_len == 0) - eb.batch_len = eb.batch->size - eb.batch_start_offset; - /* * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ad524cb0f6fc..47d178817a29 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -159,7 +159,8 @@ int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv, if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9) return 0; - if (enable_ppgtt == 1) + /* Full PPGTT is required by the Gen9 cmdparser */ + if (enable_ppgtt == 1 && INTEL_GEN(dev_priv) != 9) return 1; if (enable_ppgtt == 2 && has_full_ppgtt) @@ -207,9 +208,9 @@ static int ppgtt_bind_vma(struct i915_vma *vma, vma->pages = vma->obj->mm.pages; - /* Currently applicable only to VLV */ + /* Applicable to VLV, and gen8+ */ pte_flags = 0; - if (vma->obj->gt_ro) + if (i915_gem_object_is_readonly(vma->obj)) pte_flags |= PTE_READ_ONLY; vma->vm->insert_entries(vma->vm, vma, cache_level, pte_flags); @@ -223,10 +224,13 @@ static void ppgtt_unbind_vma(struct i915_vma *vma) } static gen8_pte_t gen8_pte_encode(dma_addr_t addr, - enum i915_cache_level level) + enum i915_cache_level level, + u32 flags) { - gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW; - pte |= addr; + gen8_pte_t pte = addr | _PAGE_PRESENT | _PAGE_RW; + + if (unlikely(flags & PTE_READ_ONLY)) + pte &= ~_PAGE_RW; switch (level) { case I915_CACHE_NONE: @@ -487,7 +491,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm, struct i915_page_table *pt) { fill_px(vm, pt, - gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC)); + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0)); } static void gen6_initialize_pt(struct i915_address_space *vm, @@ -691,7 +695,7 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm, unsigned int pte = gen8_pte_index(start); unsigned int pte_end = pte + num_entries; const gen8_pte_t scratch_pte = - gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); gen8_pte_t *vaddr; GEM_BUG_ON(num_entries > pt->used_ptes); @@ -863,10 +867,11 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, struct i915_page_directory_pointer *pdp, struct sgt_dma *iter, struct gen8_insert_pte *idx, - enum i915_cache_level cache_level) + enum i915_cache_level cache_level, + u32 flags) { struct i915_page_directory *pd; - const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level); + const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level, flags); gen8_pte_t *vaddr; bool ret; @@ -917,20 +922,20 @@ gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt, static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm, struct i915_vma *vma, enum i915_cache_level cache_level, - u32 unused) + u32 flags) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct sgt_dma iter = sgt_dma(vma); struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter, &idx, - cache_level); + cache_level, flags); } static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, struct i915_vma *vma, enum i915_cache_level cache_level, - u32 unused) + u32 flags) { struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm); struct sgt_dma iter = sgt_dma(vma); @@ -938,7 +943,7 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm, struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start); while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[idx.pml4e++], &iter, - &idx, cache_level)) + &idx, cache_level, flags)) GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4); } @@ -1264,7 +1269,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m) { struct i915_address_space *vm = &ppgtt->base; const gen8_pte_t scratch_pte = - gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); u64 start = 0, length = ppgtt->base.total; if (use_4lvl(vm)) { @@ -1339,6 +1344,13 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt) return ret; } + /* + * From bdw, there is support for read-only pages in the PPGTT. + * + * XXX GVT is not honouring the lack of RW in the PTE bits. + */ + ppgtt->base.has_read_only = !intel_vgpu_active(dev_priv); + /* There are only few exceptions for gen >=6. chv and bxt. * And we are not sure about the latter so play safe for now. */ @@ -2078,7 +2090,7 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, gen8_pte_t __iomem *pte = (gen8_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT); - gen8_set_pte(pte, gen8_pte_encode(addr, level)); + gen8_set_pte(pte, gen8_pte_encode(addr, level, 0)); ggtt->invalidate(vm->i915); } @@ -2086,14 +2098,19 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct i915_vma *vma, enum i915_cache_level level, - u32 unused) + u32 flags) { struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); struct sgt_iter sgt_iter; gen8_pte_t __iomem *gtt_entries; - const gen8_pte_t pte_encode = gen8_pte_encode(0, level); + const gen8_pte_t pte_encode = gen8_pte_encode(0, level, 0); dma_addr_t addr; + /* + * Note that we ignore PTE_READ_ONLY here. The caller must be careful + * not to allow the user to override access to a read only page. + */ + gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm; gtt_entries += vma->node.start >> PAGE_SHIFT; for_each_sgt_dma(addr, sgt_iter, vma->pages) @@ -2162,7 +2179,7 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, unsigned first_entry = start >> PAGE_SHIFT; unsigned num_entries = length >> PAGE_SHIFT; const gen8_pte_t scratch_pte = - gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC); + gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0); gen8_pte_t __iomem *gtt_base = (gen8_pte_t __iomem *)ggtt->gsm + first_entry; const int max_entries = ggtt_total_entries(ggtt) - first_entry; @@ -2223,13 +2240,14 @@ struct insert_entries { struct i915_address_space *vm; struct i915_vma *vma; enum i915_cache_level level; + u32 flags; }; static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) { struct insert_entries *arg = _arg; - gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, 0); + gen8_ggtt_insert_entries(arg->vm, arg->vma, arg->level, arg->flags); bxt_vtd_ggtt_wa(arg->vm); return 0; @@ -2238,9 +2256,9 @@ static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, struct i915_vma *vma, enum i915_cache_level level, - u32 unused) + u32 flags) { - struct insert_entries arg = { vm, vma, level }; + struct insert_entries arg = { vm, vma, level, flags }; stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL); } @@ -2337,9 +2355,9 @@ static int ggtt_bind_vma(struct i915_vma *vma, return ret; } - /* Currently applicable only to VLV */ + /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ pte_flags = 0; - if (obj->gt_ro) + if (i915_gem_object_is_readonly(obj)) pte_flags |= PTE_READ_ONLY; intel_runtime_pm_get(i915); @@ -2381,7 +2399,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, /* Currently applicable only to VLV */ pte_flags = 0; - if (vma->obj->gt_ro) + if (i915_gem_object_is_readonly(vma->obj)) pte_flags |= PTE_READ_ONLY; if (flags & I915_VMA_LOCAL_BIND) { @@ -3063,6 +3081,10 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) */ mutex_lock(&dev_priv->drm.struct_mutex); i915_address_space_init(&ggtt->base, dev_priv, "[global]"); + + /* Only VLV supports read-only GGTT mappings */ + ggtt->base.has_read_only = IS_VALLEYVIEW(dev_priv); + if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv)) ggtt->base.mm.color_adjust = i915_gtt_color_adjust; mutex_unlock(&dev_priv->drm.struct_mutex); @@ -3095,7 +3117,6 @@ int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv) { if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt()) return -EIO; - return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 0dbbe840f5f0..4b63d6cbd81e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -295,7 +295,12 @@ struct i915_address_space { struct list_head unbound_list; struct pagevec free_pages; - bool pt_kmap_wc; + + /* Some systems require uncached updates of the page directories */ + bool pt_kmap_wc:1; + + /* Some systems support read-only mappings for GGTT and/or PPGTT */ + bool has_read_only:1; /* FIXME: Need a more generic return type */ gen6_pte_t (*pte_encode)(dma_addr_t addr, diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index c30d8f808185..39cfe04dcdb8 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -140,7 +140,6 @@ struct drm_i915_gem_object { * Is the object to be mapped as read-only to the GPU * Only honoured if hardware has relevant pte bit */ - unsigned long gt_ro:1; unsigned int cache_level:3; unsigned int cache_coherent:2; #define I915_BO_CACHE_COHERENT_FOR_READ BIT(0) @@ -313,6 +312,18 @@ static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj) reservation_object_unlock(obj->resv); } +static inline void +i915_gem_object_set_readonly(struct drm_i915_gem_object *obj) +{ + obj->base.vma_node.readonly = true; +} + +static inline bool +i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj) +{ + return obj->base.vma_node.readonly; +} + static inline bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 813a3b546d6e..1d556dcbd656 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -252,6 +252,10 @@ static void mark_busy(struct drm_i915_private *i915) GEM_BUG_ON(!i915->gt.active_requests); intel_runtime_pm_get_noresume(i915); + + if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); + i915->gt.awake = true; intel_enable_gt_powersave(i915); diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 05ae8c4a8a1b..480d20758324 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -690,8 +690,28 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, i915_gem_gtt_finish_pages(obj, pages); for_each_sgt_page(page, sgt_iter, pages) { - if (obj->mm.dirty) + if (obj->mm.dirty && trylock_page(page)) { + /* + * As this may not be anonymous memory (e.g. shmem) + * but exist on a real mapping, we have to lock + * the page in order to dirty it -- holding + * the page reference is not sufficient to + * prevent the inode from being truncated. + * Play safe and take the lock. + * + * However...! + * + * The mmu-notifier can be invalidated for a + * migrate_page, that is alreadying holding the lock + * on the page. Such a try_to_unmap() will result + * in us calling put_pages() and so recursively try + * to lock the page. We avoid that deadlock with + * a trylock_page() and in exchange we risk missing + * some page dirtying. + */ set_page_dirty(page); + unlock_page(page); + } mark_page_accessed(page); put_page(page); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2e706f1abe64..1db70350af0b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -358,6 +358,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN8_CONFIG0 _MMIO(0xD00) #define GEN9_DEFAULT_FIXES (1 << 3 | 1 << 2 | 1 << 1) +#define GEN8_RC6_CTX_INFO _MMIO(0x8504) + #define GAC_ECO_BITS _MMIO(0x14090) #define ECOBITS_SNB_BIT (1<<13) #define ECOBITS_PPGTT_CACHE64B (3<<8) @@ -703,6 +705,10 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) */ #define BCS_SWCTRL _MMIO(0x22200) +/* There are 16 GPR registers */ +#define BCS_GPR(n) _MMIO(0x22600 + (n) * 8) +#define BCS_GPR_UDW(n) _MMIO(0x22600 + (n) * 8 + 4) + #define GPGPU_THREADS_DISPATCHED _MMIO(0x2290) #define GPGPU_THREADS_DISPATCHED_UDW _MMIO(0x2290 + 4) #define HS_INVOCATION_COUNT _MMIO(0x2300) @@ -6722,6 +6728,10 @@ enum { #define SKL_CSR_DC5_DC6_COUNT _MMIO(0x8002C) #define BXT_CSR_DC3_DC5_COUNT _MMIO(0x80038) +/* Display Internal Timeout Register */ +#define RM_TIMEOUT _MMIO(0x42060) +#define MMIO_TIMEOUT_US(us) ((us) << 0) + /* interrupts */ #define DE_MASTER_IRQ_CONTROL (1 << 31) #define DE_SPRITEB_FLIP_DONE (1 << 29) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 3adb9c3b412e..cba10cdab2a9 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1838,6 +1838,9 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv); void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); +bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915); +void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915); +void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915); void gen6_rps_busy(struct drm_i915_private *dev_priv); void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); void gen6_rps_idle(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e0483c068d23..baff1f01bfc7 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1101,17 +1101,14 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) *batch++ = MI_NOOP; - /* WaClearSlmSpaceAtContextSwitch:kbl */ - /* Actual scratch location is at 128 bytes offset */ - if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) { - batch = gen8_emit_pipe_control(batch, - PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE, - i915_ggtt_offset(engine->scratch) - + 2 * CACHELINE_BYTES); - } + /* WaClearSlmSpaceAtContextSwitch:skl,bxt,kbl,glk,cfl */ + batch = gen8_emit_pipe_control(batch, + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE, + i915_ggtt_offset(engine->scratch) + + 2 * CACHELINE_BYTES); /* WaMediaPoolStateCmdInWABB:bxt,glk */ if (HAS_POOLED_EU(engine->i915)) { diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index cb377b003321..674410682ccc 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -121,6 +121,14 @@ static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) */ I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | PWM1_GATING_DIS | PWM2_GATING_DIS); + + /* + * Lower the display internal timeout. + * This is needed to avoid any hard hangs when DSI port PLL + * is off and a MMIO access is attempted by any privilege + * application, using batch buffers or any other means. + */ + I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950)); } static void glk_init_clock_gating(struct drm_i915_private *dev_priv) @@ -6274,19 +6282,23 @@ static void gen9_disable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RP_CONTROL, 0); } -static void gen6_disable_rps(struct drm_i915_private *dev_priv) +static void gen6_disable_rc6(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RC_CONTROL, 0); +} + +static void gen6_disable_rps(struct drm_i915_private *dev_priv) +{ I915_WRITE(GEN6_RPNSWREQ, 1 << 31); I915_WRITE(GEN6_RP_CONTROL, 0); } -static void cherryview_disable_rps(struct drm_i915_private *dev_priv) +static void cherryview_disable_rc6(struct drm_i915_private *dev_priv) { I915_WRITE(GEN6_RC_CONTROL, 0); } -static void valleyview_disable_rps(struct drm_i915_private *dev_priv) +static void valleyview_disable_rc6(struct drm_i915_private *dev_priv) { /* we're doing forcewake before Disabling RC6, * This what the BIOS expects when going into suspend */ @@ -6537,7 +6549,8 @@ static void gen9_enable_rc6(struct drm_i915_private *dev_priv) I915_WRITE(GEN9_RENDER_PG_IDLE_HYSTERESIS, 25); /* 3a: Enable RC6 */ - if (intel_enable_rc6() & INTEL_RC6_ENABLE) + if (!dev_priv->rps.ctx_corrupted && + intel_enable_rc6() & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; DRM_INFO("RC6 %s\n", onoff(rc6_mask & GEN6_RC_CTL_RC6_ENABLE)); I915_WRITE(GEN6_RC6_THRESHOLD, 37500); /* 37.5/125ms per EI */ @@ -6586,7 +6599,8 @@ static void gen8_enable_rps(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ /* 3: Enable RC6 */ - if (intel_enable_rc6() & INTEL_RC6_ENABLE) + if (!dev_priv->rps.ctx_corrupted && + intel_enable_rc6() & INTEL_RC6_ENABLE) rc6_mask = GEN6_RC_CTL_RC6_ENABLE; intel_print_rc6_info(dev_priv, rc6_mask); if (IS_BROADWELL(dev_priv)) @@ -7767,6 +7781,95 @@ static void intel_init_emon(struct drm_i915_private *dev_priv) dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); } +static bool i915_rc6_ctx_corrupted(struct drm_i915_private *dev_priv) +{ + return !I915_READ(GEN8_RC6_CTX_INFO); +} + +static void i915_rc6_ctx_wa_init(struct drm_i915_private *i915) +{ + if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + return; + + if (i915_rc6_ctx_corrupted(i915)) { + DRM_INFO("RC6 context corrupted, disabling runtime power management\n"); + i915->rps.ctx_corrupted = true; + intel_runtime_pm_get(i915); + } +} + +static void i915_rc6_ctx_wa_cleanup(struct drm_i915_private *i915) +{ + if (i915->rps.ctx_corrupted) { + intel_runtime_pm_put(i915); + i915->rps.ctx_corrupted = false; + } +} + +/** + * i915_rc6_ctx_wa_suspend - system suspend sequence for the RC6 CTX WA + * @i915: i915 device + * + * Perform any steps needed to clean up the RC6 CTX WA before system suspend. + */ +void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915) +{ + if (i915->rps.ctx_corrupted) + intel_runtime_pm_put(i915); +} + +/** + * i915_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA + * @i915: i915 device + * + * Perform any steps needed to re-init the RC6 CTX WA after system resume. + */ +void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915) +{ + if (!i915->rps.ctx_corrupted) + return; + + if (i915_rc6_ctx_corrupted(i915)) { + intel_runtime_pm_get(i915); + return; + } + + DRM_INFO("RC6 context restored, re-enabling runtime power management\n"); + i915->rps.ctx_corrupted = false; +} + +static void intel_disable_rc6(struct drm_i915_private *dev_priv); + +/** + * i915_rc6_ctx_wa_check - check for a new RC6 CTX corruption + * @i915: i915 device + * + * Check if an RC6 CTX corruption has happened since the last check and if so + * disable RC6 and runtime power management. + * + * Return false if no context corruption has happened since the last call of + * this function, true otherwise. +*/ +bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915) +{ + if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) + return false; + + if (i915->rps.ctx_corrupted) + return false; + + if (!i915_rc6_ctx_corrupted(i915)) + return false; + + DRM_NOTE("RC6 context corruption, disabling runtime power management\n"); + + intel_disable_rc6(i915); + i915->rps.ctx_corrupted = true; + intel_runtime_pm_get_noresume(i915); + + return true; +} + void intel_init_gt_powersave(struct drm_i915_private *dev_priv) { /* @@ -7781,6 +7884,8 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); mutex_lock(&dev_priv->rps.hw_lock); + i915_rc6_ctx_wa_init(dev_priv); + /* Initialize RPS limits (for userspace) */ if (IS_CHERRYVIEW(dev_priv)) cherryview_init_gt_powersave(dev_priv); @@ -7830,6 +7935,8 @@ void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) if (IS_VALLEYVIEW(dev_priv)) valleyview_cleanup_gt_powersave(dev_priv); + i915_rc6_ctx_wa_cleanup(dev_priv); + if (!i915.enable_rc6) intel_runtime_pm_put(dev_priv); } @@ -7861,6 +7968,35 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) gen6_reset_rps_interrupts(dev_priv); } +static void __intel_disable_rc6(struct drm_i915_private *dev_priv) +{ + if (INTEL_GEN(dev_priv) >= 9) + gen9_disable_rc6(dev_priv); + else if (IS_CHERRYVIEW(dev_priv)) + cherryview_disable_rc6(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) + valleyview_disable_rc6(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_disable_rc6(dev_priv); +} + +static void intel_disable_rc6(struct drm_i915_private *dev_priv) +{ + mutex_lock(&dev_priv->rps.hw_lock); + __intel_disable_rc6(dev_priv); + mutex_unlock(&dev_priv->rps.hw_lock); +} + +static void intel_disable_rps(struct drm_i915_private *dev_priv) +{ + if (INTEL_GEN(dev_priv) >= 9) + gen9_disable_rps(dev_priv); + else if (INTEL_GEN(dev_priv) >= 6) + gen6_disable_rps(dev_priv); + else if (IS_IRONLAKE_M(dev_priv)) + ironlake_disable_drps(dev_priv); +} + void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) { if (!READ_ONCE(dev_priv->rps.enabled)) @@ -7868,20 +8004,11 @@ void intel_disable_gt_powersave(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->rps.hw_lock); - if (INTEL_GEN(dev_priv) >= 9) { - gen9_disable_rc6(dev_priv); - gen9_disable_rps(dev_priv); - } else if (IS_CHERRYVIEW(dev_priv)) { - cherryview_disable_rps(dev_priv); - } else if (IS_VALLEYVIEW(dev_priv)) { - valleyview_disable_rps(dev_priv); - } else if (INTEL_GEN(dev_priv) >= 6) { - gen6_disable_rps(dev_priv); - } else if (IS_IRONLAKE_M(dev_priv)) { - ironlake_disable_drps(dev_priv); - } + __intel_disable_rc6(dev_priv); + intel_disable_rps(dev_priv); dev_priv->rps.enabled = false; + mutex_unlock(&dev_priv->rps.hw_lock); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index cdf084ef5aae..63667a5c2c87 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1358,6 +1358,7 @@ void intel_ring_unpin(struct intel_ring *ring) static struct i915_vma * intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) { + struct i915_address_space *vm = &dev_priv->ggtt.base; struct drm_i915_gem_object *obj; struct i915_vma *vma; @@ -1367,10 +1368,14 @@ intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) if (IS_ERR(obj)) return ERR_CAST(obj); - /* mark ring buffers as read-only from GPU side by default */ - obj->gt_ro = 1; + /* + * Mark ring buffers as read-only from GPU side (so no stray overwrites) + * if supported by the platform's GGTT. + */ + if (vm->has_read_only) + i915_gem_object_set_readonly(obj); - vma = i915_vma_instance(obj, &dev_priv->ggtt.base, NULL); + vma = i915_vma_instance(obj, vm, NULL); if (IS_ERR(vma)) goto err; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 6b2067f10824..774e3772d0ed 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -417,7 +417,9 @@ struct intel_engine_cs { struct intel_engine_hangcheck hangcheck; - bool needs_cmd_parser; +#define I915_ENGINE_USING_CMD_PARSER BIT(0) +#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(3) + unsigned int flags; /* * Table of commands the command parser needs to know about @@ -444,6 +446,18 @@ struct intel_engine_cs { u32 (*get_cmd_length_mask)(u32 cmd_header); }; +static inline bool +intel_engine_using_cmd_parser(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_USING_CMD_PARSER; +} + +static inline bool +intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER; +} + static inline unsigned int intel_engine_flag(const struct intel_engine_cs *engine) { diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 7791313405b5..c8671b1578c6 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -394,19 +394,17 @@ static const unsigned int a3xx_registers[] = { 0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e, 0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8, 0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7, - 0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356, - 0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d, - 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472, - 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef, - 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511, - 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed, - 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a, - 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce, - 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec, - 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749, - 0x2750, 0x2756, 0x2760, 0x2760, 0x300c, 0x300e, 0x301c, 0x301d, - 0x302a, 0x302a, 0x302c, 0x302d, 0x3030, 0x3031, 0x3034, 0x3036, - 0x303c, 0x303c, 0x305e, 0x305f, + 0x22ff, 0x22ff, 0x2340, 0x2343, 0x2440, 0x2440, 0x2444, 0x2444, + 0x2448, 0x244d, 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, + 0x2472, 0x2472, 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, + 0x24e4, 0x24ef, 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, + 0x2510, 0x2511, 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, + 0x25ec, 0x25ed, 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, + 0x261a, 0x261a, 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, + 0x26c4, 0x26ce, 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, + 0x26ec, 0x26ec, 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, + 0x300c, 0x300e, 0x301c, 0x301d, 0x302a, 0x302a, 0x302c, 0x302d, + 0x3030, 0x3031, 0x3034, 0x3036, 0x303c, 0x303c, 0x305e, 0x305f, ~0 /* sentinel */ }; diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index a9a0b56f1fbc..b9cb7c09e05a 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -34,6 +34,8 @@ #include "dsi_cfg.h" #include "msm_kms.h" +#define DSI_RESET_TOGGLE_DELAY_MS 20 + static int dsi_get_version(const void __iomem *base, u32 *major, u32 *minor) { u32 ver; @@ -906,7 +908,7 @@ static void dsi_sw_reset(struct msm_dsi_host *msm_host) wmb(); /* clocks need to be enabled before reset */ dsi_write(msm_host, REG_DSI_RESET, 1); - wmb(); /* make sure reset happen */ + msleep(DSI_RESET_TOGGLE_DELAY_MS); /* make sure reset happen */ dsi_write(msm_host, REG_DSI_RESET, 0); } @@ -1288,7 +1290,7 @@ static void dsi_sw_reset_restore(struct msm_dsi_host *msm_host) /* dsi controller can only be reset while clocks are running */ dsi_write(msm_host, REG_DSI_RESET, 1); - wmb(); /* make sure reset happen */ + msleep(DSI_RESET_TOGGLE_DELAY_MS); /* make sure reset happen */ dsi_write(msm_host, REG_DSI_RESET, 0); wmb(); /* controller out of reset */ dsi_write(msm_host, REG_DSI_CTRL, data0); diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c index 855248132b2b..9fbfa9f94e6c 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_manager.c +++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c @@ -400,7 +400,7 @@ static int dsi_mgr_connector_get_modes(struct drm_connector *connector) return num; } -static int dsi_mgr_connector_mode_valid(struct drm_connector *connector, +static enum drm_mode_status dsi_mgr_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { int id = dsi_mgr_connector_get_id(connector); @@ -543,6 +543,7 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge) struct msm_dsi *msm_dsi1 = dsi_mgr_get_dsi(DSI_1); struct mipi_dsi_host *host = msm_dsi->host; struct drm_panel *panel = msm_dsi->panel; + struct msm_dsi_pll *src_pll; bool is_dual_dsi = IS_DUAL_DSI(); int ret; @@ -583,6 +584,10 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge) id, ret); } + /* Save PLL status if it is a clock source */ + src_pll = msm_dsi_phy_get_pll(msm_dsi->phy); + msm_dsi_pll_save_state(src_pll); + ret = msm_dsi_host_power_off(host); if (ret) pr_err("%s: host %d power off failed,%d\n", __func__, id, ret); diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 7c9bf91bc22b..c0a7fa56d9a7 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -613,10 +613,6 @@ void msm_dsi_phy_disable(struct msm_dsi_phy *phy) if (!phy || !phy->cfg->ops.disable) return; - /* Save PLL status if it is a clock source */ - if (phy->usecase != MSM_DSI_PHY_SLAVE) - msm_dsi_pll_save_state(phy->pll); - phy->cfg->ops.disable(phy); dsi_phy_regulator_disable(phy); diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c index 824067d2d427..42f0ecb0cf35 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c @@ -635,7 +635,7 @@ fail: if (cfg_handler) mdp5_cfg_destroy(cfg_handler); - return NULL; + return ERR_PTR(ret); } static struct mdp5_cfg_platform *mdp5_get_config(struct platform_device *dev) diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c index 440977677001..99d356b6e915 100644 --- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c @@ -1004,8 +1004,8 @@ static void mdp5_crtc_wait_for_pp_done(struct drm_crtc *crtc) ret = wait_for_completion_timeout(&mdp5_crtc->pp_completion, msecs_to_jiffies(50)); if (ret == 0) - dev_warn(dev->dev, "pp done time out, lm=%d\n", - mdp5_cstate->pipeline.mixer->lm); + dev_warn_ratelimited(dev->dev, "pp done time out, lm=%d\n", + mdp5_cstate->pipeline.mixer->lm); } static void mdp5_crtc_wait_for_flush_done(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 77c45a2ebd83..d9c0687435a0 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -420,6 +420,14 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) if (ret) goto fail; + if (!dev->dma_parms) { + dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms), + GFP_KERNEL); + if (!dev->dma_parms) + return -ENOMEM; + } + dma_set_max_seg_size(dev, DMA_BIT_MASK(32)); + msm_gem_shrinker_init(ddev); switch (get_mdp_ver(pdev)) { diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h index dc7454e7f19a..b46e99f7641e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.h +++ b/drivers/gpu/drm/nouveau/nouveau_connector.h @@ -29,6 +29,7 @@ #include +#include #include #include #include @@ -37,6 +38,60 @@ struct nvkm_i2c_port; +#define nouveau_conn_atom(p) \ + container_of((p), struct nouveau_conn_atom, state) + +struct nouveau_conn_atom { + struct drm_connector_state state; + + struct { + /* The enum values specifically defined here match nv50/gf119 + * hw values, and the code relies on this. + */ + enum { + DITHERING_MODE_OFF = 0x00, + DITHERING_MODE_ON = 0x01, + DITHERING_MODE_DYNAMIC2X2 = 0x10 | DITHERING_MODE_ON, + DITHERING_MODE_STATIC2X2 = 0x18 | DITHERING_MODE_ON, + DITHERING_MODE_TEMPORAL = 0x20 | DITHERING_MODE_ON, + DITHERING_MODE_AUTO + } mode; + enum { + DITHERING_DEPTH_6BPC = 0x00, + DITHERING_DEPTH_8BPC = 0x02, + DITHERING_DEPTH_AUTO + } depth; + } dither; + + struct { + int mode; /* DRM_MODE_SCALE_* */ + struct { + enum { + UNDERSCAN_OFF, + UNDERSCAN_ON, + UNDERSCAN_AUTO, + } mode; + u32 hborder; + u32 vborder; + } underscan; + bool full; + } scaler; + + struct { + int color_vibrance; + int vibrant_hue; + } procamp; + + union { + struct { + bool dither:1; + bool scaler:1; + bool procamp:1; + }; + u8 mask; + } set; +}; + struct nouveau_connector { struct drm_connector base; enum dcb_connector_type type; @@ -111,61 +166,6 @@ extern int nouveau_ignorelid; extern int nouveau_duallink; extern int nouveau_hdmimhz; -#include -#define nouveau_conn_atom(p) \ - container_of((p), struct nouveau_conn_atom, state) - -struct nouveau_conn_atom { - struct drm_connector_state state; - - struct { - /* The enum values specifically defined here match nv50/gf119 - * hw values, and the code relies on this. - */ - enum { - DITHERING_MODE_OFF = 0x00, - DITHERING_MODE_ON = 0x01, - DITHERING_MODE_DYNAMIC2X2 = 0x10 | DITHERING_MODE_ON, - DITHERING_MODE_STATIC2X2 = 0x18 | DITHERING_MODE_ON, - DITHERING_MODE_TEMPORAL = 0x20 | DITHERING_MODE_ON, - DITHERING_MODE_AUTO - } mode; - enum { - DITHERING_DEPTH_6BPC = 0x00, - DITHERING_DEPTH_8BPC = 0x02, - DITHERING_DEPTH_AUTO - } depth; - } dither; - - struct { - int mode; /* DRM_MODE_SCALE_* */ - struct { - enum { - UNDERSCAN_OFF, - UNDERSCAN_ON, - UNDERSCAN_AUTO, - } mode; - u32 hborder; - u32 vborder; - } underscan; - bool full; - } scaler; - - struct { - int color_vibrance; - int vibrant_hue; - } procamp; - - union { - struct { - bool dither:1; - bool scaler:1; - bool procamp:1; - }; - u8 mask; - } set; -}; - void nouveau_conn_attach_properties(struct drm_connector *); void nouveau_conn_reset(struct drm_connector *); struct drm_connector_state * diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index 99e14e3e0fe4..72532539369f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -158,7 +158,7 @@ nouveau_fence_wait_uevent_handler(struct nvif_notify *notify) fence = list_entry(fctx->pending.next, typeof(*fence), head); chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock)); - if (nouveau_fence_update(fence->channel, fctx)) + if (nouveau_fence_update(chan, fctx)) ret = NVIF_NOTIFY_DROP; } spin_unlock_irqrestore(&fctx->lock, flags); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c index 0c0310498afd..cd9666583d4b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c @@ -73,6 +73,8 @@ nv50_disp_chan_mthd(struct nv50_disp_chan *chan, int debug) if (debug > subdev->debug) return; + if (!mthd) + return; for (i = 0; (list = mthd->data[i].mthd) != NULL; i++) { u32 base = chan->head * mthd->addr; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c index de8b806b88fd..7618b2eb4fdf 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c @@ -143,23 +143,24 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *fw_name, nent = (fuc.size / sizeof(struct gk20a_fw_av)); - pack = vzalloc((sizeof(*pack) * max_classes) + - (sizeof(*init) * (nent + 1))); + pack = vzalloc((sizeof(*pack) * (max_classes + 1)) + + (sizeof(*init) * (nent + max_classes + 1))); if (!pack) { ret = -ENOMEM; goto end; } - init = (void *)(pack + max_classes); + init = (void *)(pack + max_classes + 1); - for (i = 0; i < nent; i++) { - struct gf100_gr_init *ent = &init[i]; + for (i = 0; i < nent; i++, init++) { struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc.data)[i]; u32 class = av->addr & 0xffff; u32 addr = (av->addr & 0xffff0000) >> 14; if (prevclass != class) { - pack[classidx].init = ent; + if (prevclass) /* Add terminator to the method list. */ + init++; + pack[classidx].init = init; pack[classidx].type = class; prevclass = class; if (++classidx >= max_classes) { @@ -169,10 +170,10 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *fw_name, } } - ent->addr = addr; - ent->data = av->data; - ent->count = 1; - ent->pitch = 1; + init->addr = addr; + init->data = av->data; + init->count = 1; + init->pitch = 1; } *ppack = pack; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c index 60ece0a8a2e1..1d2d6bae73cd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gddr3.c @@ -87,7 +87,7 @@ nvkm_gddr3_calc(struct nvkm_ram *ram) WR = (ram->next->bios.timing[2] & 0x007f0000) >> 16; /* XXX: Get these values from the VBIOS instead */ DLL = !(ram->mr[1] & 0x1); - RON = !(ram->mr[1] & 0x300) >> 8; + RON = !((ram->mr[1] & 0x300) >> 8); break; default: return -ENOSYS; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c index 11b28b086a06..7b052879af72 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/memx.c @@ -88,10 +88,10 @@ nvkm_memx_fini(struct nvkm_memx **pmemx, bool exec) if (exec) { nvkm_pmu_send(pmu, reply, PROC_MEMX, MEMX_MSG_EXEC, memx->base, finish); + nvkm_debug(subdev, "Exec took %uns, PMU_IN %08x\n", + reply[0], reply[1]); } - nvkm_debug(subdev, "Exec took %uns, PMU_IN %08x\n", - reply[0], reply[1]); kfree(memx); return 0; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c index 30491d132d59..fbd10a67c6c6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c @@ -108,6 +108,7 @@ gm20b_secboot_new(struct nvkm_device *device, int index, struct gm200_secboot *gsb; struct nvkm_acr *acr; + *psb = NULL; acr = acr_r352_new(BIT(NVKM_SECBOOT_FALCON_FECS) | BIT(NVKM_SECBOOT_FALCON_PMU)); if (IS_ERR(acr)) @@ -116,10 +117,8 @@ gm20b_secboot_new(struct nvkm_device *device, int index, acr->optional_falcons = BIT(NVKM_SECBOOT_FALCON_PMU); gsb = kzalloc(sizeof(*gsb), GFP_KERNEL); - if (!gsb) { - psb = NULL; + if (!gsb) return -ENOMEM; - } *psb = &gsb->base; ret = nvkm_secboot_ctor(&gm20b_secboot, acr, device, index, &gsb->base); diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index ce8b353b5753..ba31c7674fcd 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7012,8 +7012,8 @@ static int cik_irq_init(struct radeon_device *rdev) } /* setup interrupt control */ - /* XXX this should actually be a bus address, not an MC address. same on older asics */ - WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 7d39ed63e5be..b24401f21e93 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -1820,8 +1820,8 @@ static int r100_packet0_check(struct radeon_cs_parser *p, track->textures[i].use_pitch = 1; } else { track->textures[i].use_pitch = 0; - track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); - track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); + track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT); + track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT); } if (idx_value & RADEON_TXFORMAT_CUBIC_MAP_ENABLE) track->textures[i].tex_coord_type = 2; diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index c22321cc5a41..c2b506c707a2 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -476,8 +476,8 @@ int r200_packet0_check(struct radeon_cs_parser *p, track->textures[i].use_pitch = 1; } else { track->textures[i].use_pitch = 0; - track->textures[i].width = 1 << ((idx_value >> RADEON_TXFORMAT_WIDTH_SHIFT) & RADEON_TXFORMAT_WIDTH_MASK); - track->textures[i].height = 1 << ((idx_value >> RADEON_TXFORMAT_HEIGHT_SHIFT) & RADEON_TXFORMAT_HEIGHT_MASK); + track->textures[i].width = 1 << ((idx_value & RADEON_TXFORMAT_WIDTH_MASK) >> RADEON_TXFORMAT_WIDTH_SHIFT); + track->textures[i].height = 1 << ((idx_value & RADEON_TXFORMAT_HEIGHT_MASK) >> RADEON_TXFORMAT_HEIGHT_SHIFT); } if (idx_value & R200_TXFORMAT_LOOKUP_DISABLE) track->textures[i].lookup_disable = true; diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index e06e2d8feab3..a724bb87cfad 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3690,8 +3690,8 @@ int r600_irq_init(struct radeon_device *rdev) } /* setup interrupt control */ - /* set dummy read address to ring address */ - WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 4f94b78cb464..d86110cdf085 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -119,6 +119,8 @@ static void dce5_crtc_load_lut(struct drm_crtc *crtc) DRM_DEBUG_KMS("%d\n", radeon_crtc->crtc_id); + msleep(10); + WREG32(NI_INPUT_CSC_CONTROL + radeon_crtc->crtc_offset, (NI_INPUT_CSC_GRPH_MODE(NI_INPUT_CSC_BYPASS) | NI_INPUT_CSC_OVL_MODE(NI_INPUT_CSC_BYPASS))); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 54d97dd5780a..f4becad0a78c 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -368,19 +368,11 @@ radeon_pci_remove(struct pci_dev *pdev) static void radeon_pci_shutdown(struct pci_dev *pdev) { - struct drm_device *ddev = pci_get_drvdata(pdev); - /* if we are running in a VM, make sure the device * torn down properly on reboot/shutdown */ if (radeon_device_is_virtual()) radeon_pci_remove(pdev); - - /* Some adapters need to be suspended before a - * shutdown occurs in order to prevent an error - * during kexec. - */ - radeon_suspend_kms(ddev, true, true, false); } static int radeon_pmops_suspend(struct device *dev) diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 1907c950d76f..1144cafea9ac 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -5993,8 +5993,8 @@ static int si_irq_init(struct radeon_device *rdev) } /* setup interrupt control */ - /* set dummy read address to ring address */ - WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8); + /* set dummy read address to dummy page address */ + WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8); interrupt_cntl = RREG32(INTERRUPT_CNTL); /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index 90d5b41007bf..9e5645e4cb55 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -1956,6 +1956,7 @@ static void si_initialize_powertune_defaults(struct radeon_device *rdev) case 0x682C: si_pi->cac_weights = cac_weights_cape_verde_pro; si_pi->dte_data = dte_data_sun_xt; + update_dte_from_pl2 = true; break; case 0x6825: case 0x6827: diff --git a/drivers/gpu/drm/shmobile/shmob_drm_drv.c b/drivers/gpu/drm/shmobile/shmob_drm_drv.c index 592572554eb0..58d8a98c749b 100644 --- a/drivers/gpu/drm/shmobile/shmob_drm_drv.c +++ b/drivers/gpu/drm/shmobile/shmob_drm_drv.c @@ -233,8 +233,8 @@ static int shmob_drm_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); sdev->mmio = devm_ioremap_resource(&pdev->dev, res); - if (sdev->mmio == NULL) - return -ENOMEM; + if (IS_ERR(sdev->mmio)) + return PTR_ERR(sdev->mmio); ret = shmob_drm_setup_clocks(sdev, pdata->clk_source); if (ret < 0) diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c index cf65e32b5090..0399bb18d387 100644 --- a/drivers/gpu/drm/sti/sti_hda.c +++ b/drivers/gpu/drm/sti/sti_hda.c @@ -721,7 +721,6 @@ static int sti_hda_bind(struct device *dev, struct device *master, void *data) return 0; err_sysfs: - drm_bridge_remove(bridge); return -EINVAL; } diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c index 30f02d2fdd03..bbb195a92e93 100644 --- a/drivers/gpu/drm/sti/sti_hdmi.c +++ b/drivers/gpu/drm/sti/sti_hdmi.c @@ -1314,7 +1314,6 @@ static int sti_hdmi_bind(struct device *dev, struct device *master, void *data) return 0; err_sysfs: - drm_bridge_remove(bridge); hdmi->drm_connector = NULL; return -EINVAL; } diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index 3cf1a6932fac..298d6a8bab12 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -438,8 +438,6 @@ static void sun4i_hdmi_unbind(struct device *dev, struct device *master, struct sun4i_hdmi *hdmi = dev_get_drvdata(dev); cec_unregister_adapter(hdmi->cec_adap); - drm_connector_cleanup(&hdmi->connector); - drm_encoder_cleanup(&hdmi->encoder); i2c_del_adapter(hdmi->i2c); clk_disable_unprepare(hdmi->mod_clk); clk_disable_unprepare(hdmi->bus_clk); diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c index 5cf2527bffc8..d7a8fea94557 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_tmds_clk.c @@ -50,7 +50,7 @@ static unsigned long sun4i_tmds_calc_divider(unsigned long rate, (rate - tmp_rate) < (rate - best_rate)) { best_rate = tmp_rate; best_m = m; - is_double = d; + is_double = (d == 2) ? true : false; } } } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c index 36c7b6c839c0..738ad2fc79a2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf_res.c @@ -210,8 +210,10 @@ int vmw_cmdbuf_res_add(struct vmw_cmdbuf_res_manager *man, cres->hash.key = user_key | (res_type << 24); ret = drm_ht_insert_item(&man->resources, &cres->hash); - if (unlikely(ret != 0)) + if (unlikely(ret != 0)) { + kfree(cres); goto out_invalid_key; + } cres->state = VMW_CMDBUF_RES_ADD; cres->res = vmw_resource_reference(res); diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index acd99783bbca..67f3c050c4cf 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -545,7 +545,8 @@ out: return err; } -static inline int copy_gathers(struct host1x_job *job, struct device *dev) +static inline int copy_gathers(struct device *host, struct host1x_job *job, + struct device *dev) { struct host1x_firewall fw; size_t size = 0; @@ -570,12 +571,12 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) * Try a non-blocking allocation from a higher priority pools first, * as awaiting for the allocation here is a major performance hit. */ - job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy, + job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy, GFP_NOWAIT); /* the higher priority allocation failed, try the generic-blocking */ if (!job->gather_copy_mapped) - job->gather_copy_mapped = dma_alloc_wc(dev, size, + job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy, GFP_KERNEL); if (!job->gather_copy_mapped) @@ -636,7 +637,7 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) goto out; if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { - err = copy_gathers(job, dev); + err = copy_gathers(host->dev, job, dev); if (err) goto out; } @@ -701,7 +702,7 @@ void host1x_job_unpin(struct host1x_job *job) job->num_unpins = 0; if (job->gather_copy_size) - dma_free_wc(job->channel->dev, job->gather_copy_size, + dma_free_wc(host->dev, job->gather_copy_size, job->gather_copy_mapped, job->gather_copy); } EXPORT_SYMBOL(host1x_job_unpin); diff --git a/drivers/gpu/ipu-v3/ipu-pre.c b/drivers/gpu/ipu-v3/ipu-pre.c index 1d1612e28854..6fd4af647f59 100644 --- a/drivers/gpu/ipu-v3/ipu-pre.c +++ b/drivers/gpu/ipu-v3/ipu-pre.c @@ -102,6 +102,7 @@ struct ipu_pre { void *buffer_virt; bool in_use; unsigned int safe_window_end; + unsigned int last_bufaddr; }; static DEFINE_MUTEX(ipu_pre_list_mutex); @@ -177,6 +178,7 @@ void ipu_pre_configure(struct ipu_pre *pre, unsigned int width, writel(bufaddr, pre->regs + IPU_PRE_CUR_BUF); writel(bufaddr, pre->regs + IPU_PRE_NEXT_BUF); + pre->last_bufaddr = bufaddr; val = IPU_PRE_PREF_ENG_CTRL_INPUT_PIXEL_FORMAT(0) | IPU_PRE_PREF_ENG_CTRL_INPUT_ACTIVE_BPP(active_bpp) | @@ -218,7 +220,11 @@ void ipu_pre_update(struct ipu_pre *pre, unsigned int bufaddr) unsigned short current_yblock; u32 val; + if (bufaddr == pre->last_bufaddr) + return; + writel(bufaddr, pre->regs + IPU_PRE_NEXT_BUF); + pre->last_bufaddr = bufaddr; do { if (time_after(jiffies, timeout)) { diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index eca4c9d97110..d9185240239a 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -609,6 +609,17 @@ config HID_MULTITOUCH To compile this driver as a module, choose M here: the module will be called hid-multitouch. +config HID_NINTENDO + tristate "Nintendo Joy-Con and Pro Controller support" + depends on HID + help + Adds support for the Nintendo Switch Joy-Cons and Pro Controller. + All controllers support bluetooth, and the Pro Controller also supports + its USB mode. + + To compile this driver as a module, choose M here: the + module will be called hid-nintendo. + config HID_NTI tristate "NTI keyboard adapters" ---help--- @@ -813,6 +824,14 @@ config HID_SPEEDLINK ---help--- Support for Speedlink Vicious and Divine Cezanne mouse. +config HID_STEAM + tristate "Steam Controller support" + depends on HID + ---help--- + Say Y here if you have a Steam Controller if you want to use it + without running the Steam Client. It supports both the wired and + the wireless adaptor. + config HID_STEELSERIES tristate "Steelseries SRW-S1 steering wheel support" depends on HID diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile index 235bd2a7b333..aa7497748bb1 100644 --- a/drivers/hid/Makefile +++ b/drivers/hid/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_HID_MAYFLASH) += hid-mf.o obj-$(CONFIG_HID_MICROSOFT) += hid-microsoft.o obj-$(CONFIG_HID_MONTEREY) += hid-monterey.o obj-$(CONFIG_HID_MULTITOUCH) += hid-multitouch.o +obj-$(CONFIG_HID_NINTENDO) += hid-nintendo.o obj-$(CONFIG_HID_NTI) += hid-nti.o obj-$(CONFIG_HID_NTRIG) += hid-ntrig.o obj-$(CONFIG_HID_ORTEK) += hid-ortek.o @@ -94,6 +95,7 @@ obj-$(CONFIG_HID_SAMSUNG) += hid-samsung.o obj-$(CONFIG_HID_SMARTJOYPLUS) += hid-sjoy.o obj-$(CONFIG_HID_SONY) += hid-sony.o obj-$(CONFIG_HID_SPEEDLINK) += hid-speedlink.o +obj-$(CONFIG_HID_STEAM) += hid-steam.o obj-$(CONFIG_HID_STEELSERIES) += hid-steelseries.o obj-$(CONFIG_HID_SUNPLUS) += hid-sunplus.o obj-$(CONFIG_HID_GREENASIA) += hid-gaff.o diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index d0a81a03ddbd..8ab8f2350bbc 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -343,7 +343,8 @@ static int apple_input_mapping(struct hid_device *hdev, struct hid_input *hi, unsigned long **bit, int *max) { if (usage->hid == (HID_UP_CUSTOM | 0x0003) || - usage->hid == (HID_UP_MSVENDOR | 0x0003)) { + usage->hid == (HID_UP_MSVENDOR | 0x0003) || + usage->hid == (HID_UP_HPVENDOR2 | 0x0003)) { /* The fn key on Apple USB keyboards */ set_bit(EV_REP, hi->input->evbit); hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_FN); diff --git a/drivers/hid/hid-axff.c b/drivers/hid/hid-axff.c index a594e478a1e2..843aed4dec80 100644 --- a/drivers/hid/hid-axff.c +++ b/drivers/hid/hid-axff.c @@ -75,13 +75,20 @@ static int axff_init(struct hid_device *hid) { struct axff_device *axff; struct hid_report *report; - struct hid_input *hidinput = list_first_entry(&hid->inputs, struct hid_input, list); + struct hid_input *hidinput; struct list_head *report_list =&hid->report_enum[HID_OUTPUT_REPORT].report_list; - struct input_dev *dev = hidinput->input; + struct input_dev *dev; int field_count = 0; int i, j; int error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_first_entry(&hid->inputs, struct hid_input, list); + dev = hidinput->input; + if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index a306493e2e97..1c23a82b62d3 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -195,17 +195,38 @@ static unsigned hid_lookup_collection(struct hid_parser *parser, unsigned type) return 0; /* we know nothing about this usage type */ } +/* + * Concatenate usage which defines 16 bits or less with the + * currently defined usage page to form a 32 bit usage + */ + +static void complete_usage(struct hid_parser *parser, unsigned int index) +{ + parser->local.usage[index] &= 0xFFFF; + parser->local.usage[index] |= + (parser->global.usage_page & 0xFFFF) << 16; +} + /* * Add a usage to the temporary parser table. */ -static int hid_add_usage(struct hid_parser *parser, unsigned usage) +static int hid_add_usage(struct hid_parser *parser, unsigned usage, u8 size) { if (parser->local.usage_index >= HID_MAX_USAGES) { hid_err(parser->device, "usage index exceeded\n"); return -1; } parser->local.usage[parser->local.usage_index] = usage; + + /* + * If Usage item only includes usage id, concatenate it with + * currently defined usage page + */ + if (size <= 2) + complete_usage(parser, parser->local.usage_index); + + parser->local.usage_size[parser->local.usage_index] = size; parser->local.collection_index[parser->local.usage_index] = parser->collection_stack_ptr ? parser->collection_stack[parser->collection_stack_ptr - 1] : 0; @@ -247,6 +268,12 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign offset = report->size; report->size += parser->global.report_size * parser->global.report_count; + /* Total size check: Allow for possible report index byte */ + if (report->size > (HID_MAX_BUFFER_SIZE - 1) << 3) { + hid_err(parser->device, "report is too long\n"); + return -1; + } + if (!parser->local.usage_index) /* Ignore padding fields */ return 0; @@ -465,7 +492,7 @@ static int hid_parser_local(struct hid_parser *parser, struct hid_item *item) if (item->size <= 2) data = (parser->global.usage_page << 16) + data; - return hid_add_usage(parser, data); + return hid_add_usage(parser, data, item->size); case HID_LOCAL_ITEM_TAG_USAGE_MINIMUM: @@ -509,7 +536,7 @@ static int hid_parser_local(struct hid_parser *parser, struct hid_item *item) } for (n = parser->local.usage_minimum; n <= data; n++) - if (hid_add_usage(parser, n)) { + if (hid_add_usage(parser, n, item->size)) { dbg_hid("hid_add_usage failed\n"); return -1; } @@ -523,6 +550,41 @@ static int hid_parser_local(struct hid_parser *parser, struct hid_item *item) return 0; } +/* + * Concatenate Usage Pages into Usages where relevant: + * As per specification, 6.2.2.8: "When the parser encounters a main item it + * concatenates the last declared Usage Page with a Usage to form a complete + * usage value." + */ + +static void hid_concatenate_last_usage_page(struct hid_parser *parser) +{ + int i; + unsigned int usage_page; + unsigned int current_page; + + if (!parser->local.usage_index) + return; + + usage_page = parser->global.usage_page; + + /* + * Concatenate usage page again only if last declared Usage Page + * has not been already used in previous usages concatenation + */ + for (i = parser->local.usage_index - 1; i >= 0; i--) { + if (parser->local.usage_size[i] > 2) + /* Ignore extended usages */ + continue; + + current_page = parser->local.usage[i] >> 16; + if (current_page == usage_page) + break; + + complete_usage(parser, i); + } +} + /* * Process a main item. */ @@ -532,6 +594,8 @@ static int hid_parser_main(struct hid_parser *parser, struct hid_item *item) __u32 data; int ret; + hid_concatenate_last_usage_page(parser); + data = item_udata(item); switch (item->tag) { @@ -711,6 +775,10 @@ static void hid_scan_feature_usage(struct hid_parser *parser, u32 usage) if (usage == 0xff0000c5 && parser->global.report_count == 256 && parser->global.report_size == 8) parser->scan_flags |= HID_SCAN_FLAG_MT_WIN_8; + + if (usage == 0xff0000c6 && parser->global.report_count == 1 && + parser->global.report_size == 8) + parser->scan_flags |= HID_SCAN_FLAG_MT_WIN_8; } static void hid_scan_collection(struct hid_parser *parser, unsigned type) @@ -741,6 +809,8 @@ static int hid_scan_main(struct hid_parser *parser, struct hid_item *item) __u32 data; int i; + hid_concatenate_last_usage_page(parser); + data = item_udata(item); switch (item->tag) { @@ -967,6 +1037,7 @@ int hid_open_report(struct hid_device *device) __u8 *start; __u8 *buf; __u8 *end; + __u8 *next; int ret; static int (*dispatch_type[])(struct hid_parser *parser, struct hid_item *item) = { @@ -1020,7 +1091,8 @@ int hid_open_report(struct hid_device *device) device->collection_size = HID_DEFAULT_NUM_COLLECTIONS; ret = -EINVAL; - while ((start = fetch_item(start, end, &item)) != NULL) { + while ((next = fetch_item(start, end, &item)) != NULL) { + start = next; if (item.format != HID_ITEM_FORMAT_SHORT) { hid_err(device, "unexpected long global item\n"); @@ -1049,7 +1121,8 @@ int hid_open_report(struct hid_device *device) } } - hid_err(device, "item fetching failed at offset %d\n", (int)(end - start)); + hid_err(device, "item fetching failed at offset %u/%u\n", + size - (unsigned int)(end - start), size); err: vfree(parser); hid_close_report(device); @@ -1503,7 +1576,9 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size, rsize = ((report->size - 1) >> 3) + 1; - if (rsize > HID_MAX_BUFFER_SIZE) + if (report_enum->numbered && rsize >= HID_MAX_BUFFER_SIZE) + rsize = HID_MAX_BUFFER_SIZE - 1; + else if (rsize > HID_MAX_BUFFER_SIZE) rsize = HID_MAX_BUFFER_SIZE; if (csize < rsize) { @@ -2195,6 +2270,16 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, USB_DEVICE_ID_NINTENDO_WIIMOTE2) }, #endif +#if IS_ENABLED(CONFIG_HID_NINTENDO) + { HID_USB_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_PROCON) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_PROCON) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_JOYCONL) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_JOYCONR) }, +#endif #if IS_ENABLED(CONFIG_HID_NTI) { HID_USB_DEVICE(USB_VENDOR_ID_NTI, USB_DEVICE_ID_USB_SUN) }, #endif @@ -2372,6 +2457,11 @@ static const struct hid_device_id hid_have_special_driver[] = { #if IS_ENABLED(CONFIG_HID_UDRAW_PS3) { HID_USB_DEVICE(USB_VENDOR_ID_THQ, USB_DEVICE_ID_THQ_PS3_UDRAW) }, #endif +#if IS_ENABLED(CONFIG_HID_STEAM) + { HID_USB_DEVICE(USB_VENDOR_ID_VALVE, USB_DEVICE_ID_STEAM_CONTROLLER) }, + { HID_USB_DEVICE(USB_VENDOR_ID_VALVE, USB_DEVICE_ID_STEAM_CONTROLLER_WIRELESS) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_VALVE, USB_DEVICE_ID_STEAM_CONTROLLER_BT) }, +#endif #if IS_ENABLED(CONFIG_HID_WALTOP) { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SLIM_TABLET_5_8_INCH) }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SLIM_TABLET_12_1_INCH) }, diff --git a/drivers/hid/hid-dr.c b/drivers/hid/hid-dr.c index 818ea7d93533..309969b8dc2e 100644 --- a/drivers/hid/hid-dr.c +++ b/drivers/hid/hid-dr.c @@ -87,13 +87,19 @@ static int drff_init(struct hid_device *hid) { struct drff_device *drff; struct hid_report *report; - struct hid_input *hidinput = list_first_entry(&hid->inputs, - struct hid_input, list); + struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; - struct input_dev *dev = hidinput->input; + struct input_dev *dev; int error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_first_entry(&hid->inputs, struct hid_input, list); + dev = hidinput->input; + if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; diff --git a/drivers/hid/hid-emsff.c b/drivers/hid/hid-emsff.c index d82d75bb11f7..80f9a02dfa69 100644 --- a/drivers/hid/hid-emsff.c +++ b/drivers/hid/hid-emsff.c @@ -59,13 +59,19 @@ static int emsff_init(struct hid_device *hid) { struct emsff_device *emsff; struct hid_report *report; - struct hid_input *hidinput = list_first_entry(&hid->inputs, - struct hid_input, list); + struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; - struct input_dev *dev = hidinput->input; + struct input_dev *dev; int error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_first_entry(&hid->inputs, struct hid_input, list); + dev = hidinput->input; + if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; diff --git a/drivers/hid/hid-gaff.c b/drivers/hid/hid-gaff.c index 2d8cead3adca..5a02c50443cb 100644 --- a/drivers/hid/hid-gaff.c +++ b/drivers/hid/hid-gaff.c @@ -77,14 +77,20 @@ static int gaff_init(struct hid_device *hid) { struct gaff_device *gaff; struct hid_report *report; - struct hid_input *hidinput = list_entry(hid->inputs.next, - struct hid_input, list); + struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct list_head *report_ptr = report_list; - struct input_dev *dev = hidinput->input; + struct input_dev *dev; int error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; diff --git a/drivers/hid/hid-holtekff.c b/drivers/hid/hid-holtekff.c index 9325545fc3ae..3e84551cca9c 100644 --- a/drivers/hid/hid-holtekff.c +++ b/drivers/hid/hid-holtekff.c @@ -140,13 +140,19 @@ static int holtekff_init(struct hid_device *hid) { struct holtekff_device *holtekff; struct hid_report *report; - struct hid_input *hidinput = list_entry(hid->inputs.next, - struct hid_input, list); + struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; - struct input_dev *dev = hidinput->input; + struct input_dev *dev; int error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + if (list_empty(report_list)) { hid_err(hid, "no output report found\n"); return -ENODEV; diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c index 5f1de24206ab..220b3e5c9c39 100644 --- a/drivers/hid/hid-hyperv.c +++ b/drivers/hid/hid-hyperv.c @@ -322,60 +322,24 @@ static void mousevsc_on_receive(struct hv_device *device, static void mousevsc_on_channel_callback(void *context) { - const int packet_size = 0x100; - int ret; struct hv_device *device = context; - u32 bytes_recvd; - u64 req_id; struct vmpacket_descriptor *desc; - unsigned char *buffer; - int bufferlen = packet_size; - - buffer = kmalloc(bufferlen, GFP_ATOMIC); - if (!buffer) - return; - - do { - ret = vmbus_recvpacket_raw(device->channel, buffer, - bufferlen, &bytes_recvd, &req_id); - - switch (ret) { - case 0: - if (bytes_recvd <= 0) { - kfree(buffer); - return; - } - desc = (struct vmpacket_descriptor *)buffer; - - switch (desc->type) { - case VM_PKT_COMP: - break; - - case VM_PKT_DATA_INBAND: - mousevsc_on_receive(device, desc); - break; - - default: - pr_err("unhandled packet type %d, tid %llx len %d\n", - desc->type, req_id, bytes_recvd); - break; - } + foreach_vmbus_pkt(desc, device->channel) { + switch (desc->type) { + case VM_PKT_COMP: break; - case -ENOBUFS: - kfree(buffer); - /* Handle large packet */ - bufferlen = bytes_recvd; - buffer = kmalloc(bytes_recvd, GFP_ATOMIC); - - if (!buffer) - return; + case VM_PKT_DATA_INBAND: + mousevsc_on_receive(device, desc); + break; + default: + pr_err("Unhandled packet type %d, tid %llx len %d\n", + desc->type, desc->trans_id, desc->len8 * 8); break; } - } while (1); - + } } static int mousevsc_connect_to_vsp(struct hv_device *device) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 1e2e6e58256a..71342b5cb703 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -816,6 +816,9 @@ #define USB_VENDOR_ID_NINTENDO 0x057e #define USB_DEVICE_ID_NINTENDO_WIIMOTE 0x0306 #define USB_DEVICE_ID_NINTENDO_WIIMOTE2 0x0330 +#define USB_DEVICE_ID_NINTENDO_JOYCONL 0x2006 +#define USB_DEVICE_ID_NINTENDO_JOYCONR 0x2007 +#define USB_DEVICE_ID_NINTENDO_PROCON 0x2009 #define USB_VENDOR_ID_NOVATEK 0x0603 #define USB_DEVICE_ID_NOVATEK_PCT 0x0600 @@ -996,6 +999,11 @@ #define USB_VENDOR_ID_STANTUM_SITRONIX 0x1403 #define USB_DEVICE_ID_MTP_SITRONIX 0x5001 +#define USB_VENDOR_ID_VALVE 0x28de +#define USB_DEVICE_ID_STEAM_CONTROLLER 0x1102 +#define USB_DEVICE_ID_STEAM_CONTROLLER_WIRELESS 0x1142 +#define USB_DEVICE_ID_STEAM_CONTROLLER_BT 0x1106 + #define USB_VENDOR_ID_STEELSERIES 0x1038 #define USB_DEVICE_ID_STEELSERIES_SRWS1 0x1410 @@ -1024,6 +1032,7 @@ #define USB_DEVICE_ID_SYNAPTICS_LTS2 0x1d10 #define USB_DEVICE_ID_SYNAPTICS_HD 0x0ac3 #define USB_DEVICE_ID_SYNAPTICS_QUAD_HD 0x1ac3 +#define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012 0x2968 #define USB_DEVICE_ID_SYNAPTICS_TP_V103 0x5710 #define USB_VENDOR_ID_TEXAS_INSTRUMENTS 0x2047 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index d723185de3ba..14e4003fde4d 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -328,6 +328,9 @@ static const struct hid_device_id hid_battery_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_SYMBOL, USB_DEVICE_ID_SYMBOL_SCANNER_3), HID_BATTERY_QUIRK_IGNORE }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ASUSTEK, + USB_DEVICE_ID_ASUSTEK_T100CHI_KEYBOARD), + HID_BATTERY_QUIRK_IGNORE }, {} }; @@ -1113,9 +1116,15 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel } mapped: - if (device->driver->input_mapped && device->driver->input_mapped(device, - hidinput, field, usage, &bit, &max) < 0) - goto ignore; + if (device->driver->input_mapped && + device->driver->input_mapped(device, hidinput, field, usage, + &bit, &max) < 0) { + /* + * The driver indicated that no further generic handling + * of the usage is desired. + */ + return; + } set_bit(usage->type, input->evbit); @@ -1173,9 +1182,11 @@ mapped: set_bit(MSC_SCAN, input->mscbit); } -ignore: return; +ignore: + usage->type = 0; + usage->code = 0; } void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value) diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c index 98b059d79bc8..f2e23f81601e 100644 --- a/drivers/hid/hid-ite.c +++ b/drivers/hid/hid-ite.c @@ -43,6 +43,10 @@ static int ite_event(struct hid_device *hdev, struct hid_field *field, static const struct hid_device_id ite_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) }, { HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) }, + /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */ + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_SYNAPTICS, + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) }, { } }; MODULE_DEVICE_TABLE(hid, ite_devices); diff --git a/drivers/hid/hid-lg2ff.c b/drivers/hid/hid-lg2ff.c index 0e3fb1a7e421..6909d9c2fc67 100644 --- a/drivers/hid/hid-lg2ff.c +++ b/drivers/hid/hid-lg2ff.c @@ -62,11 +62,17 @@ int lg2ff_init(struct hid_device *hid) { struct lg2ff_device *lg2ff; struct hid_report *report; - struct hid_input *hidinput = list_entry(hid->inputs.next, - struct hid_input, list); - struct input_dev *dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *dev; int error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + /* Check that the report looks ok */ report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7); if (!report) diff --git a/drivers/hid/hid-lg3ff.c b/drivers/hid/hid-lg3ff.c index 8c2da183d3bc..acf739fc4060 100644 --- a/drivers/hid/hid-lg3ff.c +++ b/drivers/hid/hid-lg3ff.c @@ -129,12 +129,19 @@ static const signed short ff3_joystick_ac[] = { int lg3ff_init(struct hid_device *hid) { - struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct input_dev *dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *dev; const signed short *ff_bits = ff3_joystick_ac; int error; int i; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + /* Check that the report looks ok */ if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 35)) return -ENODEV; diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c index 127f1335a1da..1b109a5cf922 100644 --- a/drivers/hid/hid-lg4ff.c +++ b/drivers/hid/hid-lg4ff.c @@ -1261,8 +1261,8 @@ static int lg4ff_handle_multimode_wheel(struct hid_device *hid, u16 *real_produc int lg4ff_init(struct hid_device *hid) { - struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct input_dev *dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *dev; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; struct hid_report *report = list_entry(report_list->next, struct hid_report, list); const struct usb_device_descriptor *udesc = &(hid_to_usb_dev(hid)->descriptor); @@ -1274,6 +1274,13 @@ int lg4ff_init(struct hid_device *hid) int mmode_ret, mmode_idx = -1; u16 real_product_id; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + /* Check that the report looks ok */ if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7)) return -1; diff --git a/drivers/hid/hid-lgff.c b/drivers/hid/hid-lgff.c index e1394af0ae7b..1871cdcd1e0a 100644 --- a/drivers/hid/hid-lgff.c +++ b/drivers/hid/hid-lgff.c @@ -127,12 +127,19 @@ static void hid_lgff_set_autocenter(struct input_dev *dev, u16 magnitude) int lgff_init(struct hid_device* hid) { - struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct input_dev *dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *dev; const signed short *ff_bits = ff_joystick; int error; int i; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + /* Check that the report looks ok */ if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7)) return -ENODEV; diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index b705cbb58ca6..6ad776b4711b 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -978,6 +978,9 @@ static int hidpp20_batterylevel_get_battery_capacity(struct hidpp_device *hidpp, ret = hidpp_send_fap_command_sync(hidpp, feature_index, CMD_BATTERY_LEVEL_STATUS_GET_BATTERY_LEVEL_STATUS, NULL, 0, &response); + /* Ignore these intermittent errors */ + if (ret == HIDPP_ERROR_RESOURCE_ERROR) + return -EIO; if (ret > 0) { hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n", __func__, ret); @@ -1867,8 +1870,8 @@ static void hidpp_ff_destroy(struct ff_device *ff) static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index) { struct hid_device *hid = hidpp->hid_dev; - struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list); - struct input_dev *dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *dev; const struct usb_device_descriptor *udesc = &(hid_to_usb_dev(hid)->descriptor); const u16 bcdDevice = le16_to_cpu(udesc->bcdDevice); struct ff_device *ff; @@ -1877,6 +1880,13 @@ static int hidpp_ff_init(struct hidpp_device *hidpp, u8 feature_index) int error, j, num_slots; u8 version; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + if (!dev) { hid_err(hid, "Struct input_dev not set!\n"); return -EINVAL; diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c new file mode 100644 index 000000000000..3695b96694bd --- /dev/null +++ b/drivers/hid/hid-nintendo.c @@ -0,0 +1,820 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * HID driver for Nintendo Switch Joy-Cons and Pro Controllers + * + * Copyright (c) 2019 Daniel J. Ogorchock + * + * The following resources/projects were referenced for this driver: + * https://github.com/dekuNukem/Nintendo_Switch_Reverse_Engineering + * https://gitlab.com/pjranki/joycon-linux-kernel (Peter Rankin) + * https://github.com/FrotBot/SwitchProConLinuxUSB + * https://github.com/MTCKC/ProconXInput + * hid-wiimote kernel hid driver + * hid-logitech-hidpp driver + * + * This driver supports the Nintendo Switch Joy-Cons and Pro Controllers. The + * Pro Controllers can either be used over USB or Bluetooth. + * + * The driver will retrieve the factory calibration info from the controllers, + * so little to no user calibration should be required. + * + */ + +#include "hid-ids.h" +#include +#include +#include +#include +#include +#include + +/* + * Reference the url below for the following HID report defines: + * https://github.com/dekuNukem/Nintendo_Switch_Reverse_Engineering + */ + +/* Output Reports */ +static const u8 JC_OUTPUT_RUMBLE_AND_SUBCMD = 0x01; +static const u8 JC_OUTPUT_FW_UPDATE_PKT = 0x03; +static const u8 JC_OUTPUT_RUMBLE_ONLY = 0x10; +static const u8 JC_OUTPUT_MCU_DATA = 0x11; +static const u8 JC_OUTPUT_USB_CMD = 0x80; + +/* Subcommand IDs */ +static const u8 JC_SUBCMD_STATE /*= 0x00*/; +static const u8 JC_SUBCMD_MANUAL_BT_PAIRING = 0x01; +static const u8 JC_SUBCMD_REQ_DEV_INFO = 0x02; +static const u8 JC_SUBCMD_SET_REPORT_MODE = 0x03; +static const u8 JC_SUBCMD_TRIGGERS_ELAPSED = 0x04; +static const u8 JC_SUBCMD_GET_PAGE_LIST_STATE = 0x05; +static const u8 JC_SUBCMD_SET_HCI_STATE = 0x06; +static const u8 JC_SUBCMD_RESET_PAIRING_INFO = 0x07; +static const u8 JC_SUBCMD_LOW_POWER_MODE = 0x08; +static const u8 JC_SUBCMD_SPI_FLASH_READ = 0x10; +static const u8 JC_SUBCMD_SPI_FLASH_WRITE = 0x11; +static const u8 JC_SUBCMD_RESET_MCU = 0x20; +static const u8 JC_SUBCMD_SET_MCU_CONFIG = 0x21; +static const u8 JC_SUBCMD_SET_MCU_STATE = 0x22; +static const u8 JC_SUBCMD_SET_PLAYER_LIGHTS = 0x30; +static const u8 JC_SUBCMD_GET_PLAYER_LIGHTS = 0x31; +static const u8 JC_SUBCMD_SET_HOME_LIGHT = 0x38; +static const u8 JC_SUBCMD_ENABLE_IMU = 0x40; +static const u8 JC_SUBCMD_SET_IMU_SENSITIVITY = 0x41; +static const u8 JC_SUBCMD_WRITE_IMU_REG = 0x42; +static const u8 JC_SUBCMD_READ_IMU_REG = 0x43; +static const u8 JC_SUBCMD_ENABLE_VIBRATION = 0x48; +static const u8 JC_SUBCMD_GET_REGULATED_VOLTAGE = 0x50; + +/* Input Reports */ +static const u8 JC_INPUT_BUTTON_EVENT = 0x3F; +static const u8 JC_INPUT_SUBCMD_REPLY = 0x21; +static const u8 JC_INPUT_IMU_DATA = 0x30; +static const u8 JC_INPUT_MCU_DATA = 0x31; +static const u8 JC_INPUT_USB_RESPONSE = 0x81; + +/* Feature Reports */ +static const u8 JC_FEATURE_LAST_SUBCMD = 0x02; +static const u8 JC_FEATURE_OTA_FW_UPGRADE = 0x70; +static const u8 JC_FEATURE_SETUP_MEM_READ = 0x71; +static const u8 JC_FEATURE_MEM_READ = 0x72; +static const u8 JC_FEATURE_ERASE_MEM_SECTOR = 0x73; +static const u8 JC_FEATURE_MEM_WRITE = 0x74; +static const u8 JC_FEATURE_LAUNCH = 0x75; + +/* USB Commands */ +static const u8 JC_USB_CMD_CONN_STATUS = 0x01; +static const u8 JC_USB_CMD_HANDSHAKE = 0x02; +static const u8 JC_USB_CMD_BAUDRATE_3M = 0x03; +static const u8 JC_USB_CMD_NO_TIMEOUT = 0x04; +static const u8 JC_USB_CMD_EN_TIMEOUT = 0x05; +static const u8 JC_USB_RESET = 0x06; +static const u8 JC_USB_PRE_HANDSHAKE = 0x91; +static const u8 JC_USB_SEND_UART = 0x92; + +/* SPI storage addresses of factory calibration data */ +static const u16 JC_CAL_DATA_START = 0x603d; +static const u16 JC_CAL_DATA_END = 0x604e; +#define JC_CAL_DATA_SIZE (JC_CAL_DATA_END - JC_CAL_DATA_START + 1) + + +/* The raw analog joystick values will be mapped in terms of this magnitude */ +static const u16 JC_MAX_STICK_MAG = 32767; +static const u16 JC_STICK_FUZZ = 250; +static const u16 JC_STICK_FLAT = 500; + +/* States for controller state machine */ +enum joycon_ctlr_state { + JOYCON_CTLR_STATE_INIT, + JOYCON_CTLR_STATE_READ, +}; + +struct joycon_stick_cal { + s32 max; + s32 min; + s32 center; +}; + +/* + * All the controller's button values are stored in a u32. + * They can be accessed with bitwise ANDs. + */ +static const u32 JC_BTN_Y = BIT(0); +static const u32 JC_BTN_X = BIT(1); +static const u32 JC_BTN_B = BIT(2); +static const u32 JC_BTN_A = BIT(3); +static const u32 JC_BTN_SR_R = BIT(4); +static const u32 JC_BTN_SL_R = BIT(5); +static const u32 JC_BTN_R = BIT(6); +static const u32 JC_BTN_ZR = BIT(7); +static const u32 JC_BTN_MINUS = BIT(8); +static const u32 JC_BTN_PLUS = BIT(9); +static const u32 JC_BTN_RSTICK = BIT(10); +static const u32 JC_BTN_LSTICK = BIT(11); +static const u32 JC_BTN_HOME = BIT(12); +static const u32 JC_BTN_CAP = BIT(13); /* capture button */ +static const u32 JC_BTN_DOWN = BIT(16); +static const u32 JC_BTN_UP = BIT(17); +static const u32 JC_BTN_RIGHT = BIT(18); +static const u32 JC_BTN_LEFT = BIT(19); +static const u32 JC_BTN_SR_L = BIT(20); +static const u32 JC_BTN_SL_L = BIT(21); +static const u32 JC_BTN_L = BIT(22); +static const u32 JC_BTN_ZL = BIT(23); + +enum joycon_msg_type { + JOYCON_MSG_TYPE_NONE, + JOYCON_MSG_TYPE_USB, + JOYCON_MSG_TYPE_SUBCMD, +}; + +struct joycon_subcmd_request { + u8 output_id; /* must be 0x01 for subcommand, 0x10 for rumble only */ + u8 packet_num; /* incremented every send */ + u8 rumble_data[8]; + u8 subcmd_id; + u8 data[0]; /* length depends on the subcommand */ +} __packed; + +struct joycon_subcmd_reply { + u8 ack; /* MSB 1 for ACK, 0 for NACK */ + u8 id; /* id of requested subcmd */ + u8 data[0]; /* will be at most 35 bytes */ +} __packed; + +struct joycon_input_report { + u8 id; + u8 timer; + u8 bat_con; /* battery and connection info */ + u8 button_status[3]; + u8 left_stick[3]; + u8 right_stick[3]; + u8 vibrator_report; + + /* + * If support for firmware updates, gyroscope data, and/or NFC/IR + * are added in the future, this can be swapped for a union. + */ + struct joycon_subcmd_reply reply; +} __packed; + +#define JC_MAX_RESP_SIZE (sizeof(struct joycon_input_report) + 35) + +/* Each physical controller is associated with a joycon_ctlr struct */ +struct joycon_ctlr { + struct hid_device *hdev; + struct input_dev *input; + enum joycon_ctlr_state ctlr_state; + + /* The following members are used for synchronous sends/receives */ + enum joycon_msg_type msg_type; + u8 subcmd_num; + struct mutex output_mutex; + u8 input_buf[JC_MAX_RESP_SIZE]; + wait_queue_head_t wait; + bool received_resp; + u8 usb_ack_match; + u8 subcmd_ack_match; + + /* factory calibration data */ + struct joycon_stick_cal left_stick_cal_x; + struct joycon_stick_cal left_stick_cal_y; + struct joycon_stick_cal right_stick_cal_x; + struct joycon_stick_cal right_stick_cal_y; + +}; + +static int __joycon_hid_send(struct hid_device *hdev, u8 *data, size_t len) +{ + u8 *buf; + int ret; + + buf = kmemdup(data, len, GFP_KERNEL); + if (!buf) + return -ENOMEM; + ret = hid_hw_output_report(hdev, buf, len); + kfree(buf); + if (ret < 0) + hid_dbg(hdev, "Failed to send output report ret=%d\n", ret); + return ret; +} + +static int joycon_hid_send_sync(struct joycon_ctlr *ctlr, u8 *data, size_t len) +{ + int ret; + + ret = __joycon_hid_send(ctlr->hdev, data, len); + if (ret < 0) { + memset(ctlr->input_buf, 0, JC_MAX_RESP_SIZE); + return ret; + } + + if (!wait_event_timeout(ctlr->wait, ctlr->received_resp, HZ)) { + hid_dbg(ctlr->hdev, "synchronous send/receive timed out\n"); + memset(ctlr->input_buf, 0, JC_MAX_RESP_SIZE); + return -ETIMEDOUT; + } + + ctlr->received_resp = false; + return 0; +} + +static int joycon_send_usb(struct joycon_ctlr *ctlr, u8 cmd) +{ + int ret; + u8 buf[2] = {JC_OUTPUT_USB_CMD}; + + buf[1] = cmd; + ctlr->usb_ack_match = cmd; + ctlr->msg_type = JOYCON_MSG_TYPE_USB; + ret = joycon_hid_send_sync(ctlr, buf, sizeof(buf)); + if (ret) + hid_dbg(ctlr->hdev, "send usb command failed; ret=%d\n", ret); + return ret; +} + +static int joycon_send_subcmd(struct joycon_ctlr *ctlr, + struct joycon_subcmd_request *subcmd, + size_t data_len) +{ + int ret; + + subcmd->output_id = JC_OUTPUT_RUMBLE_AND_SUBCMD; + subcmd->packet_num = ctlr->subcmd_num; + if (++ctlr->subcmd_num > 0xF) + ctlr->subcmd_num = 0; + ctlr->subcmd_ack_match = subcmd->subcmd_id; + ctlr->msg_type = JOYCON_MSG_TYPE_SUBCMD; + + ret = joycon_hid_send_sync(ctlr, (u8 *)subcmd, + sizeof(*subcmd) + data_len); + if (ret < 0) + hid_dbg(ctlr->hdev, "send subcommand failed; ret=%d\n", ret); + else + ret = 0; + return ret; +} + +/* Supply nibbles for flash and on. Ones correspond to active */ +static int joycon_set_player_leds(struct joycon_ctlr *ctlr, u8 flash, u8 on) +{ + struct joycon_subcmd_request *req; + u8 buffer[sizeof(*req) + 1] = { 0 }; + + req = (struct joycon_subcmd_request *)buffer; + req->subcmd_id = JC_SUBCMD_SET_PLAYER_LIGHTS; + req->data[0] = (flash << 4) | on; + + hid_dbg(ctlr->hdev, "setting player leds\n"); + return joycon_send_subcmd(ctlr, req, 1); +} + +static const u16 DFLT_STICK_CAL_CEN = 2000; +static const u16 DFLT_STICK_CAL_MAX = 3500; +static const u16 DFLT_STICK_CAL_MIN = 500; +static int joycon_request_calibration(struct joycon_ctlr *ctlr) +{ + struct joycon_subcmd_request *req; + u8 buffer[sizeof(*req) + 5] = { 0 }; + struct joycon_input_report *report; + struct joycon_stick_cal *cal_x; + struct joycon_stick_cal *cal_y; + s32 x_max_above; + s32 x_min_below; + s32 y_max_above; + s32 y_min_below; + u8 *data; + u8 *raw_cal; + int ret; + + req = (struct joycon_subcmd_request *)buffer; + req->subcmd_id = JC_SUBCMD_SPI_FLASH_READ; + data = req->data; + data[0] = 0xFF & JC_CAL_DATA_START; + data[1] = 0xFF & (JC_CAL_DATA_START >> 8); + data[2] = 0xFF & (JC_CAL_DATA_START >> 16); + data[3] = 0xFF & (JC_CAL_DATA_START >> 24); + data[4] = JC_CAL_DATA_SIZE; + + hid_dbg(ctlr->hdev, "requesting cal data\n"); + ret = joycon_send_subcmd(ctlr, req, 5); + if (ret) { + hid_warn(ctlr->hdev, + "Failed to read stick cal, using defaults; ret=%d\n", + ret); + + ctlr->left_stick_cal_x.center = DFLT_STICK_CAL_CEN; + ctlr->left_stick_cal_x.max = DFLT_STICK_CAL_MAX; + ctlr->left_stick_cal_x.min = DFLT_STICK_CAL_MIN; + + ctlr->left_stick_cal_y.center = DFLT_STICK_CAL_CEN; + ctlr->left_stick_cal_y.max = DFLT_STICK_CAL_MAX; + ctlr->left_stick_cal_y.min = DFLT_STICK_CAL_MIN; + + ctlr->right_stick_cal_x.center = DFLT_STICK_CAL_CEN; + ctlr->right_stick_cal_x.max = DFLT_STICK_CAL_MAX; + ctlr->right_stick_cal_x.min = DFLT_STICK_CAL_MIN; + + ctlr->right_stick_cal_y.center = DFLT_STICK_CAL_CEN; + ctlr->right_stick_cal_y.max = DFLT_STICK_CAL_MAX; + ctlr->right_stick_cal_y.min = DFLT_STICK_CAL_MIN; + + return ret; + } + + report = (struct joycon_input_report *)ctlr->input_buf; + raw_cal = &report->reply.data[5]; + + /* left stick calibration parsing */ + cal_x = &ctlr->left_stick_cal_x; + cal_y = &ctlr->left_stick_cal_y; + + x_max_above = hid_field_extract(ctlr->hdev, (raw_cal + 0), 0, 12); + y_max_above = hid_field_extract(ctlr->hdev, (raw_cal + 1), 4, 12); + cal_x->center = hid_field_extract(ctlr->hdev, (raw_cal + 3), 0, 12); + cal_y->center = hid_field_extract(ctlr->hdev, (raw_cal + 4), 4, 12); + x_min_below = hid_field_extract(ctlr->hdev, (raw_cal + 6), 0, 12); + y_min_below = hid_field_extract(ctlr->hdev, (raw_cal + 7), 4, 12); + cal_x->max = cal_x->center + x_max_above; + cal_x->min = cal_x->center - x_min_below; + cal_y->max = cal_y->center + y_max_above; + cal_y->min = cal_y->center - y_min_below; + + /* right stick calibration parsing */ + raw_cal += 9; + cal_x = &ctlr->right_stick_cal_x; + cal_y = &ctlr->right_stick_cal_y; + + cal_x->center = hid_field_extract(ctlr->hdev, (raw_cal + 0), 0, 12); + cal_y->center = hid_field_extract(ctlr->hdev, (raw_cal + 1), 4, 12); + x_min_below = hid_field_extract(ctlr->hdev, (raw_cal + 3), 0, 12); + y_min_below = hid_field_extract(ctlr->hdev, (raw_cal + 4), 4, 12); + x_max_above = hid_field_extract(ctlr->hdev, (raw_cal + 6), 0, 12); + y_max_above = hid_field_extract(ctlr->hdev, (raw_cal + 7), 4, 12); + cal_x->max = cal_x->center + x_max_above; + cal_x->min = cal_x->center - x_min_below; + cal_y->max = cal_y->center + y_max_above; + cal_y->min = cal_y->center - y_min_below; + + hid_dbg(ctlr->hdev, "calibration:\n" + "l_x_c=%d l_x_max=%d l_x_min=%d\n" + "l_y_c=%d l_y_max=%d l_y_min=%d\n" + "r_x_c=%d r_x_max=%d r_x_min=%d\n" + "r_y_c=%d r_y_max=%d r_y_min=%d\n", + ctlr->left_stick_cal_x.center, + ctlr->left_stick_cal_x.max, + ctlr->left_stick_cal_x.min, + ctlr->left_stick_cal_y.center, + ctlr->left_stick_cal_y.max, + ctlr->left_stick_cal_y.min, + ctlr->right_stick_cal_x.center, + ctlr->right_stick_cal_x.max, + ctlr->right_stick_cal_x.min, + ctlr->right_stick_cal_y.center, + ctlr->right_stick_cal_y.max, + ctlr->right_stick_cal_y.min); + + return 0; +} + +static int joycon_set_report_mode(struct joycon_ctlr *ctlr) +{ + struct joycon_subcmd_request *req; + u8 buffer[sizeof(*req) + 1] = { 0 }; + + req = (struct joycon_subcmd_request *)buffer; + req->subcmd_id = JC_SUBCMD_SET_REPORT_MODE; + req->data[0] = 0x30; /* standard, full report mode */ + + hid_dbg(ctlr->hdev, "setting controller report mode\n"); + return joycon_send_subcmd(ctlr, req, 1); +} + +static s32 joycon_map_stick_val(struct joycon_stick_cal *cal, s32 val) +{ + s32 center = cal->center; + s32 min = cal->min; + s32 max = cal->max; + s32 new_val; + + if (val > center) { + new_val = (val - center) * JC_MAX_STICK_MAG; + new_val /= (max - center); + } else { + new_val = (center - val) * -JC_MAX_STICK_MAG; + new_val /= (center - min); + } + new_val = clamp(new_val, (s32)-JC_MAX_STICK_MAG, (s32)JC_MAX_STICK_MAG); + return new_val; +} + +static void joycon_parse_report(struct joycon_ctlr *ctlr, + struct joycon_input_report *rep) +{ + struct input_dev *dev = ctlr->input; + u32 btns; + u32 id = ctlr->hdev->product; + + btns = hid_field_extract(ctlr->hdev, rep->button_status, 0, 24); + + if (id != USB_DEVICE_ID_NINTENDO_JOYCONR) { + u16 raw_x; + u16 raw_y; + s32 x; + s32 y; + + /* get raw stick values */ + raw_x = hid_field_extract(ctlr->hdev, rep->left_stick, 0, 12); + raw_y = hid_field_extract(ctlr->hdev, + rep->left_stick + 1, 4, 12); + /* map the stick values */ + x = joycon_map_stick_val(&ctlr->left_stick_cal_x, raw_x); + y = -joycon_map_stick_val(&ctlr->left_stick_cal_y, raw_y); + /* report sticks */ + input_report_abs(dev, ABS_X, x); + input_report_abs(dev, ABS_Y, y); + + /* report buttons */ + input_report_key(dev, BTN_TL, btns & JC_BTN_L); + input_report_key(dev, BTN_TL2, btns & JC_BTN_ZL); + if (id != USB_DEVICE_ID_NINTENDO_PROCON) { + /* Report the S buttons as the non-existent triggers */ + input_report_key(dev, BTN_TR, btns & JC_BTN_SL_L); + input_report_key(dev, BTN_TR2, btns & JC_BTN_SR_L); + } + input_report_key(dev, BTN_SELECT, btns & JC_BTN_MINUS); + input_report_key(dev, BTN_THUMBL, btns & JC_BTN_LSTICK); + input_report_key(dev, BTN_Z, btns & JC_BTN_CAP); + input_report_key(dev, BTN_DPAD_DOWN, btns & JC_BTN_DOWN); + input_report_key(dev, BTN_DPAD_UP, btns & JC_BTN_UP); + input_report_key(dev, BTN_DPAD_RIGHT, btns & JC_BTN_RIGHT); + input_report_key(dev, BTN_DPAD_LEFT, btns & JC_BTN_LEFT); + } + if (id != USB_DEVICE_ID_NINTENDO_JOYCONL) { + u16 raw_x; + u16 raw_y; + s32 x; + s32 y; + + /* get raw stick values */ + raw_x = hid_field_extract(ctlr->hdev, rep->right_stick, 0, 12); + raw_y = hid_field_extract(ctlr->hdev, + rep->right_stick + 1, 4, 12); + /* map stick values */ + x = joycon_map_stick_val(&ctlr->right_stick_cal_x, raw_x); + y = -joycon_map_stick_val(&ctlr->right_stick_cal_y, raw_y); + /* report sticks */ + input_report_abs(dev, ABS_RX, x); + input_report_abs(dev, ABS_RY, y); + + /* report buttons */ + input_report_key(dev, BTN_TR, btns & JC_BTN_R); + input_report_key(dev, BTN_TR2, btns & JC_BTN_ZR); + if (id != USB_DEVICE_ID_NINTENDO_PROCON) { + /* Report the S buttons as the non-existent triggers */ + input_report_key(dev, BTN_TL, btns & JC_BTN_SL_R); + input_report_key(dev, BTN_TL2, btns & JC_BTN_SR_R); + } + input_report_key(dev, BTN_START, btns & JC_BTN_PLUS); + input_report_key(dev, BTN_THUMBR, btns & JC_BTN_RSTICK); + input_report_key(dev, BTN_MODE, btns & JC_BTN_HOME); + input_report_key(dev, BTN_WEST, btns & JC_BTN_Y); + input_report_key(dev, BTN_NORTH, btns & JC_BTN_X); + input_report_key(dev, BTN_EAST, btns & JC_BTN_A); + input_report_key(dev, BTN_SOUTH, btns & JC_BTN_B); + } + + input_sync(dev); +} + + +static const unsigned int joycon_button_inputs_l[] = { + BTN_SELECT, BTN_Z, BTN_THUMBL, + BTN_DPAD_UP, BTN_DPAD_DOWN, BTN_DPAD_LEFT, BTN_DPAD_RIGHT, + BTN_TL, BTN_TL2, + 0 /* 0 signals end of array */ +}; + +static const unsigned int joycon_button_inputs_r[] = { + BTN_START, BTN_MODE, BTN_THUMBR, + BTN_SOUTH, BTN_EAST, BTN_NORTH, BTN_WEST, + BTN_TR, BTN_TR2, + 0 /* 0 signals end of array */ +}; + +static DEFINE_MUTEX(joycon_input_num_mutex); +static int joycon_input_create(struct joycon_ctlr *ctlr) +{ + struct hid_device *hdev; + static int input_num = 1; + const char *name; + int ret; + int i; + + hdev = ctlr->hdev; + + switch (hdev->product) { + case USB_DEVICE_ID_NINTENDO_PROCON: + name = "Nintendo Switch Pro Controller"; + break; + case USB_DEVICE_ID_NINTENDO_JOYCONL: + name = "Nintendo Switch Left Joy-Con"; + break; + case USB_DEVICE_ID_NINTENDO_JOYCONR: + name = "Nintendo Switch Right Joy-Con"; + break; + default: /* Should be impossible */ + hid_err(hdev, "Invalid hid product\n"); + return -EINVAL; + } + + ctlr->input = devm_input_allocate_device(&hdev->dev); + if (!ctlr->input) + return -ENOMEM; + ctlr->input->id.bustype = hdev->bus; + ctlr->input->id.vendor = hdev->vendor; + ctlr->input->id.product = hdev->product; + ctlr->input->id.version = hdev->version; + ctlr->input->name = name; + input_set_drvdata(ctlr->input, ctlr); + + + /* set up sticks */ + if (hdev->product != USB_DEVICE_ID_NINTENDO_JOYCONR) { + input_set_abs_params(ctlr->input, ABS_X, + -JC_MAX_STICK_MAG, JC_MAX_STICK_MAG, + JC_STICK_FUZZ, JC_STICK_FLAT); + input_set_abs_params(ctlr->input, ABS_Y, + -JC_MAX_STICK_MAG, JC_MAX_STICK_MAG, + JC_STICK_FUZZ, JC_STICK_FLAT); + } + if (hdev->product != USB_DEVICE_ID_NINTENDO_JOYCONL) { + input_set_abs_params(ctlr->input, ABS_RX, + -JC_MAX_STICK_MAG, JC_MAX_STICK_MAG, + JC_STICK_FUZZ, JC_STICK_FLAT); + input_set_abs_params(ctlr->input, ABS_RY, + -JC_MAX_STICK_MAG, JC_MAX_STICK_MAG, + JC_STICK_FUZZ, JC_STICK_FLAT); + } + + /* set up buttons */ + if (hdev->product != USB_DEVICE_ID_NINTENDO_JOYCONR) { + for (i = 0; joycon_button_inputs_l[i] > 0; i++) + input_set_capability(ctlr->input, EV_KEY, + joycon_button_inputs_l[i]); + } + if (hdev->product != USB_DEVICE_ID_NINTENDO_JOYCONL) { + for (i = 0; joycon_button_inputs_r[i] > 0; i++) + input_set_capability(ctlr->input, EV_KEY, + joycon_button_inputs_r[i]); + } + + ret = input_register_device(ctlr->input); + if (ret) + return ret; + + /* Set the default controller player leds based on controller number */ + mutex_lock(&joycon_input_num_mutex); + mutex_lock(&ctlr->output_mutex); + ret = joycon_set_player_leds(ctlr, 0, 0xF >> (4 - input_num)); + if (ret) + hid_warn(ctlr->hdev, "Failed to set leds; ret=%d\n", ret); + mutex_unlock(&ctlr->output_mutex); + if (++input_num > 4) + input_num = 1; + mutex_unlock(&joycon_input_num_mutex); + + return 0; +} + +/* Common handler for parsing inputs */ +static int joycon_ctlr_read_handler(struct joycon_ctlr *ctlr, u8 *data, + int size) +{ + int ret = 0; + + if (data[0] == JC_INPUT_SUBCMD_REPLY || data[0] == JC_INPUT_IMU_DATA || + data[0] == JC_INPUT_MCU_DATA) { + if (size >= 12) /* make sure it contains the input report */ + joycon_parse_report(ctlr, + (struct joycon_input_report *)data); + } + + return ret; +} + +static int joycon_ctlr_handle_event(struct joycon_ctlr *ctlr, u8 *data, + int size) +{ + int ret = 0; + bool match = false; + struct joycon_input_report *report; + + if (unlikely(mutex_is_locked(&ctlr->output_mutex)) && + ctlr->msg_type != JOYCON_MSG_TYPE_NONE) { + switch (ctlr->msg_type) { + case JOYCON_MSG_TYPE_USB: + if (size < 2) + break; + if (data[0] == JC_INPUT_USB_RESPONSE && + data[1] == ctlr->usb_ack_match) + match = true; + break; + case JOYCON_MSG_TYPE_SUBCMD: + if (size < sizeof(struct joycon_input_report) || + data[0] != JC_INPUT_SUBCMD_REPLY) + break; + report = (struct joycon_input_report *)data; + if (report->reply.id == ctlr->subcmd_ack_match) + match = true; + break; + default: + break; + } + + if (match) { + memcpy(ctlr->input_buf, data, + min(size, (int)JC_MAX_RESP_SIZE)); + ctlr->msg_type = JOYCON_MSG_TYPE_NONE; + ctlr->received_resp = true; + wake_up(&ctlr->wait); + + /* This message has been handled */ + return 1; + } + } + + if (ctlr->ctlr_state == JOYCON_CTLR_STATE_READ) + ret = joycon_ctlr_read_handler(ctlr, data, size); + + return ret; +} + +static int nintendo_hid_event(struct hid_device *hdev, + struct hid_report *report, u8 *raw_data, int size) +{ + struct joycon_ctlr *ctlr = hid_get_drvdata(hdev); + + if (size < 1) + return -EINVAL; + + return joycon_ctlr_handle_event(ctlr, raw_data, size); +} + +static int nintendo_hid_probe(struct hid_device *hdev, + const struct hid_device_id *id) +{ + int ret; + struct joycon_ctlr *ctlr; + + hid_dbg(hdev, "probe - start\n"); + + ctlr = devm_kzalloc(&hdev->dev, sizeof(*ctlr), GFP_KERNEL); + if (!ctlr) { + ret = -ENOMEM; + goto err; + } + + ctlr->hdev = hdev; + ctlr->ctlr_state = JOYCON_CTLR_STATE_INIT; + hid_set_drvdata(hdev, ctlr); + mutex_init(&ctlr->output_mutex); + init_waitqueue_head(&ctlr->wait); + + ret = hid_parse(hdev); + if (ret) { + hid_err(hdev, "HID parse failed\n"); + goto err; + } + + ret = hid_hw_start(hdev, HID_CONNECT_HIDRAW); + if (ret) { + hid_err(hdev, "HW start failed\n"); + goto err; + } + + ret = hid_hw_open(hdev); + if (ret) { + hid_err(hdev, "cannot start hardware I/O\n"); + goto err_stop; + } + + hid_device_io_start(hdev); + + /* Initialize the controller */ + mutex_lock(&ctlr->output_mutex); + /* if handshake command fails, assume ble pro controller */ + if (hdev->product == USB_DEVICE_ID_NINTENDO_PROCON && + !joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE)) { + hid_dbg(hdev, "detected USB controller\n"); + /* set baudrate for improved latency */ + ret = joycon_send_usb(ctlr, JC_USB_CMD_BAUDRATE_3M); + if (ret) { + hid_err(hdev, "Failed to set baudrate; ret=%d\n", ret); + goto err_mutex; + } + /* handshake */ + ret = joycon_send_usb(ctlr, JC_USB_CMD_HANDSHAKE); + if (ret) { + hid_err(hdev, "Failed handshake; ret=%d\n", ret); + goto err_mutex; + } + /* + * Set no timeout (to keep controller in USB mode). + * This doesn't send a response, so ignore the timeout. + */ + joycon_send_usb(ctlr, JC_USB_CMD_NO_TIMEOUT); + } + + /* get controller calibration data, and parse it */ + ret = joycon_request_calibration(ctlr); + if (ret) { + /* + * We can function with default calibration, but it may be + * inaccurate. Provide a warning, and continue on. + */ + hid_warn(hdev, "Analog stick positions may be inaccurate\n"); + } + + /* Set the reporting mode to 0x30, which is the full report mode */ + ret = joycon_set_report_mode(ctlr); + if (ret) { + hid_err(hdev, "Failed to set report mode; ret=%d\n", ret); + goto err_mutex; + } + + mutex_unlock(&ctlr->output_mutex); + + ret = joycon_input_create(ctlr); + if (ret) { + hid_err(hdev, "Failed to create input device; ret=%d\n", ret); + goto err_close; + } + + ctlr->ctlr_state = JOYCON_CTLR_STATE_READ; + + hid_dbg(hdev, "probe - success\n"); + return 0; + +err_mutex: + mutex_unlock(&ctlr->output_mutex); +err_close: + hid_hw_close(hdev); +err_stop: + hid_hw_stop(hdev); +err: + hid_err(hdev, "probe - fail = %d\n", ret); + return ret; +} + +static void nintendo_hid_remove(struct hid_device *hdev) +{ + hid_dbg(hdev, "remove\n"); + hid_hw_close(hdev); + hid_hw_stop(hdev); +} + +static const struct hid_device_id nintendo_hid_devices[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_PROCON) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_PROCON) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_JOYCONL) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_NINTENDO, + USB_DEVICE_ID_NINTENDO_JOYCONR) }, + { } +}; +MODULE_DEVICE_TABLE(hid, nintendo_hid_devices); + +static struct hid_driver nintendo_hid_driver = { + .name = "nintendo", + .id_table = nintendo_hid_devices, + .probe = nintendo_hid_probe, + .remove = nintendo_hid_remove, + .raw_event = nintendo_hid_event, +}; +module_hid_driver(nintendo_hid_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Daniel J. Ogorchock "); +MODULE_DESCRIPTION("Driver for Nintendo Switch Controllers"); diff --git a/drivers/hid/hid-sony.c b/drivers/hid/hid-sony.c index 6ce9b5e1a06f..c8b07a182c0b 100644 --- a/drivers/hid/hid-sony.c +++ b/drivers/hid/hid-sony.c @@ -2163,9 +2163,15 @@ static int sony_play_effect(struct input_dev *dev, void *data, static int sony_init_ff(struct sony_sc *sc) { - struct hid_input *hidinput = list_entry(sc->hdev->inputs.next, - struct hid_input, list); - struct input_dev *input_dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *input_dev; + + if (list_empty(&sc->hdev->inputs)) { + hid_err(sc->hdev, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(sc->hdev->inputs.next, struct hid_input, list); + input_dev = hidinput->input; input_set_capability(input_dev, EV_FF, FF_RUMBLE); return input_ff_create_memless(input_dev, NULL, sony_play_effect); diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c new file mode 100644 index 000000000000..44e1eefc5b24 --- /dev/null +++ b/drivers/hid/hid-steam.c @@ -0,0 +1,1141 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * HID driver for Valve Steam Controller + * + * Copyright (c) 2018 Rodrigo Rivas Costa + * + * Supports both the wired and wireless interfaces. + * + * This controller has a builtin emulation of mouse and keyboard: the right pad + * can be used as a mouse, the shoulder buttons are mouse buttons, A and B + * buttons are ENTER and ESCAPE, and so on. This is implemented as additional + * HID interfaces. + * + * This is known as the "lizard mode", because apparently lizards like to use + * the computer from the coach, without a proper mouse and keyboard. + * + * This driver will disable the lizard mode when the input device is opened + * and re-enable it when the input device is closed, so as not to break user + * mode behaviour. The lizard_mode parameter can be used to change that. + * + * There are a few user space applications (notably Steam Client) that use + * the hidraw interface directly to create input devices (XTest, uinput...). + * In order to avoid breaking them this driver creates a layered hidraw device, + * so it can detect when the client is running and then: + * - it will not send any command to the controller. + * - this input device will be removed, to avoid double input of the same + * user action. + * When the client is closed, this input device will be created again. + * + * For additional functions, such as changing the right-pad margin or switching + * the led, you can use the user-space tool at: + * + * https://github.com/rodrigorc/steamctrl + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "hid-ids.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Rodrigo Rivas Costa "); + +static bool lizard_mode = true; + +static DEFINE_MUTEX(steam_devices_lock); +static LIST_HEAD(steam_devices); + +#define STEAM_QUIRK_WIRELESS BIT(0) + +/* Touch pads are 40 mm in diameter and 65535 units */ +#define STEAM_PAD_RESOLUTION 1638 +/* Trigger runs are about 5 mm and 256 units */ +#define STEAM_TRIGGER_RESOLUTION 51 +/* Joystick runs are about 5 mm and 256 units */ +#define STEAM_JOYSTICK_RESOLUTION 51 + +#define STEAM_PAD_FUZZ 256 + +/* + * Commands that can be sent in a feature report. + * Thanks to Valve for some valuable hints. + */ +#define STEAM_CMD_SET_MAPPINGS 0x80 +#define STEAM_CMD_CLEAR_MAPPINGS 0x81 +#define STEAM_CMD_GET_MAPPINGS 0x82 +#define STEAM_CMD_GET_ATTRIB 0x83 +#define STEAM_CMD_GET_ATTRIB_LABEL 0x84 +#define STEAM_CMD_DEFAULT_MAPPINGS 0x85 +#define STEAM_CMD_FACTORY_RESET 0x86 +#define STEAM_CMD_WRITE_REGISTER 0x87 +#define STEAM_CMD_CLEAR_REGISTER 0x88 +#define STEAM_CMD_READ_REGISTER 0x89 +#define STEAM_CMD_GET_REGISTER_LABEL 0x8a +#define STEAM_CMD_GET_REGISTER_MAX 0x8b +#define STEAM_CMD_GET_REGISTER_DEFAULT 0x8c +#define STEAM_CMD_SET_MODE 0x8d +#define STEAM_CMD_DEFAULT_MOUSE 0x8e +#define STEAM_CMD_FORCEFEEDBAK 0x8f +#define STEAM_CMD_REQUEST_COMM_STATUS 0xb4 +#define STEAM_CMD_GET_SERIAL 0xae + +/* Some useful register ids */ +#define STEAM_REG_LPAD_MODE 0x07 +#define STEAM_REG_RPAD_MODE 0x08 +#define STEAM_REG_RPAD_MARGIN 0x18 +#define STEAM_REG_LED 0x2d +#define STEAM_REG_GYRO_MODE 0x30 + +/* Raw event identifiers */ +#define STEAM_EV_INPUT_DATA 0x01 +#define STEAM_EV_CONNECT 0x03 +#define STEAM_EV_BATTERY 0x04 + +/* Values for GYRO_MODE (bitmask) */ +#define STEAM_GYRO_MODE_OFF 0x0000 +#define STEAM_GYRO_MODE_STEERING 0x0001 +#define STEAM_GYRO_MODE_TILT 0x0002 +#define STEAM_GYRO_MODE_SEND_ORIENTATION 0x0004 +#define STEAM_GYRO_MODE_SEND_RAW_ACCEL 0x0008 +#define STEAM_GYRO_MODE_SEND_RAW_GYRO 0x0010 + +/* Other random constants */ +#define STEAM_SERIAL_LEN 10 + +struct steam_device { + struct list_head list; + spinlock_t lock; + struct hid_device *hdev, *client_hdev; + struct mutex mutex; + bool client_opened; + struct input_dev __rcu *input; + unsigned long quirks; + struct work_struct work_connect; + bool connected; + char serial_no[STEAM_SERIAL_LEN + 1]; + struct power_supply_desc battery_desc; + struct power_supply __rcu *battery; + u8 battery_charge; + u16 voltage; +}; + +static int steam_recv_report(struct steam_device *steam, + u8 *data, int size) +{ + struct hid_report *r; + u8 *buf; + int ret; + + r = steam->hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[0]; + if (hid_report_len(r) < 64) + return -EINVAL; + + buf = hid_alloc_report_buf(r, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* + * The report ID is always 0, so strip the first byte from the output. + * hid_report_len() is not counting the report ID, so +1 to the length + * or else we get a EOVERFLOW. We are safe from a buffer overflow + * because hid_alloc_report_buf() allocates +7 bytes. + */ + ret = hid_hw_raw_request(steam->hdev, 0x00, + buf, hid_report_len(r) + 1, + HID_FEATURE_REPORT, HID_REQ_GET_REPORT); + if (ret > 0) + memcpy(data, buf + 1, min(size, ret - 1)); + kfree(buf); + return ret; +} + +static int steam_send_report(struct steam_device *steam, + u8 *cmd, int size) +{ + struct hid_report *r; + u8 *buf; + unsigned int retries = 50; + int ret; + + r = steam->hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[0]; + if (hid_report_len(r) < 64) + return -EINVAL; + + buf = hid_alloc_report_buf(r, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + /* The report ID is always 0 */ + memcpy(buf + 1, cmd, size); + + /* + * Sometimes the wireless controller fails with EPIPE + * when sending a feature report. + * Doing a HID_REQ_GET_REPORT and waiting for a while + * seems to fix that. + */ + do { + ret = hid_hw_raw_request(steam->hdev, 0, + buf, size + 1, + HID_FEATURE_REPORT, HID_REQ_SET_REPORT); + if (ret != -EPIPE) + break; + msleep(20); + } while (--retries); + + kfree(buf); + if (ret < 0) + hid_err(steam->hdev, "%s: error %d (%*ph)\n", __func__, + ret, size, cmd); + return ret; +} + +static inline int steam_send_report_byte(struct steam_device *steam, u8 cmd) +{ + return steam_send_report(steam, &cmd, 1); +} + +static int steam_write_registers(struct steam_device *steam, + /* u8 reg, u16 val */...) +{ + /* Send: 0x87 len (reg valLo valHi)* */ + u8 reg; + u16 val; + u8 cmd[64] = {STEAM_CMD_WRITE_REGISTER, 0x00}; + va_list args; + + va_start(args, steam); + for (;;) { + reg = va_arg(args, int); + if (reg == 0) + break; + val = va_arg(args, int); + cmd[cmd[1] + 2] = reg; + cmd[cmd[1] + 3] = val & 0xff; + cmd[cmd[1] + 4] = val >> 8; + cmd[1] += 3; + } + va_end(args); + + return steam_send_report(steam, cmd, 2 + cmd[1]); +} + +static int steam_get_serial(struct steam_device *steam) +{ + /* + * Send: 0xae 0x15 0x01 + * Recv: 0xae 0x15 0x01 serialnumber (10 chars) + */ + int ret; + u8 cmd[] = {STEAM_CMD_GET_SERIAL, 0x15, 0x01}; + u8 reply[3 + STEAM_SERIAL_LEN + 1]; + + ret = steam_send_report(steam, cmd, sizeof(cmd)); + if (ret < 0) + return ret; + ret = steam_recv_report(steam, reply, sizeof(reply)); + if (ret < 0) + return ret; + if (reply[0] != 0xae || reply[1] != 0x15 || reply[2] != 0x01) + return -EIO; + reply[3 + STEAM_SERIAL_LEN] = 0; + strlcpy(steam->serial_no, reply + 3, sizeof(steam->serial_no)); + return 0; +} + +/* + * This command requests the wireless adaptor to post an event + * with the connection status. Useful if this driver is loaded when + * the controller is already connected. + */ +static inline int steam_request_conn_status(struct steam_device *steam) +{ + return steam_send_report_byte(steam, STEAM_CMD_REQUEST_COMM_STATUS); +} + +static void steam_set_lizard_mode(struct steam_device *steam, bool enable) +{ + if (enable) { + /* enable esc, enter, cursors */ + steam_send_report_byte(steam, STEAM_CMD_DEFAULT_MAPPINGS); + /* enable mouse */ + steam_send_report_byte(steam, STEAM_CMD_DEFAULT_MOUSE); + steam_write_registers(steam, + STEAM_REG_RPAD_MARGIN, 0x01, /* enable margin */ + 0); + } else { + /* disable esc, enter, cursor */ + steam_send_report_byte(steam, STEAM_CMD_CLEAR_MAPPINGS); + steam_write_registers(steam, + STEAM_REG_RPAD_MODE, 0x07, /* disable mouse */ + STEAM_REG_RPAD_MARGIN, 0x00, /* disable margin */ + 0); + } +} + +static int steam_input_open(struct input_dev *dev) +{ + struct steam_device *steam = input_get_drvdata(dev); + + mutex_lock(&steam->mutex); + if (!steam->client_opened && lizard_mode) + steam_set_lizard_mode(steam, false); + mutex_unlock(&steam->mutex); + return 0; +} + +static void steam_input_close(struct input_dev *dev) +{ + struct steam_device *steam = input_get_drvdata(dev); + + mutex_lock(&steam->mutex); + if (!steam->client_opened && lizard_mode) + steam_set_lizard_mode(steam, true); + mutex_unlock(&steam->mutex); +} + +static enum power_supply_property steam_battery_props[] = { + POWER_SUPPLY_PROP_PRESENT, + POWER_SUPPLY_PROP_SCOPE, + POWER_SUPPLY_PROP_VOLTAGE_NOW, + POWER_SUPPLY_PROP_CAPACITY, +}; + +static int steam_battery_get_property(struct power_supply *psy, + enum power_supply_property psp, + union power_supply_propval *val) +{ + struct steam_device *steam = power_supply_get_drvdata(psy); + unsigned long flags; + s16 volts; + u8 batt; + int ret = 0; + + spin_lock_irqsave(&steam->lock, flags); + volts = steam->voltage; + batt = steam->battery_charge; + spin_unlock_irqrestore(&steam->lock, flags); + + switch (psp) { + case POWER_SUPPLY_PROP_PRESENT: + val->intval = 1; + break; + case POWER_SUPPLY_PROP_SCOPE: + val->intval = POWER_SUPPLY_SCOPE_DEVICE; + break; + case POWER_SUPPLY_PROP_VOLTAGE_NOW: + val->intval = volts * 1000; /* mV -> uV */ + break; + case POWER_SUPPLY_PROP_CAPACITY: + val->intval = batt; + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +static int steam_battery_register(struct steam_device *steam) +{ + struct power_supply *battery; + struct power_supply_config battery_cfg = { .drv_data = steam, }; + unsigned long flags; + int ret; + + steam->battery_desc.type = POWER_SUPPLY_TYPE_BATTERY; + steam->battery_desc.properties = steam_battery_props; + steam->battery_desc.num_properties = ARRAY_SIZE(steam_battery_props); + steam->battery_desc.get_property = steam_battery_get_property; + steam->battery_desc.name = devm_kasprintf(&steam->hdev->dev, + GFP_KERNEL, "steam-controller-%s-battery", + steam->serial_no); + if (!steam->battery_desc.name) + return -ENOMEM; + + /* avoid the warning of 0% battery while waiting for the first info */ + spin_lock_irqsave(&steam->lock, flags); + steam->voltage = 3000; + steam->battery_charge = 100; + spin_unlock_irqrestore(&steam->lock, flags); + + battery = power_supply_register(&steam->hdev->dev, + &steam->battery_desc, &battery_cfg); + if (IS_ERR(battery)) { + ret = PTR_ERR(battery); + hid_err(steam->hdev, + "%s:power_supply_register failed with error %d\n", + __func__, ret); + return ret; + } + rcu_assign_pointer(steam->battery, battery); + power_supply_powers(battery, &steam->hdev->dev); + return 0; +} + +static int steam_input_register(struct steam_device *steam) +{ + struct hid_device *hdev = steam->hdev; + struct input_dev *input; + int ret; + + rcu_read_lock(); + input = rcu_dereference(steam->input); + rcu_read_unlock(); + if (input) { + dbg_hid("%s: already connected\n", __func__); + return 0; + } + + input = input_allocate_device(); + if (!input) + return -ENOMEM; + + input_set_drvdata(input, steam); + input->dev.parent = &hdev->dev; + input->open = steam_input_open; + input->close = steam_input_close; + + input->name = (steam->quirks & STEAM_QUIRK_WIRELESS) ? + "Wireless Steam Controller" : + "Steam Controller"; + input->phys = hdev->phys; + input->uniq = steam->serial_no; + input->id.bustype = hdev->bus; + input->id.vendor = hdev->vendor; + input->id.product = hdev->product; + input->id.version = hdev->version; + + input_set_capability(input, EV_KEY, BTN_TR2); + input_set_capability(input, EV_KEY, BTN_TL2); + input_set_capability(input, EV_KEY, BTN_TR); + input_set_capability(input, EV_KEY, BTN_TL); + input_set_capability(input, EV_KEY, BTN_Y); + input_set_capability(input, EV_KEY, BTN_B); + input_set_capability(input, EV_KEY, BTN_X); + input_set_capability(input, EV_KEY, BTN_A); + input_set_capability(input, EV_KEY, BTN_DPAD_UP); + input_set_capability(input, EV_KEY, BTN_DPAD_RIGHT); + input_set_capability(input, EV_KEY, BTN_DPAD_LEFT); + input_set_capability(input, EV_KEY, BTN_DPAD_DOWN); + input_set_capability(input, EV_KEY, BTN_SELECT); + input_set_capability(input, EV_KEY, BTN_MODE); + input_set_capability(input, EV_KEY, BTN_START); + input_set_capability(input, EV_KEY, BTN_GEAR_DOWN); + input_set_capability(input, EV_KEY, BTN_GEAR_UP); + input_set_capability(input, EV_KEY, BTN_THUMBR); + input_set_capability(input, EV_KEY, BTN_THUMBL); + input_set_capability(input, EV_KEY, BTN_THUMB); + input_set_capability(input, EV_KEY, BTN_THUMB2); + + input_set_abs_params(input, ABS_HAT2Y, 0, 255, 0, 0); + input_set_abs_params(input, ABS_HAT2X, 0, 255, 0, 0); + input_set_abs_params(input, ABS_X, -32767, 32767, 0, 0); + input_set_abs_params(input, ABS_Y, -32767, 32767, 0, 0); + input_set_abs_params(input, ABS_RX, -32767, 32767, + STEAM_PAD_FUZZ, 0); + input_set_abs_params(input, ABS_RY, -32767, 32767, + STEAM_PAD_FUZZ, 0); + input_set_abs_params(input, ABS_HAT0X, -32767, 32767, + STEAM_PAD_FUZZ, 0); + input_set_abs_params(input, ABS_HAT0Y, -32767, 32767, + STEAM_PAD_FUZZ, 0); + input_abs_set_res(input, ABS_X, STEAM_JOYSTICK_RESOLUTION); + input_abs_set_res(input, ABS_Y, STEAM_JOYSTICK_RESOLUTION); + input_abs_set_res(input, ABS_RX, STEAM_PAD_RESOLUTION); + input_abs_set_res(input, ABS_RY, STEAM_PAD_RESOLUTION); + input_abs_set_res(input, ABS_HAT0X, STEAM_PAD_RESOLUTION); + input_abs_set_res(input, ABS_HAT0Y, STEAM_PAD_RESOLUTION); + input_abs_set_res(input, ABS_HAT2Y, STEAM_TRIGGER_RESOLUTION); + input_abs_set_res(input, ABS_HAT2X, STEAM_TRIGGER_RESOLUTION); + + ret = input_register_device(input); + if (ret) + goto input_register_fail; + + rcu_assign_pointer(steam->input, input); + return 0; + +input_register_fail: + input_free_device(input); + return ret; +} + +static void steam_input_unregister(struct steam_device *steam) +{ + struct input_dev *input; + rcu_read_lock(); + input = rcu_dereference(steam->input); + rcu_read_unlock(); + if (!input) + return; + RCU_INIT_POINTER(steam->input, NULL); + synchronize_rcu(); + input_unregister_device(input); +} + +static void steam_battery_unregister(struct steam_device *steam) +{ + struct power_supply *battery; + + rcu_read_lock(); + battery = rcu_dereference(steam->battery); + rcu_read_unlock(); + + if (!battery) + return; + RCU_INIT_POINTER(steam->battery, NULL); + synchronize_rcu(); + power_supply_unregister(battery); +} + +static int steam_register(struct steam_device *steam) +{ + int ret; + bool client_opened; + + /* + * This function can be called several times in a row with the + * wireless adaptor, without steam_unregister() between them, because + * another client send a get_connection_status command, for example. + * The battery and serial number are set just once per device. + */ + if (!steam->serial_no[0]) { + /* + * Unlikely, but getting the serial could fail, and it is not so + * important, so make up a serial number and go on. + */ + mutex_lock(&steam->mutex); + if (steam_get_serial(steam) < 0) + strlcpy(steam->serial_no, "XXXXXXXXXX", + sizeof(steam->serial_no)); + mutex_unlock(&steam->mutex); + + hid_info(steam->hdev, "Steam Controller '%s' connected", + steam->serial_no); + + /* ignore battery errors, we can live without it */ + if (steam->quirks & STEAM_QUIRK_WIRELESS) + steam_battery_register(steam); + + mutex_lock(&steam_devices_lock); + list_add(&steam->list, &steam_devices); + mutex_unlock(&steam_devices_lock); + } + + mutex_lock(&steam->mutex); + client_opened = steam->client_opened; + if (!client_opened) + steam_set_lizard_mode(steam, lizard_mode); + mutex_unlock(&steam->mutex); + + if (!client_opened) + ret = steam_input_register(steam); + else + ret = 0; + + return ret; +} + +static void steam_unregister(struct steam_device *steam) +{ + steam_battery_unregister(steam); + steam_input_unregister(steam); + if (steam->serial_no[0]) { + hid_info(steam->hdev, "Steam Controller '%s' disconnected", + steam->serial_no); + mutex_lock(&steam_devices_lock); + list_del(&steam->list); + mutex_unlock(&steam_devices_lock); + steam->serial_no[0] = 0; + } +} + +static void steam_work_connect_cb(struct work_struct *work) +{ + struct steam_device *steam = container_of(work, struct steam_device, + work_connect); + unsigned long flags; + bool connected; + int ret; + + spin_lock_irqsave(&steam->lock, flags); + connected = steam->connected; + spin_unlock_irqrestore(&steam->lock, flags); + + if (connected) { + ret = steam_register(steam); + if (ret) { + hid_err(steam->hdev, + "%s:steam_register failed with error %d\n", + __func__, ret); + } + } else { + steam_unregister(steam); + } +} + +static bool steam_is_valve_interface(struct hid_device *hdev) +{ + struct hid_report_enum *rep_enum; + + /* + * The wired device creates 3 interfaces: + * 0: emulated mouse. + * 1: emulated keyboard. + * 2: the real game pad. + * The wireless device creates 5 interfaces: + * 0: emulated keyboard. + * 1-4: slots where up to 4 real game pads will be connected to. + * We know which one is the real gamepad interface because they are the + * only ones with a feature report. + */ + rep_enum = &hdev->report_enum[HID_FEATURE_REPORT]; + return !list_empty(&rep_enum->report_list); +} + +static int steam_client_ll_parse(struct hid_device *hdev) +{ + struct steam_device *steam = hdev->driver_data; + + return hid_parse_report(hdev, steam->hdev->dev_rdesc, + steam->hdev->dev_rsize); +} + +static int steam_client_ll_start(struct hid_device *hdev) +{ + return 0; +} + +static void steam_client_ll_stop(struct hid_device *hdev) +{ +} + +static int steam_client_ll_open(struct hid_device *hdev) +{ + struct steam_device *steam = hdev->driver_data; + + mutex_lock(&steam->mutex); + steam->client_opened = true; + mutex_unlock(&steam->mutex); + + steam_input_unregister(steam); + + return 0; +} + +static void steam_client_ll_close(struct hid_device *hdev) +{ + struct steam_device *steam = hdev->driver_data; + + unsigned long flags; + bool connected; + + spin_lock_irqsave(&steam->lock, flags); + connected = steam->connected; + spin_unlock_irqrestore(&steam->lock, flags); + + mutex_lock(&steam->mutex); + steam->client_opened = false; + if (connected) + steam_set_lizard_mode(steam, lizard_mode); + mutex_unlock(&steam->mutex); + + if (connected) + steam_input_register(steam); +} + +static int steam_client_ll_raw_request(struct hid_device *hdev, + unsigned char reportnum, u8 *buf, + size_t count, unsigned char report_type, + int reqtype) +{ + struct steam_device *steam = hdev->driver_data; + + return hid_hw_raw_request(steam->hdev, reportnum, buf, count, + report_type, reqtype); +} + +static struct hid_ll_driver steam_client_ll_driver = { + .parse = steam_client_ll_parse, + .start = steam_client_ll_start, + .stop = steam_client_ll_stop, + .open = steam_client_ll_open, + .close = steam_client_ll_close, + .raw_request = steam_client_ll_raw_request, +}; + +static struct hid_device *steam_create_client_hid(struct hid_device *hdev) +{ + struct hid_device *client_hdev; + + client_hdev = hid_allocate_device(); + if (IS_ERR(client_hdev)) + return client_hdev; + + client_hdev->ll_driver = &steam_client_ll_driver; + client_hdev->dev.parent = hdev->dev.parent; + client_hdev->bus = hdev->bus; + client_hdev->vendor = hdev->vendor; + client_hdev->product = hdev->product; + client_hdev->version = hdev->version; + client_hdev->type = hdev->type; + client_hdev->country = hdev->country; + strlcpy(client_hdev->name, hdev->name, + sizeof(client_hdev->name)); + strlcpy(client_hdev->phys, hdev->phys, + sizeof(client_hdev->phys)); + /* + * Since we use the same device info than the real interface to + * trick userspace, we will be calling steam_probe recursively. + * We need to recognize the client interface somehow. + */ + client_hdev->group = HID_GROUP_STEAM; + return client_hdev; +} + +static int steam_probe(struct hid_device *hdev, + const struct hid_device_id *id) +{ + struct steam_device *steam; + int ret; + + ret = hid_parse(hdev); + if (ret) { + hid_err(hdev, + "%s:parse of hid interface failed\n", __func__); + return ret; + } + + /* + * The virtual client_dev is only used for hidraw. + * Also avoid the recursive probe. + */ + if (hdev->group == HID_GROUP_STEAM) + return hid_hw_start(hdev, HID_CONNECT_HIDRAW); + /* + * The non-valve interfaces (mouse and keyboard emulation) are + * connected without changes. + */ + if (!steam_is_valve_interface(hdev)) + return hid_hw_start(hdev, HID_CONNECT_DEFAULT); + + steam = devm_kzalloc(&hdev->dev, sizeof(*steam), GFP_KERNEL); + if (!steam) { + ret = -ENOMEM; + goto steam_alloc_fail; + } + steam->hdev = hdev; + hid_set_drvdata(hdev, steam); + spin_lock_init(&steam->lock); + mutex_init(&steam->mutex); + steam->quirks = id->driver_data; + INIT_WORK(&steam->work_connect, steam_work_connect_cb); + + steam->client_hdev = steam_create_client_hid(hdev); + if (IS_ERR(steam->client_hdev)) { + ret = PTR_ERR(steam->client_hdev); + goto client_hdev_fail; + } + steam->client_hdev->driver_data = steam; + + /* + * With the real steam controller interface, do not connect hidraw. + * Instead, create the client_hid and connect that. + */ + ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_HIDRAW); + if (ret) + goto hid_hw_start_fail; + + ret = hid_add_device(steam->client_hdev); + if (ret) + goto client_hdev_add_fail; + + ret = hid_hw_open(hdev); + if (ret) { + hid_err(hdev, + "%s:hid_hw_open\n", + __func__); + goto hid_hw_open_fail; + } + + if (steam->quirks & STEAM_QUIRK_WIRELESS) { + hid_info(hdev, "Steam wireless receiver connected"); + steam_request_conn_status(steam); + } else { + ret = steam_register(steam); + if (ret) { + hid_err(hdev, + "%s:steam_register failed with error %d\n", + __func__, ret); + goto input_register_fail; + } + } + + return 0; + +input_register_fail: +hid_hw_open_fail: +client_hdev_add_fail: + hid_hw_stop(hdev); +hid_hw_start_fail: + hid_destroy_device(steam->client_hdev); +client_hdev_fail: + cancel_work_sync(&steam->work_connect); +steam_alloc_fail: + hid_err(hdev, "%s: failed with error %d\n", + __func__, ret); + return ret; +} + +static void steam_remove(struct hid_device *hdev) +{ + struct steam_device *steam = hid_get_drvdata(hdev); + + if (!steam || hdev->group == HID_GROUP_STEAM) { + hid_hw_stop(hdev); + return; + } + + hid_destroy_device(steam->client_hdev); + steam->client_opened = false; + cancel_work_sync(&steam->work_connect); + if (steam->quirks & STEAM_QUIRK_WIRELESS) { + hid_info(hdev, "Steam wireless receiver disconnected"); + } + hid_hw_close(hdev); + hid_hw_stop(hdev); + steam_unregister(steam); +} + +static void steam_do_connect_event(struct steam_device *steam, bool connected) +{ + unsigned long flags; + bool changed; + + spin_lock_irqsave(&steam->lock, flags); + changed = steam->connected != connected; + steam->connected = connected; + spin_unlock_irqrestore(&steam->lock, flags); + + if (changed && schedule_work(&steam->work_connect) == 0) + dbg_hid("%s: connected=%d event already queued\n", + __func__, connected); +} + +/* + * Some input data in the protocol has the opposite sign. + * Clamp the values to 32767..-32767 so that the range is + * symmetrical and can be negated safely. + */ +static inline s16 steam_le16(u8 *data) +{ + s16 x = (s16) le16_to_cpup((__le16 *)data); + + return x == -32768 ? -32767 : x; +} + +/* + * The size for this message payload is 60. + * The known values are: + * (* values are not sent through wireless) + * (* accelerator/gyro is disabled by default) + * Offset| Type | Mapped to |Meaning + * -------+-------+-----------+-------------------------- + * 4-7 | u32 | -- | sequence number + * 8-10 | 24bit | see below | buttons + * 11 | u8 | ABS_HAT2Y | left trigger + * 12 | u8 | ABS_HAT2X | right trigger + * 13-15 | -- | -- | always 0 + * 16-17 | s16 | ABS_X/ABS_HAT0X | X value + * 18-19 | s16 | ABS_Y/ABS_HAT0Y | Y value + * 20-21 | s16 | ABS_RX | right-pad X value + * 22-23 | s16 | ABS_RY | right-pad Y value + * 24-25 | s16 | -- | * left trigger + * 26-27 | s16 | -- | * right trigger + * 28-29 | s16 | -- | * accelerometer X value + * 30-31 | s16 | -- | * accelerometer Y value + * 32-33 | s16 | -- | * accelerometer Z value + * 34-35 | s16 | -- | gyro X value + * 36-36 | s16 | -- | gyro Y value + * 38-39 | s16 | -- | gyro Z value + * 40-41 | s16 | -- | quaternion W value + * 42-43 | s16 | -- | quaternion X value + * 44-45 | s16 | -- | quaternion Y value + * 46-47 | s16 | -- | quaternion Z value + * 48-49 | -- | -- | always 0 + * 50-51 | s16 | -- | * left trigger (uncalibrated) + * 52-53 | s16 | -- | * right trigger (uncalibrated) + * 54-55 | s16 | -- | * joystick X value (uncalibrated) + * 56-57 | s16 | -- | * joystick Y value (uncalibrated) + * 58-59 | s16 | -- | * left-pad X value + * 60-61 | s16 | -- | * left-pad Y value + * 62-63 | u16 | -- | * battery voltage + * + * The buttons are: + * Bit | Mapped to | Description + * ------+------------+-------------------------------- + * 8.0 | BTN_TR2 | right trigger fully pressed + * 8.1 | BTN_TL2 | left trigger fully pressed + * 8.2 | BTN_TR | right shoulder + * 8.3 | BTN_TL | left shoulder + * 8.4 | BTN_Y | button Y + * 8.5 | BTN_B | button B + * 8.6 | BTN_X | button X + * 8.7 | BTN_A | button A + * 9.0 | BTN_DPAD_UP | lef-pad up + * 9.1 | BTN_DPAD_RIGHT | lef-pad right + * 9.2 | BTN_DPAD_LEFT | lef-pad left + * 9.3 | BTN_DPAD_DOWN | lef-pad down + * 9.4 | BTN_SELECT | menu left + * 9.5 | BTN_MODE | steam logo + * 9.6 | BTN_START | menu right + * 9.7 | BTN_GEAR_DOWN | left back lever + * 10.0 | BTN_GEAR_UP | right back lever + * 10.1 | -- | left-pad clicked + * 10.2 | BTN_THUMBR | right-pad clicked + * 10.3 | BTN_THUMB | left-pad touched (but see explanation below) + * 10.4 | BTN_THUMB2 | right-pad touched + * 10.5 | -- | unknown + * 10.6 | BTN_THUMBL | joystick clicked + * 10.7 | -- | lpad_and_joy + */ + +static void steam_do_input_event(struct steam_device *steam, + struct input_dev *input, u8 *data) +{ + /* 24 bits of buttons */ + u8 b8, b9, b10; + s16 x, y; + bool lpad_touched, lpad_and_joy; + + b8 = data[8]; + b9 = data[9]; + b10 = data[10]; + + input_report_abs(input, ABS_HAT2Y, data[11]); + input_report_abs(input, ABS_HAT2X, data[12]); + + /* + * These two bits tells how to interpret the values X and Y. + * lpad_and_joy tells that the joystick and the lpad are used at the + * same time. + * lpad_touched tells whether X/Y are to be read as lpad coord or + * joystick values. + * (lpad_touched || lpad_and_joy) tells if the lpad is really touched. + */ + lpad_touched = b10 & BIT(3); + lpad_and_joy = b10 & BIT(7); + x = steam_le16(data + 16); + y = -steam_le16(data + 18); + + input_report_abs(input, lpad_touched ? ABS_HAT0X : ABS_X, x); + input_report_abs(input, lpad_touched ? ABS_HAT0Y : ABS_Y, y); + /* Check if joystick is centered */ + if (lpad_touched && !lpad_and_joy) { + input_report_abs(input, ABS_X, 0); + input_report_abs(input, ABS_Y, 0); + } + /* Check if lpad is untouched */ + if (!(lpad_touched || lpad_and_joy)) { + input_report_abs(input, ABS_HAT0X, 0); + input_report_abs(input, ABS_HAT0Y, 0); + } + + input_report_abs(input, ABS_RX, steam_le16(data + 20)); + input_report_abs(input, ABS_RY, -steam_le16(data + 22)); + + input_event(input, EV_KEY, BTN_TR2, !!(b8 & BIT(0))); + input_event(input, EV_KEY, BTN_TL2, !!(b8 & BIT(1))); + input_event(input, EV_KEY, BTN_TR, !!(b8 & BIT(2))); + input_event(input, EV_KEY, BTN_TL, !!(b8 & BIT(3))); + input_event(input, EV_KEY, BTN_Y, !!(b8 & BIT(4))); + input_event(input, EV_KEY, BTN_B, !!(b8 & BIT(5))); + input_event(input, EV_KEY, BTN_X, !!(b8 & BIT(6))); + input_event(input, EV_KEY, BTN_A, !!(b8 & BIT(7))); + input_event(input, EV_KEY, BTN_SELECT, !!(b9 & BIT(4))); + input_event(input, EV_KEY, BTN_MODE, !!(b9 & BIT(5))); + input_event(input, EV_KEY, BTN_START, !!(b9 & BIT(6))); + input_event(input, EV_KEY, BTN_GEAR_DOWN, !!(b9 & BIT(7))); + input_event(input, EV_KEY, BTN_GEAR_UP, !!(b10 & BIT(0))); + input_event(input, EV_KEY, BTN_THUMBR, !!(b10 & BIT(2))); + input_event(input, EV_KEY, BTN_THUMBL, !!(b10 & BIT(6))); + input_event(input, EV_KEY, BTN_THUMB, lpad_touched || lpad_and_joy); + input_event(input, EV_KEY, BTN_THUMB2, !!(b10 & BIT(4))); + input_event(input, EV_KEY, BTN_DPAD_UP, !!(b9 & BIT(0))); + input_event(input, EV_KEY, BTN_DPAD_RIGHT, !!(b9 & BIT(1))); + input_event(input, EV_KEY, BTN_DPAD_LEFT, !!(b9 & BIT(2))); + input_event(input, EV_KEY, BTN_DPAD_DOWN, !!(b9 & BIT(3))); + + input_sync(input); +} + +/* + * The size for this message payload is 11. + * The known values are: + * Offset| Type | Meaning + * -------+-------+--------------------------- + * 4-7 | u32 | sequence number + * 8-11 | -- | always 0 + * 12-13 | u16 | voltage (mV) + * 14 | u8 | battery percent + */ +static void steam_do_battery_event(struct steam_device *steam, + struct power_supply *battery, u8 *data) +{ + unsigned long flags; + + s16 volts = steam_le16(data + 12); + u8 batt = data[14]; + + /* Creating the battery may have failed */ + rcu_read_lock(); + battery = rcu_dereference(steam->battery); + if (likely(battery)) { + spin_lock_irqsave(&steam->lock, flags); + steam->voltage = volts; + steam->battery_charge = batt; + spin_unlock_irqrestore(&steam->lock, flags); + power_supply_changed(battery); + } + rcu_read_unlock(); +} + +static int steam_raw_event(struct hid_device *hdev, + struct hid_report *report, u8 *data, + int size) +{ + struct steam_device *steam = hid_get_drvdata(hdev); + struct input_dev *input; + struct power_supply *battery; + + if (!steam) + return 0; + + if (steam->client_opened) + hid_input_report(steam->client_hdev, HID_FEATURE_REPORT, + data, size, 0); + /* + * All messages are size=64, all values little-endian. + * The format is: + * Offset| Meaning + * -------+-------------------------------------------- + * 0-1 | always 0x01, 0x00, maybe protocol version? + * 2 | type of message + * 3 | length of the real payload (not checked) + * 4-n | payload data, depends on the type + * + * There are these known types of message: + * 0x01: input data (60 bytes) + * 0x03: wireless connect/disconnect (1 byte) + * 0x04: battery status (11 bytes) + */ + + if (size != 64 || data[0] != 1 || data[1] != 0) + return 0; + + switch (data[2]) { + case STEAM_EV_INPUT_DATA: + if (steam->client_opened) + return 0; + rcu_read_lock(); + input = rcu_dereference(steam->input); + if (likely(input)) + steam_do_input_event(steam, input, data); + rcu_read_unlock(); + break; + case STEAM_EV_CONNECT: + /* + * The payload of this event is a single byte: + * 0x01: disconnected. + * 0x02: connected. + */ + switch (data[4]) { + case 0x01: + steam_do_connect_event(steam, false); + break; + case 0x02: + steam_do_connect_event(steam, true); + break; + } + break; + case STEAM_EV_BATTERY: + if (steam->quirks & STEAM_QUIRK_WIRELESS) { + rcu_read_lock(); + battery = rcu_dereference(steam->battery); + if (likely(battery)) { + steam_do_battery_event(steam, battery, data); + } else { + dbg_hid( + "%s: battery data without connect event\n", + __func__); + steam_do_connect_event(steam, true); + } + rcu_read_unlock(); + } + break; + } + return 0; +} + +static int steam_param_set_lizard_mode(const char *val, + const struct kernel_param *kp) +{ + struct steam_device *steam; + int ret; + + ret = param_set_bool(val, kp); + if (ret) + return ret; + + mutex_lock(&steam_devices_lock); + list_for_each_entry(steam, &steam_devices, list) { + mutex_lock(&steam->mutex); + if (!steam->client_opened) + steam_set_lizard_mode(steam, lizard_mode); + mutex_unlock(&steam->mutex); + } + mutex_unlock(&steam_devices_lock); + return 0; +} + +static const struct kernel_param_ops steam_lizard_mode_ops = { + .set = steam_param_set_lizard_mode, + .get = param_get_bool, +}; + +module_param_cb(lizard_mode, &steam_lizard_mode_ops, &lizard_mode, 0644); +MODULE_PARM_DESC(lizard_mode, + "Enable mouse and keyboard emulation (lizard mode) when the gamepad is not in use"); + +static const struct hid_device_id steam_controllers[] = { + { /* Wired Steam Controller */ + HID_USB_DEVICE(USB_VENDOR_ID_VALVE, + USB_DEVICE_ID_STEAM_CONTROLLER) + }, + { /* Wireless Steam Controller */ + HID_USB_DEVICE(USB_VENDOR_ID_VALVE, + USB_DEVICE_ID_STEAM_CONTROLLER_WIRELESS), + .driver_data = STEAM_QUIRK_WIRELESS + }, + {} +}; + +MODULE_DEVICE_TABLE(hid, steam_controllers); + +static struct hid_driver steam_controller_driver = { + .name = "hid-steam", + .id_table = steam_controllers, + .probe = steam_probe, + .remove = steam_remove, + .raw_event = steam_raw_event, +}; + +module_hid_driver(steam_controller_driver); \ No newline at end of file diff --git a/drivers/hid/hid-tmff.c b/drivers/hid/hid-tmff.c index cfa0cb22c9b3..d98e471a5f7b 100644 --- a/drivers/hid/hid-tmff.c +++ b/drivers/hid/hid-tmff.c @@ -136,12 +136,18 @@ static int tmff_init(struct hid_device *hid, const signed short *ff_bits) struct tmff_device *tmff; struct hid_report *report; struct list_head *report_list; - struct hid_input *hidinput = list_entry(hid->inputs.next, - struct hid_input, list); - struct input_dev *input_dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *input_dev; int error; int i; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + input_dev = hidinput->input; + tmff = kzalloc(sizeof(struct tmff_device), GFP_KERNEL); if (!tmff) return -ENOMEM; diff --git a/drivers/hid/hid-zpff.c b/drivers/hid/hid-zpff.c index a29756c6ca02..4e7e01be99b1 100644 --- a/drivers/hid/hid-zpff.c +++ b/drivers/hid/hid-zpff.c @@ -66,11 +66,17 @@ static int zpff_init(struct hid_device *hid) { struct zpff_device *zpff; struct hid_report *report; - struct hid_input *hidinput = list_entry(hid->inputs.next, - struct hid_input, list); - struct input_dev *dev = hidinput->input; + struct hid_input *hidinput; + struct input_dev *dev; int i, error; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + hidinput = list_entry(hid->inputs.next, struct hid_input, list); + dev = hidinput->input; + for (i = 0; i < 4; i++) { report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, i, 1); if (!report) diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index 5652bd0ffb4d..5243c4120819 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -257,13 +257,14 @@ out: static unsigned int hidraw_poll(struct file *file, poll_table *wait) { struct hidraw_list *list = file->private_data; + unsigned int mask = POLLOUT | POLLWRNORM; /* hidraw is always writable */ poll_wait(file, &list->hidraw->wait, wait); if (list->head != list->tail) - return POLLIN | POLLRDNORM; + mask |= POLLIN | POLLRDNORM; if (!list->hidraw->exist) - return POLLERR | POLLHUP; - return 0; + mask |= POLLERR | POLLHUP; + return mask; } static int hidraw_open(struct inode *inode, struct file *file) diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c index cac262a912c1..95052373a828 100644 --- a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c +++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c @@ -322,6 +322,33 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = { }, .driver_data = (void *)&sipodev_desc }, + { + /* + * There are at least 2 Primebook C11B versions, the older + * version has a product-name of "Primebook C11B", and a + * bios version / release / firmware revision of: + * V2.1.2 / 05/03/2018 / 18.2 + * The new version has "PRIMEBOOK C11B" as product-name and a + * bios version / release / firmware revision of: + * CFALKSW05_BIOS_V1.1.2 / 11/19/2018 / 19.2 + * Only the older version needs this quirk, note the newer + * version will not match as it has a different product-name. + */ + .ident = "Trekstor Primebook C11B", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TREKSTOR"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Primebook C11B"), + }, + .driver_data = (void *)&sipodev_desc + }, + { + .ident = "Trekstor SURFBOOK E11B", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TREKSTOR"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SURFBOOK E11B"), + }, + .driver_data = (void *)&sipodev_desc + }, { .ident = "Direkt-Tek DTLAPY116-2", .matches = { @@ -330,6 +357,14 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = { }, .driver_data = (void *)&sipodev_desc }, + { + .ident = "Direkt-Tek DTLAPY133-1", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Direkt-Tek"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "DTLAPY133-1"), + }, + .driver_data = (void *)&sipodev_desc + }, { .ident = "Mediacom Flexbook Edge 11", .matches = { @@ -338,6 +373,14 @@ static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = { }, .driver_data = (void *)&sipodev_desc }, + { + .ident = "Odys Winbook 13", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "AXDIA International GmbH"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "WINBOOK 13"), + }, + .driver_data = (void *)&sipodev_desc + }, { } /* Terminate list */ }; diff --git a/drivers/hid/intel-ish-hid/ishtp-hid.c b/drivers/hid/intel-ish-hid/ishtp-hid.c index cd23903ddcf1..e918d78e541c 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid.c +++ b/drivers/hid/intel-ish-hid/ishtp-hid.c @@ -222,7 +222,7 @@ int ishtp_hid_probe(unsigned int cur_hid_dev, err_hid_device: kfree(hid_data); err_hid_data: - kfree(hid); + hid_destroy_device(hid); return rv; } diff --git a/drivers/hid/intel-ish-hid/ishtp/client-buffers.c b/drivers/hid/intel-ish-hid/ishtp/client-buffers.c index b9b917d2d50d..c41dbb167c91 100644 --- a/drivers/hid/intel-ish-hid/ishtp/client-buffers.c +++ b/drivers/hid/intel-ish-hid/ishtp/client-buffers.c @@ -90,7 +90,7 @@ int ishtp_cl_alloc_tx_ring(struct ishtp_cl *cl) return 0; out: dev_err(&cl->device->dev, "error in allocating Tx pool\n"); - ishtp_cl_free_rx_ring(cl); + ishtp_cl_free_tx_ring(cl); return -ENOMEM; } diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c index 6f67d73b184e..c749f449c7cb 100644 --- a/drivers/hid/uhid.c +++ b/drivers/hid/uhid.c @@ -25,6 +25,7 @@ #include #include #include +#include #define UHID_NAME "uhid" #define UHID_BUFSIZE 32 @@ -768,13 +769,14 @@ unlock: static unsigned int uhid_char_poll(struct file *file, poll_table *wait) { struct uhid_device *uhid = file->private_data; + unsigned int mask = POLLOUT | POLLWRNORM; /* uhid is always writable */ poll_wait(file, &uhid->waitq, wait); if (uhid->head != uhid->tail) - return POLLIN | POLLRDNORM; + mask |= POLLIN | POLLRDNORM; - return 0; + return mask; } static const struct file_operations uhid_fops = { diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index ce342fd0457e..d9602f3a359e 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -254,12 +254,51 @@ static int hiddev_release(struct inode * inode, struct file * file) return 0; } +static int __hiddev_open(struct hiddev *hiddev, struct file *file) +{ + struct hiddev_list *list; + int error; + + lockdep_assert_held(&hiddev->existancelock); + + list = vzalloc(sizeof(*list)); + if (!list) + return -ENOMEM; + + mutex_init(&list->thread_lock); + list->hiddev = hiddev; + + if (!hiddev->open++) { + error = hid_hw_power(hiddev->hid, PM_HINT_FULLON); + if (error < 0) + goto err_drop_count; + + error = hid_hw_open(hiddev->hid); + if (error < 0) + goto err_normal_power; + } + + spin_lock_irq(&hiddev->list_lock); + list_add_tail(&list->node, &hiddev->list); + spin_unlock_irq(&hiddev->list_lock); + + file->private_data = list; + + return 0; + +err_normal_power: + hid_hw_power(hiddev->hid, PM_HINT_NORMAL); +err_drop_count: + hiddev->open--; + vfree(list); + return error; +} + /* * open file op */ static int hiddev_open(struct inode *inode, struct file *file) { - struct hiddev_list *list; struct usb_interface *intf; struct hid_device *hid; struct hiddev *hiddev; @@ -268,66 +307,14 @@ static int hiddev_open(struct inode *inode, struct file *file) intf = usbhid_find_interface(iminor(inode)); if (!intf) return -ENODEV; + hid = usb_get_intfdata(intf); hiddev = hid->hiddev; - if (!(list = vzalloc(sizeof(struct hiddev_list)))) - return -ENOMEM; - mutex_init(&list->thread_lock); - list->hiddev = hiddev; - file->private_data = list; - - /* - * no need for locking because the USB major number - * is shared which usbcore guards against disconnect - */ - if (list->hiddev->exist) { - if (!list->hiddev->open++) { - res = hid_hw_open(hiddev->hid); - if (res < 0) - goto bail; - } - } else { - res = -ENODEV; - goto bail; - } - - spin_lock_irq(&list->hiddev->list_lock); - list_add_tail(&list->node, &hiddev->list); - spin_unlock_irq(&list->hiddev->list_lock); - mutex_lock(&hiddev->existancelock); - /* - * recheck exist with existance lock held to - * avoid opening a disconnected device - */ - if (!list->hiddev->exist) { - res = -ENODEV; - goto bail_unlock; - } - if (!list->hiddev->open++) - if (list->hiddev->exist) { - struct hid_device *hid = hiddev->hid; - res = hid_hw_power(hid, PM_HINT_FULLON); - if (res < 0) - goto bail_unlock; - res = hid_hw_open(hid); - if (res < 0) - goto bail_normal_power; - } - mutex_unlock(&hiddev->existancelock); - return 0; -bail_normal_power: - hid_hw_power(hid, PM_HINT_NORMAL); -bail_unlock: + res = hiddev->exist ? __hiddev_open(hiddev, file) : -ENODEV; mutex_unlock(&hiddev->existancelock); - spin_lock_irq(&list->hiddev->list_lock); - list_del(&list->node); - spin_unlock_irq(&list->hiddev->list_lock); -bail: - file->private_data = NULL; - vfree(list); return res; } @@ -967,9 +954,9 @@ void hiddev_disconnect(struct hid_device *hid) hiddev->exist = 0; if (hiddev->open) { - mutex_unlock(&hiddev->existancelock); hid_hw_close(hiddev->hid); wake_up_interruptible(&hiddev->wait); + mutex_unlock(&hiddev->existancelock); } else { mutex_unlock(&hiddev->existancelock); kfree(hiddev); diff --git a/drivers/hid/wacom.h b/drivers/hid/wacom.h index 3c37c3cbf6f1..9c0900c35b23 100644 --- a/drivers/hid/wacom.h +++ b/drivers/hid/wacom.h @@ -205,6 +205,21 @@ static inline void wacom_schedule_work(struct wacom_wac *wacom_wac, } } +/* + * Convert a signed 32-bit integer to an unsigned n-bit integer. Undoes + * the normally-helpful work of 'hid_snto32' for fields that use signed + * ranges for questionable reasons. + */ +static inline __u32 wacom_s32tou(s32 value, __u8 n) +{ + switch (n) { + case 8: return ((__u8)value); + case 16: return ((__u16)value); + case 32: return ((__u32)value); + } + return value & (1 << (n - 1)) ? value & (~(~0U << n)) : value; +} + extern const struct hid_device_id wacom_ids[]; void wacom_wac_irq(struct wacom_wac *wacom_wac, size_t len); diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 2e0c4df6ad08..1eb868403664 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2182,7 +2182,7 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field case HID_DG_TOOLSERIALNUMBER: if (value) { wacom_wac->serial[0] = (wacom_wac->serial[0] & ~0xFFFFFFFFULL); - wacom_wac->serial[0] |= (__u32)value; + wacom_wac->serial[0] |= wacom_s32tou(value, field->report_size); } return; case HID_DG_TWIST: @@ -2198,15 +2198,17 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field return; case WACOM_HID_WD_SERIALHI: if (value) { + __u32 raw_value = wacom_s32tou(value, field->report_size); + wacom_wac->serial[0] = (wacom_wac->serial[0] & 0xFFFFFFFF); - wacom_wac->serial[0] |= ((__u64)value) << 32; + wacom_wac->serial[0] |= ((__u64)raw_value) << 32; /* * Non-USI EMR devices may contain additional tool type * information here. See WACOM_HID_WD_TOOLTYPE case for * more details. */ if (value >> 20 == 1) { - wacom_wac->id[0] |= value & 0xFFFFF; + wacom_wac->id[0] |= raw_value & 0xFFFFF; } } return; @@ -2218,7 +2220,7 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field * bitwise OR so the complete value can be built * up over time :( */ - wacom_wac->id[0] |= value; + wacom_wac->id[0] |= wacom_s32tou(value, field->report_size); return; case WACOM_HID_WD_OFFSETLEFT: if (features->offset_left && value != features->offset_left) diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index fe041f22521d..23f312b4c6aa 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -148,6 +148,17 @@ static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu) int hv_synic_alloc(void) { int cpu; + struct hv_per_cpu_context *hv_cpu; + + /* + * First, zero all per-cpu memory areas so hv_synic_free() can + * detect what memory has been allocated and cleanup properly + * after any failures. + */ + for_each_present_cpu(cpu) { + hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); + memset(hv_cpu, 0, sizeof(*hv_cpu)); + } hv_context.hv_numa_map = kzalloc(sizeof(struct cpumask) * nr_node_ids, GFP_ATOMIC); @@ -157,10 +168,8 @@ int hv_synic_alloc(void) } for_each_present_cpu(cpu) { - struct hv_per_cpu_context *hv_cpu - = per_cpu_ptr(hv_context.cpu_context, cpu); + hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); - memset(hv_cpu, 0, sizeof(*hv_cpu)); tasklet_init(&hv_cpu->msg_dpc, vmbus_on_msg_dpc, (unsigned long) hv_cpu); diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 0824405f93fb..2d93c8f454bc 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1170,10 +1170,7 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, unsigned int i = 0; struct page *pg; - if (num_pages < alloc_unit) - return 0; - - for (i = 0; (i * alloc_unit) < num_pages; i++) { + for (i = 0; i < num_pages / alloc_unit; i++) { if (bl_resp->hdr.size + sizeof(union dm_mem_page_range) > PAGE_SIZE) return i * alloc_unit; @@ -1207,7 +1204,7 @@ static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm, } - return num_pages; + return i * alloc_unit; } static void balloon_up(struct work_struct *dummy) @@ -1222,9 +1219,6 @@ static void balloon_up(struct work_struct *dummy) long avail_pages; unsigned long floor; - /* The host balloons pages in 2M granularity. */ - WARN_ON_ONCE(num_pages % PAGES_IN_2M != 0); - /* * We will attempt 2M allocations. However, if we fail to * allocate 2M chunks, we will go back to 4k allocations. @@ -1234,14 +1228,13 @@ static void balloon_up(struct work_struct *dummy) avail_pages = si_mem_available(); floor = compute_balloon_floor(); - /* Refuse to balloon below the floor, keep the 2M granularity. */ + /* Refuse to balloon below the floor. */ if (avail_pages < num_pages || avail_pages - num_pages < floor) { pr_warn("Balloon request will be partially fulfilled. %s\n", avail_pages < num_pages ? "Not enough memory." : "Balloon floor reached."); num_pages = avail_pages > floor ? (avail_pages - floor) : 0; - num_pages -= num_pages % PAGES_IN_2M; } while (!done) { diff --git a/drivers/hwmon/adt7462.c b/drivers/hwmon/adt7462.c index 19f2a6d48bac..bdd7679fd298 100644 --- a/drivers/hwmon/adt7462.c +++ b/drivers/hwmon/adt7462.c @@ -426,7 +426,7 @@ static int ADT7462_REG_VOLT(struct adt7462_data *data, int which) return 0x95; break; } - return -ENODEV; + return 0; } /* Provide labels for sysfs */ diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c index 37db2eb66ed7..d7d1f2467100 100644 --- a/drivers/hwmon/adt7475.c +++ b/drivers/hwmon/adt7475.c @@ -297,9 +297,10 @@ static inline u16 volt2reg(int channel, long volt, u8 bypass_attn) long reg; if (bypass_attn & (1 << channel)) - reg = (volt * 1024) / 2250; + reg = DIV_ROUND_CLOSEST(volt * 1024, 2250); else - reg = (volt * r[1] * 1024) / ((r[0] + r[1]) * 2250); + reg = DIV_ROUND_CLOSEST(volt * r[1] * 1024, + (r[0] + r[1]) * 2250); return clamp_val(reg, 0, 1023) & (0xff << 2); } diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 7b53065e9882..652973d83a07 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -51,6 +51,7 @@ struct hwmon_device_attribute { #define to_hwmon_attr(d) \ container_of(d, struct hwmon_device_attribute, dev_attr) +#define to_dev_attr(a) container_of(a, struct device_attribute, attr) /* * Thermal zone information @@ -58,7 +59,7 @@ struct hwmon_device_attribute { * also provides the sensor index. */ struct hwmon_thermal_data { - struct hwmon_device *hwdev; /* Reference to hwmon device */ + struct device *dev; /* Reference to hwmon device */ int index; /* sensor index */ }; @@ -95,9 +96,27 @@ static const struct attribute_group *hwmon_dev_attr_groups[] = { NULL }; +static void hwmon_free_attrs(struct attribute **attrs) +{ + int i; + + for (i = 0; attrs[i]; i++) { + struct device_attribute *dattr = to_dev_attr(attrs[i]); + struct hwmon_device_attribute *hattr = to_hwmon_attr(dattr); + + kfree(hattr); + } + kfree(attrs); +} + static void hwmon_dev_release(struct device *dev) { - kfree(to_hwmon_device(dev)); + struct hwmon_device *hwdev = to_hwmon_device(dev); + + if (hwdev->group.attrs) + hwmon_free_attrs(hwdev->group.attrs); + kfree(hwdev->groups); + kfree(hwdev); } static struct class hwmon_class = { @@ -121,11 +140,11 @@ static DEFINE_IDA(hwmon_ida); static int hwmon_thermal_get_temp(void *data, int *temp) { struct hwmon_thermal_data *tdata = data; - struct hwmon_device *hwdev = tdata->hwdev; + struct hwmon_device *hwdev = to_hwmon_device(tdata->dev); int ret; long t; - ret = hwdev->chip->ops->read(&hwdev->dev, hwmon_temp, hwmon_temp_input, + ret = hwdev->chip->ops->read(tdata->dev, hwmon_temp, hwmon_temp_input, tdata->index, &t); if (ret < 0) return ret; @@ -139,26 +158,31 @@ static const struct thermal_zone_of_device_ops hwmon_thermal_ops = { .get_temp = hwmon_thermal_get_temp, }; -static int hwmon_thermal_add_sensor(struct device *dev, - struct hwmon_device *hwdev, int index) +static int hwmon_thermal_add_sensor(struct device *dev, int index) { struct hwmon_thermal_data *tdata; + struct thermal_zone_device *tzd; tdata = devm_kzalloc(dev, sizeof(*tdata), GFP_KERNEL); if (!tdata) return -ENOMEM; - tdata->hwdev = hwdev; + tdata->dev = dev; tdata->index = index; - devm_thermal_zone_of_sensor_register(&hwdev->dev, index, tdata, - &hwmon_thermal_ops); + tzd = devm_thermal_zone_of_sensor_register(dev, index, tdata, + &hwmon_thermal_ops); + /* + * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV, + * so ignore that error but forward any other error. + */ + if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV)) + return PTR_ERR(tzd); return 0; } #else -static int hwmon_thermal_add_sensor(struct device *dev, - struct hwmon_device *hwdev, int index) +static int hwmon_thermal_add_sensor(struct device *dev, int index) { return 0; } @@ -235,8 +259,7 @@ static bool is_string_attr(enum hwmon_sensor_types type, u32 attr) (type == hwmon_fan && attr == hwmon_fan_label); } -static struct attribute *hwmon_genattr(struct device *dev, - const void *drvdata, +static struct attribute *hwmon_genattr(const void *drvdata, enum hwmon_sensor_types type, u32 attr, int index, @@ -264,7 +287,7 @@ static struct attribute *hwmon_genattr(struct device *dev, if ((mode & S_IWUGO) && !ops->write) return ERR_PTR(-EINVAL); - hattr = devm_kzalloc(dev, sizeof(*hattr), GFP_KERNEL); + hattr = kzalloc(sizeof(*hattr), GFP_KERNEL); if (!hattr) return ERR_PTR(-ENOMEM); @@ -467,8 +490,7 @@ static int hwmon_num_channel_attrs(const struct hwmon_channel_info *info) return n; } -static int hwmon_genattrs(struct device *dev, - const void *drvdata, +static int hwmon_genattrs(const void *drvdata, struct attribute **attrs, const struct hwmon_ops *ops, const struct hwmon_channel_info *info) @@ -494,7 +516,7 @@ static int hwmon_genattrs(struct device *dev, attr_mask &= ~BIT(attr); if (attr >= template_size) return -EINVAL; - a = hwmon_genattr(dev, drvdata, info->type, attr, i, + a = hwmon_genattr(drvdata, info->type, attr, i, templates[attr], ops); if (IS_ERR(a)) { if (PTR_ERR(a) != -ENOENT) @@ -508,8 +530,7 @@ static int hwmon_genattrs(struct device *dev, } static struct attribute ** -__hwmon_create_attrs(struct device *dev, const void *drvdata, - const struct hwmon_chip_info *chip) +__hwmon_create_attrs(const void *drvdata, const struct hwmon_chip_info *chip) { int ret, i, aindex = 0, nattrs = 0; struct attribute **attrs; @@ -520,15 +541,17 @@ __hwmon_create_attrs(struct device *dev, const void *drvdata, if (nattrs == 0) return ERR_PTR(-EINVAL); - attrs = devm_kcalloc(dev, nattrs + 1, sizeof(*attrs), GFP_KERNEL); + attrs = kcalloc(nattrs + 1, sizeof(*attrs), GFP_KERNEL); if (!attrs) return ERR_PTR(-ENOMEM); for (i = 0; chip->info[i]; i++) { - ret = hwmon_genattrs(dev, drvdata, &attrs[aindex], chip->ops, + ret = hwmon_genattrs(drvdata, &attrs[aindex], chip->ops, chip->info[i]); - if (ret < 0) + if (ret < 0) { + hwmon_free_attrs(attrs); return ERR_PTR(ret); + } aindex += ret; } @@ -570,14 +593,13 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, for (i = 0; groups[i]; i++) ngroups++; - hwdev->groups = devm_kcalloc(dev, ngroups, sizeof(*groups), - GFP_KERNEL); + hwdev->groups = kcalloc(ngroups, sizeof(*groups), GFP_KERNEL); if (!hwdev->groups) { err = -ENOMEM; goto free_hwmon; } - attrs = __hwmon_create_attrs(dev, drvdata, chip); + attrs = __hwmon_create_attrs(drvdata, chip); if (IS_ERR(attrs)) { err = PTR_ERR(attrs); goto free_hwmon; @@ -621,8 +643,13 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, if (!chip->ops->is_visible(drvdata, hwmon_temp, hwmon_temp_input, j)) continue; - if (info[i]->config[j] & HWMON_T_INPUT) - hwmon_thermal_add_sensor(dev, hwdev, j); + if (info[i]->config[j] & HWMON_T_INPUT) { + err = hwmon_thermal_add_sensor(hdev, j); + if (err) { + device_unregister(hdev); + goto ida_remove; + } + } } } } @@ -630,7 +657,7 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, return hdev; free_hwmon: - kfree(hwdev); + hwmon_dev_release(hdev); ida_remove: ida_simple_remove(&hwmon_ida, id); return ERR_PTR(err); diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c index e6b49500c52a..8c9555313fc3 100644 --- a/drivers/hwmon/ina3221.c +++ b/drivers/hwmon/ina3221.c @@ -38,9 +38,9 @@ #define INA3221_WARN3 0x0c #define INA3221_MASK_ENABLE 0x0f -#define INA3221_CONFIG_MODE_SHUNT BIT(1) -#define INA3221_CONFIG_MODE_BUS BIT(2) -#define INA3221_CONFIG_MODE_CONTINUOUS BIT(3) +#define INA3221_CONFIG_MODE_SHUNT BIT(0) +#define INA3221_CONFIG_MODE_BUS BIT(1) +#define INA3221_CONFIG_MODE_CONTINUOUS BIT(2) #define INA3221_RSHUNT_DEFAULT 10000 diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index 005ffb5ffa92..1737bb5fbaaf 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c @@ -165,7 +165,7 @@ static int lm75_write(struct device *dev, enum hwmon_sensor_types type, temp = DIV_ROUND_CLOSEST(temp << (resolution - 8), 1000) << (16 - resolution); - return regmap_write(data->regmap, reg, temp); + return regmap_write(data->regmap, reg, (u16)temp); } static umode_t lm75_is_visible(const void *data, enum hwmon_sensor_types type, diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c index 38ffbdb0a85f..779ec8fdfae0 100644 --- a/drivers/hwmon/nct7802.c +++ b/drivers/hwmon/nct7802.c @@ -32,8 +32,8 @@ static const u8 REG_VOLTAGE[5] = { 0x09, 0x0a, 0x0c, 0x0d, 0x0e }; static const u8 REG_VOLTAGE_LIMIT_LSB[2][5] = { - { 0x40, 0x00, 0x42, 0x44, 0x46 }, - { 0x3f, 0x00, 0x41, 0x43, 0x45 }, + { 0x46, 0x00, 0x40, 0x42, 0x44 }, + { 0x45, 0x00, 0x3f, 0x41, 0x43 }, }; static const u8 REG_VOLTAGE_LIMIT_MSB[5] = { 0x48, 0x00, 0x47, 0x47, 0x48 }; diff --git a/drivers/hwmon/pmbus/ltc2978.c b/drivers/hwmon/pmbus/ltc2978.c index 58b789c28b48..94eea2ac6251 100644 --- a/drivers/hwmon/pmbus/ltc2978.c +++ b/drivers/hwmon/pmbus/ltc2978.c @@ -89,8 +89,8 @@ enum chips { ltc2974, ltc2975, ltc2977, ltc2978, ltc2980, ltc3880, ltc3882, #define LTC_POLL_TIMEOUT 100 /* in milli-seconds */ -#define LTC_NOT_BUSY BIT(5) -#define LTC_NOT_PENDING BIT(4) +#define LTC_NOT_BUSY BIT(6) +#define LTC_NOT_PENDING BIT(5) /* * LTC2978 clears peak data whenever the CLEAR_FAULTS command is executed, which diff --git a/drivers/hwmon/pmbus/tps53679.c b/drivers/hwmon/pmbus/tps53679.c index 85b515cd9df0..2bc352c5357f 100644 --- a/drivers/hwmon/pmbus/tps53679.c +++ b/drivers/hwmon/pmbus/tps53679.c @@ -80,7 +80,14 @@ static struct pmbus_driver_info tps53679_info = { static int tps53679_probe(struct i2c_client *client, const struct i2c_device_id *id) { - return pmbus_do_probe(client, id, &tps53679_info); + struct pmbus_driver_info *info; + + info = devm_kmemdup(&client->dev, &tps53679_info, sizeof(*info), + GFP_KERNEL); + if (!info) + return -ENOMEM; + + return pmbus_do_probe(client, id, info); } static const struct i2c_device_id tps53679_id[] = { diff --git a/drivers/hwmon/pwm-fan.c b/drivers/hwmon/pwm-fan.c index 6d30bec04f2d..f981da686d7e 100644 --- a/drivers/hwmon/pwm-fan.c +++ b/drivers/hwmon/pwm-fan.c @@ -221,8 +221,12 @@ static int pwm_fan_probe(struct platform_device *pdev) ctx->pwm = devm_of_pwm_get(&pdev->dev, pdev->dev.of_node, NULL); if (IS_ERR(ctx->pwm)) { - dev_err(&pdev->dev, "Could not get PWM\n"); - return PTR_ERR(ctx->pwm); + ret = PTR_ERR(ctx->pwm); + + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, "Could not get PWM: %d\n", ret); + + return ret; } platform_set_drvdata(pdev, ctx); diff --git a/drivers/hwmon/shtc1.c b/drivers/hwmon/shtc1.c index decd7df995ab..2a18539591ea 100644 --- a/drivers/hwmon/shtc1.c +++ b/drivers/hwmon/shtc1.c @@ -38,7 +38,7 @@ static const unsigned char shtc1_cmd_read_id_reg[] = { 0xef, 0xc8 }; /* constants for reading the ID register */ #define SHTC1_ID 0x07 -#define SHTC1_ID_REG_MASK 0x1f +#define SHTC1_ID_REG_MASK 0x3f /* delays for non-blocking i2c commands, both in us */ #define SHTC1_NONBLOCKING_WAIT_TIME_HPM 14400 diff --git a/drivers/hwmon/w83627hf.c b/drivers/hwmon/w83627hf.c index 8ac89d0781cc..a575e1cdb81a 100644 --- a/drivers/hwmon/w83627hf.c +++ b/drivers/hwmon/w83627hf.c @@ -130,17 +130,23 @@ superio_select(struct w83627hf_sio_data *sio, int ld) outb(ld, sio->sioaddr + 1); } -static inline void +static inline int superio_enter(struct w83627hf_sio_data *sio) { + if (!request_muxed_region(sio->sioaddr, 2, DRVNAME)) + return -EBUSY; + outb(0x87, sio->sioaddr); outb(0x87, sio->sioaddr); + + return 0; } static inline void superio_exit(struct w83627hf_sio_data *sio) { outb(0xAA, sio->sioaddr); + release_region(sio->sioaddr, 2); } #define W627_DEVID 0x52 @@ -1278,7 +1284,7 @@ static DEVICE_ATTR_RO(name); static int __init w83627hf_find(int sioaddr, unsigned short *addr, struct w83627hf_sio_data *sio_data) { - int err = -ENODEV; + int err; u16 val; static __initconst char *const names[] = { @@ -1290,7 +1296,11 @@ static int __init w83627hf_find(int sioaddr, unsigned short *addr, }; sio_data->sioaddr = sioaddr; - superio_enter(sio_data); + err = superio_enter(sio_data); + if (err) + return err; + + err = -ENODEV; val = force_id ? force_id : superio_inb(sio_data, DEVID); switch (val) { case W627_DEVID: @@ -1644,9 +1654,21 @@ static int w83627thf_read_gpio5(struct platform_device *pdev) struct w83627hf_sio_data *sio_data = dev_get_platdata(&pdev->dev); int res = 0xff, sel; - superio_enter(sio_data); + if (superio_enter(sio_data)) { + /* + * Some other driver reserved the address space for itself. + * We don't want to fail driver instantiation because of that, + * so display a warning and keep going. + */ + dev_warn(&pdev->dev, + "Can not read VID data: Failed to enable SuperIO access\n"); + return res; + } + superio_select(sio_data, W83627HF_LD_GPIO5); + res = 0xff; + /* Make sure these GPIO pins are enabled */ if (!(superio_inb(sio_data, W83627THF_GPIO5_EN) & (1<<3))) { dev_dbg(&pdev->dev, "GPIO5 disabled, no VID function\n"); @@ -1677,7 +1699,17 @@ static int w83687thf_read_vid(struct platform_device *pdev) struct w83627hf_sio_data *sio_data = dev_get_platdata(&pdev->dev); int res = 0xff; - superio_enter(sio_data); + if (superio_enter(sio_data)) { + /* + * Some other driver reserved the address space for itself. + * We don't want to fail driver instantiation because of that, + * so display a warning and keep going. + */ + dev_warn(&pdev->dev, + "Can not read VID data: Failed to enable SuperIO access\n"); + return res; + } + superio_select(sio_data, W83627HF_LD_HWM); /* Make sure these GPIO pins are enabled */ diff --git a/drivers/hwtracing/coresight/Kconfig b/drivers/hwtracing/coresight/Kconfig index ef9cb3c164e1..44d1650f398e 100644 --- a/drivers/hwtracing/coresight/Kconfig +++ b/drivers/hwtracing/coresight/Kconfig @@ -31,6 +31,17 @@ config CORESIGHT_LINK_AND_SINK_TMC complies with the generic implementation of the component without special enhancement or added features. +config CORESIGHT_CATU + bool "Coresight Address Translation Unit (CATU) driver" + depends on CORESIGHT_LINK_AND_SINK_TMC + help + Enable support for the Coresight Address Translation Unit (CATU). + CATU supports a scatter gather table of 4K pages, with forward/backward + lookup. CATU helps TMC ETR to use a large physically non-contiguous trace + buffer by translating the addresses used by ETR to the physical address + by looking up the provided table. CATU can also be used in pass-through + mode where the address is not translated. + config CORESIGHT_SINK_TPIU bool "Coresight generic TPIU driver" depends on CORESIGHT_LINKS_AND_SINKS @@ -64,6 +75,7 @@ config CORESIGHT_SOURCE_ETM4X bool "CoreSight Embedded Trace Macrocell 4.x driver" depends on ARM64 select CORESIGHT_LINKS_AND_SINKS + select PID_IN_CONTEXTIDR help This driver provides support for the ETM4.x tracer module, tracing the instructions that a processor is executing. This is primarily useful diff --git a/drivers/hwtracing/coresight/Makefile b/drivers/hwtracing/coresight/Makefile index 61db9dd0d571..41870ded51a3 100644 --- a/drivers/hwtracing/coresight/Makefile +++ b/drivers/hwtracing/coresight/Makefile @@ -18,3 +18,4 @@ obj-$(CONFIG_CORESIGHT_SOURCE_ETM4X) += coresight-etm4x.o \ obj-$(CONFIG_CORESIGHT_DYNAMIC_REPLICATOR) += coresight-dynamic-replicator.o obj-$(CONFIG_CORESIGHT_STM) += coresight-stm.o obj-$(CONFIG_CORESIGHT_CPU_DEBUG) += coresight-cpu-debug.o +obj-$(CONFIG_CORESIGHT_CATU) += coresight-catu.o diff --git a/drivers/hwtracing/coresight/coresight-catu.c b/drivers/hwtracing/coresight/coresight-catu.c new file mode 100644 index 000000000000..ff94e58845b7 --- /dev/null +++ b/drivers/hwtracing/coresight/coresight-catu.c @@ -0,0 +1,577 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 Arm Limited. All rights reserved. + * + * Coresight Address Translation Unit support + * + * Author: Suzuki K Poulose + */ + +#include +#include +#include +#include +#include +#include + +#include "coresight-catu.h" +#include "coresight-priv.h" +#include "coresight-tmc.h" + +#define csdev_to_catu_drvdata(csdev) \ + dev_get_drvdata(csdev->dev.parent) + +/* Verbose output for CATU table contents */ +#ifdef CATU_DEBUG +#define catu_dbg(x, ...) dev_dbg(x, __VA_ARGS__) +#else +#define catu_dbg(x, ...) do {} while (0) +#endif + +struct catu_etr_buf { + struct tmc_sg_table *catu_table; + dma_addr_t sladdr; +}; + +/* + * CATU uses a page size of 4KB for page tables as well as data pages. + * Each 64bit entry in the table has the following format. + * + * 63 12 1 0 + * ------------------------------------ + * | Address [63-12] | SBZ | V| + * ------------------------------------ + * + * Where bit[0] V indicates if the address is valid or not. + * Each 4K table pages have upto 256 data page pointers, taking upto 2K + * size. There are two Link pointers, pointing to the previous and next + * table pages respectively at the end of the 4K page. (i.e, entry 510 + * and 511). + * E.g, a table of two pages could look like : + * + * Table Page 0 Table Page 1 + * SLADDR ===> x------------------x x--> x-----------------x + * INADDR ->| Page 0 | V | | | Page 256 | V | <- INADDR+1M + * |------------------| | |-----------------| + * INADDR+4K ->| Page 1 | V | | | | + * |------------------| | |-----------------| + * | Page 2 | V | | | | + * |------------------| | |-----------------| + * | ... | V | | | ... | + * |------------------| | |-----------------| + * INADDR+1020K| Page 255 | V | | | Page 511 | V | + * SLADDR+2K==>|------------------| | |-----------------| + * | UNUSED | | | | | + * |------------------| | | | + * | UNUSED | | | | | + * |------------------| | | | + * | ... | | | | | + * |------------------| | |-----------------| + * | IGNORED | 0 | | | Table Page 0| 1 | + * |------------------| | |-----------------| + * | Table Page 1| 1 |--x | IGNORED | 0 | + * x------------------x x-----------------x + * SLADDR+4K==> + * + * The base input address (used by the ETR, programmed in INADDR_{LO,HI}) + * must be aligned to 1MB (the size addressable by a single page table). + * The CATU maps INADDR{LO:HI} to the first page in the table pointed + * to by SLADDR{LO:HI} and so on. + * + */ +typedef u64 cate_t; + +#define CATU_PAGE_SHIFT 12 +#define CATU_PAGE_SIZE (1UL << CATU_PAGE_SHIFT) +#define CATU_PAGES_PER_SYSPAGE (PAGE_SIZE / CATU_PAGE_SIZE) + +/* Page pointers are only allocated in the first 2K half */ +#define CATU_PTRS_PER_PAGE ((CATU_PAGE_SIZE >> 1) / sizeof(cate_t)) +#define CATU_PTRS_PER_SYSPAGE (CATU_PAGES_PER_SYSPAGE * CATU_PTRS_PER_PAGE) +#define CATU_LINK_PREV ((CATU_PAGE_SIZE / sizeof(cate_t)) - 2) +#define CATU_LINK_NEXT ((CATU_PAGE_SIZE / sizeof(cate_t)) - 1) + +#define CATU_ADDR_SHIFT 12 +#define CATU_ADDR_MASK ~(((cate_t)1 << CATU_ADDR_SHIFT) - 1) +#define CATU_ENTRY_VALID ((cate_t)0x1) +#define CATU_VALID_ENTRY(addr) \ + (((cate_t)(addr) & CATU_ADDR_MASK) | CATU_ENTRY_VALID) +#define CATU_ENTRY_ADDR(entry) ((cate_t)(entry) & ~((cate_t)CATU_ENTRY_VALID)) + +/* CATU expects the INADDR to be aligned to 1M. */ +#define CATU_DEFAULT_INADDR (1ULL << 20) + +/* + * catu_get_table : Retrieve the table pointers for the given @offset + * within the buffer. The buffer is wrapped around to a valid offset. + * + * Returns : The CPU virtual address for the beginning of the table + * containing the data page pointer for @offset. If @daddrp is not NULL, + * @daddrp points the DMA address of the beginning of the table. + */ +static inline cate_t *catu_get_table(struct tmc_sg_table *catu_table, + unsigned long offset, + dma_addr_t *daddrp) +{ + unsigned long buf_size = tmc_sg_table_buf_size(catu_table); + unsigned int table_nr, pg_idx, pg_offset; + struct tmc_pages *table_pages = &catu_table->table_pages; + void *ptr; + + /* Make sure offset is within the range */ + offset %= buf_size; + + /* + * Each table can address 1MB and a single kernel page can + * contain "CATU_PAGES_PER_SYSPAGE" CATU tables. + */ + table_nr = offset >> 20; + /* Find the table page where the table_nr lies in */ + pg_idx = table_nr / CATU_PAGES_PER_SYSPAGE; + pg_offset = (table_nr % CATU_PAGES_PER_SYSPAGE) * CATU_PAGE_SIZE; + if (daddrp) + *daddrp = table_pages->daddrs[pg_idx] + pg_offset; + ptr = page_address(table_pages->pages[pg_idx]); + return (cate_t *)((unsigned long)ptr + pg_offset); +} + +#ifdef CATU_DEBUG +static void catu_dump_table(struct tmc_sg_table *catu_table) +{ + int i; + cate_t *table; + unsigned long table_end, buf_size, offset = 0; + + buf_size = tmc_sg_table_buf_size(catu_table); + dev_dbg(catu_table->dev, + "Dump table %p, tdaddr: %llx\n", + catu_table, catu_table->table_daddr); + + while (offset < buf_size) { + table_end = offset + SZ_1M < buf_size ? + offset + SZ_1M : buf_size; + table = catu_get_table(catu_table, offset, NULL); + for (i = 0; offset < table_end; i++, offset += CATU_PAGE_SIZE) + dev_dbg(catu_table->dev, "%d: %llx\n", i, table[i]); + dev_dbg(catu_table->dev, "Prev : %llx, Next: %llx\n", + table[CATU_LINK_PREV], table[CATU_LINK_NEXT]); + dev_dbg(catu_table->dev, "== End of sub-table ==="); + } + dev_dbg(catu_table->dev, "== End of Table ==="); +} + +#else +static inline void catu_dump_table(struct tmc_sg_table *catu_table) +{ +} +#endif + +static inline cate_t catu_make_entry(dma_addr_t addr) +{ + return addr ? CATU_VALID_ENTRY(addr) : 0; +} + +/* + * catu_populate_table : Populate the given CATU table. + * The table is always populated as a circular table. + * i.e, the "prev" link of the "first" table points to the "last" + * table and the "next" link of the "last" table points to the + * "first" table. The buffer should be made linear by calling + * catu_set_table(). + */ +static void +catu_populate_table(struct tmc_sg_table *catu_table) +{ + int i; + int sys_pidx; /* Index to current system data page */ + int catu_pidx; /* Index of CATU page within the system data page */ + unsigned long offset, buf_size, table_end; + dma_addr_t data_daddr; + dma_addr_t prev_taddr, next_taddr, cur_taddr; + cate_t *table_ptr, *next_table; + + buf_size = tmc_sg_table_buf_size(catu_table); + sys_pidx = catu_pidx = 0; + offset = 0; + + table_ptr = catu_get_table(catu_table, 0, &cur_taddr); + prev_taddr = 0; /* Prev link for the first table */ + + while (offset < buf_size) { + /* + * The @offset is always 1M aligned here and we have an + * empty table @table_ptr to fill. Each table can address + * upto 1MB data buffer. The last table may have fewer + * entries if the buffer size is not aligned. + */ + table_end = (offset + SZ_1M) < buf_size ? + (offset + SZ_1M) : buf_size; + for (i = 0; offset < table_end; + i++, offset += CATU_PAGE_SIZE) { + + data_daddr = catu_table->data_pages.daddrs[sys_pidx] + + catu_pidx * CATU_PAGE_SIZE; + catu_dbg(catu_table->dev, + "[table %5ld:%03d] 0x%llx\n", + (offset >> 20), i, data_daddr); + table_ptr[i] = catu_make_entry(data_daddr); + /* Move the pointers for data pages */ + catu_pidx = (catu_pidx + 1) % CATU_PAGES_PER_SYSPAGE; + if (catu_pidx == 0) + sys_pidx++; + } + + /* + * If we have finished all the valid entries, fill the rest of + * the table (i.e, last table page) with invalid entries, + * to fail the lookups. + */ + if (offset == buf_size) { + memset(&table_ptr[i], 0, + sizeof(cate_t) * (CATU_PTRS_PER_PAGE - i)); + next_taddr = 0; + } else { + next_table = catu_get_table(catu_table, + offset, &next_taddr); + } + + table_ptr[CATU_LINK_PREV] = catu_make_entry(prev_taddr); + table_ptr[CATU_LINK_NEXT] = catu_make_entry(next_taddr); + + catu_dbg(catu_table->dev, + "[table%5ld]: Cur: 0x%llx Prev: 0x%llx, Next: 0x%llx\n", + (offset >> 20) - 1, cur_taddr, prev_taddr, next_taddr); + + /* Update the prev/next addresses */ + if (next_taddr) { + prev_taddr = cur_taddr; + cur_taddr = next_taddr; + table_ptr = next_table; + } + } + + /* Sync the table for device */ + tmc_sg_table_sync_table(catu_table); +} + +static struct tmc_sg_table * +catu_init_sg_table(struct device *catu_dev, int node, + ssize_t size, void **pages) +{ + int nr_tpages; + struct tmc_sg_table *catu_table; + + /* + * Each table can address upto 1MB and we can have + * CATU_PAGES_PER_SYSPAGE tables in a system page. + */ + nr_tpages = DIV_ROUND_UP(size, SZ_1M) / CATU_PAGES_PER_SYSPAGE; + catu_table = tmc_alloc_sg_table(catu_dev, node, nr_tpages, + size >> PAGE_SHIFT, pages); + if (IS_ERR(catu_table)) + return catu_table; + + catu_populate_table(catu_table); + dev_dbg(catu_dev, + "Setup table %p, size %ldKB, %d table pages\n", + catu_table, (unsigned long)size >> 10, nr_tpages); + catu_dump_table(catu_table); + return catu_table; +} + +static void catu_free_etr_buf(struct etr_buf *etr_buf) +{ + struct catu_etr_buf *catu_buf; + + if (!etr_buf || etr_buf->mode != ETR_MODE_CATU || !etr_buf->private) + return; + + catu_buf = etr_buf->private; + tmc_free_sg_table(catu_buf->catu_table); + kfree(catu_buf); +} + +static ssize_t catu_get_data_etr_buf(struct etr_buf *etr_buf, u64 offset, + size_t len, char **bufpp) +{ + struct catu_etr_buf *catu_buf = etr_buf->private; + + return tmc_sg_table_get_data(catu_buf->catu_table, offset, len, bufpp); +} + +static void catu_sync_etr_buf(struct etr_buf *etr_buf, u64 rrp, u64 rwp) +{ + struct catu_etr_buf *catu_buf = etr_buf->private; + struct tmc_sg_table *catu_table = catu_buf->catu_table; + u64 r_offset, w_offset; + + /* + * ETR started off at etr_buf->hwaddr. Convert the RRP/RWP to + * offsets within the trace buffer. + */ + r_offset = rrp - etr_buf->hwaddr; + w_offset = rwp - etr_buf->hwaddr; + + if (!etr_buf->full) { + etr_buf->len = w_offset - r_offset; + if (w_offset < r_offset) + etr_buf->len += etr_buf->size; + } else { + etr_buf->len = etr_buf->size; + } + + etr_buf->offset = r_offset; + tmc_sg_table_sync_data_range(catu_table, r_offset, etr_buf->len); +} + +static int catu_alloc_etr_buf(struct tmc_drvdata *tmc_drvdata, + struct etr_buf *etr_buf, int node, void **pages) +{ + struct coresight_device *csdev; + struct device *catu_dev; + struct tmc_sg_table *catu_table; + struct catu_etr_buf *catu_buf; + + csdev = tmc_etr_get_catu_device(tmc_drvdata); + if (!csdev) + return -ENODEV; + catu_dev = csdev->dev.parent; + catu_buf = kzalloc(sizeof(*catu_buf), GFP_KERNEL); + if (!catu_buf) + return -ENOMEM; + + catu_table = catu_init_sg_table(catu_dev, node, etr_buf->size, pages); + if (IS_ERR(catu_table)) { + kfree(catu_buf); + return PTR_ERR(catu_table); + } + + etr_buf->mode = ETR_MODE_CATU; + etr_buf->private = catu_buf; + etr_buf->hwaddr = CATU_DEFAULT_INADDR; + + catu_buf->catu_table = catu_table; + /* Get the table base address */ + catu_buf->sladdr = catu_table->table_daddr; + + return 0; +} + +const struct etr_buf_operations etr_catu_buf_ops = { + .alloc = catu_alloc_etr_buf, + .free = catu_free_etr_buf, + .sync = catu_sync_etr_buf, + .get_data = catu_get_data_etr_buf, +}; + +coresight_simple_reg32(struct catu_drvdata, devid, CORESIGHT_DEVID); +coresight_simple_reg32(struct catu_drvdata, control, CATU_CONTROL); +coresight_simple_reg32(struct catu_drvdata, status, CATU_STATUS); +coresight_simple_reg32(struct catu_drvdata, mode, CATU_MODE); +coresight_simple_reg32(struct catu_drvdata, axictrl, CATU_AXICTRL); +coresight_simple_reg32(struct catu_drvdata, irqen, CATU_IRQEN); +coresight_simple_reg64(struct catu_drvdata, sladdr, + CATU_SLADDRLO, CATU_SLADDRHI); +coresight_simple_reg64(struct catu_drvdata, inaddr, + CATU_INADDRLO, CATU_INADDRHI); + +static struct attribute *catu_mgmt_attrs[] = { + &dev_attr_devid.attr, + &dev_attr_control.attr, + &dev_attr_status.attr, + &dev_attr_mode.attr, + &dev_attr_axictrl.attr, + &dev_attr_irqen.attr, + &dev_attr_sladdr.attr, + &dev_attr_inaddr.attr, + NULL, +}; + +static const struct attribute_group catu_mgmt_group = { + .attrs = catu_mgmt_attrs, + .name = "mgmt", +}; + +static const struct attribute_group *catu_groups[] = { + &catu_mgmt_group, + NULL, +}; + + +static inline int catu_wait_for_ready(struct catu_drvdata *drvdata) +{ + return coresight_timeout(drvdata->base, + CATU_STATUS, CATU_STATUS_READY, 1); +} + +static int catu_enable_hw(struct catu_drvdata *drvdata, void *data) +{ + u32 control, mode; + struct etr_buf *etr_buf = data; + + if (catu_wait_for_ready(drvdata)) + dev_warn(drvdata->dev, "Timeout while waiting for READY\n"); + + control = catu_read_control(drvdata); + if (control & BIT(CATU_CONTROL_ENABLE)) { + dev_warn(drvdata->dev, "CATU is already enabled\n"); + return -EBUSY; + } + + control |= BIT(CATU_CONTROL_ENABLE); + + if (etr_buf && etr_buf->mode == ETR_MODE_CATU) { + struct catu_etr_buf *catu_buf = etr_buf->private; + + mode = CATU_MODE_TRANSLATE; + catu_write_axictrl(drvdata, CATU_OS_AXICTRL); + catu_write_sladdr(drvdata, catu_buf->sladdr); + catu_write_inaddr(drvdata, CATU_DEFAULT_INADDR); + } else { + mode = CATU_MODE_PASS_THROUGH; + catu_write_sladdr(drvdata, 0); + catu_write_inaddr(drvdata, 0); + } + + catu_write_irqen(drvdata, 0); + catu_write_mode(drvdata, mode); + catu_write_control(drvdata, control); + dev_dbg(drvdata->dev, "Enabled in %s mode\n", + (mode == CATU_MODE_PASS_THROUGH) ? + "Pass through" : + "Translate"); + return 0; +} + +static int catu_enable(struct coresight_device *csdev, void *data) +{ + int rc; + struct catu_drvdata *catu_drvdata = csdev_to_catu_drvdata(csdev); + + CS_UNLOCK(catu_drvdata->base); + rc = catu_enable_hw(catu_drvdata, data); + CS_LOCK(catu_drvdata->base); + return rc; +} + +static int catu_disable_hw(struct catu_drvdata *drvdata) +{ + int rc = 0; + + catu_write_control(drvdata, 0); + if (catu_wait_for_ready(drvdata)) { + dev_info(drvdata->dev, "Timeout while waiting for READY\n"); + rc = -EAGAIN; + } + + dev_dbg(drvdata->dev, "Disabled\n"); + return rc; +} + +static int catu_disable(struct coresight_device *csdev, void *__unused) +{ + int rc; + struct catu_drvdata *catu_drvdata = csdev_to_catu_drvdata(csdev); + + CS_UNLOCK(catu_drvdata->base); + rc = catu_disable_hw(catu_drvdata); + CS_LOCK(catu_drvdata->base); + return rc; +} + +const struct coresight_ops_helper catu_helper_ops = { + .enable = catu_enable, + .disable = catu_disable, +}; + +const struct coresight_ops catu_ops = { + .helper_ops = &catu_helper_ops, +}; + +static int catu_probe(struct amba_device *adev, const struct amba_id *id) +{ + int ret = 0; + u32 dma_mask; + struct catu_drvdata *drvdata; + struct coresight_desc catu_desc; + struct coresight_platform_data *pdata = NULL; + struct device *dev = &adev->dev; + struct device_node *np = dev->of_node; + void __iomem *base; + + if (np) { + pdata = of_get_coresight_platform_data(dev, np); + if (IS_ERR(pdata)) { + ret = PTR_ERR(pdata); + goto out; + } + dev->platform_data = pdata; + } + + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) { + ret = -ENOMEM; + goto out; + } + + drvdata->dev = dev; + dev_set_drvdata(dev, drvdata); + base = devm_ioremap_resource(dev, &adev->res); + if (IS_ERR(base)) { + ret = PTR_ERR(base); + goto out; + } + + /* Setup dma mask for the device */ + dma_mask = readl_relaxed(base + CORESIGHT_DEVID) & 0x3f; + switch (dma_mask) { + case 32: + case 40: + case 44: + case 48: + case 52: + case 56: + case 64: + break; + default: + /* Default to the 40bits as supported by TMC-ETR */ + dma_mask = 40; + } + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(dma_mask)); + if (ret) + goto out; + + drvdata->base = base; + catu_desc.pdata = pdata; + catu_desc.dev = dev; + catu_desc.groups = catu_groups; + catu_desc.type = CORESIGHT_DEV_TYPE_HELPER; + catu_desc.subtype.helper_subtype = CORESIGHT_DEV_SUBTYPE_HELPER_CATU; + catu_desc.ops = &catu_ops; + drvdata->csdev = coresight_register(&catu_desc); + if (IS_ERR(drvdata->csdev)) + ret = PTR_ERR(drvdata->csdev); +out: + pm_runtime_put(&adev->dev); + return ret; +} + +static struct amba_id catu_ids[] = { + { + .id = 0x000bb9ee, + .mask = 0x000fffff, + }, + {}, +}; + +static struct amba_driver catu_driver = { + .drv = { + .name = "coresight-catu", + .owner = THIS_MODULE, + .suppress_bind_attrs = true, + }, + .probe = catu_probe, + .id_table = catu_ids, +}; + +builtin_amba_driver(catu_driver); diff --git a/drivers/hwtracing/coresight/coresight-catu.h b/drivers/hwtracing/coresight/coresight-catu.h new file mode 100644 index 000000000000..1d2ad183fd92 --- /dev/null +++ b/drivers/hwtracing/coresight/coresight-catu.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Arm Limited. All rights reserved. + * + * Author: Suzuki K Poulose + */ + +#ifndef _CORESIGHT_CATU_H +#define _CORESIGHT_CATU_H + +#include "coresight-priv.h" + +/* Register offset from base */ +#define CATU_CONTROL 0x000 +#define CATU_MODE 0x004 +#define CATU_AXICTRL 0x008 +#define CATU_IRQEN 0x00c +#define CATU_SLADDRLO 0x020 +#define CATU_SLADDRHI 0x024 +#define CATU_INADDRLO 0x028 +#define CATU_INADDRHI 0x02c +#define CATU_STATUS 0x100 +#define CATU_DEVARCH 0xfbc + +#define CATU_CONTROL_ENABLE 0 + +#define CATU_MODE_PASS_THROUGH 0U +#define CATU_MODE_TRANSLATE 1U + +#define CATU_AXICTRL_ARCACHE_SHIFT 4 +#define CATU_AXICTRL_ARCACHE_MASK 0xf +#define CATU_AXICTRL_ARPROT_MASK 0x3 +#define CATU_AXICTRL_ARCACHE(arcache) \ + (((arcache) & CATU_AXICTRL_ARCACHE_MASK) << CATU_AXICTRL_ARCACHE_SHIFT) + +#define CATU_AXICTRL_VAL(arcache, arprot) \ + (CATU_AXICTRL_ARCACHE(arcache) | ((arprot) & CATU_AXICTRL_ARPROT_MASK)) + +#define AXI3_AxCACHE_WB_READ_ALLOC 0x7 +/* + * AXI - ARPROT bits: + * See AMBA AXI & ACE Protocol specification (ARM IHI 0022E) + * sectionA4.7 Access Permissions. + * + * Bit 0: 0 - Unprivileged access, 1 - Privileged access + * Bit 1: 0 - Secure access, 1 - Non-secure access. + * Bit 2: 0 - Data access, 1 - instruction access. + * + * CATU AXICTRL:ARPROT[2] is res0 as we always access data. + */ +#define CATU_OS_ARPROT 0x2 + +#define CATU_OS_AXICTRL \ + CATU_AXICTRL_VAL(AXI3_AxCACHE_WB_READ_ALLOC, CATU_OS_ARPROT) + +#define CATU_STATUS_READY 8 +#define CATU_STATUS_ADRERR 0 +#define CATU_STATUS_AXIERR 4 + +#define CATU_IRQEN_ON 0x1 +#define CATU_IRQEN_OFF 0x0 + +struct catu_drvdata { + struct device *dev; + void __iomem *base; + struct coresight_device *csdev; + int irq; +}; + +#define CATU_REG32(name, offset) \ +static inline u32 \ +catu_read_##name(struct catu_drvdata *drvdata) \ +{ \ + return coresight_read_reg_pair(drvdata->base, offset, -1); \ +} \ +static inline void \ +catu_write_##name(struct catu_drvdata *drvdata, u32 val) \ +{ \ + coresight_write_reg_pair(drvdata->base, val, offset, -1); \ +} + +#define CATU_REG_PAIR(name, lo_off, hi_off) \ +static inline u64 \ +catu_read_##name(struct catu_drvdata *drvdata) \ +{ \ + return coresight_read_reg_pair(drvdata->base, lo_off, hi_off); \ +} \ +static inline void \ +catu_write_##name(struct catu_drvdata *drvdata, u64 val) \ +{ \ + coresight_write_reg_pair(drvdata->base, val, lo_off, hi_off); \ +} + +CATU_REG32(control, CATU_CONTROL); +CATU_REG32(mode, CATU_MODE); +CATU_REG32(irqen, CATU_IRQEN); +CATU_REG32(axictrl, CATU_AXICTRL); +CATU_REG_PAIR(sladdr, CATU_SLADDRLO, CATU_SLADDRHI) +CATU_REG_PAIR(inaddr, CATU_INADDRLO, CATU_INADDRHI) + +static inline bool coresight_is_catu_device(struct coresight_device *csdev) +{ + if (!IS_ENABLED(CONFIG_CORESIGHT_CATU)) + return false; + if (csdev->type != CORESIGHT_DEV_TYPE_HELPER) + return false; + if (csdev->subtype.helper_subtype != CORESIGHT_DEV_SUBTYPE_HELPER_CATU) + return false; + return true; +} + +extern const struct etr_buf_operations etr_catu_buf_ops; + +#endif diff --git a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c index accc2056f7c6..6a3fd22d757c 100644 --- a/drivers/hwtracing/coresight/coresight-dynamic-replicator.c +++ b/drivers/hwtracing/coresight/coresight-dynamic-replicator.c @@ -34,56 +34,131 @@ * @dev: the device entity associated with this component * @atclk: optional clock for the core parts of the replicator. * @csdev: component vitals needed by the framework + * @spinlock: serialize enable/disable operations. */ struct replicator_state { void __iomem *base; struct device *dev; struct clk *atclk; struct coresight_device *csdev; + spinlock_t spinlock; }; -static int replicator_enable(struct coresight_device *csdev, int inport, - int outport) +/* + * replicator_reset : Reset the replicator configuration to sane values. + */ +static void replicator_reset(struct replicator_state *drvdata) { - struct replicator_state *drvdata = dev_get_drvdata(csdev->dev.parent); - CS_UNLOCK(drvdata->base); - /* - * Ensure that the other port is disabled - * 0x00 - passing through the replicator unimpeded - * 0xff - disable (or impede) the flow of ATB data - */ - if (outport == 0) { - writel_relaxed(0x00, drvdata->base + REPLICATOR_IDFILTER0); - writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1); - } else { - writel_relaxed(0x00, drvdata->base + REPLICATOR_IDFILTER1); + if (!coresight_claim_device_unlocked(drvdata->base)) { writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER0); + writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1); + coresight_disclaim_device_unlocked(drvdata->base); } CS_LOCK(drvdata->base); - - dev_info(drvdata->dev, "REPLICATOR enabled\n"); - return 0; } -static void replicator_disable(struct coresight_device *csdev, int inport, - int outport) +static int dynamic_replicator_enable(struct replicator_state *drvdata, + int inport, int outport) { + int rc = 0; + u32 reg; + + switch (outport) { + case 0: + reg = REPLICATOR_IDFILTER0; + break; + case 1: + reg = REPLICATOR_IDFILTER1; + break; + default: + WARN_ON(1); + return -EINVAL; + } + + CS_UNLOCK(drvdata->base); + + if ((readl_relaxed(drvdata->base + REPLICATOR_IDFILTER0) == 0xff) && + (readl_relaxed(drvdata->base + REPLICATOR_IDFILTER1) == 0xff)) + rc = coresight_claim_device_unlocked(drvdata->base); + + /* Ensure that the outport is enabled. */ + if (!rc) + writel_relaxed(0x00, drvdata->base + reg); + CS_LOCK(drvdata->base); + + return rc; +} + +static int replicator_enable(struct coresight_device *csdev, int inport, + int outport) +{ + int rc = 0; struct replicator_state *drvdata = dev_get_drvdata(csdev->dev.parent); + unsigned long flags; + bool first_enable = false; + + spin_lock_irqsave(&drvdata->spinlock, flags); + if (atomic_read(&csdev->refcnt[outport]) == 0) { + rc = dynamic_replicator_enable(drvdata, inport, outport); + if (!rc) + first_enable = true; + } + if (!rc) + atomic_inc(&csdev->refcnt[outport]); + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + if (first_enable) + dev_dbg(&csdev->dev, "REPLICATOR enabled\n"); + return rc; +} + +static void dynamic_replicator_disable(struct replicator_state *drvdata, + int inport, int outport) +{ + u32 reg; + + switch (outport) { + case 0: + reg = REPLICATOR_IDFILTER0; + break; + case 1: + reg = REPLICATOR_IDFILTER1; + break; + default: + WARN_ON(1); + return; + } CS_UNLOCK(drvdata->base); /* disable the flow of ATB data through port */ - if (outport == 0) - writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER0); - else - writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1); + writel_relaxed(0xff, drvdata->base + reg); + if ((readl_relaxed(drvdata->base + REPLICATOR_IDFILTER0) == 0xff) && + (readl_relaxed(drvdata->base + REPLICATOR_IDFILTER1) == 0xff)) + coresight_disclaim_device_unlocked(drvdata->base); CS_LOCK(drvdata->base); +} - dev_info(drvdata->dev, "REPLICATOR disabled\n"); +static void replicator_disable(struct coresight_device *csdev, int inport, + int outport) +{ + struct replicator_state *drvdata = dev_get_drvdata(csdev->dev.parent); + unsigned long flags; + bool last_disable = false; + + spin_lock_irqsave(&drvdata->spinlock, flags); + if (atomic_dec_return(&csdev->refcnt[outport]) == 0) { + dynamic_replicator_disable(drvdata, inport, outport); + last_disable = true; + } + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + if (last_disable) + dev_dbg(drvdata->dev, "REPLICATOR disabled\n"); } static const struct coresight_ops_link replicator_link_ops = { @@ -156,6 +231,7 @@ static int replicator_probe(struct amba_device *adev, const struct amba_id *id) dev_set_drvdata(dev, drvdata); pm_runtime_put(&adev->dev); + spin_lock_init(&drvdata->spinlock); desc.type = CORESIGHT_DEV_TYPE_LINK; desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_SPLIT; desc.ops = &replicator_cs_ops; @@ -163,10 +239,12 @@ static int replicator_probe(struct amba_device *adev, const struct amba_id *id) desc.dev = &adev->dev; desc.groups = replicator_groups; drvdata->csdev = coresight_register(&desc); - if (IS_ERR(drvdata->csdev)) - return PTR_ERR(drvdata->csdev); - return 0; + if (!IS_ERR(drvdata->csdev)) { + replicator_reset(drvdata); + return 0; + } + return PTR_ERR(drvdata->csdev); } #ifdef CONFIG_PM diff --git a/drivers/hwtracing/coresight/coresight-etb10.c b/drivers/hwtracing/coresight/coresight-etb10.c index d14a9cb7959a..ad1109639956 100644 --- a/drivers/hwtracing/coresight/coresight-etb10.c +++ b/drivers/hwtracing/coresight/coresight-etb10.c @@ -12,7 +12,7 @@ * GNU General Public License for more details. */ -#include +#include #include #include #include @@ -36,6 +36,7 @@ #include #include "coresight-priv.h" +#include "coresight-etm-perf.h" #define ETB_RAM_DEPTH_REG 0x004 #define ETB_STATUS_REG 0x00c @@ -79,8 +80,8 @@ * @miscdev: specifics to handle "/dev/xyz.etb" entry. * @spinlock: only one at a time pls. * @reading: synchronise user space access to etb buffer. - * @mode: this ETB is being used. * @buf: area of memory where ETB buffer content gets sent. + * @mode: this ETB is being used. * @buffer_depth: size of @buf. * @trigger_cntr: amount of words to store after a trigger. */ @@ -92,12 +93,15 @@ struct etb_drvdata { struct miscdevice miscdev; spinlock_t spinlock; local_t reading; - local_t mode; u8 *buf; + u32 mode; u32 buffer_depth; u32 trigger_cntr; }; +static int etb_set_buffer(struct coresight_device *csdev, + struct perf_output_handle *handle); + static unsigned int etb_get_buffer_depth(struct etb_drvdata *drvdata) { u32 depth = 0; @@ -111,7 +115,7 @@ static unsigned int etb_get_buffer_depth(struct etb_drvdata *drvdata) return depth; } -static void etb_enable_hw(struct etb_drvdata *drvdata) +static void __etb_enable_hw(struct etb_drvdata *drvdata) { int i; u32 depth; @@ -139,40 +143,104 @@ static void etb_enable_hw(struct etb_drvdata *drvdata) CS_LOCK(drvdata->base); } -static int etb_enable(struct coresight_device *csdev, u32 mode) +static int etb_enable_hw(struct etb_drvdata *drvdata) { - u32 val; - unsigned long flags; - struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + int rc = coresight_claim_device(drvdata->base); - val = local_cmpxchg(&drvdata->mode, - CS_MODE_DISABLED, mode); - /* - * When accessing from Perf, a HW buffer can be handled - * by a single trace entity. In sysFS mode many tracers - * can be logging to the same HW buffer. - */ - if (val == CS_MODE_PERF) - return -EBUSY; + if (rc) + return rc; - /* Don't let perf disturb sysFS sessions */ - if (val == CS_MODE_SYSFS && mode == CS_MODE_PERF) - return -EBUSY; - - /* Nothing to do, the tracer is already enabled. */ - if (val == CS_MODE_SYSFS) - goto out; - - spin_lock_irqsave(&drvdata->spinlock, flags); - etb_enable_hw(drvdata); - spin_unlock_irqrestore(&drvdata->spinlock, flags); - -out: - dev_info(drvdata->dev, "ETB enabled\n"); + __etb_enable_hw(drvdata); return 0; } -static void etb_disable_hw(struct etb_drvdata *drvdata) +static int etb_enable_sysfs(struct coresight_device *csdev) +{ + int ret = 0; + unsigned long flags; + struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + spin_lock_irqsave(&drvdata->spinlock, flags); + + /* Don't messup with perf sessions. */ + if (drvdata->mode == CS_MODE_PERF) { + ret = -EBUSY; + goto out; + } + + if (drvdata->mode == CS_MODE_DISABLED) { + ret = etb_enable_hw(drvdata); + if (ret) + goto out; + + drvdata->mode = CS_MODE_SYSFS; + } + + atomic_inc(csdev->refcnt); +out: + spin_unlock_irqrestore(&drvdata->spinlock, flags); + return ret; +} + +static int etb_enable_perf(struct coresight_device *csdev, void *data) +{ + int ret = 0; + unsigned long flags; + struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + spin_lock_irqsave(&drvdata->spinlock, flags); + + /* No need to continue if the component is already in use. */ + if (drvdata->mode != CS_MODE_DISABLED) { + ret = -EBUSY; + goto out; + } + + /* + * We don't have an internal state to clean up if we fail to setup + * the perf buffer. So we can perform the step before we turn the + * ETB on and leave without cleaning up. + */ + ret = etb_set_buffer(csdev, (struct perf_output_handle *)data); + if (ret) + goto out; + + ret = etb_enable_hw(drvdata); + if (!ret) { + drvdata->mode = CS_MODE_PERF; + atomic_inc(csdev->refcnt); + } + +out: + spin_unlock_irqrestore(&drvdata->spinlock, flags); + return ret; +} + +static int etb_enable(struct coresight_device *csdev, u32 mode, void *data) +{ + int ret; + struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + switch (mode) { + case CS_MODE_SYSFS: + ret = etb_enable_sysfs(csdev); + break; + case CS_MODE_PERF: + ret = etb_enable_perf(csdev, data); + break; + default: + ret = -EINVAL; + break; + } + + if (ret) + return ret; + + dev_dbg(drvdata->dev, "ETB enabled\n"); + return 0; +} + +static void __etb_disable_hw(struct etb_drvdata *drvdata) { u32 ffcr; @@ -207,7 +275,6 @@ static void etb_dump_hw(struct etb_drvdata *drvdata) bool lost = false; int i; u8 *buf_ptr; - const u32 *barrier; u32 read_data, depth; u32 read_ptr, write_ptr; u32 frame_off, frame_endoff; @@ -238,19 +305,16 @@ static void etb_dump_hw(struct etb_drvdata *drvdata) depth = drvdata->buffer_depth; buf_ptr = drvdata->buf; - barrier = barrier_pkt; for (i = 0; i < depth; i++) { read_data = readl_relaxed(drvdata->base + ETB_RAM_READ_DATA_REG); - if (lost && *barrier) { - read_data = *barrier; - barrier++; - } - *(u32 *)buf_ptr = read_data; buf_ptr += 4; } + if (lost) + coresight_insert_barrier_packet(drvdata->buf); + if (frame_off) { buf_ptr -= (frame_endoff * 4); for (i = 0; i < frame_endoff; i++) { @@ -266,30 +330,43 @@ static void etb_dump_hw(struct etb_drvdata *drvdata) CS_LOCK(drvdata->base); } -static void etb_disable(struct coresight_device *csdev) +static void etb_disable_hw(struct etb_drvdata *drvdata) +{ + __etb_disable_hw(drvdata); + etb_dump_hw(drvdata); + coresight_disclaim_device(drvdata->base); +} + +static int etb_disable(struct coresight_device *csdev) { struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); unsigned long flags; spin_lock_irqsave(&drvdata->spinlock, flags); + + if (atomic_dec_return(csdev->refcnt)) { + spin_unlock_irqrestore(&drvdata->spinlock, flags); + return -EBUSY; + } + + /* Complain if we (somehow) got out of sync */ + WARN_ON_ONCE(drvdata->mode == CS_MODE_DISABLED); etb_disable_hw(drvdata); - etb_dump_hw(drvdata); + drvdata->mode = CS_MODE_DISABLED; spin_unlock_irqrestore(&drvdata->spinlock, flags); - local_set(&drvdata->mode, CS_MODE_DISABLED); - - dev_info(drvdata->dev, "ETB disabled\n"); + dev_dbg(drvdata->dev, "ETB disabled\n"); + return 0; } -static void *etb_alloc_buffer(struct coresight_device *csdev, int cpu, - void **pages, int nr_pages, bool overwrite) +static void *etb_alloc_buffer(struct coresight_device *csdev, + struct perf_event *event, void **pages, + int nr_pages, bool overwrite) { - int node; + int node, cpu = event->cpu; struct cs_buffers *buf; - if (cpu == -1) - cpu = smp_processor_id(); - node = cpu_to_node(cpu); + node = (cpu == -1) ? NUMA_NO_NODE : cpu_to_node(cpu); buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node); if (!buf) @@ -310,12 +387,14 @@ static void etb_free_buffer(void *config) } static int etb_set_buffer(struct coresight_device *csdev, - struct perf_output_handle *handle, - void *sink_config) + struct perf_output_handle *handle) { int ret = 0; unsigned long head; - struct cs_buffers *buf = sink_config; + struct cs_buffers *buf = etm_perf_sink_config(handle); + + if (!buf) + return -EINVAL; /* wrap head around to the amount of space we have */ head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1); @@ -331,37 +410,7 @@ static int etb_set_buffer(struct coresight_device *csdev, return ret; } -static unsigned long etb_reset_buffer(struct coresight_device *csdev, - struct perf_output_handle *handle, - void *sink_config) -{ - unsigned long size = 0; - struct cs_buffers *buf = sink_config; - - if (buf) { - /* - * In snapshot mode ->data_size holds the new address of the - * ring buffer's head. The size itself is the whole address - * range since we want the latest information. - */ - if (buf->snapshot) - handle->head = local_xchg(&buf->data_size, - buf->nr_pages << PAGE_SHIFT); - - /* - * Tell the tracer PMU how much we got in this run and if - * something went wrong along the way. Nobody else can use - * this cs_buffers instance until we are done. As such - * resetting parameters here and squaring off with the ring - * buffer API in the tracer PMU is fine. - */ - size = local_xchg(&buf->data_size, 0); - } - - return size; -} - -static void etb_update_buffer(struct coresight_device *csdev, +static unsigned long etb_update_buffer(struct coresight_device *csdev, struct perf_output_handle *handle, void *sink_config) { @@ -370,17 +419,18 @@ static void etb_update_buffer(struct coresight_device *csdev, u8 *buf_ptr; const u32 *barrier; u32 read_ptr, write_ptr, capacity; - u32 status, read_data, to_read; - unsigned long offset; + u32 status, read_data; + unsigned long offset, to_read, flags; struct cs_buffers *buf = sink_config; struct etb_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); if (!buf) - return; + return 0; capacity = drvdata->buffer_depth * ETB_FRAME_SIZE_WORDS; - etb_disable_hw(drvdata); + spin_lock_irqsave(&drvdata->spinlock, flags); + __etb_disable_hw(drvdata); CS_UNLOCK(drvdata->base); /* unit is in words, not bytes */ @@ -445,7 +495,13 @@ static void etb_update_buffer(struct coresight_device *csdev, lost = true; } - if (lost) + /* + * Don't set the TRUNCATED flag in snapshot mode because 1) the + * captured buffer is expected to be truncated and 2) a full buffer + * prevents the event from being re-enabled by the perf core, + * resulting in stale data being send to user space. + */ + if (!buf->snapshot && lost) perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); /* finally tell HW where we want to start reading from */ @@ -459,7 +515,7 @@ static void etb_update_buffer(struct coresight_device *csdev, buf_ptr = buf->data_pages[cur] + offset; read_data = readl_relaxed(drvdata->base + ETB_RAM_READ_DATA_REG); - if (lost && *barrier) { + if (lost && i < CORESIGHT_BARRIER_PKT_SIZE) { read_data = *barrier; barrier++; } @@ -481,18 +537,18 @@ static void etb_update_buffer(struct coresight_device *csdev, writel_relaxed(0x0, drvdata->base + ETB_RAM_WRITE_POINTER); /* - * In snapshot mode all we have to do is communicate to - * perf_aux_output_end() the address of the current head. In full - * trace mode the same function expects a size to move rb->aux_head - * forward. + * In snapshot mode we have to update the handle->head to point + * to the new location. */ - if (buf->snapshot) - local_set(&buf->data_size, (cur * PAGE_SIZE) + offset); - else - local_add(to_read, &buf->data_size); - - etb_enable_hw(drvdata); + if (buf->snapshot) { + handle->head = (cur * PAGE_SIZE) + offset; + to_read = buf->nr_pages << PAGE_SHIFT; + } + __etb_enable_hw(drvdata); CS_LOCK(drvdata->base); + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + return to_read; } static const struct coresight_ops_sink etb_sink_ops = { @@ -500,8 +556,6 @@ static const struct coresight_ops_sink etb_sink_ops = { .disable = etb_disable, .alloc_buffer = etb_alloc_buffer, .free_buffer = etb_free_buffer, - .set_buffer = etb_set_buffer, - .reset_buffer = etb_reset_buffer, .update_buffer = etb_update_buffer, }; @@ -514,14 +568,14 @@ static void etb_dump(struct etb_drvdata *drvdata) unsigned long flags; spin_lock_irqsave(&drvdata->spinlock, flags); - if (local_read(&drvdata->mode) == CS_MODE_SYSFS) { - etb_disable_hw(drvdata); + if (drvdata->mode == CS_MODE_SYSFS) { + __etb_disable_hw(drvdata); etb_dump_hw(drvdata); - etb_enable_hw(drvdata); + __etb_enable_hw(drvdata); } spin_unlock_irqrestore(&drvdata->spinlock, flags); - dev_info(drvdata->dev, "ETB dumped\n"); + dev_dbg(drvdata->dev, "ETB dumped\n"); } static int etb_open(struct inode *inode, struct file *file) diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c index 8a0ad77574e7..5cc053d7ed05 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.c +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c @@ -23,7 +23,9 @@ #include #include #include +#include #include +#include #include #include @@ -33,32 +35,23 @@ static struct pmu etm_pmu; static bool etm_perf_up; -/** - * struct etm_event_data - Coresight specifics associated to an event - * @work: Handle to free allocated memory outside IRQ context. - * @mask: Hold the CPU(s) this event was set for. - * @snk_config: The sink configuration. - * @path: An array of path, each slot for one CPU. - */ -struct etm_event_data { - struct work_struct work; - cpumask_t mask; - void *snk_config; - struct list_head **path; -}; - static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle); static DEFINE_PER_CPU(struct coresight_device *, csdev_src); /* ETMv3.5/PTM's ETMCR is 'config' */ PMU_FORMAT_ATTR(cycacc, "config:" __stringify(ETM_OPT_CYCACC)); +PMU_FORMAT_ATTR(contextid, "config:" __stringify(ETM_OPT_CTXTID)); PMU_FORMAT_ATTR(timestamp, "config:" __stringify(ETM_OPT_TS)); PMU_FORMAT_ATTR(retstack, "config:" __stringify(ETM_OPT_RETSTK)); +/* Sink ID - same for all ETMs */ +PMU_FORMAT_ATTR(sinkid, "config2:0-31"); static struct attribute *etm_config_formats_attr[] = { &format_attr_cycacc.attr, + &format_attr_contextid.attr, &format_attr_timestamp.attr, &format_attr_retstack.attr, + &format_attr_sinkid.attr, NULL, }; @@ -67,11 +60,33 @@ static const struct attribute_group etm_pmu_format_group = { .attrs = etm_config_formats_attr, }; -static const struct attribute_group *etm_pmu_attr_groups[] = { - &etm_pmu_format_group, +static struct attribute *etm_config_sinks_attr[] = { NULL, }; +static const struct attribute_group etm_pmu_sinks_group = { + .name = "sinks", + .attrs = etm_config_sinks_attr, +}; + +static const struct attribute_group *etm_pmu_attr_groups[] = { + &etm_pmu_format_group, + &etm_pmu_sinks_group, + NULL, +}; + +static inline struct list_head ** +etm_event_cpu_path_ptr(struct etm_event_data *data, int cpu) +{ + return per_cpu_ptr(data->path, cpu); +} + +static inline struct list_head * +etm_event_cpu_path(struct etm_event_data *data, int cpu) +{ + return *etm_event_cpu_path_ptr(data, cpu); +} + static void etm_event_read(struct perf_event *event) {} static int etm_addr_filters_alloc(struct perf_event *event) @@ -116,38 +131,50 @@ out: return ret; } +static void free_sink_buffer(struct etm_event_data *event_data) +{ + int cpu; + cpumask_t *mask = &event_data->mask; + struct coresight_device *sink; + + if (WARN_ON(cpumask_empty(mask))) + return; + + if (!event_data->snk_config) + return; + + cpu = cpumask_first(mask); + sink = coresight_get_sink(etm_event_cpu_path(event_data, cpu)); + sink_ops(sink)->free_buffer(event_data->snk_config); +} + static void free_event_data(struct work_struct *work) { int cpu; cpumask_t *mask; struct etm_event_data *event_data; - struct coresight_device *sink; event_data = container_of(work, struct etm_event_data, work); mask = &event_data->mask; - /* - * First deal with the sink configuration. See comment in - * etm_setup_aux() about why we take the first available path. - */ - if (event_data->snk_config) { - cpu = cpumask_first(mask); - sink = coresight_get_sink(event_data->path[cpu]); - if (sink_ops(sink)->free_buffer) - sink_ops(sink)->free_buffer(event_data->snk_config); - } + + /* Free the sink buffers, if there are any */ + free_sink_buffer(event_data); for_each_cpu(cpu, mask) { - if (!(IS_ERR_OR_NULL(event_data->path[cpu]))) - coresight_release_path(event_data->path[cpu]); + struct list_head **ppath; + + ppath = etm_event_cpu_path_ptr(event_data, cpu); + if (!(IS_ERR_OR_NULL(*ppath))) + coresight_release_path(*ppath); + *ppath = NULL; } - kfree(event_data->path); + free_percpu(event_data->path); kfree(event_data); } static void *alloc_event_data(int cpu) { - int size; cpumask_t *mask; struct etm_event_data *event_data; @@ -156,16 +183,12 @@ static void *alloc_event_data(int cpu) if (!event_data) return NULL; - /* Make sure nothing disappears under us */ - get_online_cpus(); - size = num_online_cpus(); mask = &event_data->mask; if (cpu != -1) cpumask_set_cpu(cpu, mask); else - cpumask_copy(mask, cpu_online_mask); - put_online_cpus(); + cpumask_copy(mask, cpu_present_mask); /* * Each CPU has a single path between source and destination. As such @@ -175,8 +198,8 @@ static void *alloc_event_data(int cpu) * unused memory when dealing with single CPU trace scenarios is small * compared to the cost of searching through an optimized array. */ - event_data->path = kcalloc(size, - sizeof(struct list_head *), GFP_KERNEL); + event_data->path = alloc_percpu(struct list_head *); + if (!event_data->path) { kfree(event_data); return NULL; @@ -192,61 +215,80 @@ static void etm_free_aux(void *data) schedule_work(&event_data->work); } -static void *etm_setup_aux(int event_cpu, void **pages, +static void *etm_setup_aux(struct perf_event *event, void **pages, int nr_pages, bool overwrite) { - int cpu; + u32 id; + int cpu = event->cpu; cpumask_t *mask; struct coresight_device *sink; struct etm_event_data *event_data = NULL; - event_data = alloc_event_data(event_cpu); + event_data = alloc_event_data(cpu); if (!event_data) return NULL; INIT_WORK(&event_data->work, free_event_data); - /* - * In theory nothing prevent tracers in a trace session from being - * associated with different sinks, nor having a sink per tracer. But - * until we have HW with this kind of topology we need to assume tracers - * in a trace session are using the same sink. Therefore go through - * the coresight bus and pick the first enabled sink. - * - * When operated from sysFS users are responsible to enable the sink - * while from perf, the perf tools will do it based on the choice made - * on the cmd line. As such the "enable_sink" flag in sysFS is reset. - */ - sink = coresight_get_enabled_sink(true); + /* First get the selected sink from user space. */ + if (event->attr.config2) { + id = (u32)event->attr.config2; + sink = coresight_get_sink_by_id(id); + } else { + sink = coresight_get_enabled_sink(true); + } + if (!sink) goto err; mask = &event_data->mask; - /* Setup the path for each CPU in a trace session */ + /* + * Setup the path for each CPU in a trace session. We try to build + * trace path for each CPU in the mask. If we don't find an ETM + * for the CPU or fail to build a path, we clear the CPU from the + * mask and continue with the rest. If ever we try to trace on those + * CPUs, we can handle it and fail the session. + */ for_each_cpu(cpu, mask) { + struct list_head *path; struct coresight_device *csdev; csdev = per_cpu(csdev_src, cpu); - if (!csdev) - goto err; + /* + * If there is no ETM associated with this CPU clear it from + * the mask and continue with the rest. If ever we try to trace + * on this CPU, we handle it accordingly. + */ + if (!csdev) { + cpumask_clear_cpu(cpu, mask); + continue; + } /* * Building a path doesn't enable it, it simply builds a * list of devices from source to sink that can be * referenced later when the path is actually needed. */ - event_data->path[cpu] = coresight_build_path(csdev, sink); - if (IS_ERR(event_data->path[cpu])) - goto err; + path = coresight_build_path(csdev, sink); + if (IS_ERR(path)) { + cpumask_clear_cpu(cpu, mask); + continue; + } + + *etm_event_cpu_path_ptr(event_data, cpu) = path; } - if (!sink_ops(sink)->alloc_buffer) + /* If we don't have any CPUs ready for tracing, abort */ + cpu = cpumask_first(mask); + if (cpu >= nr_cpu_ids) goto err; - cpu = cpumask_first(mask); - /* Get the AUX specific data from the sink buffer */ + if (!sink_ops(sink)->alloc_buffer || !sink_ops(sink)->free_buffer) + goto err; + + /* Allocate the sink buffer for this session */ event_data->snk_config = - sink_ops(sink)->alloc_buffer(sink, cpu, pages, + sink_ops(sink)->alloc_buffer(sink, event, pages, nr_pages, overwrite); if (!event_data->snk_config) goto err; @@ -266,6 +308,7 @@ static void etm_event_start(struct perf_event *event, int flags) struct etm_event_data *event_data; struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle); struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); + struct list_head *path; if (!csdev) goto fail; @@ -278,18 +321,14 @@ static void etm_event_start(struct perf_event *event, int flags) if (!event_data) goto fail; + path = etm_event_cpu_path(event_data, cpu); /* We need a sink, no need to continue without one */ - sink = coresight_get_sink(event_data->path[cpu]); - if (WARN_ON_ONCE(!sink || !sink_ops(sink)->set_buffer)) - goto fail_end_stop; - - /* Configure the sink */ - if (sink_ops(sink)->set_buffer(sink, handle, - event_data->snk_config)) + sink = coresight_get_sink(path); + if (WARN_ON_ONCE(!sink)) goto fail_end_stop; /* Nothing will happen without a path */ - if (coresight_enable_path(event_data->path[cpu], CS_MODE_PERF)) + if (coresight_enable_path(path, CS_MODE_PERF, handle)) goto fail_end_stop; /* Tell the perf core the event is alive */ @@ -297,11 +336,13 @@ static void etm_event_start(struct perf_event *event, int flags) /* Finally enable the tracer */ if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF)) - goto fail_end_stop; + goto fail_disable_path; out: return; +fail_disable_path: + coresight_disable_path(path); fail_end_stop: perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); perf_aux_output_end(handle, 0); @@ -317,6 +358,7 @@ static void etm_event_stop(struct perf_event *event, int mode) struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu); struct perf_output_handle *handle = this_cpu_ptr(&ctx_handle); struct etm_event_data *event_data = perf_get_aux(handle); + struct list_head *path; if (event->hw.state == PERF_HES_STOPPED) return; @@ -324,7 +366,11 @@ static void etm_event_stop(struct perf_event *event, int mode) if (!csdev) return; - sink = coresight_get_sink(event_data->path[cpu]); + path = etm_event_cpu_path(event_data, cpu); + if (!path) + return; + + sink = coresight_get_sink(path); if (!sink) return; @@ -342,20 +388,13 @@ static void etm_event_stop(struct perf_event *event, int mode) if (!sink_ops(sink)->update_buffer) return; - sink_ops(sink)->update_buffer(sink, handle, + size = sink_ops(sink)->update_buffer(sink, handle, event_data->snk_config); - - if (!sink_ops(sink)->reset_buffer) - return; - - size = sink_ops(sink)->reset_buffer(sink, handle, - event_data->snk_config); - perf_aux_output_end(handle, size); } /* Disabling the path make its elements available to other sessions */ - coresight_disable_path(event_data->path[cpu]); + coresight_disable_path(path); } static int etm_event_add(struct perf_event *event, int mode) @@ -430,15 +469,16 @@ static int etm_addr_filters_validate(struct list_head *filters) static void etm_addr_filters_sync(struct perf_event *event) { struct perf_addr_filters_head *head = perf_event_addr_filters(event); - unsigned long start, stop, *offs = event->addr_filters_offs; + unsigned long start, stop; + struct perf_addr_filter_range *fr = event->addr_filter_ranges; struct etm_filters *filters = event->hw.addr_filters; struct etm_filter *etm_filter; struct perf_addr_filter *filter; int i = 0; list_for_each_entry(filter, &head->list, entry) { - start = filter->offset + offs[i]; - stop = start + filter->size; + start = fr[i].start; + stop = start + fr[i].size; etm_filter = &filters->etm_filter[i]; if (filter->range == 1) { @@ -485,11 +525,84 @@ int etm_perf_symlink(struct coresight_device *csdev, bool link) return 0; } +static ssize_t etm_perf_sink_name_show(struct device *dev, + struct device_attribute *dattr, + char *buf) +{ + struct dev_ext_attribute *ea; + + ea = container_of(dattr, struct dev_ext_attribute, attr); + return scnprintf(buf, PAGE_SIZE, "0x%lx\n", (unsigned long)(ea->var)); +} + +int etm_perf_add_symlink_sink(struct coresight_device *csdev) +{ + int ret; + unsigned long hash; + const char *name; + struct device *pmu_dev = etm_pmu.dev; + struct device *dev = &csdev->dev; + struct dev_ext_attribute *ea; + + if (csdev->type != CORESIGHT_DEV_TYPE_SINK && + csdev->type != CORESIGHT_DEV_TYPE_LINKSINK) + return -EINVAL; + + if (csdev->ea != NULL) + return -EINVAL; + + if (!etm_perf_up) + return -EPROBE_DEFER; + + ea = devm_kzalloc(dev, sizeof(*ea), GFP_KERNEL); + if (!ea) + return -ENOMEM; + + name = dev_name(dev); + /* See function coresight_get_sink_by_id() to know where this is used */ + hash = hashlen_hash(hashlen_string(NULL, name)); + + sysfs_attr_init(&ea->attr.attr); + ea->attr.attr.name = devm_kstrdup(dev, name, GFP_KERNEL); + if (!ea->attr.attr.name) + return -ENOMEM; + + ea->attr.attr.mode = 0444; + ea->attr.show = etm_perf_sink_name_show; + ea->var = (unsigned long *)hash; + + ret = sysfs_add_file_to_group(&pmu_dev->kobj, + &ea->attr.attr, "sinks"); + + if (!ret) + csdev->ea = ea; + + return ret; +} + +void etm_perf_del_symlink_sink(struct coresight_device *csdev) +{ + struct device *pmu_dev = etm_pmu.dev; + struct dev_ext_attribute *ea = csdev->ea; + + if (csdev->type != CORESIGHT_DEV_TYPE_SINK && + csdev->type != CORESIGHT_DEV_TYPE_LINKSINK) + return; + + if (!ea) + return; + + sysfs_remove_file_from_group(&pmu_dev->kobj, + &ea->attr.attr, "sinks"); + csdev->ea = NULL; +} + static int __init etm_perf_init(void) { int ret; - etm_pmu.capabilities = PERF_PMU_CAP_EXCLUSIVE; + etm_pmu.capabilities = (PERF_PMU_CAP_EXCLUSIVE | + PERF_PMU_CAP_ITRACE); etm_pmu.attr_groups = etm_pmu_attr_groups; etm_pmu.task_ctx_nr = perf_sw_context; diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h index 3ffc9feb2d64..56ae1e68c0eb 100644 --- a/drivers/hwtracing/coresight/coresight-etm-perf.h +++ b/drivers/hwtracing/coresight/coresight-etm-perf.h @@ -18,6 +18,7 @@ #ifndef _CORESIGHT_ETM_PERF_H #define _CORESIGHT_ETM_PERF_H +#include #include "coresight-priv.h" struct coresight_device; @@ -53,13 +54,42 @@ struct etm_filters { bool ssstatus; }; +/** + * struct etm_event_data - Coresight specifics associated to an event + * @work: Handle to free allocated memory outside IRQ context. + * @mask: Hold the CPU(s) this event was set for. + * @snk_config: The sink configuration. + * @path: An array of path, each slot for one CPU. + */ +struct etm_event_data { + struct work_struct work; + cpumask_t mask; + void *snk_config; + struct list_head * __percpu *path; +}; #ifdef CONFIG_CORESIGHT int etm_perf_symlink(struct coresight_device *csdev, bool link); +int etm_perf_add_symlink_sink(struct coresight_device *csdev); +void etm_perf_del_symlink_sink(struct coresight_device *csdev); +static inline void *etm_perf_sink_config(struct perf_output_handle *handle) +{ + struct etm_event_data *data = perf_get_aux(handle); + if (data) + return data->snk_config; + return NULL; +} #else static inline int etm_perf_symlink(struct coresight_device *csdev, bool link) { return -EINVAL; } +int etm_perf_add_symlink_sink(struct coresight_device *csdev) +{ return -EINVAL; } +void etm_perf_del_symlink_sink(struct coresight_device *csdev) {} +static inline void *etm_perf_sink_config(struct perf_output_handle *handle) +{ + return NULL; +} #endif /* CONFIG_CORESIGHT */ diff --git a/drivers/hwtracing/coresight/coresight-etm3x.c b/drivers/hwtracing/coresight/coresight-etm3x.c index e5b1ec57dbde..80a2b32da813 100644 --- a/drivers/hwtracing/coresight/coresight-etm3x.c +++ b/drivers/hwtracing/coresight/coresight-etm3x.c @@ -364,11 +364,10 @@ static int etm_parse_event_config(struct etm_drvdata *drvdata, return 0; } -static void etm_enable_hw(void *info) +static int etm_enable_hw(struct etm_drvdata *drvdata) { - int i; + int i, rc; u32 etmcr; - struct etm_drvdata *drvdata = info; struct etm_config *config = &drvdata->config; CS_UNLOCK(drvdata->base); @@ -379,6 +378,9 @@ static void etm_enable_hw(void *info) etm_set_pwrup(drvdata); /* Make sure all registers are accessible */ etm_os_unlock(drvdata); + rc = coresight_claim_device_unlocked(drvdata->base); + if (rc) + goto done; etm_set_prog(drvdata); @@ -427,9 +429,29 @@ static void etm_enable_hw(void *info) etm_writel(drvdata, 0x0, ETMVMIDCVR); etm_clr_prog(drvdata); + +done: + if (rc) + etm_set_pwrdwn(drvdata); CS_LOCK(drvdata->base); - dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu); + dev_dbg(drvdata->dev, "cpu: %d enable smp call done: %d\n", + drvdata->cpu, rc); + return rc; +} + +struct etm_enable_arg { + struct etm_drvdata *drvdata; + int rc; +}; + +static void etm_enable_hw_smp_call(void *info) +{ + struct etm_enable_arg *arg = info; + + if (WARN_ON(!arg)) + return; + arg->rc = etm_enable_hw(arg->drvdata); } static int etm_cpu_id(struct coresight_device *csdev) @@ -484,14 +506,13 @@ static int etm_enable_perf(struct coresight_device *csdev, /* Configure the tracer based on the session's specifics */ etm_parse_event_config(drvdata, event); /* And enable it */ - etm_enable_hw(drvdata); - - return 0; + return etm_enable_hw(drvdata); } static int etm_enable_sysfs(struct coresight_device *csdev) { struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + struct etm_enable_arg arg = { 0 }; int ret; spin_lock(&drvdata->spinlock); @@ -501,20 +522,21 @@ static int etm_enable_sysfs(struct coresight_device *csdev) * hw configuration will take place on the local CPU during bring up. */ if (cpu_online(drvdata->cpu)) { + arg.drvdata = drvdata; ret = smp_call_function_single(drvdata->cpu, - etm_enable_hw, drvdata, 1); - if (ret) - goto err; + etm_enable_hw_smp_call, &arg, 1); + if (!ret) + ret = arg.rc; + if (!ret) + drvdata->sticky_enable = true; + } else { + ret = -ENODEV; } - drvdata->sticky_enable = true; spin_unlock(&drvdata->spinlock); - dev_info(drvdata->dev, "ETM tracing enabled\n"); - return 0; - -err: - spin_unlock(&drvdata->spinlock); + if (!ret) + dev_dbg(drvdata->dev, "ETM tracing enabled\n"); return ret; } @@ -564,6 +586,8 @@ static void etm_disable_hw(void *info) for (i = 0; i < drvdata->nr_cntr; i++) config->cntr_val[i] = etm_readl(drvdata, ETMCNTVRn(i)); + coresight_disclaim_device_unlocked(drvdata->base); + etm_set_pwrdwn(drvdata); CS_LOCK(drvdata->base); @@ -613,7 +637,7 @@ static void etm_disable_sysfs(struct coresight_device *csdev) spin_unlock(&drvdata->spinlock); cpus_read_unlock(); - dev_info(drvdata->dev, "ETM tracing disabled\n"); + dev_dbg(drvdata->dev, "ETM tracing disabled\n"); } static void etm_disable(struct coresight_device *csdev, diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c index 4e6eab53e34e..2f021c1a2fa6 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c @@ -667,10 +667,13 @@ static ssize_t cyc_threshold_store(struct device *dev, if (kstrtoul(buf, 16, &val)) return -EINVAL; + + /* mask off max threshold before checking min value */ + val &= ETM_CYC_THRESHOLD_MASK; if (val < drvdata->ccitmin) return -EINVAL; - config->ccctlr = val & ETM_CYC_THRESHOLD_MASK; + config->ccctlr = val; return size; } static DEVICE_ATTR_RW(cyc_threshold); @@ -701,14 +704,16 @@ static ssize_t bb_ctrl_store(struct device *dev, return -EINVAL; if (!drvdata->nr_addr_cmp) return -EINVAL; + /* - * Bit[7:0] selects which address range comparator is used for - * branch broadcast control. + * Bit[8] controls include(1) / exclude(0), bits[0-7] select + * individual range comparators. If include then at least 1 + * range must be selected. */ - if (BMVAL(val, 0, 7) > drvdata->nr_addr_cmp) + if ((val & BIT(8)) && (BMVAL(val, 0, 7) == 0)) return -EINVAL; - config->bb_ctrl = val; + config->bb_ctrl = val & GENMASK(8, 0); return size; } static DEVICE_ATTR_RW(bb_ctrl); @@ -1341,8 +1346,8 @@ static ssize_t seq_event_store(struct device *dev, spin_lock(&drvdata->spinlock); idx = config->seq_idx; - /* RST, bits[7:0] */ - config->seq_ctrl[idx] = val & 0xFF; + /* Seq control has two masks B[15:8] F[7:0] */ + config->seq_ctrl[idx] = val & 0xFFFF; spin_unlock(&drvdata->spinlock); return size; } @@ -1597,7 +1602,7 @@ static ssize_t res_ctrl_store(struct device *dev, if (idx % 2 != 0) /* PAIRINV, bit[21] */ val &= ~BIT(21); - config->res_ctrl[idx] = val; + config->res_ctrl[idx] = val & GENMASK(21, 0); spin_unlock(&drvdata->spinlock); return size; } diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c index b0141ba7b741..9e550558ba6d 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.c +++ b/drivers/hwtracing/coresight/coresight-etm4x.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -33,8 +34,10 @@ #include #include #include +#include #include #include +#include #include "coresight-etm4x.h" #include "coresight-etm-perf.h" @@ -42,6 +45,15 @@ static int boot_enable; module_param_named(boot_enable, boot_enable, int, S_IRUGO); +#define PARAM_PM_SAVE_FIRMWARE 0 /* save self-hosted state as per firmware */ +#define PARAM_PM_SAVE_NEVER 1 /* never save any state */ +#define PARAM_PM_SAVE_SELF_HOSTED 2 /* save self-hosted state only */ + +static int pm_save_enable = PARAM_PM_SAVE_FIRMWARE; +module_param(pm_save_enable, int, 0444); +MODULE_PARM_DESC(pm_save_enable, + "Save/restore state on power down: 1 = never, 2 = self-hosted"); + /* The number of ETMv4 currently registered */ static int etm4_count; static struct etmv4_drvdata *etmdrvdata[NR_CPUS]; @@ -59,6 +71,14 @@ static void etm4_os_unlock(struct etmv4_drvdata *drvdata) isb(); } +static void etm4_os_lock(struct etmv4_drvdata *drvdata) +{ + /* Writing 0x1 to TRCOSLAR locks the trace registers */ + writel_relaxed(0x1, drvdata->base + TRCOSLAR); + drvdata->os_unlock = false; + isb(); +} + static bool etm4_arch_supported(u8 arch) { /* Mask out the minor version number */ @@ -85,16 +105,24 @@ static int etm4_trace_id(struct coresight_device *csdev) return drvdata->trcid; } -static void etm4_enable_hw(void *info) +struct etm4_enable_arg { + struct etmv4_drvdata *drvdata; + int rc; +}; + +static int etm4_enable_hw(struct etmv4_drvdata *drvdata) { - int i; - struct etmv4_drvdata *drvdata = info; + int i, rc; struct etmv4_config *config = &drvdata->config; CS_UNLOCK(drvdata->base); etm4_os_unlock(drvdata); + rc = coresight_claim_device_unlocked(drvdata->base); + if (rc) + goto done; + /* Disable the trace unit before programming trace registers */ writel_relaxed(0, drvdata->base + TRCPRGCTLR); @@ -136,8 +164,11 @@ static void etm4_enable_hw(void *info) drvdata->base + TRCCNTVRn(i)); } - /* Resource selector pair 0 is always implemented and reserved */ - for (i = 0; i < drvdata->nr_resource * 2; i++) + /* + * Resource selector pair 0 is always implemented and reserved. As + * such start at 2. + */ + for (i = 2; i < drvdata->nr_resource * 2; i++) writel_relaxed(config->res_ctrl[i], drvdata->base + TRCRSCTLRn(i)); @@ -188,9 +219,106 @@ static void etm4_enable_hw(void *info) dsb(sy); isb(); +done: CS_LOCK(drvdata->base); - dev_dbg(drvdata->dev, "cpu: %d enable smp call done\n", drvdata->cpu); + dev_dbg(drvdata->dev, "cpu: %d enable smp call done: %d\n", + drvdata->cpu, rc); + return rc; +} + +static void etm4_enable_hw_smp_call(void *info) +{ + struct etm4_enable_arg *arg = info; + + if (WARN_ON(!arg)) + return; + arg->rc = etm4_enable_hw(arg->drvdata); +} + +/* + * The goal of function etm4_config_timestamp_event() is to configure a + * counter that will tell the tracer to emit a timestamp packet when it + * reaches zero. This is done in order to get a more fine grained idea + * of when instructions are executed so that they can be correlated + * with execution on other CPUs. + * + * To do this the counter itself is configured to self reload and + * TRCRSCTLR1 (always true) used to get the counter to decrement. From + * there a resource selector is configured with the counter and the + * timestamp control register to use the resource selector to trigger the + * event that will insert a timestamp packet in the stream. + */ +static int etm4_config_timestamp_event(struct etmv4_drvdata *drvdata) +{ + int ctridx, ret = -EINVAL; + int counter, rselector; + u32 val = 0; + struct etmv4_config *config = &drvdata->config; + + /* No point in trying if we don't have at least one counter */ + if (!drvdata->nr_cntr) + goto out; + + /* Find a counter that hasn't been initialised */ + for (ctridx = 0; ctridx < drvdata->nr_cntr; ctridx++) + if (config->cntr_val[ctridx] == 0) + break; + + /* All the counters have been configured already, bail out */ + if (ctridx == drvdata->nr_cntr) { + pr_debug("%s: no available counter found\n", __func__); + ret = -ENOSPC; + goto out; + } + + /* + * Searching for an available resource selector to use, starting at + * '2' since every implementation has at least 2 resource selector. + * ETMIDR4 gives the number of resource selector _pairs_, + * hence multiply by 2. + */ + for (rselector = 2; rselector < drvdata->nr_resource * 2; rselector++) + if (!config->res_ctrl[rselector]) + break; + + if (rselector == drvdata->nr_resource * 2) { + pr_debug("%s: no available resource selector found\n", + __func__); + ret = -ENOSPC; + goto out; + } + + /* Remember what counter we used */ + counter = 1 << ctridx; + + /* + * Initialise original and reload counter value to the smallest + * possible value in order to get as much precision as we can. + */ + config->cntr_val[ctridx] = 1; + config->cntrldvr[ctridx] = 1; + + /* Set the trace counter control register */ + val = 0x1 << 16 | /* Bit 16, reload counter automatically */ + 0x0 << 7 | /* Select single resource selector */ + 0x1; /* Resource selector 1, i.e always true */ + + config->cntr_ctrl[ctridx] = val; + + val = 0x2 << 16 | /* Group 0b0010 - Counter and sequencers */ + counter << 0; /* Counter to use */ + + config->res_ctrl[rselector] = val; + + val = 0x0 << 7 | /* Select single resource selector */ + rselector; /* Resource selector */ + + config->ts_ctrl = val; + + ret = 0; +out: + return ret; } static int etm4_parse_event_config(struct etmv4_drvdata *drvdata, @@ -228,9 +356,29 @@ static int etm4_parse_event_config(struct etmv4_drvdata *drvdata, /* TRM: Must program this for cycacc to work */ config->ccctlr = ETM_CYC_THRESHOLD_DEFAULT; } - if (attr->config & BIT(ETM_OPT_TS)) + if (attr->config & BIT(ETM_OPT_TS)) { + /* + * Configure timestamps to be emitted at regular intervals in + * order to correlate instructions executed on different CPUs + * (CPU-wide trace scenarios). + */ + ret = etm4_config_timestamp_event(drvdata); + + /* + * No need to go further if timestamp intervals can't + * be configured. + */ + if (ret) + goto out; + /* bit[11], Global timestamp tracing bit */ config->cfg |= BIT(11); + } + + if (attr->config & BIT(ETM_OPT_CTXTID)) + /* bit[6], Context ID tracing bit */ + config->cfg |= BIT(ETM4_CFG_BIT_CTXTID); + /* return stack - enable if selected and supported */ if ((attr->config & BIT(ETM_OPT_RETSTK)) && drvdata->retstack) /* bit[12], Return stack enable bit */ @@ -256,7 +404,7 @@ static int etm4_enable_perf(struct coresight_device *csdev, if (ret) goto out; /* And enable it */ - etm4_enable_hw(drvdata); + ret = etm4_enable_hw(drvdata); out: return ret; @@ -265,6 +413,7 @@ out: static int etm4_enable_sysfs(struct coresight_device *csdev) { struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + struct etm4_enable_arg arg = { 0 }; int ret; spin_lock(&drvdata->spinlock); @@ -273,19 +422,17 @@ static int etm4_enable_sysfs(struct coresight_device *csdev) * Executing etm4_enable_hw on the cpu whose ETM is being enabled * ensures that register writes occur when cpu is powered. */ + arg.drvdata = drvdata; ret = smp_call_function_single(drvdata->cpu, - etm4_enable_hw, drvdata, 1); - if (ret) - goto err; - - drvdata->sticky_enable = true; + etm4_enable_hw_smp_call, &arg, 1); + if (!ret) + ret = arg.rc; + if (!ret) + drvdata->sticky_enable = true; spin_unlock(&drvdata->spinlock); - dev_info(drvdata->dev, "ETM tracing enabled\n"); - return 0; - -err: - spin_unlock(&drvdata->spinlock); + if (!ret) + dev_dbg(drvdata->dev, "ETM tracing enabled\n"); return ret; } @@ -346,6 +493,8 @@ static void etm4_disable_hw(void *info) isb(); writel_relaxed(control, drvdata->base + TRCPRGCTLR); + coresight_disclaim_device_unlocked(drvdata->base); + CS_LOCK(drvdata->base); dev_dbg(drvdata->dev, "cpu: %d disable smp call done\n", drvdata->cpu); @@ -398,7 +547,7 @@ static void etm4_disable_sysfs(struct coresight_device *csdev) spin_unlock(&drvdata->spinlock); cpus_read_unlock(); - dev_info(drvdata->dev, "ETM tracing disabled\n"); + dev_dbg(drvdata->dev, "ETM tracing disabled\n"); } static void etm4_disable(struct coresight_device *csdev, @@ -623,7 +772,7 @@ static void etm4_set_default_config(struct etmv4_config *config) config->vinst_ctrl |= BIT(0); } -static u64 etm4_get_access_type(struct etmv4_config *config) +static u64 etm4_get_ns_access_type(struct etmv4_config *config) { u64 access_type = 0; @@ -634,17 +783,26 @@ static u64 etm4_get_access_type(struct etmv4_config *config) * Bit[13] Exception level 1 - OS * Bit[14] Exception level 2 - Hypervisor * Bit[15] Never implemented - * - * Always stay away from hypervisor mode. */ - access_type = ETM_EXLEVEL_NS_HYP; - - if (config->mode & ETM_MODE_EXCL_KERN) - access_type |= ETM_EXLEVEL_NS_OS; + if (!is_kernel_in_hyp_mode()) { + /* Stay away from hypervisor mode for non-VHE */ + access_type = ETM_EXLEVEL_NS_HYP; + if (config->mode & ETM_MODE_EXCL_KERN) + access_type |= ETM_EXLEVEL_NS_OS; + } else if (config->mode & ETM_MODE_EXCL_KERN) { + access_type = ETM_EXLEVEL_NS_HYP; + } if (config->mode & ETM_MODE_EXCL_USER) access_type |= ETM_EXLEVEL_NS_APP; + return access_type; +} + +static u64 etm4_get_access_type(struct etmv4_config *config) +{ + u64 access_type = etm4_get_ns_access_type(config); + /* * EXLEVEL_S, bits[11:8], don't trace anything happening * in secure state. @@ -898,20 +1056,10 @@ void etm4_config_trace_mode(struct etmv4_config *config) addr_acc = config->addr_acc[ETM_DEFAULT_ADDR_COMP]; /* clear default config */ - addr_acc &= ~(ETM_EXLEVEL_NS_APP | ETM_EXLEVEL_NS_OS); + addr_acc &= ~(ETM_EXLEVEL_NS_APP | ETM_EXLEVEL_NS_OS | + ETM_EXLEVEL_NS_HYP); - /* - * EXLEVEL_NS, bits[15:12] - * The Exception levels are: - * Bit[12] Exception level 0 - Application - * Bit[13] Exception level 1 - OS - * Bit[14] Exception level 2 - Hypervisor - * Bit[15] Never implemented - */ - if (mode & ETM_MODE_EXCL_KERN) - addr_acc |= ETM_EXLEVEL_NS_OS; - else - addr_acc |= ETM_EXLEVEL_NS_APP; + addr_acc |= etm4_get_ns_access_type(config); config->addr_acc[ETM_DEFAULT_ADDR_COMP] = addr_acc; config->addr_acc[ETM_DEFAULT_ADDR_COMP + 1] = addr_acc; @@ -961,6 +1109,288 @@ static void etm4_init_trace_id(struct etmv4_drvdata *drvdata) drvdata->trcid = coresight_get_trace_id(drvdata->cpu); } +#ifdef CONFIG_CPU_PM +static int etm4_cpu_save(struct etmv4_drvdata *drvdata) +{ + int i, ret = 0; + struct etmv4_save_state *state; + struct device *etm_dev = &drvdata->csdev->dev; + + /* + * As recommended by 3.4.1 ("The procedure when powering down the PE") + * of ARM IHI 0064D + */ + dsb(sy); + isb(); + + CS_UNLOCK(drvdata->base); + + /* Lock the OS lock to disable trace and external debugger access */ + etm4_os_lock(drvdata); + + /* wait for TRCSTATR.PMSTABLE to go up */ + if (coresight_timeout(drvdata->base, TRCSTATR, + TRCSTATR_PMSTABLE_BIT, 1)) { + dev_err(etm_dev, + "timeout while waiting for PM Stable Status\n"); + etm4_os_unlock(drvdata); + ret = -EBUSY; + goto out; + } + + state = drvdata->save_state; + + state->trcprgctlr = readl(drvdata->base + TRCPRGCTLR); + state->trcprocselr = readl(drvdata->base + TRCPROCSELR); + state->trcconfigr = readl(drvdata->base + TRCCONFIGR); + state->trcauxctlr = readl(drvdata->base + TRCAUXCTLR); + state->trceventctl0r = readl(drvdata->base + TRCEVENTCTL0R); + state->trceventctl1r = readl(drvdata->base + TRCEVENTCTL1R); + state->trcstallctlr = readl(drvdata->base + TRCSTALLCTLR); + state->trctsctlr = readl(drvdata->base + TRCTSCTLR); + state->trcsyncpr = readl(drvdata->base + TRCSYNCPR); + state->trcccctlr = readl(drvdata->base + TRCCCCTLR); + state->trcbbctlr = readl(drvdata->base + TRCBBCTLR); + state->trctraceidr = readl(drvdata->base + TRCTRACEIDR); + state->trcqctlr = readl(drvdata->base + TRCQCTLR); + + state->trcvictlr = readl(drvdata->base + TRCVICTLR); + state->trcviiectlr = readl(drvdata->base + TRCVIIECTLR); + state->trcvissctlr = readl(drvdata->base + TRCVISSCTLR); + state->trcvipcssctlr = readl(drvdata->base + TRCVIPCSSCTLR); + state->trcvdctlr = readl(drvdata->base + TRCVDCTLR); + state->trcvdsacctlr = readl(drvdata->base + TRCVDSACCTLR); + state->trcvdarcctlr = readl(drvdata->base + TRCVDARCCTLR); + + for (i = 0; i < drvdata->nrseqstate; i++) + state->trcseqevr[i] = readl(drvdata->base + TRCSEQEVRn(i)); + + state->trcseqrstevr = readl(drvdata->base + TRCSEQRSTEVR); + state->trcseqstr = readl(drvdata->base + TRCSEQSTR); + state->trcextinselr = readl(drvdata->base + TRCEXTINSELR); + + for (i = 0; i < drvdata->nr_cntr; i++) { + state->trccntrldvr[i] = readl(drvdata->base + TRCCNTRLDVRn(i)); + state->trccntctlr[i] = readl(drvdata->base + TRCCNTCTLRn(i)); + state->trccntvr[i] = readl(drvdata->base + TRCCNTVRn(i)); + } + + for (i = 0; i < drvdata->nr_resource * 2; i++) + state->trcrsctlr[i] = readl(drvdata->base + TRCRSCTLRn(i)); + + for (i = 0; i < drvdata->nr_ss_cmp; i++) { + state->trcssccr[i] = readl(drvdata->base + TRCSSCCRn(i)); + state->trcsscsr[i] = readl(drvdata->base + TRCSSCSRn(i)); + state->trcsspcicr[i] = readl(drvdata->base + TRCSSPCICRn(i)); + } + + for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) { + state->trcacvr[i] = readl(drvdata->base + TRCACVRn(i)); + state->trcacatr[i] = readl(drvdata->base + TRCACATRn(i)); + } + + /* + * Data trace stream is architecturally prohibited for A profile cores + * so we don't save (or later restore) trcdvcvr and trcdvcmr - As per + * section 1.3.4 ("Possible functional configurations of an ETMv4 trace + * unit") of ARM IHI 0064D. + */ + + for (i = 0; i < drvdata->numcidc; i++) + state->trccidcvr[i] = readl(drvdata->base + TRCCIDCVRn(i)); + + for (i = 0; i < drvdata->numvmidc; i++) + state->trcvmidcvr[i] = readl(drvdata->base + TRCVMIDCVRn(i)); + + state->trccidcctlr0 = readl(drvdata->base + TRCCIDCCTLR0); + state->trccidcctlr1 = readl(drvdata->base + TRCCIDCCTLR1); + + state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR0); + state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR1); + + state->trcclaimset = readl(drvdata->base + TRCCLAIMCLR); + + state->trcpdcr = readl(drvdata->base + TRCPDCR); + + /* wait for TRCSTATR.IDLE to go up */ + if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1)) { + dev_err(etm_dev, + "timeout while waiting for Idle Trace Status\n"); + etm4_os_unlock(drvdata); + ret = -EBUSY; + goto out; + } + + drvdata->state_needs_restore = true; + + /* + * Power can be removed from the trace unit now. We do this to + * potentially save power on systems that respect the TRCPDCR_PU + * despite requesting software to save/restore state. + */ + writel_relaxed((state->trcpdcr & ~TRCPDCR_PU), + drvdata->base + TRCPDCR); + +out: + CS_LOCK(drvdata->base); + return ret; +} + +static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) +{ + int i; + struct etmv4_save_state *state = drvdata->save_state; + + CS_UNLOCK(drvdata->base); + + writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET); + + writel_relaxed(state->trcprgctlr, drvdata->base + TRCPRGCTLR); + writel_relaxed(state->trcprocselr, drvdata->base + TRCPROCSELR); + writel_relaxed(state->trcconfigr, drvdata->base + TRCCONFIGR); + writel_relaxed(state->trcauxctlr, drvdata->base + TRCAUXCTLR); + writel_relaxed(state->trceventctl0r, drvdata->base + TRCEVENTCTL0R); + writel_relaxed(state->trceventctl1r, drvdata->base + TRCEVENTCTL1R); + writel_relaxed(state->trcstallctlr, drvdata->base + TRCSTALLCTLR); + writel_relaxed(state->trctsctlr, drvdata->base + TRCTSCTLR); + writel_relaxed(state->trcsyncpr, drvdata->base + TRCSYNCPR); + writel_relaxed(state->trcccctlr, drvdata->base + TRCCCCTLR); + writel_relaxed(state->trcbbctlr, drvdata->base + TRCBBCTLR); + writel_relaxed(state->trctraceidr, drvdata->base + TRCTRACEIDR); + writel_relaxed(state->trcqctlr, drvdata->base + TRCQCTLR); + + writel_relaxed(state->trcvictlr, drvdata->base + TRCVICTLR); + writel_relaxed(state->trcviiectlr, drvdata->base + TRCVIIECTLR); + writel_relaxed(state->trcvissctlr, drvdata->base + TRCVISSCTLR); + writel_relaxed(state->trcvipcssctlr, drvdata->base + TRCVIPCSSCTLR); + writel_relaxed(state->trcvdctlr, drvdata->base + TRCVDCTLR); + writel_relaxed(state->trcvdsacctlr, drvdata->base + TRCVDSACCTLR); + writel_relaxed(state->trcvdarcctlr, drvdata->base + TRCVDARCCTLR); + + for (i = 0; i < drvdata->nrseqstate; i++) + writel_relaxed(state->trcseqevr[i], + drvdata->base + TRCSEQEVRn(i)); + + writel_relaxed(state->trcseqrstevr, drvdata->base + TRCSEQRSTEVR); + writel_relaxed(state->trcseqstr, drvdata->base + TRCSEQSTR); + writel_relaxed(state->trcextinselr, drvdata->base + TRCEXTINSELR); + + for (i = 0; i < drvdata->nr_cntr; i++) { + writel_relaxed(state->trccntrldvr[i], + drvdata->base + TRCCNTRLDVRn(i)); + writel_relaxed(state->trccntctlr[i], + drvdata->base + TRCCNTCTLRn(i)); + writel_relaxed(state->trccntvr[i], + drvdata->base + TRCCNTVRn(i)); + } + + for (i = 0; i < drvdata->nr_resource * 2; i++) + writel_relaxed(state->trcrsctlr[i], + drvdata->base + TRCRSCTLRn(i)); + + for (i = 0; i < drvdata->nr_ss_cmp; i++) { + writel_relaxed(state->trcssccr[i], + drvdata->base + TRCSSCCRn(i)); + writel_relaxed(state->trcsscsr[i], + drvdata->base + TRCSSCSRn(i)); + writel_relaxed(state->trcsspcicr[i], + drvdata->base + TRCSSPCICRn(i)); + } + + for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) { + writel_relaxed(state->trcacvr[i], + drvdata->base + TRCACVRn(i)); + writel_relaxed(state->trcacatr[i], + drvdata->base + TRCACATRn(i)); + } + + for (i = 0; i < drvdata->numcidc; i++) + writel_relaxed(state->trccidcvr[i], + drvdata->base + TRCCIDCVRn(i)); + + for (i = 0; i < drvdata->numvmidc; i++) + writel_relaxed(state->trcvmidcvr[i], + drvdata->base + TRCVMIDCVRn(i)); + + writel_relaxed(state->trccidcctlr0, drvdata->base + TRCCIDCCTLR0); + writel_relaxed(state->trccidcctlr1, drvdata->base + TRCCIDCCTLR1); + + writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR0); + writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR1); + + writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET); + + writel_relaxed(state->trcpdcr, drvdata->base + TRCPDCR); + + drvdata->state_needs_restore = false; + + /* + * As recommended by section 4.3.7 ("Synchronization when using the + * memory-mapped interface") of ARM IHI 0064D + */ + dsb(sy); + isb(); + + /* Unlock the OS lock to re-enable trace and external debug access */ + etm4_os_unlock(drvdata); + CS_LOCK(drvdata->base); +} + +static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd, + void *v) +{ + struct etmv4_drvdata *drvdata; + unsigned int cpu = smp_processor_id(); + + if (!etmdrvdata[cpu]) + return NOTIFY_OK; + + drvdata = etmdrvdata[cpu]; + + if (!drvdata->save_state) + return NOTIFY_OK; + + if (WARN_ON_ONCE(drvdata->cpu != cpu)) + return NOTIFY_BAD; + + switch (cmd) { + case CPU_PM_ENTER: + /* save the state if self-hosted coresight is in use */ + if (local_read(&drvdata->mode)) + if (etm4_cpu_save(drvdata)) + return NOTIFY_BAD; + break; + case CPU_PM_EXIT: + /* fallthrough */ + case CPU_PM_ENTER_FAILED: + if (drvdata->state_needs_restore) + etm4_cpu_restore(drvdata); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static struct notifier_block etm4_cpu_pm_nb = { + .notifier_call = etm4_cpu_pm_notify, +}; + +static int etm4_cpu_pm_register(void) +{ + return cpu_pm_register_notifier(&etm4_cpu_pm_nb); +} + +static void etm4_cpu_pm_unregister(void) +{ + cpu_pm_unregister_notifier(&etm4_cpu_pm_nb); +} +#else +static int etm4_cpu_pm_register(void) { return 0; } +static void etm4_cpu_pm_unregister(void) { } +#endif + static int etm4_probe(struct amba_device *adev, const struct amba_id *id) { int ret; @@ -986,6 +1416,17 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) drvdata->dev = &adev->dev; dev_set_drvdata(dev, drvdata); + if (pm_save_enable == PARAM_PM_SAVE_FIRMWARE) + pm_save_enable = coresight_loses_context_with_cpu(dev) ? + PARAM_PM_SAVE_SELF_HOSTED : PARAM_PM_SAVE_NEVER; + + if (pm_save_enable != PARAM_PM_SAVE_NEVER) { + drvdata->save_state = devm_kmalloc(dev, + sizeof(struct etmv4_save_state), GFP_KERNEL); + if (!drvdata->save_state) + return -ENOMEM; + } + /* Validity for the resource is already checked by the AMBA core */ base = devm_ioremap_resource(dev, res); if (IS_ERR(base)) @@ -1014,6 +1455,10 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) if (ret < 0) goto err_arch_supported; hp_online = ret; + + ret = etm4_cpu_pm_register(); + if (ret) + goto err_arch_supported; } cpus_read_unlock(); @@ -1057,6 +1502,8 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id) err_arch_supported: if (--etm4_count == 0) { + etm4_cpu_pm_unregister(); + cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING); if (hp_online) cpuhp_remove_state_nocalls(hp_online); diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index b3b5ea7b7fb3..397fd40556b0 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -182,6 +182,7 @@ ETM_MODE_EXCL_USER) #define TRCSTATR_IDLE_BIT 0 +#define TRCSTATR_PMSTABLE_BIT 1 #define ETM_DEFAULT_ADDR_COMP 0 /* PowerDown Control Register bits */ @@ -291,6 +292,65 @@ struct etmv4_config { u32 ext_inp; }; +/** + * struct etm4_save_state - state to be preserved when ETM is without power + */ +struct etmv4_save_state { + u32 trcprgctlr; + u32 trcprocselr; + u32 trcconfigr; + u32 trcauxctlr; + u32 trceventctl0r; + u32 trceventctl1r; + u32 trcstallctlr; + u32 trctsctlr; + u32 trcsyncpr; + u32 trcccctlr; + u32 trcbbctlr; + u32 trctraceidr; + u32 trcqctlr; + + u32 trcvictlr; + u32 trcviiectlr; + u32 trcvissctlr; + u32 trcvipcssctlr; + u32 trcvdctlr; + u32 trcvdsacctlr; + u32 trcvdarcctlr; + + u32 trcseqevr[ETM_MAX_SEQ_STATES]; + u32 trcseqrstevr; + u32 trcseqstr; + u32 trcextinselr; + u32 trccntrldvr[ETMv4_MAX_CNTR]; + u32 trccntctlr[ETMv4_MAX_CNTR]; + u32 trccntvr[ETMv4_MAX_CNTR]; + + u32 trcrsctlr[ETM_MAX_RES_SEL * 2]; + + u32 trcssccr[ETM_MAX_SS_CMP]; + u32 trcsscsr[ETM_MAX_SS_CMP]; + u32 trcsspcicr[ETM_MAX_SS_CMP]; + + u64 trcacvr[ETM_MAX_SINGLE_ADDR_CMP]; + u64 trcacatr[ETM_MAX_SINGLE_ADDR_CMP]; + u64 trccidcvr[ETMv4_MAX_CTXID_CMP]; + u32 trcvmidcvr[ETM_MAX_VMID_CMP]; + u32 trccidcctlr0; + u32 trccidcctlr1; + u32 trcvmidcctlr0; + u32 trcvmidcctlr1; + + u32 trcclaimset; + + u32 cntr_val[ETMv4_MAX_CNTR]; + u32 seq_state; + u32 vinst_ctrl; + u32 ss_status[ETM_MAX_SS_CMP]; + + u32 trcpdcr; +}; + /** * struct etm4_drvdata - specifics associated to an ETM component * @base: Memory mapped base address for this component. @@ -347,6 +407,8 @@ struct etmv4_config { * @atbtrig: If the implementation can support ATB triggers * @lpoverride: If the implementation can support low-power state over. * @config: structure holding configuration parameters. + * @save_state: State to be preserved across power loss + * @state_needs_restore: True when there is context to restore after PM exit */ struct etmv4_drvdata { void __iomem *base; @@ -393,6 +455,8 @@ struct etmv4_drvdata { bool atbtrig; bool lpoverride; struct etmv4_config config; + struct etmv4_save_state *save_state; + bool state_needs_restore; }; /* Address comparator access types */ diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index 77642e0e955b..6b9fce3a94be 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include #include #include #include @@ -32,6 +34,7 @@ #define FUNNEL_HOLDTIME_MASK 0xf00 #define FUNNEL_HOLDTIME_SHFT 0x8 #define FUNNEL_HOLDTIME (0x7 << FUNNEL_HOLDTIME_SHFT) +#define FUNNEL_ENSx_MASK 0xff /** * struct funnel_drvdata - specifics associated to a funnel component @@ -40,6 +43,7 @@ * @atclk: optional clock for the core parts of the funnel. * @csdev: component vitals needed by the framework. * @priority: port selection order. + * @spinlock: serialize enable/disable operations. */ struct funnel_drvdata { void __iomem *base; @@ -47,36 +51,60 @@ struct funnel_drvdata { struct clk *atclk; struct coresight_device *csdev; unsigned long priority; + spinlock_t spinlock; }; -static void funnel_enable_hw(struct funnel_drvdata *drvdata, int port) +static int dynamic_funnel_enable_hw(struct funnel_drvdata *drvdata, int port) { u32 functl; + int rc = 0; CS_UNLOCK(drvdata->base); functl = readl_relaxed(drvdata->base + FUNNEL_FUNCTL); + /* Claim the device only when we enable the first slave */ + if (!(functl & FUNNEL_ENSx_MASK)) { + rc = coresight_claim_device_unlocked(drvdata->base); + if (rc) + goto done; + } + functl &= ~FUNNEL_HOLDTIME_MASK; functl |= FUNNEL_HOLDTIME; functl |= (1 << port); writel_relaxed(functl, drvdata->base + FUNNEL_FUNCTL); writel_relaxed(drvdata->priority, drvdata->base + FUNNEL_PRICTL); - +done: CS_LOCK(drvdata->base); + return rc; } static int funnel_enable(struct coresight_device *csdev, int inport, int outport) { + int rc = 0; struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + unsigned long flags; + bool first_enable = false; - funnel_enable_hw(drvdata, inport); + spin_lock_irqsave(&drvdata->spinlock, flags); + if (atomic_read(&csdev->refcnt[inport]) == 0) { + if (drvdata->base) + rc = dynamic_funnel_enable_hw(drvdata, inport); + if (!rc) + first_enable = true; + } + if (!rc) + atomic_inc(&csdev->refcnt[inport]); + spin_unlock_irqrestore(&drvdata->spinlock, flags); - dev_info(drvdata->dev, "FUNNEL inport %d enabled\n", inport); - return 0; + if (first_enable) + dev_dbg(drvdata->dev, "FUNNEL inport %d enabled\n", inport); + return rc; } -static void funnel_disable_hw(struct funnel_drvdata *drvdata, int inport) +static void dynamic_funnel_disable_hw(struct funnel_drvdata *drvdata, + int inport) { u32 functl; @@ -86,6 +114,10 @@ static void funnel_disable_hw(struct funnel_drvdata *drvdata, int inport) functl &= ~(1 << inport); writel_relaxed(functl, drvdata->base + FUNNEL_FUNCTL); + /* Disclaim the device if none of the slaves are now active */ + if (!(functl & FUNNEL_ENSx_MASK)) + coresight_disclaim_device_unlocked(drvdata->base); + CS_LOCK(drvdata->base); } @@ -93,10 +125,19 @@ static void funnel_disable(struct coresight_device *csdev, int inport, int outport) { struct funnel_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + unsigned long flags; + bool last_disable = false; - funnel_disable_hw(drvdata, inport); + spin_lock_irqsave(&drvdata->spinlock, flags); + if (atomic_dec_return(&csdev->refcnt[inport]) == 0) { + if (drvdata->base) + dynamic_funnel_disable_hw(drvdata, inport); + last_disable = true; + } + spin_unlock_irqrestore(&drvdata->spinlock, flags); - dev_info(drvdata->dev, "FUNNEL inport %d disabled\n", inport); + if (last_disable) + dev_dbg(&csdev->dev, "FUNNEL inport %d disabled\n", inport);; } static const struct coresight_ops_link funnel_link_ops = { @@ -168,56 +209,71 @@ static struct attribute *coresight_funnel_attrs[] = { }; ATTRIBUTE_GROUPS(coresight_funnel); -static int funnel_probe(struct amba_device *adev, const struct amba_id *id) +static int funnel_probe(struct device *dev, struct resource *res) { int ret; void __iomem *base; - struct device *dev = &adev->dev; struct coresight_platform_data *pdata = NULL; struct funnel_drvdata *drvdata; - struct resource *res = &adev->res; struct coresight_desc desc = { 0 }; - struct device_node *np = adev->dev.of_node; + struct device_node *np = dev->of_node; if (np) { pdata = of_get_coresight_platform_data(dev, np); if (IS_ERR(pdata)) return PTR_ERR(pdata); - adev->dev.platform_data = pdata; + dev->platform_data = pdata; } + if (of_device_is_compatible(np, "arm,coresight-funnel")) + pr_warn_once("Uses OBSOLETE CoreSight funnel binding\n"); + drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL); if (!drvdata) return -ENOMEM; - drvdata->dev = &adev->dev; - drvdata->atclk = devm_clk_get(&adev->dev, "atclk"); /* optional */ + drvdata->dev = dev; + drvdata->atclk = devm_clk_get(dev, "atclk"); /* optional */ if (!IS_ERR(drvdata->atclk)) { ret = clk_prepare_enable(drvdata->atclk); if (ret) return ret; } + + /* + * Map the device base for dynamic-funnel, which has been + * validated by AMBA core. + */ + if (res) { + base = devm_ioremap_resource(dev, res); + if (IS_ERR(base)) { + ret = PTR_ERR(base); + goto out_disable_clk; + } + drvdata->base = base; + desc.groups = coresight_funnel_groups; + } + dev_set_drvdata(dev, drvdata); - /* Validity for the resource is already checked by the AMBA core */ - base = devm_ioremap_resource(dev, res); - if (IS_ERR(base)) - return PTR_ERR(base); - - drvdata->base = base; - pm_runtime_put(&adev->dev); - + spin_lock_init(&drvdata->spinlock); desc.type = CORESIGHT_DEV_TYPE_LINK; desc.subtype.link_subtype = CORESIGHT_DEV_SUBTYPE_LINK_MERG; desc.ops = &funnel_cs_ops; desc.pdata = pdata; desc.dev = dev; - desc.groups = coresight_funnel_groups; drvdata->csdev = coresight_register(&desc); - if (IS_ERR(drvdata->csdev)) - return PTR_ERR(drvdata->csdev); + if (IS_ERR(drvdata->csdev)) { + ret = PTR_ERR(drvdata->csdev); + goto out_disable_clk; + } - return 0; + pm_runtime_put(dev); + +out_disable_clk: + if (ret && !IS_ERR_OR_NULL(drvdata->atclk)) + clk_disable_unprepare(drvdata->atclk); + return ret; } #ifdef CONFIG_PM @@ -246,7 +302,48 @@ static const struct dev_pm_ops funnel_dev_pm_ops = { SET_RUNTIME_PM_OPS(funnel_runtime_suspend, funnel_runtime_resume, NULL) }; -static const struct amba_id funnel_ids[] = { +static int static_funnel_probe(struct platform_device *pdev) +{ + int ret; + + pm_runtime_get_noresume(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + + /* Static funnel do not have programming base */ + ret = funnel_probe(&pdev->dev, NULL); + + if (ret) { + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_disable(&pdev->dev); + } + + return ret; +} + +static const struct of_device_id static_funnel_match[] = { + {.compatible = "arm,coresight-static-funnel"}, + {} +}; + +static struct platform_driver static_funnel_driver = { + .probe = static_funnel_probe, + .driver = { + .name = "coresight-static-funnel", + .of_match_table = static_funnel_match, + .pm = &funnel_dev_pm_ops, + .suppress_bind_attrs = true, + }, +}; +builtin_platform_driver(static_funnel_driver); + +static int dynamic_funnel_probe(struct amba_device *adev, + const struct amba_id *id) +{ + return funnel_probe(&adev->dev, &adev->res); +} + +static const struct amba_id dynamic_funnel_ids[] = { { .id = 0x0003b908, .mask = 0x0003ffff, @@ -259,14 +356,14 @@ static const struct amba_id funnel_ids[] = { { 0, 0}, }; -static struct amba_driver funnel_driver = { +static struct amba_driver dynamic_funnel_driver = { .drv = { - .name = "coresight-funnel", + .name = "coresight-dynamic-funnel", .owner = THIS_MODULE, .pm = &funnel_dev_pm_ops, .suppress_bind_attrs = true, }, - .probe = funnel_probe, - .id_table = funnel_ids, + .probe = dynamic_funnel_probe, + .id_table = dynamic_funnel_ids, }; -builtin_amba_driver(funnel_driver); +builtin_amba_driver(dynamic_funnel_driver); diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h index f1d0e21d8cab..71b4c37fe88c 100644 --- a/drivers/hwtracing/coresight/coresight-priv.h +++ b/drivers/hwtracing/coresight/coresight-priv.h @@ -32,6 +32,13 @@ #define CORESIGHT_DEVID 0xfc8 #define CORESIGHT_DEVTYPE 0xfcc + +/* + * Coresight device CLAIM protocol. + * See PSCI - ARM DEN 0022D, Section: 6.8.1 Debug and Trace save and restore. + */ +#define CORESIGHT_CLAIM_SELF_HOSTED BIT(1) + #define TIMEOUT_US 100 #define BMVAL(val, lsb, msb) ((val & GENMASK(msb, lsb)) >> lsb) @@ -64,7 +71,8 @@ static DEVICE_ATTR_RO(name) #define coresight_simple_reg64(type, name, lo_off, hi_off) \ __coresight_simple_func(type, NULL, name, lo_off, hi_off) -extern const u32 barrier_pkt[5]; +extern const u32 barrier_pkt[4]; +#define CORESIGHT_BARRIER_PKT_SIZE (sizeof(barrier_pkt)) enum etm_addr_type { ETM_ADDR_TYPE_NONE, @@ -98,6 +106,13 @@ struct cs_buffers { void **data_pages; }; +static inline void coresight_insert_barrier_packet(void *buf) +{ + if (buf) + memcpy(buf, barrier_pkt, CORESIGHT_BARRIER_PKT_SIZE); +} + + static inline void CS_LOCK(void __iomem *addr) { do { @@ -136,9 +151,10 @@ static inline void coresight_write_reg_pair(void __iomem *addr, u64 val, } void coresight_disable_path(struct list_head *path); -int coresight_enable_path(struct list_head *path, u32 mode); +int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data); struct coresight_device *coresight_get_sink(struct list_head *path); struct coresight_device *coresight_get_enabled_sink(bool reset); +struct coresight_device *coresight_get_sink_by_id(u32 id); struct list_head *coresight_build_path(struct coresight_device *csdev, struct coresight_device *sink); void coresight_release_path(struct list_head *path); diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c index 3756e71cb8f5..93fe5785c8a4 100644 --- a/drivers/hwtracing/coresight/coresight-replicator.c +++ b/drivers/hwtracing/coresight/coresight-replicator.c @@ -42,7 +42,8 @@ static int replicator_enable(struct coresight_device *csdev, int inport, { struct replicator_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - dev_info(drvdata->dev, "REPLICATOR enabled\n"); + if (atomic_inc_return(&csdev->refcnt[outport]) == 1) + dev_dbg(drvdata->dev, "REPLICATOR enabled\n"); return 0; } @@ -51,7 +52,8 @@ static void replicator_disable(struct coresight_device *csdev, int inport, { struct replicator_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); - dev_info(drvdata->dev, "REPLICATOR disabled\n"); + if (atomic_dec_return(&csdev->refcnt[outport]) == 0) + dev_dbg(drvdata->dev, "REPLICATOR disabled\n"); } static const struct coresight_ops_link replicator_link_ops = { diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c index 92a780a6df1d..696455891ec4 100644 --- a/drivers/hwtracing/coresight/coresight-stm.c +++ b/drivers/hwtracing/coresight/coresight-stm.c @@ -218,7 +218,7 @@ static int stm_enable(struct coresight_device *csdev, stm_enable_hw(drvdata); spin_unlock(&drvdata->spinlock); - dev_info(drvdata->dev, "STM tracing enabled\n"); + dev_dbg(drvdata->dev, "STM tracing enabled\n"); return 0; } @@ -281,7 +281,7 @@ static void stm_disable(struct coresight_device *csdev, pm_runtime_put(drvdata->dev); local_set(&drvdata->mode, CS_MODE_DISABLED); - dev_info(drvdata->dev, "STM tracing disabled\n"); + dev_dbg(drvdata->dev, "STM tracing disabled\n"); } } diff --git a/drivers/hwtracing/coresight/coresight-tmc-etf.c b/drivers/hwtracing/coresight/coresight-tmc-etf.c index e2513b786242..dcd39b5eb684 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etf.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etf.c @@ -15,14 +15,19 @@ * this program. If not, see . */ +#include #include #include #include #include #include "coresight-priv.h" #include "coresight-tmc.h" +#include "coresight-etm-perf.h" -static void tmc_etb_enable_hw(struct tmc_drvdata *drvdata) +static int tmc_set_etf_buffer(struct coresight_device *csdev, + struct perf_output_handle *handle); + +static void __tmc_etb_enable_hw(struct tmc_drvdata *drvdata) { CS_UNLOCK(drvdata->base); @@ -41,44 +46,44 @@ static void tmc_etb_enable_hw(struct tmc_drvdata *drvdata) CS_LOCK(drvdata->base); } +static int tmc_etb_enable_hw(struct tmc_drvdata *drvdata) +{ + int rc = coresight_claim_device(drvdata->base); + + if (rc) + return rc; + + __tmc_etb_enable_hw(drvdata); + return 0; +} + static void tmc_etb_dump_hw(struct tmc_drvdata *drvdata) { - bool lost = false; char *bufp; - const u32 *barrier; - u32 read_data, status; + u32 read_data, lost; int i; - /* - * Get a hold of the status register and see if a wrap around - * has occurred. - */ - status = readl_relaxed(drvdata->base + TMC_STS); - if (status & TMC_STS_FULL) - lost = true; - + /* Check if the buffer wrapped around. */ + lost = readl_relaxed(drvdata->base + TMC_STS) & TMC_STS_FULL; bufp = drvdata->buf; drvdata->len = 0; - barrier = barrier_pkt; while (1) { for (i = 0; i < drvdata->memwidth; i++) { read_data = readl_relaxed(drvdata->base + TMC_RRD); if (read_data == 0xFFFFFFFF) - return; - - if (lost && *barrier) { - read_data = *barrier; - barrier++; - } - + goto done; memcpy(bufp, &read_data, 4); bufp += 4; drvdata->len += 4; } } +done: + if (lost) + coresight_insert_barrier_packet(drvdata->buf); + return; } -static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata) +static void __tmc_etb_disable_hw(struct tmc_drvdata *drvdata) { CS_UNLOCK(drvdata->base); @@ -94,7 +99,13 @@ static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata) CS_LOCK(drvdata->base); } -static void tmc_etf_enable_hw(struct tmc_drvdata *drvdata) +static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata) +{ + coresight_disclaim_device(drvdata); + __tmc_etb_disable_hw(drvdata); +} + +static void __tmc_etf_enable_hw(struct tmc_drvdata *drvdata) { CS_UNLOCK(drvdata->base); @@ -110,16 +121,45 @@ static void tmc_etf_enable_hw(struct tmc_drvdata *drvdata) CS_LOCK(drvdata->base); } +static int tmc_etf_enable_hw(struct tmc_drvdata *drvdata) +{ + int rc = coresight_claim_device(drvdata->base); + + if (rc) + return rc; + + __tmc_etf_enable_hw(drvdata); + return 0; +} + static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata) { CS_UNLOCK(drvdata->base); tmc_flush_and_stop(drvdata); tmc_disable_hw(drvdata); - + coresight_disclaim_device_unlocked(drvdata->base); CS_LOCK(drvdata->base); } +/* + * Return the available trace data in the buffer from @pos, with + * a maximum limit of @len, updating the @bufpp on where to + * find it. + */ +ssize_t tmc_etb_get_sysfs_trace(struct tmc_drvdata *drvdata, + loff_t pos, size_t len, char **bufpp) +{ + ssize_t actual = len; + + /* Adjust the len to available size @pos */ + if (pos + actual > drvdata->len) + actual = drvdata->len - pos; + if (actual > 0) + *bufpp = drvdata->buf + pos; + return actual; +} + static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev) { int ret = 0; @@ -155,8 +195,10 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev) * sink is already enabled no memory is needed and the HW need not be * touched. */ - if (drvdata->mode == CS_MODE_SYSFS) + if (drvdata->mode == CS_MODE_SYSFS) { + atomic_inc(csdev->refcnt); goto out; + } /* * If drvdata::buf isn't NULL, memory was allocated for a previous @@ -174,8 +216,14 @@ static int tmc_enable_etf_sink_sysfs(struct coresight_device *csdev) drvdata->buf = buf; } - drvdata->mode = CS_MODE_SYSFS; - tmc_etb_enable_hw(drvdata); + ret = tmc_etb_enable_hw(drvdata); + if (!ret) { + drvdata->mode = CS_MODE_SYSFS; + atomic_inc(csdev->refcnt); + } else { + /* Free up the buffer if we failed to enable */ + used = false; + } out: spin_unlock_irqrestore(&drvdata->spinlock, flags); @@ -186,37 +234,42 @@ out: return ret; } -static int tmc_enable_etf_sink_perf(struct coresight_device *csdev) +static int tmc_enable_etf_sink_perf(struct coresight_device *csdev, void *data) { int ret = 0; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + struct perf_output_handle *handle = data; spin_lock_irqsave(&drvdata->spinlock, flags); - if (drvdata->reading) { + do { ret = -EINVAL; - goto out; - } + if (drvdata->reading) + break; + /* + * In Perf mode there can be only one writer per sink. There + * is also no need to continue if the ETB/ETF is already + * operated from sysFS. + */ + if (drvdata->mode != CS_MODE_DISABLED) + break; - /* - * In Perf mode there can be only one writer per sink. There - * is also no need to continue if the ETB/ETR is already operated - * from sysFS. - */ - if (drvdata->mode != CS_MODE_DISABLED) { - ret = -EINVAL; - goto out; - } - - drvdata->mode = CS_MODE_PERF; - tmc_etb_enable_hw(drvdata); -out: + ret = tmc_set_etf_buffer(csdev, handle); + if (ret) + break; + ret = tmc_etb_enable_hw(drvdata); + if (!ret) { + drvdata->mode = CS_MODE_PERF; + atomic_inc(csdev->refcnt); + } + } while (0); spin_unlock_irqrestore(&drvdata->spinlock, flags); return ret; } -static int tmc_enable_etf_sink(struct coresight_device *csdev, u32 mode) +static int tmc_enable_etf_sink(struct coresight_device *csdev, + u32 mode, void *data) { int ret; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); @@ -226,7 +279,7 @@ static int tmc_enable_etf_sink(struct coresight_device *csdev, u32 mode) ret = tmc_enable_etf_sink_sysfs(csdev); break; case CS_MODE_PERF: - ret = tmc_enable_etf_sink_perf(csdev); + ret = tmc_enable_etf_sink_perf(csdev, data); break; /* We shouldn't be here */ default: @@ -237,37 +290,45 @@ static int tmc_enable_etf_sink(struct coresight_device *csdev, u32 mode) if (ret) return ret; - dev_info(drvdata->dev, "TMC-ETB/ETF enabled\n"); + dev_dbg(drvdata->dev, "TMC-ETB/ETF enabled\n"); return 0; } -static void tmc_disable_etf_sink(struct coresight_device *csdev) +static int tmc_disable_etf_sink(struct coresight_device *csdev) { unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); spin_lock_irqsave(&drvdata->spinlock, flags); + if (drvdata->reading) { spin_unlock_irqrestore(&drvdata->spinlock, flags); - return; + return -EBUSY; } - /* Disable the TMC only if it needs to */ - if (drvdata->mode != CS_MODE_DISABLED) { - tmc_etb_disable_hw(drvdata); - drvdata->mode = CS_MODE_DISABLED; + if (atomic_dec_return(csdev->refcnt)) { + spin_unlock_irqrestore(&drvdata->spinlock, flags); + return -EBUSY; } + /* Complain if we (somehow) got out of sync */ + WARN_ON_ONCE(drvdata->mode == CS_MODE_DISABLED); + tmc_etb_disable_hw(drvdata); + drvdata->mode = CS_MODE_DISABLED; + spin_unlock_irqrestore(&drvdata->spinlock, flags); - dev_info(drvdata->dev, "TMC-ETB/ETF disabled\n"); + dev_dbg(drvdata->dev, "TMC-ETB/ETF disabled\n"); + return 0; } static int tmc_enable_etf_link(struct coresight_device *csdev, int inport, int outport) { + int ret = 0; unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + bool first_enable = false; spin_lock_irqsave(&drvdata->spinlock, flags); if (drvdata->reading) { @@ -275,12 +336,20 @@ static int tmc_enable_etf_link(struct coresight_device *csdev, return -EBUSY; } - tmc_etf_enable_hw(drvdata); - drvdata->mode = CS_MODE_SYSFS; + if (atomic_read(&csdev->refcnt[0]) == 0) { + ret = tmc_etf_enable_hw(drvdata); + if (!ret) { + drvdata->mode = CS_MODE_SYSFS; + first_enable = true; + } + } + if (!ret) + atomic_inc(&csdev->refcnt[0]); spin_unlock_irqrestore(&drvdata->spinlock, flags); - dev_info(drvdata->dev, "TMC-ETF enabled\n"); - return 0; + if (first_enable) + dev_dbg(drvdata->dev, "TMC-ETF enabled\n"); + return ret; } static void tmc_disable_etf_link(struct coresight_device *csdev, @@ -288,6 +357,7 @@ static void tmc_disable_etf_link(struct coresight_device *csdev, { unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + bool last_disable = false; spin_lock_irqsave(&drvdata->spinlock, flags); if (drvdata->reading) { @@ -295,22 +365,25 @@ static void tmc_disable_etf_link(struct coresight_device *csdev, return; } - tmc_etf_disable_hw(drvdata); - drvdata->mode = CS_MODE_DISABLED; + if (atomic_dec_return(&csdev->refcnt[0]) == 0) { + tmc_etf_disable_hw(drvdata); + drvdata->mode = CS_MODE_DISABLED; + last_disable = true; + } spin_unlock_irqrestore(&drvdata->spinlock, flags); - dev_info(drvdata->dev, "TMC-ETF disabled\n"); + if (last_disable) + dev_dbg(drvdata->dev, "TMC-ETF disabled\n"); } -static void *tmc_alloc_etf_buffer(struct coresight_device *csdev, int cpu, - void **pages, int nr_pages, bool overwrite) +static void *tmc_alloc_etf_buffer(struct coresight_device *csdev, + struct perf_event *event, void **pages, + int nr_pages, bool overwrite) { - int node; + int node, cpu = event->cpu; struct cs_buffers *buf; - if (cpu == -1) - cpu = smp_processor_id(); - node = cpu_to_node(cpu); + node = (cpu == -1) ? NUMA_NO_NODE : cpu_to_node(cpu); /* Allocate memory structure for interaction with Perf */ buf = kzalloc_node(sizeof(struct cs_buffers), GFP_KERNEL, node); @@ -332,12 +405,14 @@ static void tmc_free_etf_buffer(void *config) } static int tmc_set_etf_buffer(struct coresight_device *csdev, - struct perf_output_handle *handle, - void *sink_config) + struct perf_output_handle *handle) { int ret = 0; unsigned long head; - struct cs_buffers *buf = sink_config; + struct cs_buffers *buf = etm_perf_sink_config(handle); + + if (!buf) + return -EINVAL; /* wrap head around to the amount of space we have */ head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1); @@ -353,36 +428,7 @@ static int tmc_set_etf_buffer(struct coresight_device *csdev, return ret; } -static unsigned long tmc_reset_etf_buffer(struct coresight_device *csdev, - struct perf_output_handle *handle, - void *sink_config) -{ - long size = 0; - struct cs_buffers *buf = sink_config; - - if (buf) { - /* - * In snapshot mode ->data_size holds the new address of the - * ring buffer's head. The size itself is the whole address - * range since we want the latest information. - */ - if (buf->snapshot) - handle->head = local_xchg(&buf->data_size, - buf->nr_pages << PAGE_SHIFT); - /* - * Tell the tracer PMU how much we got in this run and if - * something went wrong along the way. Nobody else can use - * this cs_buffers instance until we are done. As such - * resetting parameters here and squaring off with the ring - * buffer API in the tracer PMU is fine. - */ - size = local_xchg(&buf->data_size, 0); - } - - return size; -} - -static void tmc_update_etf_buffer(struct coresight_device *csdev, +static unsigned long tmc_update_etf_buffer(struct coresight_device *csdev, struct perf_output_handle *handle, void *sink_config) { @@ -391,18 +437,19 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, const u32 *barrier; u32 *buf_ptr; u64 read_ptr, write_ptr; - u32 status, to_read; - unsigned long offset; + u32 status; + unsigned long offset, to_read, flags; struct cs_buffers *buf = sink_config; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); if (!buf) - return; + return 0; /* This shouldn't happen */ if (WARN_ON_ONCE(drvdata->mode != CS_MODE_PERF)) - return; + return 0; + spin_lock_irqsave(&drvdata->spinlock, flags); CS_UNLOCK(drvdata->base); tmc_flush_and_stop(drvdata); @@ -425,33 +472,16 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, /* * The TMC RAM buffer may be bigger than the space available in the * perf ring buffer (handle->size). If so advance the RRP so that we - * get the latest trace data. + * get the latest trace data. In snapshot mode none of that matters + * since we are expected to clobber stale data in favour of the latest + * traces. */ - if (to_read > handle->size) { - u32 mask = 0; - - /* - * The value written to RRP must be byte-address aligned to - * the width of the trace memory databus _and_ to a frame - * boundary (16 byte), whichever is the biggest. For example, - * for 32-bit, 64-bit and 128-bit wide trace memory, the four - * LSBs must be 0s. For 256-bit wide trace memory, the five - * LSBs must be 0s. - */ - switch (drvdata->memwidth) { - case TMC_MEM_INTF_WIDTH_32BITS: - case TMC_MEM_INTF_WIDTH_64BITS: - case TMC_MEM_INTF_WIDTH_128BITS: - mask = GENMASK(31, 5); - break; - case TMC_MEM_INTF_WIDTH_256BITS: - mask = GENMASK(31, 6); - break; - } + if (!buf->snapshot && to_read > handle->size) { + u32 mask = tmc_get_memwidth_mask(drvdata); /* * Make sure the new size is aligned in accordance with the - * requirement explained above. + * requirement explained in function tmc_get_memwidth_mask(). */ to_read = handle->size & mask; /* Move the RAM read pointer up */ @@ -464,7 +494,13 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, lost = true; } - if (lost) + /* + * Don't set the TRUNCATED flag in snapshot mode because 1) the + * captured buffer is expected to be truncated and 2) a full buffer + * prevents the event from being re-enabled by the perf core, + * resulting in stale data being send to user space. + */ + if (!buf->snapshot && lost) perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); cur = buf->cur; @@ -490,18 +526,15 @@ static void tmc_update_etf_buffer(struct coresight_device *csdev, } } - /* - * In snapshot mode all we have to do is communicate to - * perf_aux_output_end() the address of the current head. In full - * trace mode the same function expects a size to move rb->aux_head - * forward. - */ - if (buf->snapshot) - local_set(&buf->data_size, (cur * PAGE_SIZE) + offset); - else - local_add(to_read, &buf->data_size); - + /* In snapshot mode we have to update the head */ + if (buf->snapshot) { + handle->head = (cur * PAGE_SIZE) + offset; + to_read = buf->nr_pages << PAGE_SHIFT; + } CS_LOCK(drvdata->base); + spin_unlock_irqrestore(&drvdata->spinlock, flags); + + return to_read; } static const struct coresight_ops_sink tmc_etf_sink_ops = { @@ -509,8 +542,6 @@ static const struct coresight_ops_sink tmc_etf_sink_ops = { .disable = tmc_disable_etf_sink, .alloc_buffer = tmc_alloc_etf_buffer, .free_buffer = tmc_free_etf_buffer, - .set_buffer = tmc_set_etf_buffer, - .reset_buffer = tmc_reset_etf_buffer, .update_buffer = tmc_update_etf_buffer, }; @@ -567,7 +598,7 @@ int tmc_read_prepare_etb(struct tmc_drvdata *drvdata) /* Disable the TMC if need be */ if (drvdata->mode == CS_MODE_SYSFS) - tmc_etb_disable_hw(drvdata); + __tmc_etb_disable_hw(drvdata); drvdata->reading = true; out: @@ -607,7 +638,7 @@ int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata) * can't be NULL. */ memset(drvdata->buf, 0, drvdata->size); - tmc_etb_enable_hw(drvdata); + __tmc_etb_enable_hw(drvdata); } else { /* * The ETB/ETF is not tracing and the buffer was just read. diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index 68fbc8f7450e..8cd03f78aab2 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -15,24 +15,953 @@ * this program. If not, see . */ +#include #include #include +#include +#include +#include +#include +#include +#include +#include +#include "coresight-catu.h" +#include "coresight-etm-perf.h" #include "coresight-priv.h" #include "coresight-tmc.h" -static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata) +struct etr_flat_buf { + struct device *dev; + dma_addr_t daddr; + void *vaddr; + size_t size; +}; + +/* + * etr_perf_buffer - Perf buffer used for ETR + * @drvdata - The ETR drvdaga this buffer has been allocated for. + * @etr_buf - Actual buffer used by the ETR + * @pid - The PID this etr_perf_buffer belongs to. + * @snaphost - Perf session mode + * @head - handle->head at the beginning of the session. + * @nr_pages - Number of pages in the ring buffer. + * @pages - Array of Pages in the ring buffer. + */ +struct etr_perf_buffer { + struct tmc_drvdata *drvdata; + struct etr_buf *etr_buf; + pid_t pid; + bool snapshot; + unsigned long head; + int nr_pages; + void **pages; +}; + +/* Convert the perf index to an offset within the ETR buffer */ +#define PERF_IDX2OFF(idx, buf) ((idx) % ((buf)->nr_pages << PAGE_SHIFT)) + +/* Lower limit for ETR hardware buffer */ +#define TMC_ETR_PERF_MIN_BUF_SIZE SZ_1M + +/* + * The TMC ETR SG has a page size of 4K. The SG table contains pointers + * to 4KB buffers. However, the OS may use a PAGE_SIZE different from + * 4K (i.e, 16KB or 64KB). This implies that a single OS page could + * contain more than one SG buffer and tables. + * + * A table entry has the following format: + * + * ---Bit31------------Bit4-------Bit1-----Bit0-- + * | Address[39:12] | SBZ | Entry Type | + * ---------------------------------------------- + * + * Address: Bits [39:12] of a physical page address. Bits [11:0] are + * always zero. + * + * Entry type: + * b00 - Reserved. + * b01 - Last entry in the tables, points to 4K page buffer. + * b10 - Normal entry, points to 4K page buffer. + * b11 - Link. The address points to the base of next table. + */ + +typedef u32 sgte_t; + +#define ETR_SG_PAGE_SHIFT 12 +#define ETR_SG_PAGE_SIZE (1UL << ETR_SG_PAGE_SHIFT) +#define ETR_SG_PAGES_PER_SYSPAGE (PAGE_SIZE / ETR_SG_PAGE_SIZE) +#define ETR_SG_PTRS_PER_PAGE (ETR_SG_PAGE_SIZE / sizeof(sgte_t)) +#define ETR_SG_PTRS_PER_SYSPAGE (PAGE_SIZE / sizeof(sgte_t)) + +#define ETR_SG_ET_MASK 0x3 +#define ETR_SG_ET_LAST 0x1 +#define ETR_SG_ET_NORMAL 0x2 +#define ETR_SG_ET_LINK 0x3 + +#define ETR_SG_ADDR_SHIFT 4 + +#define ETR_SG_ENTRY(addr, type) \ + (sgte_t)((((addr) >> ETR_SG_PAGE_SHIFT) << ETR_SG_ADDR_SHIFT) | \ + (type & ETR_SG_ET_MASK)) + +#define ETR_SG_ADDR(entry) \ + (((dma_addr_t)(entry) >> ETR_SG_ADDR_SHIFT) << ETR_SG_PAGE_SHIFT) +#define ETR_SG_ET(entry) ((entry) & ETR_SG_ET_MASK) + +/* + * struct etr_sg_table : ETR SG Table + * @sg_table: Generic SG Table holding the data/table pages. + * @hwaddr: hwaddress used by the TMC, which is the base + * address of the table. + */ +struct etr_sg_table { + struct tmc_sg_table *sg_table; + dma_addr_t hwaddr; +}; + +/* + * tmc_etr_sg_table_entries: Total number of table entries required to map + * @nr_pages system pages. + * + * We need to map @nr_pages * ETR_SG_PAGES_PER_SYSPAGE data pages. + * Each TMC page can map (ETR_SG_PTRS_PER_PAGE - 1) buffer pointers, + * with the last entry pointing to another page of table entries. + * If we spill over to a new page for mapping 1 entry, we could as + * well replace the link entry of the previous page with the last entry. + */ +static inline unsigned long __attribute_const__ +tmc_etr_sg_table_entries(int nr_pages) +{ + unsigned long nr_sgpages = nr_pages * ETR_SG_PAGES_PER_SYSPAGE; + unsigned long nr_sglinks = nr_sgpages / (ETR_SG_PTRS_PER_PAGE - 1); + /* + * If we spill over to a new page for 1 entry, we could as well + * make it the LAST entry in the previous page, skipping the Link + * address. + */ + if (nr_sglinks && (nr_sgpages % (ETR_SG_PTRS_PER_PAGE - 1) < 2)) + nr_sglinks--; + return nr_sgpages + nr_sglinks; +} + +/* + * tmc_pages_get_offset: Go through all the pages in the tmc_pages + * and map the device address @addr to an offset within the virtual + * contiguous buffer. + */ +static long +tmc_pages_get_offset(struct tmc_pages *tmc_pages, dma_addr_t addr) +{ + int i; + dma_addr_t page_start; + + for (i = 0; i < tmc_pages->nr_pages; i++) { + page_start = tmc_pages->daddrs[i]; + if (addr >= page_start && addr < (page_start + PAGE_SIZE)) + return i * PAGE_SIZE + (addr - page_start); + } + + return -EINVAL; +} + +/* + * tmc_pages_free : Unmap and free the pages used by tmc_pages. + * If the pages were not allocated in tmc_pages_alloc(), we would + * simply drop the refcount. + */ +static void tmc_pages_free(struct tmc_pages *tmc_pages, + struct device *dev, enum dma_data_direction dir) +{ + int i; + + for (i = 0; i < tmc_pages->nr_pages; i++) { + if (tmc_pages->daddrs && tmc_pages->daddrs[i]) + dma_unmap_page(dev, tmc_pages->daddrs[i], + PAGE_SIZE, dir); + if (tmc_pages->pages && tmc_pages->pages[i]) + __free_page(tmc_pages->pages[i]); + } + + kfree(tmc_pages->pages); + kfree(tmc_pages->daddrs); + tmc_pages->pages = NULL; + tmc_pages->daddrs = NULL; + tmc_pages->nr_pages = 0; +} + +/* + * tmc_pages_alloc : Allocate and map pages for a given @tmc_pages. + * If @pages is not NULL, the list of page virtual addresses are + * used as the data pages. The pages are then dma_map'ed for @dev + * with dma_direction @dir. + * + * Returns 0 upon success, else the error number. + */ +static int tmc_pages_alloc(struct tmc_pages *tmc_pages, + struct device *dev, int node, + enum dma_data_direction dir, void **pages) +{ + int i, nr_pages; + dma_addr_t paddr; + struct page *page; + + nr_pages = tmc_pages->nr_pages; + tmc_pages->daddrs = kcalloc(nr_pages, sizeof(*tmc_pages->daddrs), + GFP_KERNEL); + if (!tmc_pages->daddrs) + return -ENOMEM; + tmc_pages->pages = kcalloc(nr_pages, sizeof(*tmc_pages->pages), + GFP_KERNEL); + if (!tmc_pages->pages) { + kfree(tmc_pages->daddrs); + tmc_pages->daddrs = NULL; + return -ENOMEM; + } + + for (i = 0; i < nr_pages; i++) { + if (pages && pages[i]) { + page = virt_to_page(pages[i]); + /* Hold a refcount on the page */ + get_page(page); + } else { + page = alloc_pages_node(node, + GFP_KERNEL | __GFP_ZERO, 0); + } + paddr = dma_map_page(dev, page, 0, PAGE_SIZE, dir); + if (dma_mapping_error(dev, paddr)) + goto err; + tmc_pages->daddrs[i] = paddr; + tmc_pages->pages[i] = page; + } + return 0; +err: + tmc_pages_free(tmc_pages, dev, dir); + return -ENOMEM; +} + +static inline long +tmc_sg_get_data_page_offset(struct tmc_sg_table *sg_table, dma_addr_t addr) +{ + return tmc_pages_get_offset(&sg_table->data_pages, addr); +} + +static inline void tmc_free_table_pages(struct tmc_sg_table *sg_table) +{ + if (sg_table->table_vaddr) + vunmap(sg_table->table_vaddr); + tmc_pages_free(&sg_table->table_pages, sg_table->dev, DMA_TO_DEVICE); +} + +static void tmc_free_data_pages(struct tmc_sg_table *sg_table) +{ + if (sg_table->data_vaddr) + vunmap(sg_table->data_vaddr); + tmc_pages_free(&sg_table->data_pages, sg_table->dev, DMA_FROM_DEVICE); +} + +void tmc_free_sg_table(struct tmc_sg_table *sg_table) +{ + tmc_free_table_pages(sg_table); + tmc_free_data_pages(sg_table); +} + +/* + * Alloc pages for the table. Since this will be used by the device, + * allocate the pages closer to the device (i.e, dev_to_node(dev) + * rather than the CPU node). + */ +static int tmc_alloc_table_pages(struct tmc_sg_table *sg_table) +{ + int rc; + struct tmc_pages *table_pages = &sg_table->table_pages; + + rc = tmc_pages_alloc(table_pages, sg_table->dev, + dev_to_node(sg_table->dev), + DMA_TO_DEVICE, NULL); + if (rc) + return rc; + sg_table->table_vaddr = vmap(table_pages->pages, + table_pages->nr_pages, + VM_MAP, + PAGE_KERNEL); + if (!sg_table->table_vaddr) + rc = -ENOMEM; + else + sg_table->table_daddr = table_pages->daddrs[0]; + return rc; +} + +static int tmc_alloc_data_pages(struct tmc_sg_table *sg_table, void **pages) +{ + int rc; + + /* Allocate data pages on the node requested by the caller */ + rc = tmc_pages_alloc(&sg_table->data_pages, + sg_table->dev, sg_table->node, + DMA_FROM_DEVICE, pages); + if (!rc) { + sg_table->data_vaddr = vmap(sg_table->data_pages.pages, + sg_table->data_pages.nr_pages, + VM_MAP, + PAGE_KERNEL); + if (!sg_table->data_vaddr) + rc = -ENOMEM; + } + return rc; +} + +/* + * tmc_alloc_sg_table: Allocate and setup dma pages for the TMC SG table + * and data buffers. TMC writes to the data buffers and reads from the SG + * Table pages. + * + * @dev - Device to which page should be DMA mapped. + * @node - Numa node for mem allocations + * @nr_tpages - Number of pages for the table entries. + * @nr_dpages - Number of pages for Data buffer. + * @pages - Optional list of virtual address of pages. + */ +struct tmc_sg_table *tmc_alloc_sg_table(struct device *dev, + int node, + int nr_tpages, + int nr_dpages, + void **pages) +{ + long rc; + struct tmc_sg_table *sg_table; + + sg_table = kzalloc(sizeof(*sg_table), GFP_KERNEL); + if (!sg_table) + return ERR_PTR(-ENOMEM); + sg_table->data_pages.nr_pages = nr_dpages; + sg_table->table_pages.nr_pages = nr_tpages; + sg_table->node = node; + sg_table->dev = dev; + + rc = tmc_alloc_data_pages(sg_table, pages); + if (!rc) + rc = tmc_alloc_table_pages(sg_table); + if (rc) { + tmc_free_sg_table(sg_table); + kfree(sg_table); + return ERR_PTR(rc); + } + + return sg_table; +} + +/* + * tmc_sg_table_sync_data_range: Sync the data buffer written + * by the device from @offset upto a @size bytes. + */ +void tmc_sg_table_sync_data_range(struct tmc_sg_table *table, + u64 offset, u64 size) +{ + int i, index, start; + int npages = DIV_ROUND_UP(size, PAGE_SIZE); + struct device *dev = table->dev; + struct tmc_pages *data = &table->data_pages; + + start = offset >> PAGE_SHIFT; + for (i = start; i < (start + npages); i++) { + index = i % data->nr_pages; + dma_sync_single_for_cpu(dev, data->daddrs[index], + PAGE_SIZE, DMA_FROM_DEVICE); + } +} + +/* tmc_sg_sync_table: Sync the page table */ +void tmc_sg_table_sync_table(struct tmc_sg_table *sg_table) +{ + int i; + struct device *dev = sg_table->dev; + struct tmc_pages *table_pages = &sg_table->table_pages; + + for (i = 0; i < table_pages->nr_pages; i++) + dma_sync_single_for_device(dev, table_pages->daddrs[i], + PAGE_SIZE, DMA_TO_DEVICE); +} + +/* + * tmc_sg_table_get_data: Get the buffer pointer for data @offset + * in the SG buffer. The @bufpp is updated to point to the buffer. + * Returns : + * the length of linear data available at @offset. + * or + * <= 0 if no data is available. + */ +ssize_t tmc_sg_table_get_data(struct tmc_sg_table *sg_table, + u64 offset, size_t len, char **bufpp) +{ + size_t size; + int pg_idx = offset >> PAGE_SHIFT; + int pg_offset = offset & (PAGE_SIZE - 1); + struct tmc_pages *data_pages = &sg_table->data_pages; + + size = tmc_sg_table_buf_size(sg_table); + if (offset >= size) + return -EINVAL; + + /* Make sure we don't go beyond the end */ + len = (len < (size - offset)) ? len : size - offset; + /* Respect the page boundaries */ + len = (len < (PAGE_SIZE - pg_offset)) ? len : (PAGE_SIZE - pg_offset); + if (len > 0) + *bufpp = page_address(data_pages->pages[pg_idx]) + pg_offset; + return len; +} + +#ifdef ETR_SG_DEBUG +/* Map a dma address to virtual address */ +static unsigned long +tmc_sg_daddr_to_vaddr(struct tmc_sg_table *sg_table, + dma_addr_t addr, bool table) +{ + long offset; + unsigned long base; + struct tmc_pages *tmc_pages; + + if (table) { + tmc_pages = &sg_table->table_pages; + base = (unsigned long)sg_table->table_vaddr; + } else { + tmc_pages = &sg_table->data_pages; + base = (unsigned long)sg_table->data_vaddr; + } + + offset = tmc_pages_get_offset(tmc_pages, addr); + if (offset < 0) + return 0; + return base + offset; +} + +/* Dump the given sg_table */ +static void tmc_etr_sg_table_dump(struct etr_sg_table *etr_table) +{ + sgte_t *ptr; + int i = 0; + dma_addr_t addr; + struct tmc_sg_table *sg_table = etr_table->sg_table; + + ptr = (sgte_t *)tmc_sg_daddr_to_vaddr(sg_table, + etr_table->hwaddr, true); + while (ptr) { + addr = ETR_SG_ADDR(*ptr); + switch (ETR_SG_ET(*ptr)) { + case ETR_SG_ET_NORMAL: + dev_dbg(sg_table->dev, + "%05d: %p\t:[N] 0x%llx\n", i, ptr, addr); + ptr++; + break; + case ETR_SG_ET_LINK: + dev_dbg(sg_table->dev, + "%05d: *** %p\t:{L} 0x%llx ***\n", + i, ptr, addr); + ptr = (sgte_t *)tmc_sg_daddr_to_vaddr(sg_table, + addr, true); + break; + case ETR_SG_ET_LAST: + dev_dbg(sg_table->dev, + "%05d: ### %p\t:[L] 0x%llx ###\n", + i, ptr, addr); + return; + default: + dev_dbg(sg_table->dev, + "%05d: xxx %p\t:[INVALID] 0x%llx xxx\n", + i, ptr, addr); + return; + } + i++; + } + dev_dbg(sg_table->dev, "******* End of Table *****\n"); +} +#else +static inline void tmc_etr_sg_table_dump(struct etr_sg_table *etr_table) {} +#endif + +/* + * Populate the SG Table page table entries from table/data + * pages allocated. Each Data page has ETR_SG_PAGES_PER_SYSPAGE SG pages. + * So does a Table page. So we keep track of indices of the tables + * in each system page and move the pointers accordingly. + */ +#define INC_IDX_ROUND(idx, size) ((idx) = ((idx) + 1) % (size)) +static void tmc_etr_sg_table_populate(struct etr_sg_table *etr_table) +{ + dma_addr_t paddr; + int i, type, nr_entries; + int tpidx = 0; /* index to the current system table_page */ + int sgtidx = 0; /* index to the sg_table within the current syspage */ + int sgtentry = 0; /* the entry within the sg_table */ + int dpidx = 0; /* index to the current system data_page */ + int spidx = 0; /* index to the SG page within the current data page */ + sgte_t *ptr; /* pointer to the table entry to fill */ + struct tmc_sg_table *sg_table = etr_table->sg_table; + dma_addr_t *table_daddrs = sg_table->table_pages.daddrs; + dma_addr_t *data_daddrs = sg_table->data_pages.daddrs; + + nr_entries = tmc_etr_sg_table_entries(sg_table->data_pages.nr_pages); + /* + * Use the contiguous virtual address of the table to update entries. + */ + ptr = sg_table->table_vaddr; + /* + * Fill all the entries, except the last entry to avoid special + * checks within the loop. + */ + for (i = 0; i < nr_entries - 1; i++) { + if (sgtentry == ETR_SG_PTRS_PER_PAGE - 1) { + /* + * Last entry in a sg_table page is a link address to + * the next table page. If this sg_table is the last + * one in the system page, it links to the first + * sg_table in the next system page. Otherwise, it + * links to the next sg_table page within the system + * page. + */ + if (sgtidx == ETR_SG_PAGES_PER_SYSPAGE - 1) { + paddr = table_daddrs[tpidx + 1]; + } else { + paddr = table_daddrs[tpidx] + + (ETR_SG_PAGE_SIZE * (sgtidx + 1)); + } + type = ETR_SG_ET_LINK; + } else { + /* + * Update the indices to the data_pages to point to the + * next sg_page in the data buffer. + */ + type = ETR_SG_ET_NORMAL; + paddr = data_daddrs[dpidx] + spidx * ETR_SG_PAGE_SIZE; + if (!INC_IDX_ROUND(spidx, ETR_SG_PAGES_PER_SYSPAGE)) + dpidx++; + } + *ptr++ = ETR_SG_ENTRY(paddr, type); + /* + * Move to the next table pointer, moving the table page index + * if necessary + */ + if (!INC_IDX_ROUND(sgtentry, ETR_SG_PTRS_PER_PAGE)) { + if (!INC_IDX_ROUND(sgtidx, ETR_SG_PAGES_PER_SYSPAGE)) + tpidx++; + } + } + + /* Set up the last entry, which is always a data pointer */ + paddr = data_daddrs[dpidx] + spidx * ETR_SG_PAGE_SIZE; + *ptr++ = ETR_SG_ENTRY(paddr, ETR_SG_ET_LAST); +} + +/* + * tmc_init_etr_sg_table: Allocate a TMC ETR SG table, data buffer of @size and + * populate the table. + * + * @dev - Device pointer for the TMC + * @node - NUMA node where the memory should be allocated + * @size - Total size of the data buffer + * @pages - Optional list of page virtual address + */ +static struct etr_sg_table * +tmc_init_etr_sg_table(struct device *dev, int node, + unsigned long size, void **pages) +{ + int nr_entries, nr_tpages; + int nr_dpages = size >> PAGE_SHIFT; + struct tmc_sg_table *sg_table; + struct etr_sg_table *etr_table; + + etr_table = kzalloc(sizeof(*etr_table), GFP_KERNEL); + if (!etr_table) + return ERR_PTR(-ENOMEM); + nr_entries = tmc_etr_sg_table_entries(nr_dpages); + nr_tpages = DIV_ROUND_UP(nr_entries, ETR_SG_PTRS_PER_SYSPAGE); + + sg_table = tmc_alloc_sg_table(dev, node, nr_tpages, nr_dpages, pages); + if (IS_ERR(sg_table)) { + kfree(etr_table); + return ERR_CAST(sg_table); + } + + etr_table->sg_table = sg_table; + /* TMC should use table base address for DBA */ + etr_table->hwaddr = sg_table->table_daddr; + tmc_etr_sg_table_populate(etr_table); + /* Sync the table pages for the HW */ + tmc_sg_table_sync_table(sg_table); + tmc_etr_sg_table_dump(etr_table); + + return etr_table; +} + +/* + * tmc_etr_alloc_flat_buf: Allocate a contiguous DMA buffer. + */ +static int tmc_etr_alloc_flat_buf(struct tmc_drvdata *drvdata, + struct etr_buf *etr_buf, int node, + void **pages) +{ + struct etr_flat_buf *flat_buf; + + /* We cannot reuse existing pages for flat buf */ + if (pages) + return -EINVAL; + + flat_buf = kzalloc(sizeof(*flat_buf), GFP_KERNEL); + if (!flat_buf) + return -ENOMEM; + + flat_buf->vaddr = dma_alloc_coherent(drvdata->dev, etr_buf->size, + &flat_buf->daddr, GFP_KERNEL); + if (!flat_buf->vaddr) { + kfree(flat_buf); + return -ENOMEM; + } + + flat_buf->size = etr_buf->size; + flat_buf->dev = drvdata->dev; + etr_buf->hwaddr = flat_buf->daddr; + etr_buf->mode = ETR_MODE_FLAT; + etr_buf->private = flat_buf; + return 0; +} + +static void tmc_etr_free_flat_buf(struct etr_buf *etr_buf) +{ + struct etr_flat_buf *flat_buf = etr_buf->private; + + if (flat_buf && flat_buf->daddr) + dma_free_coherent(flat_buf->dev, flat_buf->size, + flat_buf->vaddr, flat_buf->daddr); + kfree(flat_buf); +} + +static void tmc_etr_sync_flat_buf(struct etr_buf *etr_buf, u64 rrp, u64 rwp) +{ + /* + * Adjust the buffer to point to the beginning of the trace data + * and update the available trace data. + */ + etr_buf->offset = rrp - etr_buf->hwaddr; + if (etr_buf->full) + etr_buf->len = etr_buf->size; + else + etr_buf->len = rwp - rrp; +} + +static ssize_t tmc_etr_get_data_flat_buf(struct etr_buf *etr_buf, + u64 offset, size_t len, char **bufpp) +{ + struct etr_flat_buf *flat_buf = etr_buf->private; + + *bufpp = (char *)flat_buf->vaddr + offset; + /* + * tmc_etr_buf_get_data already adjusts the length to handle + * buffer wrapping around. + */ + return len; +} + +static const struct etr_buf_operations etr_flat_buf_ops = { + .alloc = tmc_etr_alloc_flat_buf, + .free = tmc_etr_free_flat_buf, + .sync = tmc_etr_sync_flat_buf, + .get_data = tmc_etr_get_data_flat_buf, +}; + +/* + * tmc_etr_alloc_sg_buf: Allocate an SG buf @etr_buf. Setup the parameters + * appropriately. + */ +static int tmc_etr_alloc_sg_buf(struct tmc_drvdata *drvdata, + struct etr_buf *etr_buf, int node, + void **pages) +{ + struct etr_sg_table *etr_table; + + etr_table = tmc_init_etr_sg_table(drvdata->dev, node, + etr_buf->size, pages); + if (IS_ERR(etr_table)) + return -ENOMEM; + etr_buf->hwaddr = etr_table->hwaddr; + etr_buf->mode = ETR_MODE_ETR_SG; + etr_buf->private = etr_table; + return 0; +} + +static void tmc_etr_free_sg_buf(struct etr_buf *etr_buf) +{ + struct etr_sg_table *etr_table = etr_buf->private; + + if (etr_table) { + tmc_free_sg_table(etr_table->sg_table); + kfree(etr_table); + } +} + +static ssize_t tmc_etr_get_data_sg_buf(struct etr_buf *etr_buf, u64 offset, + size_t len, char **bufpp) +{ + struct etr_sg_table *etr_table = etr_buf->private; + + return tmc_sg_table_get_data(etr_table->sg_table, offset, len, bufpp); +} + +static void tmc_etr_sync_sg_buf(struct etr_buf *etr_buf, u64 rrp, u64 rwp) +{ + long r_offset, w_offset; + struct etr_sg_table *etr_table = etr_buf->private; + struct tmc_sg_table *table = etr_table->sg_table; + + /* Convert hw address to offset in the buffer */ + r_offset = tmc_sg_get_data_page_offset(table, rrp); + if (r_offset < 0) { + dev_warn(table->dev, + "Unable to map RRP %llx to offset\n", rrp); + etr_buf->len = 0; + return; + } + + w_offset = tmc_sg_get_data_page_offset(table, rwp); + if (w_offset < 0) { + dev_warn(table->dev, + "Unable to map RWP %llx to offset\n", rwp); + etr_buf->len = 0; + return; + } + + etr_buf->offset = r_offset; + if (etr_buf->full) + etr_buf->len = etr_buf->size; + else + etr_buf->len = ((w_offset < r_offset) ? etr_buf->size : 0) + + w_offset - r_offset; + tmc_sg_table_sync_data_range(table, r_offset, etr_buf->len); +} + +static const struct etr_buf_operations etr_sg_buf_ops = { + .alloc = tmc_etr_alloc_sg_buf, + .free = tmc_etr_free_sg_buf, + .sync = tmc_etr_sync_sg_buf, + .get_data = tmc_etr_get_data_sg_buf, +}; + +/* + * TMC ETR could be connected to a CATU device, which can provide address + * translation service. This is represented by the Output port of the TMC + * (ETR) connected to the input port of the CATU. + * + * Returns : coresight_device ptr for the CATU device if a CATU is found. + * : NULL otherwise. + */ +struct coresight_device * +tmc_etr_get_catu_device(struct tmc_drvdata *drvdata) +{ + int i; + struct coresight_device *tmp, *etr = drvdata->csdev; + + if (!IS_ENABLED(CONFIG_CORESIGHT_CATU)) + return NULL; + + for (i = 0; i < etr->nr_outport; i++) { + tmp = etr->conns[i].child_dev; + if (tmp && coresight_is_catu_device(tmp)) + return tmp; + } + + return NULL; +} + +static inline int tmc_etr_enable_catu(struct tmc_drvdata *drvdata, + struct etr_buf *etr_buf) +{ + struct coresight_device *catu = tmc_etr_get_catu_device(drvdata); + + if (catu && helper_ops(catu)->enable) + return helper_ops(catu)->enable(catu, etr_buf); + return 0; +} + +static inline void tmc_etr_disable_catu(struct tmc_drvdata *drvdata) +{ + struct coresight_device *catu = tmc_etr_get_catu_device(drvdata); + + if (catu && helper_ops(catu)->disable) + helper_ops(catu)->disable(catu, drvdata->etr_buf); +} + +static const struct etr_buf_operations *etr_buf_ops[] = { + [ETR_MODE_FLAT] = &etr_flat_buf_ops, + [ETR_MODE_ETR_SG] = &etr_sg_buf_ops, + [ETR_MODE_CATU] = IS_ENABLED(CONFIG_CORESIGHT_CATU) + ? &etr_catu_buf_ops : NULL, +}; + +static inline int tmc_etr_mode_alloc_buf(int mode, + struct tmc_drvdata *drvdata, + struct etr_buf *etr_buf, int node, + void **pages) +{ + int rc = -EINVAL; + + switch (mode) { + case ETR_MODE_FLAT: + case ETR_MODE_ETR_SG: + case ETR_MODE_CATU: + if (etr_buf_ops[mode] && etr_buf_ops[mode]->alloc) + rc = etr_buf_ops[mode]->alloc(drvdata, etr_buf, + node, pages); + if (!rc) + etr_buf->ops = etr_buf_ops[mode]; + return rc; + default: + return -EINVAL; + } +} + +/* + * tmc_alloc_etr_buf: Allocate a buffer use by ETR. + * @drvdata : ETR device details. + * @size : size of the requested buffer. + * @flags : Required properties for the buffer. + * @node : Node for memory allocations. + * @pages : An optional list of pages. + */ +static struct etr_buf *tmc_alloc_etr_buf(struct tmc_drvdata *drvdata, + ssize_t size, int flags, + int node, void **pages) +{ + int rc = -ENOMEM; + bool has_etr_sg, has_iommu; + bool has_sg, has_catu; + struct etr_buf *etr_buf; + + has_etr_sg = tmc_etr_has_cap(drvdata, TMC_ETR_SG); + has_iommu = iommu_get_domain_for_dev(drvdata->dev); + has_catu = !!tmc_etr_get_catu_device(drvdata); + + has_sg = has_catu || has_etr_sg; + + etr_buf = kzalloc(sizeof(*etr_buf), GFP_KERNEL); + if (!etr_buf) + return ERR_PTR(-ENOMEM); + + etr_buf->size = size; + + /* + * If we have to use an existing list of pages, we cannot reliably + * use a contiguous DMA memory (even if we have an IOMMU). Otherwise, + * we use the contiguous DMA memory if at least one of the following + * conditions is true: + * a) The ETR cannot use Scatter-Gather. + * b) we have a backing IOMMU + * c) The requested memory size is smaller (< 1M). + * + * Fallback to available mechanisms. + * + */ + if (!pages && + (!has_sg || has_iommu || size < SZ_1M)) + rc = tmc_etr_mode_alloc_buf(ETR_MODE_FLAT, drvdata, + etr_buf, node, pages); + if (rc && has_etr_sg) + rc = tmc_etr_mode_alloc_buf(ETR_MODE_ETR_SG, drvdata, + etr_buf, node, pages); + if (rc && has_catu) + rc = tmc_etr_mode_alloc_buf(ETR_MODE_CATU, drvdata, + etr_buf, node, pages); + if (rc) { + kfree(etr_buf); + return ERR_PTR(rc); + } + + refcount_set(&etr_buf->refcount, 1); + dev_dbg(drvdata->dev, "allocated buffer of size %ldKB in mode %d\n", + (unsigned long)size >> 10, etr_buf->mode); + return etr_buf; +} + +static void tmc_free_etr_buf(struct etr_buf *etr_buf) +{ + WARN_ON(!etr_buf->ops || !etr_buf->ops->free); + etr_buf->ops->free(etr_buf); + kfree(etr_buf); +} + +/* + * tmc_etr_buf_get_data: Get the pointer the trace data at @offset + * with a maximum of @len bytes. + * Returns: The size of the linear data available @pos, with *bufpp + * updated to point to the buffer. + */ +static ssize_t tmc_etr_buf_get_data(struct etr_buf *etr_buf, + u64 offset, size_t len, char **bufpp) +{ + /* Adjust the length to limit this transaction to end of buffer */ + len = (len < (etr_buf->size - offset)) ? len : etr_buf->size - offset; + + return etr_buf->ops->get_data(etr_buf, (u64)offset, len, bufpp); +} + +static inline s64 +tmc_etr_buf_insert_barrier_packet(struct etr_buf *etr_buf, u64 offset) +{ + ssize_t len; + char *bufp; + + len = tmc_etr_buf_get_data(etr_buf, offset, + CORESIGHT_BARRIER_PKT_SIZE, &bufp); + if (WARN_ON(len < CORESIGHT_BARRIER_PKT_SIZE)) + return -EINVAL; + coresight_insert_barrier_packet(bufp); + return offset + CORESIGHT_BARRIER_PKT_SIZE; +} + +/* + * tmc_sync_etr_buf: Sync the trace buffer availability with drvdata. + * Makes sure the trace data is synced to the memory for consumption. + * @etr_buf->offset will hold the offset to the beginning of the trace data + * within the buffer, with @etr_buf->len bytes to consume. + */ +static void tmc_sync_etr_buf(struct tmc_drvdata *drvdata) +{ + struct etr_buf *etr_buf = drvdata->etr_buf; + u64 rrp, rwp; + u32 status; + + rrp = tmc_read_rrp(drvdata); + rwp = tmc_read_rwp(drvdata); + status = readl_relaxed(drvdata->base + TMC_STS); + + /* + * If there were memory errors in the session, truncate the + * buffer. + */ + if (WARN_ON_ONCE(status & TMC_STS_MEMERR)) { + dev_dbg(&drvdata->csdev->dev, + "tmc memory error detected, truncating buffer\n"); + etr_buf->len = 0; + etr_buf->full = 0; + return; + } + + etr_buf->full = status & TMC_STS_FULL; + + WARN_ON(!etr_buf->ops || !etr_buf->ops->sync); + + etr_buf->ops->sync(etr_buf, rrp, rwp); +} + +static void __tmc_etr_enable_hw(struct tmc_drvdata *drvdata) { u32 axictl, sts; - - /* Zero out the memory to help with debug */ - memset(drvdata->vaddr, 0, drvdata->size); + struct etr_buf *etr_buf = drvdata->etr_buf; CS_UNLOCK(drvdata->base); /* Wait for TMCSReady bit to be set */ tmc_wait_for_tmcready(drvdata); - writel_relaxed(drvdata->size / 4, drvdata->base + TMC_RSZ); + writel_relaxed(etr_buf->size / 4, drvdata->base + TMC_RSZ); writel_relaxed(TMC_MODE_CIRCULAR_BUFFER, drvdata->base + TMC_MODE); axictl = readl_relaxed(drvdata->base + TMC_AXICTL); @@ -45,16 +974,19 @@ static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata) axictl |= TMC_AXICTL_ARCACHE_OS; } + if (etr_buf->mode == ETR_MODE_ETR_SG) + axictl |= TMC_AXICTL_SCT_GAT_MODE; + writel_relaxed(axictl, drvdata->base + TMC_AXICTL); - tmc_write_dba(drvdata, drvdata->paddr); + tmc_write_dba(drvdata, etr_buf->hwaddr); /* * If the TMC pointers must be programmed before the session, * we have to set it properly (i.e, RRP/RWP to base address and * STS to "not full"). */ if (tmc_etr_has_cap(drvdata, TMC_ETR_SAVE_RESTORE)) { - tmc_write_rrp(drvdata, drvdata->paddr); - tmc_write_rwp(drvdata, drvdata->paddr); + tmc_write_rrp(drvdata, etr_buf->hwaddr); + tmc_write_rwp(drvdata, etr_buf->hwaddr); sts = readl_relaxed(drvdata->base + TMC_STS) & ~TMC_STS_FULL; writel_relaxed(sts, drvdata->base + TMC_STS); } @@ -69,40 +1001,101 @@ static void tmc_etr_enable_hw(struct tmc_drvdata *drvdata) CS_LOCK(drvdata->base); } -static void tmc_etr_dump_hw(struct tmc_drvdata *drvdata) +static int tmc_etr_enable_hw(struct tmc_drvdata *drvdata, + struct etr_buf *etr_buf) { - const u32 *barrier; - u32 val; - u32 *temp; - u64 rwp; + int rc; - rwp = tmc_read_rwp(drvdata); - val = readl_relaxed(drvdata->base + TMC_STS); + /* Callers should provide an appropriate buffer for use */ + if (WARN_ON(!etr_buf)) + return -EINVAL; + + if ((etr_buf->mode == ETR_MODE_ETR_SG) && + WARN_ON(!tmc_etr_has_cap(drvdata, TMC_ETR_SG))) + return -EINVAL; + + if (WARN_ON(drvdata->etr_buf)) + return -EBUSY; /* - * Adjust the buffer to point to the beginning of the trace data - * and update the available trace data. + * If this ETR is connected to a CATU, enable it before we turn + * this on. */ - if (val & TMC_STS_FULL) { - drvdata->buf = drvdata->vaddr + rwp - drvdata->paddr; - drvdata->len = drvdata->size; + rc = tmc_etr_enable_catu(drvdata, etr_buf); + if (rc) + return rc; + rc = coresight_claim_device(drvdata->base); + if (!rc) { + drvdata->etr_buf = etr_buf; + __tmc_etr_enable_hw(drvdata); + } - barrier = barrier_pkt; - temp = (u32 *)drvdata->buf; + return rc; +} - while (*barrier) { - *temp = *barrier; - temp++; - barrier++; - } +/* + * Return the available trace data in the buffer (starts at etr_buf->offset, + * limited by etr_buf->len) from @pos, with a maximum limit of @len, + * also updating the @bufpp on where to find it. Since the trace data + * starts at anywhere in the buffer, depending on the RRP, we adjust the + * @len returned to handle buffer wrapping around. + * + * We are protected here by drvdata->reading != 0, which ensures the + * sysfs_buf stays alive. + */ +ssize_t tmc_etr_get_sysfs_trace(struct tmc_drvdata *drvdata, + loff_t pos, size_t len, char **bufpp) +{ + s64 offset; + ssize_t actual = len; + struct etr_buf *etr_buf = drvdata->sysfs_buf; + if (pos + actual > etr_buf->len) + actual = etr_buf->len - pos; + if (actual <= 0) + return actual; + + /* Compute the offset from which we read the data */ + offset = etr_buf->offset + pos; + if (offset >= etr_buf->size) + offset -= etr_buf->size; + return tmc_etr_buf_get_data(etr_buf, offset, actual, bufpp); +} + +static struct etr_buf * +tmc_etr_setup_sysfs_buf(struct tmc_drvdata *drvdata) +{ + return tmc_alloc_etr_buf(drvdata, drvdata->size, + 0, cpu_to_node(0), NULL); +} + +static void +tmc_etr_free_sysfs_buf(struct etr_buf *buf) +{ + if (buf) + tmc_free_etr_buf(buf); +} + +static void tmc_etr_sync_sysfs_buf(struct tmc_drvdata *drvdata) +{ + struct etr_buf *etr_buf = drvdata->etr_buf; + + if (WARN_ON(drvdata->sysfs_buf != etr_buf)) { + tmc_etr_free_sysfs_buf(drvdata->sysfs_buf); + drvdata->sysfs_buf = NULL; } else { - drvdata->buf = drvdata->vaddr; - drvdata->len = rwp - drvdata->paddr; + tmc_sync_etr_buf(drvdata); + /* + * Insert barrier packets at the beginning, if there was + * an overflow. + */ + if (etr_buf->full) + tmc_etr_buf_insert_barrier_packet(etr_buf, + etr_buf->offset); } } -static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata) +static void __tmc_etr_disable_hw(struct tmc_drvdata *drvdata) { CS_UNLOCK(drvdata->base); @@ -112,45 +1105,55 @@ static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata) * read before the TMC is disabled. */ if (drvdata->mode == CS_MODE_SYSFS) - tmc_etr_dump_hw(drvdata); + tmc_etr_sync_sysfs_buf(drvdata); + tmc_disable_hw(drvdata); CS_LOCK(drvdata->base); + +} + +static void tmc_etr_disable_hw(struct tmc_drvdata *drvdata) +{ + __tmc_etr_disable_hw(drvdata); + /* Disable CATU device if this ETR is connected to one */ + tmc_etr_disable_catu(drvdata); + coresight_disclaim_device(drvdata->base); + /* Reset the ETR buf used by hardware */ + drvdata->etr_buf = NULL; } static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) { int ret = 0; - bool used = false; unsigned long flags; - void __iomem *vaddr = NULL; - dma_addr_t paddr; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + struct etr_buf *sysfs_buf = NULL, *new_buf = NULL, *free_buf = NULL; /* - * If we don't have a buffer release the lock and allocate memory. - * Otherwise keep the lock and move along. + * If we are enabling the ETR from disabled state, we need to make + * sure we have a buffer with the right size. The etr_buf is not reset + * immediately after we stop the tracing in SYSFS mode as we wait for + * the user to collect the data. We may be able to reuse the existing + * buffer, provided the size matches. Any allocation has to be done + * with the lock released. */ spin_lock_irqsave(&drvdata->spinlock, flags); - if (!drvdata->vaddr) { + sysfs_buf = READ_ONCE(drvdata->sysfs_buf); + if (!sysfs_buf || (sysfs_buf->size != drvdata->size)) { spin_unlock_irqrestore(&drvdata->spinlock, flags); - /* - * Contiguous memory can't be allocated while a spinlock is - * held. As such allocate memory here and free it if a buffer - * has already been allocated (from a previous session). - */ - vaddr = dma_alloc_coherent(drvdata->dev, drvdata->size, - &paddr, GFP_KERNEL); - if (!vaddr) - return -ENOMEM; + /* Allocate memory with the locks released */ + free_buf = new_buf = tmc_etr_setup_sysfs_buf(drvdata); + if (IS_ERR(new_buf)) + return PTR_ERR(new_buf); /* Let's try again */ spin_lock_irqsave(&drvdata->spinlock, flags); } - if (drvdata->reading) { + if (drvdata->reading || drvdata->mode == CS_MODE_PERF) { ret = -EBUSY; goto out; } @@ -158,106 +1161,512 @@ static int tmc_enable_etr_sink_sysfs(struct coresight_device *csdev) /* * In sysFS mode we can have multiple writers per sink. Since this * sink is already enabled no memory is needed and the HW need not be - * touched. + * touched, even if the buffer size has changed. */ - if (drvdata->mode == CS_MODE_SYSFS) + if (drvdata->mode == CS_MODE_SYSFS) { + atomic_inc(csdev->refcnt); goto out; - - /* - * If drvdata::buf == NULL, use the memory allocated above. - * Otherwise a buffer still exists from a previous session, so - * simply use that. - */ - if (drvdata->buf == NULL) { - used = true; - drvdata->vaddr = vaddr; - drvdata->paddr = paddr; - drvdata->buf = drvdata->vaddr; } - drvdata->mode = CS_MODE_SYSFS; - tmc_etr_enable_hw(drvdata); + /* + * If we don't have a buffer or it doesn't match the requested size, + * use the buffer allocated above. Otherwise reuse the existing buffer. + */ + sysfs_buf = READ_ONCE(drvdata->sysfs_buf); + if (!sysfs_buf || (new_buf && sysfs_buf->size != new_buf->size)) { + free_buf = sysfs_buf; + drvdata->sysfs_buf = new_buf; + } + + ret = tmc_etr_enable_hw(drvdata, drvdata->sysfs_buf); + if (!ret) { + drvdata->mode = CS_MODE_SYSFS; + atomic_inc(csdev->refcnt); + } out: spin_unlock_irqrestore(&drvdata->spinlock, flags); /* Free memory outside the spinlock if need be */ - if (!used && vaddr) - dma_free_coherent(drvdata->dev, drvdata->size, vaddr, paddr); + if (free_buf) + tmc_etr_free_sysfs_buf(free_buf); if (!ret) - dev_info(drvdata->dev, "TMC-ETR enabled\n"); + dev_dbg(drvdata->dev, "TMC-ETR enabled\n"); return ret; } -static int tmc_enable_etr_sink_perf(struct coresight_device *csdev) +/* + * alloc_etr_buf: Allocate ETR buffer for use by perf. + * The size of the hardware buffer is dependent on the size configured + * via sysfs and the perf ring buffer size. We prefer to allocate the + * largest possible size, scaling down the size by half until it + * reaches a minimum limit (1M), beyond which we give up. + */ +static struct etr_buf * +alloc_etr_buf(struct tmc_drvdata *drvdata, struct perf_event *event, + int nr_pages, void **pages, bool snapshot) { - int ret = 0; - unsigned long flags; - struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + int node; + struct etr_buf *etr_buf; + unsigned long size; - spin_lock_irqsave(&drvdata->spinlock, flags); - if (drvdata->reading) { - ret = -EINVAL; - goto out; + node = (event->cpu == -1) ? NUMA_NO_NODE : cpu_to_node(event->cpu); + /* + * Try to match the perf ring buffer size if it is larger + * than the size requested via sysfs. + */ + if ((nr_pages << PAGE_SHIFT) > drvdata->size) { + etr_buf = tmc_alloc_etr_buf(drvdata, (nr_pages << PAGE_SHIFT), + 0, node, NULL); + if (!IS_ERR(etr_buf)) + goto done; } /* - * In Perf mode there can be only one writer per sink. There - * is also no need to continue if the ETR is already operated - * from sysFS. + * Else switch to configured size for this ETR + * and scale down until we hit the minimum limit. */ - if (drvdata->mode != CS_MODE_DISABLED) { - ret = -EINVAL; + size = drvdata->size; + do { + etr_buf = tmc_alloc_etr_buf(drvdata, size, 0, node, NULL); + if (!IS_ERR(etr_buf)) + goto done; + size /= 2; + } while (size >= TMC_ETR_PERF_MIN_BUF_SIZE); + + return ERR_PTR(-ENOMEM); + +done: + return etr_buf; +} + +static struct etr_buf * +get_perf_etr_buf_cpu_wide(struct tmc_drvdata *drvdata, + struct perf_event *event, int nr_pages, + void **pages, bool snapshot) +{ + int ret; + pid_t pid = task_pid_nr(event->owner); + struct etr_buf *etr_buf; + +retry: + /* + * An etr_perf_buffer is associated with an event and holds a reference + * to the AUX ring buffer that was created for that event. In CPU-wide + * N:1 mode multiple events (one per CPU), each with its own AUX ring + * buffer, share a sink. As such an etr_perf_buffer is created for each + * event but a single etr_buf associated with the ETR is shared between + * them. The last event in a trace session will copy the content of the + * etr_buf to its AUX ring buffer. Ring buffer associated to other + * events are simply not used an freed as events are destoyed. We still + * need to allocate a ring buffer for each event since we don't know + * which event will be last. + */ + + /* + * The first thing to do here is check if an etr_buf has already been + * allocated for this session. If so it is shared with this event, + * otherwise it is created. + */ + mutex_lock(&drvdata->idr_mutex); + etr_buf = idr_find(&drvdata->idr, pid); + if (etr_buf) { + refcount_inc(&etr_buf->refcount); + mutex_unlock(&drvdata->idr_mutex); + return etr_buf; + } + + /* If we made it here no buffer has been allocated, do so now. */ + mutex_unlock(&drvdata->idr_mutex); + + etr_buf = alloc_etr_buf(drvdata, event, nr_pages, pages, snapshot); + if (IS_ERR(etr_buf)) + return etr_buf; + + /* Now that we have a buffer, add it to the IDR. */ + mutex_lock(&drvdata->idr_mutex); + ret = idr_alloc(&drvdata->idr, etr_buf, pid, pid + 1, GFP_KERNEL); + mutex_unlock(&drvdata->idr_mutex); + + /* Another event with this session ID has allocated this buffer. */ + if (ret == -ENOSPC) { + tmc_free_etr_buf(etr_buf); + goto retry; + } + + /* The IDR can't allocate room for a new session, abandon ship. */ + if (ret == -ENOMEM) { + tmc_free_etr_buf(etr_buf); + return ERR_PTR(ret); + } + + + return etr_buf; +} + +static struct etr_buf * +get_perf_etr_buf_per_thread(struct tmc_drvdata *drvdata, + struct perf_event *event, int nr_pages, + void **pages, bool snapshot) +{ + /* + * In per-thread mode the etr_buf isn't shared, so just go ahead + * with memory allocation. + */ + return alloc_etr_buf(drvdata, event, nr_pages, pages, snapshot); +} + +static struct etr_buf * +get_perf_etr_buf(struct tmc_drvdata *drvdata, struct perf_event *event, + int nr_pages, void **pages, bool snapshot) +{ + if (event->cpu == -1) + return get_perf_etr_buf_per_thread(drvdata, event, nr_pages, + pages, snapshot); + + return get_perf_etr_buf_cpu_wide(drvdata, event, nr_pages, + pages, snapshot); +} + +static struct etr_perf_buffer * +tmc_etr_setup_perf_buf(struct tmc_drvdata *drvdata, struct perf_event *event, + int nr_pages, void **pages, bool snapshot) +{ + int node; + struct etr_buf *etr_buf; + struct etr_perf_buffer *etr_perf; + + node = (event->cpu == -1) ? NUMA_NO_NODE : cpu_to_node(event->cpu); + + etr_perf = kzalloc_node(sizeof(*etr_perf), GFP_KERNEL, node); + if (!etr_perf) + return ERR_PTR(-ENOMEM); + + etr_buf = get_perf_etr_buf(drvdata, event, nr_pages, pages, snapshot); + if (!IS_ERR(etr_buf)) + goto done; + + kfree(etr_perf); + return ERR_PTR(-ENOMEM); + +done: + /* + * Keep a reference to the ETR this buffer has been allocated for + * in order to have access to the IDR in tmc_free_etr_buffer(). + */ + etr_perf->drvdata = drvdata; + etr_perf->etr_buf = etr_buf; + + return etr_perf; +} + + +static void *tmc_alloc_etr_buffer(struct coresight_device *csdev, + struct perf_event *event, void **pages, + int nr_pages, bool snapshot) +{ + struct etr_perf_buffer *etr_perf; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + + etr_perf = tmc_etr_setup_perf_buf(drvdata, event, + nr_pages, pages, snapshot); + if (IS_ERR(etr_perf)) { + dev_dbg(drvdata->dev, "Unable to allocate ETR buffer\n"); + return NULL; + } + + etr_perf->pid = task_pid_nr(event->owner); + etr_perf->snapshot = snapshot; + etr_perf->nr_pages = nr_pages; + etr_perf->pages = pages; + + return etr_perf; +} + +static void tmc_free_etr_buffer(void *config) +{ + struct etr_perf_buffer *etr_perf = config; + struct tmc_drvdata *drvdata = etr_perf->drvdata; + struct etr_buf *buf, *etr_buf = etr_perf->etr_buf; + + if (!etr_buf) + goto free_etr_perf_buffer; + + mutex_lock(&drvdata->idr_mutex); + /* If we are not the last one to use the buffer, don't touch it. */ + if (!refcount_dec_and_test(&etr_buf->refcount)) { + mutex_unlock(&drvdata->idr_mutex); + goto free_etr_perf_buffer; + } + + /* We are the last one, remove from the IDR and free the buffer. */ + buf = idr_remove(&drvdata->idr, etr_perf->pid); + mutex_unlock(&drvdata->idr_mutex); + + /* + * Something went very wrong if the buffer associated with this ID + * is not the same in the IDR. Leak to avoid use after free. + */ + if (buf && WARN_ON(buf != etr_buf)) + goto free_etr_perf_buffer; + + tmc_free_etr_buf(etr_perf->etr_buf); + +free_etr_perf_buffer: + kfree(etr_perf); +} + +/* + * tmc_etr_sync_perf_buffer: Copy the actual trace data from the hardware + * buffer to the perf ring buffer. + */ +static void tmc_etr_sync_perf_buffer(struct etr_perf_buffer *etr_perf, + unsigned long src_offset, + unsigned long to_copy) +{ + long bytes; + long pg_idx, pg_offset; + unsigned long head = etr_perf->head; + char **dst_pages, *src_buf; + struct etr_buf *etr_buf = etr_perf->etr_buf; + + head = etr_perf->head; + pg_idx = head >> PAGE_SHIFT; + pg_offset = head & (PAGE_SIZE - 1); + dst_pages = (char **)etr_perf->pages; + + while (to_copy > 0) { + /* + * In one iteration, we can copy minimum of : + * 1) what is available in the source buffer, + * 2) what is available in the source buffer, before it + * wraps around. + * 3) what is available in the destination page. + * in one iteration. + */ + if (src_offset >= etr_buf->size) + src_offset -= etr_buf->size; + bytes = tmc_etr_buf_get_data(etr_buf, src_offset, to_copy, + &src_buf); + if (WARN_ON_ONCE(bytes <= 0)) + break; + bytes = min(bytes, (long)(PAGE_SIZE - pg_offset)); + + memcpy(dst_pages[pg_idx] + pg_offset, src_buf, bytes); + + to_copy -= bytes; + + /* Move destination pointers */ + pg_offset += bytes; + if (pg_offset == PAGE_SIZE) { + pg_offset = 0; + if (++pg_idx == etr_perf->nr_pages) + pg_idx = 0; + } + + /* Move source pointers */ + src_offset += bytes; + } +} + +/* + * tmc_update_etr_buffer : Update the perf ring buffer with the + * available trace data. We use software double buffering at the moment. + * + * TODO: Add support for reusing the perf ring buffer. + */ +static unsigned long +tmc_update_etr_buffer(struct coresight_device *csdev, + struct perf_output_handle *handle, + void *config) +{ + bool lost = false; + unsigned long flags, offset, size = 0; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + struct etr_perf_buffer *etr_perf = config; + struct etr_buf *etr_buf = etr_perf->etr_buf; + + spin_lock_irqsave(&drvdata->spinlock, flags); + + /* Don't do anything if another tracer is using this sink */ + if (atomic_read(csdev->refcnt) != 1) { + spin_unlock_irqrestore(&drvdata->spinlock, flags); goto out; } - drvdata->mode = CS_MODE_PERF; - tmc_etr_enable_hw(drvdata); -out: + if (WARN_ON(drvdata->perf_buf != etr_buf)) { + lost = true; + spin_unlock_irqrestore(&drvdata->spinlock, flags); + goto out; + } + + CS_UNLOCK(drvdata->base); + + tmc_flush_and_stop(drvdata); + tmc_sync_etr_buf(drvdata); + + CS_LOCK(drvdata->base); spin_unlock_irqrestore(&drvdata->spinlock, flags); - return ret; + lost = etr_buf->full; + offset = etr_buf->offset; + size = etr_buf->len; + + /* + * The ETR buffer may be bigger than the space available in the + * perf ring buffer (handle->size). If so advance the offset so that we + * get the latest trace data. In snapshot mode none of that matters + * since we are expected to clobber stale data in favour of the latest + * traces. + */ + if (!etr_perf->snapshot && size > handle->size) { + u32 mask = tmc_get_memwidth_mask(drvdata); + + /* + * Make sure the new size is aligned in accordance with the + * requirement explained in function tmc_get_memwidth_mask(). + */ + size = handle->size & mask; + offset = etr_buf->offset + etr_buf->len - size; + + if (offset >= etr_buf->size) + offset -= etr_buf->size; + lost = true; + } + + /* Insert barrier packets at the beginning, if there was an overflow */ + if (lost) + tmc_etr_buf_insert_barrier_packet(etr_buf, etr_buf->offset); + tmc_etr_sync_perf_buffer(etr_perf, offset, size); + + /* + * In snapshot mode we simply increment the head by the number of byte + * that were written. User space function cs_etm_find_snapshot() will + * figure out how many bytes to get from the AUX buffer based on the + * position of the head. + */ + if (etr_perf->snapshot) + handle->head += size; +out: + /* + * Don't set the TRUNCATED flag in snapshot mode because 1) the + * captured buffer is expected to be truncated and 2) a full buffer + * prevents the event from being re-enabled by the perf core, + * resulting in stale data being send to user space. + */ + if (!etr_perf->snapshot && lost) + perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED); + return size; } -static int tmc_enable_etr_sink(struct coresight_device *csdev, u32 mode) +static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data) +{ + int rc = 0; + pid_t pid; + unsigned long flags; + struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + struct perf_output_handle *handle = data; + struct etr_perf_buffer *etr_perf = etm_perf_sink_config(handle); + + spin_lock_irqsave(&drvdata->spinlock, flags); + /* Don't use this sink if it is already claimed by sysFS */ + if (drvdata->mode == CS_MODE_SYSFS) { + rc = -EBUSY; + goto unlock_out; + } + + if (WARN_ON(!etr_perf || !etr_perf->etr_buf)) { + rc = -EINVAL; + goto unlock_out; + } + + /* Get a handle on the pid of the process to monitor */ + pid = etr_perf->pid; + + /* Do not proceed if this device is associated with another session */ + if (drvdata->pid != -1 && drvdata->pid != pid) { + rc = -EBUSY; + goto unlock_out; + } + + etr_perf->head = PERF_IDX2OFF(handle->head, etr_perf); + + /* + * No HW configuration is needed if the sink is already in + * use for this session. + */ + if (drvdata->pid == pid) { + atomic_inc(csdev->refcnt); + goto unlock_out; + } + + rc = tmc_etr_enable_hw(drvdata, etr_perf->etr_buf); + if (!rc) { + /* Associate with monitored process. */ + drvdata->pid = pid; + drvdata->mode = CS_MODE_PERF; + drvdata->perf_buf = etr_perf->etr_buf; + atomic_inc(csdev->refcnt); + } + +unlock_out: + spin_unlock_irqrestore(&drvdata->spinlock, flags); + return rc; +} + +static int tmc_enable_etr_sink(struct coresight_device *csdev, + u32 mode, void *data) { switch (mode) { case CS_MODE_SYSFS: return tmc_enable_etr_sink_sysfs(csdev); case CS_MODE_PERF: - return tmc_enable_etr_sink_perf(csdev); + return tmc_enable_etr_sink_perf(csdev, data); } /* We shouldn't be here */ return -EINVAL; } -static void tmc_disable_etr_sink(struct coresight_device *csdev) +static int tmc_disable_etr_sink(struct coresight_device *csdev) { unsigned long flags; struct tmc_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); spin_lock_irqsave(&drvdata->spinlock, flags); + if (drvdata->reading) { spin_unlock_irqrestore(&drvdata->spinlock, flags); - return; + return -EBUSY; } - /* Disable the TMC only if it needs to */ - if (drvdata->mode != CS_MODE_DISABLED) { - tmc_etr_disable_hw(drvdata); - drvdata->mode = CS_MODE_DISABLED; + if (atomic_dec_return(csdev->refcnt)) { + spin_unlock_irqrestore(&drvdata->spinlock, flags); + return -EBUSY; } + /* Complain if we (somehow) got out of sync */ + WARN_ON_ONCE(drvdata->mode == CS_MODE_DISABLED); + tmc_etr_disable_hw(drvdata); + /* Dissociate from monitored process. */ + drvdata->pid = -1; + drvdata->mode = CS_MODE_DISABLED; + /* Reset perf specific data */ + drvdata->perf_buf = NULL; + spin_unlock_irqrestore(&drvdata->spinlock, flags); - dev_info(drvdata->dev, "TMC-ETR disabled\n"); + dev_dbg(drvdata->dev, "TMC-ETR disabled\n"); + return 0; } static const struct coresight_ops_sink tmc_etr_sink_ops = { .enable = tmc_enable_etr_sink, .disable = tmc_disable_etr_sink, + .alloc_buffer = tmc_alloc_etr_buffer, + .update_buffer = tmc_update_etr_buffer, + .free_buffer = tmc_free_etr_buffer, }; const struct coresight_ops tmc_etr_cs_ops = { @@ -279,21 +1688,19 @@ int tmc_read_prepare_etr(struct tmc_drvdata *drvdata) goto out; } - /* Don't interfere if operated from Perf */ - if (drvdata->mode == CS_MODE_PERF) { + /* + * We can safely allow reads even if the ETR is operating in PERF mode, + * since the sysfs session is captured in mode specific data. + * If drvdata::sysfs_data is NULL the trace data has been read already. + */ + if (!drvdata->sysfs_buf) { ret = -EINVAL; goto out; } - /* If drvdata::buf is NULL the trace data has been read already */ - if (drvdata->buf == NULL) { - ret = -EINVAL; - goto out; - } - - /* Disable the TMC if need be */ + /* Disable the TMC if we are trying to read from a running session. */ if (drvdata->mode == CS_MODE_SYSFS) - tmc_etr_disable_hw(drvdata); + __tmc_etr_disable_hw(drvdata); drvdata->reading = true; out: @@ -305,8 +1712,7 @@ out: int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata) { unsigned long flags; - dma_addr_t paddr; - void __iomem *vaddr = NULL; + struct etr_buf *sysfs_buf = NULL; /* config types are set a boot time and never change */ if (WARN_ON_ONCE(drvdata->config_type != TMC_CONFIG_TYPE_ETR)) @@ -318,27 +1724,25 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata) if (drvdata->mode == CS_MODE_SYSFS) { /* * The trace run will continue with the same allocated trace - * buffer. The trace buffer is cleared in tmc_etr_enable_hw(), - * so we don't have to explicitly clear it. Also, since the - * tracer is still enabled drvdata::buf can't be NULL. + * buffer. Since the tracer is still enabled drvdata::buf can't + * be NULL. */ - tmc_etr_enable_hw(drvdata); + __tmc_etr_enable_hw(drvdata); } else { /* * The ETR is not tracing and the buffer was just read. * As such prepare to free the trace buffer. */ - vaddr = drvdata->vaddr; - paddr = drvdata->paddr; - drvdata->buf = drvdata->vaddr = NULL; + sysfs_buf = drvdata->sysfs_buf; + drvdata->sysfs_buf = NULL; } drvdata->reading = false; spin_unlock_irqrestore(&drvdata->spinlock, flags); /* Free allocated memory out side of the spinlock */ - if (vaddr) - dma_free_coherent(drvdata->dev, drvdata->size, vaddr, paddr); + if (sysfs_buf) + tmc_etr_free_sysfs_buf(sysfs_buf); return 0; } diff --git a/drivers/hwtracing/coresight/coresight-tmc.c b/drivers/hwtracing/coresight/coresight-tmc.c index 2ff4a66a3caa..877348cf7d64 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.c +++ b/drivers/hwtracing/coresight/coresight-tmc.c @@ -16,10 +16,13 @@ #include #include #include +#include #include #include #include #include +#include +#include #include #include #include @@ -71,6 +74,34 @@ void tmc_disable_hw(struct tmc_drvdata *drvdata) writel_relaxed(0x0, drvdata->base + TMC_CTL); } +u32 tmc_get_memwidth_mask(struct tmc_drvdata *drvdata) +{ + u32 mask = 0; + + /* + * When moving RRP or an offset address forward, the new values must + * be byte-address aligned to the width of the trace memory databus + * _and_ to a frame boundary (16 byte), whichever is the biggest. For + * example, for 32-bit, 64-bit and 128-bit wide trace memory, the four + * LSBs must be 0s. For 256-bit wide trace memory, the five LSBs must + * be 0s. + */ + switch (drvdata->memwidth) { + case TMC_MEM_INTF_WIDTH_32BITS: + /* fallthrough */ + case TMC_MEM_INTF_WIDTH_64BITS: + /* fallthrough */ + case TMC_MEM_INTF_WIDTH_128BITS: + mask = GENMASK(31, 4); + break; + case TMC_MEM_INTF_WIDTH_256BITS: + mask = GENMASK(31, 5); + break; + } + + return mask; +} + static int tmc_read_prepare(struct tmc_drvdata *drvdata) { int ret = 0; @@ -88,7 +119,7 @@ static int tmc_read_prepare(struct tmc_drvdata *drvdata) } if (!ret) - dev_info(drvdata->dev, "TMC read start\n"); + dev_dbg(drvdata->dev, "TMC read start\n"); return ret; } @@ -110,7 +141,7 @@ static int tmc_read_unprepare(struct tmc_drvdata *drvdata) } if (!ret) - dev_info(drvdata->dev, "TMC read end\n"); + dev_dbg(drvdata->dev, "TMC read end\n"); return ret; } @@ -131,35 +162,40 @@ static int tmc_open(struct inode *inode, struct file *file) return 0; } +static inline ssize_t tmc_get_sysfs_trace(struct tmc_drvdata *drvdata, + loff_t pos, size_t len, char **bufpp) +{ + switch (drvdata->config_type) { + case TMC_CONFIG_TYPE_ETB: + case TMC_CONFIG_TYPE_ETF: + return tmc_etb_get_sysfs_trace(drvdata, pos, len, bufpp); + case TMC_CONFIG_TYPE_ETR: + return tmc_etr_get_sysfs_trace(drvdata, pos, len, bufpp); + } + + return -EINVAL; +} + static ssize_t tmc_read(struct file *file, char __user *data, size_t len, loff_t *ppos) { + char *bufp; + ssize_t actual; struct tmc_drvdata *drvdata = container_of(file->private_data, struct tmc_drvdata, miscdev); - char *bufp = drvdata->buf + *ppos; + actual = tmc_get_sysfs_trace(drvdata, *ppos, len, &bufp); + if (actual <= 0) + return 0; - if (*ppos + len > drvdata->len) - len = drvdata->len - *ppos; - - if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { - if (bufp == (char *)(drvdata->vaddr + drvdata->size)) - bufp = drvdata->vaddr; - else if (bufp > (char *)(drvdata->vaddr + drvdata->size)) - bufp -= drvdata->size; - if ((bufp + len) > (char *)(drvdata->vaddr + drvdata->size)) - len = (char *)(drvdata->vaddr + drvdata->size) - bufp; - } - - if (copy_to_user(data, bufp, len)) { + if (copy_to_user(data, bufp, actual)) { dev_dbg(drvdata->dev, "%s: copy_to_user failed\n", __func__); return -EFAULT; } - *ppos += len; + *ppos += actual; + dev_dbg(drvdata->dev, "%zu bytes copied\n", actual); - dev_dbg(drvdata->dev, "%s: %zu bytes copied, %d bytes left\n", - __func__, len, (int)(drvdata->len - *ppos)); - return len; + return actual; } static int tmc_release(struct inode *inode, struct file *file) @@ -231,6 +267,7 @@ coresight_tmc_reg(ffcr, TMC_FFCR); coresight_tmc_reg(mode, TMC_MODE); coresight_tmc_reg(pscr, TMC_PSCR); coresight_tmc_reg(axictl, TMC_AXICTL); +coresight_tmc_reg(authstatus, TMC_AUTHSTATUS); coresight_tmc_reg(devid, CORESIGHT_DEVID); coresight_tmc_reg64(rrp, TMC_RRP, TMC_RRPHI); coresight_tmc_reg64(rwp, TMC_RWP, TMC_RWPHI); @@ -250,6 +287,7 @@ static struct attribute *coresight_tmc_mgmt_attrs[] = { &dev_attr_devid.attr, &dev_attr_dba.attr, &dev_attr_axictl.attr, + &dev_attr_authstatus.attr, NULL, }; @@ -279,8 +317,41 @@ static ssize_t trigger_cntr_store(struct device *dev, } static DEVICE_ATTR_RW(trigger_cntr); +static ssize_t buffer_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent); + + return sprintf(buf, "%#x\n", drvdata->size); +} + +static ssize_t buffer_size_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + int ret; + unsigned long val; + struct tmc_drvdata *drvdata = dev_get_drvdata(dev->parent); + + /* Only permitted for TMC-ETRs */ + if (drvdata->config_type != TMC_CONFIG_TYPE_ETR) + return -EPERM; + + ret = kstrtoul(buf, 0, &val); + if (ret) + return ret; + /* The buffer size should be page aligned */ + if (val & (PAGE_SIZE - 1)) + return -EINVAL; + drvdata->size = val; + return size; +} + +static DEVICE_ATTR_RW(buffer_size); + static struct attribute *coresight_tmc_attrs[] = { &dev_attr_trigger_cntr.attr, + &dev_attr_buffer_size.attr, NULL, }; @@ -299,16 +370,34 @@ const struct attribute_group *coresight_tmc_groups[] = { NULL, }; +static inline bool tmc_etr_can_use_sg(struct tmc_drvdata *drvdata) +{ + return fwnode_property_present(drvdata->dev->fwnode, + "arm,scatter-gather"); +} + +static inline bool tmc_etr_has_non_secure_access(struct tmc_drvdata *drvdata) +{ + u32 auth = readl_relaxed(drvdata->base + TMC_AUTHSTATUS); + + return (auth & TMC_AUTH_NSID_MASK) == 0x3; +} + /* Detect and initialise the capabilities of a TMC ETR */ static int tmc_etr_setup_caps(struct tmc_drvdata *drvdata, u32 devid, void *dev_caps) { + int rc; + u32 dma_mask = 0; + if (!tmc_etr_has_non_secure_access(drvdata)) + return -EACCES; + /* Set the unadvertised capabilities */ tmc_etr_init_caps(drvdata, (u32)(unsigned long)dev_caps); - if (!(devid & TMC_DEVID_NOSCAT)) + if (!(devid & TMC_DEVID_NOSCAT) && tmc_etr_can_use_sg(drvdata)) tmc_etr_set_cap(drvdata, TMC_ETR_SG); /* Check if the AXI address width is available */ @@ -332,7 +421,10 @@ static int tmc_etr_setup_caps(struct tmc_drvdata *drvdata, dma_mask = 40; } - return dma_set_mask_and_coherent(drvdata->dev, DMA_BIT_MASK(dma_mask)); + rc = dma_set_mask_and_coherent(drvdata->dev, DMA_BIT_MASK(dma_mask)); + if (rc) + dev_err(drvdata->dev, "Failed to setup DMA mask: %d\n", rc); + return rc; } static int tmc_probe(struct amba_device *adev, const struct amba_id *id) @@ -378,6 +470,8 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) devid = readl_relaxed(drvdata->base + CORESIGHT_DEVID); drvdata->config_type = BMVAL(devid, 6, 7); drvdata->memwidth = tmc_get_memwidth(devid); + /* This device is not associated with a session */ + drvdata->pid = -1; if (drvdata->config_type == TMC_CONFIG_TYPE_ETR) { if (np) @@ -390,8 +484,6 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) drvdata->size = readl_relaxed(drvdata->base + TMC_RSZ) * 4; } - pm_runtime_put(&adev->dev); - desc.pdata = pdata; desc.dev = dev; desc.groups = coresight_tmc_groups; @@ -409,6 +501,8 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) ret = tmc_etr_setup_caps(drvdata, devid, id->data); if (ret) goto out; + idr_init(&drvdata->idr); + mutex_init(&drvdata->idr_mutex); break; case TMC_CONFIG_TYPE_ETF: desc.type = CORESIGHT_DEV_TYPE_LINKSINK; @@ -433,6 +527,8 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id) ret = misc_register(&drvdata->miscdev); if (ret) coresight_unregister(drvdata->csdev); + else + pm_runtime_put(&adev->dev); out: return ret; } diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h index 8df7a813f537..13bf55179766 100644 --- a/drivers/hwtracing/coresight/coresight-tmc.h +++ b/drivers/hwtracing/coresight/coresight-tmc.h @@ -18,7 +18,11 @@ #ifndef _CORESIGHT_TMC_H #define _CORESIGHT_TMC_H +#include +#include #include +#include +#include #define TMC_RSZ 0x004 #define TMC_STS 0x00c @@ -46,6 +50,7 @@ #define TMC_ITATBCTR2 0xef0 #define TMC_ITATBCTR1 0xef4 #define TMC_ITATBCTR0 0xef8 +#define TMC_AUTHSTATUS 0xfb8 /* register description */ /* TMC_CTL - 0x020 */ @@ -54,6 +59,7 @@ #define TMC_STS_TMCREADY_BIT 2 #define TMC_STS_FULL BIT(0) #define TMC_STS_TRIGGERED BIT(1) +#define TMC_STS_MEMERR BIT(5) /* * TMC_AXICTL - 0x110 * @@ -96,6 +102,8 @@ #define TMC_DEVID_AXIAW_SHIFT 17 #define TMC_DEVID_AXIAW_MASK 0x7f +#define TMC_AUTH_NSID_MASK GENMASK(1, 0) + enum tmc_config_type { TMC_CONFIG_TYPE_ETB, TMC_CONFIG_TYPE_ETR, @@ -133,6 +141,38 @@ enum tmc_mem_intf_width { #define CORESIGHT_SOC_600_ETR_CAPS \ (TMC_ETR_SAVE_RESTORE | TMC_ETR_AXI_ARCACHE) +enum etr_mode { + ETR_MODE_FLAT, /* Uses contiguous flat buffer */ + ETR_MODE_ETR_SG, /* Uses in-built TMC ETR SG mechanism */ + ETR_MODE_CATU, /* Use SG mechanism in CATU */ +}; + +struct etr_buf_operations; + +/** + * struct etr_buf - Details of the buffer used by ETR + * refcount ; Number of sources currently using this etr_buf. + * @mode : Mode of the ETR buffer, contiguous, Scatter Gather etc. + * @full : Trace data overflow + * @size : Size of the buffer. + * @hwaddr : Address to be programmed in the TMC:DBA{LO,HI} + * @offset : Offset of the trace data in the buffer for consumption. + * @len : Available trace data @buf (may round up to the beginning). + * @ops : ETR buffer operations for the mode. + * @private : Backend specific information for the buf + */ +struct etr_buf { + refcount_t refcount; + enum etr_mode mode; + bool full; + ssize_t size; + dma_addr_t hwaddr; + unsigned long offset; + s64 len; + const struct etr_buf_operations *ops; + void *private; +}; + /** * struct tmc_drvdata - specifics associated to an TMC component * @base: memory mapped base address for this component. @@ -140,17 +180,22 @@ enum tmc_mem_intf_width { * @csdev: component vitals needed by the framework. * @miscdev: specifics to handle "/dev/xyz.tmc" entry. * @spinlock: only one at a time pls. - * @buf: area of memory where trace data get sent. - * @paddr: DMA start location in RAM. - * @vaddr: virtual representation of @paddr. - * @size: trace buffer size. - * @len: size of the available trace. + * @pid: Process ID of the process being monitored by the session + * that is using this component. + * @buf: Snapshot of the trace data for ETF/ETB. + * @etr_buf: details of buffer used in TMC-ETR + * @len: size of the available trace for ETF/ETB. + * @size: trace buffer size for this TMC (common for all modes). * @mode: how this TMC is being used. * @config_type: TMC variant, must be of type @tmc_config_type. * @memwidth: width of the memory interface databus, in bytes. * @trigger_cntr: amount of words to store after a trigger. * @etr_caps: Bitmask of capabilities of the TMC ETR, inferred from the * device configuration register (DEVID) + * @idr: Holds etr_bufs allocated for this ETR. + * @idr_mutex: Access serialisation for idr. + * @sysfs_buf: SYSFS buffer for ETR. + * @perf_buf: PERF buffer for ETR. */ struct tmc_drvdata { void __iomem *base; @@ -158,17 +203,64 @@ struct tmc_drvdata { struct coresight_device *csdev; struct miscdevice miscdev; spinlock_t spinlock; + pid_t pid; bool reading; - char *buf; - dma_addr_t paddr; - void __iomem *vaddr; - u32 size; + union { + char *buf; /* TMC ETB */ + struct etr_buf *etr_buf; /* TMC ETR */ + }; u32 len; + u32 size; u32 mode; enum tmc_config_type config_type; enum tmc_mem_intf_width memwidth; u32 trigger_cntr; u32 etr_caps; + struct idr idr; + struct mutex idr_mutex; + struct etr_buf *sysfs_buf; + struct etr_buf *perf_buf; +}; + +struct etr_buf_operations { + int (*alloc)(struct tmc_drvdata *drvdata, struct etr_buf *etr_buf, + int node, void **pages); + void (*sync)(struct etr_buf *etr_buf, u64 rrp, u64 rwp); + ssize_t (*get_data)(struct etr_buf *etr_buf, u64 offset, size_t len, + char **bufpp); + void (*free)(struct etr_buf *etr_buf); +}; + +/** + * struct tmc_pages - Collection of pages used for SG. + * @nr_pages: Number of pages in the list. + * @daddrs: Array of DMA'able page address. + * @pages: Array pages for the buffer. + */ +struct tmc_pages { + int nr_pages; + dma_addr_t *daddrs; + struct page **pages; +}; + +/* + * struct tmc_sg_table - Generic SG table for TMC + * @dev: Device for DMA allocations + * @table_vaddr: Contiguous Virtual address for PageTable + * @data_vaddr: Contiguous Virtual address for Data Buffer + * @table_daddr: DMA address of the PageTable base + * @node: Node for Page allocations + * @table_pages: List of pages & dma address for Table + * @data_pages: List of pages & dma address for Data + */ +struct tmc_sg_table { + struct device *dev; + void *table_vaddr; + void *data_vaddr; + dma_addr_t table_daddr; + int node; + struct tmc_pages table_pages; + struct tmc_pages data_pages; }; /* Generic functions */ @@ -176,6 +268,7 @@ void tmc_wait_for_tmcready(struct tmc_drvdata *drvdata); void tmc_flush_and_stop(struct tmc_drvdata *drvdata); void tmc_enable_hw(struct tmc_drvdata *drvdata); void tmc_disable_hw(struct tmc_drvdata *drvdata); +u32 tmc_get_memwidth_mask(struct tmc_drvdata *drvdata); /* ETB/ETF functions */ int tmc_read_prepare_etb(struct tmc_drvdata *drvdata); @@ -183,10 +276,14 @@ int tmc_read_unprepare_etb(struct tmc_drvdata *drvdata); extern const struct coresight_ops tmc_etb_cs_ops; extern const struct coresight_ops tmc_etf_cs_ops; +ssize_t tmc_etb_get_sysfs_trace(struct tmc_drvdata *drvdata, + loff_t pos, size_t len, char **bufpp); /* ETR functions */ int tmc_read_prepare_etr(struct tmc_drvdata *drvdata); int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata); extern const struct coresight_ops tmc_etr_cs_ops; +ssize_t tmc_etr_get_sysfs_trace(struct tmc_drvdata *drvdata, + loff_t pos, size_t len, char **bufpp); #define TMC_REG_PAIR(name, lo_off, hi_off) \ @@ -222,4 +319,23 @@ static inline bool tmc_etr_has_cap(struct tmc_drvdata *drvdata, u32 cap) return !!(drvdata->etr_caps & cap); } +struct tmc_sg_table *tmc_alloc_sg_table(struct device *dev, + int node, + int nr_tpages, + int nr_dpages, + void **pages); +void tmc_free_sg_table(struct tmc_sg_table *sg_table); +void tmc_sg_table_sync_table(struct tmc_sg_table *sg_table); +void tmc_sg_table_sync_data_range(struct tmc_sg_table *table, + u64 offset, u64 size); +ssize_t tmc_sg_table_get_data(struct tmc_sg_table *sg_table, + u64 offset, size_t len, char **bufpp); +static inline unsigned long +tmc_sg_table_buf_size(struct tmc_sg_table *sg_table) +{ + return sg_table->data_pages.nr_pages << PAGE_SHIFT; +} + +struct coresight_device *tmc_etr_get_catu_device(struct tmc_drvdata *drvdata); + #endif diff --git a/drivers/hwtracing/coresight/coresight-tpiu.c b/drivers/hwtracing/coresight/coresight-tpiu.c index 15dd01f8c197..0a55fd3b0efa 100644 --- a/drivers/hwtracing/coresight/coresight-tpiu.c +++ b/drivers/hwtracing/coresight/coresight-tpiu.c @@ -12,6 +12,7 @@ * GNU General Public License for more details. */ +#include #include #include #include @@ -75,13 +76,13 @@ static void tpiu_enable_hw(struct tpiu_drvdata *drvdata) CS_LOCK(drvdata->base); } -static int tpiu_enable(struct coresight_device *csdev, u32 mode) +static int tpiu_enable(struct coresight_device *csdev, u32 mode, void *__unused) { struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); tpiu_enable_hw(drvdata); - - dev_info(drvdata->dev, "TPIU enabled\n"); + atomic_inc(csdev->refcnt); + dev_dbg(drvdata->dev, "TPIU enabled\n"); return 0; } @@ -101,13 +102,17 @@ static void tpiu_disable_hw(struct tpiu_drvdata *drvdata) CS_LOCK(drvdata->base); } -static void tpiu_disable(struct coresight_device *csdev) +static int tpiu_disable(struct coresight_device *csdev) { struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); + if (atomic_dec_return(csdev->refcnt)) + return -EBUSY; + tpiu_disable_hw(drvdata); - dev_info(drvdata->dev, "TPIU disabled\n"); + dev_dbg(drvdata->dev, "TPIU disabled\n"); + return 0; } static const struct coresight_ops_sink tpiu_sink_ops = { @@ -168,10 +173,8 @@ static int tpiu_probe(struct amba_device *adev, const struct amba_id *id) desc.pdata = pdata; desc.dev = dev; drvdata->csdev = coresight_register(&desc); - if (IS_ERR(drvdata->csdev)) - return PTR_ERR(drvdata->csdev); - return 0; + return PTR_ERR_OR_ZERO(drvdata->csdev); } #ifdef CONFIG_PM diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c index e571e4010dff..65ea87e51c46 100644 --- a/drivers/hwtracing/coresight/coresight.c +++ b/drivers/hwtracing/coresight/coresight.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,7 @@ #include #include +#include "coresight-etm-perf.h" #include "coresight-priv.h" static DEFINE_MUTEX(coresight_mutex); @@ -58,8 +60,7 @@ static struct list_head *stm_path; * beginning of the data collected in a buffer. That way the decoder knows that * it needs to look for another sync sequence. */ -const u32 barrier_pkt[5] = {0x7fffffff, 0x7fffffff, - 0x7fffffff, 0x7fffffff, 0x0}; +const u32 barrier_pkt[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; static int coresight_id_match(struct device *dev, void *data) { @@ -136,41 +137,132 @@ static int coresight_find_link_outport(struct coresight_device *csdev, return -ENODEV; } -static int coresight_enable_sink(struct coresight_device *csdev, u32 mode) +static inline u32 coresight_read_claim_tags(void __iomem *base) +{ + return readl_relaxed(base + CORESIGHT_CLAIMCLR); +} + +static inline bool coresight_is_claimed_self_hosted(void __iomem *base) +{ + return coresight_read_claim_tags(base) == CORESIGHT_CLAIM_SELF_HOSTED; +} + +static inline bool coresight_is_claimed_any(void __iomem *base) +{ + return coresight_read_claim_tags(base) != 0; +} + +static inline void coresight_set_claim_tags(void __iomem *base) +{ + writel_relaxed(CORESIGHT_CLAIM_SELF_HOSTED, base + CORESIGHT_CLAIMSET); + isb(); +} + +static inline void coresight_clear_claim_tags(void __iomem *base) +{ + writel_relaxed(CORESIGHT_CLAIM_SELF_HOSTED, base + CORESIGHT_CLAIMCLR); + isb(); +} + +/* + * coresight_claim_device_unlocked : Claim the device for self-hosted usage + * to prevent an external tool from touching this device. As per PSCI + * standards, section "Preserving the execution context" => "Debug and Trace + * save and Restore", DBGCLAIM[1] is reserved for Self-hosted debug/trace and + * DBGCLAIM[0] is reserved for external tools. + * + * Called with CS_UNLOCKed for the component. + * Returns : 0 on success + */ +int coresight_claim_device_unlocked(void __iomem *base) +{ + if (coresight_is_claimed_any(base)) + return -EBUSY; + + coresight_set_claim_tags(base); + if (coresight_is_claimed_self_hosted(base)) + return 0; + /* There was a race setting the tags, clean up and fail */ + coresight_clear_claim_tags(base); + return -EBUSY; +} + +int coresight_claim_device(void __iomem *base) +{ + int rc; + + CS_UNLOCK(base); + rc = coresight_claim_device_unlocked(base); + CS_LOCK(base); + + return rc; +} + +/* + * coresight_disclaim_device_unlocked : Clear the claim tags for the device. + * Called with CS_UNLOCKed for the component. + */ +void coresight_disclaim_device_unlocked(void __iomem *base) +{ + + if (coresight_is_claimed_self_hosted(base)) + coresight_clear_claim_tags(base); + else + /* + * The external agent may have not honoured our claim + * and has manipulated it. Or something else has seriously + * gone wrong in our driver. + */ + WARN_ON_ONCE(1); +} + +void coresight_disclaim_device(void __iomem *base) +{ + CS_UNLOCK(base); + coresight_disclaim_device_unlocked(base); + CS_LOCK(base); +} + +static int coresight_enable_sink(struct coresight_device *csdev, + u32 mode, void *data) { int ret; - if (!csdev->enable) { - if (sink_ops(csdev)->enable) { - ret = sink_ops(csdev)->enable(csdev, mode); - if (ret) - return ret; - } - csdev->enable = true; - } + /* + * We need to make sure the "new" session is compatible with the + * existing "mode" of operation. + */ + if (!sink_ops(csdev)->enable) + return -EINVAL; - atomic_inc(csdev->refcnt); + ret = sink_ops(csdev)->enable(csdev, mode, data); + if (ret) + return ret; + csdev->enable = true; return 0; } static void coresight_disable_sink(struct coresight_device *csdev) { - if (atomic_dec_return(csdev->refcnt) == 0) { - if (sink_ops(csdev)->disable) { - sink_ops(csdev)->disable(csdev); - csdev->enable = false; - } - } + int ret; + + if (!sink_ops(csdev)->disable) + return; + + ret = sink_ops(csdev)->disable(csdev); + if (ret) + return; + csdev->enable = false; } static int coresight_enable_link(struct coresight_device *csdev, struct coresight_device *parent, struct coresight_device *child) { - int ret; + int ret = 0; int link_subtype; - int refport, inport, outport; + int inport, outport; if (!parent || !child) return -EINVAL; @@ -179,27 +271,17 @@ static int coresight_enable_link(struct coresight_device *csdev, outport = coresight_find_link_outport(csdev, child); link_subtype = csdev->subtype.link_subtype; - if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG) - refport = inport; - else if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT) - refport = outport; - else - refport = 0; + if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG && inport < 0) + return inport; + if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT && outport < 0) + return outport; - if (refport < 0) - return refport; + if (link_ops(csdev)->enable) + ret = link_ops(csdev)->enable(csdev, inport, outport); + if (!ret) + csdev->enable = true; - if (atomic_inc_return(&csdev->refcnt[refport]) == 1) { - if (link_ops(csdev)->enable) { - ret = link_ops(csdev)->enable(csdev, inport, outport); - if (ret) - return ret; - } - } - - csdev->enable = true; - - return 0; + return ret; } static void coresight_disable_link(struct coresight_device *csdev, @@ -208,7 +290,7 @@ static void coresight_disable_link(struct coresight_device *csdev, { int i, nr_conns; int link_subtype; - int refport, inport, outport; + int inport, outport; if (!parent || !child) return; @@ -218,20 +300,15 @@ static void coresight_disable_link(struct coresight_device *csdev, link_subtype = csdev->subtype.link_subtype; if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG) { - refport = inport; nr_conns = csdev->nr_inport; } else if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_SPLIT) { - refport = outport; nr_conns = csdev->nr_outport; } else { - refport = 0; nr_conns = 1; } - if (atomic_dec_return(&csdev->refcnt[refport]) == 0) { - if (link_ops(csdev)->disable) - link_ops(csdev)->disable(csdev, inport, outport); - } + if (link_ops(csdev)->disable) + link_ops(csdev)->disable(csdev, inport, outport); for (i = 0; i < nr_conns; i++) if (atomic_read(&csdev->refcnt[i]) != 0) @@ -282,13 +359,21 @@ static bool coresight_disable_source(struct coresight_device *csdev) return !csdev->enable; } -void coresight_disable_path(struct list_head *path) +/* + * coresight_disable_path_from : Disable components in the given path beyond + * @nd in the list. If @nd is NULL, all the components, except the SOURCE are + * disabled. + */ +static void coresight_disable_path_from(struct list_head *path, + struct coresight_node *nd) { u32 type; - struct coresight_node *nd; struct coresight_device *csdev, *parent, *child; - list_for_each_entry(nd, path, link) { + if (!nd) + nd = list_first_entry(path, struct coresight_node, link); + + list_for_each_entry_continue(nd, path, link) { csdev = nd->csdev; type = csdev->type; @@ -308,7 +393,12 @@ void coresight_disable_path(struct list_head *path) coresight_disable_sink(csdev); break; case CORESIGHT_DEV_TYPE_SOURCE: - /* sources are disabled from either sysFS or Perf */ + /* + * We skip the first node in the path assuming that it + * is the source. So we don't expect a source device in + * the middle of a path. + */ + WARN_ON(1); break; case CORESIGHT_DEV_TYPE_LINK: parent = list_prev_entry(nd, link)->csdev; @@ -321,7 +411,12 @@ void coresight_disable_path(struct list_head *path) } } -int coresight_enable_path(struct list_head *path, u32 mode) +void coresight_disable_path(struct list_head *path) +{ + coresight_disable_path_from(path, NULL); +} + +int coresight_enable_path(struct list_head *path, u32 mode, void *sink_data) { int ret = 0; @@ -346,9 +441,15 @@ int coresight_enable_path(struct list_head *path, u32 mode) switch (type) { case CORESIGHT_DEV_TYPE_SINK: - ret = coresight_enable_sink(csdev, mode); + ret = coresight_enable_sink(csdev, mode, sink_data); + /* + * Sink is the first component turned on. If we + * failed to enable the sink, there are no components + * that need disabling. Disabling the path here + * would mean we could disrupt an existing session. + */ if (ret) - goto err; + goto out; break; case CORESIGHT_DEV_TYPE_SOURCE: /* sources are enabled from either sysFS or Perf */ @@ -368,7 +469,7 @@ int coresight_enable_path(struct list_head *path, u32 mode) out: return ret; err: - coresight_disable_path(path); + coresight_disable_path_from(path, nd); goto out; } @@ -433,6 +534,83 @@ struct coresight_device *coresight_get_enabled_sink(bool deactivate) return dev ? to_coresight_device(dev) : NULL; } +static int coresight_sink_by_id(struct device *dev, void *data) +{ + struct coresight_device *csdev = to_coresight_device(dev); + unsigned long hash; + + if (csdev->type == CORESIGHT_DEV_TYPE_SINK || + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) { + + if (!csdev->ea) + return 0; + /* + * See function etm_perf_add_symlink_sink() to know where + * this comes from. + */ + hash = (unsigned long)csdev->ea->var; + + if ((u32)hash == *(u32 *)data) + return 1; + } + + return 0; +} + +/** + * coresight_get_sink_by_id - returns the sink that matches the id + * @id: Id of the sink to match + * + * The name of a sink is unique, whether it is found on the AMBA bus or + * otherwise. As such the hash of that name can easily be used to identify + * a sink. + */ +struct coresight_device *coresight_get_sink_by_id(u32 id) +{ + struct device *dev = NULL; + + dev = bus_find_device(&coresight_bustype, NULL, &id, + coresight_sink_by_id); + + return dev ? to_coresight_device(dev) : NULL; +} + +/* + * coresight_grab_device - Power up this device and any of the helper + * devices connected to it for trace operation. Since the helper devices + * don't appear on the trace path, they should be handled along with the + * the master device. + */ +static void coresight_grab_device(struct coresight_device *csdev) +{ + int i; + + for (i = 0; i < csdev->nr_outport; i++) { + struct coresight_device *child = csdev->conns[i].child_dev; + + if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) + pm_runtime_get_sync(child->dev.parent); + } + pm_runtime_get_sync(csdev->dev.parent); +} + +/* + * coresight_drop_device - Release this device and any of the helper + * devices connected to it. + */ +static void coresight_drop_device(struct coresight_device *csdev) +{ + int i; + + pm_runtime_put(csdev->dev.parent); + for (i = 0; i < csdev->nr_outport; i++) { + struct coresight_device *child = csdev->conns[i].child_dev; + + if (child && child->type == CORESIGHT_DEV_TYPE_HELPER) + pm_runtime_put(child->dev.parent); + } +} + /** * _coresight_build_path - recursively build a path from a @csdev to a sink. * @csdev: The device to start from. @@ -481,9 +659,9 @@ out: if (!node) return -ENOMEM; + coresight_grab_device(csdev); node->csdev = csdev; list_add(&node->link, path); - pm_runtime_get_sync(csdev->dev.parent); return 0; } @@ -527,7 +705,7 @@ void coresight_release_path(struct list_head *path) list_for_each_entry_safe(nd, next, path, link) { csdev = nd->csdev; - pm_runtime_put_sync(csdev->dev.parent); + coresight_drop_device(csdev); list_del(&nd->link); kfree(nd); } @@ -607,7 +785,7 @@ int coresight_enable(struct coresight_device *csdev) goto out; } - ret = coresight_enable_path(path, CS_MODE_SYSFS); + ret = coresight_enable_path(path, CS_MODE_SYSFS, NULL); if (ret) goto err_path; @@ -778,13 +956,15 @@ static struct device_type coresight_dev_type[] = { .name = "source", .groups = coresight_source_groups, }, + { + .name = "helper", + }, }; static void coresight_device_release(struct device *dev) { struct coresight_device *csdev = to_coresight_device(dev); - kfree(csdev->conns); kfree(csdev->refcnt); kfree(csdev); } @@ -990,7 +1170,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) csdev = kzalloc(sizeof(*csdev), GFP_KERNEL); if (!csdev) { ret = -ENOMEM; - goto err_kzalloc_csdev; + goto err_out; } if (desc->type == CORESIGHT_DEV_TYPE_LINK || @@ -1006,7 +1186,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) refcnts = kcalloc(nr_refcnts, sizeof(*refcnts), GFP_KERNEL); if (!refcnts) { ret = -ENOMEM; - goto err_kzalloc_refcnts; + goto err_free_csdev; } csdev->refcnt = refcnts; @@ -1019,7 +1199,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) conns = kcalloc(csdev->nr_outport, sizeof(*conns), GFP_KERNEL); if (!conns) { ret = -ENOMEM; - goto err_kzalloc_conns; + goto err_free_refcnts; } for (i = 0; i < csdev->nr_outport; i++) { @@ -1044,8 +1224,30 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) dev_set_name(&csdev->dev, "%s", desc->pdata->name); ret = device_register(&csdev->dev); - if (ret) - goto err_device_register; + if (ret) { + put_device(&csdev->dev); + /* + * All resources are free'd explicitly via + * coresight_device_release(), triggered from put_device(). + */ + goto err_out; + } + + if (csdev->type == CORESIGHT_DEV_TYPE_SINK || + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) { + ret = etm_perf_add_symlink_sink(csdev); + + if (ret) { + device_unregister(&csdev->dev); + /* + * As with the above, all resources are free'd + * explicitly via coresight_device_release() triggered + * from put_device(), which is in turn called from + * function device_unregister(). + */ + goto err_out; + } + } mutex_lock(&coresight_mutex); @@ -1056,21 +1258,26 @@ struct coresight_device *coresight_register(struct coresight_desc *desc) return csdev; -err_device_register: - kfree(conns); -err_kzalloc_conns: +err_free_refcnts: kfree(refcnts); -err_kzalloc_refcnts: +err_free_csdev: kfree(csdev); -err_kzalloc_csdev: +err_out: return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(coresight_register); void coresight_unregister(struct coresight_device *csdev) { + etm_perf_del_symlink_sink(csdev); /* Remove references of that device in the topology */ coresight_remove_conns(csdev); device_unregister(&csdev->dev); } EXPORT_SYMBOL_GPL(coresight_unregister); + +bool coresight_loses_context_with_cpu(struct device *dev) +{ + return fwnode_property_present(dev_fwnode(dev), + "arm,coresight-loses-context-with-cpu"); +} diff --git a/drivers/hwtracing/coresight/of_coresight.c b/drivers/hwtracing/coresight/of_coresight.c index a18794128bf8..f6941fda9777 100644 --- a/drivers/hwtracing/coresight/of_coresight.c +++ b/drivers/hwtracing/coresight/of_coresight.c @@ -127,17 +127,79 @@ int of_coresight_get_cpu(const struct device_node *node) } EXPORT_SYMBOL_GPL(of_coresight_get_cpu); +/* + * of_coresight_parse_endpoint : Parse the given output endpoint @ep + * and fill the connection information in @pdata[@i]. + * + * Parses the local port, remote device name and the remote port. + * + * Returns : + * 1 - If the parsing is successful and a connection record + * was created for an output connection. + * 0 - If the parsing completed without any fatal errors. + * -Errno - Fatal error, abort the scanning. + */ +static int of_coresight_parse_endpoint(struct device *dev, + struct device_node *ep, + struct coresight_platform_data *pdata, + int i) +{ + int ret = 0; + struct of_endpoint endpoint, rendpoint; + struct device_node *rparent = NULL; + struct device_node *rep = NULL; + struct device *rdev = NULL; + + do { + /* Parse the local port details */ + if (of_graph_parse_endpoint(ep, &endpoint)) + break; + /* + * Get a handle on the remote endpoint and the device it is + * attached to. + */ + rep = of_graph_get_remote_endpoint(ep); + if (!rep) + break; + rparent = of_graph_get_port_parent(rep); + if (!rparent) + break; + if (of_graph_parse_endpoint(rep, &rendpoint)) + break; + + /* If the remote device is not available, defer probing */ + rdev = of_coresight_get_endpoint_device(rparent); + if (!rdev) { + ret = -EPROBE_DEFER; + break; + } + + pdata->outports[i] = endpoint.port; + pdata->child_names[i] = devm_kstrdup(dev, + dev_name(rdev), + GFP_KERNEL); + pdata->child_ports[i] = rendpoint.port; + /* Connection record updated */ + ret = 1; + } while (0); + + if (rparent) + of_node_put(rparent); + if (rep) + of_node_put(rep); + if (rdev) + put_device(rdev); + + return ret; +} + struct coresight_platform_data * of_get_coresight_platform_data(struct device *dev, const struct device_node *node) { int i = 0, ret = 0; struct coresight_platform_data *pdata; - struct of_endpoint endpoint, rendpoint; - struct device *rdev; struct device_node *ep = NULL; - struct device_node *rparent = NULL; - struct device_node *rport = NULL; pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) @@ -145,64 +207,39 @@ of_get_coresight_platform_data(struct device *dev, /* Use device name as sysfs handle */ pdata->name = dev_name(dev); + pdata->cpu = of_coresight_get_cpu(node); /* Get the number of input and output port for this component */ of_coresight_get_ports(node, &pdata->nr_inport, &pdata->nr_outport); - if (pdata->nr_outport) { - ret = of_coresight_alloc_memory(dev, pdata); - if (ret) + /* If there are no output connections, we are done */ + if (!pdata->nr_outport) + return pdata; + + ret = of_coresight_alloc_memory(dev, pdata); + if (ret) + return ERR_PTR(ret); + + /* Iterate through each port to discover topology */ + while ((ep = of_graph_get_next_endpoint(node, ep))) { + /* + * No need to deal with input ports, as processing the + * output ports connected to them will process the details. + */ + if (of_find_property(ep, "slave-mode", NULL)) + continue; + + ret = of_coresight_parse_endpoint(dev, ep, pdata, i); + switch (ret) { + case 1: + i++; /* Fall through */ + case 0: + break; + default: return ERR_PTR(ret); - - /* Iterate through each port to discover topology */ - do { - /* Get a handle on a port */ - ep = of_graph_get_next_endpoint(node, ep); - if (!ep) - break; - - /* - * No need to deal with input ports, processing for as - * processing for output ports will deal with them. - */ - if (of_find_property(ep, "slave-mode", NULL)) - continue; - - /* Get a handle on the local endpoint */ - ret = of_graph_parse_endpoint(ep, &endpoint); - - if (ret) - continue; - - /* The local out port number */ - pdata->outports[i] = endpoint.port; - - /* - * Get a handle on the remote port and parent - * attached to it. - */ - rparent = of_graph_get_remote_port_parent(ep); - rport = of_graph_get_remote_port(ep); - - if (!rparent || !rport) - continue; - - if (of_graph_parse_endpoint(rport, &rendpoint)) - continue; - - rdev = of_coresight_get_endpoint_device(rparent); - if (!rdev) - return ERR_PTR(-EPROBE_DEFER); - - pdata->child_names[i] = dev_name(rdev); - pdata->child_ports[i] = rendpoint.id; - - i++; - } while (ep); + } } - pdata->cpu = of_coresight_get_cpu(node); - return pdata; } EXPORT_SYMBOL_GPL(of_get_coresight_platform_data); diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c index 757801d27604..6a451b4fc04d 100644 --- a/drivers/hwtracing/intel_th/core.c +++ b/drivers/hwtracing/intel_th/core.c @@ -628,10 +628,8 @@ intel_th_subdevice_alloc(struct intel_th *th, } err = intel_th_device_add_resources(thdev, res, subdev->nres); - if (err) { - put_device(&thdev->dev); + if (err) goto fail_put_device; - } if (subdev->type == INTEL_TH_OUTPUT) { thdev->dev.devt = MKDEV(th->major, th->num_thdevs); @@ -644,10 +642,8 @@ intel_th_subdevice_alloc(struct intel_th *th, } err = device_add(&thdev->dev); - if (err) { - put_device(&thdev->dev); + if (err) goto fail_free_res; - } /* need switch driver to be loaded to enumerate the rest */ if (subdev->type == INTEL_TH_SWITCH && !req) { diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 140b18d858e8..fc371444407d 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -183,16 +183,46 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x02a6), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Comet Lake PCH */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x06a6), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, + { + /* Comet Lake PCH-V */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa3a6), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Ice Lake NNPI */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x45c5), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Ice Lake CPU */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x8a29), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, + { + /* Tiger Lake CPU */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x9a33), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Tiger Lake PCH */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa0a6), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Jasper Lake PCH */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4da6), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, + { + /* Elkhart Lake */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4b26), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { 0 }, }; diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 9a5ebaed0686..2c03f6f57a1d 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -429,12 +429,13 @@ config I2C_BCM_KONA If you do not need KONA I2C interface, say N. config I2C_BRCMSTB - tristate "BRCM Settop I2C controller" - depends on ARCH_BRCMSTB || BMIPS_GENERIC || COMPILE_TEST + tristate "BRCM Settop/DSL I2C controller" + depends on ARCH_BRCMSTB || BMIPS_GENERIC || ARCH_BCM_63XX || \ + COMPILE_TEST default y help If you say yes to this option, support will be included for the - I2C interface on the Broadcom Settop SoCs. + I2C interface on the Broadcom Settop/DSL SoCs. If you do not need I2C interface, say N. diff --git a/drivers/i2c/busses/i2c-altera.c b/drivers/i2c/busses/i2c-altera.c index f5e1941e65b5..a1cdcfc74acf 100644 --- a/drivers/i2c/busses/i2c-altera.c +++ b/drivers/i2c/busses/i2c-altera.c @@ -182,7 +182,7 @@ static void altr_i2c_init(struct altr_i2c_dev *idev) /* SCL Low Time */ writel(t_low, idev->base + ALTR_I2C_SCL_LOW); /* SDA Hold Time, 300ns */ - writel(div_u64(300 * clk_mhz, 1000), idev->base + ALTR_I2C_SDA_HOLD); + writel(3 * clk_mhz / 10, idev->base + ALTR_I2C_SDA_HOLD); /* Mask all master interrupt bits */ altr_i2c_int_enable(idev, ALTR_I2C_ALL_IRQ, false); diff --git a/drivers/i2c/busses/i2c-aspeed.c b/drivers/i2c/busses/i2c-aspeed.c index a074735456bc..29574b9075fd 100644 --- a/drivers/i2c/busses/i2c-aspeed.c +++ b/drivers/i2c/busses/i2c-aspeed.c @@ -135,7 +135,8 @@ struct aspeed_i2c_bus { /* Synchronizes I/O mem access to base. */ spinlock_t lock; struct completion cmd_complete; - u32 (*get_clk_reg_val)(u32 divisor); + u32 (*get_clk_reg_val)(struct device *dev, + u32 divisor); unsigned long parent_clk_frequency; u32 bus_frequency; /* Transaction state. */ @@ -679,16 +680,27 @@ static const struct i2c_algorithm aspeed_i2c_algo = { #endif /* CONFIG_I2C_SLAVE */ }; -static u32 aspeed_i2c_get_clk_reg_val(u32 clk_high_low_max, u32 divisor) +static u32 aspeed_i2c_get_clk_reg_val(struct device *dev, + u32 clk_high_low_mask, + u32 divisor) { - u32 base_clk, clk_high, clk_low, tmp; + u32 base_clk_divisor, clk_high_low_max, clk_high, clk_low, tmp; + + /* + * SCL_high and SCL_low represent a value 1 greater than what is stored + * since a zero divider is meaningless. Thus, the max value each can + * store is every bit set + 1. Since SCL_high and SCL_low are added + * together (see below), the max value of both is the max value of one + * them times two. + */ + clk_high_low_max = (clk_high_low_mask + 1) * 2; /* * The actual clock frequency of SCL is: * SCL_freq = APB_freq / (base_freq * (SCL_high + SCL_low)) * = APB_freq / divisor * where base_freq is a programmable clock divider; its value is - * base_freq = 1 << base_clk + * base_freq = 1 << base_clk_divisor * SCL_high is the number of base_freq clock cycles that SCL stays high * and SCL_low is the number of base_freq clock cycles that SCL stays * low for a period of SCL. @@ -698,47 +710,59 @@ static u32 aspeed_i2c_get_clk_reg_val(u32 clk_high_low_max, u32 divisor) * SCL_low = clk_low + 1 * Thus, * SCL_freq = APB_freq / - * ((1 << base_clk) * (clk_high + 1 + clk_low + 1)) + * ((1 << base_clk_divisor) * (clk_high + 1 + clk_low + 1)) * The documentation recommends clk_high >= clk_high_max / 2 and * clk_low >= clk_low_max / 2 - 1 when possible; this last constraint * gives us the following solution: */ - base_clk = divisor > clk_high_low_max ? + base_clk_divisor = divisor > clk_high_low_max ? ilog2((divisor - 1) / clk_high_low_max) + 1 : 0; - tmp = (divisor + (1 << base_clk) - 1) >> base_clk; - clk_low = tmp / 2; - clk_high = tmp - clk_low; - if (clk_high) - clk_high--; + if (base_clk_divisor > ASPEED_I2CD_TIME_BASE_DIVISOR_MASK) { + base_clk_divisor = ASPEED_I2CD_TIME_BASE_DIVISOR_MASK; + clk_low = clk_high_low_mask; + clk_high = clk_high_low_mask; + dev_err(dev, + "clamping clock divider: divider requested, %u, is greater than largest possible divider, %u.\n", + divisor, (1 << base_clk_divisor) * clk_high_low_max); + } else { + tmp = (divisor + (1 << base_clk_divisor) - 1) + >> base_clk_divisor; + clk_low = tmp / 2; + clk_high = tmp - clk_low; - if (clk_low) - clk_low--; + if (clk_high) + clk_high--; + + if (clk_low) + clk_low--; + } return ((clk_high << ASPEED_I2CD_TIME_SCL_HIGH_SHIFT) & ASPEED_I2CD_TIME_SCL_HIGH_MASK) | ((clk_low << ASPEED_I2CD_TIME_SCL_LOW_SHIFT) & ASPEED_I2CD_TIME_SCL_LOW_MASK) - | (base_clk & ASPEED_I2CD_TIME_BASE_DIVISOR_MASK); + | (base_clk_divisor + & ASPEED_I2CD_TIME_BASE_DIVISOR_MASK); } -static u32 aspeed_i2c_24xx_get_clk_reg_val(u32 divisor) +static u32 aspeed_i2c_24xx_get_clk_reg_val(struct device *dev, u32 divisor) { /* * clk_high and clk_low are each 3 bits wide, so each can hold a max * value of 8 giving a clk_high_low_max of 16. */ - return aspeed_i2c_get_clk_reg_val(16, divisor); + return aspeed_i2c_get_clk_reg_val(dev, GENMASK(2, 0), divisor); } -static u32 aspeed_i2c_25xx_get_clk_reg_val(u32 divisor) +static u32 aspeed_i2c_25xx_get_clk_reg_val(struct device *dev, u32 divisor) { /* * clk_high and clk_low are each 4 bits wide, so each can hold a max * value of 16 giving a clk_high_low_max of 32. */ - return aspeed_i2c_get_clk_reg_val(32, divisor); + return aspeed_i2c_get_clk_reg_val(dev, GENMASK(3, 0), divisor); } /* precondition: bus.lock has been acquired. */ @@ -751,7 +775,7 @@ static int aspeed_i2c_init_clk(struct aspeed_i2c_bus *bus) clk_reg_val &= (ASPEED_I2CD_TIME_TBUF_MASK | ASPEED_I2CD_TIME_THDSTA_MASK | ASPEED_I2CD_TIME_TACST_MASK); - clk_reg_val |= bus->get_clk_reg_val(divisor); + clk_reg_val |= bus->get_clk_reg_val(bus->dev, divisor); writel(clk_reg_val, bus->base + ASPEED_I2C_AC_TIMING_REG1); writel(ASPEED_NO_TIMEOUT_CTRL, bus->base + ASPEED_I2C_AC_TIMING_REG2); @@ -859,7 +883,8 @@ static int aspeed_i2c_probe_bus(struct platform_device *pdev) if (!match) bus->get_clk_reg_val = aspeed_i2c_24xx_get_clk_reg_val; else - bus->get_clk_reg_val = (u32 (*)(u32))match->data; + bus->get_clk_reg_val = (u32 (*)(struct device *, u32)) + match->data; /* Initialize the I2C adapter */ spin_lock_init(&bus->lock); diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index b73dd837fb53..26f83029f64a 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1088,7 +1088,8 @@ static int i2c_imx_probe(struct platform_device *pdev) /* Get I2C clock */ i2c_imx->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(i2c_imx->clk)) { - dev_err(&pdev->dev, "can't get I2C clock\n"); + if (PTR_ERR(i2c_imx->clk) != -EPROBE_DEFER) + dev_err(&pdev->dev, "can't get I2C clock\n"); return PTR_ERR(i2c_imx->clk); } diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c index 30132c3957cd..41ca9ff7b5da 100644 --- a/drivers/i2c/busses/i2c-jz4780.c +++ b/drivers/i2c/busses/i2c-jz4780.c @@ -82,25 +82,6 @@ #define JZ4780_I2C_STA_TFNF BIT(1) #define JZ4780_I2C_STA_ACT BIT(0) -static const char * const jz4780_i2c_abrt_src[] = { - "ABRT_7B_ADDR_NOACK", - "ABRT_10ADDR1_NOACK", - "ABRT_10ADDR2_NOACK", - "ABRT_XDATA_NOACK", - "ABRT_GCALL_NOACK", - "ABRT_GCALL_READ", - "ABRT_HS_ACKD", - "SBYTE_ACKDET", - "ABRT_HS_NORSTRT", - "SBYTE_NORSTRT", - "ABRT_10B_RD_NORSTRT", - "ABRT_MASTER_DIS", - "ARB_LOST", - "SLVFLUSH_TXFIFO", - "SLV_ARBLOST", - "SLVRD_INTX", -}; - #define JZ4780_I2C_INTST_IGC BIT(11) #define JZ4780_I2C_INTST_ISTT BIT(10) #define JZ4780_I2C_INTST_ISTP BIT(9) @@ -538,21 +519,8 @@ done: static void jz4780_i2c_txabrt(struct jz4780_i2c *i2c, int src) { - int i; - - dev_err(&i2c->adap.dev, "txabrt: 0x%08x\n", src); - dev_err(&i2c->adap.dev, "device addr=%x\n", - jz4780_i2c_readw(i2c, JZ4780_I2C_TAR)); - dev_err(&i2c->adap.dev, "send cmd count:%d %d\n", - i2c->cmd, i2c->cmd_buf[i2c->cmd]); - dev_err(&i2c->adap.dev, "receive data count:%d %d\n", - i2c->cmd, i2c->data_buf[i2c->cmd]); - - for (i = 0; i < 16; i++) { - if (src & BIT(i)) - dev_dbg(&i2c->adap.dev, "I2C TXABRT[%d]=%s\n", - i, jz4780_i2c_abrt_src[i]); - } + dev_dbg(&i2c->adap.dev, "txabrt: 0x%08x, cmd: %d, send: %d, recv: %d\n", + src, i2c->cmd, i2c->cmd_buf[i2c->cmd], i2c->data_buf[i2c->cmd]); } static inline int jz4780_i2c_xfer_read(struct jz4780_i2c *i2c, diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 12ba183693d6..a03564f41ad0 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -486,6 +486,22 @@ static int omap_i2c_init(struct omap_i2c_dev *omap) return 0; } +/* + * Try bus recovery, but only if SDA is actually low. + */ +static int omap_i2c_recover_bus(struct omap_i2c_dev *omap) +{ + u16 systest; + + systest = omap_i2c_read_reg(omap, OMAP_I2C_SYSTEST_REG); + if ((systest & OMAP_I2C_SYSTEST_SCL_I_FUNC) && + (systest & OMAP_I2C_SYSTEST_SDA_I_FUNC)) + return 0; /* bus seems to already be fine */ + if (!(systest & OMAP_I2C_SYSTEST_SCL_I_FUNC)) + return -EBUSY; /* recovery would not fix SCL */ + return i2c_recover_bus(&omap->adapter); +} + /* * Waiting on Bus Busy */ @@ -496,7 +512,7 @@ static int omap_i2c_wait_for_bb(struct omap_i2c_dev *omap) timeout = jiffies + OMAP_I2C_TIMEOUT; while (omap_i2c_read_reg(omap, OMAP_I2C_STAT_REG) & OMAP_I2C_STAT_BB) { if (time_after(jiffies, timeout)) - return i2c_recover_bus(&omap->adapter); + return omap_i2c_recover_bus(omap); msleep(1); } @@ -577,8 +593,13 @@ static int omap_i2c_wait_for_bb_valid(struct omap_i2c_dev *omap) } if (time_after(jiffies, timeout)) { + /* + * SDA or SCL were low for the entire timeout without + * any activity detected. Most likely, a slave is + * locking up the bus with no master driving the clock. + */ dev_warn(omap->dev, "timeout waiting for bus ready\n"); - return -ETIMEDOUT; + return omap_i2c_recover_bus(omap); } msleep(1); diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index d8cbe149925b..14f60751729e 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -219,7 +219,7 @@ struct stm32f7_i2c_dev { struct stm32f7_i2c_timings timing; }; -/** +/* * All these values are coming from I2C Specification, Version 6.0, 4th of * April 2014. * diff --git a/drivers/i2c/busses/i2c-uniphier-f.c b/drivers/i2c/busses/i2c-uniphier-f.c index bc26ec822e26..dd0687e36a47 100644 --- a/drivers/i2c/busses/i2c-uniphier-f.c +++ b/drivers/i2c/busses/i2c-uniphier-f.c @@ -98,6 +98,7 @@ struct uniphier_fi2c_priv { unsigned int flags; unsigned int busy_cnt; unsigned int clk_cycle; + spinlock_t lock; /* IRQ synchronization */ }; static void uniphier_fi2c_fill_txfifo(struct uniphier_fi2c_priv *priv, @@ -142,9 +143,10 @@ static void uniphier_fi2c_set_irqs(struct uniphier_fi2c_priv *priv) writel(priv->enabled_irqs, priv->membase + UNIPHIER_FI2C_IE); } -static void uniphier_fi2c_clear_irqs(struct uniphier_fi2c_priv *priv) +static void uniphier_fi2c_clear_irqs(struct uniphier_fi2c_priv *priv, + u32 mask) { - writel(-1, priv->membase + UNIPHIER_FI2C_IC); + writel(mask, priv->membase + UNIPHIER_FI2C_IC); } static void uniphier_fi2c_stop(struct uniphier_fi2c_priv *priv) @@ -162,7 +164,10 @@ static irqreturn_t uniphier_fi2c_interrupt(int irq, void *dev_id) struct uniphier_fi2c_priv *priv = dev_id; u32 irq_status; + spin_lock(&priv->lock); + irq_status = readl(priv->membase + UNIPHIER_FI2C_INT); + irq_status &= priv->enabled_irqs; dev_dbg(&priv->adap.dev, "interrupt: enabled_irqs=%04x, irq_status=%04x\n", @@ -207,7 +212,13 @@ static irqreturn_t uniphier_fi2c_interrupt(int irq, void *dev_id) if (irq_status & (UNIPHIER_FI2C_INT_RF | UNIPHIER_FI2C_INT_RB)) { uniphier_fi2c_drain_rxfifo(priv); - if (!priv->len) + /* + * If the number of bytes to read is multiple of the FIFO size + * (msg->len == 8, 16, 24, ...), the INT_RF bit is set a little + * earlier than INT_RB. We wait for INT_RB to confirm the + * completion of the current message. + */ + if (!priv->len && (irq_status & UNIPHIER_FI2C_INT_RB)) goto data_done; if (unlikely(priv->flags & UNIPHIER_FI2C_MANUAL_NACK)) { @@ -230,6 +241,8 @@ static irqreturn_t uniphier_fi2c_interrupt(int irq, void *dev_id) goto handled; } + spin_unlock(&priv->lock); + return IRQ_NONE; data_done: @@ -244,7 +257,14 @@ complete: } handled: - uniphier_fi2c_clear_irqs(priv); + /* + * This controller makes a pause while any bit of the IRQ status is + * asserted. Clear the asserted bit to kick the controller just before + * exiting the handler. + */ + uniphier_fi2c_clear_irqs(priv, irq_status); + + spin_unlock(&priv->lock); return IRQ_HANDLED; } @@ -252,6 +272,8 @@ handled: static void uniphier_fi2c_tx_init(struct uniphier_fi2c_priv *priv, u16 addr) { priv->enabled_irqs |= UNIPHIER_FI2C_INT_TE; + uniphier_fi2c_set_irqs(priv); + /* do not use TX byte counter */ writel(0, priv->membase + UNIPHIER_FI2C_TBC); /* set slave address */ @@ -284,6 +306,8 @@ static void uniphier_fi2c_rx_init(struct uniphier_fi2c_priv *priv, u16 addr) priv->enabled_irqs |= UNIPHIER_FI2C_INT_RF; } + uniphier_fi2c_set_irqs(priv); + /* set slave address with RD bit */ writel(UNIPHIER_FI2C_DTTX_CMD | UNIPHIER_FI2C_DTTX_RD | addr << 1, priv->membase + UNIPHIER_FI2C_DTTX); @@ -307,14 +331,16 @@ static void uniphier_fi2c_recover(struct uniphier_fi2c_priv *priv) } static int uniphier_fi2c_master_xfer_one(struct i2c_adapter *adap, - struct i2c_msg *msg, bool stop) + struct i2c_msg *msg, bool repeat, + bool stop) { struct uniphier_fi2c_priv *priv = i2c_get_adapdata(adap); bool is_read = msg->flags & I2C_M_RD; - unsigned long time_left; + unsigned long time_left, flags; - dev_dbg(&adap->dev, "%s: addr=0x%02x, len=%d, stop=%d\n", - is_read ? "receive" : "transmit", msg->addr, msg->len, stop); + dev_dbg(&adap->dev, "%s: addr=0x%02x, len=%d, repeat=%d, stop=%d\n", + is_read ? "receive" : "transmit", msg->addr, msg->len, + repeat, stop); priv->len = msg->len; priv->buf = msg->buf; @@ -326,22 +352,36 @@ static int uniphier_fi2c_master_xfer_one(struct i2c_adapter *adap, priv->flags |= UNIPHIER_FI2C_STOP; reinit_completion(&priv->comp); - uniphier_fi2c_clear_irqs(priv); + uniphier_fi2c_clear_irqs(priv, U32_MAX); writel(UNIPHIER_FI2C_RST_TBRST | UNIPHIER_FI2C_RST_RBRST, priv->membase + UNIPHIER_FI2C_RST); /* reset TX/RX FIFO */ + spin_lock_irqsave(&priv->lock, flags); + if (is_read) uniphier_fi2c_rx_init(priv, msg->addr); else uniphier_fi2c_tx_init(priv, msg->addr); - uniphier_fi2c_set_irqs(priv); - dev_dbg(&adap->dev, "start condition\n"); - writel(UNIPHIER_FI2C_CR_MST | UNIPHIER_FI2C_CR_STA, - priv->membase + UNIPHIER_FI2C_CR); + /* + * For a repeated START condition, writing a slave address to the FIFO + * kicks the controller. So, the UNIPHIER_FI2C_CR register should be + * written only for a non-repeated START condition. + */ + if (!repeat) + writel(UNIPHIER_FI2C_CR_MST | UNIPHIER_FI2C_CR_STA, + priv->membase + UNIPHIER_FI2C_CR); + + spin_unlock_irqrestore(&priv->lock, flags); time_left = wait_for_completion_timeout(&priv->comp, adap->timeout); + + spin_lock_irqsave(&priv->lock, flags); + priv->enabled_irqs = 0; + uniphier_fi2c_set_irqs(priv); + spin_unlock_irqrestore(&priv->lock, flags); + if (!time_left) { dev_err(&adap->dev, "transaction timeout.\n"); uniphier_fi2c_recover(priv); @@ -394,6 +434,7 @@ static int uniphier_fi2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num) { struct i2c_msg *msg, *emsg = msgs + num; + bool repeat = false; int ret; ret = uniphier_fi2c_check_bus_busy(adap); @@ -404,9 +445,11 @@ static int uniphier_fi2c_master_xfer(struct i2c_adapter *adap, /* Emit STOP if it is the last message or I2C_M_STOP is set. */ bool stop = (msg + 1 == emsg) || (msg->flags & I2C_M_STOP); - ret = uniphier_fi2c_master_xfer_one(adap, msg, stop); + ret = uniphier_fi2c_master_xfer_one(adap, msg, repeat, stop); if (ret) return ret; + + repeat = !stop; } return num; @@ -546,6 +589,7 @@ static int uniphier_fi2c_probe(struct platform_device *pdev) priv->clk_cycle = clk_rate / bus_speed; init_completion(&priv->comp); + spin_lock_init(&priv->lock); priv->adap.owner = THIS_MODULE; priv->adap.algo = &uniphier_fi2c_algo; priv->adap.dev.parent = dev; diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index 847d9bf6744c..0d4d5dcf94f3 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -43,6 +43,7 @@ struct i2c_acpi_lookup { int index; u32 speed; u32 min_speed; + u32 force_speed; }; static int i2c_acpi_fill_info(struct acpi_resource *ares, void *data) @@ -240,6 +241,19 @@ i2c_acpi_match_device(const struct acpi_device_id *matches, return acpi_match_device(matches, &client->dev); } +static const struct acpi_device_id i2c_acpi_force_400khz_device_ids[] = { + /* + * These Silead touchscreen controllers only work at 400KHz, for + * some reason they do not work at 100KHz. On some devices the ACPI + * tables list another device at their bus as only being capable + * of 100KHz, testing has shown that these other devices work fine + * at 400KHz (as can be expected of any recent i2c hw) so we force + * the speed of the bus to 400 KHz if a Silead device is present. + */ + { "MSSL1680", 0 }, + {} +}; + static acpi_status i2c_acpi_lookup_speed(acpi_handle handle, u32 level, void *data, void **return_value) { @@ -258,6 +272,9 @@ static acpi_status i2c_acpi_lookup_speed(acpi_handle handle, u32 level, if (lookup->speed <= lookup->min_speed) lookup->min_speed = lookup->speed; + if (acpi_match_device_ids(adev, i2c_acpi_force_400khz_device_ids) == 0) + lookup->force_speed = 400000; + return AE_OK; } @@ -295,7 +312,16 @@ u32 i2c_acpi_find_bus_speed(struct device *dev) return 0; } - return lookup.min_speed != UINT_MAX ? lookup.min_speed : 0; + if (lookup.force_speed) { + if (lookup.force_speed != lookup.min_speed) + dev_warn(dev, FW_BUG "DSDT uses known not-working I2C bus speed %d, forcing it to %d\n", + lookup.min_speed, lookup.force_speed); + return lookup.force_speed; + } else if (lookup.min_speed != UINT_MAX) { + return lookup.min_speed; + } else { + return 0; + } } EXPORT_SYMBOL_GPL(i2c_acpi_find_bus_speed); @@ -326,10 +352,18 @@ static struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle) static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev) { struct device *dev; + struct i2c_client *client; dev = bus_find_device(&i2c_bus_type, NULL, adev, i2c_acpi_find_match_device); - return dev ? i2c_verify_client(dev) : NULL; + if (!dev) + return NULL; + + client = i2c_verify_client(dev); + if (!client) + put_device(dev); + + return client; } static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value, diff --git a/drivers/i2c/i2c-core-of.c b/drivers/i2c/i2c-core-of.c index 8d474bb1dc15..17d727e0b842 100644 --- a/drivers/i2c/i2c-core-of.c +++ b/drivers/i2c/i2c-core-of.c @@ -238,14 +238,14 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action, } client = of_i2c_register_device(adap, rd->dn); - put_device(&adap->dev); - if (IS_ERR(client)) { dev_err(&adap->dev, "failed to create client for '%pOF'\n", rd->dn); + put_device(&adap->dev); of_node_clear_flag(rd->dn, OF_POPULATED); return notifier_from_errno(PTR_ERR(client)); } + put_device(&adap->dev); break; case OF_RECONFIG_CHANGE_REMOVE: /* already depopulated? */ diff --git a/drivers/ide/cmd64x.c b/drivers/ide/cmd64x.c index b127ed60c733..9dde8390da09 100644 --- a/drivers/ide/cmd64x.c +++ b/drivers/ide/cmd64x.c @@ -65,6 +65,9 @@ static void cmd64x_program_timings(ide_drive_t *drive, u8 mode) struct ide_timing t; u8 arttim = 0; + if (drive->dn >= ARRAY_SIZE(drwtim_regs)) + return; + ide_timing_compute(drive, mode, &t, T, 0); /* diff --git a/drivers/ide/serverworks.c b/drivers/ide/serverworks.c index a97affca18ab..0f57d45484d1 100644 --- a/drivers/ide/serverworks.c +++ b/drivers/ide/serverworks.c @@ -114,6 +114,9 @@ static void svwks_set_pio_mode(ide_hwif_t *hwif, ide_drive_t *drive) struct pci_dev *dev = to_pci_dev(hwif->dev); const u8 pio = drive->pio_mode - XFER_PIO_0; + if (drive->dn >= ARRAY_SIZE(drive_pci)) + return; + pci_write_config_byte(dev, drive_pci[drive->dn], pio_modes[pio]); if (svwks_csb_check(dev)) { @@ -140,6 +143,9 @@ static void svwks_set_dma_mode(ide_hwif_t *hwif, ide_drive_t *drive) u8 ultra_enable = 0, ultra_timing = 0, dma_timing = 0; + if (drive->dn >= ARRAY_SIZE(drive_pci2)) + return; + pci_read_config_byte(dev, (0x56|hwif->channel), &ultra_timing); pci_read_config_byte(dev, 0x54, &ultra_enable); diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c index 807299dd45eb..7e86a5b7ec4e 100644 --- a/drivers/iio/accel/bmc150-accel-core.c +++ b/drivers/iio/accel/bmc150-accel-core.c @@ -125,7 +125,7 @@ #define BMC150_ACCEL_SLEEP_1_SEC 0x0F #define BMC150_ACCEL_REG_TEMP 0x08 -#define BMC150_ACCEL_TEMP_CENTER_VAL 24 +#define BMC150_ACCEL_TEMP_CENTER_VAL 23 #define BMC150_ACCEL_AXIS_TO_REG(axis) (BMC150_ACCEL_REG_XOUT_L + (axis * 2)) #define BMC150_AUTO_SUSPEND_DELAY_MS 2000 diff --git a/drivers/iio/adc/dln2-adc.c b/drivers/iio/adc/dln2-adc.c index ab8d6aed5085..2a299bbd6acf 100644 --- a/drivers/iio/adc/dln2-adc.c +++ b/drivers/iio/adc/dln2-adc.c @@ -528,6 +528,10 @@ static int dln2_adc_triggered_buffer_postenable(struct iio_dev *indio_dev) u16 conflict; unsigned int trigger_chan; + ret = iio_triggered_buffer_postenable(indio_dev); + if (ret) + return ret; + mutex_lock(&dln2->mutex); /* Enable ADC */ @@ -541,6 +545,7 @@ static int dln2_adc_triggered_buffer_postenable(struct iio_dev *indio_dev) (int)conflict); ret = -EBUSY; } + iio_triggered_buffer_predisable(indio_dev); return ret; } @@ -554,6 +559,7 @@ static int dln2_adc_triggered_buffer_postenable(struct iio_dev *indio_dev) mutex_unlock(&dln2->mutex); if (ret < 0) { dev_dbg(&dln2->pdev->dev, "Problem in %s\n", __func__); + iio_triggered_buffer_predisable(indio_dev); return ret; } } else { @@ -561,12 +567,12 @@ static int dln2_adc_triggered_buffer_postenable(struct iio_dev *indio_dev) mutex_unlock(&dln2->mutex); } - return iio_triggered_buffer_postenable(indio_dev); + return 0; } static int dln2_adc_triggered_buffer_predisable(struct iio_dev *indio_dev) { - int ret; + int ret, ret2; struct dln2_adc *dln2 = iio_priv(indio_dev); mutex_lock(&dln2->mutex); @@ -581,12 +587,14 @@ static int dln2_adc_triggered_buffer_predisable(struct iio_dev *indio_dev) ret = dln2_adc_set_port_enabled(dln2, false, NULL); mutex_unlock(&dln2->mutex); - if (ret < 0) { + if (ret < 0) dev_dbg(&dln2->pdev->dev, "Problem in %s\n", __func__); - return ret; - } - return iio_triggered_buffer_predisable(indio_dev); + ret2 = iio_triggered_buffer_predisable(indio_dev); + if (ret == 0) + ret = ret2; + + return ret; } static const struct iio_buffer_setup_ops dln2_adc_buffer_setup_ops = { diff --git a/drivers/iio/adc/max1027.c b/drivers/iio/adc/max1027.c index ebc715927e63..03af02769370 100644 --- a/drivers/iio/adc/max1027.c +++ b/drivers/iio/adc/max1027.c @@ -462,6 +462,14 @@ static int max1027_probe(struct spi_device *spi) goto fail_dev_register; } + /* Internal reset */ + st->reg = MAX1027_RST_REG; + ret = spi_write(st->spi, &st->reg, 1); + if (ret < 0) { + dev_err(&indio_dev->dev, "Failed to reset the ADC\n"); + return ret; + } + /* Disable averaging */ st->reg = MAX1027_AVG_REG; ret = spi_write(st->spi, &st->reg, 1); diff --git a/drivers/iio/adc/max9611.c b/drivers/iio/adc/max9611.c index c61fbf560271..8649a61c50bc 100644 --- a/drivers/iio/adc/max9611.c +++ b/drivers/iio/adc/max9611.c @@ -92,6 +92,12 @@ #define MAX9611_TEMP_SCALE_NUM 1000000 #define MAX9611_TEMP_SCALE_DIV 2083 +/* + * Conversion time is 2 ms (typically) at Ta=25 degreeC + * No maximum value is known, so play it safe. + */ +#define MAX9611_CONV_TIME_US_RANGE 3000, 3300 + struct max9611_dev { struct device *dev; struct i2c_client *i2c_client; @@ -239,11 +245,9 @@ static int max9611_read_single(struct max9611_dev *max9611, return ret; } - /* - * need a delay here to make register configuration - * stabilize. 1 msec at least, from empirical testing. - */ - usleep_range(1000, 2000); + /* need a delay here to make register configuration stabilize. */ + + usleep_range(MAX9611_CONV_TIME_US_RANGE); ret = i2c_smbus_read_word_swapped(max9611->i2c_client, reg_addr); if (ret < 0) { @@ -289,7 +293,7 @@ static int max9611_read_csa_voltage(struct max9611_dev *max9611, return ret; if (*adc_raw > 0) { - *csa_gain = gain_selectors[i]; + *csa_gain = (enum max9611_csa_gain)gain_selectors[i]; return 0; } } @@ -511,7 +515,7 @@ static int max9611_init(struct max9611_dev *max9611) MAX9611_REG_CTRL2, 0); return ret; } - usleep_range(1000, 2000); + usleep_range(MAX9611_CONV_TIME_US_RANGE); return 0; } diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c index 2515badf8b28..9b2121f24926 100644 --- a/drivers/iio/adc/meson_saradc.c +++ b/drivers/iio/adc/meson_saradc.c @@ -976,6 +976,11 @@ static int meson_sar_adc_probe(struct platform_device *pdev) if (IS_ERR(base)) return PTR_ERR(base); + priv->regmap = devm_regmap_init_mmio(&pdev->dev, base, + priv->data->regmap_config); + if (IS_ERR(priv->regmap)) + return PTR_ERR(priv->regmap); + irq = irq_of_parse_and_map(pdev->dev.of_node, 0); if (!irq) return -EINVAL; @@ -985,11 +990,6 @@ static int meson_sar_adc_probe(struct platform_device *pdev) if (ret) return ret; - priv->regmap = devm_regmap_init_mmio(&pdev->dev, base, - priv->data->regmap_config); - if (IS_ERR(priv->regmap)) - return PTR_ERR(priv->regmap); - priv->clkin = devm_clk_get(&pdev->dev, "clkin"); if (IS_ERR(priv->clkin)) { dev_err(&pdev->dev, "failed to get clkin\n"); diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c index 804198eb0eef..bc9ebcc6508a 100644 --- a/drivers/iio/adc/stm32-adc-core.c +++ b/drivers/iio/adc/stm32-adc-core.c @@ -33,36 +33,9 @@ #include "stm32-adc-core.h" -/* STM32F4 - common registers for all ADC instances: 1, 2 & 3 */ -#define STM32F4_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00) -#define STM32F4_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x04) - -/* STM32F4_ADC_CSR - bit fields */ -#define STM32F4_EOC3 BIT(17) -#define STM32F4_EOC2 BIT(9) -#define STM32F4_EOC1 BIT(1) - -/* STM32F4_ADC_CCR - bit fields */ -#define STM32F4_ADC_ADCPRE_SHIFT 16 -#define STM32F4_ADC_ADCPRE_MASK GENMASK(17, 16) - /* STM32 F4 maximum analog clock rate (from datasheet) */ #define STM32F4_ADC_MAX_CLK_RATE 36000000 -/* STM32H7 - common registers for all ADC instances */ -#define STM32H7_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00) -#define STM32H7_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x08) - -/* STM32H7_ADC_CSR - bit fields */ -#define STM32H7_EOC_SLV BIT(18) -#define STM32H7_EOC_MST BIT(2) - -/* STM32H7_ADC_CCR - bit fields */ -#define STM32H7_PRESC_SHIFT 18 -#define STM32H7_PRESC_MASK GENMASK(21, 18) -#define STM32H7_CKMODE_SHIFT 16 -#define STM32H7_CKMODE_MASK GENMASK(17, 16) - /* STM32 H7 maximum analog clock rate (from datasheet) */ #define STM32H7_ADC_MAX_CLK_RATE 36000000 @@ -72,12 +45,16 @@ * @eoc1: adc1 end of conversion flag in @csr * @eoc2: adc2 end of conversion flag in @csr * @eoc3: adc3 end of conversion flag in @csr + * @ier: interrupt enable register offset for each adc + * @eocie_msk: end of conversion interrupt enable mask in @ier */ struct stm32_adc_common_regs { u32 csr; u32 eoc1_msk; u32 eoc2_msk; u32 eoc3_msk; + u32 ier; + u32 eocie_msk; }; struct stm32_adc_priv; @@ -271,6 +248,8 @@ static const struct stm32_adc_common_regs stm32f4_adc_common_regs = { .eoc1_msk = STM32F4_EOC1, .eoc2_msk = STM32F4_EOC2, .eoc3_msk = STM32F4_EOC3, + .ier = STM32F4_ADC_CR1, + .eocie_msk = STM32F4_EOCIE, }; /* STM32H7 common registers definitions */ @@ -278,8 +257,24 @@ static const struct stm32_adc_common_regs stm32h7_adc_common_regs = { .csr = STM32H7_ADC_CSR, .eoc1_msk = STM32H7_EOC_MST, .eoc2_msk = STM32H7_EOC_SLV, + .ier = STM32H7_ADC_IER, + .eocie_msk = STM32H7_EOCIE, }; +static const unsigned int stm32_adc_offset[STM32_ADC_MAX_ADCS] = { + 0, STM32_ADC_OFFSET, STM32_ADC_OFFSET * 2, +}; + +static unsigned int stm32_adc_eoc_enabled(struct stm32_adc_priv *priv, + unsigned int adc) +{ + u32 ier, offset = stm32_adc_offset[adc]; + + ier = readl_relaxed(priv->common.base + offset + priv->cfg->regs->ier); + + return ier & priv->cfg->regs->eocie_msk; +} + /* ADC common interrupt for all instances */ static void stm32_adc_irq_handler(struct irq_desc *desc) { @@ -290,13 +285,28 @@ static void stm32_adc_irq_handler(struct irq_desc *desc) chained_irq_enter(chip, desc); status = readl_relaxed(priv->common.base + priv->cfg->regs->csr); - if (status & priv->cfg->regs->eoc1_msk) + /* + * End of conversion may be handled by using IRQ or DMA. There may be a + * race here when two conversions complete at the same time on several + * ADCs. EOC may be read 'set' for several ADCs, with: + * - an ADC configured to use DMA (EOC triggers the DMA request, and + * is then automatically cleared by DR read in hardware) + * - an ADC configured to use IRQs (EOCIE bit is set. The handler must + * be called in this case) + * So both EOC status bit in CSR and EOCIE control bit must be checked + * before invoking the interrupt handler (e.g. call ISR only for + * IRQ-enabled ADCs). + */ + if (status & priv->cfg->regs->eoc1_msk && + stm32_adc_eoc_enabled(priv, 0)) generic_handle_irq(irq_find_mapping(priv->domain, 0)); - if (status & priv->cfg->regs->eoc2_msk) + if (status & priv->cfg->regs->eoc2_msk && + stm32_adc_eoc_enabled(priv, 1)) generic_handle_irq(irq_find_mapping(priv->domain, 1)); - if (status & priv->cfg->regs->eoc3_msk) + if (status & priv->cfg->regs->eoc3_msk && + stm32_adc_eoc_enabled(priv, 2)) generic_handle_irq(irq_find_mapping(priv->domain, 2)); chained_irq_exit(chip, desc); diff --git a/drivers/iio/adc/stm32-adc-core.h b/drivers/iio/adc/stm32-adc-core.h index 250ee958a669..9f8559cf86c4 100644 --- a/drivers/iio/adc/stm32-adc-core.h +++ b/drivers/iio/adc/stm32-adc-core.h @@ -37,8 +37,143 @@ * -------------------------------------------------------- */ #define STM32_ADC_MAX_ADCS 3 +#define STM32_ADC_OFFSET 0x100 #define STM32_ADCX_COMN_OFFSET 0x300 +/* STM32F4 - Registers for each ADC instance */ +#define STM32F4_ADC_SR 0x00 +#define STM32F4_ADC_CR1 0x04 +#define STM32F4_ADC_CR2 0x08 +#define STM32F4_ADC_SMPR1 0x0C +#define STM32F4_ADC_SMPR2 0x10 +#define STM32F4_ADC_HTR 0x24 +#define STM32F4_ADC_LTR 0x28 +#define STM32F4_ADC_SQR1 0x2C +#define STM32F4_ADC_SQR2 0x30 +#define STM32F4_ADC_SQR3 0x34 +#define STM32F4_ADC_JSQR 0x38 +#define STM32F4_ADC_JDR1 0x3C +#define STM32F4_ADC_JDR2 0x40 +#define STM32F4_ADC_JDR3 0x44 +#define STM32F4_ADC_JDR4 0x48 +#define STM32F4_ADC_DR 0x4C + +/* STM32F4 - common registers for all ADC instances: 1, 2 & 3 */ +#define STM32F4_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00) +#define STM32F4_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x04) + +/* STM32F4_ADC_SR - bit fields */ +#define STM32F4_STRT BIT(4) +#define STM32F4_EOC BIT(1) + +/* STM32F4_ADC_CR1 - bit fields */ +#define STM32F4_RES_SHIFT 24 +#define STM32F4_RES_MASK GENMASK(25, 24) +#define STM32F4_SCAN BIT(8) +#define STM32F4_EOCIE BIT(5) + +/* STM32F4_ADC_CR2 - bit fields */ +#define STM32F4_SWSTART BIT(30) +#define STM32F4_EXTEN_SHIFT 28 +#define STM32F4_EXTEN_MASK GENMASK(29, 28) +#define STM32F4_EXTSEL_SHIFT 24 +#define STM32F4_EXTSEL_MASK GENMASK(27, 24) +#define STM32F4_EOCS BIT(10) +#define STM32F4_DDS BIT(9) +#define STM32F4_DMA BIT(8) +#define STM32F4_ADON BIT(0) + +/* STM32F4_ADC_CSR - bit fields */ +#define STM32F4_EOC3 BIT(17) +#define STM32F4_EOC2 BIT(9) +#define STM32F4_EOC1 BIT(1) + +/* STM32F4_ADC_CCR - bit fields */ +#define STM32F4_ADC_ADCPRE_SHIFT 16 +#define STM32F4_ADC_ADCPRE_MASK GENMASK(17, 16) + +/* STM32H7 - Registers for each ADC instance */ +#define STM32H7_ADC_ISR 0x00 +#define STM32H7_ADC_IER 0x04 +#define STM32H7_ADC_CR 0x08 +#define STM32H7_ADC_CFGR 0x0C +#define STM32H7_ADC_SMPR1 0x14 +#define STM32H7_ADC_SMPR2 0x18 +#define STM32H7_ADC_PCSEL 0x1C +#define STM32H7_ADC_SQR1 0x30 +#define STM32H7_ADC_SQR2 0x34 +#define STM32H7_ADC_SQR3 0x38 +#define STM32H7_ADC_SQR4 0x3C +#define STM32H7_ADC_DR 0x40 +#define STM32H7_ADC_CALFACT 0xC4 +#define STM32H7_ADC_CALFACT2 0xC8 + +/* STM32H7 - common registers for all ADC instances */ +#define STM32H7_ADC_CSR (STM32_ADCX_COMN_OFFSET + 0x00) +#define STM32H7_ADC_CCR (STM32_ADCX_COMN_OFFSET + 0x08) + +/* STM32H7_ADC_ISR - bit fields */ +#define STM32H7_EOC BIT(2) +#define STM32H7_ADRDY BIT(0) + +/* STM32H7_ADC_IER - bit fields */ +#define STM32H7_EOCIE STM32H7_EOC + +/* STM32H7_ADC_CR - bit fields */ +#define STM32H7_ADCAL BIT(31) +#define STM32H7_ADCALDIF BIT(30) +#define STM32H7_DEEPPWD BIT(29) +#define STM32H7_ADVREGEN BIT(28) +#define STM32H7_LINCALRDYW6 BIT(27) +#define STM32H7_LINCALRDYW5 BIT(26) +#define STM32H7_LINCALRDYW4 BIT(25) +#define STM32H7_LINCALRDYW3 BIT(24) +#define STM32H7_LINCALRDYW2 BIT(23) +#define STM32H7_LINCALRDYW1 BIT(22) +#define STM32H7_ADCALLIN BIT(16) +#define STM32H7_BOOST BIT(8) +#define STM32H7_ADSTP BIT(4) +#define STM32H7_ADSTART BIT(2) +#define STM32H7_ADDIS BIT(1) +#define STM32H7_ADEN BIT(0) + +/* STM32H7_ADC_CFGR bit fields */ +#define STM32H7_EXTEN_SHIFT 10 +#define STM32H7_EXTEN_MASK GENMASK(11, 10) +#define STM32H7_EXTSEL_SHIFT 5 +#define STM32H7_EXTSEL_MASK GENMASK(9, 5) +#define STM32H7_RES_SHIFT 2 +#define STM32H7_RES_MASK GENMASK(4, 2) +#define STM32H7_DMNGT_SHIFT 0 +#define STM32H7_DMNGT_MASK GENMASK(1, 0) + +enum stm32h7_adc_dmngt { + STM32H7_DMNGT_DR_ONLY, /* Regular data in DR only */ + STM32H7_DMNGT_DMA_ONESHOT, /* DMA one shot mode */ + STM32H7_DMNGT_DFSDM, /* DFSDM mode */ + STM32H7_DMNGT_DMA_CIRC, /* DMA circular mode */ +}; + +/* STM32H7_ADC_CALFACT - bit fields */ +#define STM32H7_CALFACT_D_SHIFT 16 +#define STM32H7_CALFACT_D_MASK GENMASK(26, 16) +#define STM32H7_CALFACT_S_SHIFT 0 +#define STM32H7_CALFACT_S_MASK GENMASK(10, 0) + +/* STM32H7_ADC_CALFACT2 - bit fields */ +#define STM32H7_LINCALFACT_SHIFT 0 +#define STM32H7_LINCALFACT_MASK GENMASK(29, 0) + +/* STM32H7_ADC_CSR - bit fields */ +#define STM32H7_EOC_SLV BIT(18) +#define STM32H7_EOC_MST BIT(2) + +/* STM32H7_ADC_CCR - bit fields */ +#define STM32H7_PRESC_SHIFT 18 +#define STM32H7_PRESC_MASK GENMASK(21, 18) +#define STM32H7_CKMODE_SHIFT 16 +#define STM32H7_CKMODE_MASK GENMASK(17, 16) + /** * struct stm32_adc_common - stm32 ADC driver common data (for all instances) * @base: control registers base cpu addr diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 04be8bd951be..258a4712167a 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -40,113 +40,6 @@ #include "stm32-adc-core.h" -/* STM32F4 - Registers for each ADC instance */ -#define STM32F4_ADC_SR 0x00 -#define STM32F4_ADC_CR1 0x04 -#define STM32F4_ADC_CR2 0x08 -#define STM32F4_ADC_SMPR1 0x0C -#define STM32F4_ADC_SMPR2 0x10 -#define STM32F4_ADC_HTR 0x24 -#define STM32F4_ADC_LTR 0x28 -#define STM32F4_ADC_SQR1 0x2C -#define STM32F4_ADC_SQR2 0x30 -#define STM32F4_ADC_SQR3 0x34 -#define STM32F4_ADC_JSQR 0x38 -#define STM32F4_ADC_JDR1 0x3C -#define STM32F4_ADC_JDR2 0x40 -#define STM32F4_ADC_JDR3 0x44 -#define STM32F4_ADC_JDR4 0x48 -#define STM32F4_ADC_DR 0x4C - -/* STM32F4_ADC_SR - bit fields */ -#define STM32F4_STRT BIT(4) -#define STM32F4_EOC BIT(1) - -/* STM32F4_ADC_CR1 - bit fields */ -#define STM32F4_RES_SHIFT 24 -#define STM32F4_RES_MASK GENMASK(25, 24) -#define STM32F4_SCAN BIT(8) -#define STM32F4_EOCIE BIT(5) - -/* STM32F4_ADC_CR2 - bit fields */ -#define STM32F4_SWSTART BIT(30) -#define STM32F4_EXTEN_SHIFT 28 -#define STM32F4_EXTEN_MASK GENMASK(29, 28) -#define STM32F4_EXTSEL_SHIFT 24 -#define STM32F4_EXTSEL_MASK GENMASK(27, 24) -#define STM32F4_EOCS BIT(10) -#define STM32F4_DDS BIT(9) -#define STM32F4_DMA BIT(8) -#define STM32F4_ADON BIT(0) - -/* STM32H7 - Registers for each ADC instance */ -#define STM32H7_ADC_ISR 0x00 -#define STM32H7_ADC_IER 0x04 -#define STM32H7_ADC_CR 0x08 -#define STM32H7_ADC_CFGR 0x0C -#define STM32H7_ADC_SMPR1 0x14 -#define STM32H7_ADC_SMPR2 0x18 -#define STM32H7_ADC_PCSEL 0x1C -#define STM32H7_ADC_SQR1 0x30 -#define STM32H7_ADC_SQR2 0x34 -#define STM32H7_ADC_SQR3 0x38 -#define STM32H7_ADC_SQR4 0x3C -#define STM32H7_ADC_DR 0x40 -#define STM32H7_ADC_CALFACT 0xC4 -#define STM32H7_ADC_CALFACT2 0xC8 - -/* STM32H7_ADC_ISR - bit fields */ -#define STM32H7_EOC BIT(2) -#define STM32H7_ADRDY BIT(0) - -/* STM32H7_ADC_IER - bit fields */ -#define STM32H7_EOCIE STM32H7_EOC - -/* STM32H7_ADC_CR - bit fields */ -#define STM32H7_ADCAL BIT(31) -#define STM32H7_ADCALDIF BIT(30) -#define STM32H7_DEEPPWD BIT(29) -#define STM32H7_ADVREGEN BIT(28) -#define STM32H7_LINCALRDYW6 BIT(27) -#define STM32H7_LINCALRDYW5 BIT(26) -#define STM32H7_LINCALRDYW4 BIT(25) -#define STM32H7_LINCALRDYW3 BIT(24) -#define STM32H7_LINCALRDYW2 BIT(23) -#define STM32H7_LINCALRDYW1 BIT(22) -#define STM32H7_ADCALLIN BIT(16) -#define STM32H7_BOOST BIT(8) -#define STM32H7_ADSTP BIT(4) -#define STM32H7_ADSTART BIT(2) -#define STM32H7_ADDIS BIT(1) -#define STM32H7_ADEN BIT(0) - -/* STM32H7_ADC_CFGR bit fields */ -#define STM32H7_EXTEN_SHIFT 10 -#define STM32H7_EXTEN_MASK GENMASK(11, 10) -#define STM32H7_EXTSEL_SHIFT 5 -#define STM32H7_EXTSEL_MASK GENMASK(9, 5) -#define STM32H7_RES_SHIFT 2 -#define STM32H7_RES_MASK GENMASK(4, 2) -#define STM32H7_DMNGT_SHIFT 0 -#define STM32H7_DMNGT_MASK GENMASK(1, 0) - -enum stm32h7_adc_dmngt { - STM32H7_DMNGT_DR_ONLY, /* Regular data in DR only */ - STM32H7_DMNGT_DMA_ONESHOT, /* DMA one shot mode */ - STM32H7_DMNGT_DFSDM, /* DFSDM mode */ - STM32H7_DMNGT_DMA_CIRC, /* DMA circular mode */ -}; - -/* STM32H7_ADC_CALFACT - bit fields */ -#define STM32H7_CALFACT_D_SHIFT 16 -#define STM32H7_CALFACT_D_MASK GENMASK(26, 16) -#define STM32H7_CALFACT_S_SHIFT 0 -#define STM32H7_CALFACT_S_MASK GENMASK(10, 0) - -/* STM32H7_ADC_CALFACT2 - bit fields */ -#define STM32H7_LINCALFACT_SHIFT 0 -#define STM32H7_LINCALFACT_MASK GENMASK(29, 0) - /* Number of linear calibration shadow registers / LINCALRDYW control bits */ #define STM32H7_LINCALFACT_NUM 6 @@ -1450,7 +1343,7 @@ static int stm32_adc_dma_start(struct iio_dev *indio_dev) cookie = dmaengine_submit(desc); ret = dma_submit_error(cookie); if (ret) { - dmaengine_terminate_all(adc->dma_chan); + dmaengine_terminate_sync(adc->dma_chan); return ret; } @@ -1523,7 +1416,7 @@ static int stm32_adc_buffer_predisable(struct iio_dev *indio_dev) dev_err(&indio_dev->dev, "predisable failed\n"); if (adc->dma_chan) - dmaengine_terminate_all(adc->dma_chan); + dmaengine_terminate_sync(adc->dma_chan); if (stm32_adc_set_trig(indio_dev, NULL)) dev_err(&indio_dev->dev, "Can't clear trigger\n"); diff --git a/drivers/iio/dac/ad5380.c b/drivers/iio/dac/ad5380.c index 97d2c5111f43..8bf7fc626a9d 100644 --- a/drivers/iio/dac/ad5380.c +++ b/drivers/iio/dac/ad5380.c @@ -221,7 +221,7 @@ static int ad5380_read_raw(struct iio_dev *indio_dev, if (ret) return ret; *val >>= chan->scan_type.shift; - val -= (1 << chan->scan_type.realbits) / 2; + *val -= (1 << chan->scan_type.realbits) / 2; return IIO_VAL_INT; case IIO_CHAN_INFO_SCALE: *val = 2 * st->vref; diff --git a/drivers/iio/dac/mcp4922.c b/drivers/iio/dac/mcp4922.c index 3854d201a5d6..68dd0be1ac07 100644 --- a/drivers/iio/dac/mcp4922.c +++ b/drivers/iio/dac/mcp4922.c @@ -94,17 +94,22 @@ static int mcp4922_write_raw(struct iio_dev *indio_dev, long mask) { struct mcp4922_state *state = iio_priv(indio_dev); + int ret; if (val2 != 0) return -EINVAL; switch (mask) { case IIO_CHAN_INFO_RAW: - if (val > GENMASK(chan->scan_type.realbits-1, 0)) + if (val < 0 || val > GENMASK(chan->scan_type.realbits - 1, 0)) return -EINVAL; val <<= chan->scan_type.shift; - state->value[chan->channel] = val; - return mcp4922_spi_write(state, chan->channel, val); + + ret = mcp4922_spi_write(state, chan->channel, val); + if (!ret) + state->value[chan->channel] = val; + return ret; + default: return -EINVAL; } diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c index 7851bd90ef64..b470cb8132da 100644 --- a/drivers/iio/humidity/hdc100x.c +++ b/drivers/iio/humidity/hdc100x.c @@ -237,7 +237,7 @@ static int hdc100x_read_raw(struct iio_dev *indio_dev, *val2 = 65536; return IIO_VAL_FRACTIONAL; } else { - *val = 100; + *val = 100000; *val2 = 65536; return IIO_VAL_FRACTIONAL; } diff --git a/drivers/iio/imu/adis16480.c b/drivers/iio/imu/adis16480.c index 12898424d838..5abe095901c8 100644 --- a/drivers/iio/imu/adis16480.c +++ b/drivers/iio/imu/adis16480.c @@ -266,8 +266,11 @@ static int adis16480_set_freq(struct iio_dev *indio_dev, int val, int val2) struct adis16480 *st = iio_priv(indio_dev); unsigned int t; + if (val < 0 || val2 < 0) + return -EINVAL; + t = val * 1000 + val2 / 1000; - if (t <= 0) + if (t == 0) return -EINVAL; t = 2460000 / t; @@ -369,12 +372,14 @@ static int adis16480_get_calibbias(struct iio_dev *indio_dev, case IIO_MAGN: case IIO_PRESSURE: ret = adis_read_reg_16(&st->adis, reg, &val16); - *bias = sign_extend32(val16, 15); + if (ret == 0) + *bias = sign_extend32(val16, 15); break; case IIO_ANGL_VEL: case IIO_ACCEL: ret = adis_read_reg_32(&st->adis, reg, &val32); - *bias = sign_extend32(val32, 31); + if (ret == 0) + *bias = sign_extend32(val32, 31); break; default: ret = -EINVAL; @@ -721,6 +726,7 @@ static const struct iio_info adis16480_info = { .write_raw = &adis16480_write_raw, .update_scan_mode = adis_update_scan_mode, .driver_module = THIS_MODULE, + .debugfs_reg_access = adis_debugfs_reg_access, }; static int adis16480_stop_device(struct iio_dev *indio_dev) diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index d50125766093..c3badf634378 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -570,7 +570,7 @@ static int iio_compute_scan_bytes(struct iio_dev *indio_dev, const unsigned long *mask, bool timestamp) { unsigned bytes = 0; - int length, i; + int length, i, largest = 0; /* How much space will the demuxed element take? */ for_each_set_bit(i, mask, @@ -578,13 +578,17 @@ static int iio_compute_scan_bytes(struct iio_dev *indio_dev, length = iio_storage_bytes_for_si(indio_dev, i); bytes = ALIGN(bytes, length); bytes += length; + largest = max(largest, length); } if (timestamp) { length = iio_storage_bytes_for_timestamp(indio_dev); bytes = ALIGN(bytes, length); bytes += length; + largest = max(largest, length); } + + bytes = ALIGN(bytes, largest); return bytes; } diff --git a/drivers/iio/light/bh1750.c b/drivers/iio/light/bh1750.c index 6c61187e630f..0b7ba02c8d16 100644 --- a/drivers/iio/light/bh1750.c +++ b/drivers/iio/light/bh1750.c @@ -62,9 +62,9 @@ struct bh1750_chip_info { u16 int_time_low_mask; u16 int_time_high_mask; -} +}; -static const bh1750_chip_info_tbl[] = { +static const struct bh1750_chip_info bh1750_chip_info_tbl[] = { [BH1710] = { 140, 1022, 300, 400, 250000000, 2, 0x001F, 0x03E0 }, [BH1721] = { 140, 1020, 300, 400, 250000000, 2, 0x0010, 0x03E0 }, [BH1750] = { 31, 254, 69, 1740, 57500000, 1, 0x001F, 0x00E0 }, diff --git a/drivers/iio/proximity/srf04.c b/drivers/iio/proximity/srf04.c index e37667f933b3..8a6ab9691832 100644 --- a/drivers/iio/proximity/srf04.c +++ b/drivers/iio/proximity/srf04.c @@ -105,7 +105,7 @@ static int srf04_read(struct srf04_data *data) udelay(10); gpiod_set_value(data->gpiod_trig, 0); - /* it cannot take more than 20 ms */ + /* it should not take more than 20 ms until echo is rising */ ret = wait_for_completion_killable_timeout(&data->rising, HZ/50); if (ret < 0) { mutex_unlock(&data->lock); @@ -115,7 +115,8 @@ static int srf04_read(struct srf04_data *data) return -ETIMEDOUT; } - ret = wait_for_completion_killable_timeout(&data->falling, HZ/50); + /* it cannot take more than 50 ms until echo is falling */ + ret = wait_for_completion_killable_timeout(&data->falling, HZ/20); if (ret < 0) { mutex_unlock(&data->lock); return ret; @@ -130,19 +131,19 @@ static int srf04_read(struct srf04_data *data) dt_ns = ktime_to_ns(ktime_dt); /* - * measuring more than 3 meters is beyond the capabilities of - * the sensor + * measuring more than 6,45 meters is beyond the capabilities of + * the supported sensors * ==> filter out invalid results for not measuring echos of * another us sensor * * formula: - * distance 3 m - * time = ---------- = --------- = 9404389 ns - * speed 319 m/s + * distance 6,45 * 2 m + * time = ---------- = ------------ = 40438871 ns + * speed 319 m/s * * using a minimum speed at -20 °C of 319 m/s */ - if (dt_ns > 9404389) + if (dt_ns > 40438871) return -EIO; time_ns = dt_ns; @@ -154,20 +155,20 @@ static int srf04_read(struct srf04_data *data) * with Temp in °C * and speed in m/s * - * use 343 m/s as ultrasonic speed at 20 °C here in absence of the + * use 343,5 m/s as ultrasonic speed at 20 °C here in absence of the * temperature * * therefore: - * time 343 - * distance = ------ * ----- - * 10^6 2 + * time 343,5 time * 106 + * distance = ------ * ------- = ------------ + * 10^6 2 617176 * with time in ns * and distance in mm (one way) * - * because we limit to 3 meters the multiplication with 343 just + * because we limit to 6,45 meters the multiplication with 106 just * fits into 32 bit */ - distance_mm = time_ns * 343 / 2000000; + distance_mm = time_ns * 106 / 617176; return distance_mm; } diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index aadaa9e84eee..c2bbe0df0931 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -140,7 +140,7 @@ int ib_nl_handle_ip_res_resp(struct sk_buff *skb, if (ib_nl_is_good_ip_resp(nlh)) ib_nl_process_good_ip_rsep(nlh); - return skb->len; + return 0; } static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 2af79e4f3235..80a8eb7e5d6e 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1143,6 +1143,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, /* Sharing an ib_cm_id with different handlers is not * supported */ spin_unlock_irqrestore(&cm.lock, flags); + ib_destroy_cm_id(cm_id); return ERR_PTR(-EINVAL); } atomic_inc(&cm_id_priv->refcount); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 7c5eca312aa8..1614f6f3677c 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2212,9 +2212,10 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, conn_id->cm_id.iw = NULL; cma_exch(conn_id, RDMA_CM_DESTROYING); mutex_unlock(&conn_id->handler_mutex); + mutex_unlock(&listen_id->handler_mutex); cma_deref_id(conn_id); rdma_destroy_id(&conn_id->id); - goto out; + return ret; } mutex_unlock(&conn_id->handler_mutex); @@ -2788,7 +2789,7 @@ static void addr_handler(int status, struct sockaddr *src_addr, if (status) pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to acquire device. status %d\n", status); - } else { + } else if (status) { pr_debug_ratelimited("RDMA CM: ADDR_ERROR: failed to resolve IP. status %d\n", status); } @@ -4567,6 +4568,7 @@ err: unregister_netdevice_notifier(&cma_nb); rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); + unregister_pernet_subsys(&cma_pernet_operations); err_wq: destroy_workqueue(cma_wq); return ret; diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 61ade4b3e7bb..6b0d1d8609ca 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -599,8 +599,8 @@ void ib_unregister_device(struct ib_device *device) } up_read(&lists_rwsem); - ib_device_unregister_rdmacg(device); ib_device_unregister_sysfs(device); + ib_device_unregister_rdmacg(device); mutex_unlock(&device_mutex); diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 30d7277249b8..16b0c10348e8 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -158,8 +158,10 @@ static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) { struct list_head *e, *tmp; - list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) + list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) { + list_del(e); kfree(list_entry(e, struct iwcm_work, free_list)); + } } static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index e4339b9e43a5..6072ac7023cb 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -217,30 +217,30 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, /* Validate parameters */ qpn = get_spl_qp_index(qp_type); if (qpn == -1) { - dev_notice(&device->dev, - "ib_register_mad_agent: invalid QP Type %d\n", - qp_type); + dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n", + __func__, qp_type); goto error1; } if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) { - dev_notice(&device->dev, - "ib_register_mad_agent: invalid RMPP Version %u\n", - rmpp_version); + dev_dbg_ratelimited(&device->dev, + "%s: invalid RMPP Version %u\n", + __func__, rmpp_version); goto error1; } /* Validate MAD registration request if supplied */ if (mad_reg_req) { if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) { - dev_notice(&device->dev, - "ib_register_mad_agent: invalid Class Version %u\n", - mad_reg_req->mgmt_class_version); + dev_dbg_ratelimited(&device->dev, + "%s: invalid Class Version %u\n", + __func__, + mad_reg_req->mgmt_class_version); goto error1; } if (!recv_handler) { - dev_notice(&device->dev, - "ib_register_mad_agent: no recv_handler\n"); + dev_dbg_ratelimited(&device->dev, + "%s: no recv_handler\n", __func__); goto error1; } if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) { @@ -250,9 +250,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, */ if (mad_reg_req->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) { - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n", - mad_reg_req->mgmt_class); + dev_dbg_ratelimited(&device->dev, + "%s: Invalid Mgmt Class 0x%x\n", + __func__, mad_reg_req->mgmt_class); goto error1; } } else if (mad_reg_req->mgmt_class == 0) { @@ -260,8 +260,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, * Class 0 is reserved in IBA and is used for * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE */ - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid Mgmt Class 0\n"); + dev_dbg_ratelimited(&device->dev, + "%s: Invalid Mgmt Class 0\n", + __func__); goto error1; } else if (is_vendor_class(mad_reg_req->mgmt_class)) { /* @@ -269,18 +270,19 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, * ensure supplied OUI is not zero */ if (!is_vendor_oui(mad_reg_req->oui)) { - dev_notice(&device->dev, - "ib_register_mad_agent: No OUI specified for class 0x%x\n", - mad_reg_req->mgmt_class); + dev_dbg_ratelimited(&device->dev, + "%s: No OUI specified for class 0x%x\n", + __func__, + mad_reg_req->mgmt_class); goto error1; } } /* Make sure class supplied is consistent with RMPP */ if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) { if (rmpp_version) { - dev_notice(&device->dev, - "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n", - mad_reg_req->mgmt_class); + dev_dbg_ratelimited(&device->dev, + "%s: RMPP version for non-RMPP class 0x%x\n", + __func__, mad_reg_req->mgmt_class); goto error1; } } @@ -291,9 +293,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, IB_MGMT_CLASS_SUBN_LID_ROUTED) && (mad_reg_req->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n", - mad_reg_req->mgmt_class); + dev_dbg_ratelimited(&device->dev, + "%s: Invalid SM QP type: class 0x%x\n", + __func__, mad_reg_req->mgmt_class); goto error1; } } else { @@ -301,9 +303,9 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, IB_MGMT_CLASS_SUBN_LID_ROUTED) || (mad_reg_req->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) { - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n", - mad_reg_req->mgmt_class); + dev_dbg_ratelimited(&device->dev, + "%s: Invalid GS QP type: class 0x%x\n", + __func__, mad_reg_req->mgmt_class); goto error1; } } @@ -318,18 +320,18 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, /* Validate device and port */ port_priv = ib_get_mad_port(device, port_num); if (!port_priv) { - dev_notice(&device->dev, - "ib_register_mad_agent: Invalid port %d\n", - port_num); + dev_dbg_ratelimited(&device->dev, "%s: Invalid port %d\n", + __func__, port_num); ret = ERR_PTR(-ENODEV); goto error1; } - /* Verify the QP requested is supported. For example, Ethernet devices - * will not have QP0 */ + /* Verify the QP requested is supported. For example, Ethernet devices + * will not have QP0. + */ if (!port_priv->qp_info[qpn].qp) { - dev_notice(&device->dev, - "ib_register_mad_agent: QP %d not supported\n", qpn); + dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n", + __func__, qpn); ret = ERR_PTR(-EPROTONOSUPPORT); goto error1; } diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 50068b0a91fa..83dad5401c93 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -1078,7 +1078,7 @@ int ib_nl_handle_set_timeout(struct sk_buff *skb, } settimeout_out: - return skb->len; + return 0; } static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh) @@ -1149,7 +1149,7 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb, } resp_out: - return skb->len; + return 0; } static void free_sm_ah(struct kref *kref) diff --git a/drivers/infiniband/core/security.c b/drivers/infiniband/core/security.c index a3dd88c57be7..ce8e3009344a 100644 --- a/drivers/infiniband/core/security.c +++ b/drivers/infiniband/core/security.c @@ -338,22 +338,20 @@ static struct ib_ports_pkeys *get_new_pps(const struct ib_qp *qp, if (!new_pps) return NULL; - if (qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) { - if (!qp_pps) { - new_pps->main.port_num = qp_attr->port_num; - new_pps->main.pkey_index = qp_attr->pkey_index; - } else { - new_pps->main.port_num = (qp_attr_mask & IB_QP_PORT) ? - qp_attr->port_num : - qp_pps->main.port_num; + if (qp_attr_mask & IB_QP_PORT) + new_pps->main.port_num = qp_attr->port_num; + else if (qp_pps) + new_pps->main.port_num = qp_pps->main.port_num; - new_pps->main.pkey_index = - (qp_attr_mask & IB_QP_PKEY_INDEX) ? - qp_attr->pkey_index : - qp_pps->main.pkey_index; - } + if (qp_attr_mask & IB_QP_PKEY_INDEX) + new_pps->main.pkey_index = qp_attr->pkey_index; + else if (qp_pps) + new_pps->main.pkey_index = qp_pps->main.pkey_index; + + if ((qp_attr_mask & IB_QP_PKEY_INDEX) && (qp_attr_mask & IB_QP_PORT)) new_pps->main.state = IB_PORT_PKEY_VALID; - } else if (qp_pps) { + + if (!(qp_attr_mask & (IB_QP_PKEY_INDEX | IB_QP_PORT)) && qp_pps) { new_pps->main.port_num = qp_pps->main.port_num; new_pps->main.pkey_index = qp_pps->main.pkey_index; if (qp_pps->main.state != IB_PORT_PKEY_NOT_VALID) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 55e8f5ed8b3c..57b41125b146 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -637,7 +637,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, while (bcnt > 0) { const size_t gup_num_pages = min_t(size_t, - (bcnt + BIT(page_shift) - 1) >> page_shift, + ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE, PAGE_SIZE / sizeof(struct page *)); down_read(&owning_mm->mmap_sem); diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index 37c8903e7fd0..8d79a48ccd38 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -87,7 +87,7 @@ struct ib_uverbs_device { atomic_t refcount; - int num_comp_vectors; + u32 num_comp_vectors; struct completion comp; struct device *dev; struct ib_device __rcu *ib_dev; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index bf811b23bc95..7d00b6a53ed8 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -782,12 +782,17 @@ static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp, struct ib_event ib_event; ib_event.device = ibdev; - if (qp) + if (qp) { ib_event.element.qp = qp; - else + ib_event.event = event; + if (qp->event_handler) + qp->event_handler(&ib_event, qp->qp_context); + + } else { ib_event.element.port_num = port_num; - ib_event.event = event; - ib_dispatch_event(&ib_event); + ib_event.event = event; + ib_dispatch_event(&ib_event); + } } #define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN 0x02 diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index e8afc47f8949..908803fe8276 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -2024,13 +2024,13 @@ static int bnxt_qplib_cq_process_req(struct bnxt_qplib_cq *cq, bnxt_qplib_mark_qp_error(qp); bnxt_qplib_unlock_buddy_cq(qp, cq); } else { + /* Before we complete, do WA 9060 */ + if (do_wa9060(qp, cq, cq_cons, sw_sq_cons, + cqe_sq_cons)) { + *lib_qp = qp; + goto out; + } if (swq->flags & SQ_SEND_FLAGS_SIGNAL_COMP) { - /* Before we complete, do WA 9060 */ - if (do_wa9060(qp, cq, cq_cons, sw_sq_cons, - cqe_sq_cons)) { - *lib_qp = qp; - goto out; - } cqe->status = CQ_REQ_STATUS_OK; cqe++; (*budget)--; diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index d87f08cd78ad..7eb1cc1b1aa0 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -491,7 +491,6 @@ static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); release_ep_resources(ep); - kfree_skb(skb); return 0; } @@ -502,7 +501,6 @@ static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb) ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))); c4iw_put_ep(&ep->parent_ep->com); release_ep_resources(ep); - kfree_skb(skb); return 0; } @@ -2058,7 +2056,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, } else { pdev = get_real_dev(n->dev); ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, - n, pdev, 0); + n, pdev, rt_tos2priority(tos)); if (!ep->l2t) goto out; ep->mtu = dst_mtu(dst); @@ -2149,7 +2147,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep) laddr6->sin6_addr.s6_addr, raddr6->sin6_addr.s6_addr, laddr6->sin6_port, - raddr6->sin6_port, 0, + raddr6->sin6_port, + ep->com.cm_id->tos, raddr6->sin6_scope_id); iptype = 6; ra = (__u8 *)&raddr6->sin6_addr; @@ -2925,15 +2924,18 @@ static int terminate(struct c4iw_dev *dev, struct sk_buff *skb) ep = get_ep_from_tid(dev, tid); BUG_ON(!ep); - if (ep && ep->com.qp) { - pr_warn("TERM received tid %u qpid %u\n", - tid, ep->com.qp->wq.sq.qid); - attrs.next_state = C4IW_QP_STATE_TERMINATE; - c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, - C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + if (ep) { + if (ep->com.qp) { + pr_warn("TERM received tid %u qpid %u\n", tid, + ep->com.qp->wq.sq.qid); + attrs.next_state = C4IW_QP_STATE_TERMINATE; + c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, + C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); + } + + c4iw_put_ep(&ep->com); } else pr_warn("TERM received tid %u no ep/qp\n", tid); - c4iw_put_ep(&ep->com); return 0; } @@ -3297,7 +3299,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) laddr6->sin6_addr.s6_addr, raddr6->sin6_addr.s6_addr, laddr6->sin6_port, - raddr6->sin6_port, 0, + raddr6->sin6_port, cm_id->tos, raddr6->sin6_scope_id); } if (!ep->dst) { diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index b5784cb145f5..805429bbc916 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -260,13 +260,17 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, struct sk_buff *skb) { int err; - struct fw_ri_tpte tpt; + struct fw_ri_tpte *tpt; u32 stag_idx; static atomic_t key; if (c4iw_fatal_error(rdev)) return -EIO; + tpt = kmalloc(sizeof(*tpt), GFP_KERNEL); + if (!tpt) + return -ENOMEM; + stag_state = stag_state > 0; stag_idx = (*stag) >> 8; @@ -276,6 +280,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, mutex_lock(&rdev->stats.lock); rdev->stats.stag.fail++; mutex_unlock(&rdev->stats.lock); + kfree(tpt); return -ENOMEM; } mutex_lock(&rdev->stats.lock); @@ -290,28 +295,28 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, /* write TPT entry */ if (reset_tpt_entry) - memset(&tpt, 0, sizeof(tpt)); + memset(tpt, 0, sizeof(*tpt)); else { - tpt.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | + tpt->valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | FW_RI_TPTE_STAGKEY_V((*stag & FW_RI_TPTE_STAGKEY_M)) | FW_RI_TPTE_STAGSTATE_V(stag_state) | FW_RI_TPTE_STAGTYPE_V(type) | FW_RI_TPTE_PDID_V(pdid)); - tpt.locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) | + tpt->locread_to_qpid = cpu_to_be32(FW_RI_TPTE_PERM_V(perm) | (bind_enabled ? FW_RI_TPTE_MWBINDEN_F : 0) | FW_RI_TPTE_ADDRTYPE_V((zbva ? FW_RI_ZERO_BASED_TO : FW_RI_VA_BASED_TO))| FW_RI_TPTE_PS_V(page_size)); - tpt.nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32( + tpt->nosnoop_pbladdr = !pbl_size ? 0 : cpu_to_be32( FW_RI_TPTE_PBLADDR_V(PBL_OFF(rdev, pbl_addr)>>3)); - tpt.len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); - tpt.va_hi = cpu_to_be32((u32)(to >> 32)); - tpt.va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); - tpt.dca_mwbcnt_pstag = cpu_to_be32(0); - tpt.len_hi = cpu_to_be32((u32)(len >> 32)); + tpt->len_lo = cpu_to_be32((u32)(len & 0xffffffffUL)); + tpt->va_hi = cpu_to_be32((u32)(to >> 32)); + tpt->va_lo_fbo = cpu_to_be32((u32)(to & 0xffffffffUL)); + tpt->dca_mwbcnt_pstag = cpu_to_be32(0); + tpt->len_hi = cpu_to_be32((u32)(len >> 32)); } err = write_adapter_mem(rdev, stag_idx + (rdev->lldi.vr->stag.start >> 5), - sizeof(tpt), &tpt, skb); + sizeof(*tpt), tpt, skb); if (reset_tpt_entry) { c4iw_put_resource(&rdev->resource.tpt_table, stag_idx); @@ -319,6 +324,7 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry, rdev->stats.stag.cur -= 32; mutex_unlock(&rdev->stats.lock); } + kfree(tpt); return err; } diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index 9dcdc0a8685e..cb5785dda524 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1074,6 +1074,8 @@ static void log_state_transition(struct hfi1_pportdata *ppd, u32 state); static void log_physical_state(struct hfi1_pportdata *ppd, u32 state); static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state, int msecs); +static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd, + int msecs); static void read_planned_down_reason_code(struct hfi1_devdata *dd, u8 *pdrrc); static void read_link_down_reason(struct hfi1_devdata *dd, u8 *ldr); static void handle_temp_err(struct hfi1_devdata *dd); @@ -1684,6 +1686,14 @@ static u64 access_sw_pio_drain(const struct cntr_entry *entry, return dd->verbs_dev.n_piodrain; } +static u64 access_sw_ctx0_seq_drop(const struct cntr_entry *entry, + void *context, int vl, int mode, u64 data) +{ + struct hfi1_devdata *dd = context; + + return dd->ctx0_seq_drop; +} + static u64 access_sw_vtx_wait(const struct cntr_entry *entry, void *context, int vl, int mode, u64 data) { @@ -4244,6 +4254,8 @@ static struct cntr_entry dev_cntrs[DEV_CNTR_LAST] = { access_sw_cpu_intr), [C_SW_CPU_RCV_LIM] = CNTR_ELEM("RcvLimit", 0, 0, CNTR_NORMAL, access_sw_cpu_rcv_limit), +[C_SW_CTX0_SEQ_DROP] = CNTR_ELEM("SeqDrop0", 0, 0, CNTR_NORMAL, + access_sw_ctx0_seq_drop), [C_SW_VTX_WAIT] = CNTR_ELEM("vTxWait", 0, 0, CNTR_NORMAL, access_sw_vtx_wait), [C_SW_PIO_WAIT] = CNTR_ELEM("PioWait", 0, 0, CNTR_NORMAL, @@ -10550,12 +10562,29 @@ void set_link_down_reason(struct hfi1_pportdata *ppd, u8 lcl_reason, } } -/* - * Verify if BCT for data VLs is non-zero. +/** + * data_vls_operational() - Verify if data VL BCT credits and MTU + * are both set. + * @ppd: pointer to hfi1_pportdata structure + * + * Return: true - Ok, false -otherwise. */ static inline bool data_vls_operational(struct hfi1_pportdata *ppd) { - return !!ppd->actual_vls_operational; + int i; + u64 reg; + + if (!ppd->actual_vls_operational) + return false; + + for (i = 0; i < ppd->vls_supported; i++) { + reg = read_csr(ppd->dd, SEND_CM_CREDIT_VL + (8 * i)); + if ((reg && !ppd->dd->vld[i].mtu) || + (!reg && ppd->dd->vld[i].mtu)) + return false; + } + + return true; } /* @@ -10660,7 +10689,8 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) if (!data_vls_operational(ppd)) { dd_dev_err(dd, - "%s: data VLs not operational\n", __func__); + "%s: Invalid data VL credits or mtu\n", + __func__); ret = -EINVAL; break; } @@ -10731,13 +10761,15 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) break; ppd->port_error_action = 0; - ppd->host_link_state = HLS_DN_POLL; if (quick_linkup) { /* quick linkup does not go into polling */ ret = do_quick_linkup(dd); } else { ret1 = set_physical_link_state(dd, PLS_POLLING); + if (!ret1) + ret1 = wait_phys_link_out_of_offline(ppd, + 3000); if (ret1 != HCMD_SUCCESS) { dd_dev_err(dd, "Failed to transition to Polling link state, return 0x%x\n", @@ -10745,6 +10777,14 @@ int set_link_state(struct hfi1_pportdata *ppd, u32 state) ret = -EINVAL; } } + + /* + * Change the host link state after requesting DC8051 to + * change its physical state so that we can ignore any + * interrupt with stale LNI(XX) error, which will not be + * cleared until DC8051 transitions to Polling state. + */ + ppd->host_link_state = HLS_DN_POLL; ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_NONE); /* @@ -12870,6 +12910,39 @@ static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd, return read_state; } +/* + * wait_phys_link_out_of_offline - wait for any out of offline state + * @ppd: port device + * @msecs: the number of milliseconds to wait + * + * Wait up to msecs milliseconds for any out of offline physical link + * state change to occur. + * Returns 0 if at least one state is reached, otherwise -ETIMEDOUT. + */ +static int wait_phys_link_out_of_offline(struct hfi1_pportdata *ppd, + int msecs) +{ + u32 read_state; + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(msecs); + while (1) { + read_state = read_physical_state(ppd->dd); + if ((read_state & 0xF0) != PLS_OFFLINE) + break; + if (time_after(jiffies, timeout)) { + dd_dev_err(ppd->dd, + "timeout waiting for phy link out of offline. Read state 0x%x, %dms\n", + read_state, msecs); + return -ETIMEDOUT; + } + usleep_range(1950, 2050); /* sleep 2ms-ish */ + } + + log_state_transition(ppd, read_state); + return read_state; +} + #define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \ (r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK) diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index 50b8645d0b87..a88ef2433cea 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -864,6 +864,7 @@ enum { C_DC_PG_STS_TX_MBE_CNT, C_SW_CPU_INTR, C_SW_CPU_RCV_LIM, + C_SW_CTX0_SEQ_DROP, C_SW_VTX_WAIT, C_SW_PIO_WAIT, C_SW_PIO_DRAIN, diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 72c836b826ca..7aa1aabb7a43 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -710,6 +710,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread) { int ret; + packet->rcd->dd->ctx0_seq_drop++; /* Set up for the next packet */ packet->rhqoff += packet->rsize; if (packet->rhqoff >= packet->maxcnt) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 76861a8b5c1e..b3ab803bf8b1 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -195,23 +195,24 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) fd = kzalloc(sizeof(*fd), GFP_KERNEL); - if (fd) { - fd->rec_cpu_num = -1; /* no cpu affinity by default */ - fd->mm = current->mm; - mmgrab(fd->mm); - fd->dd = dd; - kobject_get(&fd->dd->kobj); - fp->private_data = fd; - } else { - fp->private_data = NULL; - - if (atomic_dec_and_test(&dd->user_refcount)) - complete(&dd->user_comp); - - return -ENOMEM; - } - + if (!fd || init_srcu_struct(&fd->pq_srcu)) + goto nomem; + spin_lock_init(&fd->pq_rcu_lock); + spin_lock_init(&fd->tid_lock); + spin_lock_init(&fd->invalid_lock); + fd->rec_cpu_num = -1; /* no cpu affinity by default */ + fd->mm = current->mm; + mmgrab(fd->mm); + fd->dd = dd; + kobject_get(&fd->dd->kobj); + fp->private_data = fd; return 0; +nomem: + kfree(fd); + fp->private_data = NULL; + if (atomic_dec_and_test(&dd->user_refcount)) + complete(&dd->user_comp); + return -ENOMEM; } static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, @@ -417,21 +418,30 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) { struct hfi1_filedata *fd = kiocb->ki_filp->private_data; - struct hfi1_user_sdma_pkt_q *pq = fd->pq; + struct hfi1_user_sdma_pkt_q *pq; struct hfi1_user_sdma_comp_q *cq = fd->cq; int done = 0, reqs = 0; unsigned long dim = from->nr_segs; + int idx; - if (!cq || !pq) + idx = srcu_read_lock(&fd->pq_srcu); + pq = srcu_dereference(fd->pq, &fd->pq_srcu); + if (!cq || !pq) { + srcu_read_unlock(&fd->pq_srcu, idx); return -EIO; + } - if (!iter_is_iovec(from) || !dim) + if (!iter_is_iovec(from) || !dim) { + srcu_read_unlock(&fd->pq_srcu, idx); return -EINVAL; + } trace_hfi1_sdma_request(fd->dd, fd->uctxt->ctxt, fd->subctxt, dim); - if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) + if (atomic_read(&pq->n_reqs) == pq->n_max_reqs) { + srcu_read_unlock(&fd->pq_srcu, idx); return -ENOSPC; + } while (dim) { int ret; @@ -449,6 +459,7 @@ static ssize_t hfi1_write_iter(struct kiocb *kiocb, struct iov_iter *from) reqs++; } + srcu_read_unlock(&fd->pq_srcu, idx); return reqs; } @@ -824,6 +835,7 @@ done: if (atomic_dec_and_test(&dd->user_refcount)) complete(&dd->user_comp); + cleanup_srcu_struct(&fdata->pq_srcu); kfree(fdata); return 0; } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index af550c1767e3..cf9bc95d8039 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1043,6 +1043,8 @@ struct hfi1_devdata { char *boardname; /* human readable board info */ + u64 ctx0_seq_drop; + /* reset value */ u64 z_int_counter; u64 z_rcv_limit; @@ -1353,10 +1355,13 @@ struct mmu_rb_handler; /* Private data for file operations */ struct hfi1_filedata { + struct srcu_struct pq_srcu; struct hfi1_devdata *dd; struct hfi1_ctxtdata *uctxt; struct hfi1_user_sdma_comp_q *cq; - struct hfi1_user_sdma_pkt_q *pq; + /* update side lock for SRCU */ + spinlock_t pq_rcu_lock; + struct hfi1_user_sdma_pkt_q __rcu *pq; u16 subctxt; /* for cpu affinity; -1 if none */ int rec_cpu_num; diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 51a5416b1da4..fd9ae23c480e 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -327,7 +327,9 @@ int pcie_speeds(struct hfi1_devdata *dd) /* * bus->max_bus_speed is set from the bridge's linkcap Max Link Speed */ - if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) { + if (parent && + (dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT || + dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) { dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n"); dd->link_gen3_capable = 0; } diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index 6781bcdb10b3..741938409f8e 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -1529,8 +1529,11 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) } ret = rhashtable_init(tmp_sdma_rht, &sdma_rht_params); - if (ret < 0) + if (ret < 0) { + kfree(tmp_sdma_rht); goto bail; + } + dd->sdma_rht = tmp_sdma_rht; dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma); diff --git a/drivers/infiniband/hw/hfi1/user_exp_rcv.c b/drivers/infiniband/hw/hfi1/user_exp_rcv.c index b38e3808836c..c6d085e1c10d 100644 --- a/drivers/infiniband/hw/hfi1/user_exp_rcv.c +++ b/drivers/infiniband/hw/hfi1/user_exp_rcv.c @@ -90,9 +90,6 @@ int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd, struct hfi1_devdata *dd = uctxt->dd; int ret = 0; - spin_lock_init(&fd->tid_lock); - spin_lock_init(&fd->invalid_lock); - fd->entry_to_rb = kcalloc(uctxt->expected_count, sizeof(struct rb_node *), GFP_KERNEL); diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 75275f9e363d..f23d47194c12 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -179,7 +179,6 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, pq = kzalloc(sizeof(*pq), GFP_KERNEL); if (!pq) return -ENOMEM; - pq->dd = dd; pq->ctxt = uctxt->ctxt; pq->subctxt = fd->subctxt; @@ -236,7 +235,7 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, goto pq_mmu_fail; } - fd->pq = pq; + rcu_assign_pointer(fd->pq, pq); fd->cq = cq; return 0; @@ -264,8 +263,14 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, trace_hfi1_sdma_user_free_queues(uctxt->dd, uctxt->ctxt, fd->subctxt); - pq = fd->pq; + spin_lock(&fd->pq_rcu_lock); + pq = srcu_dereference_check(fd->pq, &fd->pq_srcu, + lockdep_is_held(&fd->pq_rcu_lock)); if (pq) { + rcu_assign_pointer(fd->pq, NULL); + spin_unlock(&fd->pq_rcu_lock); + synchronize_srcu(&fd->pq_srcu); + /* at this point there can be no more new requests */ if (pq->handler) hfi1_mmu_rb_unregister(pq->handler); iowait_sdma_drain(&pq->busy); @@ -277,7 +282,8 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, kfree(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); kfree(pq); - fd->pq = NULL; + } else { + spin_unlock(&fd->pq_rcu_lock); } if (fd->cq) { vfree(fd->cq->comps); @@ -321,7 +327,8 @@ int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, { int ret = 0, i; struct hfi1_ctxtdata *uctxt = fd->uctxt; - struct hfi1_user_sdma_pkt_q *pq = fd->pq; + struct hfi1_user_sdma_pkt_q *pq = + srcu_dereference(fd->pq, &fd->pq_srcu); struct hfi1_user_sdma_comp_q *cq = fd->cq; struct hfi1_devdata *dd = pq->dd; unsigned long idx = 0; @@ -856,8 +863,10 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) changes = set_txreq_header_ahg(req, tx, datalen); - if (changes < 0) + if (changes < 0) { + ret = changes; goto free_tx; + } } } else { ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) + diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index ad78b471c112..b962dbcfe9a7 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -593,10 +593,11 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, opa_get_lid(packet->dlid, 9B)); if (!mcast) goto drop; + rcu_read_lock(); list_for_each_entry_rcu(p, &mcast->qp_list, list) { packet->qp = p->qp; if (hfi1_do_pkey_check(packet)) - goto drop; + goto unlock_drop; spin_lock_irqsave(&packet->qp->r_lock, flags); packet_handler = qp_ok(packet); if (likely(packet_handler)) @@ -605,6 +606,7 @@ static inline void hfi1_handle_packet(struct hfi1_packet *packet, ibp->rvp.n_pkt_drops++; spin_unlock_irqrestore(&packet->qp->r_lock, flags); } + rcu_read_unlock(); /* * Notify rvt_multicast_detach() if it is waiting for us * to finish. diff --git a/drivers/infiniband/hw/hfi1/vnic_sdma.c b/drivers/infiniband/hw/hfi1/vnic_sdma.c index c3c96c5869ed..718dcdef946e 100644 --- a/drivers/infiniband/hw/hfi1/vnic_sdma.c +++ b/drivers/infiniband/hw/hfi1/vnic_sdma.c @@ -57,7 +57,6 @@ #define HFI1_VNIC_TXREQ_NAME_LEN 32 #define HFI1_VNIC_SDMA_DESC_WTRMRK 64 -#define HFI1_VNIC_SDMA_RETRY_COUNT 1 /* * struct vnic_txreq - VNIC transmit descriptor @@ -67,7 +66,6 @@ * @pad: pad buffer * @plen: pad length * @pbc_val: pbc value - * @retry_count: tx retry count */ struct vnic_txreq { struct sdma_txreq txreq; @@ -77,8 +75,6 @@ struct vnic_txreq { unsigned char pad[HFI1_VNIC_MAX_PAD]; u16 plen; __le64 pbc_val; - - u32 retry_count; }; static void vnic_sdma_complete(struct sdma_txreq *txreq, @@ -196,7 +192,6 @@ int hfi1_vnic_send_dma(struct hfi1_devdata *dd, u8 q_idx, ret = build_vnic_tx_desc(sde, tx, pbc); if (unlikely(ret)) goto free_desc; - tx->retry_count = 0; ret = sdma_send_txreq(sde, &vnic_sdma->wait, &tx->txreq, vnic_sdma->pkts_sent); @@ -238,14 +233,14 @@ static int hfi1_vnic_sdma_sleep(struct sdma_engine *sde, struct hfi1_vnic_sdma *vnic_sdma = container_of(wait, struct hfi1_vnic_sdma, wait); struct hfi1_ibdev *dev = &vnic_sdma->dd->verbs_dev; - struct vnic_txreq *tx = container_of(txreq, struct vnic_txreq, txreq); - if (sdma_progress(sde, seq, txreq)) - if (tx->retry_count++ < HFI1_VNIC_SDMA_RETRY_COUNT) - return -EAGAIN; + write_seqlock(&dev->iowait_lock); + if (sdma_progress(sde, seq, txreq)) { + write_sequnlock(&dev->iowait_lock); + return -EAGAIN; + } vnic_sdma->state = HFI1_VNIC_SDMA_Q_DEFERRED; - write_seqlock(&dev->iowait_lock); if (list_empty(&vnic_sdma->wait.list)) iowait_queue(pkts_sent, wait, &sde->dmawait); write_sequnlock(&dev->iowait_lock); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 435748858252..8e8917ebb013 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -52,7 +52,7 @@ enum { #define HNS_ROCE_HEM_CHUNK_LEN \ ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \ - (sizeof(struct scatterlist))) + (sizeof(struct scatterlist) + sizeof(void *))) enum { HNS_ROCE_HEM_PAGE_SHIFT = 12, diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 3a37d26889df..281e9987ffc8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -241,7 +241,6 @@ void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) if ((hr_qp->ibqp.qp_type) != IB_QPT_GSI) { hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn); - hns_roce_table_put(hr_dev, &qp_table->qp_table, hr_qp->qpn); } } diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index b7f1ce5333cb..880c63579ba8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -1667,7 +1667,7 @@ static enum i40iw_status_code i40iw_add_mqh_6(struct i40iw_device *iwdev, unsigned long flags; rtnl_lock(); - for_each_netdev_rcu(&init_net, ip_dev) { + for_each_netdev(&init_net, ip_dev) { if ((((rdma_vlan_dev_vlan_id(ip_dev) < I40IW_NO_VLAN) && (rdma_vlan_dev_real_dev(ip_dev) == iwdev->netdev)) || (ip_dev == iwdev->netdev)) && (ip_dev->flags & IFF_UP)) { diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0299c0642de8..7e73a1a6cb67 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -3073,16 +3073,17 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) ibdev->ib_active = false; flush_workqueue(wq); - mlx4_ib_close_sriov(ibdev); - mlx4_ib_mad_cleanup(ibdev); - ib_unregister_device(&ibdev->ib_dev); - mlx4_ib_diag_cleanup(ibdev); if (ibdev->iboe.nb.notifier_call) { if (unregister_netdevice_notifier(&ibdev->iboe.nb)) pr_warn("failure unregistering notifier\n"); ibdev->iboe.nb.notifier_call = NULL; } + mlx4_ib_close_sriov(ibdev); + mlx4_ib_mad_cleanup(ibdev); + ib_unregister_device(&ibdev->ib_dev); + mlx4_ib_diag_cleanup(ibdev); + mlx4_qp_release_range(dev, ibdev->steer_qpn_base, ibdev->steer_qpn_count); kfree(ibdev->ib_uc_qpns_bitmap); diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index e219093d2764..d2da28d613f2 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -353,16 +353,12 @@ err: static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max) { - char base_name[9]; - - /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */ - strlcpy(name, pci_name(dev->dev->persist->pdev), max); - strncpy(base_name, name, 8); /*till xxxx:yy:*/ - base_name[8] = '\0'; - /* with no ARI only 3 last bits are used so when the fn is higher than 8 + /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n + * with no ARI only 3 last bits are used so when the fn is higher than 8 * need to add it to the dev num, so count in the last number will be * modulo 8 */ - sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8)); + snprintf(name, max, "%.8s%.2d.%d", pci_name(dev->dev->persist->pdev), + i / 8, i % 8); } struct mlx4_port { diff --git a/drivers/infiniband/hw/mlx5/gsi.c b/drivers/infiniband/hw/mlx5/gsi.c index 79e6309460dc..262c18b2f525 100644 --- a/drivers/infiniband/hw/mlx5/gsi.c +++ b/drivers/infiniband/hw/mlx5/gsi.c @@ -507,8 +507,7 @@ int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr, ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr); if (ret) { /* Undo the effect of adding the outstanding wr */ - gsi->outstanding_pi = (gsi->outstanding_pi - 1) % - gsi->cap.max_send_wr; + gsi->outstanding_pi--; goto err; } spin_unlock_irqrestore(&gsi->lock, flags); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index cfddca850cb4..fb45bfa4f845 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -460,7 +460,7 @@ struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) { mlx5_ib_err(dev, "cache entry %d is out of range\n", entry); - return NULL; + return ERR_PTR(-EINVAL); } ent = &cache->ent[entry]; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 5a7dcb5afe6e..84c962820aa2 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2357,6 +2357,11 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX | MLX5_QP_OPTPAR_Q_KEY | MLX5_QP_OPTPAR_PRI_PORT, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PKEY_INDEX | + MLX5_QP_OPTPAR_PRI_PORT, }, [MLX5_QP_STATE_RTR] = { [MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | @@ -2390,6 +2395,12 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_PM_STATE, [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH | + MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_PM_STATE | + MLX5_QP_OPTPAR_RNR_TIMEOUT, }, }, [MLX5_QP_STATE_RTS] = { @@ -2406,6 +2417,12 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q [MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_Q_KEY | MLX5_QP_OPTPAR_SRQN | MLX5_QP_OPTPAR_CQN_RCV, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RNR_TIMEOUT | + MLX5_QP_OPTPAR_PM_STATE | + MLX5_QP_OPTPAR_ALT_ADDR_PATH, }, }, [MLX5_QP_STATE_SQER] = { @@ -2417,6 +2434,10 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q MLX5_QP_OPTPAR_RWE | MLX5_QP_OPTPAR_RAE | MLX5_QP_OPTPAR_RRE, + [MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RNR_TIMEOUT | + MLX5_QP_OPTPAR_RWE | + MLX5_QP_OPTPAR_RAE | + MLX5_QP_OPTPAR_RRE, }, }, }; diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index e36a9bc52268..ccf50dafce9c 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -986,7 +986,8 @@ static int __mthca_init_one(struct pci_dev *pdev, int hca_type) goto err_free_dev; } - if (mthca_cmd_init(mdev)) { + err = mthca_cmd_init(mdev); + if (err) { mthca_err(mdev, "Failed to init command interface, aborting.\n"); goto err_free_dev; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 27d5e8d9f08d..7683d13dad3d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -55,7 +55,7 @@ int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { - if (index > 1) + if (index > 0) return -EINVAL; *pkey = 0xffff; diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index ddb05b42e5e6..3e48ed64760b 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -73,7 +73,7 @@ static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str) struct qedr_dev *qedr = get_qedr_dev(ibdev); u32 fw_ver = (u32)qedr->attr.fw_ver; - snprintf(str, IB_FW_VERSION_NAME_MAX, "%d. %d. %d. %d", + snprintf(str, IB_FW_VERSION_NAME_MAX, "%d.%d.%d.%d", (fw_ver >> 24) & 0xFF, (fw_ver >> 16) & 0xFF, (fw_ver >> 8) & 0xFF, fw_ver & 0xFF); } diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 7f4cc9336442..6ae72accae3d 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -63,7 +63,7 @@ static inline int qedr_ib_copy_to_udata(struct ib_udata *udata, void *src, int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { - if (index > QEDR_ROCE_PKEY_TABLE_LEN) + if (index >= QEDR_ROCE_PKEY_TABLE_LEN) return -EINVAL; *pkey = QEDR_ROCE_PKEY_DEFAULT; @@ -178,54 +178,47 @@ int qedr_query_device(struct ib_device *ibdev, return 0; } -#define QEDR_SPEED_SDR (1) -#define QEDR_SPEED_DDR (2) -#define QEDR_SPEED_QDR (4) -#define QEDR_SPEED_FDR10 (8) -#define QEDR_SPEED_FDR (16) -#define QEDR_SPEED_EDR (32) - static inline void get_link_speed_and_width(int speed, u8 *ib_speed, u8 *ib_width) { switch (speed) { case 1000: - *ib_speed = QEDR_SPEED_SDR; + *ib_speed = IB_SPEED_SDR; *ib_width = IB_WIDTH_1X; break; case 10000: - *ib_speed = QEDR_SPEED_QDR; + *ib_speed = IB_SPEED_QDR; *ib_width = IB_WIDTH_1X; break; case 20000: - *ib_speed = QEDR_SPEED_DDR; + *ib_speed = IB_SPEED_DDR; *ib_width = IB_WIDTH_4X; break; case 25000: - *ib_speed = QEDR_SPEED_EDR; + *ib_speed = IB_SPEED_EDR; *ib_width = IB_WIDTH_1X; break; case 40000: - *ib_speed = QEDR_SPEED_QDR; + *ib_speed = IB_SPEED_QDR; *ib_width = IB_WIDTH_4X; break; case 50000: - *ib_speed = QEDR_SPEED_QDR; - *ib_width = IB_WIDTH_4X; + *ib_speed = IB_SPEED_HDR; + *ib_width = IB_WIDTH_1X; break; case 100000: - *ib_speed = QEDR_SPEED_EDR; + *ib_speed = IB_SPEED_EDR; *ib_width = IB_WIDTH_4X; break; default: /* Unsupported */ - *ib_speed = QEDR_SPEED_SDR; + *ib_speed = IB_SPEED_SDR; *ib_width = IB_WIDTH_1X; } } @@ -1343,6 +1336,14 @@ static void qedr_cleanup_user(struct qedr_dev *dev, struct qedr_qp *qp) if (qp->urq.umem) ib_umem_release(qp->urq.umem); qp->urq.umem = NULL; + + if (rdma_protocol_roce(&dev->ibdev, 1)) { + qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl); + qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl); + } else { + kfree(qp->usq.pbl_tbl); + kfree(qp->urq.pbl_tbl); + } } static int qedr_create_user_qp(struct qedr_dev *dev, @@ -2331,8 +2332,8 @@ int qedr_dereg_mr(struct ib_mr *ib_mr) dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); - if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR)) - qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); + if (mr->type != QEDR_MR_DMA) + free_mr_info(dev, &mr->info); /* it could be user registered memory. */ if (mr->umem) diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c index 891873b38a1e..5f3f197678b7 100644 --- a/drivers/infiniband/hw/qib/qib_sdma.c +++ b/drivers/infiniband/hw/qib/qib_sdma.c @@ -600,8 +600,10 @@ retry: dw = (len + 3) >> 2; addr = dma_map_single(&ppd->dd->pcidev->dev, sge->vaddr, dw << 2, DMA_TO_DEVICE); - if (dma_mapping_error(&ppd->dd->pcidev->dev, addr)) + if (dma_mapping_error(&ppd->dd->pcidev->dev, addr)) { + ret = -ENOMEM; goto unmap; + } sdmadesc[0] = 0; make_sdma_desc(ppd, sdmadesc, (u64) addr, dw, dwoffset); /* SDmaUseLargeBuf has to be set in every descriptor */ diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index ca2638d8f35e..d831f3e61ae8 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -301,6 +301,9 @@ static ssize_t qib_portattr_show(struct kobject *kobj, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, pport_kobj); + if (!pattr->show) + return -EIO; + return pattr->show(ppd, buf); } @@ -312,6 +315,9 @@ static ssize_t qib_portattr_store(struct kobject *kobj, struct qib_pportdata *ppd = container_of(kobj, struct qib_pportdata, pport_kobj); + if (!pattr->store) + return -EIO; + return pattr->store(ppd, buf, len); } diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index 350bc29a066f..b473df8eea1a 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -360,8 +360,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) if (mcast == NULL) goto drop; this_cpu_inc(ibp->pmastats->n_multicast_rcv); + rcu_read_lock(); list_for_each_entry_rcu(p, &mcast->qp_list, list) qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp); + rcu_read_unlock(); /* * Notify rvt_multicast_detach() if it is waiting for us * to finish. diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index fdfa25059723..2602c7375d58 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -423,7 +423,7 @@ struct net_device *usnic_get_netdev(struct ib_device *device, u8 port_num) int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { - if (index > 1) + if (index > 0) return -EINVAL; *pkey = 0xffff; diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c index aa533f08e017..5c7aa6ff1538 100644 --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c @@ -550,7 +550,7 @@ struct ib_ah *pvrdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr, if (!atomic_add_unless(&dev->num_ahs, 1, dev->dsr->caps.max_ah)) return ERR_PTR(-ENOMEM); - ah = kzalloc(sizeof(*ah), GFP_KERNEL); + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) { atomic_dec(&dev->num_ahs); return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 83cfe44f070e..fd9ce03dbd29 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -253,6 +253,17 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE && pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) { + /* read retries of partial data may restart from + * read response first or response only. + */ + if ((pkt->psn == wqe->first_psn && + pkt->opcode == + IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) || + (wqe->first_psn == wqe->last_psn && + pkt->opcode == + IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY)) + break; + return COMPST_ERROR; } break; @@ -501,11 +512,11 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp, struct rxe_pkt_info *pkt, struct rxe_send_wqe *wqe) { - qp->comp.opcode = -1; - - if (pkt) { - if (psn_compare(pkt->psn, qp->comp.psn) >= 0) - qp->comp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + if (pkt && wqe->state == wqe_state_pending) { + if (psn_compare(wqe->last_psn, qp->comp.psn) >= 0) { + qp->comp.psn = (wqe->last_psn + 1) & BTH_PSN_MASK; + qp->comp.opcode = -1; + } if (qp->req.wait_psn) { qp->req.wait_psn = 0; diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index c4aabf78dc90..f6e036ded046 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -30,7 +30,7 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ - +#include #include "rxe.h" #include "rxe_loc.h" #include "rxe_queue.h" @@ -97,7 +97,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, err = do_mmap_info(rxe, udata, false, context, cq->queue->buf, cq->queue->buf_size, &cq->queue->ip); if (err) { - kvfree(cq->queue->buf); + vfree(cq->queue->buf); kfree(cq->queue); return err; } diff --git a/drivers/infiniband/sw/rxe/rxe_hw_counters.c b/drivers/infiniband/sw/rxe/rxe_hw_counters.c index 6aeb7a165e46..ea4542a9d69e 100644 --- a/drivers/infiniband/sw/rxe/rxe_hw_counters.c +++ b/drivers/infiniband/sw/rxe/rxe_hw_counters.c @@ -59,7 +59,7 @@ int rxe_ib_get_hw_stats(struct ib_device *ibdev, return -EINVAL; for (cnt = 0; cnt < ARRAY_SIZE(rxe_counter_name); cnt++) - stats->value[cnt] = dev->stats_counters[cnt]; + stats->value[cnt] = atomic64_read(&dev->stats_counters[cnt]); return ARRAY_SIZE(rxe_counter_name); } diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index b4a8acc7bb7d..0e2425f28233 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -112,6 +112,18 @@ static inline struct kmem_cache *pool_cache(struct rxe_pool *pool) return rxe_type_info[pool->type].cache; } +static void rxe_cache_clean(size_t cnt) +{ + int i; + struct rxe_type_info *type; + + for (i = 0; i < cnt; i++) { + type = &rxe_type_info[i]; + kmem_cache_destroy(type->cache); + type->cache = NULL; + } +} + int rxe_cache_init(void) { int err; @@ -136,24 +148,14 @@ int rxe_cache_init(void) return 0; err1: - while (--i >= 0) { - kmem_cache_destroy(type->cache); - type->cache = NULL; - } + rxe_cache_clean(i); return err; } void rxe_cache_exit(void) { - int i; - struct rxe_type_info *type; - - for (i = 0; i < RXE_NUM_TYPES; i++) { - type = &rxe_type_info[i]; - kmem_cache_destroy(type->cache); - type->cache = NULL; - } + rxe_cache_clean(RXE_NUM_TYPES); } static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index aeea994b04c4..25055a68a2c0 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "rxe.h" #include "rxe_loc.h" @@ -255,7 +256,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, qp->sq.queue->buf_size, &qp->sq.queue->ip); if (err) { - kvfree(qp->sq.queue->buf); + vfree(qp->sq.queue->buf); kfree(qp->sq.queue); return err; } @@ -308,7 +309,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, qp->rq.queue->buf_size, &qp->rq.queue->ip); if (err) { - kvfree(qp->rq.queue->buf); + vfree(qp->rq.queue->buf); kfree(qp->rq.queue); return err; } diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 83412df726a5..b7098f7bb30e 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -393,7 +393,7 @@ int rxe_rcv(struct sk_buff *skb) calc_icrc = rxe_icrc_hdr(pkt, skb); calc_icrc = rxe_crc32(rxe, calc_icrc, (u8 *)payload_addr(pkt), - payload_size(pkt)); + payload_size(pkt) + bth_pad(pkt)); calc_icrc = (__force u32)cpu_to_be32(~calc_icrc); if (unlikely(calc_icrc != pack_icrc)) { if (skb->protocol == htons(ETH_P_IPV6)) diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 08ae4f3a6a37..e6785b1ea85f 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -73,9 +73,6 @@ static void req_retry(struct rxe_qp *qp) int npsn; int first = 1; - wqe = queue_head(qp->sq.queue); - npsn = (qp->comp.psn - wqe->first_psn) & BTH_PSN_MASK; - qp->req.wqe_index = consumer_index(qp->sq.queue); qp->req.psn = qp->comp.psn; qp->req.opcode = -1; @@ -107,11 +104,17 @@ static void req_retry(struct rxe_qp *qp) if (first) { first = 0; - if (mask & WR_WRITE_OR_SEND_MASK) + if (mask & WR_WRITE_OR_SEND_MASK) { + npsn = (qp->comp.psn - wqe->first_psn) & + BTH_PSN_MASK; retry_first_write_send(qp, wqe, mask, npsn); + } - if (mask & WR_READ_MASK) + if (mask & WR_READ_MASK) { + npsn = (wqe->dma.length - wqe->dma.resid) / + qp->mtu; wqe->iova += npsn * qp->mtu; + } } wqe->state = wqe_state_posted; @@ -435,7 +438,7 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, if (pkt->mask & RXE_RETH_MASK) { reth_set_rkey(pkt, ibwr->wr.rdma.rkey); reth_set_va(pkt, wqe->iova); - reth_set_len(pkt, wqe->dma.length); + reth_set_len(pkt, wqe->dma.resid); } if (pkt->mask & RXE_IMMDT_MASK) @@ -497,6 +500,12 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (err) return err; } + if (bth_pad(pkt)) { + u8 *pad = payload_addr(pkt) + paylen; + + memset(pad, 0, bth_pad(pkt)); + crc = rxe_crc32(rxe, crc, pad, bth_pad(pkt)); + } } p = payload_addr(pkt) + paylen + bth_pad(pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 9207682b7a2e..a07a29b48863 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -738,6 +738,13 @@ static enum resp_states read_reply(struct rxe_qp *qp, if (err) pr_err("Failed copying memory\n"); + if (bth_pad(&ack_pkt)) { + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + u8 *pad = payload_addr(&ack_pkt) + payload; + + memset(pad, 0, bth_pad(&ack_pkt)); + icrc = rxe_crc32(rxe, icrc, pad, bth_pad(&ack_pkt)); + } p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt); *p = ~icrc; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index b2b76a316eba..46c8a66731e6 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -408,18 +408,18 @@ struct rxe_dev { struct list_head pending_mmaps; spinlock_t mmap_offset_lock; /* guard mmap_offset */ - int mmap_offset; + u64 mmap_offset; - u64 stats_counters[RXE_NUM_OF_COUNTERS]; + atomic64_t stats_counters[RXE_NUM_OF_COUNTERS]; struct rxe_port port; struct list_head list; struct crypto_shash *tfm; }; -static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters cnt) +static inline void rxe_counter_inc(struct rxe_dev *rxe, enum rxe_counters index) { - rxe->stats_counters[cnt]++; + atomic64_inc(&rxe->stats_counters[index]); } static inline struct rxe_dev *to_rdev(struct ib_device *dev) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 1a93d3d58c8a..caae4bfab950 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -249,7 +249,8 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu) return 0; } - if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) + if (new_mtu < (ETH_MIN_MTU + IPOIB_ENCAP_LEN) || + new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu)) return -EINVAL; priv->admin_mtu = new_mtu; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 19624e023ebd..b5a789567b4e 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -648,6 +648,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, if (ib_conn->pi_support) { u32 sig_caps = ib_conn->device->ib_device->attrs.sig_prot_cap; + shost->sg_prot_tablesize = shost->sg_tablesize; scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps)); scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP | SHOST_DIX_GUARD_CRC); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index c1ae4aeae2f9..46dfc6ae9d1c 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -197,7 +197,7 @@ struct iser_data_buf { struct scatterlist *sg; int size; unsigned long data_len; - unsigned int dma_nents; + int dma_nents; }; /* fwd declarations */ diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 2a07692007bd..a126750b65a9 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -592,13 +592,19 @@ void iser_login_rsp(struct ib_cq *cq, struct ib_wc *wc) ib_conn->post_recv_buf_count--; } -static inline void +static inline int iser_inv_desc(struct iser_fr_desc *desc, u32 rkey) { - if (likely(rkey == desc->rsc.mr->rkey)) + if (likely(rkey == desc->rsc.mr->rkey)) { desc->rsc.mr_valid = 0; - else if (likely(rkey == desc->pi_ctx->sig_mr->rkey)) + } else if (likely(desc->pi_ctx && rkey == desc->pi_ctx->sig_mr->rkey)) { desc->pi_ctx->sig_mr_valid = 0; + } else { + iser_err("Bogus remote invalidation for rkey %#x\n", rkey); + return -EINVAL; + } + + return 0; } static int @@ -626,12 +632,14 @@ iser_check_remote_inv(struct iser_conn *iser_conn, if (iser_task->dir[ISER_DIR_IN]) { desc = iser_task->rdma_reg[ISER_DIR_IN].mem_h; - iser_inv_desc(desc, rkey); + if (unlikely(iser_inv_desc(desc, rkey))) + return -EINVAL; } if (iser_task->dir[ISER_DIR_OUT]) { desc = iser_task->rdma_reg[ISER_DIR_OUT].mem_h; - iser_inv_desc(desc, rkey); + if (unlikely(iser_inv_desc(desc, rkey))) + return -EINVAL; } } else { iser_err("failed to get task for itt=%d\n", hdr->itt); diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 322209d5ff58..19883169e7b7 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -240,8 +240,8 @@ int iser_fast_reg_fmr(struct iscsi_iser_task *iser_task, page_vec->npages = 0; page_vec->fake_mr.page_size = SIZE_4K; plen = ib_sg_to_pages(&page_vec->fake_mr, mem->sg, - mem->size, NULL, iser_set_page); - if (unlikely(plen < mem->size)) { + mem->dma_nents, NULL, iser_set_page); + if (unlikely(plen < mem->dma_nents)) { iser_err("page vec too short to hold this SG\n"); iser_data_buf_dump(mem, device->ib_device); iser_dump_page_vec(page_vec); @@ -450,10 +450,10 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); - n = ib_map_mr_sg(mr, mem->sg, mem->size, NULL, SIZE_4K); - if (unlikely(n != mem->size)) { + n = ib_map_mr_sg(mr, mem->sg, mem->dma_nents, NULL, SIZE_4K); + if (unlikely(n != mem->dma_nents)) { iser_err("failed to map sg (%d/%d)\n", - n, mem->size); + n, mem->dma_nents); return n < 0 ? n : -EINVAL; } diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c index a72278e9cd27..9c8ddaaa6fbb 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_encap.c @@ -351,7 +351,8 @@ static uint32_t opa_vnic_get_dlid(struct opa_vnic_adapter *adapter, if (unlikely(!dlid)) v_warn("Null dlid in MAC address\n"); } else if (def_port != OPA_VNIC_INVALID_PORT) { - dlid = info->vesw.u_ucast_dlid[def_port]; + if (def_port < OPA_VESW_MAX_NUM_DEF_PORT) + dlid = info->vesw.u_ucast_dlid[def_port]; } } diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 3f5b5893792c..9f7287f45d06 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -2210,6 +2210,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) if (srp_post_send(ch, iu, len)) { shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n"); + scmnd->result = DID_ERROR << 16; goto err_unmap; } diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 181deb6f05b0..5f1055c94d66 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -1246,9 +1246,11 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, struct srpt_send_ioctx *ioctx, u64 tag, int status) { + struct se_cmd *cmd = &ioctx->cmd; struct srp_rsp *srp_rsp; const u8 *sense_data; int sense_data_len, max_sense_len; + u32 resid = cmd->residual_count; /* * The lowest bit of all SAM-3 status codes is zero (see also @@ -1270,6 +1272,28 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, srp_rsp->tag = tag; srp_rsp->status = status; + if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) { + if (cmd->data_direction == DMA_TO_DEVICE) { + /* residual data from an underflow write */ + srp_rsp->flags = SRP_RSP_FLAG_DOUNDER; + srp_rsp->data_out_res_cnt = cpu_to_be32(resid); + } else if (cmd->data_direction == DMA_FROM_DEVICE) { + /* residual data from an underflow read */ + srp_rsp->flags = SRP_RSP_FLAG_DIUNDER; + srp_rsp->data_in_res_cnt = cpu_to_be32(resid); + } + } else if (cmd->se_cmd_flags & SCF_OVERFLOW_BIT) { + if (cmd->data_direction == DMA_TO_DEVICE) { + /* residual data from an overflow write */ + srp_rsp->flags = SRP_RSP_FLAG_DOOVER; + srp_rsp->data_out_res_cnt = cpu_to_be32(resid); + } else if (cmd->data_direction == DMA_FROM_DEVICE) { + /* residual data from an overflow read */ + srp_rsp->flags = SRP_RSP_FLAG_DIOVER; + srp_rsp->data_in_res_cnt = cpu_to_be32(resid); + } + } + if (sense_data_len) { BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp)); max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp); diff --git a/drivers/input/input.c b/drivers/input/input.c index 50d425fe6706..cadb368be8ef 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -858,16 +858,18 @@ static int input_default_setkeycode(struct input_dev *dev, } } - __clear_bit(*old_keycode, dev->keybit); - __set_bit(ke->keycode, dev->keybit); - - for (i = 0; i < dev->keycodemax; i++) { - if (input_fetch_keycode(dev, i) == *old_keycode) { - __set_bit(*old_keycode, dev->keybit); - break; /* Setting the bit twice is useless, so break */ + if (*old_keycode <= KEY_MAX) { + __clear_bit(*old_keycode, dev->keybit); + for (i = 0; i < dev->keycodemax; i++) { + if (input_fetch_keycode(dev, i) == *old_keycode) { + __set_bit(*old_keycode, dev->keybit); + /* Setting the bit twice is useless, so break */ + break; + } } } + __set_bit(ke->keycode, dev->keybit); return 0; } @@ -923,9 +925,13 @@ int input_set_keycode(struct input_dev *dev, * Simulate keyup event if keycode is not present * in the keymap anymore */ - if (test_bit(EV_KEY, dev->evbit) && - !is_event_supported(old_keycode, dev->keybit, KEY_MAX) && - __test_and_clear_bit(old_keycode, dev->key)) { + if (old_keycode > KEY_MAX) { + dev_warn(dev->dev.parent ?: &dev->dev, + "%s: got too big old keycode %#x\n", + __func__, old_keycode); + } else if (test_bit(EV_KEY, dev->evbit) && + !is_event_supported(old_keycode, dev->keybit, KEY_MAX) && + __test_and_clear_bit(old_keycode, dev->key)) { struct input_value vals[] = { { EV_KEY, old_keycode, 0 }, input_value_sync diff --git a/drivers/input/joystick/psxpad-spi.c b/drivers/input/joystick/psxpad-spi.c index 28b473f6cbb6..092096ee06b9 100644 --- a/drivers/input/joystick/psxpad-spi.c +++ b/drivers/input/joystick/psxpad-spi.c @@ -292,7 +292,7 @@ static int psxpad_spi_probe(struct spi_device *spi) if (!pad) return -ENOMEM; - pdev = input_allocate_polled_device(); + pdev = devm_input_allocate_polled_device(&spi->dev); if (!pdev) { dev_err(&spi->dev, "failed to allocate input device\n"); return -ENOMEM; diff --git a/drivers/input/keyboard/nomadik-ske-keypad.c b/drivers/input/keyboard/nomadik-ske-keypad.c index 8567ee47761e..ae3b04557074 100644 --- a/drivers/input/keyboard/nomadik-ske-keypad.c +++ b/drivers/input/keyboard/nomadik-ske-keypad.c @@ -100,7 +100,7 @@ static int __init ske_keypad_chip_init(struct ske_keypad *keypad) while ((readl(keypad->reg_base + SKE_RIS) != 0x00000000) && timeout--) cpu_relax(); - if (!timeout) + if (timeout == -1) return -EINVAL; /* diff --git a/drivers/input/misc/da9063_onkey.c b/drivers/input/misc/da9063_onkey.c index 3e9c353d82ef..a01b25facf46 100644 --- a/drivers/input/misc/da9063_onkey.c +++ b/drivers/input/misc/da9063_onkey.c @@ -248,10 +248,7 @@ static int da9063_onkey_probe(struct platform_device *pdev) onkey->input->phys = onkey->phys; onkey->input->dev.parent = &pdev->dev; - if (onkey->key_power) - input_set_capability(onkey->input, EV_KEY, KEY_POWER); - - input_set_capability(onkey->input, EV_KEY, KEY_SLEEP); + input_set_capability(onkey->input, EV_KEY, KEY_POWER); INIT_DELAYED_WORK(&onkey->work, da9063_poll_on); diff --git a/drivers/input/misc/keyspan_remote.c b/drivers/input/misc/keyspan_remote.c index 77c47d6325fe..a9ee813eef10 100644 --- a/drivers/input/misc/keyspan_remote.c +++ b/drivers/input/misc/keyspan_remote.c @@ -344,7 +344,8 @@ static int keyspan_setup(struct usb_device* dev) int retval = 0; retval = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), - 0x11, 0x40, 0x5601, 0x0, NULL, 0, 0); + 0x11, 0x40, 0x5601, 0x0, NULL, 0, + USB_CTRL_SET_TIMEOUT); if (retval) { dev_dbg(&dev->dev, "%s - failed to set bit rate due to error: %d\n", __func__, retval); @@ -352,7 +353,8 @@ static int keyspan_setup(struct usb_device* dev) } retval = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), - 0x44, 0x40, 0x0, 0x0, NULL, 0, 0); + 0x44, 0x40, 0x0, 0x0, NULL, 0, + USB_CTRL_SET_TIMEOUT); if (retval) { dev_dbg(&dev->dev, "%s - failed to set resume sensitivity due to error: %d\n", __func__, retval); @@ -360,7 +362,8 @@ static int keyspan_setup(struct usb_device* dev) } retval = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), - 0x22, 0x40, 0x0, 0x0, NULL, 0, 0); + 0x22, 0x40, 0x0, 0x0, NULL, 0, + USB_CTRL_SET_TIMEOUT); if (retval) { dev_dbg(&dev->dev, "%s - failed to turn receive on due to error: %d\n", __func__, retval); diff --git a/drivers/input/misc/pm8xxx-vibrator.c b/drivers/input/misc/pm8xxx-vibrator.c index 7dd1c1fbe42a..27b3db154a33 100644 --- a/drivers/input/misc/pm8xxx-vibrator.c +++ b/drivers/input/misc/pm8xxx-vibrator.c @@ -98,7 +98,7 @@ static int pm8xxx_vib_set(struct pm8xxx_vib *vib, bool on) if (regs->enable_mask) rc = regmap_update_bits(vib->regmap, regs->enable_addr, - on ? regs->enable_mask : 0, val); + regs->enable_mask, on ? ~0 : 0); return rc; } diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 7db53eab7012..5f764e0993a4 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -149,7 +149,6 @@ static const char * const topbuttonpad_pnp_ids[] = { "LEN0042", /* Yoga */ "LEN0045", "LEN0047", - "LEN0049", "LEN2000", /* S540 */ "LEN2001", /* Edge E431 */ "LEN2002", /* Edge E531 */ @@ -169,18 +168,22 @@ static const char * const smbus_pnp_ids[] = { /* all of the topbuttonpad_pnp_ids are valid, we just add some extras */ "LEN0048", /* X1 Carbon 3 */ "LEN0046", /* X250 */ + "LEN0049", /* Yoga 11e */ "LEN004a", /* W541 */ "LEN005b", /* P50 */ "LEN005e", /* T560 */ + "LEN006c", /* T470s */ "LEN0071", /* T480 */ "LEN0072", /* X1 Carbon Gen 5 (2017) - Elan/ALPS trackpoint */ "LEN0073", /* X1 Carbon G5 (Elantech) */ + "LEN0091", /* X1 Carbon 6 */ "LEN0092", /* X1 Carbon 6 */ "LEN0093", /* T480 */ "LEN0096", /* X280 */ "LEN0097", /* X280 -> ALPS trackpoint */ "LEN009b", /* T580 */ "LEN200f", /* T450s */ + "LEN2044", /* L470 */ "LEN2054", /* E480 */ "LEN2055", /* E580 */ "SYN3052", /* HP EliteBook 840 G4 */ diff --git a/drivers/input/rmi4/rmi_f11.c b/drivers/input/rmi4/rmi_f11.c index bb63b8823d62..e8c3e5d1ea22 100644 --- a/drivers/input/rmi4/rmi_f11.c +++ b/drivers/input/rmi4/rmi_f11.c @@ -1295,8 +1295,8 @@ static int rmi_f11_attention(struct rmi_function *fn, unsigned long *irq_bits) valid_bytes = f11->sensor.attn_size; memcpy(f11->sensor.data_pkt, drvdata->attn_data.data, valid_bytes); - drvdata->attn_data.data += f11->sensor.attn_size; - drvdata->attn_data.size -= f11->sensor.attn_size; + drvdata->attn_data.data += valid_bytes; + drvdata->attn_data.size -= valid_bytes; } else { error = rmi_read_block(rmi_dev, data_base_addr, f11->sensor.data_pkt, diff --git a/drivers/input/rmi4/rmi_f12.c b/drivers/input/rmi4/rmi_f12.c index 8b0db086d68a..99dd2f512058 100644 --- a/drivers/input/rmi4/rmi_f12.c +++ b/drivers/input/rmi4/rmi_f12.c @@ -58,6 +58,9 @@ struct f12_data { const struct rmi_register_desc_item *data15; u16 data15_offset; + + unsigned long *abs_mask; + unsigned long *rel_mask; }; static int rmi_f12_read_sensor_tuning(struct f12_data *f12) @@ -214,8 +217,8 @@ static int rmi_f12_attention(struct rmi_function *fn, valid_bytes = sensor->attn_size; memcpy(sensor->data_pkt, drvdata->attn_data.data, valid_bytes); - drvdata->attn_data.data += sensor->attn_size; - drvdata->attn_data.size -= sensor->attn_size; + drvdata->attn_data.data += valid_bytes; + drvdata->attn_data.size -= valid_bytes; } else { retval = rmi_read_block(rmi_dev, f12->data_addr, sensor->data_pkt, sensor->pkt_size); @@ -296,9 +299,18 @@ static int rmi_f12_write_control_regs(struct rmi_function *fn) static int rmi_f12_config(struct rmi_function *fn) { struct rmi_driver *drv = fn->rmi_dev->driver; + struct f12_data *f12 = dev_get_drvdata(&fn->dev); + struct rmi_2d_sensor *sensor; int ret; - drv->set_irq_bits(fn->rmi_dev, fn->irq_mask); + sensor = &f12->sensor; + + if (!sensor->report_abs) + drv->clear_irq_bits(fn->rmi_dev, f12->abs_mask); + else + drv->set_irq_bits(fn->rmi_dev, f12->abs_mask); + + drv->clear_irq_bits(fn->rmi_dev, f12->rel_mask); ret = rmi_f12_write_control_regs(fn); if (ret) @@ -320,9 +332,12 @@ static int rmi_f12_probe(struct rmi_function *fn) struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev); struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev); u16 data_offset = 0; + int mask_size; rmi_dbg(RMI_DEBUG_FN, &fn->dev, "%s\n", __func__); + mask_size = BITS_TO_LONGS(drvdata->irq_count) * sizeof(unsigned long); + ret = rmi_read(fn->rmi_dev, query_addr, &buf); if (ret < 0) { dev_err(&fn->dev, "Failed to read general info register: %d\n", @@ -337,10 +352,19 @@ static int rmi_f12_probe(struct rmi_function *fn) return -ENODEV; } - f12 = devm_kzalloc(&fn->dev, sizeof(struct f12_data), GFP_KERNEL); + f12 = devm_kzalloc(&fn->dev, sizeof(struct f12_data) + mask_size * 2, + GFP_KERNEL); if (!f12) return -ENOMEM; + f12->abs_mask = (unsigned long *)((char *)f12 + + sizeof(struct f12_data)); + f12->rel_mask = (unsigned long *)((char *)f12 + + sizeof(struct f12_data) + mask_size); + + set_bit(fn->irq_pos, f12->abs_mask); + set_bit(fn->irq_pos + 1, f12->rel_mask); + f12->has_dribble = !!(buf & BIT(3)); if (fn->dev.of_node) { diff --git a/drivers/input/rmi4/rmi_f34v7.c b/drivers/input/rmi4/rmi_f34v7.c index 3991d2943660..099dde68e332 100644 --- a/drivers/input/rmi4/rmi_f34v7.c +++ b/drivers/input/rmi4/rmi_f34v7.c @@ -1192,6 +1192,9 @@ int rmi_f34v7_do_reflash(struct f34_data *f34, const struct firmware *fw) { int ret; + f34->fn->rmi_dev->driver->set_irq_bits(f34->fn->rmi_dev, + f34->fn->irq_mask); + rmi_f34v7_read_queries_bl_version(f34); f34->v7.image = fw->data; diff --git a/drivers/input/rmi4/rmi_f54.c b/drivers/input/rmi4/rmi_f54.c index f5206e2c767e..7f1959517ec0 100644 --- a/drivers/input/rmi4/rmi_f54.c +++ b/drivers/input/rmi4/rmi_f54.c @@ -362,7 +362,7 @@ static const struct vb2_ops rmi_f54_queue_ops = { static const struct vb2_queue rmi_f54_queue = { .type = V4L2_BUF_TYPE_VIDEO_CAPTURE, .io_modes = VB2_MMAP | VB2_USERPTR | VB2_DMABUF | VB2_READ, - .buf_struct_size = sizeof(struct vb2_buffer), + .buf_struct_size = sizeof(struct vb2_v4l2_buffer), .ops = &rmi_f54_queue_ops, .mem_ops = &vb2_vmalloc_memops, .timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC, @@ -619,7 +619,7 @@ static int rmi_f54_config(struct rmi_function *fn) { struct rmi_driver *drv = fn->rmi_dev->driver; - drv->set_irq_bits(fn->rmi_dev, fn->irq_mask); + drv->clear_irq_bits(fn->rmi_dev, fn->irq_mask); return 0; } @@ -747,6 +747,7 @@ static void rmi_f54_remove(struct rmi_function *fn) video_unregister_device(&f54->vdev); v4l2_device_unregister(&f54->v4l2); + destroy_workqueue(f54->workqueue); } struct rmi_function_handler rmi_f54_handler = { diff --git a/drivers/input/serio/gscps2.c b/drivers/input/serio/gscps2.c index aa9f29b875de..d84e3b70215a 100644 --- a/drivers/input/serio/gscps2.c +++ b/drivers/input/serio/gscps2.c @@ -382,9 +382,9 @@ static int __init gscps2_probe(struct parisc_device *dev) goto fail; #endif - printk(KERN_INFO "serio: %s port at 0x%p irq %d @ %s\n", + pr_info("serio: %s port at 0x%08lx irq %d @ %s\n", ps2port->port->name, - ps2port->addr, + hpa, ps2port->padev->irq, ps2port->port->phys); diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index 8eef6849d066..5585823ced19 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c @@ -887,8 +887,8 @@ static int __init hp_sdc_init(void) "HP SDC NMI", &hp_sdc)) goto err2; - printk(KERN_INFO PREFIX "HP SDC at 0x%p, IRQ %d (NMI IRQ %d)\n", - (void *)hp_sdc.base_io, hp_sdc.irq, hp_sdc.nmi); + pr_info(PREFIX "HP SDC at 0x%08lx, IRQ %d (NMI IRQ %d)\n", + hp_sdc.base_io, hp_sdc.irq, hp_sdc.nmi); hp_sdc_status_in8(); hp_sdc_data_in8(); diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c index 0b55e1f375b3..fbe2df91aad3 100644 --- a/drivers/input/tablet/aiptek.c +++ b/drivers/input/tablet/aiptek.c @@ -1822,14 +1822,14 @@ aiptek_probe(struct usb_interface *intf, const struct usb_device_id *id) input_set_abs_params(inputdev, ABS_WHEEL, AIPTEK_WHEEL_MIN, AIPTEK_WHEEL_MAX - 1, 0, 0); /* Verify that a device really has an endpoint */ - if (intf->altsetting[0].desc.bNumEndpoints < 1) { + if (intf->cur_altsetting->desc.bNumEndpoints < 1) { dev_err(&intf->dev, "interface has %d endpoints, but must have minimum 1\n", - intf->altsetting[0].desc.bNumEndpoints); + intf->cur_altsetting->desc.bNumEndpoints); err = -EINVAL; goto fail3; } - endpoint = &intf->altsetting[0].endpoint[0].desc; + endpoint = &intf->cur_altsetting->endpoint[0].desc; /* Go set up our URB, which is called when the tablet receives * input. diff --git a/drivers/input/tablet/gtco.c b/drivers/input/tablet/gtco.c index 35031228a6d0..799c94dda651 100644 --- a/drivers/input/tablet/gtco.c +++ b/drivers/input/tablet/gtco.c @@ -875,18 +875,14 @@ static int gtco_probe(struct usb_interface *usbinterface, } /* Sanity check that a device has an endpoint */ - if (usbinterface->altsetting[0].desc.bNumEndpoints < 1) { + if (usbinterface->cur_altsetting->desc.bNumEndpoints < 1) { dev_err(&usbinterface->dev, "Invalid number of endpoints\n"); error = -EINVAL; goto err_free_urb; } - /* - * The endpoint is always altsetting 0, we know this since we know - * this device only has one interrupt endpoint - */ - endpoint = &usbinterface->altsetting[0].endpoint[0].desc; + endpoint = &usbinterface->cur_altsetting->endpoint[0].desc; /* Some debug */ dev_dbg(&usbinterface->dev, "gtco # interfaces: %d\n", usbinterface->num_altsetting); @@ -973,7 +969,7 @@ static int gtco_probe(struct usb_interface *usbinterface, input_dev->dev.parent = &usbinterface->dev; /* Setup the URB, it will be posted later on open of input device */ - endpoint = &usbinterface->altsetting[0].endpoint[0].desc; + endpoint = &usbinterface->cur_altsetting->endpoint[0].desc; usb_fill_int_urb(gtco->urbinfo, udev, diff --git a/drivers/input/tablet/pegasus_notetaker.c b/drivers/input/tablet/pegasus_notetaker.c index 47de5a81172f..2319144802c9 100644 --- a/drivers/input/tablet/pegasus_notetaker.c +++ b/drivers/input/tablet/pegasus_notetaker.c @@ -260,7 +260,7 @@ static int pegasus_probe(struct usb_interface *intf, return -ENODEV; /* Sanity check that the device has an endpoint */ - if (intf->altsetting[0].desc.bNumEndpoints < 1) { + if (intf->cur_altsetting->desc.bNumEndpoints < 1) { dev_err(&intf->dev, "Invalid number of endpoints\n"); return -EINVAL; } diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 59aaac43db91..138d1f3b12b2 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -3257,6 +3257,8 @@ static int __maybe_unused mxt_suspend(struct device *dev) mutex_unlock(&input_dev->mutex); + disable_irq(data->irq); + return 0; } @@ -3269,6 +3271,8 @@ static int __maybe_unused mxt_resume(struct device *dev) if (!input_dev) return 0; + enable_irq(data->irq); + mutex_lock(&input_dev->mutex); if (input_dev->users) diff --git a/drivers/input/touchscreen/cyttsp4_core.c b/drivers/input/touchscreen/cyttsp4_core.c index beaf61ce775b..a9af83de88bb 100644 --- a/drivers/input/touchscreen/cyttsp4_core.c +++ b/drivers/input/touchscreen/cyttsp4_core.c @@ -1972,11 +1972,6 @@ static int cyttsp4_mt_probe(struct cyttsp4 *cd) /* get sysinfo */ md->si = &cd->sysinfo; - if (!md->si) { - dev_err(dev, "%s: Fail get sysinfo pointer from core p=%p\n", - __func__, md->si); - goto error_get_sysinfo; - } rc = cyttsp4_setup_input_device(cd); if (rc) @@ -1986,8 +1981,6 @@ static int cyttsp4_mt_probe(struct cyttsp4 *cd) error_init_input: input_free_device(md->input); -error_get_sysinfo: - input_set_drvdata(md->input, NULL); error_alloc_failed: dev_err(dev, "%s failed.\n", __func__); return rc; diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index 5bf63f76ddda..4eff5b44640c 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -888,6 +888,7 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, { const struct edt_i2c_chip_data *chip_data; struct edt_ft5x06_ts_data *tsdata; + u8 buf[2] = { 0xfc, 0x00 }; struct input_dev *input; unsigned long irq_flags; int error; @@ -957,6 +958,12 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client, return error; } + /* + * Dummy read access. EP0700MLP1 returns bogus data on the first + * register read access and ignores writes. + */ + edt_ft5x06_ts_readwrite(tsdata->client, 2, buf, 2, buf); + edt_ft5x06_ts_set_regs(tsdata); edt_ft5x06_ts_get_defaults(&client->dev, tsdata); edt_ft5x06_ts_get_parameters(tsdata); diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index 2bfa89ec552c..777dd5b159d3 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -92,6 +92,15 @@ static const unsigned long goodix_irq_flags[] = { */ static const struct dmi_system_id rotated_screen[] = { #if defined(CONFIG_DMI) && defined(CONFIG_X86) + { + .ident = "Teclast X89", + .matches = { + /* tPAD is too generic, also match on bios date */ + DMI_MATCH(DMI_BOARD_VENDOR, "TECLAST"), + DMI_MATCH(DMI_BOARD_NAME, "tPAD"), + DMI_MATCH(DMI_BIOS_DATE, "12/19/2014"), + }, + }, { .ident = "WinBook TW100", .matches = { diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c index 0dbcf105f7db..7c0eeef29b3c 100644 --- a/drivers/input/touchscreen/silead.c +++ b/drivers/input/touchscreen/silead.c @@ -534,20 +534,33 @@ static int __maybe_unused silead_ts_suspend(struct device *dev) static int __maybe_unused silead_ts_resume(struct device *dev) { struct i2c_client *client = to_i2c_client(dev); + bool second_try = false; int error, status; silead_ts_set_power(client, SILEAD_POWER_ON); + retry: error = silead_ts_reset(client); if (error) return error; + if (second_try) { + error = silead_ts_load_fw(client); + if (error) + return error; + } + error = silead_ts_startup(client); if (error) return error; status = silead_ts_get_status(client); if (status != SILEAD_STATUS_OK) { + if (!second_try) { + second_try = true; + dev_dbg(dev, "Reloading firmware after unsuccessful resume\n"); + goto retry; + } dev_err(dev, "Resume error, status: 0x%02x\n", status); return -ENODEV; } diff --git a/drivers/input/touchscreen/st1232.c b/drivers/input/touchscreen/st1232.c index be5615c6bf8f..482f97e1c9d3 100644 --- a/drivers/input/touchscreen/st1232.c +++ b/drivers/input/touchscreen/st1232.c @@ -203,6 +203,7 @@ static int st1232_ts_probe(struct i2c_client *client, input_dev->id.bustype = BUS_I2C; input_dev->dev.parent = &client->dev; + __set_bit(INPUT_PROP_DIRECT, input_dev->propbit); __set_bit(EV_SYN, input_dev->evbit); __set_bit(EV_KEY, input_dev->evbit); __set_bit(EV_ABS, input_dev->evbit); diff --git a/drivers/input/touchscreen/sun4i-ts.c b/drivers/input/touchscreen/sun4i-ts.c index d2e14d9e5975..ab44eb0352d0 100644 --- a/drivers/input/touchscreen/sun4i-ts.c +++ b/drivers/input/touchscreen/sun4i-ts.c @@ -246,6 +246,7 @@ static int sun4i_ts_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; struct device *hwmon; + struct thermal_zone_device *thermal; int error; u32 reg; bool ts_attached; @@ -365,7 +366,10 @@ static int sun4i_ts_probe(struct platform_device *pdev) if (IS_ERR(hwmon)) return PTR_ERR(hwmon); - devm_thermal_zone_of_sensor_register(ts->dev, 0, ts, &sun4i_ts_tz_ops); + thermal = devm_thermal_zone_of_sensor_register(ts->dev, 0, ts, + &sun4i_ts_tz_ops); + if (IS_ERR(thermal)) + return PTR_ERR(thermal); writel(TEMP_IRQ_EN(1), ts->base + TP_INT_FIFOC); diff --git a/drivers/input/touchscreen/sur40.c b/drivers/input/touchscreen/sur40.c index f16f8358c70a..98e03d0ca03c 100644 --- a/drivers/input/touchscreen/sur40.c +++ b/drivers/input/touchscreen/sur40.c @@ -537,7 +537,7 @@ static int sur40_probe(struct usb_interface *interface, int error; /* Check if we really have the right interface. */ - iface_desc = &interface->altsetting[0]; + iface_desc = interface->cur_altsetting; if (iface_desc->desc.bInterfaceClass != 0xFF) return -ENODEV; diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 07b6cf58fd99..778f167be2d3 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -139,10 +139,14 @@ static struct lock_class_key reserved_rbtree_key; static inline int match_hid_uid(struct device *dev, struct acpihid_map_entry *entry) { + struct acpi_device *adev = ACPI_COMPANION(dev); const char *hid, *uid; - hid = acpi_device_hid(ACPI_COMPANION(dev)); - uid = acpi_device_uid(ACPI_COMPANION(dev)); + if (!adev) + return -ENODEV; + + hid = acpi_device_hid(adev); + uid = acpi_device_uid(adev); if (!hid || !(*hid)) return -ENODEV; @@ -2156,6 +2160,8 @@ skip_ats_check: */ domain_flush_tlb_pde(domain); + domain_flush_complete(domain); + return ret; } diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 6a3cf4d0bd5e..4d2920988d60 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -420,6 +420,9 @@ static void iommu_enable(struct amd_iommu *iommu) static void iommu_disable(struct amd_iommu *iommu) { + if (!iommu->mmio_base) + return; + /* Disable command buffer */ iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 60fbd88cac38..c9ef3557bddd 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1145,7 +1145,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, } arm_smmu_sync_ste_for_sid(smmu, sid); - dst[0] = cpu_to_le64(val); + /* See comment in arm_smmu_write_ctx_desc() */ + WRITE_ONCE(dst[0], cpu_to_le64(val)); arm_smmu_sync_ste_for_sid(smmu, sid); /* It's likely that we'll want to use the new STE soon */ diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 1189bbe1a437..a092625add72 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -211,15 +211,15 @@ static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie, start -= iova_offset(iovad, start); num_pages = iova_align(iovad, end - start) >> iova_shift(iovad); - msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL); - if (!msi_page) - return -ENOMEM; - for (i = 0; i < num_pages; i++) { - msi_page[i].phys = start; - msi_page[i].iova = start; - INIT_LIST_HEAD(&msi_page[i].list); - list_add(&msi_page[i].list, &cookie->msi_page_list); + msi_page = kmalloc(sizeof(*msi_page), GFP_KERNEL); + if (!msi_page) + return -ENOMEM; + + msi_page->phys = start; + msi_page->iova = start; + INIT_LIST_HEAD(&msi_page->list); + list_add(&msi_page->list, &cookie->msi_page_list); start += iovad->granule; } diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 38d0128b8135..1f527ca60955 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -139,6 +140,13 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) BUG_ON(dev->is_virtfn); + /* + * Ignore devices that have a domain number higher than what can + * be looked up in DMAR, e.g. VMD subdevices with domain 0x10000 + */ + if (pci_domain_nr(dev->bus) > U16_MAX) + return NULL; + /* Only generate path[] for device addition event */ if (event == BUS_NOTIFY_ADD_DEVICE) for (tmp = dev; tmp; tmp = tmp->bus->self) @@ -451,12 +459,13 @@ static int __init dmar_parse_one_andd(struct acpi_dmar_header *header, /* Check for NUL termination within the designated length */ if (strnlen(andd->device_name, header->length - 8) == header->length - 8) { - WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND, + pr_warn(FW_BUG "Your BIOS is broken; ANDD object name is not NUL-terminated\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); return -EINVAL; } pr_info("ANDD device: %x name: %s\n", andd->device_number, @@ -482,14 +491,14 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg) return 0; } } - WARN_TAINT( - 1, TAINT_FIRMWARE_WORKAROUND, + pr_warn(FW_BUG "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", - drhd->reg_base_addr, + rhsa->base_address, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); return 0; } @@ -835,14 +844,14 @@ int __init dmar_table_init(void) static void warn_invalid_dmar(u64 addr, const char *message) { - WARN_TAINT_ONCE( - 1, TAINT_FIRMWARE_WORKAROUND, + pr_warn_once(FW_BUG "Your BIOS is broken; DMAR reported at address %llx%s!\n" "BIOS vendor: %s; Ver: %s; Product Version: %s\n", addr, message, dmi_get_system_info(DMI_BIOS_VENDOR), dmi_get_system_info(DMI_BIOS_VERSION), dmi_get_system_info(DMI_PRODUCT_VERSION)); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); } static int __ref diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 523d0889c2a4..db1b546134f5 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -442,7 +442,6 @@ struct dmar_rmrr_unit { u64 end_address; /* reserved end address */ struct dmar_dev_scope *devices; /* target devices */ int devices_cnt; /* target device count */ - struct iommu_resv_region *resv; /* reserved region handle */ }; struct dmar_atsr_unit { @@ -3361,9 +3360,12 @@ static int __init init_dmars(void) iommu_identity_mapping |= IDENTMAP_ALL; #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA - iommu_identity_mapping |= IDENTMAP_GFX; + dmar_map_gfx = 0; #endif + if (!dmar_map_gfx) + iommu_identity_mapping |= IDENTMAP_GFX; + check_tylersburg_isoch(); if (iommu_identity_mapping) { @@ -3982,10 +3984,11 @@ static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev) /* we know that the this iommu should be at offset 0xa000 from vtbar */ drhd = dmar_find_matched_drhd_unit(pdev); - if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000, - TAINT_FIRMWARE_WORKAROUND, - "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n")) + if (!drhd || drhd->reg_base_addr - vtbar != 0xa000) { + pr_warn_once(FW_BUG "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; + } } DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu); @@ -4168,7 +4171,6 @@ static inline void init_iommu_pm_ops(void) {} int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg) { struct acpi_dmar_reserved_memory *rmrr; - int prot = DMA_PTE_READ|DMA_PTE_WRITE; struct dmar_rmrr_unit *rmrru; size_t length; @@ -4182,22 +4184,16 @@ int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg) rmrru->end_address = rmrr->end_address; length = rmrr->end_address - rmrr->base_address + 1; - rmrru->resv = iommu_alloc_resv_region(rmrr->base_address, length, prot, - IOMMU_RESV_DIRECT); - if (!rmrru->resv) - goto free_rmrru; rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1), ((void *)rmrr) + rmrr->header.length, &rmrru->devices_cnt); if (rmrru->devices_cnt && rmrru->devices == NULL) - goto free_all; + goto free_rmrru; list_add(&rmrru->list, &dmar_rmrr_units); return 0; -free_all: - kfree(rmrru->resv); free_rmrru: kfree(rmrru); out: @@ -4415,7 +4411,6 @@ static void intel_iommu_free_dmars(void) list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) { list_del(&rmrru->list); dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt); - kfree(rmrru->resv); kfree(rmrru); } @@ -5129,8 +5124,10 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, u64 phys = 0; pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level); - if (pte) - phys = dma_pte_addr(pte); + if (pte && dma_pte_present(pte)) + phys = dma_pte_addr(pte) + + (iova & (BIT_MASK(level_to_offset_bits(level) + + VTD_PAGE_SHIFT) - 1)); return phys; } @@ -5183,22 +5180,33 @@ static void intel_iommu_remove_device(struct device *dev) static void intel_iommu_get_resv_regions(struct device *device, struct list_head *head) { + int prot = DMA_PTE_READ | DMA_PTE_WRITE; struct iommu_resv_region *reg; struct dmar_rmrr_unit *rmrr; struct device *i_dev; int i; - rcu_read_lock(); + down_read(&dmar_global_lock); for_each_rmrr_units(rmrr) { for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, i, i_dev) { + struct iommu_resv_region *resv; + size_t length; + if (i_dev != device) continue; - list_add_tail(&rmrr->resv->list, head); + length = rmrr->end_address - rmrr->base_address + 1; + resv = iommu_alloc_resv_region(rmrr->base_address, + length, prot, + IOMMU_RESV_DIRECT); + if (!resv) + break; + + list_add_tail(&resv->list, head); } } - rcu_read_unlock(); + up_read(&dmar_global_lock); reg = iommu_alloc_resv_region(IOAPIC_RANGE_START, IOAPIC_RANGE_END - IOAPIC_RANGE_START + 1, @@ -5213,10 +5221,8 @@ static void intel_iommu_put_resv_regions(struct device *dev, { struct iommu_resv_region *entry, *next; - list_for_each_entry_safe(entry, next, head, list) { - if (entry->type == IOMMU_RESV_MSI) - kfree(entry); - } + list_for_each_entry_safe(entry, next, head, list) + kfree(entry); } #ifdef CONFIG_INTEL_IOMMU_SVM diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 51e5c43caed1..cef162772838 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -551,13 +551,12 @@ static int arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, return 0; tablep = iopte_deref(pte, data); + } else if (unmap_idx >= 0) { + io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); + return size; } - if (unmap_idx < 0) - return __arm_lpae_unmap(data, iova, size, lvl, tablep); - - io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); - return size; + return __arm_lpae_unmap(data, iova, size, lvl, tablep); } static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 93a5ae28ee4c..18c4cdd8c586 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -628,6 +628,7 @@ err_put_group: mutex_unlock(&group->mutex); dev->iommu_group = NULL; kobject_put(group->devices_kobj); + sysfs_remove_link(group->devices_kobj, device->name); err_free_name: kfree(device->name); err_remove_link: @@ -1958,9 +1959,9 @@ int iommu_request_dm_for_dev(struct device *dev) int ret; /* Device must already be in a group before calling this function */ - group = iommu_group_get_for_dev(dev); - if (IS_ERR(group)) - return PTR_ERR(group); + group = iommu_group_get(dev); + if (!group) + return -EINVAL; mutex_lock(&group->mutex); diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 3cc98c332f05..e44f2bbf53ba 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -145,6 +145,30 @@ struct mtk_iommu_resv_iova_region { static struct iommu_ops mtk_iommu_ops; +/* + * In M4U 4GB mode, the physical address is remapped as below: + * + * CPU Physical address: + * ==================== + * + * 0 1G 2G 3G 4G 5G + * |---A---|---B---|---C---|---D---|---E---| + * +--I/O--+------------Memory-------------+ + * + * IOMMU output physical address: + * ============================= + * + * 4G 5G 6G 7G 8G + * |---E---|---B---|---C---|---D---| + * +------------Memory-------------+ + * + * The Region 'A'(I/O) can NOT be mapped by M4U; For Region 'B'/'C'/'D', the + * bit32 of the CPU physical address always is needed to set, and for Region + * 'E', the CPU physical address keep as is. + * Additionally, The iommu consumers always use the CPU phyiscal address. + */ +#define MTK_IOMMU_4GB_MODE_REMAP_BASE 0x40000000 + static LIST_HEAD(m4ulist); /* List all the M4U HWs */ #define for_each_m4u(data) list_for_each_entry(data, &m4ulist, list) @@ -594,6 +618,9 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, pa = dom->iop->iova_to_phys(dom->iop, iova); spin_unlock_irqrestore(&dom->pgtlock, flags); + if (data->enable_4GB && pa && pa < MTK_IOMMU_4GB_MODE_REMAP_BASE) + pa |= BIT_ULL(32); + return pa; } diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 4a2c4378b3db..444cfdecaf89 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -327,21 +327,19 @@ static void qcom_iommu_domain_free(struct iommu_domain *domain) { struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); - if (WARN_ON(qcom_domain->iommu)) /* forgot to detach? */ - return; - iommu_put_dma_cookie(domain); - /* NOTE: unmap can be called after client device is powered off, - * for example, with GPUs or anything involving dma-buf. So we - * cannot rely on the device_link. Make sure the IOMMU is on to - * avoid unclocked accesses in the TLB inv path: - */ - pm_runtime_get_sync(qcom_domain->iommu->dev); - - free_io_pgtable_ops(qcom_domain->pgtbl_ops); - - pm_runtime_put_sync(qcom_domain->iommu->dev); + if (qcom_domain->iommu) { + /* + * NOTE: unmap can be called after client device is powered + * off, for example, with GPUs or anything involving dma-buf. + * So we cannot rely on the device_link. Make sure the IOMMU + * is on to avoid unclocked accesses in the TLB inv path: + */ + pm_runtime_get_sync(qcom_domain->iommu->dev); + free_io_pgtable_ops(qcom_domain->pgtbl_ops); + pm_runtime_put_sync(qcom_domain->iommu->dev); + } kfree(qcom_domain); } @@ -386,7 +384,7 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); unsigned i; - if (!qcom_domain->iommu) + if (WARN_ON(!qcom_domain->iommu)) return; pm_runtime_get_sync(qcom_iommu->dev); @@ -397,8 +395,6 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0); } pm_runtime_put_sync(qcom_iommu->dev); - - qcom_domain->iommu = NULL; } static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 40eb8138546a..848dac3e4580 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -156,9 +156,9 @@ static bool smmu_dma_addr_valid(struct tegra_smmu *smmu, dma_addr_t addr) return (addr & smmu->pfn_mask) == addr; } -static dma_addr_t smmu_pde_to_dma(u32 pde) +static dma_addr_t smmu_pde_to_dma(struct tegra_smmu *smmu, u32 pde) { - return pde << 12; + return (dma_addr_t)(pde & smmu->pfn_mask) << 12; } static void smmu_flush_ptc_all(struct tegra_smmu *smmu) @@ -543,6 +543,7 @@ static u32 *tegra_smmu_pte_lookup(struct tegra_smmu_as *as, unsigned long iova, dma_addr_t *dmap) { unsigned int pd_index = iova_pd_index(iova); + struct tegra_smmu *smmu = as->smmu; struct page *pt_page; u32 *pd; @@ -551,7 +552,7 @@ static u32 *tegra_smmu_pte_lookup(struct tegra_smmu_as *as, unsigned long iova, return NULL; pd = page_address(as->pd); - *dmap = smmu_pde_to_dma(pd[pd_index]); + *dmap = smmu_pde_to_dma(smmu, pd[pd_index]); return tegra_smmu_pte_offset(pt_page, iova); } @@ -593,7 +594,7 @@ static u32 *as_get_pte(struct tegra_smmu_as *as, dma_addr_t iova, } else { u32 *pd = page_address(as->pd); - *dmap = smmu_pde_to_dma(pd[pde]); + *dmap = smmu_pde_to_dma(smmu, pd[pde]); } return tegra_smmu_pte_offset(as->pts[pde], iova); @@ -618,7 +619,7 @@ static void tegra_smmu_pte_put_use(struct tegra_smmu_as *as, unsigned long iova) if (--as->count[pde] == 0) { struct tegra_smmu *smmu = as->smmu; u32 *pd = page_address(as->pd); - dma_addr_t pte_dma = smmu_pde_to_dma(pd[pde]); + dma_addr_t pte_dma = smmu_pde_to_dma(smmu, pd[pde]); tegra_smmu_set_pde(as, iova, 0); diff --git a/drivers/irqchip/irq-bcm7038-l1.c b/drivers/irqchip/irq-bcm7038-l1.c index 0b9a8b709abf..b32988cac80c 100644 --- a/drivers/irqchip/irq-bcm7038-l1.c +++ b/drivers/irqchip/irq-bcm7038-l1.c @@ -284,6 +284,10 @@ static int __init bcm7038_l1_init_one(struct device_node *dn, pr_err("failed to map parent interrupt %d\n", parent_irq); return -EINVAL; } + + if (of_property_read_bool(dn, "brcm,irq-can-wake")) + enable_irq_wake(parent_irq); + irq_set_chained_handler_and_data(parent_irq, bcm7038_l1_irq_handle, intc); diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 52238e6bed39..799df1e598db 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -527,7 +527,7 @@ static struct its_collection *its_build_invall_cmd(struct its_cmd_block *cmd, struct its_cmd_desc *desc) { its_encode_cmd(cmd, GITS_CMD_INVALL); - its_encode_collection(cmd, desc->its_mapc_cmd.col->col_id); + its_encode_collection(cmd, desc->its_invall_cmd.col->col_id); its_fixup_cmd(cmd); diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 8db85ceb0806..cd09974c4469 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -1238,7 +1238,7 @@ static void __init gic_of_setup_kvm_info(struct device_node *node) __weak int __init mt_gic_ext_init(void) { return 0; } -static int __init gic_of_init(struct device_node *node, struct device_node *parent) +static int __init gicv3_of_init(struct device_node *node, struct device_node *parent) { void __iomem *dist_base; struct redist_region *rdist_regs; @@ -1306,7 +1306,7 @@ out_unmap_dist: return err; } -IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gic_of_init); +IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gicv3_of_init); #ifdef CONFIG_ACPI static struct @@ -1315,6 +1315,7 @@ static struct struct redist_region *redist_regs; u32 nr_redist_regions; bool single_redist; + int enabled_rdists; u32 maint_irq; int maint_irq_mode; phys_addr_t vcpu_base; @@ -1409,8 +1410,10 @@ static int __init gic_acpi_match_gicc(struct acpi_subtable_header *header, * If GICC is enabled and has valid gicr base address, then it means * GICR base is presented via GICC */ - if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) + if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) { + acpi_data.enabled_rdists++; return 0; + } /* * It's perfectly valid firmware can pass disabled GICC entry, driver @@ -1440,8 +1443,10 @@ static int __init gic_acpi_count_gicr_regions(void) count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, gic_acpi_match_gicc, 0); - if (count > 0) + if (count > 0) { acpi_data.single_redist = true; + count = acpi_data.enabled_rdists; + } return count; } diff --git a/drivers/irqchip/irq-ingenic.c b/drivers/irqchip/irq-ingenic.c index fc5953dea509..b2e16dca76a6 100644 --- a/drivers/irqchip/irq-ingenic.c +++ b/drivers/irqchip/irq-ingenic.c @@ -117,6 +117,14 @@ static int __init ingenic_intc_of_init(struct device_node *node, goto out_unmap_irq; } + domain = irq_domain_add_legacy(node, num_chips * 32, + JZ4740_IRQ_BASE, 0, + &irq_domain_simple_ops, NULL); + if (!domain) { + err = -ENOMEM; + goto out_unmap_base; + } + for (i = 0; i < num_chips; i++) { /* Mask all irqs */ writel(0xffffffff, intc->base + (i * CHIP_SIZE) + @@ -143,14 +151,11 @@ static int __init ingenic_intc_of_init(struct device_node *node, IRQ_NOPROBE | IRQ_LEVEL); } - domain = irq_domain_add_legacy(node, num_chips * 32, JZ4740_IRQ_BASE, 0, - &irq_domain_simple_ops, NULL); - if (!domain) - pr_warn("unable to register IRQ domain\n"); - setup_irq(parent_irq, &intc_cascade_action); return 0; +out_unmap_base: + iounmap(intc->base); out_unmap_irq: irq_dispose_mapping(parent_irq); out_free: diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 98b6e1d4b1a6..f7fdbf5d183b 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -381,6 +381,7 @@ static struct platform_driver mbigen_platform_driver = { .name = "Hisilicon MBIGEN-V2", .of_match_table = mbigen_of_match, .acpi_match_table = ACPI_PTR(mbigen_acpi_match), + .suppress_bind_attrs = true, }, .probe = mbigen_device_probe, }; diff --git a/drivers/irqchip/irq-mvebu-icu.c b/drivers/irqchip/irq-mvebu-icu.c index e18c48d3a92e..6a77b9ea8e41 100644 --- a/drivers/irqchip/irq-mvebu-icu.c +++ b/drivers/irqchip/irq-mvebu-icu.c @@ -92,7 +92,7 @@ static int mvebu_icu_irq_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *hwirq, unsigned int *type) { - struct mvebu_icu *icu = d->host_data; + struct mvebu_icu *icu = platform_msi_get_host_data(d); unsigned int icu_group; /* Check the count of the parameters in dt */ diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c index 91efbf0f19f9..da45049de97e 100644 --- a/drivers/isdn/capi/capi.c +++ b/drivers/isdn/capi/capi.c @@ -743,7 +743,7 @@ capi_poll(struct file *file, poll_table *wait) poll_wait(file, &(cdev->recvwait), wait); mask = POLLOUT | POLLWRNORM; - if (!skb_queue_empty(&cdev->recvqueue)) + if (!skb_queue_empty_lockless(&cdev->recvqueue)) mask |= POLLIN | POLLRDNORM; return mask; } diff --git a/drivers/isdn/gigaset/usb-gigaset.c b/drivers/isdn/gigaset/usb-gigaset.c index eade36dafa34..4c239f18240d 100644 --- a/drivers/isdn/gigaset/usb-gigaset.c +++ b/drivers/isdn/gigaset/usb-gigaset.c @@ -574,8 +574,7 @@ static int gigaset_initcshw(struct cardstate *cs) { struct usb_cardstate *ucs; - cs->hw.usb = ucs = - kmalloc(sizeof(struct usb_cardstate), GFP_KERNEL); + cs->hw.usb = ucs = kzalloc(sizeof(struct usb_cardstate), GFP_KERNEL); if (!ucs) { pr_err("out of memory\n"); return -ENOMEM; @@ -587,9 +586,6 @@ static int gigaset_initcshw(struct cardstate *cs) ucs->bchars[3] = 0; ucs->bchars[4] = 0x11; ucs->bchars[5] = 0x13; - ucs->bulk_out_buffer = NULL; - ucs->bulk_out_urb = NULL; - ucs->read_urb = NULL; tasklet_init(&cs->write_tasklet, gigaset_modem_fill, (unsigned long) cs); @@ -688,6 +684,11 @@ static int gigaset_probe(struct usb_interface *interface, return -ENODEV; } + if (hostif->desc.bNumEndpoints < 2) { + dev_err(&interface->dev, "missing endpoints\n"); + return -ENODEV; + } + dev_info(&udev->dev, "%s: Device matched ... !\n", __func__); /* allocate memory for our device state and initialize it */ @@ -707,6 +708,12 @@ static int gigaset_probe(struct usb_interface *interface, endpoint = &hostif->endpoint[0].desc; + if (!usb_endpoint_is_bulk_out(endpoint)) { + dev_err(&interface->dev, "missing bulk-out endpoint\n"); + retval = -ENODEV; + goto error; + } + buffer_size = le16_to_cpu(endpoint->wMaxPacketSize); ucs->bulk_out_size = buffer_size; ucs->bulk_out_epnum = usb_endpoint_num(endpoint); @@ -726,6 +733,12 @@ static int gigaset_probe(struct usb_interface *interface, endpoint = &hostif->endpoint[1].desc; + if (!usb_endpoint_is_int_in(endpoint)) { + dev_err(&interface->dev, "missing int-in endpoint\n"); + retval = -ENODEV; + goto error; + } + ucs->busy = 0; ucs->read_urb = usb_alloc_urb(0, GFP_KERNEL); diff --git a/drivers/isdn/mISDN/tei.c b/drivers/isdn/mISDN/tei.c index 12d9e5f4beb1..58635b5f296f 100644 --- a/drivers/isdn/mISDN/tei.c +++ b/drivers/isdn/mISDN/tei.c @@ -1180,8 +1180,7 @@ static int ctrl_teimanager(struct manager *mgr, void *arg) { /* currently we only have one option */ - int *val = (int *)arg; - int ret = 0; + unsigned int *val = (unsigned int *)arg; switch (val[0]) { case IMCLEAR_L2: @@ -1197,9 +1196,9 @@ ctrl_teimanager(struct manager *mgr, void *arg) test_and_clear_bit(OPTION_L1_HOLD, &mgr->options); break; default: - ret = -EINVAL; + return -EINVAL; } - return ret; + return 0; } /* This function does create a L2 for fixed TEI in NT Mode */ diff --git a/drivers/leds/leds-pca963x.c b/drivers/leds/leds-pca963x.c index 3bf9a1271819..88c7313cf869 100644 --- a/drivers/leds/leds-pca963x.c +++ b/drivers/leds/leds-pca963x.c @@ -43,6 +43,8 @@ #define PCA963X_LED_PWM 0x2 /* Controlled through PWM */ #define PCA963X_LED_GRP_PWM 0x3 /* Controlled through PWM/GRPPWM */ +#define PCA963X_MODE2_OUTDRV 0x04 /* Open-drain or totem pole */ +#define PCA963X_MODE2_INVRT 0x10 /* Normal or inverted direction */ #define PCA963X_MODE2_DMBLNK 0x20 /* Enable blinking */ #define PCA963X_MODE1 0x00 @@ -462,12 +464,12 @@ static int pca963x_probe(struct i2c_client *client, PCA963X_MODE2); /* Configure output: open-drain or totem pole (push-pull) */ if (pdata->outdrv == PCA963X_OPEN_DRAIN) - mode2 |= 0x01; + mode2 &= ~PCA963X_MODE2_OUTDRV; else - mode2 |= 0x05; + mode2 |= PCA963X_MODE2_OUTDRV; /* Configure direction: normal or inverted */ if (pdata->dir == PCA963X_INVERTED) - mode2 |= 0x10; + mode2 |= PCA963X_MODE2_INVRT; i2c_smbus_write_byte_data(pca963x->chip->client, PCA963X_MODE2, mode2); } diff --git a/drivers/lightnvm/pblk-rb.c b/drivers/lightnvm/pblk-rb.c index c0dd17a82170..73de2deaba67 100644 --- a/drivers/lightnvm/pblk-rb.c +++ b/drivers/lightnvm/pblk-rb.c @@ -825,8 +825,8 @@ int pblk_rb_tear_down_check(struct pblk_rb *rb) } out: - spin_unlock(&rb->w_lock); spin_unlock_irq(&rb->s_lock); + spin_unlock(&rb->w_lock); return ret; } diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c index da7f4fc1a51d..a0f61eb853c5 100644 --- a/drivers/macintosh/windfarm_smu_sat.c +++ b/drivers/macintosh/windfarm_smu_sat.c @@ -22,14 +22,6 @@ #define VERSION "1.0" -#define DEBUG - -#ifdef DEBUG -#define DBG(args...) printk(args) -#else -#define DBG(args...) do { } while(0) -#endif - /* If the cache is older than 800ms we'll refetch it */ #define MAX_AGE msecs_to_jiffies(800) @@ -106,13 +98,10 @@ struct smu_sdbp_header *smu_sat_get_sdb_partition(unsigned int sat_id, int id, buf[i+2] = data[3]; buf[i+3] = data[2]; } -#ifdef DEBUG - DBG(KERN_DEBUG "sat %d partition %x:", sat_id, id); - for (i = 0; i < len; ++i) - DBG(" %x", buf[i]); - DBG("\n"); -#endif + printk(KERN_DEBUG "sat %d partition %x:", sat_id, id); + print_hex_dump(KERN_DEBUG, " ", DUMP_PREFIX_OFFSET, + 16, 1, buf, len, false); if (size) *size = len; return (struct smu_sdbp_header *) buf; @@ -132,13 +121,13 @@ static int wf_sat_read_cache(struct wf_sat *sat) if (err < 0) return err; sat->last_read = jiffies; + #ifdef LOTSA_DEBUG { int i; - DBG(KERN_DEBUG "wf_sat_get: data is"); - for (i = 0; i < 16; ++i) - DBG(" %.2x", sat->cache[i]); - DBG("\n"); + printk(KERN_DEBUG "wf_sat_get: data is"); + print_hex_dump(KERN_DEBUG, " ", DUMP_PREFIX_OFFSET, + 16, 1, sat->cache, 16, false); } #endif return 0; diff --git a/drivers/mailbox/mailbox-test.c b/drivers/mailbox/mailbox-test.c index 93f3d4d61fa7..546ba140f83d 100644 --- a/drivers/mailbox/mailbox-test.c +++ b/drivers/mailbox/mailbox-test.c @@ -363,22 +363,24 @@ static int mbox_test_probe(struct platform_device *pdev) /* It's okay for MMIO to be NULL */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - size = resource_size(res); tdev->tx_mmio = devm_ioremap_resource(&pdev->dev, res); - if (PTR_ERR(tdev->tx_mmio) == -EBUSY) + if (PTR_ERR(tdev->tx_mmio) == -EBUSY) { /* if reserved area in SRAM, try just ioremap */ + size = resource_size(res); tdev->tx_mmio = devm_ioremap(&pdev->dev, res->start, size); - else if (IS_ERR(tdev->tx_mmio)) + } else if (IS_ERR(tdev->tx_mmio)) { tdev->tx_mmio = NULL; + } /* If specified, second reg entry is Rx MMIO */ res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - size = resource_size(res); tdev->rx_mmio = devm_ioremap_resource(&pdev->dev, res); - if (PTR_ERR(tdev->rx_mmio) == -EBUSY) + if (PTR_ERR(tdev->rx_mmio) == -EBUSY) { + size = resource_size(res); tdev->rx_mmio = devm_ioremap(&pdev->dev, res->start, size); - else if (IS_ERR(tdev->rx_mmio)) + } else if (IS_ERR(tdev->rx_mmio)) { tdev->rx_mmio = tdev->tx_mmio; + } tdev->tx_channel = mbox_test_request_channel(pdev, "tx"); tdev->rx_channel = mbox_test_request_channel(pdev, "rx"); diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c index 44b49a2676f0..055c90b8253c 100644 --- a/drivers/mailbox/mailbox.c +++ b/drivers/mailbox/mailbox.c @@ -351,7 +351,7 @@ struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index) init_completion(&chan->tx_complete); if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone) - chan->txdone_method |= TXDONE_BY_ACK; + chan->txdone_method = TXDONE_BY_ACK; spin_unlock_irqrestore(&chan->lock, flags); @@ -420,7 +420,7 @@ void mbox_free_channel(struct mbox_chan *chan) spin_lock_irqsave(&chan->lock, flags); chan->cl = NULL; chan->active_req = NULL; - if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK)) + if (chan->txdone_method == TXDONE_BY_ACK) chan->txdone_method = TXDONE_BY_POLL; module_put(chan->mbox->dev->driver->owner); diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c index 9b7005e1345e..27c2294be51a 100644 --- a/drivers/mailbox/pcc.c +++ b/drivers/mailbox/pcc.c @@ -266,7 +266,7 @@ struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl, init_completion(&chan->tx_complete); if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone) - chan->txdone_method |= TXDONE_BY_ACK; + chan->txdone_method = TXDONE_BY_ACK; spin_unlock_irqrestore(&chan->lock, flags); @@ -312,7 +312,7 @@ void pcc_mbox_free_channel(struct mbox_chan *chan) spin_lock_irqsave(&chan->lock, flags); chan->cl = NULL; chan->active_req = NULL; - if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK)) + if (chan->txdone_method == TXDONE_BY_ACK) chan->txdone_method = TXDONE_BY_POLL; spin_unlock_irqrestore(&chan->lock, flags); diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index dfc3bb9bbc39..1725d89cf1ff 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -297,6 +297,27 @@ config MTK_HW_FDE If you have a machine with a MTK crypto engine for FDE, say Y here. If unsure, say N. +config DM_DEFAULT_KEY + tristate "Default-key target support" + depends on BLK_DEV_DM + depends on BLK_INLINE_ENCRYPTION + # dm-default-key doesn't require -o inlinecrypt, but it does currently + # rely on the inline encryption hooks being built into the kernel. + depends on FS_ENCRYPTION_INLINE_CRYPT + help + This device-mapper target allows you to create a device that + assigns a default encryption key to bios that aren't for the + contents of an encrypted file. + + This ensures that all blocks on-disk will be encrypted with + some key, without the performance hit of file contents being + encrypted twice when fscrypt (File-Based Encryption) is used. + + It is only appropriate to use dm-default-key when key + configuration is tightly controlled, like it is in Android, + such that all fscrypt keys are at least as hard to compromise + as the default key. + config DM_SNAPSHOT tristate "Snapshot target" depends on BLK_DEV_DM diff --git a/drivers/md/Makefile b/drivers/md/Makefile index d3016bce66c4..49d752d80f07 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_BLK_DEV_DM_BUILTIN) += dm-builtin.o obj-$(CONFIG_DM_BUFIO) += dm-bufio.o obj-$(CONFIG_DM_BIO_PRISON) += dm-bio-prison.o obj-$(CONFIG_DM_CRYPT) += dm-crypt.o +obj-$(CONFIG_DM_DEFAULT_KEY) += dm-default-key.o obj-$(CONFIG_DM_DELAY) += dm-delay.o obj-$(CONFIG_DM_FLAKEY) += dm-flakey.o obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index 8d1964b472e7..0bfde500af19 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -381,7 +381,8 @@ void bch_btree_keys_stats(struct btree_keys *, struct bset_stats *); /* Bkey utility code */ -#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys) +#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, \ + (unsigned int)(i)->keys) static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned idx) { diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 9406326216f1..96a6583e7b52 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -685,6 +685,8 @@ static unsigned long bch_mca_scan(struct shrinker *shrink, * IO can always make forward progress: */ nr /= c->btree_pages; + if (nr == 0) + nr = 1; nr = min_t(unsigned long, nr, mca_can_free(c)); i = 0; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 1a270e2262f5..690aeb09bbf5 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -905,6 +905,7 @@ static void cached_dev_detach_finish(struct work_struct *w) bch_write_bdev_super(dc, &cl); closure_sync(&cl); + calc_cached_dev_sectors(dc->disk.c); bcache_device_detach(&dc->disk); list_move(&dc->list, &uncached_devices); diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 0cabf31fb163..7eb76a1a2505 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1729,7 +1729,7 @@ void bitmap_flush(struct mddev *mddev) /* * free memory that was allocated */ -void bitmap_free(struct bitmap *bitmap) +void md_bitmap_free(struct bitmap *bitmap) { unsigned long k, pages; struct bitmap_page *bp; @@ -1763,7 +1763,7 @@ void bitmap_free(struct bitmap *bitmap) kfree(bp); kfree(bitmap); } -EXPORT_SYMBOL(bitmap_free); +EXPORT_SYMBOL(md_bitmap_free); void bitmap_wait_behind_writes(struct mddev *mddev) { @@ -1796,7 +1796,7 @@ void bitmap_destroy(struct mddev *mddev) if (mddev->thread) mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT; - bitmap_free(bitmap); + md_bitmap_free(bitmap); } /* @@ -1887,7 +1887,7 @@ struct bitmap *bitmap_create(struct mddev *mddev, int slot) return bitmap; error: - bitmap_free(bitmap); + md_bitmap_free(bitmap); return ERR_PTR(err); } @@ -1958,7 +1958,7 @@ struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot) rv = bitmap_init_from_disk(bitmap, 0); if (rv) { - bitmap_free(bitmap); + md_bitmap_free(bitmap); return ERR_PTR(rv); } diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h index 5df35ca90f58..dd53a978c5f2 100644 --- a/drivers/md/bitmap.h +++ b/drivers/md/bitmap.h @@ -271,7 +271,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, struct bitmap *get_bitmap_from_slot(struct mddev *mddev, int slot); int bitmap_copy_from_slot(struct mddev *mddev, int slot, sector_t *lo, sector_t *hi, bool clear_bits); -void bitmap_free(struct bitmap *bitmap); +void md_bitmap_free(struct bitmap *bitmap); void bitmap_wait_behind_writes(struct mddev *mddev); #endif diff --git a/drivers/md/dm-bio-prison-v1.c b/drivers/md/dm-bio-prison-v1.c index 874841f0fc83..10532a76688e 100644 --- a/drivers/md/dm-bio-prison-v1.c +++ b/drivers/md/dm-bio-prison-v1.c @@ -33,7 +33,7 @@ static struct kmem_cache *_cell_cache; */ struct dm_bio_prison *dm_bio_prison_create(void) { - struct dm_bio_prison *prison = kmalloc(sizeof(*prison), GFP_KERNEL); + struct dm_bio_prison *prison = kzalloc(sizeof(*prison), GFP_KERNEL); if (!prison) return NULL; diff --git a/drivers/md/dm-bio-prison-v2.c b/drivers/md/dm-bio-prison-v2.c index 8ce3a1a588cf..c34ec615420f 100644 --- a/drivers/md/dm-bio-prison-v2.c +++ b/drivers/md/dm-bio-prison-v2.c @@ -35,7 +35,7 @@ static struct kmem_cache *_cell_cache; */ struct dm_bio_prison_v2 *dm_bio_prison_create_v2(struct workqueue_struct *wq) { - struct dm_bio_prison_v2 *prison = kmalloc(sizeof(*prison), GFP_KERNEL); + struct dm_bio_prison_v2 *prison = kzalloc(sizeof(*prison), GFP_KERNEL); if (!prison) return NULL; diff --git a/drivers/md/dm-bow.c b/drivers/md/dm-bow.c index b92da30a3d42..28df18633853 100644 --- a/drivers/md/dm-bow.c +++ b/drivers/md/dm-bow.c @@ -726,6 +726,7 @@ static int dm_bow_ctr(struct dm_target *ti, unsigned int argc, char **argv) rb_insert_color(&br->node, &bc->ranges); ti->discards_supported = true; + ti->may_passthrough_inline_crypto = true; return 0; diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 1e17e6421da3..9440da8b0120 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -33,7 +33,8 @@ #define DM_BUFIO_MEMORY_PERCENT 2 #define DM_BUFIO_VMALLOC_PERCENT 25 -#define DM_BUFIO_WRITEBACK_PERCENT 75 +#define DM_BUFIO_WRITEBACK_RATIO 3 +#define DM_BUFIO_LOW_WATERMARK_RATIO 16 /* * Check buffer ages in this interval (seconds) @@ -146,10 +147,12 @@ enum data_mode { struct dm_buffer { struct rb_node node; struct list_head lru_list; + struct list_head global_list; sector_t block; void *data; enum data_mode data_mode; unsigned char list_mode; /* LIST_* */ + unsigned accessed; unsigned hold_count; blk_status_t read_error; blk_status_t write_error; @@ -222,7 +225,11 @@ static unsigned long dm_bufio_cache_size; */ static unsigned long dm_bufio_cache_size_latch; -static DEFINE_SPINLOCK(param_spinlock); +static DEFINE_SPINLOCK(global_spinlock); + +static LIST_HEAD(global_queue); + +static unsigned long global_num = 0; /* * Buffers are freed after this timeout @@ -238,11 +245,6 @@ static unsigned long dm_bufio_current_allocated; /*----------------------------------------------------------------*/ -/* - * Per-client cache: dm_bufio_cache_size / dm_bufio_client_count - */ -static unsigned long dm_bufio_cache_size_per_client; - /* * The current number of clients. */ @@ -254,11 +256,15 @@ static int dm_bufio_client_count; static LIST_HEAD(dm_bufio_all_clients); /* - * This mutex protects dm_bufio_cache_size_latch, - * dm_bufio_cache_size_per_client and dm_bufio_client_count + * This mutex protects dm_bufio_cache_size_latch and dm_bufio_client_count */ static DEFINE_MUTEX(dm_bufio_clients_lock); +static struct workqueue_struct *dm_bufio_wq; +static struct delayed_work dm_bufio_cleanup_old_work; +static struct work_struct dm_bufio_replacement_work; + + #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING static void buffer_record_stack(struct dm_buffer *b) { @@ -319,15 +325,23 @@ static void __remove(struct dm_bufio_client *c, struct dm_buffer *b) /*----------------------------------------------------------------*/ -static void adjust_total_allocated(enum data_mode data_mode, long diff) +static void adjust_total_allocated(struct dm_buffer *b, bool unlink) { + enum data_mode data_mode; + long diff; + static unsigned long * const class_ptr[DATA_MODE_LIMIT] = { &dm_bufio_allocated_kmem_cache, &dm_bufio_allocated_get_free_pages, &dm_bufio_allocated_vmalloc, }; - spin_lock(¶m_spinlock); + data_mode = b->data_mode; + diff = (long)b->c->block_size; + if (unlink) + diff = -diff; + + spin_lock(&global_spinlock); *class_ptr[data_mode] += diff; @@ -336,7 +350,19 @@ static void adjust_total_allocated(enum data_mode data_mode, long diff) if (dm_bufio_current_allocated > dm_bufio_peak_allocated) dm_bufio_peak_allocated = dm_bufio_current_allocated; - spin_unlock(¶m_spinlock); + b->accessed = 1; + + if (!unlink) { + list_add(&b->global_list, &global_queue); + global_num++; + if (dm_bufio_current_allocated > dm_bufio_cache_size) + queue_work(dm_bufio_wq, &dm_bufio_replacement_work); + } else { + list_del(&b->global_list); + global_num--; + } + + spin_unlock(&global_spinlock); } /* @@ -357,9 +383,6 @@ static void __cache_size_refresh(void) dm_bufio_default_cache_size); dm_bufio_cache_size_latch = dm_bufio_default_cache_size; } - - dm_bufio_cache_size_per_client = dm_bufio_cache_size_latch / - (dm_bufio_client_count ? : 1); } /* @@ -465,8 +488,6 @@ static struct dm_buffer *alloc_buffer(struct dm_bufio_client *c, gfp_t gfp_mask) return NULL; } - adjust_total_allocated(b->data_mode, (long)c->block_size); - #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING memset(&b->stack_trace, 0, sizeof(b->stack_trace)); #endif @@ -480,8 +501,6 @@ static void free_buffer(struct dm_buffer *b) { struct dm_bufio_client *c = b->c; - adjust_total_allocated(b->data_mode, -(long)c->block_size); - free_buffer_data(c, b->data, b->data_mode); kfree(b); } @@ -499,6 +518,8 @@ static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty) list_add(&b->lru_list, &c->lru[dirty]); __insert(b->c, b); b->last_accessed = jiffies; + + adjust_total_allocated(b, false); } /* @@ -513,6 +534,8 @@ static void __unlink_buffer(struct dm_buffer *b) c->n_buffers[b->list_mode]--; __remove(b->c, b); list_del(&b->lru_list); + + adjust_total_allocated(b, true); } /* @@ -522,6 +545,8 @@ static void __relink_lru(struct dm_buffer *b, int dirty) { struct dm_bufio_client *c = b->c; + b->accessed = 1; + BUG_ON(!c->n_buffers[b->list_mode]); c->n_buffers[b->list_mode]--; @@ -947,33 +972,6 @@ static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait, } } -/* - * Get writeback threshold and buffer limit for a given client. - */ -static void __get_memory_limit(struct dm_bufio_client *c, - unsigned long *threshold_buffers, - unsigned long *limit_buffers) -{ - unsigned long buffers; - - if (unlikely(ACCESS_ONCE(dm_bufio_cache_size) != dm_bufio_cache_size_latch)) { - if (mutex_trylock(&dm_bufio_clients_lock)) { - __cache_size_refresh(); - mutex_unlock(&dm_bufio_clients_lock); - } - } - - buffers = dm_bufio_cache_size_per_client >> - (c->sectors_per_block_bits + SECTOR_SHIFT); - - if (buffers < c->minimum_buffers) - buffers = c->minimum_buffers; - - *limit_buffers = buffers; - *threshold_buffers = mult_frac(buffers, - DM_BUFIO_WRITEBACK_PERCENT, 100); -} - /* * Check if we're over watermark. * If we are over threshold_buffers, start freeing buffers. @@ -982,23 +980,7 @@ static void __get_memory_limit(struct dm_bufio_client *c, static void __check_watermark(struct dm_bufio_client *c, struct list_head *write_list) { - unsigned long threshold_buffers, limit_buffers; - - __get_memory_limit(c, &threshold_buffers, &limit_buffers); - - while (c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY] > - limit_buffers) { - - struct dm_buffer *b = __get_unclaimed_buffer(c); - - if (!b) - return; - - __free_buffer_wake(b); - cond_resched(); - } - - if (c->n_buffers[LIST_DIRTY] > threshold_buffers) + if (c->n_buffers[LIST_DIRTY] > c->n_buffers[LIST_CLEAN] * DM_BUFIO_WRITEBACK_RATIO) __write_dirty_buffers_async(c, 1, write_list); } @@ -1865,6 +1847,74 @@ static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz) dm_bufio_unlock(c); } +static void do_global_cleanup(struct work_struct *w) +{ + struct dm_bufio_client *locked_client = NULL; + struct dm_bufio_client *current_client; + struct dm_buffer *b; + unsigned spinlock_hold_count; + unsigned long threshold = dm_bufio_cache_size - + dm_bufio_cache_size / DM_BUFIO_LOW_WATERMARK_RATIO; + unsigned long loops = global_num * 2; + + mutex_lock(&dm_bufio_clients_lock); + + while (1) { + cond_resched(); + + spin_lock(&global_spinlock); + if (unlikely(dm_bufio_current_allocated <= threshold)) + break; + + spinlock_hold_count = 0; +get_next: + if (!loops--) + break; + if (unlikely(list_empty(&global_queue))) + break; + b = list_entry(global_queue.prev, struct dm_buffer, global_list); + + if (b->accessed) { + b->accessed = 0; + list_move(&b->global_list, &global_queue); + if (likely(++spinlock_hold_count < 16)) + goto get_next; + spin_unlock(&global_spinlock); + continue; + } + + current_client = b->c; + if (unlikely(current_client != locked_client)) { + if (locked_client) + dm_bufio_unlock(locked_client); + + if (!dm_bufio_trylock(current_client)) { + spin_unlock(&global_spinlock); + dm_bufio_lock(current_client); + locked_client = current_client; + continue; + } + + locked_client = current_client; + } + + spin_unlock(&global_spinlock); + + if (unlikely(!__try_evict_buffer(b, GFP_KERNEL))) { + spin_lock(&global_spinlock); + list_move(&b->global_list, &global_queue); + spin_unlock(&global_spinlock); + } + } + + spin_unlock(&global_spinlock); + + if (locked_client) + dm_bufio_unlock(locked_client); + + mutex_unlock(&dm_bufio_clients_lock); +} + static void cleanup_old_buffers(void) { unsigned long max_age_hz = get_max_age_hz(); @@ -1880,14 +1930,11 @@ static void cleanup_old_buffers(void) mutex_unlock(&dm_bufio_clients_lock); } -static struct workqueue_struct *dm_bufio_wq; -static struct delayed_work dm_bufio_work; - static void work_fn(struct work_struct *w) { cleanup_old_buffers(); - queue_delayed_work(dm_bufio_wq, &dm_bufio_work, + queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work, DM_BUFIO_WORK_TIMER_SECS * HZ); } @@ -1932,8 +1979,9 @@ static int __init dm_bufio_init(void) if (!dm_bufio_wq) return -ENOMEM; - INIT_DELAYED_WORK(&dm_bufio_work, work_fn); - queue_delayed_work(dm_bufio_wq, &dm_bufio_work, + INIT_DELAYED_WORK(&dm_bufio_cleanup_old_work, work_fn); + INIT_WORK(&dm_bufio_replacement_work, do_global_cleanup); + queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work, DM_BUFIO_WORK_TIMER_SECS * HZ); return 0; @@ -1947,7 +1995,8 @@ static void __exit dm_bufio_exit(void) int bug = 0; int i; - cancel_delayed_work_sync(&dm_bufio_work); + cancel_delayed_work_sync(&dm_bufio_cleanup_old_work); + flush_workqueue(dm_bufio_wq); destroy_workqueue(dm_bufio_wq); for (i = 0; i < ARRAY_SIZE(dm_bufio_caches); i++) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index b5f541112fca..69cdb29ef6be 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -2971,8 +2971,8 @@ static void cache_postsuspend(struct dm_target *ti) prevent_background_work(cache); BUG_ON(atomic_read(&cache->nr_io_migrations)); - cancel_delayed_work(&cache->waker); - flush_workqueue(cache->wq); + cancel_delayed_work_sync(&cache->waker); + drain_workqueue(cache->wq); WARN_ON(cache->tracker.in_flight); /* diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index dad55d638cbb..cb0d287ddd50 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -501,8 +501,14 @@ static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti, const char *opts) { - unsigned bs = crypto_skcipher_blocksize(any_tfm(cc)); - int log = ilog2(bs); + unsigned bs; + int log; + + if (test_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags)) + bs = crypto_aead_blocksize(any_tfm_aead(cc)); + else + bs = crypto_skcipher_blocksize(any_tfm(cc)); + log = ilog2(bs); /* we need to calculate how far we must shift the sector count * to get the cipher block count, we use this shift in _gen */ diff --git a/drivers/md/dm-default-key.c b/drivers/md/dm-default-key.c new file mode 100644 index 000000000000..b3da6afa6927 --- /dev/null +++ b/drivers/md/dm-default-key.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2017 Google, Inc. + */ + +#include +#include +#include + +#define DM_MSG_PREFIX "default-key" + +#define DM_DEFAULT_KEY_MAX_WRAPPED_KEY_SIZE 128 + +#define SECTOR_SIZE (1 << SECTOR_SHIFT) + +static const struct dm_default_key_cipher { + const char *name; + enum blk_crypto_mode_num mode_num; + int key_size; +} dm_default_key_ciphers[] = { + { + .name = "aes-xts-plain64", + .mode_num = BLK_ENCRYPTION_MODE_AES_256_XTS, + .key_size = 64, + }, { + .name = "xchacha12,aes-adiantum-plain64", + .mode_num = BLK_ENCRYPTION_MODE_ADIANTUM, + .key_size = 32, + }, +}; + +/** + * struct dm_default_c - private data of a default-key target + * @dev: the underlying device + * @start: starting sector of the range of @dev which this target actually maps. + * For this purpose a "sector" is 512 bytes. + * @cipher_string: the name of the encryption algorithm being used + * @iv_offset: starting offset for IVs. IVs are generated as if the target were + * preceded by @iv_offset 512-byte sectors. + * @sector_size: crypto sector size in bytes (usually 4096) + * @sector_bits: log2(sector_size) + * @key: the encryption key to use + */ +struct default_key_c { + struct dm_dev *dev; + sector_t start; + const char *cipher_string; + u64 iv_offset; + unsigned int sector_size; + unsigned int sector_bits; + struct blk_crypto_key key; + bool is_hw_wrapped; +}; + +static const struct dm_default_key_cipher * +lookup_cipher(const char *cipher_string) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(dm_default_key_ciphers); i++) { + if (strcmp(cipher_string, dm_default_key_ciphers[i].name) == 0) + return &dm_default_key_ciphers[i]; + } + return NULL; +} + +static void default_key_dtr(struct dm_target *ti) +{ + struct default_key_c *dkc = ti->private; + int err; + + if (dkc->dev) { + err = blk_crypto_evict_key(dkc->dev->bdev->bd_queue, &dkc->key); + if (err && err != -ENOKEY) + DMWARN("Failed to evict crypto key: %d", err); + dm_put_device(ti, dkc->dev); + } + kzfree(dkc->cipher_string); + kzfree(dkc); +} + +static int default_key_ctr_optional(struct dm_target *ti, + unsigned int argc, char **argv) +{ + struct default_key_c *dkc = ti->private; + struct dm_arg_set as; + static const struct dm_arg _args[] = { + {0, 4, "Invalid number of feature args"}, + }; + unsigned int opt_params; + const char *opt_string; + bool iv_large_sectors = false; + char dummy; + int err; + + as.argc = argc; + as.argv = argv; + + err = dm_read_arg_group(_args, &as, &opt_params, &ti->error); + if (err) + return err; + + while (opt_params--) { + opt_string = dm_shift_arg(&as); + if (!opt_string) { + ti->error = "Not enough feature arguments"; + return -EINVAL; + } + if (!strcmp(opt_string, "allow_discards")) { + ti->num_discard_bios = 1; + } else if (sscanf(opt_string, "sector_size:%u%c", + &dkc->sector_size, &dummy) == 1) { + if (dkc->sector_size < SECTOR_SIZE || + dkc->sector_size > 4096 || + !is_power_of_2(dkc->sector_size)) { + ti->error = "Invalid sector_size"; + return -EINVAL; + } + } else if (!strcmp(opt_string, "iv_large_sectors")) { + iv_large_sectors = true; + } else if (!strcmp(opt_string, "wrappedkey_v0")) { + dkc->is_hw_wrapped = true; + } else { + ti->error = "Invalid feature arguments"; + return -EINVAL; + } + } + + /* dm-default-key doesn't implement iv_large_sectors=false. */ + if (dkc->sector_size != SECTOR_SIZE && !iv_large_sectors) { + ti->error = "iv_large_sectors must be specified"; + return -EINVAL; + } + + return 0; +} + +/* + * Construct a default-key mapping: + * + * + * This syntax matches dm-crypt's, but lots of unneeded functionality has been + * removed. Also, dm-default-key requires that the "iv_large_sectors" option be + * given whenever a non-default sector size is used. + */ +static int default_key_ctr(struct dm_target *ti, unsigned int argc, char **argv) +{ + struct default_key_c *dkc; + const struct dm_default_key_cipher *cipher; + u8 raw_key[DM_DEFAULT_KEY_MAX_WRAPPED_KEY_SIZE]; + unsigned int raw_key_size; + unsigned long long tmpll; + char dummy; + int err; + + if (argc < 5) { + ti->error = "Not enough arguments"; + return -EINVAL; + } + + dkc = kzalloc(sizeof(*dkc), GFP_KERNEL); + if (!dkc) { + ti->error = "Out of memory"; + return -ENOMEM; + } + ti->private = dkc; + + /* */ + dkc->cipher_string = kstrdup(argv[0], GFP_KERNEL); + if (!dkc->cipher_string) { + ti->error = "Out of memory"; + err = -ENOMEM; + goto bad; + } + cipher = lookup_cipher(dkc->cipher_string); + if (!cipher) { + ti->error = "Unsupported cipher"; + err = -EINVAL; + goto bad; + } + + /* */ + raw_key_size = strlen(argv[1]); + if (raw_key_size > 2 * DM_DEFAULT_KEY_MAX_WRAPPED_KEY_SIZE || + raw_key_size % 2) { + ti->error = "Invalid keysize"; + err = -EINVAL; + goto bad; + } + raw_key_size /= 2; + if (hex2bin(raw_key, argv[1], raw_key_size) != 0) { + ti->error = "Malformed key string"; + err = -EINVAL; + goto bad; + } + + /* */ + if (sscanf(argv[2], "%llu%c", &dkc->iv_offset, &dummy) != 1) { + ti->error = "Invalid iv_offset sector"; + err = -EINVAL; + goto bad; + } + + /* */ + err = dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), + &dkc->dev); + if (err) { + ti->error = "Device lookup failed"; + goto bad; + } + + /* */ + if (sscanf(argv[4], "%llu%c", &tmpll, &dummy) != 1 || + tmpll != (sector_t)tmpll) { + ti->error = "Invalid start sector"; + err = -EINVAL; + goto bad; + } + dkc->start = tmpll; + + /* optional arguments */ + dkc->sector_size = SECTOR_SIZE; + if (argc > 5) { + err = default_key_ctr_optional(ti, argc - 5, &argv[5]); + if (err) + goto bad; + } + dkc->sector_bits = ilog2(dkc->sector_size); + if (ti->len & ((dkc->sector_size >> SECTOR_SHIFT) - 1)) { + ti->error = "Device size is not a multiple of sector_size"; + err = -EINVAL; + goto bad; + } + + err = blk_crypto_init_key(&dkc->key, raw_key, cipher->key_size, + dkc->is_hw_wrapped, cipher->mode_num, + dkc->sector_size); + if (err) { + ti->error = "Error initializing blk-crypto key"; + goto bad; + } + + err = blk_crypto_start_using_mode(cipher->mode_num, dkc->sector_size, + dkc->dev->bdev->bd_queue); + if (err) { + ti->error = "Error starting to use blk-crypto"; + goto bad; + } + + ti->num_flush_bios = 1; + + ti->may_passthrough_inline_crypto = true; + + err = 0; + goto out; + +bad: + default_key_dtr(ti); +out: + memzero_explicit(raw_key, sizeof(raw_key)); + return err; +} + +static int default_key_map(struct dm_target *ti, struct bio *bio) +{ + const struct default_key_c *dkc = ti->private; + sector_t sector_in_target; + u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE] = { 0 }; + + bio_set_dev(bio, dkc->dev->bdev); + + /* + * If the bio is a device-level request which doesn't target a specific + * sector, there's nothing more to do. + */ + if (bio_sectors(bio) == 0) + return DM_MAPIO_REMAPPED; + + /* Map the bio's sector to the underlying device. (512-byte sectors) */ + sector_in_target = dm_target_offset(ti, bio->bi_iter.bi_sector); + bio->bi_iter.bi_sector = dkc->start + sector_in_target; + + /* + * If the bio should skip dm-default-key (i.e. if it's for an encrypted + * file's contents), or if it doesn't have any data (e.g. if it's a + * DISCARD request), there's nothing more to do. + */ + if (bio_should_skip_dm_default_key(bio) || !bio_has_data(bio)) + return DM_MAPIO_REMAPPED; + + /* + * Else, dm-default-key needs to set this bio's encryption context. + * It must not already have one. + */ + if (WARN_ON_ONCE(bio_has_crypt_ctx(bio))) + return DM_MAPIO_KILL; + + /* Calculate the DUN and enforce data-unit (crypto sector) alignment. */ + dun[0] = dkc->iv_offset + sector_in_target; /* 512-byte sectors */ + if (dun[0] & ((dkc->sector_size >> SECTOR_SHIFT) - 1)) + return DM_MAPIO_KILL; + dun[0] >>= dkc->sector_bits - SECTOR_SHIFT; /* crypto sectors */ + + bio_crypt_set_ctx(bio, &dkc->key, dun, GFP_NOIO); + + return DM_MAPIO_REMAPPED; +} + +static void default_key_status(struct dm_target *ti, status_type_t type, + unsigned int status_flags, char *result, + unsigned int maxlen) +{ + const struct default_key_c *dkc = ti->private; + unsigned int sz = 0; + int num_feature_args = 0; + + switch (type) { + case STATUSTYPE_INFO: + result[0] = '\0'; + break; + + case STATUSTYPE_TABLE: + /* Omit the key for now. */ + DMEMIT("%s - %llu %s %llu", dkc->cipher_string, dkc->iv_offset, + dkc->dev->name, (unsigned long long)dkc->start); + + num_feature_args += !!ti->num_discard_bios; + if (dkc->sector_size != SECTOR_SIZE) + num_feature_args += 2; + if (dkc->is_hw_wrapped) + num_feature_args += 1; + if (num_feature_args != 0) { + DMEMIT(" %d", num_feature_args); + if (ti->num_discard_bios) + DMEMIT(" allow_discards"); + if (dkc->sector_size != SECTOR_SIZE) { + DMEMIT(" sector_size:%u", dkc->sector_size); + DMEMIT(" iv_large_sectors"); + } + if (dkc->is_hw_wrapped) + DMEMIT(" wrappedkey_v0"); + } + break; + } +} + +static int default_key_prepare_ioctl(struct dm_target *ti, + struct block_device **bdev, + fmode_t *mode) +{ + const struct default_key_c *dkc = ti->private; + const struct dm_dev *dev = dkc->dev; + + *bdev = dev->bdev; + + /* Only pass ioctls through if the device sizes match exactly. */ + if (dkc->start != 0 || + ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT) + return 1; + return 0; +} + +static int default_key_iterate_devices(struct dm_target *ti, + iterate_devices_callout_fn fn, + void *data) +{ + const struct default_key_c *dkc = ti->private; + + return fn(ti, dkc->dev, dkc->start, ti->len, data); +} + +static void default_key_io_hints(struct dm_target *ti, + struct queue_limits *limits) +{ + const struct default_key_c *dkc = ti->private; + const unsigned int sector_size = dkc->sector_size; + + limits->logical_block_size = + max_t(unsigned short, limits->logical_block_size, sector_size); + limits->physical_block_size = + max_t(unsigned int, limits->physical_block_size, sector_size); + limits->io_min = max_t(unsigned int, limits->io_min, sector_size); +} + +static struct target_type default_key_target = { + .name = "default-key", + .version = {2, 1, 0}, + .module = THIS_MODULE, + .ctr = default_key_ctr, + .dtr = default_key_dtr, + .map = default_key_map, + .status = default_key_status, + .prepare_ioctl = default_key_prepare_ioctl, + .iterate_devices = default_key_iterate_devices, + .io_hints = default_key_io_hints, +}; + +static int __init dm_default_key_init(void) +{ + return dm_register_target(&default_key_target); +} + +static void __exit dm_default_key_exit(void) +{ + dm_unregister_target(&default_key_target); +} + +module_init(dm_default_key_init); +module_exit(dm_default_key_exit); + +MODULE_AUTHOR("Paul Lawrence "); +MODULE_AUTHOR("Paul Crowley "); +MODULE_AUTHOR("Eric Biggers "); +MODULE_DESCRIPTION(DM_NAME " target for encrypting filesystem metadata"); +MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 0c1ef63c3461..b1b68e01b889 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -282,20 +282,31 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio) static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc) { - unsigned bio_bytes = bio_cur_bytes(bio); - char *data = bio_data(bio); + unsigned int corrupt_bio_byte = fc->corrupt_bio_byte - 1; + + struct bvec_iter iter; + struct bio_vec bvec; + + if (!bio_has_data(bio)) + return; /* - * Overwrite the Nth byte of the data returned. + * Overwrite the Nth byte of the bio's data, on whichever page + * it falls. */ - if (data && bio_bytes >= fc->corrupt_bio_byte) { - data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value; - - DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " - "(rw=%c bi_opf=%u bi_sector=%llu cur_bytes=%u)\n", - bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, - (bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_opf, - (unsigned long long)bio->bi_iter.bi_sector, bio_bytes); + bio_for_each_segment(bvec, bio, iter) { + if (bio_iter_len(bio, iter) > corrupt_bio_byte) { + char *segment = (page_address(bio_iter_page(bio, iter)) + + bio_iter_offset(bio, iter)); + segment[corrupt_bio_byte] = fc->corrupt_bio_value; + DMDEBUG("Corrupting data bio=%p by writing %u to byte %u " + "(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n", + bio, fc->corrupt_bio_value, fc->corrupt_bio_byte, + (bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_opf, + (unsigned long long)bio->bi_iter.bi_sector, bio->bi_iter.bi_size); + break; + } + corrupt_bio_byte -= bio_iter_len(bio, iter); } } diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 23f0f4eaaa2e..b6ca5b1100db 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -187,6 +187,7 @@ struct dm_integrity_c { struct rb_root in_progress; wait_queue_head_t endio_wait; struct workqueue_struct *wait_wq; + struct workqueue_struct *offload_wq; unsigned char commit_seq; commit_id_t commit_ids[N_COMMIT_IDS]; @@ -1157,7 +1158,7 @@ static void dec_in_flight(struct dm_integrity_io *dio) dio->range.logical_sector += dio->range.n_sectors; bio_advance(bio, dio->range.n_sectors << SECTOR_SHIFT); INIT_WORK(&dio->work, integrity_bio_wait); - queue_work(ic->wait_wq, &dio->work); + queue_work(ic->offload_wq, &dio->work); return; } do_endio_flush(ic, dio); @@ -1577,7 +1578,7 @@ static void dm_integrity_map_continue(struct dm_integrity_io *dio, bool from_map if (need_sync_io && from_map) { INIT_WORK(&dio->work, integrity_bio_wait); - queue_work(ic->metadata_wq, &dio->work); + queue_work(ic->offload_wq, &dio->work); return; } @@ -3005,6 +3006,14 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } + ic->offload_wq = alloc_workqueue("dm-integrity-offload", WQ_MEM_RECLAIM, + METADATA_WORKQUEUE_MAX_ACTIVE); + if (!ic->offload_wq) { + ti->error = "Cannot allocate workqueue"; + r = -ENOMEM; + goto bad; + } + ic->commit_wq = alloc_workqueue("dm-integrity-commit", WQ_MEM_RECLAIM, 1); if (!ic->commit_wq) { ti->error = "Cannot allocate workqueue"; @@ -3189,6 +3198,8 @@ static void dm_integrity_dtr(struct dm_target *ti) destroy_workqueue(ic->metadata_wq); if (ic->wait_wq) destroy_workqueue(ic->wait_wq); + if (ic->offload_wq) + destroy_workqueue(ic->offload_wq); if (ic->commit_wq) destroy_workqueue(ic->commit_wq); if (ic->writer_wq) diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index b4357ed4d541..56e2c0e079d7 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -50,7 +50,7 @@ struct dm_io_client *dm_io_client_create(void) struct dm_io_client *client; unsigned min_ios = dm_get_reserved_bio_based_ios(); - client = kmalloc(sizeof(*client), GFP_KERNEL); + client = kzalloc(sizeof(*client), GFP_KERNEL); if (!client) return ERR_PTR(-ENOMEM); diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index bd9a45b94b55..7ca2b1aaa79d 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -892,7 +892,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *thro int r = -ENOMEM; struct dm_kcopyd_client *kc; - kc = kmalloc(sizeof(*kc), GFP_KERNEL); + kc = kzalloc(sizeof(*kc), GFP_KERNEL); if (!kc) return ERR_PTR(-ENOMEM); diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index e6fd31b03c38..aa294720b14e 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -61,6 +61,7 @@ int dm_linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_discard_bios = 1; ti->num_write_same_bios = 1; ti->num_write_zeroes_bios = 1; + ti->may_passthrough_inline_crypto = true; ti->private = lc; return 0; diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 151211b4cb1b..2c5912e75514 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2441,7 +2441,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev) } /* Enable bitmap creation for RAID levels != 0 */ - mddev->bitmap_info.offset = rt_is_raid0(rs->raid_type) ? 0 : to_sector(4096); + mddev->bitmap_info.offset = (rt_is_raid0(rs->raid_type) || rs->journal_dev.dev) ? 0 : to_sector(4096); mddev->bitmap_info.default_offset = mddev->bitmap_info.offset; if (!test_and_clear_bit(FirstUse, &rdev->flags)) { diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c index 85c32b22a420..91c6f6d72eee 100644 --- a/drivers/md/dm-region-hash.c +++ b/drivers/md/dm-region-hash.c @@ -179,7 +179,7 @@ struct dm_region_hash *dm_region_hash_create( ; nr_buckets >>= 1; - rh = kmalloc(sizeof(*rh), GFP_KERNEL); + rh = kzalloc(sizeof(*rh), GFP_KERNEL); if (!rh) { DMERR("unable to allocate region hash memory"); return ERR_PTR(-ENOMEM); diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index c5534d294773..00025569e807 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -17,7 +17,7 @@ #include "dm-bufio.h" #define DM_MSG_PREFIX "persistent snapshot" -#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */ +#define DM_CHUNK_SIZE_DEFAULT_SECTORS 32U /* 16KB */ #define DM_PREFETCH_CHUNKS 12 diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index b502debc6df3..2170f6c118b8 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -19,7 +19,6 @@ #include #include #include -#include #include "dm.h" @@ -48,7 +47,7 @@ struct dm_exception_table { }; struct dm_snapshot { - struct rw_semaphore lock; + struct mutex lock; struct dm_dev *origin; struct dm_dev *cow; @@ -106,8 +105,8 @@ struct dm_snapshot { /* The on disk metadata handler */ struct dm_exception_store *store; - /* Maximum number of in-flight COW jobs. */ - struct semaphore cow_count; + unsigned in_progress; + struct wait_queue_head in_progress_wait; struct dm_kcopyd_client *kcopyd_client; @@ -158,8 +157,8 @@ struct dm_snapshot { */ #define DEFAULT_COW_THRESHOLD 2048 -static int cow_threshold = DEFAULT_COW_THRESHOLD; -module_param_named(snapshot_cow_threshold, cow_threshold, int, 0644); +static unsigned cow_threshold = DEFAULT_COW_THRESHOLD; +module_param_named(snapshot_cow_threshold, cow_threshold, uint, 0644); MODULE_PARM_DESC(snapshot_cow_threshold, "Maximum number of chunks being copied on write"); DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, @@ -456,9 +455,9 @@ static int __find_snapshots_sharing_cow(struct dm_snapshot *snap, if (!bdev_equal(s->cow->bdev, snap->cow->bdev)) continue; - down_read(&s->lock); + mutex_lock(&s->lock); active = s->active; - up_read(&s->lock); + mutex_unlock(&s->lock); if (active) { if (snap_src) @@ -926,7 +925,7 @@ static int remove_single_exception_chunk(struct dm_snapshot *s) int r; chunk_t old_chunk = s->first_merging_chunk + s->num_merging_chunks - 1; - down_write(&s->lock); + mutex_lock(&s->lock); /* * Process chunks (and associated exceptions) in reverse order @@ -941,7 +940,7 @@ static int remove_single_exception_chunk(struct dm_snapshot *s) b = __release_queued_bios_after_merge(s); out: - up_write(&s->lock); + mutex_unlock(&s->lock); if (b) flush_bios(b); @@ -1000,9 +999,9 @@ static void snapshot_merge_next_chunks(struct dm_snapshot *s) if (linear_chunks < 0) { DMERR("Read error in exception store: " "shutting down merge"); - down_write(&s->lock); + mutex_lock(&s->lock); s->merge_failed = 1; - up_write(&s->lock); + mutex_unlock(&s->lock); } goto shut; } @@ -1043,10 +1042,10 @@ static void snapshot_merge_next_chunks(struct dm_snapshot *s) previous_count = read_pending_exceptions_done_count(); } - down_write(&s->lock); + mutex_lock(&s->lock); s->first_merging_chunk = old_chunk; s->num_merging_chunks = linear_chunks; - up_write(&s->lock); + mutex_unlock(&s->lock); /* Wait until writes to all 'linear_chunks' drain */ for (i = 0; i < linear_chunks; i++) @@ -1088,10 +1087,10 @@ static void merge_callback(int read_err, unsigned long write_err, void *context) return; shut: - down_write(&s->lock); + mutex_lock(&s->lock); s->merge_failed = 1; b = __release_queued_bios_after_merge(s); - up_write(&s->lock); + mutex_unlock(&s->lock); error_bios(b); merge_shutdown(s); @@ -1137,7 +1136,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) origin_mode = FMODE_WRITE; } - s = kmalloc(sizeof(*s), GFP_KERNEL); + s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) { ti->error = "Cannot allocate private snapshot structure"; r = -ENOMEM; @@ -1190,7 +1189,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) s->exception_start_sequence = 0; s->exception_complete_sequence = 0; INIT_LIST_HEAD(&s->out_of_order_list); - init_rwsem(&s->lock); + mutex_init(&s->lock); INIT_LIST_HEAD(&s->list); spin_lock_init(&s->pe_lock); s->state_bits = 0; @@ -1206,7 +1205,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } - sema_init(&s->cow_count, (cow_threshold > 0) ? cow_threshold : INT_MAX); + init_waitqueue_head(&s->in_progress_wait); s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(s->kcopyd_client)) { @@ -1357,9 +1356,9 @@ static void snapshot_dtr(struct dm_target *ti) /* Check whether exception handover must be cancelled */ (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); if (snap_src && snap_dest && (s == snap_src)) { - down_write(&snap_dest->lock); + mutex_lock(&snap_dest->lock); snap_dest->valid = 0; - up_write(&snap_dest->lock); + mutex_unlock(&snap_dest->lock); DMERR("Cancelling snapshot handover."); } up_read(&_origins_lock); @@ -1390,13 +1389,62 @@ static void snapshot_dtr(struct dm_target *ti) dm_exception_store_destroy(s->store); + mutex_destroy(&s->lock); + dm_put_device(ti, s->cow); dm_put_device(ti, s->origin); + WARN_ON(s->in_progress); + kfree(s); } +static void account_start_copy(struct dm_snapshot *s) +{ + spin_lock(&s->in_progress_wait.lock); + s->in_progress++; + spin_unlock(&s->in_progress_wait.lock); +} + +static void account_end_copy(struct dm_snapshot *s) +{ + spin_lock(&s->in_progress_wait.lock); + BUG_ON(!s->in_progress); + s->in_progress--; + if (likely(s->in_progress <= cow_threshold) && + unlikely(waitqueue_active(&s->in_progress_wait))) + wake_up_locked(&s->in_progress_wait); + spin_unlock(&s->in_progress_wait.lock); +} + +static bool wait_for_in_progress(struct dm_snapshot *s, bool unlock_origins) +{ + if (unlikely(s->in_progress > cow_threshold)) { + spin_lock(&s->in_progress_wait.lock); + if (likely(s->in_progress > cow_threshold)) { + /* + * NOTE: this throttle doesn't account for whether + * the caller is servicing an IO that will trigger a COW + * so excess throttling may result for chunks not required + * to be COW'd. But if cow_threshold was reached, extra + * throttling is unlikely to negatively impact performance. + */ + DECLARE_WAITQUEUE(wait, current); + __add_wait_queue(&s->in_progress_wait, &wait); + __set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock(&s->in_progress_wait.lock); + if (unlock_origins) + up_read(&_origins_lock); + io_schedule(); + remove_wait_queue(&s->in_progress_wait, &wait); + return false; + } + spin_unlock(&s->in_progress_wait.lock); + } + return true; +} + /* * Flush a list of buffers. */ @@ -1412,7 +1460,7 @@ static void flush_bios(struct bio *bio) } } -static int do_origin(struct dm_dev *origin, struct bio *bio); +static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit); /* * Flush a list of buffers. @@ -1425,7 +1473,7 @@ static void retry_origin_bios(struct dm_snapshot *s, struct bio *bio) while (bio) { n = bio->bi_next; bio->bi_next = NULL; - r = do_origin(s->origin, bio); + r = do_origin(s->origin, bio, false); if (r == DM_MAPIO_REMAPPED) generic_make_request(bio); bio = n; @@ -1477,7 +1525,7 @@ static void pending_complete(void *context, int success) if (!success) { /* Read/write error - snapshot is unusable */ - down_write(&s->lock); + mutex_lock(&s->lock); __invalidate_snapshot(s, -EIO); error = 1; goto out; @@ -1485,14 +1533,14 @@ static void pending_complete(void *context, int success) e = alloc_completed_exception(GFP_NOIO); if (!e) { - down_write(&s->lock); + mutex_lock(&s->lock); __invalidate_snapshot(s, -ENOMEM); error = 1; goto out; } *e = pe->e; - down_write(&s->lock); + mutex_lock(&s->lock); if (!s->valid) { free_completed_exception(e); error = 1; @@ -1517,7 +1565,7 @@ out: full_bio->bi_end_io = pe->full_bio_end_io; increment_pending_exceptions_done_count(); - up_write(&s->lock); + mutex_unlock(&s->lock); /* Submit any pending write bios */ if (error) { @@ -1579,7 +1627,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) } list_add(&pe->out_of_order_entry, lh); } - up(&s->cow_count); + account_end_copy(s); } /* @@ -1603,7 +1651,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) dest.count = src.count; /* Hand over to kcopyd */ - down(&s->cow_count); + account_start_copy(s); dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); } @@ -1623,7 +1671,7 @@ static void start_full_bio(struct dm_snap_pending_exception *pe, pe->full_bio = bio; pe->full_bio_end_io = bio->bi_end_io; - down(&s->cow_count); + account_start_copy(s); callback_data = dm_kcopyd_prepare_callback(s->kcopyd_client, copy_callback, pe); @@ -1714,9 +1762,12 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) if (!s->valid) return DM_MAPIO_KILL; - /* FIXME: should only take write lock if we need - * to copy an exception */ - down_write(&s->lock); + if (bio_data_dir(bio) == WRITE) { + while (unlikely(!wait_for_in_progress(s, false))) + ; /* wait_for_in_progress() has slept */ + } + + mutex_lock(&s->lock); if (!s->valid || (unlikely(s->snapshot_overflowed) && bio_data_dir(bio) == WRITE)) { @@ -1739,9 +1790,9 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) if (bio_data_dir(bio) == WRITE) { pe = __lookup_pending_exception(s, chunk); if (!pe) { - up_write(&s->lock); + mutex_unlock(&s->lock); pe = alloc_pending_exception(s); - down_write(&s->lock); + mutex_lock(&s->lock); if (!s->valid || s->snapshot_overflowed) { free_pending_exception(pe); @@ -1776,7 +1827,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) bio->bi_iter.bi_size == (s->store->chunk_size << SECTOR_SHIFT)) { pe->started = 1; - up_write(&s->lock); + mutex_unlock(&s->lock); start_full_bio(pe, bio); goto out; } @@ -1786,7 +1837,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) if (!pe->started) { /* this is protected by snap->lock */ pe->started = 1; - up_write(&s->lock); + mutex_unlock(&s->lock); start_copy(pe); goto out; } @@ -1796,7 +1847,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio) } out_unlock: - up_write(&s->lock); + mutex_unlock(&s->lock); out: return r; } @@ -1832,7 +1883,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) chunk = sector_to_chunk(s->store, bio->bi_iter.bi_sector); - down_write(&s->lock); + mutex_lock(&s->lock); /* Full merging snapshots are redirected to the origin */ if (!s->valid) @@ -1863,12 +1914,12 @@ redirect_to_origin: bio_set_dev(bio, s->origin->bdev); if (bio_data_dir(bio) == WRITE) { - up_write(&s->lock); - return do_origin(s->origin, bio); + mutex_unlock(&s->lock); + return do_origin(s->origin, bio, false); } out_unlock: - up_write(&s->lock); + mutex_unlock(&s->lock); return r; } @@ -1900,7 +1951,7 @@ static int snapshot_preresume(struct dm_target *ti) down_read(&_origins_lock); (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); if (snap_src && snap_dest) { - down_read(&snap_src->lock); + mutex_lock(&snap_src->lock); if (s == snap_src) { DMERR("Unable to resume snapshot source until " "handover completes."); @@ -1910,7 +1961,7 @@ static int snapshot_preresume(struct dm_target *ti) "source is suspended."); r = -EINVAL; } - up_read(&snap_src->lock); + mutex_unlock(&snap_src->lock); } up_read(&_origins_lock); @@ -1956,11 +2007,11 @@ static void snapshot_resume(struct dm_target *ti) (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); if (snap_src && snap_dest) { - down_write(&snap_src->lock); - down_write_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING); + mutex_lock(&snap_src->lock); + mutex_lock_nested(&snap_dest->lock, SINGLE_DEPTH_NESTING); __handover_exceptions(snap_src, snap_dest); - up_write(&snap_dest->lock); - up_write(&snap_src->lock); + mutex_unlock(&snap_dest->lock); + mutex_unlock(&snap_src->lock); } up_read(&_origins_lock); @@ -1975,9 +2026,9 @@ static void snapshot_resume(struct dm_target *ti) /* Now we have correct chunk size, reregister */ reregister_snapshot(s); - down_write(&s->lock); + mutex_lock(&s->lock); s->active = 1; - up_write(&s->lock); + mutex_unlock(&s->lock); } static uint32_t get_origin_minimum_chunksize(struct block_device *bdev) @@ -2017,7 +2068,7 @@ static void snapshot_status(struct dm_target *ti, status_type_t type, switch (type) { case STATUSTYPE_INFO: - down_write(&snap->lock); + mutex_lock(&snap->lock); if (!snap->valid) DMEMIT("Invalid"); @@ -2042,7 +2093,7 @@ static void snapshot_status(struct dm_target *ti, status_type_t type, DMEMIT("Unknown"); } - up_write(&snap->lock); + mutex_unlock(&snap->lock); break; @@ -2108,7 +2159,7 @@ static int __origin_write(struct list_head *snapshots, sector_t sector, if (dm_target_is_snapshot_merge(snap->ti)) continue; - down_write(&snap->lock); + mutex_lock(&snap->lock); /* Only deal with valid and active snapshots */ if (!snap->valid || !snap->active) @@ -2135,9 +2186,9 @@ static int __origin_write(struct list_head *snapshots, sector_t sector, pe = __lookup_pending_exception(snap, chunk); if (!pe) { - up_write(&snap->lock); + mutex_unlock(&snap->lock); pe = alloc_pending_exception(snap); - down_write(&snap->lock); + mutex_lock(&snap->lock); if (!snap->valid) { free_pending_exception(pe); @@ -2180,7 +2231,7 @@ static int __origin_write(struct list_head *snapshots, sector_t sector, } next_snapshot: - up_write(&snap->lock); + mutex_unlock(&snap->lock); if (pe_to_start_now) { start_copy(pe_to_start_now); @@ -2201,15 +2252,24 @@ next_snapshot: /* * Called on a write from the origin driver. */ -static int do_origin(struct dm_dev *origin, struct bio *bio) +static int do_origin(struct dm_dev *origin, struct bio *bio, bool limit) { struct origin *o; int r = DM_MAPIO_REMAPPED; +again: down_read(&_origins_lock); o = __lookup_origin(origin->bdev); - if (o) + if (o) { + if (limit) { + struct dm_snapshot *s; + list_for_each_entry(s, &o->snapshots, list) + if (unlikely(!wait_for_in_progress(s, true))) + goto again; + } + r = __origin_write(&o->snapshots, bio->bi_iter.bi_sector, bio); + } up_read(&_origins_lock); return r; @@ -2322,7 +2382,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio) dm_accept_partial_bio(bio, available_sectors); /* Only tell snapshots if this is a write */ - return do_origin(o->dev, bio); + return do_origin(o->dev, bio, true); } static long origin_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 4a5015bd4cbf..0f6688f1c6ad 100755 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #define DM_MSG_PREFIX "table" @@ -1619,6 +1621,54 @@ static void dm_table_verify_integrity(struct dm_table *t) } } +#ifdef CONFIG_BLK_INLINE_ENCRYPTION +static int device_intersect_crypto_modes(struct dm_target *ti, + struct dm_dev *dev, sector_t start, + sector_t len, void *data) +{ + struct keyslot_manager *parent = data; + struct keyslot_manager *child = bdev_get_queue(dev->bdev)->ksm; + + keyslot_manager_intersect_modes(parent, child); + return 0; +} + +/* + * Update the inline crypto modes supported by 'q->ksm' to be the intersection + * of the modes supported by all targets in the table. + * + * For any mode to be supported at all, all targets must have explicitly + * declared that they can pass through inline crypto support. For a particular + * mode to be supported, all underlying devices must also support it. + * + * Assume that 'q->ksm' initially declares all modes to be supported. + */ +static void dm_calculate_supported_crypto_modes(struct dm_table *t, + struct request_queue *q) +{ + struct dm_target *ti; + unsigned int i; + + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + + if (!ti->may_passthrough_inline_crypto) { + keyslot_manager_intersect_modes(q->ksm, NULL); + return; + } + if (!ti->type->iterate_devices) + continue; + ti->type->iterate_devices(ti, device_intersect_crypto_modes, + q->ksm); + } +} +#else /* CONFIG_BLK_INLINE_ENCRYPTION */ +static inline void dm_calculate_supported_crypto_modes(struct dm_table *t, + struct request_queue *q) +{ +} +#endif /* !CONFIG_BLK_INLINE_ENCRYPTION */ + static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -1867,6 +1917,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, dm_table_verify_integrity(t); + dm_calculate_supported_crypto_modes(t, q); + /* * Determine whether or not this queue's I/O timings contribute * to the entropy pool, Only request-based targets use this. diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index aa7795990989..0ee5eae71690 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2962,7 +2962,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, return (struct pool *)pmd; } - pool = kmalloc(sizeof(*pool), GFP_KERNEL); + pool = kzalloc(sizeof(*pool), GFP_KERNEL); if (!pool) { *error = "Error allocating memory for pool"; err_p = ERR_PTR(-ENOMEM); diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index b322821a6323..e3b67b145027 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -132,6 +132,7 @@ struct dmz_metadata { sector_t zone_bitmap_size; unsigned int zone_nr_bitmap_blocks; + unsigned int zone_bits_per_mblk; unsigned int nr_bitmap_blocks; unsigned int nr_map_blocks; @@ -552,6 +553,7 @@ static struct dmz_mblock *dmz_get_mblock(struct dmz_metadata *zmd, TASK_UNINTERRUPTIBLE); if (test_bit(DMZ_META_ERROR, &mblk->state)) { dmz_release_mblock(zmd, mblk); + dmz_check_bdev(zmd->dev); return ERR_PTR(-EIO); } @@ -623,6 +625,8 @@ static int dmz_rdwr_block(struct dmz_metadata *zmd, int op, sector_t block, ret = submit_bio_wait(bio); bio_put(bio); + if (ret) + dmz_check_bdev(zmd->dev); return ret; } @@ -689,6 +693,7 @@ static int dmz_write_dirty_mblocks(struct dmz_metadata *zmd, TASK_UNINTERRUPTIBLE); if (test_bit(DMZ_META_ERROR, &mblk->state)) { clear_bit(DMZ_META_ERROR, &mblk->state); + dmz_check_bdev(zmd->dev); ret = -EIO; } nr_mblks_submitted--; @@ -766,7 +771,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) /* If there are no dirty metadata blocks, just flush the device cache */ if (list_empty(&write_list)) { ret = blkdev_issue_flush(zmd->dev->bdev, GFP_NOIO, NULL); - goto out; + goto err; } /* @@ -776,7 +781,7 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) */ ret = dmz_log_dirty_mblocks(zmd, &write_list); if (ret) - goto out; + goto err; /* * The log is on disk. It is now safe to update in place @@ -784,11 +789,11 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) */ ret = dmz_write_dirty_mblocks(zmd, &write_list, zmd->mblk_primary); if (ret) - goto out; + goto err; ret = dmz_write_sb(zmd, zmd->mblk_primary); if (ret) - goto out; + goto err; while (!list_empty(&write_list)) { mblk = list_first_entry(&write_list, struct dmz_mblock, link); @@ -803,16 +808,20 @@ int dmz_flush_metadata(struct dmz_metadata *zmd) zmd->sb_gen++; out: - if (ret && !list_empty(&write_list)) { - spin_lock(&zmd->mblk_lock); - list_splice(&write_list, &zmd->mblk_dirty_list); - spin_unlock(&zmd->mblk_lock); - } - dmz_unlock_flush(zmd); up_write(&zmd->mblk_sem); return ret; + +err: + if (!list_empty(&write_list)) { + spin_lock(&zmd->mblk_lock); + list_splice(&write_list, &zmd->mblk_dirty_list); + spin_unlock(&zmd->mblk_lock); + } + if (!dmz_check_bdev(zmd->dev)) + ret = -EIO; + goto out; } /* @@ -1157,7 +1166,10 @@ static int dmz_init_zones(struct dmz_metadata *zmd) /* Init */ zmd->zone_bitmap_size = dev->zone_nr_blocks >> 3; - zmd->zone_nr_bitmap_blocks = zmd->zone_bitmap_size >> DMZ_BLOCK_SHIFT; + zmd->zone_nr_bitmap_blocks = + max_t(sector_t, 1, zmd->zone_bitmap_size >> DMZ_BLOCK_SHIFT); + zmd->zone_bits_per_mblk = min_t(sector_t, dev->zone_nr_blocks, + DMZ_BLOCK_SIZE_BITS); /* Allocate zone array */ zmd->zones = kcalloc(dev->nr_zones, sizeof(struct dm_zone), GFP_KERNEL); @@ -1235,6 +1247,7 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone) if (ret) { dmz_dev_err(zmd->dev, "Get zone %u report failed", dmz_id(zmd, zone)); + dmz_check_bdev(zmd->dev); return ret; } @@ -1973,7 +1986,7 @@ int dmz_copy_valid_blocks(struct dmz_metadata *zmd, struct dm_zone *from_zone, dmz_release_mblock(zmd, to_mblk); dmz_release_mblock(zmd, from_mblk); - chunk_block += DMZ_BLOCK_SIZE_BITS; + chunk_block += zmd->zone_bits_per_mblk; } to_zone->weight = from_zone->weight; @@ -2034,7 +2047,7 @@ int dmz_validate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, /* Set bits */ bit = chunk_block & DMZ_BLOCK_MASK_BITS; - nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); + nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit); count = dmz_set_bits((unsigned long *)mblk->data, bit, nr_bits); if (count) { @@ -2113,7 +2126,7 @@ int dmz_invalidate_blocks(struct dmz_metadata *zmd, struct dm_zone *zone, /* Clear bits */ bit = chunk_block & DMZ_BLOCK_MASK_BITS; - nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); + nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit); count = dmz_clear_bits((unsigned long *)mblk->data, bit, nr_bits); @@ -2173,6 +2186,7 @@ static int dmz_to_next_set_block(struct dmz_metadata *zmd, struct dm_zone *zone, { struct dmz_mblock *mblk; unsigned int bit, set_bit, nr_bits; + unsigned int zone_bits = zmd->zone_bits_per_mblk; unsigned long *bitmap; int n = 0; @@ -2187,15 +2201,15 @@ static int dmz_to_next_set_block(struct dmz_metadata *zmd, struct dm_zone *zone, /* Get offset */ bitmap = (unsigned long *) mblk->data; bit = chunk_block & DMZ_BLOCK_MASK_BITS; - nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); + nr_bits = min(nr_blocks, zone_bits - bit); if (set) - set_bit = find_next_bit(bitmap, DMZ_BLOCK_SIZE_BITS, bit); + set_bit = find_next_bit(bitmap, zone_bits, bit); else - set_bit = find_next_zero_bit(bitmap, DMZ_BLOCK_SIZE_BITS, bit); + set_bit = find_next_zero_bit(bitmap, zone_bits, bit); dmz_release_mblock(zmd, mblk); n += set_bit - bit; - if (set_bit < DMZ_BLOCK_SIZE_BITS) + if (set_bit < zone_bits) break; nr_blocks -= nr_bits; @@ -2298,7 +2312,7 @@ static void dmz_get_zone_weight(struct dmz_metadata *zmd, struct dm_zone *zone) /* Count bits in this block */ bitmap = mblk->data; bit = chunk_block & DMZ_BLOCK_MASK_BITS; - nr_bits = min(nr_blocks, DMZ_BLOCK_SIZE_BITS - bit); + nr_bits = min(nr_blocks, zmd->zone_bits_per_mblk - bit); n += dmz_count_bits(bitmap, bit, nr_bits); dmz_release_mblock(zmd, mblk); diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c index a9f84a998476..2fad512dce98 100644 --- a/drivers/md/dm-zoned-reclaim.c +++ b/drivers/md/dm-zoned-reclaim.c @@ -81,6 +81,7 @@ static int dmz_reclaim_align_wp(struct dmz_reclaim *zrc, struct dm_zone *zone, "Align zone %u wp %llu to %llu (wp+%u) blocks failed %d", dmz_id(zmd, zone), (unsigned long long)wp_block, (unsigned long long)block, nr_blocks, ret); + dmz_check_bdev(zrc->dev); return ret; } @@ -490,12 +491,7 @@ static void dmz_reclaim_work(struct work_struct *work) ret = dmz_do_reclaim(zrc); if (ret) { dmz_dev_debug(zrc->dev, "Reclaim error %d\n", ret); - if (ret == -EIO) - /* - * LLD might be performing some error handling sequence - * at the underlying device. To not interfere, do not - * attempt to schedule the next reclaim run immediately. - */ + if (!dmz_check_bdev(zrc->dev)) return; } diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 4694763f9d40..497a2bc5da51 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -79,6 +79,8 @@ static inline void dmz_bio_endio(struct bio *bio, blk_status_t status) if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK) bio->bi_status = status; + if (bio->bi_status != BLK_STS_OK) + bioctx->target->dev->flags |= DMZ_CHECK_BDEV; if (atomic_dec_and_test(&bioctx->ref)) { struct dm_zone *zone = bioctx->zone; @@ -564,31 +566,51 @@ out: } /* - * Check the backing device availability. If it's on the way out, + * Check if the backing device is being removed. If it's on the way out, * start failing I/O. Reclaim and metadata components also call this * function to cleanly abort operation in the event of such failure. */ bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev) { - struct gendisk *disk; + if (dmz_dev->flags & DMZ_BDEV_DYING) + return true; - if (!(dmz_dev->flags & DMZ_BDEV_DYING)) { - disk = dmz_dev->bdev->bd_disk; - if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) { - dmz_dev_warn(dmz_dev, "Backing device queue dying"); - dmz_dev->flags |= DMZ_BDEV_DYING; - } else if (disk->fops->check_events) { - if (disk->fops->check_events(disk, 0) & - DISK_EVENT_MEDIA_CHANGE) { - dmz_dev_warn(dmz_dev, "Backing device offline"); - dmz_dev->flags |= DMZ_BDEV_DYING; - } - } + if (dmz_dev->flags & DMZ_CHECK_BDEV) + return !dmz_check_bdev(dmz_dev); + + if (blk_queue_dying(bdev_get_queue(dmz_dev->bdev))) { + dmz_dev_warn(dmz_dev, "Backing device queue dying"); + dmz_dev->flags |= DMZ_BDEV_DYING; } return dmz_dev->flags & DMZ_BDEV_DYING; } +/* + * Check the backing device availability. This detects such events as + * backing device going offline due to errors, media removals, etc. + * This check is less efficient than dmz_bdev_is_dying() and should + * only be performed as a part of error handling. + */ +bool dmz_check_bdev(struct dmz_dev *dmz_dev) +{ + struct gendisk *disk; + + dmz_dev->flags &= ~DMZ_CHECK_BDEV; + + if (dmz_bdev_is_dying(dmz_dev)) + return false; + + disk = dmz_dev->bdev->bd_disk; + if (disk->fops->check_events && + disk->fops->check_events(disk, 0) & DISK_EVENT_MEDIA_CHANGE) { + dmz_dev_warn(dmz_dev, "Backing device offline"); + dmz_dev->flags |= DMZ_BDEV_DYING; + } + + return !(dmz_dev->flags & DMZ_BDEV_DYING); +} + /* * Process a new BIO. */ @@ -901,8 +923,8 @@ static int dmz_prepare_ioctl(struct dm_target *ti, { struct dmz_target *dmz = ti->private; - if (dmz_bdev_is_dying(dmz->dev)) - return -ENODEV; + if (!dmz_check_bdev(dmz->dev)) + return -EIO; *bdev = dmz->dev->bdev; diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h index 93a64529f219..2662746ba8b9 100644 --- a/drivers/md/dm-zoned.h +++ b/drivers/md/dm-zoned.h @@ -71,6 +71,7 @@ struct dmz_dev { /* Device flags. */ #define DMZ_BDEV_DYING (1 << 0) +#define DMZ_CHECK_BDEV (2 << 0) /* * Zone descriptor. @@ -254,5 +255,6 @@ void dmz_schedule_reclaim(struct dmz_reclaim *zrc); * Functions defined in dm-zoned-target.c */ bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev); +bool dmz_check_bdev(struct dmz_dev *dmz_dev); #endif /* DM_ZONED_H */ diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a56008b2e7c2..c41c2eddba31 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include #define DM_MSG_PREFIX "core" @@ -1249,9 +1251,10 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio, __bio_clone_fast(clone, bio); + bio_crypt_clone(clone, bio, GFP_NOIO); + if (unlikely(bio_integrity(bio) != NULL)) { int r; - if (unlikely(!dm_target_has_integrity(tio->ti->type) && !dm_target_passes_integrity(tio->ti->type))) { DMWARN("%s: the target %s doesn't support integrity data.", @@ -1647,7 +1650,6 @@ void dm_init_md_queue(struct mapped_device *md) * - must do so here (in alloc_dev callchain) before queue is used */ md->queue->queuedata = md; - md->queue->backing_dev_info->congested_data = md; } void dm_init_normal_md_queue(struct mapped_device *md) @@ -1658,9 +1660,12 @@ void dm_init_normal_md_queue(struct mapped_device *md) /* * Initialize aspects of queue that aren't relevant for blk-mq */ + md->queue->backing_dev_info->congested_data = md; md->queue->backing_dev_info->congested_fn = dm_any_congested; } +static void dm_destroy_inline_encryption(struct request_queue *q); + static void cleanup_mapped_device(struct mapped_device *md) { if (md->wq) @@ -1685,8 +1690,10 @@ static void cleanup_mapped_device(struct mapped_device *md) put_disk(md->disk); } - if (md->queue) + if (md->queue) { + dm_destroy_inline_encryption(md->queue); blk_cleanup_queue(md->queue); + } cleanup_srcu_struct(&md->io_barrier); @@ -1750,6 +1757,12 @@ static struct mapped_device *alloc_dev(int minor) goto bad; dm_init_md_queue(md); + /* + * default to bio-based required ->make_request_fn until DM + * table is loaded and md->type established. If request-based + * table is loaded: blk-mq will override accordingly. + */ + blk_queue_make_request(md->queue, dm_make_request); md->disk = alloc_disk_node(1, numa_node_id); if (!md->disk) @@ -2029,6 +2042,161 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md) } EXPORT_SYMBOL_GPL(dm_get_queue_limits); +#ifdef CONFIG_BLK_INLINE_ENCRYPTION +struct dm_keyslot_evict_args { + const struct blk_crypto_key *key; + int err; +}; + +static int dm_keyslot_evict_callback(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct dm_keyslot_evict_args *args = data; + int err; + + err = blk_crypto_evict_key(dev->bdev->bd_queue, args->key); + if (!args->err) + args->err = err; + /* Always try to evict the key from all devices. */ + return 0; +} + +/* + * When an inline encryption key is evicted from a device-mapper device, evict + * it from all the underlying devices. + */ +static int dm_keyslot_evict(struct keyslot_manager *ksm, + const struct blk_crypto_key *key, unsigned int slot) +{ + struct mapped_device *md = keyslot_manager_private(ksm); + struct dm_keyslot_evict_args args = { key }; + struct dm_table *t; + int srcu_idx; + int i; + struct dm_target *ti; + + t = dm_get_live_table(md, &srcu_idx); + if (!t) + return 0; + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + if (!ti->type->iterate_devices) + continue; + ti->type->iterate_devices(ti, dm_keyslot_evict_callback, &args); + } + dm_put_live_table(md, srcu_idx); + return args.err; +} + +struct dm_derive_raw_secret_args { + const u8 *wrapped_key; + unsigned int wrapped_key_size; + u8 *secret; + unsigned int secret_size; + int err; +}; + +static int dm_derive_raw_secret_callback(struct dm_target *ti, + struct dm_dev *dev, sector_t start, + sector_t len, void *data) +{ + struct dm_derive_raw_secret_args *args = data; + struct request_queue *q = dev->bdev->bd_queue; + + if (!args->err) + return 0; + + if (!q->ksm) { + args->err = -EOPNOTSUPP; + return 0; + } + + args->err = keyslot_manager_derive_raw_secret(q->ksm, args->wrapped_key, + args->wrapped_key_size, + args->secret, + args->secret_size); + /* Try another device in case this fails. */ + return 0; +} + +/* + * Retrieve the raw_secret from the underlying device. Given that + * only only one raw_secret can exist for a particular wrappedkey, + * retrieve it only from the first device that supports derive_raw_secret() + */ +static int dm_derive_raw_secret(struct keyslot_manager *ksm, + const u8 *wrapped_key, + unsigned int wrapped_key_size, + u8 *secret, unsigned int secret_size) +{ + struct mapped_device *md = keyslot_manager_private(ksm); + struct dm_derive_raw_secret_args args = { + .wrapped_key = wrapped_key, + .wrapped_key_size = wrapped_key_size, + .secret = secret, + .secret_size = secret_size, + .err = -EOPNOTSUPP, + }; + struct dm_table *t; + int srcu_idx; + int i; + struct dm_target *ti; + + t = dm_get_live_table(md, &srcu_idx); + if (!t) + return -EOPNOTSUPP; + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + if (!ti->type->iterate_devices) + continue; + ti->type->iterate_devices(ti, dm_derive_raw_secret_callback, + &args); + if (!args.err) + break; + } + dm_put_live_table(md, srcu_idx); + return args.err; +} + +static struct keyslot_mgmt_ll_ops dm_ksm_ll_ops = { + .keyslot_evict = dm_keyslot_evict, + .derive_raw_secret = dm_derive_raw_secret, +}; + +static int dm_init_inline_encryption(struct mapped_device *md) +{ + unsigned int mode_masks[BLK_ENCRYPTION_MODE_MAX]; + + /* + * Start out with all crypto mode support bits set. Any unsupported + * bits will be cleared later when calculating the device restrictions. + */ + memset(mode_masks, 0xFF, sizeof(mode_masks)); + + md->queue->ksm = keyslot_manager_create_passthrough(NULL, + &dm_ksm_ll_ops, + mode_masks, md); + if (!md->queue->ksm) + return -ENOMEM; + return 0; +} + +static void dm_destroy_inline_encryption(struct request_queue *q) +{ + keyslot_manager_destroy(q->ksm); + q->ksm = NULL; +} +#else /* CONFIG_BLK_INLINE_ENCRYPTION */ +static inline int dm_init_inline_encryption(struct mapped_device *md) +{ + return 0; +} + +static inline void dm_destroy_inline_encryption(struct request_queue *q) +{ +} +#endif /* !CONFIG_BLK_INLINE_ENCRYPTION */ + /* * Setup the DM device's queue based on md's type */ @@ -2055,7 +2223,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) case DM_TYPE_BIO_BASED: case DM_TYPE_DAX_BIO_BASED: dm_init_normal_md_queue(md); - blk_queue_make_request(md->queue, dm_make_request); /* * DM handles splitting bios as needed. Free the bio_split bioset * since it won't be used (saves 1 process per bio-based DM device). @@ -2068,6 +2235,12 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) break; } + r = dm_init_inline_encryption(md); + if (r) { + DMERR("Cannot initialize inline encryption"); + return r; + } + return 0; } diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 717aaffc227d..10057ac85476 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -1128,7 +1128,7 @@ int cluster_check_sync_size(struct mddev *mddev) bm_lockres = lockres_init(mddev, str, NULL, 1); if (!bm_lockres) { pr_err("md-cluster: Cannot initialize %s\n", str); - bitmap_free(bitmap); + md_bitmap_free(bitmap); return -1; } bm_lockres->flags |= DLM_LKF_NOQUEUE; @@ -1142,11 +1142,11 @@ int cluster_check_sync_size(struct mddev *mddev) sync_size = sb->sync_size; else if (sync_size != sb->sync_size) { kunmap_atomic(sb); - bitmap_free(bitmap); + md_bitmap_free(bitmap); return -1; } kunmap_atomic(sb); - bitmap_free(bitmap); + md_bitmap_free(bitmap); } return (my_sync_size == sync_size) ? 0 : -1; diff --git a/drivers/md/md.c b/drivers/md/md.c index 5bddde0ee095..b2075f9609ed 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8736,6 +8736,18 @@ static void md_start_sync(struct work_struct *ws) */ void md_check_recovery(struct mddev *mddev) { + if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) { + /* Write superblock - thread that called mddev_suspend() + * holds reconfig_mutex for us. + */ + set_bit(MD_UPDATING_SB, &mddev->flags); + smp_mb__after_atomic(); + if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags)) + md_update_sb(mddev, 0); + clear_bit_unlock(MD_UPDATING_SB, &mddev->flags); + wake_up(&mddev->sb_wait); + } + if (mddev->suspended) return; @@ -8896,16 +8908,6 @@ void md_check_recovery(struct mddev *mddev) unlock: wake_up(&mddev->sb_wait); mddev_unlock(mddev); - } else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) { - /* Write superblock - thread that called mddev_suspend() - * holds reconfig_mutex for us. - */ - set_bit(MD_UPDATING_SB, &mddev->flags); - smp_mb__after_atomic(); - if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags)) - md_update_sb(mddev, 0); - clear_bit_unlock(MD_UPDATING_SB, &mddev->flags); - wake_up(&mddev->sb_wait); } } EXPORT_SYMBOL(md_check_recovery); diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index 21ea537bd55e..eff04fa23dfa 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -203,7 +203,13 @@ static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent, struct btree_node *right = r->n; uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_right = le32_to_cpu(right->header.nr_entries); - unsigned threshold = 2 * merge_threshold(left) + 1; + /* + * Ensure the number of entries in each child will be greater + * than or equal to (max_entries / 3 + 1), so no matter which + * child is used for removal, the number will still be not + * less than (max_entries / 3). + */ + unsigned int threshold = 2 * (merge_threshold(left) + 1); if (nr_left + nr_right < threshold) { /* diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index 829b4ce057d8..97f16fe14f54 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -382,6 +382,33 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, return -ENOSPC; } +int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, + dm_block_t begin, dm_block_t end, dm_block_t *b) +{ + int r; + uint32_t count; + + do { + r = sm_ll_find_free_block(new_ll, begin, new_ll->nr_blocks, b); + if (r) + break; + + /* double check this block wasn't used in the old transaction */ + if (*b >= old_ll->nr_blocks) + count = 0; + else { + r = sm_ll_lookup(old_ll, *b, &count); + if (r) + break; + + if (count) + begin = *b + 1; + } + } while (count); + + return r; +} + static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b, int (*mutator)(void *context, uint32_t old, uint32_t *new), void *context, enum allocation_event *ev) diff --git a/drivers/md/persistent-data/dm-space-map-common.h b/drivers/md/persistent-data/dm-space-map-common.h index b3078d5eda0c..8de63ce39bdd 100644 --- a/drivers/md/persistent-data/dm-space-map-common.h +++ b/drivers/md/persistent-data/dm-space-map-common.h @@ -109,6 +109,8 @@ int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result); int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result); int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, dm_block_t end, dm_block_t *result); +int sm_ll_find_common_free_block(struct ll_disk *old_ll, struct ll_disk *new_ll, + dm_block_t begin, dm_block_t end, dm_block_t *result); int sm_ll_insert(struct ll_disk *ll, dm_block_t b, uint32_t ref_count, enum allocation_event *ev); int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev); int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev); diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index 32adf6b4a9c7..bf4c5e2ccb6f 100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c @@ -167,8 +167,10 @@ static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) enum allocation_event ev; struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - /* FIXME: we should loop round a couple of times */ - r = sm_ll_find_free_block(&smd->old_ll, smd->begin, smd->old_ll.nr_blocks, b); + /* + * Any block we allocate has to be free in both the old and current ll. + */ + r = sm_ll_find_common_free_block(&smd->old_ll, &smd->ll, smd->begin, smd->ll.nr_blocks, b); if (r) return r; diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index b23cac2c4738..31a999458be9 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -447,7 +447,10 @@ static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b) enum allocation_event ev; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b); + /* + * Any block we allocate has to be free in both the old and current ll. + */ + r = sm_ll_find_common_free_block(&smm->old_ll, &smm->ll, smm->begin, smm->ll.nr_blocks, b); if (r) return r; diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 28fb71721770..cdafa5e0ea6d 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -94,7 +94,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) char b[BDEVNAME_SIZE]; char b2[BDEVNAME_SIZE]; struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL); - unsigned short blksize = 512; + unsigned blksize = 512; *private_conf = ERR_PTR(-ENOMEM); if (!conf) @@ -158,7 +158,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf) } else { pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n", mdname(mddev)); - pr_err("md/raid0: please set raid.default_layout to 1 or 2\n"); + pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n"); err = -ENOTSUPP; goto abort; } @@ -616,7 +616,7 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio) tmp_dev = map_sector(mddev, zone, sector, §or); break; default: - WARN("md/raid0:%s: Invalid layout\n", mdname(mddev)); + WARN(1, "md/raid0:%s: Invalid layout\n", mdname(mddev)); bio_io_error(bio); return true; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f46ac9db9edb..0a9d623b13c2 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2749,7 +2749,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr, write_targets++; } } - if (bio->bi_end_io) { + if (rdev && bio->bi_end_io) { atomic_inc(&rdev->nr_pending); bio->bi_iter.bi_sector = sector_nr + rdev->data_offset; bio_set_dev(bio, rdev->bdev); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 433e78f453da..d08d77b9674f 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -226,7 +226,7 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) out_free_pages: while (--j >= 0) - resync_free_pages(&rps[j * 2]); + resync_free_pages(&rps[j]); j = 0; out_free_bio: diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4c49bed40f1f..d5c14d56a714 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5721,7 +5721,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) do_flush = false; } - if (!sh->batch_head) + if (!sh->batch_head || sh == sh->batch_head) set_bit(STRIPE_HANDLE, &sh->state); clear_bit(STRIPE_DELAYED, &sh->state); if ((!sh->batch_head || sh == sh->batch_head) && diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c index f8a808d45034..0d7d687aeea0 100644 --- a/drivers/media/cec/cec-adap.c +++ b/drivers/media/cec/cec-adap.c @@ -330,7 +330,8 @@ static void cec_data_cancel(struct cec_data *data) } else { list_del_init(&data->list); if (!(data->msg.tx_status & CEC_TX_STATUS_OK)) - data->adap->transmit_queue_sz--; + if (!WARN_ON(!data->adap->transmit_queue_sz)) + data->adap->transmit_queue_sz--; } /* Mark it as an error */ @@ -377,6 +378,14 @@ static void cec_flush(struct cec_adapter *adap) * need to do anything special in that case. */ } + /* + * If something went wrong and this counter isn't what it should + * be, then this will reset it back to 0. Warn if it is not 0, + * since it indicates a bug, either in this framework or in a + * CEC driver. + */ + if (WARN_ON(adap->transmit_queue_sz)) + adap->transmit_queue_sz = 0; } /* @@ -465,7 +474,8 @@ int cec_thread_func(void *_adap) data = list_first_entry(&adap->transmit_queue, struct cec_data, list); list_del_init(&data->list); - adap->transmit_queue_sz--; + if (!WARN_ON(!data->adap->transmit_queue_sz)) + adap->transmit_queue_sz--; /* Make this the current transmitting message */ adap->transmitting = data; @@ -1031,11 +1041,11 @@ void cec_received_msg_ts(struct cec_adapter *adap, valid_la = false; else if (!cec_msg_is_broadcast(msg) && !(dir_fl & DIRECTED)) valid_la = false; - else if (cec_msg_is_broadcast(msg) && !(dir_fl & BCAST1_4)) + else if (cec_msg_is_broadcast(msg) && !(dir_fl & BCAST)) valid_la = false; else if (cec_msg_is_broadcast(msg) && - adap->log_addrs.cec_version >= CEC_OP_CEC_VERSION_2_0 && - !(dir_fl & BCAST2_0)) + adap->log_addrs.cec_version < CEC_OP_CEC_VERSION_2_0 && + !(dir_fl & BCAST1_4)) valid_la = false; } if (valid_la && min_len) { @@ -1403,6 +1413,13 @@ configured: las->log_addr[i], cec_phys_addr_exp(adap->phys_addr)); cec_transmit_msg_fh(adap, &msg, NULL, false); + + /* Report Vendor ID */ + if (adap->log_addrs.vendor_id != CEC_VENDOR_ID_NONE) { + cec_msg_device_vendor_id(&msg, + adap->log_addrs.vendor_id); + cec_transmit_msg_fh(adap, &msg, NULL, false); + } } adap->kthread_config = NULL; complete(&adap->config_completion); diff --git a/drivers/media/cec/cec-pin.c b/drivers/media/cec/cec-pin.c index c003b8eac617..68fc6a24d077 100644 --- a/drivers/media/cec/cec-pin.c +++ b/drivers/media/cec/cec-pin.c @@ -529,6 +529,17 @@ static enum hrtimer_restart cec_pin_timer(struct hrtimer *timer) /* Start bit, switch to receive state */ pin->ts = ts; pin->state = CEC_ST_RX_START_BIT_LOW; + /* + * If a transmit is pending, then that transmit should + * use a signal free time of no more than + * CEC_SIGNAL_FREE_TIME_NEW_INITIATOR since it will + * have a new initiator due to the receive that is now + * starting. + */ + if (pin->tx_msg.len && pin->tx_signal_free_time > + CEC_SIGNAL_FREE_TIME_NEW_INITIATOR) + pin->tx_signal_free_time = + CEC_SIGNAL_FREE_TIME_NEW_INITIATOR; break; } if (pin->ts == 0) @@ -690,6 +701,15 @@ static int cec_pin_adap_transmit(struct cec_adapter *adap, u8 attempts, { struct cec_pin *pin = adap->pin; + /* + * If a receive is in progress, then this transmit should use + * a signal free time of max CEC_SIGNAL_FREE_TIME_NEW_INITIATOR + * since when it starts transmitting it will have a new initiator. + */ + if (pin->state != CEC_ST_IDLE && + signal_free_time > CEC_SIGNAL_FREE_TIME_NEW_INITIATOR) + signal_free_time = CEC_SIGNAL_FREE_TIME_NEW_INITIATOR; + pin->tx_signal_free_time = signal_free_time; pin->tx_msg = *msg; pin->work_tx_status = 0; diff --git a/drivers/media/i2c/adv748x/adv748x-core.c b/drivers/media/i2c/adv748x/adv748x-core.c index 5ee14f2c2747..cfec08593ac8 100644 --- a/drivers/media/i2c/adv748x/adv748x-core.c +++ b/drivers/media/i2c/adv748x/adv748x-core.c @@ -642,7 +642,8 @@ static int adv748x_parse_dt(struct adv748x_state *state) { struct device_node *ep_np = NULL; struct of_endpoint ep; - bool found = false; + bool out_found = false; + bool in_found = false; for_each_endpoint_of_node(state->dev->of_node, ep_np) { of_graph_parse_endpoint(ep_np, &ep); @@ -667,10 +668,17 @@ static int adv748x_parse_dt(struct adv748x_state *state) of_node_get(ep_np); state->endpoints[ep.port] = ep_np; - found = true; + /* + * At least one input endpoint and one output endpoint shall + * be defined. + */ + if (ep.port < ADV748X_PORT_TXA) + in_found = true; + else + out_found = true; } - return found ? 0 : -ENODEV; + return in_found && out_found ? 0 : -ENODEV; } static void adv748x_dt_cleanup(struct adv748x_state *state) @@ -702,6 +710,17 @@ static int adv748x_probe(struct i2c_client *client, state->i2c_clients[ADV748X_PAGE_IO] = client; i2c_set_clientdata(client, state); + /* + * We can not use container_of to get back to the state with two TXs; + * Initialize the TXs's fields unconditionally on the endpoint + * presence to access them later. + */ + state->txa.state = state->txb.state = state; + state->txa.page = ADV748X_PAGE_TXA; + state->txb.page = ADV748X_PAGE_TXB; + state->txa.port = ADV748X_PORT_TXA; + state->txb.port = ADV748X_PORT_TXB; + /* Discover and process ports declared by the Device tree endpoints */ ret = adv748x_parse_dt(state); if (ret) { diff --git a/drivers/media/i2c/adv748x/adv748x-csi2.c b/drivers/media/i2c/adv748x/adv748x-csi2.c index 979825d4a419..0953ba0bcc09 100644 --- a/drivers/media/i2c/adv748x/adv748x-csi2.c +++ b/drivers/media/i2c/adv748x/adv748x-csi2.c @@ -265,19 +265,10 @@ static int adv748x_csi2_init_controls(struct adv748x_csi2 *tx) int adv748x_csi2_init(struct adv748x_state *state, struct adv748x_csi2 *tx) { - struct device_node *ep; int ret; - /* We can not use container_of to get back to the state with two TXs */ - tx->state = state; - tx->page = is_txa(tx) ? ADV748X_PAGE_TXA : ADV748X_PAGE_TXB; - - ep = state->endpoints[is_txa(tx) ? ADV748X_PORT_TXA : ADV748X_PORT_TXB]; - if (!ep) { - adv_err(state, "No endpoint found for %s\n", - is_txa(tx) ? "txa" : "txb"); - return -ENODEV; - } + if (!is_tx_enabled(tx)) + return 0; /* Initialise the virtual channel */ adv748x_csi2_set_virtual_channel(tx, 0); @@ -287,7 +278,7 @@ int adv748x_csi2_init(struct adv748x_state *state, struct adv748x_csi2 *tx) is_txa(tx) ? "txa" : "txb"); /* Ensure that matching is based upon the endpoint fwnodes */ - tx->sd.fwnode = of_fwnode_handle(ep); + tx->sd.fwnode = of_fwnode_handle(state->endpoints[tx->port]); /* Register internal ops for incremental subdev registration */ tx->sd.internal_ops = &adv748x_csi2_internal_ops; @@ -320,6 +311,9 @@ err_free_media: void adv748x_csi2_cleanup(struct adv748x_csi2 *tx) { + if (!is_tx_enabled(tx)) + return; + v4l2_async_unregister_subdev(&tx->sd); media_entity_cleanup(&tx->sd.entity); v4l2_ctrl_handler_free(&tx->ctrl_hdl); diff --git a/drivers/media/i2c/adv748x/adv748x.h b/drivers/media/i2c/adv748x/adv748x.h index cc4151b5b31e..1991c22be51a 100644 --- a/drivers/media/i2c/adv748x/adv748x.h +++ b/drivers/media/i2c/adv748x/adv748x.h @@ -94,6 +94,7 @@ struct adv748x_csi2 { struct adv748x_state *state; struct v4l2_mbus_framefmt format; unsigned int page; + unsigned int port; struct media_pad pads[ADV748X_CSI2_NR_PADS]; struct v4l2_ctrl_handler ctrl_hdl; @@ -102,6 +103,7 @@ struct adv748x_csi2 { #define notifier_to_csi2(n) container_of(n, struct adv748x_csi2, notifier) #define adv748x_sd_to_csi2(sd) container_of(sd, struct adv748x_csi2, sd) +#define is_tx_enabled(_tx) ((_tx)->state->endpoints[(_tx)->port] != NULL) enum adv748x_hdmi_pads { ADV748X_HDMI_SINK, @@ -370,10 +372,10 @@ int adv748x_write_block(struct adv748x_state *state, int client_page, #define io_read(s, r) adv748x_read(s, ADV748X_PAGE_IO, r) #define io_write(s, r, v) adv748x_write(s, ADV748X_PAGE_IO, r, v) -#define io_clrset(s, r, m, v) io_write(s, r, (io_read(s, r) & ~m) | v) +#define io_clrset(s, r, m, v) io_write(s, r, (io_read(s, r) & ~(m)) | (v)) #define hdmi_read(s, r) adv748x_read(s, ADV748X_PAGE_HDMI, r) -#define hdmi_read16(s, r, m) (((hdmi_read(s, r) << 8) | hdmi_read(s, r+1)) & m) +#define hdmi_read16(s, r, m) (((hdmi_read(s, r) << 8) | hdmi_read(s, (r)+1)) & (m)) #define hdmi_write(s, r, v) adv748x_write(s, ADV748X_PAGE_HDMI, r, v) #define repeater_read(s, r) adv748x_read(s, ADV748X_PAGE_REPEATER, r) @@ -381,11 +383,11 @@ int adv748x_write_block(struct adv748x_state *state, int client_page, #define sdp_read(s, r) adv748x_read(s, ADV748X_PAGE_SDP, r) #define sdp_write(s, r, v) adv748x_write(s, ADV748X_PAGE_SDP, r, v) -#define sdp_clrset(s, r, m, v) sdp_write(s, r, (sdp_read(s, r) & ~m) | v) +#define sdp_clrset(s, r, m, v) sdp_write(s, r, (sdp_read(s, r) & ~(m)) | (v)) #define cp_read(s, r) adv748x_read(s, ADV748X_PAGE_CP, r) #define cp_write(s, r, v) adv748x_write(s, ADV748X_PAGE_CP, r, v) -#define cp_clrset(s, r, m, v) cp_write(s, r, (cp_read(s, r) & ~m) | v) +#define cp_clrset(s, r, m, v) cp_write(s, r, (cp_read(s, r) & ~(m)) | (v)) #define txa_read(s, r) adv748x_read(s, ADV748X_PAGE_TXA, r) #define txb_read(s, r) adv748x_read(s, ADV748X_PAGE_TXB, r) diff --git a/drivers/media/i2c/dw9714.c b/drivers/media/i2c/dw9714.c index 95af4fc99cd0..c1273bcd5901 100644 --- a/drivers/media/i2c/dw9714.c +++ b/drivers/media/i2c/dw9714.c @@ -182,7 +182,8 @@ static int dw9714_probe(struct i2c_client *client) return 0; err_cleanup: - dw9714_subdev_cleanup(dw9714_dev); + v4l2_ctrl_handler_free(&dw9714_dev->ctrls_vcm); + media_entity_cleanup(&dw9714_dev->sd.entity); dev_err(&client->dev, "Probe failed: %d\n", rval); return rval; } diff --git a/drivers/media/i2c/mt9v032.c b/drivers/media/i2c/mt9v032.c index 8a430640c85d..1a20d0d558d3 100644 --- a/drivers/media/i2c/mt9v032.c +++ b/drivers/media/i2c/mt9v032.c @@ -423,10 +423,12 @@ static int mt9v032_enum_mbus_code(struct v4l2_subdev *subdev, struct v4l2_subdev_pad_config *cfg, struct v4l2_subdev_mbus_code_enum *code) { + struct mt9v032 *mt9v032 = to_mt9v032(subdev); + if (code->index > 0) return -EINVAL; - code->code = MEDIA_BUS_FMT_SGRBG10_1X10; + code->code = mt9v032->format.code; return 0; } @@ -434,7 +436,11 @@ static int mt9v032_enum_frame_size(struct v4l2_subdev *subdev, struct v4l2_subdev_pad_config *cfg, struct v4l2_subdev_frame_size_enum *fse) { - if (fse->index >= 3 || fse->code != MEDIA_BUS_FMT_SGRBG10_1X10) + struct mt9v032 *mt9v032 = to_mt9v032(subdev); + + if (fse->index >= 3) + return -EINVAL; + if (mt9v032->format.code != fse->code) return -EINVAL; fse->min_width = MT9V032_WINDOW_WIDTH_DEF / (1 << fse->index); diff --git a/drivers/media/i2c/ov2659.c b/drivers/media/i2c/ov2659.c index ce23f436e130..e7768ed1ff9c 100644 --- a/drivers/media/i2c/ov2659.c +++ b/drivers/media/i2c/ov2659.c @@ -419,10 +419,14 @@ static struct sensor_register ov2659_720p[] = { { REG_TIMING_YINC, 0x11 }, { REG_TIMING_VERT_FORMAT, 0x80 }, { REG_TIMING_HORIZ_FORMAT, 0x00 }, + { 0x370a, 0x12 }, { 0x3a03, 0xe8 }, { 0x3a09, 0x6f }, { 0x3a0b, 0x5d }, { 0x3a15, 0x9a }, + { REG_VFIFO_READ_START_H, 0x00 }, + { REG_VFIFO_READ_START_L, 0x80 }, + { REG_ISP_CTRL02, 0x00 }, { REG_NULL, 0x00 }, }; @@ -1132,7 +1136,7 @@ static int ov2659_set_fmt(struct v4l2_subdev *sd, mf = v4l2_subdev_get_try_format(sd, cfg, fmt->pad); *mf = fmt->format; #else - return -ENOTTY; + ret = -ENOTTY; #endif } else { s64 val; @@ -1203,11 +1207,15 @@ static int ov2659_s_stream(struct v4l2_subdev *sd, int on) goto unlock; } - ov2659_set_pixel_clock(ov2659); - ov2659_set_frame_size(ov2659); - ov2659_set_format(ov2659); - ov2659_set_streaming(ov2659, 1); - ov2659->streaming = on; + ret = ov2659_set_pixel_clock(ov2659); + if (!ret) + ret = ov2659_set_frame_size(ov2659); + if (!ret) + ret = ov2659_set_format(ov2659); + if (!ret) { + ov2659_set_streaming(ov2659, 1); + ov2659->streaming = on; + } unlock: mutex_unlock(&ov2659->lock); diff --git a/drivers/media/i2c/ov6650.c b/drivers/media/i2c/ov6650.c index 025869eec2ac..4f67a515bdd8 100644 --- a/drivers/media/i2c/ov6650.c +++ b/drivers/media/i2c/ov6650.c @@ -203,7 +203,6 @@ struct ov6650 { unsigned long pclk_max; /* from resolution and format */ struct v4l2_fract tpf; /* as requested with s_parm */ u32 code; - enum v4l2_colorspace colorspace; }; @@ -216,6 +215,17 @@ static u32 ov6650_codes[] = { MEDIA_BUS_FMT_Y8_1X8, }; +static const struct v4l2_mbus_framefmt ov6650_def_fmt = { + .width = W_CIF, + .height = H_CIF, + .code = MEDIA_BUS_FMT_SBGGR8_1X8, + .colorspace = V4L2_COLORSPACE_SRGB, + .field = V4L2_FIELD_NONE, + .ycbcr_enc = V4L2_YCBCR_ENC_DEFAULT, + .quantization = V4L2_QUANTIZATION_DEFAULT, + .xfer_func = V4L2_XFER_FUNC_DEFAULT, +}; + /* read a register */ static int ov6650_reg_read(struct i2c_client *client, u8 reg, u8 *val) { @@ -469,38 +479,39 @@ static int ov6650_set_selection(struct v4l2_subdev *sd, { struct i2c_client *client = v4l2_get_subdevdata(sd); struct ov6650 *priv = to_ov6650(client); - struct v4l2_rect rect = sel->r; int ret; if (sel->which != V4L2_SUBDEV_FORMAT_ACTIVE || sel->target != V4L2_SEL_TGT_CROP) return -EINVAL; - v4l_bound_align_image(&rect.width, 2, W_CIF, 1, - &rect.height, 2, H_CIF, 1, 0); - v4l_bound_align_image(&rect.left, DEF_HSTRT << 1, - (DEF_HSTRT << 1) + W_CIF - (__s32)rect.width, 1, - &rect.top, DEF_VSTRT << 1, - (DEF_VSTRT << 1) + H_CIF - (__s32)rect.height, 1, - 0); + v4l_bound_align_image(&sel->r.width, 2, W_CIF, 1, + &sel->r.height, 2, H_CIF, 1, 0); + v4l_bound_align_image(&sel->r.left, DEF_HSTRT << 1, + (DEF_HSTRT << 1) + W_CIF - (__s32)sel->r.width, 1, + &sel->r.top, DEF_VSTRT << 1, + (DEF_VSTRT << 1) + H_CIF - (__s32)sel->r.height, + 1, 0); - ret = ov6650_reg_write(client, REG_HSTRT, rect.left >> 1); + ret = ov6650_reg_write(client, REG_HSTRT, sel->r.left >> 1); if (!ret) { - priv->rect.left = rect.left; + priv->rect.width += priv->rect.left - sel->r.left; + priv->rect.left = sel->r.left; ret = ov6650_reg_write(client, REG_HSTOP, - (rect.left + rect.width) >> 1); + (sel->r.left + sel->r.width) >> 1); } if (!ret) { - priv->rect.width = rect.width; - ret = ov6650_reg_write(client, REG_VSTRT, rect.top >> 1); + priv->rect.width = sel->r.width; + ret = ov6650_reg_write(client, REG_VSTRT, sel->r.top >> 1); } if (!ret) { - priv->rect.top = rect.top; + priv->rect.height += priv->rect.top - sel->r.top; + priv->rect.top = sel->r.top; ret = ov6650_reg_write(client, REG_VSTOP, - (rect.top + rect.height) >> 1); + (sel->r.top + sel->r.height) >> 1); } if (!ret) - priv->rect.height = rect.height; + priv->rect.height = sel->r.height; return ret; } @@ -516,12 +527,20 @@ static int ov6650_get_fmt(struct v4l2_subdev *sd, if (format->pad) return -EINVAL; - mf->width = priv->rect.width >> priv->half_scale; - mf->height = priv->rect.height >> priv->half_scale; - mf->code = priv->code; - mf->colorspace = priv->colorspace; - mf->field = V4L2_FIELD_NONE; + /* initialize response with default media bus frame format */ + *mf = ov6650_def_fmt; + /* update media bus format code and frame size */ + if (format->which == V4L2_SUBDEV_FORMAT_TRY) { + mf->width = cfg->try_fmt.width; + mf->height = cfg->try_fmt.height; + mf->code = cfg->try_fmt.code; + + } else { + mf->width = priv->rect.width >> priv->half_scale; + mf->height = priv->rect.height >> priv->half_scale; + mf->code = priv->code; + } return 0; } @@ -614,7 +633,6 @@ static int ov6650_s_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf) dev_err(&client->dev, "Pixel format not handled: 0x%x\n", code); return -EINVAL; } - priv->code = code; if (code == MEDIA_BUS_FMT_Y8_1X8 || code == MEDIA_BUS_FMT_SBGGR8_1X8) { @@ -627,11 +645,6 @@ static int ov6650_s_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf) priv->pclk_max = 8000000; } - if (code == MEDIA_BUS_FMT_SBGGR8_1X8) - priv->colorspace = V4L2_COLORSPACE_SRGB; - else if (code != 0) - priv->colorspace = V4L2_COLORSPACE_JPEG; - if (half_scale) { dev_dbg(&client->dev, "max resolution: QCIF\n"); coma_set |= COMA_QCIF; @@ -640,7 +653,6 @@ static int ov6650_s_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf) dev_dbg(&client->dev, "max resolution: CIF\n"); coma_mask |= COMA_QCIF; } - priv->half_scale = half_scale; clkrc = CLKRC_12MHz; mclk = 12000000; @@ -658,14 +670,14 @@ static int ov6650_s_fmt(struct v4l2_subdev *sd, struct v4l2_mbus_framefmt *mf) ret = ov6650_reg_rmw(client, REG_COMA, coma_set, coma_mask); if (!ret) ret = ov6650_reg_write(client, REG_CLKRC, clkrc); - if (!ret) - ret = ov6650_reg_rmw(client, REG_COML, coml_set, coml_mask); - if (!ret) { - mf->colorspace = priv->colorspace; - mf->width = priv->rect.width >> half_scale; - mf->height = priv->rect.height >> half_scale; + priv->half_scale = half_scale; + + ret = ov6650_reg_rmw(client, REG_COML, coml_set, coml_mask); } + if (!ret) + priv->code = code; + return ret; } @@ -684,8 +696,6 @@ static int ov6650_set_fmt(struct v4l2_subdev *sd, v4l_bound_align_image(&mf->width, 2, W_CIF, 1, &mf->height, 2, H_CIF, 1, 0); - mf->field = V4L2_FIELD_NONE; - switch (mf->code) { case MEDIA_BUS_FMT_Y10_1X10: mf->code = MEDIA_BUS_FMT_Y8_1X8; @@ -695,20 +705,39 @@ static int ov6650_set_fmt(struct v4l2_subdev *sd, case MEDIA_BUS_FMT_YUYV8_2X8: case MEDIA_BUS_FMT_VYUY8_2X8: case MEDIA_BUS_FMT_UYVY8_2X8: - mf->colorspace = V4L2_COLORSPACE_JPEG; break; default: mf->code = MEDIA_BUS_FMT_SBGGR8_1X8; /* fall through */ case MEDIA_BUS_FMT_SBGGR8_1X8: - mf->colorspace = V4L2_COLORSPACE_SRGB; break; } - if (format->which == V4L2_SUBDEV_FORMAT_ACTIVE) - return ov6650_s_fmt(sd, mf); - cfg->try_fmt = *mf; + if (format->which == V4L2_SUBDEV_FORMAT_TRY) { + /* store media bus format code and frame size in pad config */ + cfg->try_fmt.width = mf->width; + cfg->try_fmt.height = mf->height; + cfg->try_fmt.code = mf->code; + /* return default mbus frame format updated with pad config */ + *mf = ov6650_def_fmt; + mf->width = cfg->try_fmt.width; + mf->height = cfg->try_fmt.height; + mf->code = cfg->try_fmt.code; + + } else { + /* apply new media bus format code and frame size */ + int ret = ov6650_s_fmt(sd, mf); + + if (ret) + return ret; + + /* return default format updated with active size and code */ + *mf = ov6650_def_fmt; + mf->width = priv->rect.width >> priv->half_scale; + mf->height = priv->rect.height >> priv->half_scale; + mf->code = priv->code; + } return 0; } @@ -1016,7 +1045,6 @@ static int ov6650_probe(struct i2c_client *client, priv->rect.height = H_CIF; priv->half_scale = false; priv->code = MEDIA_BUS_FMT_YUYV8_2X8; - priv->colorspace = V4L2_COLORSPACE_JPEG; ret = ov6650_video_probe(client); if (ret) diff --git a/drivers/media/pci/cx18/cx18-fileops.c b/drivers/media/pci/cx18/cx18-fileops.c index 98467b2089fa..099d59b992c1 100644 --- a/drivers/media/pci/cx18/cx18-fileops.c +++ b/drivers/media/pci/cx18/cx18-fileops.c @@ -484,7 +484,7 @@ static ssize_t cx18_read_pos(struct cx18_stream *s, char __user *ubuf, CX18_DEBUG_HI_FILE("read %zd from %s, got %zd\n", count, s->name, rc); if (rc > 0) - pos += rc; + *pos += rc; return rc; } diff --git a/drivers/media/pci/cx23885/cx23885-dvb.c b/drivers/media/pci/cx23885/cx23885-dvb.c index e795ddeb7fe2..60f122edaefb 100644 --- a/drivers/media/pci/cx23885/cx23885-dvb.c +++ b/drivers/media/pci/cx23885/cx23885-dvb.c @@ -1460,8 +1460,9 @@ static int dvb_register(struct cx23885_tsport *port) if (fe0->dvb.frontend != NULL) { struct i2c_adapter *tun_i2c; - fe0->dvb.frontend->sec_priv = kmalloc(sizeof(dib7000p_ops), GFP_KERNEL); - memcpy(fe0->dvb.frontend->sec_priv, &dib7000p_ops, sizeof(dib7000p_ops)); + fe0->dvb.frontend->sec_priv = kmemdup(&dib7000p_ops, sizeof(dib7000p_ops), GFP_KERNEL); + if (!fe0->dvb.frontend->sec_priv) + return -ENOMEM; tun_i2c = dib7000p_ops.get_i2c_master(fe0->dvb.frontend, DIBX000_I2C_INTERFACE_TUNER, 1); if (!dvb_attach(dib0070_attach, fe0->dvb.frontend, tun_i2c, &dib7070p_dib0070_config)) return -ENODEV; diff --git a/drivers/media/pci/cx88/cx88-video.c b/drivers/media/pci/cx88/cx88-video.c index 7d25ecd4404b..1748812bd7e5 100644 --- a/drivers/media/pci/cx88/cx88-video.c +++ b/drivers/media/pci/cx88/cx88-video.c @@ -1310,7 +1310,7 @@ static int cx8800_initdev(struct pci_dev *pci_dev, core = cx88_core_get(dev->pci); if (!core) { err = -EINVAL; - goto fail_free; + goto fail_disable; } dev->core = core; @@ -1356,7 +1356,7 @@ static int cx8800_initdev(struct pci_dev *pci_dev, cc->step, cc->default_value); if (!vc) { err = core->audio_hdl.error; - goto fail_core; + goto fail_irq; } vc->priv = (void *)cc; } @@ -1370,7 +1370,7 @@ static int cx8800_initdev(struct pci_dev *pci_dev, cc->step, cc->default_value); if (!vc) { err = core->video_hdl.error; - goto fail_core; + goto fail_irq; } vc->priv = (void *)cc; if (vc->id == V4L2_CID_CHROMA_AGC) @@ -1533,11 +1533,14 @@ static int cx8800_initdev(struct pci_dev *pci_dev, fail_unreg: cx8800_unregister_video(dev); - free_irq(pci_dev->irq, dev); mutex_unlock(&core->lock); +fail_irq: + free_irq(pci_dev->irq, dev); fail_core: core->v4ldev = NULL; cx88_core_put(core, dev->pci); +fail_disable: + pci_disable_device(pci_dev); fail_free: kfree(dev); return err; diff --git a/drivers/media/pci/ivtv/ivtv-fileops.c b/drivers/media/pci/ivtv/ivtv-fileops.c index c9bd018e53de..e2b19c3eaa87 100644 --- a/drivers/media/pci/ivtv/ivtv-fileops.c +++ b/drivers/media/pci/ivtv/ivtv-fileops.c @@ -420,7 +420,7 @@ static ssize_t ivtv_read_pos(struct ivtv_stream *s, char __user *ubuf, size_t co IVTV_DEBUG_HI_FILE("read %zd from %s, got %zd\n", count, s->name, rc); if (rc > 0) - pos += rc; + *pos += rc; return rc; } diff --git a/drivers/media/pci/ivtv/ivtv-yuv.c b/drivers/media/pci/ivtv/ivtv-yuv.c index 44936d6d7c39..1380474519f2 100644 --- a/drivers/media/pci/ivtv/ivtv-yuv.c +++ b/drivers/media/pci/ivtv/ivtv-yuv.c @@ -935,7 +935,7 @@ static void ivtv_yuv_init(struct ivtv *itv) } /* We need a buffer for blanking when Y plane is offset - non-fatal if we can't get one */ - yi->blanking_ptr = kzalloc(720 * 16, GFP_KERNEL|__GFP_NOWARN); + yi->blanking_ptr = kzalloc(720 * 16, GFP_ATOMIC|__GFP_NOWARN); if (yi->blanking_ptr) { yi->blanking_dmaptr = pci_map_single(itv->pdev, yi->blanking_ptr, 720*16, PCI_DMA_TODEVICE); } else { diff --git a/drivers/media/pci/meye/meye.c b/drivers/media/pci/meye/meye.c index 49e047e4a81e..926707c997ac 100644 --- a/drivers/media/pci/meye/meye.c +++ b/drivers/media/pci/meye/meye.c @@ -1460,7 +1460,7 @@ static int meye_mmap(struct file *file, struct vm_area_struct *vma) unsigned long page, pos; mutex_lock(&meye.lock); - if (size > gbuffers * gbufsize) { + if (size > gbuffers * gbufsize || offset > gbuffers * gbufsize - size) { mutex_unlock(&meye.lock); return -EINVAL; } diff --git a/drivers/media/pci/tw5864/tw5864-video.c b/drivers/media/pci/tw5864/tw5864-video.c index e7bd2b8484e3..ee1230440b39 100644 --- a/drivers/media/pci/tw5864/tw5864-video.c +++ b/drivers/media/pci/tw5864/tw5864-video.c @@ -1395,13 +1395,13 @@ static void tw5864_handle_frame(struct tw5864_h264_frame *frame) input->vb = NULL; spin_unlock_irqrestore(&input->slock, flags); - v4l2_buf = to_vb2_v4l2_buffer(&vb->vb.vb2_buf); - if (!vb) { /* Gone because of disabling */ dev_dbg(&dev->pci->dev, "vb is empty, dropping frame\n"); return; } + v4l2_buf = to_vb2_v4l2_buffer(&vb->vb.vb2_buf); + /* * Check for space. * Mind the overhead of startcode emulation prevention. diff --git a/drivers/media/platform/am437x/am437x-vpfe.c b/drivers/media/platform/am437x/am437x-vpfe.c index dfcc484cab89..e92c5b56be42 100644 --- a/drivers/media/platform/am437x/am437x-vpfe.c +++ b/drivers/media/platform/am437x/am437x-vpfe.c @@ -1848,6 +1848,10 @@ static int vpfe_s_std(struct file *file, void *priv, v4l2_std_id std_id) if (!(sdinfo->inputs[0].capabilities & V4L2_IN_CAP_STD)) return -ENODATA; + /* if trying to set the same std then nothing to do */ + if (vpfe_standards[vpfe->std_index].std_id == std_id) + return 0; + /* If streaming is started, return error */ if (vb2_is_busy(&vpfe->buffer_queue)) { vpfe_err(vpfe, "%s device busy\n", __func__); diff --git a/drivers/media/platform/atmel/atmel-isc.c b/drivers/media/platform/atmel/atmel-isc.c index d7103c5f92c3..0dea3cf2cb52 100644 --- a/drivers/media/platform/atmel/atmel-isc.c +++ b/drivers/media/platform/atmel/atmel-isc.c @@ -1555,6 +1555,8 @@ static int isc_async_complete(struct v4l2_async_notifier *notifier) struct vb2_queue *q = &isc->vb2_vidq; int ret; + INIT_WORK(&isc->awb_work, isc_awb_work); + ret = v4l2_device_register_subdev_nodes(&isc->v4l2_dev); if (ret < 0) { v4l2_err(&isc->v4l2_dev, "Failed to register subdev nodes\n"); @@ -1614,8 +1616,6 @@ static int isc_async_complete(struct v4l2_async_notifier *notifier) return ret; } - INIT_WORK(&isc->awb_work, isc_awb_work); - /* Register video device */ strlcpy(vdev->name, ATMEL_ISC_NAME, sizeof(vdev->name)); vdev->release = video_device_release_empty; @@ -1722,8 +1722,11 @@ static int isc_parse_dt(struct device *dev, struct isc_device *isc) break; } - subdev_entity->asd = devm_kzalloc(dev, - sizeof(*subdev_entity->asd), GFP_KERNEL); + /* asd will be freed by the subsystem once it's added to the + * notifier list + */ + subdev_entity->asd = kzalloc(sizeof(*subdev_entity->asd), + GFP_KERNEL); if (subdev_entity->asd == NULL) { of_node_put(rem); ret = -ENOMEM; @@ -1859,6 +1862,7 @@ static int atmel_isc_probe(struct platform_device *pdev) &subdev_entity->notifier); if (ret) { dev_err(dev, "fail to register async notifier\n"); + kfree(subdev_entity->asd); goto cleanup_subdev; } diff --git a/drivers/media/platform/atmel/atmel-isi.c b/drivers/media/platform/atmel/atmel-isi.c index 891fa2505efa..2f962a3418f6 100644 --- a/drivers/media/platform/atmel/atmel-isi.c +++ b/drivers/media/platform/atmel/atmel-isi.c @@ -496,7 +496,7 @@ static void stop_streaming(struct vb2_queue *vq) spin_unlock_irq(&isi->irqlock); if (!isi->enable_preview_path) { - timeout = jiffies + FRAME_INTERVAL_MILLI_SEC * HZ; + timeout = jiffies + (FRAME_INTERVAL_MILLI_SEC * HZ) / 1000; /* Wait until the end of the current frame. */ while ((isi_readl(isi, ISI_STATUS) & ISI_CTRL_CDC) && time_before(jiffies, timeout)) diff --git a/drivers/media/platform/davinci/isif.c b/drivers/media/platform/davinci/isif.c index 5813b49391ed..12065ad1ac45 100644 --- a/drivers/media/platform/davinci/isif.c +++ b/drivers/media/platform/davinci/isif.c @@ -886,9 +886,7 @@ static int isif_set_hw_if_params(struct vpfe_hw_if_param *params) static int isif_config_ycbcr(void) { struct isif_ycbcr_config *params = &isif_cfg.ycbcr; - struct vpss_pg_frame_size frame_size; u32 modeset = 0, ccdcfg = 0; - struct vpss_sync_pol sync; dev_dbg(isif_cfg.dev, "\nStarting isif_config_ycbcr..."); @@ -976,13 +974,6 @@ static int isif_config_ycbcr(void) /* two fields are interleaved in memory */ regw(0x00000249, SDOFST); - /* Setup test pattern if enabled */ - if (isif_cfg.bayer.config_params.test_pat_gen) { - sync.ccdpg_hdpol = params->hd_pol; - sync.ccdpg_vdpol = params->vd_pol; - dm365_vpss_set_sync_pol(sync); - dm365_vpss_set_pg_frame_size(frame_size); - } return 0; } @@ -1102,7 +1093,8 @@ fail_nobase_res: while (i >= 0) { res = platform_get_resource(pdev, IORESOURCE_MEM, i); - release_mem_region(res->start, resource_size(res)); + if (res) + release_mem_region(res->start, resource_size(res)); i--; } vpfe_unregister_ccdc_device(&isif_hw_dev); diff --git a/drivers/media/platform/davinci/vpbe.c b/drivers/media/platform/davinci/vpbe.c index 1d3c13e36904..915af9ca4711 100644 --- a/drivers/media/platform/davinci/vpbe.c +++ b/drivers/media/platform/davinci/vpbe.c @@ -126,7 +126,7 @@ static int vpbe_enum_outputs(struct vpbe_device *vpbe_dev, struct v4l2_output *output) { struct vpbe_config *cfg = vpbe_dev->cfg; - int temp_index = output->index; + unsigned int temp_index = output->index; if (temp_index >= cfg->num_outputs) return -EINVAL; diff --git a/drivers/media/platform/davinci/vpbe_display.c b/drivers/media/platform/davinci/vpbe_display.c index 13d027031ff0..82b06cc48bd1 100644 --- a/drivers/media/platform/davinci/vpbe_display.c +++ b/drivers/media/platform/davinci/vpbe_display.c @@ -518,7 +518,7 @@ vpbe_disp_calculate_scale_factor(struct vpbe_display *disp_dev, else if (v_scale == 4) layer_info->v_zoom = ZOOM_X4; if (v_exp) - layer_info->h_exp = V_EXP_6_OVER_5; + layer_info->v_exp = V_EXP_6_OVER_5; } else { /* no scaling, only cropping. Set display area to crop area */ cfg->ysize = expected_ysize; diff --git a/drivers/media/platform/exynos4-is/fimc-isp-video.c b/drivers/media/platform/exynos4-is/fimc-isp-video.c index a920164f53f1..39340abefd14 100644 --- a/drivers/media/platform/exynos4-is/fimc-isp-video.c +++ b/drivers/media/platform/exynos4-is/fimc-isp-video.c @@ -316,7 +316,7 @@ static int isp_video_release(struct file *file) ivc->streaming = 0; } - vb2_fop_release(file); + _vb2_fop_release(file, NULL); if (v4l2_fh_is_singular_file(file)) { fimc_pipeline_call(&ivc->ve, close); diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c index 4d29860d27b4..18604b608ab2 100644 --- a/drivers/media/platform/omap/omap_vout.c +++ b/drivers/media/platform/omap/omap_vout.c @@ -1527,23 +1527,20 @@ static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *b) unsigned long size; struct videobuf_buffer *vb; - vb = q->bufs[b->index]; - if (!vout->streaming) return -EINVAL; - if (file->f_flags & O_NONBLOCK) - /* Call videobuf_dqbuf for non blocking mode */ - ret = videobuf_dqbuf(q, (struct v4l2_buffer *)b, 1); - else - /* Call videobuf_dqbuf for blocking mode */ - ret = videobuf_dqbuf(q, (struct v4l2_buffer *)b, 0); + ret = videobuf_dqbuf(q, b, !!(file->f_flags & O_NONBLOCK)); + if (ret) + return ret; + + vb = q->bufs[b->index]; addr = (unsigned long) vout->buf_phy_addr[vb->i]; size = (unsigned long) vb->size; dma_unmap_single(vout->vid_dev->v4l2_dev.dev, addr, size, DMA_TO_DEVICE); - return ret; + return 0; } static int vidioc_streamon(struct file *file, void *fh, enum v4l2_buf_type i) diff --git a/drivers/media/platform/pxa_camera.c b/drivers/media/platform/pxa_camera.c index edca993c2b1f..d270a23299cc 100644 --- a/drivers/media/platform/pxa_camera.c +++ b/drivers/media/platform/pxa_camera.c @@ -2374,7 +2374,7 @@ static int pxa_camera_probe(struct platform_device *pdev) pcdev->res = res; pcdev->pdata = pdev->dev.platform_data; - if (&pdev->dev.of_node && !pcdev->pdata) { + if (pdev->dev.of_node && !pcdev->pdata) { err = pxa_camera_pdata_from_dt(&pdev->dev, pcdev, &pcdev->asd); } else { pcdev->platform_flags = pcdev->pdata->flags; diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c index 769e9e68562d..9360b36b82cd 100644 --- a/drivers/media/platform/qcom/venus/core.c +++ b/drivers/media/platform/qcom/venus/core.c @@ -345,10 +345,11 @@ static const struct venus_resources msm8916_res = { }; static const struct freq_tbl msm8996_freq_table[] = { - { 1944000, 490000000 }, /* 4k UHD @ 60 */ - { 972000, 320000000 }, /* 4k UHD @ 30 */ - { 489600, 150000000 }, /* 1080p @ 60 */ - { 244800, 75000000 }, /* 1080p @ 30 */ + { 1944000, 520000000 }, /* 4k UHD @ 60 (decode only) */ + { 972000, 520000000 }, /* 4k UHD @ 30 */ + { 489600, 346666667 }, /* 1080p @ 60 */ + { 244800, 150000000 }, /* 1080p @ 30 */ + { 108000, 75000000 }, /* 720p @ 30 */ }; static const struct reg_val msm8996_reg_preset[] = { diff --git a/drivers/media/platform/qcom/venus/vdec.c b/drivers/media/platform/qcom/venus/vdec.c index c9e9576bb08a..5f0965593a0d 100644 --- a/drivers/media/platform/qcom/venus/vdec.c +++ b/drivers/media/platform/qcom/venus/vdec.c @@ -1060,9 +1060,6 @@ static const struct v4l2_file_operations vdec_fops = { .unlocked_ioctl = video_ioctl2, .poll = v4l2_m2m_fop_poll, .mmap = v4l2_m2m_fop_mmap, -#ifdef CONFIG_COMPAT - .compat_ioctl32 = v4l2_compat_ioctl32, -#endif }; static int vdec_probe(struct platform_device *pdev) diff --git a/drivers/media/platform/qcom/venus/venc.c b/drivers/media/platform/qcom/venus/venc.c index 3fcf0e9b7b29..a8af4a09485e 100644 --- a/drivers/media/platform/qcom/venus/venc.c +++ b/drivers/media/platform/qcom/venus/venc.c @@ -1166,9 +1166,6 @@ static const struct v4l2_file_operations venc_fops = { .unlocked_ioctl = video_ioctl2, .poll = v4l2_m2m_fop_poll, .mmap = v4l2_m2m_fop_mmap, -#ifdef CONFIG_COMPAT - .compat_ioctl32 = v4l2_compat_ioctl32, -#endif }; static int venc_probe(struct platform_device *pdev) diff --git a/drivers/media/platform/rcar_drif.c b/drivers/media/platform/rcar_drif.c index 522364ff0d5d..3871ed6a1fcb 100644 --- a/drivers/media/platform/rcar_drif.c +++ b/drivers/media/platform/rcar_drif.c @@ -915,6 +915,7 @@ static int rcar_drif_g_fmt_sdr_cap(struct file *file, void *priv, { struct rcar_drif_sdr *sdr = video_drvdata(file); + memset(f->fmt.sdr.reserved, 0, sizeof(f->fmt.sdr.reserved)); f->fmt.sdr.pixelformat = sdr->fmt->pixelformat; f->fmt.sdr.buffersize = sdr->fmt->buffersize; diff --git a/drivers/media/platform/s5p-jpeg/jpeg-core.c b/drivers/media/platform/s5p-jpeg/jpeg-core.c index 4568e68e15fa..85a5e33600c0 100644 --- a/drivers/media/platform/s5p-jpeg/jpeg-core.c +++ b/drivers/media/platform/s5p-jpeg/jpeg-core.c @@ -2005,7 +2005,7 @@ static int s5p_jpeg_controls_create(struct s5p_jpeg_ctx *ctx) v4l2_ctrl_new_std(&ctx->ctrl_handler, &s5p_jpeg_ctrl_ops, V4L2_CID_JPEG_RESTART_INTERVAL, - 0, 3, 0xffff, 0); + 0, 0xffff, 1, 0); if (ctx->jpeg->variant->version == SJPEG_S5P) mask = ~0x06; /* 422, 420 */ } diff --git a/drivers/media/platform/sti/bdisp/bdisp-hw.c b/drivers/media/platform/sti/bdisp/bdisp-hw.c index b7892f3efd98..5c4c3f0c57be 100644 --- a/drivers/media/platform/sti/bdisp/bdisp-hw.c +++ b/drivers/media/platform/sti/bdisp/bdisp-hw.c @@ -14,8 +14,8 @@ #define MAX_SRC_WIDTH 2048 /* Reset & boot poll config */ -#define POLL_RST_MAX 50 -#define POLL_RST_DELAY_MS 20 +#define POLL_RST_MAX 500 +#define POLL_RST_DELAY_MS 2 enum bdisp_target_plan { BDISP_RGB, @@ -382,7 +382,7 @@ int bdisp_hw_reset(struct bdisp_dev *bdisp) for (i = 0; i < POLL_RST_MAX; i++) { if (readl(bdisp->regs + BLT_STA1) & BLT_STA1_IDLE) break; - msleep(POLL_RST_DELAY_MS); + udelay(POLL_RST_DELAY_MS * 1000); } if (i == POLL_RST_MAX) dev_err(bdisp->dev, "Reset timeout\n"); diff --git a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c index 939da6da7644..601ca2b2ecd3 100644 --- a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c +++ b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c @@ -651,8 +651,7 @@ static int bdisp_release(struct file *file) dev_dbg(bdisp->dev, "%s\n", __func__); - if (mutex_lock_interruptible(&bdisp->lock)) - return -ERESTARTSYS; + mutex_lock(&bdisp->lock); v4l2_m2m_ctx_release(ctx->fh.m2m_ctx); diff --git a/drivers/media/platform/stm32/stm32-dcmi.c b/drivers/media/platform/stm32/stm32-dcmi.c index 4281f3f76ab1..f157ccbd8286 100644 --- a/drivers/media/platform/stm32/stm32-dcmi.c +++ b/drivers/media/platform/stm32/stm32-dcmi.c @@ -161,6 +161,9 @@ struct stm32_dcmi { u32 misr; int errors_count; int buffers_count; + + /* Ensure DMA operations atomicity */ + struct mutex dma_lock; }; static inline struct stm32_dcmi *notifier_to_dcmi(struct v4l2_async_notifier *n) @@ -291,6 +294,13 @@ static int dcmi_start_dma(struct stm32_dcmi *dcmi, return ret; } + /* + * Avoid call of dmaengine_terminate_all() between + * dmaengine_prep_slave_single() and dmaengine_submit() + * by locking the whole DMA submission sequence + */ + mutex_lock(&dcmi->dma_lock); + /* Prepare a DMA transaction */ desc = dmaengine_prep_slave_single(dcmi->dma_chan, buf->paddr, buf->size, @@ -298,6 +308,7 @@ static int dcmi_start_dma(struct stm32_dcmi *dcmi, if (!desc) { dev_err(dcmi->dev, "%s: DMA dmaengine_prep_slave_single failed for buffer size %zu\n", __func__, buf->size); + mutex_unlock(&dcmi->dma_lock); return -EINVAL; } @@ -309,9 +320,12 @@ static int dcmi_start_dma(struct stm32_dcmi *dcmi, dcmi->dma_cookie = dmaengine_submit(desc); if (dma_submit_error(dcmi->dma_cookie)) { dev_err(dcmi->dev, "%s: DMA submission failed\n", __func__); + mutex_unlock(&dcmi->dma_lock); return -ENXIO; } + mutex_unlock(&dcmi->dma_lock); + dma_async_issue_pending(dcmi->dma_chan); return 0; @@ -690,7 +704,9 @@ static void dcmi_stop_streaming(struct vb2_queue *vq) spin_unlock_irq(&dcmi->irqlock); /* Stop all pending DMA operations */ + mutex_lock(&dcmi->dma_lock); dmaengine_terminate_all(dcmi->dma_chan); + mutex_unlock(&dcmi->dma_lock); clk_disable(dcmi->mclk); @@ -1662,6 +1678,7 @@ static int dcmi_probe(struct platform_device *pdev) spin_lock_init(&dcmi->irqlock); mutex_init(&dcmi->lock); + mutex_init(&dcmi->dma_lock); init_completion(&dcmi->complete); INIT_LIST_HEAD(&dcmi->buffers); diff --git a/drivers/media/platform/ti-vpe/vpdma.h b/drivers/media/platform/ti-vpe/vpdma.h index 7e611501c291..f29074c84915 100644 --- a/drivers/media/platform/ti-vpe/vpdma.h +++ b/drivers/media/platform/ti-vpe/vpdma.h @@ -60,6 +60,7 @@ struct vpdma_data_format { * line stride of source and dest * buffers should be 16 byte aligned */ +#define VPDMA_MAX_STRIDE 65520 /* Max line stride 16 byte aligned */ #define VPDMA_DTD_DESC_SIZE 32 /* 8 words */ #define VPDMA_CFD_CTD_DESC_SIZE 16 /* 4 words */ diff --git a/drivers/media/platform/ti-vpe/vpe.c b/drivers/media/platform/ti-vpe/vpe.c index 45bd10544189..2e8970c7e22d 100644 --- a/drivers/media/platform/ti-vpe/vpe.c +++ b/drivers/media/platform/ti-vpe/vpe.c @@ -352,20 +352,25 @@ enum { }; /* find our format description corresponding to the passed v4l2_format */ -static struct vpe_fmt *find_format(struct v4l2_format *f) +static struct vpe_fmt *__find_format(u32 fourcc) { struct vpe_fmt *fmt; unsigned int k; for (k = 0; k < ARRAY_SIZE(vpe_formats); k++) { fmt = &vpe_formats[k]; - if (fmt->fourcc == f->fmt.pix.pixelformat) + if (fmt->fourcc == fourcc) return fmt; } return NULL; } +static struct vpe_fmt *find_format(struct v4l2_format *f) +{ + return __find_format(f->fmt.pix.pixelformat); +} + /* * there is one vpe_dev structure in the driver, it is shared by * all instances. @@ -1044,11 +1049,14 @@ static void add_out_dtd(struct vpe_ctx *ctx, int port) dma_addr_t dma_addr; u32 flags = 0; u32 offset = 0; + u32 stride; if (port == VPE_PORT_MV_OUT) { vpdma_fmt = &vpdma_misc_fmts[VPDMA_DATA_FMT_MV]; dma_addr = ctx->mv_buf_dma[mv_buf_selector]; q_data = &ctx->q_data[Q_DATA_SRC]; + stride = ALIGN((q_data->width * vpdma_fmt->depth) >> 3, + VPDMA_STRIDE_ALIGN); } else { /* to incorporate interleaved formats */ int plane = fmt->coplanar ? p_data->vb_part : 0; @@ -1075,6 +1083,7 @@ static void add_out_dtd(struct vpe_ctx *ctx, int port) } /* Apply the offset */ dma_addr += offset; + stride = q_data->bytesperline[VPE_LUMA]; } if (q_data->flags & Q_DATA_FRAME_1D) @@ -1086,7 +1095,7 @@ static void add_out_dtd(struct vpe_ctx *ctx, int port) MAX_W, MAX_H); vpdma_add_out_dtd(&ctx->desc_list, q_data->width, - q_data->bytesperline[VPE_LUMA], &q_data->c_rect, + stride, &q_data->c_rect, vpdma_fmt, dma_addr, MAX_OUT_WIDTH_REG1, MAX_OUT_HEIGHT_REG1, p_data->channel, flags); } @@ -1105,10 +1114,13 @@ static void add_in_dtd(struct vpe_ctx *ctx, int port) dma_addr_t dma_addr; u32 flags = 0; u32 offset = 0; + u32 stride; if (port == VPE_PORT_MV_IN) { vpdma_fmt = &vpdma_misc_fmts[VPDMA_DATA_FMT_MV]; dma_addr = ctx->mv_buf_dma[mv_buf_selector]; + stride = ALIGN((q_data->width * vpdma_fmt->depth) >> 3, + VPDMA_STRIDE_ALIGN); } else { /* to incorporate interleaved formats */ int plane = fmt->coplanar ? p_data->vb_part : 0; @@ -1135,6 +1147,7 @@ static void add_in_dtd(struct vpe_ctx *ctx, int port) } /* Apply the offset */ dma_addr += offset; + stride = q_data->bytesperline[VPE_LUMA]; if (q_data->flags & Q_DATA_INTERLACED_SEQ_TB) { /* @@ -1170,10 +1183,10 @@ static void add_in_dtd(struct vpe_ctx *ctx, int port) if (p_data->vb_part && fmt->fourcc == V4L2_PIX_FMT_NV12) frame_height /= 2; - vpdma_add_in_dtd(&ctx->desc_list, q_data->width, - q_data->bytesperline[VPE_LUMA], &q_data->c_rect, - vpdma_fmt, dma_addr, p_data->channel, field, flags, frame_width, - frame_height, 0, 0); + vpdma_add_in_dtd(&ctx->desc_list, q_data->width, stride, + &q_data->c_rect, vpdma_fmt, dma_addr, + p_data->channel, field, flags, frame_width, + frame_height, 0, 0); } /* @@ -1422,9 +1435,6 @@ static irqreturn_t vpe_irq(int irq_vpe, void *data) /* the previous dst mv buffer becomes the next src mv buffer */ ctx->src_mv_buf_selector = !ctx->src_mv_buf_selector; - if (ctx->aborting) - goto finished; - s_vb = ctx->src_vbs[0]; d_vb = ctx->dst_vb; @@ -1435,6 +1445,7 @@ static irqreturn_t vpe_irq(int irq_vpe, void *data) d_vb->timecode = s_vb->timecode; d_vb->sequence = ctx->sequence; + s_vb->sequence = ctx->sequence; d_q_data = &ctx->q_data[Q_DATA_DST]; if (d_q_data->flags & Q_IS_INTERLACED) { @@ -1488,6 +1499,9 @@ static irqreturn_t vpe_irq(int irq_vpe, void *data) ctx->src_vbs[0] = NULL; ctx->dst_vb = NULL; + if (ctx->aborting) + goto finished; + ctx->bufs_completed++; if (ctx->bufs_completed < ctx->bufs_per_job && job_ready(ctx)) { device_run(ctx); @@ -1600,9 +1614,9 @@ static int __vpe_try_fmt(struct vpe_ctx *ctx, struct v4l2_format *f, unsigned int stride = 0; if (!fmt || !(fmt->types & type)) { - vpe_err(ctx->dev, "Fourcc format (0x%08x) invalid.\n", + vpe_dbg(ctx->dev, "Fourcc format (0x%08x) invalid.\n", pix->pixelformat); - return -EINVAL; + fmt = __find_format(V4L2_PIX_FMT_YUYV); } if (pix->field != V4L2_FIELD_NONE && pix->field != V4L2_FIELD_ALTERNATE @@ -1649,7 +1663,7 @@ static int __vpe_try_fmt(struct vpe_ctx *ctx, struct v4l2_format *f, &pix->height, MIN_H, MAX_H, H_ALIGN, S_ALIGN); - if (!pix->num_planes) + if (!pix->num_planes || pix->num_planes > 2) pix->num_planes = fmt->coplanar ? 2 : 1; else if (pix->num_planes > 1 && !fmt->coplanar) pix->num_planes = 1; @@ -1688,6 +1702,10 @@ static int __vpe_try_fmt(struct vpe_ctx *ctx, struct v4l2_format *f, if (stride > plane_fmt->bytesperline) plane_fmt->bytesperline = stride; + plane_fmt->bytesperline = clamp_t(u32, plane_fmt->bytesperline, + stride, + VPDMA_MAX_STRIDE); + plane_fmt->bytesperline = ALIGN(plane_fmt->bytesperline, VPDMA_STRIDE_ALIGN); @@ -2308,7 +2326,7 @@ static int vpe_open(struct file *file) v4l2_ctrl_handler_setup(hdl); s_q_data = &ctx->q_data[Q_DATA_SRC]; - s_q_data->fmt = &vpe_formats[2]; + s_q_data->fmt = __find_format(V4L2_PIX_FMT_YUYV); s_q_data->width = 1920; s_q_data->height = 1080; s_q_data->nplanes = 1; @@ -2386,6 +2404,12 @@ static int vpe_release(struct file *file) mutex_lock(&dev->dev_mutex); free_mv_buffers(ctx); + + vpdma_unmap_desc_buf(dev->vpdma, &ctx->desc_list.buf); + vpdma_unmap_desc_buf(dev->vpdma, &ctx->mmr_adb); + vpdma_unmap_desc_buf(dev->vpdma, &ctx->sc_coeff_h); + vpdma_unmap_desc_buf(dev->vpdma, &ctx->sc_coeff_v); + vpdma_free_desc_list(&ctx->desc_list); vpdma_free_desc_buf(&ctx->mmr_adb); diff --git a/drivers/media/platform/vimc/vimc-common.c b/drivers/media/platform/vimc/vimc-common.c index 743554de724d..a9ab3871ccda 100644 --- a/drivers/media/platform/vimc/vimc-common.c +++ b/drivers/media/platform/vimc/vimc-common.c @@ -241,6 +241,8 @@ int vimc_pipeline_s_stream(struct media_entity *ent, int enable) /* Start the stream in the subdevice direct connected */ pad = media_entity_remote_pad(&ent->pads[i]); + if (!pad) + continue; if (!is_media_entity_v4l2_subdev(pad->entity)) return -EINVAL; diff --git a/drivers/media/platform/vimc/vimc-core.c b/drivers/media/platform/vimc/vimc-core.c index 57e5d6a020b0..447a01ff4e23 100644 --- a/drivers/media/platform/vimc/vimc-core.c +++ b/drivers/media/platform/vimc/vimc-core.c @@ -243,10 +243,7 @@ static void vimc_comp_unbind(struct device *master) static int vimc_comp_compare(struct device *comp, void *data) { - const struct platform_device *pdev = to_platform_device(comp); - const char *name = data; - - return !strcmp(pdev->dev.platform_data, name); + return comp == data; } static struct component_match *vimc_add_subdevs(struct vimc_device *vimc) @@ -275,7 +272,7 @@ static struct component_match *vimc_add_subdevs(struct vimc_device *vimc) } component_match_add(&vimc->pdev.dev, &match, vimc_comp_compare, - (void *)vimc->pipe_cfg->ents[i].name); + &vimc->subdevs[i]->dev); } return match; diff --git a/drivers/media/platform/vimc/vimc-sensor.c b/drivers/media/platform/vimc/vimc-sensor.c index 70cee5c0c89a..29a16f8a4123 100644 --- a/drivers/media/platform/vimc/vimc-sensor.c +++ b/drivers/media/platform/vimc/vimc-sensor.c @@ -200,13 +200,6 @@ static void *vimc_sen_process_frame(struct vimc_ent_device *ved, { struct vimc_sen_device *vsen = container_of(ved, struct vimc_sen_device, ved); - const struct vimc_pix_map *vpix; - unsigned int frame_size; - - /* Calculate the frame size */ - vpix = vimc_pix_map_by_code(vsen->mbus_format.code); - frame_size = vsen->mbus_format.width * vpix->bpp * - vsen->mbus_format.height; tpg_fill_plane_buffer(&vsen->tpg, 0, 0, vsen->frame); return vsen->frame; diff --git a/drivers/media/platform/vivid/vivid-kthread-cap.c b/drivers/media/platform/vivid/vivid-kthread-cap.c index d300e5e7eadc..2ca9c928ed2f 100644 --- a/drivers/media/platform/vivid/vivid-kthread-cap.c +++ b/drivers/media/platform/vivid/vivid-kthread-cap.c @@ -777,7 +777,11 @@ static int vivid_thread_vid_cap(void *data) if (kthread_should_stop()) break; - mutex_lock(&dev->mutex); + if (!mutex_trylock(&dev->mutex)) { + schedule_timeout_uninterruptible(1); + continue; + } + cur_jiffies = jiffies; if (dev->cap_seq_resync) { dev->jiffies_vid_cap = cur_jiffies; @@ -930,8 +934,6 @@ void vivid_stop_generating_vid_cap(struct vivid_dev *dev, bool *pstreaming) /* shutdown control thread */ vivid_grab_controls(dev, false); - mutex_unlock(&dev->mutex); kthread_stop(dev->kthread_vid_cap); dev->kthread_vid_cap = NULL; - mutex_lock(&dev->mutex); } diff --git a/drivers/media/platform/vivid/vivid-kthread-out.c b/drivers/media/platform/vivid/vivid-kthread-out.c index 7c8d75852816..ed5d8fb854b4 100644 --- a/drivers/media/platform/vivid/vivid-kthread-out.c +++ b/drivers/media/platform/vivid/vivid-kthread-out.c @@ -147,7 +147,11 @@ static int vivid_thread_vid_out(void *data) if (kthread_should_stop()) break; - mutex_lock(&dev->mutex); + if (!mutex_trylock(&dev->mutex)) { + schedule_timeout_uninterruptible(1); + continue; + } + cur_jiffies = jiffies; if (dev->out_seq_resync) { dev->jiffies_vid_out = cur_jiffies; @@ -301,8 +305,6 @@ void vivid_stop_generating_vid_out(struct vivid_dev *dev, bool *pstreaming) /* shutdown control thread */ vivid_grab_controls(dev, false); - mutex_unlock(&dev->mutex); kthread_stop(dev->kthread_vid_out); dev->kthread_vid_out = NULL; - mutex_lock(&dev->mutex); } diff --git a/drivers/media/platform/vivid/vivid-osd.c b/drivers/media/platform/vivid/vivid-osd.c index bdc380b14e0c..a95b7c56569e 100644 --- a/drivers/media/platform/vivid/vivid-osd.c +++ b/drivers/media/platform/vivid/vivid-osd.c @@ -167,7 +167,7 @@ static int _vivid_fb_check_var(struct fb_var_screeninfo *var, struct vivid_dev * var->nonstd = 0; var->vmode &= ~FB_VMODE_MASK; - var->vmode = FB_VMODE_NONINTERLACED; + var->vmode |= FB_VMODE_NONINTERLACED; /* Dummy values */ var->hsync_len = 24; diff --git a/drivers/media/platform/vivid/vivid-sdr-cap.c b/drivers/media/platform/vivid/vivid-sdr-cap.c index ebd7b9c4dd83..4f49c9a47d49 100644 --- a/drivers/media/platform/vivid/vivid-sdr-cap.c +++ b/drivers/media/platform/vivid/vivid-sdr-cap.c @@ -149,7 +149,11 @@ static int vivid_thread_sdr_cap(void *data) if (kthread_should_stop()) break; - mutex_lock(&dev->mutex); + if (!mutex_trylock(&dev->mutex)) { + schedule_timeout_uninterruptible(1); + continue; + } + cur_jiffies = jiffies; if (dev->sdr_cap_seq_resync) { dev->jiffies_sdr_cap = cur_jiffies; @@ -309,10 +313,8 @@ static void sdr_cap_stop_streaming(struct vb2_queue *vq) } /* shutdown control thread */ - mutex_unlock(&dev->mutex); kthread_stop(dev->kthread_sdr_cap); dev->kthread_sdr_cap = NULL; - mutex_lock(&dev->mutex); } const struct vb2_ops vivid_sdr_cap_qops = { diff --git a/drivers/media/platform/vivid/vivid-vid-cap.c b/drivers/media/platform/vivid/vivid-vid-cap.c index 4ca3d600aa84..c66568e8f388 100644 --- a/drivers/media/platform/vivid/vivid-vid-cap.c +++ b/drivers/media/platform/vivid/vivid-vid-cap.c @@ -239,9 +239,6 @@ static int vid_cap_start_streaming(struct vb2_queue *vq, unsigned count) if (vb2_is_streaming(&dev->vb_vid_out_q)) dev->can_loop_video = vivid_vid_can_loop(dev); - if (dev->kthread_vid_cap) - return 0; - dev->vid_cap_seq_count = 0; dprintk(dev, 1, "%s\n", __func__); for (i = 0; i < VIDEO_MAX_FRAME; i++) diff --git a/drivers/media/platform/vivid/vivid-vid-out.c b/drivers/media/platform/vivid/vivid-vid-out.c index 0b1b6218ede8..3e7a26d15074 100644 --- a/drivers/media/platform/vivid/vivid-vid-out.c +++ b/drivers/media/platform/vivid/vivid-vid-out.c @@ -158,9 +158,6 @@ static int vid_out_start_streaming(struct vb2_queue *vq, unsigned count) if (vb2_is_streaming(&dev->vb_vid_cap_q)) dev->can_loop_video = vivid_vid_can_loop(dev); - if (dev->kthread_vid_out) - return 0; - dev->vid_out_seq_count = 0; dprintk(dev, 1, "%s\n", __func__); if (dev->start_streaming_error) { diff --git a/drivers/media/radio/radio-wl1273.c b/drivers/media/radio/radio-wl1273.c index 903fcd5e99c0..fc1ae86911b9 100644 --- a/drivers/media/radio/radio-wl1273.c +++ b/drivers/media/radio/radio-wl1273.c @@ -1156,8 +1156,7 @@ static int wl1273_fm_fops_release(struct file *file) if (radio->rds_users > 0) { radio->rds_users--; if (radio->rds_users == 0) { - if (mutex_lock_interruptible(&core->lock)) - return -EINTR; + mutex_lock(&core->lock); radio->irq_flags &= ~WL1273_RDS_EVENT; diff --git a/drivers/media/radio/si470x/radio-si470x-i2c.c b/drivers/media/radio/si470x/radio-si470x-i2c.c index 8ce6f9cff746..527535614342 100644 --- a/drivers/media/radio/si470x/radio-si470x-i2c.c +++ b/drivers/media/radio/si470x/radio-si470x-i2c.c @@ -453,8 +453,10 @@ static int si470x_i2c_remove(struct i2c_client *client) free_irq(client->irq, radio); video_unregister_device(&radio->videodev); - kfree(radio); + v4l2_ctrl_handler_free(&radio->hdl); + v4l2_device_unregister(&radio->v4l2_dev); + kfree(radio); return 0; } diff --git a/drivers/media/radio/wl128x/fmdrv_common.c b/drivers/media/radio/wl128x/fmdrv_common.c index 26895ae42fcf..2d20d908e280 100644 --- a/drivers/media/radio/wl128x/fmdrv_common.c +++ b/drivers/media/radio/wl128x/fmdrv_common.c @@ -1271,8 +1271,9 @@ static int fm_download_firmware(struct fmdev *fmdev, const u8 *fw_name) switch (action->type) { case ACTION_SEND_COMMAND: /* Send */ - if (fmc_send_cmd(fmdev, 0, 0, action->data, - action->size, NULL, NULL)) + ret = fmc_send_cmd(fmdev, 0, 0, action->data, + action->size, NULL, NULL); + if (ret) goto rel_fw; cmd_cnt++; diff --git a/drivers/media/rc/iguanair.c b/drivers/media/rc/iguanair.c index 3c2e248ceca8..03dbbfba71fc 100644 --- a/drivers/media/rc/iguanair.c +++ b/drivers/media/rc/iguanair.c @@ -427,7 +427,7 @@ static int iguanair_probe(struct usb_interface *intf, int ret, pipein, pipeout; struct usb_host_interface *idesc; - idesc = intf->altsetting; + idesc = intf->cur_altsetting; if (idesc->desc.bNumEndpoints < 2) return -ENODEV; diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c index a7547c88e4c3..edf8a7a76e86 100644 --- a/drivers/media/rc/imon.c +++ b/drivers/media/rc/imon.c @@ -1737,8 +1737,7 @@ static void imon_incoming_scancode(struct imon_context *ictx, spin_unlock_irqrestore(&ictx->kc_lock, flags); /* send touchscreen events through input subsystem if touchpad data */ - if (ictx->display_type == IMON_DISPLAY_TYPE_VGA && len == 8 && - buf[7] == 0x86) { + if (ictx->touch && len == 8 && buf[7] == 0x86) { imon_touch_event(ictx, buf); return; diff --git a/drivers/media/rc/ir-rc6-decoder.c b/drivers/media/rc/ir-rc6-decoder.c index 5d0d2fe3b7a7..90f7930444a1 100644 --- a/drivers/media/rc/ir-rc6-decoder.c +++ b/drivers/media/rc/ir-rc6-decoder.c @@ -40,6 +40,7 @@ #define RC6_6A_MCE_TOGGLE_MASK 0x8000 /* for the body bits */ #define RC6_6A_LCC_MASK 0xffff0000 /* RC6-6A-32 long customer code mask */ #define RC6_6A_MCE_CC 0x800f0000 /* MCE customer code */ +#define RC6_6A_KATHREIN_CC 0x80460000 /* Kathrein RCU-676 customer code */ #ifndef CHAR_BIT #define CHAR_BIT 8 /* Normally in */ #endif @@ -252,13 +253,17 @@ again: toggle = 0; break; case 32: - if ((scancode & RC6_6A_LCC_MASK) == RC6_6A_MCE_CC) { + switch (scancode & RC6_6A_LCC_MASK) { + case RC6_6A_MCE_CC: + case RC6_6A_KATHREIN_CC: protocol = RC_PROTO_RC6_MCE; toggle = !!(scancode & RC6_6A_MCE_TOGGLE_MASK); scancode &= ~RC6_6A_MCE_TOGGLE_MASK; - } else { + break; + default: protocol = RC_PROTO_RC6_6A_32; toggle = 0; + break; } break; default: diff --git a/drivers/media/usb/au0828/au0828-core.c b/drivers/media/usb/au0828/au0828-core.c index e3f63299f85c..07e3322bb182 100644 --- a/drivers/media/usb/au0828/au0828-core.c +++ b/drivers/media/usb/au0828/au0828-core.c @@ -632,7 +632,7 @@ static int au0828_usb_probe(struct usb_interface *interface, /* Analog TV */ retval = au0828_analog_register(dev, interface); if (retval) { - pr_err("%s() au0282_dev_register failed to register on V4L2\n", + pr_err("%s() au0828_analog_register failed to register on V4L2\n", __func__); mutex_unlock(&dev->lock); goto done; @@ -641,7 +641,7 @@ static int au0828_usb_probe(struct usb_interface *interface, /* Digital TV */ retval = au0828_dvb_register(dev); if (retval) - pr_err("%s() au0282_dev_register failed\n", + pr_err("%s() au0828_dvb_register failed\n", __func__); /* Remote controller */ diff --git a/drivers/media/usb/b2c2/flexcop-usb.c b/drivers/media/usb/b2c2/flexcop-usb.c index a8f3169e30b3..427cda457af6 100644 --- a/drivers/media/usb/b2c2/flexcop-usb.c +++ b/drivers/media/usb/b2c2/flexcop-usb.c @@ -294,7 +294,7 @@ static int flexcop_usb_i2c_req(struct flexcop_i2c_adapter *i2c, mutex_unlock(&fc_usb->data_mutex); - return 0; + return ret; } /* actual bus specific access functions, @@ -503,7 +503,13 @@ urb_error: static int flexcop_usb_init(struct flexcop_usb *fc_usb) { /* use the alternate setting with the larges buffer */ - usb_set_interface(fc_usb->udev,0,1); + int ret = usb_set_interface(fc_usb->udev, 0, 1); + + if (ret) { + err("set interface failed."); + return ret; + } + switch (fc_usb->udev->speed) { case USB_SPEED_LOW: err("cannot handle USB speed because it is too slow."); @@ -537,6 +543,9 @@ static int flexcop_usb_probe(struct usb_interface *intf, struct flexcop_device *fc = NULL; int ret; + if (intf->cur_altsetting->desc.bNumEndpoints < 1) + return -ENODEV; + if ((fc = flexcop_device_kmalloc(sizeof(struct flexcop_usb))) == NULL) { err("out of memory\n"); return -ENOMEM; diff --git a/drivers/media/usb/cx231xx/cx231xx-video.c b/drivers/media/usb/cx231xx/cx231xx-video.c index 179b8481a870..fd33c2e9327d 100644 --- a/drivers/media/usb/cx231xx/cx231xx-video.c +++ b/drivers/media/usb/cx231xx/cx231xx-video.c @@ -1389,7 +1389,7 @@ int cx231xx_g_register(struct file *file, void *priv, ret = cx231xx_read_ctrl_reg(dev, VRT_GET_REGISTER, (u16)reg->reg, value, 4); reg->val = value[0] | value[1] << 8 | - value[2] << 16 | value[3] << 24; + value[2] << 16 | (u32)value[3] << 24; reg->size = 4; break; case 1: /* AFE - read byte */ diff --git a/drivers/media/usb/dvb-usb/af9005.c b/drivers/media/usb/dvb-usb/af9005.c index 986763b1b2b3..66990a193bc5 100644 --- a/drivers/media/usb/dvb-usb/af9005.c +++ b/drivers/media/usb/dvb-usb/af9005.c @@ -563,7 +563,7 @@ static int af9005_boot_packet(struct usb_device *udev, int type, u8 *reply, u8 *buf, int size) { u16 checksum; - int act_len, i, ret; + int act_len = 0, i, ret; memset(buf, 0, size); buf[0] = (u8) (FW_BULKOUT_SIZE & 0xff); @@ -985,8 +985,9 @@ static int af9005_identify_state(struct usb_device *udev, else if (reply == 0x02) *cold = 0; else - return -EIO; - deb_info("Identify state cold = %d\n", *cold); + ret = -EIO; + if (!ret) + deb_info("Identify state cold = %d\n", *cold); err: kfree(buf); diff --git a/drivers/media/usb/dvb-usb/cxusb.c b/drivers/media/usb/dvb-usb/cxusb.c index cfe86b4864b3..47a9a791ee7d 100644 --- a/drivers/media/usb/dvb-usb/cxusb.c +++ b/drivers/media/usb/dvb-usb/cxusb.c @@ -455,7 +455,8 @@ static int cxusb_rc_query(struct dvb_usb_device *d) { u8 ircode[4]; - cxusb_ctrl_msg(d, CMD_GET_IR_CODE, NULL, 0, ircode, 4); + if (cxusb_ctrl_msg(d, CMD_GET_IR_CODE, NULL, 0, ircode, 4) < 0) + return 0; if (ircode[2] || ircode[3]) rc_keydown(d->rc_dev, RC_PROTO_NEC, diff --git a/drivers/media/usb/dvb-usb/digitv.c b/drivers/media/usb/dvb-usb/digitv.c index 475a3c0cdee7..20d33f0544ed 100644 --- a/drivers/media/usb/dvb-usb/digitv.c +++ b/drivers/media/usb/dvb-usb/digitv.c @@ -233,18 +233,22 @@ static struct rc_map_table rc_map_digitv_table[] = { static int digitv_rc_query(struct dvb_usb_device *d, u32 *event, int *state) { - int i; + int ret, i; u8 key[5]; u8 b[4] = { 0 }; *event = 0; *state = REMOTE_NO_KEY_PRESSED; - digitv_ctrl_msg(d,USB_READ_REMOTE,0,NULL,0,&key[1],4); + ret = digitv_ctrl_msg(d, USB_READ_REMOTE, 0, NULL, 0, &key[1], 4); + if (ret) + return ret; /* Tell the device we've read the remote. Not sure how necessary this is, but the Nebula SDK does it. */ - digitv_ctrl_msg(d,USB_WRITE_REMOTE,0,b,4,NULL,0); + ret = digitv_ctrl_msg(d, USB_WRITE_REMOTE, 0, b, 4, NULL, 0); + if (ret) + return ret; /* if something is inside the buffer, simulate key press */ if (key[1] != 0) diff --git a/drivers/media/usb/dvb-usb/dvb-usb-urb.c b/drivers/media/usb/dvb-usb/dvb-usb-urb.c index c1b4e94a37f8..2aabf90d8697 100644 --- a/drivers/media/usb/dvb-usb/dvb-usb-urb.c +++ b/drivers/media/usb/dvb-usb/dvb-usb-urb.c @@ -12,7 +12,7 @@ int dvb_usb_generic_rw(struct dvb_usb_device *d, u8 *wbuf, u16 wlen, u8 *rbuf, u16 rlen, int delay_ms) { - int actlen,ret = -ENOMEM; + int actlen = 0, ret = -ENOMEM; if (!d || wbuf == NULL || wlen == 0) return -EINVAL; diff --git a/drivers/media/usb/gspca/gspca.c b/drivers/media/usb/gspca/gspca.c index 0f141762abf1..87582be4a39d 100644 --- a/drivers/media/usb/gspca/gspca.c +++ b/drivers/media/usb/gspca/gspca.c @@ -2038,7 +2038,7 @@ int gspca_dev_probe2(struct usb_interface *intf, pr_err("couldn't kzalloc gspca struct\n"); return -ENOMEM; } - gspca_dev->usb_buf = kmalloc(USB_BUF_SZ, GFP_KERNEL); + gspca_dev->usb_buf = kzalloc(USB_BUF_SZ, GFP_KERNEL); if (!gspca_dev->usb_buf) { pr_err("out of memory\n"); ret = -ENOMEM; diff --git a/drivers/media/usb/pulse8-cec/pulse8-cec.c b/drivers/media/usb/pulse8-cec/pulse8-cec.c index 50146f263d90..f1615fb60015 100644 --- a/drivers/media/usb/pulse8-cec/pulse8-cec.c +++ b/drivers/media/usb/pulse8-cec/pulse8-cec.c @@ -121,6 +121,7 @@ struct pulse8 { unsigned int vers; struct completion cmd_done; struct work_struct work; + u8 work_result; struct delayed_work ping_eeprom_work; struct cec_msg rx_msg; u8 data[DATA_SIZE]; @@ -142,8 +143,10 @@ static void pulse8_irq_work_handler(struct work_struct *work) { struct pulse8 *pulse8 = container_of(work, struct pulse8, work); + u8 result = pulse8->work_result; - switch (pulse8->data[0] & 0x3f) { + pulse8->work_result = 0; + switch (result & 0x3f) { case MSGCODE_FRAME_DATA: cec_received_msg(pulse8->adap, &pulse8->rx_msg); break; @@ -177,12 +180,12 @@ static irqreturn_t pulse8_interrupt(struct serio *serio, unsigned char data, pulse8->escape = false; } else if (data == MSGEND) { struct cec_msg *msg = &pulse8->rx_msg; + u8 msgcode = pulse8->buf[0]; if (debug) dev_info(pulse8->dev, "received: %*ph\n", pulse8->idx, pulse8->buf); - pulse8->data[0] = pulse8->buf[0]; - switch (pulse8->buf[0] & 0x3f) { + switch (msgcode & 0x3f) { case MSGCODE_FRAME_START: msg->len = 1; msg->msg[0] = pulse8->buf[1]; @@ -191,14 +194,20 @@ static irqreturn_t pulse8_interrupt(struct serio *serio, unsigned char data, if (msg->len == CEC_MAX_MSG_SIZE) break; msg->msg[msg->len++] = pulse8->buf[1]; - if (pulse8->buf[0] & MSGCODE_FRAME_EOM) + if (msgcode & MSGCODE_FRAME_EOM) { + WARN_ON(pulse8->work_result); + pulse8->work_result = msgcode; schedule_work(&pulse8->work); + break; + } break; case MSGCODE_TRANSMIT_SUCCEEDED: case MSGCODE_TRANSMIT_FAILED_LINE: case MSGCODE_TRANSMIT_FAILED_ACK: case MSGCODE_TRANSMIT_FAILED_TIMEOUT_DATA: case MSGCODE_TRANSMIT_FAILED_TIMEOUT_LINE: + WARN_ON(pulse8->work_result); + pulse8->work_result = msgcode; schedule_work(&pulse8->work); break; case MSGCODE_HIGH_ERROR: @@ -585,7 +594,7 @@ unlock: else pulse8->config_pending = true; mutex_unlock(&pulse8->config_lock); - return err; + return log_addr == CEC_LOG_ADDR_INVALID ? 0 : err; } static int pulse8_cec_adap_transmit(struct cec_adapter *adap, u8 attempts, diff --git a/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c b/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c index 4320bda9352d..e0413db26781 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-v4l2.c @@ -915,8 +915,12 @@ static void pvr2_v4l2_internal_check(struct pvr2_channel *chp) pvr2_v4l2_dev_disassociate_parent(vp->dev_video); pvr2_v4l2_dev_disassociate_parent(vp->dev_radio); if (!list_empty(&vp->dev_video->devbase.fh_list) || - !list_empty(&vp->dev_radio->devbase.fh_list)) + (vp->dev_radio && + !list_empty(&vp->dev_radio->devbase.fh_list))) { + pvr2_trace(PVR2_TRACE_STRUCT, + "pvr2_v4l2 internal_check exit-empty id=%p", vp); return; + } pvr2_v4l2_destroy_no_lock(vp); } @@ -990,7 +994,8 @@ static int pvr2_v4l2_release(struct file *file) kfree(fhp); if (vp->channel.mc_head->disconnect_flag && list_empty(&vp->dev_video->devbase.fh_list) && - list_empty(&vp->dev_radio->devbase.fh_list)) { + (!vp->dev_radio || + list_empty(&vp->dev_radio->devbase.fh_list))) { pvr2_v4l2_destroy_no_lock(vp); } return 0; diff --git a/drivers/media/usb/stkwebcam/stk-webcam.c b/drivers/media/usb/stkwebcam/stk-webcam.c index a7da1356a36e..6992e84f8a8b 100644 --- a/drivers/media/usb/stkwebcam/stk-webcam.c +++ b/drivers/media/usb/stkwebcam/stk-webcam.c @@ -164,7 +164,11 @@ int stk_camera_read_reg(struct stk_camera *dev, u16 index, u8 *value) *value = *buf; kfree(buf); - return ret; + + if (ret < 0) + return ret; + else + return 0; } static int stk_start_stream(struct stk_camera *dev) diff --git a/drivers/media/usb/usbvision/usbvision-video.c b/drivers/media/usb/usbvision/usbvision-video.c index 960272d3c924..4c39c502d616 100644 --- a/drivers/media/usb/usbvision/usbvision-video.c +++ b/drivers/media/usb/usbvision/usbvision-video.c @@ -328,6 +328,10 @@ static int usbvision_v4l2_open(struct file *file) if (mutex_lock_interruptible(&usbvision->v4l2_lock)) return -ERESTARTSYS; + if (usbvision->remove_pending) { + err_code = -ENODEV; + goto unlock; + } if (usbvision->user) { err_code = -EBUSY; } else { @@ -391,6 +395,7 @@ unlock: static int usbvision_v4l2_close(struct file *file) { struct usb_usbvision *usbvision = video_drvdata(file); + int r; PDEBUG(DBG_IO, "close"); @@ -405,9 +410,10 @@ static int usbvision_v4l2_close(struct file *file) usbvision_scratch_free(usbvision); usbvision->user--; + r = usbvision->remove_pending; mutex_unlock(&usbvision->v4l2_lock); - if (usbvision->remove_pending) { + if (r) { printk(KERN_INFO "%s: Final disconnect\n", __func__); usbvision_release(usbvision); return 0; @@ -1091,6 +1097,11 @@ static int usbvision_radio_open(struct file *file) if (mutex_lock_interruptible(&usbvision->v4l2_lock)) return -ERESTARTSYS; + + if (usbvision->remove_pending) { + err_code = -ENODEV; + goto out; + } err_code = v4l2_fh_open(file); if (err_code) goto out; @@ -1123,6 +1134,7 @@ out: static int usbvision_radio_close(struct file *file) { struct usb_usbvision *usbvision = video_drvdata(file); + int r; PDEBUG(DBG_IO, ""); @@ -1135,9 +1147,10 @@ static int usbvision_radio_close(struct file *file) usbvision_audio_off(usbvision); usbvision->radio = 0; usbvision->user--; + r = usbvision->remove_pending; mutex_unlock(&usbvision->v4l2_lock); - if (usbvision->remove_pending) { + if (r) { printk(KERN_INFO "%s: Final disconnect\n", __func__); v4l2_fh_release(file); usbvision_release(usbvision); @@ -1562,6 +1575,7 @@ err_usb: static void usbvision_disconnect(struct usb_interface *intf) { struct usb_usbvision *usbvision = to_usbvision(usb_get_intfdata(intf)); + int u; PDEBUG(DBG_PROBE, ""); @@ -1578,13 +1592,14 @@ static void usbvision_disconnect(struct usb_interface *intf) v4l2_device_disconnect(&usbvision->v4l2_dev); usbvision_i2c_unregister(usbvision); usbvision->remove_pending = 1; /* Now all ISO data will be ignored */ + u = usbvision->user; usb_put_dev(usbvision->dev); usbvision->dev = NULL; /* USB device is no more */ mutex_unlock(&usbvision->v4l2_lock); - if (usbvision->user) { + if (u) { printk(KERN_INFO "%s: In use, disconnect pending\n", __func__); wake_up_interruptible(&usbvision->wait_frame); diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index 6432136c2cb2..591ca125bd96 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -1446,6 +1446,11 @@ static int uvc_scan_chain_forward(struct uvc_video_chain *chain, break; if (forward == prev) continue; + if (forward->chain.next || forward->chain.prev) { + uvc_trace(UVC_TRACE_DESCR, "Found reference to " + "entity %d already in chain.\n", forward->id); + return -EINVAL; + } switch (UVC_ENTITY_TYPE(forward)) { case UVC_VC_EXTENSION_UNIT: @@ -1527,6 +1532,13 @@ static int uvc_scan_chain_backward(struct uvc_video_chain *chain, return -1; } + if (term->chain.next || term->chain.prev) { + uvc_trace(UVC_TRACE_DESCR, "Found reference to " + "entity %d already in chain.\n", + term->id); + return -EINVAL; + } + if (uvc_trace_param & UVC_TRACE_PROBE) printk(KERN_CONT " %d", term->id); @@ -2059,6 +2071,20 @@ static int uvc_probe(struct usb_interface *intf, sizeof(dev->name) - len); } + /* Initialize the media device. */ +#ifdef CONFIG_MEDIA_CONTROLLER + dev->mdev.dev = &intf->dev; + strscpy(dev->mdev.model, dev->name, sizeof(dev->mdev.model)); + if (udev->serial) + strscpy(dev->mdev.serial, udev->serial, + sizeof(dev->mdev.serial)); + usb_make_path(udev, dev->mdev.bus_info, sizeof(dev->mdev.bus_info)); + dev->mdev.hw_revision = le16_to_cpu(udev->descriptor.bcdDevice); + media_device_init(&dev->mdev); + + dev->vdev.mdev = &dev->mdev; +#endif + /* Parse the Video Class control descriptor. */ if (uvc_parse_control(dev) < 0) { uvc_trace(UVC_TRACE_PROBE, "Unable to parse UVC " @@ -2079,19 +2105,7 @@ static int uvc_probe(struct usb_interface *intf, "linux-uvc-devel mailing list.\n"); } - /* Initialize the media device and register the V4L2 device. */ -#ifdef CONFIG_MEDIA_CONTROLLER - dev->mdev.dev = &intf->dev; - strlcpy(dev->mdev.model, dev->name, sizeof(dev->mdev.model)); - if (udev->serial) - strlcpy(dev->mdev.serial, udev->serial, - sizeof(dev->mdev.serial)); - strcpy(dev->mdev.bus_info, udev->devpath); - dev->mdev.hw_revision = le16_to_cpu(udev->descriptor.bcdDevice); - media_device_init(&dev->mdev); - - dev->vdev.mdev = &dev->mdev; -#endif + /* Register the V4L2 device. */ if (v4l2_device_register(&intf->dev, &dev->vdev) < 0) goto error; diff --git a/drivers/media/usb/zr364xx/zr364xx.c b/drivers/media/usb/zr364xx/zr364xx.c index 4ff8d0aed015..d30f129a9db7 100644 --- a/drivers/media/usb/zr364xx/zr364xx.c +++ b/drivers/media/usb/zr364xx/zr364xx.c @@ -706,7 +706,8 @@ static int zr364xx_vidioc_querycap(struct file *file, void *priv, struct zr364xx_camera *cam = video_drvdata(file); strlcpy(cap->driver, DRIVER_DESC, sizeof(cap->driver)); - strlcpy(cap->card, cam->udev->product, sizeof(cap->card)); + if (cam->udev->product) + strlcpy(cap->card, cam->udev->product, sizeof(cap->card)); strlcpy(cap->bus_info, dev_name(&cam->udev->dev), sizeof(cap->bus_info)); cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c index 51149aac4340..3b0003f2b091 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls.c +++ b/drivers/media/v4l2-core/v4l2-ctrls.c @@ -1108,6 +1108,7 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, case V4L2_CID_FLASH_STROBE_STOP: case V4L2_CID_AUTO_FOCUS_START: case V4L2_CID_AUTO_FOCUS_STOP: + case V4L2_CID_DO_WHITE_BALANCE: *type = V4L2_CTRL_TYPE_BUTTON; *flags |= V4L2_CTRL_FLAG_WRITE_ONLY | V4L2_CTRL_FLAG_EXECUTE_ON_WRITE; diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 3b54d7a606ac..19881ac9beb2 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -1410,10 +1410,26 @@ static int v4l_enum_fmt(const struct v4l2_ioctl_ops *ops, return ret; } +static void v4l_pix_format_touch(struct v4l2_pix_format *p) +{ + /* + * The v4l2_pix_format structure contains fields that make no sense for + * touch. Set them to default values in this case. + */ + + p->field = V4L2_FIELD_NONE; + p->colorspace = V4L2_COLORSPACE_RAW; + p->flags = 0; + p->ycbcr_enc = 0; + p->quantization = 0; + p->xfer_func = 0; +} + static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops, struct file *file, void *fh, void *arg) { struct v4l2_format *p = arg; + struct video_device *vfd = video_devdata(file); int ret = check_fmt(file, p->type); if (ret) @@ -1451,6 +1467,8 @@ static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops, ret = ops->vidioc_g_fmt_vid_cap(file, fh, arg); /* just in case the driver zeroed it again */ p->fmt.pix.priv = V4L2_PIX_FMT_PRIV_MAGIC; + if (vfd->vfl_type == VFL_TYPE_TOUCH) + v4l_pix_format_touch(&p->fmt.pix); return ret; case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: return ops->vidioc_g_fmt_vid_cap_mplane(file, fh, arg); @@ -1486,21 +1504,6 @@ static int v4l_g_fmt(const struct v4l2_ioctl_ops *ops, return -EINVAL; } -static void v4l_pix_format_touch(struct v4l2_pix_format *p) -{ - /* - * The v4l2_pix_format structure contains fields that make no sense for - * touch. Set them to default values in this case. - */ - - p->field = V4L2_FIELD_NONE; - p->colorspace = V4L2_COLORSPACE_RAW; - p->flags = 0; - p->ycbcr_enc = 0; - p->quantization = 0; - p->xfer_func = 0; -} - static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops, struct file *file, void *fh, void *arg) { @@ -1540,12 +1543,12 @@ static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops, case V4L2_BUF_TYPE_VBI_CAPTURE: if (unlikely(!ops->vidioc_s_fmt_vbi_cap)) break; - CLEAR_AFTER_FIELD(p, fmt.vbi); + CLEAR_AFTER_FIELD(p, fmt.vbi.flags); return ops->vidioc_s_fmt_vbi_cap(file, fh, arg); case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: if (unlikely(!ops->vidioc_s_fmt_sliced_vbi_cap)) break; - CLEAR_AFTER_FIELD(p, fmt.sliced); + CLEAR_AFTER_FIELD(p, fmt.sliced.io_size); return ops->vidioc_s_fmt_sliced_vbi_cap(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OUTPUT: if (unlikely(!ops->vidioc_s_fmt_vid_out)) @@ -1568,22 +1571,22 @@ static int v4l_s_fmt(const struct v4l2_ioctl_ops *ops, case V4L2_BUF_TYPE_VBI_OUTPUT: if (unlikely(!ops->vidioc_s_fmt_vbi_out)) break; - CLEAR_AFTER_FIELD(p, fmt.vbi); + CLEAR_AFTER_FIELD(p, fmt.vbi.flags); return ops->vidioc_s_fmt_vbi_out(file, fh, arg); case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: if (unlikely(!ops->vidioc_s_fmt_sliced_vbi_out)) break; - CLEAR_AFTER_FIELD(p, fmt.sliced); + CLEAR_AFTER_FIELD(p, fmt.sliced.io_size); return ops->vidioc_s_fmt_sliced_vbi_out(file, fh, arg); case V4L2_BUF_TYPE_SDR_CAPTURE: if (unlikely(!ops->vidioc_s_fmt_sdr_cap)) break; - CLEAR_AFTER_FIELD(p, fmt.sdr); + CLEAR_AFTER_FIELD(p, fmt.sdr.buffersize); return ops->vidioc_s_fmt_sdr_cap(file, fh, arg); case V4L2_BUF_TYPE_SDR_OUTPUT: if (unlikely(!ops->vidioc_s_fmt_sdr_out)) break; - CLEAR_AFTER_FIELD(p, fmt.sdr); + CLEAR_AFTER_FIELD(p, fmt.sdr.buffersize); return ops->vidioc_s_fmt_sdr_out(file, fh, arg); case V4L2_BUF_TYPE_META_CAPTURE: if (unlikely(!ops->vidioc_s_fmt_meta_cap)) @@ -1627,12 +1630,12 @@ static int v4l_try_fmt(const struct v4l2_ioctl_ops *ops, case V4L2_BUF_TYPE_VBI_CAPTURE: if (unlikely(!ops->vidioc_try_fmt_vbi_cap)) break; - CLEAR_AFTER_FIELD(p, fmt.vbi); + CLEAR_AFTER_FIELD(p, fmt.vbi.flags); return ops->vidioc_try_fmt_vbi_cap(file, fh, arg); case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: if (unlikely(!ops->vidioc_try_fmt_sliced_vbi_cap)) break; - CLEAR_AFTER_FIELD(p, fmt.sliced); + CLEAR_AFTER_FIELD(p, fmt.sliced.io_size); return ops->vidioc_try_fmt_sliced_vbi_cap(file, fh, arg); case V4L2_BUF_TYPE_VIDEO_OUTPUT: if (unlikely(!ops->vidioc_try_fmt_vid_out)) @@ -1655,22 +1658,22 @@ static int v4l_try_fmt(const struct v4l2_ioctl_ops *ops, case V4L2_BUF_TYPE_VBI_OUTPUT: if (unlikely(!ops->vidioc_try_fmt_vbi_out)) break; - CLEAR_AFTER_FIELD(p, fmt.vbi); + CLEAR_AFTER_FIELD(p, fmt.vbi.flags); return ops->vidioc_try_fmt_vbi_out(file, fh, arg); case V4L2_BUF_TYPE_SLICED_VBI_OUTPUT: if (unlikely(!ops->vidioc_try_fmt_sliced_vbi_out)) break; - CLEAR_AFTER_FIELD(p, fmt.sliced); + CLEAR_AFTER_FIELD(p, fmt.sliced.io_size); return ops->vidioc_try_fmt_sliced_vbi_out(file, fh, arg); case V4L2_BUF_TYPE_SDR_CAPTURE: if (unlikely(!ops->vidioc_try_fmt_sdr_cap)) break; - CLEAR_AFTER_FIELD(p, fmt.sdr); + CLEAR_AFTER_FIELD(p, fmt.sdr.buffersize); return ops->vidioc_try_fmt_sdr_cap(file, fh, arg); case V4L2_BUF_TYPE_SDR_OUTPUT: if (unlikely(!ops->vidioc_try_fmt_sdr_out)) break; - CLEAR_AFTER_FIELD(p, fmt.sdr); + CLEAR_AFTER_FIELD(p, fmt.sdr.buffersize); return ops->vidioc_try_fmt_sdr_out(file, fh, arg); case V4L2_BUF_TYPE_META_CAPTURE: if (unlikely(!ops->vidioc_try_fmt_meta_cap)) diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index f412429cf5ba..c55e607f5631 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -352,8 +352,11 @@ int videobuf_dma_free(struct videobuf_dmabuf *dma) BUG_ON(dma->sglen); if (dma->pages) { - for (i = 0; i < dma->nr_pages; i++) + for (i = 0; i < dma->nr_pages; i++) { + if (dma->direction == DMA_FROM_DEVICE) + set_page_dirty_lock(dma->pages[i]); put_page(dma->pages[i]); + } kfree(dma->pages); dma->pages = NULL; } diff --git a/drivers/memstick/host/jmb38x_ms.c b/drivers/memstick/host/jmb38x_ms.c index 48db922075e2..08fa6400d255 100644 --- a/drivers/memstick/host/jmb38x_ms.c +++ b/drivers/memstick/host/jmb38x_ms.c @@ -947,7 +947,7 @@ static int jmb38x_ms_probe(struct pci_dev *pdev, if (!cnt) { rc = -ENODEV; pci_dev_busy = 1; - goto err_out; + goto err_out_int; } jm = kzalloc(sizeof(struct jmb38x_ms) diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c index cf6ce9f600ca..f9b2e652c399 100644 --- a/drivers/message/fusion/mptctl.c +++ b/drivers/message/fusion/mptctl.c @@ -100,19 +100,19 @@ struct buflist { * Function prototypes. Called from OS entry point mptctl_ioctl. * arg contents specific to function. */ -static int mptctl_fw_download(unsigned long arg); -static int mptctl_getiocinfo(unsigned long arg, unsigned int cmd); -static int mptctl_gettargetinfo(unsigned long arg); -static int mptctl_readtest(unsigned long arg); -static int mptctl_mpt_command(unsigned long arg); -static int mptctl_eventquery(unsigned long arg); -static int mptctl_eventenable(unsigned long arg); -static int mptctl_eventreport(unsigned long arg); -static int mptctl_replace_fw(unsigned long arg); +static int mptctl_fw_download(MPT_ADAPTER *iocp, unsigned long arg); +static int mptctl_getiocinfo(MPT_ADAPTER *iocp, unsigned long arg, unsigned int cmd); +static int mptctl_gettargetinfo(MPT_ADAPTER *iocp, unsigned long arg); +static int mptctl_readtest(MPT_ADAPTER *iocp, unsigned long arg); +static int mptctl_mpt_command(MPT_ADAPTER *iocp, unsigned long arg); +static int mptctl_eventquery(MPT_ADAPTER *iocp, unsigned long arg); +static int mptctl_eventenable(MPT_ADAPTER *iocp, unsigned long arg); +static int mptctl_eventreport(MPT_ADAPTER *iocp, unsigned long arg); +static int mptctl_replace_fw(MPT_ADAPTER *iocp, unsigned long arg); -static int mptctl_do_reset(unsigned long arg); -static int mptctl_hp_hostinfo(unsigned long arg, unsigned int cmd); -static int mptctl_hp_targetinfo(unsigned long arg); +static int mptctl_do_reset(MPT_ADAPTER *iocp, unsigned long arg); +static int mptctl_hp_hostinfo(MPT_ADAPTER *iocp, unsigned long arg, unsigned int cmd); +static int mptctl_hp_targetinfo(MPT_ADAPTER *iocp, unsigned long arg); static int mptctl_probe(struct pci_dev *, const struct pci_device_id *); static void mptctl_remove(struct pci_dev *); @@ -123,8 +123,8 @@ static long compat_mpctl_ioctl(struct file *f, unsigned cmd, unsigned long arg); /* * Private function calls. */ -static int mptctl_do_mpt_command(struct mpt_ioctl_command karg, void __user *mfPtr); -static int mptctl_do_fw_download(int ioc, char __user *ufwbuf, size_t fwlen); +static int mptctl_do_mpt_command(MPT_ADAPTER *iocp, struct mpt_ioctl_command karg, void __user *mfPtr); +static int mptctl_do_fw_download(MPT_ADAPTER *iocp, char __user *ufwbuf, size_t fwlen); static MptSge_t *kbuf_alloc_2_sgl(int bytes, u32 dir, int sge_offset, int *frags, struct buflist **blp, dma_addr_t *sglbuf_dma, MPT_ADAPTER *ioc); static void kfree_sgl(MptSge_t *sgl, dma_addr_t sgl_dma, @@ -656,19 +656,19 @@ __mptctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) * by TM and FW reloads. */ if ((cmd & ~IOCSIZE_MASK) == (MPTIOCINFO & ~IOCSIZE_MASK)) { - return mptctl_getiocinfo(arg, _IOC_SIZE(cmd)); + return mptctl_getiocinfo(iocp, arg, _IOC_SIZE(cmd)); } else if (cmd == MPTTARGETINFO) { - return mptctl_gettargetinfo(arg); + return mptctl_gettargetinfo(iocp, arg); } else if (cmd == MPTTEST) { - return mptctl_readtest(arg); + return mptctl_readtest(iocp, arg); } else if (cmd == MPTEVENTQUERY) { - return mptctl_eventquery(arg); + return mptctl_eventquery(iocp, arg); } else if (cmd == MPTEVENTENABLE) { - return mptctl_eventenable(arg); + return mptctl_eventenable(iocp, arg); } else if (cmd == MPTEVENTREPORT) { - return mptctl_eventreport(arg); + return mptctl_eventreport(iocp, arg); } else if (cmd == MPTFWREPLACE) { - return mptctl_replace_fw(arg); + return mptctl_replace_fw(iocp, arg); } /* All of these commands require an interrupt or @@ -678,15 +678,15 @@ __mptctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return ret; if (cmd == MPTFWDOWNLOAD) - ret = mptctl_fw_download(arg); + ret = mptctl_fw_download(iocp, arg); else if (cmd == MPTCOMMAND) - ret = mptctl_mpt_command(arg); + ret = mptctl_mpt_command(iocp, arg); else if (cmd == MPTHARDRESET) - ret = mptctl_do_reset(arg); + ret = mptctl_do_reset(iocp, arg); else if ((cmd & ~IOCSIZE_MASK) == (HP_GETHOSTINFO & ~IOCSIZE_MASK)) - ret = mptctl_hp_hostinfo(arg, _IOC_SIZE(cmd)); + ret = mptctl_hp_hostinfo(iocp, arg, _IOC_SIZE(cmd)); else if (cmd == HP_GETTARGETINFO) - ret = mptctl_hp_targetinfo(arg); + ret = mptctl_hp_targetinfo(iocp, arg); else ret = -EINVAL; @@ -705,11 +705,10 @@ mptctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return ret; } -static int mptctl_do_reset(unsigned long arg) +static int mptctl_do_reset(MPT_ADAPTER *iocp, unsigned long arg) { struct mpt_ioctl_diag_reset __user *urinfo = (void __user *) arg; struct mpt_ioctl_diag_reset krinfo; - MPT_ADAPTER *iocp; if (copy_from_user(&krinfo, urinfo, sizeof(struct mpt_ioctl_diag_reset))) { printk(KERN_ERR MYNAM "%s@%d::mptctl_do_reset - " @@ -718,12 +717,6 @@ static int mptctl_do_reset(unsigned long arg) return -EFAULT; } - if (mpt_verify_adapter(krinfo.hdr.iocnum, &iocp) < 0) { - printk(KERN_DEBUG MYNAM "%s@%d::mptctl_do_reset - ioc%d not found!\n", - __FILE__, __LINE__, krinfo.hdr.iocnum); - return -ENODEV; /* (-6) No such device or address */ - } - dctlprintk(iocp, printk(MYIOC_s_DEBUG_FMT "mptctl_do_reset called.\n", iocp->name)); @@ -754,7 +747,7 @@ static int mptctl_do_reset(unsigned long arg) * -ENOMSG if FW upload returned bad status */ static int -mptctl_fw_download(unsigned long arg) +mptctl_fw_download(MPT_ADAPTER *iocp, unsigned long arg) { struct mpt_fw_xfer __user *ufwdl = (void __user *) arg; struct mpt_fw_xfer kfwdl; @@ -766,7 +759,7 @@ mptctl_fw_download(unsigned long arg) return -EFAULT; } - return mptctl_do_fw_download(kfwdl.iocnum, kfwdl.bufp, kfwdl.fwlen); + return mptctl_do_fw_download(iocp, kfwdl.bufp, kfwdl.fwlen); } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ @@ -784,11 +777,10 @@ mptctl_fw_download(unsigned long arg) * -ENOMSG if FW upload returned bad status */ static int -mptctl_do_fw_download(int ioc, char __user *ufwbuf, size_t fwlen) +mptctl_do_fw_download(MPT_ADAPTER *iocp, char __user *ufwbuf, size_t fwlen) { FWDownload_t *dlmsg; MPT_FRAME_HDR *mf; - MPT_ADAPTER *iocp; FWDownloadTCSGE_t *ptsge; MptSge_t *sgl, *sgIn; char *sgOut; @@ -808,17 +800,10 @@ mptctl_do_fw_download(int ioc, char __user *ufwbuf, size_t fwlen) pFWDownloadReply_t ReplyMsg = NULL; unsigned long timeleft; - if (mpt_verify_adapter(ioc, &iocp) < 0) { - printk(KERN_DEBUG MYNAM "ioctl_fwdl - ioc%d not found!\n", - ioc); - return -ENODEV; /* (-6) No such device or address */ - } else { - - /* Valid device. Get a message frame and construct the FW download message. - */ - if ((mf = mpt_get_msg_frame(mptctl_id, iocp)) == NULL) - return -EAGAIN; - } + /* Valid device. Get a message frame and construct the FW download message. + */ + if ((mf = mpt_get_msg_frame(mptctl_id, iocp)) == NULL) + return -EAGAIN; dctlprintk(iocp, printk(MYIOC_s_DEBUG_FMT "mptctl_do_fwdl called. mptctl_id = %xh.\n", iocp->name, mptctl_id)); @@ -826,8 +811,6 @@ mptctl_do_fw_download(int ioc, char __user *ufwbuf, size_t fwlen) iocp->name, ufwbuf)); dctlprintk(iocp, printk(MYIOC_s_DEBUG_FMT "DbG: kfwdl.fwlen = %d\n", iocp->name, (int)fwlen)); - dctlprintk(iocp, printk(MYIOC_s_DEBUG_FMT "DbG: kfwdl.ioc = %04xh\n", - iocp->name, ioc)); dlmsg = (FWDownload_t*) mf; ptsge = (FWDownloadTCSGE_t *) &dlmsg->SGL; @@ -1238,13 +1221,11 @@ kfree_sgl(MptSge_t *sgl, dma_addr_t sgl_dma, struct buflist *buflist, MPT_ADAPTE * -ENODEV if no such device/adapter */ static int -mptctl_getiocinfo (unsigned long arg, unsigned int data_size) +mptctl_getiocinfo (MPT_ADAPTER *ioc, unsigned long arg, unsigned int data_size) { struct mpt_ioctl_iocinfo __user *uarg = (void __user *) arg; struct mpt_ioctl_iocinfo *karg; - MPT_ADAPTER *ioc; struct pci_dev *pdev; - int iocnum; unsigned int port; int cim_rev; struct scsi_device *sdev; @@ -1272,14 +1253,6 @@ mptctl_getiocinfo (unsigned long arg, unsigned int data_size) return PTR_ERR(karg); } - if (((iocnum = mpt_verify_adapter(karg->hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_getiocinfo() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - kfree(karg); - return -ENODEV; - } - /* Verify the data transfer size is correct. */ if (karg->hdr.maxDataSize != data_size) { printk(MYIOC_s_ERR_FMT "%s@%d::mptctl_getiocinfo - " @@ -1385,15 +1358,13 @@ mptctl_getiocinfo (unsigned long arg, unsigned int data_size) * -ENODEV if no such device/adapter */ static int -mptctl_gettargetinfo (unsigned long arg) +mptctl_gettargetinfo (MPT_ADAPTER *ioc, unsigned long arg) { struct mpt_ioctl_targetinfo __user *uarg = (void __user *) arg; struct mpt_ioctl_targetinfo karg; - MPT_ADAPTER *ioc; VirtDevice *vdevice; char *pmem; int *pdata; - int iocnum; int numDevices = 0; int lun; int maxWordsLeft; @@ -1408,13 +1379,6 @@ mptctl_gettargetinfo (unsigned long arg) return -EFAULT; } - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_gettargetinfo() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } - dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_gettargetinfo called.\n", ioc->name)); /* Get the port number and set the maximum number of bytes @@ -1510,12 +1474,10 @@ mptctl_gettargetinfo (unsigned long arg) * -ENODEV if no such device/adapter */ static int -mptctl_readtest (unsigned long arg) +mptctl_readtest (MPT_ADAPTER *ioc, unsigned long arg) { struct mpt_ioctl_test __user *uarg = (void __user *) arg; struct mpt_ioctl_test karg; - MPT_ADAPTER *ioc; - int iocnum; if (copy_from_user(&karg, uarg, sizeof(struct mpt_ioctl_test))) { printk(KERN_ERR MYNAM "%s@%d::mptctl_readtest - " @@ -1524,13 +1486,6 @@ mptctl_readtest (unsigned long arg) return -EFAULT; } - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_readtest() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } - dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_readtest called.\n", ioc->name)); /* Fill in the data and return the structure to the calling @@ -1571,12 +1526,10 @@ mptctl_readtest (unsigned long arg) * -ENODEV if no such device/adapter */ static int -mptctl_eventquery (unsigned long arg) +mptctl_eventquery (MPT_ADAPTER *ioc, unsigned long arg) { struct mpt_ioctl_eventquery __user *uarg = (void __user *) arg; struct mpt_ioctl_eventquery karg; - MPT_ADAPTER *ioc; - int iocnum; if (copy_from_user(&karg, uarg, sizeof(struct mpt_ioctl_eventquery))) { printk(KERN_ERR MYNAM "%s@%d::mptctl_eventquery - " @@ -1585,13 +1538,6 @@ mptctl_eventquery (unsigned long arg) return -EFAULT; } - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_eventquery() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } - dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_eventquery called.\n", ioc->name)); karg.eventEntries = MPTCTL_EVENT_LOG_SIZE; @@ -1610,12 +1556,10 @@ mptctl_eventquery (unsigned long arg) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ static int -mptctl_eventenable (unsigned long arg) +mptctl_eventenable (MPT_ADAPTER *ioc, unsigned long arg) { struct mpt_ioctl_eventenable __user *uarg = (void __user *) arg; struct mpt_ioctl_eventenable karg; - MPT_ADAPTER *ioc; - int iocnum; if (copy_from_user(&karg, uarg, sizeof(struct mpt_ioctl_eventenable))) { printk(KERN_ERR MYNAM "%s@%d::mptctl_eventenable - " @@ -1624,13 +1568,6 @@ mptctl_eventenable (unsigned long arg) return -EFAULT; } - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_eventenable() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } - dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_eventenable called.\n", ioc->name)); if (ioc->events == NULL) { @@ -1658,12 +1595,10 @@ mptctl_eventenable (unsigned long arg) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ static int -mptctl_eventreport (unsigned long arg) +mptctl_eventreport (MPT_ADAPTER *ioc, unsigned long arg) { struct mpt_ioctl_eventreport __user *uarg = (void __user *) arg; struct mpt_ioctl_eventreport karg; - MPT_ADAPTER *ioc; - int iocnum; int numBytes, maxEvents, max; if (copy_from_user(&karg, uarg, sizeof(struct mpt_ioctl_eventreport))) { @@ -1673,12 +1608,6 @@ mptctl_eventreport (unsigned long arg) return -EFAULT; } - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_eventreport() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_eventreport called.\n", ioc->name)); @@ -1712,12 +1641,10 @@ mptctl_eventreport (unsigned long arg) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ static int -mptctl_replace_fw (unsigned long arg) +mptctl_replace_fw (MPT_ADAPTER *ioc, unsigned long arg) { struct mpt_ioctl_replace_fw __user *uarg = (void __user *) arg; struct mpt_ioctl_replace_fw karg; - MPT_ADAPTER *ioc; - int iocnum; int newFwSize; if (copy_from_user(&karg, uarg, sizeof(struct mpt_ioctl_replace_fw))) { @@ -1727,13 +1654,6 @@ mptctl_replace_fw (unsigned long arg) return -EFAULT; } - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_replace_fw() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } - dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_replace_fw called.\n", ioc->name)); /* If caching FW, Free the old FW image @@ -1780,12 +1700,10 @@ mptctl_replace_fw (unsigned long arg) * -ENOMEM if memory allocation error */ static int -mptctl_mpt_command (unsigned long arg) +mptctl_mpt_command (MPT_ADAPTER *ioc, unsigned long arg) { struct mpt_ioctl_command __user *uarg = (void __user *) arg; struct mpt_ioctl_command karg; - MPT_ADAPTER *ioc; - int iocnum; int rc; @@ -1796,14 +1714,7 @@ mptctl_mpt_command (unsigned long arg) return -EFAULT; } - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_mpt_command() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } - - rc = mptctl_do_mpt_command (karg, &uarg->MF); + rc = mptctl_do_mpt_command (ioc, karg, &uarg->MF); return rc; } @@ -1821,9 +1732,8 @@ mptctl_mpt_command (unsigned long arg) * -EPERM if SCSI I/O and target is untagged */ static int -mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr) +mptctl_do_mpt_command (MPT_ADAPTER *ioc, struct mpt_ioctl_command karg, void __user *mfPtr) { - MPT_ADAPTER *ioc; MPT_FRAME_HDR *mf = NULL; MPIHeader_t *hdr; char *psge; @@ -1832,7 +1742,7 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr) dma_addr_t dma_addr_in; dma_addr_t dma_addr_out; int sgSize = 0; /* Num SG elements */ - int iocnum, flagsLength; + int flagsLength; int sz, rc = 0; int msgContext; u16 req_idx; @@ -1847,13 +1757,6 @@ mptctl_do_mpt_command (struct mpt_ioctl_command karg, void __user *mfPtr) bufIn.kptr = bufOut.kptr = NULL; bufIn.len = bufOut.len = 0; - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_do_mpt_command() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } - spin_lock_irqsave(&ioc->taskmgmt_lock, flags); if (ioc->ioc_reset_in_progress) { spin_unlock_irqrestore(&ioc->taskmgmt_lock, flags); @@ -2418,17 +2321,15 @@ done_free_mem: * -ENOMEM if memory allocation error */ static int -mptctl_hp_hostinfo(unsigned long arg, unsigned int data_size) +mptctl_hp_hostinfo(MPT_ADAPTER *ioc, unsigned long arg, unsigned int data_size) { hp_host_info_t __user *uarg = (void __user *) arg; - MPT_ADAPTER *ioc; struct pci_dev *pdev; char *pbuf=NULL; dma_addr_t buf_dma; hp_host_info_t karg; CONFIGPARMS cfg; ConfigPageHeader_t hdr; - int iocnum; int rc, cim_rev; ToolboxIstwiReadWriteRequest_t *IstwiRWRequest; MPT_FRAME_HDR *mf = NULL; @@ -2452,12 +2353,6 @@ mptctl_hp_hostinfo(unsigned long arg, unsigned int data_size) return -EFAULT; } - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_hp_hostinfo() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT ": mptctl_hp_hostinfo called.\n", ioc->name)); @@ -2670,15 +2565,13 @@ retry_wait: * -ENOMEM if memory allocation error */ static int -mptctl_hp_targetinfo(unsigned long arg) +mptctl_hp_targetinfo(MPT_ADAPTER *ioc, unsigned long arg) { hp_target_info_t __user *uarg = (void __user *) arg; SCSIDevicePage0_t *pg0_alloc; SCSIDevicePage3_t *pg3_alloc; - MPT_ADAPTER *ioc; MPT_SCSI_HOST *hd = NULL; hp_target_info_t karg; - int iocnum; int data_sz; dma_addr_t page_dma; CONFIGPARMS cfg; @@ -2692,12 +2585,6 @@ mptctl_hp_targetinfo(unsigned long arg) return -EFAULT; } - if (((iocnum = mpt_verify_adapter(karg.hdr.iocnum, &ioc)) < 0) || - (ioc == NULL)) { - printk(KERN_DEBUG MYNAM "%s::mptctl_hp_targetinfo() @%d - ioc%d not found!\n", - __FILE__, __LINE__, iocnum); - return -ENODEV; - } if (karg.hdr.id >= MPT_MAX_FC_DEVICES) return -EINVAL; dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "mptctl_hp_targetinfo called.\n", @@ -2865,7 +2752,7 @@ compat_mptfwxfer_ioctl(struct file *filp, unsigned int cmd, kfw.fwlen = kfw32.fwlen; kfw.bufp = compat_ptr(kfw32.bufp); - ret = mptctl_do_fw_download(kfw.iocnum, kfw.bufp, kfw.fwlen); + ret = mptctl_do_fw_download(iocp, kfw.bufp, kfw.fwlen); mutex_unlock(&iocp->ioctl_cmds.mutex); @@ -2919,7 +2806,7 @@ compat_mpt_command(struct file *filp, unsigned int cmd, /* Pass new structure to do_mpt_command */ - ret = mptctl_do_mpt_command (karg, &uarg->MF); + ret = mptctl_do_mpt_command (iocp, karg, &uarg->MF); mutex_unlock(&iocp->ioctl_cmds.mutex); diff --git a/drivers/mfd/ab8500-debugfs.c b/drivers/mfd/ab8500-debugfs.c index c1c815241e02..28c3ee38b081 100644 --- a/drivers/mfd/ab8500-debugfs.c +++ b/drivers/mfd/ab8500-debugfs.c @@ -1258,7 +1258,6 @@ static struct ab8500_prcmu_ranges ab8540_debug_ranges[AB8500_NUM_BANKS] = { }, }; - static irqreturn_t ab8500_debug_handler(int irq, void *data) { char buf[16]; diff --git a/drivers/mfd/arizona-core.c b/drivers/mfd/arizona-core.c index d8e3184bd27c..ad8a5296c50b 100644 --- a/drivers/mfd/arizona-core.c +++ b/drivers/mfd/arizona-core.c @@ -52,8 +52,10 @@ int arizona_clk32k_enable(struct arizona *arizona) if (ret != 0) goto err_ref; ret = clk_prepare_enable(arizona->mclk[ARIZONA_MCLK1]); - if (ret != 0) - goto err_pm; + if (ret != 0) { + pm_runtime_put_sync(arizona->dev); + goto err_ref; + } break; case ARIZONA_32KZ_MCLK2: ret = clk_prepare_enable(arizona->mclk[ARIZONA_MCLK2]); @@ -67,8 +69,6 @@ int arizona_clk32k_enable(struct arizona *arizona) ARIZONA_CLK_32K_ENA); } -err_pm: - pm_runtime_put_sync(arizona->dev); err_ref: if (ret != 0) arizona->clk32k_ref--; diff --git a/drivers/mfd/da9062-core.c b/drivers/mfd/da9062-core.c index fe1811523e4a..eff6ae5073c8 100644 --- a/drivers/mfd/da9062-core.c +++ b/drivers/mfd/da9062-core.c @@ -257,7 +257,7 @@ static const struct mfd_cell da9062_devs[] = { .name = "da9062-watchdog", .num_resources = ARRAY_SIZE(da9062_wdt_resources), .resources = da9062_wdt_resources, - .of_compatible = "dlg,da9062-wdt", + .of_compatible = "dlg,da9062-watchdog", }, { .name = "da9062-thermal", diff --git a/drivers/mfd/dln2.c b/drivers/mfd/dln2.c index 704e189ca162..95d0f2df0ad4 100644 --- a/drivers/mfd/dln2.c +++ b/drivers/mfd/dln2.c @@ -729,6 +729,8 @@ static int dln2_probe(struct usb_interface *interface, const struct usb_device_id *usb_id) { struct usb_host_interface *hostif = interface->cur_altsetting; + struct usb_endpoint_descriptor *epin; + struct usb_endpoint_descriptor *epout; struct device *dev = &interface->dev; struct dln2_dev *dln2; int ret; @@ -738,12 +740,19 @@ static int dln2_probe(struct usb_interface *interface, hostif->desc.bNumEndpoints < 2) return -ENODEV; + epin = &hostif->endpoint[0].desc; + epout = &hostif->endpoint[1].desc; + if (!usb_endpoint_is_bulk_out(epout)) + return -ENODEV; + if (!usb_endpoint_is_bulk_in(epin)) + return -ENODEV; + dln2 = kzalloc(sizeof(*dln2), GFP_KERNEL); if (!dln2) return -ENOMEM; - dln2->ep_out = hostif->endpoint[0].desc.bEndpointAddress; - dln2->ep_in = hostif->endpoint[1].desc.bEndpointAddress; + dln2->ep_out = epout->bEndpointAddress; + dln2->ep_in = epin->bEndpointAddress; dln2->usb_dev = usb_get_dev(interface_to_usbdev(interface)); dln2->interface = interface; usb_set_intfdata(interface, dln2); diff --git a/drivers/mfd/intel-lpss-pci.c b/drivers/mfd/intel-lpss-pci.c index 2b7e8eeaa59e..0504761516f7 100644 --- a/drivers/mfd/intel-lpss-pci.c +++ b/drivers/mfd/intel-lpss-pci.c @@ -126,6 +126,18 @@ static const struct intel_lpss_platform_info apl_i2c_info = { .properties = apl_i2c_properties, }; +static struct property_entry glk_i2c_properties[] = { + PROPERTY_ENTRY_U32("i2c-sda-hold-time-ns", 313), + PROPERTY_ENTRY_U32("i2c-sda-falling-time-ns", 171), + PROPERTY_ENTRY_U32("i2c-scl-falling-time-ns", 290), + { }, +}; + +static const struct intel_lpss_platform_info glk_i2c_info = { + .clk_rate = 133000000, + .properties = glk_i2c_properties, +}; + static const struct intel_lpss_platform_info cnl_i2c_info = { .clk_rate = 216000000, .properties = spt_i2c_properties, @@ -165,14 +177,14 @@ static const struct pci_device_id intel_lpss_pci_ids[] = { { PCI_VDEVICE(INTEL, 0x1ac6), (kernel_ulong_t)&bxt_info }, { PCI_VDEVICE(INTEL, 0x1aee), (kernel_ulong_t)&bxt_uart_info }, /* GLK */ - { PCI_VDEVICE(INTEL, 0x31ac), (kernel_ulong_t)&bxt_i2c_info }, - { PCI_VDEVICE(INTEL, 0x31ae), (kernel_ulong_t)&bxt_i2c_info }, - { PCI_VDEVICE(INTEL, 0x31b0), (kernel_ulong_t)&bxt_i2c_info }, - { PCI_VDEVICE(INTEL, 0x31b2), (kernel_ulong_t)&bxt_i2c_info }, - { PCI_VDEVICE(INTEL, 0x31b4), (kernel_ulong_t)&bxt_i2c_info }, - { PCI_VDEVICE(INTEL, 0x31b6), (kernel_ulong_t)&bxt_i2c_info }, - { PCI_VDEVICE(INTEL, 0x31b8), (kernel_ulong_t)&bxt_i2c_info }, - { PCI_VDEVICE(INTEL, 0x31ba), (kernel_ulong_t)&bxt_i2c_info }, + { PCI_VDEVICE(INTEL, 0x31ac), (kernel_ulong_t)&glk_i2c_info }, + { PCI_VDEVICE(INTEL, 0x31ae), (kernel_ulong_t)&glk_i2c_info }, + { PCI_VDEVICE(INTEL, 0x31b0), (kernel_ulong_t)&glk_i2c_info }, + { PCI_VDEVICE(INTEL, 0x31b2), (kernel_ulong_t)&glk_i2c_info }, + { PCI_VDEVICE(INTEL, 0x31b4), (kernel_ulong_t)&glk_i2c_info }, + { PCI_VDEVICE(INTEL, 0x31b6), (kernel_ulong_t)&glk_i2c_info }, + { PCI_VDEVICE(INTEL, 0x31b8), (kernel_ulong_t)&glk_i2c_info }, + { PCI_VDEVICE(INTEL, 0x31ba), (kernel_ulong_t)&glk_i2c_info }, { PCI_VDEVICE(INTEL, 0x31bc), (kernel_ulong_t)&bxt_uart_info }, { PCI_VDEVICE(INTEL, 0x31be), (kernel_ulong_t)&bxt_uart_info }, { PCI_VDEVICE(INTEL, 0x31c0), (kernel_ulong_t)&bxt_uart_info }, diff --git a/drivers/mfd/intel-lpss.c b/drivers/mfd/intel-lpss.c index b5c4f8f974aa..9ed573e232c0 100644 --- a/drivers/mfd/intel-lpss.c +++ b/drivers/mfd/intel-lpss.c @@ -541,6 +541,7 @@ module_init(intel_lpss_init); static void __exit intel_lpss_exit(void) { + ida_destroy(&intel_lpss_devid_ida); debugfs_remove(intel_lpss_debugfs); } module_exit(intel_lpss_exit); diff --git a/drivers/mfd/intel_soc_pmic_bxtwc.c b/drivers/mfd/intel_soc_pmic_bxtwc.c index 15bc052704a6..9ca1f8c015de 100644 --- a/drivers/mfd/intel_soc_pmic_bxtwc.c +++ b/drivers/mfd/intel_soc_pmic_bxtwc.c @@ -31,8 +31,8 @@ /* Interrupt Status Registers */ #define BXTWC_IRQLVL1 0x4E02 -#define BXTWC_PWRBTNIRQ 0x4E03 +#define BXTWC_PWRBTNIRQ 0x4E03 #define BXTWC_THRM0IRQ 0x4E04 #define BXTWC_THRM1IRQ 0x4E05 #define BXTWC_THRM2IRQ 0x4E06 @@ -47,10 +47,9 @@ /* Interrupt MASK Registers */ #define BXTWC_MIRQLVL1 0x4E0E -#define BXTWC_MPWRTNIRQ 0x4E0F - #define BXTWC_MIRQLVL1_MCHGR BIT(5) +#define BXTWC_MPWRBTNIRQ 0x4E0F #define BXTWC_MTHRM0IRQ 0x4E12 #define BXTWC_MTHRM1IRQ 0x4E13 #define BXTWC_MTHRM2IRQ 0x4E14 @@ -66,9 +65,7 @@ /* Whiskey Cove PMIC share same ACPI ID between different platforms */ #define BROXTON_PMIC_WC_HRV 4 -/* Manage in two IRQ chips since mask registers are not consecutive */ enum bxtwc_irqs { - /* Level 1 */ BXTWC_PWRBTN_LVL1_IRQ = 0, BXTWC_TMU_LVL1_IRQ, BXTWC_THRM_LVL1_IRQ, @@ -77,9 +74,11 @@ enum bxtwc_irqs { BXTWC_CHGR_LVL1_IRQ, BXTWC_GPIO_LVL1_IRQ, BXTWC_CRIT_LVL1_IRQ, +}; - /* Level 2 */ - BXTWC_PWRBTN_IRQ, +enum bxtwc_irqs_pwrbtn { + BXTWC_PWRBTN_IRQ = 0, + BXTWC_UIBTN_IRQ, }; enum bxtwc_irqs_bcu { @@ -113,7 +112,10 @@ static const struct regmap_irq bxtwc_regmap_irqs[] = { REGMAP_IRQ_REG(BXTWC_CHGR_LVL1_IRQ, 0, BIT(5)), REGMAP_IRQ_REG(BXTWC_GPIO_LVL1_IRQ, 0, BIT(6)), REGMAP_IRQ_REG(BXTWC_CRIT_LVL1_IRQ, 0, BIT(7)), - REGMAP_IRQ_REG(BXTWC_PWRBTN_IRQ, 1, 0x03), +}; + +static const struct regmap_irq bxtwc_regmap_irqs_pwrbtn[] = { + REGMAP_IRQ_REG(BXTWC_PWRBTN_IRQ, 0, 0x01), }; static const struct regmap_irq bxtwc_regmap_irqs_bcu[] = { @@ -125,7 +127,7 @@ static const struct regmap_irq bxtwc_regmap_irqs_adc[] = { }; static const struct regmap_irq bxtwc_regmap_irqs_chgr[] = { - REGMAP_IRQ_REG(BXTWC_USBC_IRQ, 0, BIT(5)), + REGMAP_IRQ_REG(BXTWC_USBC_IRQ, 0, 0x20), REGMAP_IRQ_REG(BXTWC_CHGR0_IRQ, 0, 0x1f), REGMAP_IRQ_REG(BXTWC_CHGR1_IRQ, 1, 0x1f), }; @@ -144,7 +146,16 @@ static struct regmap_irq_chip bxtwc_regmap_irq_chip = { .mask_base = BXTWC_MIRQLVL1, .irqs = bxtwc_regmap_irqs, .num_irqs = ARRAY_SIZE(bxtwc_regmap_irqs), - .num_regs = 2, + .num_regs = 1, +}; + +static struct regmap_irq_chip bxtwc_regmap_irq_chip_pwrbtn = { + .name = "bxtwc_irq_chip_pwrbtn", + .status_base = BXTWC_PWRBTNIRQ, + .mask_base = BXTWC_MPWRBTNIRQ, + .irqs = bxtwc_regmap_irqs_pwrbtn, + .num_irqs = ARRAY_SIZE(bxtwc_regmap_irqs_pwrbtn), + .num_regs = 1, }; static struct regmap_irq_chip bxtwc_regmap_irq_chip_tmu = { @@ -472,6 +483,16 @@ static int bxtwc_probe(struct platform_device *pdev) return ret; } + ret = bxtwc_add_chained_irq_chip(pmic, pmic->irq_chip_data, + BXTWC_PWRBTN_LVL1_IRQ, + IRQF_ONESHOT, + &bxtwc_regmap_irq_chip_pwrbtn, + &pmic->irq_chip_data_pwrbtn); + if (ret) { + dev_err(&pdev->dev, "Failed to add PWRBTN IRQ chip\n"); + return ret; + } + ret = bxtwc_add_chained_irq_chip(pmic, pmic->irq_chip_data, BXTWC_TMU_LVL1_IRQ, IRQF_ONESHOT, diff --git a/drivers/mfd/max8997.c b/drivers/mfd/max8997.c index 2d6e2c392786..4a2fc59d5901 100644 --- a/drivers/mfd/max8997.c +++ b/drivers/mfd/max8997.c @@ -155,12 +155,6 @@ static struct max8997_platform_data *max8997_i2c_parse_dt_pdata( pd->ono = irq_of_parse_and_map(dev->of_node, 1); - /* - * ToDo: the 'wakeup' member in the platform data is more of a linux - * specfic information. Hence, there is no binding for that yet and - * not parsed here. - */ - return pd; } @@ -248,7 +242,7 @@ static int max8997_i2c_probe(struct i2c_client *i2c, */ /* MAX8997 has a power button input. */ - device_init_wakeup(max8997->dev, pdata->wakeup); + device_init_wakeup(max8997->dev, true); return ret; diff --git a/drivers/mfd/mc13xxx-core.c b/drivers/mfd/mc13xxx-core.c index 6c16f170529f..75d52034f89d 100644 --- a/drivers/mfd/mc13xxx-core.c +++ b/drivers/mfd/mc13xxx-core.c @@ -278,7 +278,8 @@ int mc13xxx_adc_do_conversion(struct mc13xxx *mc13xxx, unsigned int mode, if (ret) goto out; - adc0 = MC13XXX_ADC0_ADINC1 | MC13XXX_ADC0_ADINC2; + adc0 = MC13XXX_ADC0_ADINC1 | MC13XXX_ADC0_ADINC2 | + MC13XXX_ADC0_CHRGRAWDIV; adc1 = MC13XXX_ADC1_ADEN | MC13XXX_ADC1_ADTRIGIGN | MC13XXX_ADC1_ASC; if (channel > 7) diff --git a/drivers/mfd/palmas.c b/drivers/mfd/palmas.c index 3922a93f9f92..663a2398b6b1 100644 --- a/drivers/mfd/palmas.c +++ b/drivers/mfd/palmas.c @@ -430,6 +430,7 @@ static void palmas_power_off(void) { unsigned int addr; int ret, slave; + u8 powerhold_mask; struct device_node *np = palmas_dev->dev->of_node; if (of_property_read_bool(np, "ti,palmas-override-powerhold")) { @@ -437,8 +438,15 @@ static void palmas_power_off(void) PALMAS_PRIMARY_SECONDARY_PAD2); slave = PALMAS_BASE_TO_SLAVE(PALMAS_PU_PD_OD_BASE); + if (of_device_is_compatible(np, "ti,tps65917")) + powerhold_mask = + TPS65917_PRIMARY_SECONDARY_PAD2_GPIO_5_MASK; + else + powerhold_mask = + PALMAS_PRIMARY_SECONDARY_PAD2_GPIO_7_MASK; + ret = regmap_update_bits(palmas_dev->regmap[slave], addr, - PALMAS_PRIMARY_SECONDARY_PAD2_GPIO_7_MASK, 0); + powerhold_mask, 0); if (ret) dev_err(palmas_dev->dev, "Unable to write PRIMARY_SECONDARY_PAD2 %d\n", diff --git a/drivers/mfd/rn5t618.c b/drivers/mfd/rn5t618.c index f4037d42a60f..dd4251f105e0 100644 --- a/drivers/mfd/rn5t618.c +++ b/drivers/mfd/rn5t618.c @@ -32,6 +32,7 @@ static bool rn5t618_volatile_reg(struct device *dev, unsigned int reg) case RN5T618_WATCHDOGCNT: case RN5T618_DCIRQ: case RN5T618_ILIMDATAH ... RN5T618_AIN0DATAL: + case RN5T618_ADCCNT3: case RN5T618_IR_ADC1 ... RN5T618_IR_ADC3: case RN5T618_IR_GPR: case RN5T618_IR_GPF: diff --git a/drivers/mfd/ti_am335x_tscadc.c b/drivers/mfd/ti_am335x_tscadc.c index 5894d6c16fab..ca9f0c8d1ed0 100644 --- a/drivers/mfd/ti_am335x_tscadc.c +++ b/drivers/mfd/ti_am335x_tscadc.c @@ -296,11 +296,24 @@ static int ti_tscadc_remove(struct platform_device *pdev) return 0; } +static int __maybe_unused ti_tscadc_can_wakeup(struct device *dev, void *data) +{ + return device_may_wakeup(dev); +} + static int __maybe_unused tscadc_suspend(struct device *dev) { struct ti_tscadc_dev *tscadc = dev_get_drvdata(dev); regmap_write(tscadc->regmap, REG_SE, 0x00); + if (device_for_each_child(dev, NULL, ti_tscadc_can_wakeup)) { + u32 ctrl; + + regmap_read(tscadc->regmap, REG_CTRL, &ctrl); + ctrl &= ~(CNTRLREG_POWERDOWN); + ctrl |= CNTRLREG_TSCSSENB; + regmap_write(tscadc->regmap, REG_CTRL, ctrl); + } pm_runtime_put_sync(dev); return 0; diff --git a/drivers/misc/altera-stapl/altera.c b/drivers/misc/altera-stapl/altera.c index f53e217e963f..494e263daa74 100644 --- a/drivers/misc/altera-stapl/altera.c +++ b/drivers/misc/altera-stapl/altera.c @@ -2176,8 +2176,7 @@ static int altera_get_note(u8 *p, s32 program_size, key_ptr = &p[note_strings + get_unaligned_be32( &p[note_table + (8 * i)])]; - if ((strncasecmp(key, key_ptr, strlen(key_ptr)) == 0) && - (key != NULL)) { + if (key && !strncasecmp(key, key_ptr, strlen(key_ptr))) { status = 0; value_ptr = &p[note_strings + diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c index 1a64eb185cfd..de2ce5539545 100644 --- a/drivers/misc/cxl/guest.c +++ b/drivers/misc/cxl/guest.c @@ -1028,8 +1028,6 @@ err1: void cxl_guest_remove_afu(struct cxl_afu *afu) { - pr_devel("in %s - AFU(%d)\n", __func__, afu->slice); - if (!afu) return; diff --git a/drivers/misc/enclosure.c b/drivers/misc/enclosure.c index eb29113e0bac..b11737f7bdca 100644 --- a/drivers/misc/enclosure.c +++ b/drivers/misc/enclosure.c @@ -419,10 +419,9 @@ int enclosure_remove_device(struct enclosure_device *edev, struct device *dev) cdev = &edev->component[i]; if (cdev->dev == dev) { enclosure_remove_links(cdev); - device_del(&cdev->cdev); put_device(dev); cdev->dev = NULL; - return device_add(&cdev->cdev); + return 0; } } return -ENODEV; diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c index cb1240985157..f55e6e822bea 100644 --- a/drivers/misc/genwqe/card_utils.c +++ b/drivers/misc/genwqe/card_utils.c @@ -298,7 +298,7 @@ static int genwqe_sgl_size(int num_pages) int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, void __user *user_addr, size_t user_size) { - int rc; + int ret = -ENOMEM; struct pci_dev *pci_dev = cd->pci_dev; sgl->fpage_offs = offset_in_page((unsigned long)user_addr); @@ -317,7 +317,7 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, if (get_order(sgl->sgl_size) > MAX_ORDER) { dev_err(&pci_dev->dev, "[%s] err: too much memory requested!\n", __func__); - return -ENOMEM; + return ret; } sgl->sgl = __genwqe_alloc_consistent(cd, sgl->sgl_size, @@ -325,7 +325,7 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, if (sgl->sgl == NULL) { dev_err(&pci_dev->dev, "[%s] err: no memory available!\n", __func__); - return -ENOMEM; + return ret; } /* Only use buffering on incomplete pages */ @@ -338,7 +338,7 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, /* Sync with user memory */ if (copy_from_user(sgl->fpage + sgl->fpage_offs, user_addr, sgl->fpage_size)) { - rc = -EFAULT; + ret = -EFAULT; goto err_out; } } @@ -351,7 +351,7 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, /* Sync with user memory */ if (copy_from_user(sgl->lpage, user_addr + user_size - sgl->lpage_size, sgl->lpage_size)) { - rc = -EFAULT; + ret = -EFAULT; goto err_out2; } } @@ -373,7 +373,8 @@ int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, sgl->sgl = NULL; sgl->sgl_dma_addr = 0; sgl->sgl_size = 0; - return -ENOMEM; + + return ret; } int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index 41af80d45da2..83d220ede65f 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -981,6 +981,12 @@ static void kgdbts_run_tests(void) int nmi_sleep = 0; int i; + verbose = 0; + if (strstr(config, "V1")) + verbose = 1; + if (strstr(config, "V2")) + verbose = 2; + ptr = strchr(config, 'F'); if (ptr) fork_test = simple_strtol(ptr + 1, NULL, 10); @@ -1064,13 +1070,6 @@ static int kgdbts_option_setup(char *opt) return -ENOSPC; } strcpy(config, opt); - - verbose = 0; - if (strstr(config, "V1")) - verbose = 1; - if (strstr(config, "V2")) - verbose = 2; - return 0; } @@ -1082,9 +1081,6 @@ static int configure_kgdbts(void) if (!strlen(config) || isspace(config[0])) goto noconfig; - err = kgdbts_option_setup(config); - if (err) - goto noconfig; final_ack = 0; run_plant_and_detach_test(1); diff --git a/drivers/misc/mediatek/Makefile b/drivers/misc/mediatek/Makefile index 2243c029a821..32eb28d54193 100644 --- a/drivers/misc/mediatek/Makefile +++ b/drivers/misc/mediatek/Makefile @@ -142,7 +142,7 @@ obj-$(CONFIG_MTK_ECCCI_C2K) += c2k_usb/ obj-$(CONFIG_USB) += usb_boost/ obj-$(CONFIG_MTK_USB_TYPEC) += typec/ ifneq ($(wildcard $(srctree)/../vendor/mediatek/internal/selinux_warning_enable),) -obj-$(CONFIG_MTK_SELINUX_AEE_WARNING) += selinux_warning/ +#obj-$(CONFIG_MTK_SELINUX_AEE_WARNING) += selinux_warning/ endif obj-$(CONFIG_THERMAL) += thermal/ obj-$(CONFIG_MTK_MTD_NAND) += nand/ diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 37b13bc5c16f..8f6ab516041b 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -850,15 +850,16 @@ static const struct device_type mei_cl_device_type = { /** * mei_cl_bus_set_name - set device name for me client device + * - + * Example: 0000:00:16.0-55213584-9a29-4916-badf-0fb7ed682aeb * * @cldev: me client device */ static inline void mei_cl_bus_set_name(struct mei_cl_device *cldev) { - dev_set_name(&cldev->dev, "mei:%s:%pUl:%02X", - cldev->name, - mei_me_cl_uuid(cldev->me_cl), - mei_me_cl_ver(cldev->me_cl)); + dev_set_name(&cldev->dev, "%s-%pUl", + dev_name(cldev->bus->dev), + mei_me_cl_uuid(cldev->me_cl)); } /** diff --git a/drivers/misc/mic/card/mic_x100.c b/drivers/misc/mic/card/mic_x100.c index b9f0710ffa6b..4007adc666f3 100644 --- a/drivers/misc/mic/card/mic_x100.c +++ b/drivers/misc/mic/card/mic_x100.c @@ -249,6 +249,9 @@ static int __init mic_probe(struct platform_device *pdev) mdrv->dev = &pdev->dev; snprintf(mdrv->name, sizeof(mic_driver_name), mic_driver_name); + /* FIXME: use dma_set_mask_and_coherent() and check result */ + dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + mdev->mmio.pa = MIC_X100_MMIO_BASE; mdev->mmio.len = MIC_X100_MMIO_LEN; mdev->mmio.va = devm_ioremap(&pdev->dev, MIC_X100_MMIO_BASE, @@ -294,18 +297,6 @@ static void mic_platform_shutdown(struct platform_device *pdev) mic_remove(pdev); } -static u64 mic_dma_mask = DMA_BIT_MASK(64); - -static struct platform_device mic_platform_dev = { - .name = mic_driver_name, - .id = 0, - .num_resources = 0, - .dev = { - .dma_mask = &mic_dma_mask, - .coherent_dma_mask = DMA_BIT_MASK(64), - }, -}; - static struct platform_driver __refdata mic_platform_driver = { .probe = mic_probe, .remove = mic_remove, @@ -315,6 +306,8 @@ static struct platform_driver __refdata mic_platform_driver = { }, }; +static struct platform_device *mic_platform_dev; + static int __init mic_init(void) { int ret; @@ -328,9 +321,12 @@ static int __init mic_init(void) request_module("mic_x100_dma"); mic_init_card_debugfs(); - ret = platform_device_register(&mic_platform_dev); + + mic_platform_dev = platform_device_register_simple(mic_driver_name, + 0, NULL, 0); + ret = PTR_ERR_OR_ZERO(mic_platform_dev); if (ret) { - pr_err("platform_device_register ret %d\n", ret); + pr_err("platform_device_register_full ret %d\n", ret); goto cleanup_debugfs; } ret = platform_driver_register(&mic_platform_driver); @@ -341,7 +337,7 @@ static int __init mic_init(void) return ret; device_unregister: - platform_device_unregister(&mic_platform_dev); + platform_device_unregister(mic_platform_dev); cleanup_debugfs: mic_exit_card_debugfs(); done: @@ -351,7 +347,7 @@ done: static void __exit mic_exit(void) { platform_driver_unregister(&mic_platform_driver); - platform_device_unregister(&mic_platform_dev); + platform_device_unregister(mic_platform_dev); mic_exit_card_debugfs(); } diff --git a/drivers/misc/mic/scif/scif_fence.c b/drivers/misc/mic/scif/scif_fence.c index cac3bcc308a7..7bb929f05d85 100644 --- a/drivers/misc/mic/scif/scif_fence.c +++ b/drivers/misc/mic/scif/scif_fence.c @@ -272,7 +272,7 @@ static int _scif_prog_signal(scif_epd_t epd, dma_addr_t dst, u64 val) dma_fail: if (!x100) dma_pool_free(ep->remote_dev->signal_pool, status, - status->src_dma_addr); + src - offsetof(struct scif_status, val)); alloc_fail: return err; } diff --git a/drivers/misc/pci_endpoint_test.c b/drivers/misc/pci_endpoint_test.c index 9849bf183299..230f1e8538dc 100644 --- a/drivers/misc/pci_endpoint_test.c +++ b/drivers/misc/pci_endpoint_test.c @@ -92,6 +92,7 @@ struct pci_endpoint_test { void __iomem *bar[6]; struct completion irq_raised; int last_irq; + int num_irqs; /* mutex to protect the ioctls */ struct mutex mutex; struct miscdevice miscdev; @@ -226,6 +227,9 @@ static bool pci_endpoint_test_copy(struct pci_endpoint_test *test, size_t size) u32 src_crc32; u32 dst_crc32; + if (size > SIZE_MAX - alignment) + goto err; + orig_src_addr = dma_alloc_coherent(dev, size + alignment, &orig_src_phys_addr, GFP_KERNEL); if (!orig_src_addr) { @@ -311,6 +315,9 @@ static bool pci_endpoint_test_write(struct pci_endpoint_test *test, size_t size) size_t alignment = test->alignment; u32 crc32; + if (size > SIZE_MAX - alignment) + goto err; + orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr, GFP_KERNEL); if (!orig_addr) { @@ -369,6 +376,9 @@ static bool pci_endpoint_test_read(struct pci_endpoint_test *test, size_t size) size_t alignment = test->alignment; u32 crc32; + if (size > SIZE_MAX - alignment) + goto err; + orig_addr = dma_alloc_coherent(dev, size + alignment, &orig_phys_addr, GFP_KERNEL); if (!orig_addr) { @@ -505,6 +515,7 @@ static int pci_endpoint_test_probe(struct pci_dev *pdev, irq = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI); if (irq < 0) dev_err(dev, "failed to get MSI interrupts\n"); + test->num_irqs = irq; } err = devm_request_irq(dev, pdev->irq, pci_endpoint_test_irqhandler, @@ -572,6 +583,9 @@ err_iounmap: pci_iounmap(pdev, test->bar[bar]); } + for (i = 0; i < irq; i++) + devm_free_irq(dev, pdev->irq + i, test); + err_disable_msi: pci_disable_msi(pdev); pci_release_regions(pdev); @@ -585,6 +599,7 @@ err_disable_pdev: static void pci_endpoint_test_remove(struct pci_dev *pdev) { int id; + int i; enum pci_barno bar; struct pci_endpoint_test *test = pci_get_drvdata(pdev); struct miscdevice *misc_device = &test->miscdev; @@ -600,6 +615,8 @@ static void pci_endpoint_test_remove(struct pci_dev *pdev) if (test->bar[bar]) pci_iounmap(pdev, test->bar[bar]); } + for (i = 0; i < test->num_irqs; i++) + devm_free_irq(&pdev->dev, pdev->irq + i, test); pci_disable_msi(pdev); pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/misc/sgi-xp/xpc_partition.c b/drivers/misc/sgi-xp/xpc_partition.c index 6956f7e7d439..ca5f0102daef 100644 --- a/drivers/misc/sgi-xp/xpc_partition.c +++ b/drivers/misc/sgi-xp/xpc_partition.c @@ -70,7 +70,7 @@ xpc_get_rsvd_page_pa(int nasid) unsigned long rp_pa = nasid; /* seed with nasid */ size_t len = 0; size_t buf_len = 0; - void *buf = buf; + void *buf = NULL; void *buf_base = NULL; enum xp_retval (*get_partition_rsvd_page_pa) (void *, u64 *, unsigned long *, size_t *) = diff --git a/drivers/misc/uid_sys_stats.c b/drivers/misc/uid_sys_stats.c index 6e5e47fcdee9..2174ab2d0813 100644 --- a/drivers/misc/uid_sys_stats.c +++ b/drivers/misc/uid_sys_stats.c @@ -358,9 +358,12 @@ static int uid_cputime_show(struct seq_file *m, void *v) __func__, uid); return -ENOMEM; } - task_cputime_adjusted(task, &utime, &stime); - uid_entry->active_utime += utime; - uid_entry->active_stime += stime; + /* avoid double accounting of dying threads */ + if (!(task->flags & PF_EXITING)) { + task_cputime_adjusted(task, &utime, &stime); + uid_entry->active_utime += utime; + uid_entry->active_stime += stime; + } } while_each_thread(temp, task); rcu_read_unlock(); @@ -456,6 +459,10 @@ static void add_uid_io_stats(struct uid_entry *uid_entry, { struct io_stats *io_slot = &uid_entry->io[slot]; + /* avoid double accounting of dying threads */ + if (slot != UID_STATE_DEAD_TASKS && (task->flags & PF_EXITING)) + return; + io_slot->read_bytes += task->ioac.read_bytes; io_slot->write_bytes += compute_write_bytes(task); io_slot->rchar += task->ioac.rchar; diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 7c9e5d3b02a6..9e1d51ace860 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -114,6 +115,7 @@ static int max_devices; #define MAX_DEVICES 256 static DEFINE_IDA(mmc_blk_ida); +static DEFINE_IDA(mmc_rpmb_ida); /* * There is one mmc_blk_data per slot. @@ -124,6 +126,7 @@ struct mmc_blk_data { struct gendisk *disk; struct mmc_queue queue; struct list_head part; + struct list_head rpmbs; unsigned int flags; #define MMC_BLK_CMD23 (1 << 0) /* Can do SET_BLOCK_COUNT for multiblock */ @@ -154,6 +157,32 @@ struct mmc_blk_data { struct dentry *ext_csd_dentry; }; +/* Device type for RPMB character devices */ +static dev_t mmc_rpmb_devt; + +/* Bus type for RPMB character devices */ +static struct bus_type mmc_rpmb_bus_type = { + .name = "mmc_rpmb", +}; + +/** + * struct mmc_rpmb_data - special RPMB device type for these areas + * @dev: the device for the RPMB area + * @chrdev: character device for the RPMB area + * @id: unique device ID number + * @part_index: partition index (0 on first) + * @md: parent MMC block device + * @node: list item, so we can put this device on a list + */ +struct mmc_rpmb_data { + struct device dev; + struct cdev chrdev; + int id; + unsigned int part_index; + struct mmc_blk_data *md; + struct list_head node; +}; + static DEFINE_MUTEX(open_lock); module_param(perdev_minors, int, 0444); @@ -336,6 +365,7 @@ struct mmc_blk_ioc_data { struct mmc_ioc_cmd ic; unsigned char *buf; u64 buf_bytes; + struct mmc_rpmb_data *rpmb; }; static struct mmc_blk_ioc_data *mmc_blk_ioctl_copy_from_user( @@ -474,14 +504,25 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, struct mmc_request mrq = {}; struct scatterlist sg; int err; - bool is_rpmb = false; + unsigned int target_part; u32 status = 0; if (!card || !md || !idata) return -EINVAL; - if (md->area_type & MMC_BLK_DATA_AREA_RPMB) - is_rpmb = true; + /* + * The RPMB accesses comes in from the character device, so we + * need to target these explicitly. Else we just target the + * partition type for the block device the ioctl() was issued + * on. + */ + if (idata->rpmb) { + /* Support multiple RPMB partitions */ + target_part = idata->rpmb->part_index; + target_part |= EXT_CSD_PART_CONFIG_ACC_RPMB; + } else { + target_part = md->part_type; + } cmd.opcode = idata->ic.opcode; cmd.arg = idata->ic.arg; @@ -525,7 +566,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, mrq.cmd = &cmd; - err = mmc_blk_part_switch(card, md->part_type); + err = mmc_blk_part_switch(card, target_part); if (err) return err; @@ -535,7 +576,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, return err; } - if (is_rpmb) { + if (idata->rpmb) { err = mmc_set_blockcount(card, data.blocks, idata->ic.write_flag & (1 << 31)); if (err) @@ -599,7 +640,7 @@ static int __mmc_blk_ioctl_cmd(struct mmc_card *card, struct mmc_blk_data *md, memcpy(&(idata->ic.response), cmd.resp, sizeof(cmd.resp)); - if (is_rpmb) { + if (idata->rpmb) { /* * Ensure RPMB command has completed by polling CMD13 * "Send Status". @@ -742,7 +783,8 @@ int mmc_ffu_ops_check_bdev(struct block_device *bdev, #endif static int mmc_blk_ioctl_cmd(struct mmc_blk_data *md, - struct mmc_ioc_cmd __user *ic_ptr) + struct mmc_ioc_cmd __user *ic_ptr, + struct mmc_rpmb_data *rpmb) { struct mmc_blk_ioc_data *idata; struct mmc_blk_ioc_data *idatas[1]; @@ -754,6 +796,8 @@ static int mmc_blk_ioctl_cmd(struct mmc_blk_data *md, idata = mmc_blk_ioctl_copy_from_user(ic_ptr); if (IS_ERR(idata)) return PTR_ERR(idata); + /* This will be NULL on non-RPMB ioctl():s */ + idata->rpmb = rpmb; card = md->queue.card; if (IS_ERR(card)) { @@ -773,7 +817,8 @@ static int mmc_blk_ioctl_cmd(struct mmc_blk_data *md, goto cmd_done; } idatas[0] = idata; - req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_IOCTL; + req_to_mmc_queue_req(req)->drv_op = + rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL; req_to_mmc_queue_req(req)->drv_op_data = idatas; req_to_mmc_queue_req(req)->ioc_count = 1; blk_execute_rq(mq->queue, NULL, req, 0); @@ -788,7 +833,8 @@ cmd_done: } static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md, - struct mmc_ioc_multi_cmd __user *user) + struct mmc_ioc_multi_cmd __user *user, + struct mmc_rpmb_data *rpmb) { struct mmc_blk_ioc_data **idata = NULL; struct mmc_ioc_cmd __user *cmds = user->cmds; @@ -819,6 +865,8 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md, num_of_cmds = i; goto cmd_err; } + /* This will be NULL on non-RPMB ioctl():s */ + idata[i]->rpmb = rpmb; } card = md->queue.card; @@ -839,7 +887,8 @@ static int mmc_blk_ioctl_multi_cmd(struct mmc_blk_data *md, err = PTR_ERR(req); goto cmd_err; } - req_to_mmc_queue_req(req)->drv_op = MMC_DRV_OP_IOCTL; + req_to_mmc_queue_req(req)->drv_op = + rpmb ? MMC_DRV_OP_IOCTL_RPMB : MMC_DRV_OP_IOCTL; req_to_mmc_queue_req(req)->drv_op_data = idata; req_to_mmc_queue_req(req)->ioc_count = num_of_cmds; blk_execute_rq(mq->queue, NULL, req, 0); @@ -1167,7 +1216,8 @@ static int mmc_blk_ioctl(struct block_device *bdev, fmode_t mode, if (!md) return -EINVAL; ret = mmc_blk_ioctl_cmd(md, - (struct mmc_ioc_cmd __user *)arg); + (struct mmc_ioc_cmd __user *)arg, + NULL); mmc_blk_put(md); return ret; case MMC_IOC_MULTI_CMD: @@ -1178,7 +1228,8 @@ static int mmc_blk_ioctl(struct block_device *bdev, fmode_t mode, if (!md) return -EINVAL; ret = mmc_blk_ioctl_multi_cmd(md, - (struct mmc_ioc_multi_cmd __user *)arg); + (struct mmc_ioc_multi_cmd __user *)arg, + NULL); mmc_blk_put(md); return ret; #ifdef CONFIG_MTK_MMC_PWR_WP @@ -1681,19 +1732,6 @@ static inline void mmc_blk_reset_success(struct mmc_blk_data *md, int type) { md->reset_done &= ~type; } - -int mmc_access_rpmb(struct mmc_queue *mq) -{ - struct mmc_blk_data *md = mq->blkdata; - /* - * If this is a RPMB partition access, return ture - */ - if (md && md->part_type == EXT_CSD_PART_CONFIG_ACC_RPMB) - return true; - - return false; -} - #ifdef CONFIG_MTK_EMMC_HW_CQ static struct mmc_cmdq_req *mmc_blk_cmdq_prep_discard_req(struct mmc_queue *mq, struct request *req) @@ -1778,6 +1816,7 @@ static void mmc_blk_issue_drv_op(struct mmc_queue *mq, struct request *req) struct mmc_blk_data *md = mq->blkdata; struct mmc_blk_data *main_md = dev_get_drvdata(&card->dev); struct mmc_blk_ioc_data **idata; + bool rpmb_ioctl; u8 **ext_csd; u32 status; int ret; @@ -1796,11 +1835,11 @@ static void mmc_blk_issue_drv_op(struct mmc_queue *mq, struct request *req) } #endif - mq_rq = req_to_mmc_queue_req(req); - + rpmb_ioctl = (mq_rq->drv_op == MMC_DRV_OP_IOCTL_RPMB); switch (mq_rq->drv_op) { case MMC_DRV_OP_IOCTL: + case MMC_DRV_OP_IOCTL_RPMB: idata = mq_rq->drv_op_data; for (i = 0, ret = 0; i < mq_rq->ioc_count; i++) { ret = __mmc_blk_ioctl_cmd(card, md, idata[i]); @@ -1808,8 +1847,8 @@ static void mmc_blk_issue_drv_op(struct mmc_queue *mq, struct request *req) break; } /* Always switch back to main area after RPMB access */ - if (md->area_type & MMC_BLK_DATA_AREA_RPMB) - mmc_blk_part_switch(card, main_md->part_type); + if (rpmb_ioctl) + mmc_blk_part_switch(card, 0); break; case MMC_DRV_OP_BOOT_WP: ret = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BOOT_WP, @@ -1838,7 +1877,6 @@ static void mmc_blk_issue_drv_op(struct mmc_queue *mq, struct request *req) ret = -EINVAL; break; } - #if defined(CONFIG_MTK_EMMC_CQ_SUPPORT) || defined(CONFIG_MTK_EMMC_HW_CQ) if (cmdq_en && main_md->part_curr <= PART_CMDQ_EN) { ret = mmc_cmdq_enable(card); @@ -3115,9 +3153,8 @@ void mmc_blk_cmdq_complete_rq(struct request *rq) int err_resp = 0; bool is_dcmd = false; bool err_rwsem = false; -#ifdef CONFIG_MTK_MMC_DEBUG int cpu = -1; -#endif + if (down_read_trylock(&ctx_info->err_rwsem)) { err_rwsem = true; } else { @@ -3139,7 +3176,6 @@ void mmc_blk_cmdq_complete_rq(struct request *rq) mmc_hostname(host), __func__, mrq->req, err, cmdq_req->tag); #endif -#ifdef CONFIG_MTK_MMC_DEBUG /* softirq dump */ cpu = smp_processor_id(); dbg_add_sirq_log(host, MAGIC_CQHCI_DBG_TYPE_SIRQ, @@ -3147,7 +3183,7 @@ void mmc_blk_cmdq_complete_rq(struct request *rq) cmdq_req->tag, cpu, ctx_info->data_active_reqs); -#endif + if ((err || err_resp) && !cmdq_req->skip_err_handling) { pr_notice("%s: %s: txfr error(%d)/resp_err(%d)\n", mmc_hostname(mrq->host), __func__, err, @@ -3940,6 +3976,7 @@ static struct mmc_blk_data *mmc_blk_alloc_req(struct mmc_card *card, spin_lock_init(&md->lock); INIT_LIST_HEAD(&md->part); + INIT_LIST_HEAD(&md->rpmbs); md->usage = 1; ret = mmc_init_queue(&md->queue, card, &md->lock, subname, area_type); @@ -4069,6 +4106,158 @@ static int mmc_blk_alloc_part(struct mmc_card *card, return 0; } +/** + * mmc_rpmb_ioctl() - ioctl handler for the RPMB chardev + * @filp: the character device file + * @cmd: the ioctl() command + * @arg: the argument from userspace + * + * This will essentially just redirect the ioctl()s coming in over to + * the main block device spawning the RPMB character device. + */ +static long mmc_rpmb_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct mmc_rpmb_data *rpmb = filp->private_data; + int ret; + + switch (cmd) { + case MMC_IOC_CMD: + ret = mmc_blk_ioctl_cmd(rpmb->md, + (struct mmc_ioc_cmd __user *)arg, + rpmb); + break; + case MMC_IOC_MULTI_CMD: + ret = mmc_blk_ioctl_multi_cmd(rpmb->md, + (struct mmc_ioc_multi_cmd __user *)arg, + rpmb); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +#ifdef CONFIG_COMPAT +static long mmc_rpmb_ioctl_compat(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + return mmc_rpmb_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); +} +#endif + +static int mmc_rpmb_chrdev_open(struct inode *inode, struct file *filp) +{ + struct mmc_rpmb_data *rpmb = container_of(inode->i_cdev, + struct mmc_rpmb_data, chrdev); + + get_device(&rpmb->dev); + filp->private_data = rpmb; + mmc_blk_get(rpmb->md->disk); + + return nonseekable_open(inode, filp); +} + +static int mmc_rpmb_chrdev_release(struct inode *inode, struct file *filp) +{ + struct mmc_rpmb_data *rpmb = container_of(inode->i_cdev, + struct mmc_rpmb_data, chrdev); + + put_device(&rpmb->dev); + mmc_blk_put(rpmb->md); + + return 0; +} + +static const struct file_operations mmc_rpmb_fileops = { + .release = mmc_rpmb_chrdev_release, + .open = mmc_rpmb_chrdev_open, + .owner = THIS_MODULE, + .llseek = no_llseek, + .unlocked_ioctl = mmc_rpmb_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = mmc_rpmb_ioctl_compat, +#endif +}; + +static void mmc_blk_rpmb_device_release(struct device *dev) +{ + struct mmc_rpmb_data *rpmb = dev_get_drvdata(dev); + + ida_simple_remove(&mmc_rpmb_ida, rpmb->id); + kfree(rpmb); +} + +static int mmc_blk_alloc_rpmb_part(struct mmc_card *card, + struct mmc_blk_data *md, + unsigned int part_index, + sector_t size, + const char *subname) +{ + int devidx, ret; + char rpmb_name[DISK_NAME_LEN]; + char cap_str[10]; + struct mmc_rpmb_data *rpmb; + + /* This creates the minor number for the RPMB char device */ + devidx = ida_simple_get(&mmc_rpmb_ida, 0, max_devices, GFP_KERNEL); + if (devidx < 0) + return devidx; + + rpmb = kzalloc(sizeof(*rpmb), GFP_KERNEL); + if (!rpmb) { + ida_simple_remove(&mmc_rpmb_ida, devidx); + return -ENOMEM; + } + + snprintf(rpmb_name, sizeof(rpmb_name), + "mmcblk%u%s", card->host->index, subname ? subname : ""); + + rpmb->id = devidx; + rpmb->part_index = part_index; + rpmb->dev.init_name = rpmb_name; + rpmb->dev.bus = &mmc_rpmb_bus_type; + rpmb->dev.devt = MKDEV(MAJOR(mmc_rpmb_devt), rpmb->id); + rpmb->dev.parent = &card->dev; + rpmb->dev.release = mmc_blk_rpmb_device_release; + device_initialize(&rpmb->dev); + dev_set_drvdata(&rpmb->dev, rpmb); + rpmb->md = md; + + cdev_init(&rpmb->chrdev, &mmc_rpmb_fileops); + rpmb->chrdev.owner = THIS_MODULE; + ret = cdev_device_add(&rpmb->chrdev, &rpmb->dev); + if (ret) { + pr_err("%s: could not add character device\n", rpmb_name); + goto out_put_device; + } + + list_add(&rpmb->node, &md->rpmbs); + + string_get_size((u64)size, 512, STRING_UNITS_2, + cap_str, sizeof(cap_str)); + + pr_info("%s: %s %s partition %u %s, chardev (%d:%d)\n", + rpmb_name, mmc_card_id(card), + mmc_card_name(card), EXT_CSD_PART_CONFIG_ACC_RPMB, cap_str, + MAJOR(mmc_rpmb_devt), rpmb->id); + + return 0; + +out_put_device: + put_device(&rpmb->dev); + return ret; +} + +static void mmc_blk_remove_rpmb_part(struct mmc_rpmb_data *rpmb) + +{ + cdev_device_del(&rpmb->chrdev, &rpmb->dev); + put_device(&rpmb->dev); +} + /* MMC Physical partitions consist of two boot partitions and * up to four general purpose partitions. * For each partition enabled in EXT_CSD a block device will be allocatedi @@ -4077,13 +4266,26 @@ static int mmc_blk_alloc_part(struct mmc_card *card, static int mmc_blk_alloc_parts(struct mmc_card *card, struct mmc_blk_data *md) { - int idx, ret = 0; + int idx, ret; if (!mmc_card_mmc(card)) return 0; for (idx = 0; idx < card->nr_parts; idx++) { - if (card->part[idx].size) { + if (card->part[idx].area_type & MMC_BLK_DATA_AREA_RPMB) { + /* + * RPMB partitions does not provide block access, they + * are only accessed using ioctl():s. Thus create + * special RPMB block devices that do not have a + * backing block queue for these. + */ + ret = mmc_blk_alloc_rpmb_part(card, md, + card->part[idx].part_cfg, + card->part[idx].size >> 9, + card->part[idx].name); + if (ret) + return ret; + } else if (card->part[idx].size) { ret = mmc_blk_alloc_part(card, md, card->part[idx].part_cfg, card->part[idx].size >> 9, @@ -4095,7 +4297,7 @@ static int mmc_blk_alloc_parts(struct mmc_card *card, struct mmc_blk_data *md) } } - return ret; + return 0; } static void mmc_blk_remove_req(struct mmc_blk_data *md) @@ -4136,7 +4338,15 @@ static void mmc_blk_remove_parts(struct mmc_card *card, { struct list_head *pos, *q; struct mmc_blk_data *part_md; + struct mmc_rpmb_data *rpmb; + /* Remove RPMB partitions */ + list_for_each_safe(pos, q, &md->rpmbs) { + rpmb = list_entry(pos, struct mmc_rpmb_data, node); + list_del(pos); + mmc_blk_remove_rpmb_part(rpmb); + } + /* Remove block partitions */ list_for_each_safe(pos, q, &md->part) { part_md = list_entry(pos, struct mmc_blk_data, part); list_del(pos); @@ -4543,6 +4753,17 @@ static int __init mmc_blk_init(void) { int res; + res = bus_register(&mmc_rpmb_bus_type); + if (res < 0) { + pr_err("mmcblk: could not register RPMB bus type\n"); + return res; + } + res = alloc_chrdev_region(&mmc_rpmb_devt, 0, MAX_DEVICES, "rpmb"); + if (res < 0) { + pr_err("mmcblk: failed to allocate rpmb chrdev region\n"); + goto out_bus_unreg; + } + if (perdev_minors != CONFIG_MMC_BLOCK_MINORS) pr_info("mmcblk: using %d minors per device\n", perdev_minors); @@ -4550,18 +4771,22 @@ static int __init mmc_blk_init(void) res = register_blkdev(MMC_BLOCK_MAJOR, "mmc"); if (res) - goto out; + goto out_chrdev_unreg; res = mmc_register_driver(&mmc_driver); if (res) - goto out2; + goto out_blkdev_unreg; mt_mmc_biolog_init(); return 0; - out2: + +out_blkdev_unreg: unregister_blkdev(MMC_BLOCK_MAJOR, "mmc"); - out: +out_chrdev_unreg: + unregister_chrdev_region(mmc_rpmb_devt, MAX_DEVICES); +out_bus_unreg: + bus_unregister(&mmc_rpmb_bus_type); return res; } @@ -4569,7 +4794,8 @@ static void __exit mmc_blk_exit(void) { mmc_unregister_driver(&mmc_driver); unregister_blkdev(MMC_BLOCK_MAJOR, "mmc"); - mt_mmc_biolog_exit(); + unregister_chrdev_region(mmc_rpmb_devt, MAX_DEVICES); + bus_unregister(&mmc_rpmb_bus_type); } module_init(mmc_blk_init); diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 2e460b1cca53..614f6c1da8b8 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -379,8 +379,6 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) if (mmc_gpio_alloc(host)) { put_device(&host->class_dev); - ida_simple_remove(&mmc_host_ida, host->index); - kfree(host); return NULL; } diff --git a/drivers/mmc/core/queue.h b/drivers/mmc/core/queue.h index ab252a7294f6..4278256bf4a2 100644 --- a/drivers/mmc/core/queue.h +++ b/drivers/mmc/core/queue.h @@ -52,12 +52,14 @@ struct mmc_blk_request { /** * enum mmc_drv_op - enumerates the operations in the mmc_queue_req * @MMC_DRV_OP_IOCTL: ioctl operation + * @MMC_DRV_OP_IOCTL_RPMB: RPMB-oriented ioctl operation * @MMC_DRV_OP_BOOT_WP: write protect boot partitions * @MMC_DRV_OP_GET_CARD_STATUS: get card status * @MMC_DRV_OP_GET_EXT_CSD: get the EXT CSD from an eMMC card */ enum mmc_drv_op { MMC_DRV_OP_IOCTL, + MMC_DRV_OP_IOCTL_RPMB, MMC_DRV_OP_BOOT_WP, MMC_DRV_OP_GET_CARD_STATUS, MMC_DRV_OP_GET_EXT_CSD, @@ -146,8 +148,6 @@ extern unsigned int mmc_queue_map_sg(struct mmc_queue *, #ifdef CONFIG_MTK_EMMC_CQ_SUPPORT extern void mmc_wait_cmdq_empty(struct mmc_host *host); #endif -extern bool mmc_blk_part_cmdq_en(struct mmc_queue *mq); -extern int mmc_access_rpmb(struct mmc_queue *); #ifdef CONFIG_MTK_EMMC_HW_CQ extern int mmc_cmdq_init(struct mmc_queue *mq, struct mmc_card *card); diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index d4c0a107ceda..63694729a797 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -127,7 +127,14 @@ static const struct mmc_fixup mmc_ext_csd_fixups[] = { END_FIXUP }; + static const struct mmc_fixup sdio_fixup_methods[] = { + SDIO_FIXUP(SDIO_VENDOR_ID_TI_WL1251, SDIO_DEVICE_ID_TI_WL1251, + add_quirk, MMC_QUIRK_NONSTD_FUNC_IF), + + SDIO_FIXUP(SDIO_VENDOR_ID_TI_WL1251, SDIO_DEVICE_ID_TI_WL1251, + add_quirk, MMC_QUIRK_DISABLE_CD), + SDIO_FIXUP(SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271, add_quirk, MMC_QUIRK_NONSTD_FUNC_IF), diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index 08a55c2e96e1..53ce1bb83d2c 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -74,9 +75,11 @@ #define CFG_CLK_ALWAYS_ON BIT(18) #define CFG_CHK_DS BIT(20) #define CFG_AUTO_CLK BIT(23) +#define CFG_ERR_ABORT BIT(27) #define SD_EMMC_STATUS 0x48 #define STATUS_BUSY BIT(31) +#define STATUS_DESC_BUSY BIT(30) #define STATUS_DATI GENMASK(23, 16) #define SD_EMMC_IRQ_EN 0x4c @@ -905,6 +908,7 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) cmd_cfg |= FIELD_PREP(CMD_CFG_CMD_INDEX_MASK, cmd->opcode); cmd_cfg |= CMD_CFG_OWNER; /* owned by CPU */ + cmd_cfg |= CMD_CFG_ERROR; /* stop in case of error */ meson_mmc_set_response_bits(cmd, &cmd_cfg); @@ -999,6 +1003,17 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id) u32 irq_en, status, raw_status; irqreturn_t ret = IRQ_NONE; + irq_en = readl(host->regs + SD_EMMC_IRQ_EN); + raw_status = readl(host->regs + SD_EMMC_STATUS); + status = raw_status & irq_en; + + if (!status) { + dev_dbg(host->dev, + "Unexpected IRQ! irq_en 0x%08x - status 0x%08x\n", + irq_en, raw_status); + return IRQ_NONE; + } + if (WARN_ON(!host) || WARN_ON(!host->cmd)) return IRQ_NONE; @@ -1006,22 +1021,18 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id) cmd = host->cmd; data = cmd->data; - irq_en = readl(host->regs + SD_EMMC_IRQ_EN); - raw_status = readl(host->regs + SD_EMMC_STATUS); - status = raw_status & irq_en; - cmd->error = 0; if (status & IRQ_CRC_ERR) { dev_dbg(host->dev, "CRC Error - status 0x%08x\n", status); cmd->error = -EILSEQ; - ret = IRQ_HANDLED; + ret = IRQ_WAKE_THREAD; goto out; } if (status & IRQ_TIMEOUTS) { dev_dbg(host->dev, "Timeout - status 0x%08x\n", status); cmd->error = -ETIMEDOUT; - ret = IRQ_HANDLED; + ret = IRQ_WAKE_THREAD; goto out; } @@ -1046,17 +1057,49 @@ out: /* ack all enabled interrupts */ writel(irq_en, host->regs + SD_EMMC_STATUS); + if (cmd->error) { + /* Stop desc in case of errors */ + u32 start = readl(host->regs + SD_EMMC_START); + + start &= ~START_DESC_BUSY; + writel(start, host->regs + SD_EMMC_START); + } + if (ret == IRQ_HANDLED) meson_mmc_request_done(host->mmc, cmd->mrq); - else if (ret == IRQ_NONE) - dev_warn(host->dev, - "Unexpected IRQ! status=0x%08x, irq_en=0x%08x\n", - raw_status, irq_en); spin_unlock(&host->lock); return ret; } +static int meson_mmc_wait_desc_stop(struct meson_host *host) +{ + int loop; + u32 status; + + /* + * It may sometimes take a while for it to actually halt. Here, we + * are giving it 5ms to comply + * + * If we don't confirm the descriptor is stopped, it might raise new + * IRQs after we have called mmc_request_done() which is bad. + */ + for (loop = 50; loop; loop--) { + status = readl(host->regs + SD_EMMC_STATUS); + if (status & (STATUS_BUSY | STATUS_DESC_BUSY)) + udelay(100); + else + break; + } + + if (status & (STATUS_BUSY | STATUS_DESC_BUSY)) { + dev_err(host->dev, "Timed out waiting for host to stop\n"); + return -ETIMEDOUT; + } + + return 0; +} + static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id) { struct meson_host *host = dev_id; @@ -1067,6 +1110,13 @@ static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id) if (WARN_ON(!cmd)) return IRQ_NONE; + if (cmd->error) { + meson_mmc_wait_desc_stop(host); + meson_mmc_request_done(host->mmc, cmd->mrq); + + return IRQ_HANDLED; + } + data = cmd->data; if (meson_mmc_bounce_buf_read(data)) { xfer_bytes = data->blksz * data->blocks; @@ -1107,6 +1157,9 @@ static void meson_mmc_cfg_init(struct meson_host *host) cfg |= FIELD_PREP(CFG_RC_CC_MASK, ilog2(SD_EMMC_CFG_CMD_GAP)); cfg |= FIELD_PREP(CFG_BLK_LEN_MASK, ilog2(SD_EMMC_CFG_BLK_SIZE)); + /* abort chain on R/W errors */ + cfg |= CFG_ERR_ABORT; + writel(cfg, host->regs + SD_EMMC_CFG); } diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index ea254d00541f..24795454d106 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1154,17 +1154,22 @@ static void mmc_spi_initsequence(struct mmc_spi_host *host) * SPI protocol. Another is that when chipselect is released while * the card returns BUSY status, the clock must issue several cycles * with chipselect high before the card will stop driving its output. + * + * SPI_CS_HIGH means "asserted" here. In some cases like when using + * GPIOs for chip select, SPI_CS_HIGH is set but this will be logically + * inverted by gpiolib, so if we want to ascertain to drive it high + * we should toggle the default with an XOR as we do here. */ - host->spi->mode |= SPI_CS_HIGH; + host->spi->mode ^= SPI_CS_HIGH; if (spi_setup(host->spi) != 0) { /* Just warn; most cards work without it. */ dev_warn(&host->spi->dev, "can't change chip-select polarity\n"); - host->spi->mode &= ~SPI_CS_HIGH; + host->spi->mode ^= SPI_CS_HIGH; } else { mmc_spi_readbytes(host, 18); - host->spi->mode &= ~SPI_CS_HIGH; + host->spi->mode ^= SPI_CS_HIGH; if (spi_setup(host->spi) != 0) { /* Wot, we can't get the same setup we had before? */ dev_err(&host->spi->dev, diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index f2b01d76f834..d9eae86e2042 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -243,6 +243,7 @@ #define MSDC_PB2_RESPWAIT (0x3 << 2) /* RW */ #define MSDC_PB2_RESPSTSENSEL (0x7 << 16) /* RW */ #define MSDC_PB2_CRCSTSENSEL (0x7 << 29) /* RW */ +#define MSDC_PATCH_BIT1_CMDTA (0x7 << 3) /* RW */ #define MSDC_PAD_TUNE_DATWRDLY (0x1f << 0) /* RW */ #define MSDC_PAD_TUNE_DATRRDLY (0x1f << 8) /* RW */ @@ -1148,6 +1149,7 @@ static void msdc_start_command(struct msdc_host *host, WARN_ON(host->cmd); host->cmd = cmd; + mod_delayed_work(system_wq, &host->req_timeout, DAT_TIMEOUT); if (!msdc_cmd_is_ready(host, mrq, cmd)) return; @@ -1159,7 +1161,6 @@ static void msdc_start_command(struct msdc_host *host, cmd->error = 0; rawcmd = msdc_cmd_prepare_raw_cmd(host, mrq, cmd); - mod_delayed_work(system_wq, &host->req_timeout, DAT_TIMEOUT); spin_lock_irqsave(&host->lock, flags); sdr_set_bits(host->base + MSDC_INTEN, cmd_ints_mask); @@ -1987,6 +1988,7 @@ static int hs400_tune_response(struct mmc_host *mmc, u32 opcode) /* select EMMC50 PAD CMD tune */ sdr_set_bits(host->base + PAD_CMD_TUNE, BIT(0)); + sdr_set_field(host->base + MSDC_PATCH_BIT1, MSDC_PATCH_BIT1_CMDTA, 2); if (mmc->ios.timing == MMC_TIMING_MMC_HS200 || mmc->ios.timing == MMC_TIMING_UHS_SDR104) diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c index 3f3ff7530b76..ea12712bd2c3 100644 --- a/drivers/mmc/host/omap_hsmmc.c +++ b/drivers/mmc/host/omap_hsmmc.c @@ -1678,6 +1678,36 @@ static void omap_hsmmc_init_card(struct mmc_host *mmc, struct mmc_card *card) if (mmc_pdata(host)->init_card) mmc_pdata(host)->init_card(card); + else if (card->type == MMC_TYPE_SDIO || + card->type == MMC_TYPE_SD_COMBO) { + struct device_node *np = mmc_dev(mmc)->of_node; + + /* + * REVISIT: should be moved to sdio core and made more + * general e.g. by expanding the DT bindings of child nodes + * to provide a mechanism to provide this information: + * Documentation/devicetree/bindings/mmc/mmc-card.txt + */ + + np = of_get_compatible_child(np, "ti,wl1251"); + if (np) { + /* + * We have TI wl1251 attached to MMC3. Pass this + * information to the SDIO core because it can't be + * probed by normal methods. + */ + + dev_info(host->dev, "found wl1251\n"); + card->quirks |= MMC_QUIRK_NONSTD_SDIO; + card->cccr.wide_bus = 1; + card->cis.vendor = 0x104c; + card->cis.device = 0x9066; + card->cis.blksize = 512; + card->cis.max_dtr = 24000000; + card->ocr = 0x80; + of_node_put(np); + } + } } static void omap_hsmmc_enable_sdio_irq(struct mmc_host *mmc, int enable) diff --git a/drivers/mmc/host/sdhci-brcmstb.c b/drivers/mmc/host/sdhci-brcmstb.c index 552bddc5096c..1cd10356fc14 100644 --- a/drivers/mmc/host/sdhci-brcmstb.c +++ b/drivers/mmc/host/sdhci-brcmstb.c @@ -55,7 +55,9 @@ static int sdhci_brcmstb_probe(struct platform_device *pdev) } sdhci_get_of_property(pdev); - mmc_of_parse(host->mmc); + res = mmc_of_parse(host->mmc); + if (res) + goto err; /* * Supply the existing CAPS, but clear the UHS modes. This diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index 74af8cc4ef36..1dadd460cc8f 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -331,19 +331,22 @@ static int sdhci_at91_probe(struct platform_device *pdev) priv->mainck = devm_clk_get(&pdev->dev, "baseclk"); if (IS_ERR(priv->mainck)) { dev_err(&pdev->dev, "failed to get baseclk\n"); - return PTR_ERR(priv->mainck); + ret = PTR_ERR(priv->mainck); + goto sdhci_pltfm_free; } priv->hclock = devm_clk_get(&pdev->dev, "hclock"); if (IS_ERR(priv->hclock)) { dev_err(&pdev->dev, "failed to get hclock\n"); - return PTR_ERR(priv->hclock); + ret = PTR_ERR(priv->hclock); + goto sdhci_pltfm_free; } priv->gck = devm_clk_get(&pdev->dev, "multclk"); if (IS_ERR(priv->gck)) { dev_err(&pdev->dev, "failed to get multclk\n"); - return PTR_ERR(priv->gck); + ret = PTR_ERR(priv->gck); + goto sdhci_pltfm_free; } ret = sdhci_at91_set_clks_presets(&pdev->dev); @@ -365,7 +368,7 @@ static int sdhci_at91_probe(struct platform_device *pdev) pm_runtime_use_autosuspend(&pdev->dev); /* HS200 is broken at this moment */ - host->quirks2 = SDHCI_QUIRK2_BROKEN_HS200; + host->quirks2 |= SDHCI_QUIRK2_BROKEN_HS200; ret = sdhci_add_host(host); if (ret) diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index 176cbc67d08a..9a1ab39ee35e 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -615,9 +615,6 @@ static void esdhc_reset(struct sdhci_host *host, u8 mask) sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); - if (of_find_compatible_node(NULL, NULL, "fsl,p2020-esdhc")) - mdelay(5); - if (mask & SDHCI_RESET_ALL) { val = sdhci_readl(host, ESDHC_TBCTL); val &= ~ESDHC_TB_EN; @@ -889,8 +886,8 @@ static int sdhci_esdhc_probe(struct platform_device *pdev) host->quirks &= ~SDHCI_QUIRK_NO_BUSY_IRQ; if (of_find_compatible_node(NULL, NULL, "fsl,p2020-esdhc")) { - host->quirks2 |= SDHCI_QUIRK_RESET_AFTER_REQUEST; - host->quirks2 |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; + host->quirks |= SDHCI_QUIRK_RESET_AFTER_REQUEST; + host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL; } if (of_device_is_compatible(np, "fsl,p5040-esdhc") || diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index ce3f344d2b66..d2b0a62bfce1 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -177,7 +177,7 @@ static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask) misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_DDR50; if (soc_data->nvquirks & NVQUIRK_ENABLE_SDR104) misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDR104; - if (soc_data->nvquirks & SDHCI_MISC_CTRL_ENABLE_SDR50) + if (soc_data->nvquirks & NVQUIRK_ENABLE_SDR50) clk_ctrl |= SDHCI_CLOCK_CTRL_SDR50_TUNING_OVERRIDE; } diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 5807028c8309..4f1c884c0b50 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1635,9 +1635,7 @@ void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing) ctrl_2 |= SDHCI_CTRL_UHS_SDR104; else if (timing == MMC_TIMING_UHS_SDR12) ctrl_2 |= SDHCI_CTRL_UHS_SDR12; - else if (timing == MMC_TIMING_SD_HS || - timing == MMC_TIMING_MMC_HS || - timing == MMC_TIMING_UHS_SDR25) + else if (timing == MMC_TIMING_UHS_SDR25) ctrl_2 |= SDHCI_CTRL_UHS_SDR25; else if (timing == MMC_TIMING_UHS_SDR50) ctrl_2 |= SDHCI_CTRL_UHS_SDR50; @@ -2164,8 +2162,8 @@ static void __sdhci_execute_tuning(struct sdhci_host *host, u32 opcode) sdhci_send_tuning(host, opcode); if (!host->tuning_done) { - pr_info("%s: Tuning timeout, falling back to fixed sampling clock\n", - mmc_hostname(host->mmc)); + pr_debug("%s: Tuning timeout, falling back to fixed sampling clock\n", + mmc_hostname(host->mmc)); sdhci_abort_tuning(host, opcode); return; } @@ -3594,11 +3592,13 @@ int sdhci_setup_host(struct sdhci_host *host) if (host->ops->get_min_clock) mmc->f_min = host->ops->get_min_clock(host); else if (host->version >= SDHCI_SPEC_300) { - if (host->clk_mul) { - mmc->f_min = (host->max_clk * host->clk_mul) / 1024; + if (host->clk_mul) max_clk = host->max_clk * host->clk_mul; - } else - mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_300; + /* + * Divided Clock Mode minimum clock rate is always less than + * Programmable Clock Mode minimum clock rate. + */ + mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_300; } else mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200; diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 2437fcde915a..a09aad9155a5 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -914,8 +914,9 @@ static void tmio_mmc_finish_request(struct tmio_mmc_host *host) if (mrq->cmd->error || (mrq->data && mrq->data->error)) tmio_mmc_abort_dma(host); - if (host->check_scc_error) - host->check_scc_error(host); + /* SCC error means retune, but executed command was still successful */ + if (host->check_scc_error && host->check_scc_error(host)) + mmc_retune_needed(host->mmc); /* If SET_BLOCK_COUNT, continue with main command */ if (host->mrq && !mrq->cmd->error) { @@ -1219,7 +1220,7 @@ int tmio_mmc_host_probe(struct tmio_mmc_host *_host, _host->start_signal_voltage_switch; mmc->ops = &tmio_mmc_ops; - mmc->caps |= MMC_CAP_4_BIT_DATA | pdata->capabilities; + mmc->caps |= MMC_CAP_ERASE | MMC_CAP_4_BIT_DATA | pdata->capabilities; mmc->caps2 |= pdata->capabilities2; mmc->max_segs = pdata->max_segs ? : 32; mmc->max_blk_size = 512; diff --git a/drivers/mtd/devices/spear_smi.c b/drivers/mtd/devices/spear_smi.c index ddf478976013..c2922e129db8 100644 --- a/drivers/mtd/devices/spear_smi.c +++ b/drivers/mtd/devices/spear_smi.c @@ -595,6 +595,26 @@ static int spear_mtd_read(struct mtd_info *mtd, loff_t from, size_t len, return 0; } +/* + * The purpose of this function is to ensure a memcpy_toio() with byte writes + * only. Its structure is inspired from the ARM implementation of _memcpy_toio() + * which also does single byte writes but cannot be used here as this is just an + * implementation detail and not part of the API. Not mentioning the comment + * stating that _memcpy_toio() should be optimized. + */ +static void spear_smi_memcpy_toio_b(volatile void __iomem *dest, + const void *src, size_t len) +{ + const unsigned char *from = src; + + while (len) { + len--; + writeb(*from, dest); + from++; + dest++; + } +} + static inline int spear_smi_cpy_toio(struct spear_smi *dev, u32 bank, void __iomem *dest, const void *src, size_t len) { @@ -617,7 +637,23 @@ static inline int spear_smi_cpy_toio(struct spear_smi *dev, u32 bank, ctrlreg1 = readl(dev->io_base + SMI_CR1); writel((ctrlreg1 | WB_MODE) & ~SW_MODE, dev->io_base + SMI_CR1); - memcpy_toio(dest, src, len); + /* + * In Write Burst mode (WB_MODE), the specs states that writes must be: + * - incremental + * - of the same size + * The ARM implementation of memcpy_toio() will optimize the number of + * I/O by using as much 4-byte writes as possible, surrounded by + * 2-byte/1-byte access if: + * - the destination is not 4-byte aligned + * - the length is not a multiple of 4-byte. + * Avoid this alternance of write access size by using our own 'byte + * access' helper if at least one of the two conditions above is true. + */ + if (IS_ALIGNED(len, sizeof(u32)) && + IS_ALIGNED((uintptr_t)dest, sizeof(u32))) + memcpy_toio(dest, src, len); + else + spear_smi_memcpy_toio_b(dest, src, len); writel(ctrlreg1, dev->io_base + SMI_CR1); diff --git a/drivers/mtd/maps/physmap_of_core.c b/drivers/mtd/maps/physmap_of_core.c index b1bd4faecfb2..5d8399742c75 100644 --- a/drivers/mtd/maps/physmap_of_core.c +++ b/drivers/mtd/maps/physmap_of_core.c @@ -30,7 +30,6 @@ struct of_flash_list { struct mtd_info *mtd; struct map_info map; - struct resource *res; }; struct of_flash { @@ -55,18 +54,10 @@ static int of_flash_remove(struct platform_device *dev) mtd_concat_destroy(info->cmtd); } - for (i = 0; i < info->list_size; i++) { + for (i = 0; i < info->list_size; i++) if (info->list[i].mtd) map_destroy(info->list[i].mtd); - if (info->list[i].map.virt) - iounmap(info->list[i].map.virt); - - if (info->list[i].res) { - release_resource(info->list[i].res); - kfree(info->list[i].res); - } - } return 0; } @@ -214,10 +205,11 @@ static int of_flash_probe(struct platform_device *dev) err = -EBUSY; res_size = resource_size(&res); - info->list[i].res = request_mem_region(res.start, res_size, - dev_name(&dev->dev)); - if (!info->list[i].res) + info->list[i].map.virt = devm_ioremap_resource(&dev->dev, &res); + if (IS_ERR(info->list[i].map.virt)) { + err = PTR_ERR(info->list[i].map.virt); goto err_out; + } err = -ENXIO; width = of_get_property(dp, "bank-width", NULL); @@ -240,15 +232,6 @@ static int of_flash_probe(struct platform_device *dev) if (err) goto err_out; - err = -ENOMEM; - info->list[i].map.virt = ioremap(info->list[i].map.phys, - info->list[i].map.size); - if (!info->list[i].map.virt) { - dev_err(&dev->dev, "Failed to ioremap() flash" - " region\n"); - goto err_out; - } - simple_map_init(&info->list[i].map); /* diff --git a/drivers/mtd/mtdcore.h b/drivers/mtd/mtdcore.h index 37accfd0400e..24480b75a88d 100644 --- a/drivers/mtd/mtdcore.h +++ b/drivers/mtd/mtdcore.h @@ -7,7 +7,7 @@ extern struct mutex mtd_table_mutex; struct mtd_info *__mtd_next_device(int i); -int add_mtd_device(struct mtd_info *mtd); +int __must_check add_mtd_device(struct mtd_info *mtd); int del_mtd_device(struct mtd_info *mtd); int add_mtd_partitions(struct mtd_info *, const struct mtd_partition *, int); int del_mtd_partitions(struct mtd_info *); diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index a308e707392d..45626b0eed64 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -684,10 +684,21 @@ int mtd_add_partition(struct mtd_info *parent, const char *name, list_add(&new->list, &mtd_partitions); mutex_unlock(&mtd_partitions_mutex); - add_mtd_device(&new->mtd); + ret = add_mtd_device(&new->mtd); + if (ret) + goto err_remove_part; mtd_add_partition_attrs(new); + return 0; + +err_remove_part: + mutex_lock(&mtd_partitions_mutex); + list_del(&new->list); + mutex_unlock(&mtd_partitions_mutex); + + free_partition(new); + return ret; } EXPORT_SYMBOL_GPL(mtd_add_partition); @@ -778,22 +789,31 @@ int add_mtd_partitions(struct mtd_info *master, { struct mtd_part *slave; uint64_t cur_offset = 0; - int i; + int i, ret; printk(KERN_NOTICE "Creating %d MTD partitions on \"%s\":\n", nbparts, master->name); for (i = 0; i < nbparts; i++) { slave = allocate_partition(master, parts + i, i, cur_offset); if (IS_ERR(slave)) { - del_mtd_partitions(master); - return PTR_ERR(slave); + ret = PTR_ERR(slave); + goto err_del_partitions; } mutex_lock(&mtd_partitions_mutex); list_add(&slave->list, &mtd_partitions); mutex_unlock(&mtd_partitions_mutex); - add_mtd_device(&slave->mtd); + ret = add_mtd_device(&slave->mtd); + if (ret) { + mutex_lock(&mtd_partitions_mutex); + list_del(&slave->list); + mutex_unlock(&mtd_partitions_mutex); + + free_partition(slave); + goto err_del_partitions; + } + mtd_add_partition_attrs(slave); if (parts[i].types) mtd_parse_part(slave, parts[i].types); @@ -802,6 +822,11 @@ int add_mtd_partitions(struct mtd_info *master, } return 0; + +err_del_partitions: + del_mtd_partitions(master); + + return ret; } static DEFINE_SPINLOCK(part_parser_lock); diff --git a/drivers/mtd/nand/atmel/nand-controller.c b/drivers/mtd/nand/atmel/nand-controller.c index 0b93f152d993..d5a493e8ee08 100644 --- a/drivers/mtd/nand/atmel/nand-controller.c +++ b/drivers/mtd/nand/atmel/nand-controller.c @@ -1888,7 +1888,7 @@ static int atmel_nand_controller_add_nands(struct atmel_nand_controller *nc) ret = of_property_read_u32(np, "#size-cells", &val); if (ret) { - dev_err(dev, "missing #address-cells property\n"); + dev_err(dev, "missing #size-cells property\n"); return ret; } diff --git a/drivers/mtd/nand/atmel/pmecc.c b/drivers/mtd/nand/atmel/pmecc.c index 4124bf91bee6..8cd153974e8d 100644 --- a/drivers/mtd/nand/atmel/pmecc.c +++ b/drivers/mtd/nand/atmel/pmecc.c @@ -875,23 +875,32 @@ static struct atmel_pmecc *atmel_pmecc_get_by_node(struct device *userdev, { struct platform_device *pdev; struct atmel_pmecc *pmecc, **ptr; + int ret; pdev = of_find_device_by_node(np); - if (!pdev || !platform_get_drvdata(pdev)) + if (!pdev) return ERR_PTR(-EPROBE_DEFER); + pmecc = platform_get_drvdata(pdev); + if (!pmecc) { + ret = -EPROBE_DEFER; + goto err_put_device; + } ptr = devres_alloc(devm_atmel_pmecc_put, sizeof(*ptr), GFP_KERNEL); - if (!ptr) - return ERR_PTR(-ENOMEM); - - get_device(&pdev->dev); - pmecc = platform_get_drvdata(pdev); + if (!ptr) { + ret = -ENOMEM; + goto err_put_device; + } *ptr = pmecc; devres_add(userdev, ptr); return pmecc; + +err_put_device: + put_device(&pdev->dev); + return ERR_PTR(ret); } static const int atmel_pmecc_strengths[] = { 2, 4, 8, 12, 24, 32 }; diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c index e7f3c98487e6..43db80e5d994 100644 --- a/drivers/mtd/nand/sh_flctl.c +++ b/drivers/mtd/nand/sh_flctl.c @@ -480,7 +480,7 @@ static void read_fiforeg(struct sh_flctl *flctl, int rlen, int offset) /* initiate DMA transfer */ if (flctl->chan_fifo0_rx && rlen >= 32 && - flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_DEV_TO_MEM) > 0) + flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_FROM_DEVICE) > 0) goto convert; /* DMA success */ /* do polling transfer */ @@ -539,7 +539,7 @@ static void write_ec_fiforeg(struct sh_flctl *flctl, int rlen, /* initiate DMA transfer */ if (flctl->chan_fifo0_tx && rlen >= 32 && - flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_MEM_TO_DEV) > 0) + flctl_dma_fifo0_transfer(flctl, buf, rlen, DMA_TO_DEVICE) > 0) return; /* DMA success */ /* do polling transfer */ diff --git a/drivers/mtd/nand/sunxi_nand.c b/drivers/mtd/nand/sunxi_nand.c index 958974821582..8e5231482397 100644 --- a/drivers/mtd/nand/sunxi_nand.c +++ b/drivers/mtd/nand/sunxi_nand.c @@ -1435,7 +1435,7 @@ static int sunxi_nfc_hw_ecc_write_page_dma(struct mtd_info *mtd, sunxi_nfc_randomizer_enable(mtd); writel((NAND_CMD_RNDIN << 8) | NAND_CMD_PAGEPROG, - nfc->regs + NFC_REG_RCMD_SET); + nfc->regs + NFC_REG_WCMD_SET); dma_async_issue_pending(nfc->dmac); diff --git a/drivers/mtd/spi-nor/cadence-quadspi.c b/drivers/mtd/spi-nor/cadence-quadspi.c index f22dd34f4f83..ff4edf4bb23c 100644 --- a/drivers/mtd/spi-nor/cadence-quadspi.c +++ b/drivers/mtd/spi-nor/cadence-quadspi.c @@ -38,6 +38,9 @@ #define CQSPI_NAME "cadence-qspi" #define CQSPI_MAX_CHIPSELECT 16 +/* Quirks */ +#define CQSPI_NEEDS_WR_DELAY BIT(0) + struct cqspi_st; struct cqspi_flash_pdata { @@ -76,6 +79,7 @@ struct cqspi_st { u32 fifo_depth; u32 fifo_width; u32 trigger_address; + u32 wr_delay; struct cqspi_flash_pdata f_pdata[CQSPI_MAX_CHIPSELECT]; }; @@ -623,6 +627,15 @@ static int cqspi_indirect_write_execute(struct spi_nor *nor, reinit_completion(&cqspi->transfer_complete); writel(CQSPI_REG_INDIRECTWR_START_MASK, reg_base + CQSPI_REG_INDIRECTWR); + /* + * As per 66AK2G02 TRM SPRUHY8F section 11.15.5.3 Indirect Access + * Controller programming sequence, couple of cycles of + * QSPI_REF_CLK delay is required for the above bit to + * be internally synchronized by the QSPI module. Provide 5 + * cycles of delay. + */ + if (cqspi->wr_delay) + ndelay(cqspi->wr_delay); while (remaining > 0) { size_t write_words, mod_bytes; @@ -1184,6 +1197,7 @@ static int cqspi_probe(struct platform_device *pdev) struct cqspi_st *cqspi; struct resource *res; struct resource *res_ahb; + unsigned long data; int ret; int irq; @@ -1241,6 +1255,10 @@ static int cqspi_probe(struct platform_device *pdev) } cqspi->master_ref_clk_hz = clk_get_rate(cqspi->clk); + data = (unsigned long)of_device_get_match_data(dev); + if (data & CQSPI_NEEDS_WR_DELAY) + cqspi->wr_delay = 5 * DIV_ROUND_UP(NSEC_PER_SEC, + cqspi->master_ref_clk_hz); ret = devm_request_irq(dev, irq, cqspi_irq_handler, 0, pdev->name, cqspi); @@ -1312,7 +1330,14 @@ static const struct dev_pm_ops cqspi__dev_pm_ops = { #endif static const struct of_device_id cqspi_dt_ids[] = { - {.compatible = "cdns,qspi-nor",}, + { + .compatible = "cdns,qspi-nor", + .data = (void *)0, + }, + { + .compatible = "ti,k2g-qspi", + .data = (void *)CQSPI_NEEDS_WR_DELAY, + }, { /* end of table */ } }; diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 34ecc12ee3d9..0fe3e39f870f 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -1030,7 +1030,7 @@ static const struct flash_info spi_nor_ids[] = { { "mx25l25635e", INFO(0xc22019, 0, 64 * 1024, 512, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, { "mx25u25635f", INFO(0xc22539, 0, 64 * 1024, 512, SECT_4K | SPI_NOR_4B_OPCODES) }, { "mx25l25655e", INFO(0xc22619, 0, 64 * 1024, 512, 0) }, - { "mx66l51235l", INFO(0xc2201a, 0, 64 * 1024, 1024, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, + { "mx66l51235l", INFO(0xc2201a, 0, 64 * 1024, 1024, SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | SPI_NOR_4B_OPCODES) }, { "mx66u51235f", INFO(0xc2253a, 0, 64 * 1024, 1024, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ | SPI_NOR_4B_OPCODES) }, { "mx66l1g45g", INFO(0xc2201b, 0, 64 * 1024, 2048, SECT_4K | SPI_NOR_DUAL_READ | SPI_NOR_QUAD_READ) }, { "mx66l1g55g", INFO(0xc2261b, 0, 64 * 1024, 2048, SPI_NOR_QUAD_READ) }, @@ -1216,7 +1216,7 @@ static int spi_nor_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf) { struct spi_nor *nor = mtd_to_spi_nor(mtd); - int ret; + ssize_t ret; dev_dbg(nor->dev, "from 0x%08x, len %zd\n", (u32)from, len); @@ -1445,7 +1445,7 @@ static int macronix_quad_enable(struct spi_nor *nor) */ static int write_sr_cr(struct spi_nor *nor, u8 *sr_cr) { - int ret; + ssize_t ret; write_enable(nor); @@ -2382,7 +2382,7 @@ static int spi_nor_init_params(struct spi_nor *nor, memset(params, 0, sizeof(*params)); /* Set SPI NOR sizes. */ - params->size = info->sector_size * info->n_sectors; + params->size = (u64)info->sector_size * info->n_sectors; params->page_size = info->page_size; /* (Fast) Read settings. */ diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 34b53d070a55..e94a63881590 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -1092,10 +1092,10 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway) ubi_wl_close(ubi); ubi_free_internal_volumes(ubi); vfree(ubi->vtbl); - put_mtd_device(ubi->mtd); vfree(ubi->peb_buf); vfree(ubi->fm_buf); ubi_msg(ubi, "mtd%d is detached", ubi->mtd->index); + put_mtd_device(ubi->mtd); put_device(&ubi->dev); return 0; } diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c index 63e8527f7b65..18aba1cf8acc 100644 --- a/drivers/mtd/ubi/fastmap.c +++ b/drivers/mtd/ubi/fastmap.c @@ -73,7 +73,7 @@ static int self_check_seen(struct ubi_device *ubi, unsigned long *seen) return 0; for (pnum = 0; pnum < ubi->peb_count; pnum++) { - if (test_bit(pnum, seen) && ubi->lookuptbl[pnum]) { + if (!test_bit(pnum, seen) && ubi->lookuptbl[pnum]) { ubi_err(ubi, "self-check failed for PEB %d, fastmap didn't see it", pnum); ret = -EINVAL; } @@ -1147,7 +1147,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, struct rb_node *tmp_rb; int ret, i, j, free_peb_count, used_peb_count, vol_count; int scrub_peb_count, erase_peb_count; - unsigned long *seen_pebs = NULL; + unsigned long *seen_pebs; fm_raw = ubi->fm_buf; memset(ubi->fm_buf, 0, ubi->fm_size); @@ -1161,7 +1161,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, dvbuf = new_fm_vbuf(ubi, UBI_FM_DATA_VOLUME_ID); if (!dvbuf) { ret = -ENOMEM; - goto out_kfree; + goto out_free_avbuf; } avhdr = ubi_get_vid_hdr(avbuf); @@ -1170,7 +1170,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, seen_pebs = init_seen(ubi); if (IS_ERR(seen_pebs)) { ret = PTR_ERR(seen_pebs); - goto out_kfree; + goto out_free_dvbuf; } spin_lock(&ubi->volumes_lock); @@ -1338,7 +1338,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, ret = ubi_io_write_vid_hdr(ubi, new_fm->e[0]->pnum, avbuf); if (ret) { ubi_err(ubi, "unable to write vid_hdr to fastmap SB!"); - goto out_kfree; + goto out_free_seen; } for (i = 0; i < new_fm->used_blocks; i++) { @@ -1360,7 +1360,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, if (ret) { ubi_err(ubi, "unable to write vid_hdr to PEB %i!", new_fm->e[i]->pnum); - goto out_kfree; + goto out_free_seen; } } @@ -1370,7 +1370,7 @@ static int ubi_write_fastmap(struct ubi_device *ubi, if (ret) { ubi_err(ubi, "unable to write fastmap to PEB %i!", new_fm->e[i]->pnum); - goto out_kfree; + goto out_free_seen; } } @@ -1380,10 +1380,13 @@ static int ubi_write_fastmap(struct ubi_device *ubi, ret = self_check_seen(ubi, seen_pebs); dbg_bld("fastmap written!"); -out_kfree: - ubi_free_vid_buf(avbuf); - ubi_free_vid_buf(dvbuf); +out_free_seen: free_seen(seen_pebs); +out_free_dvbuf: + ubi_free_vid_buf(dvbuf); +out_free_avbuf: + ubi_free_vid_buf(avbuf); + out: return ret; } diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index d4b2e8744498..c2cf6bd3c162 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -227,9 +227,9 @@ out_unlock: out_free: kfree(desc); out_put_ubi: - ubi_put_device(ubi); ubi_err(ubi, "cannot open device %d, volume %d, error %d", ubi_num, vol_id, err); + ubi_put_device(ubi); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(ubi_open_volume); diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 60666db31886..0b79ddec15b7 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -71,11 +71,6 @@ struct arp_pkt { }; #pragma pack() -static inline struct arp_pkt *arp_pkt(const struct sk_buff *skb) -{ - return (struct arp_pkt *)skb_network_header(skb); -} - /* Forward declaration */ static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[], bool strict_match); @@ -574,10 +569,11 @@ static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip) spin_unlock(&bond->mode_lock); } -static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bond) +static struct slave *rlb_choose_channel(struct sk_buff *skb, + struct bonding *bond, + const struct arp_pkt *arp) { struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond)); - struct arp_pkt *arp = arp_pkt(skb); struct slave *assigned_slave, *curr_active_slave; struct rlb_client_info *client_info; u32 hash_index = 0; @@ -674,8 +670,12 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon */ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) { - struct arp_pkt *arp = arp_pkt(skb); struct slave *tx_slave = NULL; + struct arp_pkt *arp; + + if (!pskb_network_may_pull(skb, sizeof(*arp))) + return NULL; + arp = (struct arp_pkt *)skb_network_header(skb); /* Don't modify or load balance ARPs that do not originate locally * (e.g.,arrive via a bridge). @@ -685,7 +685,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) if (arp->op_code == htons(ARPOP_REPLY)) { /* the arp must be sent on the selected rx channel */ - tx_slave = rlb_choose_channel(skb, bond); + tx_slave = rlb_choose_channel(skb, bond, arp); if (tx_slave) bond_hw_addr_copy(arp->mac_src, tx_slave->dev->dev_addr, tx_slave->dev->addr_len); @@ -696,7 +696,7 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond) * When the arp reply is received the entry will be updated * with the correct unicast address of the client. */ - rlb_choose_channel(skb, bond); + rlb_choose_channel(skb, bond, arp); /* The ARP reply packets must be delayed so that * they can cancel out the influence of the ARP request. @@ -1403,26 +1403,31 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) bool do_tx_balance = true; u32 hash_index = 0; const u8 *hash_start = NULL; - struct ipv6hdr *ip6hdr; skb_reset_mac_header(skb); eth_data = eth_hdr(skb); switch (ntohs(skb->protocol)) { case ETH_P_IP: { - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph; if (ether_addr_equal_64bits(eth_data->h_dest, mac_bcast) || - (iph->daddr == ip_bcast) || - (iph->protocol == IPPROTO_IGMP)) { + (!pskb_network_may_pull(skb, sizeof(*iph)))) { + do_tx_balance = false; + break; + } + iph = ip_hdr(skb); + if (iph->daddr == ip_bcast || iph->protocol == IPPROTO_IGMP) { do_tx_balance = false; break; } hash_start = (char *)&(iph->daddr); hash_size = sizeof(iph->daddr); - } break; - case ETH_P_IPV6: + } + case ETH_P_IPV6: { + const struct ipv6hdr *ip6hdr; + /* IPv6 doesn't really use broadcast mac address, but leave * that here just in case. */ @@ -1439,7 +1444,11 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) break; } - /* Additianally, DAD probes should not be tx-balanced as that + if (!pskb_network_may_pull(skb, sizeof(*ip6hdr))) { + do_tx_balance = false; + break; + } + /* Additionally, DAD probes should not be tx-balanced as that * will lead to false positives for duplicate addresses and * prevent address configuration from working. */ @@ -1449,17 +1458,26 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) break; } - hash_start = (char *)&(ipv6_hdr(skb)->daddr); - hash_size = sizeof(ipv6_hdr(skb)->daddr); + hash_start = (char *)&ip6hdr->daddr; + hash_size = sizeof(ip6hdr->daddr); break; - case ETH_P_IPX: - if (ipx_hdr(skb)->ipx_checksum != IPX_NO_CHECKSUM) { + } + case ETH_P_IPX: { + const struct ipxhdr *ipxhdr; + + if (pskb_network_may_pull(skb, sizeof(*ipxhdr))) { + do_tx_balance = false; + break; + } + ipxhdr = (struct ipxhdr *)skb_network_header(skb); + + if (ipxhdr->ipx_checksum != IPX_NO_CHECKSUM) { /* something is wrong with this packet */ do_tx_balance = false; break; } - if (ipx_hdr(skb)->ipx_type != IPX_TYPE_NCP) { + if (ipxhdr->ipx_type != IPX_TYPE_NCP) { /* The only protocol worth balancing in * this family since it has an "ARP" like * mechanism @@ -1468,9 +1486,11 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) break; } + eth_data = eth_hdr(skb); hash_start = (char *)eth_data->h_dest; hash_size = ETH_ALEN; break; + } case ETH_P_ARP: do_tx_balance = false; if (bond_info->rlb_enabled) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c1eeba1906fd..fef599eb822b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1779,7 +1779,8 @@ err_hwaddr_unsync: bond_hw_addr_flush(bond_dev, slave_dev); err_close: - slave_dev->priv_flags &= ~IFF_BONDING; + if (!netif_is_bond_master(slave_dev)) + slave_dev->priv_flags &= ~IFF_BONDING; dev_close(slave_dev); err_restore_mac: @@ -1985,7 +1986,8 @@ static int __bond_release_one(struct net_device *bond_dev, else dev_set_mtu(slave_dev, slave->original_mtu); - slave_dev->priv_flags &= ~IFF_BONDING; + if (!netif_is_bond_master(slave_dev)) + slave_dev->priv_flags &= ~IFF_BONDING; bond_free_slave(slave); @@ -2055,8 +2057,7 @@ static int bond_miimon_inspect(struct bonding *bond) ignore_updelay = !rcu_dereference(bond->curr_active_slave); bond_for_each_slave_rcu(bond, slave, iter) { - slave->new_link = BOND_LINK_NOCHANGE; - slave->link_new_state = slave->link; + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); link_state = bond_check_dev_link(bond, slave->dev, 0); @@ -2092,7 +2093,7 @@ static int bond_miimon_inspect(struct bonding *bond) } if (slave->delay <= 0) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); commit++; continue; } @@ -2131,7 +2132,7 @@ static int bond_miimon_inspect(struct bonding *bond) slave->delay = 0; if (slave->delay <= 0) { - slave->new_link = BOND_LINK_UP; + bond_propose_link_state(slave, BOND_LINK_UP); commit++; ignore_updelay = false; continue; @@ -2151,7 +2152,7 @@ static void bond_miimon_commit(struct bonding *bond) struct slave *slave, *primary; bond_for_each_slave(bond, slave, iter) { - switch (slave->new_link) { + switch (slave->link_new_state) { case BOND_LINK_NOCHANGE: /* For 802.3ad mode, check current slave speed and * duplex again in case its port was disabled after @@ -2185,9 +2186,6 @@ static void bond_miimon_commit(struct bonding *bond) } else if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { /* make it immediately active */ bond_set_active_slave(slave); - } else if (slave != primary) { - /* prevent it from being the active one */ - bond_set_backup_slave(slave); } netdev_info(bond->dev, "link status definitely up for interface %s, %u Mbps %s duplex\n", @@ -2244,8 +2242,8 @@ static void bond_miimon_commit(struct bonding *bond) default: netdev_err(bond->dev, "invalid new link %d on slave %s\n", - slave->new_link, slave->dev->name); - slave->new_link = BOND_LINK_NOCHANGE; + slave->link_new_state, slave->dev->name); + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); continue; } @@ -2644,13 +2642,13 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) bond_for_each_slave_rcu(bond, slave, iter) { unsigned long trans_start = dev_trans_start(slave->dev); - slave->new_link = BOND_LINK_NOCHANGE; + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); if (slave->link != BOND_LINK_UP) { if (bond_time_in_interval(bond, trans_start, 1) && bond_time_in_interval(bond, slave->last_rx, 1)) { - slave->new_link = BOND_LINK_UP; + bond_propose_link_state(slave, BOND_LINK_UP); slave_state_changed = 1; /* primary_slave has no meaning in round-robin @@ -2677,7 +2675,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) if (!bond_time_in_interval(bond, trans_start, 2) || !bond_time_in_interval(bond, slave->last_rx, 2)) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); slave_state_changed = 1; if (slave->link_failure_count < UINT_MAX) @@ -2709,8 +2707,8 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) goto re_arm; bond_for_each_slave(bond, slave, iter) { - if (slave->new_link != BOND_LINK_NOCHANGE) - slave->link = slave->new_link; + if (slave->link_new_state != BOND_LINK_NOCHANGE) + slave->link = slave->link_new_state; } if (slave_state_changed) { @@ -2733,9 +2731,9 @@ re_arm: } /* Called to inspect slaves for active-backup mode ARP monitor link state - * changes. Sets new_link in slaves to specify what action should take - * place for the slave. Returns 0 if no changes are found, >0 if changes - * to link states must be committed. + * changes. Sets proposed link state in slaves to specify what action + * should take place for the slave. Returns 0 if no changes are found, >0 + * if changes to link states must be committed. * * Called with rcu_read_lock held. */ @@ -2747,12 +2745,12 @@ static int bond_ab_arp_inspect(struct bonding *bond) int commit = 0; bond_for_each_slave_rcu(bond, slave, iter) { - slave->new_link = BOND_LINK_NOCHANGE; + bond_propose_link_state(slave, BOND_LINK_NOCHANGE); last_rx = slave_last_rx(bond, slave); if (slave->link != BOND_LINK_UP) { if (bond_time_in_interval(bond, last_rx, 1)) { - slave->new_link = BOND_LINK_UP; + bond_propose_link_state(slave, BOND_LINK_UP); commit++; } continue; @@ -2780,7 +2778,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) if (!bond_is_active_slave(slave) && !rcu_access_pointer(bond->current_arp_slave) && !bond_time_in_interval(bond, last_rx, 3)) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); commit++; } @@ -2793,7 +2791,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) if (bond_is_active_slave(slave) && (!bond_time_in_interval(bond, trans_start, 2) || !bond_time_in_interval(bond, last_rx, 2))) { - slave->new_link = BOND_LINK_DOWN; + bond_propose_link_state(slave, BOND_LINK_DOWN); commit++; } } @@ -2813,7 +2811,7 @@ static void bond_ab_arp_commit(struct bonding *bond) struct slave *slave; bond_for_each_slave(bond, slave, iter) { - switch (slave->new_link) { + switch (slave->link_new_state) { case BOND_LINK_NOCHANGE: continue; @@ -2866,7 +2864,7 @@ static void bond_ab_arp_commit(struct bonding *bond) default: netdev_err(bond->dev, "impossible: new_link %d on slave %s\n", - slave->new_link, slave->dev->name); + slave->link_new_state, slave->dev->name); continue; } @@ -3992,7 +3990,7 @@ out: * this to-be-skipped slave to send a packet out. */ old_arr = rtnl_dereference(bond->slave_arr); - for (idx = 0; idx < old_arr->count; idx++) { + for (idx = 0; old_arr != NULL && idx < old_arr->count; idx++) { if (skipslave == old_arr->arr[idx]) { old_arr->arr[idx] = old_arr->arr[old_arr->count-1]; diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 606b7d8ffe13..24c6015f6c92 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -52,6 +52,7 @@ #define CONTROL_EX_PDR BIT(8) /* control register */ +#define CONTROL_SWR BIT(15) #define CONTROL_TEST BIT(7) #define CONTROL_CCE BIT(6) #define CONTROL_DISABLE_AR BIT(5) @@ -97,6 +98,9 @@ #define BTR_TSEG2_SHIFT 12 #define BTR_TSEG2_MASK (0x7 << BTR_TSEG2_SHIFT) +/* interrupt register */ +#define INT_STS_PENDING 0x8000 + /* brp extension register */ #define BRP_EXT_BRPE_MASK 0x0f #define BRP_EXT_BRPE_SHIFT 0 @@ -569,6 +573,26 @@ static void c_can_configure_msg_objects(struct net_device *dev) IF_MCONT_RCV_EOB); } +static int c_can_software_reset(struct net_device *dev) +{ + struct c_can_priv *priv = netdev_priv(dev); + int retry = 0; + + if (priv->type != BOSCH_D_CAN) + return 0; + + priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_SWR | CONTROL_INIT); + while (priv->read_reg(priv, C_CAN_CTRL_REG) & CONTROL_SWR) { + msleep(20); + if (retry++ > 100) { + netdev_err(dev, "CCTRL: software reset failed\n"); + return -EIO; + } + } + + return 0; +} + /* * Configure C_CAN chip: * - enable/disable auto-retransmission @@ -578,6 +602,11 @@ static void c_can_configure_msg_objects(struct net_device *dev) static int c_can_chip_config(struct net_device *dev) { struct c_can_priv *priv = netdev_priv(dev); + int err; + + err = c_can_software_reset(dev); + if (err) + return err; /* enable automatic retransmission */ priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_ENABLE_AR); @@ -1029,10 +1058,16 @@ static int c_can_poll(struct napi_struct *napi, int quota) u16 curr, last = priv->last_status; int work_done = 0; - priv->last_status = curr = priv->read_reg(priv, C_CAN_STS_REG); - /* Ack status on C_CAN. D_CAN is self clearing */ - if (priv->type != BOSCH_D_CAN) - priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED); + /* Only read the status register if a status interrupt was pending */ + if (atomic_xchg(&priv->sie_pending, 0)) { + priv->last_status = curr = priv->read_reg(priv, C_CAN_STS_REG); + /* Ack status on C_CAN. D_CAN is self clearing */ + if (priv->type != BOSCH_D_CAN) + priv->write_reg(priv, C_CAN_STS_REG, LEC_UNUSED); + } else { + /* no change detected ... */ + curr = last; + } /* handle state changes */ if ((curr & STATUS_EWARN) && (!(last & STATUS_EWARN))) { @@ -1083,10 +1118,16 @@ static irqreturn_t c_can_isr(int irq, void *dev_id) { struct net_device *dev = (struct net_device *)dev_id; struct c_can_priv *priv = netdev_priv(dev); + int reg_int; - if (!priv->read_reg(priv, C_CAN_INT_REG)) + reg_int = priv->read_reg(priv, C_CAN_INT_REG); + if (!reg_int) return IRQ_NONE; + /* save for later use */ + if (reg_int & INT_STS_PENDING) + atomic_set(&priv->sie_pending, 1); + /* disable all interrupts and schedule the NAPI */ c_can_irq_control(priv, false); napi_schedule(&priv->napi); diff --git a/drivers/net/can/c_can/c_can.h b/drivers/net/can/c_can/c_can.h index 8acdc7fa4792..d5567a7c1c6d 100644 --- a/drivers/net/can/c_can/c_can.h +++ b/drivers/net/can/c_can/c_can.h @@ -198,6 +198,7 @@ struct c_can_priv { struct net_device *dev; struct device *device; atomic_t tx_active; + atomic_t sie_pending; unsigned long tx_dir; int last_status; u16 (*read_reg) (const struct c_can_priv *priv, enum reg index); diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index d92113db4fb9..05ad5ed145a3 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -867,6 +867,7 @@ static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = { = { .len = sizeof(struct can_bittiming) }, [IFLA_CAN_DATA_BITTIMING_CONST] = { .len = sizeof(struct can_bittiming_const) }, + [IFLA_CAN_TERMINATION] = { .type = NLA_U16 }, }; static int can_validate(struct nlattr *tb[], struct nlattr *data[], diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 7280f3a8aa04..84dd79041285 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1018,6 +1018,7 @@ static int flexcan_chip_start(struct net_device *dev) reg_mecr = flexcan_read(®s->mecr); reg_mecr &= ~FLEXCAN_MECR_ECRWRDIS; flexcan_write(reg_mecr, ®s->mecr); + reg_mecr |= FLEXCAN_MECR_ECCDIS; reg_mecr &= ~(FLEXCAN_MECR_NCEFAFRZ | FLEXCAN_MECR_HANCEI_MSK | FLEXCAN_MECR_FANCEI_MSK); flexcan_write(reg_mecr, ®s->mecr); diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c index acb708fc1463..0a7d818a06f3 100644 --- a/drivers/net/can/mscan/mscan.c +++ b/drivers/net/can/mscan/mscan.c @@ -392,13 +392,12 @@ static int mscan_rx_poll(struct napi_struct *napi, int quota) struct net_device *dev = napi->dev; struct mscan_regs __iomem *regs = priv->reg_base; struct net_device_stats *stats = &dev->stats; - int npackets = 0; - int ret = 1; + int work_done = 0; struct sk_buff *skb; struct can_frame *frame; u8 canrflg; - while (npackets < quota) { + while (work_done < quota) { canrflg = in_8(®s->canrflg); if (!(canrflg & (MSCAN_RXF | MSCAN_ERR_IF))) break; @@ -419,18 +418,18 @@ static int mscan_rx_poll(struct napi_struct *napi, int quota) stats->rx_packets++; stats->rx_bytes += frame->can_dlc; - npackets++; + work_done++; netif_receive_skb(skb); } - if (!(in_8(®s->canrflg) & (MSCAN_RXF | MSCAN_ERR_IF))) { - napi_complete(&priv->napi); - clear_bit(F_RX_PROGRESS, &priv->flags); - if (priv->can.state < CAN_STATE_BUS_OFF) - out_8(®s->canrier, priv->shadow_canrier); - ret = 0; + if (work_done < quota) { + if (likely(napi_complete_done(&priv->napi, work_done))) { + clear_bit(F_RX_PROGRESS, &priv->flags); + if (priv->can.state < CAN_STATE_BUS_OFF) + out_8(®s->canrier, priv->shadow_canrier); + } } - return ret; + return work_done; } static irqreturn_t mscan_isr(int irq, void *dev_id) diff --git a/drivers/net/can/rx-offload.c b/drivers/net/can/rx-offload.c index d227db45fec9..54ffd1e91a69 100644 --- a/drivers/net/can/rx-offload.c +++ b/drivers/net/can/rx-offload.c @@ -116,37 +116,95 @@ static int can_rx_offload_compare(struct sk_buff *a, struct sk_buff *b) return cb_b->timestamp - cb_a->timestamp; } -static struct sk_buff *can_rx_offload_offload_one(struct can_rx_offload *offload, unsigned int n) +/** + * can_rx_offload_offload_one() - Read one CAN frame from HW + * @offload: pointer to rx_offload context + * @n: number of mailbox to read + * + * The task of this function is to read a CAN frame from mailbox @n + * from the device and return the mailbox's content as a struct + * sk_buff. + * + * If the struct can_rx_offload::skb_queue exceeds the maximal queue + * length (struct can_rx_offload::skb_queue_len_max) or no skb can be + * allocated, the mailbox contents is discarded by reading it into an + * overflow buffer. This way the mailbox is marked as free by the + * driver. + * + * Return: A pointer to skb containing the CAN frame on success. + * + * NULL if the mailbox @n is empty. + * + * ERR_PTR() in case of an error + */ +static struct sk_buff * +can_rx_offload_offload_one(struct can_rx_offload *offload, unsigned int n) { - struct sk_buff *skb = NULL; + struct sk_buff *skb = NULL, *skb_error = NULL; struct can_rx_offload_cb *cb; struct can_frame *cf; int ret; - /* If queue is full or skb not available, read to discard mailbox */ - if (likely(skb_queue_len(&offload->skb_queue) <= - offload->skb_queue_len_max)) + if (likely(skb_queue_len(&offload->skb_queue) < + offload->skb_queue_len_max)) { skb = alloc_can_skb(offload->dev, &cf); + if (unlikely(!skb)) + skb_error = ERR_PTR(-ENOMEM); /* skb alloc failed */ + } else { + skb_error = ERR_PTR(-ENOBUFS); /* skb_queue is full */ + } - if (!skb) { + /* If queue is full or skb not available, drop by reading into + * overflow buffer. + */ + if (unlikely(skb_error)) { struct can_frame cf_overflow; u32 timestamp; ret = offload->mailbox_read(offload, &cf_overflow, ×tamp, n); - if (ret) - offload->dev->stats.rx_dropped++; - return NULL; + /* Mailbox was empty. */ + if (unlikely(!ret)) + return NULL; + + /* Mailbox has been read and we're dropping it or + * there was a problem reading the mailbox. + * + * Increment error counters in any case. + */ + offload->dev->stats.rx_dropped++; + offload->dev->stats.rx_fifo_errors++; + + /* There was a problem reading the mailbox, propagate + * error value. + */ + if (unlikely(ret < 0)) + return ERR_PTR(ret); + + return skb_error; } cb = can_rx_offload_get_cb(skb); ret = offload->mailbox_read(offload, cf, &cb->timestamp, n); - if (!ret) { + + /* Mailbox was empty. */ + if (unlikely(!ret)) { kfree_skb(skb); return NULL; } + /* There was a problem reading the mailbox, propagate error value. */ + if (unlikely(ret < 0)) { + kfree_skb(skb); + + offload->dev->stats.rx_dropped++; + offload->dev->stats.rx_fifo_errors++; + + return ERR_PTR(ret); + } + + /* Mailbox was read. */ return skb; } @@ -166,8 +224,8 @@ int can_rx_offload_irq_offload_timestamp(struct can_rx_offload *offload, u64 pen continue; skb = can_rx_offload_offload_one(offload, i); - if (!skb) - break; + if (IS_ERR_OR_NULL(skb)) + continue; __skb_queue_add_sort(&skb_queue, skb, can_rx_offload_compare); } @@ -197,7 +255,13 @@ int can_rx_offload_irq_offload_fifo(struct can_rx_offload *offload) struct sk_buff *skb; int received = 0; - while ((skb = can_rx_offload_offload_one(offload, 0))) { + while (1) { + skb = can_rx_offload_offload_one(offload, 0); + if (IS_ERR(skb)) + continue; + if (!skb) + break; + skb_queue_tail(&offload->skb_queue, skb); received++; } @@ -216,8 +280,10 @@ int can_rx_offload_queue_sorted(struct can_rx_offload *offload, unsigned long flags; if (skb_queue_len(&offload->skb_queue) > - offload->skb_queue_len_max) - return -ENOMEM; + offload->skb_queue_len_max) { + kfree_skb(skb); + return -ENOBUFS; + } cb = can_rx_offload_get_cb(skb); cb->timestamp = timestamp; @@ -259,8 +325,10 @@ int can_rx_offload_queue_tail(struct can_rx_offload *offload, struct sk_buff *skb) { if (skb_queue_len(&offload->skb_queue) > - offload->skb_queue_len_max) - return -ENOMEM; + offload->skb_queue_len_max) { + kfree_skb(skb); + return -ENOBUFS; + } skb_queue_tail(&offload->skb_queue, skb); can_rx_offload_schedule(offload); diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index 5d067c1b987f..35564a9561b7 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -343,9 +343,16 @@ static void slcan_transmit(struct work_struct *work) */ static void slcan_write_wakeup(struct tty_struct *tty) { - struct slcan *sl = tty->disc_data; + struct slcan *sl; + + rcu_read_lock(); + sl = rcu_dereference(tty->disc_data); + if (!sl) + goto out; schedule_work(&sl->tx_work); +out: + rcu_read_unlock(); } /* Send a can_frame to a TTY queue. */ @@ -613,6 +620,8 @@ err_free_chan: sl->tty = NULL; tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); + slc_free_netdev(sl->dev); + free_netdev(sl->dev); err_exit: rtnl_unlock(); @@ -638,10 +647,11 @@ static void slcan_close(struct tty_struct *tty) return; spin_lock_bh(&sl->lock); - tty->disc_data = NULL; + rcu_assign_pointer(tty->disc_data, NULL); sl->tty = NULL; spin_unlock_bh(&sl->lock); + synchronize_rcu(); flush_work(&sl->tx_work); /* Flush network side */ diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 8bf80ad9dc44..aed8ab6d6c5b 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -631,6 +631,7 @@ static int gs_can_open(struct net_device *netdev) rc); usb_unanchor_urb(urb); + usb_free_urb(urb); break; } @@ -925,7 +926,7 @@ static int gs_usb_probe(struct usb_interface *intf, GS_USB_BREQ_HOST_FORMAT, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 1, - intf->altsetting[0].desc.bInterfaceNumber, + intf->cur_altsetting->desc.bInterfaceNumber, hconf, sizeof(*hconf), 1000); @@ -948,7 +949,7 @@ static int gs_usb_probe(struct usb_interface *intf, GS_USB_BREQ_DEVICE_CONFIG, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 1, - intf->altsetting[0].desc.bInterfaceNumber, + intf->cur_altsetting->desc.bInterfaceNumber, dconf, sizeof(*dconf), 1000); diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index e0c24abce16c..070e1ba79736 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -887,9 +887,8 @@ static void mcba_usb_disconnect(struct usb_interface *intf) netdev_info(priv->netdev, "device disconnected\n"); unregister_candev(priv->netdev); - free_candev(priv->netdev); - mcba_urb_unlink(priv); + free_candev(priv->netdev); } static struct usb_driver mcba_usb_driver = { diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index 838545ce468d..0e1fc6c4360e 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -108,7 +108,7 @@ struct pcan_usb_msg_context { u8 *end; u8 rec_cnt; u8 rec_idx; - u8 rec_data_idx; + u8 rec_ts_idx; struct net_device *netdev; struct pcan_usb *pdev; }; @@ -441,8 +441,8 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n, } if ((n & PCAN_USB_ERROR_BUS_LIGHT) == 0) { /* no error (back to active state) */ - mc->pdev->dev.can.state = CAN_STATE_ERROR_ACTIVE; - return 0; + new_state = CAN_STATE_ERROR_ACTIVE; + break; } break; @@ -465,9 +465,9 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n, } if ((n & PCAN_USB_ERROR_BUS_HEAVY) == 0) { - /* no error (back to active state) */ - mc->pdev->dev.can.state = CAN_STATE_ERROR_ACTIVE; - return 0; + /* no error (back to warning state) */ + new_state = CAN_STATE_ERROR_WARNING; + break; } break; @@ -506,6 +506,11 @@ static int pcan_usb_decode_error(struct pcan_usb_msg_context *mc, u8 n, mc->pdev->dev.can.can_stats.error_warning++; break; + case CAN_STATE_ERROR_ACTIVE: + cf->can_id |= CAN_ERR_CRTL; + cf->data[1] = CAN_ERR_CRTL_ACTIVE; + break; + default: /* CAN_STATE_MAX (trick to handle other errors) */ cf->can_id |= CAN_ERR_CRTL; @@ -552,10 +557,15 @@ static int pcan_usb_decode_status(struct pcan_usb_msg_context *mc, mc->ptr += PCAN_USB_CMD_ARGS; if (status_len & PCAN_USB_STATUSLEN_TIMESTAMP) { - int err = pcan_usb_decode_ts(mc, !mc->rec_idx); + int err = pcan_usb_decode_ts(mc, !mc->rec_ts_idx); if (err) return err; + + /* Next packet in the buffer will have a timestamp on a single + * byte + */ + mc->rec_ts_idx++; } switch (f) { @@ -638,10 +648,13 @@ static int pcan_usb_decode_data(struct pcan_usb_msg_context *mc, u8 status_len) cf->can_dlc = get_can_dlc(rec_len); - /* first data packet timestamp is a word */ - if (pcan_usb_decode_ts(mc, !mc->rec_data_idx)) + /* Only first packet timestamp is a word */ + if (pcan_usb_decode_ts(mc, !mc->rec_ts_idx)) goto decode_failed; + /* Next packet in the buffer will have a timestamp on a single byte */ + mc->rec_ts_idx++; + /* read data */ memset(cf->data, 0x0, sizeof(cf->data)); if (status_len & PCAN_USB_STATUSLEN_RTR) { @@ -695,7 +708,6 @@ static int pcan_usb_decode_msg(struct peak_usb_device *dev, u8 *ibuf, u32 lbuf) /* handle normal can frames here */ } else { err = pcan_usb_decode_data(&mc, sl); - mc.rec_data_idx++; } } diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index 059282a6065c..85d92f129af2 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -776,7 +776,7 @@ static int peak_usb_create_dev(const struct peak_usb_adapter *peak_usb_adapter, dev = netdev_priv(netdev); /* allocate a buffer large enough to send commands */ - dev->cmd_buf = kmalloc(PCAN_USB_MAX_CMD_LEN, GFP_KERNEL); + dev->cmd_buf = kzalloc(PCAN_USB_MAX_CMD_LEN, GFP_KERNEL); if (!dev->cmd_buf) { err = -ENOMEM; goto lbl_free_candev; diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index 27861c417c94..3e4416473607 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -1007,9 +1007,8 @@ static void usb_8dev_disconnect(struct usb_interface *intf) netdev_info(priv->netdev, "device disconnected\n"); unregister_netdev(priv->netdev); - free_candev(priv->netdev); - unlink_all_urbs(priv); + free_candev(priv->netdev); } } diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index 83a9bc892a3b..6ae13f2419e3 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -55,6 +55,7 @@ config NET_DSA_QCA8K config NET_DSA_SMSC_LAN9303 tristate select NET_DSA_TAG_LAN9303 + select REGMAP ---help--- This enables support for the SMSC/Microchip LAN9303 3 port ethernet switch chips. diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index acf64d4cd94c..434e6dced6b7 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -1431,7 +1431,6 @@ int b53_mirror_add(struct dsa_switch *ds, int port, loc = B53_EG_MIR_CTL; b53_read16(dev, B53_MGMT_PAGE, loc, ®); - reg &= ~MIRROR_MASK; reg |= BIT(port); b53_write16(dev, B53_MGMT_PAGE, loc, reg); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 0132921f408a..6bca42e34a53 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -106,22 +106,11 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) unsigned int i; u32 reg, offset; - if (priv->type == BCM7445_DEVICE_ID) - offset = CORE_STS_OVERRIDE_IMP; - else - offset = CORE_STS_OVERRIDE_IMP2; - /* Enable the port memories */ reg = core_readl(priv, CORE_MEM_PSM_VDD_CTRL); reg &= ~P_TXQ_PSM_VDD(port); core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL); - /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ - reg = core_readl(priv, CORE_IMP_CTL); - reg |= (RX_BCST_EN | RX_MCST_EN | RX_UCST_EN); - reg &= ~(RX_DIS | TX_DIS); - core_writel(priv, reg, CORE_IMP_CTL); - /* Enable forwarding */ core_writel(priv, SW_FWDG_EN, CORE_SWMODE); @@ -140,10 +129,28 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) bcm_sf2_brcm_hdr_setup(priv, port); - /* Force link status for IMP port */ - reg = core_readl(priv, offset); - reg |= (MII_SW_OR | LINK_STS); - core_writel(priv, reg, offset); + if (port == 8) { + if (priv->type == BCM7445_DEVICE_ID) + offset = CORE_STS_OVERRIDE_IMP; + else + offset = CORE_STS_OVERRIDE_IMP2; + + /* Force link status for IMP port */ + reg = core_readl(priv, offset); + reg |= (MII_SW_OR | LINK_STS); + reg &= ~GMII_SPEED_UP_2G; + core_writel(priv, reg, offset); + + /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ + reg = core_readl(priv, CORE_IMP_CTL); + reg |= (RX_BCST_EN | RX_MCST_EN | RX_UCST_EN); + reg &= ~(RX_DIS | TX_DIS); + core_writel(priv, reg, CORE_IMP_CTL); + } else { + reg = core_readl(priv, CORE_G_PCTL_PORT(port)); + reg &= ~(RX_DIS | TX_DIS); + core_writel(priv, reg, CORE_G_PCTL_PORT(port)); + } } static void bcm_sf2_eee_enable_set(struct dsa_switch *ds, int port, bool enable) @@ -426,11 +433,10 @@ static int bcm_sf2_sw_mdio_write(struct mii_bus *bus, int addr, int regnum, * send them to our master MDIO bus controller */ if (addr == BRCM_PSEUDO_PHY_ADDR && priv->indir_phy_mask & BIT(addr)) - bcm_sf2_sw_indir_rw(priv, 0, addr, regnum, val); + return bcm_sf2_sw_indir_rw(priv, 0, addr, regnum, val); else - mdiobus_write_nested(priv->master_mii_bus, addr, regnum, val); - - return 0; + return mdiobus_write_nested(priv->master_mii_bus, addr, + regnum, val); } static irqreturn_t bcm_sf2_switch_0_isr(int irq, void *dev_id) @@ -1190,12 +1196,16 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev) return ret; } + bcm_sf2_gphy_enable_set(priv->dev->ds, true); + ret = bcm_sf2_mdio_register(ds); if (ret) { pr_err("failed to register MDIO bus\n"); return ret; } + bcm_sf2_gphy_enable_set(priv->dev->ds, false); + ret = bcm_sf2_cfp_rst(priv); if (ret) { pr_err("failed to reset CFP\n"); diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 0fff1502267a..10ea01459a36 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2196,11 +2196,22 @@ static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg) mutex_unlock(&chip->reg_lock); if (reg == MII_PHYSID2) { - /* Some internal PHYS don't have a model number. Use - * the mv88e6390 family model number instead. - */ - if (!(val & 0x3f0)) - val |= MV88E6XXX_PORT_SWITCH_ID_PROD_6390 >> 4; + /* Some internal PHYs don't have a model number. */ + if (chip->info->family != MV88E6XXX_FAMILY_6165) + /* Then there is the 6165 family. It gets is + * PHYs correct. But it can also have two + * SERDES interfaces in the PHY address + * space. And these don't have a model + * number. But they are not PHYs, so we don't + * want to give them something a PHY driver + * will recognise. + * + * Use the mv88e6390 family model number + * instead, for anything which really could be + * a PHY, + */ + if (!(val & 0x3f0)) + val |= MV88E6XXX_PORT_SWITCH_ID_PROD_6390 >> 4; } return err ? err : val; @@ -2527,7 +2538,7 @@ static const struct mv88e6xxx_ops mv88e6141_ops = { .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, - .port_set_speed = mv88e6390_port_set_speed, + .port_set_speed = mv88e6341_port_set_speed, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, @@ -3029,7 +3040,7 @@ static const struct mv88e6xxx_ops mv88e6341_ops = { .port_set_link = mv88e6xxx_port_set_link, .port_set_duplex = mv88e6xxx_port_set_duplex, .port_set_rgmii_delay = mv88e6390_port_set_rgmii_delay, - .port_set_speed = mv88e6390_port_set_speed, + .port_set_speed = mv88e6341_port_set_speed, .port_tag_remap = mv88e6095_port_tag_remap, .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c index d76d7c7ea819..544b6a9cc01a 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.c +++ b/drivers/net/dsa/mv88e6xxx/global1.c @@ -313,6 +313,11 @@ int mv88e6390_g1_set_cpu_port(struct mv88e6xxx_chip *chip, int port) { u16 ptr = MV88E6390_G1_MONITOR_MGMT_CTL_PTR_CPU_DEST; + /* Use the default high priority for management frames sent to + * the CPU. + */ + port |= MV88E6390_G1_MONITOR_MGMT_CTL_PTR_CPU_DEST_MGMTPRI; + return mv88e6390_g1_monitor_write(chip, ptr, port); } diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h index 950b914f9251..d82e8956cbd5 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.h +++ b/drivers/net/dsa/mv88e6xxx/global1.h @@ -189,6 +189,7 @@ #define MV88E6390_G1_MONITOR_MGMT_CTL_PTR_INGRESS_DEST 0x2000 #define MV88E6390_G1_MONITOR_MGMT_CTL_PTR_EGRESS_DEST 0x2100 #define MV88E6390_G1_MONITOR_MGMT_CTL_PTR_CPU_DEST 0x3000 +#define MV88E6390_G1_MONITOR_MGMT_CTL_PTR_CPU_DEST_MGMTPRI 0x00e0 #define MV88E6390_G1_MONITOR_MGMT_CTL_DATA_MASK 0x00ff /* Offset 0x1C: Global Control 2 */ diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index 2cffecfe86e3..fd0a88c56031 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -203,8 +203,11 @@ static int mv88e6xxx_port_set_speed(struct mv88e6xxx_chip *chip, int port, ctrl = MV88E6XXX_PORT_MAC_CTL_SPEED_1000; break; case 2500: - ctrl = MV88E6390_PORT_MAC_CTL_SPEED_10000 | - MV88E6390_PORT_MAC_CTL_ALTSPEED; + if (alt_bit) + ctrl = MV88E6390_PORT_MAC_CTL_SPEED_10000 | + MV88E6390_PORT_MAC_CTL_ALTSPEED; + else + ctrl = MV88E6390_PORT_MAC_CTL_SPEED_10000; break; case 10000: /* all bits set, fall through... */ @@ -266,6 +269,24 @@ int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) return mv88e6xxx_port_set_speed(chip, port, speed, false, false); } +/* Support 10, 100, 200, 1000, 2500 Mbps (e.g. 88E6341) */ +int mv88e6341_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) +{ + if (speed == SPEED_MAX) + speed = port < 5 ? 1000 : 2500; + + if (speed > 2500) + return -EOPNOTSUPP; + + if (speed == 200 && port != 0) + return -EOPNOTSUPP; + + if (speed == 2500 && port < 5) + return -EOPNOTSUPP; + + return mv88e6xxx_port_set_speed(chip, port, speed, !port, true); +} + /* Support 10, 100, 200, 1000 Mbps (e.g. 88E6352 family) */ int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed) { diff --git a/drivers/net/dsa/mv88e6xxx/port.h b/drivers/net/dsa/mv88e6xxx/port.h index ccdc67fe9079..8a645683cf6b 100644 --- a/drivers/net/dsa/mv88e6xxx/port.h +++ b/drivers/net/dsa/mv88e6xxx/port.h @@ -262,6 +262,7 @@ int mv88e6xxx_port_set_duplex(struct mv88e6xxx_chip *chip, int port, int dup); int mv88e6065_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); int mv88e6185_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); +int mv88e6341_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); int mv88e6352_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); int mv88e6390_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); int mv88e6390x_port_set_speed(struct mv88e6xxx_chip *chip, int port, int speed); diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index c3c9d7e33bd6..8ee59b20b47a 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -459,6 +459,18 @@ qca8k_set_pad_ctrl(struct qca8k_priv *priv, int port, int mode) qca8k_write(priv, QCA8K_REG_PORT5_PAD_CTRL, QCA8K_PORT_PAD_RGMII_RX_DELAY_EN); break; + case PHY_INTERFACE_MODE_RGMII_ID: + /* RGMII_ID needs internal delay. This is enabled through + * PORT5_PAD_CTRL for all ports, rather than individual port + * registers + */ + qca8k_write(priv, reg, + QCA8K_PORT_PAD_RGMII_EN | + QCA8K_PORT_PAD_RGMII_TX_DELAY(QCA8K_MAX_DELAY) | + QCA8K_PORT_PAD_RGMII_RX_DELAY(QCA8K_MAX_DELAY)); + qca8k_write(priv, QCA8K_REG_PORT5_PAD_CTRL, + QCA8K_PORT_PAD_RGMII_RX_DELAY_EN); + break; case PHY_INTERFACE_MODE_SGMII: qca8k_write(priv, reg, QCA8K_PORT_PAD_SGMII_EN); break; @@ -551,7 +563,7 @@ qca8k_setup(struct dsa_switch *ds) BIT(0) << QCA8K_GLOBAL_FW_CTRL1_UC_DP_S); /* Setup connection between CPU port & user ports */ - for (i = 0; i < DSA_MAX_PORTS; i++) { + for (i = 0; i < QCA8K_NUM_PORTS; i++) { /* CPU port gets connected to all user ports of the switch */ if (dsa_is_cpu_port(ds, i)) { qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(QCA8K_CPU_PORT), @@ -900,7 +912,7 @@ qca8k_sw_probe(struct mdio_device *mdiodev) if (id != QCA8K_ID_QCA8337) return -ENODEV; - priv->ds = dsa_switch_alloc(&mdiodev->dev, DSA_MAX_PORTS); + priv->ds = dsa_switch_alloc(&mdiodev->dev, QCA8K_NUM_PORTS); if (!priv->ds) return -ENOMEM; diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index 613fe5c50236..d146e54c8a6c 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -40,6 +40,7 @@ ((0x8 + (x & 0x3)) << 22) #define QCA8K_PORT_PAD_RGMII_RX_DELAY(x) \ ((0x10 + (x & 0x3)) << 20) +#define QCA8K_MAX_DELAY 3 #define QCA8K_PORT_PAD_RGMII_RX_DELAY_EN BIT(24) #define QCA8K_PORT_PAD_SGMII_EN BIT(7) #define QCA8K_REG_MODULE_EN 0x030 diff --git a/drivers/net/ethernet/amazon/Kconfig b/drivers/net/ethernet/amazon/Kconfig index 99b30353541a..9e87d7b8360f 100644 --- a/drivers/net/ethernet/amazon/Kconfig +++ b/drivers/net/ethernet/amazon/Kconfig @@ -17,7 +17,7 @@ if NET_VENDOR_AMAZON config ENA_ETHERNET tristate "Elastic Network Adapter (ENA) support" - depends on (PCI_MSI && X86) + depends on PCI_MSI && !CPU_BIG_ENDIAN ---help--- This driver supports Elastic Network Adapter (ENA)" diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c index 1a4ffc5d3da4..dc9149a32f41 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -199,6 +199,11 @@ static inline void comp_ctxt_release(struct ena_com_admin_queue *queue, static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *queue, u16 command_id, bool capture) { + if (unlikely(!queue->comp_ctx)) { + pr_err("Completion context is NULL\n"); + return NULL; + } + if (unlikely(command_id >= queue->q_depth)) { pr_err("command id is larger than the queue size. cmd_id: %u queue size %d\n", command_id, queue->q_depth); @@ -843,6 +848,24 @@ static int ena_com_get_feature(struct ena_com_dev *ena_dev, 0); } +static void ena_com_hash_key_fill_default_key(struct ena_com_dev *ena_dev) +{ + struct ena_admin_feature_rss_flow_hash_control *hash_key = + (ena_dev->rss).hash_key; + + netdev_rss_key_fill(&hash_key->key, sizeof(hash_key->key)); + /* The key is stored in the device in u32 array + * as well as the API requires the key to be passed in this + * format. Thus the size of our array should be divided by 4 + */ + hash_key->keys_num = sizeof(hash_key->key) / sizeof(u32); +} + +int ena_com_get_current_hash_function(struct ena_com_dev *ena_dev) +{ + return ena_dev->rss.hash_func; +} + static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev) { struct ena_rss *rss = &ena_dev->rss; @@ -2002,7 +2025,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev) if (unlikely(ret)) return ret; - if (get_resp.u.flow_hash_func.supported_func & (1 << rss->hash_func)) { + if (!(get_resp.u.flow_hash_func.supported_func & BIT(rss->hash_func))) { pr_err("Func hash %d isn't supported by device, abort\n", rss->hash_func); return -EOPNOTSUPP; @@ -2069,15 +2092,16 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, switch (func) { case ENA_ADMIN_TOEPLITZ: - if (key_len > sizeof(hash_key->key)) { - pr_err("key len (%hu) is bigger than the max supported (%zu)\n", - key_len, sizeof(hash_key->key)); - return -EINVAL; + if (key) { + if (key_len != sizeof(hash_key->key)) { + pr_err("key len (%hu) doesn't equal the supported size (%zu)\n", + key_len, sizeof(hash_key->key)); + return -EINVAL; + } + memcpy(hash_key->key, key, key_len); + rss->hash_init_val = init_val; + hash_key->keys_num = key_len >> 2; } - - memcpy(hash_key->key, key, key_len); - rss->hash_init_val = init_val; - hash_key->keys_num = key_len >> 2; break; case ENA_ADMIN_CRC32: rss->hash_init_val = init_val; @@ -2087,6 +2111,7 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, return -EINVAL; } + rss->hash_func = func; rc = ena_com_set_hash_function(ena_dev); /* Restore the old function */ @@ -2113,7 +2138,11 @@ int ena_com_get_hash_function(struct ena_com_dev *ena_dev, if (unlikely(rc)) return rc; - rss->hash_func = get_resp.u.flow_hash_func.selected_func; + /* ffs() returns 1 in case the lsb is set */ + rss->hash_func = ffs(get_resp.u.flow_hash_func.selected_func); + if (rss->hash_func) + rss->hash_func--; + if (func) *func = rss->hash_func; @@ -2401,6 +2430,8 @@ int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 indr_tbl_log_size) if (unlikely(rc)) goto err_hash_key; + ena_com_hash_key_fill_default_key(ena_dev); + rc = ena_com_hash_ctrl_init(ena_dev); if (unlikely(rc)) goto err_hash_ctrl; diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h index 7b784f8a06a6..7272fb0d858d 100644 --- a/drivers/net/ethernet/amazon/ena/ena_com.h +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -42,6 +42,7 @@ #include #include #include +#include #include "ena_common_defs.h" #include "ena_admin_defs.h" @@ -631,6 +632,14 @@ int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 log_size); */ void ena_com_rss_destroy(struct ena_com_dev *ena_dev); +/* ena_com_get_current_hash_function - Get RSS hash function + * @ena_dev: ENA communication layer struct + * + * Return the current hash function. + * @return: 0 or one of the ena_admin_hash_functions values. + */ +int ena_com_get_current_hash_function(struct ena_com_dev *ena_dev); + /* ena_com_fill_hash_function - Fill RSS hash function * @ena_dev: ENA communication layer struct * @func: The hash function (Toeplitz or crc) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 967020fb26ee..d29e256bf610 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -648,6 +648,28 @@ static u32 ena_get_rxfh_key_size(struct net_device *netdev) return ENA_HASH_KEY_SIZE; } +static int ena_indirection_table_get(struct ena_adapter *adapter, u32 *indir) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + int i, rc; + + if (!indir) + return 0; + + rc = ena_com_indirect_table_get(ena_dev, indir); + if (rc) + return rc; + + /* Our internal representation of the indices is: even indices + * for Tx and uneven indices for Rx. We need to convert the Rx + * indices to be consecutive + */ + for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) + indir[i] = ENA_IO_RXQ_IDX_TO_COMBINED_IDX(indir[i]); + + return rc; +} + static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { @@ -656,11 +678,25 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 func; int rc; - rc = ena_com_indirect_table_get(adapter->ena_dev, indir); + rc = ena_indirection_table_get(adapter, indir); if (rc) return rc; + /* We call this function in order to check if the device + * supports getting/setting the hash function. + */ rc = ena_com_get_hash_function(adapter->ena_dev, &ena_func, key); + + if (rc) { + if (rc == -EOPNOTSUPP) { + key = NULL; + hfunc = NULL; + rc = 0; + } + + return rc; + } + if (rc) return rc; @@ -669,7 +705,7 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, func = ETH_RSS_HASH_TOP; break; case ENA_ADMIN_CRC32: - func = ETH_RSS_HASH_XOR; + func = ETH_RSS_HASH_CRC32; break; default: netif_err(adapter, drv, netdev, @@ -694,8 +730,8 @@ static int ena_set_rxfh(struct net_device *netdev, const u32 *indir, if (indir) { for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) { rc = ena_com_indirect_table_fill_entry(ena_dev, - ENA_IO_RXQ_IDX(indir[i]), - i); + i, + ENA_IO_RXQ_IDX(indir[i])); if (unlikely(rc)) { netif_err(adapter, drv, netdev, "Cannot fill indirect table (index is too large)\n"); @@ -712,10 +748,13 @@ static int ena_set_rxfh(struct net_device *netdev, const u32 *indir, } switch (hfunc) { + case ETH_RSS_HASH_NO_CHANGE: + func = ena_com_get_current_hash_function(ena_dev); + break; case ETH_RSS_HASH_TOP: func = ENA_ADMIN_TOEPLITZ; break; - case ETH_RSS_HASH_XOR: + case ETH_RSS_HASH_CRC32: func = ENA_ADMIN_CRC32; break; default: @@ -816,6 +855,7 @@ static const struct ethtool_ops ena_ethtool_ops = { .get_channels = ena_get_channels, .get_tunable = ena_get_tunable, .set_tunable = ena_set_tunable, + .get_ts_info = ethtool_op_get_ts_info, }; void ena_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index db6f6a877f63..d9ece9ac6f53 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1196,8 +1196,8 @@ static int ena_io_poll(struct napi_struct *napi, int budget) struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); struct ena_ring *tx_ring, *rx_ring; - u32 tx_work_done; - u32 rx_work_done; + int tx_work_done; + int rx_work_done = 0; int tx_budget; int napi_comp_call = 0; int ret; @@ -1214,7 +1214,11 @@ static int ena_io_poll(struct napi_struct *napi, int budget) } tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget); - rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget); + /* On netpoll the budget is zero and the handler should only clean the + * tx completions. + */ + if (likely(budget)) + rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget); /* If the device is about to reset or down, avoid unmask * the interrupt and return 0 so NAPI won't reschedule @@ -1792,6 +1796,7 @@ err_setup_rx: err_setup_tx: ena_free_io_irq(adapter); err_req_irq: + ena_del_napi(adapter); return rc; } @@ -2798,8 +2803,8 @@ static void check_for_missing_keep_alive(struct ena_adapter *adapter) if (adapter->keep_alive_timeout == ENA_HW_HINTS_NO_TIMEOUT) return; - keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies + - adapter->keep_alive_timeout); + keep_alive_expired = adapter->last_keep_alive_jiffies + + adapter->keep_alive_timeout; if (unlikely(time_is_before_jiffies(keep_alive_expired))) { netif_err(adapter, drv, adapter->netdev, "Keep alive watchdog timeout.\n"); @@ -2901,7 +2906,7 @@ static void ena_timer_service(unsigned long data) } /* Reset the timer */ - mod_timer(&adapter->timer_service, jiffies + HZ); + mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); } static int ena_calc_io_queue_num(struct pci_dev *pdev, diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 3404376c28ca..5a72267b858b 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -113,6 +113,8 @@ #define ENA_IO_TXQ_IDX(q) (2 * (q)) #define ENA_IO_RXQ_IDX(q) (2 * (q) + 1) +#define ENA_IO_TXQ_IDX_TO_COMBINED_IDX(q) ((q) / 2) +#define ENA_IO_RXQ_IDX_TO_COMBINED_IDX(q) (((q) - 1) / 2) #define ENA_MGMNT_IRQ_IDX 0 #define ENA_IO_IRQ_FIRST_IDX 1 diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c index b11e910850f7..78d1e5385a3e 100644 --- a/drivers/net/ethernet/amd/am79c961a.c +++ b/drivers/net/ethernet/amd/am79c961a.c @@ -440,7 +440,7 @@ static void am79c961_timeout(struct net_device *dev) /* * Transmit a packet */ -static int +static netdev_tx_t am79c961_sendpacket(struct sk_buff *skb, struct net_device *dev) { struct dev_priv *priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c index c5b81268c284..d3d44e07afbc 100644 --- a/drivers/net/ethernet/amd/atarilance.c +++ b/drivers/net/ethernet/amd/atarilance.c @@ -339,7 +339,8 @@ static unsigned long lance_probe1( struct net_device *dev, struct lance_addr *init_rec ); static int lance_open( struct net_device *dev ); static void lance_init_ring( struct net_device *dev ); -static int lance_start_xmit( struct sk_buff *skb, struct net_device *dev ); +static netdev_tx_t lance_start_xmit(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t lance_interrupt( int irq, void *dev_id ); static int lance_rx( struct net_device *dev ); static int lance_close( struct net_device *dev ); @@ -769,7 +770,8 @@ static void lance_tx_timeout (struct net_device *dev) /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -static int lance_start_xmit( struct sk_buff *skb, struct net_device *dev ) +static netdev_tx_t +lance_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct lance_private *lp = netdev_priv(dev); struct lance_ioreg *IO = lp->iobase; diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index c7cde58feaf7..290d070b293b 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -893,7 +893,7 @@ static void lance_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t lance_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct lance_private *lp = netdev_priv(dev); volatile struct lance_regs *ll = lp->ll; diff --git a/drivers/net/ethernet/amd/sun3lance.c b/drivers/net/ethernet/amd/sun3lance.c index 77b1db267730..da7e3d4f4166 100644 --- a/drivers/net/ethernet/amd/sun3lance.c +++ b/drivers/net/ethernet/amd/sun3lance.c @@ -236,7 +236,8 @@ struct lance_private { static int lance_probe( struct net_device *dev); static int lance_open( struct net_device *dev ); static void lance_init_ring( struct net_device *dev ); -static int lance_start_xmit( struct sk_buff *skb, struct net_device *dev ); +static netdev_tx_t lance_start_xmit(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t lance_interrupt( int irq, void *dev_id); static int lance_rx( struct net_device *dev ); static int lance_close( struct net_device *dev ); @@ -511,7 +512,8 @@ static void lance_init_ring( struct net_device *dev ) } -static int lance_start_xmit( struct sk_buff *skb, struct net_device *dev ) +static netdev_tx_t +lance_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct lance_private *lp = netdev_priv(dev); int entry, len; diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c index 9845e07d40cd..1a44c8c26b8a 100644 --- a/drivers/net/ethernet/amd/sunlance.c +++ b/drivers/net/ethernet/amd/sunlance.c @@ -1106,7 +1106,7 @@ static void lance_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t lance_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct lance_private *lp = netdev_priv(dev); int entry, skblen, len; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 75c4455e2271..c65d2cdcc7cf 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1964,7 +1964,7 @@ static int xgbe_close(struct net_device *netdev) return 0; } -static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t xgbe_xmit(struct sk_buff *skb, struct net_device *netdev) { struct xgbe_prv_data *pdata = netdev_priv(netdev); struct xgbe_hw_if *hw_if = &pdata->hw_if; @@ -1973,7 +1973,7 @@ static int xgbe_xmit(struct sk_buff *skb, struct net_device *netdev) struct xgbe_ring *ring; struct xgbe_packet_data *packet; struct netdev_queue *txq; - int ret; + netdev_tx_t ret; DBGPR("-->xgbe_xmit: skb->len = %d\n", skb->len); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 50dd6bf176d0..3a489b2b99c9 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -2034,7 +2034,7 @@ static int xgene_enet_probe(struct platform_device *pdev) int ret; ndev = alloc_etherdev_mqs(sizeof(struct xgene_enet_pdata), - XGENE_NUM_RX_RING, XGENE_NUM_TX_RING); + XGENE_NUM_TX_RING, XGENE_NUM_RX_RING); if (!ndev) return -ENOMEM; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h index 57e796870595..ea4b7e97c61e 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h @@ -40,8 +40,8 @@ #define AQ_CFG_IS_LRO_DEF 1U /* RSS */ -#define AQ_CFG_RSS_INDIRECTION_TABLE_MAX 128U -#define AQ_CFG_RSS_HASHKEY_SIZE 320U +#define AQ_CFG_RSS_INDIRECTION_TABLE_MAX 64U +#define AQ_CFG_RSS_HASHKEY_SIZE 40U #define AQ_CFG_IS_RSS_DEF 1U #define AQ_CFG_NUM_RSS_QUEUES_DEF AQ_CFG_VECS_DEF diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index cc658a29cc33..7a900f76c9ac 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -43,7 +43,7 @@ static void aq_nic_rss_init(struct aq_nic_s *self, unsigned int num_rss_queues) struct aq_rss_parameters *rss_params = &cfg->aq_rss; int i = 0; - static u8 rss_key[40] = { + static u8 rss_key[AQ_CFG_RSS_HASHKEY_SIZE] = { 0x1e, 0xad, 0x71, 0x87, 0x65, 0xfc, 0x26, 0x7d, 0x0d, 0x45, 0x67, 0x74, 0xcd, 0x06, 0x1a, 0x18, 0xb6, 0xc1, 0xf0, 0xc7, 0xbb, 0x18, 0xbe, 0xf8, @@ -519,8 +519,10 @@ static unsigned int aq_nic_map_skb(struct aq_nic_s *self, dx_buff->len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) + if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) { + ret = 0; goto exit; + } first = dx_buff; dx_buff->len_pkt = skb->len; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index 5fecc9a099ef..bb2894a333f2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -310,15 +310,13 @@ irqreturn_t aq_vec_isr_legacy(int irq, void *private) { struct aq_vec_s *self = private; u64 irq_mask = 0U; - irqreturn_t err = 0; + int err; - if (!self) { - err = -EINVAL; - goto err_exit; - } + if (!self) + return IRQ_NONE; err = self->aq_hw_ops->hw_irq_read(self->aq_hw, &irq_mask); if (err < 0) - goto err_exit; + return IRQ_NONE; if (irq_mask) { self->aq_hw_ops->hw_irq_disable(self->aq_hw, @@ -326,11 +324,10 @@ irqreturn_t aq_vec_isr_legacy(int irq, void *private) napi_schedule(&self->napi); } else { self->aq_hw_ops->hw_irq_enable(self->aq_hw, 1U); - err = IRQ_NONE; + return IRQ_NONE; } -err_exit: - return err >= 0 ? IRQ_HANDLED : IRQ_NONE; + return IRQ_HANDLED; } cpumask_t *aq_vec_get_affinity_mask(struct aq_vec_s *self) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c index b0abd187cead..b83ee74d2839 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c @@ -182,8 +182,8 @@ static int hw_atl_a0_hw_rss_set(struct aq_hw_s *self, u32 i = 0U; u32 num_rss_queues = max(1U, self->aq_nic_cfg->num_rss_queues); int err = 0; - u16 bitary[(HW_ATL_A0_RSS_REDIRECTION_MAX * - HW_ATL_A0_RSS_REDIRECTION_BITS / 16U)]; + u16 bitary[1 + (HW_ATL_A0_RSS_REDIRECTION_MAX * + HW_ATL_A0_RSS_REDIRECTION_BITS / 16U)]; memset(bitary, 0, sizeof(bitary)); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c index 236325f48ec9..1c1bb074f664 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c @@ -183,8 +183,8 @@ static int hw_atl_b0_hw_rss_set(struct aq_hw_s *self, u32 i = 0U; u32 num_rss_queues = max(1U, self->aq_nic_cfg->num_rss_queues); int err = 0; - u16 bitary[(HW_ATL_B0_RSS_REDIRECTION_MAX * - HW_ATL_B0_RSS_REDIRECTION_BITS / 16U)]; + u16 bitary[1 + (HW_ATL_B0_RSS_REDIRECTION_MAX * + HW_ATL_B0_RSS_REDIRECTION_BITS / 16U)]; memset(bitary, 0, sizeof(bitary)); diff --git a/drivers/net/ethernet/arc/emac_rockchip.c b/drivers/net/ethernet/arc/emac_rockchip.c index c770ca37c9b2..a7d30731d376 100644 --- a/drivers/net/ethernet/arc/emac_rockchip.c +++ b/drivers/net/ethernet/arc/emac_rockchip.c @@ -261,6 +261,9 @@ static int emac_rockchip_remove(struct platform_device *pdev) if (priv->regulator) regulator_disable(priv->regulator); + if (priv->soc_data->need_div_macclk) + clk_disable_unprepare(priv->macclk); + free_netdev(ndev); return err; } diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 4f7e195af0bc..0d08039981b5 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -472,7 +472,9 @@ static void atl1e_mdio_write(struct net_device *netdev, int phy_id, { struct atl1e_adapter *adapter = netdev_priv(netdev); - atl1e_write_phy_reg(&adapter->hw, reg_num & MDIO_REG_ADDR_MASK, val); + if (atl1e_write_phy_reg(&adapter->hw, + reg_num & MDIO_REG_ADDR_MASK, val)) + netdev_err(netdev, "write phy register failed\n"); } static int atl1e_mii_ioctl(struct net_device *netdev, diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index a1125d10c825..8b9a0ce1d29f 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1521,8 +1521,10 @@ static int b44_magic_pattern(u8 *macaddr, u8 *ppattern, u8 *pmask, int offset) int ethaddr_bytes = ETH_ALEN; memset(ppattern + offset, 0xff, magicsync); - for (j = 0; j < magicsync; j++) - set_bit(len++, (unsigned long *) pmask); + for (j = 0; j < magicsync; j++) { + pmask[len >> 3] |= BIT(len & 7); + len++; + } for (j = 0; j < B44_MAX_PATTERNS; j++) { if ((B44_PATTERN_SIZE - len) >= ETH_ALEN) @@ -1534,7 +1536,8 @@ static int b44_magic_pattern(u8 *macaddr, u8 *ppattern, u8 *pmask, int offset) for (k = 0; k< ethaddr_bytes; k++) { ppattern[offset + magicsync + (j * ETH_ALEN) + k] = macaddr[k]; - set_bit(len++, (unsigned long *) pmask); + pmask[len >> 3] |= BIT(len & 7); + len++; } } return len - 1; diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 68470c7c630a..35eb0119b015 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -571,12 +571,13 @@ static irqreturn_t bcm_enet_isr_dma(int irq, void *dev_id) /* * tx request callback */ -static int bcm_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +bcm_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bcm_enet_priv *priv; struct bcm_enet_desc *desc; u32 len_stat; - int ret; + netdev_tx_t ret; priv = netdev_priv(dev); diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 79018fea7be2..f48f7d104af2 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2116,7 +2116,7 @@ static int bcm_sysport_probe(struct platform_device *pdev) priv->phy_interface = of_get_phy_mode(dn); /* Default to GMII interface mode */ - if (priv->phy_interface < 0) + if ((int)priv->phy_interface < 0) priv->phy_interface = PHY_INTERFACE_MODE_GMII; /* In the case of a fixed PHY, the DT node associated @@ -2329,6 +2329,9 @@ static int bcm_sysport_resume(struct device *d) umac_reset(priv); + /* Disable the UniMAC RX/TX */ + umac_enable_set(priv, CMD_RX_EN | CMD_TX_EN, 0); + /* We may have been suspended and never received a WOL event that * would turn off MPD detection, take care of that now */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 4e091a11daaf..52bce009d096 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -1112,7 +1112,7 @@ static inline u8 bnx2x_get_path_func_num(struct bnx2x *bp) for (i = 0; i < E1H_FUNC_MAX / 2; i++) { u32 func_config = MF_CFG_RD(bp, - func_mf_config[BP_PORT(bp) + 2 * i]. + func_mf_config[BP_PATH(bp) + 2 * i]. config); func_num += ((func_config & FUNC_MF_CFG_FUNC_HIDE) ? 0 : 1); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 8f0c9f6de893..b0ada7eac652 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -3540,6 +3540,16 @@ static void bnx2x_drv_info_iscsi_stat(struct bnx2x *bp) */ static void bnx2x_config_mf_bw(struct bnx2x *bp) { + /* Workaround for MFW bug. + * MFW is not supposed to generate BW attention in + * single function mode. + */ + if (!IS_MF(bp)) { + DP(BNX2X_MSG_MCP, + "Ignoring MF BW config in single function mode\n"); + return; + } + if (bp->link_vars.link_up) { bnx2x_cmng_fns_init(bp, true, CMNG_FNS_MINMAX); bnx2x_link_sync_notify(bp); @@ -9985,10 +9995,18 @@ static void bnx2x_recovery_failed(struct bnx2x *bp) */ static void bnx2x_parity_recover(struct bnx2x *bp) { - bool global = false; u32 error_recovered, error_unrecovered; - bool is_parity; + bool is_parity, global = false; +#ifdef CONFIG_BNX2X_SRIOV + int vf_idx; + for (vf_idx = 0; vf_idx < bp->requested_nr_virtfn; vf_idx++) { + struct bnx2x_virtf *vf = BP_VF(bp, vf_idx); + + if (vf) + vf->state = VF_LOST; + } +#endif DP(NETIF_MSG_HW, "Handling parity\n"); while (1) { switch (bp->recovery_state) { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 9ca994d0bab6..1977e0c552df 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -2389,15 +2389,21 @@ static int bnx2x_set_pf_tx_switching(struct bnx2x *bp, bool enable) /* send the ramrod on all the queues of the PF */ for_each_eth_queue(bp, i) { struct bnx2x_fastpath *fp = &bp->fp[i]; + int tx_idx; /* Set the appropriate Queue object */ q_params.q_obj = &bnx2x_sp_obj(bp, fp).q_obj; - /* Update the Queue state */ - rc = bnx2x_queue_state_change(bp, &q_params); - if (rc) { - BNX2X_ERR("Failed to configure Tx switching\n"); - return rc; + for (tx_idx = FIRST_TX_COS_INDEX; + tx_idx < fp->max_cos; tx_idx++) { + q_params.params.update.cid_index = tx_idx; + + /* Update the Queue state */ + rc = bnx2x_queue_state_change(bp, &q_params); + if (rc) { + BNX2X_ERR("Failed to configure Tx switching\n"); + return rc; + } } } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h index 53466f6cebab..a887bfa24c88 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h @@ -139,6 +139,7 @@ struct bnx2x_virtf { #define VF_ACQUIRED 1 /* VF acquired, but not initialized */ #define VF_ENABLED 2 /* VF Enabled */ #define VF_RESET 3 /* VF FLR'd, pending cleanup */ +#define VF_LOST 4 /* Recovery while VFs are loaded */ bool flr_clnup_stage; /* true during flr cleanup */ bool malicious; /* true if FW indicated so, until FLR */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c index 76a4668c50fe..6d5b81a971e3 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c @@ -2112,6 +2112,18 @@ static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf, { int i; + if (vf->state == VF_LOST) { + /* Just ack the FW and return if VFs are lost + * in case of parity error. VFs are supposed to be timedout + * on waiting for PF response. + */ + DP(BNX2X_MSG_IOV, + "VF 0x%x lost, not handling the request\n", vf->abs_vfid); + + storm_memset_vf_mbx_ack(bp, vf->abs_vfid); + return; + } + /* check if tlv type is known */ if (bnx2x_tlv_supported(mbx->first_tlv.tl.type)) { /* Lock the per vf op mutex and note the locker's identity. diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 38ee7692132c..5163da01e54f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5375,7 +5375,7 @@ static void bnxt_setup_msix(struct bnxt *bp) int tcs, i; tcs = netdev_get_num_tc(dev); - if (tcs > 1) { + if (tcs) { int i, off, count; for (i = 0; i < tcs; i++) { @@ -7310,13 +7310,13 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu) struct bnxt *bp = netdev_priv(dev); if (netif_running(dev)) - bnxt_close_nic(bp, false, false); + bnxt_close_nic(bp, true, false); dev->mtu = new_mtu; bnxt_set_ring_params(bp); if (netif_running(dev)) - return bnxt_open_nic(bp, false, false); + return bnxt_open_nic(bp, true, false); return 0; } @@ -7402,11 +7402,23 @@ static bool bnxt_fltr_match(struct bnxt_ntuple_filter *f1, struct flow_keys *keys1 = &f1->fkeys; struct flow_keys *keys2 = &f2->fkeys; - if (keys1->addrs.v4addrs.src == keys2->addrs.v4addrs.src && - keys1->addrs.v4addrs.dst == keys2->addrs.v4addrs.dst && - keys1->ports.ports == keys2->ports.ports && - keys1->basic.ip_proto == keys2->basic.ip_proto && - keys1->basic.n_proto == keys2->basic.n_proto && + if (keys1->basic.n_proto != keys2->basic.n_proto || + keys1->basic.ip_proto != keys2->basic.ip_proto) + return false; + + if (keys1->basic.n_proto == htons(ETH_P_IP)) { + if (keys1->addrs.v4addrs.src != keys2->addrs.v4addrs.src || + keys1->addrs.v4addrs.dst != keys2->addrs.v4addrs.dst) + return false; + } else { + if (memcmp(&keys1->addrs.v6addrs.src, &keys2->addrs.v6addrs.src, + sizeof(keys1->addrs.v6addrs.src)) || + memcmp(&keys1->addrs.v6addrs.dst, &keys2->addrs.v6addrs.dst, + sizeof(keys1->addrs.v6addrs.dst))) + return false; + } + + if (keys1->ports.ports == keys2->ports.ports && keys1->control.flags == keys2->control.flags && ether_addr_equal(f1->src_mac_addr, f2->src_mac_addr) && ether_addr_equal(f1->dst_mac_addr, f2->dst_mac_addr)) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index a22336fef66b..3c78cd1cdd6f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1339,14 +1339,22 @@ static int bnxt_flash_nvram(struct net_device *dev, rc = hwrm_send_message(bp, &req, sizeof(req), FLASH_NVRAM_TIMEOUT); dma_free_coherent(&bp->pdev->dev, data_len, kmem, dma_handle); + if (rc == HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED) { + netdev_info(dev, + "PF does not have admin privileges to flash the device\n"); + rc = -EACCES; + } else if (rc) { + rc = -EIO; + } return rc; } static int bnxt_firmware_reset(struct net_device *dev, u16 dir_type) { - struct bnxt *bp = netdev_priv(dev); struct hwrm_fw_reset_input req = {0}; + struct bnxt *bp = netdev_priv(dev); + int rc; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_RESET, -1, -1); @@ -1380,7 +1388,15 @@ static int bnxt_firmware_reset(struct net_device *dev, return -EINVAL; } - return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc == HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED) { + netdev_info(dev, + "PF does not have admin privileges to reset the device\n"); + rc = -EACCES; + } else if (rc) { + rc = -EIO; + } + return rc; } static int bnxt_flash_firmware(struct net_device *dev, @@ -1587,9 +1603,9 @@ static int bnxt_flash_package_from_file(struct net_device *dev, struct hwrm_nvm_install_update_output *resp = bp->hwrm_cmd_resp_addr; struct hwrm_nvm_install_update_input install = {0}; const struct firmware *fw; + int rc, hwrm_err = 0; u32 item_len; u16 index; - int rc; bnxt_hwrm_fw_set_time(bp); @@ -1632,15 +1648,16 @@ static int bnxt_flash_package_from_file(struct net_device *dev, memcpy(kmem, fw->data, fw->size); modify.host_src_addr = cpu_to_le64(dma_handle); - rc = hwrm_send_message(bp, &modify, sizeof(modify), - FLASH_PACKAGE_TIMEOUT); + hwrm_err = hwrm_send_message(bp, &modify, + sizeof(modify), + FLASH_PACKAGE_TIMEOUT); dma_free_coherent(&bp->pdev->dev, fw->size, kmem, dma_handle); } } release_firmware(fw); - if (rc) - return rc; + if (rc || hwrm_err) + goto err_exit; if ((install_type & 0xffff) == 0) install_type >>= 16; @@ -1648,26 +1665,21 @@ static int bnxt_flash_package_from_file(struct net_device *dev, install.install_type = cpu_to_le32(install_type); mutex_lock(&bp->hwrm_cmd_lock); - rc = _hwrm_send_message(bp, &install, sizeof(install), - INSTALL_PACKAGE_TIMEOUT); - if (rc) { - rc = -EOPNOTSUPP; - goto flash_pkg_exit; - } - - if (resp->error_code) { + hwrm_err = _hwrm_send_message(bp, &install, sizeof(install), + INSTALL_PACKAGE_TIMEOUT); + if (hwrm_err) { u8 error_code = ((struct hwrm_err_output *)resp)->cmd_err; - if (error_code == NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) { + if (resp->error_code && error_code == + NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) { install.flags |= cpu_to_le16( NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG); - rc = _hwrm_send_message(bp, &install, sizeof(install), - INSTALL_PACKAGE_TIMEOUT); - if (rc) { - rc = -EOPNOTSUPP; - goto flash_pkg_exit; - } + hwrm_err = _hwrm_send_message(bp, &install, + sizeof(install), + INSTALL_PACKAGE_TIMEOUT); } + if (hwrm_err) + goto flash_pkg_exit; } if (resp->result) { @@ -1677,6 +1689,14 @@ static int bnxt_flash_package_from_file(struct net_device *dev, } flash_pkg_exit: mutex_unlock(&bp->hwrm_cmd_lock); +err_exit: + if (hwrm_err == HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED) { + netdev_info(dev, + "PF does not have admin privileges to flash the device\n"); + rc = -EACCES; + } else if (hwrm_err) { + rc = -EOPNOTSUPP; + } return rc; } @@ -2236,17 +2256,37 @@ static int bnxt_hwrm_mac_loopback(struct bnxt *bp, bool enable) return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); } +static int bnxt_query_force_speeds(struct bnxt *bp, u16 *force_speeds) +{ + struct hwrm_port_phy_qcaps_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_port_phy_qcaps_input req = {0}; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_QCAPS, -1, -1); + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) + *force_speeds = le16_to_cpu(resp->supported_speeds_force_mode); + + mutex_unlock(&bp->hwrm_cmd_lock); + return rc; +} + static int bnxt_disable_an_for_lpbk(struct bnxt *bp, struct hwrm_port_phy_cfg_input *req) { struct bnxt_link_info *link_info = &bp->link_info; - u16 fw_advertising = link_info->advertising; + u16 fw_advertising; u16 fw_speed; int rc; if (!link_info->autoneg) return 0; + rc = bnxt_query_force_speeds(bp, &fw_advertising); + if (rc) + return rc; + fw_speed = PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_1GB; if (netif_carrier_ok(bp->dev)) fw_speed = bp->link_info.link_speed; @@ -2421,7 +2461,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, bool offline = false; u8 test_results = 0; u8 test_mask = 0; - int rc, i; + int rc = 0, i; if (!bp->num_tests || !BNXT_SINGLE_PF(bp)) return; @@ -2479,9 +2519,9 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, } bnxt_hwrm_phy_loopback(bp, false); bnxt_half_close_nic(bp); - bnxt_open_nic(bp, false, true); + rc = bnxt_open_nic(bp, false, true); } - if (bnxt_test_irq(bp)) { + if (rc || bnxt_test_irq(bp)) { buf[BNXT_IRQ_TEST_IDX] = 1; etest->flags |= ETH_TEST_FL_FAILED; } diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index d857df8ebdb4..3e3044fe3206 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -1138,7 +1138,7 @@ static int bcmgenet_power_down(struct bcmgenet_priv *priv, break; } - return 0; + return ret; } static void bcmgenet_power_up(struct bcmgenet_priv *priv, @@ -1985,6 +1985,8 @@ static void bcmgenet_link_intr_enable(struct bcmgenet_priv *priv) */ if (priv->internal_phy) { int0_enable |= UMAC_IRQ_LINK_EVENT; + if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv)) + int0_enable |= UMAC_IRQ_PHY_DET_R; } else if (priv->ext_phy) { int0_enable |= UMAC_IRQ_LINK_EVENT; } else if (priv->phy_interface == PHY_INTERFACE_MODE_MOCA) { @@ -2608,6 +2610,12 @@ static void bcmgenet_irq_task(struct work_struct *work) bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC); } + if (status & UMAC_IRQ_PHY_DET_R && + priv->dev->phydev->autoneg != AUTONEG_ENABLE) { + phy_init_hw(priv->dev->phydev); + genphy_config_aneg(priv->dev->phydev); + } + /* Link UP/DOWN event */ if (status & UMAC_IRQ_LINK_EVENT) phy_mac_interrupt(priv->phydev, @@ -2713,8 +2721,7 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id) } /* all other interested interrupts handled in bottom half */ - status &= (UMAC_IRQ_LINK_EVENT | - UMAC_IRQ_MPD_R); + status &= (UMAC_IRQ_LINK_EVENT | UMAC_IRQ_MPD_R | UMAC_IRQ_PHY_DET_R); if (status) { /* Save irq status for bottom-half processing. */ spin_lock_irqsave(&priv->lock, flags); @@ -3683,6 +3690,7 @@ static int bcmgenet_resume(struct device *d) phy_init_hw(priv->phydev); /* Speed settings must be restored */ + genphy_config_aneg(dev->phydev); bcmgenet_mii_config(priv->dev, false); /* disable ethernet MAC while updating its registers */ diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 9d499c5c8f8a..f176a0307f39 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -368,6 +368,7 @@ struct bcmgenet_mib_counters { #define EXT_PWR_DOWN_PHY_EN (1 << 20) #define EXT_RGMII_OOB_CTRL 0x0C +#define RGMII_MODE_EN_V123 (1 << 0) #define RGMII_LINK (1 << 4) #define OOB_DISABLE (1 << 5) #define RGMII_MODE_EN (1 << 6) diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index c421e2753c8c..fca9da1b1363 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -277,7 +277,11 @@ int bcmgenet_mii_config(struct net_device *dev, bool init) */ if (priv->ext_phy) { reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL); - reg |= RGMII_MODE_EN | id_mode_dis; + reg |= id_mode_dis; + if (GENET_IS_V1(priv) || GENET_IS_V2(priv) || GENET_IS_V3(priv)) + reg |= RGMII_MODE_EN_V123; + else + reg |= RGMII_MODE_EN; bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL); } @@ -292,11 +296,12 @@ int bcmgenet_mii_probe(struct net_device *dev) struct bcmgenet_priv *priv = netdev_priv(dev); struct device_node *dn = priv->pdev->dev.of_node; struct phy_device *phydev; - u32 phy_flags; + u32 phy_flags = 0; int ret; /* Communicate the integrated PHY revision */ - phy_flags = priv->gphy_rev; + if (priv->internal_phy) + phy_flags = priv->gphy_rev; /* Initialize link state variables that bcmgenet_mii_setup() uses */ priv->old_link = -1; diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index ecdef42f0ae6..00230fe097d9 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -299,7 +299,7 @@ static enum sbmac_state sbmac_set_channel_state(struct sbmac_softc *, static void sbmac_promiscuous_mode(struct sbmac_softc *sc, int onoff); static uint64_t sbmac_addr2reg(unsigned char *ptr); static irqreturn_t sbmac_intr(int irq, void *dev_instance); -static int sbmac_start_tx(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t sbmac_start_tx(struct sk_buff *skb, struct net_device *dev); static void sbmac_setmulti(struct sbmac_softc *sc); static int sbmac_init(struct platform_device *pldev, long long base); static int sbmac_set_speed(struct sbmac_softc *s, enum sbmac_speed speed); @@ -2028,7 +2028,7 @@ static irqreturn_t sbmac_intr(int irq,void *dev_instance) * Return value: * nothing ********************************************************************* */ -static int sbmac_start_tx(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t sbmac_start_tx(struct sk_buff *skb, struct net_device *dev) { struct sbmac_softc *sc = netdev_priv(dev); unsigned long flags; diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index c93f3a2dc6c1..4c0bcfd1d250 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -457,7 +457,11 @@ /* Bitfields in TISUBN */ #define GEM_SUBNSINCR_OFFSET 0 -#define GEM_SUBNSINCR_SIZE 16 +#define GEM_SUBNSINCRL_OFFSET 24 +#define GEM_SUBNSINCRL_SIZE 8 +#define GEM_SUBNSINCRH_OFFSET 0 +#define GEM_SUBNSINCRH_SIZE 16 +#define GEM_SUBNSINCR_SIZE 24 /* Bitfields in TI */ #define GEM_NSINCR_OFFSET 0 diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 2287749de087..b01b242c2bf0 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -66,7 +66,11 @@ /* Max length of transmit frame must be a multiple of 8 bytes */ #define MACB_TX_LEN_ALIGN 8 #define MACB_MAX_TX_LEN ((unsigned int)((1 << MACB_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1))) -#define GEM_MAX_TX_LEN ((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1))) +/* Limit maximum TX length as per Cadence TSO errata. This is to avoid a + * false amba_error in TX path from the DMA assuming there is not enough + * space in the SRAM (16KB) even when there is. + */ +#define GEM_MAX_TX_LEN (unsigned int)(0x3FC0) #define GEM_MTU_MIN_SIZE ETH_MIN_MTU #define MACB_NETIF_LSO NETIF_F_TSO @@ -852,7 +856,9 @@ static void macb_tx_interrupt(struct macb_queue *queue) /* First, update TX stats if needed */ if (skb) { - if (gem_ptp_do_txstamp(queue, skb, desc) == 0) { + if (unlikely(skb_shinfo(skb)->tx_flags & + SKBTX_HW_TSTAMP) && + gem_ptp_do_txstamp(queue, skb, desc) == 0) { /* skb now belongs to timestamp buffer * and will be removed later */ @@ -1575,16 +1581,14 @@ static netdev_features_t macb_features_check(struct sk_buff *skb, /* Validate LSO compatibility */ - /* there is only one buffer */ - if (!skb_is_nonlinear(skb)) + /* there is only one buffer or protocol is not UDP */ + if (!skb_is_nonlinear(skb) || (ip_hdr(skb)->protocol != IPPROTO_UDP)) return features; /* length of header */ hdrlen = skb_transport_offset(skb); - if (ip_hdr(skb)->protocol == IPPROTO_TCP) - hdrlen += tcp_hdrlen(skb); - /* For LSO: + /* For UFO only: * When software supplies two or more payload buffers all payload buffers * apart from the last must be a multiple of 8 bytes in size. */ @@ -2822,7 +2826,7 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, if (!err) err = -ENODEV; - dev_err(&pdev->dev, "failed to get macb_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to get macb_clk (%d)\n", err); return err; } @@ -2831,7 +2835,7 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, if (!err) err = -ENODEV; - dev_err(&pdev->dev, "failed to get hclk (%u)\n", err); + dev_err(&pdev->dev, "failed to get hclk (%d)\n", err); return err; } @@ -2845,25 +2849,25 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, err = clk_prepare_enable(*pclk); if (err) { - dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable pclk (%d)\n", err); return err; } err = clk_prepare_enable(*hclk); if (err) { - dev_err(&pdev->dev, "failed to enable hclk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable hclk (%d)\n", err); goto err_disable_pclk; } err = clk_prepare_enable(*tx_clk); if (err) { - dev_err(&pdev->dev, "failed to enable tx_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable tx_clk (%d)\n", err); goto err_disable_hclk; } err = clk_prepare_enable(*rx_clk); if (err) { - dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable rx_clk (%d)\n", err); goto err_disable_txclk; } @@ -3298,7 +3302,7 @@ static int at91ether_clk_init(struct platform_device *pdev, struct clk **pclk, err = clk_prepare_enable(*pclk); if (err) { - dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err); + dev_err(&pdev->dev, "failed to enable pclk (%d)\n", err); return err; } diff --git a/drivers/net/ethernet/cadence/macb_ptp.c b/drivers/net/ethernet/cadence/macb_ptp.c index 678835136bf8..f1f07e9d53f8 100755 --- a/drivers/net/ethernet/cadence/macb_ptp.c +++ b/drivers/net/ethernet/cadence/macb_ptp.c @@ -115,7 +115,10 @@ static int gem_tsu_incr_set(struct macb *bp, struct tsu_incr *incr_spec) * to take effect. */ spin_lock_irqsave(&bp->tsu_clk_lock, flags); - gem_writel(bp, TISUBN, GEM_BF(SUBNSINCR, incr_spec->sub_ns)); + /* RegBit[15:0] = Subns[23:8]; RegBit[31:24] = Subns[7:0] */ + gem_writel(bp, TISUBN, GEM_BF(SUBNSINCRL, incr_spec->sub_ns) | + GEM_BF(SUBNSINCRH, (incr_spec->sub_ns >> + GEM_SUBNSINCRL_SIZE))); gem_writel(bp, TI, GEM_BF(NSINCR, incr_spec->ns)); spin_unlock_irqrestore(&bp->tsu_clk_lock, flags); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 29d53b1763a7..2a9c925376cc 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -1444,8 +1444,9 @@ void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq) } if (iq) { spin_lock_bh(&iq->lock); - writel(iq->pkt_in_done, iq->inst_cnt_reg); - iq->pkt_in_done = 0; + writel(iq->pkts_processed, iq->inst_cnt_reg); + iq->pkt_in_done -= iq->pkts_processed; + iq->pkts_processed = 0; /* this write needs to be flushed before we release the lock */ mmiowb(); spin_unlock_bh(&iq->lock); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h index 5c3c8da976f7..1860603452ee 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h @@ -84,6 +84,8 @@ struct octeon_instr_queue { u32 pkt_in_done; + u32 pkts_processed; + /** A spinlock to protect access to the input ring.*/ spinlock_t iq_flush_running_lock; diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index 55e873126463..0ea623768783 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -122,6 +122,7 @@ int octeon_init_instr_queue(struct octeon_device *oct, iq->do_auto_flush = 1; iq->db_timeout = (u32)conf->db_timeout; atomic_set(&iq->instr_pending, 0); + iq->pkts_processed = 0; /* Initialize the spinlock for this instruction queue */ spin_lock_init(&iq->lock); @@ -474,6 +475,7 @@ octeon_flush_iq(struct octeon_device *oct, struct octeon_instr_queue *iq, lio_process_iq_request_list(oct, iq, 0); if (inst_processed) { + iq->pkts_processed += inst_processed; atomic_sub(inst_processed, &iq->instr_pending); iq->stats.instr_processed += inst_processed; } diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c index 45c51277e0cf..61701ba2ac72 100644 --- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c @@ -1497,7 +1497,7 @@ static int octeon_mgmt_probe(struct platform_device *pdev) netdev->ethtool_ops = &octeon_mgmt_ethtool_ops; netdev->min_mtu = 64 - OCTEON_MGMT_RX_HEADROOM; - netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM; + netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM - VLAN_HLEN; mac = of_get_mac_address(pdev->dev.of_node); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 5e5c4d7796b8..d678f088925c 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -234,10 +234,19 @@ void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable) lmac = &bgx->lmac[lmacid]; cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); - if (enable) + if (enable) { cfg |= CMR_PKT_RX_EN | CMR_PKT_TX_EN; - else + + /* enable TX FIFO Underflow interrupt */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1S, + GMI_TXX_INT_UNDFLW); + } else { cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN); + + /* Disable TX FIFO Underflow interrupt */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_INT_ENA_W1C, + GMI_TXX_INT_UNDFLW); + } bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); if (bgx->is_rgx) @@ -915,7 +924,7 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) phy_interface_mode(lmac->lmac_type))) return -ENODEV; - phy_start_aneg(lmac->phydev); + phy_start(lmac->phydev); return 0; } @@ -1340,6 +1349,48 @@ static int bgx_init_phy(struct bgx *bgx) return bgx_init_of_phy(bgx); } +static irqreturn_t bgx_intr_handler(int irq, void *data) +{ + struct bgx *bgx = (struct bgx *)data; + u64 status, val; + int lmac; + + for (lmac = 0; lmac < bgx->lmac_count; lmac++) { + status = bgx_reg_read(bgx, lmac, BGX_GMP_GMI_TXX_INT); + if (status & GMI_TXX_INT_UNDFLW) { + pci_err(bgx->pdev, "BGX%d lmac%d UNDFLW\n", + bgx->bgx_id, lmac); + val = bgx_reg_read(bgx, lmac, BGX_CMRX_CFG); + val &= ~CMR_EN; + bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val); + val |= CMR_EN; + bgx_reg_write(bgx, lmac, BGX_CMRX_CFG, val); + } + /* clear interrupts */ + bgx_reg_write(bgx, lmac, BGX_GMP_GMI_TXX_INT, status); + } + + return IRQ_HANDLED; +} + +static void bgx_register_intr(struct pci_dev *pdev) +{ + struct bgx *bgx = pci_get_drvdata(pdev); + int ret; + + ret = pci_alloc_irq_vectors(pdev, BGX_LMAC_VEC_OFFSET, + BGX_LMAC_VEC_OFFSET, PCI_IRQ_ALL_TYPES); + if (ret < 0) { + pci_err(pdev, "Req for #%d msix vectors failed\n", + BGX_LMAC_VEC_OFFSET); + return; + } + ret = pci_request_irq(pdev, GMPX_GMI_TX_INT, bgx_intr_handler, NULL, + bgx, "BGX%d", bgx->bgx_id); + if (ret) + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); +} + static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { int err; @@ -1355,7 +1406,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, bgx); - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err) { dev_err(dev, "Failed to enable PCI device\n"); pci_set_drvdata(pdev, NULL); @@ -1409,6 +1460,8 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) bgx_init_hw(bgx); + bgx_register_intr(pdev); + /* Enable all LMACs */ for (lmac = 0; lmac < bgx->lmac_count; lmac++) { err = bgx_lmac_enable(bgx, lmac); @@ -1425,6 +1478,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err_enable: bgx_vnic[bgx->bgx_id] = NULL; + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); err_release_regions: pci_release_regions(pdev); err_disable_device: @@ -1442,6 +1496,8 @@ static void bgx_remove(struct pci_dev *pdev) for (lmac = 0; lmac < bgx->lmac_count; lmac++) bgx_lmac_disable(bgx, lmac); + pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); + bgx_vnic[bgx->bgx_id] = NULL; pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 23acdc5ab896..adaa3bfa5f6c 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -179,6 +179,15 @@ #define BGX_GMP_GMI_TXX_BURST 0x38228 #define BGX_GMP_GMI_TXX_MIN_PKT 0x38240 #define BGX_GMP_GMI_TXX_SGMII_CTL 0x38300 +#define BGX_GMP_GMI_TXX_INT 0x38500 +#define BGX_GMP_GMI_TXX_INT_W1S 0x38508 +#define BGX_GMP_GMI_TXX_INT_ENA_W1C 0x38510 +#define BGX_GMP_GMI_TXX_INT_ENA_W1S 0x38518 +#define GMI_TXX_INT_PTP_LOST BIT_ULL(4) +#define GMI_TXX_INT_LATE_COL BIT_ULL(3) +#define GMI_TXX_INT_XSDEF BIT_ULL(2) +#define GMI_TXX_INT_XSCOL BIT_ULL(1) +#define GMI_TXX_INT_UNDFLW BIT_ULL(0) #define BGX_MSIX_VEC_0_29_ADDR 0x400000 /* +(0..29) << 4 */ #define BGX_MSIX_VEC_0_29_CTL 0x400008 diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 338683e5ef1e..b8779afb8550 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -2449,6 +2449,8 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) if (!is_offload(adapter)) return -EOPNOTSUPP; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; if (!(adapter->flags & FULL_INIT_DONE)) return -EIO; /* need the memory controllers */ if (copy_from_user(&t, useraddr, sizeof(t))) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c index 6ee2ed30626b..306b4b320616 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c @@ -266,8 +266,8 @@ void cxgb4_dcb_handle_fw_update(struct adapter *adap, enum cxgb4_dcb_state_input input = ((pcmd->u.dcb.control.all_syncd_pkd & FW_PORT_CMD_ALL_SYNCD_F) - ? CXGB4_DCB_STATE_FW_ALLSYNCED - : CXGB4_DCB_STATE_FW_INCOMPLETE); + ? CXGB4_DCB_INPUT_FW_ALLSYNCED + : CXGB4_DCB_INPUT_FW_INCOMPLETE); if (dcb->dcb_version != FW_PORT_DCB_VER_UNKNOWN) { dcb_running_version = FW_PORT_CMD_DCB_VERSION_G( diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h index ccf24d3dc982..2c418c405c50 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h @@ -67,7 +67,7 @@ do { \ if ((__dcb)->dcb_version == FW_PORT_DCB_VER_IEEE) \ cxgb4_dcb_state_fsm((__dev), \ - CXGB4_DCB_STATE_FW_ALLSYNCED); \ + CXGB4_DCB_INPUT_FW_ALLSYNCED); \ } while (0) /* States we can be in for a port's Data Center Bridging. diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 9e5cd18e7358..8bd90ad15607 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -66,8 +66,7 @@ static void *seq_tab_start(struct seq_file *seq, loff_t *pos) static void *seq_tab_next(struct seq_file *seq, void *v, loff_t *pos) { v = seq_tab_get_idx(seq->private, *pos + 1); - if (v) - ++*pos; + ++(*pos); return v; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c index 8441ce3541af..ad4c9f17d77c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -670,10 +670,10 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld) lld->fr_nsmr_tpte_wr_support = adap->params.fr_nsmr_tpte_wr_support; } -static void uld_attach(struct adapter *adap, unsigned int uld) +static int uld_attach(struct adapter *adap, unsigned int uld) { - void *handle; struct cxgb4_lld_info lli; + void *handle; uld_init(adap, &lli); uld_queue_init(adap, uld, &lli); @@ -683,7 +683,7 @@ static void uld_attach(struct adapter *adap, unsigned int uld) dev_warn(adap->pdev_dev, "could not attach to the %s driver, error %ld\n", adap->uld[uld].name, PTR_ERR(handle)); - return; + return PTR_ERR(handle); } adap->uld[uld].handle = handle; @@ -691,23 +691,24 @@ static void uld_attach(struct adapter *adap, unsigned int uld) if (adap->flags & FULL_INIT_DONE) adap->uld[uld].state_change(handle, CXGB4_STATE_UP); + + return 0; } -/** - * cxgb4_register_uld - register an upper-layer driver - * @type: the ULD type - * @p: the ULD methods +/* cxgb4_register_uld - register an upper-layer driver + * @type: the ULD type + * @p: the ULD methods * - * Registers an upper-layer driver with this driver and notifies the ULD - * about any presently available devices that support its type. Returns - * %-EBUSY if a ULD of the same type is already registered. + * Registers an upper-layer driver with this driver and notifies the ULD + * about any presently available devices that support its type. Returns + * %-EBUSY if a ULD of the same type is already registered. */ int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p) { - int ret = 0; unsigned int adap_idx = 0; struct adapter *adap; + int ret = 0; if (type >= CXGB4_ULD_MAX) return -EINVAL; @@ -741,12 +742,16 @@ int cxgb4_register_uld(enum cxgb4_uld type, if (ret) goto free_irq; adap->uld[type] = *p; - uld_attach(adap, type); + ret = uld_attach(adap, type); + if (ret) + goto free_txq; adap_idx++; } mutex_unlock(&uld_mutex); return 0; +free_txq: + release_sge_txq_uld(adap, type); free_irq: if (adap->flags & FULL_INIT_DONE) quiesce_rx_uld(adap, type); diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c index f7ef8871dd0b..67aa3c997417 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c +++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c @@ -682,8 +682,7 @@ static void *l2t_seq_start(struct seq_file *seq, loff_t *pos) static void *l2t_seq_next(struct seq_file *seq, void *v, loff_t *pos) { v = l2t_get_idx(seq, *pos); - if (v) - ++*pos; + ++(*pos); return v; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 1802debbd3c7..39bcf27902e4 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -3750,7 +3750,7 @@ int t4_fwcache(struct adapter *adap, enum fw_params_param_dev_fwcache op) c.param[0].mnem = cpu_to_be32(FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_FWCACHE)); - c.param[0].val = (__force __be32)op; + c.param[0].val = cpu_to_be32(op); return t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), NULL); } diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 8996ebbd222e..26ba18ea08c6 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -714,6 +714,10 @@ static int adapter_up(struct adapter *adapter) if (adapter->flags & USING_MSIX) name_msix_vecs(adapter); + + /* Initialize hash mac addr list*/ + INIT_LIST_HEAD(&adapter->mac_hlist); + adapter->flags |= FULL_INIT_DONE; } @@ -739,8 +743,6 @@ static int adapter_up(struct adapter *adapter) enable_rx(adapter); t4vf_sge_start(adapter); - /* Initialize hash mac addr list*/ - INIT_LIST_HEAD(&adapter->mac_hlist); return 0; } diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c index e2a702996db4..82bd918bf967 100644 --- a/drivers/net/ethernet/cirrus/ep93xx_eth.c +++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c @@ -767,6 +767,7 @@ static int ep93xx_eth_remove(struct platform_device *pdev) { struct net_device *dev; struct ep93xx_priv *ep; + struct resource *mem; dev = platform_get_drvdata(pdev); if (dev == NULL) @@ -782,8 +783,8 @@ static int ep93xx_eth_remove(struct platform_device *pdev) iounmap(ep->base_addr); if (ep->res != NULL) { - release_resource(ep->res); - kfree(ep->res); + mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(mem->start, resource_size(mem)); } free_netdev(dev); diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 19f374b180fc..52a3b32390a9 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -1972,10 +1972,10 @@ static int enic_stop(struct net_device *netdev) napi_disable(&enic->napi[i]); netif_carrier_off(netdev); - netif_tx_disable(netdev); if (vnic_dev_get_intr_mode(enic->vdev) == VNIC_DEV_INTR_MODE_MSIX) for (i = 0; i < enic->wq_count; i++) napi_disable(&enic->napi[enic_cq_wq(enic, i)]); + netif_tx_disable(netdev); if (!enic_is_dynamic(enic) && !enic_is_sriov_vf(enic)) enic_dev_del_station_addr(enic); diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index 07e10a45beaa..cd5309668186 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -2224,15 +2224,16 @@ static int __init dmfe_init_module(void) if (cr6set) dmfe_cr6_user_set = cr6set; - switch(mode) { - case DMFE_10MHF: + switch (mode) { + case DMFE_10MHF: case DMFE_100MHF: case DMFE_10MFD: case DMFE_100MFD: case DMFE_1M_HPNA: dmfe_media_mode = mode; break; - default:dmfe_media_mode = DMFE_AUTO; + default: + dmfe_media_mode = DMFE_AUTO; break; } diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c index 7fc248efc4ba..9779555eea25 100644 --- a/drivers/net/ethernet/dec/tulip/uli526x.c +++ b/drivers/net/ethernet/dec/tulip/uli526x.c @@ -1819,8 +1819,8 @@ static int __init uli526x_init_module(void) if (cr6set) uli526x_cr6_user_set = cr6set; - switch (mode) { - case ULI526X_10MHF: + switch (mode) { + case ULI526X_10MHF: case ULI526X_100MHF: case ULI526X_10MFD: case ULI526X_100MFD: diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 9ed8e4b81530..a1baddcd6799 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -707,8 +707,8 @@ static bool ftgmac100_prep_tx_csum(struct sk_buff *skb, u32 *csum_vlan) return skb_checksum_help(skb) == 0; } -static int ftgmac100_hard_start_xmit(struct sk_buff *skb, - struct net_device *netdev) +static netdev_tx_t ftgmac100_hard_start_xmit(struct sk_buff *skb, + struct net_device *netdev) { struct ftgmac100 *priv = netdev_priv(netdev); struct ftgmac100_txdes *txdes, *first; @@ -734,6 +734,18 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb, */ nfrags = skb_shinfo(skb)->nr_frags; + /* Setup HW checksumming */ + csum_vlan = 0; + if (skb->ip_summed == CHECKSUM_PARTIAL && + !ftgmac100_prep_tx_csum(skb, &csum_vlan)) + goto drop; + + /* Add VLAN tag */ + if (skb_vlan_tag_present(skb)) { + csum_vlan |= FTGMAC100_TXDES1_INS_VLANTAG; + csum_vlan |= skb_vlan_tag_get(skb) & 0xffff; + } + /* Get header len */ len = skb_headlen(skb); @@ -760,19 +772,6 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb, if (nfrags == 0) f_ctl_stat |= FTGMAC100_TXDES0_LTS; txdes->txdes3 = cpu_to_le32(map); - - /* Setup HW checksumming */ - csum_vlan = 0; - if (skb->ip_summed == CHECKSUM_PARTIAL && - !ftgmac100_prep_tx_csum(skb, &csum_vlan)) - goto drop; - - /* Add VLAN tag */ - if (skb_vlan_tag_present(skb)) { - csum_vlan |= FTGMAC100_TXDES1_INS_VLANTAG; - csum_vlan |= skb_vlan_tag_get(skb) & 0xffff; - } - txdes->txdes1 = cpu_to_le32(csum_vlan); /* Next descriptor */ diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c index 415fd93e9930..769c627aace5 100644 --- a/drivers/net/ethernet/faraday/ftmac100.c +++ b/drivers/net/ethernet/faraday/ftmac100.c @@ -632,8 +632,8 @@ static void ftmac100_tx_complete(struct ftmac100 *priv) ; } -static int ftmac100_xmit(struct ftmac100 *priv, struct sk_buff *skb, - dma_addr_t map) +static netdev_tx_t ftmac100_xmit(struct ftmac100 *priv, struct sk_buff *skb, + dma_addr_t map) { struct net_device *netdev = priv->netdev; struct ftmac100_txdes *txdes; @@ -1013,7 +1013,8 @@ static int ftmac100_stop(struct net_device *netdev) return 0; } -static int ftmac100_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t +ftmac100_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct ftmac100 *priv = netdev_priv(netdev); dma_addr_t map; diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index d5f8bf87519a..39b8b6730e77 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2036,7 +2036,8 @@ static inline int dpaa_xmit(struct dpaa_priv *priv, return 0; } -static int dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev) +static netdev_tx_t +dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev) { const int queue_mapping = skb_get_queue_mapping(skb); bool nonlinear = skb_is_nonlinear(skb); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 0ae6532b02e0..8ba915cc4c2e 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2478,15 +2478,15 @@ fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec) return -EINVAL; } - cycle = fec_enet_us_to_itr_clock(ndev, fep->rx_time_itr); + cycle = fec_enet_us_to_itr_clock(ndev, ec->rx_coalesce_usecs); if (cycle > 0xFFFF) { pr_err("Rx coalesced usec exceed hardware limitation\n"); return -EINVAL; } - cycle = fec_enet_us_to_itr_clock(ndev, fep->tx_time_itr); + cycle = fec_enet_us_to_itr_clock(ndev, ec->tx_coalesce_usecs); if (cycle > 0xFFFF) { - pr_err("Rx coalesced usec exceed hardware limitation\n"); + pr_err("Tx coalesced usec exceed hardware limitation\n"); return -EINVAL; } @@ -3565,6 +3565,11 @@ fec_drv_remove(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); struct device_node *np = pdev->dev.of_node; + int ret; + + ret = pm_runtime_get_sync(&pdev->dev); + if (ret < 0) + return ret; cancel_work_sync(&fep->tx_timeout_work); fec_ptp_stop(pdev); @@ -3572,13 +3577,17 @@ fec_drv_remove(struct platform_device *pdev) fec_enet_mii_remove(fep); if (fep->reg_phy) regulator_disable(fep->reg_phy); - pm_runtime_put(&pdev->dev); - pm_runtime_disable(&pdev->dev); + if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(fep->phy_node); free_netdev(ndev); + clk_disable_unprepare(fep->clk_ahb); + clk_disable_unprepare(fep->clk_ipg); + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_disable(&pdev->dev); + return 0; } diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index 6d7269d87a85..b90bab72efdb 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -305,7 +305,8 @@ static int mpc52xx_fec_close(struct net_device *dev) * invariant will hold if you make sure that the netif_*_queue() * calls are done at the proper times. */ -static int mpc52xx_fec_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +mpc52xx_fec_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct mpc52xx_fec_priv *priv = netdev_priv(dev); struct bcom_fec_bd *bd; diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 75ce773c21a6..b33650a897f1 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -110,7 +110,7 @@ do { \ /* Interface Mode Register (IF_MODE) */ #define IF_MODE_MASK 0x00000003 /* 30-31 Mask on i/f mode bits */ -#define IF_MODE_XGMII 0x00000000 /* 30-31 XGMII (10G) interface */ +#define IF_MODE_10G 0x00000000 /* 30-31 10G interface */ #define IF_MODE_GMII 0x00000002 /* 30-31 GMII (1G) interface */ #define IF_MODE_RGMII 0x00000004 #define IF_MODE_RGMII_AUTO 0x00008000 @@ -439,7 +439,7 @@ static int init(struct memac_regs __iomem *regs, struct memac_cfg *cfg, tmp = 0; switch (phy_if) { case PHY_INTERFACE_MODE_XGMII: - tmp |= IF_MODE_XGMII; + tmp |= IF_MODE_10G; break; default: tmp |= IF_MODE_GMII; diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 28bd4cf61741..708082c255d0 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -481,7 +481,8 @@ static struct sk_buff *tx_skb_align_workaround(struct net_device *dev, } #endif -static int fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); cbd_t __iomem *bdp; diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 63daae120b2d..e4a2c74a9b47 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -112,7 +112,7 @@ const char gfar_driver_version[] = "2.0"; static int gfar_enet_open(struct net_device *dev); -static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t gfar_start_xmit(struct sk_buff *skb, struct net_device *dev); static void gfar_reset_task(struct work_struct *work); static void gfar_timeout(struct net_device *dev); static int gfar_close(struct net_device *dev); @@ -2334,7 +2334,7 @@ static inline bool gfar_csum_errata_76(struct gfar_private *priv, /* This is called by the kernel when a frame is ready for transmission. * It is pointed to by the dev->hard_start_xmit function pointer */ -static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct gfar_private *priv = netdev_priv(dev); struct gfar_priv_tx_q *tx_queue = NULL; @@ -2685,13 +2685,17 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) skb_dirtytx = tx_queue->skb_dirtytx; while ((skb = tx_queue->tx_skbuff[skb_dirtytx])) { + bool do_tstamp; + + do_tstamp = (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + priv->hwts_tx_en; frags = skb_shinfo(skb)->nr_frags; /* When time stamping, one additional TxBD must be freed. * Also, we need to dma_unmap_single() the TxPAL. */ - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) + if (unlikely(do_tstamp)) nr_txbds = frags + 2; else nr_txbds = frags + 1; @@ -2705,7 +2709,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) (lstatus & BD_LENGTH_MASK)) break; - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { + if (unlikely(do_tstamp)) { next = next_txbd(bdp, base, tx_ring_size); buflen = be16_to_cpu(next->length) + GMAC_FCB_LEN + GMAC_TXPAL_LEN; @@ -2715,7 +2719,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) dma_unmap_single(priv->dev, be32_to_cpu(bdp->bufPtr), buflen, DMA_TO_DEVICE); - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { + if (unlikely(do_tstamp)) { struct skb_shared_hwtstamps shhwtstamps; u64 *ns = (u64 *)(((uintptr_t)skb->data + 0x10) & ~0x7UL); diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 94df1ddc5dcb..bddf4c25ee6e 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3085,7 +3085,8 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth) /* This is called by the kernel when a frame is ready for transmission. */ /* It is pointed to by the dev->hard_start_xmit function pointer */ -static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ucc_geth_private *ugeth = netdev_priv(dev); #ifdef CONFIG_UGETH_TX_ON_DEMAND diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index e03b30c60dcf..c82c85ef5fb3 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -49,6 +49,7 @@ struct tgec_mdio_controller { struct mdio_fsl_priv { struct tgec_mdio_controller __iomem *mdio_base; bool is_little_endian; + bool has_a011043; }; static u32 xgmac_read32(void __iomem *regs, @@ -226,7 +227,8 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) return ret; /* Return all Fs if nothing was there */ - if (xgmac_read32(®s->mdio_stat, endian) & MDIO_STAT_RD_ER) { + if ((xgmac_read32(®s->mdio_stat, endian) & MDIO_STAT_RD_ER) && + !priv->has_a011043) { dev_err(&bus->dev, "Error while reading PHY%d reg at %d.%hhu\n", phy_id, dev_addr, regnum); @@ -274,6 +276,9 @@ static int xgmac_mdio_probe(struct platform_device *pdev) priv->is_little_endian = of_property_read_bool(pdev->dev.of_node, "little-endian"); + priv->has_a011043 = of_property_read_bool(pdev->dev.of_node, + "fsl,erratum-a011043"); + ret = of_mdiobus_register(bus, np); if (ret) { dev_err(&pdev->dev, "cannot register MDIO bus\n"); diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 1bfe9544b3c1..d5489cb0afff 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -174,6 +174,7 @@ struct hip04_priv { dma_addr_t rx_phys[RX_DESC_NUM]; unsigned int rx_head; unsigned int rx_buf_size; + unsigned int rx_cnt_remaining; struct device_node *phy_node; struct phy_device *phy; @@ -423,7 +424,8 @@ static void hip04_start_tx_timer(struct hip04_priv *priv) ns, HRTIMER_MODE_REL); } -static int hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t +hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) { struct hip04_priv *priv = netdev_priv(ndev); struct net_device_stats *stats = &ndev->stats; @@ -454,9 +456,9 @@ static int hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev) skb_tx_timestamp(skb); hip04_set_xmit_desc(priv, phys); - priv->tx_head = TX_NEXT(tx_head); count++; netdev_sent_queue(ndev, skb->len); + priv->tx_head = TX_NEXT(tx_head); stats->tx_bytes += skb->len; stats->tx_packets++; @@ -487,7 +489,6 @@ static int hip04_rx_poll(struct napi_struct *napi, int budget) struct hip04_priv *priv = container_of(napi, struct hip04_priv, napi); struct net_device *ndev = priv->ndev; struct net_device_stats *stats = &ndev->stats; - unsigned int cnt = hip04_recv_cnt(priv); struct rx_desc *desc; struct sk_buff *skb; unsigned char *buf; @@ -500,8 +501,8 @@ static int hip04_rx_poll(struct napi_struct *napi, int budget) /* clean up tx descriptors */ tx_remaining = hip04_tx_reclaim(ndev, false); - - while (cnt && !last) { + priv->rx_cnt_remaining += hip04_recv_cnt(priv); + while (priv->rx_cnt_remaining && !last) { buf = priv->rx_buf[priv->rx_head]; skb = build_skb(buf, priv->rx_buf_size); if (unlikely(!skb)) { @@ -547,11 +548,13 @@ refill: hip04_set_recv_desc(priv, phys); priv->rx_head = RX_NEXT(priv->rx_head); - if (rx >= budget) + if (rx >= budget) { + --priv->rx_cnt_remaining; goto done; + } - if (--cnt == 0) - cnt = hip04_recv_cnt(priv); + if (--priv->rx_cnt_remaining == 0) + priv->rx_cnt_remaining += hip04_recv_cnt(priv); } if (!(priv->reg_inten & RCV_INT)) { @@ -636,6 +639,7 @@ static int hip04_mac_open(struct net_device *ndev) int i; priv->rx_head = 0; + priv->rx_cnt_remaining = 0; priv->tx_head = 0; priv->tx_tail = 0; hip04_reset_ppe(priv); @@ -942,7 +946,6 @@ static int hip04_remove(struct platform_device *pdev) hip04_free_ring(ndev, d); unregister_netdev(ndev); - free_irq(ndev->irq, ndev); of_node_put(priv->phy_node); cancel_work_sync(&priv->tx_timeout_task); free_netdev(ndev); diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index 25a6c8722eca..6adf6831d120 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -736,7 +736,7 @@ static int hix5hd2_fill_sg_desc(struct hix5hd2_priv *priv, return 0; } -static int hix5hd2_net_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t hix5hd2_net_xmit(struct sk_buff *skb, struct net_device *dev) { struct hix5hd2_priv *priv = netdev_priv(dev); struct hix5hd2_desc *desc; @@ -1202,7 +1202,7 @@ static int hix5hd2_dev_probe(struct platform_device *pdev) goto err_free_mdio; priv->phy_mode = of_get_phy_mode(node); - if (priv->phy_mode < 0) { + if ((int)priv->phy_mode < 0) { netdev_err(ndev, "not find phy-mode\n"); ret = -EINVAL; goto err_mdiobus; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index b681c07b33fb..0733745f4be6 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -669,7 +669,6 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, skb = *out_skb = napi_alloc_skb(&ring_data->napi, HNS_RX_HEAD_SIZE); if (unlikely(!skb)) { - netdev_err(ndev, "alloc rx skb fail\n"); ring->stats.sw_err_cnt++; return -ENOMEM; } @@ -1180,7 +1179,6 @@ static int hns_nic_common_poll(struct napi_struct *napi, int budget) container_of(napi, struct hns_nic_ring_data, napi); struct hnae_ring *ring = ring_data->ring; -try_again: clean_complete += ring_data->poll_one( ring_data, budget - clean_complete, ring_data->ex_process); @@ -1190,7 +1188,7 @@ try_again: napi_complete(napi); ring->q->handle->dev->ops->toggle_ring_irq(ring, 0); } else { - goto try_again; + return budget; } } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index 8b511e6e0ce9..396ea0db7102 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -251,6 +251,8 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num) if ((enum hclge_cmd_return_status)desc_ret == HCLGE_CMD_EXEC_SUCCESS) retval = 0; + else if (desc_ret == HCLGE_CMD_NOT_SUPPORTED) + retval = -EOPNOTSUPP; else retval = -EIO; hw->cmq.last_status = (enum hclge_cmd_status)desc_ret; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 758cf3948131..3823ae6303ad 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -52,7 +52,7 @@ struct hclge_cmq_ring { enum hclge_cmd_return_status { HCLGE_CMD_EXEC_SUCCESS = 0, HCLGE_CMD_NO_AUTH = 1, - HCLGE_CMD_NOT_EXEC = 2, + HCLGE_CMD_NOT_SUPPORTED = 2, HCLGE_CMD_QUEUE_FULL = 3, }; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 86523e8993cb..3bb6181ff054 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2179,7 +2179,7 @@ static int hclge_get_mac_phy_link(struct hclge_dev *hdev) mac_state = hclge_get_mac_link_status(hdev); if (hdev->hw.mac.phydev) { - if (!genphy_read_status(hdev->hw.mac.phydev)) + if (hdev->hw.mac.phydev->state == PHY_RUNNING) link_stat = mac_state & hdev->hw.mac.phydev->link; else diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 9fcfd9395424..a4c5e72d6012 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -480,7 +480,7 @@ struct hclge_vport { u16 alloc_rss_size; u16 qs_offset; - u16 bw_limit; /* VSI BW Limit (0 = disabled) */ + u32 bw_limit; /* VSI BW Limit (0 = disabled) */ u8 dwrr; int vport_id; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index 55228b91d80b..3799cb2548ce 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -200,7 +200,7 @@ static int hclge_tm_qs_to_pri_map_cfg(struct hclge_dev *hdev, } static int hclge_tm_q_to_qs_map_cfg(struct hclge_dev *hdev, - u8 q_id, u16 qs_id) + u16 q_id, u16 qs_id) { struct hclge_nq_to_qs_link_cmd *map; struct hclge_desc desc; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c index 69726908e72c..e9cff8ed5e07 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c @@ -1307,13 +1307,11 @@ static int hns3_nic_change_mtu(struct net_device *netdev, int new_mtu) } ret = h->ae_algo->ops->set_mtu(h, new_mtu); - if (ret) { + if (ret) netdev_err(netdev, "failed to change MTU in hardware %d\n", ret); - return ret; - } - - netdev->mtu = new_mtu; + else + netdev->mtu = new_mtu; /* if the netdev was running earlier, bring it up again */ if (if_running && hns3_nic_net_open(netdev)) @@ -2302,7 +2300,7 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector, chain = devm_kzalloc(&pdev->dev, sizeof(*chain), GFP_KERNEL); if (!chain) - return -ENOMEM; + goto err_free_chain; cur_chain->next = chain; chain->tqp_index = tx_ring->tqp->tqp_index; @@ -2326,7 +2324,7 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector, while (rx_ring) { chain = devm_kzalloc(&pdev->dev, sizeof(*chain), GFP_KERNEL); if (!chain) - return -ENOMEM; + goto err_free_chain; cur_chain->next = chain; chain->tqp_index = rx_ring->tqp->tqp_index; @@ -2338,6 +2336,16 @@ static int hns3_get_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector, } return 0; + +err_free_chain: + cur_chain = head->next; + while (cur_chain) { + chain = cur_chain->next; + devm_kfree(&pdev->dev, chain); + cur_chain = chain; + } + + return -ENOMEM; } static void hns3_free_vector_ring_chain(struct hns3_enet_tqp_vector *tqp_vector, @@ -2532,8 +2540,10 @@ static int hns3_queue_to_ring(struct hnae3_queue *tqp, return ret; ret = hns3_ring_get_cfg(tqp, priv, HNAE3_RING_TYPE_RX); - if (ret) + if (ret) { + devm_kfree(priv->dev, priv->ring_data[tqp->tqp_index].ring); return ret; + } return 0; } @@ -2558,6 +2568,12 @@ static int hns3_get_ring_config(struct hns3_nic_priv *priv) return 0; err: + while (i--) { + devm_kfree(priv->dev, priv->ring_data[i].ring); + devm_kfree(priv->dev, + priv->ring_data[i + h->kinfo.num_tqps].ring); + } + devm_kfree(&pdev->dev, priv->ring_data); return ret; } diff --git a/drivers/net/ethernet/hisilicon/hns_mdio.c b/drivers/net/ethernet/hisilicon/hns_mdio.c index baf5cc251f32..9a3bc0994a1d 100644 --- a/drivers/net/ethernet/hisilicon/hns_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns_mdio.c @@ -156,11 +156,15 @@ static int mdio_sc_cfg_reg_write(struct hns_mdio_device *mdio_dev, { u32 time_cnt; u32 reg_value; + int ret; regmap_write(mdio_dev->subctrl_vbase, cfg_reg, set_val); for (time_cnt = MDIO_TIMEOUT; time_cnt; time_cnt--) { - regmap_read(mdio_dev->subctrl_vbase, st_reg, ®_value); + ret = regmap_read(mdio_dev->subctrl_vbase, st_reg, ®_value); + if (ret) + return ret; + reg_value &= st_msk; if ((!!check_st) == (!!reg_value)) break; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 79b567447084..46aba02b8672 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -312,6 +312,7 @@ static int set_hw_ioctxt(struct hinic_hwdev *hwdev, unsigned int rq_depth, } hw_ioctxt.func_idx = HINIC_HWIF_FUNC_IDX(hwif); + hw_ioctxt.ppf_idx = HINIC_HWIF_PPF_IDX(hwif); hw_ioctxt.set_cmdq_depth = HW_IOCTXT_SET_CMDQ_DEPTH_DEFAULT; hw_ioctxt.cmdq_depth = 0; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h index 0f5563f3b779..a011fd2d2627 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h @@ -104,8 +104,8 @@ struct hinic_cmd_hw_ioctxt { u8 rsvd2; u8 rsvd3; + u8 ppf_idx; u8 rsvd4; - u8 rsvd5; u16 rq_depth; u16 rx_buf_sz_idx; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h index 5b4760c0e9f5..f683ccbdfca0 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h @@ -146,6 +146,7 @@ #define HINIC_HWIF_FUNC_IDX(hwif) ((hwif)->attr.func_idx) #define HINIC_HWIF_PCI_INTF(hwif) ((hwif)->attr.pci_intf_idx) #define HINIC_HWIF_PF_IDX(hwif) ((hwif)->attr.pf_idx) +#define HINIC_HWIF_PPF_IDX(hwif) ((hwif)->attr.ppf_idx) #define HINIC_FUNC_TYPE(hwif) ((hwif)->attr.func_type) #define HINIC_IS_PF(hwif) (HINIC_FUNC_TYPE(hwif) == HINIC_PF) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index a696b5b2d40e..44c73215d026 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -598,9 +598,6 @@ static int add_mac_addr(struct net_device *netdev, const u8 *addr) u16 vid = 0; int err; - if (!is_valid_ether_addr(addr)) - return -EADDRNOTAVAIL; - netif_info(nic_dev, drv, netdev, "set mac addr = %02x %02x %02x %02x %02x %02x\n", addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); @@ -724,6 +721,7 @@ static void set_rx_mode(struct work_struct *work) { struct hinic_rx_mode_work *rx_mode_work = work_to_rx_mode_work(work); struct hinic_dev *nic_dev = rx_mode_work_to_nic_dev(rx_mode_work); + struct netdev_hw_addr *ha; netif_info(nic_dev, drv, nic_dev->netdev, "set rx mode work\n"); @@ -731,6 +729,9 @@ static void set_rx_mode(struct work_struct *work) __dev_uc_sync(nic_dev->netdev, add_mac_addr, remove_mac_addr); __dev_mc_sync(nic_dev->netdev, add_mac_addr, remove_mac_addr); + + netdev_for_each_mc_addr(ha, nic_dev->netdev) + add_mac_addr(nic_dev->netdev, ha->addr); } static void hinic_set_rx_mode(struct net_device *netdev) diff --git a/drivers/net/ethernet/i825xx/lasi_82596.c b/drivers/net/ethernet/i825xx/lasi_82596.c index b69c622ba8b2..6f0e4019adef 100644 --- a/drivers/net/ethernet/i825xx/lasi_82596.c +++ b/drivers/net/ethernet/i825xx/lasi_82596.c @@ -96,6 +96,8 @@ #define OPT_SWAP_PORT 0x0001 /* Need to wordswp on the MPU port */ +#define LIB82596_DMA_ATTR DMA_ATTR_NON_CONSISTENT + #define DMA_WBACK(ndev, addr, len) \ do { dma_cache_sync((ndev)->dev.parent, (void *)addr, len, DMA_TO_DEVICE); } while (0) @@ -199,7 +201,7 @@ static int __exit lan_remove_chip(struct parisc_device *pdev) unregister_netdev (dev); dma_free_attrs(&pdev->dev, sizeof(struct i596_private), lp->dma, - lp->dma_addr, DMA_ATTR_NON_CONSISTENT); + lp->dma_addr, LIB82596_DMA_ATTR); free_netdev (dev); return 0; } diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c index f00a1dc2128c..da3758fdf025 100644 --- a/drivers/net/ethernet/i825xx/lib82596.c +++ b/drivers/net/ethernet/i825xx/lib82596.c @@ -1065,7 +1065,7 @@ static int i82596_probe(struct net_device *dev) dma = dma_alloc_attrs(dev->dev.parent, sizeof(struct i596_dma), &lp->dma_addr, GFP_KERNEL, - DMA_ATTR_NON_CONSISTENT); + LIB82596_DMA_ATTR); if (!dma) { printk(KERN_ERR "%s: Couldn't get shared memory\n", __FILE__); return -ENOMEM; @@ -1087,7 +1087,7 @@ static int i82596_probe(struct net_device *dev) i = register_netdev(dev); if (i) { dma_free_attrs(dev->dev.parent, sizeof(struct i596_dma), - dma, lp->dma_addr, DMA_ATTR_NON_CONSISTENT); + dma, lp->dma_addr, LIB82596_DMA_ATTR); return i; } diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c index b2c04a789744..43c1fd18670b 100644 --- a/drivers/net/ethernet/i825xx/sni_82596.c +++ b/drivers/net/ethernet/i825xx/sni_82596.c @@ -23,6 +23,8 @@ static const char sni_82596_string[] = "snirm_82596"; +#define LIB82596_DMA_ATTR 0 + #define DMA_WBACK(priv, addr, len) do { } while (0) #define DMA_INV(priv, addr, len) do { } while (0) #define DMA_WBACK_INV(priv, addr, len) do { } while (0) @@ -151,7 +153,7 @@ static int sni_82596_driver_remove(struct platform_device *pdev) unregister_netdev(dev); dma_free_attrs(dev->dev.parent, sizeof(struct i596_private), lp->dma, - lp->dma_addr, DMA_ATTR_NON_CONSISTENT); + lp->dma_addr, LIB82596_DMA_ATTR); iounmap(lp->ca); iounmap(lp->mpu_port); free_netdev (dev); diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 30cbdf0fed59..373deb247ac0 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -1475,7 +1475,7 @@ static int ehea_init_port_res(struct ehea_port *port, struct ehea_port_res *pr, memset(pr, 0, sizeof(struct ehea_port_res)); - pr->tx_bytes = rx_bytes; + pr->tx_bytes = tx_bytes; pr->tx_packets = tx_packets; pr->rx_bytes = rx_bytes; pr->rx_packets = rx_packets; diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 4d10270ddf8f..90974462743b 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -1370,8 +1370,8 @@ static inline int e100_load_ucode_wait(struct nic *nic) fw = e100_request_firmware(nic); /* If it's NULL, then no ucode is required */ - if (!fw || IS_ERR(fw)) - return PTR_ERR(fw); + if (IS_ERR_OR_NULL(fw)) + return PTR_ERR_OR_ZERO(fw); if ((err = e100_exec_cb(nic, (void *)fw, e100_setup_ucode))) netif_err(nic, probe, nic->netdev, diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index 10df2d60c181..88b34f722337 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -627,6 +627,7 @@ static int e1000_set_ringparam(struct net_device *netdev, for (i = 0; i < adapter->num_rx_queues; i++) rxdr[i].count = rxdr->count; + err = 0; if (netif_running(adapter->netdev)) { /* Try to get new resources before deleting old */ err = e1000_setup_all_rx_resources(adapter); @@ -647,14 +648,13 @@ static int e1000_set_ringparam(struct net_device *netdev, adapter->rx_ring = rxdr; adapter->tx_ring = txdr; err = e1000_up(adapter); - if (err) - goto err_setup; } kfree(tx_old); kfree(rx_old); clear_bit(__E1000_RESETTING, &adapter->flags); - return 0; + return err; + err_setup_tx: e1000_free_all_rx_resources(adapter); err_setup_rx: @@ -666,7 +666,6 @@ err_alloc_rx: err_alloc_tx: if (netif_running(adapter->netdev)) e1000_up(adapter); -err_setup: clear_bit(__E1000_RESETTING, &adapter->flags); return err; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index ef22793d6a03..751ac5616884 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -969,6 +969,7 @@ static int i40e_set_pauseparam(struct net_device *netdev, i40e_status status; u8 aq_failures; int err = 0; + u32 is_an; /* Changing the port's flow control is not supported if this isn't the * port's controlling PF @@ -981,15 +982,14 @@ static int i40e_set_pauseparam(struct net_device *netdev, if (vsi != pf->vsi[pf->lan_vsi]) return -EOPNOTSUPP; - if (pause->autoneg != ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ? - AUTONEG_ENABLE : AUTONEG_DISABLE)) { + is_an = hw_link_info->an_info & I40E_AQ_AN_COMPLETED; + if (pause->autoneg != is_an) { netdev_info(netdev, "To change autoneg please use: ethtool -s autoneg \n"); return -EOPNOTSUPP; } /* If we have link and don't have autoneg */ - if (!test_bit(__I40E_DOWN, pf->state) && - !(hw_link_info->an_info & I40E_AQ_AN_COMPLETED)) { + if (!test_bit(__I40E_DOWN, pf->state) && !is_an) { /* Send message that it might not necessarily work*/ netdev_info(netdev, "Autoneg did not complete so changing settings may not result in an actual change.\n"); } @@ -1040,7 +1040,7 @@ static int i40e_set_pauseparam(struct net_device *netdev, err = -EAGAIN; } - if (!test_bit(__I40E_DOWN, pf->state)) { + if (!test_bit(__I40E_DOWN, pf->state) && is_an) { /* Give it a little more time to try to come back */ msleep(75); if (!test_bit(__I40E_DOWN, pf->state)) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 39029a12a233..aa2b446d6ad0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -11885,6 +11885,7 @@ static void i40e_remove(struct pci_dev *pdev) mutex_destroy(&hw->aq.asq_mutex); /* Clear all dynamic memory lists of rings, q_vectors, and VSIs */ + rtnl_lock(); i40e_clear_interrupt_scheme(pf); for (i = 0; i < pf->num_alloc_vsi; i++) { if (pf->vsi[i]) { @@ -11893,6 +11894,7 @@ static void i40e_remove(struct pci_dev *pdev) pf->vsi[i] = NULL; } } + rtnl_unlock(); for (i = 0; i < I40E_MAX_VEB; i++) { kfree(pf->veb[i]); @@ -12086,7 +12088,13 @@ static void i40e_shutdown(struct pci_dev *pdev) wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0)); + /* Since we're going to destroy queues during the + * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this + * whole section + */ + rtnl_lock(); i40e_clear_interrupt_scheme(pf); + rtnl_unlock(); if (system_state == SYSTEM_POWER_OFF) { pci_wake_from_d3(pdev, pf->wol_en); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index ef242dbae116..5fc870757480 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -704,7 +704,8 @@ static long i40e_ptp_create_clock(struct i40e_pf *pf) if (!IS_ERR_OR_NULL(pf->ptp_clock)) return 0; - strncpy(pf->ptp_caps.name, i40e_driver_name, sizeof(pf->ptp_caps.name)); + strncpy(pf->ptp_caps.name, i40e_driver_name, + sizeof(pf->ptp_caps.name) - 1); pf->ptp_caps.owner = THIS_MODULE; pf->ptp_caps.max_adj = 999999999; pf->ptp_caps.n_ext_ts = 0; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index bdb752321600..fae3625ec0b6 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -2177,6 +2177,16 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen) ret = I40E_ERR_INVALID_MAC_ADDR; goto error_param; } + + if (vf->pf_set_mac && + ether_addr_equal(al->list[i].addr, + vf->default_lan_addr.addr)) { + dev_err(&pf->pdev->dev, + "MAC addr %pM has been set by PF, cannot delete it for VF %d, reset VF to change MAC addr\n", + vf->default_lan_addr.addr, vf->vf_id); + ret = I40E_ERR_PARAM; + goto error_param; + } } vsi = pf->vsi[vf->lan_vsi_idx]; @@ -3191,7 +3201,7 @@ int i40e_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link) vf->link_forced = true; vf->link_up = true; pfe.event_data.link_event.link_status = true; - pfe.event_data.link_event.link_speed = I40E_LINK_SPEED_40GB; + pfe.event_data.link_event.link_speed = VIRTCHNL_LINK_SPEED_40GB; break; case IFLA_VF_LINK_STATE_DISABLE: vf->link_forced = true; diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index c37cc8bccf47..158c277ec353 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -562,7 +562,7 @@ static s32 igb_set_sfp_media_type_82575(struct e1000_hw *hw) dev_spec->module_plugged = true; if (eth_flags->e1000_base_lx || eth_flags->e1000_base_sx) { hw->phy.media_type = e1000_media_type_internal_serdes; - } else if (eth_flags->e100_base_fx) { + } else if (eth_flags->e100_base_fx || eth_flags->e100_base_lx) { dev_spec->sgmii_active = true; hw->phy.media_type = e1000_media_type_internal_serdes; } else if (eth_flags->e1000_base_t) { @@ -689,14 +689,10 @@ static s32 igb_get_invariants_82575(struct e1000_hw *hw) break; } - /* do not change link mode for 100BaseFX */ - if (dev_spec->eth_flags.e100_base_fx) - break; - /* change current link mode setting */ ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK; - if (hw->phy.media_type == e1000_media_type_copper) + if (dev_spec->sgmii_active) ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_SGMII; else ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index d06a8db514d4..82028ce355fb 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -201,7 +201,7 @@ static int igb_get_link_ksettings(struct net_device *netdev, advertising &= ~ADVERTISED_1000baseKX_Full; } } - if (eth_flags->e100_base_fx) { + if (eth_flags->e100_base_fx || eth_flags->e100_base_lx) { supported |= SUPPORTED_100baseT_Full; advertising |= ADVERTISED_100baseT_Full; } diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 71b235f935d9..9c7e75b3b6c7 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1680,7 +1680,8 @@ static void igb_check_swap_media(struct igb_adapter *adapter) if ((hw->phy.media_type == e1000_media_type_copper) && (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) { swap_now = true; - } else if (!(connsw & E1000_CONNSW_SERDESD)) { + } else if ((hw->phy.media_type != e1000_media_type_copper) && + !(connsw & E1000_CONNSW_SERDESD)) { /* copper signal takes time to appear */ if (adapter->copper_tries < 4) { adapter->copper_tries++; diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 0746b19ec6d3..295d27f33104 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -65,9 +65,15 @@ * * The 40 bit 82580 SYSTIM overflows every * 2^40 * 10^-9 / 60 = 18.3 minutes. + * + * SYSTIM is converted to real time using a timecounter. As + * timecounter_cyc2time() allows old timestamps, the timecounter + * needs to be updated at least once per half of the SYSTIM interval. + * Scheduling of delayed work is not very accurate, so we aim for 8 + * minutes to be sure the actual interval is shorter than 9.16 minutes. */ -#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9) +#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 8) #define IGB_PTP_TX_TIMEOUT (HZ * 15) #define INCPERIOD_82576 BIT(E1000_TIMINCA_16NS_SHIFT) #define INCVALUE_82576_MASK GENMASK(E1000_TIMINCA_16NS_SHIFT - 1, 0) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 01c120d656c5..ba184287e11f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1871,13 +1871,7 @@ static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring, static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, struct sk_buff *skb) { - /* if the page was released unmap it, else just sync our portion */ - if (unlikely(IXGBE_CB(skb)->page_released)) { - dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma, - ixgbe_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IXGBE_RX_DMA_ATTR); - } else if (ring_uses_build_skb(rx_ring)) { + if (ring_uses_build_skb(rx_ring)) { unsigned long offset = (unsigned long)(skb->data) & ~PAGE_MASK; dma_sync_single_range_for_cpu(rx_ring->dev, @@ -1894,6 +1888,14 @@ static void ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, skb_frag_size(frag), DMA_FROM_DEVICE); } + + /* If the page was released, just unmap it. */ + if (unlikely(IXGBE_CB(skb)->page_released)) { + dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma, + ixgbe_rx_pg_size(rx_ring), + DMA_FROM_DEVICE, + IXGBE_RX_DMA_ATTR); + } } /** @@ -3490,12 +3492,18 @@ static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) else mtqc |= IXGBE_MTQC_64VF; } else { - if (tcs > 4) + if (tcs > 4) { mtqc = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ; - else if (tcs > 1) + } else if (tcs > 1) { mtqc = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ; - else - mtqc = IXGBE_MTQC_64Q_1PB; + } else { + u8 max_txq = adapter->num_tx_queues + + adapter->num_xdp_queues; + if (max_txq > 63) + mtqc = IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ; + else + mtqc = IXGBE_MTQC_64Q_1PB; + } } IXGBE_WRITE_REG(hw, IXGBE_MTQC, mtqc); @@ -5123,6 +5131,7 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct hlist_node *node2; struct ixgbe_fdir_filter *filter; + u8 queue; spin_lock(&adapter->fdir_perfect_lock); @@ -5131,12 +5140,34 @@ static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) hlist_for_each_entry_safe(filter, node2, &adapter->fdir_filter_list, fdir_node) { + if (filter->action == IXGBE_FDIR_DROP_QUEUE) { + queue = IXGBE_FDIR_DROP_QUEUE; + } else { + u32 ring = ethtool_get_flow_spec_ring(filter->action); + u8 vf = ethtool_get_flow_spec_ring_vf(filter->action); + + if (!vf && (ring >= adapter->num_rx_queues)) { + e_err(drv, "FDIR restore failed without VF, ring: %u\n", + ring); + continue; + } else if (vf && + ((vf > adapter->num_vfs) || + ring >= adapter->num_rx_queues_per_pool)) { + e_err(drv, "FDIR restore failed with VF, vf: %hhu, ring: %u\n", + vf, ring); + continue; + } + + /* Map the ring onto the absolute queue index */ + if (!vf) + queue = adapter->rx_ring[ring]->reg_idx; + else + queue = ((vf - 1) * + adapter->num_rx_queues_per_pool) + ring; + } + ixgbe_fdir_write_perfect_filter_82599(hw, - &filter->filter, - filter->sw_idx, - (filter->action == IXGBE_FDIR_DROP_QUEUE) ? - IXGBE_FDIR_DROP_QUEUE : - adapter->rx_ring[filter->action]->reg_idx); + &filter->filter, filter->sw_idx, queue); } spin_unlock(&adapter->fdir_perfect_lock); @@ -8367,7 +8398,8 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && adapter->ptp_clock) { - if (!test_and_set_bit_lock(__IXGBE_PTP_TX_IN_PROGRESS, + if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON && + !test_and_set_bit_lock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state)) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IXGBE_TX_FLAGS_TSTAMP; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index e238f6e85ab6..a7708e14aa5c 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1858,11 +1858,6 @@ static int ixgbevf_write_uc_addr_list(struct net_device *netdev) struct ixgbe_hw *hw = &adapter->hw; int count = 0; - if ((netdev_uc_count(netdev)) > 10) { - pr_err("Too many unicast filters - No Space\n"); - return -ENOSPC; - } - if (!netdev_uc_empty(netdev)) { struct netdev_hw_addr *ha; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index d631cd94ee63..25a15bdc125e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1722,6 +1722,7 @@ static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, err = mlx4_en_get_flow(dev, cmd, cmd->fs.location); break; case ETHTOOL_GRXCLSRLALL: + cmd->data = MAX_NUM_OF_FS_RULES; while ((!err || err == -ENOENT) && priority < cmd->rule_cnt) { err = mlx4_en_get_flow(dev, cmd, i); if (!err) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index c273a3ebb8e8..12d4b891301b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -199,7 +199,7 @@ int mlx4_check_port_params(struct mlx4_dev *dev, for (i = 0; i < dev->caps.num_ports - 1; i++) { if (port_type[i] != port_type[i + 1]) { mlx4_err(dev, "Only same port types supported on this HCA, aborting\n"); - return -EINVAL; + return -EOPNOTSUPP; } } } @@ -208,7 +208,7 @@ int mlx4_check_port_params(struct mlx4_dev *dev, if (!(port_type[i] & dev->caps.supported_type[i+1])) { mlx4_err(dev, "Requested port type for port %d is not supported on this HCA\n", i + 1); - return -EINVAL; + return -EOPNOTSUPP; } } return 0; @@ -1152,8 +1152,7 @@ static int __set_port_type(struct mlx4_port_info *info, mlx4_err(mdev, "Requested port type for port %d is not supported on this HCA\n", info->port); - err = -EINVAL; - goto err_sup; + return -EOPNOTSUPP; } mlx4_stop_sense(mdev); @@ -1175,7 +1174,7 @@ static int __set_port_type(struct mlx4_port_info *info, for (i = 1; i <= mdev->caps.num_ports; i++) { if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) { mdev->caps.possible_type[i] = mdev->caps.port_type[i]; - err = -EINVAL; + err = -EOPNOTSUPP; } } } @@ -1201,7 +1200,7 @@ static int __set_port_type(struct mlx4_port_info *info, out: mlx4_start_sense(mdev); mutex_unlock(&priv->port_mutex); -err_sup: + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 53ca6cf316dc..66e8054a8966 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -471,12 +471,31 @@ void mlx4_init_quotas(struct mlx4_dev *dev) priv->mfunc.master.res_tracker.res_alloc[RES_MPT].quota[pf]; } -static int get_max_gauranteed_vfs_counter(struct mlx4_dev *dev) +static int +mlx4_calc_res_counter_guaranteed(struct mlx4_dev *dev, + struct resource_allocator *res_alloc, + int vf) { - /* reduce the sink counter */ - return (dev->caps.max_counters - 1 - - (MLX4_PF_COUNTERS_PER_PORT * MLX4_MAX_PORTS)) - / MLX4_MAX_PORTS; + struct mlx4_active_ports actv_ports; + int ports, counters_guaranteed; + + /* For master, only allocate according to the number of phys ports */ + if (vf == mlx4_master_func_num(dev)) + return MLX4_PF_COUNTERS_PER_PORT * dev->caps.num_ports; + + /* calculate real number of ports for the VF */ + actv_ports = mlx4_get_active_ports(dev, vf); + ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports); + counters_guaranteed = ports * MLX4_VF_COUNTERS_PER_PORT; + + /* If we do not have enough counters for this VF, do not + * allocate any for it. '-1' to reduce the sink counter. + */ + if ((res_alloc->res_reserved + counters_guaranteed) > + (dev->caps.max_counters - 1)) + return 0; + + return counters_guaranteed; } int mlx4_init_resource_tracker(struct mlx4_dev *dev) @@ -484,7 +503,6 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int i, j; int t; - int max_vfs_guarantee_counter = get_max_gauranteed_vfs_counter(dev); priv->mfunc.master.res_tracker.slave_list = kzalloc(dev->num_slaves * sizeof(struct slave_list), @@ -601,16 +619,8 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) break; case RES_COUNTER: res_alloc->quota[t] = dev->caps.max_counters; - if (t == mlx4_master_func_num(dev)) - res_alloc->guaranteed[t] = - MLX4_PF_COUNTERS_PER_PORT * - MLX4_MAX_PORTS; - else if (t <= max_vfs_guarantee_counter) - res_alloc->guaranteed[t] = - MLX4_VF_COUNTERS_PER_PORT * - MLX4_MAX_PORTS; - else - res_alloc->guaranteed[t] = 0; + res_alloc->guaranteed[t] = + mlx4_calc_res_counter_guaranteed(dev, res_alloc, t); break; default: break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index f6beb5ef5971..c3f1e2d76a46 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1625,7 +1625,7 @@ static int mlx5e_get_module_info(struct net_device *netdev, break; case MLX5_MODULE_ID_SFP: modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = MLX5_EEPROM_PAGE_LENGTH; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; break; default: netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index eec7c2ef067a..bf311a3c3e02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1093,8 +1093,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) return 0; - if (cq->decmprs_left) + if (cq->decmprs_left) { work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget); + if (cq->decmprs_left || work_done >= budget) + goto out; + } cqe = mlx5_cqwq_get_cqe(&cq->wq); if (!cqe) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 090d54275a7d..387758fc6be4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1783,7 +1783,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, unlock: mutex_unlock(&esw->state_lock); - return 0; + return err; } int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index c4392f741c5f..5212428031a4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -462,8 +462,10 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) } err = mlx5_vector2eqn(mdev, smp_processor_id(), &eqn, &irqn); - if (err) + if (err) { + kvfree(in); goto err_cqwq; + } cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 97874c2568fc..1ac0e173da12 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -838,11 +838,9 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) priv->numa_node = dev_to_node(&dev->pdev->dev); - priv->dbg_root = debugfs_create_dir(dev_name(&pdev->dev), mlx5_debugfs_root); - if (!priv->dbg_root) { - dev_err(&pdev->dev, "Cannot create debugfs dir, aborting\n"); - return -ENOMEM; - } + if (mlx5_debugfs_root) + priv->dbg_root = + debugfs_create_dir(pci_name(pdev), mlx5_debugfs_root); err = mlx5_pci_enable_device(dev); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 889130edb715..b92d5690287b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -44,14 +44,15 @@ static struct mlx5_core_rsc_common *mlx5_get_rsc(struct mlx5_core_dev *dev, { struct mlx5_qp_table *table = &dev->priv.qp_table; struct mlx5_core_rsc_common *common; + unsigned long flags; - spin_lock(&table->lock); + spin_lock_irqsave(&table->lock, flags); common = radix_tree_lookup(&table->tree, rsn); if (common) atomic_inc(&common->refcount); - spin_unlock(&table->lock); + spin_unlock_irqrestore(&table->lock, flags); if (!common) { mlx5_core_warn(dev, "Async event for bogus resource 0x%x\n", @@ -124,7 +125,7 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) { mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n", event_type, rsn); - return; + goto out; } switch (common->res) { @@ -138,7 +139,7 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type) default: mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn); } - +out: mlx5_core_put_rsc(common); } diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c index 2cf89126fb23..d765e7a69d6b 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c @@ -86,6 +86,8 @@ retry: return err; if (fsm_state_err != MLXFW_FSM_STATE_ERR_OK) { + fsm_state_err = min_t(enum mlxfw_fsm_state_err, + fsm_state_err, MLXFW_FSM_STATE_ERR_MAX); pr_err("Firmware flash failed: %s\n", mlxfw_fsm_state_err_str[fsm_state_err]); return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c index 993cb5ba934e..b99169a386eb 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include "mlxfw_mfa2.h" #include "mlxfw_mfa2_file.h" @@ -579,7 +580,7 @@ mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file, comp_size = be32_to_cpu(comp->size); comp_buf_size = comp_size + mlxfw_mfa2_comp_magic_len; - comp_data = kmalloc(sizeof(*comp_data) + comp_buf_size, GFP_KERNEL); + comp_data = vzalloc(sizeof(*comp_data) + comp_buf_size); if (!comp_data) return ERR_PTR(-ENOMEM); comp_data->comp.data_size = comp_size; @@ -601,7 +602,7 @@ mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file, comp_data->comp.data = comp_data->buff + mlxfw_mfa2_comp_magic_len; return &comp_data->comp; err_out: - kfree(comp_data); + vfree(comp_data); return ERR_PTR(err); } @@ -610,7 +611,7 @@ void mlxfw_mfa2_file_component_put(struct mlxfw_mfa2_component *comp) const struct mlxfw_mfa2_comp_data *comp_data; comp_data = container_of(comp, struct mlxfw_mfa2_comp_data, comp); - kfree(comp_data); + vfree(comp_data); } void mlxfw_mfa2_file_fini(struct mlxfw_mfa2_file *mfa2_file) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 8ab7a4f98a07..e7974ba06432 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -2452,7 +2452,7 @@ static inline void mlxsw_reg_qtct_pack(char *payload, u8 local_port, * Configures the ETS elements. */ #define MLXSW_REG_QEEC_ID 0x400D -#define MLXSW_REG_QEEC_LEN 0x1C +#define MLXSW_REG_QEEC_LEN 0x20 MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN); @@ -2494,6 +2494,15 @@ MLXSW_ITEM32(reg, qeec, element_index, 0x04, 0, 8); */ MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8); +/* reg_qeec_mise + * Min shaper configuration enable. Enables configuration of the min + * shaper on this ETS element + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, mise, 0x0C, 31, 1); + enum { MLXSW_REG_QEEC_BYTES_MODE, MLXSW_REG_QEEC_PACKETS_MODE, @@ -2510,6 +2519,17 @@ enum { */ MLXSW_ITEM32(reg, qeec, pb, 0x0C, 28, 1); +/* The smallest permitted min shaper rate. */ +#define MLXSW_REG_QEEC_MIS_MIN 200000 /* Kbps */ + +/* reg_qeec_min_shaper_rate + * Min shaper information rate. + * For CPU port, can only be configured for port hierarchy. + * When in bytes mode, value is specified in units of 1000bps. + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, min_shaper_rate, 0x0C, 0, 28); + /* reg_qeec_mase * Max shaper configuration enable. Enables configuration of the max * shaper on this ETS element. diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 5c74787f903b..226187cba0e8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2825,6 +2825,13 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) MLXSW_REG_QEEC_MAS_DIS); if (err) return err; + + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_TC, + i + 8, i, + MLXSW_REG_QEEC_MAS_DIS); + if (err) + return err; } /* Map all priorities to traffic class 0. */ @@ -4077,9 +4084,6 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, err = mlxsw_sp_lag_col_port_add(mlxsw_sp_port, lag_id, port_index); if (err) goto err_col_port_add; - err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, lag_id); - if (err) - goto err_col_port_enable; mlxsw_core_lag_mapping_set(mlxsw_sp->core, lag_id, port_index, mlxsw_sp_port->local_port); @@ -4094,8 +4098,6 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, return 0; -err_col_port_enable: - mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); err_col_port_add: if (!lag->ref_count) mlxsw_sp_lag_destroy(mlxsw_sp, lag_id); @@ -4114,7 +4116,6 @@ static void mlxsw_sp_port_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port, lag = mlxsw_sp_lag_get(mlxsw_sp, lag_id); WARN_ON(lag->ref_count == 0); - mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, lag_id); mlxsw_sp_lag_col_port_remove(mlxsw_sp_port, lag_id); /* Any VLANs configured on the port are no longer valid */ @@ -4159,21 +4160,56 @@ static int mlxsw_sp_lag_dist_port_remove(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sldr), sldr_pl); } -static int mlxsw_sp_port_lag_tx_en_set(struct mlxsw_sp_port *mlxsw_sp_port, - bool lag_tx_enabled) +static int +mlxsw_sp_port_lag_col_dist_enable(struct mlxsw_sp_port *mlxsw_sp_port) { - if (lag_tx_enabled) - return mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, - mlxsw_sp_port->lag_id); - else - return mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port, - mlxsw_sp_port->lag_id); + int err; + + err = mlxsw_sp_lag_col_port_enable(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + if (err) + return err; + + err = mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, mlxsw_sp_port->lag_id); + if (err) + goto err_dist_port_add; + + return 0; + +err_dist_port_add: + mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, mlxsw_sp_port->lag_id); + return err; +} + +static int +mlxsw_sp_port_lag_col_dist_disable(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = mlxsw_sp_lag_dist_port_remove(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + if (err) + return err; + + err = mlxsw_sp_lag_col_port_disable(mlxsw_sp_port, + mlxsw_sp_port->lag_id); + if (err) + goto err_col_port_disable; + + return 0; + +err_col_port_disable: + mlxsw_sp_lag_dist_port_add(mlxsw_sp_port, mlxsw_sp_port->lag_id); + return err; } static int mlxsw_sp_port_lag_changed(struct mlxsw_sp_port *mlxsw_sp_port, struct netdev_lag_lower_state_info *info) { - return mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, info->tx_enabled); + if (info->tx_enabled) + return mlxsw_sp_port_lag_col_dist_enable(mlxsw_sp_port); + else + return mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port); } static int mlxsw_sp_port_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -4309,8 +4345,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, err = mlxsw_sp_port_lag_join(mlxsw_sp_port, upper_dev); } else { - mlxsw_sp_port_lag_tx_en_set(mlxsw_sp_port, - false); + mlxsw_sp_port_lag_col_dist_disable(mlxsw_sp_port); mlxsw_sp_port_lag_leave(mlxsw_sp_port, upper_dev); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 51e6846da72b..3c04f3d5de2d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -225,7 +225,7 @@ mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled, start_again: err = devlink_dpipe_entry_ctx_prepare(dump_ctx); if (err) - return err; + goto err_ctx_prepare; j = 0; for (; i < rif_count; i++) { struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); @@ -257,6 +257,7 @@ start_again: return 0; err_entry_append: err_entry_get: +err_ctx_prepare: rtnl_unlock(); devlink_dpipe_entry_clear(&entry); return err; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 3ed4fb346f23..05a2006a20b9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1252,15 +1252,12 @@ mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp, { u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN; enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt; - struct net_device *ipip_ul_dev; if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto) return false; - ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev); return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip, - ul_tb_id, ipip_entry) && - (!ipip_ul_dev || ipip_ul_dev == ul_dev); + ul_tb_id, ipip_entry); } /* Given decap parameters, find the corresponding IPIP entry. */ @@ -1765,7 +1762,7 @@ static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work) static void mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, - bool removing); + bool removing, bool dead); static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding) { @@ -1894,7 +1891,8 @@ static void mlxsw_sp_router_neigh_event_work(struct work_struct *work) memcpy(neigh_entry->ha, ha, ETH_ALEN); mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected); - mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected); + mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected, + dead); if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list)) mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); @@ -2538,13 +2536,79 @@ static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, nh->update = 1; } +static int +mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n, *old_n = neigh_entry->key.n; + struct mlxsw_sp_nexthop *nh; + bool entry_connected; + u8 nud_state, dead; + int err; + + nh = list_first_entry(&neigh_entry->nexthop_list, + struct mlxsw_sp_nexthop, neigh_list_node); + + n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev); + if (!n) { + n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr, + nh->rif->dev); + if (IS_ERR(n)) + return PTR_ERR(n); + neigh_event_send(n, NULL); + } + + mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); + neigh_entry->key.n = n; + err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); + if (err) + goto err_neigh_entry_insert; + + read_lock_bh(&n->lock); + nud_state = n->nud_state; + dead = n->dead; + read_unlock_bh(&n->lock); + entry_connected = nud_state & NUD_VALID && !dead; + + list_for_each_entry(nh, &neigh_entry->nexthop_list, + neigh_list_node) { + neigh_release(old_n); + neigh_clone(n); + __mlxsw_sp_nexthop_neigh_update(nh, !entry_connected); + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); + } + + neigh_release(n); + + return 0; + +err_neigh_entry_insert: + neigh_entry->key.n = old_n; + mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry); + neigh_release(n); + return err; +} + static void mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, - bool removing) + bool removing, bool dead) { struct mlxsw_sp_nexthop *nh; + if (list_empty(&neigh_entry->nexthop_list)) + return; + + if (dead) { + int err; + + err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp, + neigh_entry); + if (err) + dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n"); + return; + } + list_for_each_entry(nh, &neigh_entry->nexthop_list, neigh_list_node) { __mlxsw_sp_nexthop_neigh_update(nh, removing); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 8a1788108f52..698de51b3fef 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1939,8 +1939,15 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, struct net_device *dev = switchdev_notifier_info_to_dev(ptr); struct mlxsw_sp_switchdev_event_work *switchdev_work; struct switchdev_notifier_fdb_info *fdb_info = ptr; + struct net_device *br_dev; - if (!mlxsw_sp_port_dev_lower_find_rcu(dev)) + /* Tunnel devices are not our uppers, so check their master instead */ + br_dev = netdev_master_upper_dev_get_rcu(dev); + if (!br_dev) + return NOTIFY_DONE; + if (!netif_is_bridge_master(br_dev)) + return NOTIFY_DONE; + if (!mlxsw_sp_port_dev_lower_find_rcu(br_dev)) return NOTIFY_DONE; switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); diff --git a/drivers/net/ethernet/micrel/ks8695net.c b/drivers/net/ethernet/micrel/ks8695net.c index bd51e057e915..b881f5d4a7f9 100644 --- a/drivers/net/ethernet/micrel/ks8695net.c +++ b/drivers/net/ethernet/micrel/ks8695net.c @@ -1164,7 +1164,7 @@ ks8695_timeout(struct net_device *ndev) * sk_buff and adds it to the TX ring. It then kicks the TX DMA * engine to ensure transmission begins. */ -static int +static netdev_tx_t ks8695_start_xmit(struct sk_buff *skb, struct net_device *ndev) { struct ks8695_priv *ksp = netdev_priv(ndev); diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index f3e9dd47b56f..c699a779757e 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -474,24 +474,6 @@ static int msg_enable; * chip is busy transferring packet data (RX/TX FIFO accesses). */ -/** - * ks_rdreg8 - read 8 bit register from device - * @ks : The chip information - * @offset: The register address - * - * Read a 8bit register from the chip, returning the result - */ -static u8 ks_rdreg8(struct ks_net *ks, int offset) -{ - u16 data; - u8 shift_bit = offset & 0x03; - u8 shift_data = (offset & 1) << 3; - ks->cmd_reg_cache = (u16) offset | (u16)(BE0 << shift_bit); - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); - data = ioread16(ks->hw_addr); - return (u8)(data >> shift_data); -} - /** * ks_rdreg16 - read 16 bit register from device * @ks : The chip information @@ -502,27 +484,11 @@ static u8 ks_rdreg8(struct ks_net *ks, int offset) static u16 ks_rdreg16(struct ks_net *ks, int offset) { - ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); + ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02)); iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); return ioread16(ks->hw_addr); } -/** - * ks_wrreg8 - write 8bit register value to chip - * @ks: The chip information - * @offset: The register address - * @value: The value to write - * - */ -static void ks_wrreg8(struct ks_net *ks, int offset, u8 value) -{ - u8 shift_bit = (offset & 0x03); - u16 value_write = (u16)(value << ((offset & 1) << 3)); - ks->cmd_reg_cache = (u16)offset | (BE0 << shift_bit); - iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); - iowrite16(value_write, ks->hw_addr); -} - /** * ks_wrreg16 - write 16bit register value to chip * @ks: The chip information @@ -533,7 +499,7 @@ static void ks_wrreg8(struct ks_net *ks, int offset, u8 value) static void ks_wrreg16(struct ks_net *ks, int offset, u16 value) { - ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02)); + ks->cmd_reg_cache = (u16)offset | ((BE3 | BE2) >> (offset & 0x02)); iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd); iowrite16(value, ks->hw_addr); } @@ -549,7 +515,7 @@ static inline void ks_inblk(struct ks_net *ks, u16 *wptr, u32 len) { len >>= 1; while (len--) - *wptr++ = (u16)ioread16(ks->hw_addr); + *wptr++ = be16_to_cpu(ioread16(ks->hw_addr)); } /** @@ -563,7 +529,7 @@ static inline void ks_outblk(struct ks_net *ks, u16 *wptr, u32 len) { len >>= 1; while (len--) - iowrite16(*wptr++, ks->hw_addr); + iowrite16(cpu_to_be16(*wptr++), ks->hw_addr); } static void ks_disable_int(struct ks_net *ks) @@ -642,8 +608,7 @@ static void ks_read_config(struct ks_net *ks) u16 reg_data = 0; /* Regardless of bus width, 8 bit read should always work.*/ - reg_data = ks_rdreg8(ks, KS_CCR) & 0x00FF; - reg_data |= ks_rdreg8(ks, KS_CCR+1) << 8; + reg_data = ks_rdreg16(ks, KS_CCR); /* addr/data bus are multiplexed */ ks->sharedbus = (reg_data & CCR_SHARED) == CCR_SHARED; @@ -747,7 +712,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len) /* 1. set sudo DMA mode */ ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI); - ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); /* 2. read prepend data */ /** @@ -764,7 +729,7 @@ static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len) ks_inblk(ks, buf, ALIGN(len, 4)); /* 4. reset sudo DMA Mode */ - ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); } /** @@ -866,14 +831,17 @@ static irqreturn_t ks_irq(int irq, void *pw) { struct net_device *netdev = pw; struct ks_net *ks = netdev_priv(netdev); + unsigned long flags; u16 status; + spin_lock_irqsave(&ks->statelock, flags); /*this should be the first in IRQ handler */ ks_save_cmd_reg(ks); status = ks_rdreg16(ks, KS_ISR); if (unlikely(!status)) { ks_restore_cmd_reg(ks); + spin_unlock_irqrestore(&ks->statelock, flags); return IRQ_NONE; } @@ -899,6 +867,7 @@ static irqreturn_t ks_irq(int irq, void *pw) ks->netdev->stats.rx_over_errors++; /* this should be the last in IRQ handler*/ ks_restore_cmd_reg(ks); + spin_unlock_irqrestore(&ks->statelock, flags); return IRQ_HANDLED; } @@ -968,6 +937,7 @@ static int ks_net_stop(struct net_device *netdev) /* shutdown RX/TX QMU */ ks_disable_qmu(ks); + ks_disable_int(ks); /* set powermode to soft power down to save power */ ks_set_powermode(ks, PMECR_PM_SOFTDOWN); @@ -997,13 +967,13 @@ static void ks_write_qmu(struct ks_net *ks, u8 *pdata, u16 len) ks->txh.txw[1] = cpu_to_le16(len); /* 1. set sudo-DMA mode */ - ks_wrreg8(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_SDA) & 0xff); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); /* 2. write status/lenth info */ ks_outblk(ks, ks->txh.txw, 4); /* 3. write pkt data */ ks_outblk(ks, (u16 *)pdata, ALIGN(len, 4)); /* 4. reset sudo-DMA mode */ - ks_wrreg8(ks, KS_RXQCR, ks->rc_rxqcr); + ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); /* 5. Enqueue Tx(move the pkt from TX buffer into TXQ) */ ks_wrreg16(ks, KS_TXQCR, TXQCR_METFE); /* 6. wait until TXQCR_METFE is auto-cleared */ @@ -1020,14 +990,13 @@ static void ks_write_qmu(struct ks_net *ks, u8 *pdata, u16 len) * spin_lock_irqsave is required because tx and rx should be mutual exclusive. * So while tx is in-progress, prevent IRQ interrupt from happenning. */ -static int ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) { - int retv = NETDEV_TX_OK; + netdev_tx_t retv = NETDEV_TX_OK; struct ks_net *ks = netdev_priv(netdev); + unsigned long flags; - disable_irq(netdev->irq); - ks_disable_int(ks); - spin_lock(&ks->statelock); + spin_lock_irqsave(&ks->statelock, flags); /* Extra space are required: * 4 byte for alignment, 4 for status/length, 4 for CRC @@ -1041,9 +1010,7 @@ static int ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) dev_kfree_skb(skb); } else retv = NETDEV_TX_BUSY; - spin_unlock(&ks->statelock); - ks_enable_int(ks); - enable_irq(netdev->irq); + spin_unlock_irqrestore(&ks->statelock, flags); return retv; } diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c index 23821540ab07..254e6dbc4c6a 100644 --- a/drivers/net/ethernet/natsemi/sonic.c +++ b/drivers/net/ethernet/natsemi/sonic.c @@ -50,6 +50,8 @@ static int sonic_open(struct net_device *dev) if (sonic_debug > 2) printk("sonic_open: initializing sonic driver.\n"); + spin_lock_init(&lp->lock); + for (i = 0; i < SONIC_NUM_RRS; i++) { struct sk_buff *skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2); if (skb == NULL) { @@ -101,6 +103,24 @@ static int sonic_open(struct net_device *dev) return 0; } +/* Wait for the SONIC to become idle. */ +static void sonic_quiesce(struct net_device *dev, u16 mask) +{ + struct sonic_local * __maybe_unused lp = netdev_priv(dev); + int i; + u16 bits; + + for (i = 0; i < 1000; ++i) { + bits = SONIC_READ(SONIC_CMD) & mask; + if (!bits) + return; + if (irqs_disabled() || in_interrupt()) + udelay(20); + else + usleep_range(100, 200); + } + WARN_ONCE(1, "command deadline expired! 0x%04x\n", bits); +} /* * Close the SONIC device @@ -118,6 +138,9 @@ static int sonic_close(struct net_device *dev) /* * stop the SONIC, disable interrupts */ + SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); + sonic_quiesce(dev, SONIC_CR_ALL); + SONIC_WRITE(SONIC_IMR, 0); SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); @@ -157,6 +180,9 @@ static void sonic_tx_timeout(struct net_device *dev) * put the Sonic into software-reset mode and * disable all interrupts before releasing DMA buffers */ + SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); + sonic_quiesce(dev, SONIC_CR_ALL); + SONIC_WRITE(SONIC_IMR, 0); SONIC_WRITE(SONIC_ISR, 0x7fff); SONIC_WRITE(SONIC_CMD, SONIC_CR_RST); @@ -194,8 +220,6 @@ static void sonic_tx_timeout(struct net_device *dev) * wake the tx queue * Concurrently with all of this, the SONIC is potentially writing to * the status flags of the TDs. - * Until some mutual exclusion is added, this code will not work with SMP. However, - * MIPS Jazz machines and m68k Macs were all uni-processor machines. */ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) @@ -203,7 +227,8 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) struct sonic_local *lp = netdev_priv(dev); dma_addr_t laddr; int length; - int entry = lp->next_tx; + int entry; + unsigned long flags; if (sonic_debug > 2) printk("sonic_send_packet: skb=%p, dev=%p\n", skb, dev); @@ -221,11 +246,15 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) laddr = dma_map_single(lp->device, skb->data, length, DMA_TO_DEVICE); if (!laddr) { - printk(KERN_ERR "%s: failed to map tx DMA buffer.\n", dev->name); - dev_kfree_skb(skb); - return NETDEV_TX_BUSY; + pr_err_ratelimited("%s: failed to map tx DMA buffer.\n", dev->name); + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; } + spin_lock_irqsave(&lp->lock, flags); + + entry = lp->next_tx; + sonic_tda_put(dev, entry, SONIC_TD_STATUS, 0); /* clear status */ sonic_tda_put(dev, entry, SONIC_TD_FRAG_COUNT, 1); /* single fragment */ sonic_tda_put(dev, entry, SONIC_TD_PKTSIZE, length); /* length of packet */ @@ -235,10 +264,6 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) sonic_tda_put(dev, entry, SONIC_TD_LINK, sonic_tda_get(dev, entry, SONIC_TD_LINK) | SONIC_EOL); - /* - * Must set tx_skb[entry] only after clearing status, and - * before clearing EOL and before stopping queue - */ wmb(); lp->tx_len[entry] = length; lp->tx_laddr[entry] = laddr; @@ -263,6 +288,8 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) SONIC_WRITE(SONIC_CMD, SONIC_CR_TXP); + spin_unlock_irqrestore(&lp->lock, flags); + return NETDEV_TX_OK; } @@ -275,9 +302,21 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) struct net_device *dev = dev_id; struct sonic_local *lp = netdev_priv(dev); int status; + unsigned long flags; + + /* The lock has two purposes. Firstly, it synchronizes sonic_interrupt() + * with sonic_send_packet() so that the two functions can share state. + * Secondly, it makes sonic_interrupt() re-entrant, as that is required + * by macsonic which must use two IRQs with different priority levels. + */ + spin_lock_irqsave(&lp->lock, flags); + + status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT; + if (!status) { + spin_unlock_irqrestore(&lp->lock, flags); - if (!(status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT)) return IRQ_NONE; + } do { if (status & SONIC_INT_PKTRX) { @@ -292,11 +331,12 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) int td_status; int freed_some = 0; - /* At this point, cur_tx is the index of a TD that is one of: - * unallocated/freed (status set & tx_skb[entry] clear) - * allocated and sent (status set & tx_skb[entry] set ) - * allocated and not yet sent (status clear & tx_skb[entry] set ) - * still being allocated by sonic_send_packet (status clear & tx_skb[entry] clear) + /* The state of a Transmit Descriptor may be inferred + * from { tx_skb[entry], td_status } as follows. + * { clear, clear } => the TD has never been used + * { set, clear } => the TD was handed to SONIC + * { set, set } => the TD was handed back + * { clear, set } => the TD is available for re-use */ if (sonic_debug > 2) @@ -398,10 +438,30 @@ static irqreturn_t sonic_interrupt(int irq, void *dev_id) /* load CAM done */ if (status & SONIC_INT_LCD) SONIC_WRITE(SONIC_ISR, SONIC_INT_LCD); /* clear the interrupt */ - } while((status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT)); + + status = SONIC_READ(SONIC_ISR) & SONIC_IMR_DEFAULT; + } while (status); + + spin_unlock_irqrestore(&lp->lock, flags); + return IRQ_HANDLED; } +/* Return the array index corresponding to a given Receive Buffer pointer. */ +static int index_from_addr(struct sonic_local *lp, dma_addr_t addr, + unsigned int last) +{ + unsigned int i = last; + + do { + i = (i + 1) & SONIC_RRS_MASK; + if (addr == lp->rx_laddr[i]) + return i; + } while (i != last); + + return -ENOENT; +} + /* * We have a good packet(s), pass it/them up the network stack. */ @@ -421,6 +481,16 @@ static void sonic_rx(struct net_device *dev) status = sonic_rda_get(dev, entry, SONIC_RD_STATUS); if (status & SONIC_RCR_PRX) { + u32 addr = (sonic_rda_get(dev, entry, + SONIC_RD_PKTPTR_H) << 16) | + sonic_rda_get(dev, entry, SONIC_RD_PKTPTR_L); + int i = index_from_addr(lp, addr, entry); + + if (i < 0) { + WARN_ONCE(1, "failed to find buffer!\n"); + break; + } + /* Malloc up new buffer. */ new_skb = netdev_alloc_skb(dev, SONIC_RBSIZE + 2); if (new_skb == NULL) { @@ -442,7 +512,7 @@ static void sonic_rx(struct net_device *dev) /* now we have a new skb to replace it, pass the used one up the stack */ dma_unmap_single(lp->device, lp->rx_laddr[entry], SONIC_RBSIZE, DMA_FROM_DEVICE); - used_skb = lp->rx_skb[entry]; + used_skb = lp->rx_skb[i]; pkt_len = sonic_rda_get(dev, entry, SONIC_RD_PKTLEN); skb_trim(used_skb, pkt_len); used_skb->protocol = eth_type_trans(used_skb, dev); @@ -451,13 +521,13 @@ static void sonic_rx(struct net_device *dev) lp->stats.rx_bytes += pkt_len; /* and insert the new skb */ - lp->rx_laddr[entry] = new_laddr; - lp->rx_skb[entry] = new_skb; + lp->rx_laddr[i] = new_laddr; + lp->rx_skb[i] = new_skb; bufadr_l = (unsigned long)new_laddr & 0xffff; bufadr_h = (unsigned long)new_laddr >> 16; - sonic_rra_put(dev, entry, SONIC_RR_BUFADR_L, bufadr_l); - sonic_rra_put(dev, entry, SONIC_RR_BUFADR_H, bufadr_h); + sonic_rra_put(dev, i, SONIC_RR_BUFADR_L, bufadr_l); + sonic_rra_put(dev, i, SONIC_RR_BUFADR_H, bufadr_h); } else { /* This should only happen, if we enable accepting broken packets. */ lp->stats.rx_errors++; @@ -592,6 +662,7 @@ static int sonic_init(struct net_device *dev) */ SONIC_WRITE(SONIC_CMD, 0); SONIC_WRITE(SONIC_CMD, SONIC_CR_RXDIS); + sonic_quiesce(dev, SONIC_CR_ALL); /* * initialize the receive resource area diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h index 421b1a283fed..7dc011655e70 100644 --- a/drivers/net/ethernet/natsemi/sonic.h +++ b/drivers/net/ethernet/natsemi/sonic.h @@ -110,6 +110,9 @@ #define SONIC_CR_TXP 0x0002 #define SONIC_CR_HTX 0x0001 +#define SONIC_CR_ALL (SONIC_CR_LCAM | SONIC_CR_RRRA | \ + SONIC_CR_RXEN | SONIC_CR_TXP) + /* * SONIC data configuration bits */ @@ -274,8 +277,9 @@ #define SONIC_NUM_RDS SONIC_NUM_RRS /* number of receive descriptors */ #define SONIC_NUM_TDS 16 /* number of transmit descriptors */ -#define SONIC_RDS_MASK (SONIC_NUM_RDS-1) -#define SONIC_TDS_MASK (SONIC_NUM_TDS-1) +#define SONIC_RRS_MASK (SONIC_NUM_RRS - 1) +#define SONIC_RDS_MASK (SONIC_NUM_RDS - 1) +#define SONIC_TDS_MASK (SONIC_NUM_TDS - 1) #define SONIC_RBSIZE 1520 /* size of one resource buffer */ @@ -321,6 +325,7 @@ struct sonic_local { unsigned int next_tx; /* next free TD */ struct device *device; /* generic device */ struct net_device_stats stats; + spinlock_t lock; }; #define TX_TIMEOUT (3 * HZ) @@ -342,30 +347,30 @@ static void sonic_tx_timeout(struct net_device *dev); as far as we can tell. */ /* OpenBSD calls this "SWO". I'd like to think that sonic_buf_put() is a much better name. */ -static inline void sonic_buf_put(void* base, int bitmode, +static inline void sonic_buf_put(u16 *base, int bitmode, int offset, __u16 val) { if (bitmode) #ifdef __BIG_ENDIAN - ((__u16 *) base + (offset*2))[1] = val; + __raw_writew(val, base + (offset * 2) + 1); #else - ((__u16 *) base + (offset*2))[0] = val; + __raw_writew(val, base + (offset * 2) + 0); #endif else - ((__u16 *) base)[offset] = val; + __raw_writew(val, base + (offset * 1) + 0); } -static inline __u16 sonic_buf_get(void* base, int bitmode, +static inline __u16 sonic_buf_get(u16 *base, int bitmode, int offset) { if (bitmode) #ifdef __BIG_ENDIAN - return ((volatile __u16 *) base + (offset*2))[1]; + return __raw_readw(base + (offset * 2) + 1); #else - return ((volatile __u16 *) base + (offset*2))[0]; + return __raw_readw(base + (offset * 2) + 0); #endif else - return ((volatile __u16 *) base)[offset]; + return __raw_readw(base + (offset * 1) + 0); } /* Inlines that you should actually use for reading/writing DMA buffers */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 6df2c8b2ce6f..bffa25d6dc29 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2169,9 +2169,13 @@ nfp_net_tx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring) tx_ring->size = sizeof(*tx_ring->txds) * tx_ring->cnt; tx_ring->txds = dma_zalloc_coherent(dp->dev, tx_ring->size, - &tx_ring->dma, GFP_KERNEL); - if (!tx_ring->txds) + &tx_ring->dma, + GFP_KERNEL | __GFP_NOWARN); + if (!tx_ring->txds) { + netdev_warn(dp->netdev, "failed to allocate TX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n", + tx_ring->cnt); goto err_alloc; + } sz = sizeof(*tx_ring->txbufs) * tx_ring->cnt; tx_ring->txbufs = kzalloc(sz, GFP_KERNEL); @@ -2314,9 +2318,13 @@ nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring) rx_ring->cnt = dp->rxd_cnt; rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt; rx_ring->rxds = dma_zalloc_coherent(dp->dev, rx_ring->size, - &rx_ring->dma, GFP_KERNEL); - if (!rx_ring->rxds) + &rx_ring->dma, + GFP_KERNEL | __GFP_NOWARN); + if (!rx_ring->rxds) { + netdev_warn(dp->netdev, "failed to allocate RX descriptor ring memory, requested descriptor count: %d, consider lowering descriptor count\n", + rx_ring->cnt); goto err_alloc; + } sz = sizeof(*rx_ring->rxbufs) * rx_ring->cnt; rx_ring->rxbufs = kzalloc(sz, GFP_KERNEL); diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index 49591d9c2e1b..c9b4ac9d3330 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -1053,7 +1053,6 @@ static int pasemi_mac_phy_init(struct net_device *dev) dn = pci_device_to_OF_node(mac->pdev); phy_dn = of_parse_phandle(dn, "phy-handle", 0); - of_node_put(phy_dn); mac->link = 0; mac->speed = 0; @@ -1062,6 +1061,7 @@ static int pasemi_mac_phy_init(struct net_device *dev) phydev = of_phy_connect(dev, phy_dn, &pasemi_adjust_link, 0, PHY_INTERFACE_MODE_SGMII); + of_node_put(phy_dn); if (!phydev) { printk(KERN_ERR "%s: Could not attach to phy\n", dev->name); return -ENODEV; diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 91003bc6f00b..6c4714a8b54c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -829,7 +829,7 @@ u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf); /* Prototypes */ int qed_fill_dev_info(struct qed_dev *cdev, struct qed_dev_info *dev_info); -void qed_link_update(struct qed_hwfn *hwfn); +void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt); u32 qed_unzip_data(struct qed_hwfn *p_hwfn, u32 input_len, u8 *input_buf, u32 max_size, u8 *unzip_buf); diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index bb09f5a9846f..38d0f62bf037 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -509,7 +509,8 @@ int qed_iwarp_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) /* Make sure ep is closed before returning and freeing memory. */ if (ep) { - while (ep->state != QED_IWARP_EP_CLOSED && wait_count++ < 200) + while (READ_ONCE(ep->state) != QED_IWARP_EP_CLOSED && + wait_count++ < 200) msleep(100); if (ep->state != QED_IWARP_EP_CLOSED) @@ -991,8 +992,6 @@ qed_iwarp_mpa_complete(struct qed_hwfn *p_hwfn, params.ep_context = ep; - ep->state = QED_IWARP_EP_CLOSED; - switch (fw_return_code) { case RDMA_RETURN_OK: ep->qp->max_rd_atomic_req = ep->cm_info.ord; @@ -1052,6 +1051,10 @@ qed_iwarp_mpa_complete(struct qed_hwfn *p_hwfn, break; } + if (fw_return_code != RDMA_RETURN_OK) + /* paired with READ_ONCE in destroy_qp */ + smp_store_release(&ep->state, QED_IWARP_EP_CLOSED); + ep->event_cb(ep->cb_context, ¶ms); /* on passive side, if there is no associated QP (REJECT) we need to @@ -2069,7 +2072,9 @@ void qed_iwarp_qp_in_error(struct qed_hwfn *p_hwfn, params.status = (fw_return_code == IWARP_QP_IN_ERROR_GOOD_CLOSE) ? 0 : -ECONNRESET; - ep->state = QED_IWARP_EP_CLOSED; + /* paired with READ_ONCE in destroy_qp */ + smp_store_release(&ep->state, QED_IWARP_EP_CLOSED); + spin_lock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); list_del(&ep->list_entry); spin_unlock_bh(&p_hwfn->p_rdma_info->iwarp.iw_lock); @@ -2157,7 +2162,8 @@ qed_iwarp_tcp_connect_unsuccessful(struct qed_hwfn *p_hwfn, params.event = QED_IWARP_EVENT_ACTIVE_COMPLETE; params.ep_context = ep; params.cm_info = &ep->cm_info; - ep->state = QED_IWARP_EP_CLOSED; + /* paired with READ_ONCE in destroy_qp */ + smp_store_release(&ep->state, QED_IWARP_EP_CLOSED); switch (fw_return_code) { case IWARP_CONN_ERROR_TCP_CONNECT_INVALID_PACKET: diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 62cde3854a5c..5d7adedac68d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1629,10 +1629,9 @@ static void __qed_get_vport_pstats_addrlen(struct qed_hwfn *p_hwfn, } } -static void __qed_get_vport_pstats(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - struct qed_eth_stats *p_stats, - u16 statistics_bin) +static noinline_for_stack void +__qed_get_vport_pstats(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + struct qed_eth_stats *p_stats, u16 statistics_bin) { struct eth_pstorm_per_queue_stat pstats; u32 pstats_addr = 0, pstats_len = 0; @@ -1659,10 +1658,9 @@ static void __qed_get_vport_pstats(struct qed_hwfn *p_hwfn, HILO_64_REGPAIR(pstats.error_drop_pkts); } -static void __qed_get_vport_tstats(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - struct qed_eth_stats *p_stats, - u16 statistics_bin) +static noinline_for_stack void +__qed_get_vport_tstats(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + struct qed_eth_stats *p_stats, u16 statistics_bin) { struct tstorm_per_port_stat tstats; u32 tstats_addr, tstats_len; @@ -1705,10 +1703,9 @@ static void __qed_get_vport_ustats_addrlen(struct qed_hwfn *p_hwfn, } } -static void __qed_get_vport_ustats(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - struct qed_eth_stats *p_stats, - u16 statistics_bin) +static noinline_for_stack +void __qed_get_vport_ustats(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + struct qed_eth_stats *p_stats, u16 statistics_bin) { struct eth_ustorm_per_queue_stat ustats; u32 ustats_addr = 0, ustats_len = 0; @@ -1747,10 +1744,9 @@ static void __qed_get_vport_mstats_addrlen(struct qed_hwfn *p_hwfn, } } -static void __qed_get_vport_mstats(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - struct qed_eth_stats *p_stats, - u16 statistics_bin) +static noinline_for_stack void +__qed_get_vport_mstats(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + struct qed_eth_stats *p_stats, u16 statistics_bin) { struct eth_mstorm_per_queue_stat mstats; u32 mstats_addr = 0, mstats_len = 0; @@ -1776,9 +1772,9 @@ static void __qed_get_vport_mstats(struct qed_hwfn *p_hwfn, HILO_64_REGPAIR(mstats.tpa_coalesced_bytes); } -static void __qed_get_vport_port_stats(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - struct qed_eth_stats *p_stats) +static noinline_for_stack void +__qed_get_vport_port_stats(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + struct qed_eth_stats *p_stats) { struct qed_eth_stats_common *p_common = &p_stats->common; struct port_stats port_stats; diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 557332f1f886..52e747fd9c83 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1389,6 +1389,7 @@ static int qed_get_link_data(struct qed_hwfn *hwfn, } static void qed_fill_link(struct qed_hwfn *hwfn, + struct qed_ptt *ptt, struct qed_link_output *if_link) { struct qed_mcp_link_params params; @@ -1469,7 +1470,7 @@ static void qed_fill_link(struct qed_hwfn *hwfn, /* TODO - fill duplex properly */ if_link->duplex = DUPLEX_FULL; - qed_mcp_get_media_type(hwfn->cdev, &media_type); + qed_mcp_get_media_type(hwfn, ptt, &media_type); if_link->port = qed_get_port_type(media_type); if_link->autoneg = params.speed.autoneg; @@ -1525,21 +1526,34 @@ static void qed_fill_link(struct qed_hwfn *hwfn, static void qed_get_current_link(struct qed_dev *cdev, struct qed_link_output *if_link) { + struct qed_hwfn *hwfn; + struct qed_ptt *ptt; int i; - qed_fill_link(&cdev->hwfns[0], if_link); + hwfn = &cdev->hwfns[0]; + if (IS_PF(cdev)) { + ptt = qed_ptt_acquire(hwfn); + if (ptt) { + qed_fill_link(hwfn, ptt, if_link); + qed_ptt_release(hwfn, ptt); + } else { + DP_NOTICE(hwfn, "Failed to fill link; No PTT\n"); + } + } else { + qed_fill_link(hwfn, NULL, if_link); + } for_each_hwfn(cdev, i) qed_inform_vf_link_state(&cdev->hwfns[i]); } -void qed_link_update(struct qed_hwfn *hwfn) +void qed_link_update(struct qed_hwfn *hwfn, struct qed_ptt *ptt) { void *cookie = hwfn->cdev->ops_cookie; struct qed_common_cb_ops *op = hwfn->cdev->protocol_ops.common; struct qed_link_output if_link; - qed_fill_link(hwfn, &if_link); + qed_fill_link(hwfn, ptt, &if_link); qed_inform_vf_link_state(hwfn); if (IS_LEAD_HWFN(hwfn) && cookie) diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index 7938abe9a301..ef17ca09d303 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -1352,7 +1352,7 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE) qed_mcp_read_eee_config(p_hwfn, p_ptt, p_link); - qed_link_update(p_hwfn); + qed_link_update(p_hwfn, p_ptt); out: spin_unlock_bh(&p_hwfn->mcp_info->link_lock); } @@ -1722,12 +1722,10 @@ int qed_mcp_get_mbi_ver(struct qed_hwfn *p_hwfn, return 0; } -int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type) +int qed_mcp_get_media_type(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *p_media_type) { - struct qed_hwfn *p_hwfn = &cdev->hwfns[0]; - struct qed_ptt *p_ptt; - - if (IS_VF(cdev)) + if (IS_VF(p_hwfn->cdev)) return -EINVAL; if (!qed_mcp_is_init(p_hwfn)) { @@ -1735,16 +1733,15 @@ int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type) return -EBUSY; } - *p_media_type = MEDIA_UNSPECIFIED; + if (!p_ptt) { + *p_media_type = MEDIA_UNSPECIFIED; + return -EINVAL; + } - p_ptt = qed_ptt_acquire(p_hwfn); - if (!p_ptt) - return -EBUSY; - - *p_media_type = qed_rd(p_hwfn, p_ptt, p_hwfn->mcp_info->port_addr + - offsetof(struct public_port, media_type)); - - qed_ptt_release(p_hwfn, p_ptt); + *p_media_type = qed_rd(p_hwfn, p_ptt, + p_hwfn->mcp_info->port_addr + + offsetof(struct public_port, + media_type)); return 0; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index f1fe5e3427ea..8fcdb2c3e5db 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -284,14 +284,15 @@ int qed_mcp_get_mbi_ver(struct qed_hwfn *p_hwfn, * @brief Get media type value of the port. * * @param cdev - qed dev pointer + * @param p_ptt * @param mfw_ver - media type value * * @return int - * 0 - Operation was successul. * -EBUSY - Operation failed */ -int qed_mcp_get_media_type(struct qed_dev *cdev, - u32 *media_type); +int qed_mcp_get_media_type(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *media_type); /** * @brief General function for sending commands to the MCP diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 3220086f99de..a2a9921b467b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -1669,7 +1669,7 @@ static void qed_handle_bulletin_change(struct qed_hwfn *hwfn) ops->ports_update(cookie, vxlan_port, geneve_port); /* Always update link configuration according to bulletin */ - qed_link_update(hwfn); + qed_link_update(hwfn, NULL); } void qed_iov_vf_task(struct work_struct *work) diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index adb700512baa..a80531b5aecc 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -156,6 +156,8 @@ struct qede_rdma_dev { struct list_head entry; struct list_head rdma_event_list; struct workqueue_struct *rdma_wq; + struct kref refcnt; + struct completion event_comp; }; struct qede_ptp; diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index f79e36e4060a..e7ad95de3da8 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -1181,7 +1181,7 @@ qede_configure_mcast_filtering(struct net_device *ndev, netif_addr_lock_bh(ndev); mc_count = netdev_mc_count(ndev); - if (mc_count < 64) { + if (mc_count <= 64) { netdev_for_each_mc_addr(ha, ndev) { ether_addr_copy(temp, ha->addr); temp += ETH_ALEN; diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 6eab2c632c75..dab202f343c6 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1052,8 +1052,16 @@ enum qede_remove_mode { static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) { struct net_device *ndev = pci_get_drvdata(pdev); - struct qede_dev *edev = netdev_priv(ndev); - struct qed_dev *cdev = edev->cdev; + struct qede_dev *edev; + struct qed_dev *cdev; + + if (!ndev) { + dev_info(&pdev->dev, "Device has already been removed\n"); + return; + } + + edev = netdev_priv(ndev); + cdev = edev->cdev; DP_INFO(edev, "Starting qede_remove\n"); diff --git a/drivers/net/ethernet/qlogic/qede/qede_rdma.c b/drivers/net/ethernet/qlogic/qede/qede_rdma.c index 1900bf7e67d1..cd12fb919ad5 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_rdma.c +++ b/drivers/net/ethernet/qlogic/qede/qede_rdma.c @@ -57,6 +57,9 @@ static void _qede_rdma_dev_add(struct qede_dev *edev) static int qede_rdma_create_wq(struct qede_dev *edev) { INIT_LIST_HEAD(&edev->rdma_info.rdma_event_list); + kref_init(&edev->rdma_info.refcnt); + init_completion(&edev->rdma_info.event_comp); + edev->rdma_info.rdma_wq = create_singlethread_workqueue("rdma_wq"); if (!edev->rdma_info.rdma_wq) { DP_NOTICE(edev, "qedr: Could not create workqueue\n"); @@ -81,8 +84,23 @@ static void qede_rdma_cleanup_event(struct qede_dev *edev) } } +static void qede_rdma_complete_event(struct kref *ref) +{ + struct qede_rdma_dev *rdma_dev = + container_of(ref, struct qede_rdma_dev, refcnt); + + /* no more events will be added after this */ + complete(&rdma_dev->event_comp); +} + static void qede_rdma_destroy_wq(struct qede_dev *edev) { + /* Avoid race with add_event flow, make sure it finishes before + * we start accessing the list and cleaning up the work + */ + kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event); + wait_for_completion(&edev->rdma_info.event_comp); + qede_rdma_cleanup_event(edev); destroy_workqueue(edev->rdma_info.rdma_wq); } @@ -287,15 +305,24 @@ static void qede_rdma_add_event(struct qede_dev *edev, if (!edev->rdma_info.qedr_dev) return; + /* We don't want the cleanup flow to start while we're allocating and + * scheduling the work + */ + if (!kref_get_unless_zero(&edev->rdma_info.refcnt)) + return; /* already being destroyed */ + event_node = qede_rdma_get_free_event_node(edev); if (!event_node) - return; + goto out; event_node->event = event; event_node->ptr = edev; INIT_WORK(&event_node->work, qede_rdma_handle_event); queue_work(edev->rdma_info.rdma_wq, &event_node->work); + +out: + kref_put(&edev->rdma_info.refcnt, qede_rdma_complete_event); } void qede_rdma_dev_event_open(struct qede_dev *edev) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 5fca9a75780c..cc53ee26bd3e 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -2756,6 +2756,9 @@ static int ql_alloc_large_buffers(struct ql3_adapter *qdev) int err; for (i = 0; i < qdev->num_large_buffers; i++) { + lrg_buf_cb = &qdev->lrg_buf[i]; + memset(lrg_buf_cb, 0, sizeof(struct ql_rcv_buf_cb)); + skb = netdev_alloc_skb(qdev->ndev, qdev->lrg_buffer_len); if (unlikely(!skb)) { @@ -2766,11 +2769,7 @@ static int ql_alloc_large_buffers(struct ql3_adapter *qdev) ql_free_large_buffers(qdev); return -ENOMEM; } else { - - lrg_buf_cb = &qdev->lrg_buf[i]; - memset(lrg_buf_cb, 0, sizeof(struct ql_rcv_buf_cb)); lrg_buf_cb->index = i; - lrg_buf_cb->skb = skb; /* * We save some space to copy the ethhdr from first * buffer @@ -2792,6 +2791,7 @@ static int ql_alloc_large_buffers(struct ql3_adapter *qdev) return -ENOMEM; } + lrg_buf_cb->skb = skb; dma_unmap_addr_set(lrg_buf_cb, mapaddr, map); dma_unmap_len_set(lrg_buf_cb, maplen, qdev->lrg_buffer_len - diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c index a496390b8632..07f9067affc6 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c @@ -2043,6 +2043,7 @@ static void qlcnic_83xx_exec_template_cmd(struct qlcnic_adapter *p_dev, break; } entry += p_hdr->size; + cond_resched(); } p_dev->ahw->reset.seq_index = index; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c index 4b76c69fe86d..834208e55f7b 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.c @@ -883,7 +883,7 @@ static u8 qlcnic_dcb_get_capability(struct net_device *netdev, int capid, struct qlcnic_adapter *adapter = netdev_priv(netdev); if (!test_bit(QLCNIC_DCB_STATE, &adapter->dcb->state)) - return 0; + return 1; switch (capid) { case DCB_CAP_ATTR_PG: diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c index afa10a163da1..f34ae8c75bc5 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c @@ -703,6 +703,7 @@ static u32 qlcnic_read_memory_test_agent(struct qlcnic_adapter *adapter, addr += 16; reg_read -= 16; ret += 16; + cond_resched(); } out: mutex_unlock(&adapter->ahw->mem_lock); @@ -1383,6 +1384,7 @@ int qlcnic_dump_fw(struct qlcnic_adapter *adapter) buf_offset += entry->hdr.cap_size; entry_offset += entry->hdr.offset; buffer = fw_dump->data + buf_offset; + cond_resched(); } fw_dump->clr = 1; diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c index 275fc6f154a7..1c87178fc485 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.c +++ b/drivers/net/ethernet/qualcomm/qca_spi.c @@ -475,7 +475,6 @@ qcaspi_qca7k_sync(struct qcaspi *qca, int event) u16 signature = 0; u16 spi_config; u16 wrbuf_space = 0; - static u16 reset_count; if (event == QCASPI_EVENT_CPUON) { /* Read signature twice, if not valid @@ -528,13 +527,13 @@ qcaspi_qca7k_sync(struct qcaspi *qca, int event) qca->sync = QCASPI_SYNC_RESET; qca->stats.trig_reset++; - reset_count = 0; + qca->reset_count = 0; break; case QCASPI_SYNC_RESET: - reset_count++; + qca->reset_count++; netdev_dbg(qca->net_dev, "sync: waiting for CPU on, count %u.\n", - reset_count); - if (reset_count >= QCASPI_RESET_TIMEOUT) { + qca->reset_count); + if (qca->reset_count >= QCASPI_RESET_TIMEOUT) { /* reset did not seem to take place, try again */ qca->sync = QCASPI_SYNC_UNKNOWN; qca->stats.reset_timeout++; diff --git a/drivers/net/ethernet/qualcomm/qca_spi.h b/drivers/net/ethernet/qualcomm/qca_spi.h index fc0e98726b36..719c41227f22 100644 --- a/drivers/net/ethernet/qualcomm/qca_spi.h +++ b/drivers/net/ethernet/qualcomm/qca_spi.h @@ -92,6 +92,7 @@ struct qcaspi { unsigned int intr_req; unsigned int intr_svc; + u16 reset_count; #ifdef CONFIG_DEBUG_FS struct dentry *device_root; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 1e33aea59f50..b7df8c1121e3 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -84,10 +84,10 @@ static int rmnet_unregister_real_device(struct net_device *real_dev, if (port->nr_rmnet_devs) return -EINVAL; - kfree(port); - netdev_rx_handler_unregister(real_dev); + kfree(port); + /* release reference on real_dev */ dev_put(real_dev); @@ -157,6 +157,11 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, int err = 0; u16 mux_id; + if (!tb[IFLA_LINK]) { + NL_SET_ERR_MSG_MOD(extack, "link not specified"); + return -EINVAL; + } + real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev || !dev) return -ENODEV; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 9b1906a65e11..25f3b2ad26e9 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3046,12 +3046,16 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev) struct device_node *np = dev->of_node; struct sh_eth_plat_data *pdata; const char *mac_addr; + int ret; pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) return NULL; - pdata->phy_interface = of_get_phy_mode(np); + ret = of_get_phy_mode(np); + if (ret < 0) + return NULL; + pdata->phy_interface = ret; mac_addr = of_get_mac_address(np); if (mac_addr) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 09352ee43b55..2d92a9fe4606 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -5852,22 +5852,25 @@ static const struct efx_ef10_nvram_type_info efx_ef10_nvram_types[] = { { NVRAM_PARTITION_TYPE_LICENSE, 0, 0, "sfc_license" }, { NVRAM_PARTITION_TYPE_PHY_MIN, 0xff, 0, "sfc_phy_fw" }, }; +#define EF10_NVRAM_PARTITION_COUNT ARRAY_SIZE(efx_ef10_nvram_types) static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, struct efx_mcdi_mtd_partition *part, - unsigned int type) + unsigned int type, + unsigned long *found) { MCDI_DECLARE_BUF(inbuf, MC_CMD_NVRAM_METADATA_IN_LEN); MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_METADATA_OUT_LENMAX); const struct efx_ef10_nvram_type_info *info; size_t size, erase_size, outlen; + int type_idx = 0; bool protected; int rc; - for (info = efx_ef10_nvram_types; ; info++) { - if (info == - efx_ef10_nvram_types + ARRAY_SIZE(efx_ef10_nvram_types)) + for (type_idx = 0; ; type_idx++) { + if (type_idx == EF10_NVRAM_PARTITION_COUNT) return -ENODEV; + info = efx_ef10_nvram_types + type_idx; if ((type & ~info->type_mask) == info->type) break; } @@ -5880,6 +5883,13 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, if (protected) return -ENODEV; /* hide it */ + /* If we've already exposed a partition of this type, hide this + * duplicate. All operations on MTDs are keyed by the type anyway, + * so we can't act on the duplicate. + */ + if (__test_and_set_bit(type_idx, found)) + return -EEXIST; + part->nvram_type = type; MCDI_SET_DWORD(inbuf, NVRAM_METADATA_IN_TYPE, type); @@ -5908,6 +5918,7 @@ static int efx_ef10_mtd_probe_partition(struct efx_nic *efx, static int efx_ef10_mtd_probe(struct efx_nic *efx) { MCDI_DECLARE_BUF(outbuf, MC_CMD_NVRAM_PARTITIONS_OUT_LENMAX); + DECLARE_BITMAP(found, EF10_NVRAM_PARTITION_COUNT) = { 0 }; struct efx_mcdi_mtd_partition *parts; size_t outlen, n_parts_total, i, n_parts; unsigned int type; @@ -5936,11 +5947,13 @@ static int efx_ef10_mtd_probe(struct efx_nic *efx) for (i = 0; i < n_parts_total; i++) { type = MCDI_ARRAY_DWORD(outbuf, NVRAM_PARTITIONS_OUT_TYPE_ID, i); - rc = efx_ef10_mtd_probe_partition(efx, &parts[n_parts], type); - if (rc == 0) - n_parts++; - else if (rc != -ENODEV) + rc = efx_ef10_mtd_probe_partition(efx, &parts[n_parts], type, + found); + if (rc == -EEXIST || rc == -ENODEV) + continue; + if (rc) goto fail; + n_parts++; } rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts)); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index b9cb697b2818..e0d4c1e850cf 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -505,6 +505,7 @@ efx_copy_channel(const struct efx_channel *old_channel) if (tx_queue->channel) tx_queue->channel = channel; tx_queue->buffer = NULL; + tx_queue->cb_page = NULL; memset(&tx_queue->txd, 0, sizeof(tx_queue->txd)); } diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 60cdb97f58e2..f22690792697 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1320,7 +1320,8 @@ void efx_ptp_remove(struct efx_nic *efx) (void)efx_ptp_disable(efx); cancel_work_sync(&efx->ptp_data->work); - cancel_work_sync(&efx->ptp_data->pps_work); + if (efx->ptp_data->pps_workwq) + cancel_work_sync(&efx->ptp_data->pps_work); skb_queue_purge(&efx->ptp_data->rxq); skb_queue_purge(&efx->ptp_data->txq); diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 05157442a980..f4f52a64f450 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -514,7 +514,8 @@ static void smc911x_hardware_send_pkt(struct net_device *dev) * now, or set the card to generates an interrupt when ready * for the packet. */ -static int smc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +smc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct smc911x_local *lp = netdev_priv(dev); unsigned int free; @@ -947,7 +948,7 @@ static void smc911x_phy_configure(struct work_struct *work) if (lp->ctl_rspeed != 100) my_ad_caps &= ~(ADVERTISE_100BASE4|ADVERTISE_100FULL|ADVERTISE_100HALF); - if (!lp->ctl_rfduplx) + if (!lp->ctl_rfduplx) my_ad_caps &= ~(ADVERTISE_100FULL|ADVERTISE_10FULL); /* Update our Auto-Neg Advertisement Register */ diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 080428762858..96ac0d3af6f5 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -638,7 +638,8 @@ done: if (!THROTTLE_TX_PKTS) * now, or set the card to generates an interrupt when ready * for the packet. */ -static int smc_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +smc_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct smc_local *lp = netdev_priv(dev); void __iomem *ioaddr = lp->base; diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index f0afb88d7bc2..ce4bfecc26c7 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -1786,7 +1786,8 @@ static int smsc911x_stop(struct net_device *dev) } /* Entry point for transmitting a packet */ -static int smsc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +smsc911x_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct smsc911x_data *pdata = netdev_priv(dev); unsigned int freespace; diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index d824bf942a8f..e51b50d94074 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -338,9 +338,8 @@ struct dma_features { unsigned int rx_fifo_size; }; -/* GMAC TX FIFO is 8K, Rx FIFO is 16K */ -#define BUF_SIZE_16KiB 16384 -/* RX Buffer size must be < 8191 and multiple of 4/8/16 bytes */ +/* RX Buffer size must be multiple of 4/8/16 bytes */ +#define BUF_SIZE_16KiB 16368 #define BUF_SIZE_8KiB 8188 #define BUF_SIZE_4KiB 4096 #define BUF_SIZE_2KiB 2048 @@ -367,7 +366,7 @@ struct dma_features { struct stmmac_desc_ops { /* DMA RX descriptor ring initialization */ void (*init_rx_desc) (struct dma_desc *p, int disable_rx_ic, int mode, - int end); + int end, int bfsize); /* DMA TX descriptor ring initialization */ void (*init_tx_desc) (struct dma_desc *p, int mode, int end); diff --git a/drivers/net/ethernet/stmicro/stmmac/descs_com.h b/drivers/net/ethernet/stmicro/stmmac/descs_com.h index 40d6356a7e73..3dfb07a78952 100644 --- a/drivers/net/ethernet/stmicro/stmmac/descs_com.h +++ b/drivers/net/ethernet/stmicro/stmmac/descs_com.h @@ -29,11 +29,13 @@ /* Specific functions used for Ring mode */ /* Enhanced descriptors */ -static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end) +static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end, + int bfsize) { - p->des1 |= cpu_to_le32((BUF_SIZE_8KiB - << ERDES1_BUFFER2_SIZE_SHIFT) - & ERDES1_BUFFER2_SIZE_MASK); + if (bfsize == BUF_SIZE_16KiB) + p->des1 |= cpu_to_le32((BUF_SIZE_8KiB + << ERDES1_BUFFER2_SIZE_SHIFT) + & ERDES1_BUFFER2_SIZE_MASK); if (end) p->des1 |= cpu_to_le32(ERDES1_END_RING); @@ -59,11 +61,15 @@ static inline void enh_set_tx_desc_len_on_ring(struct dma_desc *p, int len) } /* Normal descriptors */ -static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end) +static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end, int bfsize) { - p->des1 |= cpu_to_le32(((BUF_SIZE_2KiB - 1) - << RDES1_BUFFER2_SIZE_SHIFT) - & RDES1_BUFFER2_SIZE_MASK); + if (bfsize >= BUF_SIZE_2KiB) { + int bfsize2; + + bfsize2 = min(bfsize - BUF_SIZE_2KiB + 1, BUF_SIZE_2KiB - 1); + p->des1 |= cpu_to_le32((bfsize2 << RDES1_BUFFER2_SIZE_SHIFT) + & RDES1_BUFFER2_SIZE_MASK); + } if (end) p->des1 |= cpu_to_le32(RDES1_END_RING); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 866444b6c82f..11a4a81b0397 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -203,7 +203,7 @@ static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac) struct device *dev = &gmac->pdev->dev; gmac->phy_mode = of_get_phy_mode(dev->of_node); - if (gmac->phy_mode < 0) { + if ((int)gmac->phy_mode < 0) { dev_err(dev, "missing phy mode property\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 8be4b32544ef..d71d3c1c85ee 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -285,7 +285,7 @@ static int meson8b_dwmac_probe(struct platform_device *pdev) dwmac->pdev = pdev; dwmac->phy_mode = of_get_phy_mode(pdev->dev.of_node); - if (dwmac->phy_mode < 0) { + if ((int)dwmac->phy_mode < 0) { dev_err(&pdev->dev, "missing phy-mode property\n"); ret = -EINVAL; goto err_remove_config_dt; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index a62128a444a6..149fd0d5e069 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -724,6 +724,9 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv) /* default */ break; case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: reg |= SYSCON_EPIT | SYSCON_ETCS_INT_GMII; break; case PHY_INTERFACE_MODE_RMII: diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c index d07520fb969e..fc1fa0f9f338 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c @@ -53,13 +53,15 @@ static int sun7i_gmac_init(struct platform_device *pdev, void *priv) * rate, which then uses the auto-reparenting feature of the * clock driver, and enabling/disabling the clock. */ - if (gmac->interface == PHY_INTERFACE_MODE_RGMII) { + if (phy_interface_mode_is_rgmii(gmac->interface)) { clk_set_rate(gmac->tx_clk, SUN7I_GMAC_GMII_RGMII_RATE); clk_prepare_enable(gmac->tx_clk); gmac->clk_enabled = 1; } else { clk_set_rate(gmac->tx_clk, SUN7I_GMAC_MII_RATE); - clk_prepare(gmac->tx_clk); + ret = clk_prepare(gmac->tx_clk); + if (ret) + return ret; } return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 8445af580cb6..e5566c121525 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -438,7 +438,7 @@ static void dwmac4_set_filter(struct mac_device_info *hw, } /* Handle multiple unicast addresses */ - if (netdev_uc_count(dev) > GMAC_MAX_PERFECT_ADDRESSES) { + if (netdev_uc_count(dev) > hw->unicast_filter_entries) { /* Switch to promiscuous mode if more than 128 addrs * are required */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 37b77e7da132..2896ec100c75 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -293,7 +293,7 @@ exit: } static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end) + int mode, int end, int bfsize) { p->des3 = cpu_to_le32(RDES3_OWN | RDES3_BUFFER1_VALID_ADDR); diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index f2150efddc88..dfa6599ca1a7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -265,15 +265,19 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x, } static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, - int mode, int end) + int mode, int end, int bfsize) { + int bfsize1; + p->des0 |= cpu_to_le32(RDES0_OWN); - p->des1 |= cpu_to_le32(BUF_SIZE_8KiB & ERDES1_BUFFER1_SIZE_MASK); + + bfsize1 = min(bfsize, BUF_SIZE_8KiB); + p->des1 |= cpu_to_le32(bfsize1 & ERDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ehn_desc_rx_set_on_chain(p); else - ehn_desc_rx_set_on_ring(p, end); + ehn_desc_rx_set_on_ring(p, end, bfsize); if (disable_rx_ic) p->des1 |= cpu_to_le32(ERDES1_DISABLE_IC); diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index 66c17bab5997..44a4666290da 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -133,15 +133,19 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x, } static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode, - int end) + int end, int bfsize) { + int bfsize1; + p->des0 |= cpu_to_le32(RDES0_OWN); - p->des1 |= cpu_to_le32((BUF_SIZE_2KiB - 1) & RDES1_BUFFER1_SIZE_MASK); + + bfsize1 = min(bfsize, BUF_SIZE_2KiB - 1); + p->des1 |= cpu_to_le32(bfsize1 & RDES1_BUFFER1_SIZE_MASK); if (mode == STMMAC_CHAIN_MODE) ndesc_rx_set_on_chain(p, end); else - ndesc_rx_set_on_ring(p, end); + ndesc_rx_set_on_ring(p, end, bfsize); if (disable_rx_ic) p->des1 |= cpu_to_le32(RDES1_DISABLE_IC); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f4df9ab0aed5..81d446469a35 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -51,7 +51,7 @@ #include #include "dwmac1000.h" -#define STMMAC_ALIGN(x) __ALIGN_KERNEL(x, SMP_CACHE_BYTES) +#define STMMAC_ALIGN(x) ALIGN(ALIGN(x, SMP_CACHE_BYTES), 16) #define TSO_MAX_BUFF_SIZE (SZ_16K - 1) /* Module parameters */ @@ -1043,7 +1043,9 @@ static int stmmac_set_bfsize(int mtu, int bufsize) { int ret = bufsize; - if (mtu >= BUF_SIZE_4KiB) + if (mtu >= BUF_SIZE_8KiB) + ret = BUF_SIZE_16KiB; + else if (mtu >= BUF_SIZE_4KiB) ret = BUF_SIZE_8KiB; else if (mtu >= BUF_SIZE_2KiB) ret = BUF_SIZE_4KiB; @@ -1072,11 +1074,13 @@ static void stmmac_clear_rx_descriptors(struct stmmac_priv *priv, u32 queue) if (priv->extend_desc) priv->hw->desc->init_rx_desc(&rx_q->dma_erx[i].basic, priv->use_riwt, priv->mode, - (i == DMA_RX_SIZE - 1)); + (i == DMA_RX_SIZE - 1), + priv->dma_buf_sz); else priv->hw->desc->init_rx_desc(&rx_q->dma_rx[i], priv->use_riwt, priv->mode, - (i == DMA_RX_SIZE - 1)); + (i == DMA_RX_SIZE - 1), + priv->dma_buf_sz); } /** @@ -3299,7 +3303,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) dma_wmb(); if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) - priv->hw->desc->init_rx_desc(p, priv->use_riwt, 0, 0); + priv->hw->desc->init_rx_desc(p, priv->use_riwt, 0, 0, priv->dma_buf_sz); else priv->hw->desc->set_rx_owner(p); @@ -3321,9 +3325,8 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) { struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue]; - unsigned int entry = rx_q->cur_rx; int coe = priv->hw->rx_csum; - unsigned int next_entry; + unsigned int next_entry = rx_q->cur_rx; unsigned int count = 0; if (netif_msg_rx_status(priv)) { @@ -3338,10 +3341,12 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) priv->hw->desc->display_ring(rx_head, DMA_RX_SIZE, true); } while (count < limit) { - int status; + int entry, status; struct dma_desc *p; struct dma_desc *np; + entry = next_entry; + if (priv->extend_desc) p = (struct dma_desc *)(rx_q->dma_erx + entry); else @@ -3408,7 +3413,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) "len %d larger than size (%d)\n", frame_len, priv->dma_buf_sz); priv->dev->stats.rx_length_errors++; - break; + continue; } /* ACS is set; GMAC core strips PAD/FCS for IEEE 802.3 @@ -3444,7 +3449,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) dev_warn(priv->device, "packet dropped\n"); priv->dev->stats.rx_dropped++; - break; + continue; } dma_sync_single_for_cpu(priv->device, @@ -3469,7 +3474,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) "%s: Inconsistent Rx chain\n", priv->dev->name); priv->dev->stats.rx_dropped++; - break; + continue; } prefetch(skb->data - NET_IP_ALIGN); rx_q->rx_skbuff[entry] = NULL; @@ -3504,7 +3509,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) priv->dev->stats.rx_packets++; priv->dev->stats.rx_bytes += frame_len; } - entry = next_entry; } stmmac_rx_refill(priv, queue); @@ -3595,12 +3599,24 @@ static void stmmac_set_rx_mode(struct net_device *dev) static int stmmac_change_mtu(struct net_device *dev, int new_mtu) { struct stmmac_priv *priv = netdev_priv(dev); + int txfifosz = priv->plat->tx_fifo_size; + + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + + txfifosz /= priv->plat->tx_queues_to_use; if (netif_running(dev)) { netdev_err(priv->dev, "must be stopped to change its MTU\n"); return -EBUSY; } + new_mtu = STMMAC_ALIGN(new_mtu); + + /* If condition true, FIFO is too small or MTU too large */ + if ((txfifosz < new_mtu) || (new_mtu > BUF_SIZE_16KiB)) + return -EINVAL; + dev->mtu = new_mtu; netdev_update_features(dev); @@ -4402,8 +4418,10 @@ int stmmac_suspend(struct device *dev) priv->hw->mac->set_mac(priv->ioaddr, false); pinctrl_pm_select_sleep_state(priv->device); /* Disable clock in case of PWM is off */ - clk_disable(priv->plat->pclk); - clk_disable(priv->plat->stmmac_clk); + if (priv->plat->clk_ptp_ref) + clk_disable_unprepare(priv->plat->clk_ptp_ref); + clk_disable_unprepare(priv->plat->pclk); + clk_disable_unprepare(priv->plat->stmmac_clk); } spin_unlock_irqrestore(&priv->lock, flags); @@ -4468,8 +4486,10 @@ int stmmac_resume(struct device *dev) } else { pinctrl_pm_select_default_state(priv->device); /* enable the clk previously disabled */ - clk_enable(priv->plat->stmmac_clk); - clk_enable(priv->plat->pclk); + clk_prepare_enable(priv->plat->stmmac_clk); + clk_prepare_enable(priv->plat->pclk); + if (priv->plat->clk_ptp_ref) + clk_prepare_enable(priv->plat->clk_ptp_ref); /* reset the phy so that it's ready */ if (priv->mii) stmmac_mdio_reset(priv->mii); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index e471a903c654..1c1d6a942822 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -154,7 +154,7 @@ static int stmmac_enable(struct ptp_clock_info *ptp, /* structure describing a PTP hardware clock */ static const struct ptp_clock_info stmmac_ptp_clock_ops = { .owner = THIS_MODULE, - .name = "stmmac_ptp_clock", + .name = "stmmac ptp", .max_adj = 62500000, .n_alarm = 0, .n_ext_ts = 0, diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c index 5b56c24b6ed2..e6b96c2989b2 100644 --- a/drivers/net/ethernet/sun/ldmvsw.c +++ b/drivers/net/ethernet/sun/ldmvsw.c @@ -111,7 +111,7 @@ static u16 vsw_select_queue(struct net_device *dev, struct sk_buff *skb, } /* Wrappers to common functions */ -static int vsw_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t vsw_start_xmit(struct sk_buff *skb, struct net_device *dev) { return sunvnet_start_xmit_common(skb, dev, vsw_tx_port_find); } diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c index 3189722110c2..9a60fb2b4e9d 100644 --- a/drivers/net/ethernet/sun/sunbmac.c +++ b/drivers/net/ethernet/sun/sunbmac.c @@ -951,7 +951,8 @@ static void bigmac_tx_timeout(struct net_device *dev) } /* Put a packet on the wire. */ -static int bigmac_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +bigmac_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bigmac *bp = netdev_priv(dev); int len, entry; diff --git a/drivers/net/ethernet/sun/sunqe.c b/drivers/net/ethernet/sun/sunqe.c index a6bcdcdd947e..82386a375bd2 100644 --- a/drivers/net/ethernet/sun/sunqe.c +++ b/drivers/net/ethernet/sun/sunqe.c @@ -569,7 +569,7 @@ out: } /* Get a packet queued to go onto the wire. */ -static int qe_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t qe_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct sunqe *qep = netdev_priv(dev); struct sunqe_buffers *qbufs = qep->buffers; diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 65347d2f139b..02ebbe74d93d 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -245,7 +245,7 @@ static u16 vnet_select_queue(struct net_device *dev, struct sk_buff *skb, } /* Wrappers to common functions */ -static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) { return sunvnet_start_xmit_common(skb, dev, vnet_tx_port_find); } diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c index ecf456c7b6d1..fd84ff8bba31 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.c +++ b/drivers/net/ethernet/sun/sunvnet_common.c @@ -1215,9 +1215,10 @@ static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, int ncookies) return skb; } -static int vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb, - struct vnet_port *(*vnet_tx_port) - (struct sk_buff *, struct net_device *)) +static netdev_tx_t +vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb, + struct vnet_port *(*vnet_tx_port) + (struct sk_buff *, struct net_device *)) { struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port); struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING]; @@ -1320,9 +1321,10 @@ out_dropped: return NETDEV_TX_OK; } -int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, - struct vnet_port *(*vnet_tx_port) - (struct sk_buff *, struct net_device *)) +netdev_tx_t +sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, + struct vnet_port *(*vnet_tx_port) + (struct sk_buff *, struct net_device *)) { struct vnet_port *port = NULL; struct vio_dring_state *dr; diff --git a/drivers/net/ethernet/sun/sunvnet_common.h b/drivers/net/ethernet/sun/sunvnet_common.h index 6a4dd1fb19bf..3fcb608fbbb3 100644 --- a/drivers/net/ethernet/sun/sunvnet_common.h +++ b/drivers/net/ethernet/sun/sunvnet_common.h @@ -136,9 +136,10 @@ int sunvnet_close_common(struct net_device *dev); void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp); int sunvnet_set_mac_addr_common(struct net_device *dev, void *p); void sunvnet_tx_timeout_common(struct net_device *dev); -int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, - struct vnet_port *(*vnet_tx_port) - (struct sk_buff *, struct net_device *)); +netdev_tx_t +sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev, + struct vnet_port *(*vnet_tx_port) + (struct sk_buff *, struct net_device *)); #ifdef CONFIG_NET_POLL_CONTROLLER void sunvnet_poll_controller_common(struct net_device *dev, struct vnet *vp); #endif diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 8cb44eabc283..3189afcd5888 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -601,6 +601,7 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable) /* Clear all mcast from ALE */ cpsw_ale_flush_multicast(ale, ALE_ALL_PORTS, -1); + __dev_mc_unsync(ndev, NULL); /* Flood All Unicast Packets to Host port */ cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 1); @@ -861,8 +862,8 @@ static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id) { struct cpsw_common *cpsw = dev_id; - cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_RX); writel(0, &cpsw->wr_regs->rx_en); + cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_RX); if (cpsw->quirk_irq) { disable_irq_nosync(cpsw->irqs_table[0]); diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index e7b76f6b4f67..23c953496a0d 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -116,9 +116,7 @@ static bool cpts_match_tx_ts(struct cpts *cpts, struct cpts_event *event) mtype, seqid); } else if (time_after(jiffies, skb_cb->tmo)) { /* timeout any expired skbs over 1s */ - dev_dbg(cpts->dev, - "expiring tx timestamp mtype %u seqid %04x\n", - mtype, seqid); + dev_dbg(cpts->dev, "expiring tx timestamp from txq\n"); __skb_unlink(skb, &cpts->txq); dev_consume_skb_any(skb); } @@ -567,7 +565,9 @@ struct cpts *cpts_create(struct device *dev, void __iomem *regs, return ERR_PTR(PTR_ERR(cpts->refclk)); } - clk_prepare(cpts->refclk); + ret = clk_prepare(cpts->refclk); + if (ret) + return ERR_PTR(ret); cpts->cc.read = cpts_systim_read; cpts->cc.mask = CLOCKSOURCE_MASK(32); diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index 88d74aef218a..75237c81c63d 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -845,9 +845,9 @@ static int gelic_card_kick_txdma(struct gelic_card *card, * @skb: packet to send out * @netdev: interface device structure * - * returns 0 on success, <0 on failure + * returns NETDEV_TX_OK on success, NETDEV_TX_BUSY on failure */ -int gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev) +netdev_tx_t gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev) { struct gelic_card *card = netdev_card(netdev); struct gelic_descr *descr; diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h index 003d0452d9cb..fbbf9b54b173 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h @@ -370,7 +370,7 @@ void gelic_card_up(struct gelic_card *card); void gelic_card_down(struct gelic_card *card); int gelic_net_open(struct net_device *netdev); int gelic_net_stop(struct net_device *netdev); -int gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev); +netdev_tx_t gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev); void gelic_net_set_multi(struct net_device *netdev); void gelic_net_tx_timeout(struct net_device *netdev); int gelic_net_setup_netdev(struct net_device *netdev, struct gelic_card *card); diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c index cec9e70ab995..da136b8843dd 100644 --- a/drivers/net/ethernet/toshiba/spider_net.c +++ b/drivers/net/ethernet/toshiba/spider_net.c @@ -880,9 +880,9 @@ out: * @skb: packet to send out * @netdev: interface device structure * - * returns 0 on success, !0 on failure + * returns NETDEV_TX_OK on success, NETDEV_TX_BUSY on failure */ -static int +static netdev_tx_t spider_net_xmit(struct sk_buff *skb, struct net_device *netdev) { int cnt; diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c index 9146068979d2..03afc4d8c3ec 100644 --- a/drivers/net/ethernet/toshiba/tc35815.c +++ b/drivers/net/ethernet/toshiba/tc35815.c @@ -474,7 +474,8 @@ static void free_rxbuf_skb(struct pci_dev *hwdev, struct sk_buff *skb, dma_addr_ /* Index to functions, as function prototypes. */ static int tc35815_open(struct net_device *dev); -static int tc35815_send_packet(struct sk_buff *skb, struct net_device *dev); +static netdev_tx_t tc35815_send_packet(struct sk_buff *skb, + struct net_device *dev); static irqreturn_t tc35815_interrupt(int irq, void *dev_id); static int tc35815_rx(struct net_device *dev, int limit); static int tc35815_poll(struct napi_struct *napi, int budget); @@ -1248,7 +1249,8 @@ tc35815_open(struct net_device *dev) * invariant will hold if you make sure that the netif_*_queue() * calls are done at the proper times. */ -static int tc35815_send_packet(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +tc35815_send_packet(struct sk_buff *skb, struct net_device *dev) { struct tc35815_local *lp = netdev_priv(dev); struct TxFD *txfd; diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 60abc9250f56..2241f9897092 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -674,7 +674,8 @@ static inline int temac_check_tx_bd_space(struct temac_local *lp, int num_frag) return 0; } -static int temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t +temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) { struct temac_local *lp = netdev_priv(ndev); struct cdmac_bd *cur_p; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index b481cb174b23..1152d74433f6 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -657,7 +657,8 @@ static inline int axienet_check_tx_bd_space(struct axienet_local *lp, * start the transmission. Additionally if checksum offloading is supported, * it populates AXI Stream Control fields with appropriate values. */ -static int axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) +static netdev_tx_t +axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) { u32 ii; u32 num_frag; @@ -1573,7 +1574,7 @@ static int axienet_probe(struct platform_device *pdev) } } else { lp->phy_mode = of_get_phy_mode(pdev->dev.of_node); - if (lp->phy_mode < 0) { + if ((int)lp->phy_mode < 0) { ret = -EINVAL; goto free_netdev; } diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 69e31ceccfae..6f3e79159d7a 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -1005,9 +1005,10 @@ static int xemaclite_close(struct net_device *dev) * deferred and the Tx queue is stopped so that the deferred socket buffer can * be transmitted when the Emaclite device is free to transmit data. * - * Return: 0, always. + * Return: NETDEV_TX_OK, always. */ -static int xemaclite_send(struct sk_buff *orig_skb, struct net_device *dev) +static netdev_tx_t +xemaclite_send(struct sk_buff *orig_skb, struct net_device *dev) { struct net_local *lp = netdev_priv(dev); struct sk_buff *new_skb; @@ -1028,7 +1029,7 @@ static int xemaclite_send(struct sk_buff *orig_skb, struct net_device *dev) /* Take the time stamp now, since we can't do this in an ISR. */ skb_tx_timestamp(new_skb); spin_unlock_irqrestore(&lp->reset_lock, flags); - return 0; + return NETDEV_TX_OK; } spin_unlock_irqrestore(&lp->reset_lock, flags); @@ -1037,7 +1038,7 @@ static int xemaclite_send(struct sk_buff *orig_skb, struct net_device *dev) dev->stats.tx_bytes += len; dev_consume_skb_any(new_skb); - return 0; + return NETDEV_TX_OK; } /** diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index 750954be5a74..314e3eac09b9 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -181,6 +181,9 @@ static int fjes_acpi_add(struct acpi_device *device) /* create platform_device */ plat_dev = platform_device_register_simple(DRV_NAME, 0, fjes_resource, ARRAY_SIZE(fjes_resource)); + if (IS_ERR(plat_dev)) + return PTR_ERR(plat_dev); + device->driver_data = plat_dev; return 0; @@ -1252,8 +1255,17 @@ static int fjes_probe(struct platform_device *plat_dev) adapter->open_guard = false; adapter->txrx_wq = alloc_workqueue(DRV_NAME "/txrx", WQ_MEM_RECLAIM, 0); + if (unlikely(!adapter->txrx_wq)) { + err = -ENOMEM; + goto err_free_netdev; + } + adapter->control_wq = alloc_workqueue(DRV_NAME "/control", WQ_MEM_RECLAIM, 0); + if (unlikely(!adapter->control_wq)) { + err = -ENOMEM; + goto err_free_txrx_wq; + } INIT_WORK(&adapter->tx_stall_task, fjes_tx_stall_task); INIT_WORK(&adapter->raise_intr_rxdata_task, @@ -1270,7 +1282,7 @@ static int fjes_probe(struct platform_device *plat_dev) hw->hw_res.irq = platform_get_irq(plat_dev, 0); err = fjes_hw_init(&adapter->hw); if (err) - goto err_free_netdev; + goto err_free_control_wq; /* setup MAC address (02:00:00:00:00:[epid])*/ netdev->dev_addr[0] = 2; @@ -1292,6 +1304,10 @@ static int fjes_probe(struct platform_device *plat_dev) err_hw_exit: fjes_hw_exit(&adapter->hw); +err_free_control_wq: + destroy_workqueue(adapter->control_wq); +err_free_txrx_wq: + destroy_workqueue(adapter->txrx_wq); err_free_netdev: free_netdev(netdev); err_out: diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 5de4053774b8..92e4e5d53053 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -42,7 +42,6 @@ struct pdp_ctx { struct hlist_node hlist_addr; union { - u64 tid; struct { u64 tid; u16 flow; @@ -545,7 +544,7 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, mtu = dst_mtu(&rt->dst); } - rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu); + rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, false); if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) && mtu < ntohs(iph->tot_len)) { @@ -645,9 +644,16 @@ static void gtp_link_setup(struct net_device *dev) } static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize); -static void gtp_hashtable_free(struct gtp_dev *gtp); static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]); +static void gtp_destructor(struct net_device *dev) +{ + struct gtp_dev *gtp = netdev_priv(dev); + + kfree(gtp->addr_hash); + kfree(gtp->tid_hash); +} + static int gtp_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -665,10 +671,13 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, if (err < 0) return err; - if (!data[IFLA_GTP_PDP_HASHSIZE]) + if (!data[IFLA_GTP_PDP_HASHSIZE]) { hashsize = 1024; - else + } else { hashsize = nla_get_u32(data[IFLA_GTP_PDP_HASHSIZE]); + if (!hashsize) + hashsize = 1024; + } err = gtp_hashtable_new(gtp, hashsize); if (err < 0) @@ -682,13 +691,15 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev, gn = net_generic(dev_net(dev), gtp_net_id); list_add_rcu(>p->list, &gn->gtp_dev_list); + dev->priv_destructor = gtp_destructor; netdev_dbg(dev, "registered new GTP interface\n"); return 0; out_hashtable: - gtp_hashtable_free(gtp); + kfree(gtp->addr_hash); + kfree(gtp->tid_hash); out_encap: gtp_encap_disable(gtp); return err; @@ -697,9 +708,14 @@ out_encap: static void gtp_dellink(struct net_device *dev, struct list_head *head) { struct gtp_dev *gtp = netdev_priv(dev); + struct pdp_ctx *pctx; + int i; + + for (i = 0; i < gtp->hash_size; i++) + hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid) + pdp_context_delete(pctx); gtp_encap_disable(gtp); - gtp_hashtable_free(gtp); list_del_rcu(>p->list); unregister_netdevice_queue(dev, head); } @@ -755,11 +771,13 @@ static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize) { int i; - gtp->addr_hash = kmalloc(sizeof(struct hlist_head) * hsize, GFP_KERNEL); + gtp->addr_hash = kmalloc(sizeof(struct hlist_head) * hsize, + GFP_KERNEL | __GFP_NOWARN); if (gtp->addr_hash == NULL) return -ENOMEM; - gtp->tid_hash = kmalloc(sizeof(struct hlist_head) * hsize, GFP_KERNEL); + gtp->tid_hash = kmalloc(sizeof(struct hlist_head) * hsize, + GFP_KERNEL | __GFP_NOWARN); if (gtp->tid_hash == NULL) goto err1; @@ -775,20 +793,6 @@ err1: return -ENOMEM; } -static void gtp_hashtable_free(struct gtp_dev *gtp) -{ - struct pdp_ctx *pctx; - int i; - - for (i = 0; i < gtp->hash_size; i++) - hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid) - pdp_context_delete(pctx); - - synchronize_rcu(); - kfree(gtp->addr_hash); - kfree(gtp->tid_hash); -} - static struct sock *gtp_encap_enable_socket(int fd, int type, struct gtp_dev *gtp) { @@ -805,19 +809,21 @@ static struct sock *gtp_encap_enable_socket(int fd, int type, return NULL; } - if (sock->sk->sk_protocol != IPPROTO_UDP) { + sk = sock->sk; + if (sk->sk_protocol != IPPROTO_UDP || + sk->sk_type != SOCK_DGRAM || + (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)) { pr_debug("socket fd=%d not UDP\n", fd); sk = ERR_PTR(-EINVAL); goto out_sock; } - lock_sock(sock->sk); - if (sock->sk->sk_user_data) { + lock_sock(sk); + if (sk->sk_user_data) { sk = ERR_PTR(-EBUSY); - goto out_sock; + goto out_rel_sock; } - sk = sock->sk; sock_hold(sk); tuncfg.sk_user_data = gtp; @@ -827,8 +833,9 @@ static struct sock *gtp_encap_enable_socket(int fd, int type, setup_udp_tunnel_sock(sock_net(sock->sk), sock, &tuncfg); -out_sock: +out_rel_sock: release_sock(sock->sk); +out_sock: sockfd_put(sock); return sk; } @@ -929,24 +936,31 @@ static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info) } } -static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk, - struct genl_info *info) +static int gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk, + struct genl_info *info) { + struct pdp_ctx *pctx, *pctx_tid = NULL; struct net_device *dev = gtp->dev; u32 hash_ms, hash_tid = 0; - struct pdp_ctx *pctx; + unsigned int version; bool found = false; __be32 ms_addr; ms_addr = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]); hash_ms = ipv4_hashfn(ms_addr) % gtp->hash_size; + version = nla_get_u32(info->attrs[GTPA_VERSION]); - hlist_for_each_entry_rcu(pctx, >p->addr_hash[hash_ms], hlist_addr) { - if (pctx->ms_addr_ip4.s_addr == ms_addr) { - found = true; - break; - } - } + pctx = ipv4_pdp_find(gtp, ms_addr); + if (pctx) + found = true; + if (version == GTP_V0) + pctx_tid = gtp0_pdp_find(gtp, + nla_get_u64(info->attrs[GTPA_TID])); + else if (version == GTP_V1) + pctx_tid = gtp1_pdp_find(gtp, + nla_get_u32(info->attrs[GTPA_I_TEI])); + if (pctx_tid) + found = true; if (found) { if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) @@ -954,6 +968,11 @@ static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk, if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; + if (pctx && pctx_tid) + return -EEXIST; + if (!pctx) + pctx = pctx_tid; + ipv4_pdp_fill(pctx, info); if (pctx->gtp_version == GTP_V0) @@ -1077,7 +1096,7 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info) goto out_unlock; } - err = ipv4_pdp_add(gtp, sk, info); + err = gtp_pdp_add(gtp, sk, info); out_unlock: rcu_read_unlock(); @@ -1235,43 +1254,46 @@ static int gtp_genl_dump_pdp(struct sk_buff *skb, struct netlink_callback *cb) { struct gtp_dev *last_gtp = (struct gtp_dev *)cb->args[2], *gtp; + int i, j, bucket = cb->args[0], skip = cb->args[1]; struct net *net = sock_net(skb->sk); - struct gtp_net *gn = net_generic(net, gtp_net_id); - unsigned long tid = cb->args[1]; - int i, k = cb->args[0], ret; struct pdp_ctx *pctx; + struct gtp_net *gn; + + gn = net_generic(net, gtp_net_id); if (cb->args[4]) return 0; + rcu_read_lock(); list_for_each_entry_rcu(gtp, &gn->gtp_dev_list, list) { if (last_gtp && last_gtp != gtp) continue; else last_gtp = NULL; - for (i = k; i < gtp->hash_size; i++) { - hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], hlist_tid) { - if (tid && tid != pctx->u.tid) - continue; - else - tid = 0; - - ret = gtp_genl_fill_info(skb, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - cb->nlh->nlmsg_type, pctx); - if (ret < 0) { + for (i = bucket; i < gtp->hash_size; i++) { + j = 0; + hlist_for_each_entry_rcu(pctx, >p->tid_hash[i], + hlist_tid) { + if (j >= skip && + gtp_genl_fill_info(skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + cb->nlh->nlmsg_type, pctx)) { cb->args[0] = i; - cb->args[1] = pctx->u.tid; + cb->args[1] = j; cb->args[2] = (unsigned long)gtp; goto out; } + j++; } + skip = 0; } + bucket = 0; } cb->args[4] = 1; out: + rcu_read_unlock(); return skb->len; } diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 021a8ec411ab..6d4742d10a78 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -665,10 +665,10 @@ static void sixpack_close(struct tty_struct *tty) { struct sixpack *sp; - write_lock_bh(&disc_data_lock); + write_lock_irq(&disc_data_lock); sp = tty->disc_data; tty->disc_data = NULL; - write_unlock_bh(&disc_data_lock); + write_unlock_irq(&disc_data_lock); if (!sp) return; diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index aec6c26563cf..9fd7dab42a53 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -783,10 +783,10 @@ static void mkiss_close(struct tty_struct *tty) { struct mkiss *ax; - write_lock_bh(&disc_data_lock); + write_lock_irq(&disc_data_lock); ax = tty->disc_data; tty->disc_data = NULL; - write_unlock_bh(&disc_data_lock); + write_unlock_irq(&disc_data_lock); if (!ax) return; diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 0f07b5978fa1..fc794e69e6a1 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -179,7 +179,6 @@ struct rndis_device { u8 hw_mac_adr[ETH_ALEN]; u8 rss_key[NETVSC_HASH_KEYLEN]; - u16 rx_table[ITAB_NUM]; }; @@ -741,6 +740,8 @@ struct net_device_context { u32 tx_table[VRSS_SEND_TAB_SIZE]; + u16 rx_table[ITAB_NUM]; + /* Ethtool settings */ bool udp4_l4_hash; bool udp6_l4_hash; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 33c1f6548fb7..14451e14d99d 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -282,9 +282,9 @@ static inline u32 netvsc_get_hash( else if (flow.basic.n_proto == htons(ETH_P_IPV6)) hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd); else - hash = 0; + return 0; - skb_set_hash(skb, hash, PKT_HASH_TYPE_L3); + __skb_set_sw_hash(skb, hash, false); } return hash; @@ -802,8 +802,7 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, skb->protocol == htons(ETH_P_IP)) netvsc_comp_ipcsum(skb); - /* Do L4 checksum offload if enabled and present. - */ + /* Do L4 checksum offload if enabled and present. */ if (csum_info && (net->features & NETIF_F_RXCSUM)) { if (csum_info->receive.tcp_checksum_succeeded || csum_info->receive.udp_checksum_succeeded) @@ -969,7 +968,7 @@ static int netvsc_attach(struct net_device *ndev, if (netif_running(ndev)) { ret = rndis_filter_open(nvdev); if (ret) - return ret; + goto err; rdev = nvdev->extension; if (!rdev->link_state) @@ -977,6 +976,13 @@ static int netvsc_attach(struct net_device *ndev, } return 0; + +err: + netif_device_detach(ndev); + + rndis_filter_device_remove(hdev, nvdev); + + return ret; } static int netvsc_set_channels(struct net_device *net, @@ -1521,7 +1527,7 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, rndis_dev = ndev->extension; if (indir) { for (i = 0; i < ITAB_NUM; i++) - indir[i] = rndis_dev->rx_table[i]; + indir[i] = ndc->rx_table[i]; } if (key) @@ -1551,7 +1557,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir, return -EINVAL; for (i = 0; i < ITAB_NUM; i++) - rndis_dev->rx_table[i] = indir[i]; + ndc->rx_table[i] = indir[i]; } if (!key) { @@ -1833,6 +1839,12 @@ static rx_handler_result_t netvsc_vf_handle_frame(struct sk_buff **pskb) struct netvsc_vf_pcpu_stats *pcpu_stats = this_cpu_ptr(ndev_ctx->vf_stats); + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(!skb)) + return RX_HANDLER_CONSUMED; + + *pskb = skb; + skb->dev = ndev; u64_stats_update_begin(&pcpu_stats->syncp); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index fc1d5e14d83e..aa0bbffe4900 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -715,6 +715,7 @@ static int rndis_set_rss_param_msg(struct rndis_device *rdev, const u8 *rss_key, u16 flag) { struct net_device *ndev = rdev->ndev; + struct net_device_context *ndc = netdev_priv(ndev); struct rndis_request *request; struct rndis_set_request *set; struct rndis_set_complete *set_complete; @@ -754,7 +755,7 @@ static int rndis_set_rss_param_msg(struct rndis_device *rdev, /* Set indirection table entries */ itab = (u32 *)(rssp + 1); for (i = 0; i < ITAB_NUM; i++) - itab[i] = rdev->rx_table[i]; + itab[i] = ndc->rx_table[i]; /* Set hask key values */ keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset); @@ -1204,6 +1205,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, struct netvsc_device_info *device_info) { struct net_device *net = hv_get_drvdata(dev); + struct net_device_context *ndc = netdev_priv(net); struct netvsc_device *net_device; struct rndis_device *rndis_device; struct ndis_recv_scale_cap rsscap; @@ -1286,9 +1288,11 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, /* We will use the given number of channels if available. */ net_device->num_chn = min(net_device->max_chn, device_info->num_chn); - for (i = 0; i < ITAB_NUM; i++) - rndis_device->rx_table[i] = ethtool_rxfh_indir_default( + if (!netif_is_rxfh_configured(net)) { + for (i = 0; i < ITAB_NUM; i++) + ndc->rx_table[i] = ethtool_rxfh_indir_default( i, net_device->num_chn); + } atomic_set(&net_device->open_chn, 1); vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open); @@ -1327,8 +1331,6 @@ void rndis_filter_device_remove(struct hv_device *dev, /* Halt and release the rndis device */ rndis_filter_halt_device(rndis_dev); - net_dev->extension = NULL; - netvsc_device_remove(dev); } diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index dcd10dba08c7..3a58962babd4 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -3153,12 +3153,12 @@ static int ca8210_probe(struct spi_device *spi_device) goto error; } + priv->spi->dev.platform_data = pdata; ret = ca8210_get_platform_data(priv->spi, pdata); if (ret) { dev_crit(&spi_device->dev, "ca8210_get_platform_data failed\n"); goto error; } - priv->spi->dev.platform_data = pdata; ret = ca8210_dev_com_init(priv); if (ret) { diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index 71ff6bd4be9f..baf8aab59f82 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -256,6 +256,7 @@ void ipvlan_process_multicast(struct work_struct *work) } if (dev) dev_put(dev); + cond_resched(); } } @@ -448,19 +449,21 @@ static int ipvlan_process_outbound(struct sk_buff *skb) struct ethhdr *ethh = eth_hdr(skb); int ret = NET_XMIT_DROP; - /* In this mode we dont care about multicast and broadcast traffic */ - if (is_multicast_ether_addr(ethh->h_dest)) { - pr_warn_ratelimited("Dropped {multi|broad}cast of type= [%x]\n", - ntohs(skb->protocol)); - kfree_skb(skb); - goto out; - } - /* The ipvlan is a pseudo-L2 device, so the packets that we receive * will have L2; which need to discarded and processed further * in the net-ns of the main-device. */ if (skb_mac_header_was_set(skb)) { + /* In this mode we dont care about + * multicast and broadcast traffic */ + if (is_multicast_ether_addr(ethh->h_dest)) { + pr_debug_ratelimited( + "Dropped {multi|broad}cast of type=[%x]\n", + ntohs(skb->protocol)); + kfree_skb(skb); + goto out; + } + skb_pull(skb, sizeof(*ethh)); skb->mac_header = (typeof(skb->mac_header))~0U; skb_reset_network_header(skb); diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 09f6795cce53..cd32d6623f6a 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -236,7 +236,6 @@ static void ipvlan_uninit(struct net_device *dev) static int ipvlan_open(struct net_device *dev) { struct ipvl_dev *ipvlan = netdev_priv(dev); - struct net_device *phy_dev = ipvlan->phy_dev; struct ipvl_addr *addr; if (ipvlan->port->mode == IPVLAN_MODE_L3 || @@ -248,7 +247,7 @@ static int ipvlan_open(struct net_device *dev) list_for_each_entry(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_add(ipvlan, addr); - return dev_uc_add(phy_dev, phy_dev->dev_addr); + return 0; } static int ipvlan_stop(struct net_device *dev) @@ -260,8 +259,6 @@ static int ipvlan_stop(struct net_device *dev) dev_uc_unsync(phy_dev, dev); dev_mc_unsync(phy_dev, dev); - dev_uc_del(phy_dev, phy_dev->dev_addr); - list_for_each_entry(addr, &ipvlan->addrs, anode) ipvlan_ht_addr_del(addr); diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index aa204c98af79..c2c3ce5653db 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2798,9 +2798,6 @@ static int macsec_dev_open(struct net_device *dev) struct net_device *real_dev = macsec->real_dev; int err; - if (!(real_dev->flags & IFF_UP)) - return -ENETDOWN; - err = dev_uc_add(real_dev, dev->dev_addr); if (err < 0) return err; @@ -2874,6 +2871,11 @@ static void macsec_dev_set_rx_mode(struct net_device *dev) dev_uc_sync(real_dev, dev); } +static sci_t dev_to_sci(struct net_device *dev, __be16 port) +{ + return make_sci(dev->dev_addr, port); +} + static int macsec_set_mac_address(struct net_device *dev, void *p) { struct macsec_dev *macsec = macsec_priv(dev); @@ -2895,6 +2897,7 @@ static int macsec_set_mac_address(struct net_device *dev, void *p) out: ether_addr_copy(dev->dev_addr, addr->sa_data); + macsec->secy.sci = dev_to_sci(dev, MACSEC_PORT_ES); return 0; } @@ -2977,6 +2980,7 @@ static const struct device_type macsec_type = { static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = { [IFLA_MACSEC_SCI] = { .type = NLA_U64 }, + [IFLA_MACSEC_PORT] = { .type = NLA_U16 }, [IFLA_MACSEC_ICV_LEN] = { .type = NLA_U8 }, [IFLA_MACSEC_CIPHER_SUITE] = { .type = NLA_U64 }, [IFLA_MACSEC_WINDOW] = { .type = NLA_U32 }, @@ -2993,12 +2997,10 @@ static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = { static void macsec_free_netdev(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); - struct net_device *real_dev = macsec->real_dev; free_percpu(macsec->stats); free_percpu(macsec->secy.tx_sc.stats); - dev_put(real_dev); } static void macsec_setup(struct net_device *dev) @@ -3164,11 +3166,6 @@ static bool sci_exists(struct net_device *dev, sci_t sci) return false; } -static sci_t dev_to_sci(struct net_device *dev, __be16 port) -{ - return make_sci(dev->dev_addr, port); -} - static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len) { struct macsec_dev *macsec = macsec_priv(dev); @@ -3239,8 +3236,6 @@ static int macsec_newlink(struct net *net, struct net_device *dev, if (err < 0) return err; - dev_hold(real_dev); - macsec->nest_level = dev_get_nest_level(real_dev) + 1; netdev_lockdep_set_classes(dev); lockdep_set_class_and_subclass(&dev->addr_list_lock, @@ -3277,6 +3272,9 @@ static int macsec_newlink(struct net *net, struct net_device *dev, if (err < 0) goto del_dev; + netif_stacked_transfer_operstate(real_dev, dev); + linkwatch_fire_event(dev); + macsec_generation++; return 0; @@ -3448,6 +3446,20 @@ static int macsec_notify(struct notifier_block *this, unsigned long event, return NOTIFY_DONE; switch (event) { + case NETDEV_DOWN: + case NETDEV_UP: + case NETDEV_CHANGE: { + struct macsec_dev *m, *n; + struct macsec_rxh_data *rxd; + + rxd = macsec_data_rtnl(real_dev); + list_for_each_entry_safe(m, n, &rxd->secys, secys) { + struct net_device *dev = m->secy.netdev; + + netif_stacked_transfer_operstate(real_dev, dev); + } + break; + } case NETDEV_UNREGISTER: { struct macsec_dev *m, *n; struct macsec_rxh_data *rxd; diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 963a02c988e9..6989e84670e5 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -338,6 +338,8 @@ static void macvlan_process_broadcast(struct work_struct *w) if (src) dev_put(src->dev); kfree_skb(skb); + + cond_resched(); } } @@ -363,10 +365,11 @@ static void macvlan_broadcast_enqueue(struct macvlan_port *port, } spin_unlock(&port->bc_queue.lock); + schedule_work(&port->bc_work); + if (err) goto free_nskb; - schedule_work(&port->bc_work); return; free_nskb: @@ -514,10 +517,11 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) const struct macvlan_dev *dest; if (vlan->mode == MACVLAN_MODE_BRIDGE) { - const struct ethhdr *eth = (void *)skb->data; + const struct ethhdr *eth = skb_eth_hdr(skb); /* send to other bridge ports directly */ if (is_multicast_ether_addr(eth->h_dest)) { + skb_reset_mac_header(skb); macvlan_broadcast(skb, port, dev, MACVLAN_MODE_BRIDGE); goto xmit_world; } diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 0250aa9ae2cb..97bf49ad81a6 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -236,7 +236,7 @@ static void ntb_netdev_tx_timer(unsigned long data) struct ntb_netdev *dev = netdev_priv(ndev); if (ntb_transport_tx_free_entry(dev->qp) < tx_stop) { - mod_timer(&dev->tx_timer, jiffies + msecs_to_jiffies(tx_time)); + mod_timer(&dev->tx_timer, jiffies + usecs_to_jiffies(tx_time)); } else { /* Make sure anybody stopping the queue after this sees the new * value of ntb_transport_tx_free_entry() diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c index 12b09e6e03ba..0cbcced0870e 100644 --- a/drivers/net/phy/dp83867.c +++ b/drivers/net/phy/dp83867.c @@ -33,10 +33,18 @@ /* Extended Registers */ #define DP83867_CFG4 0x0031 +#define DP83867_CFG4_SGMII_ANEG_MASK (BIT(5) | BIT(6)) +#define DP83867_CFG4_SGMII_ANEG_TIMER_11MS (3 << 5) +#define DP83867_CFG4_SGMII_ANEG_TIMER_800US (2 << 5) +#define DP83867_CFG4_SGMII_ANEG_TIMER_2US (1 << 5) +#define DP83867_CFG4_SGMII_ANEG_TIMER_16MS (0 << 5) + #define DP83867_RGMIICTL 0x0032 #define DP83867_STRAP_STS1 0x006E #define DP83867_RGMIIDCTL 0x0086 #define DP83867_IO_MUX_CFG 0x0170 +#define DP83867_10M_SGMII_CFG 0x016F +#define DP83867_10M_SGMII_RATE_ADAPT_MASK BIT(7) #define DP83867_SW_RESET BIT(15) #define DP83867_SW_RESTART BIT(14) @@ -76,6 +84,10 @@ #define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX 0x0 #define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN 0x1f +/* CFG3 bits */ +#define DP83867_CFG3_INT_OE BIT(7) +#define DP83867_CFG3_ROBUST_AUTO_MDIX BIT(9) + /* CFG4 bits */ #define DP83867_CFG4_PORT_MIRROR_EN BIT(0) @@ -283,13 +295,43 @@ static int dp83867_config_init(struct phy_device *phydev) } } - /* Enable Interrupt output INT_OE in CFG3 register */ - if (phy_interrupt_is_valid(phydev)) { - val = phy_read(phydev, DP83867_CFG3); - val |= BIT(7); - phy_write(phydev, DP83867_CFG3, val); + if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { + /* For support SPEED_10 in SGMII mode + * DP83867_10M_SGMII_RATE_ADAPT bit + * has to be cleared by software. That + * does not affect SPEED_100 and + * SPEED_1000. + */ + val = phy_read_mmd(phydev, DP83867_DEVADDR, + DP83867_10M_SGMII_CFG); + val &= ~DP83867_10M_SGMII_RATE_ADAPT_MASK; + ret = phy_write_mmd(phydev, DP83867_DEVADDR, + DP83867_10M_SGMII_CFG, val); + + if (ret) + return ret; + + /* After reset SGMII Autoneg timer is set to 2us (bits 6 and 5 + * are 01). That is not enough to finalize autoneg on some + * devices. Increase this timer duration to maximum 16ms. + */ + val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4); + val &= ~DP83867_CFG4_SGMII_ANEG_MASK; + val |= DP83867_CFG4_SGMII_ANEG_TIMER_16MS; + ret = phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4, val); + + if (ret) + return ret; } + val = phy_read(phydev, DP83867_CFG3); + /* Enable Interrupt output INT_OE in CFG3 register */ + if (phy_interrupt_is_valid(phydev)) + val |= DP83867_CFG3_INT_OE; + + val |= DP83867_CFG3_ROBUST_AUTO_MDIX; + phy_write(phydev, DP83867_CFG3, val); + if (dp83867->port_mirroring != DP83867_PORT_MIRROING_KEEP) dp83867_config_port_mirroring(phydev); diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index eb5167210681..3ab2eb677a59 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -67,11 +67,11 @@ static int fixed_mdio_read(struct mii_bus *bus, int phy_addr, int reg_num) do { s = read_seqcount_begin(&fp->seqcount); /* Issue callback if user registered it. */ - if (fp->link_update) { + if (fp->link_update) fp->link_update(fp->phydev->attached_dev, &fp->status); - fixed_phy_update(fp); - } + /* Check the GPIO for change in status */ + fixed_phy_update(fp); state = fp->status; } while (read_seqcount_retry(&fp->seqcount, s)); diff --git a/drivers/net/phy/mdio-bcm-iproc.c b/drivers/net/phy/mdio-bcm-iproc.c index 46fe1ae919a3..51ce3ea17fb3 100644 --- a/drivers/net/phy/mdio-bcm-iproc.c +++ b/drivers/net/phy/mdio-bcm-iproc.c @@ -188,6 +188,23 @@ static int iproc_mdio_remove(struct platform_device *pdev) return 0; } +#ifdef CONFIG_PM_SLEEP +int iproc_mdio_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct iproc_mdio_priv *priv = platform_get_drvdata(pdev); + + /* restore the mii clock configuration */ + iproc_mdio_config_clk(priv->base); + + return 0; +} + +static const struct dev_pm_ops iproc_mdio_pm_ops = { + .resume = iproc_mdio_resume +}; +#endif /* CONFIG_PM_SLEEP */ + static const struct of_device_id iproc_mdio_of_match[] = { { .compatible = "brcm,iproc-mdio", }, { /* sentinel */ }, @@ -198,6 +215,9 @@ static struct platform_driver iproc_mdio_driver = { .driver = { .name = "iproc-mdio", .of_match_table = iproc_mdio_of_match, +#ifdef CONFIG_PM_SLEEP + .pm = &iproc_mdio_pm_ops, +#endif }, .probe = iproc_mdio_probe, .remove = iproc_mdio_remove, diff --git a/drivers/net/phy/mdio-bcm-unimac.c b/drivers/net/phy/mdio-bcm-unimac.c index 08e0647b85e2..52703bbd4d66 100644 --- a/drivers/net/phy/mdio-bcm-unimac.c +++ b/drivers/net/phy/mdio-bcm-unimac.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -45,6 +46,8 @@ struct unimac_mdio_priv { void __iomem *base; int (*wait_func) (void *wait_func_data); void *wait_func_data; + struct clk *clk; + u32 clk_freq; }; static inline u32 unimac_mdio_readl(struct unimac_mdio_priv *priv, u32 offset) @@ -189,6 +192,35 @@ static int unimac_mdio_reset(struct mii_bus *bus) return 0; } +static void unimac_mdio_clk_set(struct unimac_mdio_priv *priv) +{ + unsigned long rate; + u32 reg, div; + + /* Keep the hardware default values */ + if (!priv->clk_freq) + return; + + if (!priv->clk) + rate = 250000000; + else + rate = clk_get_rate(priv->clk); + + div = (rate / (2 * priv->clk_freq)) - 1; + if (div & ~MDIO_CLK_DIV_MASK) { + pr_warn("Incorrect MDIO clock frequency, ignoring\n"); + return; + } + + /* The MDIO clock is the reference clock (typicaly 250Mhz) divided by + * 2 x (MDIO_CLK_DIV + 1) + */ + reg = unimac_mdio_readl(priv, MDIO_CFG); + reg &= ~(MDIO_CLK_DIV_MASK << MDIO_CLK_DIV_SHIFT); + reg |= div << MDIO_CLK_DIV_SHIFT; + unimac_mdio_writel(priv, reg, MDIO_CFG); +} + static int unimac_mdio_probe(struct platform_device *pdev) { struct unimac_mdio_pdata *pdata = pdev->dev.platform_data; @@ -215,9 +247,26 @@ static int unimac_mdio_probe(struct platform_device *pdev) return -ENOMEM; } + priv->clk = devm_clk_get(&pdev->dev, NULL); + if (PTR_ERR(priv->clk) == -EPROBE_DEFER) + return PTR_ERR(priv->clk); + else + priv->clk = NULL; + + ret = clk_prepare_enable(priv->clk); + if (ret) + return ret; + + if (of_property_read_u32(np, "clock-frequency", &priv->clk_freq)) + priv->clk_freq = 0; + + unimac_mdio_clk_set(priv); + priv->mii_bus = mdiobus_alloc(); - if (!priv->mii_bus) - return -ENOMEM; + if (!priv->mii_bus) { + ret = -ENOMEM; + goto out_clk_disable; + } bus = priv->mii_bus; bus->priv = priv; @@ -251,6 +300,8 @@ static int unimac_mdio_probe(struct platform_device *pdev) out_mdio_free: mdiobus_free(bus); +out_clk_disable: + clk_disable_unprepare(priv->clk); return ret; } @@ -260,10 +311,37 @@ static int unimac_mdio_remove(struct platform_device *pdev) mdiobus_unregister(priv->mii_bus); mdiobus_free(priv->mii_bus); + clk_disable_unprepare(priv->clk); return 0; } +static int __maybe_unused unimac_mdio_suspend(struct device *d) +{ + struct unimac_mdio_priv *priv = dev_get_drvdata(d); + + clk_disable_unprepare(priv->clk); + + return 0; +} + +static int __maybe_unused unimac_mdio_resume(struct device *d) +{ + struct unimac_mdio_priv *priv = dev_get_drvdata(d); + int ret; + + ret = clk_prepare_enable(priv->clk); + if (ret) + return ret; + + unimac_mdio_clk_set(priv); + + return 0; +} + +static SIMPLE_DEV_PM_OPS(unimac_mdio_pm_ops, + unimac_mdio_suspend, unimac_mdio_resume); + static const struct of_device_id unimac_mdio_ids[] = { { .compatible = "brcm,genet-mdio-v5", }, { .compatible = "brcm,genet-mdio-v4", }, @@ -279,6 +357,7 @@ static struct platform_driver unimac_mdio_driver = { .driver = { .name = UNIMAC_MDIO_DRV_NAME, .of_match_table = unimac_mdio_ids, + .pm = &unimac_mdio_pm_ops, }, .probe = unimac_mdio_probe, .remove = unimac_mdio_remove, diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c index 650c2667d523..fe81741ab66a 100644 --- a/drivers/net/phy/mscc.c +++ b/drivers/net/phy/mscc.c @@ -111,8 +111,8 @@ struct vsc8531_private { #ifdef CONFIG_OF_MDIO struct vsc8531_edge_rate_table { - u16 vddmac; - u8 slowdown[8]; + u32 vddmac; + u32 slowdown[8]; }; static const struct vsc8531_edge_rate_table edge_table[] = { @@ -375,8 +375,7 @@ out_unlock: #ifdef CONFIG_OF_MDIO static int vsc85xx_edge_rate_magic_get(struct phy_device *phydev) { - u8 sd; - u16 vdd; + u32 vdd, sd; int rc, i, j; struct device *dev = &phydev->mdio.dev; struct device_node *of_node = dev->of_node; @@ -385,11 +384,11 @@ static int vsc85xx_edge_rate_magic_get(struct phy_device *phydev) if (!of_node) return -ENODEV; - rc = of_property_read_u16(of_node, "vsc8531,vddmac", &vdd); + rc = of_property_read_u32(of_node, "vsc8531,vddmac", &vdd); if (rc != 0) vdd = MSCC_VDDMAC_3300; - rc = of_property_read_u8(of_node, "vsc8531,edge-slowdown", &sd); + rc = of_property_read_u32(of_node, "vsc8531,edge-slowdown", &sd); if (rc != 0) sd = 0; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index ed7e3c70b511..27f1f0b5b8f6 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -91,7 +91,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) * MDIO bus driver and clock gated at this point. */ if (!netdev) - return !phydev->suspended; + goto out; /* Don't suspend PHY if the attached netdev parent may wakeup. * The parent may point to a PCI device, as in tg3 driver. @@ -106,7 +106,8 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) if (device_may_wakeup(&netdev->dev)) return false; - return true; +out: + return !phydev->suspended; } static int mdio_bus_phy_suspend(struct device *dev) @@ -124,6 +125,8 @@ static int mdio_bus_phy_suspend(struct device *dev) if (!mdio_bus_phy_may_suspend(phydev)) return 0; + phydev->suspended_by_mdio_bus = true; + return phy_suspend(phydev); } @@ -132,9 +135,11 @@ static int mdio_bus_phy_resume(struct device *dev) struct phy_device *phydev = to_phy_device(dev); int ret; - if (!mdio_bus_phy_may_suspend(phydev)) + if (!phydev->suspended_by_mdio_bus) goto no_resume; + phydev->suspended_by_mdio_bus = false; + ret = phy_resume(phydev); if (ret < 0) return ret; @@ -367,8 +372,8 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, mdiodev->device_free = phy_mdio_device_free; mdiodev->device_remove = phy_mdio_device_remove; - dev->speed = 0; - dev->duplex = -1; + dev->speed = SPEED_UNKNOWN; + dev->duplex = DUPLEX_UNKNOWN; dev->pause = 0; dev->asym_pause = 0; dev->link = 1; diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index b408512f9dba..f347e5ac6677 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -878,15 +878,15 @@ ppp_async_input(struct asyncppp *ap, const unsigned char *buf, skb = dev_alloc_skb(ap->mru + PPP_HDRLEN + 2); if (!skb) goto nomem; - ap->rpkt = skb; - } - if (skb->len == 0) { - /* Try to get the payload 4-byte aligned. - * This should match the - * PPP_ALLSTATIONS/PPP_UI/compressed tests in - * process_input_packet, but we do not have - * enough chars here to test buf[1] and buf[2]. - */ + ap->rpkt = skb; + } + if (skb->len == 0) { + /* Try to get the payload 4-byte aligned. + * This should match the + * PPP_ALLSTATIONS/PPP_UI/compressed tests in + * process_input_packet, but we do not have + * enough chars here to test buf[1] and buf[2]. + */ if (buf[0] != PPP_ALLSTATIONS) skb_reserve(skb, 2 + (buf[0] & 1)); } diff --git a/drivers/net/slip/slhc.c b/drivers/net/slip/slhc.c index ea90db3c7705..01334aeac577 100644 --- a/drivers/net/slip/slhc.c +++ b/drivers/net/slip/slhc.c @@ -232,7 +232,7 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, register struct cstate *cs = lcs->next; register unsigned long deltaS, deltaA; register short changes = 0; - int hlen; + int nlen, hlen; unsigned char new_seq[16]; register unsigned char *cp = new_seq; struct iphdr *ip; @@ -248,6 +248,8 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, return isize; ip = (struct iphdr *) icp; + if (ip->version != 4 || ip->ihl < 5) + return isize; /* Bail if this packet isn't TCP, or is an IP fragment */ if (ip->protocol != IPPROTO_TCP || (ntohs(ip->frag_off) & 0x3fff)) { @@ -258,10 +260,14 @@ slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, comp->sls_o_tcp++; return isize; } - /* Extract TCP header */ + nlen = ip->ihl * 4; + if (isize < nlen + sizeof(*th)) + return isize; - th = (struct tcphdr *)(((unsigned char *)ip) + ip->ihl*4); - hlen = ip->ihl*4 + th->doff*4; + th = (struct tcphdr *)(icp + nlen); + if (th->doff < sizeof(struct tcphdr) / 4) + return isize; + hlen = nlen + th->doff * 4; /* Bail if the TCP packet isn't `compressible' (i.e., ACK isn't set or * some other control bit is set). Also uncompressible if diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 436dd78c396a..d7882b548b79 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -452,9 +452,16 @@ static void slip_transmit(struct work_struct *work) */ static void slip_write_wakeup(struct tty_struct *tty) { - struct slip *sl = tty->disc_data; + struct slip *sl; + + rcu_read_lock(); + sl = rcu_dereference(tty->disc_data); + if (!sl) + goto out; schedule_work(&sl->tx_work); +out: + rcu_read_unlock(); } static void sl_tx_timeout(struct net_device *dev) @@ -859,6 +866,11 @@ err_free_chan: sl->tty = NULL; tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); + sl_free_netdev(sl->dev); + /* do not call free_netdev before rtnl_unlock */ + rtnl_unlock(); + free_netdev(sl->dev); + return err; err_exit: rtnl_unlock(); @@ -884,10 +896,11 @@ static void slip_close(struct tty_struct *tty) return; spin_lock_bh(&sl->lock); - tty->disc_data = NULL; + rcu_assign_pointer(tty->disc_data, NULL); sl->tty = NULL; spin_unlock_bh(&sl->lock); + synchronize_rcu(); flush_work(&sl->tx_work); /* VSV = very important to remove timers */ diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index f1aabf8a16c2..3dba58fa3433 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -2207,6 +2207,8 @@ team_nl_option_policy[TEAM_ATTR_OPTION_MAX + 1] = { [TEAM_ATTR_OPTION_CHANGED] = { .type = NLA_FLAG }, [TEAM_ATTR_OPTION_TYPE] = { .type = NLA_U8 }, [TEAM_ATTR_OPTION_DATA] = { .type = NLA_BINARY }, + [TEAM_ATTR_OPTION_PORT_IFINDEX] = { .type = NLA_U32 }, + [TEAM_ATTR_OPTION_ARRAY_INDEX] = { .type = NLA_U32 }, }; static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 3086211829a7..ba34f61d70de 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1134,6 +1134,13 @@ static void tun_net_init(struct net_device *dev) dev->max_mtu = MAX_MTU - dev->hard_header_len; } +static bool tun_sock_writeable(struct tun_struct *tun, struct tun_file *tfile) +{ + struct sock *sk = tfile->socket.sk; + + return (tun->dev->flags & IFF_UP) && sock_writeable(sk); +} + /* Character device part */ /* Poll */ @@ -1156,10 +1163,14 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) if (!skb_array_empty(&tfile->tx_array)) mask |= POLLIN | POLLRDNORM; - if (tun->dev->flags & IFF_UP && - (sock_writeable(sk) || - (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && - sock_writeable(sk)))) + /* Make sure SOCKWQ_ASYNC_NOSPACE is set if not writable to + * guarantee EPOLLOUT to be raised by either here or + * tun_sock_write_space(). Then process could get notification + * after it writes to a down device and meets -EIO. + */ + if (tun_sock_writeable(tun, tfile) || + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags) && + tun_sock_writeable(tun, tfile))) mask |= POLLOUT | POLLWRNORM; if (tun->dev->reg_state != NETREG_REGISTERED) diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c index 501576f53854..914cac55a7ae 100644 --- a/drivers/net/usb/ax88172a.c +++ b/drivers/net/usb/ax88172a.c @@ -208,7 +208,7 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf) /* Get the MAC address */ ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0); - if (ret < 0) { + if (ret < ETH_ALEN) { netdev_err(dev->net, "Failed to read MAC address: %d\n", ret); goto free; } diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index ffd15f5f836f..6c7a169d906a 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -800,6 +800,13 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* ThinkPad USB-C Dock Gen 2 (based on Realtek RTL8153) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0xa387, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* NVIDIA Tegra USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */ { USB_DEVICE_AND_INTERFACE_INFO(NVIDIA_VENDOR_ID, 0x09ff, USB_CLASS_COMM, diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index ab28487e6048..cb4c9d419bd3 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -578,8 +578,8 @@ static void cdc_ncm_set_dgram_size(struct usbnet *dev, int new_size) /* read current mtu value from device */ err = usbnet_read_cmd(dev, USB_CDC_GET_MAX_DATAGRAM_SIZE, USB_TYPE_CLASS | USB_DIR_IN | USB_RECIP_INTERFACE, - 0, iface_no, &max_datagram_size, 2); - if (err < 0) { + 0, iface_no, &max_datagram_size, sizeof(max_datagram_size)); + if (err != sizeof(max_datagram_size)) { dev_dbg(&dev->intf->dev, "GET_MAX_DATAGRAM_SIZE failed\n"); goto out; } @@ -590,7 +590,7 @@ static void cdc_ncm_set_dgram_size(struct usbnet *dev, int new_size) max_datagram_size = cpu_to_le16(ctx->max_datagram_size); err = usbnet_write_cmd(dev, USB_CDC_SET_MAX_DATAGRAM_SIZE, USB_TYPE_CLASS | USB_DIR_OUT | USB_RECIP_INTERFACE, - 0, iface_no, &max_datagram_size, 2); + 0, iface_no, &max_datagram_size, sizeof(max_datagram_size)); if (err < 0) dev_dbg(&dev->intf->dev, "SET_MAX_DATAGRAM_SIZE failed\n"); diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 24b994c68bcc..b179a96ea08c 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -497,7 +498,7 @@ static int lan78xx_read_stats(struct lan78xx_net *dev, } } else { netdev_warn(dev->net, - "Failed to read stat ret = 0x%x", ret); + "Failed to read stat ret = %d", ret); } kfree(stats); @@ -1765,6 +1766,7 @@ static int lan78xx_mdio_init(struct lan78xx_net *dev) dev->mdiobus->read = lan78xx_mdiobus_read; dev->mdiobus->write = lan78xx_mdiobus_write; dev->mdiobus->name = "lan78xx-mdiobus"; + dev->mdiobus->parent = &dev->udev->dev; snprintf(dev->mdiobus->id, MII_BUS_ID_SIZE, "usb-%03d:%03d", dev->udev->bus->busnum, dev->udev->devnum); @@ -2603,11 +2605,6 @@ static int lan78xx_stop(struct net_device *net) return 0; } -static int lan78xx_linearize(struct sk_buff *skb) -{ - return skb_linearize(skb); -} - static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev, struct sk_buff *skb, gfp_t flags) { @@ -2618,8 +2615,10 @@ static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev, return NULL; } - if (lan78xx_linearize(skb) < 0) + if (skb_linearize(skb)) { + dev_kfree_skb_any(skb); return NULL; + } tx_cmd_a = (u32)(skb->len & TX_CMD_A_LEN_MASK_) | TX_CMD_A_FCS_; @@ -2818,6 +2817,11 @@ static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf) int i; ret = lan78xx_get_endpoints(dev, intf); + if (ret) { + netdev_warn(dev->net, "lan78xx_get_endpoints failed: %d\n", + ret); + return ret; + } dev->data[0] = (unsigned long)kzalloc(sizeof(*pdata), GFP_KERNEL); @@ -3522,6 +3526,19 @@ static void lan78xx_tx_timeout(struct net_device *net) tasklet_schedule(&dev->bh); } +static netdev_features_t lan78xx_features_check(struct sk_buff *skb, + struct net_device *netdev, + netdev_features_t features) +{ + if (skb->len + TX_OVERHEAD > MAX_SINGLE_PACKET_SIZE) + features &= ~NETIF_F_GSO_MASK; + + features = vlan_features_check(skb, features); + features = vxlan_features_check(skb, features); + + return features; +} + static const struct net_device_ops lan78xx_netdev_ops = { .ndo_open = lan78xx_open, .ndo_stop = lan78xx_stop, @@ -3535,6 +3552,7 @@ static const struct net_device_ops lan78xx_netdev_ops = { .ndo_set_features = lan78xx_set_features, .ndo_vlan_rx_add_vid = lan78xx_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = lan78xx_vlan_rx_kill_vid, + .ndo_features_check = lan78xx_features_check, }; static void lan78xx_stat_monitor(unsigned long param) @@ -3609,6 +3627,7 @@ static int lan78xx_probe(struct usb_interface *intf, /* MTU range: 68 - 9000 */ netdev->max_mtu = MAX_SINGLE_PACKET_SIZE; + netif_set_gso_max_size(netdev, MAX_SINGLE_PACKET_SIZE - MAX_HEADER); dev->ep_blkin = (intf->cur_altsetting)->endpoint + 0; dev->ep_blkout = (intf->cur_altsetting)->endpoint + 1; @@ -3642,10 +3661,14 @@ static int lan78xx_probe(struct usb_interface *intf, /* driver requires remote-wakeup capability during autosuspend. */ intf->needs_remote_wakeup = 1; + ret = lan78xx_phy_init(dev); + if (ret < 0) + goto out4; + ret = register_netdev(netdev); if (ret != 0) { netif_err(dev, probe, netdev, "couldn't register the device\n"); - goto out4; + goto out5; } usb_set_intfdata(intf, dev); @@ -3658,14 +3681,10 @@ static int lan78xx_probe(struct usb_interface *intf, pm_runtime_set_autosuspend_delay(&udev->dev, DEFAULT_AUTOSUSPEND_DELAY); - ret = lan78xx_phy_init(dev); - if (ret < 0) - goto out5; - return 0; out5: - unregister_netdev(netdev); + phy_disconnect(netdev->phydev); out4: usb_free_urb(dev->urb_intr); out3: diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index e406a05e79dc..a8d5561afc7d 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -63,7 +63,6 @@ enum qmi_wwan_flags { enum qmi_wwan_quirks { QMI_WWAN_QUIRK_DTR = 1 << 0, /* needs "set DTR" request */ - QMI_WWAN_QUIRK_QUECTEL_DYNCFG = 1 << 1, /* check num. endpoints */ }; struct qmimux_hdr { @@ -275,6 +274,9 @@ static void qmi_wwan_netdev_setup(struct net_device *net) netdev_dbg(net, "mode: raw IP\n"); } else if (!net->header_ops) { /* don't bother if already set */ ether_setup(net); + /* Restoring min/max mtu values set originally by usbnet */ + net->min_mtu = 0; + net->max_mtu = ETH_MAX_MTU; clear_bit(EVENT_NO_IP_ALIGN, &dev->flags); netdev_dbg(net, "mode: Ethernet\n"); } @@ -853,16 +855,6 @@ static const struct driver_info qmi_wwan_info_quirk_dtr = { .data = QMI_WWAN_QUIRK_DTR, }; -static const struct driver_info qmi_wwan_info_quirk_quectel_dyncfg = { - .description = "WWAN/QMI device", - .flags = FLAG_WWAN | FLAG_SEND_ZLP, - .bind = qmi_wwan_bind, - .unbind = qmi_wwan_unbind, - .manage_power = qmi_wwan_manage_power, - .rx_fixup = qmi_wwan_rx_fixup, - .data = QMI_WWAN_QUIRK_DTR | QMI_WWAN_QUIRK_QUECTEL_DYNCFG, -}; - #define HUAWEI_VENDOR_ID 0x12D1 /* map QMI/wwan function by a fixed interface number */ @@ -883,14 +875,18 @@ static const struct driver_info qmi_wwan_info_quirk_quectel_dyncfg = { #define QMI_GOBI_DEVICE(vend, prod) \ QMI_FIXED_INTF(vend, prod, 0) -/* Quectel does not use fixed interface numbers on at least some of their - * devices. We need to check the number of endpoints to ensure that we bind to - * the correct interface. +/* Many devices have QMI and DIAG functions which are distinguishable + * from other vendor specific functions by class, subclass and + * protocol all being 0xff. The DIAG function has exactly 2 endpoints + * and is silently rejected when probed. + * + * This makes it possible to match dynamically numbered QMI functions + * as seen on e.g. many Quectel modems. */ -#define QMI_QUIRK_QUECTEL_DYNCFG(vend, prod) \ +#define QMI_MATCH_FF_FF_FF(vend, prod) \ USB_DEVICE_AND_INTERFACE_INFO(vend, prod, USB_CLASS_VENDOR_SPEC, \ USB_SUBCLASS_VENDOR_SPEC, 0xff), \ - .driver_info = (unsigned long)&qmi_wwan_info_quirk_quectel_dyncfg + .driver_info = (unsigned long)&qmi_wwan_info_quirk_dtr static const struct usb_device_id products[] = { /* 1. CDC ECM like devices match on the control interface */ @@ -996,9 +992,10 @@ static const struct usb_device_id products[] = { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x581d, USB_CLASS_VENDOR_SPEC, 1, 7), .driver_info = (unsigned long)&qmi_wwan_info, }, - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */ - {QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0125)}, /* Quectel EC25, EC20 R2.0 Mini PCIe */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0306)}, /* Quectel EP06/EG06/EM06 */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0512)}, /* Quectel EG12/EM12 */ + {QMI_MATCH_FF_FF_FF(0x2c7c, 0x0800)}, /* Quectel RM500Q-GL */ /* 3. Combined interface devices matching on interface number */ {QMI_FIXED_INTF(0x0408, 0xea42, 4)}, /* Yota / Megafon M100-1 */ @@ -1286,6 +1283,8 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81b6, 8)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81b6, 10)}, /* Dell Wireless 5811e */ {QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */ + {QMI_FIXED_INTF(0x413c, 0x81d7, 1)}, /* Dell Wireless 5821e preproduction config */ + {QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ @@ -1294,6 +1293,8 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */ {QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */ {QMI_QUIRK_SET_DTR(0x2cb7, 0x0104, 4)}, /* Fibocom NL678 series */ + {QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */ + {QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ @@ -1374,7 +1375,6 @@ static int qmi_wwan_probe(struct usb_interface *intf, { struct usb_device_id *id = (struct usb_device_id *)prod; struct usb_interface_descriptor *desc = &intf->cur_altsetting->desc; - const struct driver_info *info; /* Workaround to enable dynamic IDs. This disables usbnet * blacklisting functionality. Which, if required, can be @@ -1410,12 +1410,8 @@ static int qmi_wwan_probe(struct usb_interface *intf, * different. Ignore the current interface if the number of endpoints * equals the number for the diag interface (two). */ - info = (void *)id->driver_info; - - if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) { - if (desc->bNumEndpoints == 2) - return -ENODEV; - } + if (desc->bNumEndpoints == 2) + return -ENODEV; return usbnet_probe(intf, id); } diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 455eec3c4694..cadf5ded45a9 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -2696,6 +2696,8 @@ static u16 r8153_phy_status(struct r8152 *tp, u16 desired) } msleep(20); + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; } return data; @@ -4055,7 +4057,10 @@ static void r8153_init(struct r8152 *tp) if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) break; + msleep(20); + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; } data = r8153_phy_status(tp, 0); @@ -4170,7 +4175,10 @@ static void r8153b_init(struct r8152 *tp) if (ocp_read_word(tp, MCU_TYPE_PLA, PLA_BOOT_CTRL) & AUTOLOAD_DONE) break; + msleep(20); + if (test_bit(RTL8152_UNPLUG, &tp->flags)) + break; } data = r8153_phy_status(tp, 0); @@ -4465,10 +4473,9 @@ static int rtl8152_reset_resume(struct usb_interface *intf) struct r8152 *tp = usb_get_intfdata(intf); clear_bit(SELECTIVE_SUSPEND, &tp->flags); - mutex_lock(&tp->control); tp->rtl_ops.init(tp); queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); - mutex_unlock(&tp->control); + set_ethernet_addr(tp); return rtl8152_resume(intf); } @@ -5159,6 +5166,9 @@ static int rtl8152_probe(struct usb_interface *intf, return -ENODEV; } + if (intf->cur_altsetting->desc.bNumEndpoints < 3) + return -ENODEV; + usb_reset_device(udev); netdev = alloc_etherdev(sizeof(struct r8152)); if (!netdev) { @@ -5242,6 +5252,11 @@ static int rtl8152_probe(struct usb_interface *intf, intf->needs_remote_wakeup = 1; + if (!rtl_can_wakeup(tp)) + __rtl_set_wol(tp, 0); + else + tp->saved_wolopts = __rtl_get_wol(tp); + tp->rtl_ops.init(tp); queue_delayed_work(system_long_wq, &tp->hw_phy_work, 0); set_ethernet_addr(tp); @@ -5255,10 +5270,6 @@ static int rtl8152_probe(struct usb_interface *intf, goto out1; } - if (!rtl_can_wakeup(tp)) - __rtl_set_wol(tp, 0); - - tp->saved_wolopts = __rtl_get_wol(tp); if (tp->saved_wolopts) device_set_wakeup_enable(&udev->dev, true); else @@ -5325,6 +5336,7 @@ static const struct usb_device_id rtl8152_table[] = { {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7205)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x720c)}, {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0x7214)}, + {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO, 0xa387)}, {REALTEK_USB_DEVICE(VENDOR_ID_LINKSYS, 0x0041)}, {REALTEK_USB_DEVICE(VENDOR_ID_NVIDIA, 0x09ff)}, {REALTEK_USB_DEVICE(VENDOR_ID_TPLINK, 0x0601)}, diff --git a/drivers/net/usb/sr9800.c b/drivers/net/usb/sr9800.c index 35f39f23d881..8f8c9ede88c2 100644 --- a/drivers/net/usb/sr9800.c +++ b/drivers/net/usb/sr9800.c @@ -336,7 +336,7 @@ static void sr_set_multicast(struct net_device *net) static int sr_mdio_read(struct net_device *net, int phy_id, int loc) { struct usbnet *dev = netdev_priv(net); - __le16 res; + __le16 res = 0; mutex_lock(&dev->phy_mutex); sr_set_sw_mii(dev); diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 03e4fcdfeab7..e0cea5c05f0e 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -996,24 +996,23 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, struct sk_buff *skb) { int orig_iif = skb->skb_iif; - bool need_strict; + bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); + bool is_ndisc = ipv6_ndisc_frame(skb); - /* loopback traffic; do not push through packet taps again. - * Reset pkt_type for upper layers to process skb + /* loopback, multicast & non-ND link-local traffic; do not push through + * packet taps again. Reset pkt_type for upper layers to process skb */ - if (skb->pkt_type == PACKET_LOOPBACK) { + if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) { skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; IP6CB(skb)->flags |= IP6SKB_L3SLAVE; - skb->pkt_type = PACKET_HOST; + if (skb->pkt_type == PACKET_LOOPBACK) + skb->pkt_type = PACKET_HOST; goto out; } - /* if packet is NDISC or addressed to multicast or link-local - * then keep the ingress interface - */ - need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); - if (!ipv6_ndisc_frame(skb) && !need_strict) { + /* if packet is NDISC then keep the ingress interface */ + if (!is_ndisc) { vrf_rx_stats(vrf_dev, skb->len); skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 291a0a766018..3275e61567d6 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2170,8 +2170,11 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, vni = tunnel_id_to_key32(info->key.tun_id); ifindex = 0; dst_cache = &info->dst_cache; - if (info->options_len) + if (info->options_len) { + if (info->options_len < sizeof(*md)) + goto drop; md = ip_tunnel_info_opts(info); + } ttl = info->key.ttl; tos = info->key.tos; label = info->key.label; @@ -2214,7 +2217,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, skb_dst_update_pmtu(skb, mtu); } - tos = ip_tunnel_ecn_encap(tos, old_iph, skb); + tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr), vni, md, flags, udp_sum); @@ -2255,7 +2258,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, skb_dst_update_pmtu(skb, mtu); } - tos = ip_tunnel_ecn_encap(tos, old_iph, skb); + tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb); ttl = ttl ? : ip6_dst_hoplimit(ndst); skb_scrub_packet(skb, xnet); err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr), @@ -3215,6 +3218,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_fdb *f = NULL; + bool unregister = false; int err; err = vxlan_dev_configure(net, dev, conf, false, extack); @@ -3240,12 +3244,11 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, err = register_netdevice(dev); if (err) goto errout; + unregister = true; err = rtnl_configure_link(dev, NULL); - if (err) { - unregister_netdevice(dev); + if (err) goto errout; - } /* notify default fdb entry */ if (f) @@ -3253,9 +3256,16 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev, list_add(&vxlan->next, &vn->vxlan_list); return 0; + errout: + /* unregister_netdevice() destroys the default FDB entry with deletion + * notification. But the addition notification was not sent yet, so + * destroy the entry by hand here. + */ if (f) vxlan_fdb_destroy(vxlan, f, false); + if (unregister) + unregister_netdevice(dev); return err; } @@ -3485,7 +3495,6 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], struct vxlan_rdst *dst = &vxlan->default_dst; struct vxlan_rdst old_dst; struct vxlan_config conf; - struct vxlan_fdb *f = NULL; int err; err = vxlan_nl2conf(tb, data, @@ -3511,19 +3520,19 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[], old_dst.remote_ifindex, 0); if (!vxlan_addr_any(&dst->remote_ip)) { - err = vxlan_fdb_create(vxlan, all_zeros_mac, + err = vxlan_fdb_update(vxlan, all_zeros_mac, &dst->remote_ip, NUD_REACHABLE | NUD_PERMANENT, + NLM_F_APPEND | NLM_F_CREATE, vxlan->cfg.dst_port, dst->remote_vni, dst->remote_vni, dst->remote_ifindex, - NTF_SELF, &f); + NTF_SELF); if (err) { spin_unlock_bh(&vxlan->hash_lock); return err; } - vxlan_fdb_notify(vxlan, f, first_remote_rtnl(f), RTM_NEWNEIGH); } spin_unlock_bh(&vxlan->hash_lock); } diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 18b648648adb..6a26cef62193 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -76,7 +76,7 @@ static struct ucc_tdm_info utdm_primary_info = { }, }; -static struct ucc_tdm_info utdm_info[MAX_HDLC_NUM]; +static struct ucc_tdm_info utdm_info[UCC_MAX_NUM]; static int uhdlc_init(struct ucc_hdlc_private *priv) { @@ -240,6 +240,11 @@ static int uhdlc_init(struct ucc_hdlc_private *priv) ret = -ENOMEM; goto free_riptr; } + if (riptr != (u16)riptr || tiptr != (u16)tiptr) { + dev_err(priv->dev, "MURAM allocation out of addressable range\n"); + ret = -ENOMEM; + goto free_tiptr; + } /* Set RIPTR, TIPTR */ iowrite16be(riptr, &priv->ucc_pram->riptr); @@ -1114,7 +1119,6 @@ static int ucc_hdlc_probe(struct platform_device *pdev) if (register_hdlc_device(dev)) { ret = -ENOBUFS; pr_err("ucc_hdlc: unable to register hdlc device\n"); - free_netdev(dev); goto free_dev; } diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c index 6a505c26a3e7..a269ed63d90f 100644 --- a/drivers/net/wan/ixp4xx_hss.c +++ b/drivers/net/wan/ixp4xx_hss.c @@ -261,7 +261,7 @@ struct port { struct hss_plat_info *plat; buffer_t *rx_buff_tab[RX_DESCS], *tx_buff_tab[TX_DESCS]; struct desc *desc_tab; /* coherent */ - u32 desc_tab_phys; + dma_addr_t desc_tab_phys; unsigned int id; unsigned int clock_type, clock_rate, loopback; unsigned int initialized, carrier; @@ -861,7 +861,7 @@ static int hss_hdlc_xmit(struct sk_buff *skb, struct net_device *dev) dev->stats.tx_dropped++; return NETDEV_TX_OK; } - memcpy_swab32(mem, (u32 *)((int)skb->data & ~3), bytes / 4); + memcpy_swab32(mem, (u32 *)((uintptr_t)skb->data & ~3), bytes / 4); dev_kfree_skb(skb); #endif diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c index 236c62538036..1eb329fc7241 100644 --- a/drivers/net/wan/sdla.c +++ b/drivers/net/wan/sdla.c @@ -711,7 +711,7 @@ static netdev_tx_t sdla_transmit(struct sk_buff *skb, spin_lock_irqsave(&sdla_lock, flags); SDLA_WINDOW(dev, addr); - pbuf = (void *)(((int) dev->mem_start) + (addr & SDLA_ADDR_MASK)); + pbuf = (void *)(dev->mem_start + (addr & SDLA_ADDR_MASK)); __sdla_write(dev, pbuf->buf_addr, skb->data, skb->len); SDLA_WINDOW(dev, addr); pbuf->opp_flag = 1; diff --git a/drivers/net/wimax/i2400m/op-rfkill.c b/drivers/net/wimax/i2400m/op-rfkill.c index b0dba35a8ad2..dc6fe93ce71f 100644 --- a/drivers/net/wimax/i2400m/op-rfkill.c +++ b/drivers/net/wimax/i2400m/op-rfkill.c @@ -147,6 +147,7 @@ error_msg_to_dev: error_alloc: d_fnend(4, dev, "(wimax_dev %p state %d) = %d\n", wimax_dev, state, result); + kfree(cmd); return result; } diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index 68f0463ed8df..ad4a1efc57c9 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -255,7 +255,8 @@ static int ar5523_cmd(struct ar5523 *ar, u32 code, const void *idata, if (flags & AR5523_CMD_FLAG_MAGIC) hdr->magic = cpu_to_be32(1 << 24); - memcpy(hdr + 1, idata, ilen); + if (ilen) + memcpy(hdr + 1, idata, ilen); cmd->odata = odata; cmd->olen = olen; diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index ff6815e95684..1404ec9f56be 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -663,10 +663,10 @@ static void ath10k_ahb_hif_stop(struct ath10k *ar) ath10k_ahb_irq_disable(ar); synchronize_irq(ar_ahb->irq); - ath10k_pci_flush(ar); - napi_synchronize(&ar->napi); napi_disable(&ar->napi); + + ath10k_pci_flush(ar); } static int ath10k_ahb_hif_power_up(struct ath10k *ar) diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 949ebb3e967b..be9ec265dfe5 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -881,6 +881,7 @@ struct ath10k { struct completion install_key_done; + int last_wmi_vdev_start_status; struct completion vdev_setup_done; struct workqueue_struct *workqueue; diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 58a3c42c4aed..ea47ad4b2343 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -17,6 +17,7 @@ #include "mac.h" +#include #include #include #include @@ -954,7 +955,7 @@ static inline int ath10k_vdev_setup_sync(struct ath10k *ar) if (time_left == 0) return -ETIMEDOUT; - return 0; + return ar->last_wmi_vdev_start_status; } static int ath10k_monitor_vdev_start(struct ath10k *ar, int vdev_id) @@ -3626,7 +3627,7 @@ static int ath10k_mac_tx(struct ath10k *ar, struct ieee80211_vif *vif, enum ath10k_hw_txrx_mode txmode, enum ath10k_mac_tx_path txpath, - struct sk_buff *skb) + struct sk_buff *skb, bool noque_offchan) { struct ieee80211_hw *hw = ar->hw; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); @@ -3654,10 +3655,10 @@ static int ath10k_mac_tx(struct ath10k *ar, } } - if (info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) { + if (!noque_offchan && info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) { if (!ath10k_mac_tx_frm_has_freq(ar)) { - ath10k_dbg(ar, ATH10K_DBG_MAC, "queued offchannel skb %pK\n", - skb); + ath10k_dbg(ar, ATH10K_DBG_MAC, "mac queued offchannel skb %pK len %d\n", + skb, skb->len); skb_queue_tail(&ar->offchan_tx_queue, skb); ieee80211_queue_work(hw, &ar->offchan_tx_work); @@ -3719,8 +3720,8 @@ void ath10k_offchan_tx_work(struct work_struct *work) mutex_lock(&ar->conf_mutex); - ath10k_dbg(ar, ATH10K_DBG_MAC, "mac offchannel skb %pK\n", - skb); + ath10k_dbg(ar, ATH10K_DBG_MAC, "mac offchannel skb %pK len %d\n", + skb, skb->len); hdr = (struct ieee80211_hdr *)skb->data; peer_addr = ieee80211_get_DA(hdr); @@ -3766,7 +3767,7 @@ void ath10k_offchan_tx_work(struct work_struct *work) txmode = ath10k_mac_tx_h_get_txmode(ar, vif, sta, skb); txpath = ath10k_mac_tx_h_get_txpath(ar, skb, txmode); - ret = ath10k_mac_tx(ar, vif, txmode, txpath, skb); + ret = ath10k_mac_tx(ar, vif, txmode, txpath, skb, true); if (ret) { ath10k_warn(ar, "failed to transmit offchannel frame: %d\n", ret); @@ -3776,8 +3777,8 @@ void ath10k_offchan_tx_work(struct work_struct *work) time_left = wait_for_completion_timeout(&ar->offchan_tx_completed, 3 * HZ); if (time_left == 0) - ath10k_warn(ar, "timed out waiting for offchannel skb %pK\n", - skb); + ath10k_warn(ar, "timed out waiting for offchannel skb %pK, len: %d\n", + skb, skb->len); if (!peer && tmp_peer_created) { ret = ath10k_peer_delete(ar, vdev_id, peer_addr); @@ -3956,7 +3957,7 @@ int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw, spin_unlock_bh(&ar->htt.tx_lock); } - ret = ath10k_mac_tx(ar, vif, txmode, txpath, skb); + ret = ath10k_mac_tx(ar, vif, txmode, txpath, skb, false); if (unlikely(ret)) { ath10k_warn(ar, "failed to push frame: %d\n", ret); @@ -4238,7 +4239,7 @@ static void ath10k_mac_op_tx(struct ieee80211_hw *hw, spin_unlock_bh(&ar->htt.tx_lock); } - ret = ath10k_mac_tx(ar, vif, txmode, txpath, skb); + ret = ath10k_mac_tx(ar, vif, txmode, txpath, skb, false); if (ret) { ath10k_warn(ar, "failed to transmit frame: %d\n", ret); if (is_htt) { @@ -8174,6 +8175,7 @@ int ath10k_mac_register(struct ath10k *ar) ar->hw->wiphy->bands[NL80211_BAND_5GHZ] = band; } + wiphy_read_of_freq_limits(ar->hw->wiphy); ath10k_mac_setup_ht_vht_cap(ar); ar->hw->wiphy->interface_modes = diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index d790ea20b95d..f9e409caca68 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -1039,10 +1039,9 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address, struct ath10k_ce *ce = ath10k_ce_priv(ar); int ret = 0; u32 *buf; - unsigned int completed_nbytes, orig_nbytes, remaining_bytes; + unsigned int completed_nbytes, alloc_nbytes, remaining_bytes; struct ath10k_ce_pipe *ce_diag; void *data_buf = NULL; - u32 ce_data; /* Host buffer address in CE space */ dma_addr_t ce_data_base = 0; int i; @@ -1056,9 +1055,10 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address, * 1) 4-byte alignment * 2) Buffer in DMA-able space */ - orig_nbytes = nbytes; + alloc_nbytes = min_t(unsigned int, nbytes, DIAG_TRANSFER_LIMIT); + data_buf = (unsigned char *)dma_alloc_coherent(ar->dev, - orig_nbytes, + alloc_nbytes, &ce_data_base, GFP_ATOMIC); if (!data_buf) { @@ -1066,9 +1066,6 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address, goto done; } - /* Copy caller's data to allocated DMA buf */ - memcpy(data_buf, data, orig_nbytes); - /* * The address supplied by the caller is in the * Target CPU virtual address space. @@ -1081,12 +1078,14 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address, */ address = ath10k_pci_targ_cpu_to_ce_addr(ar, address); - remaining_bytes = orig_nbytes; - ce_data = ce_data_base; + remaining_bytes = nbytes; while (remaining_bytes) { /* FIXME: check cast */ nbytes = min_t(int, remaining_bytes, DIAG_TRANSFER_LIMIT); + /* Copy caller's data to allocated DMA buf */ + memcpy(data_buf, data, nbytes); + /* Set up to receive directly into Target(!) address */ ret = __ath10k_ce_rx_post_buf(ce_diag, &address, address); if (ret != 0) @@ -1096,7 +1095,7 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address, * Request CE to send caller-supplied data that * was copied to bounce buffer to Target(!) address. */ - ret = ath10k_ce_send_nolock(ce_diag, NULL, (u32)ce_data, + ret = ath10k_ce_send_nolock(ce_diag, NULL, ce_data_base, nbytes, 0, 0); if (ret != 0) goto done; @@ -1137,12 +1136,12 @@ int ath10k_pci_diag_write_mem(struct ath10k *ar, u32 address, remaining_bytes -= nbytes; address += nbytes; - ce_data += nbytes; + data += nbytes; } done: if (data_buf) { - dma_free_coherent(ar->dev, orig_nbytes, data_buf, + dma_free_coherent(ar->dev, alloc_nbytes, data_buf, ce_data_base); } @@ -1772,6 +1771,11 @@ static void ath10k_pci_hif_stop(struct ath10k *ar) ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot hif stop\n"); + ath10k_pci_irq_disable(ar); + ath10k_pci_irq_sync(ar); + napi_synchronize(&ar->napi); + napi_disable(&ar->napi); + /* Most likely the device has HTT Rx ring configured. The only way to * prevent the device from accessing (and possible corrupting) host * memory is to reset the chip now. @@ -1785,11 +1789,7 @@ static void ath10k_pci_hif_stop(struct ath10k *ar) */ ath10k_pci_safe_chip_reset(ar); - ath10k_pci_irq_disable(ar); - ath10k_pci_irq_sync(ar); ath10k_pci_flush(ar); - napi_synchronize(&ar->napi); - napi_disable(&ar->napi); spin_lock_irqsave(&ar_pci->ps_lock, flags); WARN_ON(ar_pci->ps_wake_refcount > 0); diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c index 0a1248ebccf5..f49b21b137c1 100644 --- a/drivers/net/wireless/ath/ath10k/sdio.c +++ b/drivers/net/wireless/ath/ath10k/sdio.c @@ -392,16 +392,11 @@ static int ath10k_sdio_mbox_rx_process_packet(struct ath10k *ar, struct ath10k_htc_hdr *htc_hdr = (struct ath10k_htc_hdr *)skb->data; bool trailer_present = htc_hdr->flags & ATH10K_HTC_FLAG_TRAILER_PRESENT; enum ath10k_htc_ep_id eid; - u16 payload_len; u8 *trailer; int ret; - payload_len = le16_to_cpu(htc_hdr->len); - skb->len = payload_len + sizeof(struct ath10k_htc_hdr); - if (trailer_present) { - trailer = skb->data + sizeof(*htc_hdr) + - payload_len - htc_hdr->trailer_len; + trailer = skb->data + skb->len - htc_hdr->trailer_len; eid = pipe_id_to_eid(htc_hdr->eid); @@ -635,13 +630,31 @@ static int ath10k_sdio_mbox_rx_packet(struct ath10k *ar, { struct ath10k_sdio *ar_sdio = ath10k_sdio_priv(ar); struct sk_buff *skb = pkt->skb; + struct ath10k_htc_hdr *htc_hdr; int ret; ret = ath10k_sdio_readsb(ar, ar_sdio->mbox_info.htc_addr, skb->data, pkt->alloc_len); + if (ret) + goto out; + + /* Update actual length. The original length may be incorrect, + * as the FW will bundle multiple packets as long as their sizes + * fit within the same aligned length (pkt->alloc_len). + */ + htc_hdr = (struct ath10k_htc_hdr *)skb->data; + pkt->act_len = le16_to_cpu(htc_hdr->len) + sizeof(*htc_hdr); + if (pkt->act_len > pkt->alloc_len) { + ath10k_warn(ar, "rx packet too large (%zu > %zu)\n", + pkt->act_len, pkt->alloc_len); + ret = -EMSGSIZE; + goto out; + } + + skb_put(skb, pkt->act_len); + +out: pkt->status = ret; - if (!ret) - skb_put(skb, pkt->act_len); return ret; } diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c index d4986f626c35..9999c8c40269 100644 --- a/drivers/net/wireless/ath/ath10k/txrx.c +++ b/drivers/net/wireless/ath/ath10k/txrx.c @@ -100,6 +100,8 @@ int ath10k_txrx_tx_unref(struct ath10k_htt *htt, info = IEEE80211_SKB_CB(msdu); memset(&info->status, 0, sizeof(info->status)); + info->status.rates[0].idx = -1; + trace_ath10k_txrx_tx_unref(ar, tx_done->msdu_id); if (tx_done->status == HTT_TX_COMPL_STATE_DISCARD) { diff --git a/drivers/net/wireless/ath/ath10k/usb.c b/drivers/net/wireless/ath/ath10k/usb.c index f09a4ad2e9de..c64a03f164c0 100644 --- a/drivers/net/wireless/ath/ath10k/usb.c +++ b/drivers/net/wireless/ath/ath10k/usb.c @@ -49,6 +49,10 @@ ath10k_usb_alloc_urb_from_pipe(struct ath10k_usb_pipe *pipe) struct ath10k_urb_context *urb_context = NULL; unsigned long flags; + /* bail if this pipe is not initialized */ + if (!pipe->ar_usb) + return NULL; + spin_lock_irqsave(&pipe->ar_usb->cs_lock, flags); if (!list_empty(&pipe->urb_list_head)) { urb_context = list_first_entry(&pipe->urb_list_head, @@ -66,6 +70,10 @@ static void ath10k_usb_free_urb_to_pipe(struct ath10k_usb_pipe *pipe, { unsigned long flags; + /* bail if this pipe is not initialized */ + if (!pipe->ar_usb) + return; + spin_lock_irqsave(&pipe->ar_usb->cs_lock, flags); pipe->urb_cnt++; @@ -446,6 +454,7 @@ static int ath10k_usb_hif_tx_sg(struct ath10k *ar, u8 pipe_id, ath10k_dbg(ar, ATH10K_DBG_USB_BULK, "usb bulk transmit failed: %d\n", ret); usb_unanchor_urb(urb); + usb_free_urb(urb); ret = -EINVAL; goto err_free_urb_to_pipe; } diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index ab8eb9cdfda0..25f51ca06093 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -2414,7 +2414,8 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) status->freq, status->band, status->signal, status->rate_idx); - ieee80211_rx(ar->hw, skb); + ieee80211_rx_ni(ar->hw, skb); + return 0; } @@ -3132,18 +3133,31 @@ void ath10k_wmi_event_vdev_start_resp(struct ath10k *ar, struct sk_buff *skb) { struct wmi_vdev_start_ev_arg arg = {}; int ret; + u32 status; ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_VDEV_START_RESP_EVENTID\n"); + ar->last_wmi_vdev_start_status = 0; + ret = ath10k_wmi_pull_vdev_start(ar, skb, &arg); if (ret) { ath10k_warn(ar, "failed to parse vdev start event: %d\n", ret); - return; + ar->last_wmi_vdev_start_status = ret; + goto out; } - if (WARN_ON(__le32_to_cpu(arg.status))) - return; + status = __le32_to_cpu(arg.status); + if (WARN_ON_ONCE(status)) { + ath10k_warn(ar, "vdev-start-response reports status error: %d (%s)\n", + status, (status == WMI_VDEV_START_CHAN_INVALID) ? + "chan-invalid" : "unknown"); + /* Setup is done one way or another though, so we should still + * do the completion, so don't return here. + */ + ar->last_wmi_vdev_start_status = -EINVAL; + } +out: complete(&ar->vdev_setup_done); } diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index d0e05aa437e3..947b74c64fec 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -6480,11 +6480,17 @@ struct wmi_ch_info_ev_arg { __le32 rx_frame_count; }; +/* From 10.4 firmware, not sure all have the same values. */ +enum wmi_vdev_start_status { + WMI_VDEV_START_OK = 0, + WMI_VDEV_START_CHAN_INVALID, +}; + struct wmi_vdev_start_ev_arg { __le32 vdev_id; __le32 req_id; __le32 resp_type; /* %WMI_VDEV_RESP_ */ - __le32 status; + __le32 status; /* See wmi_vdev_start_status enum above */ }; struct wmi_peer_kick_ev_arg { diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 414b5b596efc..37deb9bae364 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -939,7 +939,7 @@ static int ath6kl_set_probed_ssids(struct ath6kl *ar, else ssid_list[i].flag = ANY_SSID_FLAG; - if (n_match_ssid == 0) + if (ar->wiphy->max_match_sets != 0 && n_match_ssid == 0) ssid_list[i].flag |= MATCH_SSID_FLAG; } @@ -1093,7 +1093,7 @@ void ath6kl_cfg80211_scan_complete_event(struct ath6kl_vif *vif, bool aborted) if (vif->scan_req->n_ssids && vif->scan_req->ssids[0].ssid_len) { for (i = 0; i < vif->scan_req->n_ssids; i++) { ath6kl_wmi_probedssid_cmd(ar->wmi, vif->fw_vif_idx, - i + 1, DISABLE_SSID_FLAG, + i, DISABLE_SSID_FLAG, 0, NULL); } } diff --git a/drivers/net/wireless/ath/ath6kl/usb.c b/drivers/net/wireless/ath/ath6kl/usb.c index 4defb7a0330f..53b66e9434c9 100644 --- a/drivers/net/wireless/ath/ath6kl/usb.c +++ b/drivers/net/wireless/ath/ath6kl/usb.c @@ -132,6 +132,10 @@ ath6kl_usb_alloc_urb_from_pipe(struct ath6kl_usb_pipe *pipe) struct ath6kl_urb_context *urb_context = NULL; unsigned long flags; + /* bail if this pipe is not initialized */ + if (!pipe->ar_usb) + return NULL; + spin_lock_irqsave(&pipe->ar_usb->cs_lock, flags); if (!list_empty(&pipe->urb_list_head)) { urb_context = @@ -150,6 +154,10 @@ static void ath6kl_usb_free_urb_to_pipe(struct ath6kl_usb_pipe *pipe, { unsigned long flags; + /* bail if this pipe is not initialized */ + if (!pipe->ar_usb) + return; + spin_lock_irqsave(&pipe->ar_usb->cs_lock, flags); pipe->urb_cnt++; diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index 3dbfd86ebe36..76385834a7de 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -4116,7 +4116,7 @@ static void ar9003_hw_thermometer_apply(struct ath_hw *ah) static void ar9003_hw_thermo_cal_apply(struct ath_hw *ah) { - u32 data, ko, kg; + u32 data = 0, ko, kg; if (!AR_SREV_9462_20_OR_LATER(ah)) return; diff --git a/drivers/net/wireless/ath/ath9k/common-spectral.c b/drivers/net/wireless/ath/ath9k/common-spectral.c index a41bcbda1d9e..37d5994eb1cc 100644 --- a/drivers/net/wireless/ath/ath9k/common-spectral.c +++ b/drivers/net/wireless/ath/ath9k/common-spectral.c @@ -411,7 +411,7 @@ ath_cmn_process_ht20_40_fft(struct ath_rx_status *rs, ath_dbg(common, SPECTRAL_SCAN, "Calculated new upper max 0x%X at %i\n", - tmp_mag, i); + tmp_mag, fft_sample_40.upper_max_index); } else for (i = dc_pos; i < SPECTRAL_HT20_40_NUM_BINS; i++) { if (fft_sample_40.data[i] == (upper_mag >> max_exp)) diff --git a/drivers/net/wireless/ath/ath9k/dynack.c b/drivers/net/wireless/ath/ath9k/dynack.c index 6e236a485431..71b4888b30e7 100644 --- a/drivers/net/wireless/ath/ath9k/dynack.c +++ b/drivers/net/wireless/ath/ath9k/dynack.c @@ -300,9 +300,9 @@ void ath_dynack_node_init(struct ath_hw *ah, struct ath_node *an) an->ackto = ackto; - spin_lock(&da->qlock); + spin_lock_bh(&da->qlock); list_add_tail(&an->list, &da->nodes); - spin_unlock(&da->qlock); + spin_unlock_bh(&da->qlock); } EXPORT_SYMBOL(ath_dynack_node_init); @@ -316,9 +316,9 @@ void ath_dynack_node_deinit(struct ath_hw *ah, struct ath_node *an) { struct ath_dynack *da = &ah->dynack; - spin_lock(&da->qlock); + spin_lock_bh(&da->qlock); list_del(&an->list); - spin_unlock(&da->qlock); + spin_unlock_bh(&da->qlock); } EXPORT_SYMBOL(ath_dynack_node_deinit); diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index c5f4dd808745..6f669166c263 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -1214,7 +1214,7 @@ err_fw: static int send_eject_command(struct usb_interface *interface) { struct usb_device *udev = interface_to_usbdev(interface); - struct usb_host_interface *iface_desc = &interface->altsetting[0]; + struct usb_host_interface *iface_desc = interface->cur_altsetting; struct usb_endpoint_descriptor *endpoint; unsigned char *cmd; u8 bulk_out_ep; diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index b38a586ea59a..4748f557c753 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -973,6 +973,8 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv, struct ath_htc_rx_status *rxstatus; struct ath_rx_status rx_stats; bool decrypt_error = false; + __be16 rs_datalen; + bool is_phyerr; if (skb->len < HTC_RX_FRAME_HEADER_SIZE) { ath_err(common, "Corrupted RX frame, dropping (len: %d)\n", @@ -982,11 +984,24 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv, rxstatus = (struct ath_htc_rx_status *)skb->data; - if (be16_to_cpu(rxstatus->rs_datalen) - - (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0) { + rs_datalen = be16_to_cpu(rxstatus->rs_datalen); + if (unlikely(rs_datalen - + (skb->len - HTC_RX_FRAME_HEADER_SIZE) != 0)) { ath_err(common, "Corrupted RX data len, dropping (dlen: %d, skblen: %d)\n", - rxstatus->rs_datalen, skb->len); + rs_datalen, skb->len); + goto rx_next; + } + + is_phyerr = rxstatus->rs_status & ATH9K_RXERR_PHY; + /* + * Discard zero-length packets and packets smaller than an ACK + * which are not PHY_ERROR (short radar pulses have a length of 3) + */ + if (unlikely(!rs_datalen || (rs_datalen < 10 && !is_phyerr))) { + ath_warn(common, + "Short RX data len, dropping (dlen: %d)\n", + rs_datalen); goto rx_next; } @@ -1011,7 +1026,7 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv, * Process PHY errors and return so that the packet * can be dropped. */ - if (rx_stats.rs_status & ATH9K_RXERR_PHY) { + if (unlikely(is_phyerr)) { /* TODO: Not using DFS processing now. */ if (ath_cmn_process_fft(&priv->spec_priv, hdr, &rx_stats, rx_status->mactime)) { diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 8b4ac7f0a09b..72ad84fde5c1 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1251,7 +1251,6 @@ static int ath9k_add_interface(struct ieee80211_hw *hw, struct ath_node *an = &avp->mcast_node; mutex_lock(&sc->mutex); - if (IS_ENABLED(CONFIG_ATH9K_TX99)) { if (sc->cur_chan->nvifs >= 1) { mutex_unlock(&sc->mutex); diff --git a/drivers/net/wireless/ath/ath9k/tx99.c b/drivers/net/wireless/ath/ath9k/tx99.c index fe3a8263b224..87d09d1e74aa 100644 --- a/drivers/net/wireless/ath/ath9k/tx99.c +++ b/drivers/net/wireless/ath/ath9k/tx99.c @@ -56,11 +56,6 @@ static struct sk_buff *ath9k_build_tx99_skb(struct ath_softc *sc) struct sk_buff *skb; struct ath_vif *avp; - if (!sc->tx99_vif) - return NULL; - - avp = (struct ath_vif *)sc->tx99_vif->drv_priv; - skb = alloc_skb(len, GFP_KERNEL); if (!skb) return NULL; @@ -77,7 +72,10 @@ static struct sk_buff *ath9k_build_tx99_skb(struct ath_softc *sc) memcpy(hdr->addr2, hw->wiphy->perm_addr, ETH_ALEN); memcpy(hdr->addr3, hw->wiphy->perm_addr, ETH_ALEN); - hdr->seq_ctrl |= cpu_to_le16(avp->seq_no); + if (sc->tx99_vif) { + avp = (struct ath_vif *) sc->tx99_vif->drv_priv; + hdr->seq_ctrl |= cpu_to_le16(avp->seq_no); + } tx_info = IEEE80211_SKB_CB(skb); memset(tx_info, 0, sizeof(*tx_info)); diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index d63d7c326801..798516f42f2f 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -1002,15 +1002,16 @@ int wmi_call(struct wil6210_priv *wil, u16 cmdid, void *buf, u16 len, { int rc; unsigned long remain; + ulong flags; mutex_lock(&wil->wmi_mutex); - spin_lock(&wil->wmi_ev_lock); + spin_lock_irqsave(&wil->wmi_ev_lock, flags); wil->reply_id = reply_id; wil->reply_buf = reply; wil->reply_size = reply_size; reinit_completion(&wil->wmi_call); - spin_unlock(&wil->wmi_ev_lock); + spin_unlock_irqrestore(&wil->wmi_ev_lock, flags); rc = __wmi_send(wil, cmdid, buf, len); if (rc) @@ -1030,11 +1031,11 @@ int wmi_call(struct wil6210_priv *wil, u16 cmdid, void *buf, u16 len, } out: - spin_lock(&wil->wmi_ev_lock); + spin_lock_irqsave(&wil->wmi_ev_lock, flags); wil->reply_id = 0; wil->reply_buf = NULL; wil->reply_size = 0; - spin_unlock(&wil->wmi_ev_lock); + spin_unlock_irqrestore(&wil->wmi_ev_lock, flags); mutex_unlock(&wil->wmi_mutex); diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c index f1e3dad57629..f435bd0f8b5b 100644 --- a/drivers/net/wireless/broadcom/b43legacy/main.c +++ b/drivers/net/wireless/broadcom/b43legacy/main.c @@ -1304,8 +1304,9 @@ static void handle_irq_ucode_debug(struct b43legacy_wldev *dev) } /* Interrupt handler bottom-half */ -static void b43legacy_interrupt_tasklet(struct b43legacy_wldev *dev) +static void b43legacy_interrupt_tasklet(unsigned long data) { + struct b43legacy_wldev *dev = (struct b43legacy_wldev *)data; u32 reason; u32 dma_reason[ARRAY_SIZE(dev->dma_reason)]; u32 merged_dma_reason = 0; @@ -3775,7 +3776,7 @@ static int b43legacy_one_core_attach(struct ssb_device *dev, b43legacy_set_status(wldev, B43legacy_STAT_UNINIT); wldev->bad_frames_preempt = modparam_bad_frames_preempt; tasklet_init(&wldev->isr_tasklet, - (void (*)(unsigned long))b43legacy_interrupt_tasklet, + b43legacy_interrupt_tasklet, (unsigned long)wldev); if (modparam_pio) wldev->__using_pio = true; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c index 450f2216fac2..4a883f4bbf88 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c @@ -74,7 +74,7 @@ #define P2P_AF_MAX_WAIT_TIME msecs_to_jiffies(2000) #define P2P_INVALID_CHANNEL -1 #define P2P_CHANNEL_SYNC_RETRY 5 -#define P2P_AF_FRM_SCAN_MAX_WAIT msecs_to_jiffies(1500) +#define P2P_AF_FRM_SCAN_MAX_WAIT msecs_to_jiffies(450) #define P2P_DEFAULT_SLEEP_TIME_VSDB 200 /* WiFi P2P Public Action Frame OUI Subtypes */ @@ -1139,7 +1139,6 @@ static s32 brcmf_p2p_af_searching_channel(struct brcmf_p2p_info *p2p) { struct afx_hdl *afx_hdl = &p2p->afx_hdl; struct brcmf_cfg80211_vif *pri_vif; - unsigned long duration; s32 retry; brcmf_dbg(TRACE, "Enter\n"); @@ -1155,7 +1154,6 @@ static s32 brcmf_p2p_af_searching_channel(struct brcmf_p2p_info *p2p) * pending action frame tx is cancelled. */ retry = 0; - duration = msecs_to_jiffies(P2P_AF_FRM_SCAN_MAX_WAIT); while ((retry < P2P_CHANNEL_SYNC_RETRY) && (afx_hdl->peer_chan == P2P_INVALID_CHANNEL)) { afx_hdl->is_listen = false; @@ -1163,7 +1161,8 @@ static s32 brcmf_p2p_af_searching_channel(struct brcmf_p2p_info *p2p) retry); /* search peer on peer's listen channel */ schedule_work(&afx_hdl->afx_work); - wait_for_completion_timeout(&afx_hdl->act_frm_scan, duration); + wait_for_completion_timeout(&afx_hdl->act_frm_scan, + P2P_AF_FRM_SCAN_MAX_WAIT); if ((afx_hdl->peer_chan != P2P_INVALID_CHANNEL) || (!test_bit(BRCMF_P2P_STATUS_FINDING_COMMON_CHANNEL, &p2p->status))) @@ -1176,7 +1175,7 @@ static s32 brcmf_p2p_af_searching_channel(struct brcmf_p2p_info *p2p) afx_hdl->is_listen = true; schedule_work(&afx_hdl->afx_work); wait_for_completion_timeout(&afx_hdl->act_frm_scan, - duration); + P2P_AF_FRM_SCAN_MAX_WAIT); } if ((afx_hdl->peer_chan != P2P_INVALID_CHANNEL) || (!test_bit(BRCMF_P2P_STATUS_FINDING_COMMON_CHANNEL, @@ -1463,10 +1462,12 @@ int brcmf_p2p_notify_action_tx_complete(struct brcmf_if *ifp, return 0; if (e->event_code == BRCMF_E_ACTION_FRAME_COMPLETE) { - if (e->status == BRCMF_E_STATUS_SUCCESS) + if (e->status == BRCMF_E_STATUS_SUCCESS) { set_bit(BRCMF_P2P_STATUS_ACTION_TX_COMPLETED, &p2p->status); - else { + if (!p2p->wait_for_offchan_complete) + complete(&p2p->send_af_done); + } else { set_bit(BRCMF_P2P_STATUS_ACTION_TX_NOACK, &p2p->status); /* If there is no ack, we don't need to wait for * WLC_E_ACTION_FRAME_OFFCHAN_COMPLETE event @@ -1517,6 +1518,17 @@ static s32 brcmf_p2p_tx_action_frame(struct brcmf_p2p_info *p2p, p2p->af_sent_channel = le32_to_cpu(af_params->channel); p2p->af_tx_sent_jiffies = jiffies; + if (test_bit(BRCMF_P2P_STATUS_DISCOVER_LISTEN, &p2p->status) && + p2p->af_sent_channel == + ieee80211_frequency_to_channel(p2p->remain_on_channel.center_freq)) + p2p->wait_for_offchan_complete = false; + else + p2p->wait_for_offchan_complete = true; + + brcmf_dbg(TRACE, "Waiting for %s tx completion event\n", + (p2p->wait_for_offchan_complete) ? + "off-channel" : "on-channel"); + timeout = wait_for_completion_timeout(&p2p->send_af_done, P2P_AF_MAX_WAIT_TIME); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h index 0e8b34d2d85c..39f0d0218088 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h @@ -124,6 +124,7 @@ struct afx_hdl { * @gon_req_action: about to send go negotiation requets frame. * @block_gon_req_tx: drop tx go negotiation requets frame. * @p2pdev_dynamically: is p2p device if created by module param or supplicant. + * @wait_for_offchan_complete: wait for off-channel tx completion event. */ struct brcmf_p2p_info { struct brcmf_cfg80211_info *cfg; @@ -144,6 +145,7 @@ struct brcmf_p2p_info { bool gon_req_action; bool block_gon_req_tx; bool p2pdev_dynamically; + bool wait_for_offchan_complete; }; s32 brcmf_p2p_attach(struct brcmf_cfg80211_info *cfg, bool p2pdev_forced); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 4c28b04ea605..d198a8780b96 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -1932,6 +1932,7 @@ static uint brcmf_sdio_readframes(struct brcmf_sdio *bus, uint maxframes) BRCMF_SDIO_FT_NORMAL)) { rd->len = 0; brcmu_pkt_buf_free_skb(pkt); + continue; } bus->sdcnt.rx_readahead_cnt++; if (rd->len != roundup(rd_new.len, 16)) { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index be855aa32154..4ad830b7b1c9 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -441,6 +441,7 @@ fail: usb_free_urb(req->urb); list_del(q->next); } + kfree(reqs); return NULL; } @@ -1333,7 +1334,7 @@ brcmf_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) goto fail; } - desc = &intf->altsetting[0].desc; + desc = &intf->cur_altsetting->desc; if ((desc->bInterfaceClass != USB_CLASS_VENDOR_SPEC) || (desc->bInterfaceSubClass != 2) || (desc->bInterfaceProtocol != 0xff)) { @@ -1346,7 +1347,7 @@ brcmf_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) num_of_eps = desc->bNumEndpoints; for (ep = 0; ep < num_of_eps; ep++) { - endpoint = &intf->altsetting[0].endpoint[ep].desc; + endpoint = &intf->cur_altsetting->endpoint[ep].desc; endpoint_num = usb_endpoint_num(endpoint); if (!usb_endpoint_xfer_bulk(endpoint)) continue; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c index ddfdfe177e24..66f1f41b1380 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c @@ -502,6 +502,7 @@ brcms_ops_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) } spin_lock_bh(&wl->lock); + wl->wlc->vif = vif; wl->mute_tx = false; brcms_c_mute(wl->wlc, false); if (vif->type == NL80211_IFTYPE_STATION) @@ -519,6 +520,11 @@ brcms_ops_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) static void brcms_ops_remove_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { + struct brcms_info *wl = hw->priv; + + spin_lock_bh(&wl->lock); + wl->wlc->vif = NULL; + spin_unlock_bh(&wl->lock); } static int brcms_ops_config(struct ieee80211_hw *hw, u32 changed) @@ -840,8 +846,8 @@ brcms_ops_ampdu_action(struct ieee80211_hw *hw, status = brcms_c_aggregatable(wl->wlc, tid); spin_unlock_bh(&wl->lock); if (!status) { - brcms_err(wl->wlc->hw->d11core, - "START: tid %d is not agg\'able\n", tid); + brcms_dbg_ht(wl->wlc->hw->d11core, + "START: tid %d is not agg\'able\n", tid); return -EINVAL; } ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid); @@ -937,6 +943,25 @@ static void brcms_ops_set_tsf(struct ieee80211_hw *hw, spin_unlock_bh(&wl->lock); } +static int brcms_ops_beacon_set_tim(struct ieee80211_hw *hw, + struct ieee80211_sta *sta, bool set) +{ + struct brcms_info *wl = hw->priv; + struct sk_buff *beacon = NULL; + u16 tim_offset = 0; + + spin_lock_bh(&wl->lock); + if (wl->wlc->vif) + beacon = ieee80211_beacon_get_tim(hw, wl->wlc->vif, + &tim_offset, NULL); + if (beacon) + brcms_c_set_new_beacon(wl->wlc, beacon, tim_offset, + wl->wlc->vif->bss_conf.dtim_period); + spin_unlock_bh(&wl->lock); + + return 0; +} + static const struct ieee80211_ops brcms_ops = { .tx = brcms_ops_tx, .start = brcms_ops_start, @@ -955,6 +980,7 @@ static const struct ieee80211_ops brcms_ops = { .flush = brcms_ops_flush, .get_tsf = brcms_ops_get_tsf, .set_tsf = brcms_ops_set_tsf, + .set_tim = brcms_ops_beacon_set_tim, }; void brcms_dpc(unsigned long data) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.h b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.h index c4d135cff04a..9f76b880814e 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.h @@ -563,6 +563,7 @@ struct brcms_c_info { struct wiphy *wiphy; struct scb pri_scb; + struct ieee80211_vif *vif; struct sk_buff *beacon; u16 beacon_tim_offset; diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c index 54201c02fdb8..f3f20abbe269 100644 --- a/drivers/net/wireless/cisco/airo.c +++ b/drivers/net/wireless/cisco/airo.c @@ -5464,7 +5464,7 @@ static int proc_BSSList_open( struct inode *inode, struct file *file ) { we have to add a spin lock... */ rc = readBSSListRid(ai, doLoseSync, &BSSList_rid); while(rc == 0 && BSSList_rid.index != cpu_to_le16(0xffff)) { - ptr += sprintf(ptr, "%pM %*s rssi = %d", + ptr += sprintf(ptr, "%pM %.*s rssi = %d", BSSList_rid.bssid, (int)BSSList_rid.ssidLen, BSSList_rid.ssid, @@ -7788,16 +7788,8 @@ static int readrids(struct net_device *dev, aironet_ioctl *comp) { case AIROGVLIST: ridcode = RID_APLIST; break; case AIROGDRVNAM: ridcode = RID_DRVNAME; break; case AIROGEHTENC: ridcode = RID_ETHERENCAP; break; - case AIROGWEPKTMP: ridcode = RID_WEP_TEMP; - /* Only super-user can read WEP keys */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - break; - case AIROGWEPKNV: ridcode = RID_WEP_PERM; - /* Only super-user can read WEP keys */ - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - break; + case AIROGWEPKTMP: ridcode = RID_WEP_TEMP; break; + case AIROGWEPKNV: ridcode = RID_WEP_PERM; break; case AIROGSTAT: ridcode = RID_STATUS; break; case AIROGSTATSD32: ridcode = RID_STATSDELTA; break; case AIROGSTATSC32: ridcode = RID_STATS; break; @@ -7811,7 +7803,13 @@ static int readrids(struct net_device *dev, aironet_ioctl *comp) { return -EINVAL; } - if ((iobuf = kmalloc(RIDSIZE, GFP_KERNEL)) == NULL) + if (ridcode == RID_WEP_TEMP || ridcode == RID_WEP_PERM) { + /* Only super-user can read WEP keys */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + } + + if ((iobuf = kzalloc(RIDSIZE, GFP_KERNEL)) == NULL) return -ENOMEM; PC4500_readrid(ai,ridcode,iobuf,RIDSIZE, 1); diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c index 19c442cb93e4..8fbdd7d4fd0c 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c @@ -3220,8 +3220,9 @@ static void ipw2100_tx_send_data(struct ipw2100_priv *priv) } } -static void ipw2100_irq_tasklet(struct ipw2100_priv *priv) +static void ipw2100_irq_tasklet(unsigned long data) { + struct ipw2100_priv *priv = (struct ipw2100_priv *)data; struct net_device *dev = priv->net_dev; unsigned long flags; u32 inta, tmp; @@ -6027,7 +6028,7 @@ static void ipw2100_rf_kill(struct work_struct *work) spin_unlock_irqrestore(&priv->low_lock, flags); } -static void ipw2100_irq_tasklet(struct ipw2100_priv *priv); +static void ipw2100_irq_tasklet(unsigned long data); static const struct net_device_ops ipw2100_netdev_ops = { .ndo_open = ipw2100_open, @@ -6157,7 +6158,7 @@ static struct net_device *ipw2100_alloc_device(struct pci_dev *pci_dev, INIT_DELAYED_WORK(&priv->rf_kill, ipw2100_rf_kill); INIT_DELAYED_WORK(&priv->scan_event, ipw2100_scan_event); - tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long)) + tasklet_init(&priv->irq_tasklet, ipw2100_irq_tasklet, (unsigned long)priv); /* NOTE: We do not start the deferred work for status checks yet */ diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c index 8da87496cb58..2d0734ab3f74 100644 --- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c @@ -1966,8 +1966,9 @@ static void notify_wx_assoc_event(struct ipw_priv *priv) wireless_send_event(priv->net_dev, SIOCGIWAP, &wrqu, NULL); } -static void ipw_irq_tasklet(struct ipw_priv *priv) +static void ipw_irq_tasklet(unsigned long data) { + struct ipw_priv *priv = (struct ipw_priv *)data; u32 inta, inta_mask, handled = 0; unsigned long flags; int rc = 0; @@ -10702,7 +10703,7 @@ static int ipw_setup_deferred_work(struct ipw_priv *priv) INIT_WORK(&priv->qos_activate, ipw_bg_qos_activate); #endif /* CONFIG_IPW2200_QOS */ - tasklet_init(&priv->irq_tasklet, (void (*)(unsigned long)) + tasklet_init(&priv->irq_tasklet, ipw_irq_tasklet, (unsigned long)priv); return ret; diff --git a/drivers/net/wireless/intel/iwlegacy/3945-mac.c b/drivers/net/wireless/intel/iwlegacy/3945-mac.c index 329f3a63dadd..0fb81151a132 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/3945-mac.c @@ -1399,8 +1399,9 @@ il3945_dump_nic_error_log(struct il_priv *il) } static void -il3945_irq_tasklet(struct il_priv *il) +il3945_irq_tasklet(unsigned long data) { + struct il_priv *il = (struct il_priv *)data; u32 inta, handled = 0; u32 inta_fh; unsigned long flags; @@ -3432,7 +3433,7 @@ il3945_setup_deferred_work(struct il_priv *il) setup_timer(&il->watchdog, il_bg_watchdog, (unsigned long)il); tasklet_init(&il->irq_tasklet, - (void (*)(unsigned long))il3945_irq_tasklet, + il3945_irq_tasklet, (unsigned long)il); } diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c index de9b6522c43f..665e82effb03 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c @@ -4363,8 +4363,9 @@ il4965_synchronize_irq(struct il_priv *il) } static void -il4965_irq_tasklet(struct il_priv *il) +il4965_irq_tasklet(unsigned long data) { + struct il_priv *il = (struct il_priv *)data; u32 inta, handled = 0; u32 inta_fh; unsigned long flags; @@ -6264,7 +6265,7 @@ il4965_setup_deferred_work(struct il_priv *il) setup_timer(&il->watchdog, il_bg_watchdog, (unsigned long)il); tasklet_init(&il->irq_tasklet, - (void (*)(unsigned long))il4965_irq_tasklet, + il4965_irq_tasklet, (unsigned long)il); } diff --git a/drivers/net/wireless/intel/iwlegacy/common.c b/drivers/net/wireless/intel/iwlegacy/common.c index 8d5acda92a9b..6e6b124f0d5e 100644 --- a/drivers/net/wireless/intel/iwlegacy/common.c +++ b/drivers/net/wireless/intel/iwlegacy/common.c @@ -717,7 +717,7 @@ il_eeprom_init(struct il_priv *il) u32 gp = _il_rd(il, CSR_EEPROM_GP); int sz; int ret; - u16 addr; + int addr; /* allocate eeprom */ sz = il->cfg->eeprom_size; diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/led.c b/drivers/net/wireless/intel/iwlwifi/dvm/led.c index 1bbd17ada974..20e16c423990 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/led.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/led.c @@ -185,6 +185,9 @@ void iwl_leds_init(struct iwl_priv *priv) priv->led.name = kasprintf(GFP_KERNEL, "%s-led", wiphy_name(priv->hw->wiphy)); + if (!priv->led.name) + return; + priv->led.brightness_set = iwl_led_brightness_set; priv->led.blink_set = iwl_led_blink_set; priv->led.max_brightness = 1; diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/main.c b/drivers/net/wireless/intel/iwlwifi/dvm/main.c index 2acd94da9efe..051a2fea9572 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/main.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/main.c @@ -1229,6 +1229,23 @@ static int iwl_eeprom_init_hw_params(struct iwl_priv *priv) return 0; } +static int iwl_nvm_check_version(struct iwl_nvm_data *data, + struct iwl_trans *trans) +{ + if (data->nvm_version >= trans->cfg->nvm_ver || + data->calib_version >= trans->cfg->nvm_calib_ver) { + IWL_DEBUG_INFO(trans, "device EEPROM VER=0x%x, CALIB=0x%x\n", + data->nvm_version, data->calib_version); + return 0; + } + + IWL_ERR(trans, + "Unsupported (too old) EEPROM VER=0x%x < 0x%x CALIB=0x%x < 0x%x\n", + data->nvm_version, trans->cfg->nvm_ver, + data->calib_version, trans->cfg->nvm_calib_ver); + return -EINVAL; +} + static struct iwl_op_mode *iwl_op_mode_dvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, const struct iwl_fw *fw, diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h index 14ad9fb895f9..a9c8352a7641 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/tx.h @@ -722,9 +722,9 @@ enum iwl_mvm_ba_resp_flags { * @tfd_cnt: number of TFD-Q elements * @ra_tid_cnt: number of RATID-Q elements * @tfd: array of TFD queue status updates. See &iwl_mvm_compressed_ba_tfd - * for details. + * for details. Length in @tfd_cnt. * @ra_tid: array of RA-TID queue status updates. For debug purposes only. See - * &iwl_mvm_compressed_ba_ratid for more details. + * &iwl_mvm_compressed_ba_ratid for more details. Length in @ra_tid_cnt. */ struct iwl_mvm_compressed_ba_notif { __le32 flags; @@ -741,7 +741,7 @@ struct iwl_mvm_compressed_ba_notif { __le32 tx_rate; __le16 tfd_cnt; __le16 ra_tid_cnt; - struct iwl_mvm_compressed_ba_tfd tfd[1]; + struct iwl_mvm_compressed_ba_tfd tfd[0]; struct iwl_mvm_compressed_ba_ratid ra_tid[0]; } __packed; /* COMPRESSED_BA_RES_API_S_VER_4 */ diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c index 839010417241..ba9e7bfeca2c 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c @@ -532,6 +532,7 @@ static struct scatterlist *alloc_sgtable(int size) if (new_page) __free_page(new_page); } + kfree(table); return NULL; } alloc_size = min_t(int, size, PAGE_SIZE); @@ -775,7 +776,7 @@ void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt) dump_data = iwl_fw_error_next_data(dump_data); /* We only dump the FIFOs if the FW is in error state */ - if (test_bit(STATUS_FW_ERROR, &fwrt->trans->status)) { + if (fifo_data_len) { iwl_fw_dump_fifos(fwrt, &dump_data); if (radio_len) iwl_read_radio_regs(fwrt, &dump_data); @@ -954,7 +955,7 @@ int iwl_fw_dbg_collect_desc(struct iwl_fw_runtime *fwrt, * If the loading of the FW completed successfully, the next step is to * get the SMEM config data. Thus, if fwrt->smem_cfg.num_lmacs is non * zero, the FW was already loaded successully. If the state is "NO_FW" - * in such a case - WARN and exit, since FW may be dead. Otherwise, we + * in such a case - exit, since FW may be dead. Otherwise, we * can try to collect the data, since FW might just not be fully * loaded (no "ALIVE" yet), and the debug data is accessible. * @@ -962,9 +963,8 @@ int iwl_fw_dbg_collect_desc(struct iwl_fw_runtime *fwrt, * config. In such a case, due to HW access problems, we might * collect garbage. */ - if (WARN((fwrt->trans->state == IWL_TRANS_NO_FW) && - fwrt->smem_cfg.num_lmacs, - "Can't collect dbg data when FW isn't alive\n")) + if (fwrt->trans->state == IWL_TRANS_NO_FW && + fwrt->smem_cfg.num_lmacs) return -EIO; if (test_and_set_bit(IWL_FWRT_STATUS_DUMPING, &fwrt->status)) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.c index 3199d345b427..92727f7e42db 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.c @@ -928,22 +928,3 @@ iwl_parse_eeprom_data(struct device *dev, const struct iwl_cfg *cfg, return NULL; } IWL_EXPORT_SYMBOL(iwl_parse_eeprom_data); - -/* helper functions */ -int iwl_nvm_check_version(struct iwl_nvm_data *data, - struct iwl_trans *trans) -{ - if (data->nvm_version >= trans->cfg->nvm_ver || - data->calib_version >= trans->cfg->nvm_calib_ver) { - IWL_DEBUG_INFO(trans, "device EEPROM VER=0x%x, CALIB=0x%x\n", - data->nvm_version, data->calib_version); - return 0; - } - - IWL_ERR(trans, - "Unsupported (too old) EEPROM VER=0x%x < 0x%x CALIB=0x%x < 0x%x\n", - data->nvm_version, trans->cfg->nvm_ver, - data->calib_version, trans->cfg->nvm_calib_ver); - return -EINVAL; -} -IWL_EXPORT_SYMBOL(iwl_nvm_check_version); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.h index b33888991b94..5545210151cd 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-eeprom-parse.h @@ -7,6 +7,7 @@ * * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2015 Intel Mobile Communications GmbH + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -33,6 +34,7 @@ * * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2015 Intel Mobile Communications GmbH + * Copyright (C) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -121,9 +123,6 @@ struct iwl_nvm_data * iwl_parse_eeprom_data(struct device *dev, const struct iwl_cfg *cfg, const u8 *eeprom, size_t eeprom_size); -int iwl_nvm_check_version(struct iwl_nvm_data *data, - struct iwl_trans *trans); - int iwl_init_sband_channels(struct iwl_nvm_data *data, struct ieee80211_supported_band *sband, int n_channels, enum nl80211_band band); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index b205a7bfb828..65c51c698328 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -947,8 +947,10 @@ int iwl_mvm_wowlan_config_key_params(struct iwl_mvm *mvm, { struct iwl_wowlan_kek_kck_material_cmd kek_kck_cmd = {}; struct iwl_wowlan_tkip_params_cmd tkip_cmd = {}; + bool unified = fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_CNSLDTD_D3_D0_IMG); struct wowlan_key_data key_data = { - .configure_keys = !d0i3, + .configure_keys = !d0i3 && !unified, .use_rsc_tsc = false, .tkip = &tkip_cmd, .use_tkip = false, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 534c0ea7b232..754dcc1c1f40 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -107,12 +107,12 @@ static int iwl_send_rss_cfg_cmd(struct iwl_mvm *mvm) int i; struct iwl_rss_config_cmd cmd = { .flags = cpu_to_le32(IWL_RSS_ENABLE), - .hash_mask = IWL_RSS_HASH_TYPE_IPV4_TCP | - IWL_RSS_HASH_TYPE_IPV4_UDP | - IWL_RSS_HASH_TYPE_IPV4_PAYLOAD | - IWL_RSS_HASH_TYPE_IPV6_TCP | - IWL_RSS_HASH_TYPE_IPV6_UDP | - IWL_RSS_HASH_TYPE_IPV6_PAYLOAD, + .hash_mask = BIT(IWL_RSS_HASH_TYPE_IPV4_TCP) | + BIT(IWL_RSS_HASH_TYPE_IPV4_UDP) | + BIT(IWL_RSS_HASH_TYPE_IPV4_PAYLOAD) | + BIT(IWL_RSS_HASH_TYPE_IPV6_TCP) | + BIT(IWL_RSS_HASH_TYPE_IPV6_UDP) | + BIT(IWL_RSS_HASH_TYPE_IPV6_PAYLOAD), }; if (mvm->trans->num_rx_queues == 1) @@ -501,7 +501,9 @@ int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm) if (mvm->nvm_file_name) iwl_mvm_load_nvm_to_nic(mvm); - WARN_ON(iwl_nvm_check_version(mvm->nvm_data, mvm->trans)); + WARN_ONCE(mvm->nvm_data->nvm_version < mvm->trans->cfg->nvm_ver, + "Too old NVM version (0x%0x, required = 0x%0x)", + mvm->nvm_data->nvm_version, mvm->trans->cfg->nvm_ver); /* * abort after reading the nvm in case RF Kill is on, we will complete diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/led.c b/drivers/net/wireless/intel/iwlwifi/mvm/led.c index b27269504a62..072f80c90ce4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/led.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/led.c @@ -131,6 +131,9 @@ int iwl_mvm_leds_init(struct iwl_mvm *mvm) mvm->led.name = kasprintf(GFP_KERNEL, "%s-led", wiphy_name(mvm->hw->wiphy)); + if (!mvm->led.name) + return -ENOMEM; + mvm->led.brightness_set = iwl_led_brightness_set; mvm->led.max_brightness = 1; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 77ed6ecf5ee5..b86c7a36d3f1 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -822,6 +822,21 @@ static void iwl_mvm_mac_tx(struct ieee80211_hw *hw, !ieee80211_is_bufferable_mmpdu(hdr->frame_control)) sta = NULL; + /* If there is no sta, and it's not offchannel - send through AP */ + if (info->control.vif->type == NL80211_IFTYPE_STATION && + info->hw_queue != IWL_MVM_OFFCHANNEL_QUEUE && !sta) { + struct iwl_mvm_vif *mvmvif = + iwl_mvm_vif_from_mac80211(info->control.vif); + u8 ap_sta_id = READ_ONCE(mvmvif->ap_sta_id); + + if (ap_sta_id < IWL_MVM_STATION_COUNT) { + /* mac80211 holds rcu read lock */ + sta = rcu_dereference(mvm->fw_id_to_mac_id[ap_sta_id]); + if (IS_ERR_OR_NULL(sta)) + goto drop; + } + } + if (sta) { if (iwl_mvm_defer_tx(mvm, sta, skb)) return; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index ca2d66ce8424..b2e393c4fab5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -298,7 +298,7 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm) int regulatory_type; /* Checking for required sections */ - if (mvm->trans->cfg->nvm_type != IWL_NVM_EXT) { + if (mvm->trans->cfg->nvm_type == IWL_NVM) { if (!mvm->nvm_sections[NVM_SECTION_TYPE_SW].data || !mvm->nvm_sections[mvm->cfg->nvm_hw_section_num].data) { IWL_ERR(mvm, "Can't parse empty OTP/NVM sections\n"); @@ -326,7 +326,8 @@ iwl_parse_nvm_sections(struct iwl_mvm *mvm) } /* PHY_SKU section is mandatory in B0 */ - if (!mvm->nvm_sections[NVM_SECTION_TYPE_PHY_SKU].data) { + if (mvm->trans->cfg->nvm_type == IWL_NVM_EXT && + !mvm->nvm_sections[NVM_SECTION_TYPE_PHY_SKU].data) { IWL_ERR(mvm, "Can't parse phy_sku in B0, empty sections\n"); return NULL; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c index c73e4be9bde3..c31303d13069 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c @@ -62,6 +62,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *****************************************************************************/ +#include #include #include #include "iwl-trans.h" @@ -290,7 +291,7 @@ void iwl_mvm_rx_rx_mpdu(struct iwl_mvm *mvm, struct napi_struct *napi, rx_res = (struct iwl_rx_mpdu_res_start *)pkt->data; hdr = (struct ieee80211_hdr *)(pkt->data + sizeof(*rx_res)); len = le16_to_cpu(rx_res->byte_count); - rx_pkt_status = le32_to_cpup((__le32 *) + rx_pkt_status = get_unaligned_le32((__le32 *) (pkt->data + sizeof(*rx_res) + len)); /* Dont use dev_alloc_skb(), we'll have enough headroom once diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 7fb8bbaf2142..1a12e829e98b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -871,12 +871,12 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, bool toggle_bit = phy_info & IWL_RX_MPDU_PHY_AMPDU_TOGGLE; rx_status->flag |= RX_FLAG_AMPDU_DETAILS; - rx_status->ampdu_reference = mvm->ampdu_ref; /* toggle is switched whenever new aggregation starts */ if (toggle_bit != mvm->ampdu_toggle) { mvm->ampdu_ref++; mvm->ampdu_toggle = toggle_bit; } + rx_status->ampdu_reference = mvm->ampdu_ref; } rcu_read_lock(); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index d31d84eebc5d..d9ab85c8eb6a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -436,6 +436,16 @@ static int iwl_mvm_remove_sta_queue_marking(struct iwl_mvm *mvm, int queue) rcu_read_unlock(); + /* + * The TX path may have been using this TXQ_ID from the tid_data, + * so make sure it's no longer running so that we can safely reuse + * this TXQ later. We've set all the TIDs to IWL_MVM_INVALID_QUEUE + * above, but nothing guarantees we've stopped using them. Thus, + * without this, we could get to iwl_mvm_disable_txq() and remove + * the queue while still sending frames to it. + */ + synchronize_net(); + return disable_agg_tids; } @@ -2407,7 +2417,7 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); struct iwl_mvm_tid_data *tid_data; u16 normalized_ssn; - int txq_id; + u16 txq_id; int ret; if (WARN_ON_ONCE(tid >= IWL_MAX_TID_COUNT)) @@ -2442,17 +2452,24 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, */ txq_id = mvmsta->tid_data[tid].txq_id; if (txq_id == IWL_MVM_INVALID_QUEUE) { - txq_id = iwl_mvm_find_free_queue(mvm, mvmsta->sta_id, - IWL_MVM_DQA_MIN_DATA_QUEUE, - IWL_MVM_DQA_MAX_DATA_QUEUE); - if (txq_id < 0) { - ret = txq_id; + ret = iwl_mvm_find_free_queue(mvm, mvmsta->sta_id, + IWL_MVM_DQA_MIN_DATA_QUEUE, + IWL_MVM_DQA_MAX_DATA_QUEUE); + if (ret < 0) { IWL_ERR(mvm, "Failed to allocate agg queue\n"); goto release_locks; } + txq_id = ret; + /* TXQ hasn't yet been enabled, so mark it only as reserved */ mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_RESERVED; + } else if (WARN_ON(txq_id >= IWL_MAX_HW_QUEUES)) { + ret = -ENXIO; + IWL_ERR(mvm, "tid_id %d out of range (0, %d)!\n", + tid, IWL_MAX_HW_QUEUES - 1); + goto out; + } else if (unlikely(mvm->queue_info[txq_id].status == IWL_MVM_QUEUE_SHARED)) { ret = -ENXIO; @@ -2964,6 +2981,10 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm, igtk_cmd.sta_id = cpu_to_le32(sta_id); if (remove_key) { + /* This is a valid situation for IGTK */ + if (sta_id == IWL_MVM_INVALID_STA) + return 0; + igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_NOT_VALID); } else { struct ieee80211_key_seq seq; @@ -3067,10 +3088,6 @@ static int __iwl_mvm_set_sta_key(struct iwl_mvm *mvm, switch (keyconf->cipher) { case WLAN_CIPHER_SUITE_TKIP: - if (vif->type == NL80211_IFTYPE_AP) { - ret = -EINVAL; - break; - } addr = iwl_mvm_get_mac_addr(mvm, vif, sta); /* get phase 1 key from mac80211 */ ieee80211_get_key_rx_seq(keyconf, 0, &seq); @@ -3272,9 +3289,9 @@ int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, IWL_DEBUG_WEP(mvm, "mvm remove dynamic key: idx=%d sta=%d\n", keyconf->keyidx, sta_id); - if (mvm_sta && (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC || - keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || - keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256)) + if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) return iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, true); if (!__test_and_clear_bit(keyconf->hw_key_idx, mvm->fw_key_table)) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c index 1232f63278eb..319103f4b432 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c @@ -739,7 +739,8 @@ static struct thermal_zone_device_ops tzone_ops = { static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm) { int i; - char name[] = "iwlwifi"; + char name[16]; + static atomic_t counter = ATOMIC_INIT(0); if (!iwl_mvm_is_tt_in_fw(mvm)) { mvm->tz_device.tzone = NULL; @@ -749,6 +750,7 @@ static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm) BUILD_BUG_ON(ARRAY_SIZE(name) >= THERMAL_NAME_LENGTH); + sprintf(name, "iwlwifi_%u", atomic_inc_return(&counter) & 0xFF); mvm->tz_device.tzone = thermal_zone_device_register(name, IWL_MAX_DTS_TRIPS, IWL_WRITABLE_TRIPS_MSK, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index efef28012a6b..ac1e05b93a9a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1378,6 +1378,14 @@ static void iwl_mvm_rx_tx_cmd_single(struct iwl_mvm *mvm, break; } + /* + * If we are freeing multiple frames, mark all the frames + * but the first one as acked, since they were acknowledged + * before + * */ + if (skb_freed > 1) + info->flags |= IEEE80211_TX_STAT_ACK; + iwl_mvm_tx_status_check_trigger(mvm, status); info->status.rates[0].count = tx_resp->failure_frame + 1; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index dffa697d71e0..8a074a516fb2 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -3023,6 +3023,15 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, spin_lock_init(&trans_pcie->reg_lock); mutex_init(&trans_pcie->mutex); init_waitqueue_head(&trans_pcie->ucode_write_waitq); + + trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator", + WQ_HIGHPRI | WQ_UNBOUND, 1); + if (!trans_pcie->rba.alloc_wq) { + ret = -ENOMEM; + goto out_free_trans; + } + INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work); + trans_pcie->tso_hdr_page = alloc_percpu(struct iwl_tso_hdr_page); if (!trans_pcie->tso_hdr_page) { ret = -ENOMEM; @@ -3195,10 +3204,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, trans_pcie->inta_mask = CSR_INI_SET_MASK; } - trans_pcie->rba.alloc_wq = alloc_workqueue("rb_allocator", - WQ_HIGHPRI | WQ_UNBOUND, 1); - INIT_WORK(&trans_pcie->rba.rx_alloc, iwl_pcie_rx_allocator_work); - #ifdef CONFIG_IWLWIFI_PCIE_RTPM trans->runtime_pm_mode = IWL_PLAT_PM_MODE_D0I3; #else @@ -3211,6 +3216,8 @@ out_free_ict: iwl_pcie_free_ict(trans); out_no_pci: free_percpu(trans_pcie->tso_hdr_page); + destroy_workqueue(trans_pcie->rba.alloc_wq); +out_free_trans: iwl_trans_free(trans); return ERR_PTR(ret); } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 6f45c8148b27..bbb39d6ec2ee 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -232,27 +232,23 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans, struct ieee80211_hdr *hdr = (void *)skb->data; unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room; unsigned int mss = skb_shinfo(skb)->gso_size; - u16 length, iv_len, amsdu_pad; + u16 length, amsdu_pad; u8 *start_hdr; struct iwl_tso_hdr_page *hdr_page; struct page **page_ptr; struct tso_t tso; - /* if the packet is protected, then it must be CCMP or GCMP */ - iv_len = ieee80211_has_protected(hdr->frame_control) ? - IEEE80211_CCMP_HDR_LEN : 0; - trace_iwlwifi_dev_tx(trans->dev, skb, tfd, sizeof(*tfd), &dev_cmd->hdr, start_len, 0); ip_hdrlen = skb_transport_header(skb) - skb_network_header(skb); snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb); - total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len - iv_len; + total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len; amsdu_pad = 0; /* total amount of header we may need for this A-MSDU */ hdr_room = DIV_ROUND_UP(total_len, mss) * - (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)) + iv_len; + (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)); /* Our device supports 9 segments at most, it will fit in 1 page */ hdr_page = get_page_hdr(trans, hdr_room); @@ -263,14 +259,12 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans, start_hdr = hdr_page->pos; page_ptr = (void *)((u8 *)skb->cb + trans_pcie->page_offs); *page_ptr = hdr_page->page; - memcpy(hdr_page->pos, skb->data + hdr_len, iv_len); - hdr_page->pos += iv_len; /* - * Pull the ieee80211 header + IV to be able to use TSO core, + * Pull the ieee80211 header to be able to use TSO core, * we will restore it for the tx_status flow. */ - skb_pull(skb, hdr_len + iv_len); + skb_pull(skb, hdr_len); /* * Remove the length of all the headers that we don't actually @@ -348,8 +342,8 @@ static int iwl_pcie_gen2_build_amsdu(struct iwl_trans *trans, } } - /* re -add the WiFi header and IV */ - skb_push(skb, hdr_len + iv_len); + /* re -add the WiFi header */ + skb_push(skb, hdr_len); return 0; diff --git a/drivers/net/wireless/intersil/hostap/hostap_ap.c b/drivers/net/wireless/intersil/hostap/hostap_ap.c index 1a8d8db80b05..486ca1ee306e 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_ap.c +++ b/drivers/net/wireless/intersil/hostap/hostap_ap.c @@ -2568,7 +2568,7 @@ static int prism2_hostapd_add_sta(struct ap_data *ap, sta->supported_rates[0] = 2; if (sta->tx_supp_rates & WLAN_RATE_2M) sta->supported_rates[1] = 4; - if (sta->tx_supp_rates & WLAN_RATE_5M5) + if (sta->tx_supp_rates & WLAN_RATE_5M5) sta->supported_rates[2] = 11; if (sta->tx_supp_rates & WLAN_RATE_11M) sta->supported_rates[3] = 22; diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c index 56f6e3b71f48..5a64674a5c8d 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c @@ -1364,7 +1364,8 @@ static int ezusb_init(struct hermes *hw) int retval; BUG_ON(in_interrupt()); - BUG_ON(!upriv); + if (!upriv) + return -EINVAL; upriv->reply_count = 0; /* Write the MAGIC number on the simulated registers to keep @@ -1613,9 +1614,9 @@ static int ezusb_probe(struct usb_interface *interface, /* set up the endpoint information */ /* check out the endpoints */ - iface_desc = &interface->altsetting[0].desc; + iface_desc = &interface->cur_altsetting->desc; for (i = 0; i < iface_desc->bNumEndpoints; ++i) { - ep = &interface->altsetting[0].endpoint[i].desc; + ep = &interface->cur_altsetting->endpoint[i].desc; if (usb_endpoint_is_bulk_in(ep)) { /* we found a bulk in endpoint */ diff --git a/drivers/net/wireless/marvell/libertas/cfg.c b/drivers/net/wireless/marvell/libertas/cfg.c index 9f3a7b512673..fbeb12018c3d 100644 --- a/drivers/net/wireless/marvell/libertas/cfg.c +++ b/drivers/net/wireless/marvell/libertas/cfg.c @@ -273,6 +273,10 @@ add_ie_rates(u8 *tlv, const u8 *ie, int *nrates) int hw, ap, ap_max = ie[1]; u8 hw_rate; + if (ap_max > MAX_RATES) { + lbs_deb_assoc("invalid rates\n"); + return tlv; + } /* Advance past IE header */ ie += 2; @@ -1720,6 +1724,9 @@ static int lbs_ibss_join_existing(struct lbs_private *priv, struct cmd_ds_802_11_ad_hoc_join cmd; u8 preamble = RADIO_PREAMBLE_SHORT; int ret = 0; + int hw, i; + u8 rates_max; + u8 *rates; /* TODO: set preamble based on scan result */ ret = lbs_set_radio(priv, preamble, 1); @@ -1778,9 +1785,14 @@ static int lbs_ibss_join_existing(struct lbs_private *priv, if (!rates_eid) { lbs_add_rates(cmd.bss.rates); } else { - int hw, i; - u8 rates_max = rates_eid[1]; - u8 *rates = cmd.bss.rates; + rates_max = rates_eid[1]; + if (rates_max > MAX_RATES) { + lbs_deb_join("invalid rates"); + rcu_read_unlock(); + ret = -EINVAL; + goto out; + } + rates = cmd.bss.rates; for (hw = 0; hw < ARRAY_SIZE(lbs_rates); hw++) { u8 hw_rate = lbs_rates[hw].bitrate / 5; for (i = 0; i < rates_max; i++) { diff --git a/drivers/net/wireless/marvell/libertas/if_sdio.c b/drivers/net/wireless/marvell/libertas/if_sdio.c index 39bf85d0ade0..c7f8a29d2606 100644 --- a/drivers/net/wireless/marvell/libertas/if_sdio.c +++ b/drivers/net/wireless/marvell/libertas/if_sdio.c @@ -1183,6 +1183,10 @@ static int if_sdio_probe(struct sdio_func *func, spin_lock_init(&card->lock); card->workqueue = alloc_workqueue("libertas_sdio", WQ_MEM_RECLAIM, 0); + if (unlikely(!card->workqueue)) { + ret = -ENOMEM; + goto err_queue; + } INIT_WORK(&card->packet_worker, if_sdio_host_to_card_worker); init_waitqueue_head(&card->pwron_waitq); @@ -1234,6 +1238,7 @@ err_activate_card: lbs_remove_card(priv); free: destroy_workqueue(card->workqueue); +err_queue: while (card->packets) { packet = card->packets; card->packets = card->packets->next; diff --git a/drivers/net/wireless/marvell/libertas_tf/cmd.c b/drivers/net/wireless/marvell/libertas_tf/cmd.c index 909ac3685010..2b193f1257a5 100644 --- a/drivers/net/wireless/marvell/libertas_tf/cmd.c +++ b/drivers/net/wireless/marvell/libertas_tf/cmd.c @@ -69,7 +69,7 @@ static void lbtf_geo_init(struct lbtf_private *priv) break; } - for (ch = priv->range.start; ch < priv->range.end; ch++) + for (ch = range->start; ch < range->end; ch++) priv->channels[CHAN_TO_IDX(ch)].flags = 0; } diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index dde47c548818..5e8e34a08b2d 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -362,11 +362,20 @@ mwifiex_cfg80211_set_tx_power(struct wiphy *wiphy, struct mwifiex_power_cfg power_cfg; int dbm = MBM_TO_DBM(mbm); - if (type == NL80211_TX_POWER_FIXED) { + switch (type) { + case NL80211_TX_POWER_FIXED: power_cfg.is_power_auto = 0; + power_cfg.is_power_fixed = 1; power_cfg.power_level = dbm; - } else { + break; + case NL80211_TX_POWER_LIMITED: + power_cfg.is_power_auto = 0; + power_cfg.is_power_fixed = 0; + power_cfg.power_level = dbm; + break; + case NL80211_TX_POWER_AUTOMATIC: power_cfg.is_power_auto = 1; + break; } priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY); diff --git a/drivers/net/wireless/marvell/mwifiex/debugfs.c b/drivers/net/wireless/marvell/mwifiex/debugfs.c index 6f4239be609d..49ca84ef1a99 100644 --- a/drivers/net/wireless/marvell/mwifiex/debugfs.c +++ b/drivers/net/wireless/marvell/mwifiex/debugfs.c @@ -296,15 +296,13 @@ mwifiex_histogram_read(struct file *file, char __user *ubuf, "total samples = %d\n", atomic_read(&phist_data->num_samples)); - p += sprintf(p, "rx rates (in Mbps): 0=1M 1=2M"); - p += sprintf(p, "2=5.5M 3=11M 4=6M 5=9M 6=12M\n"); - p += sprintf(p, "7=18M 8=24M 9=36M 10=48M 11=54M"); - p += sprintf(p, "12-27=MCS0-15(BW20) 28-43=MCS0-15(BW40)\n"); + p += sprintf(p, + "rx rates (in Mbps): 0=1M 1=2M 2=5.5M 3=11M 4=6M 5=9M 6=12M\n" + "7=18M 8=24M 9=36M 10=48M 11=54M 12-27=MCS0-15(BW20) 28-43=MCS0-15(BW40)\n"); if (ISSUPP_11ACENABLED(priv->adapter->fw_cap_info)) { - p += sprintf(p, "44-53=MCS0-9(VHT:BW20)"); - p += sprintf(p, "54-63=MCS0-9(VHT:BW40)"); - p += sprintf(p, "64-73=MCS0-9(VHT:BW80)\n\n"); + p += sprintf(p, + "44-53=MCS0-9(VHT:BW20) 54-63=MCS0-9(VHT:BW40) 64-73=MCS0-9(VHT:BW80)\n\n"); } else { p += sprintf(p, "\n"); } @@ -333,7 +331,7 @@ mwifiex_histogram_read(struct file *file, char __user *ubuf, for (i = 0; i < MWIFIEX_MAX_NOISE_FLR; i++) { value = atomic_read(&phist_data->noise_flr[i]); if (value) - p += sprintf(p, "noise_flr[-%02ddBm] = %d\n", + p += sprintf(p, "noise_flr[%02ddBm] = %d\n", (int)(i-128), value); } for (i = 0; i < MWIFIEX_MAX_SIG_STRENGTH; i++) { diff --git a/drivers/net/wireless/marvell/mwifiex/ioctl.h b/drivers/net/wireless/marvell/mwifiex/ioctl.h index 48e154e1865d..0dd592ea6e83 100644 --- a/drivers/net/wireless/marvell/mwifiex/ioctl.h +++ b/drivers/net/wireless/marvell/mwifiex/ioctl.h @@ -267,6 +267,7 @@ struct mwifiex_ds_encrypt_key { struct mwifiex_power_cfg { u32 is_power_auto; + u32 is_power_fixed; u32 power_level; }; diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index 9511f5fe62f4..8ee9609ef974 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -677,8 +677,11 @@ static int mwifiex_pcie_init_evt_ring(struct mwifiex_adapter *adapter) skb_put(skb, MAX_EVENT_SIZE); if (mwifiex_map_pci_memory(adapter, skb, MAX_EVENT_SIZE, - PCI_DMA_FROMDEVICE)) + PCI_DMA_FROMDEVICE)) { + kfree_skb(skb); + kfree(card->evtbd_ring_vbase); return -1; + } buf_pa = MWIFIEX_SKB_DMA_ADDR(skb); @@ -1019,8 +1022,10 @@ static int mwifiex_pcie_alloc_cmdrsp_buf(struct mwifiex_adapter *adapter) } skb_put(skb, MWIFIEX_UPLD_SIZE); if (mwifiex_map_pci_memory(adapter, skb, MWIFIEX_UPLD_SIZE, - PCI_DMA_FROMDEVICE)) + PCI_DMA_FROMDEVICE)) { + kfree_skb(skb); return -1; + } card->cmdrsp_buf = skb; diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c index 67c334221077..0071c40afe81 100644 --- a/drivers/net/wireless/marvell/mwifiex/scan.c +++ b/drivers/net/wireless/marvell/mwifiex/scan.c @@ -1901,15 +1901,17 @@ mwifiex_parse_single_response_buf(struct mwifiex_private *priv, u8 **bss_info, ETH_ALEN)) mwifiex_update_curr_bss_params(priv, bss); - cfg80211_put_bss(priv->wdev.wiphy, bss); - } - if ((chan->flags & IEEE80211_CHAN_RADAR) || - (chan->flags & IEEE80211_CHAN_NO_IR)) { - mwifiex_dbg(adapter, INFO, - "radar or passive channel %d\n", - channel); - mwifiex_save_hidden_ssid_channels(priv, bss); + if ((chan->flags & IEEE80211_CHAN_RADAR) || + (chan->flags & IEEE80211_CHAN_NO_IR)) { + mwifiex_dbg(adapter, INFO, + "radar or passive channel %d\n", + channel); + mwifiex_save_hidden_ssid_channels(priv, + bss); + } + + cfg80211_put_bss(priv->wdev.wiphy, bss); } } } else { @@ -2888,6 +2890,13 @@ mwifiex_cmd_append_vsie_tlv(struct mwifiex_private *priv, vs_param_set->header.len = cpu_to_le16((((u16) priv->vs_ie[id].ie[1]) & 0x00FF) + 2); + if (le16_to_cpu(vs_param_set->header.len) > + MWIFIEX_MAX_VSIE_LEN) { + mwifiex_dbg(priv->adapter, ERROR, + "Invalid param length!\n"); + break; + } + memcpy(vs_param_set->ie, priv->vs_ie[id].ie, le16_to_cpu(vs_param_set->header.len)); *buffer += le16_to_cpu(vs_param_set->header.len) + diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c index 82828a207963..652acafca136 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c @@ -271,6 +271,15 @@ static int mwifiex_process_country_ie(struct mwifiex_private *priv, "11D: skip setting domain info in FW\n"); return 0; } + + if (country_ie_len > + (IEEE80211_COUNTRY_STRING_LEN + MWIFIEX_MAX_TRIPLET_802_11D)) { + rcu_read_unlock(); + mwifiex_dbg(priv->adapter, ERROR, + "11D: country_ie_len overflow!, deauth AP\n"); + return -EINVAL; + } + memcpy(priv->adapter->country_code, &country_ie[2], 2); domain_info->country_code[0] = country_ie[2]; @@ -314,8 +323,9 @@ int mwifiex_bss_start(struct mwifiex_private *priv, struct cfg80211_bss *bss, priv->scan_block = false; if (bss) { - if (adapter->region_code == 0x00) - mwifiex_process_country_ie(priv, bss); + if (adapter->region_code == 0x00 && + mwifiex_process_country_ie(priv, bss)) + return -EINVAL; /* Allocate and fill new bss descriptor */ bss_desc = kzalloc(sizeof(struct mwifiex_bssdescriptor), @@ -728,6 +738,9 @@ int mwifiex_set_tx_power(struct mwifiex_private *priv, txp_cfg = (struct host_cmd_ds_txpwr_cfg *) buf; txp_cfg->action = cpu_to_le16(HostCmd_ACT_GEN_SET); if (!power_cfg->is_power_auto) { + u16 dbm_min = power_cfg->is_power_fixed ? + dbm : priv->min_tx_power_level; + txp_cfg->mode = cpu_to_le32(1); pg_tlv = (struct mwifiex_types_power_group *) (buf + sizeof(struct host_cmd_ds_txpwr_cfg)); @@ -742,7 +755,7 @@ int mwifiex_set_tx_power(struct mwifiex_private *priv, pg->last_rate_code = 0x03; pg->modulation_class = MOD_CLASS_HR_DSSS; pg->power_step = 0; - pg->power_min = (s8) dbm; + pg->power_min = (s8) dbm_min; pg->power_max = (s8) dbm; pg++; /* Power group for modulation class OFDM */ @@ -750,7 +763,7 @@ int mwifiex_set_tx_power(struct mwifiex_private *priv, pg->last_rate_code = 0x07; pg->modulation_class = MOD_CLASS_OFDM; pg->power_step = 0; - pg->power_min = (s8) dbm; + pg->power_min = (s8) dbm_min; pg->power_max = (s8) dbm; pg++; /* Power group for modulation class HTBW20 */ @@ -758,7 +771,7 @@ int mwifiex_set_tx_power(struct mwifiex_private *priv, pg->last_rate_code = 0x20; pg->modulation_class = MOD_CLASS_HT; pg->power_step = 0; - pg->power_min = (s8) dbm; + pg->power_min = (s8) dbm_min; pg->power_max = (s8) dbm; pg->ht_bandwidth = HT_BW_20; pg++; @@ -767,7 +780,7 @@ int mwifiex_set_tx_power(struct mwifiex_private *priv, pg->last_rate_code = 0x20; pg->modulation_class = MOD_CLASS_HT; pg->power_step = 0; - pg->power_min = (s8) dbm; + pg->power_min = (s8) dbm_min; pg->power_max = (s8) dbm; pg->ht_bandwidth = HT_BW_40; } diff --git a/drivers/net/wireless/marvell/mwifiex/tdls.c b/drivers/net/wireless/marvell/mwifiex/tdls.c index e76af2866a19..80d20fb6f348 100644 --- a/drivers/net/wireless/marvell/mwifiex/tdls.c +++ b/drivers/net/wireless/marvell/mwifiex/tdls.c @@ -897,7 +897,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, u8 *peer, *pos, *end; u8 i, action, basic; u16 cap = 0; - int ie_len = 0; + int ies_len = 0; if (len < (sizeof(struct ethhdr) + 3)) return; @@ -919,7 +919,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, pos = buf + sizeof(struct ethhdr) + 4; /* payload 1+ category 1 + action 1 + dialog 1 */ cap = get_unaligned_le16(pos); - ie_len = len - sizeof(struct ethhdr) - TDLS_REQ_FIX_LEN; + ies_len = len - sizeof(struct ethhdr) - TDLS_REQ_FIX_LEN; pos += 2; break; @@ -929,7 +929,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, /* payload 1+ category 1 + action 1 + dialog 1 + status code 2*/ pos = buf + sizeof(struct ethhdr) + 6; cap = get_unaligned_le16(pos); - ie_len = len - sizeof(struct ethhdr) - TDLS_RESP_FIX_LEN; + ies_len = len - sizeof(struct ethhdr) - TDLS_RESP_FIX_LEN; pos += 2; break; @@ -937,7 +937,7 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, if (len < (sizeof(struct ethhdr) + TDLS_CONFIRM_FIX_LEN)) return; pos = buf + sizeof(struct ethhdr) + TDLS_CONFIRM_FIX_LEN; - ie_len = len - sizeof(struct ethhdr) - TDLS_CONFIRM_FIX_LEN; + ies_len = len - sizeof(struct ethhdr) - TDLS_CONFIRM_FIX_LEN; break; default: mwifiex_dbg(priv->adapter, ERROR, "Unknown TDLS frame type.\n"); @@ -950,65 +950,104 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv, sta_ptr->tdls_cap.capab = cpu_to_le16(cap); - for (end = pos + ie_len; pos + 1 < end; pos += 2 + pos[1]) { - if (pos + 2 + pos[1] > end) + for (end = pos + ies_len; pos + 1 < end; pos += 2 + pos[1]) { + u8 ie_len = pos[1]; + + if (pos + 2 + ie_len > end) break; switch (*pos) { case WLAN_EID_SUPP_RATES: - sta_ptr->tdls_cap.rates_len = pos[1]; - for (i = 0; i < pos[1]; i++) + if (ie_len > sizeof(sta_ptr->tdls_cap.rates)) + return; + sta_ptr->tdls_cap.rates_len = ie_len; + for (i = 0; i < ie_len; i++) sta_ptr->tdls_cap.rates[i] = pos[i + 2]; break; case WLAN_EID_EXT_SUPP_RATES: + if (ie_len > sizeof(sta_ptr->tdls_cap.rates)) + return; basic = sta_ptr->tdls_cap.rates_len; - for (i = 0; i < pos[1]; i++) + if (ie_len > sizeof(sta_ptr->tdls_cap.rates) - basic) + return; + for (i = 0; i < ie_len; i++) sta_ptr->tdls_cap.rates[basic + i] = pos[i + 2]; - sta_ptr->tdls_cap.rates_len += pos[1]; + sta_ptr->tdls_cap.rates_len += ie_len; break; case WLAN_EID_HT_CAPABILITY: - memcpy((u8 *)&sta_ptr->tdls_cap.ht_capb, pos, + if (ie_len != sizeof(struct ieee80211_ht_cap)) + return; + /* copy the ie's value into ht_capb*/ + memcpy((u8 *)&sta_ptr->tdls_cap.ht_capb, pos + 2, sizeof(struct ieee80211_ht_cap)); sta_ptr->is_11n_enabled = 1; break; case WLAN_EID_HT_OPERATION: - memcpy(&sta_ptr->tdls_cap.ht_oper, pos, + if (ie_len != sizeof(struct ieee80211_ht_operation)) + return; + /* copy the ie's value into ht_oper*/ + memcpy(&sta_ptr->tdls_cap.ht_oper, pos + 2, sizeof(struct ieee80211_ht_operation)); break; case WLAN_EID_BSS_COEX_2040: + if (ie_len != sizeof(pos[2])) + return; sta_ptr->tdls_cap.coex_2040 = pos[2]; break; case WLAN_EID_EXT_CAPABILITY: + if (ie_len < sizeof(struct ieee_types_header)) + return; + if (ie_len > 8) + return; memcpy((u8 *)&sta_ptr->tdls_cap.extcap, pos, sizeof(struct ieee_types_header) + - min_t(u8, pos[1], 8)); + min_t(u8, ie_len, 8)); break; case WLAN_EID_RSN: + if (ie_len < sizeof(struct ieee_types_header)) + return; + if (ie_len > IEEE_MAX_IE_SIZE - + sizeof(struct ieee_types_header)) + return; memcpy((u8 *)&sta_ptr->tdls_cap.rsn_ie, pos, sizeof(struct ieee_types_header) + - min_t(u8, pos[1], IEEE_MAX_IE_SIZE - + min_t(u8, ie_len, IEEE_MAX_IE_SIZE - sizeof(struct ieee_types_header))); break; case WLAN_EID_QOS_CAPA: + if (ie_len != sizeof(pos[2])) + return; sta_ptr->tdls_cap.qos_info = pos[2]; break; case WLAN_EID_VHT_OPERATION: - if (priv->adapter->is_hw_11ac_capable) - memcpy(&sta_ptr->tdls_cap.vhtoper, pos, + if (priv->adapter->is_hw_11ac_capable) { + if (ie_len != + sizeof(struct ieee80211_vht_operation)) + return; + /* copy the ie's value into vhtoper*/ + memcpy(&sta_ptr->tdls_cap.vhtoper, pos + 2, sizeof(struct ieee80211_vht_operation)); + } break; case WLAN_EID_VHT_CAPABILITY: if (priv->adapter->is_hw_11ac_capable) { - memcpy((u8 *)&sta_ptr->tdls_cap.vhtcap, pos, + if (ie_len != sizeof(struct ieee80211_vht_cap)) + return; + /* copy the ie's value into vhtcap*/ + memcpy((u8 *)&sta_ptr->tdls_cap.vhtcap, pos + 2, sizeof(struct ieee80211_vht_cap)); sta_ptr->is_11ac_enabled = 1; } break; case WLAN_EID_AID: - if (priv->adapter->is_hw_11ac_capable) + if (priv->adapter->is_hw_11ac_capable) { + if (ie_len != sizeof(u16)) + return; sta_ptr->tdls_cap.aid = get_unaligned_le16((pos + 2)); + } + break; default: break; } diff --git a/drivers/net/wireless/marvell/mwifiex/wmm.c b/drivers/net/wireless/marvell/mwifiex/wmm.c index 7fba4d940131..a13b05ec8fc0 100644 --- a/drivers/net/wireless/marvell/mwifiex/wmm.c +++ b/drivers/net/wireless/marvell/mwifiex/wmm.c @@ -976,6 +976,10 @@ int mwifiex_ret_wmm_get_status(struct mwifiex_private *priv, "WMM Parameter Set Count: %d\n", wmm_param_ie->qos_info_bitmap & mask); + if (wmm_param_ie->vend_hdr.len + 2 > + sizeof(struct ieee_types_wmm_parameter)) + break; + memcpy((u8 *) &priv->curr_bss_params.bss_descriptor. wmm_ie, wmm_param_ie, wmm_param_ie->vend_hdr.len + 2); diff --git a/drivers/net/wireless/mediatek/mt7601u/phy.c b/drivers/net/wireless/mediatek/mt7601u/phy.c index ca09a5d4305e..71a47459bf8a 100644 --- a/drivers/net/wireless/mediatek/mt7601u/phy.c +++ b/drivers/net/wireless/mediatek/mt7601u/phy.c @@ -221,7 +221,7 @@ int mt7601u_wait_bbp_ready(struct mt7601u_dev *dev) do { val = mt7601u_bbp_rr(dev, MT_BBP_REG_VERSION); - if (val && ~val) + if (val && val != 0xff) break; } while (--i); diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index a450bc6bc774..d02f68792ce4 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -509,9 +509,16 @@ static int qtnf_del_key(struct wiphy *wiphy, struct net_device *dev, int ret; ret = qtnf_cmd_send_del_key(vif, key_index, pairwise, mac_addr); - if (ret) - pr_err("VIF%u.%u: failed to delete key: idx=%u pw=%u\n", - vif->mac->macid, vif->vifid, key_index, pairwise); + if (ret) { + if (ret == -ENOENT) { + pr_debug("VIF%u.%u: key index %d out of bounds\n", + vif->mac->macid, vif->vifid, key_index); + } else { + pr_err("VIF%u.%u: failed to delete key: idx=%u pw=%u\n", + vif->mac->macid, vif->vifid, + key_index, pairwise); + } + } return ret; } diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index 4206886b110c..ed087bbc6f63 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -485,6 +485,9 @@ qtnf_sta_info_parse_rate(struct rate_info *rate_dst, rate_dst->flags |= RATE_INFO_FLAGS_MCS; else if (rate_src->flags & QLINK_STA_INFO_RATE_FLAG_VHT_MCS) rate_dst->flags |= RATE_INFO_FLAGS_VHT_MCS; + + if (rate_src->flags & QLINK_STA_INFO_RATE_FLAG_SHORT_GI) + rate_dst->flags |= RATE_INFO_FLAGS_SHORT_GI; } static void diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c index 9a1d15b3ce45..518caaaf8a98 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c +++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/dev.c @@ -444,12 +444,13 @@ static int rtl8187_init_urbs(struct ieee80211_hw *dev) skb_queue_tail(&priv->rx_queue, skb); usb_anchor_urb(entry, &priv->anchored); ret = usb_submit_urb(entry, GFP_KERNEL); - usb_put_urb(entry); if (ret) { skb_unlink(skb, &priv->rx_queue); usb_unanchor_urb(entry); + usb_put_urb(entry); goto err; } + usb_put_urb(entry); } return ret; diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/leds.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/leds.c index c2d5b495c179..c089540116fa 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8187/leds.c +++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/leds.c @@ -146,7 +146,7 @@ static int rtl8187_register_led(struct ieee80211_hw *dev, led->dev = dev; led->ledpin = ledpin; led->is_radio = is_radio; - strncpy(led->name, name, sizeof(led->name)); + strlcpy(led->name, name, sizeof(led->name)); led->led_dev.name = led->name; led->led_dev.default_trigger = default_trigger; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index 95e3993d8a33..a895b6fd6f85 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -1349,6 +1349,7 @@ struct rtl8xxxu_fileops { u8 has_s0s1:1; u8 has_tx_report:1; u8 gen2_thermal_meter:1; + u8 needs_full_init:1; u32 adda_1t_init; u32 adda_1t_path_on; u32 adda_2t_path_on_a; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c index c4b86a84a721..27e97df996c7 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c @@ -1673,6 +1673,7 @@ struct rtl8xxxu_fileops rtl8723bu_fops = { .has_s0s1 = 1, .has_tx_report = 1, .gen2_thermal_meter = 1, + .needs_full_init = 1, .adda_1t_init = 0x01c00014, .adda_1t_path_on = 0x01c00014, .adda_2t_path_on_a = 0x01c00014, diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 7806a4d2b1fc..63f37fa72e4b 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -3905,6 +3905,9 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) else macpower = true; + if (fops->needs_full_init) + macpower = false; + ret = fops->power_on(priv); if (ret < 0) { dev_warn(dev, "%s: Failed power on\n", __func__); @@ -5450,6 +5453,7 @@ static int rtl8xxxu_submit_int_urb(struct ieee80211_hw *hw) ret = usb_submit_urb(urb, GFP_KERNEL); if (ret) { usb_unanchor_urb(urb); + usb_free_urb(urb); goto error; } @@ -5691,6 +5695,7 @@ static int rtl8xxxu_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, break; case WLAN_CIPHER_SUITE_TKIP: key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIC; + break; default: return -EOPNOTSUPP; } @@ -5916,7 +5921,7 @@ static int rtl8xxxu_parse_usb(struct rtl8xxxu_priv *priv, u8 dir, xtype, num; int ret = 0; - host_interface = &interface->altsetting[0]; + host_interface = interface->cur_altsetting; interface_desc = &host_interface->desc; endpoints = interface_desc->bNumEndpoints; diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index 457a0f725c8a..ab74f3155854 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -1091,13 +1091,15 @@ done: return ret; } -static void _rtl_pci_irq_tasklet(struct ieee80211_hw *hw) +static void _rtl_pci_irq_tasklet(unsigned long data) { + struct ieee80211_hw *hw = (struct ieee80211_hw *)data; _rtl_pci_tx_chk_waitq(hw); } -static void _rtl_pci_prepare_bcn_tasklet(struct ieee80211_hw *hw) +static void _rtl_pci_prepare_bcn_tasklet(unsigned long data) { + struct ieee80211_hw *hw = (struct ieee80211_hw *)data; struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); struct rtl_mac *mac = rtl_mac(rtl_priv(hw)); @@ -1223,10 +1225,10 @@ static void _rtl_pci_init_struct(struct ieee80211_hw *hw, /*task */ tasklet_init(&rtlpriv->works.irq_tasklet, - (void (*)(unsigned long))_rtl_pci_irq_tasklet, + _rtl_pci_irq_tasklet, (unsigned long)hw); tasklet_init(&rtlpriv->works.irq_prepare_bcn_tasklet, - (void (*)(unsigned long))_rtl_pci_prepare_bcn_tasklet, + _rtl_pci_prepare_bcn_tasklet, (unsigned long)hw); INIT_WORK(&rtlpriv->works.lps_change_work, rtl_lps_change_work_callback); diff --git a/drivers/net/wireless/realtek/rtlwifi/ps.c b/drivers/net/wireless/realtek/rtlwifi/ps.c index f6d00613c53d..e1297809535f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/ps.c +++ b/drivers/net/wireless/realtek/rtlwifi/ps.c @@ -774,6 +774,9 @@ static void rtl_p2p_noa_ie(struct ieee80211_hw *hw, void *data, return; } else { noa_num = (noa_len - 2) / 13; + if (noa_num > P2P_MAX_NOA_NUM) + noa_num = P2P_MAX_NOA_NUM; + } noa_index = ie[3]; if (rtlpriv->psc.p2p_ps_info.p2p_ps_mode == @@ -868,6 +871,9 @@ static void rtl_p2p_action_ie(struct ieee80211_hw *hw, void *data, return; } else { noa_num = (noa_len - 2) / 13; + if (noa_num > P2P_MAX_NOA_NUM) + noa_num = P2P_MAX_NOA_NUM; + } noa_index = ie[3]; if (rtlpriv->psc.p2p_ps_info.p2p_ps_mode == diff --git a/drivers/net/wireless/realtek/rtlwifi/regd.c b/drivers/net/wireless/realtek/rtlwifi/regd.c index 1bf3eb25c1da..72ca370331fb 100644 --- a/drivers/net/wireless/realtek/rtlwifi/regd.c +++ b/drivers/net/wireless/realtek/rtlwifi/regd.c @@ -427,7 +427,7 @@ int rtl_regd_init(struct ieee80211_hw *hw, struct wiphy *wiphy = hw->wiphy; struct country_code_to_enum_rd *country = NULL; - if (wiphy == NULL || &rtlpriv->regd == NULL) + if (!wiphy) return -EINVAL; /* init country_code from efuse channel plan */ diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c index 530e80f0ef0b..1ee7f796113b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c @@ -1556,6 +1556,8 @@ static bool usb_cmd_send_packet(struct ieee80211_hw *hw, struct sk_buff *skb) * This is maybe necessary: * rtlpriv->cfg->ops->fill_tx_cmddesc(hw, buffer, 1, 1, skb); */ + dev_kfree_skb(skb); + return true; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c index f4129cf96e7c..bad70a4206fb 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c @@ -173,7 +173,7 @@ static int _rtl92d_fw_init(struct ieee80211_hw *hw) rtl_read_byte(rtlpriv, FW_MAC1_READY)); } RT_TRACE(rtlpriv, COMP_FW, DBG_DMESG, - "Polling FW ready fail!! REG_MCUFWDL:0x%08ul\n", + "Polling FW ready fail!! REG_MCUFWDL:0x%08x\n", rtl_read_dword(rtlpriv, REG_MCUFWDL)); return -1; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c index cf28d25c551f..80002292cd27 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c @@ -1198,6 +1198,7 @@ void rtl92de_enable_interrupt(struct ieee80211_hw *hw) rtl_write_dword(rtlpriv, REG_HIMR, rtlpci->irq_mask[0] & 0xFFFFFFFF); rtl_write_dword(rtlpriv, REG_HIMRE, rtlpci->irq_mask[1] & 0xFFFFFFFF); + rtlpci->irq_enabled = true; } void rtl92de_disable_interrupt(struct ieee80211_hw *hw) @@ -1207,7 +1208,7 @@ void rtl92de_disable_interrupt(struct ieee80211_hw *hw) rtl_write_dword(rtlpriv, REG_HIMR, IMR8190_DISABLED); rtl_write_dword(rtlpriv, REG_HIMRE, IMR8190_DISABLED); - synchronize_irq(rtlpci->pdev->irq); + rtlpci->irq_enabled = false; } static void _rtl92de_poweroff_adapter(struct ieee80211_hw *hw) @@ -1378,7 +1379,7 @@ void rtl92de_set_beacon_related_registers(struct ieee80211_hw *hw) bcn_interval = mac->beacon_interval; atim_window = 2; - /*rtl92de_disable_interrupt(hw); */ + rtl92de_disable_interrupt(hw); rtl_write_word(rtlpriv, REG_ATIMWND, atim_window); rtl_write_word(rtlpriv, REG_BCN_INTERVAL, bcn_interval); rtl_write_word(rtlpriv, REG_BCNTCFG, 0x660f); @@ -1398,9 +1399,9 @@ void rtl92de_set_beacon_interval(struct ieee80211_hw *hw) RT_TRACE(rtlpriv, COMP_BEACON, DBG_DMESG, "beacon_interval:%d\n", bcn_interval); - /* rtl92de_disable_interrupt(hw); */ + rtl92de_disable_interrupt(hw); rtl_write_word(rtlpriv, REG_BCN_INTERVAL, bcn_interval); - /* rtl92de_enable_interrupt(hw); */ + rtl92de_enable_interrupt(hw); } void rtl92de_update_interrupt_mask(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c index a6549f5f6c59..3ec75032b9be 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c @@ -237,6 +237,7 @@ static struct rtl_hal_ops rtl8192de_hal_ops = { .led_control = rtl92de_led_control, .set_desc = rtl92de_set_desc, .get_desc = rtl92de_get_desc, + .is_tx_desc_closed = rtl92de_is_tx_desc_closed, .tx_polling = rtl92de_tx_polling, .enable_hw_sec = rtl92de_enable_hw_security_config, .set_key = rtl92de_set_key, diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c index 86019f654428..d1e56e09cfe8 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c @@ -839,13 +839,15 @@ u32 rtl92de_get_desc(u8 *p_desc, bool istx, u8 desc_name) break; } } else { - struct rx_desc_92c *pdesc = (struct rx_desc_92c *)p_desc; switch (desc_name) { case HW_DESC_OWN: - ret = GET_RX_DESC_OWN(pdesc); + ret = GET_RX_DESC_OWN(p_desc); break; case HW_DESC_RXPKT_LEN: - ret = GET_RX_DESC_PKT_LEN(pdesc); + ret = GET_RX_DESC_PKT_LEN(p_desc); + break; + case HW_DESC_RXBUFF_ADDR: + ret = GET_RX_DESC_BUFF_ADDR(p_desc); break; default: WARN_ONCE(true, "rtl8192de: ERR rxdesc :%d not processed\n", @@ -856,6 +858,23 @@ u32 rtl92de_get_desc(u8 *p_desc, bool istx, u8 desc_name) return ret; } +bool rtl92de_is_tx_desc_closed(struct ieee80211_hw *hw, + u8 hw_queue, u16 index) +{ + struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw)); + struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[hw_queue]; + u8 *entry = (u8 *)(&ring->desc[ring->idx]); + u8 own = (u8)rtl92de_get_desc(entry, true, HW_DESC_OWN); + + /* a beacon packet will only use the first + * descriptor by defaut, and the own bit may not + * be cleared by the hardware + */ + if (own) + return false; + return true; +} + void rtl92de_tx_polling(struct ieee80211_hw *hw, u8 hw_queue) { struct rtl_priv *rtlpriv = rtl_priv(hw); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h index 9bb6cc648590..6cf23c278953 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h @@ -736,6 +736,8 @@ bool rtl92de_rx_query_desc(struct ieee80211_hw *hw, void rtl92de_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx, u8 desc_name, u8 *val); u32 rtl92de_get_desc(u8 *pdesc, bool istx, u8 desc_name); +bool rtl92de_is_tx_desc_closed(struct ieee80211_hw *hw, + u8 hw_queue, u16 index); void rtl92de_tx_polling(struct ieee80211_hw *hw, u8 hw_queue); void rtl92de_tx_fill_cmddesc(struct ieee80211_hw *hw, u8 *pdesc, bool b_firstseg, bool b_lastseg, diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index 2401c8bdb211..93eda23f0123 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -1068,8 +1068,10 @@ int rtl_usb_probe(struct usb_interface *intf, rtlpriv->hw = hw; rtlpriv->usb_data = kzalloc(RTL_USB_MAX_RX_COUNT * sizeof(u32), GFP_KERNEL); - if (!rtlpriv->usb_data) + if (!rtlpriv->usb_data) { + ieee80211_free_hw(hw); return -ENOMEM; + } /* this spin lock must be initialized early */ spin_lock_init(&rtlpriv->locks.usb_lock); @@ -1130,6 +1132,7 @@ error_out2: _rtl_usb_io_handler_release(hw); usb_put_dev(udev); complete(&rtlpriv->firmware_loading_complete); + kfree(rtlpriv->usb_data); return -ENODEV; } EXPORT_SYMBOL(rtl_usb_probe); diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c index 120b0ff545c1..d205947c4c55 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c @@ -541,6 +541,7 @@ static int bl_cmd(struct rsi_hw *adapter, u8 cmd, u8 exp_resp, char *str) bl_start_cmd_timer(adapter, timeout); status = bl_write_cmd(adapter, cmd, exp_resp, ®out_val); if (status < 0) { + bl_stop_cmd_timer(adapter); rsi_dbg(ERR_ZONE, "%s: Command %s (%0x) writing failed..\n", __func__, str, cmd); @@ -656,10 +657,9 @@ static int ping_pong_write(struct rsi_hw *adapter, u8 cmd, u8 *addr, u32 size) } status = bl_cmd(adapter, cmd_req, cmd_resp, str); - if (status) { - bl_stop_cmd_timer(adapter); + if (status) return status; - } + return 0; } @@ -749,10 +749,9 @@ static int auto_fw_upgrade(struct rsi_hw *adapter, u8 *flash_content, status = bl_cmd(adapter, EOF_REACHED, FW_LOADING_SUCCESSFUL, "EOF_REACHED"); - if (status) { - bl_stop_cmd_timer(adapter); + if (status) return status; - } + rsi_dbg(INFO_ZONE, "FW loading is done and FW is running..\n"); return 0; } @@ -773,6 +772,7 @@ static int rsi_load_firmware(struct rsi_hw *adapter) status = hif_ops->master_reg_read(adapter, SWBL_REGOUT, ®out_val, 2); if (status < 0) { + bl_stop_cmd_timer(adapter); rsi_dbg(ERR_ZONE, "%s: REGOUT read failed\n", __func__); return status; diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index fa12c05d9e23..233b2239311d 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -218,6 +218,7 @@ void rsi_mac80211_detach(struct rsi_hw *adapter) ieee80211_stop_queues(hw); ieee80211_unregister_hw(hw); ieee80211_free_hw(hw); + adapter->hw = NULL; } for (band = 0; band < NUM_NL80211_BANDS; band++) { diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index f7b550f900c4..234e41e1cb57 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -1576,6 +1576,7 @@ static int rsi_send_beacon(struct rsi_common *common) skb_pull(skb, (64 - dword_align_bytes)); if (rsi_prepare_beacon(common, skb)) { rsi_dbg(ERR_ZONE, "Failed to prepare beacon\n"); + dev_kfree_skb(skb); return -EINVAL; } skb_queue_tail(&common->tx_queue[MGMT_BEACON_Q], skb); diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c index f90c10b3c921..786a330bc470 100644 --- a/drivers/net/wireless/rsi/rsi_91x_usb.c +++ b/drivers/net/wireless/rsi/rsi_91x_usb.c @@ -105,7 +105,7 @@ static int rsi_find_bulk_in_and_out_endpoints(struct usb_interface *interface, __le16 buffer_size; int ii, bep_found = 0; - iface_desc = &(interface->altsetting[0]); + iface_desc = interface->cur_altsetting; for (ii = 0; ii < iface_desc->desc.bNumEndpoints; ++ii) { endpoint = &(iface_desc->endpoint[ii].desc); diff --git a/drivers/net/wireless/st/cw1200/fwio.c b/drivers/net/wireless/st/cw1200/fwio.c index 30e7646d04af..16be7fa82a23 100644 --- a/drivers/net/wireless/st/cw1200/fwio.c +++ b/drivers/net/wireless/st/cw1200/fwio.c @@ -323,12 +323,12 @@ int cw1200_load_firmware(struct cw1200_common *priv) goto out; } - priv->hw_type = cw1200_get_hw_type(val32, &major_revision); - if (priv->hw_type < 0) { + ret = cw1200_get_hw_type(val32, &major_revision); + if (ret < 0) { pr_err("Can't deduce hardware type.\n"); - ret = -ENOTSUPP; goto out; } + priv->hw_type = ret; /* Set DPLL Reg value, and read back to confirm writes work */ ret = cw1200_reg_write_32(priv, ST90TDS_TSET_GEN_R_W_REG_ID, diff --git a/drivers/net/wireless/ti/wlcore/vendor_cmd.c b/drivers/net/wireless/ti/wlcore/vendor_cmd.c index 5c0bcb1fe1a1..e75c3cee0252 100644 --- a/drivers/net/wireless/ti/wlcore/vendor_cmd.c +++ b/drivers/net/wireless/ti/wlcore/vendor_cmd.c @@ -66,7 +66,7 @@ wlcore_vendor_cmd_smart_config_start(struct wiphy *wiphy, out: mutex_unlock(&wl->mutex); - return 0; + return ret; } static int diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c index c30bf118c67d..1e396eb26ccf 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c @@ -1272,7 +1272,7 @@ static void print_id(struct usb_device *udev) static int eject_installer(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); - struct usb_host_interface *iface_desc = &intf->altsetting[0]; + struct usb_host_interface *iface_desc = intf->cur_altsetting; struct usb_endpoint_descriptor *endpoint; unsigned char *cmd; u8 bulk_out_ep; diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index d465071656b5..b5fa910b47b7 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -172,7 +172,8 @@ static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb, return vif->hash.mapping[skb_get_hash_raw(skb) % size]; } -static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t +xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct xenvif *vif = netdev_priv(dev); struct xenvif_queue *queue = NULL; @@ -718,7 +719,6 @@ err_unmap: xenvif_unmap_frontend_data_rings(queue); netif_napi_del(&queue->napi); err: - module_put(THIS_MODULE); return err; } diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c index 08a4f82a2965..b86c8447df89 100644 --- a/drivers/nfc/fdp/i2c.c +++ b/drivers/nfc/fdp/i2c.c @@ -277,7 +277,7 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev, *fw_vsc_cfg, len); if (r) { - devm_kfree(dev, fw_vsc_cfg); + devm_kfree(dev, *fw_vsc_cfg); goto vsc_read_err; } } else { diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index 198585bbc771..d9492cffd00e 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -236,8 +236,10 @@ static irqreturn_t nxp_nci_i2c_irq_thread_fn(int irq, void *phy_id) if (r == -EREMOTEIO) { phy->hard_fault = r; - skb = NULL; - } else if (r < 0) { + if (info->mode == NXP_NCI_MODE_FW) + nxp_nci_fw_recv_frame(phy->ndev, NULL); + } + if (r < 0) { nfc_err(&client->dev, "Read failed with error %d\n", r); goto exit_irq_handled; } diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c index 5d823e965883..a2c9b3f3bc23 100644 --- a/drivers/nfc/pn533/usb.c +++ b/drivers/nfc/pn533/usb.c @@ -403,7 +403,7 @@ static int pn533_acr122_poweron_rdr(struct pn533_usb_phy *phy) cmd, sizeof(cmd), false); rc = usb_bulk_msg(phy->udev, phy->out_urb->pipe, buffer, sizeof(cmd), - &transferred, 0); + &transferred, 5000); kfree(buffer); if (rc || (transferred != sizeof(cmd))) { nfc_err(&phy->udev->dev, @@ -559,18 +559,25 @@ static int pn533_usb_probe(struct usb_interface *interface, rc = pn533_finalize_setup(priv); if (rc) - goto error; + goto err_deregister; usb_set_intfdata(interface, phy); return 0; +err_deregister: + pn533_unregister_device(phy->priv); error: + usb_kill_urb(phy->in_urb); + usb_kill_urb(phy->out_urb); + usb_kill_urb(phy->ack_urb); + usb_free_urb(phy->in_urb); usb_free_urb(phy->out_urb); usb_free_urb(phy->ack_urb); usb_put_dev(phy->udev); kfree(in_buf); + kfree(phy->ack_buffer); return rc; } diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c index 4b14740edb67..8ba5a6d6329e 100644 --- a/drivers/nfc/pn544/i2c.c +++ b/drivers/nfc/pn544/i2c.c @@ -236,6 +236,7 @@ static void pn544_hci_i2c_platform_init(struct pn544_i2c_phy *phy) out: gpiod_set_value_cansleep(phy->gpiod_en, !phy->en_polarity); + usleep_range(10000, 15000); } static void pn544_hci_i2c_enable_mode(struct pn544_i2c_phy *phy, int run_mode) diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c index 70e898e38b16..f30bdf95610f 100644 --- a/drivers/nfc/pn544/pn544.c +++ b/drivers/nfc/pn544/pn544.c @@ -704,7 +704,7 @@ static int pn544_hci_check_presence(struct nfc_hci_dev *hdev, target->nfcid1_len != 10) return -EOPNOTSUPP; - return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE, + return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE, PN544_RF_READER_CMD_ACTIVATE_NEXT, target->nfcid1, target->nfcid1_len, NULL); } else if (target->supported_protocols & (NFC_PROTO_JEWEL_MASK | diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index bb43cebda9dc..06bb226c62ef 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -574,7 +574,7 @@ static void port100_tx_update_payload_len(void *_frame, int len) { struct port100_frame *frame = _frame; - frame->datalen = cpu_to_le16(le16_to_cpu(frame->datalen) + len); + le16_add_cpu(&frame->datalen, len); } static bool port100_rx_frame_is_valid(void *_frame) @@ -792,7 +792,7 @@ static int port100_send_frame_async(struct port100 *dev, struct sk_buff *out, rc = port100_submit_urb_for_ack(dev, GFP_KERNEL); if (rc) - usb_unlink_urb(dev->out_urb); + usb_kill_urb(dev->out_urb); exit: mutex_unlock(&dev->out_urb_lock); diff --git a/drivers/nfc/st21nfca/core.c b/drivers/nfc/st21nfca/core.c index e803fdfa9189..f37069b53b20 100644 --- a/drivers/nfc/st21nfca/core.c +++ b/drivers/nfc/st21nfca/core.c @@ -719,6 +719,7 @@ static int st21nfca_hci_complete_target_discovered(struct nfc_hci_dev *hdev, NFC_PROTO_FELICA_MASK; } else { kfree_skb(nfcid_skb); + nfcid_skb = NULL; /* P2P in type A */ r = nfc_hci_get_param(hdev, ST21NFCA_RF_READER_F_GATE, ST21NFCA_RF_READER_F_NFCID1, diff --git a/drivers/ntb/hw/idt/ntb_hw_idt.c b/drivers/ntb/hw/idt/ntb_hw_idt.c index d44d7ef38fe8..b68e2cad74cc 100644 --- a/drivers/ntb/hw/idt/ntb_hw_idt.c +++ b/drivers/ntb/hw/idt/ntb_hw_idt.c @@ -1105,9 +1105,9 @@ static struct idt_mw_cfg *idt_scan_mws(struct idt_ntb_dev *ndev, int port, } /* Allocate memory for memory window descriptors */ - ret_mws = devm_kcalloc(&ndev->ntb.pdev->dev, *mw_cnt, - sizeof(*ret_mws), GFP_KERNEL); - if (IS_ERR_OR_NULL(ret_mws)) + ret_mws = devm_kcalloc(&ndev->ntb.pdev->dev, *mw_cnt, sizeof(*ret_mws), + GFP_KERNEL); + if (!ret_mws) return ERR_PTR(-ENOMEM); /* Copy the info of detected memory windows */ @@ -2393,7 +2393,7 @@ static struct idt_ntb_dev *idt_create_dev(struct pci_dev *pdev, /* Allocate memory for the IDT PCIe-device descriptor */ ndev = devm_kzalloc(&pdev->dev, sizeof(*ndev), GFP_KERNEL); - if (IS_ERR_OR_NULL(ndev)) { + if (!ndev) { dev_err(&pdev->dev, "Memory allocation failed for descriptor"); return ERR_PTR(-ENOMEM); } diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 2557e2c05b90..58068f1447bb 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -348,7 +348,7 @@ static inline int ndev_db_clear_mask(struct intel_ntb_dev *ndev, u64 db_bits, return 0; } -static inline int ndev_vec_mask(struct intel_ntb_dev *ndev, int db_vector) +static inline u64 ndev_vec_mask(struct intel_ntb_dev *ndev, int db_vector) { u64 shift, mask; diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 5bdd499b5f4f..250471fa0fe6 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -102,4 +102,14 @@ config NVDIMM_DAX Select Y if unsure +config OF_PMEM + tristate "Device-tree support for persistent memory regions" + depends on OF + default LIBNVDIMM + help + Allows regions of persistent memory to be described in the + device-tree. + + Select Y if unsure. + endif diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index 447e0e14f3b6..5b4f3d50b83c 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -4,6 +4,8 @@ obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o obj-$(CONFIG_ND_BTT) += nd_btt.o obj-$(CONFIG_ND_BLK) += nd_blk.o obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o +obj-$(CONFIG_OF_PMEM) += of_pmem.o +obj-$(CONFIG_VIRTIO_PMEM) += virtio_pmem.o nd_virtio.o nd_pmem-y := pmem.o diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index b2feda35966b..471498469d0a 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1259,11 +1259,11 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip, ret = btt_data_read(arena, page, off, postmap, cur_len); if (ret) { - int rc; - /* Media error - set the e_flag */ - rc = btt_map_write(arena, premap, postmap, 0, 1, - NVDIMM_IO_ATOMIC); + if (btt_map_write(arena, premap, postmap, 0, 1, NVDIMM_IO_ATOMIC)) + dev_warn_ratelimited(to_dev(arena), + "Error persistently tracking bad blocks at %#x\n", + premap); goto out_rtt; } diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 2f1b54fab399..4587c9773560 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -358,6 +358,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent, nvdimm_bus->dev.release = nvdimm_bus_release; nvdimm_bus->dev.groups = nd_desc->attr_groups; nvdimm_bus->dev.bus = &nvdimm_bus_type; + nvdimm_bus->dev.of_node = nd_desc->of_node; dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); rc = device_register(&nvdimm_bus->dev); if (rc) { diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index b2fc29b8279b..32f2aaf62f27 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -263,7 +263,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512); sector_t sector = offset >> 9; - int rc = 0; + int rc = 0, ret = 0; if (unlikely(!size)) return 0; @@ -299,7 +299,9 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns, } memcpy_flushcache(nsio->addr + offset, buf, size); - nvdimm_flush(to_nd_region(ndns->dev.parent)); + ret = nvdimm_flush(to_nd_region(ndns->dev.parent), NULL); + if (ret) + rc = ret; return rc; } diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index e3f060f0b83e..b79a8d0f9b48 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -170,6 +170,7 @@ struct nd_region { struct badblocks bb; struct nd_interleave_set *nd_set; struct nd_percpu_lane __percpu *lane; + int (*flush)(struct nd_region *nd_region, struct bio *bio); struct nd_mapping mapping[0]; }; diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c new file mode 100644 index 000000000000..8645275c08c2 --- /dev/null +++ b/drivers/nvdimm/nd_virtio.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * virtio_pmem.c: Virtio pmem Driver + * + * Discovers persistent memory range information + * from host and provides a virtio based flushing + * interface. + */ +#include "virtio_pmem.h" +#include "nd.h" + + /* The interrupt handler */ +void virtio_pmem_host_ack(struct virtqueue *vq) +{ + struct virtio_pmem *vpmem = vq->vdev->priv; + struct virtio_pmem_request *req_data, *req_buf; + unsigned long flags; + unsigned int len; + + spin_lock_irqsave(&vpmem->pmem_lock, flags); + while ((req_data = virtqueue_get_buf(vq, &len)) != NULL) { + req_data->done = true; + wake_up(&req_data->host_acked); + + if (!list_empty(&vpmem->req_list)) { + req_buf = list_first_entry(&vpmem->req_list, + struct virtio_pmem_request, list); + req_buf->wq_buf_avail = true; + wake_up(&req_buf->wq_buf); + list_del(&req_buf->list); + } + } + spin_unlock_irqrestore(&vpmem->pmem_lock, flags); +} +EXPORT_SYMBOL_GPL(virtio_pmem_host_ack); + + /* The request submission function */ +static int virtio_pmem_flush(struct nd_region *nd_region) +{ + struct virtio_device *vdev = nd_region->provider_data; + struct virtio_pmem *vpmem = vdev->priv; + struct virtio_pmem_request *req_data; + struct scatterlist *sgs[2], sg, ret; + unsigned long flags; + int err, err1; + + might_sleep(); + req_data = kmalloc(sizeof(*req_data), GFP_KERNEL); + if (!req_data) + return -ENOMEM; + + req_data->done = false; + init_waitqueue_head(&req_data->host_acked); + init_waitqueue_head(&req_data->wq_buf); + INIT_LIST_HEAD(&req_data->list); + req_data->req.type = cpu_to_virtio32(vdev, VIRTIO_PMEM_REQ_TYPE_FLUSH); + sg_init_one(&sg, &req_data->req, sizeof(req_data->req)); + sgs[0] = &sg; + sg_init_one(&ret, &req_data->resp.ret, sizeof(req_data->resp)); + sgs[1] = &ret; + + spin_lock_irqsave(&vpmem->pmem_lock, flags); + /* + * If virtqueue_add_sgs returns -ENOSPC then req_vq virtual + * queue does not have free descriptor. We add the request + * to req_list and wait for host_ack to wake us up when free + * slots are available. + */ + while ((err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req_data, + GFP_ATOMIC)) == -ENOSPC) { + + dev_info(&vdev->dev, "failed to send command to virtio pmem device, no free slots in the virtqueue\n"); + req_data->wq_buf_avail = false; + list_add_tail(&req_data->list, &vpmem->req_list); + spin_unlock_irqrestore(&vpmem->pmem_lock, flags); + + /* A host response results in "host_ack" getting called */ + wait_event(req_data->wq_buf, req_data->wq_buf_avail); + spin_lock_irqsave(&vpmem->pmem_lock, flags); + } + err1 = virtqueue_kick(vpmem->req_vq); + spin_unlock_irqrestore(&vpmem->pmem_lock, flags); + /* + * virtqueue_add_sgs failed with error different than -ENOSPC, we can't + * do anything about that. + */ + if (err || !err1) { + dev_info(&vdev->dev, "failed to send command to virtio pmem device\n"); + err = -EIO; + } else { + /* A host repsonse results in "host_ack" getting called */ + wait_event(req_data->host_acked, req_data->done); + err = virtio32_to_cpu(vdev, req_data->resp.ret); + } + + kfree(req_data); + return err; +}; + +/* The asynchronous flush callback function */ +int async_pmem_flush(struct nd_region *nd_region, struct bio *bio) +{ + /* + * Create child bio for asynchronous flush and chain with + * parent bio. Otherwise directly call nd_region flush. + */ + if (bio && bio->bi_iter.bi_sector != -1) { + struct bio *child = bio_alloc(GFP_ATOMIC, 0); + + if (!child) + return -ENOMEM; + bio_copy_dev(child, bio); + child->bi_opf = REQ_PREFLUSH; + child->bi_iter.bi_sector = -1; + bio_chain(child, bio); + submit_bio(child); + return 0; + } + if (virtio_pmem_flush(nd_region)) + return -EIO; + + return 0; +}; +EXPORT_SYMBOL_GPL(async_pmem_flush); +MODULE_LICENSE("GPL"); diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c new file mode 100644 index 000000000000..6e5cbfd8cb13 --- /dev/null +++ b/drivers/nvdimm/of_pmem.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#define pr_fmt(fmt) "of_pmem: " fmt + +#include +#include +#include +#include +#include +#include + +static const struct attribute_group *region_attr_groups[] = { + &nd_region_attribute_group, + &nd_device_attribute_group, + NULL, +}; + +static const struct attribute_group *bus_attr_groups[] = { + &nvdimm_bus_attribute_group, + NULL, +}; + +struct of_pmem_private { + struct nvdimm_bus_descriptor bus_desc; + struct nvdimm_bus *bus; +}; + +static int of_pmem_region_probe(struct platform_device *pdev) +{ + struct of_pmem_private *priv; + struct device_node *np; + struct nvdimm_bus *bus; + bool is_volatile; + int i; + + np = dev_of_node(&pdev->dev); + if (!np) + return -ENXIO; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->bus_desc.attr_groups = bus_attr_groups; + priv->bus_desc.provider_name = kstrdup(pdev->name, GFP_KERNEL); + priv->bus_desc.module = THIS_MODULE; + priv->bus_desc.of_node = np; + + priv->bus = bus = nvdimm_bus_register(&pdev->dev, &priv->bus_desc); + if (!bus) { + kfree(priv); + return -ENODEV; + } + platform_set_drvdata(pdev, priv); + + is_volatile = !!of_find_property(np, "volatile", NULL); + dev_dbg(&pdev->dev, "Registering %s regions from %pOF\n", + is_volatile ? "volatile" : "non-volatile", np); + + for (i = 0; i < pdev->num_resources; i++) { + struct nd_region_desc ndr_desc; + struct nd_region *region; + + /* + * NB: libnvdimm copies the data from ndr_desc into it's own + * structures so passing a stack pointer is fine. + */ + memset(&ndr_desc, 0, sizeof(ndr_desc)); + ndr_desc.attr_groups = region_attr_groups; + ndr_desc.numa_node = dev_to_node(&pdev->dev); + ndr_desc.res = &pdev->resource[i]; + ndr_desc.of_node = np; + set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); + + if (is_volatile) + region = nvdimm_volatile_region_create(bus, &ndr_desc); + else + region = nvdimm_pmem_region_create(bus, &ndr_desc); + + if (!region) + dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n", + ndr_desc.res, np); + else + dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n", + ndr_desc.res, np); + } + + return 0; +} + +static int of_pmem_region_remove(struct platform_device *pdev) +{ + struct of_pmem_private *priv = platform_get_drvdata(pdev); + + nvdimm_bus_unregister(priv->bus); + kfree(priv); + + return 0; +} + +static const struct of_device_id of_pmem_region_match[] = { + { .compatible = "pmem-region" }, + { }, +}; + +static struct platform_driver of_pmem_region_driver = { + .probe = of_pmem_region_probe, + .remove = of_pmem_region_remove, + .driver = { + .name = "of_pmem", + .of_match_table = of_pmem_region_match, + }, +}; + +module_platform_driver(of_pmem_region_driver); +MODULE_DEVICE_TABLE(of, of_pmem_region_match); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("IBM Corporation"); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 80f8bbf83742..76934fadca97 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -169,6 +169,7 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) { + int ret = 0; blk_status_t rc = 0; bool do_acct; unsigned long start; @@ -178,7 +179,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) struct nd_region *nd_region = to_region(pmem); if (bio->bi_opf & REQ_FLUSH) - nvdimm_flush(nd_region); + ret = nvdimm_flush(nd_region, bio); do_acct = nd_iostat_start(bio, &start); bio_for_each_segment(bvec, bio, iter) { @@ -194,7 +195,10 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) nd_iostat_end(bio, start); if (bio->bi_opf & REQ_FUA) - nvdimm_flush(nd_region); + ret = nvdimm_flush(nd_region, bio); + + if (ret) + bio->bi_status = errno_to_blk_status(ret); bio_endio(bio); return BLK_QC_T_NONE; @@ -415,7 +419,6 @@ static int pmem_attach_disk(struct device *dev, } dax_write_cache(dax_dev, wbc); pmem->dax_dev = dax_dev; - gendev = disk_to_dev(disk); gendev->groups = pmem_attribute_groups; @@ -473,14 +476,14 @@ static int nd_pmem_remove(struct device *dev) sysfs_put(pmem->bb_state); pmem->bb_state = NULL; } - nvdimm_flush(to_nd_region(dev->parent)); + nvdimm_flush(to_nd_region(dev->parent), NULL); return 0; } static void nd_pmem_shutdown(struct device *dev) { - nvdimm_flush(to_nd_region(dev->parent)); + nvdimm_flush(to_nd_region(dev->parent), NULL); } static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 708043d20d0d..c0e6a6d235de 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -282,7 +282,9 @@ static ssize_t deep_flush_store(struct device *dev, struct device_attribute *att return rc; if (!flush) return -EINVAL; - nvdimm_flush(nd_region); + rc = nvdimm_flush(nd_region, NULL); + if (rc) + return rc; return len; } @@ -999,8 +1001,14 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, dev->parent = &nvdimm_bus->dev; dev->type = dev_type; dev->groups = ndr_desc->attr_groups; + dev->of_node = ndr_desc->of_node; nd_region->ndr_size = resource_size(ndr_desc->res); nd_region->ndr_start = ndr_desc->res->start; + if (ndr_desc->flush) + nd_region->flush = ndr_desc->flush; + else + nd_region->flush = NULL; + nd_device_register(dev); return nd_region; @@ -1041,11 +1049,24 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus, } EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create); +int nvdimm_flush(struct nd_region *nd_region, struct bio *bio) +{ + int rc = 0; + + if (!nd_region->flush) + rc = generic_nvdimm_flush(nd_region); + else { + if (nd_region->flush(nd_region, bio)) + rc = -EIO; + } + + return rc; +} /** * nvdimm_flush - flush any posted write queues between the cpu and pmem media * @nd_region: blk or interleaved pmem region */ -void nvdimm_flush(struct nd_region *nd_region) +int generic_nvdimm_flush(struct nd_region *nd_region) { struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev); int i, idx; @@ -1069,6 +1090,8 @@ void nvdimm_flush(struct nd_region *nd_region) if (ndrd_get_flush_wpq(ndrd, i, 0)) writeq(1, ndrd_get_flush_wpq(ndrd, i, idx)); wmb(); + + return 0; } EXPORT_SYMBOL_GPL(nvdimm_flush); diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c new file mode 100644 index 000000000000..5e3d07b47e0c --- /dev/null +++ b/drivers/nvdimm/virtio_pmem.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * virtio_pmem.c: Virtio pmem Driver + * + * Discovers persistent memory range information + * from host and registers the virtual pmem device + * with libnvdimm core. + */ +#include "virtio_pmem.h" +#include "nd.h" + +static struct virtio_device_id id_table[] = { + { VIRTIO_ID_PMEM, VIRTIO_DEV_ANY_ID }, + { 0 }, +}; + + /* Initialize virt queue */ +static int init_vq(struct virtio_pmem *vpmem) +{ + /* single vq */ + vpmem->req_vq = virtio_find_single_vq(vpmem->vdev, + virtio_pmem_host_ack, "flush_queue"); + if (IS_ERR(vpmem->req_vq)) + return PTR_ERR(vpmem->req_vq); + + spin_lock_init(&vpmem->pmem_lock); + INIT_LIST_HEAD(&vpmem->req_list); + + return 0; +}; + +static int virtio_pmem_probe(struct virtio_device *vdev) +{ + struct nd_region_desc ndr_desc = {}; + int nid = dev_to_node(&vdev->dev); + struct nd_region *nd_region; + struct virtio_pmem *vpmem; + struct resource res; + int err = 0; + + if (!vdev->config->get) { + dev_err(&vdev->dev, "%s failure: config access disabled\n", + __func__); + return -EINVAL; + } + + vpmem = devm_kzalloc(&vdev->dev, sizeof(*vpmem), GFP_KERNEL); + if (!vpmem) { + err = -ENOMEM; + goto out_err; + } + + vpmem->vdev = vdev; + vdev->priv = vpmem; + err = init_vq(vpmem); + if (err) { + dev_err(&vdev->dev, "failed to initialize virtio pmem vq's\n"); + goto out_err; + } + + virtio_cread(vpmem->vdev, struct virtio_pmem_config, + start, &vpmem->start); + virtio_cread(vpmem->vdev, struct virtio_pmem_config, + size, &vpmem->size); + + res.start = vpmem->start; + res.end = vpmem->start + vpmem->size - 1; + vpmem->nd_desc.provider_name = "virtio-pmem"; + vpmem->nd_desc.module = THIS_MODULE; + + vpmem->nvdimm_bus = nvdimm_bus_register(&vdev->dev, + &vpmem->nd_desc); + if (!vpmem->nvdimm_bus) { + dev_err(&vdev->dev, "failed to register device with nvdimm_bus\n"); + err = -ENXIO; + goto out_vq; + } + + dev_set_drvdata(&vdev->dev, vpmem->nvdimm_bus); + + ndr_desc.res = &res; + ndr_desc.numa_node = nid; + ndr_desc.flush = async_pmem_flush; + set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); + set_bit(ND_REGION_ASYNC, &ndr_desc.flags); + nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc); + if (!nd_region) { + dev_err(&vdev->dev, "failed to create nvdimm region\n"); + err = -ENXIO; + goto out_nd; + } + nd_region->provider_data = dev_to_virtio(nd_region->dev.parent->parent); + return 0; +out_nd: + nvdimm_bus_unregister(vpmem->nvdimm_bus); +out_vq: + vdev->config->del_vqs(vdev); +out_err: + return err; +} + +static void virtio_pmem_remove(struct virtio_device *vdev) +{ + struct nvdimm_bus *nvdimm_bus = dev_get_drvdata(&vdev->dev); + + nvdimm_bus_unregister(nvdimm_bus); + vdev->config->del_vqs(vdev); + vdev->config->reset(vdev); +} + +static struct virtio_driver virtio_pmem_driver = { + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = virtio_pmem_probe, + .remove = virtio_pmem_remove, +}; + +module_virtio_driver(virtio_pmem_driver); +MODULE_DEVICE_TABLE(virtio, id_table); +MODULE_DESCRIPTION("Virtio pmem driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h new file mode 100644 index 000000000000..0dddefe594c4 --- /dev/null +++ b/drivers/nvdimm/virtio_pmem.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * virtio_pmem.h: virtio pmem Driver + * + * Discovers persistent memory range information + * from host and provides a virtio based flushing + * interface. + **/ + +#ifndef _LINUX_VIRTIO_PMEM_H +#define _LINUX_VIRTIO_PMEM_H + +#include +#include +#include +#include + +struct virtio_pmem_request { + struct virtio_pmem_req req; + struct virtio_pmem_resp resp; + + /* Wait queue to process deferred work after ack from host */ + wait_queue_head_t host_acked; + bool done; + + /* Wait queue to process deferred work after virt queue buffer avail */ + wait_queue_head_t wq_buf; + bool wq_buf_avail; + struct list_head list; +}; + +struct virtio_pmem { + struct virtio_device *vdev; + + /* Virtio pmem request queue */ + struct virtqueue *req_vq; + + /* nvdimm bus registers virtio pmem device */ + struct nvdimm_bus *nvdimm_bus; + struct nvdimm_bus_descriptor nd_desc; + + /* List to store deferred work if virtqueue is full */ + struct list_head req_list; + + /* Synchronize virtqueue data */ + spinlock_t pmem_lock; + + /* Memory region information */ + __u64 start; + __u64 size; +}; + +void virtio_pmem_host_ack(struct virtqueue *vq); +int async_pmem_flush(struct nd_region *nd_region, struct bio *bio); +#endif diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 768ac752a6e3..a760c449f4a9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -889,8 +889,8 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, void *buffer, size_t buflen, u32 *result) { + union nvme_result res = { 0 }; struct nvme_command c; - union nvme_result res; int ret; memset(&c, 0, sizeof(c)); @@ -1331,7 +1331,7 @@ static int nvme_pr_reserve(struct block_device *bdev, u64 key, static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, enum pr_type type, bool abort) { - u32 cdw10 = nvme_pr_type(type) << 8 | abort ? 2 : 1; + u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1); return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); } @@ -1343,7 +1343,7 @@ static int nvme_pr_clear(struct block_device *bdev, u64 key) static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) { - u32 cdw10 = nvme_pr_type(type) << 8 | key ? 1 << 3 : 0; + u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0); return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); } diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 058d542647dd..9e4d2ecf736d 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -337,7 +337,8 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, !template->ls_req || !template->fcp_io || !template->ls_abort || !template->fcp_abort || !template->max_hw_queues || !template->max_sgl_segments || - !template->max_dif_sgl_segments || !template->dma_boundary) { + !template->max_dif_sgl_segments || !template->dma_boundary || + !template->module) { ret = -EINVAL; goto out_reghost_failed; } @@ -1762,6 +1763,7 @@ nvme_fc_ctrl_free(struct kref *ref) { struct nvme_fc_ctrl *ctrl = container_of(ref, struct nvme_fc_ctrl, ref); + struct nvme_fc_lport *lport = ctrl->lport; unsigned long flags; if (ctrl->ctrl.tagset) { @@ -1787,6 +1789,7 @@ nvme_fc_ctrl_free(struct kref *ref) if (ctrl->ctrl.opts) nvmf_free_options(ctrl->ctrl.opts); kfree(ctrl); + module_put(lport->ops->module); } static void @@ -2765,10 +2768,15 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, goto out_fail; } + if (!try_module_get(lport->ops->module)) { + ret = -EUNATCH; + goto out_free_ctrl; + } + idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL); if (idx < 0) { ret = -ENOSPC; - goto out_free_ctrl; + goto out_mod_put; } ctrl->ctrl.opts = opts; @@ -2915,6 +2923,8 @@ out_free_queues: out_free_ida: put_device(ctrl->dev); ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); +out_mod_put: + module_put(lport->ops->module); out_free_ctrl: kfree(ctrl); out_fail: diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index cd11cced3678..3788c053a0b1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2274,7 +2274,7 @@ static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val) static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) { - *val = readq(to_nvme_dev(ctrl)->bar + off); + *val = lo_hi_readq(to_nvme_dev(ctrl)->bar + off); return 0; } diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 0b0a4825b3eb..b8fe8702065b 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -535,6 +535,7 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, break; /* Fall-Thru to RSP handling */ + /* FALLTHRU */ case NVMET_FCOP_RSP: if (fcpreq) { @@ -692,6 +693,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport) #define FCLOOP_DMABOUND_4G 0xFFFFFFFF static struct nvme_fc_port_template fctemplate = { + .module = THIS_MODULE, .localport_delete = fcloop_localport_delete, .remoteport_delete = fcloop_remoteport_delete, .create_queue = fcloop_create_queue, diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index b414d9d207d4..08b171731664 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -617,7 +617,7 @@ static struct nvmem_device *nvmem_find(const char *name) d = bus_find_device(&nvmem_bus_type, NULL, (void *)name, nvmem_match); if (!d) - return NULL; + return ERR_PTR(-ENOENT); return to_nvmem_device(d); } diff --git a/drivers/nvmem/imx-ocotp.c b/drivers/nvmem/imx-ocotp.c index 193ca8fd350a..0c8c3b9bb6a7 100644 --- a/drivers/nvmem/imx-ocotp.c +++ b/drivers/nvmem/imx-ocotp.c @@ -199,7 +199,8 @@ static int imx_ocotp_write(void *context, unsigned int offset, void *val, strobe_prog = clk_rate / (1000000000 / 10000) + 2 * (DEF_RELAX + 1) - 1; strobe_read = clk_rate / (1000000000 / 40) + 2 * (DEF_RELAX + 1) - 1; - timing = strobe_prog & 0x00000FFF; + timing = readl(priv->base + IMX_OCOTP_ADDR_TIMING) & 0x0FC00000; + timing |= strobe_prog & 0x00000FFF; timing |= (relax << 12) & 0x0000F000; timing |= (strobe_read << 16) & 0x003F0000; diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig index ba7b034b2b91..6b8646db110c 100644 --- a/drivers/of/Kconfig +++ b/drivers/of/Kconfig @@ -112,4 +112,8 @@ config OF_OVERLAY config OF_NUMA bool +config OF_DMA_DEFAULT_COHERENT + # arches should select this if DMA is coherent by default for OF devices + bool + endif # OF diff --git a/drivers/of/address.c b/drivers/of/address.c index 792722e7d458..456339c19aed 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -894,12 +894,16 @@ EXPORT_SYMBOL_GPL(of_dma_get_range); * @np: device node * * It returns true if "dma-coherent" property was found - * for this device in DT. + * for this device in the DT, or if DMA is coherent by + * default for OF devices on the current platform. */ bool of_dma_is_coherent(struct device_node *np) { struct device_node *node = of_node_get(np); + if (IS_ENABLED(CONFIG_OF_DMA_DEFAULT_COHERENT)) + return true; + while (node) { if (of_property_read_bool(node, "dma-coherent")) { of_node_put(node); diff --git a/drivers/of/base.c b/drivers/of/base.c index ce8a6e0c9b6a..41b254be0295 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1837,7 +1837,7 @@ struct device_node *of_find_next_cache_node(const struct device_node *np) /* OF on pmac has nodes instead of properties named "l2-cache" * beneath CPU nodes. */ - if (!strcmp(np->type, "cpu")) + if (IS_ENABLED(CONFIG_PPC_PMAC) && !strcmp(np->type, "cpu")) for_each_child_of_node(np, child) if (!strcmp(child->type, "cache")) return child; diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c index 8c1819230ed2..fe26697d3bd7 100644 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@ -358,7 +358,7 @@ struct phy_device *of_phy_get_and_connect(struct net_device *dev, struct phy_device *phy; iface = of_get_phy_mode(np); - if (iface < 0) + if ((int)iface < 0) return NULL; phy_np = of_parse_phandle(np, "phy-handle", 0); diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 7c6aff761800..4bf6a9db6ac0 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -910,20 +910,44 @@ static void __init of_unittest_platform_populate(void) * of np into dup node (present in live tree) and * updates parent of children of np to dup. * - * @np: node already present in live tree + * @np: node whose properties are being added to the live tree * @dup: node present in live tree to be updated */ static void update_node_properties(struct device_node *np, struct device_node *dup) { struct property *prop; + struct property *save_next; struct device_node *child; - - for_each_property_of_node(np, prop) - of_add_property(dup, prop); + int ret; for_each_child_of_node(np, child) child->parent = dup; + + /* + * "unittest internal error: unable to add testdata property" + * + * If this message reports a property in node '/__symbols__' then + * the respective unittest overlay contains a label that has the + * same name as a label in the live devicetree. The label will + * be in the live devicetree only if the devicetree source was + * compiled with the '-@' option. If you encounter this error, + * please consider renaming __all__ of the labels in the unittest + * overlay dts files with an odd prefix that is unlikely to be + * used in a real devicetree. + */ + + /* + * open code for_each_property_of_node() because of_add_property() + * sets prop->next to NULL + */ + for (prop = np->properties; prop != NULL; prop = save_next) { + save_next = prop->next; + ret = of_add_property(dup, prop); + if (ret) + pr_err("unittest internal error: unable to add testdata property %pOF/%s", + np, prop->name); + } } /** @@ -932,18 +956,25 @@ static void update_node_properties(struct device_node *np, * * @np: Node to attach to live tree */ -static int attach_node_and_children(struct device_node *np) +static void attach_node_and_children(struct device_node *np) { struct device_node *next, *dup, *child; unsigned long flags; const char *full_name; full_name = kasprintf(GFP_KERNEL, "%pOF", np); + + if (!strcmp(full_name, "/__local_fixups__") || + !strcmp(full_name, "/__fixups__")) { + kfree(full_name); + return; + } + dup = of_find_node_by_path(full_name); kfree(full_name); if (dup) { update_node_properties(np, dup); - return 0; + return; } child = np->child; @@ -964,8 +995,6 @@ static int attach_node_and_children(struct device_node *np) attach_node_and_children(child); child = next; } - - return 0; } /** @@ -1002,6 +1031,7 @@ static int __init unittest_data_add(void) of_fdt_unflatten_tree(unittest_data, NULL, &unittest_data_node); if (!unittest_data_node) { pr_warn("%s: No tree to attach; not running tests\n", __func__); + kfree(unittest_data); return -ENODATA; } of_node_set_flag(unittest_data_node, OF_DETACHED); diff --git a/drivers/parport/share.c b/drivers/parport/share.c index 7b4ee33c1935..15c81cffd2de 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -230,6 +230,18 @@ static int port_check(struct device *dev, void *dev_drv) return 0; } +/* + * Iterates through all the devices connected to the bus and return 1 + * if the device is a parallel port. + */ + +static int port_detect(struct device *dev, void *dev_drv) +{ + if (is_parport(dev)) + return 1; + return 0; +} + /** * parport_register_driver - register a parallel port device driver * @drv: structure describing the driver @@ -282,6 +294,15 @@ int __parport_register_driver(struct parport_driver *drv, struct module *owner, if (ret) return ret; + /* + * check if bus has any parallel port registered, if + * none is found then load the lowlevel driver. + */ + ret = bus_for_each_dev(&parport_bus_type, NULL, NULL, + port_detect); + if (!ret) + get_lowlevel_driver(); + mutex_lock(®istration_lock); if (drv->match_port) bus_for_each_dev(&parport_bus_type, NULL, drv, diff --git a/drivers/pci/dwc/pci-dra7xx.c b/drivers/pci/dwc/pci-dra7xx.c index 7f5dfa169d0f..2e0d0b29cdcb 100644 --- a/drivers/pci/dwc/pci-dra7xx.c +++ b/drivers/pci/dwc/pci-dra7xx.c @@ -817,6 +817,22 @@ static int dra7xx_pcie_resume_noirq(struct device *dev) } #endif +void dra7xx_pcie_shutdown(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct dra7xx_pcie *dra7xx = dev_get_drvdata(dev); + int ret; + + dra7xx_pcie_stop_link(dra7xx->pci); + + ret = pm_runtime_put_sync(dev); + if (ret < 0) + dev_dbg(dev, "pm_runtime_put_sync failed\n"); + + pm_runtime_disable(dev); + dra7xx_pcie_disable_phy(dra7xx); +} + static const struct dev_pm_ops dra7xx_pcie_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(dra7xx_pcie_suspend, dra7xx_pcie_resume) SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(dra7xx_pcie_suspend_noirq, @@ -830,5 +846,6 @@ static struct platform_driver dra7xx_pcie_driver = { .suppress_bind_attrs = true, .pm = &dra7xx_pcie_pm_ops, }, + .shutdown = dra7xx_pcie_shutdown, }; builtin_platform_driver_probe(dra7xx_pcie_driver, dra7xx_pcie_probe); diff --git a/drivers/pci/dwc/pci-keystone-dw.c b/drivers/pci/dwc/pci-keystone-dw.c index 2fb20b887d2a..4cf2662930d8 100644 --- a/drivers/pci/dwc/pci-keystone-dw.c +++ b/drivers/pci/dwc/pci-keystone-dw.c @@ -510,7 +510,7 @@ void ks_dw_pcie_initiate_link_train(struct keystone_pcie *ks_pcie) /* Disable Link training */ val = ks_dw_app_readl(ks_pcie, CMD_STATUS); val &= ~LTSSM_EN_VAL; - ks_dw_app_writel(ks_pcie, CMD_STATUS, LTSSM_EN_VAL | val); + ks_dw_app_writel(ks_pcie, CMD_STATUS, val); /* Initiate Link Training */ val = ks_dw_app_readl(ks_pcie, CMD_STATUS); diff --git a/drivers/pci/dwc/pci-keystone.c b/drivers/pci/dwc/pci-keystone.c index 9bc52e4cf52a..3ea8288c1605 100644 --- a/drivers/pci/dwc/pci-keystone.c +++ b/drivers/pci/dwc/pci-keystone.c @@ -39,6 +39,7 @@ #define PCIE_RC_K2HK 0xb008 #define PCIE_RC_K2E 0xb009 #define PCIE_RC_K2L 0xb00a +#define PCIE_RC_K2G 0xb00b #define to_keystone_pcie(x) dev_get_drvdata((x)->dev) @@ -53,6 +54,8 @@ static void quirk_limit_mrrs(struct pci_dev *dev) .class = PCI_CLASS_BRIDGE_PCI << 8, .class_mask = ~0, }, { PCI_DEVICE(PCI_VENDOR_ID_TI, PCIE_RC_K2L), .class = PCI_CLASS_BRIDGE_PCI << 8, .class_mask = ~0, }, + { PCI_DEVICE(PCI_VENDOR_ID_TI, PCIE_RC_K2G), + .class = PCI_CLASS_BRIDGE_PCI << 8, .class_mask = ~0, }, { 0, }, }; diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index f9308c2f22e6..c2541a772abc 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -177,7 +177,7 @@ static int pci_epf_test_read(struct pci_epf_test *epf_test) goto err_map_addr; } - memcpy(buf, src_addr, reg->size); + memcpy_fromio(buf, src_addr, reg->size); crc32 = crc32_le(~0, buf, reg->size); if (crc32 != reg->checksum) @@ -231,7 +231,7 @@ static int pci_epf_test_write(struct pci_epf_test *epf_test) get_random_bytes(buf, reg->size); reg->checksum = crc32_le(~0, buf, reg->size); - memcpy(dst_addr, buf, reg->size); + memcpy_toio(dst_addr, buf, reg->size); /* * wait 1ms inorder for the write to complete. Without this delay L3 diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c index 8efd086c57c9..5bf874f30466 100644 --- a/drivers/pci/host/pci-tegra.c +++ b/drivers/pci/host/pci-tegra.c @@ -607,12 +607,15 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0bf1, tegra_pcie_fixup_class); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e1c, tegra_pcie_fixup_class); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_NVIDIA, 0x0e1d, tegra_pcie_fixup_class); -/* Tegra PCIE requires relaxed ordering */ +/* Tegra20 and Tegra30 PCIE requires relaxed ordering */ static void tegra_pcie_relax_enable(struct pci_dev *dev) { pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_RELAX_EN); } -DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, tegra_pcie_relax_enable); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0bf0, tegra_pcie_relax_enable); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0bf1, tegra_pcie_relax_enable); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0e1c, tegra_pcie_relax_enable); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0e1d, tegra_pcie_relax_enable); static int tegra_pcie_request_resources(struct tegra_pcie *pcie) { diff --git a/drivers/pci/host/pcie-iproc.c b/drivers/pci/host/pcie-iproc.c index c0ecc9f35667..8f8dac0155d6 100644 --- a/drivers/pci/host/pcie-iproc.c +++ b/drivers/pci/host/pcie-iproc.c @@ -573,14 +573,6 @@ static void __iomem *iproc_pcie_map_cfg_bus(struct iproc_pcie *pcie, return (pcie->base + offset); } - /* - * PAXC is connected to an internally emulated EP within the SoC. It - * allows only one device. - */ - if (pcie->ep_is_internal) - if (slot > 0) - return NULL; - return iproc_pcie_map_ep_cfg_reg(pcie, busno, slot, fn, where); } diff --git a/drivers/pci/host/vmd.c b/drivers/pci/host/vmd.c index 2537b022f42d..af6d5da10ea5 100644 --- a/drivers/pci/host/vmd.c +++ b/drivers/pci/host/vmd.c @@ -753,12 +753,12 @@ static void vmd_remove(struct pci_dev *dev) { struct vmd_dev *vmd = pci_get_drvdata(dev); - vmd_detach_resources(vmd); sysfs_remove_link(&vmd->dev->dev.kobj, "domain"); pci_stop_root_bus(vmd->bus); pci_remove_root_bus(vmd->bus); vmd_cleanup_srcu(vmd); vmd_teardown_dma_ops(vmd); + vmd_detach_resources(vmd); irq_domain_remove(vmd->irq_domain); } diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index 0fd8e164339c..0dc646c1bc3d 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -179,6 +179,7 @@ int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) failed2: sysfs_remove_link(&dev->dev.kobj, buf); failed1: + pci_stop_and_remove_bus_device(virtfn); pci_dev_put(dev); pci_stop_and_remove_bus_device(virtfn); failed0: diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 536e9a5cd2b1..2a203055b16e 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -211,7 +211,7 @@ u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag) return 0; mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; - if (flag) + if (flag & PCI_MSIX_ENTRY_CTRL_MASKBIT) mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT; writel(mask_bits, pci_msix_desc_addr(desc) + PCI_MSIX_ENTRY_VECTOR_CTRL); @@ -1156,7 +1156,8 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, const struct irq_affinity *affd) { static const struct irq_affinity msi_default_affd; - int vecs = -ENOSPC; + int msix_vecs = -ENOSPC; + int msi_vecs = -ENOSPC; if (flags & PCI_IRQ_AFFINITY) { if (!affd) @@ -1167,16 +1168,17 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, } if (flags & PCI_IRQ_MSIX) { - vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs, - affd); - if (vecs > 0) - return vecs; + msix_vecs = __pci_enable_msix_range(dev, NULL, min_vecs, + max_vecs, affd); + if (msix_vecs > 0) + return msix_vecs; } if (flags & PCI_IRQ_MSI) { - vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, affd); - if (vecs > 0) - return vecs; + msi_vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, + affd); + if (msi_vecs > 0) + return msi_vecs; } /* use legacy irq if allowed */ @@ -1187,7 +1189,9 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, } } - return vecs; + if (msix_vecs == -ENOSPC) + return -ENOSPC; + return msi_vecs; } EXPORT_SYMBOL(pci_alloc_irq_vectors_affinity); diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 7e4bec75fcde..522e59274b5d 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -967,17 +967,22 @@ static int pci_pm_thaw_noirq(struct device *dev) return error; } - if (pci_has_legacy_pm_support(pci_dev)) - return pci_legacy_resume_early(dev); - /* - * pci_restore_state() requires the device to be in D0 (because of MSI - * restoration among other things), so force it into D0 in case the - * driver's "freeze" callbacks put it into a low-power state directly. + * Both the legacy ->resume_early() and the new pm->thaw_noirq() + * callbacks assume the device has been returned to D0 and its + * config state has been restored. + * + * In addition, pci_restore_state() restores MSI-X state in MMIO + * space, which requires the device to be in D0, so return it to D0 + * in case the driver's "freeze" callbacks put it into a low-power + * state. */ pci_set_power_state(pci_dev, PCI_D0); pci_restore_state(pci_dev); + if (pci_has_legacy_pm_support(pci_dev)) + return pci_legacy_resume_early(dev); + if (drv && drv->pm && drv->pm->thaw_noirq) error = drv->pm->thaw_noirq(dev); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 044b208f7f6a..c847b5554db6 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -748,19 +748,6 @@ void pci_update_current_state(struct pci_dev *dev, pci_power_t state) } } -/** - * pci_power_up - Put the given device into D0 forcibly - * @dev: PCI device to power up - */ -void pci_power_up(struct pci_dev *dev) -{ - if (platform_pci_power_manageable(dev)) - platform_pci_set_power_state(dev, PCI_D0); - - pci_raw_set_power_state(dev, PCI_D0); - pci_update_current_state(dev, PCI_D0); -} - /** * pci_platform_power_transition - Use platform to change device power state * @dev: PCI device to handle. @@ -939,6 +926,17 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state) } EXPORT_SYMBOL(pci_set_power_state); +/** + * pci_power_up - Put the given device into D0 forcibly + * @dev: PCI device to power up + */ +void pci_power_up(struct pci_dev *dev) +{ + __pci_start_power_transition(dev, PCI_D0); + pci_raw_set_power_state(dev, PCI_D0); + pci_update_current_state(dev, PCI_D0); +} + /** * pci_choose_state - Choose the power state of a PCI device * @dev: PCI device to be suspended diff --git a/drivers/pci/pcie/pme.c b/drivers/pci/pcie/pme.c index c2e6e3d1073f..5500660bbb10 100644 --- a/drivers/pci/pcie/pme.c +++ b/drivers/pci/pcie/pme.c @@ -441,6 +441,7 @@ static void pcie_pme_remove(struct pcie_device *srv) pcie_pme_disable_interrupt(srv->port, data); free_irq(srv->irq, srv); + cancel_work_sync(&data->work); kfree(data); } diff --git a/drivers/pci/pcie/ptm.c b/drivers/pci/pcie/ptm.c index bab8ac63c4f3..3008bba360f3 100644 --- a/drivers/pci/pcie/ptm.c +++ b/drivers/pci/pcie/ptm.c @@ -29,7 +29,7 @@ static void pci_ptm_info(struct pci_dev *dev) snprintf(clock_desc, sizeof(clock_desc), ">254ns"); break; default: - snprintf(clock_desc, sizeof(clock_desc), "%udns", + snprintf(clock_desc, sizeof(clock_desc), "%uns", dev->ptm_granularity); break; } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 45c3fbd38f50..e7ed051ec125 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4019,6 +4019,40 @@ static void quirk_mic_x200_dma_alias(struct pci_dev *pdev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2260, quirk_mic_x200_dma_alias); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2264, quirk_mic_x200_dma_alias); +/* + * Intel Visual Compute Accelerator (VCA) is a family of PCIe add-in devices + * exposing computational units via Non Transparent Bridges (NTB, PEX 87xx). + * + * Similarly to MIC x200, we need to add DMA aliases to allow buffer access + * when IOMMU is enabled. These aliases allow computational unit access to + * host memory. These aliases mark the whole VCA device as one IOMMU + * group. + * + * All possible slot numbers (0x20) are used, since we are unable to tell + * what slot is used on other side. This quirk is intended for both host + * and computational unit sides. The VCA devices have up to five functions + * (four for DMA channels and one additional). + */ +static void quirk_pex_vca_alias(struct pci_dev *pdev) +{ + const unsigned int num_pci_slots = 0x20; + unsigned int slot; + + for (slot = 0; slot < num_pci_slots; slot++) { + pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x0)); + pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x1)); + pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x2)); + pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x3)); + pci_add_dma_alias(pdev, PCI_DEVFN(slot, 0x4)); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2954, quirk_pex_vca_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2955, quirk_pex_vca_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2956, quirk_pex_vca_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2958, quirk_pex_vca_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2959, quirk_pex_vca_alias); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x295A, quirk_pex_vca_alias); + /* * The IOMMU and interrupt controller on Broadcom Vulcan/Cavium ThunderX2 are * associated not at the root bus, but at a bridge below. This quirk avoids @@ -4252,15 +4286,21 @@ static int pci_quirk_amd_sb_acs(struct pci_dev *dev, u16 acs_flags) static bool pci_quirk_cavium_acs_match(struct pci_dev *dev) { + if (!pci_is_pcie(dev) || pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT) + return false; + + switch (dev->device) { /* - * Effectively selects all downstream ports for whole ThunderX 1 - * family by 0xf800 mask (which represents 8 SoCs), while the lower - * bits of device ID are used to indicate which subdevice is used - * within the SoC. + * Effectively selects all downstream ports for whole ThunderX1 + * (which represents 8 SoCs). */ - return (pci_is_pcie(dev) && - (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) && - ((dev->device & 0xf800) == 0xa000)); + case 0xa000 ... 0xa7ff: /* ThunderX1 */ + case 0xaf84: /* ThunderX2 */ + case 0xb884: /* ThunderX3 */ + return true; + default: + return false; + } } static int pci_quirk_cavium_acs(struct pci_dev *dev, u16 acs_flags) @@ -4600,7 +4640,7 @@ int pci_dev_specific_acs_enabled(struct pci_dev *dev, u16 acs_flags) #define INTEL_BSPR_REG_BPPD (1 << 9) /* Upstream Peer Decode Configuration Register */ -#define INTEL_UPDCR_REG 0x1114 +#define INTEL_UPDCR_REG 0x1014 /* 5:0 Peer Decode Enable bits */ #define INTEL_UPDCR_REG_MASK 0x3f diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 958da7db9033..fb73e975d22b 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -1824,12 +1824,18 @@ again: /* restore size and flags */ list_for_each_entry(fail_res, &fail_head, list) { struct resource *res = fail_res->res; + int idx; res->start = fail_res->start; res->end = fail_res->end; res->flags = fail_res->flags; - if (fail_res->dev->subordinate) - res->flags = 0; + + if (pci_is_bridge(fail_res->dev)) { + idx = res - &fail_res->dev->resource[0]; + if (idx >= PCI_BRIDGE_RESOURCES && + idx <= PCI_BRIDGE_RESOURCE_END) + res->flags = 0; + } } free_list(&fail_head); @@ -1895,12 +1901,18 @@ again: /* restore size and flags */ list_for_each_entry(fail_res, &fail_head, list) { struct resource *res = fail_res->res; + int idx; res->start = fail_res->start; res->end = fail_res->end; res->flags = fail_res->flags; - if (fail_res->dev->subordinate) - res->flags = 0; + + if (pci_is_bridge(fail_res->dev)) { + idx = res - &fail_res->dev->resource[0]; + if (idx >= PCI_BRIDGE_RESOURCES && + idx <= PCI_BRIDGE_RESOURCE_END) + res->flags = 0; + } } free_list(&fail_head); diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index e3aefdafae89..bf229b442e72 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -23,7 +23,7 @@ #include #include #include - +#include #include MODULE_DESCRIPTION("Microsemi Switchtec(tm) PCIe Management Driver"); @@ -399,10 +399,6 @@ static void mrpc_cmd_submit(struct switchtec_dev *stdev) stuser->data, stuser->data_len); iowrite32(stuser->cmd, &stdev->mmio_mrpc->cmd); - stuser->status = ioread32(&stdev->mmio_mrpc->status); - if (stuser->status != SWITCHTEC_MRPC_STATUS_INPROGRESS) - mrpc_complete_cmd(stdev); - schedule_delayed_work(&stdev->mrpc_timeout, msecs_to_jiffies(500)); } @@ -898,7 +894,7 @@ static int ioctl_event_summary(struct switchtec_dev *stdev, u32 reg; s.global = ioread32(&stdev->mmio_sw_event->global_summary); - s.part_bitmap = ioread32(&stdev->mmio_sw_event->part_event_bitmap); + s.part_bitmap = readq(&stdev->mmio_sw_event->part_event_bitmap); s.local_part = ioread32(&stdev->mmio_part_cfg->part_event_summary); for (i = 0; i < stdev->partition_count; i++) { @@ -1403,7 +1399,7 @@ static int switchtec_init_isr(struct switchtec_dev *stdev) if (nvecs < 0) return nvecs; - event_irq = ioread32(&stdev->mmio_part_cfg->vep_vector_number); + event_irq = ioread16(&stdev->mmio_part_cfg->vep_vector_number); if (event_irq < 0 || event_irq >= nvecs) return -EFAULT; diff --git a/drivers/phy/broadcom/Kconfig b/drivers/phy/broadcom/Kconfig index 64fc59c3ae6d..181b8fde2bfe 100644 --- a/drivers/phy/broadcom/Kconfig +++ b/drivers/phy/broadcom/Kconfig @@ -60,7 +60,8 @@ config PHY_NS2_USB_DRD config PHY_BRCM_SATA tristate "Broadcom SATA PHY driver" - depends on ARCH_BRCMSTB || ARCH_BCM_IPROC || BMIPS_GENERIC || COMPILE_TEST + depends on ARCH_BRCMSTB || ARCH_BCM_IPROC || BMIPS_GENERIC || \ + ARCH_BCM_63XX || COMPILE_TEST depends on OF select GENERIC_PHY default ARCH_BCM_IPROC diff --git a/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c b/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c index 986224fca9e9..5a180f71d8d4 100644 --- a/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c +++ b/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c @@ -156,7 +156,6 @@ static int ltq_rcu_usb2_of_parse(struct ltq_rcu_usb2_priv *priv, { struct device *dev = priv->dev; const __be32 *offset; - int ret; priv->reg_bits = of_device_get_match_data(dev); diff --git a/drivers/phy/motorola/phy-cpcap-usb.c b/drivers/phy/motorola/phy-cpcap-usb.c index 6601ad0dfb3a..593c77dbde2e 100644 --- a/drivers/phy/motorola/phy-cpcap-usb.c +++ b/drivers/phy/motorola/phy-cpcap-usb.c @@ -115,7 +115,7 @@ struct cpcap_usb_ints_state { enum cpcap_gpio_mode { CPCAP_DM_DP, CPCAP_MDM_RX_TX, - CPCAP_UNKNOWN, + CPCAP_UNKNOWN_DISABLED, /* Seems to disable USB lines */ CPCAP_OTG_DM_DP, }; @@ -207,6 +207,19 @@ static int cpcap_phy_get_ints_state(struct cpcap_phy_ddata *ddata, static int cpcap_usb_set_uart_mode(struct cpcap_phy_ddata *ddata); static int cpcap_usb_set_usb_mode(struct cpcap_phy_ddata *ddata); +static void cpcap_usb_try_musb_mailbox(struct cpcap_phy_ddata *ddata, + enum musb_vbus_id_status status) +{ + int error; + + error = musb_mailbox(status); + if (!error) + return; + + dev_dbg(ddata->dev, "%s: musb_mailbox failed: %i\n", + __func__, error); +} + static void cpcap_usb_detect(struct work_struct *work) { struct cpcap_phy_ddata *ddata; @@ -226,9 +239,7 @@ static void cpcap_usb_detect(struct work_struct *work) if (error) goto out_err; - error = musb_mailbox(MUSB_ID_GROUND); - if (error) - goto out_err; + cpcap_usb_try_musb_mailbox(ddata, MUSB_ID_GROUND); error = regmap_update_bits(ddata->reg, CPCAP_REG_USBC3, CPCAP_BIT_VBUSSTBY_EN, @@ -255,9 +266,7 @@ static void cpcap_usb_detect(struct work_struct *work) error = cpcap_usb_set_usb_mode(ddata); if (error) goto out_err; - error = musb_mailbox(MUSB_ID_GROUND); - if (error) - goto out_err; + cpcap_usb_try_musb_mailbox(ddata, MUSB_ID_GROUND); return; } @@ -267,22 +276,18 @@ static void cpcap_usb_detect(struct work_struct *work) error = cpcap_usb_set_usb_mode(ddata); if (error) goto out_err; - error = musb_mailbox(MUSB_VBUS_VALID); - if (error) - goto out_err; + cpcap_usb_try_musb_mailbox(ddata, MUSB_VBUS_VALID); return; } + cpcap_usb_try_musb_mailbox(ddata, MUSB_VBUS_OFF); + /* Default to debug UART mode */ error = cpcap_usb_set_uart_mode(ddata); if (error) goto out_err; - error = musb_mailbox(MUSB_VBUS_OFF); - if (error) - goto out_err; - dev_dbg(ddata->dev, "set UART mode\n"); return; @@ -374,7 +379,8 @@ static int cpcap_usb_set_uart_mode(struct cpcap_phy_ddata *ddata) { int error; - error = cpcap_usb_gpio_set_mode(ddata, CPCAP_DM_DP); + /* Disable lines to prevent glitches from waking up mdm6600 */ + error = cpcap_usb_gpio_set_mode(ddata, CPCAP_UNKNOWN_DISABLED); if (error) goto out_err; @@ -401,6 +407,11 @@ static int cpcap_usb_set_uart_mode(struct cpcap_phy_ddata *ddata) if (error) goto out_err; + /* Enable UART mode */ + error = cpcap_usb_gpio_set_mode(ddata, CPCAP_DM_DP); + if (error) + goto out_err; + return 0; out_err: @@ -413,7 +424,8 @@ static int cpcap_usb_set_usb_mode(struct cpcap_phy_ddata *ddata) { int error; - error = cpcap_usb_gpio_set_mode(ddata, CPCAP_OTG_DM_DP); + /* Disable lines to prevent glitches from waking up mdm6600 */ + error = cpcap_usb_gpio_set_mode(ddata, CPCAP_UNKNOWN_DISABLED); if (error) return error; @@ -453,6 +465,11 @@ static int cpcap_usb_set_usb_mode(struct cpcap_phy_ddata *ddata) if (error) goto out_err; + /* Enable USB mode */ + error = cpcap_usb_gpio_set_mode(ddata, CPCAP_OTG_DM_DP); + if (error) + goto out_err; + return 0; out_err: @@ -647,9 +664,7 @@ static int cpcap_usb_phy_remove(struct platform_device *pdev) if (error) dev_err(ddata->dev, "could not set UART mode\n"); - error = musb_mailbox(MUSB_VBUS_OFF); - if (error) - dev_err(ddata->dev, "could not set mailbox\n"); + cpcap_usb_try_musb_mailbox(ddata, MUSB_VBUS_OFF); usb_remove_phy(&ddata->phy); cancel_delayed_work_sync(&ddata->detect_work); diff --git a/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c b/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c index 69ce2afac015..c6925e3e878b 100644 --- a/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c +++ b/drivers/phy/qualcomm/phy-qcom-apq8064-sata.c @@ -88,7 +88,7 @@ static int read_poll_timeout(void __iomem *addr, u32 mask) if (readl_relaxed(addr) & mask) return 0; - usleep_range(DELAY_INTERVAL_US, DELAY_INTERVAL_US + 50); + usleep_range(DELAY_INTERVAL_US, DELAY_INTERVAL_US + 50); } while (!time_after(jiffies, timeout)); return (readl_relaxed(addr) & mask) ? 0 : -ETIMEDOUT; diff --git a/drivers/phy/qualcomm/phy-qcom-usb-hs.c b/drivers/phy/qualcomm/phy-qcom-usb-hs.c index 2d0c70b5589f..643934a2a70c 100644 --- a/drivers/phy/qualcomm/phy-qcom-usb-hs.c +++ b/drivers/phy/qualcomm/phy-qcom-usb-hs.c @@ -159,8 +159,8 @@ static int qcom_usb_hs_phy_power_on(struct phy *phy) /* setup initial state */ qcom_usb_hs_phy_vbus_notifier(&uphy->vbus_notify, state, uphy->vbus_edev); - ret = devm_extcon_register_notifier(&ulpi->dev, uphy->vbus_edev, - EXTCON_USB, &uphy->vbus_notify); + ret = extcon_register_notifier(uphy->vbus_edev, EXTCON_USB, + &uphy->vbus_notify); if (ret) goto err_ulpi; } @@ -181,6 +181,9 @@ static int qcom_usb_hs_phy_power_off(struct phy *phy) { struct qcom_usb_hs_phy *uphy = phy_get_drvdata(phy); + if (uphy->vbus_edev) + extcon_unregister_notifier(uphy->vbus_edev, EXTCON_USB, + &uphy->vbus_notify); regulator_disable(uphy->v3p3); regulator_disable(uphy->v1p8); clk_disable_unprepare(uphy->sleep_clk); diff --git a/drivers/phy/renesas/phy-rcar-gen3-usb2.c b/drivers/phy/renesas/phy-rcar-gen3-usb2.c index e8fe80312820..f8c7ce89d8d7 100644 --- a/drivers/phy/renesas/phy-rcar-gen3-usb2.c +++ b/drivers/phy/renesas/phy-rcar-gen3-usb2.c @@ -22,6 +22,7 @@ #include #include #include +#include #include /******* USB2.0 Host registers (original offset is +0x200) *******/ @@ -195,7 +196,7 @@ static void rcar_gen3_init_from_a_peri_to_a_host(struct rcar_gen3_chan *ch) val = readl(usb2_base + USB2_OBINTEN); writel(val & ~USB2_OBINT_BITS, usb2_base + USB2_OBINTEN); - rcar_gen3_enable_vbus_ctrl(ch, 0); + rcar_gen3_enable_vbus_ctrl(ch, 1); rcar_gen3_init_for_host(ch); writel(val | USB2_OBINT_BITS, usb2_base + USB2_OBINTEN); @@ -234,9 +235,9 @@ static ssize_t role_store(struct device *dev, struct device_attribute *attr, */ is_b_device = rcar_gen3_check_id(ch); is_host = rcar_gen3_is_host(ch); - if (!strncmp(buf, "host", strlen("host"))) + if (sysfs_streq(buf, "host")) new_mode_is_host = true; - else if (!strncmp(buf, "peripheral", strlen("peripheral"))) + else if (sysfs_streq(buf, "peripheral")) new_mode_is_host = false; else return -EINVAL; diff --git a/drivers/phy/ti/phy-twl4030-usb.c b/drivers/phy/ti/phy-twl4030-usb.c index a44680d64f9b..c267afb68f07 100644 --- a/drivers/phy/ti/phy-twl4030-usb.c +++ b/drivers/phy/ti/phy-twl4030-usb.c @@ -144,6 +144,7 @@ #define PMBR1 0x0D #define GPIO_USB_4PIN_ULPI_2430C (3 << 0) +static irqreturn_t twl4030_usb_irq(int irq, void *_twl); /* * If VBUS is valid or ID is ground, then we know a * cable is present and we need to be runtime-enabled @@ -395,6 +396,33 @@ static void __twl4030_phy_power(struct twl4030_usb *twl, int on) WARN_ON(twl4030_usb_write_verify(twl, PHY_PWR_CTRL, pwr) < 0); } +static int __maybe_unused twl4030_usb_suspend(struct device *dev) +{ + struct twl4030_usb *twl = dev_get_drvdata(dev); + + /* + * we need enabled runtime on resume, + * so turn irq off here, so we do not get it early + * note: wakeup on usb plug works independently of this + */ + dev_dbg(twl->dev, "%s\n", __func__); + disable_irq(twl->irq); + + return 0; +} + +static int __maybe_unused twl4030_usb_resume(struct device *dev) +{ + struct twl4030_usb *twl = dev_get_drvdata(dev); + + dev_dbg(twl->dev, "%s\n", __func__); + enable_irq(twl->irq); + /* check whether cable status changed */ + twl4030_usb_irq(0, twl); + + return 0; +} + static int __maybe_unused twl4030_usb_runtime_suspend(struct device *dev) { struct twl4030_usb *twl = dev_get_drvdata(dev); @@ -655,6 +683,7 @@ static const struct phy_ops ops = { static const struct dev_pm_ops twl4030_usb_pm_ops = { SET_RUNTIME_PM_OPS(twl4030_usb_runtime_suspend, twl4030_usb_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(twl4030_usb_suspend, twl4030_usb_resume) }; static int twl4030_usb_probe(struct platform_device *pdev) diff --git a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c index 85a8c97d9dfe..5fe419e468ec 100644 --- a/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c +++ b/drivers/pinctrl/bcm/pinctrl-iproc-gpio.c @@ -54,8 +54,12 @@ /* drive strength control for ASIU GPIO */ #define IPROC_GPIO_ASIU_DRV0_CTRL_OFFSET 0x58 -/* drive strength control for CCM/CRMU (AON) GPIO */ -#define IPROC_GPIO_DRV0_CTRL_OFFSET 0x00 +/* pinconf for CCM GPIO */ +#define IPROC_GPIO_PULL_DN_OFFSET 0x10 +#define IPROC_GPIO_PULL_UP_OFFSET 0x14 + +/* pinconf for CRMU(aon) GPIO and CCM GPIO*/ +#define IPROC_GPIO_DRV_CTRL_OFFSET 0x00 #define GPIO_BANK_SIZE 0x200 #define NGPIOS_PER_BANK 32 @@ -76,6 +80,12 @@ enum iproc_pinconf_param { IPROC_PINCON_MAX, }; +enum iproc_pinconf_ctrl_type { + IOCTRL_TYPE_AON = 1, + IOCTRL_TYPE_CDRU, + IOCTRL_TYPE_INVALID, +}; + /* * Iproc GPIO core * @@ -100,6 +110,7 @@ struct iproc_gpio { void __iomem *base; void __iomem *io_ctrl; + enum iproc_pinconf_ctrl_type io_ctrl_type; raw_spinlock_t lock; @@ -461,20 +472,44 @@ static const struct pinctrl_ops iproc_pctrl_ops = { static int iproc_gpio_set_pull(struct iproc_gpio *chip, unsigned gpio, bool disable, bool pull_up) { + void __iomem *base; unsigned long flags; + unsigned int shift; + u32 val_1, val_2; raw_spin_lock_irqsave(&chip->lock, flags); + if (chip->io_ctrl_type == IOCTRL_TYPE_CDRU) { + base = chip->io_ctrl; + shift = IPROC_GPIO_SHIFT(gpio); - if (disable) { - iproc_set_bit(chip, IPROC_GPIO_RES_EN_OFFSET, gpio, false); + val_1 = readl(base + IPROC_GPIO_PULL_UP_OFFSET); + val_2 = readl(base + IPROC_GPIO_PULL_DN_OFFSET); + if (disable) { + /* no pull-up or pull-down */ + val_1 &= ~BIT(shift); + val_2 &= ~BIT(shift); + } else if (pull_up) { + val_1 |= BIT(shift); + val_2 &= ~BIT(shift); + } else { + val_1 &= ~BIT(shift); + val_2 |= BIT(shift); + } + writel(val_1, base + IPROC_GPIO_PULL_UP_OFFSET); + writel(val_2, base + IPROC_GPIO_PULL_DN_OFFSET); } else { - iproc_set_bit(chip, IPROC_GPIO_PAD_RES_OFFSET, gpio, - pull_up); - iproc_set_bit(chip, IPROC_GPIO_RES_EN_OFFSET, gpio, true); + if (disable) { + iproc_set_bit(chip, IPROC_GPIO_RES_EN_OFFSET, gpio, + false); + } else { + iproc_set_bit(chip, IPROC_GPIO_PAD_RES_OFFSET, gpio, + pull_up); + iproc_set_bit(chip, IPROC_GPIO_RES_EN_OFFSET, gpio, + true); + } } raw_spin_unlock_irqrestore(&chip->lock, flags); - dev_dbg(chip->dev, "gpio:%u set pullup:%d\n", gpio, pull_up); return 0; @@ -483,14 +518,35 @@ static int iproc_gpio_set_pull(struct iproc_gpio *chip, unsigned gpio, static void iproc_gpio_get_pull(struct iproc_gpio *chip, unsigned gpio, bool *disable, bool *pull_up) { + void __iomem *base; unsigned long flags; + unsigned int shift; + u32 val_1, val_2; raw_spin_lock_irqsave(&chip->lock, flags); - *disable = !iproc_get_bit(chip, IPROC_GPIO_RES_EN_OFFSET, gpio); - *pull_up = iproc_get_bit(chip, IPROC_GPIO_PAD_RES_OFFSET, gpio); + if (chip->io_ctrl_type == IOCTRL_TYPE_CDRU) { + base = chip->io_ctrl; + shift = IPROC_GPIO_SHIFT(gpio); + + val_1 = readl(base + IPROC_GPIO_PULL_UP_OFFSET) & BIT(shift); + val_2 = readl(base + IPROC_GPIO_PULL_DN_OFFSET) & BIT(shift); + + *pull_up = val_1 ? true : false; + *disable = (val_1 | val_2) ? false : true; + + } else { + *disable = !iproc_get_bit(chip, IPROC_GPIO_RES_EN_OFFSET, gpio); + *pull_up = iproc_get_bit(chip, IPROC_GPIO_PAD_RES_OFFSET, gpio); + } raw_spin_unlock_irqrestore(&chip->lock, flags); } +#define DRV_STRENGTH_OFFSET(gpio, bit, type) ((type) == IOCTRL_TYPE_AON ? \ + ((2 - (bit)) * 4 + IPROC_GPIO_DRV_CTRL_OFFSET) : \ + ((type) == IOCTRL_TYPE_CDRU) ? \ + ((bit) * 4 + IPROC_GPIO_DRV_CTRL_OFFSET) : \ + ((bit) * 4 + IPROC_GPIO_REG(gpio, IPROC_GPIO_ASIU_DRV0_CTRL_OFFSET))) + static int iproc_gpio_set_strength(struct iproc_gpio *chip, unsigned gpio, unsigned strength) { @@ -505,11 +561,8 @@ static int iproc_gpio_set_strength(struct iproc_gpio *chip, unsigned gpio, if (chip->io_ctrl) { base = chip->io_ctrl; - offset = IPROC_GPIO_DRV0_CTRL_OFFSET; } else { base = chip->base; - offset = IPROC_GPIO_REG(gpio, - IPROC_GPIO_ASIU_DRV0_CTRL_OFFSET); } shift = IPROC_GPIO_SHIFT(gpio); @@ -520,11 +573,11 @@ static int iproc_gpio_set_strength(struct iproc_gpio *chip, unsigned gpio, raw_spin_lock_irqsave(&chip->lock, flags); strength = (strength / 2) - 1; for (i = 0; i < GPIO_DRV_STRENGTH_BITS; i++) { + offset = DRV_STRENGTH_OFFSET(gpio, i, chip->io_ctrl_type); val = readl(base + offset); val &= ~BIT(shift); val |= ((strength >> i) & 0x1) << shift; writel(val, base + offset); - offset += 4; } raw_spin_unlock_irqrestore(&chip->lock, flags); @@ -541,11 +594,8 @@ static int iproc_gpio_get_strength(struct iproc_gpio *chip, unsigned gpio, if (chip->io_ctrl) { base = chip->io_ctrl; - offset = IPROC_GPIO_DRV0_CTRL_OFFSET; } else { base = chip->base; - offset = IPROC_GPIO_REG(gpio, - IPROC_GPIO_ASIU_DRV0_CTRL_OFFSET); } shift = IPROC_GPIO_SHIFT(gpio); @@ -553,10 +603,10 @@ static int iproc_gpio_get_strength(struct iproc_gpio *chip, unsigned gpio, raw_spin_lock_irqsave(&chip->lock, flags); *strength = 0; for (i = 0; i < GPIO_DRV_STRENGTH_BITS; i++) { + offset = DRV_STRENGTH_OFFSET(gpio, i, chip->io_ctrl_type); val = readl(base + offset) & BIT(shift); val >>= shift; *strength += (val << i); - offset += 4; } /* convert to mA */ @@ -734,6 +784,7 @@ static int iproc_gpio_probe(struct platform_device *pdev) u32 ngpios, pinconf_disable_mask = 0; int irq, ret; bool no_pinconf = false; + enum iproc_pinconf_ctrl_type io_ctrl_type = IOCTRL_TYPE_INVALID; /* NSP does not support drive strength config */ if (of_device_is_compatible(dev->of_node, "brcm,iproc-nsp-gpio")) @@ -764,8 +815,15 @@ static int iproc_gpio_probe(struct platform_device *pdev) dev_err(dev, "unable to map I/O memory\n"); return PTR_ERR(chip->io_ctrl); } + if (of_device_is_compatible(dev->of_node, + "brcm,cygnus-ccm-gpio")) + io_ctrl_type = IOCTRL_TYPE_CDRU; + else + io_ctrl_type = IOCTRL_TYPE_AON; } + chip->io_ctrl_type = io_ctrl_type; + if (of_property_read_u32(dev->of_node, "ngpios", &ngpios)) { dev_err(&pdev->dev, "missing ngpios DT property\n"); return -ENODEV; diff --git a/drivers/pinctrl/bcm/pinctrl-ns2-mux.c b/drivers/pinctrl/bcm/pinctrl-ns2-mux.c index 4b5cf0e0f16e..951090faa6a9 100644 --- a/drivers/pinctrl/bcm/pinctrl-ns2-mux.c +++ b/drivers/pinctrl/bcm/pinctrl-ns2-mux.c @@ -640,8 +640,8 @@ static int ns2_pinmux_enable(struct pinctrl_dev *pctrl_dev, const struct ns2_pin_function *func; const struct ns2_pin_group *grp; - if (grp_select > pinctrl->num_groups || - func_select > pinctrl->num_functions) + if (grp_select >= pinctrl->num_groups || + func_select >= pinctrl->num_functions) return -EINVAL; func = &pinctrl->functions[func_select]; diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index c55517312485..08ea74177de2 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -2031,7 +2031,6 @@ static int pinctrl_claim_hogs(struct pinctrl_dev *pctldev) return PTR_ERR(pctldev->p); } - kref_get(&pctldev->p->users); pctldev->hog_default = pinctrl_lookup_state(pctldev->p, PINCTRL_STATE_DEFAULT); if (IS_ERR(pctldev->hog_default)) { diff --git a/drivers/pinctrl/devicetree.c b/drivers/pinctrl/devicetree.c index c4aa411f5935..3a7c2d6e4d5f 100644 --- a/drivers/pinctrl/devicetree.c +++ b/drivers/pinctrl/devicetree.c @@ -40,6 +40,13 @@ struct pinctrl_dt_map { static void dt_free_map(struct pinctrl_dev *pctldev, struct pinctrl_map *map, unsigned num_maps) { + int i; + + for (i = 0; i < num_maps; ++i) { + kfree_const(map[i].dev_name); + map[i].dev_name = NULL; + } + if (pctldev) { const struct pinctrl_ops *ops = pctldev->desc->pctlops; if (ops->dt_free_map) @@ -74,7 +81,13 @@ static int dt_remember_or_free_map(struct pinctrl *p, const char *statename, /* Initialize common mapping table entry fields */ for (i = 0; i < num_maps; i++) { - map[i].dev_name = dev_name(p->dev); + const char *devname; + + devname = kstrdup_const(dev_name(p->dev), GFP_KERNEL); + if (!devname) + goto err_free_map; + + map[i].dev_name = devname; map[i].name = statename; if (pctldev) map[i].ctrl_dev_name = dev_name(pctldev->dev); @@ -82,10 +95,8 @@ static int dt_remember_or_free_map(struct pinctrl *p, const char *statename, /* Remember the converted mapping table entries */ dt_map = kzalloc(sizeof(*dt_map), GFP_KERNEL); - if (!dt_map) { - dt_free_map(pctldev, map, num_maps); - return -ENOMEM; - } + if (!dt_map) + goto err_free_map; dt_map->pctldev = pctldev; dt_map->map = map; @@ -93,6 +104,10 @@ static int dt_remember_or_free_map(struct pinctrl *p, const char *statename, list_add_tail(&dt_map->node, &p->dt_maps); return pinctrl_register_map(map, num_maps, false); + +err_free_map: + dt_free_map(pctldev, map, num_maps); + return -ENOMEM; } struct pinctrl_dev *of_pinctrl_get(struct device_node *np) diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c index beeb7cbb5015..4fb3e44f9133 100644 --- a/drivers/pinctrl/intel/pinctrl-baytrail.c +++ b/drivers/pinctrl/intel/pinctrl-baytrail.c @@ -204,7 +204,6 @@ struct byt_gpio { struct platform_device *pdev; struct pinctrl_dev *pctl_dev; struct pinctrl_desc pctl_desc; - raw_spinlock_t lock; const struct byt_pinctrl_soc_data *soc_data; struct byt_community *communities_copy; struct byt_gpio_pin_context *saved_context; @@ -715,6 +714,8 @@ static const struct byt_pinctrl_soc_data *byt_soc_data[] = { NULL, }; +static DEFINE_RAW_SPINLOCK(byt_lock); + static struct byt_community *byt_get_community(struct byt_gpio *vg, unsigned int pin) { @@ -856,7 +857,7 @@ static void byt_set_group_simple_mux(struct byt_gpio *vg, unsigned long flags; int i; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); for (i = 0; i < group.npins; i++) { void __iomem *padcfg0; @@ -876,7 +877,7 @@ static void byt_set_group_simple_mux(struct byt_gpio *vg, writel(value, padcfg0); } - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); } static void byt_set_group_mixed_mux(struct byt_gpio *vg, @@ -886,7 +887,7 @@ static void byt_set_group_mixed_mux(struct byt_gpio *vg, unsigned long flags; int i; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); for (i = 0; i < group.npins; i++) { void __iomem *padcfg0; @@ -906,7 +907,7 @@ static void byt_set_group_mixed_mux(struct byt_gpio *vg, writel(value, padcfg0); } - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); } static int byt_set_mux(struct pinctrl_dev *pctldev, unsigned int func_selector, @@ -955,11 +956,17 @@ static void byt_gpio_clear_triggering(struct byt_gpio *vg, unsigned int offset) unsigned long flags; u32 value; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); value = readl(reg); - value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL); + + /* Do not clear direct-irq enabled IRQs (from gpio_disable_free) */ + if (value & BYT_DIRECT_IRQ_EN) + /* nothing to do */ ; + else + value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL); + writel(value, reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); } static int byt_gpio_request_enable(struct pinctrl_dev *pctl_dev, @@ -971,7 +978,7 @@ static int byt_gpio_request_enable(struct pinctrl_dev *pctl_dev, u32 value, gpio_mux; unsigned long flags; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); /* * In most cases, func pin mux 000 means GPIO function. @@ -993,7 +1000,7 @@ static int byt_gpio_request_enable(struct pinctrl_dev *pctl_dev, "pin %u forcibly re-configured as GPIO\n", offset); } - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); pm_runtime_get(&vg->pdev->dev); @@ -1021,7 +1028,7 @@ static int byt_gpio_set_direction(struct pinctrl_dev *pctl_dev, unsigned long flags; u32 value; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); value = readl(val_reg); value &= ~BYT_DIR_MASK; @@ -1038,7 +1045,7 @@ static int byt_gpio_set_direction(struct pinctrl_dev *pctl_dev, "Potential Error: Setting GPIO with direct_irq_en to output"); writel(value, val_reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); return 0; } @@ -1107,11 +1114,11 @@ static int byt_pin_config_get(struct pinctrl_dev *pctl_dev, unsigned int offset, u32 conf, pull, val, debounce; u16 arg = 0; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); conf = readl(conf_reg); pull = conf & BYT_PULL_ASSIGN_MASK; val = readl(val_reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); switch (param) { case PIN_CONFIG_BIAS_DISABLE: @@ -1138,9 +1145,9 @@ static int byt_pin_config_get(struct pinctrl_dev *pctl_dev, unsigned int offset, if (!(conf & BYT_DEBOUNCE_EN)) return -EINVAL; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); debounce = readl(db_reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); switch (debounce & BYT_DEBOUNCE_PULSE_MASK) { case BYT_DEBOUNCE_PULSE_375US: @@ -1192,7 +1199,7 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev, u32 conf, val, debounce; int i, ret = 0; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); conf = readl(conf_reg); val = readl(val_reg); @@ -1300,7 +1307,7 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev, if (!ret) writel(conf, conf_reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); return ret; } @@ -1325,9 +1332,9 @@ static int byt_gpio_get(struct gpio_chip *chip, unsigned offset) unsigned long flags; u32 val; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); val = readl(reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); return !!(val & BYT_LEVEL); } @@ -1342,13 +1349,13 @@ static void byt_gpio_set(struct gpio_chip *chip, unsigned offset, int value) if (!reg) return; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); old_val = readl(reg); if (value) writel(old_val | BYT_LEVEL, reg); else writel(old_val & ~BYT_LEVEL, reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); } static int byt_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) @@ -1361,9 +1368,9 @@ static int byt_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) if (!reg) return -EINVAL; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); value = readl(reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); if (!(value & BYT_OUTPUT_EN)) return GPIOF_DIR_OUT; @@ -1406,14 +1413,14 @@ static void byt_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) const char *label; unsigned int pin; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); pin = vg->soc_data->pins[i].number; reg = byt_gpio_reg(vg, pin, BYT_CONF0_REG); if (!reg) { seq_printf(s, "Could not retrieve pin %i conf0 reg\n", pin); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); continue; } conf0 = readl(reg); @@ -1422,11 +1429,11 @@ static void byt_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) if (!reg) { seq_printf(s, "Could not retrieve pin %i val reg\n", pin); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); continue; } val = readl(reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); comm = byt_get_community(vg, pin); if (!comm) { @@ -1510,9 +1517,9 @@ static void byt_irq_ack(struct irq_data *d) if (!reg) return; - raw_spin_lock(&vg->lock); + raw_spin_lock(&byt_lock); writel(BIT(offset % 32), reg); - raw_spin_unlock(&vg->lock); + raw_spin_unlock(&byt_lock); } static void byt_irq_mask(struct irq_data *d) @@ -1536,7 +1543,7 @@ static void byt_irq_unmask(struct irq_data *d) if (!reg) return; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); value = readl(reg); switch (irqd_get_trigger_type(d)) { @@ -1557,7 +1564,7 @@ static void byt_irq_unmask(struct irq_data *d) writel(value, reg); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); } static int byt_irq_type(struct irq_data *d, unsigned int type) @@ -1571,7 +1578,7 @@ static int byt_irq_type(struct irq_data *d, unsigned int type) if (!reg || offset >= vg->chip.ngpio) return -EINVAL; - raw_spin_lock_irqsave(&vg->lock, flags); + raw_spin_lock_irqsave(&byt_lock, flags); value = readl(reg); WARN(value & BYT_DIRECT_IRQ_EN, @@ -1593,7 +1600,7 @@ static int byt_irq_type(struct irq_data *d, unsigned int type) else if (type & IRQ_TYPE_LEVEL_MASK) irq_set_handler_locked(d, handle_level_irq); - raw_spin_unlock_irqrestore(&vg->lock, flags); + raw_spin_unlock_irqrestore(&byt_lock, flags); return 0; } @@ -1629,9 +1636,9 @@ static void byt_gpio_irq_handler(struct irq_desc *desc) continue; } - raw_spin_lock(&vg->lock); + raw_spin_lock(&byt_lock); pending = readl(reg); - raw_spin_unlock(&vg->lock); + raw_spin_unlock(&byt_lock); for_each_set_bit(pin, &pending, 32) { virq = irq_find_mapping(vg->chip.irqdomain, base + pin); generic_handle_irq(virq); @@ -1833,8 +1840,6 @@ static int byt_pinctrl_probe(struct platform_device *pdev) return PTR_ERR(vg->pctl_dev); } - raw_spin_lock_init(&vg->lock); - ret = byt_gpio_probe(vg); if (ret) return ret; @@ -1850,8 +1855,11 @@ static int byt_gpio_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct byt_gpio *vg = platform_get_drvdata(pdev); + unsigned long flags; int i; + raw_spin_lock_irqsave(&byt_lock, flags); + for (i = 0; i < vg->soc_data->npins; i++) { void __iomem *reg; u32 value; @@ -1872,6 +1880,7 @@ static int byt_gpio_suspend(struct device *dev) vg->saved_context[i].val = value; } + raw_spin_unlock_irqrestore(&byt_lock, flags); return 0; } @@ -1879,8 +1888,11 @@ static int byt_gpio_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct byt_gpio *vg = platform_get_drvdata(pdev); + unsigned long flags; int i; + raw_spin_lock_irqsave(&byt_lock, flags); + for (i = 0; i < vg->soc_data->npins; i++) { void __iomem *reg; u32 value; @@ -1918,6 +1930,7 @@ static int byt_gpio_resume(struct device *dev) } } + raw_spin_unlock_irqrestore(&byt_lock, flags); return 0; } #endif diff --git a/drivers/pinctrl/intel/pinctrl-lewisburg.c b/drivers/pinctrl/intel/pinctrl-lewisburg.c index 14d56ea6cfdc..c2164db14e9c 100644 --- a/drivers/pinctrl/intel/pinctrl-lewisburg.c +++ b/drivers/pinctrl/intel/pinctrl-lewisburg.c @@ -34,6 +34,7 @@ .npins = ((e) - (s) + 1), \ } +/* Lewisburg */ static const struct pinctrl_pin_desc lbg_pins[] = { /* GPP_A */ PINCTRL_PIN(0, "RCINB"), @@ -73,7 +74,7 @@ static const struct pinctrl_pin_desc lbg_pins[] = { PINCTRL_PIN(33, "SRCCLKREQB_4"), PINCTRL_PIN(34, "SRCCLKREQB_5"), PINCTRL_PIN(35, "GPP_B_11"), - PINCTRL_PIN(36, "GLB_RST_WARN_N"), + PINCTRL_PIN(36, "SLP_S0B"), PINCTRL_PIN(37, "PLTRSTB"), PINCTRL_PIN(38, "SPKR"), PINCTRL_PIN(39, "GPP_B_15"), @@ -186,96 +187,96 @@ static const struct pinctrl_pin_desc lbg_pins[] = { PINCTRL_PIN(141, "GBE_PCI_DIS"), PINCTRL_PIN(142, "GBE_LAN_DIS"), PINCTRL_PIN(143, "GPP_I_10"), - PINCTRL_PIN(144, "GPIO_RCOMP_3P3"), /* GPP_J */ - PINCTRL_PIN(145, "GBE_LED_0_0"), - PINCTRL_PIN(146, "GBE_LED_0_1"), - PINCTRL_PIN(147, "GBE_LED_1_0"), - PINCTRL_PIN(148, "GBE_LED_1_1"), - PINCTRL_PIN(149, "GBE_LED_2_0"), - PINCTRL_PIN(150, "GBE_LED_2_1"), - PINCTRL_PIN(151, "GBE_LED_3_0"), - PINCTRL_PIN(152, "GBE_LED_3_1"), - PINCTRL_PIN(153, "GBE_SCL_0"), - PINCTRL_PIN(154, "GBE_SDA_0"), - PINCTRL_PIN(155, "GBE_SCL_1"), - PINCTRL_PIN(156, "GBE_SDA_1"), - PINCTRL_PIN(157, "GBE_SCL_2"), - PINCTRL_PIN(158, "GBE_SDA_2"), - PINCTRL_PIN(159, "GBE_SCL_3"), - PINCTRL_PIN(160, "GBE_SDA_3"), - PINCTRL_PIN(161, "GBE_SDP_0_0"), - PINCTRL_PIN(162, "GBE_SDP_0_1"), - PINCTRL_PIN(163, "GBE_SDP_1_0"), - PINCTRL_PIN(164, "GBE_SDP_1_1"), - PINCTRL_PIN(165, "GBE_SDP_2_0"), - PINCTRL_PIN(166, "GBE_SDP_2_1"), - PINCTRL_PIN(167, "GBE_SDP_3_0"), - PINCTRL_PIN(168, "GBE_SDP_3_1"), + PINCTRL_PIN(144, "GBE_LED_0_0"), + PINCTRL_PIN(145, "GBE_LED_0_1"), + PINCTRL_PIN(146, "GBE_LED_1_0"), + PINCTRL_PIN(147, "GBE_LED_1_1"), + PINCTRL_PIN(148, "GBE_LED_2_0"), + PINCTRL_PIN(149, "GBE_LED_2_1"), + PINCTRL_PIN(150, "GBE_LED_3_0"), + PINCTRL_PIN(151, "GBE_LED_3_1"), + PINCTRL_PIN(152, "GBE_SCL_0"), + PINCTRL_PIN(153, "GBE_SDA_0"), + PINCTRL_PIN(154, "GBE_SCL_1"), + PINCTRL_PIN(155, "GBE_SDA_1"), + PINCTRL_PIN(156, "GBE_SCL_2"), + PINCTRL_PIN(157, "GBE_SDA_2"), + PINCTRL_PIN(158, "GBE_SCL_3"), + PINCTRL_PIN(159, "GBE_SDA_3"), + PINCTRL_PIN(160, "GBE_SDP_0_0"), + PINCTRL_PIN(161, "GBE_SDP_0_1"), + PINCTRL_PIN(162, "GBE_SDP_1_0"), + PINCTRL_PIN(163, "GBE_SDP_1_1"), + PINCTRL_PIN(164, "GBE_SDP_2_0"), + PINCTRL_PIN(165, "GBE_SDP_2_1"), + PINCTRL_PIN(166, "GBE_SDP_3_0"), + PINCTRL_PIN(167, "GBE_SDP_3_1"), /* GPP_K */ - PINCTRL_PIN(169, "GBE_RMIICLK"), - PINCTRL_PIN(170, "GBE_RMII_TXD_0"), - PINCTRL_PIN(171, "GBE_RMII_TXD_1"), + PINCTRL_PIN(168, "GBE_RMIICLK"), + PINCTRL_PIN(169, "GBE_RMII_RXD_0"), + PINCTRL_PIN(170, "GBE_RMII_RXD_1"), + PINCTRL_PIN(171, "GBE_RMII_CRS_DV"), PINCTRL_PIN(172, "GBE_RMII_TX_EN"), - PINCTRL_PIN(173, "GBE_RMII_CRS_DV"), - PINCTRL_PIN(174, "GBE_RMII_RXD_0"), - PINCTRL_PIN(175, "GBE_RMII_RXD_1"), - PINCTRL_PIN(176, "GBE_RMII_RX_ER"), - PINCTRL_PIN(177, "GBE_RMII_ARBIN"), - PINCTRL_PIN(178, "GBE_RMII_ARB_OUT"), - PINCTRL_PIN(179, "PE_RST_N"), - PINCTRL_PIN(180, "GPIO_RCOMP_1P8_3P3"), + PINCTRL_PIN(173, "GBE_RMII_TXD_0"), + PINCTRL_PIN(174, "GBE_RMII_TXD_1"), + PINCTRL_PIN(175, "GBE_RMII_RX_ER"), + PINCTRL_PIN(176, "GBE_RMII_ARBIN"), + PINCTRL_PIN(177, "GBE_RMII_ARB_OUT"), + PINCTRL_PIN(178, "PE_RST_N"), /* GPP_G */ - PINCTRL_PIN(181, "FAN_TACH_0"), - PINCTRL_PIN(182, "FAN_TACH_1"), - PINCTRL_PIN(183, "FAN_TACH_2"), - PINCTRL_PIN(184, "FAN_TACH_3"), - PINCTRL_PIN(185, "FAN_TACH_4"), - PINCTRL_PIN(186, "FAN_TACH_5"), - PINCTRL_PIN(187, "FAN_TACH_6"), - PINCTRL_PIN(188, "FAN_TACH_7"), - PINCTRL_PIN(189, "FAN_PWM_0"), - PINCTRL_PIN(190, "FAN_PWM_1"), - PINCTRL_PIN(191, "FAN_PWM_2"), - PINCTRL_PIN(192, "FAN_PWM_3"), - PINCTRL_PIN(193, "GSXDOUT"), - PINCTRL_PIN(194, "GSXSLOAD"), - PINCTRL_PIN(195, "GSXDIN"), - PINCTRL_PIN(196, "GSXSRESETB"), - PINCTRL_PIN(197, "GSXCLK"), - PINCTRL_PIN(198, "ADR_COMPLETE"), - PINCTRL_PIN(199, "NMIB"), - PINCTRL_PIN(200, "SMIB"), - PINCTRL_PIN(201, "SSATA_DEVSLP_0"), - PINCTRL_PIN(202, "SSATA_DEVSLP_1"), - PINCTRL_PIN(203, "SSATA_DEVSLP_2"), - PINCTRL_PIN(204, "SSATAXPCIE0_SSATAGP0"), + PINCTRL_PIN(179, "FAN_TACH_0"), + PINCTRL_PIN(180, "FAN_TACH_1"), + PINCTRL_PIN(181, "FAN_TACH_2"), + PINCTRL_PIN(182, "FAN_TACH_3"), + PINCTRL_PIN(183, "FAN_TACH_4"), + PINCTRL_PIN(184, "FAN_TACH_5"), + PINCTRL_PIN(185, "FAN_TACH_6"), + PINCTRL_PIN(186, "FAN_TACH_7"), + PINCTRL_PIN(187, "FAN_PWM_0"), + PINCTRL_PIN(188, "FAN_PWM_1"), + PINCTRL_PIN(189, "FAN_PWM_2"), + PINCTRL_PIN(190, "FAN_PWM_3"), + PINCTRL_PIN(191, "GSXDOUT"), + PINCTRL_PIN(192, "GSXSLOAD"), + PINCTRL_PIN(193, "GSXDIN"), + PINCTRL_PIN(194, "GSXSRESETB"), + PINCTRL_PIN(195, "GSXCLK"), + PINCTRL_PIN(196, "ADR_COMPLETE"), + PINCTRL_PIN(197, "NMIB"), + PINCTRL_PIN(198, "SMIB"), + PINCTRL_PIN(199, "SSATA_DEVSLP_0"), + PINCTRL_PIN(200, "SSATA_DEVSLP_1"), + PINCTRL_PIN(201, "SSATA_DEVSLP_2"), + PINCTRL_PIN(202, "SSATAXPCIE0_SSATAGP0"), /* GPP_H */ - PINCTRL_PIN(205, "SRCCLKREQB_6"), - PINCTRL_PIN(206, "SRCCLKREQB_7"), - PINCTRL_PIN(207, "SRCCLKREQB_8"), - PINCTRL_PIN(208, "SRCCLKREQB_9"), - PINCTRL_PIN(209, "SRCCLKREQB_10"), - PINCTRL_PIN(210, "SRCCLKREQB_11"), - PINCTRL_PIN(211, "SRCCLKREQB_12"), - PINCTRL_PIN(212, "SRCCLKREQB_13"), - PINCTRL_PIN(213, "SRCCLKREQB_14"), - PINCTRL_PIN(214, "SRCCLKREQB_15"), - PINCTRL_PIN(215, "SML2CLK"), - PINCTRL_PIN(216, "SML2DATA"), - PINCTRL_PIN(217, "SML2ALERTB"), - PINCTRL_PIN(218, "SML3CLK"), - PINCTRL_PIN(219, "SML3DATA"), - PINCTRL_PIN(220, "SML3ALERTB"), - PINCTRL_PIN(221, "SML4CLK"), - PINCTRL_PIN(222, "SML4DATA"), - PINCTRL_PIN(223, "SML4ALERTB"), - PINCTRL_PIN(224, "SSATAXPCIE1_SSATAGP1"), - PINCTRL_PIN(225, "SSATAXPCIE2_SSATAGP2"), - PINCTRL_PIN(226, "SSATAXPCIE3_SSATAGP3"), - PINCTRL_PIN(227, "SSATAXPCIE4_SSATAGP4"), - PINCTRL_PIN(228, "SSATAXPCIE5_SSATAGP5"), + PINCTRL_PIN(203, "SRCCLKREQB_6"), + PINCTRL_PIN(204, "SRCCLKREQB_7"), + PINCTRL_PIN(205, "SRCCLKREQB_8"), + PINCTRL_PIN(206, "SRCCLKREQB_9"), + PINCTRL_PIN(207, "SRCCLKREQB_10"), + PINCTRL_PIN(208, "SRCCLKREQB_11"), + PINCTRL_PIN(209, "SRCCLKREQB_12"), + PINCTRL_PIN(210, "SRCCLKREQB_13"), + PINCTRL_PIN(211, "SRCCLKREQB_14"), + PINCTRL_PIN(212, "SRCCLKREQB_15"), + PINCTRL_PIN(213, "SML2CLK"), + PINCTRL_PIN(214, "SML2DATA"), + PINCTRL_PIN(215, "SML2ALERTB"), + PINCTRL_PIN(216, "SML3CLK"), + PINCTRL_PIN(217, "SML3DATA"), + PINCTRL_PIN(218, "SML3ALERTB"), + PINCTRL_PIN(219, "SML4CLK"), + PINCTRL_PIN(220, "SML4DATA"), + PINCTRL_PIN(221, "SML4ALERTB"), + PINCTRL_PIN(222, "SSATAXPCIE1_SSATAGP1"), + PINCTRL_PIN(223, "SSATAXPCIE2_SSATAGP2"), + PINCTRL_PIN(224, "SSATAXPCIE3_SSATAGP3"), + PINCTRL_PIN(225, "SSATAXPCIE4_SSATAGP4"), + PINCTRL_PIN(226, "SSATAXPCIE5_SSATAGP5"), /* GPP_L */ + PINCTRL_PIN(227, "GPP_L_0"), + PINCTRL_PIN(228, "EC_CSME_INTR_OUT"), PINCTRL_PIN(229, "VISA2CH0_D0"), PINCTRL_PIN(230, "VISA2CH0_D1"), PINCTRL_PIN(231, "VISA2CH0_D2"), diff --git a/drivers/pinctrl/meson/pinctrl-meson-gxl.c b/drivers/pinctrl/meson/pinctrl-meson-gxl.c index 36c14b85fc7c..8db182067ecb 100644 --- a/drivers/pinctrl/meson/pinctrl-meson-gxl.c +++ b/drivers/pinctrl/meson/pinctrl-meson-gxl.c @@ -158,8 +158,8 @@ static const unsigned int sdio_d0_pins[] = { PIN(GPIOX_0, EE_OFF) }; static const unsigned int sdio_d1_pins[] = { PIN(GPIOX_1, EE_OFF) }; static const unsigned int sdio_d2_pins[] = { PIN(GPIOX_2, EE_OFF) }; static const unsigned int sdio_d3_pins[] = { PIN(GPIOX_3, EE_OFF) }; -static const unsigned int sdio_cmd_pins[] = { PIN(GPIOX_4, EE_OFF) }; -static const unsigned int sdio_clk_pins[] = { PIN(GPIOX_5, EE_OFF) }; +static const unsigned int sdio_clk_pins[] = { PIN(GPIOX_4, EE_OFF) }; +static const unsigned int sdio_cmd_pins[] = { PIN(GPIOX_5, EE_OFF) }; static const unsigned int sdio_irq_pins[] = { PIN(GPIOX_7, EE_OFF) }; static const unsigned int nand_ce0_pins[] = { PIN(BOOT_8, EE_OFF) }; diff --git a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c index c5fe7d4a9065..262f591ad8a6 100644 --- a/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c +++ b/drivers/pinctrl/mvebu/pinctrl-armada-37xx.c @@ -170,10 +170,10 @@ static struct armada_37xx_pin_group armada_37xx_nb_groups[] = { PIN_GRP_EXTRA("uart2", 9, 2, BIT(1) | BIT(13) | BIT(14) | BIT(19), BIT(1) | BIT(13) | BIT(14), BIT(1) | BIT(19), 18, 2, "gpio", "uart"), - PIN_GRP_GPIO("led0_od", 11, 1, BIT(20), "led"), - PIN_GRP_GPIO("led1_od", 12, 1, BIT(21), "led"), - PIN_GRP_GPIO("led2_od", 13, 1, BIT(22), "led"), - PIN_GRP_GPIO("led3_od", 14, 1, BIT(23), "led"), + PIN_GRP_GPIO_2("led0_od", 11, 1, BIT(20), BIT(20), 0, "led"), + PIN_GRP_GPIO_2("led1_od", 12, 1, BIT(21), BIT(21), 0, "led"), + PIN_GRP_GPIO_2("led2_od", 13, 1, BIT(22), BIT(22), 0, "led"), + PIN_GRP_GPIO_2("led3_od", 14, 1, BIT(23), BIT(23), 0, "led"), }; @@ -205,11 +205,11 @@ static const struct armada_37xx_pin_data armada_37xx_pin_sb = { }; static inline void armada_37xx_update_reg(unsigned int *reg, - unsigned int offset) + unsigned int *offset) { /* We never have more than 2 registers */ - if (offset >= GPIO_PER_REG) { - offset -= GPIO_PER_REG; + if (*offset >= GPIO_PER_REG) { + *offset -= GPIO_PER_REG; *reg += sizeof(u32); } } @@ -373,7 +373,7 @@ static inline void armada_37xx_irq_update_reg(unsigned int *reg, { int offset = irqd_to_hwirq(d); - armada_37xx_update_reg(reg, offset); + armada_37xx_update_reg(reg, &offset); } static int armada_37xx_gpio_direction_input(struct gpio_chip *chip, @@ -383,7 +383,7 @@ static int armada_37xx_gpio_direction_input(struct gpio_chip *chip, unsigned int reg = OUTPUT_EN; unsigned int mask; - armada_37xx_update_reg(®, offset); + armada_37xx_update_reg(®, &offset); mask = BIT(offset); return regmap_update_bits(info->regmap, reg, mask, 0); @@ -396,7 +396,7 @@ static int armada_37xx_gpio_get_direction(struct gpio_chip *chip, unsigned int reg = OUTPUT_EN; unsigned int val, mask; - armada_37xx_update_reg(®, offset); + armada_37xx_update_reg(®, &offset); mask = BIT(offset); regmap_read(info->regmap, reg, &val); @@ -410,7 +410,7 @@ static int armada_37xx_gpio_direction_output(struct gpio_chip *chip, unsigned int reg = OUTPUT_EN; unsigned int mask, val, ret; - armada_37xx_update_reg(®, offset); + armada_37xx_update_reg(®, &offset); mask = BIT(offset); ret = regmap_update_bits(info->regmap, reg, mask, mask); @@ -431,7 +431,7 @@ static int armada_37xx_gpio_get(struct gpio_chip *chip, unsigned int offset) unsigned int reg = INPUT_VAL; unsigned int val, mask; - armada_37xx_update_reg(®, offset); + armada_37xx_update_reg(®, &offset); mask = BIT(offset); regmap_read(info->regmap, reg, &val); @@ -446,7 +446,7 @@ static void armada_37xx_gpio_set(struct gpio_chip *chip, unsigned int offset, unsigned int reg = OUTPUT_VAL; unsigned int mask, val; - armada_37xx_update_reg(®, offset); + armada_37xx_update_reg(®, &offset); mask = BIT(offset); val = value ? mask : 0; diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index b78f42abff2f..7385cd81498c 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -509,7 +509,8 @@ static irqreturn_t amd_gpio_irq_handler(int irq, void *dev_id) irqreturn_t ret = IRQ_NONE; unsigned int i, irqnr; unsigned long flags; - u32 *regs, regval; + u32 __iomem *regs; + u32 regval; u64 status, mask; /* Read the wake status */ diff --git a/drivers/pinctrl/pinctrl-at91-pio4.c b/drivers/pinctrl/pinctrl-at91-pio4.c index e61e2f8c91ce..e9d797707255 100644 --- a/drivers/pinctrl/pinctrl-at91-pio4.c +++ b/drivers/pinctrl/pinctrl-at91-pio4.c @@ -483,7 +483,6 @@ static int atmel_pctl_dt_subnode_to_map(struct pinctrl_dev *pctldev, unsigned num_pins, num_configs, reserve; unsigned long *configs; struct property *pins; - bool has_config; u32 pinfunc; int ret, i; @@ -499,9 +498,6 @@ static int atmel_pctl_dt_subnode_to_map(struct pinctrl_dev *pctldev, return ret; } - if (num_configs) - has_config = true; - num_pins = pins->length / sizeof(u32); if (!num_pins) { dev_err(pctldev->dev, "no pins found in node %pOF\n", np); @@ -514,7 +510,7 @@ static int atmel_pctl_dt_subnode_to_map(struct pinctrl_dev *pctldev, * map for each pin. */ reserve = 1; - if (has_config && num_pins >= 1) + if (num_configs) reserve++; reserve *= num_pins; ret = pinctrl_utils_reserve_map(pctldev, map, reserved_maps, num_maps, @@ -537,7 +533,7 @@ static int atmel_pctl_dt_subnode_to_map(struct pinctrl_dev *pctldev, pinctrl_utils_add_map_mux(pctldev, map, reserved_maps, num_maps, group, func); - if (has_config) { + if (num_configs) { ret = pinctrl_utils_add_map_configs(pctldev, map, reserved_maps, num_maps, group, configs, num_configs, diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index 569bc28cb909..404711f0985a 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -1566,16 +1566,6 @@ void at91_pinctrl_gpio_resume(void) #define gpio_irq_set_wake NULL #endif /* CONFIG_PM */ -static struct irq_chip gpio_irqchip = { - .name = "GPIO", - .irq_ack = gpio_irq_ack, - .irq_disable = gpio_irq_mask, - .irq_mask = gpio_irq_mask, - .irq_unmask = gpio_irq_unmask, - /* .irq_set_type is set dynamically */ - .irq_set_wake = gpio_irq_set_wake, -}; - static void gpio_irq_handler(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); @@ -1616,12 +1606,22 @@ static int at91_gpio_of_irq_setup(struct platform_device *pdev, struct gpio_chip *gpiochip_prev = NULL; struct at91_gpio_chip *prev = NULL; struct irq_data *d = irq_get_irq_data(at91_gpio->pioc_virq); + struct irq_chip *gpio_irqchip; int ret, i; + gpio_irqchip = devm_kzalloc(&pdev->dev, sizeof(*gpio_irqchip), GFP_KERNEL); + if (!gpio_irqchip) + return -ENOMEM; + at91_gpio->pioc_hwirq = irqd_to_hwirq(d); - /* Setup proper .irq_set_type function */ - gpio_irqchip.irq_set_type = at91_gpio->ops->irq_type; + gpio_irqchip->name = "GPIO"; + gpio_irqchip->irq_ack = gpio_irq_ack; + gpio_irqchip->irq_disable = gpio_irq_mask; + gpio_irqchip->irq_mask = gpio_irq_mask; + gpio_irqchip->irq_unmask = gpio_irq_unmask; + gpio_irqchip->irq_set_wake = gpio_irq_set_wake, + gpio_irqchip->irq_set_type = at91_gpio->ops->irq_type; /* Disable irqs of this PIO controller */ writel_relaxed(~0, at91_gpio->regbase + PIO_IDR); @@ -1632,7 +1632,7 @@ static int at91_gpio_of_irq_setup(struct platform_device *pdev, * interrupt. */ ret = gpiochip_irqchip_add(&at91_gpio->chip, - &gpio_irqchip, + gpio_irqchip, 0, handle_edge_irq, IRQ_TYPE_NONE); @@ -1650,7 +1650,7 @@ static int at91_gpio_of_irq_setup(struct platform_device *pdev, if (!gpiochip_prev) { /* Then register the chain on the parent IRQ */ gpiochip_set_chained_irqchip(&at91_gpio->chip, - &gpio_irqchip, + gpio_irqchip, at91_gpio->pioc_virq, gpio_irq_handler); return 0; diff --git a/drivers/pinctrl/pinctrl-gemini.c b/drivers/pinctrl/pinctrl-gemini.c index 39e6221e7100..78fa26c1a89f 100644 --- a/drivers/pinctrl/pinctrl-gemini.c +++ b/drivers/pinctrl/pinctrl-gemini.c @@ -551,13 +551,16 @@ static const unsigned int tvc_3512_pins[] = { 319, /* TVC_DATA[1] */ 301, /* TVC_DATA[2] */ 283, /* TVC_DATA[3] */ - 265, /* TVC_CLK */ 320, /* TVC_DATA[4] */ 302, /* TVC_DATA[5] */ 284, /* TVC_DATA[6] */ 266, /* TVC_DATA[7] */ }; +static const unsigned int tvc_clk_3512_pins[] = { + 265, /* TVC_CLK */ +}; + /* NAND flash pins */ static const unsigned int nflash_3512_pins[] = { 199, 200, 201, 202, 216, 217, 218, 219, 220, 234, 235, 236, 237, 252, @@ -589,7 +592,7 @@ static const unsigned int pflash_3512_pins_extended[] = { /* Serial flash pins CE0, CE1, DI, DO, CK */ static const unsigned int sflash_3512_pins[] = { 230, 231, 232, 233, 211 }; -/* The GPIO0A (0) pin overlap with TVC and extended parallel flash */ +/* The GPIO0A (0) pin overlap with TVC CLK and extended parallel flash */ static const unsigned int gpio0a_3512_pins[] = { 265 }; /* The GPIO0B (1-4) pins overlap with TVC and ICE */ @@ -772,7 +775,13 @@ static const struct gemini_pin_group gemini_3512_pin_groups[] = { .num_pins = ARRAY_SIZE(tvc_3512_pins), /* Conflict with character LCD and ICE */ .mask = LCD_PADS_ENABLE, - .value = TVC_PADS_ENABLE | TVC_CLK_PAD_ENABLE, + .value = TVC_PADS_ENABLE, + }, + { + .name = "tvcclkgrp", + .pins = tvc_clk_3512_pins, + .num_pins = ARRAY_SIZE(tvc_clk_3512_pins), + .value = TVC_CLK_PAD_ENABLE, }, /* * The construction is done such that it is possible to use a serial @@ -809,8 +818,8 @@ static const struct gemini_pin_group gemini_3512_pin_groups[] = { .name = "gpio0agrp", .pins = gpio0a_3512_pins, .num_pins = ARRAY_SIZE(gpio0a_3512_pins), - /* Conflict with TVC */ - .mask = TVC_PADS_ENABLE, + /* Conflict with TVC CLK */ + .mask = TVC_CLK_PAD_ENABLE, }, { .name = "gpio0bgrp", @@ -1476,13 +1485,16 @@ static const unsigned int tvc_3516_pins[] = { 311, /* TVC_DATA[1] */ 394, /* TVC_DATA[2] */ 374, /* TVC_DATA[3] */ - 333, /* TVC_CLK */ 354, /* TVC_DATA[4] */ 395, /* TVC_DATA[5] */ 312, /* TVC_DATA[6] */ 334, /* TVC_DATA[7] */ }; +static const unsigned int tvc_clk_3516_pins[] = { + 333, /* TVC_CLK */ +}; + /* NAND flash pins */ static const unsigned int nflash_3516_pins[] = { 243, 260, 261, 224, 280, 262, 281, 264, 300, 263, 282, 301, 320, 283, @@ -1515,7 +1527,7 @@ static const unsigned int pflash_3516_pins_extended[] = { static const unsigned int sflash_3516_pins[] = { 296, 338, 295, 359, 339 }; /* The GPIO0A (0-4) pins overlap with TVC and extended parallel flash */ -static const unsigned int gpio0a_3516_pins[] = { 333, 354, 395, 312, 334 }; +static const unsigned int gpio0a_3516_pins[] = { 354, 395, 312, 334 }; /* The GPIO0B (5-7) pins overlap with ICE */ static const unsigned int gpio0b_3516_pins[] = { 375, 396, 376 }; @@ -1547,6 +1559,9 @@ static const unsigned int gpio0j_3516_pins[] = { 359, 339 }; /* The GPIO0K (30,31) pins overlap with NAND flash */ static const unsigned int gpio0k_3516_pins[] = { 275, 298 }; +/* The GPIO0L (0) pins overlap with TVC_CLK */ +static const unsigned int gpio0l_3516_pins[] = { 333 }; + /* The GPIO1A (0-4) pins that overlap with IDE and parallel flash */ static const unsigned int gpio1a_3516_pins[] = { 221, 200, 222, 201, 220 }; @@ -1693,7 +1708,13 @@ static const struct gemini_pin_group gemini_3516_pin_groups[] = { .num_pins = ARRAY_SIZE(tvc_3516_pins), /* Conflict with character LCD */ .mask = LCD_PADS_ENABLE, - .value = TVC_PADS_ENABLE | TVC_CLK_PAD_ENABLE, + .value = TVC_PADS_ENABLE, + }, + { + .name = "tvcclkgrp", + .pins = tvc_clk_3516_pins, + .num_pins = ARRAY_SIZE(tvc_clk_3516_pins), + .value = TVC_CLK_PAD_ENABLE, }, /* * The construction is done such that it is possible to use a serial @@ -1804,6 +1825,13 @@ static const struct gemini_pin_group gemini_3516_pin_groups[] = { /* Conflict with parallel and NAND flash */ .value = PFLASH_PADS_DISABLE | NAND_PADS_DISABLE, }, + { + .name = "gpio0lgrp", + .pins = gpio0l_3516_pins, + .num_pins = ARRAY_SIZE(gpio0l_3516_pins), + /* Conflict with TVE CLK */ + .mask = TVC_CLK_PAD_ENABLE, + }, { .name = "gpio1agrp", .pins = gpio1a_3516_pins, @@ -2164,7 +2192,8 @@ static int gemini_pmx_set_mux(struct pinctrl_dev *pctldev, func->name, grp->name); regmap_read(pmx->map, GLOBAL_MISC_CTRL, &before); - regmap_update_bits(pmx->map, GLOBAL_MISC_CTRL, grp->mask, + regmap_update_bits(pmx->map, GLOBAL_MISC_CTRL, + grp->mask | grp->value, grp->value); regmap_read(pmx->map, GLOBAL_MISC_CTRL, &after); diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c index 103aaab41357..1541f8cba556 100644 --- a/drivers/pinctrl/pinctrl-ingenic.c +++ b/drivers/pinctrl/pinctrl-ingenic.c @@ -849,4 +849,4 @@ static int __init ingenic_pinctrl_drv_register(void) { return platform_driver_register(&ingenic_pinctrl_driver); } -postcore_initcall(ingenic_pinctrl_drv_register); +subsys_initcall(ingenic_pinctrl_drv_register); diff --git a/drivers/pinctrl/pinctrl-lpc18xx.c b/drivers/pinctrl/pinctrl-lpc18xx.c index d090f37ca4a1..8b4e3582af6e 100644 --- a/drivers/pinctrl/pinctrl-lpc18xx.c +++ b/drivers/pinctrl/pinctrl-lpc18xx.c @@ -630,14 +630,8 @@ static const struct pinctrl_pin_desc lpc18xx_pins[] = { LPC18XX_PIN(i2c0_sda, PIN_I2C0_SDA), }; -/** - * enum lpc18xx_pin_config_param - possible pin configuration parameters - * @PIN_CONFIG_GPIO_PIN_INT: route gpio to the gpio pin interrupt - * controller. - */ -enum lpc18xx_pin_config_param { - PIN_CONFIG_GPIO_PIN_INT = PIN_CONFIG_END + 1, -}; +/* PIN_CONFIG_GPIO_PIN_INT: route gpio to the gpio pin interrupt controller */ +#define PIN_CONFIG_GPIO_PIN_INT (PIN_CONFIG_END + 1) static const struct pinconf_generic_params lpc18xx_params[] = { {"nxp,gpio-pin-interrupt", PIN_CONFIG_GPIO_PIN_INT, 0}, diff --git a/drivers/pinctrl/pinctrl-xway.c b/drivers/pinctrl/pinctrl-xway.c index f9e98a7d4f0c..1b0c5958c56a 100644 --- a/drivers/pinctrl/pinctrl-xway.c +++ b/drivers/pinctrl/pinctrl-xway.c @@ -1748,14 +1748,6 @@ static int pinmux_xway_probe(struct platform_device *pdev) } xway_pctrl_desc.pins = xway_info.pads; - /* register the gpio chip */ - xway_chip.parent = &pdev->dev; - ret = devm_gpiochip_add_data(&pdev->dev, &xway_chip, NULL); - if (ret) { - dev_err(&pdev->dev, "Failed to register gpio chip\n"); - return ret; - } - /* setup the data needed by pinctrl */ xway_pctrl_desc.name = dev_name(&pdev->dev); xway_pctrl_desc.npins = xway_chip.ngpio; @@ -1777,10 +1769,33 @@ static int pinmux_xway_probe(struct platform_device *pdev) return ret; } - /* finish with registering the gpio range in pinctrl */ - xway_gpio_range.npins = xway_chip.ngpio; - xway_gpio_range.base = xway_chip.base; - pinctrl_add_gpio_range(xway_info.pctrl, &xway_gpio_range); + /* register the gpio chip */ + xway_chip.parent = &pdev->dev; + xway_chip.owner = THIS_MODULE; + xway_chip.of_node = pdev->dev.of_node; + ret = devm_gpiochip_add_data(&pdev->dev, &xway_chip, NULL); + if (ret) { + dev_err(&pdev->dev, "Failed to register gpio chip\n"); + return ret; + } + + /* + * For DeviceTree-supported systems, the gpio core checks the + * pinctrl's device node for the "gpio-ranges" property. + * If it is present, it takes care of adding the pin ranges + * for the driver. In this case the driver can skip ahead. + * + * In order to remain compatible with older, existing DeviceTree + * files which don't set the "gpio-ranges" property or systems that + * utilize ACPI the driver has to call gpiochip_add_pin_range(). + */ + if (!of_property_read_bool(pdev->dev.of_node, "gpio-ranges")) { + /* finish with registering the gpio range in pinctrl */ + xway_gpio_range.npins = xway_chip.ngpio; + xway_gpio_range.base = xway_chip.base; + pinctrl_add_gpio_range(xway_info.pctrl, &xway_gpio_range); + } + dev_info(&pdev->dev, "Init done\n"); return 0; } diff --git a/drivers/pinctrl/pinctrl-zynq.c b/drivers/pinctrl/pinctrl-zynq.c index a0daf27042bd..90fd37e8207b 100644 --- a/drivers/pinctrl/pinctrl-zynq.c +++ b/drivers/pinctrl/pinctrl-zynq.c @@ -971,15 +971,12 @@ enum zynq_io_standards { zynq_iostd_max }; -/** - * enum zynq_pin_config_param - possible pin configuration parameters - * @PIN_CONFIG_IOSTANDARD: if the pin can select an IO standard, the argument to +/* + * PIN_CONFIG_IOSTANDARD: if the pin can select an IO standard, the argument to * this parameter (on a custom format) tells the driver which alternative * IO standard to use. */ -enum zynq_pin_config_param { - PIN_CONFIG_IOSTANDARD = PIN_CONFIG_END + 1, -}; +#define PIN_CONFIG_IOSTANDARD (PIN_CONFIG_END + 1) static const struct pinconf_generic_params zynq_dt_params[] = { {"io-standard", PIN_CONFIG_IOSTANDARD, zynq_iostd_lvcmos18}, diff --git a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c index 22aaf4375fac..0f0049dfaa3a 100644 --- a/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-spmi-gpio.c @@ -1023,10 +1023,23 @@ static int pmic_gpio_probe(struct platform_device *pdev) return ret; } - ret = gpiochip_add_pin_range(&state->chip, dev_name(dev), 0, 0, npins); - if (ret) { - dev_err(dev, "failed to add pin range\n"); - goto err_range; + /* + * For DeviceTree-supported systems, the gpio core checks the + * pinctrl's device node for the "gpio-ranges" property. + * If it is present, it takes care of adding the pin ranges + * for the driver. In this case the driver can skip ahead. + * + * In order to remain compatible with older, existing DeviceTree + * files which don't set the "gpio-ranges" property or systems that + * utilize ACPI the driver has to call gpiochip_add_pin_range(). + */ + if (!of_property_read_bool(dev->of_node, "gpio-ranges")) { + ret = gpiochip_add_pin_range(&state->chip, dev_name(dev), 0, 0, + npins); + if (ret) { + dev_err(dev, "failed to add pin range\n"); + goto err_range; + } } return 0; diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c index 0e153bae322e..6bed433e5420 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c @@ -762,12 +762,23 @@ static int pm8xxx_gpio_probe(struct platform_device *pdev) return ret; } - ret = gpiochip_add_pin_range(&pctrl->chip, - dev_name(pctrl->dev), - 0, 0, pctrl->chip.ngpio); - if (ret) { - dev_err(pctrl->dev, "failed to add pin range\n"); - goto unregister_gpiochip; + /* + * For DeviceTree-supported systems, the gpio core checks the + * pinctrl's device node for the "gpio-ranges" property. + * If it is present, it takes care of adding the pin ranges + * for the driver. In this case the driver can skip ahead. + * + * In order to remain compatible with older, existing DeviceTree + * files which don't set the "gpio-ranges" property or systems that + * utilize ACPI the driver has to call gpiochip_add_pin_range(). + */ + if (!of_property_read_bool(pctrl->dev->of_node, "gpio-ranges")) { + ret = gpiochip_add_pin_range(&pctrl->chip, dev_name(pctrl->dev), + 0, 0, pctrl->chip.ngpio); + if (ret) { + dev_err(pctrl->dev, "failed to add pin range\n"); + goto unregister_gpiochip; + } } platform_set_drvdata(pdev, pctrl); diff --git a/drivers/pinctrl/samsung/pinctrl-exynos.c b/drivers/pinctrl/samsung/pinctrl-exynos.c index c8d0de7ea160..1c534d823fd7 100644 --- a/drivers/pinctrl/samsung/pinctrl-exynos.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos.c @@ -467,8 +467,10 @@ int exynos_eint_wkup_init(struct samsung_pinctrl_drv_data *d) if (match) { irq_chip = kmemdup(match->data, sizeof(*irq_chip), GFP_KERNEL); - if (!irq_chip) + if (!irq_chip) { + of_node_put(np); return -ENOMEM; + } wkup_np = np; break; } diff --git a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c index 67da1cf18b68..46b1a9b2238b 100644 --- a/drivers/pinctrl/samsung/pinctrl-s3c24xx.c +++ b/drivers/pinctrl/samsung/pinctrl-s3c24xx.c @@ -495,8 +495,10 @@ static int s3c24xx_eint_init(struct samsung_pinctrl_drv_data *d) return -ENODEV; eint_data = devm_kzalloc(dev, sizeof(*eint_data), GFP_KERNEL); - if (!eint_data) + if (!eint_data) { + of_node_put(eint_np); return -ENOMEM; + } eint_data->drvdata = d; @@ -508,12 +510,14 @@ static int s3c24xx_eint_init(struct samsung_pinctrl_drv_data *d) irq = irq_of_parse_and_map(eint_np, i); if (!irq) { dev_err(dev, "failed to get wakeup EINT IRQ %d\n", i); + of_node_put(eint_np); return -ENXIO; } eint_data->parents[i] = irq; irq_set_chained_handler_and_data(irq, handlers[i], eint_data); } + of_node_put(eint_np); bank = d->pin_banks; for (i = 0; i < d->nr_banks; ++i, ++bank) { diff --git a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c index 0bdc1e683181..cf3a3af82321 100644 --- a/drivers/pinctrl/samsung/pinctrl-s3c64xx.c +++ b/drivers/pinctrl/samsung/pinctrl-s3c64xx.c @@ -709,8 +709,10 @@ static int s3c64xx_eint_eint0_init(struct samsung_pinctrl_drv_data *d) return -ENODEV; data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); - if (!data) + if (!data) { + of_node_put(eint0_np); return -ENOMEM; + } data->drvdata = d; for (i = 0; i < NUM_EINT0_IRQ; ++i) { @@ -719,6 +721,7 @@ static int s3c64xx_eint_eint0_init(struct samsung_pinctrl_drv_data *d) irq = irq_of_parse_and_map(eint0_np, i); if (!irq) { dev_err(dev, "failed to get wakeup EINT IRQ %d\n", i); + of_node_put(eint0_np); return -ENXIO; } @@ -726,6 +729,7 @@ static int s3c64xx_eint_eint0_init(struct samsung_pinctrl_drv_data *d) s3c64xx_eint0_handlers[i], data); } + of_node_put(eint0_np); bank = d->pin_banks; for (i = 0; i < d->nr_banks; ++i, ++bank) { diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c index 26e8fab736f1..7c0f5d4e89f3 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.c +++ b/drivers/pinctrl/samsung/pinctrl-samsung.c @@ -277,6 +277,7 @@ static int samsung_dt_node_to_map(struct pinctrl_dev *pctldev, &reserved_maps, num_maps); if (ret < 0) { samsung_dt_free_map(pctldev, *map, *num_maps); + of_node_put(np); return ret; } } @@ -761,8 +762,10 @@ static struct samsung_pmx_func *samsung_pinctrl_create_functions( if (!of_get_child_count(cfg_np)) { ret = samsung_pinctrl_create_function(dev, drvdata, cfg_np, func); - if (ret < 0) + if (ret < 0) { + of_node_put(cfg_np); return ERR_PTR(ret); + } if (ret > 0) { ++func; ++func_cnt; @@ -773,8 +776,11 @@ static struct samsung_pmx_func *samsung_pinctrl_create_functions( for_each_child_of_node(cfg_np, func_np) { ret = samsung_pinctrl_create_function(dev, drvdata, func_np, func); - if (ret < 0) + if (ret < 0) { + of_node_put(func_np); + of_node_put(cfg_np); return ERR_PTR(ret); + } if (ret > 0) { ++func; ++func_cnt; diff --git a/drivers/pinctrl/sh-pfc/pfc-emev2.c b/drivers/pinctrl/sh-pfc/pfc-emev2.c index 1cbbe04d7df6..eafd8edbcbe9 100644 --- a/drivers/pinctrl/sh-pfc/pfc-emev2.c +++ b/drivers/pinctrl/sh-pfc/pfc-emev2.c @@ -1263,6 +1263,14 @@ static const char * const dtv_groups[] = { "dtv_b", }; +static const char * const err_rst_reqb_groups[] = { + "err_rst_reqb", +}; + +static const char * const ext_clki_groups[] = { + "ext_clki", +}; + static const char * const iic0_groups[] = { "iic0", }; @@ -1285,6 +1293,10 @@ static const char * const lcd_groups[] = { "yuv3", }; +static const char * const lowpwr_groups[] = { + "lowpwr", +}; + static const char * const ntsc_groups[] = { "ntsc_clk", "ntsc_data", @@ -1298,6 +1310,10 @@ static const char * const pwm1_groups[] = { "pwm1", }; +static const char * const ref_clko_groups[] = { + "ref_clko", +}; + static const char * const sd_groups[] = { "sd_cki", }; @@ -1391,13 +1407,17 @@ static const struct sh_pfc_function pinmux_functions[] = { SH_PFC_FUNCTION(cam), SH_PFC_FUNCTION(cf), SH_PFC_FUNCTION(dtv), + SH_PFC_FUNCTION(err_rst_reqb), + SH_PFC_FUNCTION(ext_clki), SH_PFC_FUNCTION(iic0), SH_PFC_FUNCTION(iic1), SH_PFC_FUNCTION(jtag), SH_PFC_FUNCTION(lcd), + SH_PFC_FUNCTION(lowpwr), SH_PFC_FUNCTION(ntsc), SH_PFC_FUNCTION(pwm0), SH_PFC_FUNCTION(pwm1), + SH_PFC_FUNCTION(ref_clko), SH_PFC_FUNCTION(sd), SH_PFC_FUNCTION(sdi0), SH_PFC_FUNCTION(sdi1), diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7740.c b/drivers/pinctrl/sh-pfc/pfc-r8a7740.c index 35f436bcb849..e9739dbcb356 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7740.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7740.c @@ -1982,7 +1982,7 @@ static const unsigned int gether_gmii_pins[] = { */ 185, 186, 187, 188, 189, 190, 191, 192, 174, 161, 204, 171, 170, 169, 168, 167, 166, 173, 172, 176, 184, 183, 203, - 205, 163, 206, 207, + 205, 163, 206, 207, 158, }; static const unsigned int gether_gmii_mux[] = { ET_ERXD0_MARK, ET_ERXD1_MARK, ET_ERXD2_MARK, ET_ERXD3_MARK, @@ -2154,6 +2154,7 @@ static const unsigned int lcd0_data24_1_mux[] = { LCD0_D0_MARK, LCD0_D1_MARK, LCD0_D2_MARK, LCD0_D3_MARK, LCD0_D4_MARK, LCD0_D5_MARK, LCD0_D6_MARK, LCD0_D7_MARK, LCD0_D8_MARK, LCD0_D9_MARK, LCD0_D10_MARK, LCD0_D11_MARK, + LCD0_D12_MARK, LCD0_D13_MARK, LCD0_D14_MARK, LCD0_D15_MARK, LCD0_D16_MARK, LCD0_D17_MARK, LCD0_D18_PORT163_MARK, LCD0_D19_PORT162_MARK, LCD0_D20_PORT161_MARK, LCD0_D21_PORT158_MARK, LCD0_D22_PORT160_MARK, LCD0_D23_PORT159_MARK, diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7778.c b/drivers/pinctrl/sh-pfc/pfc-r8a7778.c index c3af9ebee4af..28c0405ba396 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7778.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7778.c @@ -2325,7 +2325,7 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { FN_ATAG0_A, 0, FN_REMOCON_B, 0, /* IP0_11_8 [4] */ FN_SD1_DAT2_A, FN_MMC_D2, 0, FN_BS, - FN_ATADIR0_A, 0, FN_SDSELF_B, 0, + FN_ATADIR0_A, 0, FN_SDSELF_A, 0, FN_PWM4_B, 0, 0, 0, 0, 0, 0, 0, /* IP0_7_5 [3] */ @@ -2367,7 +2367,7 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { FN_TS_SDAT0_A, 0, 0, 0, 0, 0, 0, 0, /* IP1_10_8 [3] */ - FN_SD1_CLK_B, FN_MMC_D6, 0, FN_A24, + FN_SD1_CD_A, FN_MMC_D6, 0, FN_A24, FN_DREQ1_A, 0, FN_HRX0_B, FN_TS_SPSYNC0_A, /* IP1_7_5 [3] */ FN_A23, FN_HTX0_B, FN_TX2_B, FN_DACK2_A, diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7791.c b/drivers/pinctrl/sh-pfc/pfc-r8a7791.c index c01ef02d326b..e4774b220040 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7791.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7791.c @@ -3220,8 +3220,7 @@ static const unsigned int qspi_data4_b_pins[] = { RCAR_GP_PIN(6, 4), }; static const unsigned int qspi_data4_b_mux[] = { - SPCLK_B_MARK, MOSI_IO0_B_MARK, MISO_IO1_B_MARK, - IO2_B_MARK, IO3_B_MARK, SSL_B_MARK, + MOSI_IO0_B_MARK, MISO_IO1_B_MARK, IO2_B_MARK, IO3_B_MARK, }; /* - SCIF0 ------------------------------------------------------------------ */ static const unsigned int scif0_data_pins[] = { @@ -4349,17 +4348,14 @@ static const unsigned int vin1_b_data18_pins[] = { }; static const unsigned int vin1_b_data18_mux[] = { /* B */ - VI1_DATA0_B_MARK, VI1_DATA1_B_MARK, VI1_DATA2_B_MARK, VI1_DATA3_B_MARK, VI1_DATA4_B_MARK, VI1_DATA5_B_MARK, VI1_DATA6_B_MARK, VI1_DATA7_B_MARK, /* G */ - VI1_G0_B_MARK, VI1_G1_B_MARK, VI1_G2_B_MARK, VI1_G3_B_MARK, VI1_G4_B_MARK, VI1_G5_B_MARK, VI1_G6_B_MARK, VI1_G7_B_MARK, /* R */ - VI1_R0_B_MARK, VI1_R1_B_MARK, VI1_R2_B_MARK, VI1_R3_B_MARK, VI1_R4_B_MARK, VI1_R5_B_MARK, VI1_R6_B_MARK, VI1_R7_B_MARK, @@ -5213,7 +5209,7 @@ static const char * const scifb2_groups[] = { "scifb2_data_b", "scifb2_clk_b", "scifb2_ctrl_b", - "scifb0_data_c", + "scifb2_data_c", "scifb2_clk_c", "scifb2_data_d", }; diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7792.c b/drivers/pinctrl/sh-pfc/pfc-r8a7792.c index cc3597f66605..46c41ca6ea38 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7792.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7792.c @@ -1916,6 +1916,7 @@ static const char * const vin1_groups[] = { "vin1_data8", "vin1_data24_b", "vin1_data20_b", + "vin1_data18_b", "vin1_data16_b", "vin1_sync", "vin1_field", diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a7794.c b/drivers/pinctrl/sh-pfc/pfc-r8a7794.c index a0ed220071f5..93bdd3e8fb67 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a7794.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a7794.c @@ -4742,7 +4742,7 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { FN_AVB_MDC, FN_SSI_SDATA6_B, 0, 0, } }, { PINMUX_CFG_REG_VAR("IPSR9", 0xE6060044, 32, - 1, 3, 3, 3, 3, 2, 2, 3, 3, 3, 3, 3, 3) { + 1, 3, 3, 3, 3, 2, 2, 3, 3, 3, 3, 3) { /* IP9_31 [1] */ 0, 0, /* IP9_30_28 [3] */ diff --git a/drivers/pinctrl/sh-pfc/pfc-r8a77995.c b/drivers/pinctrl/sh-pfc/pfc-r8a77995.c index 4f5ee1d7317d..36421df1b326 100644 --- a/drivers/pinctrl/sh-pfc/pfc-r8a77995.c +++ b/drivers/pinctrl/sh-pfc/pfc-r8a77995.c @@ -391,10 +391,10 @@ FM(IP12_31_28) IP12_31_28 \ #define MOD_SEL0_27 FM(SEL_MSIOF3_0) FM(SEL_MSIOF3_1) #define MOD_SEL0_26 FM(SEL_HSCIF3_0) FM(SEL_HSCIF3_1) #define MOD_SEL0_25 FM(SEL_SCIF4_0) FM(SEL_SCIF4_1) -#define MOD_SEL0_24_23 FM(SEL_PWM0_0) FM(SEL_PWM0_1) FM(SEL_PWM0_2) FM(SEL_PWM0_3) -#define MOD_SEL0_22_21 FM(SEL_PWM1_0) FM(SEL_PWM1_1) FM(SEL_PWM1_2) FM(SEL_PWM1_3) -#define MOD_SEL0_20_19 FM(SEL_PWM2_0) FM(SEL_PWM2_1) FM(SEL_PWM2_2) FM(SEL_PWM2_3) -#define MOD_SEL0_18_17 FM(SEL_PWM3_0) FM(SEL_PWM3_1) FM(SEL_PWM3_2) FM(SEL_PWM3_3) +#define MOD_SEL0_24_23 FM(SEL_PWM0_0) FM(SEL_PWM0_1) FM(SEL_PWM0_2) F_(0, 0) +#define MOD_SEL0_22_21 FM(SEL_PWM1_0) FM(SEL_PWM1_1) FM(SEL_PWM1_2) F_(0, 0) +#define MOD_SEL0_20_19 FM(SEL_PWM2_0) FM(SEL_PWM2_1) FM(SEL_PWM2_2) F_(0, 0) +#define MOD_SEL0_18_17 FM(SEL_PWM3_0) FM(SEL_PWM3_1) FM(SEL_PWM3_2) F_(0, 0) #define MOD_SEL0_15 FM(SEL_IRQ_0_0) FM(SEL_IRQ_0_1) #define MOD_SEL0_14 FM(SEL_IRQ_1_0) FM(SEL_IRQ_1_1) #define MOD_SEL0_13 FM(SEL_IRQ_2_0) FM(SEL_IRQ_2_1) diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7264.c b/drivers/pinctrl/sh-pfc/pfc-sh7264.c index 8070765311db..3ddb9565ed80 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh7264.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh7264.c @@ -500,17 +500,15 @@ enum { SD_WP_MARK, SD_CLK_MARK, SD_CMD_MARK, CRX0_MARK, CRX1_MARK, CTX0_MARK, CTX1_MARK, + CRX0_CRX1_MARK, CTX0_CTX1_MARK, PWM1A_MARK, PWM1B_MARK, PWM1C_MARK, PWM1D_MARK, PWM1E_MARK, PWM1F_MARK, PWM1G_MARK, PWM1H_MARK, PWM2A_MARK, PWM2B_MARK, PWM2C_MARK, PWM2D_MARK, PWM2E_MARK, PWM2F_MARK, PWM2G_MARK, PWM2H_MARK, IERXD_MARK, IETXD_MARK, - CRX0_CRX1_MARK, WDTOVF_MARK, - CRX0X1_MARK, - /* DMAC */ TEND0_MARK, DACK0_MARK, DREQ0_MARK, TEND1_MARK, DACK1_MARK, DREQ1_MARK, @@ -998,12 +996,12 @@ static const u16 pinmux_data[] = { PINMUX_DATA(PJ3_DATA, PJ3MD_00), PINMUX_DATA(CRX1_MARK, PJ3MD_01), - PINMUX_DATA(CRX0X1_MARK, PJ3MD_10), + PINMUX_DATA(CRX0_CRX1_MARK, PJ3MD_10), PINMUX_DATA(IRQ1_PJ_MARK, PJ3MD_11), PINMUX_DATA(PJ2_DATA, PJ2MD_000), PINMUX_DATA(CTX1_MARK, PJ2MD_001), - PINMUX_DATA(CRX0_CRX1_MARK, PJ2MD_010), + PINMUX_DATA(CTX0_CTX1_MARK, PJ2MD_010), PINMUX_DATA(CS2_MARK, PJ2MD_011), PINMUX_DATA(SCK0_MARK, PJ2MD_100), PINMUX_DATA(LCD_M_DISP_MARK, PJ2MD_101), @@ -1248,6 +1246,7 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(CTX1), GPIO_FN(CRX1), GPIO_FN(CTX0), + GPIO_FN(CTX0_CTX1), GPIO_FN(CRX0), GPIO_FN(CRX0_CRX1), @@ -1716,6 +1715,9 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { }, { PINMUX_CFG_REG("PFCR3", 0xfffe38a8, 16, 4) { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, PF12MD_000, PF12MD_001, 0, PF12MD_011, PF12MD_100, PF12MD_101, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } @@ -1759,8 +1761,10 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { 0, 0, 0, 0, 0, 0, 0, 0, PF1MD_000, PF1MD_001, PF1MD_010, PF1MD_011, PF1MD_100, PF1MD_101, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0 - } + 0, 0, 0, 0, 0, 0, 0, 0, + PF0MD_000, PF0MD_001, PF0MD_010, PF0MD_011, + PF0MD_100, PF0MD_101, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 } }, { PINMUX_CFG_REG("PFIOR0", 0xfffe38b2, 16, 1) { diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7269.c b/drivers/pinctrl/sh-pfc/pfc-sh7269.c index a50d22bef1f4..3df0c0d139d0 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh7269.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh7269.c @@ -740,13 +740,12 @@ enum { CRX0_MARK, CTX0_MARK, CRX1_MARK, CTX1_MARK, CRX2_MARK, CTX2_MARK, - CRX0_CRX1_MARK, - CRX0_CRX1_CRX2_MARK, - CTX0CTX1CTX2_MARK, + CRX0_CRX1_MARK, CTX0_CTX1_MARK, + CRX0_CRX1_CRX2_MARK, CTX0_CTX1_CTX2_MARK, CRX1_PJ22_MARK, CTX1_PJ23_MARK, CRX2_PJ20_MARK, CTX2_PJ21_MARK, - CRX0CRX1_PJ22_MARK, - CRX0CRX1CRX2_PJ20_MARK, + CRX0_CRX1_PJ22_MARK, CTX0_CTX1_PJ23_MARK, + CRX0_CRX1_CRX2_PJ20_MARK, CTX0_CTX1_CTX2_PJ21_MARK, /* VDC */ DV_CLK_MARK, @@ -824,6 +823,7 @@ static const u16 pinmux_data[] = { PINMUX_DATA(CS3_MARK, PC8MD_001), PINMUX_DATA(TXD7_MARK, PC8MD_010), PINMUX_DATA(CTX1_MARK, PC8MD_011), + PINMUX_DATA(CTX0_CTX1_MARK, PC8MD_100), PINMUX_DATA(PC7_DATA, PC7MD_000), PINMUX_DATA(CKE_MARK, PC7MD_001), @@ -836,11 +836,12 @@ static const u16 pinmux_data[] = { PINMUX_DATA(CAS_MARK, PC6MD_001), PINMUX_DATA(SCK7_MARK, PC6MD_010), PINMUX_DATA(CTX0_MARK, PC6MD_011), + PINMUX_DATA(CTX0_CTX1_CTX2_MARK, PC6MD_100), PINMUX_DATA(PC5_DATA, PC5MD_000), PINMUX_DATA(RAS_MARK, PC5MD_001), PINMUX_DATA(CRX0_MARK, PC5MD_011), - PINMUX_DATA(CTX0CTX1CTX2_MARK, PC5MD_100), + PINMUX_DATA(CTX0_CTX1_CTX2_MARK, PC5MD_100), PINMUX_DATA(IRQ0_PC_MARK, PC5MD_101), PINMUX_DATA(PC4_DATA, PC4MD_00), @@ -1292,30 +1293,32 @@ static const u16 pinmux_data[] = { PINMUX_DATA(LCD_DATA23_PJ23_MARK, PJ23MD_010), PINMUX_DATA(LCD_TCON6_MARK, PJ23MD_011), PINMUX_DATA(IRQ3_PJ_MARK, PJ23MD_100), - PINMUX_DATA(CTX1_MARK, PJ23MD_101), + PINMUX_DATA(CTX1_PJ23_MARK, PJ23MD_101), + PINMUX_DATA(CTX0_CTX1_PJ23_MARK, PJ23MD_110), PINMUX_DATA(PJ22_DATA, PJ22MD_000), PINMUX_DATA(DV_DATA22_MARK, PJ22MD_001), PINMUX_DATA(LCD_DATA22_PJ22_MARK, PJ22MD_010), PINMUX_DATA(LCD_TCON5_MARK, PJ22MD_011), PINMUX_DATA(IRQ2_PJ_MARK, PJ22MD_100), - PINMUX_DATA(CRX1_MARK, PJ22MD_101), - PINMUX_DATA(CRX0_CRX1_MARK, PJ22MD_110), + PINMUX_DATA(CRX1_PJ22_MARK, PJ22MD_101), + PINMUX_DATA(CRX0_CRX1_PJ22_MARK, PJ22MD_110), PINMUX_DATA(PJ21_DATA, PJ21MD_000), PINMUX_DATA(DV_DATA21_MARK, PJ21MD_001), PINMUX_DATA(LCD_DATA21_PJ21_MARK, PJ21MD_010), PINMUX_DATA(LCD_TCON4_MARK, PJ21MD_011), PINMUX_DATA(IRQ1_PJ_MARK, PJ21MD_100), - PINMUX_DATA(CTX2_MARK, PJ21MD_101), + PINMUX_DATA(CTX2_PJ21_MARK, PJ21MD_101), + PINMUX_DATA(CTX0_CTX1_CTX2_PJ21_MARK, PJ21MD_110), PINMUX_DATA(PJ20_DATA, PJ20MD_000), PINMUX_DATA(DV_DATA20_MARK, PJ20MD_001), PINMUX_DATA(LCD_DATA20_PJ20_MARK, PJ20MD_010), PINMUX_DATA(LCD_TCON3_MARK, PJ20MD_011), PINMUX_DATA(IRQ0_PJ_MARK, PJ20MD_100), - PINMUX_DATA(CRX2_MARK, PJ20MD_101), - PINMUX_DATA(CRX0CRX1CRX2_PJ20_MARK, PJ20MD_110), + PINMUX_DATA(CRX2_PJ20_MARK, PJ20MD_101), + PINMUX_DATA(CRX0_CRX1_CRX2_PJ20_MARK, PJ20MD_110), PINMUX_DATA(PJ19_DATA, PJ19MD_000), PINMUX_DATA(DV_DATA19_MARK, PJ19MD_001), @@ -1666,12 +1669,24 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(WDTOVF), /* CAN */ + GPIO_FN(CTX2), + GPIO_FN(CRX2), GPIO_FN(CTX1), GPIO_FN(CRX1), GPIO_FN(CTX0), GPIO_FN(CRX0), + GPIO_FN(CTX0_CTX1), GPIO_FN(CRX0_CRX1), + GPIO_FN(CTX0_CTX1_CTX2), GPIO_FN(CRX0_CRX1_CRX2), + GPIO_FN(CTX2_PJ21), + GPIO_FN(CRX2_PJ20), + GPIO_FN(CTX1_PJ23), + GPIO_FN(CRX1_PJ22), + GPIO_FN(CTX0_CTX1_PJ23), + GPIO_FN(CRX0_CRX1_PJ22), + GPIO_FN(CTX0_CTX1_CTX2_PJ21), + GPIO_FN(CRX0_CRX1_CRX2_PJ20), /* DMAC */ GPIO_FN(TEND0), @@ -2119,7 +2134,7 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { }, { PINMUX_CFG_REG("PCIOR0", 0xfffe3852, 16, 1) { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, PC8_IN, PC8_OUT, PC7_IN, PC7_OUT, PC6_IN, PC6_OUT, diff --git a/drivers/pinctrl/sh-pfc/pfc-sh73a0.c b/drivers/pinctrl/sh-pfc/pfc-sh73a0.c index d25e6f674d0a..6dca760f9f28 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh73a0.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh73a0.c @@ -3086,6 +3086,7 @@ static const unsigned int tpu4_to2_mux[] = { }; static const unsigned int tpu4_to3_pins[] = { /* TO */ + PIN_NUMBER(6, 26), }; static const unsigned int tpu4_to3_mux[] = { TPU4TO3_MARK, @@ -3366,7 +3367,8 @@ static const char * const fsic_groups[] = { "fsic_sclk_out", "fsic_data_in", "fsic_data_out", - "fsic_spdif", + "fsic_spdif_0", + "fsic_spdif_1", }; static const char * const fsid_groups[] = { diff --git a/drivers/pinctrl/sh-pfc/pfc-sh7734.c b/drivers/pinctrl/sh-pfc/pfc-sh7734.c index 6502e676d368..c691e5e9d9de 100644 --- a/drivers/pinctrl/sh-pfc/pfc-sh7734.c +++ b/drivers/pinctrl/sh-pfc/pfc-sh7734.c @@ -1453,7 +1453,7 @@ static const struct pinmux_func pinmux_func_gpios[] = { GPIO_FN(ET0_ETXD2_A), GPIO_FN(EX_CS5), GPIO_FN(SD1_CMD_A), GPIO_FN(ATADIR), GPIO_FN(QSSL_B), GPIO_FN(ET0_ETXD3_A), - GPIO_FN(RD_WR), GPIO_FN(TCLK1_B), + GPIO_FN(RD_WR), GPIO_FN(TCLK0), GPIO_FN(CAN_CLK_B), GPIO_FN(ET0_ETXD4), GPIO_FN(EX_WAIT0), GPIO_FN(TCLK1_B), GPIO_FN(EX_WAIT1), GPIO_FN(SD1_DAT0_A), GPIO_FN(DREQ2), GPIO_FN(CAN1_TX_C), GPIO_FN(ET0_LINK_C), GPIO_FN(ET0_ETXD5_A), @@ -1949,7 +1949,7 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { /* IP3_20 [1] */ FN_EX_WAIT0, FN_TCLK1_B, /* IP3_19_18 [2] */ - FN_RD_WR, FN_TCLK1_B, 0, 0, + FN_RD_WR, FN_TCLK0, FN_CAN_CLK_B, FN_ET0_ETXD4, /* IP3_17_15 [3] */ FN_EX_CS5, FN_SD1_CMD_A, FN_ATADIR, FN_QSSL_B, FN_ET0_ETXD3_A, 0, 0, 0, @@ -2213,31 +2213,31 @@ static const struct pinmux_cfg_reg pinmux_config_regs[] = { /* IP10_22 [1] */ FN_CAN_CLK_A, FN_RX4_D, /* IP10_21_19 [3] */ - FN_AUDIO_CLKOUT, FN_TX1_E, FN_HRTS0_C, FN_FSE_B, - FN_LCD_M_DISP_B, 0, 0, 0, + FN_AUDIO_CLKOUT, FN_TX1_E, 0, FN_HRTS0_C, FN_FSE_B, + FN_LCD_M_DISP_B, 0, 0, /* IP10_18_16 [3] */ - FN_AUDIO_CLKC, FN_SCK1_E, FN_HCTS0_C, FN_FRB_B, - FN_LCD_VEPWC_B, 0, 0, 0, + FN_AUDIO_CLKC, FN_SCK1_E, 0, FN_HCTS0_C, FN_FRB_B, + FN_LCD_VEPWC_B, 0, 0, /* IP10_15 [1] */ FN_AUDIO_CLKB_A, FN_LCD_CLK_B, /* IP10_14_12 [3] */ FN_AUDIO_CLKA_A, FN_VI1_CLK_B, FN_SCK1_D, FN_IECLK_B, FN_LCD_FLM_B, 0, 0, 0, /* IP10_11_9 [3] */ - FN_SSI_SDATA3, FN_VI1_7_B, FN_HTX0_C, FN_FWE_B, - FN_LCD_CL2_B, 0, 0, 0, + FN_SSI_SDATA3, FN_VI1_7_B, 0, FN_HTX0_C, FN_FWE_B, + FN_LCD_CL2_B, 0, 0, /* IP10_8_6 [3] */ - FN_SSI_SDATA2, FN_VI1_6_B, FN_HRX0_C, FN_FRE_B, - FN_LCD_CL1_B, 0, 0, 0, + FN_SSI_SDATA2, FN_VI1_6_B, 0, FN_HRX0_C, FN_FRE_B, + FN_LCD_CL1_B, 0, 0, /* IP10_5_3 [3] */ FN_SSI_WS23, FN_VI1_5_B, FN_TX1_D, FN_HSCK0_C, FN_FALE_B, - FN_LCD_DON_B, 0, 0, 0, + FN_LCD_DON_B, 0, 0, /* IP10_2_0 [3] */ FN_SSI_SCK23, FN_VI1_4_B, FN_RX1_D, FN_FCLE_B, FN_LCD_DATA15_B, 0, 0, 0 } }, { PINMUX_CFG_REG_VAR("IPSR11", 0xFFFC0048, 32, - 3, 1, 2, 2, 2, 3, 3, 1, 2, 3, 3, 1, 1, 1, 1) { + 3, 1, 2, 3, 2, 2, 3, 3, 1, 2, 3, 3, 1, 1, 1, 1) { /* IP11_31_29 [3] */ 0, 0, 0, 0, 0, 0, 0, 0, /* IP11_28 [1] */ diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index 50299ad96659..072bd11074c6 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -403,7 +403,7 @@ static int stm32_pctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev, unsigned int num_configs; bool has_config = 0; unsigned reserve = 0; - int num_pins, num_funcs, maps_per_pin, i, err; + int num_pins, num_funcs, maps_per_pin, i, err = 0; pctl = pinctrl_dev_get_drvdata(pctldev); @@ -430,41 +430,45 @@ static int stm32_pctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev, if (has_config && num_pins >= 1) maps_per_pin++; - if (!num_pins || !maps_per_pin) - return -EINVAL; + if (!num_pins || !maps_per_pin) { + err = -EINVAL; + goto exit; + } reserve = num_pins * maps_per_pin; err = pinctrl_utils_reserve_map(pctldev, map, reserved_maps, num_maps, reserve); if (err) - return err; + goto exit; for (i = 0; i < num_pins; i++) { err = of_property_read_u32_index(node, "pinmux", i, &pinfunc); if (err) - return err; + goto exit; pin = STM32_GET_PIN_NO(pinfunc); func = STM32_GET_PIN_FUNC(pinfunc); if (!stm32_pctrl_is_function_valid(pctl, pin, func)) { dev_err(pctl->dev, "invalid function.\n"); - return -EINVAL; + err = -EINVAL; + goto exit; } grp = stm32_pctrl_find_group_by_pin(pctl, pin); if (!grp) { dev_err(pctl->dev, "unable to match pin %d to group\n", pin); - return -EINVAL; + err = -EINVAL; + goto exit; } err = stm32_pctrl_dt_node_to_map_func(pctl, pin, func, grp, map, reserved_maps, num_maps); if (err) - return err; + goto exit; if (has_config) { err = pinctrl_utils_add_map_configs(pctldev, map, @@ -472,11 +476,13 @@ static int stm32_pctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev, configs, num_configs, PIN_MAP_TYPE_CONFIGS_GROUP); if (err) - return err; + goto exit; } } - return 0; +exit: + kfree(configs); + return err; } static int stm32_pctrl_dt_node_to_map(struct pinctrl_dev *pctldev, diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index 52edf3b5988d..cc8b86a16da0 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -1039,6 +1039,7 @@ static int sunxi_pinctrl_add_function(struct sunxi_pinctrl *pctl, static int sunxi_pinctrl_build_state(struct platform_device *pdev) { struct sunxi_pinctrl *pctl = platform_get_drvdata(pdev); + void *ptr; int i; /* @@ -1105,13 +1106,15 @@ static int sunxi_pinctrl_build_state(struct platform_device *pdev) } /* And now allocated and fill the array for real */ - pctl->functions = krealloc(pctl->functions, - pctl->nfunctions * sizeof(*pctl->functions), - GFP_KERNEL); - if (!pctl->functions) { + ptr = krealloc(pctl->functions, + pctl->nfunctions * sizeof(*pctl->functions), + GFP_KERNEL); + if (!ptr) { kfree(pctl->functions); + pctl->functions = NULL; return -ENOMEM; } + pctl->functions = ptr; for (i = 0; i < pctl->desc->npins; i++) { const struct sunxi_desc_pin *pin = pctl->desc->pins + i; diff --git a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c index 5c1b6325d80d..8ac1f1ce4442 100644 --- a/drivers/pinctrl/ti/pinctrl-ti-iodelay.c +++ b/drivers/pinctrl/ti/pinctrl-ti-iodelay.c @@ -496,7 +496,7 @@ static int ti_iodelay_dt_node_to_map(struct pinctrl_dev *pctldev, return -EINVAL; rows = pinctrl_count_index_with_args(np, name); - if (rows == -EINVAL) + if (rows < 0) return rows; *map = devm_kzalloc(iod->dev, sizeof(**map), GFP_KERNEL); diff --git a/drivers/platform/mips/cpu_hwmon.c b/drivers/platform/mips/cpu_hwmon.c index 322de58eebaf..02484ae9a116 100644 --- a/drivers/platform/mips/cpu_hwmon.c +++ b/drivers/platform/mips/cpu_hwmon.c @@ -158,7 +158,7 @@ static int __init loongson_hwmon_init(void) cpu_hwmon_dev = hwmon_device_register(NULL); if (IS_ERR(cpu_hwmon_dev)) { - ret = -ENOMEM; + ret = PTR_ERR(cpu_hwmon_dev); pr_err("hwmon_device_register fail!\n"); goto fail_hwmon_device_register; } diff --git a/drivers/platform/x86/alienware-wmi.c b/drivers/platform/x86/alienware-wmi.c index e335b18da20f..2c82188f8486 100644 --- a/drivers/platform/x86/alienware-wmi.c +++ b/drivers/platform/x86/alienware-wmi.c @@ -505,23 +505,22 @@ static acpi_status alienware_wmax_command(struct wmax_basic_args *in_args, input.length = (acpi_size) sizeof(*in_args); input.pointer = in_args; - if (out_data != NULL) { + if (out_data) { output.length = ACPI_ALLOCATE_BUFFER; output.pointer = NULL; status = wmi_evaluate_method(WMAX_CONTROL_GUID, 0, command, &input, &output); - } else + if (ACPI_SUCCESS(status)) { + obj = (union acpi_object *)output.pointer; + if (obj && obj->type == ACPI_TYPE_INTEGER) + *out_data = (u32)obj->integer.value; + } + kfree(output.pointer); + } else { status = wmi_evaluate_method(WMAX_CONTROL_GUID, 0, command, &input, NULL); - - if (ACPI_SUCCESS(status) && out_data != NULL) { - obj = (union acpi_object *)output.pointer; - if (obj && obj->type == ACPI_TYPE_INTEGER) - *out_data = (u32) obj->integer.value; } - kfree(output.pointer); return status; - } /* @@ -571,7 +570,7 @@ static ssize_t show_hdmi_source(struct device *dev, return scnprintf(buf, PAGE_SIZE, "input [gpu] unknown\n"); } - pr_err("alienware-wmi: unknown HDMI source status: %d\n", out_data); + pr_err("alienware-wmi: unknown HDMI source status: %u\n", status); return scnprintf(buf, PAGE_SIZE, "input gpu [unknown]\n"); } diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c index 9c4b0d7f15c3..59f3a37a44d7 100644 --- a/drivers/platform/x86/asus-nb-wmi.c +++ b/drivers/platform/x86/asus-nb-wmi.c @@ -78,10 +78,12 @@ static bool asus_q500a_i8042_filter(unsigned char data, unsigned char str, static struct quirk_entry quirk_asus_unknown = { .wapf = 0, + .wmi_backlight_set_devstate = true, }; static struct quirk_entry quirk_asus_q500a = { .i8042_filter = asus_q500a_i8042_filter, + .wmi_backlight_set_devstate = true, }; /* @@ -92,26 +94,32 @@ static struct quirk_entry quirk_asus_q500a = { static struct quirk_entry quirk_asus_x55u = { .wapf = 4, .wmi_backlight_power = true, + .wmi_backlight_set_devstate = true, .no_display_toggle = true, }; static struct quirk_entry quirk_asus_wapf4 = { .wapf = 4, + .wmi_backlight_set_devstate = true, }; static struct quirk_entry quirk_asus_x200ca = { .wapf = 2, + .wmi_backlight_set_devstate = true, }; static struct quirk_entry quirk_asus_ux303ub = { .wmi_backlight_native = true, + .wmi_backlight_set_devstate = true, }; static struct quirk_entry quirk_asus_x550lb = { + .wmi_backlight_set_devstate = true, .xusb2pr = 0x01D9, }; -static struct quirk_entry quirk_asus_ux330uak = { +static struct quirk_entry quirk_asus_forceals = { + .wmi_backlight_set_devstate = true, .wmi_force_als_set = true, }; @@ -387,7 +395,7 @@ static const struct dmi_system_id asus_quirks[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "UX330UAK"), }, - .driver_data = &quirk_asus_ux330uak, + .driver_data = &quirk_asus_forceals, }, { .callback = dmi_matched, @@ -398,6 +406,15 @@ static const struct dmi_system_id asus_quirks[] = { }, .driver_data = &quirk_asus_x550lb, }, + { + .callback = dmi_matched, + .ident = "ASUSTeK COMPUTER INC. UX430UQ", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "UX430UQ"), + }, + .driver_data = &quirk_asus_forceals, + }, {}, }; diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 3f662cd774d7..af26ca49996d 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -457,13 +457,7 @@ static void kbd_led_update(struct work_struct *work) asus = container_of(work, struct asus_wmi, kbd_led_work); - /* - * bits 0-2: level - * bit 7: light on/off - */ - if (asus->kbd_led_wk > 0) - ctrl_param = 0x80 | (asus->kbd_led_wk & 0x7F); - + ctrl_param = 0x80 | (asus->kbd_led_wk & 0x7F); asus_wmi_set_devstate(ASUS_WMI_DEVID_KBD_BACKLIGHT, ctrl_param, NULL); } @@ -2147,7 +2141,7 @@ static int asus_wmi_add(struct platform_device *pdev) err = asus_wmi_backlight_init(asus); if (err && err != -ENODEV) goto fail_backlight; - } else + } else if (asus->driver->quirks->wmi_backlight_set_devstate) err = asus_wmi_set_devstate(ASUS_WMI_DEVID_BACKLIGHT, 2, NULL); status = wmi_install_notify_handler(asus->driver->event_guid, diff --git a/drivers/platform/x86/asus-wmi.h b/drivers/platform/x86/asus-wmi.h index 6c1311f4b04d..57a79bddb286 100644 --- a/drivers/platform/x86/asus-wmi.h +++ b/drivers/platform/x86/asus-wmi.h @@ -44,6 +44,7 @@ struct quirk_entry { bool store_backlight_power; bool wmi_backlight_power; bool wmi_backlight_native; + bool wmi_backlight_set_devstate; bool wmi_force_als_set; int wapf; /* diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index b4224389febe..06a3c1ef8eee 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -78,7 +78,7 @@ struct bios_args { u32 command; u32 commandtype; u32 datasize; - u32 data; + u8 data[128]; }; enum hp_wmi_commandtype { @@ -229,7 +229,7 @@ static int hp_wmi_perform_query(int query, enum hp_wmi_command command, .command = command, .commandtype = query, .datasize = insize, - .data = 0, + .data = { 0 }, }; struct acpi_buffer input = { sizeof(struct bios_args), &args }; struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; @@ -241,7 +241,7 @@ static int hp_wmi_perform_query(int query, enum hp_wmi_command command, if (WARN_ON(insize > sizeof(args.data))) return -EINVAL; - memcpy(&args.data, buffer, insize); + memcpy(&args.data[0], buffer, insize); wmi_evaluate_method(HPWMI_BIOS_GUID, 0, mid, &input, &output); @@ -313,7 +313,7 @@ static int __init hp_wmi_bios_2008_later(void) static int __init hp_wmi_bios_2009_later(void) { - int state = 0; + u8 state[128]; int ret = hp_wmi_perform_query(HPWMI_FEATURE2_QUERY, HPWMI_READ, &state, sizeof(state), sizeof(state)); if (!ret) @@ -393,7 +393,7 @@ static int hp_wmi_rfkill2_refresh(void) int err, i; err = hp_wmi_perform_query(HPWMI_WIRELESS2_QUERY, HPWMI_READ, &state, - 0, sizeof(state)); + sizeof(state), sizeof(state)); if (err) return err; @@ -790,7 +790,7 @@ static int __init hp_wmi_rfkill2_setup(struct platform_device *device) int err, i; err = hp_wmi_perform_query(HPWMI_WIRELESS2_QUERY, HPWMI_READ, &state, - 0, sizeof(state)); + sizeof(state), sizeof(state)); if (err) return err < 0 ? err : -EINVAL; diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c index 5ad44204a9c3..10dbd6cac48a 100644 --- a/drivers/platform/x86/intel_mid_powerbtn.c +++ b/drivers/platform/x86/intel_mid_powerbtn.c @@ -158,9 +158,10 @@ static int mid_pb_probe(struct platform_device *pdev) input_set_capability(input, EV_KEY, KEY_POWER); - ddata = (struct mid_pb_ddata *)id->driver_data; + ddata = devm_kmemdup(&pdev->dev, (void *)id->driver_data, + sizeof(*ddata), GFP_KERNEL); if (!ddata) - return -ENODATA; + return -ENOMEM; ddata->dev = &pdev->dev; ddata->irq = irq; diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index 2c85f75e32b0..2434ce8bead6 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -69,26 +69,22 @@ struct intel_scu_ipc_pdata_t { u32 i2c_base; u32 i2c_len; - u8 irq_mode; }; static const struct intel_scu_ipc_pdata_t intel_scu_ipc_lincroft_pdata = { .i2c_base = 0xff12b000, .i2c_len = 0x10, - .irq_mode = 0, }; /* Penwell and Cloverview */ static const struct intel_scu_ipc_pdata_t intel_scu_ipc_penwell_pdata = { .i2c_base = 0xff12b000, .i2c_len = 0x10, - .irq_mode = 1, }; static const struct intel_scu_ipc_pdata_t intel_scu_ipc_tangier_pdata = { .i2c_base = 0xff00d000, .i2c_len = 0x10, - .irq_mode = 0, }; struct intel_scu_ipc_dev { @@ -101,6 +97,9 @@ struct intel_scu_ipc_dev { static struct intel_scu_ipc_dev ipcdev; /* Only one for now */ +#define IPC_STATUS 0x04 +#define IPC_STATUS_IRQ BIT(2) + /* * IPC Read Buffer (Read Only): * 16 byte buffer for receiving data from SCU, if IPC command @@ -122,11 +121,8 @@ static DEFINE_MUTEX(ipclock); /* lock used to prevent multiple call to SCU */ */ static inline void ipc_command(struct intel_scu_ipc_dev *scu, u32 cmd) { - if (scu->irq_mode) { - reinit_completion(&scu->cmd_complete); - writel(cmd | IPC_IOC, scu->ipc_base); - } - writel(cmd, scu->ipc_base); + reinit_completion(&scu->cmd_complete); + writel(cmd | IPC_IOC, scu->ipc_base); } /* @@ -612,9 +608,10 @@ EXPORT_SYMBOL(intel_scu_ipc_i2c_cntrl); static irqreturn_t ioc(int irq, void *dev_id) { struct intel_scu_ipc_dev *scu = dev_id; + int status = ipc_read_status(scu); - if (scu->irq_mode) - complete(&scu->cmd_complete); + writel(status | IPC_STATUS_IRQ, scu->ipc_base + IPC_STATUS); + complete(&scu->cmd_complete); return IRQ_HANDLED; } @@ -640,8 +637,6 @@ static int ipc_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!pdata) return -ENODEV; - scu->irq_mode = pdata->irq_mode; - err = pcim_enable_device(pdev); if (err) return err; diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index 63a6881c7078..74997194fd88 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -475,6 +475,21 @@ static const struct dmi_system_id critclk_systems[] = { DMI_MATCH(DMI_BOARD_NAME, "CB6363"), }, }, + { + .ident = "SIMATIC IPC227E", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SIEMENS AG"), + DMI_MATCH(DMI_PRODUCT_VERSION, "6ES7647-8B"), + }, + }, + { + .ident = "CONNECT X300", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "SIEMENS AG"), + DMI_MATCH(DMI_PRODUCT_VERSION, "A5E45074588"), + }, + }, + { /*sentinel*/ } }; diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 7f8fa42a1084..a56e997816b2 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -748,6 +748,9 @@ static int wmi_dev_match(struct device *dev, struct device_driver *driver) struct wmi_block *wblock = dev_to_wblock(dev); const struct wmi_device_id *id = wmi_driver->id_table; + if (id == NULL) + return 0; + while (id->guid_string) { uuid_le driver_guid; diff --git a/drivers/power/avs/smartreflex.c b/drivers/power/avs/smartreflex.c index 974fd684bab2..4b6fddc18394 100644 --- a/drivers/power/avs/smartreflex.c +++ b/drivers/power/avs/smartreflex.c @@ -994,8 +994,7 @@ static int omap_sr_remove(struct platform_device *pdev) if (sr_info->autocomp_active) sr_stop_vddautocomp(sr_info); - if (sr_info->dbg_dir) - debugfs_remove_recursive(sr_info->dbg_dir); + debugfs_remove_recursive(sr_info->dbg_dir); pm_runtime_disable(&pdev->dev); list_del(&sr_info->node); diff --git a/drivers/power/reset/at91-sama5d2_shdwc.c b/drivers/power/reset/at91-sama5d2_shdwc.c index 31080c254124..037976a1fe40 100644 --- a/drivers/power/reset/at91-sama5d2_shdwc.c +++ b/drivers/power/reset/at91-sama5d2_shdwc.c @@ -246,6 +246,9 @@ static int __init at91_shdwc_probe(struct platform_device *pdev) if (!pdev->dev.of_node) return -ENODEV; + if (at91_shdwc) + return -EBUSY; + at91_shdwc = devm_kzalloc(&pdev->dev, sizeof(*at91_shdwc), GFP_KERNEL); if (!at91_shdwc) return -ENOMEM; diff --git a/drivers/power/supply/ab8500_fg.c b/drivers/power/supply/ab8500_fg.c index c569f82a0071..b87768238b70 100644 --- a/drivers/power/supply/ab8500_fg.c +++ b/drivers/power/supply/ab8500_fg.c @@ -2437,17 +2437,14 @@ static ssize_t charge_full_store(struct ab8500_fg *di, const char *buf, size_t count) { unsigned long charge_full; - ssize_t ret; + int ret; ret = kstrtoul(buf, 10, &charge_full); + if (ret) + return ret; - dev_dbg(di->dev, "Ret %zd charge_full %lu", ret, charge_full); - - if (!ret) { - di->bat_cap.max_mah = (int) charge_full; - ret = count; - } - return ret; + di->bat_cap.max_mah = (int) charge_full; + return count; } static ssize_t charge_now_show(struct ab8500_fg *di, char *buf) @@ -2459,20 +2456,16 @@ static ssize_t charge_now_store(struct ab8500_fg *di, const char *buf, size_t count) { unsigned long charge_now; - ssize_t ret; + int ret; ret = kstrtoul(buf, 10, &charge_now); + if (ret) + return ret; - dev_dbg(di->dev, "Ret %zd charge_now %lu was %d", - ret, charge_now, di->bat_cap.prev_mah); - - if (!ret) { - di->bat_cap.user_mah = (int) charge_now; - di->flags.user_cap = true; - ret = count; - queue_delayed_work(di->fg_wq, &di->fg_periodic_work, 0); - } - return ret; + di->bat_cap.user_mah = (int) charge_now; + di->flags.user_cap = true; + queue_delayed_work(di->fg_wq, &di->fg_periodic_work, 0); + return count; } static struct ab8500_fg_sysfs_entry charge_full_attr = diff --git a/drivers/power/supply/cpcap-battery.c b/drivers/power/supply/cpcap-battery.c index fe7fcf3a2ad0..7df9d432ee42 100644 --- a/drivers/power/supply/cpcap-battery.c +++ b/drivers/power/supply/cpcap-battery.c @@ -82,7 +82,7 @@ struct cpcap_battery_config { }; struct cpcap_coulomb_counter_data { - s32 sample; /* 24-bits */ + s32 sample; /* 24 or 32 bits */ s32 accumulator; s16 offset; /* 10-bits */ }; @@ -213,7 +213,7 @@ static int cpcap_battery_get_current(struct cpcap_battery_ddata *ddata) * TI or ST coulomb counter in the PMIC. */ static int cpcap_battery_cc_raw_div(struct cpcap_battery_ddata *ddata, - u32 sample, s32 accumulator, + s32 sample, s32 accumulator, s16 offset, u32 divider) { s64 acc; @@ -224,7 +224,6 @@ static int cpcap_battery_cc_raw_div(struct cpcap_battery_ddata *ddata, if (!divider) return 0; - sample &= 0xffffff; /* 24-bits, unsigned */ offset &= 0x7ff; /* 10-bits, signed */ switch (ddata->vendor) { @@ -259,7 +258,7 @@ static int cpcap_battery_cc_raw_div(struct cpcap_battery_ddata *ddata, /* 3600000μAms = 1μAh */ static int cpcap_battery_cc_to_uah(struct cpcap_battery_ddata *ddata, - u32 sample, s32 accumulator, + s32 sample, s32 accumulator, s16 offset) { return cpcap_battery_cc_raw_div(ddata, sample, @@ -268,7 +267,7 @@ static int cpcap_battery_cc_to_uah(struct cpcap_battery_ddata *ddata, } static int cpcap_battery_cc_to_ua(struct cpcap_battery_ddata *ddata, - u32 sample, s32 accumulator, + s32 sample, s32 accumulator, s16 offset) { return cpcap_battery_cc_raw_div(ddata, sample, @@ -312,6 +311,8 @@ cpcap_battery_read_accumulated(struct cpcap_battery_ddata *ddata, /* Sample value CPCAP_REG_CCS1 & 2 */ ccd->sample = (buf[1] & 0x0fff) << 16; ccd->sample |= buf[0]; + if (ddata->vendor == CPCAP_VENDOR_TI) + ccd->sample = sign_extend32(24, ccd->sample); /* Accumulator value CPCAP_REG_CCA1 & 2 */ ccd->accumulator = ((s16)buf[3]) << 16; diff --git a/drivers/power/supply/ltc2941-battery-gauge.c b/drivers/power/supply/ltc2941-battery-gauge.c index 9621d6dd88c6..50bdf2d5248b 100644 --- a/drivers/power/supply/ltc2941-battery-gauge.c +++ b/drivers/power/supply/ltc2941-battery-gauge.c @@ -406,7 +406,7 @@ static int ltc294x_i2c_remove(struct i2c_client *client) { struct ltc294x_info *info = i2c_get_clientdata(client); - cancel_delayed_work(&info->work); + cancel_delayed_work_sync(&info->work); power_supply_unregister(info->supply); return 0; } diff --git a/drivers/power/supply/max14656_charger_detector.c b/drivers/power/supply/max14656_charger_detector.c index d19307f791c6..9e6472834e37 100644 --- a/drivers/power/supply/max14656_charger_detector.c +++ b/drivers/power/supply/max14656_charger_detector.c @@ -240,6 +240,14 @@ static enum power_supply_property max14656_battery_props[] = { POWER_SUPPLY_PROP_MANUFACTURER, }; +static void stop_irq_work(void *data) +{ + struct max14656_chip *chip = data; + + cancel_delayed_work_sync(&chip->irq_work); +} + + static int max14656_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -278,8 +286,6 @@ static int max14656_probe(struct i2c_client *client, if (ret) return -ENODEV; - INIT_DELAYED_WORK(&chip->irq_work, max14656_irq_worker); - chip->detect_psy = devm_power_supply_register(dev, &chip->psy_desc, &psy_cfg); if (IS_ERR(chip->detect_psy)) { @@ -287,6 +293,13 @@ static int max14656_probe(struct i2c_client *client, return -EINVAL; } + INIT_DELAYED_WORK(&chip->irq_work, max14656_irq_worker); + ret = devm_add_action(dev, stop_irq_work, chip); + if (ret) { + dev_err(dev, "devm_add_action %d failed\n", ret); + return ret; + } + ret = devm_request_irq(dev, chip->irq, max14656_irq, IRQF_TRIGGER_FALLING, MAX14656_NAME, chip); diff --git a/drivers/power/supply/max8998_charger.c b/drivers/power/supply/max8998_charger.c index b64cf0f14142..66438029bdd0 100644 --- a/drivers/power/supply/max8998_charger.c +++ b/drivers/power/supply/max8998_charger.c @@ -85,7 +85,7 @@ static const struct power_supply_desc max8998_battery_desc = { static int max8998_battery_probe(struct platform_device *pdev) { struct max8998_dev *iodev = dev_get_drvdata(pdev->dev.parent); - struct max8998_platform_data *pdata = dev_get_platdata(iodev->dev); + struct max8998_platform_data *pdata = iodev->pdata; struct power_supply_config psy_cfg = {}; struct max8998_battery_data *max8998; struct i2c_client *i2c; diff --git a/drivers/power/supply/twl4030_charger.c b/drivers/power/supply/twl4030_charger.c index 0cc12bfe7b02..b20491016b1e 100644 --- a/drivers/power/supply/twl4030_charger.c +++ b/drivers/power/supply/twl4030_charger.c @@ -420,7 +420,8 @@ static void twl4030_current_worker(struct work_struct *data) if (v < USB_MIN_VOLT) { /* Back up and stop adjusting. */ - bci->usb_cur -= USB_CUR_STEP; + if (bci->usb_cur >= USB_CUR_STEP) + bci->usb_cur -= USB_CUR_STEP; bci->usb_cur_target = bci->usb_cur; } else if (bci->usb_cur >= bci->usb_cur_target || bci->usb_cur + USB_CUR_STEP > USB_MAX_CURRENT) { @@ -439,6 +440,7 @@ static void twl4030_current_worker(struct work_struct *data) static int twl4030_charger_enable_usb(struct twl4030_bci *bci, bool enable) { int ret; + u32 reg; if (bci->usb_mode == CHARGE_OFF) enable = false; @@ -452,14 +454,38 @@ static int twl4030_charger_enable_usb(struct twl4030_bci *bci, bool enable) bci->usb_enabled = 1; } - if (bci->usb_mode == CHARGE_AUTO) + if (bci->usb_mode == CHARGE_AUTO) { + /* Enable interrupts now. */ + reg = ~(u32)(TWL4030_ICHGLOW | TWL4030_ICHGEOC | + TWL4030_TBATOR2 | TWL4030_TBATOR1 | + TWL4030_BATSTS); + ret = twl_i2c_write_u8(TWL4030_MODULE_INTERRUPTS, reg, + TWL4030_INTERRUPTS_BCIIMR1A); + if (ret < 0) { + dev_err(bci->dev, + "failed to unmask interrupts: %d\n", + ret); + return ret; + } /* forcing the field BCIAUTOUSB (BOOT_BCI[1]) to 1 */ ret = twl4030_clear_set_boot_bci(0, TWL4030_BCIAUTOUSB); + } /* forcing USBFASTMCHG(BCIMFSTS4[2]) to 1 */ ret = twl4030_clear_set(TWL_MODULE_MAIN_CHARGE, 0, TWL4030_USBFASTMCHG, TWL4030_BCIMFSTS4); if (bci->usb_mode == CHARGE_LINEAR) { + /* Enable interrupts now. */ + reg = ~(u32)(TWL4030_ICHGLOW | TWL4030_TBATOR2 | + TWL4030_TBATOR1 | TWL4030_BATSTS); + ret = twl_i2c_write_u8(TWL4030_MODULE_INTERRUPTS, reg, + TWL4030_INTERRUPTS_BCIIMR1A); + if (ret < 0) { + dev_err(bci->dev, + "failed to unmask interrupts: %d\n", + ret); + return ret; + } twl4030_clear_set_boot_bci(TWL4030_BCIAUTOAC|TWL4030_CVENAC, 0); /* Watch dog key: WOVF acknowledge */ ret = twl_i2c_write_u8(TWL_MODULE_MAIN_CHARGE, 0x33, diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index c64903a5978f..e232233beb8f 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -175,10 +175,11 @@ static struct posix_clock_operations ptp_clock_ops = { .read = ptp_read, }; -static void delete_ptp_clock(struct posix_clock *pc) +static void ptp_clock_release(struct device *dev) { - struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev); + ptp_cleanup_pin_groups(ptp); mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); ida_simple_remove(&ptp_clocks_map, ptp->index); @@ -222,7 +223,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, } ptp->clock.ops = ptp_clock_ops; - ptp->clock.release = delete_ptp_clock; ptp->info = info; ptp->devid = MKDEV(major, index); ptp->index = index; @@ -249,15 +249,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, if (err) goto no_pin_groups; - /* Create a new device in our class. */ - ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid, - ptp, ptp->pin_attr_groups, - "ptp%d", ptp->index); - if (IS_ERR(ptp->dev)) { - err = PTR_ERR(ptp->dev); - goto no_device; - } - /* Register a new PPS source. */ if (info->pps) { struct pps_source_info pps; @@ -273,8 +264,18 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, } } - /* Create a posix clock. */ - err = posix_clock_register(&ptp->clock, ptp->devid); + /* Initialize a new device of our class in our clock structure. */ + device_initialize(&ptp->dev); + ptp->dev.devt = ptp->devid; + ptp->dev.class = ptp_class; + ptp->dev.parent = parent; + ptp->dev.groups = ptp->pin_attr_groups; + ptp->dev.release = ptp_clock_release; + dev_set_drvdata(&ptp->dev, ptp); + dev_set_name(&ptp->dev, "ptp%d", ptp->index); + + /* Create a posix clock and link it to the device. */ + err = posix_clock_register(&ptp->clock, &ptp->dev); if (err) { pr_err("failed to create posix clock\n"); goto no_clock; @@ -286,8 +287,6 @@ no_clock: if (ptp->pps_source) pps_unregister_source(ptp->pps_source); no_pps: - device_destroy(ptp_class, ptp->devid); -no_device: ptp_cleanup_pin_groups(ptp); no_pin_groups: if (ptp->kworker) @@ -317,10 +316,8 @@ int ptp_clock_unregister(struct ptp_clock *ptp) if (ptp->pps_source) pps_unregister_source(ptp->pps_source); - device_destroy(ptp_class, ptp->devid); - ptp_cleanup_pin_groups(ptp); - posix_clock_unregister(&ptp->clock); + return 0; } EXPORT_SYMBOL(ptp_clock_unregister); diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index b86f1bfecd6f..45ed9e172bb4 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -41,7 +41,7 @@ struct timestamp_event_queue { struct ptp_clock { struct posix_clock clock; - struct device *dev; + struct device dev; struct ptp_clock_info *info; dev_t devid; int index; /* index into clocks.map */ diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c index c45e5719ba17..b1b74cfb1571 100644 --- a/drivers/pwm/core.c +++ b/drivers/pwm/core.c @@ -874,6 +874,7 @@ void pwm_put(struct pwm_device *pwm) if (pwm->chip->ops->free) pwm->chip->ops->free(pwm->chip, pwm); + pwm_set_chip_data(pwm, NULL); pwm->label = NULL; module_put(pwm->chip->ops->owner); diff --git a/drivers/pwm/pwm-bcm-iproc.c b/drivers/pwm/pwm-bcm-iproc.c index d961a8207b1c..31b01035d0ab 100644 --- a/drivers/pwm/pwm-bcm-iproc.c +++ b/drivers/pwm/pwm-bcm-iproc.c @@ -187,6 +187,7 @@ static int iproc_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm, static const struct pwm_ops iproc_pwm_ops = { .apply = iproc_pwmc_apply, .get_state = iproc_pwmc_get_state, + .owner = THIS_MODULE, }; static int iproc_pwmc_probe(struct platform_device *pdev) diff --git a/drivers/pwm/pwm-berlin.c b/drivers/pwm/pwm-berlin.c index 771859aca4be..7bb819e3c0c1 100644 --- a/drivers/pwm/pwm-berlin.c +++ b/drivers/pwm/pwm-berlin.c @@ -78,7 +78,6 @@ static void berlin_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) { struct berlin_pwm_channel *channel = pwm_get_chip_data(pwm); - pwm_set_chip_data(pwm, NULL); kfree(channel); } diff --git a/drivers/pwm/pwm-clps711x.c b/drivers/pwm/pwm-clps711x.c index 26ec24e457b1..7e16b7def0dc 100644 --- a/drivers/pwm/pwm-clps711x.c +++ b/drivers/pwm/pwm-clps711x.c @@ -48,7 +48,7 @@ static void clps711x_pwm_update_val(struct clps711x_chip *priv, u32 n, u32 v) static unsigned int clps711x_get_duty(struct pwm_device *pwm, unsigned int v) { /* Duty cycle 0..15 max */ - return DIV_ROUND_CLOSEST(v * 0xf, pwm_get_period(pwm)); + return DIV_ROUND_CLOSEST(v * 0xf, pwm->args.period); } static int clps711x_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) @@ -71,7 +71,7 @@ static int clps711x_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, struct clps711x_chip *priv = to_clps711x_chip(chip); unsigned int duty; - if (period_ns != pwm_get_period(pwm)) + if (period_ns != pwm->args.period) return -EINVAL; duty = clps711x_get_duty(pwm, duty_ns); diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c index 4721a264bac2..7a4a6406cf69 100644 --- a/drivers/pwm/pwm-lpss.c +++ b/drivers/pwm/pwm-lpss.c @@ -97,7 +97,7 @@ static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm, unsigned long long on_time_div; unsigned long c = lpwm->info->clk_rate, base_unit_range; unsigned long long base_unit, freq = NSEC_PER_SEC; - u32 ctrl; + u32 orig_ctrl, ctrl; do_div(freq, period_ns); @@ -114,13 +114,17 @@ static void pwm_lpss_prepare(struct pwm_lpss_chip *lpwm, struct pwm_device *pwm, do_div(on_time_div, period_ns); on_time_div = 255ULL - on_time_div; - ctrl = pwm_lpss_read(pwm); + orig_ctrl = ctrl = pwm_lpss_read(pwm); ctrl &= ~PWM_ON_TIME_DIV_MASK; ctrl &= ~(base_unit_range << PWM_BASE_UNIT_SHIFT); base_unit &= base_unit_range; ctrl |= (u32) base_unit << PWM_BASE_UNIT_SHIFT; ctrl |= on_time_div; - pwm_lpss_write(pwm, ctrl); + + if (orig_ctrl != ctrl) { + pwm_lpss_write(pwm, ctrl); + pwm_lpss_write(pwm, ctrl | PWM_SW_UPDATE); + } } static inline void pwm_lpss_cond_enable(struct pwm_device *pwm, bool cond) @@ -144,7 +148,6 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm, return ret; } pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period); - pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE); pwm_lpss_cond_enable(pwm, lpwm->info->bypass == false); ret = pwm_lpss_wait_for_update(pwm); if (ret) { @@ -157,7 +160,6 @@ static int pwm_lpss_apply(struct pwm_chip *chip, struct pwm_device *pwm, if (ret) return ret; pwm_lpss_prepare(lpwm, pwm, state->duty_cycle, state->period); - pwm_lpss_write(pwm, pwm_lpss_read(pwm) | PWM_SW_UPDATE); return pwm_lpss_wait_for_update(pwm); } } else if (pwm_is_enabled(pwm)) { @@ -214,6 +216,12 @@ EXPORT_SYMBOL_GPL(pwm_lpss_probe); int pwm_lpss_remove(struct pwm_lpss_chip *lpwm) { + int i; + + for (i = 0; i < lpwm->info->npwm; i++) { + if (pwm_is_enabled(&lpwm->chip.pwms[i])) + pm_runtime_put(lpwm->chip.dev); + } return pwmchip_remove(&lpwm->chip); } EXPORT_SYMBOL_GPL(pwm_lpss_remove); diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c index 9b79cbc7a715..3d2c36963a4f 100644 --- a/drivers/pwm/pwm-meson.c +++ b/drivers/pwm/pwm-meson.c @@ -188,7 +188,7 @@ static int meson_pwm_calc(struct meson_pwm *meson, do_div(fin_ps, fin_freq); /* Calc pre_div with the period */ - for (pre_div = 0; pre_div < MISC_CLK_DIV_MASK; pre_div++) { + for (pre_div = 0; pre_div <= MISC_CLK_DIV_MASK; pre_div++) { cnt = DIV_ROUND_CLOSEST_ULL((u64)period * 1000, fin_ps * (pre_div + 1)); dev_dbg(meson->chip.dev, "fin_ps=%llu pre_div=%u cnt=%u\n", @@ -197,7 +197,7 @@ static int meson_pwm_calc(struct meson_pwm *meson, break; } - if (pre_div == MISC_CLK_DIV_MASK) { + if (pre_div > MISC_CLK_DIV_MASK) { dev_err(meson->chip.dev, "unable to get period pre_div\n"); return -EINVAL; } @@ -325,11 +325,6 @@ static int meson_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, if (state->period != channel->state.period || state->duty_cycle != channel->state.duty_cycle || state->polarity != channel->state.polarity) { - if (channel->state.enabled) { - meson_pwm_disable(meson, pwm->hwpwm); - channel->state.enabled = false; - } - if (state->polarity != channel->state.polarity) { if (state->polarity == PWM_POLARITY_NORMAL) meson->inverter_mask |= BIT(pwm->hwpwm); diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c index 5ad42f33e70c..2e15acf13893 100644 --- a/drivers/pwm/pwm-omap-dmtimer.c +++ b/drivers/pwm/pwm-omap-dmtimer.c @@ -337,6 +337,11 @@ static int pwm_omap_dmtimer_probe(struct platform_device *pdev) static int pwm_omap_dmtimer_remove(struct platform_device *pdev) { struct pwm_omap_dmtimer_chip *omap = platform_get_drvdata(pdev); + int ret; + + ret = pwmchip_remove(&omap->chip); + if (ret) + return ret; if (pm_runtime_active(&omap->dm_timer_pdev->dev)) omap->pdata->stop(omap->dm_timer); @@ -345,7 +350,7 @@ static int pwm_omap_dmtimer_remove(struct platform_device *pdev) mutex_destroy(&omap->mutex); - return pwmchip_remove(&omap->chip); + return 0; } static const struct of_device_id pwm_omap_dmtimer_of_match[] = { diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c index a7eaf962a95b..e1e5dfcb16f3 100644 --- a/drivers/pwm/pwm-pca9685.c +++ b/drivers/pwm/pwm-pca9685.c @@ -170,14 +170,9 @@ static void pca9685_pwm_gpio_set(struct gpio_chip *gpio, unsigned int offset, static void pca9685_pwm_gpio_free(struct gpio_chip *gpio, unsigned int offset) { struct pca9685 *pca = gpiochip_get_data(gpio); - struct pwm_device *pwm; pca9685_pwm_gpio_set(gpio, offset, 0); pm_runtime_put(pca->chip.dev); - mutex_lock(&pca->lock); - pwm = &pca->chip.pwms[offset]; - pwm_set_chip_data(pwm, NULL); - mutex_unlock(&pca->lock); } static int pca9685_pwm_gpio_get_direction(struct gpio_chip *chip, diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c index 062f2cfc45ec..3762432dd6a7 100644 --- a/drivers/pwm/pwm-samsung.c +++ b/drivers/pwm/pwm-samsung.c @@ -238,7 +238,6 @@ static int pwm_samsung_request(struct pwm_chip *chip, struct pwm_device *pwm) static void pwm_samsung_free(struct pwm_chip *chip, struct pwm_device *pwm) { devm_kfree(chip->dev, pwm_get_chip_data(pwm)); - pwm_set_chip_data(pwm, NULL); } static int pwm_samsung_enable(struct pwm_chip *chip, struct pwm_device *pwm) diff --git a/drivers/rapidio/rio_cm.c b/drivers/rapidio/rio_cm.c index ef989a15aefc..b29fc258eeba 100644 --- a/drivers/rapidio/rio_cm.c +++ b/drivers/rapidio/rio_cm.c @@ -1215,7 +1215,9 @@ static int riocm_ch_listen(u16 ch_id) riocm_debug(CHOP, "(ch_%d)", ch_id); ch = riocm_get_channel(ch_id); - if (!ch || !riocm_cmp_exch(ch, RIO_CM_CHAN_BOUND, RIO_CM_LISTEN)) + if (!ch) + return -EINVAL; + if (!riocm_cmp_exch(ch, RIO_CM_CHAN_BOUND, RIO_CM_LISTEN)) ret = -EINVAL; riocm_put_channel(ch); return ret; diff --git a/drivers/regulator/ab8500.c b/drivers/regulator/ab8500.c index 0f97514e3474..c9f20e1394e3 100644 --- a/drivers/regulator/ab8500.c +++ b/drivers/regulator/ab8500.c @@ -1099,23 +1099,6 @@ static struct ab8500_regulator_info .update_val_idle = 0x82, .update_val_normal = 0x02, }, - [AB8505_LDO_USB] = { - .desc = { - .name = "LDO-USB", - .ops = &ab8500_regulator_mode_ops, - .type = REGULATOR_VOLTAGE, - .id = AB8505_LDO_USB, - .owner = THIS_MODULE, - .n_voltages = 1, - .volt_table = fixed_3300000_voltage, - }, - .update_bank = 0x03, - .update_reg = 0x82, - .update_mask = 0x03, - .update_val = 0x01, - .update_val_idle = 0x03, - .update_val_normal = 0x01, - }, [AB8505_LDO_AUDIO] = { .desc = { .name = "LDO-AUDIO", diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index d2428c262b7c..b2cb4f497ef6 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -4503,7 +4503,7 @@ static int __init regulator_init(void) /* init early to allow our consumers to complete system booting */ core_initcall(regulator_init); -static int regulator_late_cleanup(struct device *dev, void *data) +static int __init regulator_late_cleanup(struct device *dev, void *data) { struct regulator_dev *rdev = dev_to_rdev(dev); const struct regulator_ops *ops = rdev->desc->ops; @@ -4552,8 +4552,17 @@ unlock: return 0; } -static void regulator_init_complete_work_function(struct work_struct *work) +static int __init regulator_init_complete(void) { + /* + * Since DT doesn't provide an idiomatic mechanism for + * enabling full constraints and since it's much more natural + * with DT to provide them just assume that a DT enabled + * system has full constraints. + */ + if (of_have_populated_dt()) + has_full_constraints = true; + /* * Regulators may had failed to resolve their input supplies * when were registered, either because the input supply was @@ -4571,35 +4580,6 @@ static void regulator_init_complete_work_function(struct work_struct *work) */ class_for_each_device(®ulator_class, NULL, NULL, regulator_late_cleanup); -} - -static DECLARE_DELAYED_WORK(regulator_init_complete_work, - regulator_init_complete_work_function); - -static int __init regulator_init_complete(void) -{ - /* - * Since DT doesn't provide an idiomatic mechanism for - * enabling full constraints and since it's much more natural - * with DT to provide them just assume that a DT enabled - * system has full constraints. - */ - if (of_have_populated_dt()) - has_full_constraints = true; - - /* - * We punt completion for an arbitrary amount of time since - * systems like distros will load many drivers from userspace - * so consumers might not always be ready yet, this is - * particularly an issue with laptops where this might bounce - * the display off then on. Ideally we'd get a notification - * from userspace when this happens but we don't so just wait - * a bit and hope we waited long enough. It'd be better if - * we'd only do this on systems that need it, and a kernel - * command line option might be useful. - */ - schedule_delayed_work(®ulator_init_complete_work, - msecs_to_jiffies(30000)); return 0; } diff --git a/drivers/regulator/lp87565-regulator.c b/drivers/regulator/lp87565-regulator.c index cfdbe294fb6a..32d4e6ec2e19 100644 --- a/drivers/regulator/lp87565-regulator.c +++ b/drivers/regulator/lp87565-regulator.c @@ -188,7 +188,7 @@ static int lp87565_regulator_probe(struct platform_device *pdev) struct lp87565 *lp87565 = dev_get_drvdata(pdev->dev.parent); struct regulator_config config = { }; struct regulator_dev *rdev; - int i, min_idx = LP87565_BUCK_1, max_idx = LP87565_BUCK_3; + int i, min_idx = LP87565_BUCK_0, max_idx = LP87565_BUCK_3; platform_set_drvdata(pdev, lp87565); diff --git a/drivers/regulator/max8907-regulator.c b/drivers/regulator/max8907-regulator.c index 860400d2cd85..a8f2f07239fb 100644 --- a/drivers/regulator/max8907-regulator.c +++ b/drivers/regulator/max8907-regulator.c @@ -299,7 +299,10 @@ static int max8907_regulator_probe(struct platform_device *pdev) memcpy(pmic->desc, max8907_regulators, sizeof(pmic->desc)); /* Backwards compatibility with MAX8907B; SD1 uses different voltages */ - regmap_read(max8907->regmap_gen, MAX8907_REG_II2RR, &val); + ret = regmap_read(max8907->regmap_gen, MAX8907_REG_II2RR, &val); + if (ret) + return ret; + if ((val & MAX8907_II2RR_VERSION_MASK) == MAX8907_II2RR_VERSION_REV_B) { pmic->desc[MAX8907_SD1].min_uV = 637500; @@ -336,14 +339,20 @@ static int max8907_regulator_probe(struct platform_device *pdev) } if (pmic->desc[i].ops == &max8907_ldo_ops) { - regmap_read(config.regmap, pmic->desc[i].enable_reg, + ret = regmap_read(config.regmap, pmic->desc[i].enable_reg, &val); + if (ret) + return ret; + if ((val & MAX8907_MASK_LDO_SEQ) != MAX8907_MASK_LDO_SEQ) pmic->desc[i].ops = &max8907_ldo_hwctl_ops; } else if (pmic->desc[i].ops == &max8907_out5v_ops) { - regmap_read(config.regmap, pmic->desc[i].enable_reg, + ret = regmap_read(config.regmap, pmic->desc[i].enable_reg, &val); + if (ret) + return ret; + if ((val & (MAX8907_MASK_OUT5V_VINEN | MAX8907_MASK_OUT5V_ENSRC)) != MAX8907_MASK_OUT5V_ENSRC) diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c index bb5ab7d78895..c2cc392a27d4 100644 --- a/drivers/regulator/palmas-regulator.c +++ b/drivers/regulator/palmas-regulator.c @@ -443,13 +443,16 @@ static int palmas_ldo_write(struct palmas *palmas, unsigned int reg, static int palmas_set_mode_smps(struct regulator_dev *dev, unsigned int mode) { int id = rdev_get_id(dev); + int ret; struct palmas_pmic *pmic = rdev_get_drvdata(dev); struct palmas_pmic_driver_data *ddata = pmic->palmas->pmic_ddata; struct palmas_regs_info *rinfo = &ddata->palmas_regs_info[id]; unsigned int reg; bool rail_enable = true; - palmas_smps_read(pmic->palmas, rinfo->ctrl_addr, ®); + ret = palmas_smps_read(pmic->palmas, rinfo->ctrl_addr, ®); + if (ret) + return ret; reg &= ~PALMAS_SMPS12_CTRL_MODE_ACTIVE_MASK; diff --git a/drivers/regulator/pfuze100-regulator.c b/drivers/regulator/pfuze100-regulator.c index 659e516455be..4f205366d8ae 100644 --- a/drivers/regulator/pfuze100-regulator.c +++ b/drivers/regulator/pfuze100-regulator.c @@ -632,7 +632,13 @@ static int pfuze100_regulator_probe(struct i2c_client *client, /* SW2~SW4 high bit check and modify the voltage value table */ if (i >= sw_check_start && i <= sw_check_end) { - regmap_read(pfuze_chip->regmap, desc->vsel_reg, &val); + ret = regmap_read(pfuze_chip->regmap, + desc->vsel_reg, &val); + if (ret) { + dev_err(&client->dev, "Fails to read from the register.\n"); + return ret; + } + if (val & sw_hi) { if (pfuze_chip->chip_id == PFUZE3000) { desc->volt_table = pfuze3000_sw2hi; diff --git a/drivers/regulator/pv88060-regulator.c b/drivers/regulator/pv88060-regulator.c index a9446056435f..1f2d8180506b 100644 --- a/drivers/regulator/pv88060-regulator.c +++ b/drivers/regulator/pv88060-regulator.c @@ -135,7 +135,7 @@ static int pv88060_set_current_limit(struct regulator_dev *rdev, int min, int i; /* search for closest to maximum */ - for (i = info->n_current_limits; i >= 0; i--) { + for (i = info->n_current_limits - 1; i >= 0; i--) { if (min <= info->current_limits[i] && max >= info->current_limits[i]) { return regmap_update_bits(rdev->regmap, diff --git a/drivers/regulator/pv88080-regulator.c b/drivers/regulator/pv88080-regulator.c index 9a08cb2de501..6770e4de2097 100644 --- a/drivers/regulator/pv88080-regulator.c +++ b/drivers/regulator/pv88080-regulator.c @@ -279,7 +279,7 @@ static int pv88080_set_current_limit(struct regulator_dev *rdev, int min, int i; /* search for closest to maximum */ - for (i = info->n_current_limits; i >= 0; i--) { + for (i = info->n_current_limits - 1; i >= 0; i--) { if (min <= info->current_limits[i] && max >= info->current_limits[i]) { return regmap_update_bits(rdev->regmap, diff --git a/drivers/regulator/pv88090-regulator.c b/drivers/regulator/pv88090-regulator.c index 7a0c15957bd0..2302b0df7630 100644 --- a/drivers/regulator/pv88090-regulator.c +++ b/drivers/regulator/pv88090-regulator.c @@ -157,7 +157,7 @@ static int pv88090_set_current_limit(struct regulator_dev *rdev, int min, int i; /* search for closest to maximum */ - for (i = info->n_current_limits; i >= 0; i--) { + for (i = info->n_current_limits - 1; i >= 0; i--) { if (min <= info->current_limits[i] && max >= info->current_limits[i]) { return regmap_update_bits(rdev->regmap, diff --git a/drivers/regulator/rk808-regulator.c b/drivers/regulator/rk808-regulator.c index 213b68743cc8..92498ac50303 100644 --- a/drivers/regulator/rk808-regulator.c +++ b/drivers/regulator/rk808-regulator.c @@ -714,7 +714,7 @@ static int rk808_regulator_dt_parse_pdata(struct device *dev, } if (!pdata->dvs_gpio[i]) { - dev_warn(dev, "there is no dvs%d gpio\n", i); + dev_info(dev, "there is no dvs%d gpio\n", i); continue; } diff --git a/drivers/regulator/rn5t618-regulator.c b/drivers/regulator/rn5t618-regulator.c index 790a4a73ea2c..40b74648bd31 100644 --- a/drivers/regulator/rn5t618-regulator.c +++ b/drivers/regulator/rn5t618-regulator.c @@ -154,6 +154,7 @@ static struct platform_driver rn5t618_regulator_driver = { module_platform_driver(rn5t618_regulator_driver); +MODULE_ALIAS("platform:rn5t618-regulator"); MODULE_AUTHOR("Beniamino Galvani "); MODULE_DESCRIPTION("RN5T618 regulator driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/regulator/ti-abb-regulator.c b/drivers/regulator/ti-abb-regulator.c index d2f994298753..6d17357b3a24 100644 --- a/drivers/regulator/ti-abb-regulator.c +++ b/drivers/regulator/ti-abb-regulator.c @@ -173,19 +173,14 @@ static int ti_abb_wait_txdone(struct device *dev, struct ti_abb *abb) while (timeout++ <= abb->settling_time) { status = ti_abb_check_txdone(abb); if (status) - break; + return 0; udelay(1); } - if (timeout > abb->settling_time) { - dev_warn_ratelimited(dev, - "%s:TRANXDONE timeout(%duS) int=0x%08x\n", - __func__, timeout, readl(abb->int_base)); - return -ETIMEDOUT; - } - - return 0; + dev_warn_ratelimited(dev, "%s:TRANXDONE timeout(%duS) int=0x%08x\n", + __func__, timeout, readl(abb->int_base)); + return -ETIMEDOUT; } /** @@ -205,19 +200,14 @@ static int ti_abb_clear_all_txdone(struct device *dev, const struct ti_abb *abb) status = ti_abb_check_txdone(abb); if (!status) - break; + return 0; udelay(1); } - if (timeout > abb->settling_time) { - dev_warn_ratelimited(dev, - "%s:TRANXDONE timeout(%duS) int=0x%08x\n", - __func__, timeout, readl(abb->int_base)); - return -ETIMEDOUT; - } - - return 0; + dev_warn_ratelimited(dev, "%s:TRANXDONE timeout(%duS) int=0x%08x\n", + __func__, timeout, readl(abb->int_base)); + return -ETIMEDOUT; } /** diff --git a/drivers/regulator/tps65086-regulator.c b/drivers/regulator/tps65086-regulator.c index 45e96e154690..5a5e9b5bf4be 100644 --- a/drivers/regulator/tps65086-regulator.c +++ b/drivers/regulator/tps65086-regulator.c @@ -90,8 +90,8 @@ static const struct regulator_linear_range tps65086_buck345_25mv_ranges[] = { static const struct regulator_linear_range tps65086_ldoa1_ranges[] = { REGULATOR_LINEAR_RANGE(1350000, 0x0, 0x0, 0), REGULATOR_LINEAR_RANGE(1500000, 0x1, 0x7, 100000), - REGULATOR_LINEAR_RANGE(2300000, 0x8, 0xA, 100000), - REGULATOR_LINEAR_RANGE(2700000, 0xB, 0xD, 150000), + REGULATOR_LINEAR_RANGE(2300000, 0x8, 0xB, 100000), + REGULATOR_LINEAR_RANGE(2850000, 0xC, 0xD, 150000), REGULATOR_LINEAR_RANGE(3300000, 0xE, 0xE, 0), }; diff --git a/drivers/regulator/tps65910-regulator.c b/drivers/regulator/tps65910-regulator.c index 81672a58fcc2..194fa0cbbc04 100644 --- a/drivers/regulator/tps65910-regulator.c +++ b/drivers/regulator/tps65910-regulator.c @@ -1102,8 +1102,10 @@ static int tps65910_probe(struct platform_device *pdev) platform_set_drvdata(pdev, pmic); /* Give control of all register to control port */ - tps65910_reg_set_bits(pmic->mfd, TPS65910_DEVCTRL, + err = tps65910_reg_set_bits(pmic->mfd, TPS65910_DEVCTRL, DEVCTRL_SR_CTL_I2C_SEL_MASK); + if (err < 0) + return err; switch (tps65910_chip_id(tps65910)) { case TPS65910: diff --git a/drivers/regulator/wm831x-dcdc.c b/drivers/regulator/wm831x-dcdc.c index 5a5bc4bb08d2..df591435d12a 100644 --- a/drivers/regulator/wm831x-dcdc.c +++ b/drivers/regulator/wm831x-dcdc.c @@ -327,8 +327,8 @@ static int wm831x_buckv_get_voltage_sel(struct regulator_dev *rdev) } /* Current limit options */ -static u16 wm831x_dcdc_ilim[] = { - 125, 250, 375, 500, 625, 750, 875, 1000 +static const unsigned int wm831x_dcdc_ilim[] = { + 125000, 250000, 375000, 500000, 625000, 750000, 875000, 1000000 }; static int wm831x_buckv_set_current_limit(struct regulator_dev *rdev, diff --git a/drivers/remoteproc/da8xx_remoteproc.c b/drivers/remoteproc/da8xx_remoteproc.c index bf3b9034c319..a127d2ccd7ca 100644 --- a/drivers/remoteproc/da8xx_remoteproc.c +++ b/drivers/remoteproc/da8xx_remoteproc.c @@ -207,7 +207,7 @@ static int da8xx_rproc_get_internal_memories(struct platform_device *pdev, res->start & DA8XX_RPROC_LOCAL_ADDRESS_MASK; drproc->mem[i].size = resource_size(res); - dev_dbg(dev, "memory %8s: bus addr %pa size 0x%x va %p da 0x%x\n", + dev_dbg(dev, "memory %8s: bus addr %pa size 0x%zx va %p da 0x%x\n", mem_names[i], &drproc->mem[i].bus_addr, drproc->mem[i].size, drproc->mem[i].cpu_addr, drproc->mem[i].dev_addr); diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index eab14b414bf0..cc733b89560a 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -1620,7 +1620,7 @@ static int __init remoteproc_init(void) return 0; } -module_init(remoteproc_init); +subsys_initcall(remoteproc_init); static void __exit remoteproc_exit(void) { diff --git a/drivers/remoteproc/remoteproc_sysfs.c b/drivers/remoteproc/remoteproc_sysfs.c index 47be411400e5..3a4c3d7cafca 100644 --- a/drivers/remoteproc/remoteproc_sysfs.c +++ b/drivers/remoteproc/remoteproc_sysfs.c @@ -48,6 +48,11 @@ static ssize_t firmware_store(struct device *dev, } len = strcspn(buf, "\n"); + if (!len) { + dev_err(dev, "can't provide a NULL firmware\n"); + err = -EINVAL; + goto out; + } p = kstrndup(buf, len, GFP_KERNEL); if (!p) { diff --git a/drivers/reset/core.c b/drivers/reset/core.c index da4292e9de97..7e0a14211c88 100644 --- a/drivers/reset/core.c +++ b/drivers/reset/core.c @@ -466,28 +466,29 @@ struct reset_control *__of_reset_control_get(struct device_node *node, break; } } - of_node_put(args.np); if (!rcdev) { - mutex_unlock(&reset_list_mutex); - return ERR_PTR(-EPROBE_DEFER); + rstc = ERR_PTR(-EPROBE_DEFER); + goto out; } if (WARN_ON(args.args_count != rcdev->of_reset_n_cells)) { - mutex_unlock(&reset_list_mutex); - return ERR_PTR(-EINVAL); + rstc = ERR_PTR(-EINVAL); + goto out; } rstc_id = rcdev->of_xlate(rcdev, &args); if (rstc_id < 0) { - mutex_unlock(&reset_list_mutex); - return ERR_PTR(rstc_id); + rstc = ERR_PTR(rstc_id); + goto out; } /* reset_list_mutex also protects the rcdev's reset_control list */ rstc = __reset_control_get_internal(rcdev, rstc_id, shared); +out: mutex_unlock(&reset_list_mutex); + of_node_put(args.np); return rstc; } @@ -512,6 +513,7 @@ static void reset_control_array_put(struct reset_control_array *resets) for (i = 0; i < resets->num_rstcs; i++) __reset_control_put_internal(resets->rstc[i]); mutex_unlock(&reset_list_mutex); + kfree(resets); } /** diff --git a/drivers/rpmsg/qcom_glink_native.c b/drivers/rpmsg/qcom_glink_native.c index e8e12c2b1d0e..1e6253f1e070 100644 --- a/drivers/rpmsg/qcom_glink_native.c +++ b/drivers/rpmsg/qcom_glink_native.c @@ -243,10 +243,31 @@ static void qcom_glink_channel_release(struct kref *ref) { struct glink_channel *channel = container_of(ref, struct glink_channel, refcount); + struct glink_core_rx_intent *intent; + struct glink_core_rx_intent *tmp; unsigned long flags; + int iid; + + /* cancel pending rx_done work */ + cancel_work_sync(&channel->intent_work); spin_lock_irqsave(&channel->intent_lock, flags); + /* Free all non-reuse intents pending rx_done work */ + list_for_each_entry_safe(intent, tmp, &channel->done_intents, node) { + if (!intent->reuse) { + kfree(intent->data); + kfree(intent); + } + } + + idr_for_each_entry(&channel->liids, tmp, iid) { + kfree(tmp->data); + kfree(tmp); + } idr_destroy(&channel->liids); + + idr_for_each_entry(&channel->riids, tmp, iid) + kfree(tmp); idr_destroy(&channel->riids); spin_unlock_irqrestore(&channel->intent_lock, flags); @@ -1095,13 +1116,12 @@ static int qcom_glink_create_remote(struct qcom_glink *glink, close_link: /* * Send a close request to "undo" our open-ack. The close-ack will - * release the last reference. + * release qcom_glink_send_open_req() reference and the last reference + * will be relesed after receiving remote_close or transport unregister + * by calling qcom_glink_native_remove(). */ qcom_glink_send_close_req(glink, channel); - /* Release qcom_glink_send_open_req() reference */ - kref_put(&channel->refcount, qcom_glink_channel_release); - return ret; } @@ -1392,15 +1412,13 @@ static int qcom_glink_rx_open(struct qcom_glink *glink, unsigned int rcid, ret = rpmsg_register_device(rpdev); if (ret) - goto free_rpdev; + goto rcid_remove; channel->rpdev = rpdev; } return 0; -free_rpdev: - kfree(rpdev); rcid_remove: spin_lock_irqsave(&glink->idr_lock, flags); idr_remove(&glink->rcids, channel->rcid); @@ -1521,6 +1539,18 @@ static void qcom_glink_work(struct work_struct *work) } } +static void qcom_glink_cancel_rx_work(struct qcom_glink *glink) +{ + struct glink_defer_cmd *dcmd; + struct glink_defer_cmd *tmp; + + /* cancel any pending deferred rx_work */ + cancel_work_sync(&glink->rx_work); + + list_for_each_entry_safe(dcmd, tmp, &glink->rx_queue, node) + kfree(dcmd); +} + struct qcom_glink *qcom_glink_native_probe(struct device *dev, unsigned long features, struct qcom_glink_pipe *rx, @@ -1591,23 +1621,24 @@ void qcom_glink_native_remove(struct qcom_glink *glink) struct glink_channel *channel; int cid; int ret; - unsigned long flags; disable_irq(glink->irq); - cancel_work_sync(&glink->rx_work); + qcom_glink_cancel_rx_work(glink); ret = device_for_each_child(glink->dev, NULL, qcom_glink_remove_device); if (ret) dev_warn(glink->dev, "Can't remove GLINK devices: %d\n", ret); - spin_lock_irqsave(&glink->idr_lock, flags); /* Release any defunct local channels, waiting for close-ack */ idr_for_each_entry(&glink->lcids, channel, cid) kref_put(&channel->refcount, qcom_glink_channel_release); + /* Release any defunct local channels, waiting for close-req */ + idr_for_each_entry(&glink->rcids, channel, cid) + kref_put(&channel->refcount, qcom_glink_channel_release); + idr_destroy(&glink->lcids); idr_destroy(&glink->rcids); - spin_unlock_irqrestore(&glink->idr_lock, flags); mbox_free_channel(glink->mbox_chan); } EXPORT_SYMBOL_GPL(qcom_glink_native_remove); diff --git a/drivers/rpmsg/qcom_glink_smem.c b/drivers/rpmsg/qcom_glink_smem.c index 5cdaa5f8fb61..53b3a43160f4 100644 --- a/drivers/rpmsg/qcom_glink_smem.c +++ b/drivers/rpmsg/qcom_glink_smem.c @@ -119,7 +119,7 @@ static void glink_smem_rx_advance(struct qcom_glink_pipe *np, tail = le32_to_cpu(*pipe->tail); tail += count; - if (tail > pipe->native.length) + if (tail >= pipe->native.length) tail -= pipe->native.length; *pipe->tail = cpu_to_le32(tail); diff --git a/drivers/rtc/rtc-88pm80x.c b/drivers/rtc/rtc-88pm80x.c index 466bf7f9a285..7da2a1fb50f8 100644 --- a/drivers/rtc/rtc-88pm80x.c +++ b/drivers/rtc/rtc-88pm80x.c @@ -116,12 +116,14 @@ static int pm80x_rtc_read_time(struct device *dev, struct rtc_time *tm) unsigned char buf[4]; unsigned long ticks, base, data; regmap_raw_read(info->map, PM800_RTC_EXPIRE2_1, buf, 4); - base = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + base = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; dev_dbg(info->dev, "%x-%x-%x-%x\n", buf[0], buf[1], buf[2], buf[3]); /* load 32-bit read-only counter */ regmap_raw_read(info->map, PM800_RTC_COUNTER1, buf, 4); - data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + data = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; ticks = base + data; dev_dbg(info->dev, "get base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", base, data, ticks); @@ -144,7 +146,8 @@ static int pm80x_rtc_set_time(struct device *dev, struct rtc_time *tm) /* load 32-bit read-only counter */ regmap_raw_read(info->map, PM800_RTC_COUNTER1, buf, 4); - data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + data = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; base = ticks - data; dev_dbg(info->dev, "set base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", base, data, ticks); @@ -165,11 +168,13 @@ static int pm80x_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) int ret; regmap_raw_read(info->map, PM800_RTC_EXPIRE2_1, buf, 4); - base = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + base = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; dev_dbg(info->dev, "%x-%x-%x-%x\n", buf[0], buf[1], buf[2], buf[3]); regmap_raw_read(info->map, PM800_RTC_EXPIRE1_1, buf, 4); - data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + data = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; ticks = base + data; dev_dbg(info->dev, "get base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", base, data, ticks); @@ -192,12 +197,14 @@ static int pm80x_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) regmap_update_bits(info->map, PM800_RTC_CONTROL, PM800_ALARM1_EN, 0); regmap_raw_read(info->map, PM800_RTC_EXPIRE2_1, buf, 4); - base = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + base = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; dev_dbg(info->dev, "%x-%x-%x-%x\n", buf[0], buf[1], buf[2], buf[3]); /* load 32-bit read-only counter */ regmap_raw_read(info->map, PM800_RTC_COUNTER1, buf, 4); - data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + data = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; ticks = base + data; dev_dbg(info->dev, "get base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", base, data, ticks); diff --git a/drivers/rtc/rtc-88pm860x.c b/drivers/rtc/rtc-88pm860x.c index 166faae3a59c..7d3e5168fcef 100644 --- a/drivers/rtc/rtc-88pm860x.c +++ b/drivers/rtc/rtc-88pm860x.c @@ -115,11 +115,13 @@ static int pm860x_rtc_read_time(struct device *dev, struct rtc_time *tm) pm860x_page_bulk_read(info->i2c, REG0_ADDR, 8, buf); dev_dbg(info->dev, "%x-%x-%x-%x-%x-%x-%x-%x\n", buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); - base = (buf[1] << 24) | (buf[3] << 16) | (buf[5] << 8) | buf[7]; + base = ((unsigned long)buf[1] << 24) | (buf[3] << 16) | + (buf[5] << 8) | buf[7]; /* load 32-bit read-only counter */ pm860x_bulk_read(info->i2c, PM8607_RTC_COUNTER1, 4, buf); - data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + data = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; ticks = base + data; dev_dbg(info->dev, "get base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", base, data, ticks); @@ -145,7 +147,8 @@ static int pm860x_rtc_set_time(struct device *dev, struct rtc_time *tm) /* load 32-bit read-only counter */ pm860x_bulk_read(info->i2c, PM8607_RTC_COUNTER1, 4, buf); - data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + data = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; base = ticks - data; dev_dbg(info->dev, "set base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", base, data, ticks); @@ -170,10 +173,12 @@ static int pm860x_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) pm860x_page_bulk_read(info->i2c, REG0_ADDR, 8, buf); dev_dbg(info->dev, "%x-%x-%x-%x-%x-%x-%x-%x\n", buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); - base = (buf[1] << 24) | (buf[3] << 16) | (buf[5] << 8) | buf[7]; + base = ((unsigned long)buf[1] << 24) | (buf[3] << 16) | + (buf[5] << 8) | buf[7]; pm860x_bulk_read(info->i2c, PM8607_RTC_EXPIRE1, 4, buf); - data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + data = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; ticks = base + data; dev_dbg(info->dev, "get base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", base, data, ticks); @@ -198,11 +203,13 @@ static int pm860x_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) pm860x_page_bulk_read(info->i2c, REG0_ADDR, 8, buf); dev_dbg(info->dev, "%x-%x-%x-%x-%x-%x-%x-%x\n", buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); - base = (buf[1] << 24) | (buf[3] << 16) | (buf[5] << 8) | buf[7]; + base = ((unsigned long)buf[1] << 24) | (buf[3] << 16) | + (buf[5] << 8) | buf[7]; /* load 32-bit read-only counter */ pm860x_bulk_read(info->i2c, PM8607_RTC_COUNTER1, 4, buf); - data = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + data = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; ticks = base + data; dev_dbg(info->dev, "get base:0x%lx, RO count:0x%lx, ticks:0x%lx\n", base, data, ticks); diff --git a/drivers/rtc/rtc-armada38x.c b/drivers/rtc/rtc-armada38x.c index 21f355c37eab..10b5c8549039 100644 --- a/drivers/rtc/rtc-armada38x.c +++ b/drivers/rtc/rtc-armada38x.c @@ -390,7 +390,6 @@ MODULE_DEVICE_TABLE(of, armada38x_rtc_of_match_table); static __init int armada38x_rtc_probe(struct platform_device *pdev) { - const struct rtc_class_ops *ops; struct resource *res; struct armada38x_rtc *rtc; const struct of_device_id *match; @@ -427,6 +426,11 @@ static __init int armada38x_rtc_probe(struct platform_device *pdev) dev_err(&pdev->dev, "no irq\n"); return rtc->irq; } + + rtc->rtc_dev = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(rtc->rtc_dev)) + return PTR_ERR(rtc->rtc_dev); + if (devm_request_irq(&pdev->dev, rtc->irq, armada38x_rtc_alarm_irq, 0, pdev->name, rtc) < 0) { dev_warn(&pdev->dev, "Interrupt not available.\n"); @@ -436,28 +440,24 @@ static __init int armada38x_rtc_probe(struct platform_device *pdev) if (rtc->irq != -1) { device_init_wakeup(&pdev->dev, 1); - ops = &armada38x_rtc_ops; + rtc->rtc_dev->ops = &armada38x_rtc_ops; } else { /* * If there is no interrupt available then we can't * use the alarm */ - ops = &armada38x_rtc_ops_noirq; + rtc->rtc_dev->ops = &armada38x_rtc_ops_noirq; } rtc->data = (struct armada38x_rtc_data *)match->data; - /* Update RTC-MBUS bridge timing parameters */ rtc->data->update_mbus_timing(rtc); - rtc->rtc_dev = devm_rtc_device_register(&pdev->dev, pdev->name, - ops, THIS_MODULE); - if (IS_ERR(rtc->rtc_dev)) { - ret = PTR_ERR(rtc->rtc_dev); + ret = rtc_register_device(rtc->rtc_dev); + if (ret) dev_err(&pdev->dev, "Failed to register RTC device: %d\n", ret); - return ret; - } - return 0; + + return ret; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 9dca53df3584..5b7c16b85dc0 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -806,7 +806,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq) rtc_cmos_int_handler = cmos_interrupt; retval = request_irq(rtc_irq, rtc_cmos_int_handler, - IRQF_SHARED, dev_name(&cmos_rtc.rtc->dev), + 0, dev_name(&cmos_rtc.rtc->dev), cmos_rtc.rtc); if (retval < 0) { dev_dbg(dev, "IRQ %d is already in use\n", rtc_irq); diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c index e7d9215c9201..8d45d93b1db6 100644 --- a/drivers/rtc/rtc-ds1307.c +++ b/drivers/rtc/rtc-ds1307.c @@ -733,8 +733,8 @@ static int rx8130_set_alarm(struct device *dev, struct rtc_wkalrm *t) if (ret < 0) return ret; - ctl[0] &= ~RX8130_REG_EXTENSION_WADA; - ctl[1] |= RX8130_REG_FLAG_AF; + ctl[0] &= RX8130_REG_EXTENSION_WADA; + ctl[1] &= ~RX8130_REG_FLAG_AF; ctl[2] &= ~RX8130_REG_CONTROL0_AIE; ret = regmap_bulk_write(ds1307->regmap, RX8130_REG_EXTENSION, ctl, @@ -757,8 +757,7 @@ static int rx8130_set_alarm(struct device *dev, struct rtc_wkalrm *t) ctl[2] |= RX8130_REG_CONTROL0_AIE; - return regmap_bulk_write(ds1307->regmap, RX8130_REG_EXTENSION, ctl, - sizeof(ctl)); + return regmap_write(ds1307->regmap, RX8130_REG_CONTROL0, ctl[2]); } static int rx8130_alarm_irq_enable(struct device *dev, unsigned int enabled) diff --git a/drivers/rtc/rtc-ds1672.c b/drivers/rtc/rtc-ds1672.c index 9caaccccaa57..b1ebca099b0d 100644 --- a/drivers/rtc/rtc-ds1672.c +++ b/drivers/rtc/rtc-ds1672.c @@ -58,7 +58,8 @@ static int ds1672_get_datetime(struct i2c_client *client, struct rtc_time *tm) "%s: raw read data - counters=%02x,%02x,%02x,%02x\n", __func__, buf[0], buf[1], buf[2], buf[3]); - time = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0]; + time = ((unsigned long)buf[3] << 24) | (buf[2] << 16) | + (buf[1] << 8) | buf[0]; rtc_time_to_tm(time, tm); diff --git a/drivers/rtc/rtc-hym8563.c b/drivers/rtc/rtc-hym8563.c index e5ad527cb75e..a8c2d38b2411 100644 --- a/drivers/rtc/rtc-hym8563.c +++ b/drivers/rtc/rtc-hym8563.c @@ -105,7 +105,7 @@ static int hym8563_rtc_read_time(struct device *dev, struct rtc_time *tm) if (!hym8563->valid) { dev_warn(&client->dev, "no valid clock/calendar values available\n"); - return -EPERM; + return -EINVAL; } ret = i2c_smbus_read_i2c_block_data(client, HYM8563_SEC, 7, buf); diff --git a/drivers/rtc/rtc-max8997.c b/drivers/rtc/rtc-max8997.c index db984d4bf952..4cce5bd448f6 100644 --- a/drivers/rtc/rtc-max8997.c +++ b/drivers/rtc/rtc-max8997.c @@ -221,7 +221,7 @@ static int max8997_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) out: mutex_unlock(&info->lock); - return 0; + return ret; } static int max8997_rtc_stop_alarm(struct max8997_rtc_info *info) diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 2f1772a358ca..18a6f15e313d 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -82,7 +82,7 @@ unsigned int mc146818_get_time(struct rtc_time *time) time->tm_year += real_year - 72; #endif - if (century) + if (century > 20) time->tm_year += (century - 19) * 100; /* diff --git a/drivers/rtc/rtc-msm6242.c b/drivers/rtc/rtc-msm6242.c index c1c5c4e3b3b4..c981301efbe5 100644 --- a/drivers/rtc/rtc-msm6242.c +++ b/drivers/rtc/rtc-msm6242.c @@ -132,7 +132,8 @@ static int msm6242_read_time(struct device *dev, struct rtc_time *tm) msm6242_read(priv, MSM6242_SECOND1); tm->tm_min = msm6242_read(priv, MSM6242_MINUTE10) * 10 + msm6242_read(priv, MSM6242_MINUTE1); - tm->tm_hour = (msm6242_read(priv, MSM6242_HOUR10 & 3)) * 10 + + tm->tm_hour = (msm6242_read(priv, MSM6242_HOUR10) & + MSM6242_HOUR10_HR_MASK) * 10 + msm6242_read(priv, MSM6242_HOUR1); tm->tm_mday = msm6242_read(priv, MSM6242_DAY10) * 10 + msm6242_read(priv, MSM6242_DAY1); diff --git a/drivers/rtc/rtc-mt6397.c b/drivers/rtc/rtc-mt6397.c index 85ea3b225560..b09f8767d654 100644 --- a/drivers/rtc/rtc-mt6397.c +++ b/drivers/rtc/rtc-mt6397.c @@ -69,6 +69,14 @@ #define RTC_AL_SEC 0x0018 +#define RTC_AL_SEC_MASK 0x003f +#define RTC_AL_MIN_MASK 0x003f +#define RTC_AL_HOU_MASK 0x001f +#define RTC_AL_DOM_MASK 0x001f +#define RTC_AL_DOW_MASK 0x0007 +#define RTC_AL_MTH_MASK 0x000f +#define RTC_AL_YEA_MASK 0x007f + #define RTC_PDN2 0x002e #define RTC_PDN1 0x002c #define RTC_SPAR0 0x0030 @@ -452,7 +460,7 @@ static irqreturn_t mtk_rtc_irq_handler_thread(int irq, void *data) irqen = irqsta & ~RTC_IRQ_EN_AL; if (regmap_write(rtc->regmap, rtc->addr_base + RTC_IRQ_EN, - irqen) >= 0) + irqen) == 0) mtk_rtc_write_trigger(rtc); mutex_unlock(&rtc->lock); @@ -780,29 +788,29 @@ static int mtk_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm) struct rtc_time *tm = &alm->time; struct mt6397_rtc *rtc = dev_get_drvdata(dev); int ret; - u16 data[RTC_OFFSET_COUNT], data_b[RTC_OFFSET_COUNT]; + u16 data[RTC_OFFSET_COUNT]; u32 irqsta, irqen, pdn2; tm->tm_year -= RTC_MIN_YEAR_OFFSET; tm->tm_mon++; ret = regmap_bulk_read(rtc->regmap, rtc->addr_base + RTC_AL_SEC, - data_b, RTC_OFFSET_COUNT); + data, RTC_OFFSET_COUNT); if (ret < 0) goto exit; - data[RTC_OFFSET_SEC] = ((data_b[RTC_OFFSET_SEC] & ~(RTC_AL_SEC_MASK)) - | (tm->tm_sec & RTC_AL_SEC_MASK)); - data[RTC_OFFSET_MIN] = ((data_b[RTC_OFFSET_MIN] & ~(RTC_AL_MIN_MASK)) - | (tm->tm_min & RTC_AL_MIN_MASK)); - data[RTC_OFFSET_HOUR] = ((data_b[RTC_OFFSET_HOUR] & ~(RTC_AL_HOU_MASK)) - | (tm->tm_hour & RTC_AL_HOU_MASK)); - data[RTC_OFFSET_DOM] = ((data_b[RTC_OFFSET_DOM] & ~(RTC_AL_DOM_MASK)) - | (tm->tm_mday & RTC_AL_DOM_MASK)); - data[RTC_OFFSET_MTH] = ((data_b[RTC_OFFSET_MTH] & ~(RTC_AL_MTH_MASK)) - | (tm->tm_mon & RTC_AL_MTH_MASK)); - data[RTC_OFFSET_YEAR] = ((data_b[RTC_OFFSET_YEAR] & ~(RTC_AL_YEA_MASK)) - | (tm->tm_year & RTC_AL_YEA_MASK)); + data[RTC_OFFSET_SEC] = ((data[RTC_OFFSET_SEC] & ~(RTC_AL_SEC_MASK)) | + (tm->tm_sec & RTC_AL_SEC_MASK)); + data[RTC_OFFSET_MIN] = ((data[RTC_OFFSET_MIN] & ~(RTC_AL_MIN_MASK)) | + (tm->tm_min & RTC_AL_MIN_MASK)); + data[RTC_OFFSET_HOUR] = ((data[RTC_OFFSET_HOUR] & ~(RTC_AL_HOU_MASK)) | + (tm->tm_hour & RTC_AL_HOU_MASK)); + data[RTC_OFFSET_DOM] = ((data[RTC_OFFSET_DOM] & ~(RTC_AL_DOM_MASK)) | + (tm->tm_mday & RTC_AL_DOM_MASK)); + data[RTC_OFFSET_MTH] = ((data[RTC_OFFSET_MTH] & ~(RTC_AL_MTH_MASK)) | + (tm->tm_mon & RTC_AL_MTH_MASK)); + data[RTC_OFFSET_YEAR] = ((data[RTC_OFFSET_YEAR] & ~(RTC_AL_YEA_MASK)) | + (tm->tm_year & RTC_AL_YEA_MASK)); dev_notice(rtc->dev, "set al time = %04d/%02d/%02d %02d:%02d:%02d (%d)\n", @@ -916,6 +924,10 @@ static int mtk_rtc_probe(struct platform_device *pdev) mt_rtc = rtc; platform_set_drvdata(pdev, rtc); + rtc->rtc_dev = devm_rtc_allocate_device(rtc->dev); + if (IS_ERR(rtc->rtc_dev)) + return PTR_ERR(rtc->rtc_dev); + ret = request_threaded_irq(rtc->irq, NULL, mtk_rtc_irq_handler_thread, IRQF_ONESHOT | IRQF_TRIGGER_HIGH, @@ -928,11 +940,11 @@ static int mtk_rtc_probe(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 1); - rtc->rtc_dev = rtc_device_register("mt6397-rtc", &pdev->dev, - &mtk_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc->rtc_dev)) { + rtc->rtc_dev->ops = &mtk_rtc_ops; + + ret = rtc_register_device(rtc->rtc_dev); + if (ret) { dev_err(&pdev->dev, "register rtc device failed\n"); - ret = PTR_ERR(rtc->rtc_dev); goto out_free_irq; } @@ -949,7 +961,6 @@ static int mtk_rtc_remove(struct platform_device *pdev) { struct mt6397_rtc *rtc = platform_get_drvdata(pdev); - rtc_device_unregister(rtc->rtc_dev); free_irq(rtc->irq, rtc->rtc_dev); irq_dispose_mapping(rtc->irq); diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c index 9f1b14bf91ae..367e0f803440 100644 --- a/drivers/rtc/rtc-pcf2127.c +++ b/drivers/rtc/rtc-pcf2127.c @@ -52,20 +52,14 @@ static int pcf2127_rtc_read_time(struct device *dev, struct rtc_time *tm) struct pcf2127 *pcf2127 = dev_get_drvdata(dev); unsigned char buf[10]; int ret; - int i; - for (i = 0; i <= PCF2127_REG_CTRL3; i++) { - ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL1 + i, - (unsigned int *)(buf + i)); - if (ret) { - dev_err(dev, "%s: read error\n", __func__); - return ret; - } - } - - ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_SC, - (buf + PCF2127_REG_SC), - ARRAY_SIZE(buf) - PCF2127_REG_SC); + /* + * Avoid reading CTRL2 register as it causes WD_VAL register + * value to reset to 0 which means watchdog is stopped. + */ + ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_CTRL3, + (buf + PCF2127_REG_CTRL3), + ARRAY_SIZE(buf) - PCF2127_REG_CTRL3); if (ret) { dev_err(dev, "%s: read error\n", __func__); return ret; @@ -86,14 +80,12 @@ static int pcf2127_rtc_read_time(struct device *dev, struct rtc_time *tm) } dev_dbg(dev, - "%s: raw data is cr1=%02x, cr2=%02x, cr3=%02x, " - "sec=%02x, min=%02x, hr=%02x, " + "%s: raw data is cr3=%02x, sec=%02x, min=%02x, hr=%02x, " "mday=%02x, wday=%02x, mon=%02x, year=%02x\n", - __func__, - buf[0], buf[1], buf[2], - buf[3], buf[4], buf[5], - buf[6], buf[7], buf[8], buf[9]); - + __func__, buf[PCF2127_REG_CTRL3], buf[PCF2127_REG_SC], + buf[PCF2127_REG_MN], buf[PCF2127_REG_HR], + buf[PCF2127_REG_DM], buf[PCF2127_REG_DW], + buf[PCF2127_REG_MO], buf[PCF2127_REG_YR]); tm->tm_sec = bcd2bin(buf[PCF2127_REG_SC] & 0x7F); tm->tm_min = bcd2bin(buf[PCF2127_REG_MN] & 0x7F); diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c index 3c8c6f942e67..a06792966ea9 100644 --- a/drivers/rtc/rtc-pcf8523.c +++ b/drivers/rtc/rtc-pcf8523.c @@ -94,8 +94,9 @@ static int pcf8523_voltage_low(struct i2c_client *client) return !!(value & REG_CONTROL3_BLF); } -static int pcf8523_select_capacitance(struct i2c_client *client, bool high) +static int pcf8523_load_capacitance(struct i2c_client *client) { + u32 load; u8 value; int err; @@ -103,14 +104,24 @@ static int pcf8523_select_capacitance(struct i2c_client *client, bool high) if (err < 0) return err; - if (!high) - value &= ~REG_CONTROL1_CAP_SEL; - else + load = 12500; + of_property_read_u32(client->dev.of_node, "quartz-load-femtofarads", + &load); + + switch (load) { + default: + dev_warn(&client->dev, "Unknown quartz-load-femtofarads value: %d. Assuming 12500", + load); + /* fall through */ + case 12500: value |= REG_CONTROL1_CAP_SEL; + break; + case 7000: + value &= ~REG_CONTROL1_CAP_SEL; + break; + } err = pcf8523_write(client, REG_CONTROL1, value); - if (err < 0) - return err; return err; } @@ -307,9 +318,10 @@ static int pcf8523_probe(struct i2c_client *client, if (!pcf) return -ENOMEM; - err = pcf8523_select_capacitance(client, true); + err = pcf8523_load_capacitance(client); if (err < 0) - return err; + dev_warn(&client->dev, "failed to set xtal load capacitance: %d", + err); err = pcf8523_set_pm(client, 0); if (err < 0) diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c index 8c836c51a508..4d0b81f9805f 100644 --- a/drivers/rtc/rtc-pcf8563.c +++ b/drivers/rtc/rtc-pcf8563.c @@ -563,7 +563,6 @@ static int pcf8563_probe(struct i2c_client *client, struct pcf8563 *pcf8563; int err; unsigned char buf; - unsigned char alm_pending; dev_dbg(&client->dev, "%s\n", __func__); @@ -587,13 +586,13 @@ static int pcf8563_probe(struct i2c_client *client, return err; } - err = pcf8563_get_alarm_mode(client, NULL, &alm_pending); - if (err) { - dev_err(&client->dev, "%s: read error\n", __func__); + /* Clear flags and disable interrupts */ + buf = 0; + err = pcf8563_write_block_data(client, PCF8563_REG_ST2, 1, &buf); + if (err < 0) { + dev_err(&client->dev, "%s: write error\n", __func__); return err; } - if (alm_pending) - pcf8563_set_alarm_mode(client, 0); pcf8563->rtc = devm_rtc_device_register(&client->dev, pcf8563_driver.driver.name, @@ -605,7 +604,7 @@ static int pcf8563_probe(struct i2c_client *client, if (client->irq > 0) { err = devm_request_threaded_irq(&client->dev, client->irq, NULL, pcf8563_irq, - IRQF_SHARED|IRQF_ONESHOT|IRQF_TRIGGER_FALLING, + IRQF_SHARED | IRQF_ONESHOT | IRQF_TRIGGER_LOW, pcf8563_driver.driver.name, client); if (err) { dev_err(&client->dev, "unable to request IRQ %d\n", diff --git a/drivers/rtc/rtc-pl030.c b/drivers/rtc/rtc-pl030.c index f85a1a93e669..343bb6ed1783 100644 --- a/drivers/rtc/rtc-pl030.c +++ b/drivers/rtc/rtc-pl030.c @@ -112,6 +112,13 @@ static int pl030_probe(struct amba_device *dev, const struct amba_id *id) goto err_rtc; } + rtc->rtc = devm_rtc_allocate_device(&dev->dev); + if (IS_ERR(rtc->rtc)) { + ret = PTR_ERR(rtc->rtc); + goto err_rtc; + } + + rtc->rtc->ops = &pl030_ops; rtc->base = ioremap(dev->res.start, resource_size(&dev->res)); if (!rtc->base) { ret = -ENOMEM; @@ -128,12 +135,9 @@ static int pl030_probe(struct amba_device *dev, const struct amba_id *id) if (ret) goto err_irq; - rtc->rtc = rtc_device_register("pl030", &dev->dev, &pl030_ops, - THIS_MODULE); - if (IS_ERR(rtc->rtc)) { - ret = PTR_ERR(rtc->rtc); + ret = rtc_register_device(rtc->rtc); + if (ret) goto err_reg; - } return 0; @@ -154,7 +158,6 @@ static int pl030_remove(struct amba_device *dev) writel(0, rtc->base + RTC_CR); free_irq(dev->irq[0], rtc); - rtc_device_unregister(rtc->rtc); iounmap(rtc->base); amba_release_regions(dev); diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c index fac835530671..a1b4b0ed1f19 100644 --- a/drivers/rtc/rtc-pm8xxx.c +++ b/drivers/rtc/rtc-pm8xxx.c @@ -186,7 +186,8 @@ static int pm8xxx_rtc_read_time(struct device *dev, struct rtc_time *tm) } } - secs = value[0] | (value[1] << 8) | (value[2] << 16) | (value[3] << 24); + secs = value[0] | (value[1] << 8) | (value[2] << 16) | + ((unsigned long)value[3] << 24); rtc_time_to_tm(secs, tm); @@ -267,7 +268,8 @@ static int pm8xxx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) return rc; } - secs = value[0] | (value[1] << 8) | (value[2] << 16) | (value[3] << 24); + secs = value[0] | (value[1] << 8) | (value[2] << 16) | + ((unsigned long)value[3] << 24); rtc_time_to_tm(secs, &alarm->time); diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c index aae2576741a6..6e06fb3b0b92 100644 --- a/drivers/rtc/rtc-rv8803.c +++ b/drivers/rtc/rtc-rv8803.c @@ -622,7 +622,7 @@ MODULE_DEVICE_TABLE(i2c, rv8803_id); static const struct of_device_id rv8803_of_match[] = { { .compatible = "microcrystal,rv8803", - .data = (void *)rx_8900 + .data = (void *)rv_8803 }, { .compatible = "epson,rx8900", diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c index 7067bca5c20d..6bfff0a6d655 100644 --- a/drivers/rtc/rtc-s35390a.c +++ b/drivers/rtc/rtc-s35390a.c @@ -108,7 +108,7 @@ static int s35390a_get_reg(struct s35390a *s35390a, int reg, char *buf, int len) static int s35390a_init(struct s35390a *s35390a) { - char buf; + u8 buf; int ret; unsigned initcount = 0; diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index a8992c227f61..4120a305954a 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -327,7 +327,6 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) struct rtc_time *tm = &alrm->time; unsigned int alrm_en; int ret; - int year = tm->tm_year - 100; dev_dbg(dev, "s3c_rtc_setalarm: %d, %04d.%02d.%02d %02d:%02d:%02d\n", alrm->enabled, @@ -356,11 +355,6 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm) writeb(bin2bcd(tm->tm_hour), info->base + S3C2410_ALMHOUR); } - if (year < 100 && year >= 0) { - alrm_en |= S3C2410_RTCALM_YEAREN; - writeb(bin2bcd(year), info->base + S3C2410_ALMYEAR); - } - if (tm->tm_mon < 12 && tm->tm_mon >= 0) { alrm_en |= S3C2410_RTCALM_MONEN; writeb(bin2bcd(tm->tm_mon + 1), info->base + S3C2410_ALMMON); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 0d5e2d92e05b..aa651403546f 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -1133,7 +1133,8 @@ static u32 get_fcx_max_data(struct dasd_device *device) { struct dasd_eckd_private *private = device->private; int fcx_in_css, fcx_in_gneq, fcx_in_features; - int tpm, mdc; + unsigned int mdc; + int tpm; if (dasd_nofcx) return 0; @@ -1147,7 +1148,7 @@ static u32 get_fcx_max_data(struct dasd_device *device) return 0; mdc = ccw_device_get_mdc(device->cdev, 0); - if (mdc < 0) { + if (mdc == 0) { dev_warn(&device->cdev->dev, "Detecting the maximum supported data size for zHPF requests failed\n"); return 0; } else { @@ -1158,12 +1159,12 @@ static u32 get_fcx_max_data(struct dasd_device *device) static int verify_fcx_max_data(struct dasd_device *device, __u8 lpm) { struct dasd_eckd_private *private = device->private; - int mdc; + unsigned int mdc; u32 fcx_max_data; if (private->fcx_max_data) { mdc = ccw_device_get_mdc(device->cdev, lpm); - if ((mdc < 0)) { + if (mdc == 0) { dev_warn(&device->cdev->dev, "Detecting the maximum data size for zHPF " "requests failed (rc=%d) for a new path %x\n", @@ -1767,7 +1768,7 @@ out_err2: dasd_free_block(device->block); device->block = NULL; out_err1: - kfree(private->conf_data); + dasd_eckd_clear_conf_data(device); kfree(device->private); device->private = NULL; return rc; @@ -1776,7 +1777,6 @@ out_err1: static void dasd_eckd_uncheck_device(struct dasd_device *device) { struct dasd_eckd_private *private = device->private; - int i; if (!private) return; @@ -1786,21 +1786,7 @@ static void dasd_eckd_uncheck_device(struct dasd_device *device) private->sneq = NULL; private->vdsneq = NULL; private->gneq = NULL; - private->conf_len = 0; - for (i = 0; i < 8; i++) { - kfree(device->path[i].conf_data); - if ((__u8 *)device->path[i].conf_data == - private->conf_data) { - private->conf_data = NULL; - private->conf_len = 0; - } - device->path[i].conf_data = NULL; - device->path[i].cssid = 0; - device->path[i].ssid = 0; - device->path[i].chpid = 0; - } - kfree(private->conf_data); - private->conf_data = NULL; + dasd_eckd_clear_conf_data(device); } static struct dasd_ccw_req * diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index 2a3f874a21d5..9cebff8e8d74 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -303,8 +303,10 @@ static void * cio_ignore_proc_seq_next(struct seq_file *s, void *it, loff_t *offset) { struct ccwdev_iter *iter; + loff_t p = *offset; - if (*offset >= (__MAX_SUBCHANNEL + 1) * (__MAX_SSID + 1)) + (*offset)++; + if (p >= (__MAX_SUBCHANNEL + 1) * (__MAX_SSID + 1)) return NULL; iter = it; if (iter->devno == __MAX_SUBCHANNEL) { @@ -314,7 +316,6 @@ cio_ignore_proc_seq_next(struct seq_file *s, void *it, loff_t *offset) return NULL; } else iter->devno++; - (*offset)++; return iter; } diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index b22922ec32d1..474afec9ab87 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -595,7 +595,7 @@ EXPORT_SYMBOL(ccw_device_tm_start_timeout); * @mask: mask of paths to use * * Return the number of 64K-bytes blocks all paths at least support - * for a transport command. Return values <= 0 indicate failures. + * for a transport command. Return value 0 indicates failure. */ int ccw_device_get_mdc(struct ccw_device *cdev, u8 mask) { diff --git a/drivers/s390/crypto/zcrypt_error.h b/drivers/s390/crypto/zcrypt_error.h index 9499cd3a05f8..02a936db0092 100644 --- a/drivers/s390/crypto/zcrypt_error.h +++ b/drivers/s390/crypto/zcrypt_error.h @@ -75,6 +75,7 @@ struct error_hdr { #define REP82_ERROR_EVEN_MOD_IN_OPND 0x85 #define REP82_ERROR_RESERVED_FIELD 0x88 #define REP82_ERROR_INVALID_DOMAIN_PENDING 0x8A +#define REP82_ERROR_FILTERED_BY_HYPERVISOR 0x8B #define REP82_ERROR_TRANSPORT_FAIL 0x90 #define REP82_ERROR_PACKET_TRUNCATED 0xA0 #define REP82_ERROR_ZERO_BUFFER_LEN 0xB0 @@ -105,6 +106,7 @@ static inline int convert_error(struct zcrypt_queue *zq, case REP82_ERROR_INVALID_DOMAIN_PRECHECK: case REP82_ERROR_INVALID_DOMAIN_PENDING: case REP82_ERROR_INVALID_SPECIAL_CMD: + case REP82_ERROR_FILTERED_BY_HYPERVISOR: // REP88_ERROR_INVALID_KEY // '82' CEX2A // REP88_ERROR_OPERAND // '84' CEX2A // REP88_ERROR_OPERAND_EVEN_MOD // '85' CEX2A diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 2845316db554..6fa07c246915 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -869,7 +869,10 @@ static int __qeth_l2_open(struct net_device *dev) if (qdio_stop_irq(card->data.ccwdev, 0) >= 0) { napi_enable(&card->napi); + local_bh_disable(); napi_schedule(&card->napi); + /* kick-start the NAPI softirq: */ + local_bh_enable(); } else rc = -EIO; return rc; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index d9830c86d0c1..8bccfd686b73 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2849,7 +2849,10 @@ static int __qeth_l3_open(struct net_device *dev) if (qdio_stop_irq(card->data.ccwdev, 0) >= 0) { napi_enable(&card->napi); + local_bh_disable(); napi_schedule(&card->napi); + /* kick-start the NAPI softirq: */ + local_bh_enable(); } else rc = -EIO; return rc; diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c index 599447032e50..bc6c1d6a1c42 100644 --- a/drivers/s390/scsi/zfcp_dbf.c +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -94,11 +94,9 @@ void zfcp_dbf_hba_fsf_res(char *tag, int level, struct zfcp_fsf_req *req) memcpy(rec->u.res.fsf_status_qual, &q_head->fsf_status_qual, FSF_STATUS_QUALIFIER_SIZE); - if (req->fsf_command != FSF_QTCB_FCP_CMND) { - rec->pl_len = q_head->log_length; - zfcp_dbf_pl_write(dbf, (char *)q_pref + q_head->log_start, - rec->pl_len, "fsf_res", req->req_id); - } + rec->pl_len = q_head->log_length; + zfcp_dbf_pl_write(dbf, (char *)q_pref + q_head->log_start, + rec->pl_len, "fsf_res", req->req_id); debug_event(dbf->hba, level, rec, sizeof(*rec)); spin_unlock_irqrestore(&dbf->hba_lock, flags); diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 64d70de98cdb..8f90e4cea254 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -179,9 +179,6 @@ static int zfcp_erp_handle_failed(int want, struct zfcp_adapter *adapter, adapter, ZFCP_STATUS_COMMON_ERP_FAILED); } break; - default: - need = 0; - break; } return need; diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 00fb98f7b2cd..94d1bcc83fa2 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -21,6 +21,11 @@ struct kmem_cache *zfcp_fsf_qtcb_cache; +static bool ber_stop = true; +module_param(ber_stop, bool, 0600); +MODULE_PARM_DESC(ber_stop, + "Shuts down FCP devices for FCP channels that report a bit-error count in excess of its threshold (default on)"); + static void zfcp_fsf_request_timeout_handler(unsigned long data) { struct zfcp_adapter *adapter = (struct zfcp_adapter *) data; @@ -230,10 +235,15 @@ static void zfcp_fsf_status_read_handler(struct zfcp_fsf_req *req) case FSF_STATUS_READ_SENSE_DATA_AVAIL: break; case FSF_STATUS_READ_BIT_ERROR_THRESHOLD: - dev_warn(&adapter->ccw_device->dev, - "The error threshold for checksum statistics " - "has been exceeded\n"); zfcp_dbf_hba_bit_err("fssrh_3", req); + if (ber_stop) { + dev_warn(&adapter->ccw_device->dev, + "All paths over this FCP device are disused because of excessive bit errors\n"); + zfcp_erp_adapter_shutdown(adapter, 0, "fssrh_b"); + } else { + dev_warn(&adapter->ccw_device->dev, + "The error threshold for checksum statistics has been exceeded\n"); + } break; case FSF_STATUS_READ_LINK_DOWN: zfcp_fsf_status_read_link_down(req); diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 41366339b950..881906dc33b8 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -966,7 +966,7 @@ config SCSI_SNI_53C710 config 53C700_LE_ON_BE bool - depends on SCSI_LASI700 + depends on SCSI_LASI700 || SCSI_SNI_53C710 default y config SCSI_STEX diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c index 8caa51797511..79b0b4eece19 100644 --- a/drivers/scsi/NCR5380.c +++ b/drivers/scsi/NCR5380.c @@ -129,8 +129,12 @@ #define NCR5380_release_dma_irq(x) #endif +static unsigned int disconnect_mask = ~0; +module_param(disconnect_mask, int, 0444); + static int do_abort(struct Scsi_Host *); static void do_reset(struct Scsi_Host *); +static void bus_reset_cleanup(struct Scsi_Host *); /** * initialize_SCp - init the scsi pointer field @@ -513,16 +517,15 @@ static void complete_cmd(struct Scsi_Host *instance, if (hostdata->sensing == cmd) { /* Autosense processing ends here */ - if ((cmd->result & 0xff) != SAM_STAT_GOOD) { + if (status_byte(cmd->result) != GOOD) { scsi_eh_restore_cmnd(cmd, &hostdata->ses); - set_host_byte(cmd, DID_ERROR); - } else + } else { scsi_eh_restore_cmnd(cmd, &hostdata->ses); + set_driver_byte(cmd, DRIVER_SENSE); + } hostdata->sensing = NULL; } - hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun); - cmd->scsi_done(cmd); } @@ -886,7 +889,14 @@ static irqreturn_t __maybe_unused NCR5380_intr(int irq, void *dev_id) /* Probably Bus Reset */ NCR5380_read(RESET_PARITY_INTERRUPT_REG); - dsprintk(NDEBUG_INTR, instance, "unknown interrupt\n"); + if (sr & SR_RST) { + /* Certainly Bus Reset */ + shost_printk(KERN_WARNING, instance, + "bus reset interrupt\n"); + bus_reset_cleanup(instance); + } else { + dsprintk(NDEBUG_INTR, instance, "unknown interrupt\n"); + } #ifdef SUN3_SCSI_VME dregs->csr |= CSR_DMA_ENABLE; #endif @@ -904,20 +914,16 @@ static irqreturn_t __maybe_unused NCR5380_intr(int irq, void *dev_id) return IRQ_RETVAL(handled); } -/* - * Function : int NCR5380_select(struct Scsi_Host *instance, - * struct scsi_cmnd *cmd) +/** + * NCR5380_select - attempt arbitration and selection for a given command + * @instance: the Scsi_Host instance + * @cmd: the scsi_cmnd to execute * - * Purpose : establishes I_T_L or I_T_L_Q nexus for new or existing command, - * including ARBITRATION, SELECTION, and initial message out for - * IDENTIFY and queue messages. + * This routine establishes an I_T_L nexus for a SCSI command. This involves + * ARBITRATION, SELECTION and MESSAGE OUT phases and an IDENTIFY message. * - * Inputs : instance - instantiation of the 5380 driver on which this - * target lives, cmd - SCSI command to execute. - * - * Returns cmd if selection failed but should be retried, - * NULL if selection failed and should not be retried, or - * NULL if selection succeeded (hostdata->connected == cmd). + * Returns true if the operation should be retried. + * Returns false if it should not be retried. * * Side effects : * If bus busy, arbitration failed, etc, NCR5380_select() will exit @@ -925,16 +931,15 @@ static irqreturn_t __maybe_unused NCR5380_intr(int irq, void *dev_id) * SELECT_ENABLE will be set appropriately, the NCR5380 * will cease to drive any SCSI bus signals. * - * If successful : I_T_L or I_T_L_Q nexus will be established, - * instance->connected will be set to cmd. + * If successful : the I_T_L nexus will be established, and + * hostdata->connected will be set to cmd. * SELECT interrupt will be disabled. * * If failed (no target) : cmd->scsi_done() will be called, and the * cmd->result host byte set to DID_BAD_TARGET. */ -static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance, - struct scsi_cmnd *cmd) +static bool NCR5380_select(struct Scsi_Host *instance, struct scsi_cmnd *cmd) __releases(&hostdata->lock) __acquires(&hostdata->lock) { struct NCR5380_hostdata *hostdata = shost_priv(instance); @@ -942,6 +947,10 @@ static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance, unsigned char *data; int len; int err; + bool ret = true; + bool can_disconnect = instance->irq != NO_IRQ && + cmd->cmnd[0] != REQUEST_SENSE && + (disconnect_mask & BIT(scmd_id(cmd))); NCR5380_dprint(NDEBUG_ARBITRATION, instance); dsprintk(NDEBUG_ARBITRATION, instance, "starting arbitration, id = %d\n", @@ -950,7 +959,7 @@ static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance, /* * Arbitration and selection phases are slow and involve dropping the * lock, so we have to watch out for EH. An exception handler may - * change 'selecting' to NULL. This function will then return NULL + * change 'selecting' to NULL. This function will then return false * so that the caller will forget about 'cmd'. (During information * transfer phases, EH may change 'connected' to NULL.) */ @@ -986,7 +995,7 @@ static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance, if (!hostdata->selecting) { /* Command was aborted */ NCR5380_write(MODE_REG, MR_BASE); - return NULL; + return false; } if (err < 0) { NCR5380_write(MODE_REG, MR_BASE); @@ -1035,7 +1044,7 @@ static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance, if (!hostdata->selecting) { NCR5380_write(MODE_REG, MR_BASE); NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE); - return NULL; + return false; } dsprintk(NDEBUG_ARBITRATION, instance, "won arbitration\n"); @@ -1118,13 +1127,13 @@ static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance, /* Can't touch cmd if it has been reclaimed by the scsi ML */ if (!hostdata->selecting) - return NULL; + return false; cmd->result = DID_BAD_TARGET << 16; complete_cmd(instance, cmd); dsprintk(NDEBUG_SELECTION, instance, "target did not respond within 250ms\n"); - cmd = NULL; + ret = false; goto out; } @@ -1156,12 +1165,12 @@ static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance, } if (!hostdata->selecting) { do_abort(instance); - return NULL; + return false; } dsprintk(NDEBUG_SELECTION, instance, "target %d selected, going into MESSAGE OUT phase.\n", scmd_id(cmd)); - tmp[0] = IDENTIFY(((instance->irq == NO_IRQ) ? 0 : 1), cmd->device->lun); + tmp[0] = IDENTIFY(can_disconnect, cmd->device->lun); len = 1; data = tmp; @@ -1172,7 +1181,7 @@ static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance, cmd->result = DID_ERROR << 16; complete_cmd(instance, cmd); dsprintk(NDEBUG_SELECTION, instance, "IDENTIFY message transfer failed\n"); - cmd = NULL; + ret = false; goto out; } @@ -1187,13 +1196,13 @@ static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *instance, initialize_SCp(cmd); - cmd = NULL; + ret = false; out: if (!hostdata->selecting) return NULL; hostdata->selecting = NULL; - return cmd; + return ret; } /* @@ -1712,6 +1721,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) cmd->result = DID_ERROR << 16; complete_cmd(instance, cmd); hostdata->connected = NULL; + hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun); return; #endif case PHASE_DATAIN: @@ -1794,6 +1804,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) cmd, scmd_id(cmd), cmd->device->lun); hostdata->connected = NULL; + hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun); cmd->result &= ~0xffff; cmd->result |= cmd->SCp.Status; @@ -1953,6 +1964,7 @@ static void NCR5380_information_transfer(struct Scsi_Host *instance) NCR5380_transfer_pio(instance, &phase, &len, &data); if (msgout == ABORT) { hostdata->connected = NULL; + hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun); cmd->result = DID_ERROR << 16; complete_cmd(instance, cmd); maybe_release_dma_irq(instance); @@ -2015,8 +2027,11 @@ static void NCR5380_reselect(struct Scsi_Host *instance) NCR5380_write(MODE_REG, MR_BASE); target_mask = NCR5380_read(CURRENT_SCSI_DATA_REG) & ~(hostdata->id_mask); - - dsprintk(NDEBUG_RESELECTION, instance, "reselect\n"); + if (!target_mask || target_mask & (target_mask - 1)) { + shost_printk(KERN_WARNING, instance, + "reselect: bad target_mask 0x%02x\n", target_mask); + return; + } /* * At this point, we have detected that our SCSI ID is on the bus, @@ -2030,6 +2045,7 @@ static void NCR5380_reselect(struct Scsi_Host *instance) NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE | ICR_ASSERT_BSY); if (NCR5380_poll_politely(hostdata, STATUS_REG, SR_SEL, 0, 2 * HZ) < 0) { + shost_printk(KERN_ERR, instance, "reselect: !SEL timeout\n"); NCR5380_write(INITIATOR_COMMAND_REG, ICR_BASE); return; } @@ -2041,6 +2057,10 @@ static void NCR5380_reselect(struct Scsi_Host *instance) if (NCR5380_poll_politely(hostdata, STATUS_REG, SR_REQ, SR_REQ, 2 * HZ) < 0) { + if ((NCR5380_read(STATUS_REG) & (SR_BSY | SR_SEL)) == 0) + /* BUS FREE phase */ + return; + shost_printk(KERN_ERR, instance, "reselect: REQ timeout\n"); do_abort(instance); return; } @@ -2102,13 +2122,16 @@ static void NCR5380_reselect(struct Scsi_Host *instance) dsprintk(NDEBUG_RESELECTION | NDEBUG_QUEUES, instance, "reselect: removed %p from disconnected queue\n", tmp); } else { + int target = ffs(target_mask) - 1; + shost_printk(KERN_ERR, instance, "target bitmask 0x%02x lun %d not in disconnected queue.\n", target_mask, lun); /* * Since we have an established nexus that we can't do anything * with, we must abort it. */ - do_abort(instance); + if (do_abort(instance) == 0) + hostdata->busy[target] &= ~(1 << lun); return; } @@ -2273,15 +2296,16 @@ static int NCR5380_abort(struct scsi_cmnd *cmd) if (list_del_cmd(&hostdata->autosense, cmd)) { dsprintk(NDEBUG_ABORT, instance, "abort: removed %p from sense queue\n", cmd); - set_host_byte(cmd, DID_ERROR); complete_cmd(instance, cmd); } out: if (result == FAILED) dsprintk(NDEBUG_ABORT, instance, "abort: failed to abort %p\n", cmd); - else + else { + hostdata->busy[scmd_id(cmd)] &= ~(1 << cmd->device->lun); dsprintk(NDEBUG_ABORT, instance, "abort: successfully aborted %p\n", cmd); + } queue_work(hostdata->work_q, &hostdata->main_task); maybe_release_dma_irq(instance); @@ -2291,31 +2315,12 @@ out: } -/** - * NCR5380_host_reset - reset the SCSI host - * @cmd: SCSI command undergoing EH - * - * Returns SUCCESS - */ - -static int NCR5380_host_reset(struct scsi_cmnd *cmd) +static void bus_reset_cleanup(struct Scsi_Host *instance) { - struct Scsi_Host *instance = cmd->device->host; struct NCR5380_hostdata *hostdata = shost_priv(instance); int i; - unsigned long flags; struct NCR5380_cmd *ncmd; - spin_lock_irqsave(&hostdata->lock, flags); - -#if (NDEBUG & NDEBUG_ANY) - scmd_printk(KERN_INFO, cmd, __func__); -#endif - NCR5380_dprint(NDEBUG_ANY, instance); - NCR5380_dprint_phase(NDEBUG_ANY, instance); - - do_reset(instance); - /* reset NCR registers */ NCR5380_write(MODE_REG, MR_BASE); NCR5380_write(TARGET_COMMAND_REG, 0); @@ -2327,11 +2332,6 @@ static int NCR5380_host_reset(struct scsi_cmnd *cmd) * commands! */ - if (list_del_cmd(&hostdata->unissued, cmd)) { - cmd->result = DID_RESET << 16; - cmd->scsi_done(cmd); - } - if (hostdata->selecting) { hostdata->selecting->result = DID_RESET << 16; complete_cmd(instance, hostdata->selecting); @@ -2349,7 +2349,6 @@ static int NCR5380_host_reset(struct scsi_cmnd *cmd) list_for_each_entry(ncmd, &hostdata->autosense, list) { struct scsi_cmnd *cmd = NCR5380_to_scmd(ncmd); - set_host_byte(cmd, DID_RESET); cmd->scsi_done(cmd); } INIT_LIST_HEAD(&hostdata->autosense); @@ -2366,6 +2365,41 @@ static int NCR5380_host_reset(struct scsi_cmnd *cmd) queue_work(hostdata->work_q, &hostdata->main_task); maybe_release_dma_irq(instance); +} + +/** + * NCR5380_host_reset - reset the SCSI host + * @cmd: SCSI command undergoing EH + * + * Returns SUCCESS + */ + +static int NCR5380_host_reset(struct scsi_cmnd *cmd) +{ + struct Scsi_Host *instance = cmd->device->host; + struct NCR5380_hostdata *hostdata = shost_priv(instance); + unsigned long flags; + struct NCR5380_cmd *ncmd; + + spin_lock_irqsave(&hostdata->lock, flags); + +#if (NDEBUG & NDEBUG_ANY) + shost_printk(KERN_INFO, instance, __func__); +#endif + NCR5380_dprint(NDEBUG_ANY, instance); + NCR5380_dprint_phase(NDEBUG_ANY, instance); + + list_for_each_entry(ncmd, &hostdata->unissued, list) { + struct scsi_cmnd *scmd = NCR5380_to_scmd(ncmd); + + scmd->result = DID_RESET << 16; + scmd->scsi_done(scmd); + } + INIT_LIST_HEAD(&hostdata->unissued); + + do_reset(instance); + bus_reset_cleanup(instance); + spin_unlock_irqrestore(&hostdata->lock, flags); return SUCCESS; diff --git a/drivers/scsi/NCR5380.h b/drivers/scsi/NCR5380.h index 8a6d002e6789..5935fd6d1a05 100644 --- a/drivers/scsi/NCR5380.h +++ b/drivers/scsi/NCR5380.h @@ -275,7 +275,7 @@ static irqreturn_t NCR5380_intr(int irq, void *dev_id); static void NCR5380_main(struct work_struct *work); static const char *NCR5380_info(struct Scsi_Host *instance); static void NCR5380_reselect(struct Scsi_Host *instance); -static struct scsi_cmnd *NCR5380_select(struct Scsi_Host *, struct scsi_cmnd *); +static bool NCR5380_select(struct Scsi_Host *, struct scsi_cmnd *); static int NCR5380_transfer_dma(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data); static int NCR5380_transfer_pio(struct Scsi_Host *instance, unsigned char *phase, int *count, unsigned char **data); static int NCR5380_poll_politely2(struct NCR5380_hostdata *, diff --git a/drivers/scsi/aic7xxx/aic7xxx_core.c b/drivers/scsi/aic7xxx/aic7xxx_core.c index 381846164003..fdbb0a3dc9b4 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_core.c +++ b/drivers/scsi/aic7xxx/aic7xxx_core.c @@ -2321,7 +2321,7 @@ ahc_find_syncrate(struct ahc_softc *ahc, u_int *period, * At some speeds, we only support * ST transfers. */ - if ((syncrate->sxfr_u2 & ST_SXFR) != 0) + if ((syncrate->sxfr_u2 & ST_SXFR) != 0) *ppr_options &= ~MSG_EXT_PPR_DT_REQ; break; } diff --git a/drivers/scsi/atari_scsi.c b/drivers/scsi/atari_scsi.c index 89f5154c40b6..764c46d7333e 100644 --- a/drivers/scsi/atari_scsi.c +++ b/drivers/scsi/atari_scsi.c @@ -742,7 +742,7 @@ static int __init atari_scsi_probe(struct platform_device *pdev) atari_scsi_template.sg_tablesize = SG_ALL; } else { atari_scsi_template.can_queue = 1; - atari_scsi_template.sg_tablesize = SG_NONE; + atari_scsi_template.sg_tablesize = 1; } if (setup_can_queue > 0) @@ -751,8 +751,8 @@ static int __init atari_scsi_probe(struct platform_device *pdev) if (setup_cmd_per_lun > 0) atari_scsi_template.cmd_per_lun = setup_cmd_per_lun; - /* Leave sg_tablesize at 0 on a Falcon! */ - if (ATARIHW_PRESENT(TT_SCSI) && setup_sg_tablesize >= 0) + /* Don't increase sg_tablesize on Falcon! */ + if (ATARIHW_PRESENT(TT_SCSI) && setup_sg_tablesize > 0) atari_scsi_template.sg_tablesize = setup_sg_tablesize; if (setup_hostid >= 0) { diff --git a/drivers/scsi/bfa/bfad_attr.c b/drivers/scsi/bfa/bfad_attr.c index d0a504af5b4f..0a70d54a4df6 100644 --- a/drivers/scsi/bfa/bfad_attr.c +++ b/drivers/scsi/bfa/bfad_attr.c @@ -283,8 +283,10 @@ bfad_im_get_stats(struct Scsi_Host *shost) rc = bfa_port_get_stats(BFA_FCPORT(&bfad->bfa), fcstats, bfad_hcb_comp, &fcomp); spin_unlock_irqrestore(&bfad->bfad_lock, flags); - if (rc != BFA_STATUS_OK) + if (rc != BFA_STATUS_OK) { + kfree(fcstats); return NULL; + } wait_for_completion(&fcomp.comp); diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c index 03c104b47f31..b832bd0ce202 100644 --- a/drivers/scsi/bnx2i/bnx2i_iscsi.c +++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c @@ -915,12 +915,12 @@ void bnx2i_free_hba(struct bnx2i_hba *hba) INIT_LIST_HEAD(&hba->ep_ofld_list); INIT_LIST_HEAD(&hba->ep_active_list); INIT_LIST_HEAD(&hba->ep_destroy_list); - pci_dev_put(hba->pcidev); if (hba->regview) { pci_iounmap(hba->pcidev, hba->regview); hba->regview = NULL; } + pci_dev_put(hba->pcidev); bnx2i_free_mp_bdt(hba); bnx2i_release_free_cid_que(hba); iscsi_host_free(shost); diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c index c535c52e72e5..3f7c25d104fe 100644 --- a/drivers/scsi/ch.c +++ b/drivers/scsi/ch.c @@ -578,7 +578,6 @@ ch_release(struct inode *inode, struct file *file) scsi_changer *ch = file->private_data; scsi_device_put(ch->device); - ch->device = NULL; file->private_data = NULL; kref_put(&ch->ref, ch_destroy); return 0; diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c index 28a9c7d706cb..03c7b1603dbc 100644 --- a/drivers/scsi/csiostor/csio_init.c +++ b/drivers/scsi/csiostor/csio_init.c @@ -649,7 +649,7 @@ csio_shost_init(struct csio_hw *hw, struct device *dev, if (csio_lnode_init(ln, hw, pln)) goto err_shost_put; - if (scsi_add_host(shost, dev)) + if (scsi_add_host_with_dma(shost, dev, &hw->pdev->dev)) goto err_lnode_exit; return ln; diff --git a/drivers/scsi/csiostor/csio_lnode.c b/drivers/scsi/csiostor/csio_lnode.c index be5ee2d37815..957767d38361 100644 --- a/drivers/scsi/csiostor/csio_lnode.c +++ b/drivers/scsi/csiostor/csio_lnode.c @@ -301,6 +301,7 @@ csio_ln_fdmi_rhba_cbfn(struct csio_hw *hw, struct csio_ioreq *fdmi_req) struct fc_fdmi_port_name *port_name; uint8_t buf[64]; uint8_t *fc4_type; + unsigned long flags; if (fdmi_req->wr_status != FW_SUCCESS) { csio_ln_dbg(ln, "WR error:%x in processing fdmi rhba cmd\n", @@ -377,13 +378,13 @@ csio_ln_fdmi_rhba_cbfn(struct csio_hw *hw, struct csio_ioreq *fdmi_req) len = (uint32_t)(pld - (uint8_t *)cmd); /* Submit FDMI RPA request */ - spin_lock_irq(&hw->lock); + spin_lock_irqsave(&hw->lock, flags); if (csio_ln_mgmt_submit_req(fdmi_req, csio_ln_fdmi_done, FCOE_CT, &fdmi_req->dma_buf, len)) { CSIO_INC_STATS(ln, n_fdmi_err); csio_ln_dbg(ln, "Failed to issue fdmi rpa req\n"); } - spin_unlock_irq(&hw->lock); + spin_unlock_irqrestore(&hw->lock, flags); } /* @@ -404,6 +405,7 @@ csio_ln_fdmi_dprt_cbfn(struct csio_hw *hw, struct csio_ioreq *fdmi_req) struct fc_fdmi_rpl *reg_pl; struct fs_fdmi_attrs *attrib_blk; uint8_t buf[64]; + unsigned long flags; if (fdmi_req->wr_status != FW_SUCCESS) { csio_ln_dbg(ln, "WR error:%x in processing fdmi dprt cmd\n", @@ -483,13 +485,13 @@ csio_ln_fdmi_dprt_cbfn(struct csio_hw *hw, struct csio_ioreq *fdmi_req) attrib_blk->numattrs = htonl(numattrs); /* Submit FDMI RHBA request */ - spin_lock_irq(&hw->lock); + spin_lock_irqsave(&hw->lock, flags); if (csio_ln_mgmt_submit_req(fdmi_req, csio_ln_fdmi_rhba_cbfn, FCOE_CT, &fdmi_req->dma_buf, len)) { CSIO_INC_STATS(ln, n_fdmi_err); csio_ln_dbg(ln, "Failed to issue fdmi rhba req\n"); } - spin_unlock_irq(&hw->lock); + spin_unlock_irqrestore(&hw->lock, flags); } /* @@ -504,6 +506,7 @@ csio_ln_fdmi_dhba_cbfn(struct csio_hw *hw, struct csio_ioreq *fdmi_req) void *cmd; struct fc_fdmi_port_name *port_name; uint32_t len; + unsigned long flags; if (fdmi_req->wr_status != FW_SUCCESS) { csio_ln_dbg(ln, "WR error:%x in processing fdmi dhba cmd\n", @@ -534,13 +537,13 @@ csio_ln_fdmi_dhba_cbfn(struct csio_hw *hw, struct csio_ioreq *fdmi_req) len += sizeof(*port_name); /* Submit FDMI request */ - spin_lock_irq(&hw->lock); + spin_lock_irqsave(&hw->lock, flags); if (csio_ln_mgmt_submit_req(fdmi_req, csio_ln_fdmi_dprt_cbfn, FCOE_CT, &fdmi_req->dma_buf, len)) { CSIO_INC_STATS(ln, n_fdmi_err); csio_ln_dbg(ln, "Failed to issue fdmi dprt req\n"); } - spin_unlock_irq(&hw->lock); + spin_unlock_irqrestore(&hw->lock, flags); } /** diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c index e09c7f360dbd..0cb585759de6 100644 --- a/drivers/scsi/csiostor/csio_scsi.c +++ b/drivers/scsi/csiostor/csio_scsi.c @@ -1383,7 +1383,7 @@ csio_device_reset(struct device *dev, return -EINVAL; /* Delete NPIV lnodes */ - csio_lnodes_exit(hw, 1); + csio_lnodes_exit(hw, 1); /* Block upper IOs */ csio_lnodes_block_request(hw); diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index 902f5e03ec94..0d45658f163a 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -121,7 +121,8 @@ static inline void cxgbi_device_destroy(struct cxgbi_device *cdev) "cdev 0x%p, p# %u.\n", cdev, cdev->nports); cxgbi_hbas_remove(cdev); cxgbi_device_portmap_cleanup(cdev); - cxgbi_ppm_release(cdev->cdev2ppm(cdev)); + if (cdev->cdev2ppm) + cxgbi_ppm_release(cdev->cdev2ppm(cdev)); if (cdev->pmap.max_connect) cxgbi_free_big_mem(cdev->pmap.port_csk); kfree(cdev); diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c index 5ee7f44cf869..830b2d2dcf20 100644 --- a/drivers/scsi/dc395x.c +++ b/drivers/scsi/dc395x.c @@ -1972,6 +1972,11 @@ static void sg_update_list(struct ScsiReqBlk *srb, u32 left) xferred -= psge->length; } else { /* Partial SG entry done */ + pci_dma_sync_single_for_cpu(srb->dcb-> + acb->dev, + srb->sg_bus_addr, + SEGMENTX_LEN, + PCI_DMA_TODEVICE); psge->length -= xferred; psge->address += xferred; srb->sg_index = idx; @@ -3450,14 +3455,12 @@ static void srb_done(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb, } } - if (dir != PCI_DMA_NONE && scsi_sg_count(cmd)) - pci_dma_sync_sg_for_cpu(acb->dev, scsi_sglist(cmd), - scsi_sg_count(cmd), dir); - ckc_only = 0; /* Check Error Conditions */ ckc_e: + pci_unmap_srb(acb, srb); + if (cmd->cmnd[0] == INQUIRY) { unsigned char *base = NULL; struct ScsiInqData *ptr; @@ -3511,7 +3514,6 @@ static void srb_done(struct AdapterCtlBlk *acb, struct DeviceCtlBlk *dcb, cmd, cmd->result); srb_free_insert(acb, srb); } - pci_unmap_srb(acb, srb); cmd->scsi_done(cmd); waiting_process_next(acb); diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 41f5f6410163..135376ee2cbf 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -523,6 +523,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) unsigned int tpg_desc_tbl_off; unsigned char orig_transition_tmo; unsigned long flags; + bool transitioning_sense = false; if (!pg->expiry) { unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ; @@ -567,13 +568,19 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) goto retry; } /* - * Retry on ALUA state transition or if any - * UNIT ATTENTION occurred. + * If the array returns with 'ALUA state transition' + * sense code here it cannot return RTPG data during + * transition. So set the state to 'transitioning' directly. */ if (sense_hdr.sense_key == NOT_READY && - sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) - err = SCSI_DH_RETRY; - else if (sense_hdr.sense_key == UNIT_ATTENTION) + sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) { + transitioning_sense = true; + goto skip_rtpg; + } + /* + * Retry on any other UNIT ATTENTION occurred. + */ + if (sense_hdr.sense_key == UNIT_ATTENTION) err = SCSI_DH_RETRY; if (err == SCSI_DH_RETRY && pg->expiry != 0 && time_before(jiffies, pg->expiry)) { @@ -661,7 +668,11 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) off = 8 + (desc[7] * 4); } + skip_rtpg: spin_lock_irqsave(&pg->lock, flags); + if (transitioning_sense) + pg->state = SCSI_ACCESS_STATE_TRANSITIONING; + sdev_printk(KERN_INFO, sdev, "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n", ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state), diff --git a/drivers/scsi/esas2r/esas2r_flash.c b/drivers/scsi/esas2r/esas2r_flash.c index 7bd376d95ed5..b02ac389e6c6 100644 --- a/drivers/scsi/esas2r/esas2r_flash.c +++ b/drivers/scsi/esas2r/esas2r_flash.c @@ -1197,6 +1197,7 @@ bool esas2r_nvram_read_direct(struct esas2r_adapter *a) if (!esas2r_read_flash_block(a, a->nvram, FLS_OFFSET_NVR, sizeof(struct esas2r_sas_nvram))) { esas2r_hdebug("NVRAM read failed, using defaults"); + up(&a->nvram_semaphore); return false; } diff --git a/drivers/scsi/fnic/fnic_isr.c b/drivers/scsi/fnic/fnic_isr.c index 4e3a50202e8c..d28088218c36 100644 --- a/drivers/scsi/fnic/fnic_isr.c +++ b/drivers/scsi/fnic/fnic_isr.c @@ -254,7 +254,7 @@ int fnic_set_intr_mode(struct fnic *fnic) int vecs = n + m + o + 1; if (pci_alloc_irq_vectors(fnic->pdev, vecs, vecs, - PCI_IRQ_MSIX) < 0) { + PCI_IRQ_MSIX) == vecs) { fnic->rq_count = n; fnic->raw_wq_count = m; fnic->wq_copy_count = o; @@ -280,7 +280,7 @@ int fnic_set_intr_mode(struct fnic *fnic) fnic->wq_copy_count >= 1 && fnic->cq_count >= 3 && fnic->intr_count >= 1 && - pci_alloc_irq_vectors(fnic->pdev, 1, 1, PCI_IRQ_MSI) < 0) { + pci_alloc_irq_vectors(fnic->pdev, 1, 1, PCI_IRQ_MSI) == 1) { fnic->rq_count = 1; fnic->raw_wq_count = 1; fnic->wq_copy_count = 1; diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c index 242e2ee494a1..d79ac0b24f5a 100644 --- a/drivers/scsi/fnic/fnic_scsi.c +++ b/drivers/scsi/fnic/fnic_scsi.c @@ -446,6 +446,9 @@ static int fnic_queuecommand_lck(struct scsi_cmnd *sc, void (*done)(struct scsi_ if (unlikely(fnic_chk_state_flags_locked(fnic, FNIC_FLAGS_IO_BLOCKED))) return SCSI_MLQUEUE_HOST_BUSY; + if (unlikely(fnic_chk_state_flags_locked(fnic, FNIC_FLAGS_FWRESET))) + return SCSI_MLQUEUE_HOST_BUSY; + rport = starget_to_rport(scsi_target(sc->device)); if (!rport) { FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, diff --git a/drivers/scsi/fnic/vnic_dev.c b/drivers/scsi/fnic/vnic_dev.c index ba69d6112fa1..c5b89a003d2a 100644 --- a/drivers/scsi/fnic/vnic_dev.c +++ b/drivers/scsi/fnic/vnic_dev.c @@ -445,26 +445,26 @@ int vnic_dev_soft_reset_done(struct vnic_dev *vdev, int *done) int vnic_dev_hang_notify(struct vnic_dev *vdev) { - u64 a0, a1; + u64 a0 = 0, a1 = 0; int wait = 1000; return vnic_dev_cmd(vdev, CMD_HANG_NOTIFY, &a0, &a1, wait); } int vnic_dev_mac_addr(struct vnic_dev *vdev, u8 *mac_addr) { - u64 a0, a1; + u64 a[2] = {}; int wait = 1000; int err, i; for (i = 0; i < ETH_ALEN; i++) mac_addr[i] = 0; - err = vnic_dev_cmd(vdev, CMD_MAC_ADDR, &a0, &a1, wait); + err = vnic_dev_cmd(vdev, CMD_MAC_ADDR, &a[0], &a[1], wait); if (err) return err; for (i = 0; i < ETH_ALEN; i++) - mac_addr[i] = ((u8 *)&a0)[i]; + mac_addr[i] = ((u8 *)&a)[i]; return 0; } @@ -489,30 +489,30 @@ void vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast, void vnic_dev_add_addr(struct vnic_dev *vdev, u8 *addr) { - u64 a0 = 0, a1 = 0; + u64 a[2] = {}; int wait = 1000; int err; int i; for (i = 0; i < ETH_ALEN; i++) - ((u8 *)&a0)[i] = addr[i]; + ((u8 *)&a)[i] = addr[i]; - err = vnic_dev_cmd(vdev, CMD_ADDR_ADD, &a0, &a1, wait); + err = vnic_dev_cmd(vdev, CMD_ADDR_ADD, &a[0], &a[1], wait); if (err) pr_err("Can't add addr [%pM], %d\n", addr, err); } void vnic_dev_del_addr(struct vnic_dev *vdev, u8 *addr) { - u64 a0 = 0, a1 = 0; + u64 a[2] = {}; int wait = 1000; int err; int i; for (i = 0; i < ETH_ALEN; i++) - ((u8 *)&a0)[i] = addr[i]; + ((u8 *)&a)[i] = addr[i]; - err = vnic_dev_cmd(vdev, CMD_ADDR_DEL, &a0, &a1, wait); + err = vnic_dev_cmd(vdev, CMD_ADDR_DEL, &a[0], &a[1], wait); if (err) pr_err("Can't del addr [%pM], %d\n", addr, err); } diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index 67621308eb9c..ea652f1e2071 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -3497,6 +3497,7 @@ ips_send_cmd(ips_ha_t * ha, ips_scb_t * scb) case START_STOP: scb->scsi_cmd->result = DID_OK << 16; + break; case TEST_UNIT_READY: case INQUIRY: diff --git a/drivers/scsi/isci/host.c b/drivers/scsi/isci/host.c index 609dafd661d1..da4583a2fa23 100644 --- a/drivers/scsi/isci/host.c +++ b/drivers/scsi/isci/host.c @@ -2717,9 +2717,9 @@ enum sci_status sci_controller_continue_io(struct isci_request *ireq) * the task management request. * @task_request: the handle to the task request object to start. */ -enum sci_task_status sci_controller_start_task(struct isci_host *ihost, - struct isci_remote_device *idev, - struct isci_request *ireq) +enum sci_status sci_controller_start_task(struct isci_host *ihost, + struct isci_remote_device *idev, + struct isci_request *ireq) { enum sci_status status; @@ -2728,7 +2728,7 @@ enum sci_task_status sci_controller_start_task(struct isci_host *ihost, "%s: SCIC Controller starting task from invalid " "state\n", __func__); - return SCI_TASK_FAILURE_INVALID_STATE; + return SCI_FAILURE_INVALID_STATE; } status = sci_remote_device_start_task(ihost, idev, ireq); diff --git a/drivers/scsi/isci/host.h b/drivers/scsi/isci/host.h index b3539928073c..6bc3f022630a 100644 --- a/drivers/scsi/isci/host.h +++ b/drivers/scsi/isci/host.h @@ -489,7 +489,7 @@ enum sci_status sci_controller_start_io( struct isci_remote_device *idev, struct isci_request *ireq); -enum sci_task_status sci_controller_start_task( +enum sci_status sci_controller_start_task( struct isci_host *ihost, struct isci_remote_device *idev, struct isci_request *ireq); diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index ed197bc8e801..2f151708b59a 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -1626,9 +1626,9 @@ static enum sci_status atapi_d2h_reg_frame_handler(struct isci_request *ireq, if (status == SCI_SUCCESS) { if (ireq->stp.rsp.status & ATA_ERR) - status = SCI_IO_FAILURE_RESPONSE_VALID; + status = SCI_FAILURE_IO_RESPONSE_VALID; } else { - status = SCI_IO_FAILURE_RESPONSE_VALID; + status = SCI_FAILURE_IO_RESPONSE_VALID; } if (status != SCI_SUCCESS) { diff --git a/drivers/scsi/isci/task.c b/drivers/scsi/isci/task.c index 6dcaed0c1fc8..fb6eba331ac6 100644 --- a/drivers/scsi/isci/task.c +++ b/drivers/scsi/isci/task.c @@ -258,7 +258,7 @@ static int isci_task_execute_tmf(struct isci_host *ihost, struct isci_tmf *tmf, unsigned long timeout_ms) { DECLARE_COMPLETION_ONSTACK(completion); - enum sci_task_status status = SCI_TASK_FAILURE; + enum sci_status status = SCI_FAILURE; struct isci_request *ireq; int ret = TMF_RESP_FUNC_FAILED; unsigned long flags; @@ -301,7 +301,7 @@ static int isci_task_execute_tmf(struct isci_host *ihost, /* start the TMF io. */ status = sci_controller_start_task(ihost, idev, ireq); - if (status != SCI_TASK_SUCCESS) { + if (status != SCI_SUCCESS) { dev_dbg(&ihost->pdev->dev, "%s: start_io failed - status = 0x%x, request = %p\n", __func__, diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index e11eff6b0e97..e3ca16043f9a 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -372,8 +372,16 @@ static int iscsi_sw_tcp_pdu_xmit(struct iscsi_task *task) { struct iscsi_conn *conn = task->conn; unsigned int noreclaim_flag; + struct iscsi_tcp_conn *tcp_conn = conn->dd_data; + struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; int rc = 0; + if (!tcp_sw_conn->sock) { + iscsi_conn_printk(KERN_ERR, conn, + "Transport not bound to socket!\n"); + return -EINVAL; + } + noreclaim_flag = memalloc_noreclaim_save(); while (iscsi_sw_tcp_xmit_qlen(conn)) { @@ -798,7 +806,8 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, return rc; return iscsi_conn_get_addr_param((struct sockaddr_storage *) - &addr, param, buf); + &addr, + (enum iscsi_param)param, buf); default: return iscsi_host_get_param(shost, param, buf); } @@ -881,6 +890,10 @@ free_host: static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session) { struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); + struct iscsi_session *session = cls_session->dd_data; + + if (WARN_ON_ONCE(session->leadconn)) + return; iscsi_tcp_r2tpool_free(cls_session->dd_data); iscsi_session_teardown(cls_session); diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index bb9c1c016643..28b50ab2fbb0 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ b/drivers/scsi/libfc/fc_disc.c @@ -652,6 +652,8 @@ redisc: } out: kref_put(&rdata->kref, fc_rport_destroy); + if (!IS_ERR(fp)) + fc_frame_free(fp); } /** diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c index 42bcf7f3a0f9..6ba257cbc6d9 100644 --- a/drivers/scsi/libfc/fc_exch.c +++ b/drivers/scsi/libfc/fc_exch.c @@ -2603,7 +2603,7 @@ void fc_exch_recv(struct fc_lport *lport, struct fc_frame *fp) /* lport lock ? */ if (!lport || lport->state == LPORT_ST_DISABLED) { - FC_LPORT_DBG(lport, "Receiving frames for an lport that " + FC_LIBFC_DBG("Receiving frames for an lport that " "has not been initialized correctly\n"); fc_frame_free(fp); return; diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 5ea5d42bac76..662df16b07a4 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1983,7 +1983,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) ISCSI_DBG_EH(session, "scsi cmd %p timedout\n", sc); - spin_lock(&session->frwd_lock); + spin_lock_bh(&session->frwd_lock); task = (struct iscsi_task *)sc->SCp.ptr; if (!task) { /* @@ -2110,7 +2110,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) done: if (task) task->last_timeout = jiffies; - spin_unlock(&session->frwd_lock); + spin_unlock_bh(&session->frwd_lock); ISCSI_DBG_EH(session, "return %s\n", rc == BLK_EH_RESET_TIMER ? "timer reset" : "shutdown or nh"); return rc; diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index 60de66252fa2..b200edc665a5 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -97,12 +97,21 @@ static int sas_get_port_device(struct asd_sas_port *port) else dev->dev_type = SAS_SATA_DEV; dev->tproto = SAS_PROTOCOL_SATA; - } else { + } else if (port->oob_mode == SAS_OOB_MODE) { struct sas_identify_frame *id = (struct sas_identify_frame *) dev->frame_rcvd; dev->dev_type = id->dev_type; dev->iproto = id->initiator_bits; dev->tproto = id->target_bits; + } else { + /* If the oob mode is OOB_NOT_CONNECTED, the port is + * disconnected due to race with PHY down. We cannot + * continue to discover this port + */ + sas_put_device(dev); + pr_warn("Port %016llx is disconnected when discovering\n", + SAS_ADDR(port->attached_sas_addr)); + return -ENODEV; } sas_init_dev(dev); diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c index 259ee0d3c3e6..f77d72f01da9 100644 --- a/drivers/scsi/libsas/sas_expander.c +++ b/drivers/scsi/libsas/sas_expander.c @@ -614,7 +614,14 @@ int sas_smp_phy_control(struct domain_device *dev, int phy_id, } res = smp_execute_task(dev, pc_req, PC_REQ_SIZE, pc_resp,PC_RESP_SIZE); - + if (res) { + pr_err("ex %016llx phy%02d PHY control failed: %d\n", + SAS_ADDR(dev->sas_addr), phy_id, res); + } else if (pc_resp[2] != SMP_RESP_FUNC_ACC) { + pr_err("ex %016llx phy%02d PHY control failed: function result 0x%x\n", + SAS_ADDR(dev->sas_addr), phy_id, pc_resp[2]); + res = pc_resp[2]; + } kfree(pc_resp); kfree(pc_req); return res; @@ -817,6 +824,26 @@ static struct domain_device *sas_ex_discover_end_dev( #ifdef CONFIG_SCSI_SAS_ATA if ((phy->attached_tproto & SAS_PROTOCOL_STP) || phy->attached_sata_dev) { + if (child->linkrate > parent->min_linkrate) { + struct sas_phy_linkrates rates = { + .maximum_linkrate = parent->min_linkrate, + .minimum_linkrate = parent->min_linkrate, + }; + int ret; + + pr_notice("ex %016llx phy%02d SATA device linkrate > min pathway connection rate, attempting to lower device linkrate\n", + SAS_ADDR(child->sas_addr), phy_id); + ret = sas_smp_phy_control(parent, phy_id, + PHY_FUNC_LINK_RESET, &rates); + if (ret) { + pr_err("ex %016llx phy%02d SATA device could not set linkrate (%d)\n", + SAS_ADDR(child->sas_addr), phy_id, ret); + goto out_free; + } + pr_notice("ex %016llx phy%02d SATA device set linkrate successfully\n", + SAS_ADDR(child->sas_addr), phy_id); + child->linkrate = child->min_linkrate; + } res = sas_get_ata_info(child, phy); if (res) goto out_free; @@ -2060,14 +2087,11 @@ static int sas_rediscover_dev(struct domain_device *dev, int phy_id, bool last) return res; } - /* delete the old link */ - if (SAS_ADDR(phy->attached_sas_addr) && - SAS_ADDR(sas_addr) != SAS_ADDR(phy->attached_sas_addr)) { - SAS_DPRINTK("ex %016llx phy 0x%x replace %016llx\n", - SAS_ADDR(dev->sas_addr), phy_id, - SAS_ADDR(phy->attached_sas_addr)); - sas_unregister_devs_sas_addr(dev, phy_id, last); - } + /* we always have to delete the old device when we went here */ + SAS_DPRINTK("ex %016llx phy 0x%x replace %016llx\n", + SAS_ADDR(dev->sas_addr), phy_id, + SAS_ADDR(phy->attached_sas_addr)); + sas_unregister_devs_sas_addr(dev, phy_id, last); return sas_discover_new(dev, phy_id); } diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index bc61cc8bc6f0..5fc41aa53ceb 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -969,7 +969,8 @@ struct lpfc_hba { struct list_head port_list; struct lpfc_vport *pport; /* physical lpfc_vport pointer */ uint16_t max_vpi; /* Maximum virtual nports */ -#define LPFC_MAX_VPI 0xFFFF /* Max number of VPI supported */ +#define LPFC_MAX_VPI 0xFF /* Max number VPI supported 0 - 0xff */ +#define LPFC_MAX_VPORTS 0x100 /* Max vports per port, with pport */ uint16_t max_vports; /* * For IOV HBAs max_vpi can change * after a reset. max_vports is max @@ -1239,6 +1240,12 @@ lpfc_sli_read_hs(struct lpfc_hba *phba) static inline struct lpfc_sli_ring * lpfc_phba_elsring(struct lpfc_hba *phba) { + /* Return NULL if sli_rev has become invalid due to bad fw */ + if (phba->sli_rev != LPFC_SLI_REV4 && + phba->sli_rev != LPFC_SLI_REV3 && + phba->sli_rev != LPFC_SLI_REV2) + return NULL; + if (phba->sli_rev == LPFC_SLI_REV4) { if (phba->sli4_hba.els_wq) return phba->sli4_hba.els_wq->pring; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 3da242201cb4..f447355cc9c0 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1178,7 +1178,7 @@ lpfc_sli4_pdev_reg_request(struct lpfc_hba *phba, uint32_t opcode) return -EACCES; if ((phba->sli_rev < LPFC_SLI_REV4) || - (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != + (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) < LPFC_SLI_INTF_IF_TYPE_2)) return -EPERM; @@ -1478,6 +1478,9 @@ lpfc_get_hba_info(struct lpfc_hba *phba, max_vpi = (bf_get(lpfc_mbx_rd_conf_vpi_count, rd_config) > 0) ? (bf_get(lpfc_mbx_rd_conf_vpi_count, rd_config) - 1) : 0; + /* Limit the max we support */ + if (max_vpi > LPFC_MAX_VPI) + max_vpi = LPFC_MAX_VPI; if (mvpi) *mvpi = max_vpi; if (avpi) @@ -1493,8 +1496,13 @@ lpfc_get_hba_info(struct lpfc_hba *phba, *axri = pmb->un.varRdConfig.avail_xri; if (mvpi) *mvpi = pmb->un.varRdConfig.max_vpi; - if (avpi) - *avpi = pmb->un.varRdConfig.avail_vpi; + if (avpi) { + /* avail_vpi is only valid if link is up and ready */ + if (phba->link_state == LPFC_HBA_READY) + *avpi = pmb->un.varRdConfig.avail_vpi; + else + *avpi = pmb->un.varRdConfig.max_vpi; + } } mempool_free(pmboxq, phba->mbox_mem_pool); @@ -4056,7 +4064,7 @@ lpfc_link_speed_store(struct device *dev, struct device_attribute *attr, uint32_t prev_val, if_type; if_type = bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf); - if (if_type == LPFC_SLI_INTF_IF_TYPE_2 && + if (if_type >= LPFC_SLI_INTF_IF_TYPE_2 && phba->hba_flag & HBA_FORCED_LINK_SPEED) return -EPERM; diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index d89816222b23..08ed27b0d4c6 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -2221,7 +2221,7 @@ lpfc_bsg_diag_loopback_mode(struct bsg_job *job) if (phba->sli_rev < LPFC_SLI_REV4) rc = lpfc_sli3_bsg_diag_loopback_mode(phba, job); - else if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) == + else if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) >= LPFC_SLI_INTF_IF_TYPE_2) rc = lpfc_sli4_bsg_diag_loopback_mode(phba, job); else @@ -2261,7 +2261,7 @@ lpfc_sli4_bsg_diag_mode_end(struct bsg_job *job) if (phba->sli_rev < LPFC_SLI_REV4) return -ENODEV; - if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != + if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) < LPFC_SLI_INTF_IF_TYPE_2) return -ENODEV; @@ -2353,7 +2353,7 @@ lpfc_sli4_bsg_link_diag_test(struct bsg_job *job) rc = -ENODEV; goto job_error; } - if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != + if (bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) < LPFC_SLI_INTF_IF_TYPE_2) { rc = -ENODEV; goto job_error; @@ -4419,12 +4419,6 @@ lpfc_bsg_write_ebuf_set(struct lpfc_hba *phba, struct bsg_job *job, phba->mbox_ext_buf_ctx.seqNum++; nemb_tp = phba->mbox_ext_buf_ctx.nembType; - dd_data = kmalloc(sizeof(struct bsg_job_data), GFP_KERNEL); - if (!dd_data) { - rc = -ENOMEM; - goto job_error; - } - pbuf = (uint8_t *)dmabuf->virt; size = job->request_payload.payload_len; sg_copy_to_buffer(job->request_payload.sg_list, @@ -4461,6 +4455,13 @@ lpfc_bsg_write_ebuf_set(struct lpfc_hba *phba, struct bsg_job *job, "2968 SLI_CONFIG ext-buffer wr all %d " "ebuffers received\n", phba->mbox_ext_buf_ctx.numBuf); + + dd_data = kmalloc(sizeof(struct bsg_job_data), GFP_KERNEL); + if (!dd_data) { + rc = -ENOMEM; + goto job_error; + } + /* mailbox command structure for base driver */ pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); if (!pmboxq) { @@ -4509,6 +4510,8 @@ lpfc_bsg_write_ebuf_set(struct lpfc_hba *phba, struct bsg_job *job, return SLI_CONFIG_HANDLED; job_error: + if (pmboxq) + mempool_free(pmboxq, phba->mbox_mem_pool); lpfc_bsg_dma_page_free(phba, dmabuf); kfree(dd_data); diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index ddd29752d96d..4c84c2ae1112 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1152,6 +1152,7 @@ stop_rr_fcf_flogi: phba->fcf.fcf_flag &= ~FCF_DISCOVERY; phba->hba_flag &= ~(FCF_RR_INPROG | HBA_DEVLOSS_TMO); spin_unlock_irq(&phba->hbalock); + phba->fcf.fcf_redisc_attempted = 0; /* reset */ goto out; } if (!rc) { @@ -1166,6 +1167,7 @@ stop_rr_fcf_flogi: phba->fcf.fcf_flag &= ~FCF_DISCOVERY; phba->hba_flag &= ~(FCF_RR_INPROG | HBA_DEVLOSS_TMO); spin_unlock_irq(&phba->hbalock); + phba->fcf.fcf_redisc_attempted = 0; /* reset */ goto out; } } @@ -1335,6 +1337,8 @@ lpfc_els_abort_flogi(struct lpfc_hba *phba) Fabric_DID); pring = lpfc_phba_elsring(phba); + if (unlikely(!pring)) + return -EIO; /* * Check the txcmplq for an iocb that matches the nport the driver is @@ -1548,8 +1552,10 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, */ new_ndlp = lpfc_findnode_wwpn(vport, &sp->portName); + /* return immediately if the WWPN matches ndlp */ if (new_ndlp == ndlp && NLP_CHK_NODE_ACT(new_ndlp)) return ndlp; + if (phba->sli_rev == LPFC_SLI_REV4) { active_rrqs_xri_bitmap = mempool_alloc(phba->active_rrq_pool, GFP_KERNEL); @@ -1558,9 +1564,13 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, phba->cfg_rrq_xri_bitmap_sz); } - lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, - "3178 PLOGI confirm: ndlp %p x%x: new_ndlp %p\n", - ndlp, ndlp->nlp_DID, new_ndlp); + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS | LOG_NODE, + "3178 PLOGI confirm: ndlp x%x x%x x%x: " + "new_ndlp x%x x%x x%x\n", + ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_fc4_type, + (new_ndlp ? new_ndlp->nlp_DID : 0), + (new_ndlp ? new_ndlp->nlp_flag : 0), + (new_ndlp ? new_ndlp->nlp_fc4_type : 0)); if (!new_ndlp) { rc = memcmp(&ndlp->nlp_portname, name, @@ -1609,6 +1619,14 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, phba->cfg_rrq_xri_bitmap_sz); } + /* At this point in this routine, we know new_ndlp will be + * returned. however, any previous GID_FTs that were done + * would have updated nlp_fc4_type in ndlp, so we must ensure + * new_ndlp has the right value. + */ + if (vport->fc_flag & FC_FABRIC) + new_ndlp->nlp_fc4_type = ndlp->nlp_fc4_type; + lpfc_unreg_rpi(vport, new_ndlp); new_ndlp->nlp_DID = ndlp->nlp_DID; new_ndlp->nlp_prev_state = ndlp->nlp_prev_state; @@ -1730,6 +1748,12 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, active_rrqs_xri_bitmap) mempool_free(active_rrqs_xri_bitmap, phba->active_rrq_pool); + + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS | LOG_NODE, + "3173 PLOGI confirm exit: new_ndlp x%x x%x x%x\n", + new_ndlp->nlp_DID, new_ndlp->nlp_flag, + new_ndlp->nlp_fc4_type); + return new_ndlp; } @@ -4078,7 +4102,7 @@ lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, mempool_free(mbox, phba->mbox_mem_pool); } out: - if (ndlp && NLP_CHK_NODE_ACT(ndlp)) { + if (ndlp && NLP_CHK_NODE_ACT(ndlp) && shost) { spin_lock_irq(shost->host_lock); ndlp->nlp_flag &= ~(NLP_ACC_REGLOGIN | NLP_RM_DFLT_RPI); spin_unlock_irq(shost->host_lock); @@ -5526,7 +5550,7 @@ lpfc_els_rcv_rdp(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, struct ls_rjt stat; if (phba->sli_rev < LPFC_SLI_REV4 || - bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) != + bf_get(lpfc_sli_intf_if_type, &phba->sli4_hba.sli_intf) < LPFC_SLI_INTF_IF_TYPE_2) { rjt_err = LSRJT_UNABLE_TPC; rjt_expl = LSEXP_REQ_UNSUPPORTED; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index b970933a218d..4a0889dd4c1d 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -1999,6 +1999,26 @@ int lpfc_sli4_fcf_rr_next_proc(struct lpfc_vport *vport, uint16_t fcf_index) "failover and change port state:x%x/x%x\n", phba->pport->port_state, LPFC_VPORT_UNKNOWN); phba->pport->port_state = LPFC_VPORT_UNKNOWN; + + if (!phba->fcf.fcf_redisc_attempted) { + lpfc_unregister_fcf(phba); + + rc = lpfc_sli4_redisc_fcf_table(phba); + if (!rc) { + lpfc_printf_log(phba, KERN_INFO, LOG_FIP, + "3195 Rediscover FCF table\n"); + phba->fcf.fcf_redisc_attempted = 1; + lpfc_sli4_clear_fcf_rr_bmask(phba); + } else { + lpfc_printf_log(phba, KERN_WARNING, LOG_FIP, + "3196 Rediscover FCF table " + "failed. Status:x%x\n", rc); + } + } else { + lpfc_printf_log(phba, KERN_WARNING, LOG_FIP, + "3197 Already rediscover FCF table " + "attempted. No more retry\n"); + } goto stop_flogi_current_fcf; } else { lpfc_printf_log(phba, KERN_INFO, LOG_FIP | LOG_ELS, @@ -4749,7 +4769,7 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) if (phba->sli_rev == LPFC_SLI_REV4 && (!(vport->load_flag & FC_UNLOADING)) && (bf_get(lpfc_sli_intf_if_type, - &phba->sli4_hba.sli_intf) == + &phba->sli4_hba.sli_intf) >= LPFC_SLI_INTF_IF_TYPE_2) && (kref_read(&ndlp->kref) > 0)) { mbox->context1 = lpfc_nlp_get(ndlp); @@ -5200,9 +5220,14 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) /* If we've already received a PLOGI from this NPort * we don't need to try to discover it again. */ - if (ndlp->nlp_flag & NLP_RCV_PLOGI) + if (ndlp->nlp_flag & NLP_RCV_PLOGI && + !(ndlp->nlp_type & + (NLP_FCP_TARGET | NLP_NVME_TARGET))) return NULL; + ndlp->nlp_prev_state = ndlp->nlp_state; + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 25612ccf6ff2..9fc5507ee39e 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -1773,7 +1773,12 @@ lpfc_sli4_port_sta_fn_reset(struct lpfc_hba *phba, int mbx_action, lpfc_offline(phba); /* release interrupt for possible resource change */ lpfc_sli4_disable_intr(phba); - lpfc_sli_brdrestart(phba); + rc = lpfc_sli_brdrestart(phba); + if (rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "6309 Failed to restart board\n"); + return rc; + } /* request and enable interrupt */ intr_mode = lpfc_sli4_enable_intr(phba, phba->intr_mode); if (intr_mode == LPFC_INTR_ERROR) { @@ -4997,7 +5002,7 @@ lpfc_sli4_async_fip_evt(struct lpfc_hba *phba, break; } /* If fast FCF failover rescan event is pending, do nothing */ - if (phba->fcf.fcf_flag & FCF_REDISC_EVT) { + if (phba->fcf.fcf_flag & (FCF_REDISC_EVT | FCF_REDISC_PEND)) { spin_unlock_irq(&phba->hbalock); break; } @@ -7638,6 +7643,9 @@ lpfc_sli4_read_config(struct lpfc_hba *phba) bf_get(lpfc_mbx_rd_conf_xri_base, rd_config); phba->sli4_hba.max_cfg_param.max_vpi = bf_get(lpfc_mbx_rd_conf_vpi_count, rd_config); + /* Limit the max we support */ + if (phba->sli4_hba.max_cfg_param.max_vpi > LPFC_MAX_VPORTS) + phba->sli4_hba.max_cfg_param.max_vpi = LPFC_MAX_VPORTS; phba->sli4_hba.max_cfg_param.vpi_base = bf_get(lpfc_mbx_rd_conf_vpi_base, rd_config); phba->sli4_hba.max_cfg_param.max_rpi = diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 36fb549eb4e8..96411754aa43 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -483,8 +483,10 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, * single discovery thread, this will cause a huge delay in * discovery. Also this will cause multiple state machines * running in parallel for this node. + * This only applies to a fabric environment. */ - if (ndlp->nlp_state == NLP_STE_PLOGI_ISSUE) { + if ((ndlp->nlp_state == NLP_STE_PLOGI_ISSUE) && + (vport->fc_flag & FC_FABRIC)) { /* software abort outstanding PLOGI */ lpfc_els_abort(phba, ndlp); } @@ -809,9 +811,9 @@ lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) if (!(vport->fc_flag & FC_PT2PT)) { /* Check config parameter use-adisc or FCP-2 */ - if ((vport->cfg_use_adisc && (vport->fc_flag & FC_RSCN_MODE)) || + if (vport->cfg_use_adisc && ((vport->fc_flag & FC_RSCN_MODE) || ((ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE) && - (ndlp->nlp_type & NLP_FCP_TARGET))) { + (ndlp->nlp_type & NLP_FCP_TARGET)))) { spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_ADISC; spin_unlock_irq(shost->host_lock); @@ -2829,8 +2831,9 @@ lpfc_disc_state_machine(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, /* DSM in event on NPort in state */ lpfc_printf_vlog(vport, KERN_INFO, LOG_DISCOVERY, "0211 DSM in event x%x on NPort x%x in " - "state %d Data: x%x\n", - evt, ndlp->nlp_DID, cur_state, ndlp->nlp_flag); + "state %d Data: x%x x%x\n", + evt, ndlp->nlp_DID, cur_state, + ndlp->nlp_flag, ndlp->nlp_fc4_type); lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_DSM, "DSM in: evt:%d ste:%d did:x%x", diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 23bdb1ca106e..af937b91765e 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -144,7 +144,7 @@ lpfc_nvme_delete_queue(struct nvme_fc_local_port *pnvme_lport, vport = lport->vport; lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME, - "6001 ENTER. lpfc_pnvme %p, qidx x%xi qhandle %p\n", + "6001 ENTER. lpfc_pnvme %p, qidx x%x qhandle %p\n", lport, qidx, handle); kfree(handle); } @@ -1544,7 +1544,6 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG); /* word 7 */ - bf_set(wqe_ct, &abts_wqe->abort_cmd.wqe_com, 0); bf_set(wqe_cmnd, &abts_wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX); bf_set(wqe_class, &abts_wqe->abort_cmd.wqe_com, nvmereq_wqe->iocb.ulpClass); @@ -1559,7 +1558,6 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, abts_buf->iotag); /* word 10 */ - bf_set(wqe_wqid, &abts_wqe->abort_cmd.wqe_com, nvmereq_wqe->hba_wqidx); bf_set(wqe_qosd, &abts_wqe->abort_cmd.wqe_com, 1); bf_set(wqe_lenloc, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE); @@ -1593,6 +1591,8 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, /* Declare and initialization an instance of the FC NVME template. */ static struct nvme_fc_port_template lpfc_nvme_template = { + .module = THIS_MODULE, + /* initiator-based functions */ .localport_delete = lpfc_nvme_localport_delete, .remoteport_delete = lpfc_nvme_remoteport_delete, diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 7ac1a067d780..eacdcb931bda 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -1078,15 +1078,14 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba) idx = 0; } - infop = phba->sli4_hba.nvmet_ctx_info; - for (j = 0; j < phba->cfg_nvmet_mrq; j++) { - for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) { + for (j = 0; j < phba->cfg_nvmet_mrq; j++) { + infop = lpfc_get_ctx_list(phba, i, j); lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT, "6408 TOTAL NVMET ctx for CPU %d " "MRQ %d: cnt %d nextcpu %p\n", i, j, infop->nvmet_ctx_list_cnt, infop->nvmet_ctx_next_cpu); - infop++; } } return 0; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 4ade13d72deb..2eba0c39ac1c 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -2714,6 +2714,7 @@ lpfc_bg_scsi_prep_dma_buf_s3(struct lpfc_hba *phba, int datasegcnt, protsegcnt, datadir = scsi_cmnd->sc_data_direction; int prot_group_type = 0; int fcpdl; + struct lpfc_vport *vport = phba->pport; /* * Start the lpfc command prep by bumping the bpl beyond fcp_cmnd @@ -2819,6 +2820,14 @@ lpfc_bg_scsi_prep_dma_buf_s3(struct lpfc_hba *phba, */ iocb_cmd->un.fcpi.fcpi_parm = fcpdl; + /* + * For First burst, we may need to adjust the initial transfer + * length for DIF + */ + if (iocb_cmd->un.fcpi.fcpi_XRdy && + (fcpdl < vport->cfg_first_burst_size)) + iocb_cmd->un.fcpi.fcpi_XRdy = fcpdl; + return 0; err: if (lpfc_cmd->seg_cnt) @@ -3371,6 +3380,7 @@ lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, int datasegcnt, protsegcnt, datadir = scsi_cmnd->sc_data_direction; int prot_group_type = 0; int fcpdl; + struct lpfc_vport *vport = phba->pport; /* * Start the lpfc command prep by bumping the sgl beyond fcp_cmnd @@ -3486,6 +3496,14 @@ lpfc_bg_scsi_prep_dma_buf_s4(struct lpfc_hba *phba, */ iocb_cmd->un.fcpi.fcpi_parm = fcpdl; + /* + * For First burst, we may need to adjust the initial transfer + * length for DIF + */ + if (iocb_cmd->un.fcpi.fcpi_XRdy && + (fcpdl < vport->cfg_first_burst_size)) + iocb_cmd->un.fcpi.fcpi_XRdy = fcpdl; + /* * If the OAS driver feature is enabled and the lun is enabled for * OAS, set the oas iocb related flags. @@ -4152,7 +4170,7 @@ lpfc_scsi_cmd_iocb_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pIocbIn, /* If pCmd was set to NULL from abort path, do not call scsi_done */ if (xchg(&lpfc_cmd->pCmd, NULL) == NULL) { lpfc_printf_vlog(vport, KERN_INFO, LOG_FCP, - "0711 FCP cmd already NULL, sid: 0x%06x, " + "5688 FCP cmd already NULL, sid: 0x%06x, " "did: 0x%06x, oxid: 0x%04x\n", vport->fc_myDID, (pnode) ? pnode->nlp_DID : 0, diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 6c2b098b7609..d8e0ba68879c 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -4421,6 +4421,8 @@ lpfc_sli_brdrestart_s4(struct lpfc_hba *phba) hba_aer_enabled = phba->hba_flag & HBA_AER_ENABLED; rc = lpfc_sli4_brdreset(phba); + if (rc) + return rc; spin_lock_irq(&phba->hbalock); phba->pport->stopped = 0; @@ -10720,19 +10722,12 @@ lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, /* Complete prepping the abort wqe and issue to the FW. */ abts_wqe = &abtsiocbp->wqe; - bf_set(abort_cmd_ia, &abts_wqe->abort_cmd, 0); + + /* Clear any stale WQE contents */ + memset(abts_wqe, 0, sizeof(union lpfc_wqe)); bf_set(abort_cmd_criteria, &abts_wqe->abort_cmd, T_XRI_TAG); - /* Explicitly set reserved fields to zero.*/ - abts_wqe->abort_cmd.rsrvd4 = 0; - abts_wqe->abort_cmd.rsrvd5 = 0; - - /* WQE Common - word 6. Context is XRI tag. Set 0. */ - bf_set(wqe_xri_tag, &abts_wqe->abort_cmd.wqe_com, 0); - bf_set(wqe_ctxt_tag, &abts_wqe->abort_cmd.wqe_com, 0); - /* word 7 */ - bf_set(wqe_ct, &abts_wqe->abort_cmd.wqe_com, 0); bf_set(wqe_cmnd, &abts_wqe->abort_cmd.wqe_com, CMD_ABORT_XRI_CX); bf_set(wqe_class, &abts_wqe->abort_cmd.wqe_com, cmdiocb->iocb.ulpClass); @@ -10747,7 +10742,6 @@ lpfc_sli4_abort_nvme_io(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, abtsiocbp->iotag); /* word 10 */ - bf_set(wqe_wqid, &abts_wqe->abort_cmd.wqe_com, cmdiocb->hba_wqidx); bf_set(wqe_qosd, &abts_wqe->abort_cmd.wqe_com, 1); bf_set(wqe_lenloc, &abts_wqe->abort_cmd.wqe_com, LPFC_WQE_LENLOC_NONE); @@ -12695,13 +12689,19 @@ send_current_mbox: phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE; /* Setting active mailbox pointer need to be in sync to flag clear */ phba->sli.mbox_active = NULL; + if (bf_get(lpfc_trailer_consumed, mcqe)) + lpfc_sli4_mq_release(phba->sli4_hba.mbx_wq); spin_unlock_irqrestore(&phba->hbalock, iflags); /* Wake up worker thread to post the next pending mailbox command */ lpfc_worker_wake_up(phba); + return workposted; + out_no_mqe_complete: + spin_lock_irqsave(&phba->hbalock, iflags); if (bf_get(lpfc_trailer_consumed, mcqe)) lpfc_sli4_mq_release(phba->sli4_hba.mbx_wq); - return workposted; + spin_unlock_irqrestore(&phba->hbalock, iflags); + return false; } /** @@ -17492,6 +17492,13 @@ lpfc_sli4_alloc_rpi(struct lpfc_hba *phba) static void __lpfc_sli4_free_rpi(struct lpfc_hba *phba, int rpi) { + /* + * if the rpi value indicates a prior unreg has already + * been done, skip the unreg. + */ + if (rpi == LPFC_RPI_ALLOC_ERROR) + return; + if (test_and_clear_bit(rpi, phba->sli4_hba.rpi_bmask)) { phba->sli4_hba.rpi_count--; phba->sli4_hba.max_cfg_param.rpi_used--; @@ -18056,15 +18063,8 @@ next_priority: goto initial_priority; lpfc_printf_log(phba, KERN_WARNING, LOG_FIP, "2844 No roundrobin failover FCF available\n"); - if (next_fcf_index >= LPFC_SLI4_FCF_TBL_INDX_MAX) - return LPFC_FCOE_FCF_NEXT_NONE; - else { - lpfc_printf_log(phba, KERN_WARNING, LOG_FIP, - "3063 Only FCF available idx %d, flag %x\n", - next_fcf_index, - phba->fcf.fcf_pri[next_fcf_index].fcf_rec.flag); - return next_fcf_index; - } + + return LPFC_FCOE_FCF_NEXT_NONE; } if (next_fcf_index < LPFC_SLI4_FCF_TBL_INDX_MAX && diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 60200385fe00..a132a83ef233 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -265,6 +265,7 @@ struct lpfc_fcf { #define FCF_REDISC_EVT 0x100 /* FCF rediscovery event to worker thread */ #define FCF_REDISC_FOV 0x200 /* Post FCF rediscovery fast failover */ #define FCF_REDISC_PROG (FCF_REDISC_PEND | FCF_REDISC_EVT) + uint16_t fcf_redisc_attempted; uint32_t addr_mode; uint32_t eligible_fcf_cnt; struct lpfc_fcf_rec current_rec; diff --git a/drivers/scsi/mac_scsi.c b/drivers/scsi/mac_scsi.c index 643321fc152d..b5050c2ede00 100644 --- a/drivers/scsi/mac_scsi.c +++ b/drivers/scsi/mac_scsi.c @@ -429,7 +429,7 @@ static int __init mac_scsi_probe(struct platform_device *pdev) mac_scsi_template.can_queue = setup_can_queue; if (setup_cmd_per_lun > 0) mac_scsi_template.cmd_per_lun = setup_cmd_per_lun; - if (setup_sg_tablesize >= 0) + if (setup_sg_tablesize > 0) mac_scsi_template.sg_tablesize = setup_sg_tablesize; if (setup_hostid >= 0) mac_scsi_template.this_id = setup_hostid & 7; diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 9b6f5d024dba..f5c09bbf9374 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -4221,11 +4221,11 @@ megaraid_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) */ if (pdev->subsystem_vendor == PCI_VENDOR_ID_COMPAQ && pdev->subsystem_device == 0xC000) - return -ENODEV; + goto out_disable_device; /* Now check the magic signature byte */ pci_read_config_word(pdev, PCI_CONF_AMISIG, &magic); if (magic != HBA_SIGNATURE_471 && magic != HBA_SIGNATURE) - return -ENODEV; + goto out_disable_device; /* Ok it is probably a megaraid */ } diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 8595d83229b7..42d876034741 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -3823,12 +3823,12 @@ megasas_transition_to_ready(struct megasas_instance *instance, int ocr) /* * The cur_state should not last for more than max_wait secs */ - for (i = 0; i < (max_wait * 1000); i++) { + for (i = 0; i < max_wait * 50; i++) { curr_abs_state = instance->instancet-> read_fw_status_reg(instance->reg_set); if (abs_state == curr_abs_state) { - msleep(1); + msleep(20); } else break; } @@ -4109,7 +4109,8 @@ dcmd_timeout_ocr_possible(struct megasas_instance *instance) { if (instance->adapter_type == MFI_SERIES) return KILL_ADAPTER; else if (instance->unload || - test_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags)) + test_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE, + &instance->reset_flags)) return IGNORE_TIMEOUT; else return INITIATE_OCR; @@ -5324,7 +5325,7 @@ static int megasas_init_fw(struct megasas_instance *instance) if (!instance->msix_vectors) { i = pci_alloc_irq_vectors(instance->pdev, 1, 1, PCI_IRQ_LEGACY); if (i < 0) - goto fail_setup_irqs; + goto fail_init_adapter; } megasas_setup_reply_map(instance); @@ -5541,9 +5542,8 @@ static int megasas_init_fw(struct megasas_instance *instance) fail_get_ld_pd_list: instance->instancet->disable_intr(instance); -fail_init_adapter: megasas_destroy_irqs(instance); -fail_setup_irqs: +fail_init_adapter: if (instance->msix_vectors) pci_free_irq_vectors(instance->pdev); instance->msix_vectors = 0; diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index 7be2b9e11332..b13721290f4b 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -4212,6 +4212,7 @@ int megasas_reset_fusion(struct Scsi_Host *shost, int reason) if (instance->requestorId && !instance->skip_heartbeat_timer_del) del_timer_sync(&instance->sriov_heartbeat_timer); set_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags); + set_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE, &instance->reset_flags); atomic_set(&instance->adprecovery, MEGASAS_ADPRESET_SM_POLLING); instance->instancet->disable_intr(instance); megasas_sync_irqs((unsigned long)instance); @@ -4399,7 +4400,7 @@ fail_kill_adapter: atomic_set(&instance->adprecovery, MEGASAS_HBA_OPERATIONAL); } out: - clear_bit(MEGASAS_FUSION_IN_RESET, &instance->reset_flags); + clear_bit(MEGASAS_FUSION_OCR_NOT_POSSIBLE, &instance->reset_flags); mutex_unlock(&instance->reset_mutex); return retval; } diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.h b/drivers/scsi/megaraid/megaraid_sas_fusion.h index 7c1f7ccf031d..40724df20780 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.h +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h @@ -100,6 +100,7 @@ enum MR_RAID_FLAGS_IO_SUB_TYPE { #define MEGASAS_FP_CMD_LEN 16 #define MEGASAS_FUSION_IN_RESET 0 +#define MEGASAS_FUSION_OCR_NOT_POSSIBLE 1 #define THRESHOLD_REPLY_COUNT 50 #define RAID_1_PEER_CMDS 2 #define JBOD_MAPS_COUNT 2 diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 9064b6d7b72b..2c8e144d4861 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -3140,7 +3140,7 @@ _base_static_config_pages(struct MPT3SAS_ADAPTER *ioc) * flag unset in NVDATA. */ mpt3sas_config_get_manufacturing_pg11(ioc, &mpi_reply, &ioc->manu_pg11); - if (ioc->manu_pg11.EEDPTagMode == 0) { + if (!ioc->is_gen35_ioc && ioc->manu_pg11.EEDPTagMode == 0) { pr_err("%s: overriding NVDATA EEDPTagMode setting\n", ioc->name); ioc->manu_pg11.EEDPTagMode &= ~0x3; diff --git a/drivers/scsi/mpt3sas/mpt3sas_config.c b/drivers/scsi/mpt3sas/mpt3sas_config.c index dd6270125614..58acbff40abc 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_config.c +++ b/drivers/scsi/mpt3sas/mpt3sas_config.c @@ -674,10 +674,6 @@ mpt3sas_config_set_manufacturing_pg11(struct MPT3SAS_ADAPTER *ioc, r = _config_request(ioc, &mpi_request, mpi_reply, MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, config_page, sizeof(*config_page)); - mpi_request.Action = MPI2_CONFIG_ACTION_PAGE_WRITE_NVRAM; - r = _config_request(ioc, &mpi_request, mpi_reply, - MPT3_CONFIG_PAGE_DEFAULT_TIMEOUT, config_page, - sizeof(*config_page)); out: return r; } diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index bdffb692bded..622dcf2984a9 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -1502,7 +1502,8 @@ _ctl_diag_register_2(struct MPT3SAS_ADAPTER *ioc, " for diag buffers, requested size(%d)\n", ioc->name, __func__, request_data_sz); mpt3sas_base_free_smid(ioc, smid); - return -ENOMEM; + rc = -ENOMEM; + goto out; } ioc->diag_buffer[buffer_type] = request_data; ioc->diag_buffer_sz[buffer_type] = request_data_sz; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 83640d984f70..5fb87cfac84d 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -3328,6 +3328,40 @@ _scsih_tm_tr_complete(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index, return _scsih_check_for_pending_tm(ioc, smid); } +/** _scsih_allow_scmd_to_device - check whether scmd needs to + * issue to IOC or not. + * @ioc: per adapter object + * @scmd: pointer to scsi command object + * + * Returns true if scmd can be issued to IOC otherwise returns false. + */ +inline bool _scsih_allow_scmd_to_device(struct MPT3SAS_ADAPTER *ioc, + struct scsi_cmnd *scmd) +{ + + if (ioc->pci_error_recovery) + return false; + + if (ioc->hba_mpi_version_belonged == MPI2_VERSION) { + if (ioc->remove_host) + return false; + + return true; + } + + if (ioc->remove_host) { + + switch (scmd->cmnd[0]) { + case SYNCHRONIZE_CACHE: + case START_STOP: + return true; + default: + return false; + } + } + + return true; +} /** * _scsih_sas_control_complete - completion routine @@ -4100,7 +4134,7 @@ scsih_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *scmd) return 0; } - if (ioc->pci_error_recovery || ioc->remove_host) { + if (!(_scsih_allow_scmd_to_device(ioc, scmd))) { scmd->result = DID_NO_CONNECT << 16; scmd->scsi_done(scmd); return 0; diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index 10546faac58c..f374abfb7f1f 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -1479,6 +1479,12 @@ u32 pm8001_mpi_msg_consume(struct pm8001_hba_info *pm8001_ha, } else { u32 producer_index; void *pi_virt = circularQ->pi_virt; + /* spurious interrupt during setup if + * kexec-ing and driver doing a doorbell access + * with the pre-kexec oq interrupt setup + */ + if (!pi_virt) + break; /* Update the producer index from SPC */ producer_index = pm8001_read_32(pi_virt); circularQ->producer_index = cpu_to_le32(producer_index); diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index ce584c31d36e..e64a13f0bce1 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -374,6 +374,13 @@ static int pm8001_task_exec(struct sas_task *task, return 0; } pm8001_ha = pm8001_find_ha_by_dev(task->dev); + if (pm8001_ha->controller_fatal_error) { + struct task_status_struct *ts = &t->task_status; + + ts->resp = SAS_TASK_UNDELIVERED; + t->task_done(t); + return 0; + } PM8001_IO_DBG(pm8001_ha, pm8001_printk("pm8001_task_exec device \n ")); spin_lock_irqsave(&pm8001_ha->lock, flags); do { @@ -466,7 +473,7 @@ err_out: dev_printk(KERN_ERR, pm8001_ha->dev, "pm8001 exec failed[%d]!\n", rc); if (!sas_protocol_ata(t->task_proto)) if (n_elem) - dma_unmap_sg(pm8001_ha->dev, t->scatter, n_elem, + dma_unmap_sg(pm8001_ha->dev, t->scatter, t->num_scatter, t->data_dir); out_done: spin_unlock_irqrestore(&pm8001_ha->lock, flags); diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h index e81a8fa7ef1a..e954ecd3f6c0 100644 --- a/drivers/scsi/pm8001/pm8001_sas.h +++ b/drivers/scsi/pm8001/pm8001_sas.h @@ -529,6 +529,7 @@ struct pm8001_hba_info { u32 logging_level; u32 fw_status; u32 smp_exp_mode; + bool controller_fatal_error; const struct firmware *fw_image; struct isr_param irq_vector[PM8001_MAX_MSIX_VEC]; }; diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index eb4fee61df72..df5f0bc29587 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -572,6 +572,9 @@ static void update_main_config_table(struct pm8001_hba_info *pm8001_ha) pm8001_ha->main_cfg_tbl.pm80xx_tbl.pcs_event_log_size); pm8001_mw32(address, MAIN_PCS_EVENT_LOG_OPTION, pm8001_ha->main_cfg_tbl.pm80xx_tbl.pcs_event_log_severity); + /* Update Fatal error interrupt vector */ + pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt |= + ((pm8001_ha->number_of_intr - 1) << 8); pm8001_mw32(address, MAIN_FATAL_ERROR_INTERRUPT, pm8001_ha->main_cfg_tbl.pm80xx_tbl.fatal_err_interrupt); pm8001_mw32(address, MAIN_EVENT_CRC_CHECK, @@ -1099,6 +1102,9 @@ static int pm80xx_chip_init(struct pm8001_hba_info *pm8001_ha) return -EBUSY; } + /* Initialize the controller fatal error flag */ + pm8001_ha->controller_fatal_error = false; + /* Initialize pci space address eg: mpi offset */ init_pci_device_addresses(pm8001_ha); init_default_table_values(pm8001_ha); @@ -1207,13 +1213,17 @@ pm80xx_chip_soft_rst(struct pm8001_hba_info *pm8001_ha) u32 bootloader_state; u32 ibutton0, ibutton1; - /* Check if MPI is in ready state to reset */ - if (mpi_uninit_check(pm8001_ha) != 0) { - PM8001_FAIL_DBG(pm8001_ha, - pm8001_printk("MPI state is not ready\n")); - return -1; + /* Process MPI table uninitialization only if FW is ready */ + if (!pm8001_ha->controller_fatal_error) { + /* Check if MPI is in ready state to reset */ + if (mpi_uninit_check(pm8001_ha) != 0) { + regval = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1); + PM8001_FAIL_DBG(pm8001_ha, pm8001_printk( + "MPI state is not ready scratch1 :0x%x\n", + regval)); + return -1; + } } - /* checked for reset register normal state; 0x0 */ regval = pm8001_cr32(pm8001_ha, 0, SPC_REG_SOFT_RESET); PM8001_INIT_DBG(pm8001_ha, @@ -2358,6 +2368,8 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) pm8001_printk("task 0x%p done with io_status 0x%x" " resp 0x%x stat 0x%x but aborted by upper layer!\n", t, status, ts->resp, ts->stat)); + if (t->slow_task) + complete(&t->slow_task->completion); pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); } else { spin_unlock_irqrestore(&t->task_state_lock, flags); @@ -3717,6 +3729,46 @@ static void process_one_iomb(struct pm8001_hba_info *pm8001_ha, void *piomb) } } +static void print_scratchpad_registers(struct pm8001_hba_info *pm8001_ha) +{ + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_SCRATCH_PAD_0: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_0))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_SCRATCH_PAD_1:0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_SCRATCH_PAD_2: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_2))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_SCRATCH_PAD_3: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_3))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_HOST_SCRATCH_PAD_0: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_0))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_HOST_SCRATCH_PAD_1: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_1))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_HOST_SCRATCH_PAD_2: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_2))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_HOST_SCRATCH_PAD_3: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_3))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_HOST_SCRATCH_PAD_4: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_4))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_HOST_SCRATCH_PAD_5: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_5))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_RSVD_SCRATCH_PAD_0: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_6))); + PM8001_FAIL_DBG(pm8001_ha, + pm8001_printk("MSGU_RSVD_SCRATCH_PAD_1: 0x%x\n", + pm8001_cr32(pm8001_ha, 0, MSGU_HOST_SCRATCH_PAD_7))); +} + static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec) { struct outbound_queue_table *circularQ; @@ -3724,10 +3776,28 @@ static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec) u8 uninitialized_var(bc); u32 ret = MPI_IO_STATUS_FAIL; unsigned long flags; + u32 regval; + if (vec == (pm8001_ha->number_of_intr - 1)) { + regval = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1); + if ((regval & SCRATCH_PAD_MIPSALL_READY) != + SCRATCH_PAD_MIPSALL_READY) { + pm8001_ha->controller_fatal_error = true; + PM8001_FAIL_DBG(pm8001_ha, pm8001_printk( + "Firmware Fatal error! Regval:0x%x\n", regval)); + print_scratchpad_registers(pm8001_ha); + return ret; + } + } spin_lock_irqsave(&pm8001_ha->lock, flags); circularQ = &pm8001_ha->outbnd_q_tbl[vec]; do { + /* spurious interrupt during setup if kexec-ing and + * driver doing a doorbell access w/ the pre-kexec oq + * interrupt setup. + */ + if (!circularQ->pi_virt) + break; ret = pm8001_mpi_msg_consume(pm8001_ha, circularQ, &pMsg1, &bc); if (MPI_IO_STATUS_SUCCESS == ret) { /* process the outbound message */ diff --git a/drivers/scsi/pm8001/pm80xx_hwi.h b/drivers/scsi/pm8001/pm80xx_hwi.h index 7a443bad6163..411b414a9a0e 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.h +++ b/drivers/scsi/pm8001/pm80xx_hwi.h @@ -1288,6 +1288,9 @@ typedef struct SASProtocolTimerConfig SASProtocolTimerConfig_t; #define SCRATCH_PAD_BOOT_LOAD_SUCCESS 0x0 #define SCRATCH_PAD_IOP0_READY 0xC00 #define SCRATCH_PAD_IOP1_READY 0x3000 +#define SCRATCH_PAD_MIPSALL_READY (SCRATCH_PAD_IOP1_READY | \ + SCRATCH_PAD_IOP0_READY | \ + SCRATCH_PAD_RAAE_READY) /* boot loader state */ #define SCRATCH_PAD1_BOOTSTATE_MASK 0x70 /* Bit 4-6 */ diff --git a/drivers/scsi/qedf/qedf_els.c b/drivers/scsi/qedf/qedf_els.c index 59c18ca4cda9..e5927a09f7bc 100644 --- a/drivers/scsi/qedf/qedf_els.c +++ b/drivers/scsi/qedf/qedf_els.c @@ -23,8 +23,6 @@ static int qedf_initiate_els(struct qedf_rport *fcport, unsigned int op, int rc = 0; uint32_t did, sid; uint16_t xid; - uint32_t start_time = jiffies / HZ; - uint32_t current_time; struct fcoe_wqe *sqe; unsigned long flags; u16 sqe_idx; @@ -50,18 +48,12 @@ static int qedf_initiate_els(struct qedf_rport *fcport, unsigned int op, goto els_err; } -retry_els: els_req = qedf_alloc_cmd(fcport, QEDF_ELS); if (!els_req) { - current_time = jiffies / HZ; - if ((current_time - start_time) > 10) { - QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, - "els: Failed els 0x%x\n", op); - rc = -ENOMEM; - goto els_err; - } - mdelay(20 * USEC_PER_MSEC); - goto retry_els; + QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_ELS, + "Failed to alloc ELS request 0x%x\n", op); + rc = -ENOMEM; + goto els_err; } QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_ELS, "initiate_els els_req = " diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 1844c2f59460..656253285db9 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -652,7 +652,8 @@ qla2x00_sysfs_write_reset(struct file *filp, struct kobject *kobj, break; } else { /* Make sure FC side is not in reset */ - qla2x00_wait_for_hba_online(vha); + WARN_ON_ONCE(qla2x00_wait_for_hba_online(vha) != + QLA_SUCCESS); /* Issue MPI reset */ scsi_block_requests(vha->host); diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 2ea0ef93f5cb..c1ca21a88a09 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -258,7 +258,7 @@ qla2x00_process_els(struct bsg_job *bsg_job) srb_t *sp; const char *type; int req_sg_cnt, rsp_sg_cnt; - int rval = (DRIVER_ERROR << 16); + int rval = (DID_ERROR << 16); uint16_t nextlid = 0; if (bsg_request->msgcode == FC_BSG_RPT_ELS) { @@ -342,6 +342,8 @@ qla2x00_process_els(struct bsg_job *bsg_job) dma_map_sg(&ha->pdev->dev, bsg_job->request_payload.sg_list, bsg_job->request_payload.sg_cnt, DMA_TO_DEVICE); if (!req_sg_cnt) { + dma_unmap_sg(&ha->pdev->dev, bsg_job->request_payload.sg_list, + bsg_job->request_payload.sg_cnt, DMA_TO_DEVICE); rval = -ENOMEM; goto done_free_fcport; } @@ -349,6 +351,8 @@ qla2x00_process_els(struct bsg_job *bsg_job) rsp_sg_cnt = dma_map_sg(&ha->pdev->dev, bsg_job->reply_payload.sg_list, bsg_job->reply_payload.sg_cnt, DMA_FROM_DEVICE); if (!rsp_sg_cnt) { + dma_unmap_sg(&ha->pdev->dev, bsg_job->reply_payload.sg_list, + bsg_job->reply_payload.sg_cnt, DMA_FROM_DEVICE); rval = -ENOMEM; goto done_free_fcport; } @@ -433,7 +437,7 @@ qla2x00_process_ct(struct bsg_job *bsg_job) struct Scsi_Host *host = fc_bsg_to_shost(bsg_job); scsi_qla_host_t *vha = shost_priv(host); struct qla_hw_data *ha = vha->hw; - int rval = (DRIVER_ERROR << 16); + int rval = (DID_ERROR << 16); int req_sg_cnt, rsp_sg_cnt; uint16_t loop_id; struct fc_port *fcport; @@ -1778,8 +1782,8 @@ qla24xx_process_bidir_cmd(struct bsg_job *bsg_job) uint16_t nextlid = 0; uint32_t tot_dsds; srb_t *sp = NULL; - uint32_t req_data_len = 0; - uint32_t rsp_data_len = 0; + uint32_t req_data_len; + uint32_t rsp_data_len; /* Check the type of the adapter */ if (!IS_BIDI_CAPABLE(ha)) { @@ -1884,6 +1888,9 @@ qla24xx_process_bidir_cmd(struct bsg_job *bsg_job) goto done_unmap_sg; } + req_data_len = bsg_job->request_payload.payload_len; + rsp_data_len = bsg_job->reply_payload.payload_len; + if (req_data_len != rsp_data_len) { rval = EXT_STATUS_BUSY; ql_log(ql_log_warn, vha, 0x70aa, @@ -1891,10 +1898,6 @@ qla24xx_process_bidir_cmd(struct bsg_job *bsg_job) goto done_unmap_sg; } - req_data_len = bsg_job->request_payload.payload_len; - rsp_data_len = bsg_job->reply_payload.payload_len; - - /* Alloc SRB structure */ sp = qla2x00_get_sp(vha, &(vha->bidir_fcport), GFP_KERNEL); if (!sp) { @@ -1951,7 +1954,7 @@ qlafx00_mgmt_cmd(struct bsg_job *bsg_job) struct Scsi_Host *host = fc_bsg_to_shost(bsg_job); scsi_qla_host_t *vha = shost_priv(host); struct qla_hw_data *ha = vha->hw; - int rval = (DRIVER_ERROR << 16); + int rval = (DID_ERROR << 16); struct qla_mt_iocb_rqst_fx00 *piocb_rqst; srb_t *sp; int req_sg_cnt = 0, rsp_sg_cnt = 0; diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 3e9dc54b89a3..91e185731b1e 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -2517,12 +2517,6 @@ qla83xx_fw_dump_failed: /* Driver Debug Functions. */ /****************************************************************************/ -static inline int -ql_mask_match(uint32_t level) -{ - return (level & ql2xextended_error_logging) == level; -} - /* * This function is for formatting and logging debug information. * It is to be used when vha is available. It formats the message diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h index 8877aa97d829..ceca6dd34db1 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.h +++ b/drivers/scsi/qla2xxx/qla_dbg.h @@ -374,3 +374,9 @@ extern int qla24xx_dump_ram(struct qla_hw_data *, uint32_t, uint32_t *, extern void qla24xx_pause_risc(struct device_reg_24xx __iomem *, struct qla_hw_data *); extern int qla24xx_soft_reset(struct qla_hw_data *); + +static inline int +ql_mask_match(uint level) +{ + return (level & ql2xextended_error_logging) == level; +} diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index 2a19ec0660cb..1088038e6a41 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -3033,7 +3033,7 @@ static void qla24xx_async_gpsc_sp_done(void *s, int res) ql_dbg(ql_dbg_disc, vha, 0x2019, "GPSC command unsupported, disabling query.\n"); ha->flags.gpsc_supported = 0; - res = QLA_SUCCESS; + goto done; } } else { switch (be16_to_cpu(ct_rsp->rsp.gpsc.speed)) { @@ -3066,13 +3066,13 @@ static void qla24xx_async_gpsc_sp_done(void *s, int res) be16_to_cpu(ct_rsp->rsp.gpsc.speeds), be16_to_cpu(ct_rsp->rsp.gpsc.speed)); } -done: memset(&ea, 0, sizeof(ea)); ea.event = FCME_GPSC_DONE; ea.rc = res; ea.fcport = fcport; qla2x00_fcport_event_handler(vha, &ea); +done: sp->free(sp); } diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 0e154fea693e..a66f7cec797c 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -5145,8 +5145,7 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha) if (test_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags)) break; - if ((fcport->flags & FCF_FABRIC_DEVICE) == 0 || - (fcport->flags & FCF_LOGIN_NEEDED) == 0) + if ((fcport->flags & FCF_FABRIC_DEVICE) == 0) continue; if (fcport->scan_state == QLA_FCPORT_SCAN) { @@ -5171,7 +5170,8 @@ qla2x00_find_all_fabric_devs(scsi_qla_host_t *vha) } } - if (fcport->scan_state == QLA_FCPORT_FOUND) + if (fcport->scan_state == QLA_FCPORT_FOUND && + (fcport->flags & FCF_LOGIN_NEEDED) != 0) qla24xx_fcport_handle_login(vha, fcport); } return (rval); @@ -8092,8 +8092,6 @@ int qla2xxx_delete_qpair(struct scsi_qla_host *vha, struct qla_qpair *qpair) struct qla_hw_data *ha = qpair->hw; qpair->delete_in_progress = 1; - while (atomic_read(&qpair->ref_count)) - msleep(500); ret = qla25xx_delete_req_que(vha, qpair->req); if (ret != QLA_SUCCESS) diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index df94ef816826..b39faf2bfa0d 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1046,8 +1046,6 @@ global_port_update: ql_dbg(ql_dbg_async, vha, 0x5011, "Asynchronous PORT UPDATE ignored %04x/%04x/%04x.\n", mb[1], mb[2], mb[3]); - - qlt_async_event(mb[0], vha, mb); break; } @@ -1065,8 +1063,6 @@ global_port_update: set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); set_bit(VP_CONFIG_OK, &vha->vp_flags); - - qlt_async_event(mb[0], vha, mb); break; case MBA_RSCN_UPDATE: /* State Change Registration */ @@ -1857,6 +1853,18 @@ qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, void *tsk) inbuf = (uint32_t *)&sts->nvme_ersp_data; outbuf = (uint32_t *)fd->rspaddr; iocb->u.nvme.rsp_pyld_len = le16_to_cpu(sts->nvme_rsp_pyld_len); + if (unlikely(iocb->u.nvme.rsp_pyld_len > + sizeof(struct nvme_fc_ersp_iu))) { + if (ql_mask_match(ql_dbg_io)) { + WARN_ONCE(1, "Unexpected response payload length %u.\n", + iocb->u.nvme.rsp_pyld_len); + ql_log(ql_log_warn, fcport->vha, 0x5100, + "Unexpected response payload length %u.\n", + iocb->u.nvme.rsp_pyld_len); + } + iocb->u.nvme.rsp_pyld_len = + sizeof(struct nvme_fc_ersp_iu); + } iter = iocb->u.nvme.rsp_pyld_len >> 2; for (; iter; iter--) *outbuf++ = swab32(*inbuf++); @@ -2792,6 +2800,7 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt) case ELS_IOCB_TYPE: case ABORT_IOCB_TYPE: case MBX_IOCB_TYPE: + default: sp = qla2x00_get_sp_from_handle(vha, func, req, pkt); if (sp) { sp->done(sp, res); @@ -2802,7 +2811,6 @@ qla2x00_error_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, sts_entry_t *pkt) case ABTS_RESP_24XX: case CTIO_TYPE7: case CTIO_CRC2: - default: return 1; } fatal: @@ -3369,10 +3377,8 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp) ha->msix_count, ret); goto msix_out; } else if (ret < ha->msix_count) { - ql_log(ql_log_warn, vha, 0x00c6, - "MSI-X: Failed to enable support " - "with %d vectors, using %d vectors.\n", - ha->msix_count, ret); + ql_log(ql_log_info, vha, 0x00c6, + "MSI-X: Using %d vectors\n", ret); ha->msix_count = ret; /* Recalculate queue values */ if (ha->mqiobase && ql2xmqsupport) { @@ -3525,7 +3531,7 @@ qla2x00_request_irqs(struct qla_hw_data *ha, struct rsp_que *rsp) skip_msix: ql_log(ql_log_info, vha, 0x0037, - "Falling back-to MSI mode -%d.\n", ret); + "Falling back-to MSI mode -- ret=%d.\n", ret); if (!IS_QLA24XX(ha) && !IS_QLA2532(ha) && !IS_QLA8432(ha) && !IS_QLA8001(ha) && !IS_P3P_TYPE(ha) && !IS_QLAFX00(ha) && @@ -3533,13 +3539,13 @@ skip_msix: goto skip_msi; ret = pci_alloc_irq_vectors(ha->pdev, 1, 1, PCI_IRQ_MSI); - if (!ret) { + if (ret > 0) { ql_dbg(ql_dbg_init, vha, 0x0038, "MSI: Enabled.\n"); ha->flags.msi_enabled = 1; } else ql_log(ql_log_warn, vha, 0x0039, - "Falling back-to INTa mode -- %d.\n", ret); + "Falling back-to INTa mode -- ret=%d.\n", ret); skip_msi: /* Skip INTx on ISP82xx. */ diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 929ec087b8eb..5e8ae510aef8 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -624,6 +624,7 @@ qla2x00_execute_fw(scsi_qla_host_t *vha, uint32_t risc_addr) mcp->mb[2] = LSW(risc_addr); mcp->mb[3] = 0; mcp->mb[4] = 0; + mcp->mb[11] = 0; ha->flags.using_lr_setting = 0; if (IS_QLA25XX(ha) || IS_QLA81XX(ha) || IS_QLA83XX(ha) || IS_QLA27XX(ha)) { @@ -667,7 +668,7 @@ qla2x00_execute_fw(scsi_qla_host_t *vha, uint32_t risc_addr) if (ha->flags.exchoffld_enabled) mcp->mb[4] |= ENABLE_EXCHANGE_OFFLD; - mcp->out_mb |= MBX_4|MBX_3|MBX_2|MBX_1; + mcp->out_mb |= MBX_4 | MBX_3 | MBX_2 | MBX_1 | MBX_11; mcp->in_mb |= MBX_3 | MBX_2 | MBX_1; } else { mcp->mb[1] = LSW(risc_addr); @@ -5852,9 +5853,8 @@ qla2x00_dump_mctp_data(scsi_qla_host_t *vha, dma_addr_t req_dma, uint32_t addr, mcp->mb[7] = LSW(MSD(req_dma)); mcp->mb[8] = MSW(addr); /* Setting RAM ID to valid */ - mcp->mb[10] |= BIT_7; /* For MCTP RAM ID is 0x40 */ - mcp->mb[10] |= 0x40; + mcp->mb[10] = BIT_7 | 0x40; mcp->out_mb |= MBX_10|MBX_8|MBX_7|MBX_6|MBX_5|MBX_4|MBX_3|MBX_2|MBX_1| MBX_0; diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 6b33a1f24f56..7dceed021236 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -578,6 +578,7 @@ static void qla_nvme_remoteport_delete(struct nvme_fc_remote_port *rport) } static struct nvme_fc_port_template qla_nvme_fc_transport = { + .module = THIS_MODULE, .localport_delete = qla_nvme_localport_delete, .remoteport_delete = qla_nvme_remoteport_delete, .create_queue = qla_nvme_alloc_queue, diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index a77c33987703..a5b8313cf491 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c @@ -1605,8 +1605,7 @@ qla82xx_get_bootld_offset(struct qla_hw_data *ha) return (u8 *)&ha->hablob->fw->data[offset]; } -static __le32 -qla82xx_get_fw_size(struct qla_hw_data *ha) +static u32 qla82xx_get_fw_size(struct qla_hw_data *ha) { struct qla82xx_uri_data_desc *uri_desc = NULL; @@ -1617,7 +1616,7 @@ qla82xx_get_fw_size(struct qla_hw_data *ha) return cpu_to_le32(uri_desc->size); } - return cpu_to_le32(*(u32 *)&ha->hablob->fw->data[FW_SIZE_OFFSET]); + return get_unaligned_le32(&ha->hablob->fw->data[FW_SIZE_OFFSET]); } static u8 * @@ -1808,7 +1807,7 @@ qla82xx_fw_load_from_blob(struct qla_hw_data *ha) } flashaddr = FLASH_ADDR_START; - size = (__force u32)qla82xx_get_fw_size(ha) / 8; + size = qla82xx_get_fw_size(ha) / 8; ptr64 = (u64 *)qla82xx_get_fw_offs(ha); for (i = 0; i < size; i++) { diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 7d7fb5bbb600..d4024015f859 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3178,6 +3178,10 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) base_vha->mgmt_svr_loop_id, host->sg_tablesize); ha->wq = alloc_workqueue("qla2xxx_wq", WQ_MEM_RECLAIM, 0); + if (unlikely(!ha->wq)) { + ret = -ENOMEM; + goto probe_failed; + } if (ha->mqenable) { bool mq = false; @@ -3437,6 +3441,10 @@ qla2x00_shutdown(struct pci_dev *pdev) /* Stop currently executing firmware. */ qla2x00_try_to_stop_firmware(vha); + /* Disable timer */ + if (vha->timer_active) + qla2x00_stop_timer(vha); + /* Turn adapter off line */ vha->flags.online = 0; @@ -5797,12 +5805,27 @@ qla2x00_do_dpc(void *data) if (test_and_clear_bit (ISP_ABORT_NEEDED, &base_vha->dpc_flags) && !test_bit(UNLOADING, &base_vha->dpc_flags)) { + bool do_reset = true; - ql_dbg(ql_dbg_dpc, base_vha, 0x4007, - "ISP abort scheduled.\n"); - if (!(test_and_set_bit(ABORT_ISP_ACTIVE, + switch (ql2x_ini_mode) { + case QLA2XXX_INI_MODE_ENABLED: + break; + case QLA2XXX_INI_MODE_DISABLED: + if (!qla_tgt_mode_enabled(base_vha)) + do_reset = false; + break; + case QLA2XXX_INI_MODE_DUAL: + if (!qla_dual_mode_enabled(base_vha)) + do_reset = false; + break; + default: + break; + } + + if (do_reset && !(test_and_set_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags))) { - + ql_dbg(ql_dbg_dpc, base_vha, 0x4007, + "ISP abort scheduled.\n"); if (ha->isp_ops->abort_isp(base_vha)) { /* failed. retry later */ set_bit(ISP_ABORT_NEEDED, @@ -5810,10 +5833,9 @@ qla2x00_do_dpc(void *data) } clear_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags); + ql_dbg(ql_dbg_dpc, base_vha, 0x4008, + "ISP abort end.\n"); } - - ql_dbg(ql_dbg_dpc, base_vha, 0x4008, - "ISP abort end.\n"); } if (test_and_clear_bit(FCPORT_UPDATE_NEEDED, @@ -6696,8 +6718,7 @@ qla2x00_module_init(void) /* Initialize target kmem_cache and mem_pools */ ret = qlt_init(); if (ret < 0) { - kmem_cache_destroy(srb_cachep); - return ret; + goto destroy_cache; } else if (ret > 0) { /* * If initiator mode is explictly disabled by qlt_init(), @@ -6718,11 +6739,10 @@ qla2x00_module_init(void) qla2xxx_transport_template = fc_attach_transport(&qla2xxx_transport_functions); if (!qla2xxx_transport_template) { - kmem_cache_destroy(srb_cachep); ql_log(ql_log_fatal, NULL, 0x0002, "fc_attach_transport failed...Failing load!.\n"); - qlt_exit(); - return -ENODEV; + ret = -ENODEV; + goto qlt_exit; } apidev_major = register_chrdev(0, QLA2XXX_APIDEV, &apidev_fops); @@ -6734,27 +6754,37 @@ qla2x00_module_init(void) qla2xxx_transport_vport_template = fc_attach_transport(&qla2xxx_transport_vport_functions); if (!qla2xxx_transport_vport_template) { - kmem_cache_destroy(srb_cachep); - qlt_exit(); - fc_release_transport(qla2xxx_transport_template); ql_log(ql_log_fatal, NULL, 0x0004, "fc_attach_transport vport failed...Failing load!.\n"); - return -ENODEV; + ret = -ENODEV; + goto unreg_chrdev; } ql_log(ql_log_info, NULL, 0x0005, "QLogic Fibre Channel HBA Driver: %s.\n", qla2x00_version_str); ret = pci_register_driver(&qla2xxx_pci_driver); if (ret) { - kmem_cache_destroy(srb_cachep); - qlt_exit(); - fc_release_transport(qla2xxx_transport_template); - fc_release_transport(qla2xxx_transport_vport_template); ql_log(ql_log_fatal, NULL, 0x0006, "pci_register_driver failed...ret=%d Failing load!.\n", ret); + goto release_vport_transport; } return ret; + +release_vport_transport: + fc_release_transport(qla2xxx_transport_vport_template); + +unreg_chrdev: + if (apidev_major >= 0) + unregister_chrdev(apidev_major, QLA2XXX_APIDEV); + fc_release_transport(qla2xxx_transport_template); + +qlt_exit: + qlt_exit(); + +destroy_cache: + kmem_cache_destroy(srb_cachep); + return ret; } /** diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 87e04c4a4982..21011c5fddeb 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -996,6 +996,7 @@ static void qlt_free_session_done(struct work_struct *work) if (logout_started) { bool traced = false; + u16 cnt = 0; while (!ACCESS_ONCE(sess->logout_completed)) { if (!traced) { @@ -1005,6 +1006,9 @@ static void qlt_free_session_done(struct work_struct *work) traced = true; } msleep(100); + cnt++; + if (cnt > 200) + break; } ql_dbg(ql_dbg_disc, vha, 0xf087, @@ -1206,7 +1210,6 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess, "Scheduling sess %p for deletion %8phC\n", sess, sess->port_name); - INIT_WORK(&sess->del_work, qla24xx_delete_sess_fn); queue_work(sess->vha->hw->wq, &sess->del_work); } @@ -2119,14 +2122,14 @@ void qlt_send_resp_ctio(struct qla_qpair *qpair, struct qla_tgt_cmd *cmd, ctio->u.status1.scsi_status |= cpu_to_le16(SS_RESIDUAL_UNDER); - /* Response code and sense key */ - put_unaligned_le32(((0x70 << 24) | (sense_key << 8)), - (&ctio->u.status1.sense_data)[0]); + /* Fixed format sense data. */ + ctio->u.status1.sense_data[0] = 0x70; + ctio->u.status1.sense_data[2] = sense_key; /* Additional sense length */ - put_unaligned_le32(0x0a, (&ctio->u.status1.sense_data)[1]); + ctio->u.status1.sense_data[7] = 0xa; /* ASC and ASCQ */ - put_unaligned_le32(((asc << 24) | (ascq << 16)), - (&ctio->u.status1.sense_data)[3]); + ctio->u.status1.sense_data[12] = asc; + ctio->u.status1.sense_data[13] = ascq; /* Memory Barrier */ wmb(); @@ -2176,7 +2179,7 @@ void qlt_xmit_tm_rsp(struct qla_tgt_mgmt_cmd *mcmd) mcmd->orig_iocb.imm_ntfy.u.isp24.status_subcode == ELS_TPRLO) { ql_dbg(ql_dbg_disc, vha, 0x2106, - "TM response logo %phC status %#x state %#x", + "TM response logo %8phC status %#x state %#x", mcmd->sess->port_name, mcmd->fc_tm_rsp, mcmd->flags); qlt_schedule_sess_for_deletion_lock(mcmd->sess); @@ -5914,7 +5917,6 @@ static void qlt_abort_work(struct qla_tgt *tgt, struct qla_hw_data *ha = vha->hw; struct fc_port *sess = NULL; unsigned long flags = 0, flags2 = 0; - uint32_t be_s_id; uint8_t s_id[3]; int rc; @@ -5927,8 +5929,7 @@ static void qlt_abort_work(struct qla_tgt *tgt, s_id[1] = prm->abts.fcp_hdr_le.s_id[1]; s_id[2] = prm->abts.fcp_hdr_le.s_id[0]; - sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, - (unsigned char *)&be_s_id); + sess = ha->tgt.tgt_ops->find_sess_by_s_id(vha, s_id); if (!sess) { spin_unlock_irqrestore(&ha->tgt.sess_lock, flags2); @@ -6392,7 +6393,8 @@ qlt_enable_vha(struct scsi_qla_host *vha) } else { set_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags); qla2xxx_wake_dpc(base_vha); - qla2x00_wait_for_hba_online(base_vha); + WARN_ON_ONCE(qla2x00_wait_for_hba_online(base_vha) != + QLA_SUCCESS); } } EXPORT_SYMBOL(qlt_enable_vha); @@ -6422,7 +6424,9 @@ static void qlt_disable_vha(struct scsi_qla_host *vha) set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); qla2xxx_wake_dpc(vha); - qla2x00_wait_for_hba_online(vha); + if (qla2x00_wait_for_hba_online(vha) != QLA_SUCCESS) + ql_dbg(ql_dbg_tgt, vha, 0xe081, + "qla2x00_wait_for_hba_online() failed\n"); } /* diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 2fcdaadd10fa..e08ac431bc49 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -903,38 +903,14 @@ static ssize_t tcm_qla2xxx_tpg_enable_show(struct config_item *item, atomic_read(&tpg->lport_tpg_enabled)); } -static void tcm_qla2xxx_depend_tpg(struct work_struct *work) -{ - struct tcm_qla2xxx_tpg *base_tpg = container_of(work, - struct tcm_qla2xxx_tpg, tpg_base_work); - struct se_portal_group *se_tpg = &base_tpg->se_tpg; - struct scsi_qla_host *base_vha = base_tpg->lport->qla_vha; - - if (!target_depend_item(&se_tpg->tpg_group.cg_item)) { - atomic_set(&base_tpg->lport_tpg_enabled, 1); - qlt_enable_vha(base_vha); - } - complete(&base_tpg->tpg_base_comp); -} - -static void tcm_qla2xxx_undepend_tpg(struct work_struct *work) -{ - struct tcm_qla2xxx_tpg *base_tpg = container_of(work, - struct tcm_qla2xxx_tpg, tpg_base_work); - struct se_portal_group *se_tpg = &base_tpg->se_tpg; - struct scsi_qla_host *base_vha = base_tpg->lport->qla_vha; - - if (!qlt_stop_phase1(base_vha->vha_tgt.qla_tgt)) { - atomic_set(&base_tpg->lport_tpg_enabled, 0); - target_undepend_item(&se_tpg->tpg_group.cg_item); - } - complete(&base_tpg->tpg_base_comp); -} - static ssize_t tcm_qla2xxx_tpg_enable_store(struct config_item *item, const char *page, size_t count) { struct se_portal_group *se_tpg = to_tpg(item); + struct se_wwn *se_wwn = se_tpg->se_tpg_wwn; + struct tcm_qla2xxx_lport *lport = container_of(se_wwn, + struct tcm_qla2xxx_lport, lport_wwn); + struct scsi_qla_host *vha = lport->qla_vha; struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg, struct tcm_qla2xxx_tpg, se_tpg); unsigned long op; @@ -953,24 +929,16 @@ static ssize_t tcm_qla2xxx_tpg_enable_store(struct config_item *item, if (atomic_read(&tpg->lport_tpg_enabled)) return -EEXIST; - INIT_WORK(&tpg->tpg_base_work, tcm_qla2xxx_depend_tpg); + atomic_set(&tpg->lport_tpg_enabled, 1); + qlt_enable_vha(vha); } else { if (!atomic_read(&tpg->lport_tpg_enabled)) return count; - INIT_WORK(&tpg->tpg_base_work, tcm_qla2xxx_undepend_tpg); + atomic_set(&tpg->lport_tpg_enabled, 0); + qlt_stop_phase1(vha->vha_tgt.qla_tgt); } - init_completion(&tpg->tpg_base_comp); - schedule_work(&tpg->tpg_base_work); - wait_for_completion(&tpg->tpg_base_comp); - if (op) { - if (!atomic_read(&tpg->lport_tpg_enabled)) - return -ENODEV; - } else { - if (atomic_read(&tpg->lport_tpg_enabled)) - return -EPERM; - } return count; } diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.h b/drivers/scsi/qla2xxx/tcm_qla2xxx.h index 7550ba2831c3..147cf6c90366 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.h +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.h @@ -48,9 +48,6 @@ struct tcm_qla2xxx_tpg { struct tcm_qla2xxx_tpg_attrib tpg_attrib; /* Returned by tcm_qla2xxx_make_tpg() */ struct se_portal_group se_tpg; - /* Items for dealing with configfs_depend_item */ - struct completion tpg_base_comp; - struct work_struct tpg_base_work; }; struct tcm_qla2xxx_fc_loopid { diff --git a/drivers/scsi/qla4xxx/ql4_mbx.c b/drivers/scsi/qla4xxx/ql4_mbx.c index 1da04f323d38..c402fc583da3 100644 --- a/drivers/scsi/qla4xxx/ql4_mbx.c +++ b/drivers/scsi/qla4xxx/ql4_mbx.c @@ -641,9 +641,6 @@ int qla4xxx_initialize_fw_cb(struct scsi_qla_host * ha) if (qla4xxx_get_ifcb(ha, &mbox_cmd[0], &mbox_sts[0], init_fw_cb_dma) != QLA_SUCCESS) { - dma_free_coherent(&ha->pdev->dev, - sizeof(struct addr_ctrl_blk), - init_fw_cb, init_fw_cb_dma); goto exit_init_fw_cb; } diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 4421f9bdfcf7..fb3abaf817a3 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -4150,7 +4150,7 @@ static void qla4xxx_mem_free(struct scsi_qla_host *ha) dma_free_coherent(&ha->pdev->dev, ha->queues_len, ha->queues, ha->queues_dma); - if (ha->fw_dump) + if (ha->fw_dump) vfree(ha->fw_dump); ha->queues_len = 0; @@ -4285,7 +4285,6 @@ static int qla4xxx_mem_alloc(struct scsi_qla_host *ha) return QLA_SUCCESS; mem_alloc_error_exit: - qla4xxx_mem_free(ha); return QLA_ERROR; } diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 92bc5b2d24ae..ac936b5ca74e 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -4960,6 +4960,11 @@ static int __init scsi_debug_init(void) return -EINVAL; } + if (sdebug_num_tgts < 0) { + pr_err("num_tgts must be >= 0\n"); + return -EINVAL; + } + if (sdebug_guard > 1) { pr_err("guard must be 0 or 1\n"); return -EINVAL; diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index cf70f0bb8375..bdec5f429440 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -935,6 +935,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses, ses->sdb = scmd->sdb; ses->next_rq = scmd->request->next_rq; ses->result = scmd->result; + ses->resid_len = scmd->req.resid_len; ses->underflow = scmd->underflow; ses->prot_op = scmd->prot_op; ses->eh_eflags = scmd->eh_eflags; @@ -946,6 +947,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses, memset(&scmd->sdb, 0, sizeof(scmd->sdb)); scmd->request->next_rq = NULL; scmd->result = 0; + scmd->req.resid_len = 0; if (sense_bytes) { scmd->sdb.length = min_t(unsigned, SCSI_SENSE_BUFFERSIZE, @@ -999,6 +1001,7 @@ void scsi_eh_restore_cmnd(struct scsi_cmnd* scmd, struct scsi_eh_save *ses) scmd->sdb = ses->sdb; scmd->request->next_rq = ses->next_rq; scmd->result = ses->result; + scmd->req.resid_len = ses->resid_len; scmd->underflow = ses->underflow; scmd->prot_op = ses->prot_op; scmd->eh_eflags = ses->eh_eflags; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 4e826c0e36b2..365a95d537d6 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2305,7 +2305,8 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost) { unsigned int cmd_size, sgl_size; - sgl_size = scsi_mq_sgl_size(shost); + sgl_size = max_t(unsigned int, sizeof(struct scatterlist), + scsi_mq_sgl_size(shost)); cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size + sgl_size; if (scsi_host_get_prot(shost)) cmd_size += sizeof(struct scsi_data_buffer) + sgl_size; diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index e03252dda249..6182024e25d2 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -722,6 +722,14 @@ sdev_store_delete(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct kernfs_node *kn; + struct scsi_device *sdev = to_scsi_device(dev); + + /* + * We need to try to get module, avoiding the module been removed + * during delete. + */ + if (scsi_device_get(sdev)) + return -ENODEV; kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); WARN_ON_ONCE(!kn); @@ -736,9 +744,10 @@ sdev_store_delete(struct device *dev, struct device_attribute *attr, * state into SDEV_DEL. */ device_remove_file(dev, attr); - scsi_remove_device(to_scsi_device(dev)); + scsi_remove_device(sdev); if (kn) sysfs_unbreak_active_protection(kn); + scsi_device_put(sdev); return count; }; static DEVICE_ATTR(delete, S_IWUSR, NULL, sdev_store_delete); diff --git a/drivers/scsi/scsi_trace.c b/drivers/scsi/scsi_trace.c index 0ff083bbf5b1..22472d140ef7 100644 --- a/drivers/scsi/scsi_trace.c +++ b/drivers/scsi/scsi_trace.c @@ -21,7 +21,7 @@ #include #define SERVICE_ACTION16(cdb) (cdb[1] & 0x1f) -#define SERVICE_ACTION32(cdb) ((cdb[8] << 8) | cdb[9]) +#define SERVICE_ACTION32(cdb) (get_unaligned_be16(&cdb[8])) static const char * scsi_trace_misc(struct trace_seq *, unsigned char *, int); @@ -30,15 +30,18 @@ static const char * scsi_trace_rw6(struct trace_seq *p, unsigned char *cdb, int len) { const char *ret = trace_seq_buffer_ptr(p); - sector_t lba = 0, txlen = 0; + u32 lba = 0, txlen; lba |= ((cdb[1] & 0x1F) << 16); lba |= (cdb[2] << 8); lba |= cdb[3]; - txlen = cdb[4]; + /* + * From SBC-2: a TRANSFER LENGTH field set to zero specifies that 256 + * logical blocks shall be read (READ(6)) or written (WRITE(6)). + */ + txlen = cdb[4] ? cdb[4] : 256; - trace_seq_printf(p, "lba=%llu txlen=%llu", - (unsigned long long)lba, (unsigned long long)txlen); + trace_seq_printf(p, "lba=%u txlen=%u", lba, txlen); trace_seq_putc(p, 0); return ret; @@ -48,17 +51,12 @@ static const char * scsi_trace_rw10(struct trace_seq *p, unsigned char *cdb, int len) { const char *ret = trace_seq_buffer_ptr(p); - sector_t lba = 0, txlen = 0; + u32 lba, txlen; - lba |= (cdb[2] << 24); - lba |= (cdb[3] << 16); - lba |= (cdb[4] << 8); - lba |= cdb[5]; - txlen |= (cdb[7] << 8); - txlen |= cdb[8]; + lba = get_unaligned_be32(&cdb[2]); + txlen = get_unaligned_be16(&cdb[7]); - trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u", - (unsigned long long)lba, (unsigned long long)txlen, + trace_seq_printf(p, "lba=%u txlen=%u protect=%u", lba, txlen, cdb[1] >> 5); if (cdb[0] == WRITE_SAME) @@ -73,19 +71,12 @@ static const char * scsi_trace_rw12(struct trace_seq *p, unsigned char *cdb, int len) { const char *ret = trace_seq_buffer_ptr(p); - sector_t lba = 0, txlen = 0; + u32 lba, txlen; - lba |= (cdb[2] << 24); - lba |= (cdb[3] << 16); - lba |= (cdb[4] << 8); - lba |= cdb[5]; - txlen |= (cdb[6] << 24); - txlen |= (cdb[7] << 16); - txlen |= (cdb[8] << 8); - txlen |= cdb[9]; + lba = get_unaligned_be32(&cdb[2]); + txlen = get_unaligned_be32(&cdb[6]); - trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u", - (unsigned long long)lba, (unsigned long long)txlen, + trace_seq_printf(p, "lba=%u txlen=%u protect=%u", lba, txlen, cdb[1] >> 5); trace_seq_putc(p, 0); @@ -96,23 +87,13 @@ static const char * scsi_trace_rw16(struct trace_seq *p, unsigned char *cdb, int len) { const char *ret = trace_seq_buffer_ptr(p); - sector_t lba = 0, txlen = 0; + u64 lba; + u32 txlen; - lba |= ((u64)cdb[2] << 56); - lba |= ((u64)cdb[3] << 48); - lba |= ((u64)cdb[4] << 40); - lba |= ((u64)cdb[5] << 32); - lba |= (cdb[6] << 24); - lba |= (cdb[7] << 16); - lba |= (cdb[8] << 8); - lba |= cdb[9]; - txlen |= (cdb[10] << 24); - txlen |= (cdb[11] << 16); - txlen |= (cdb[12] << 8); - txlen |= cdb[13]; + lba = get_unaligned_be64(&cdb[2]); + txlen = get_unaligned_be32(&cdb[10]); - trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u", - (unsigned long long)lba, (unsigned long long)txlen, + trace_seq_printf(p, "lba=%llu txlen=%u protect=%u", lba, txlen, cdb[1] >> 5); if (cdb[0] == WRITE_SAME_16) @@ -127,8 +108,8 @@ static const char * scsi_trace_rw32(struct trace_seq *p, unsigned char *cdb, int len) { const char *ret = trace_seq_buffer_ptr(p), *cmd; - sector_t lba = 0, txlen = 0; - u32 ei_lbrt = 0; + u64 lba; + u32 ei_lbrt, txlen; switch (SERVICE_ACTION32(cdb)) { case READ_32: @@ -148,26 +129,12 @@ scsi_trace_rw32(struct trace_seq *p, unsigned char *cdb, int len) goto out; } - lba |= ((u64)cdb[12] << 56); - lba |= ((u64)cdb[13] << 48); - lba |= ((u64)cdb[14] << 40); - lba |= ((u64)cdb[15] << 32); - lba |= (cdb[16] << 24); - lba |= (cdb[17] << 16); - lba |= (cdb[18] << 8); - lba |= cdb[19]; - ei_lbrt |= (cdb[20] << 24); - ei_lbrt |= (cdb[21] << 16); - ei_lbrt |= (cdb[22] << 8); - ei_lbrt |= cdb[23]; - txlen |= (cdb[28] << 24); - txlen |= (cdb[29] << 16); - txlen |= (cdb[30] << 8); - txlen |= cdb[31]; + lba = get_unaligned_be64(&cdb[12]); + ei_lbrt = get_unaligned_be32(&cdb[20]); + txlen = get_unaligned_be32(&cdb[28]); - trace_seq_printf(p, "%s_32 lba=%llu txlen=%llu protect=%u ei_lbrt=%u", - cmd, (unsigned long long)lba, - (unsigned long long)txlen, cdb[10] >> 5, ei_lbrt); + trace_seq_printf(p, "%s_32 lba=%llu txlen=%u protect=%u ei_lbrt=%u", + cmd, lba, txlen, cdb[10] >> 5, ei_lbrt); if (SERVICE_ACTION32(cdb) == WRITE_SAME_32) trace_seq_printf(p, " unmap=%u", cdb[10] >> 3 & 1); @@ -182,7 +149,7 @@ static const char * scsi_trace_unmap(struct trace_seq *p, unsigned char *cdb, int len) { const char *ret = trace_seq_buffer_ptr(p); - unsigned int regions = cdb[7] << 8 | cdb[8]; + unsigned int regions = get_unaligned_be16(&cdb[7]); trace_seq_printf(p, "regions=%u", (regions - 8) / 16); trace_seq_putc(p, 0); @@ -194,8 +161,8 @@ static const char * scsi_trace_service_action_in(struct trace_seq *p, unsigned char *cdb, int len) { const char *ret = trace_seq_buffer_ptr(p), *cmd; - sector_t lba = 0; - u32 alloc_len = 0; + u64 lba; + u32 alloc_len; switch (SERVICE_ACTION16(cdb)) { case SAI_READ_CAPACITY_16: @@ -209,21 +176,10 @@ scsi_trace_service_action_in(struct trace_seq *p, unsigned char *cdb, int len) goto out; } - lba |= ((u64)cdb[2] << 56); - lba |= ((u64)cdb[3] << 48); - lba |= ((u64)cdb[4] << 40); - lba |= ((u64)cdb[5] << 32); - lba |= (cdb[6] << 24); - lba |= (cdb[7] << 16); - lba |= (cdb[8] << 8); - lba |= cdb[9]; - alloc_len |= (cdb[10] << 24); - alloc_len |= (cdb[11] << 16); - alloc_len |= (cdb[12] << 8); - alloc_len |= cdb[13]; + lba = get_unaligned_be64(&cdb[2]); + alloc_len = get_unaligned_be32(&cdb[10]); - trace_seq_printf(p, "%s lba=%llu alloc_len=%u", cmd, - (unsigned long long)lba, alloc_len); + trace_seq_printf(p, "%s lba=%llu alloc_len=%u", cmd, lba, alloc_len); out: trace_seq_putc(p, 0); diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index b4d06bd9ed51..aecb563a2b4e 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -37,6 +37,8 @@ #define ISCSI_TRANSPORT_VERSION "2.0-870" +#define ISCSI_SEND_MAX_ALLOWED 10 + static int dbg_session; module_param_named(debug_session, dbg_session, int, S_IRUGO | S_IWUSR); @@ -2943,6 +2945,24 @@ iscsi_set_path(struct iscsi_transport *transport, struct iscsi_uevent *ev) return err; } +static int iscsi_session_has_conns(int sid) +{ + struct iscsi_cls_conn *conn; + unsigned long flags; + int found = 0; + + spin_lock_irqsave(&connlock, flags); + list_for_each_entry(conn, &connlist, conn_list) { + if (iscsi_conn_get_sid(conn) == sid) { + found = 1; + break; + } + } + spin_unlock_irqrestore(&connlock, flags); + + return found; +} + static int iscsi_set_iface_params(struct iscsi_transport *transport, struct iscsi_uevent *ev, uint32_t len) @@ -3520,10 +3540,12 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) break; case ISCSI_UEVENT_DESTROY_SESSION: session = iscsi_session_lookup(ev->u.d_session.sid); - if (session) - transport->destroy_session(session); - else + if (!session) err = -EINVAL; + else if (iscsi_session_has_conns(ev->u.d_session.sid)) + err = -EBUSY; + else + transport->destroy_session(session); break; case ISCSI_UEVENT_UNBIND_SESSION: session = iscsi_session_lookup(ev->u.d_session.sid); @@ -3680,6 +3702,7 @@ iscsi_if_rx(struct sk_buff *skb) struct nlmsghdr *nlh; struct iscsi_uevent *ev; uint32_t group; + int retries = ISCSI_SEND_MAX_ALLOWED; nlh = nlmsg_hdr(skb); if (nlh->nlmsg_len < sizeof(*nlh) + sizeof(*ev) || @@ -3710,6 +3733,10 @@ iscsi_if_rx(struct sk_buff *skb) break; err = iscsi_if_send_reply(portid, nlh->nlmsg_type, ev, sizeof(*ev)); + if (err == -EAGAIN && --retries < 0) { + printk(KERN_WARNING "Send reply failed, error %d\n", err); + break; + } } while (err < 0 && err != -ECONNREFUSED && err != -ESRCH); skb_pull(skb, rlen); } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index bc1774938468..04d4d3c34a2f 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1679,7 +1679,8 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) /* we need to evaluate the error return */ if (scsi_sense_valid(sshdr) && (sshdr->asc == 0x3a || /* medium not present */ - sshdr->asc == 0x20)) /* invalid command */ + sshdr->asc == 0x20 || /* invalid command */ + (sshdr->asc == 0x74 && sshdr->ascq == 0x71))) /* drive is password locked */ /* this is no error here */ return 0; @@ -1717,20 +1718,30 @@ static void sd_rescan(struct device *dev) static int sd_compat_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { - struct scsi_device *sdev = scsi_disk(bdev->bd_disk)->device; + struct gendisk *disk = bdev->bd_disk; + struct scsi_disk *sdkp = scsi_disk(disk); + struct scsi_device *sdev = sdkp->device; + void __user *p = compat_ptr(arg); int error; + error = scsi_verify_blk_ioctl(bdev, cmd); + if (error < 0) + return error; + error = scsi_ioctl_block_when_processing_errors(sdev, cmd, (mode & FMODE_NDELAY) != 0); if (error) return error; + + if (is_sed_ioctl(cmd)) + return sed_ioctl(sdkp->opal_dev, cmd, p); /* * Let the static ioctl translation table take care of it. */ if (!sdev->host->hostt->compat_ioctl) return -ENOIOCTLCMD; - return sdev->host->hostt->compat_ioctl(sdev, cmd, (void __user *)arg); + return sdev->host->hostt->compat_ioctl(sdev, cmd, p); } #endif @@ -1991,9 +2002,13 @@ static int sd_done(struct scsi_cmnd *SCpnt) } break; case REQ_OP_ZONE_REPORT: + /* To avoid that the block layer performs an incorrect + * bio_advance() call and restart of the remainder of + * incomplete report zone BIOs, always indicate a full + * completion of REQ_OP_ZONE_REPORT. + */ if (!result) { - good_bytes = scsi_bufflen(SCpnt) - - scsi_get_resid(SCpnt); + good_bytes = scsi_bufflen(SCpnt); scsi_set_resid(SCpnt, 0); } else { good_bytes = 0; @@ -2226,8 +2241,10 @@ static int sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer u8 type; int ret = 0; - if (scsi_device_protection(sdp) == 0 || (buffer[12] & 1) == 0) + if (scsi_device_protection(sdp) == 0 || (buffer[12] & 1) == 0) { + sdkp->protection_type = 0; return ret; + } type = ((buffer[12] >> 1) & 7) + 1; /* P_TYPE 0 = Type 1 */ diff --git a/drivers/scsi/sni_53c710.c b/drivers/scsi/sni_53c710.c index 1f9a087daf69..3102a75984d3 100644 --- a/drivers/scsi/sni_53c710.c +++ b/drivers/scsi/sni_53c710.c @@ -78,10 +78,8 @@ static int snirm710_probe(struct platform_device *dev) base = res->start; hostdata = kzalloc(sizeof(*hostdata), GFP_KERNEL); - if (!hostdata) { - dev_printk(KERN_ERR, dev, "Failed to allocate host data\n"); + if (!hostdata) return -ENOMEM; - } hostdata->dev = &dev->dev; dma_set_mask(&dev->dev, DMA_BIT_MASK(32)); diff --git a/drivers/scsi/sun3_scsi.c b/drivers/scsi/sun3_scsi.c index 9492638296c8..af8a7ef9c858 100644 --- a/drivers/scsi/sun3_scsi.c +++ b/drivers/scsi/sun3_scsi.c @@ -498,7 +498,7 @@ static struct scsi_host_template sun3_scsi_template = { .eh_host_reset_handler = sun3scsi_host_reset, .can_queue = 16, .this_id = 7, - .sg_tablesize = SG_NONE, + .sg_tablesize = 1, .cmd_per_lun = 2, .use_clustering = DISABLE_CLUSTERING, .cmd_size = NCR5380_CMD_SIZE, @@ -520,7 +520,7 @@ static int __init sun3_scsi_probe(struct platform_device *pdev) sun3_scsi_template.can_queue = setup_can_queue; if (setup_cmd_per_lun > 0) sun3_scsi_template.cmd_per_lun = setup_cmd_per_lun; - if (setup_sg_tablesize >= 0) + if (setup_sg_tablesize > 0) sun3_scsi_template.sg_tablesize = setup_sg_tablesize; if (setup_hostid >= 0) sun3_scsi_template.this_id = setup_hostid & 7; diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c index 378af306fda1..b87b6c63431d 100644 --- a/drivers/scsi/sym53c8xx_2/sym_hipd.c +++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c @@ -4371,6 +4371,13 @@ static void sym_nego_rejected(struct sym_hcb *np, struct sym_tcb *tp, struct sym OUTB(np, HS_PRT, HS_BUSY); } +#define sym_printk(lvl, tp, cp, fmt, v...) do { \ + if (cp) \ + scmd_printk(lvl, cp->cmd, fmt, ##v); \ + else \ + starget_printk(lvl, tp->starget, fmt, ##v); \ +} while (0) + /* * chip exception handler for programmed interrupts. */ @@ -4416,7 +4423,7 @@ static void sym_int_sir(struct sym_hcb *np) * been selected with ATN. We do not want to handle that. */ case SIR_SEL_ATN_NO_MSG_OUT: - scmd_printk(KERN_WARNING, cp->cmd, + sym_printk(KERN_WARNING, tp, cp, "No MSG OUT phase after selection with ATN\n"); goto out_stuck; /* @@ -4424,7 +4431,7 @@ static void sym_int_sir(struct sym_hcb *np) * having reselected the initiator. */ case SIR_RESEL_NO_MSG_IN: - scmd_printk(KERN_WARNING, cp->cmd, + sym_printk(KERN_WARNING, tp, cp, "No MSG IN phase after reselection\n"); goto out_stuck; /* @@ -4432,7 +4439,7 @@ static void sym_int_sir(struct sym_hcb *np) * an IDENTIFY. */ case SIR_RESEL_NO_IDENTIFY: - scmd_printk(KERN_WARNING, cp->cmd, + sym_printk(KERN_WARNING, tp, cp, "No IDENTIFY after reselection\n"); goto out_stuck; /* @@ -4461,7 +4468,7 @@ static void sym_int_sir(struct sym_hcb *np) case SIR_RESEL_ABORTED: np->lastmsg = np->msgout[0]; np->msgout[0] = M_NOOP; - scmd_printk(KERN_WARNING, cp->cmd, + sym_printk(KERN_WARNING, tp, cp, "message %x sent on bad reselection\n", np->lastmsg); goto out; /* diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig index 76da34ecd329..30c4b9a56fea 100644 --- a/drivers/scsi/ufs/Kconfig +++ b/drivers/scsi/ufs/Kconfig @@ -156,4 +156,13 @@ config SCSI_SKHPB bool "Activate HPB Host-aware Performance Booster" depends on SCSI_UFSHCD help - Activate or deactive SKHPB driver \ No newline at end of file + Activate or deactive SKHPB driver + +config SCSI_UFS_CRYPTO + bool "UFS Crypto Engine Support" + depends on SCSI_UFSHCD && BLK_INLINE_ENCRYPTION + help + Enable Crypto Engine Support in UFS. + Enabling this makes it possible for the kernel to use the crypto + capabilities of the UFS device (if present) to perform crypto + operations on data being transferred to/from the device. diff --git a/drivers/scsi/ufs/Makefile b/drivers/scsi/ufs/Makefile index a280fdcf75aa..7d1dcdb21bd7 100644 --- a/drivers/scsi/ufs/Makefile +++ b/drivers/scsi/ufs/Makefile @@ -20,7 +20,9 @@ ccflags-y += -I$(srctree)/drivers/scsi/ufs/mediatek/$(MTK_PLATFORM) endif obj-$(CONFIG_SCSI_UFS_QCOM) += ufs-qcom.o -obj-$(CONFIG_SCSI_UFSHCD) += ufshcd.o +obj-$(CONFIG_SCSI_UFSHCD) += ufshcd-core.o +ufshcd-core-y := ufshcd.o +ufshcd-core-$(CONFIG_SCSI_UFS_CRYPTO) += ufshcd-crypto.o obj-$(CONFIG_UFSFEATURE) += ufsfeature.o obj-$(CONFIG_UFSHPB) += ufshpb.o obj-$(CONFIG_UFSTW) += ufstw.o @@ -30,4 +32,4 @@ obj-$(CONFIG_SCSI_UFSHCD_PLATFORM) += ufshcd-pltfrm.o obj-$(CONFIG_MTK_UFS_SUPPORT) += ufs-mtk.o obj-$(CONFIG_MTK_UFS_SUPPORT) += mediatek/$(MTK_PLATFORM)/ufs-mtk-platform.o obj-$(CONFIG_MTK_UFS_BLOCK_IO_LOG) += ufs-mtk-block.o -obj-$(CONFIG_MTK_UFS_SUPPORT) += ufs-mtk-dbg.o +obj-$(CONFIG_MTK_UFS_SUPPORT) += ufs-mtk-dbg.o \ No newline at end of file diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index c87d770b519a..55463471651c 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -1103,6 +1103,13 @@ static void ufs_qcom_advertise_quirks(struct ufs_hba *hba) | UFSHCD_QUIRK_DME_PEER_ACCESS_AUTO_MODE | UFSHCD_QUIRK_BROKEN_PA_RXHSUNTERMCAP); } + + /* + * Inline crypto is currently broken with ufs-qcom at least because the + * device tree doesn't include the crypto registers. There are likely + * to be other issues that will need to be addressed too. + */ + hba->quirks |= UFSHCD_QUIRK_BROKEN_CRYPTO; } static void ufs_qcom_set_caps(struct ufs_hba *hba) diff --git a/drivers/scsi/ufs/ufshcd-crypto.c b/drivers/scsi/ufs/ufshcd-crypto.c new file mode 100644 index 000000000000..e3de448c9bbe --- /dev/null +++ b/drivers/scsi/ufs/ufshcd-crypto.c @@ -0,0 +1,497 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Google LLC + */ + +#include +#include "ufshcd.h" +#include "ufshcd-crypto.h" + +static bool ufshcd_cap_idx_valid(struct ufs_hba *hba, unsigned int cap_idx) +{ + return cap_idx < hba->crypto_capabilities.num_crypto_cap; +} + +static u8 get_data_unit_size_mask(unsigned int data_unit_size) +{ + if (data_unit_size < 512 || data_unit_size > 65536 || + !is_power_of_2(data_unit_size)) + return 0; + + return data_unit_size / 512; +} + +static size_t get_keysize_bytes(enum ufs_crypto_key_size size) +{ + switch (size) { + case UFS_CRYPTO_KEY_SIZE_128: + return 16; + case UFS_CRYPTO_KEY_SIZE_192: + return 24; + case UFS_CRYPTO_KEY_SIZE_256: + return 32; + case UFS_CRYPTO_KEY_SIZE_512: + return 64; + default: + return 0; + } +} + +int ufshcd_crypto_cap_find(struct ufs_hba *hba, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size) +{ + enum ufs_crypto_alg ufs_alg; + u8 data_unit_mask; + int cap_idx; + enum ufs_crypto_key_size ufs_key_size; + union ufs_crypto_cap_entry *ccap_array = hba->crypto_cap_array; + + if (!ufshcd_hba_is_crypto_supported(hba)) + return -EINVAL; + + switch (crypto_mode) { + case BLK_ENCRYPTION_MODE_AES_256_XTS: + ufs_alg = UFS_CRYPTO_ALG_AES_XTS; + ufs_key_size = UFS_CRYPTO_KEY_SIZE_256; + break; + default: + return -EINVAL; + } + + data_unit_mask = get_data_unit_size_mask(data_unit_size); + + for (cap_idx = 0; cap_idx < hba->crypto_capabilities.num_crypto_cap; + cap_idx++) { + if (ccap_array[cap_idx].algorithm_id == ufs_alg && + (ccap_array[cap_idx].sdus_mask & data_unit_mask) && + ccap_array[cap_idx].key_size == ufs_key_size) + return cap_idx; + } + + return -EINVAL; +} +EXPORT_SYMBOL(ufshcd_crypto_cap_find); + +/** + * ufshcd_crypto_cfg_entry_write_key - Write a key into a crypto_cfg_entry + * + * Writes the key with the appropriate format - for AES_XTS, + * the first half of the key is copied as is, the second half is + * copied with an offset halfway into the cfg->crypto_key array. + * For the other supported crypto algs, the key is just copied. + * + * @cfg: The crypto config to write to + * @key: The key to write + * @cap: The crypto capability (which specifies the crypto alg and key size) + * + * Returns 0 on success, or -EINVAL + */ +static int ufshcd_crypto_cfg_entry_write_key(union ufs_crypto_cfg_entry *cfg, + const u8 *key, + union ufs_crypto_cap_entry cap) +{ + size_t key_size_bytes = get_keysize_bytes(cap.key_size); + + if (key_size_bytes == 0) + return -EINVAL; + + switch (cap.algorithm_id) { + case UFS_CRYPTO_ALG_AES_XTS: + key_size_bytes *= 2; + if (key_size_bytes > UFS_CRYPTO_KEY_MAX_SIZE) + return -EINVAL; + + memcpy(cfg->crypto_key, key, key_size_bytes/2); + memcpy(cfg->crypto_key + UFS_CRYPTO_KEY_MAX_SIZE/2, + key + key_size_bytes/2, key_size_bytes/2); + return 0; + case UFS_CRYPTO_ALG_BITLOCKER_AES_CBC: + /* fall through */ + case UFS_CRYPTO_ALG_AES_ECB: + /* fall through */ + case UFS_CRYPTO_ALG_ESSIV_AES_CBC: + memcpy(cfg->crypto_key, key, key_size_bytes); + return 0; + } + + return -EINVAL; +} + +static int ufshcd_program_key(struct ufs_hba *hba, + const union ufs_crypto_cfg_entry *cfg, int slot) +{ + int i; + u32 slot_offset = hba->crypto_cfg_register + slot * sizeof(*cfg); + int err; + + ufshcd_hold(hba, false); + + if (hba->vops->program_key) { + err = hba->vops->program_key(hba, cfg, slot); + goto out; + } + + /* Clear the dword 16 */ + ufshcd_writel(hba, 0, slot_offset + 16 * sizeof(cfg->reg_val[0])); + /* Ensure that CFGE is cleared before programming the key */ + wmb(); + for (i = 0; i < 16; i++) { + ufshcd_writel(hba, le32_to_cpu(cfg->reg_val[i]), + slot_offset + i * sizeof(cfg->reg_val[0])); + /* Spec says each dword in key must be written sequentially */ + wmb(); + } + /* Write dword 17 */ + ufshcd_writel(hba, le32_to_cpu(cfg->reg_val[17]), + slot_offset + 17 * sizeof(cfg->reg_val[0])); + /* Dword 16 must be written last */ + wmb(); + /* Write dword 16 */ + ufshcd_writel(hba, le32_to_cpu(cfg->reg_val[16]), + slot_offset + 16 * sizeof(cfg->reg_val[0])); + wmb(); + err = 0; +out: + ufshcd_release(hba); + return err; +} + +static void ufshcd_clear_keyslot(struct ufs_hba *hba, int slot) +{ + union ufs_crypto_cfg_entry cfg = { 0 }; + int err; + + err = ufshcd_program_key(hba, &cfg, slot); + WARN_ON_ONCE(err); +} + +/* Clear all keyslots at driver init time */ +static void ufshcd_clear_all_keyslots(struct ufs_hba *hba) +{ + int slot; + + for (slot = 0; slot < ufshcd_num_keyslots(hba); slot++) + ufshcd_clear_keyslot(hba, slot); +} + +static int ufshcd_crypto_keyslot_program(struct keyslot_manager *ksm, + const struct blk_crypto_key *key, + unsigned int slot) +{ + struct ufs_hba *hba = keyslot_manager_private(ksm); + int err = 0; + u8 data_unit_mask; + union ufs_crypto_cfg_entry cfg; + int cap_idx; + + cap_idx = ufshcd_crypto_cap_find(hba, key->crypto_mode, + key->data_unit_size); + + if (!ufshcd_is_crypto_enabled(hba) || + !ufshcd_keyslot_valid(hba, slot) || + !ufshcd_cap_idx_valid(hba, cap_idx)) + return -EINVAL; + + data_unit_mask = get_data_unit_size_mask(key->data_unit_size); + + if (!(data_unit_mask & hba->crypto_cap_array[cap_idx].sdus_mask)) + return -EINVAL; + + memset(&cfg, 0, sizeof(cfg)); + cfg.data_unit_size = data_unit_mask; + cfg.crypto_cap_idx = cap_idx; + cfg.config_enable |= UFS_CRYPTO_CONFIGURATION_ENABLE; + + err = ufshcd_crypto_cfg_entry_write_key(&cfg, key->raw, + hba->crypto_cap_array[cap_idx]); + if (err) + return err; + + err = ufshcd_program_key(hba, &cfg, slot); + + memzero_explicit(&cfg, sizeof(cfg)); + + return err; +} + +static int ufshcd_crypto_keyslot_evict(struct keyslot_manager *ksm, + const struct blk_crypto_key *key, + unsigned int slot) +{ + struct ufs_hba *hba = keyslot_manager_private(ksm); + + if (!ufshcd_is_crypto_enabled(hba) || + !ufshcd_keyslot_valid(hba, slot)) + return -EINVAL; + + /* + * Clear the crypto cfg on the device. Clearing CFGE + * might not be sufficient, so just clear the entire cfg. + */ + ufshcd_clear_keyslot(hba, slot); + + return 0; +} + +/* Functions implementing UFSHCI v2.1 specification behaviour */ +void ufshcd_crypto_enable_spec(struct ufs_hba *hba) +{ + if (!ufshcd_hba_is_crypto_supported(hba)) + return; + + hba->caps |= UFSHCD_CAP_CRYPTO; + + /* Reset might clear all keys, so reprogram all the keys. */ + keyslot_manager_reprogram_all_keys(hba->ksm); +} +EXPORT_SYMBOL_GPL(ufshcd_crypto_enable_spec); + +void ufshcd_crypto_disable_spec(struct ufs_hba *hba) +{ + hba->caps &= ~UFSHCD_CAP_CRYPTO; +} +EXPORT_SYMBOL_GPL(ufshcd_crypto_disable_spec); + +static const struct keyslot_mgmt_ll_ops ufshcd_ksm_ops = { + .keyslot_program = ufshcd_crypto_keyslot_program, + .keyslot_evict = ufshcd_crypto_keyslot_evict, +}; + +enum blk_crypto_mode_num ufshcd_blk_crypto_mode_num_for_alg_dusize( + enum ufs_crypto_alg ufs_crypto_alg, + enum ufs_crypto_key_size key_size) +{ + /* + * This is currently the only mode that UFS and blk-crypto both support. + */ + if (ufs_crypto_alg == UFS_CRYPTO_ALG_AES_XTS && + key_size == UFS_CRYPTO_KEY_SIZE_256) + return BLK_ENCRYPTION_MODE_AES_256_XTS; + + return BLK_ENCRYPTION_MODE_INVALID; +} + +/** + * ufshcd_hba_init_crypto - Read crypto capabilities, init crypto fields in hba + * @hba: Per adapter instance + * + * Return: 0 if crypto was initialized or is not supported, else a -errno value. + */ +int ufshcd_hba_init_crypto_spec(struct ufs_hba *hba, + const struct keyslot_mgmt_ll_ops *ksm_ops) +{ + int cap_idx = 0; + int err = 0; + unsigned int crypto_modes_supported[BLK_ENCRYPTION_MODE_MAX]; + enum blk_crypto_mode_num blk_mode_num; + + /* Default to disabling crypto */ + hba->caps &= ~UFSHCD_CAP_CRYPTO; + + /* Return 0 if crypto support isn't present */ + if (!(hba->capabilities & MASK_CRYPTO_SUPPORT) || + (hba->quirks & UFSHCD_QUIRK_BROKEN_CRYPTO)) + goto out; + + /* + * Crypto Capabilities should never be 0, because the + * config_array_ptr > 04h. So we use a 0 value to indicate that + * crypto init failed, and can't be enabled. + */ + hba->crypto_capabilities.reg_val = + cpu_to_le32(ufshcd_readl(hba, REG_UFS_CCAP)); + hba->crypto_cfg_register = + (u32)hba->crypto_capabilities.config_array_ptr * 0x100; + hba->crypto_cap_array = + devm_kcalloc(hba->dev, + hba->crypto_capabilities.num_crypto_cap, + sizeof(hba->crypto_cap_array[0]), + GFP_KERNEL); + if (!hba->crypto_cap_array) { + err = -ENOMEM; + goto out; + } + + memset(crypto_modes_supported, 0, sizeof(crypto_modes_supported)); + /* + * Store all the capabilities now so that we don't need to repeatedly + * access the device each time we want to know its capabilities + */ + for (cap_idx = 0; cap_idx < hba->crypto_capabilities.num_crypto_cap; + cap_idx++) { + hba->crypto_cap_array[cap_idx].reg_val = + cpu_to_le32(ufshcd_readl(hba, + REG_UFS_CRYPTOCAP + + cap_idx * sizeof(__le32))); + blk_mode_num = ufshcd_blk_crypto_mode_num_for_alg_dusize( + hba->crypto_cap_array[cap_idx].algorithm_id, + hba->crypto_cap_array[cap_idx].key_size); + if (blk_mode_num == BLK_ENCRYPTION_MODE_INVALID) + continue; + crypto_modes_supported[blk_mode_num] |= + hba->crypto_cap_array[cap_idx].sdus_mask * 512; + } + + ufshcd_clear_all_keyslots(hba); + + hba->ksm = keyslot_manager_create(hba->dev, ufshcd_num_keyslots(hba), + ksm_ops, crypto_modes_supported, hba); + + if (!hba->ksm) { + err = -ENOMEM; + goto out_free_caps; + } + + return 0; + +out_free_caps: + devm_kfree(hba->dev, hba->crypto_cap_array); +out: + /* Indicate that init failed by setting crypto_capabilities to 0 */ + hba->crypto_capabilities.reg_val = 0; + return err; +} +EXPORT_SYMBOL_GPL(ufshcd_hba_init_crypto_spec); + +void ufshcd_crypto_setup_rq_keyslot_manager_spec(struct ufs_hba *hba, + struct request_queue *q) +{ + if (!ufshcd_hba_is_crypto_supported(hba) || !q) + return; + + q->ksm = hba->ksm; +} +EXPORT_SYMBOL_GPL(ufshcd_crypto_setup_rq_keyslot_manager_spec); + +void ufshcd_crypto_destroy_rq_keyslot_manager_spec(struct ufs_hba *hba, + struct request_queue *q) +{ + keyslot_manager_destroy(hba->ksm); +} +EXPORT_SYMBOL_GPL(ufshcd_crypto_destroy_rq_keyslot_manager_spec); + +int ufshcd_prepare_lrbp_crypto_spec(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp) +{ + struct bio_crypt_ctx *bc; + + if (!bio_crypt_should_process(cmd->request)) { + lrbp->crypto_enable = false; + return 0; + } + bc = cmd->request->bio->bi_crypt_context; + + if (WARN_ON(!ufshcd_is_crypto_enabled(hba))) { + /* + * Upper layer asked us to do inline encryption + * but that isn't enabled, so we fail this request. + */ + return -EINVAL; + } + if (!ufshcd_keyslot_valid(hba, bc->bc_keyslot)) + return -EINVAL; + + lrbp->crypto_enable = true; + lrbp->crypto_key_slot = bc->bc_keyslot; + lrbp->data_unit_num = bc->bc_dun[0]; + + return 0; +} +EXPORT_SYMBOL_GPL(ufshcd_prepare_lrbp_crypto_spec); + +/* Crypto Variant Ops Support */ + +void ufshcd_crypto_enable(struct ufs_hba *hba) +{ + if (hba->crypto_vops && hba->crypto_vops->enable) + return hba->crypto_vops->enable(hba); + + return ufshcd_crypto_enable_spec(hba); +} + +void ufshcd_crypto_disable(struct ufs_hba *hba) +{ + if (hba->crypto_vops && hba->crypto_vops->disable) + return hba->crypto_vops->disable(hba); + + return ufshcd_crypto_disable_spec(hba); +} + +int ufshcd_hba_init_crypto(struct ufs_hba *hba) +{ + if (hba->crypto_vops && hba->crypto_vops->hba_init_crypto) + return hba->crypto_vops->hba_init_crypto(hba, + &ufshcd_ksm_ops); + + return ufshcd_hba_init_crypto_spec(hba, &ufshcd_ksm_ops); +} + +void ufshcd_crypto_setup_rq_keyslot_manager(struct ufs_hba *hba, + struct request_queue *q) +{ + if (hba->crypto_vops && hba->crypto_vops->setup_rq_keyslot_manager) + return hba->crypto_vops->setup_rq_keyslot_manager(hba, q); + + return ufshcd_crypto_setup_rq_keyslot_manager_spec(hba, q); +} + +void ufshcd_crypto_destroy_rq_keyslot_manager(struct ufs_hba *hba, + struct request_queue *q) +{ + if (hba->crypto_vops && hba->crypto_vops->destroy_rq_keyslot_manager) + return hba->crypto_vops->destroy_rq_keyslot_manager(hba, q); + + return ufshcd_crypto_destroy_rq_keyslot_manager_spec(hba, q); +} + +int ufshcd_prepare_lrbp_crypto(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp) +{ + if (hba->crypto_vops && hba->crypto_vops->prepare_lrbp_crypto) + return hba->crypto_vops->prepare_lrbp_crypto(hba, cmd, lrbp); + + return ufshcd_prepare_lrbp_crypto_spec(hba, cmd, lrbp); +} + +int ufshcd_complete_lrbp_crypto(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp) +{ + if (hba->crypto_vops && hba->crypto_vops->complete_lrbp_crypto) + return hba->crypto_vops->complete_lrbp_crypto(hba, cmd, lrbp); + + return 0; +} + +void ufshcd_crypto_debug(struct ufs_hba *hba) +{ + if (hba->crypto_vops && hba->crypto_vops->debug) + hba->crypto_vops->debug(hba); +} + +int ufshcd_crypto_suspend(struct ufs_hba *hba, + enum ufs_pm_op pm_op) +{ + if (hba->crypto_vops && hba->crypto_vops->suspend) + return hba->crypto_vops->suspend(hba, pm_op); + + return 0; +} + +int ufshcd_crypto_resume(struct ufs_hba *hba, + enum ufs_pm_op pm_op) +{ + if (hba->crypto_vops && hba->crypto_vops->resume) + return hba->crypto_vops->resume(hba, pm_op); + + return 0; +} + +void ufshcd_crypto_set_vops(struct ufs_hba *hba, + struct ufs_hba_crypto_variant_ops *crypto_vops) +{ + hba->crypto_vops = crypto_vops; +} diff --git a/drivers/scsi/ufs/ufshcd-crypto.h b/drivers/scsi/ufs/ufshcd-crypto.h new file mode 100644 index 000000000000..95f37c9f7672 --- /dev/null +++ b/drivers/scsi/ufs/ufshcd-crypto.h @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ + +#ifndef _UFSHCD_CRYPTO_H +#define _UFSHCD_CRYPTO_H + +#ifdef CONFIG_SCSI_UFS_CRYPTO +#include +#include "ufshcd.h" +#include "ufshci.h" + +static inline int ufshcd_num_keyslots(struct ufs_hba *hba) +{ + return hba->crypto_capabilities.config_count + 1; +} + +static inline bool ufshcd_keyslot_valid(struct ufs_hba *hba, unsigned int slot) +{ + /* + * The actual number of configurations supported is (CFGC+1), so slot + * numbers range from 0 to config_count inclusive. + */ + return slot < ufshcd_num_keyslots(hba); +} + +static inline bool ufshcd_hba_is_crypto_supported(struct ufs_hba *hba) +{ + return hba->crypto_capabilities.reg_val != 0; +} + +static inline bool ufshcd_is_crypto_enabled(struct ufs_hba *hba) +{ + return hba->caps & UFSHCD_CAP_CRYPTO; +} + +/* Functions implementing UFSHCI v2.1 specification behaviour */ +int ufshcd_crypto_cap_find(struct ufs_hba *hba, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size); + +int ufshcd_prepare_lrbp_crypto_spec(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp); + +void ufshcd_crypto_enable_spec(struct ufs_hba *hba); + +void ufshcd_crypto_disable_spec(struct ufs_hba *hba); + +struct keyslot_mgmt_ll_ops; +int ufshcd_hba_init_crypto_spec(struct ufs_hba *hba, + const struct keyslot_mgmt_ll_ops *ksm_ops); + +void ufshcd_crypto_setup_rq_keyslot_manager_spec(struct ufs_hba *hba, + struct request_queue *q); + +void ufshcd_crypto_destroy_rq_keyslot_manager_spec(struct ufs_hba *hba, + struct request_queue *q); + +static inline bool ufshcd_lrbp_crypto_enabled(struct ufshcd_lrb *lrbp) +{ + return lrbp->crypto_enable; +} + +/* Crypto Variant Ops Support */ +void ufshcd_crypto_enable(struct ufs_hba *hba); + +void ufshcd_crypto_disable(struct ufs_hba *hba); + +int ufshcd_hba_init_crypto(struct ufs_hba *hba); + +void ufshcd_crypto_setup_rq_keyslot_manager(struct ufs_hba *hba, + struct request_queue *q); + +void ufshcd_crypto_destroy_rq_keyslot_manager(struct ufs_hba *hba, + struct request_queue *q); + +int ufshcd_prepare_lrbp_crypto(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp); + +int ufshcd_complete_lrbp_crypto(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp); + +void ufshcd_crypto_debug(struct ufs_hba *hba); + +int ufshcd_crypto_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op); + +int ufshcd_crypto_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op); + +void ufshcd_crypto_set_vops(struct ufs_hba *hba, + struct ufs_hba_crypto_variant_ops *crypto_vops); + +#else /* CONFIG_SCSI_UFS_CRYPTO */ + +static inline bool ufshcd_keyslot_valid(struct ufs_hba *hba, + unsigned int slot) +{ + return false; +} + +static inline bool ufshcd_hba_is_crypto_supported(struct ufs_hba *hba) +{ + return false; +} + +static inline bool ufshcd_is_crypto_enabled(struct ufs_hba *hba) +{ + return false; +} + +static inline void ufshcd_crypto_enable(struct ufs_hba *hba) { } + +static inline void ufshcd_crypto_disable(struct ufs_hba *hba) { } + +static inline int ufshcd_hba_init_crypto(struct ufs_hba *hba) +{ + return 0; +} + +static inline void ufshcd_crypto_setup_rq_keyslot_manager(struct ufs_hba *hba, + struct request_queue *q) { } + +static inline void ufshcd_crypto_destroy_rq_keyslot_manager(struct ufs_hba *hba, + struct request_queue *q) { } + +static inline int ufshcd_prepare_lrbp_crypto(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp) +{ + return 0; +} + +static inline bool ufshcd_lrbp_crypto_enabled(struct ufshcd_lrb *lrbp) +{ + return false; +} + +static inline int ufshcd_complete_lrbp_crypto(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp) +{ + return 0; +} + +static inline void ufshcd_crypto_debug(struct ufs_hba *hba) { } + +static inline int ufshcd_crypto_suspend(struct ufs_hba *hba, + enum ufs_pm_op pm_op) +{ + return 0; +} + +static inline int ufshcd_crypto_resume(struct ufs_hba *hba, + enum ufs_pm_op pm_op) +{ + return 0; +} + +static inline void ufshcd_crypto_set_vops(struct ufs_hba *hba, + struct ufs_hba_crypto_variant_ops *crypto_vops) { } + +#endif /* CONFIG_SCSI_UFS_CRYPTO */ + +#endif /* _UFSHCD_CRYPTO_H */ diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 45264042d4b4..6ae6e0c528a0 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -45,6 +45,7 @@ #include "ufshcd.h" #include "ufs_quirks.h" #include "unipro.h" +#include "ufshcd-crypto.h" /* MTK PATCH */ #include @@ -652,6 +653,8 @@ static void ufshcd_print_host_regs(struct ufs_hba *hba) if (hba->vops && hba->vops->dbg_register_dump) hba->vops->dbg_register_dump(hba); + + ufshcd_crypto_debug(hba); } static @@ -681,8 +684,11 @@ void ufshcd_print_trs(struct ufs_hba *hba, unsigned long bitmap, bool pr_prdt) ufshcd_hex_dump("UPIU RSP: ", lrbp->ucd_rsp_ptr, sizeof(struct utp_upiu_rsp)); - prdt_length = le16_to_cpu( - lrbp->utr_descriptor_ptr->prd_table_length); + prdt_length = + le16_to_cpu(lrbp->utr_descriptor_ptr->prd_table_length); + if (hba->quirks & UFSHCD_QUIRK_PRDT_BYTE_GRAN) + prdt_length /= hba->sg_entry_size; + dev_err(hba->dev, "UPIU[%d] - PRDT - %d entries phys@0x%llx\n", tag, prdt_length, @@ -690,7 +696,7 @@ void ufshcd_print_trs(struct ufs_hba *hba, unsigned long bitmap, bool pr_prdt) if (pr_prdt) ufshcd_hex_dump("UPIU PRDT: ", lrbp->ucd_prdt_ptr, - sizeof(struct ufshcd_sg_entry) * prdt_length); + hba->sg_entry_size * prdt_length); } } @@ -1130,7 +1136,14 @@ static void ufshcd_enable_run_stop_reg(struct ufs_hba *hba) */ static inline void ufshcd_hba_start(struct ufs_hba *hba) { - ufshcd_writel(hba, CONTROLLER_ENABLE, REG_CONTROLLER_ENABLE); + u32 val = CONTROLLER_ENABLE; + + if (ufshcd_hba_is_crypto_supported(hba)) { + ufshcd_crypto_enable(hba); + val |= CRYPTO_GENERAL_ENABLE; + } + + ufshcd_writel(hba, val, REG_CONTROLLER_ENABLE); } /** @@ -2363,7 +2376,7 @@ int ufshcd_map_sg(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) static int ufshcd_map_sg(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) #endif { - struct ufshcd_sg_entry *prd_table; + struct ufshcd_sg_entry *prd; struct scatterlist *sg; struct scsi_cmnd *cmd; int sg_segments; @@ -2378,21 +2391,22 @@ static int ufshcd_map_sg(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) if (hba->quirks & UFSHCD_QUIRK_PRDT_BYTE_GRAN) lrbp->utr_descriptor_ptr->prd_table_length = cpu_to_le16((u16)(sg_segments * - sizeof(struct ufshcd_sg_entry))); + hba->sg_entry_size)); else lrbp->utr_descriptor_ptr->prd_table_length = cpu_to_le16((u16) (sg_segments)); - prd_table = (struct ufshcd_sg_entry *)lrbp->ucd_prdt_ptr; + prd = (struct ufshcd_sg_entry *)lrbp->ucd_prdt_ptr; scsi_for_each_sg(cmd, sg, sg_segments, i) { - prd_table[i].size = + prd->size = cpu_to_le32(((u32) sg_dma_len(sg))-1); - prd_table[i].base_addr = + prd->base_addr = cpu_to_le32(lower_32_bits(sg->dma_address)); - prd_table[i].upper_addr = + prd->upper_addr = cpu_to_le32(upper_32_bits(sg->dma_address)); - prd_table[i].reserved = 0; + prd->reserved = 0; + prd = (void *)prd + hba->sg_entry_size; } } else { lrbp->utr_descriptor_ptr->prd_table_length = 0; @@ -2491,13 +2505,23 @@ static void ufshcd_prepare_req_desc_hdr(struct ufs_hba *hba, dword_0 |= UTP_REQ_DESC_INT_CMD; /* Transfer request descriptor header fields */ + if (ufshcd_lrbp_crypto_enabled(lrbp)) { +#if IS_ENABLED(CONFIG_SCSI_UFS_CRYPTO) + dword_0 |= UTP_REQ_DESC_CRYPTO_ENABLE_CMD; + dword_0 |= lrbp->crypto_key_slot; + req_desc->header.dword_1 = + cpu_to_le32(lower_32_bits(lrbp->data_unit_num)); + req_desc->header.dword_3 = + cpu_to_le32(upper_32_bits(lrbp->data_unit_num)); +#endif /* CONFIG_SCSI_UFS_CRYPTO */ + } else { + /* dword_1 and dword_3 are reserved, hence they are set to 0 */ + req_desc->header.dword_1 = 0; + req_desc->header.dword_3 = 0; + } + req_desc->header.dword_0 = cpu_to_le32(dword_0); - /* MTK PATCH: dword_1 is not reserved if crypto_en */ - if (lrbp->crypto_en) - req_desc->header.dword_1 = cpu_to_le32(lrbp->crypto_dunl); - else - req_desc->header.dword_1 = 0; /* * assigning invalid value for command status. Controller * updates OCS on command completion, with the command @@ -2506,12 +2530,6 @@ static void ufshcd_prepare_req_desc_hdr(struct ufs_hba *hba, req_desc->header.dword_2 = cpu_to_le32(OCS_INVALID_COMMAND_STATUS); - /* MTK PATCH: dword_3 is not reserved if crypto_en */ - if (lrbp->crypto_en) - req_desc->header.dword_3 = cpu_to_le32(lrbp->crypto_dunu); - else - req_desc->header.dword_3 = 0; - req_desc->prd_table_length = 0; } @@ -2852,6 +2870,13 @@ send_orig_cmd: lrbp->task_tag = tag; lrbp->lun = ufshcd_scsi_to_upiu_lun(cmd->device->lun); lrbp->intr_cmd = !ufshcd_is_intr_aggr_allowed(hba) ? true : false; + + err = ufshcd_prepare_lrbp_crypto(hba, cmd, lrbp); + if (err) { + lrbp->cmd = NULL; + clear_bit_unlock(tag, &hba->lrb_in_use); + goto out; + } lrbp->req_abort_skip = false; /* reset crypto_en first and set it later only on encrypted request */ @@ -2955,6 +2980,9 @@ static int ufshcd_compose_dev_cmd(struct ufs_hba *hba, lrbp->task_tag = tag; lrbp->lun = 0; /* device management cmd is not specific to any LUN */ lrbp->intr_cmd = true; /* No interrupt aggregation */ +#if IS_ENABLED(CONFIG_SCSI_UFS_CRYPTO) + lrbp->crypto_enable = false; /* No crypto operations */ +#endif hba->dev_cmd.type = cmd_type; lrbp->crypto_en = 0; /* device command shall not enable crypto */ @@ -3458,10 +3486,10 @@ static int __ufshcd_query_descriptor(struct ufs_hba *hba, goto out_unlock; } - hba->dev_cmd.query.descriptor = NULL; *buf_len = be16_to_cpu(response->upiu_res.length); out_unlock: + hba->dev_cmd.query.descriptor = NULL; mutex_unlock(&hba->dev_cmd.lock); out: ufshcd_release(hba); @@ -3824,7 +3852,7 @@ static int ufshcd_memory_alloc(struct ufs_hba *hba) size_t utmrdl_size, utrdl_size, ucdl_size; /* Allocate memory for UTP command descriptors */ - ucdl_size = (sizeof(struct utp_transfer_cmd_desc) * hba->nutrs); + ucdl_size = (sizeof_utp_transfer_cmd_desc(hba) * hba->nutrs); hba->ucdl_base_addr = dmam_alloc_coherent(hba->dev, ucdl_size, &hba->ucdl_dma_addr, @@ -3920,7 +3948,7 @@ static void ufshcd_host_memory_configure(struct ufs_hba *hba) prdt_offset = offsetof(struct utp_transfer_cmd_desc, prd_table); - cmd_desc_size = sizeof(struct utp_transfer_cmd_desc); + cmd_desc_size = sizeof_utp_transfer_cmd_desc(hba); cmd_desc_dma_addr = hba->ucdl_dma_addr; for (i = 0; i < hba->nutrs; i++) { @@ -3952,17 +3980,17 @@ static void ufshcd_host_memory_configure(struct ufs_hba *hba) hba->lrb[i].utr_descriptor_ptr = (utrdlp + i); hba->lrb[i].utrd_dma_addr = hba->utrdl_dma_addr + (i * sizeof(struct utp_transfer_req_desc)); - hba->lrb[i].ucd_req_ptr = - (struct utp_upiu_req *)(cmd_descp + i); + hba->lrb[i].ucd_req_ptr = (struct utp_upiu_req *)cmd_descp; hba->lrb[i].ucd_req_dma_addr = cmd_desc_element_addr; hba->lrb[i].ucd_rsp_ptr = - (struct utp_upiu_rsp *)cmd_descp[i].response_upiu; + (struct utp_upiu_rsp *)cmd_descp->response_upiu; hba->lrb[i].ucd_rsp_dma_addr = cmd_desc_element_addr + response_offset; hba->lrb[i].ucd_prdt_ptr = - (struct ufshcd_sg_entry *)cmd_descp[i].prd_table; + (struct ufshcd_sg_entry *)cmd_descp->prd_table; hba->lrb[i].ucd_prdt_dma_addr = cmd_desc_element_addr + prdt_offset; + cmd_descp = (void *)cmd_descp + cmd_desc_size; } } @@ -4361,8 +4389,8 @@ static int __ufshcd_uic_hibern8_enter(struct ufs_hba *hba) __func__, ret); /* - * If link recovery fails then return error code (-ENOLINK) - * returned ufshcd_link_recovery(). + * If link recovery fails then return error code returned from + * ufshcd_link_recovery(). * If link recovery succeeds then return -EAGAIN to attempt * hibern8 enter retry again. */ @@ -4389,7 +4417,7 @@ static int ufshcd_uic_hibern8_enter(struct ufs_hba *hba) for (retries = UIC_HIBERN8_ENTER_RETRIES; retries > 0; retries--) { ret = __ufshcd_uic_hibern8_enter(hba); - if (!ret || ret == -ENOLINK) + if (!ret) goto out; } out: @@ -4720,6 +4748,8 @@ static inline void ufshcd_hba_stop(struct ufs_hba *hba, bool can_sleep) { int err; + ufshcd_crypto_disable(hba); + ufshcd_writel(hba, CONTROLLER_DISABLE, REG_CONTROLLER_ENABLE); err = ufshcd_wait_for_register(hba, REG_CONTROLLER_ENABLE, CONTROLLER_ENABLE, CONTROLLER_DISABLE, @@ -5195,6 +5225,8 @@ static int ufshcd_slave_configure(struct scsi_device *sdev) } #endif + ufshcd_crypto_setup_rq_keyslot_manager(hba, q); + return 0; } @@ -5205,6 +5237,7 @@ static int ufshcd_slave_configure(struct scsi_device *sdev) static void ufshcd_slave_destroy(struct scsi_device *sdev) { struct ufs_hba *hba; + struct request_queue *q = sdev->request_queue; hba = shost_priv(sdev->host); /* Drop the reference as it won't be needed anymore */ @@ -5215,6 +5248,8 @@ static void ufshcd_slave_destroy(struct scsi_device *sdev) hba->sdev_ufs_device = NULL; spin_unlock_irqrestore(hba->host->host_lock, flags); } + + ufshcd_crypto_destroy_rq_keyslot_manager(hba, q); } /** @@ -5378,6 +5413,8 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) case OCS_MISMATCH_RESP_UPIU_SIZE: case OCS_PEER_COMM_FAILURE: case OCS_FATAL_ERROR: + case OCS_INVALID_CRYPTO_CONFIG: + case OCS_GENERAL_CRYPTO_ERROR: default: result |= DID_ERROR << 16; dev_err(hba->dev, @@ -5388,7 +5425,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) break; } /* end of switch */ - if (host_byte(result) != DID_OK) + if ((host_byte(result) != DID_OK) && !hba->silence_err_logs) ufshcd_print_trs(hba, 1 << lrbp->task_tag, true); return result; } @@ -5466,6 +5503,7 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, } #endif lrbp->complete_time_stamp = sched_clock(); + ufshcd_complete_lrbp_crypto(hba, cmd, lrbp); /* Mark completed command as NULL in LRB */ lrbp->cmd = NULL; clear_bit_unlock(index, &hba->lrb_in_use); @@ -5660,6 +5698,7 @@ static int ufshcd_disable_auto_bkops(struct ufs_hba *hba) hba->auto_bkops_enabled = false; trace_ufshcd_auto_bkops_state(dev_name(hba->dev), "Disabled"); + hba->is_urgent_bkops_lvl_checked = false; out: return err; } @@ -5684,6 +5723,7 @@ static void ufshcd_force_reset_auto_bkops(struct ufs_hba *hba) hba->ee_ctrl_mask &= ~MASK_EE_URGENT_BKOPS; ufshcd_disable_auto_bkops(hba); } + hba->is_urgent_bkops_lvl_checked = false; } static inline int ufshcd_get_bkops_status(struct ufs_hba *hba, u32 *status) @@ -5730,6 +5770,7 @@ static int ufshcd_bkops_ctrl(struct ufs_hba *hba, err = ufshcd_enable_auto_bkops(hba); else err = ufshcd_disable_auto_bkops(hba); + hba->urgent_bkops_lvl = curr_status; out: return err; } @@ -5965,8 +6006,8 @@ static void ufshcd_err_handler(struct work_struct *work) /* * if host reset is required then skip clearing the pending - * transfers forcefully because they will automatically get - * cleared after link startup. + * transfers forcefully because they will get cleared during + * host reset and restore */ if (needs_reset) goto skip_pending_xfer_clear; @@ -6841,13 +6882,19 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba) int err; unsigned long flags; - /* Reset the host controller */ + /* + * Stop the host controller and complete the requests + * cleared by h/w + */ spin_lock_irqsave(hba->host->host_lock, flags); ufshcd_hba_stop(hba, false); #if defined(CONFIG_UFSFEATURE) ufsf_hpb_reset_host(&hba->ufsf); ufsf_tw_reset_host(&hba->ufsf); #endif + hba->silence_err_logs = true; + ufshcd_complete_requests(hba); + hba->silence_err_logs = false; spin_unlock_irqrestore(hba->host->host_lock, flags); /* scale up clocks to max frequency before full reinitialization */ @@ -6881,7 +6928,6 @@ out: static int ufshcd_reset_and_restore(struct ufs_hba *hba) { int err = 0; - unsigned long flags; int retries = MAX_HOST_RESET_RETRIES; do { @@ -6893,15 +6939,6 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba) err = ufshcd_host_reset_and_restore(hba); } while (err && --retries); - /* - * After reset the door-bell might be cleared, complete - * outstanding requests in s/w here. - */ - spin_lock_irqsave(hba->host->host_lock, flags); - ufshcd_transfer_req_compl(hba); - ufshcd_tmc_handler(hba); - spin_unlock_irqrestore(hba->host->host_lock, flags); - return err; } @@ -7841,7 +7878,8 @@ _link_retry: ufshcd_init_icc_levels(hba); /* Add required well known logical units to scsi mid layer */ - if (ufshcd_scsi_add_wlus(hba)) + ret = ufshcd_scsi_add_wlus(hba); + if (ret) goto out; /* Initialize devfreq after UFS device is detected */ @@ -8792,6 +8830,9 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) if (hba->card->wmanufacturerid == UFS_VENDOR_SKHYNIX) skhpb_suspend(hba); #endif + ret = ufshcd_crypto_suspend(hba, pm_op); + if (ret) + goto out; /* * If we can't transition into any of the low power modes @@ -8955,6 +8996,7 @@ enable_gating: ufsf_tw_resume(&hba->ufsf); #endif ufshcd_release(hba); + ufshcd_crypto_resume(hba, pm_op); out: hba->pm_op_in_progress = 0; @@ -8984,9 +9026,11 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) /* MTK PATCH: Lock deepidle/SODI @enter UFS resume callback */ ufshcd_vops_deepidle_lock(hba, true); + enum ufs_dev_pwr_mode old_pwr_mode; hba->pm_op_in_progress = 1; old_link_state = hba->uic_link_state; + old_pwr_mode = hba->curr_dev_pwr_mode; ufshcd_hba_vreg_set_hpm(hba); /* Make sure clocks are enabled before accessing controller */ @@ -9056,6 +9100,10 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) } } + ret = ufshcd_crypto_resume(hba, pm_op); + if (ret) + goto set_old_dev_pwr_mode; + if (ufshcd_keep_autobkops_enabled_except_suspend(hba)) ufshcd_enable_auto_bkops(hba); else @@ -9082,6 +9130,9 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) goto out; +set_old_dev_pwr_mode: + if (old_pwr_mode != hba->curr_dev_pwr_mode) + ufshcd_set_dev_pwr_mode(hba, old_pwr_mode); set_old_link_state: ufshcd_link_state_transition(hba, old_link_state, 0); vendor_suspend: @@ -9426,9 +9477,8 @@ int ufshcd_shutdown(struct ufs_hba *hba) { int ret = 0; - /* For checking UFS init fail issue */ if (!hba->is_powered) - BUG_ON(1); + goto out; if (ufshcd_is_ufs_dev_poweroff(hba) && ufshcd_is_link_off(hba)) goto out; @@ -9544,6 +9594,7 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) hba->host = host; hba->dev = dev; *hba_handle = hba; + hba->sg_entry_size = sizeof(struct ufshcd_sg_entry); INIT_LIST_HEAD(&hba->clk_list_head); @@ -9682,6 +9733,13 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) if (hba->lanes_per_direction == 2) ufshcd_vops_device_reset(hba); + /* Init crypto */ + err = ufshcd_hba_init_crypto(hba); + if (err) { + dev_err(hba->dev, "crypto setup failed\n"); + goto out_remove_scsi_host; + } + /* Host controller enable */ err = ufshcd_hba_enable(hba); if (err) { diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index b611d2d667ca..e7cbe45dc0cc 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -175,6 +175,9 @@ struct ufs_pm_lvl_states { * @lun: LUN of the command * @intr_cmd: Interrupt command (doesn't participate in interrupt aggregation) * @issue_time_stamp: time stamp for debug purposes + * @crypto_enable: whether or not the request needs inline crypto operations + * @crypto_key_slot: the key slot to use for inline crypto + * @data_unit_num: the data unit number for the first block for inline crypto * @req_abort_skip: skip request abort task flag */ struct ufshcd_lrb { @@ -197,6 +200,11 @@ struct ufshcd_lrb { int task_tag; u8 lun; /* UPIU LUN id field is only 8-bit wide */ bool intr_cmd; +#if IS_ENABLED(CONFIG_SCSI_UFS_CRYPTO) + bool crypto_enable; + u8 crypto_key_slot; + u64 data_unit_num; +#endif /* CONFIG_SCSI_UFS_CRYPTO */ /* * Use sched_clock instead of ktime_get to align with @@ -304,6 +312,8 @@ enum ufs_scsi_dev_cfg { #define UFS_RESCTL_CMD_SEND BIT(0) #define UFS_RESCTL_CMD_COMP BIT(1) +union ufs_crypto_cfg_entry; + /** * struct ufs_hba_variant_ops - variant specific callbacks * @name: variant name @@ -333,6 +343,7 @@ enum ufs_scsi_dev_cfg { * @dbg_register_dump: used to dump controller debug information * @phy_initialization: used to initialize phys * @device_reset: called to issue a reset pulse on the UFS device + * @program_key: program an inline encryption key into a keyslot */ struct ufs_hba_variant_ops { const char *name; @@ -387,6 +398,30 @@ struct ufs_hba_variant_ops { * MTK PATCH: SCSI device slave alloc/configure/destroy. */ int (*scsi_dev_cfg)(struct scsi_device *, enum ufs_scsi_dev_cfg); + int (*program_key)(struct ufs_hba *hba, + const union ufs_crypto_cfg_entry *cfg, int slot); +}; + +struct keyslot_mgmt_ll_ops; +struct ufs_hba_crypto_variant_ops { + void (*setup_rq_keyslot_manager)(struct ufs_hba *hba, + struct request_queue *q); + void (*destroy_rq_keyslot_manager)(struct ufs_hba *hba, + struct request_queue *q); + int (*hba_init_crypto)(struct ufs_hba *hba, + const struct keyslot_mgmt_ll_ops *ksm_ops); + void (*enable)(struct ufs_hba *hba); + void (*disable)(struct ufs_hba *hba); + int (*suspend)(struct ufs_hba *hba, enum ufs_pm_op pm_op); + int (*resume)(struct ufs_hba *hba, enum ufs_pm_op pm_op); + int (*debug)(struct ufs_hba *hba); + int (*prepare_lrbp_crypto)(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp); + int (*complete_lrbp_crypto)(struct ufs_hba *hba, + struct scsi_cmnd *cmd, + struct ufshcd_lrb *lrbp); + void *priv; }; /* clock gating state */ @@ -582,6 +617,7 @@ enum ufs_crypto_state { * @ufs_version: UFS Version to which controller complies * @vops: pointer to variant specific operations * @priv: pointer to variant specific private data + * @sg_entry_size: size of struct ufshcd_sg_entry (may include variant fields) * @irq: Irq number of the controller * @active_uic_cmd: handle of active UIC command * @uic_cmd_mutex: mutex for uic command @@ -603,6 +639,7 @@ enum ufs_crypto_state { * @uic_error: UFS interconnect layer error status * @saved_err: sticky error mask * @saved_uic_err: sticky UIC error mask + * @silence_err_logs: flag to silence error logs * @dev_cmd: ufs device management command information * @last_dme_cmd_tstamp: time stamp of the last completed DME command * @auto_bkops_enabled: to track whether bkops is enabled in device @@ -615,6 +652,10 @@ enum ufs_crypto_state { * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for * device is known or not. * @scsi_block_reqs_cnt: reference counting for scsi block requests + * @crypto_capabilities: Content of crypto capabilities register (0x100) + * @crypto_cap_array: Array of crypto capabilities + * @crypto_cfg_register: Start of the crypto cfg array + * @ksm: the keyslot manager tied to this hba */ struct ufs_hba { void __iomem *mmio_base; @@ -668,6 +709,8 @@ struct ufs_hba { u32 ufs_version; struct ufs_hba_variant_ops *vops; void *priv; + const struct ufs_hba_crypto_variant_ops *crypto_vops; + size_t sg_entry_size; unsigned int irq; bool is_irq_enabled; @@ -738,6 +781,12 @@ struct ufs_hba { */ #define UFSHCD_QUIRK_UFS_HCI_DISABLE_AH8_BEFORE_RDB UFS_BIT(10) + /* + * This quirk needs to be enabled if the host controller advertises + * inline encryption support but it doesn't work correctly. + */ + #define UFSHCD_QUIRK_BROKEN_CRYPTO UFS_BIT(11) + unsigned int quirks; /* Deviations from standard UFSHCI spec. */ /* Device deviations from standard UFS device spec. */ @@ -771,6 +820,7 @@ struct ufs_hba { u32 saved_err; u32 saved_uic_err; struct ufs_stats ufs_stats; + bool silence_err_logs; /* Device management request data */ struct ufs_dev_cmd dev_cmd; @@ -817,6 +867,11 @@ struct ufs_hba { * the performance of ongoing read/write operations. */ #define UFSHCD_CAP_KEEP_AUTO_BKOPS_ENABLED_EXCEPT_SUSPEND (1 << 5) + /* + * This capability allows the host controller driver to use the + * inline crypto engine, if it is present + */ +#define UFSHCD_CAP_CRYPTO (1 << 7) struct devfreq *devfreq; struct ufs_clk_scaling clk_scaling; @@ -875,6 +930,14 @@ struct ufs_hba { #if defined(CONFIG_SCSI_SKHPB) struct scsi_device *sdev_ufs_lu[UFS_UPIU_MAX_GENERAL_LUN]; #endif + +#ifdef CONFIG_SCSI_UFS_CRYPTO + /* crypto */ + union ufs_crypto_capabilities crypto_capabilities; + union ufs_crypto_cap_entry *crypto_cap_array; + u32 crypto_cfg_register; + struct keyslot_manager *ksm; +#endif /* CONFIG_SCSI_UFS_CRYPTO */ }; /* MTK PATCH */ diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h index e00eadd561ec..292afecc3a99 100644 --- a/drivers/scsi/ufs/ufshci.h +++ b/drivers/scsi/ufs/ufshci.h @@ -104,6 +104,7 @@ enum { MASK_64_ADDRESSING_SUPPORT = 0x01000000, MASK_OUT_OF_ORDER_DATA_DELIVERY_SUPPORT = 0x02000000, MASK_UIC_DME_TEST_MODE_SUPPORT = 0x04000000, + MASK_CRYPTO_SUPPORT = 0x10000000, }; #define UFS_MASK(mask, offset) ((mask) << (offset)) @@ -156,6 +157,7 @@ enum { #define DEVICE_FATAL_ERROR UFS_BIT(11) #define CONTROLLER_FATAL_ERROR UFS_BIT(16) #define SYSTEM_BUS_FATAL_ERROR UFS_BIT(17) +#define CRYPTO_ENGINE_FATAL_ERROR UFS_BIT(18) #define UFSHCD_UIC_HIBERN8_MASK (UIC_HIBERNATE_ENTER |\ UIC_HIBERNATE_EXIT) @@ -168,11 +170,13 @@ enum { #define UFSHCD_ERROR_MASK (UIC_ERROR |\ DEVICE_FATAL_ERROR |\ CONTROLLER_FATAL_ERROR |\ - SYSTEM_BUS_FATAL_ERROR) + SYSTEM_BUS_FATAL_ERROR |\ + CRYPTO_ENGINE_FATAL_ERROR) #define INT_FATAL_ERRORS (DEVICE_FATAL_ERROR |\ CONTROLLER_FATAL_ERROR |\ - SYSTEM_BUS_FATAL_ERROR) + SYSTEM_BUS_FATAL_ERROR |\ + CRYPTO_ENGINE_FATAL_ERROR) /* HCS - Host Controller Status 30h */ #define DEVICE_PRESENT UFS_BIT(0) @@ -317,6 +321,61 @@ enum { INTERRUPT_MASK_ALL_VER_21 = 0x71FFF, }; +/* CCAP - Crypto Capability 100h */ +union ufs_crypto_capabilities { + __le32 reg_val; + struct { + u8 num_crypto_cap; + u8 config_count; + u8 reserved; + u8 config_array_ptr; + }; +}; + +enum ufs_crypto_key_size { + UFS_CRYPTO_KEY_SIZE_INVALID = 0x0, + UFS_CRYPTO_KEY_SIZE_128 = 0x1, + UFS_CRYPTO_KEY_SIZE_192 = 0x2, + UFS_CRYPTO_KEY_SIZE_256 = 0x3, + UFS_CRYPTO_KEY_SIZE_512 = 0x4, +}; + +enum ufs_crypto_alg { + UFS_CRYPTO_ALG_AES_XTS = 0x0, + UFS_CRYPTO_ALG_BITLOCKER_AES_CBC = 0x1, + UFS_CRYPTO_ALG_AES_ECB = 0x2, + UFS_CRYPTO_ALG_ESSIV_AES_CBC = 0x3, +}; + +/* x-CRYPTOCAP - Crypto Capability X */ +union ufs_crypto_cap_entry { + __le32 reg_val; + struct { + u8 algorithm_id; + u8 sdus_mask; /* Supported data unit size mask */ + u8 key_size; + u8 reserved; + }; +}; + +#define UFS_CRYPTO_CONFIGURATION_ENABLE (1 << 7) +#define UFS_CRYPTO_KEY_MAX_SIZE 64 +/* x-CRYPTOCFG - Crypto Configuration X */ +union ufs_crypto_cfg_entry { + __le32 reg_val[32]; + struct { + u8 crypto_key[UFS_CRYPTO_KEY_MAX_SIZE]; + u8 data_unit_size; + u8 crypto_cap_idx; + u8 reserved_1; + u8 config_enable; + u8 reserved_multi_host; + u8 reserved_2; + u8 vsb[2]; + u8 reserved_3[56]; + }; +}; + /* * Request Descriptor Definitions */ @@ -338,6 +397,7 @@ enum { UTP_NATIVE_UFS_COMMAND = 0x10000000, UTP_DEVICE_MANAGEMENT_FUNCTION = 0x20000000, UTP_REQ_DESC_INT_CMD = 0x01000000, + UTP_REQ_DESC_CRYPTO_ENABLE_CMD = 0x00800000, }; /* UTP Transfer Request Data Direction (DD) */ @@ -357,6 +417,9 @@ enum { OCS_PEER_COMM_FAILURE = 0x5, OCS_ABORTED = 0x6, OCS_FATAL_ERROR = 0x7, + OCS_DEVICE_FATAL_ERROR = 0x8, + OCS_INVALID_CRYPTO_CONFIG = 0x9, + OCS_GENERAL_CRYPTO_ERROR = 0xA, OCS_INVALID_COMMAND_STATUS = 0x0F, MASK_OCS = 0x0F, }; @@ -378,20 +441,28 @@ struct ufshcd_sg_entry { __le32 upper_addr; __le32 reserved; __le32 size; + /* + * followed by variant-specific fields if + * hba->sg_entry_size != sizeof(struct ufshcd_sg_entry) + */ }; /** * struct utp_transfer_cmd_desc - UFS Command Descriptor structure * @command_upiu: Command UPIU Frame address * @response_upiu: Response UPIU Frame address - * @prd_table: Physical Region Descriptor + * @prd_table: Physical Region Descriptor: an array of SG_ALL struct + * ufshcd_sg_entry's. Variant-specific fields may be present after each. */ struct utp_transfer_cmd_desc { u8 command_upiu[ALIGNED_UPIU_SIZE]; u8 response_upiu[ALIGNED_UPIU_SIZE]; - struct ufshcd_sg_entry prd_table[SG_ALL]; + u8 prd_table[]; }; +#define sizeof_utp_transfer_cmd_desc(hba) \ + (sizeof(struct utp_transfer_cmd_desc) + SG_ALL * (hba)->sg_entry_size) + /** * struct request_desc_header - Descriptor Header common to both UTRD and UTMRD * @dword0: Descriptor Header DW0 diff --git a/drivers/soc/fsl/qe/gpio.c b/drivers/soc/fsl/qe/gpio.c index 3b27075c21a7..5cbc5ce5ac15 100644 --- a/drivers/soc/fsl/qe/gpio.c +++ b/drivers/soc/fsl/qe/gpio.c @@ -152,8 +152,10 @@ struct qe_pin *qe_pin_request(struct device_node *np, int index) if (err < 0) goto err0; gc = gpio_to_chip(err); - if (WARN_ON(!gc)) + if (WARN_ON(!gc)) { + err = -ENODEV; goto err0; + } if (!of_device_is_compatible(gc->of_node, "fsl,mpc8323-qe-pario-bank")) { pr_debug("%s: tried to get a non-qe pin\n", __func__); diff --git a/drivers/soc/imx/gpc.c b/drivers/soc/imx/gpc.c index c54d229f8da4..3a12123de466 100644 --- a/drivers/soc/imx/gpc.c +++ b/drivers/soc/imx/gpc.c @@ -73,7 +73,7 @@ static int imx6_pm_domain_power_off(struct generic_pm_domain *genpd) return -EBUSY; /* Read ISO and ISO2SW power down delays */ - regmap_read(pd->regmap, pd->reg_offs + GPC_PGC_PUPSCR_OFFS, &val); + regmap_read(pd->regmap, pd->reg_offs + GPC_PGC_PDNSCR_OFFS, &val); iso = val & 0x3f; iso2sw = (val >> 8) & 0x3f; diff --git a/drivers/soc/qcom/wcnss_ctrl.c b/drivers/soc/qcom/wcnss_ctrl.c index df3ccb30bc2d..373400dd816d 100644 --- a/drivers/soc/qcom/wcnss_ctrl.c +++ b/drivers/soc/qcom/wcnss_ctrl.c @@ -281,7 +281,7 @@ struct rpmsg_endpoint *qcom_wcnss_open_channel(void *wcnss, const char *name, rp struct rpmsg_channel_info chinfo; struct wcnss_ctrl *_wcnss = wcnss; - strncpy(chinfo.name, name, sizeof(chinfo.name)); + strscpy(chinfo.name, name, sizeof(chinfo.name)); chinfo.src = RPMSG_ADDR_ANY; chinfo.dst = RPMSG_ADDR_ANY; diff --git a/drivers/soc/tegra/Kconfig b/drivers/soc/tegra/Kconfig index e9e277178c94..96c00470ab32 100644 --- a/drivers/soc/tegra/Kconfig +++ b/drivers/soc/tegra/Kconfig @@ -76,6 +76,7 @@ config ARCH_TEGRA_210_SOC select PINCTRL_TEGRA210 select SOC_TEGRA_FLOWCTRL select SOC_TEGRA_PMC + depends on !LTO_CLANG help Enable support for the NVIDIA Tegra210 SoC. Also known as Tegra X1, the Tegra210 has four Cortex-A57 cores paired with four Cortex-A53 diff --git a/drivers/soc/tegra/fuse/tegra-apbmisc.c b/drivers/soc/tegra/fuse/tegra-apbmisc.c index 5b18f6ffa45c..cd61c883c19f 100644 --- a/drivers/soc/tegra/fuse/tegra-apbmisc.c +++ b/drivers/soc/tegra/fuse/tegra-apbmisc.c @@ -134,7 +134,7 @@ void __init tegra_init_apbmisc(void) apbmisc.flags = IORESOURCE_MEM; /* strapping options */ - if (tegra_get_chip_id() == TEGRA124) { + if (of_machine_is_compatible("nvidia,tegra124")) { straps.start = 0x7000e864; straps.end = 0x7000e867; } else { diff --git a/drivers/soc/ti/wkup_m3_ipc.c b/drivers/soc/ti/wkup_m3_ipc.c index 369aef5e7228..651827c6ee6f 100644 --- a/drivers/soc/ti/wkup_m3_ipc.c +++ b/drivers/soc/ti/wkup_m3_ipc.c @@ -375,6 +375,8 @@ static void wkup_m3_rproc_boot_thread(struct wkup_m3_ipc *m3_ipc) ret = rproc_boot(m3_ipc->rproc); if (ret) dev_err(dev, "rproc_boot failed\n"); + else + m3_ipc_state = m3_ipc; do_exit(0); } @@ -461,8 +463,6 @@ static int wkup_m3_ipc_probe(struct platform_device *pdev) goto err_put_rproc; } - m3_ipc_state = m3_ipc; - return 0; err_put_rproc: diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index 047875861df1..7b739c449227 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -301,7 +301,6 @@ struct atmel_spi { bool use_cs_gpios; bool keep_cs; - bool cs_active; u32 fifo_size; }; @@ -1150,10 +1149,8 @@ static int atmel_spi_setup(struct spi_device *spi) as = spi_master_get_devdata(spi->master); /* see notes above re chipselect */ - if (!atmel_spi_is_v2(as) - && spi->chip_select == 0 - && (spi->mode & SPI_CS_HIGH)) { - dev_dbg(&spi->dev, "setup: can't be active-high\n"); + if (!as->use_cs_gpios && (spi->mode & SPI_CS_HIGH)) { + dev_warn(&spi->dev, "setup: non GPIO CS can't be active-high\n"); return -EINVAL; } @@ -1340,11 +1337,9 @@ static int atmel_spi_one_transfer(struct spi_master *master, &msg->transfers)) { as->keep_cs = true; } else { - as->cs_active = !as->cs_active; - if (as->cs_active) - cs_activate(as, msg->spi); - else - cs_deactivate(as, msg->spi); + cs_deactivate(as, msg->spi); + udelay(10); + cs_activate(as, msg->spi); } } @@ -1367,7 +1362,6 @@ static int atmel_spi_transfer_one_message(struct spi_master *master, atmel_spi_lock(as); cs_activate(as, spi); - as->cs_active = true; as->keep_cs = false; msg->status = 0; diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c index 5c89bbb05441..e075712c501e 100644 --- a/drivers/spi/spi-bcm2835aux.c +++ b/drivers/spi/spi-bcm2835aux.c @@ -416,7 +416,18 @@ static int bcm2835aux_spi_probe(struct platform_device *pdev) platform_set_drvdata(pdev, master); master->mode_bits = (SPI_CPOL | SPI_CS_HIGH | SPI_NO_CS); master->bits_per_word_mask = SPI_BPW_MASK(8); - master->num_chipselect = -1; + /* even though the driver never officially supported native CS + * allow a single native CS for legacy DT support purposes when + * no cs-gpio is configured. + * Known limitations for native cs are: + * * multiple chip-selects: cs0-cs2 are all simultaniously asserted + * whenever there is a transfer - this even includes SPI_NO_CS + * * SPI_CS_HIGH: is ignores - cs are always asserted low + * * cs_change: cs is deasserted after each spi_transfer + * * cs_delay_usec: cs is always deasserted one SCK cycle after + * a spi_transfer + */ + master->num_chipselect = 1; master->transfer_one = bcm2835aux_spi_transfer_one; master->handle_err = bcm2835aux_spi_handle_err; master->prepare_message = bcm2835aux_spi_prepare_message; diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index 02bd1eba045b..d08ad93d97a1 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -584,11 +584,6 @@ static int cdns_spi_probe(struct platform_device *pdev) goto clk_dis_apb; } - pm_runtime_use_autosuspend(&pdev->dev); - pm_runtime_set_autosuspend_delay(&pdev->dev, SPI_AUTOSUSPEND_TIMEOUT); - pm_runtime_set_active(&pdev->dev); - pm_runtime_enable(&pdev->dev); - ret = of_property_read_u32(pdev->dev.of_node, "num-cs", &num_cs); if (ret < 0) master->num_chipselect = CDNS_SPI_DEFAULT_NUM_CS; @@ -603,8 +598,10 @@ static int cdns_spi_probe(struct platform_device *pdev) /* SPI controller initializations */ cdns_spi_init_hw(xspi); - pm_runtime_mark_last_busy(&pdev->dev); - pm_runtime_put_autosuspend(&pdev->dev); + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(&pdev->dev); + pm_runtime_use_autosuspend(&pdev->dev); + pm_runtime_set_autosuspend_delay(&pdev->dev, SPI_AUTOSUSPEND_TIMEOUT); irq = platform_get_irq(pdev, 0); if (irq <= 0) { diff --git a/drivers/spi/spi-cavium-thunderx.c b/drivers/spi/spi-cavium-thunderx.c index 877937706240..828fbbebc3c4 100644 --- a/drivers/spi/spi-cavium-thunderx.c +++ b/drivers/spi/spi-cavium-thunderx.c @@ -81,6 +81,7 @@ static int thunderx_spi_probe(struct pci_dev *pdev, error: clk_disable_unprepare(p->clk); + pci_release_regions(pdev); spi_master_put(master); return ret; } @@ -95,6 +96,7 @@ static void thunderx_spi_remove(struct pci_dev *pdev) return; clk_disable_unprepare(p->clk); + pci_release_regions(pdev); /* Put everything in a known state. */ writeq(0, p->register_base + OCTEON_SPI_CFG(p)); } diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c index b217c22ff72f..b461200871f8 100644 --- a/drivers/spi/spi-dw.c +++ b/drivers/spi/spi-dw.c @@ -180,9 +180,11 @@ static inline u32 rx_max(struct dw_spi *dws) static void dw_writer(struct dw_spi *dws) { - u32 max = tx_max(dws); + u32 max; u16 txw = 0; + spin_lock(&dws->buf_lock); + max = tx_max(dws); while (max--) { /* Set the tx word if the transfer's original "tx" is not null */ if (dws->tx_end - dws->len) { @@ -194,13 +196,16 @@ static void dw_writer(struct dw_spi *dws) dw_write_io_reg(dws, DW_SPI_DR, txw); dws->tx += dws->n_bytes; } + spin_unlock(&dws->buf_lock); } static void dw_reader(struct dw_spi *dws) { - u32 max = rx_max(dws); + u32 max; u16 rxw; + spin_lock(&dws->buf_lock); + max = rx_max(dws); while (max--) { rxw = dw_read_io_reg(dws, DW_SPI_DR); /* Care rx only if the transfer's original "rx" is not null */ @@ -212,6 +217,7 @@ static void dw_reader(struct dw_spi *dws) } dws->rx += dws->n_bytes; } + spin_unlock(&dws->buf_lock); } static void int_error_stop(struct dw_spi *dws, const char *msg) @@ -284,18 +290,20 @@ static int dw_spi_transfer_one(struct spi_master *master, { struct dw_spi *dws = spi_master_get_devdata(master); struct chip_data *chip = spi_get_ctldata(spi); + unsigned long flags; u8 imask = 0; u16 txlevel = 0; u32 cr0; int ret; dws->dma_mapped = 0; - + spin_lock_irqsave(&dws->buf_lock, flags); dws->tx = (void *)transfer->tx_buf; dws->tx_end = dws->tx + transfer->len; dws->rx = transfer->rx_buf; dws->rx_end = dws->rx + transfer->len; dws->len = transfer->len; + spin_unlock_irqrestore(&dws->buf_lock, flags); spi_enable_chip(dws, 0); @@ -486,6 +494,7 @@ int dw_spi_add_host(struct device *dev, struct dw_spi *dws) dws->type = SSI_MOTO_SPI; dws->dma_inited = 0; dws->dma_addr = (dma_addr_t)(dws->paddr + DW_SPI_DR); + spin_lock_init(&dws->buf_lock); ret = request_irq(dws->irq, dw_spi_irq, IRQF_SHARED, dev_name(dev), master); diff --git a/drivers/spi/spi-dw.h b/drivers/spi/spi-dw.h index 5c07cf8f19e0..45fbf3ad591c 100644 --- a/drivers/spi/spi-dw.h +++ b/drivers/spi/spi-dw.h @@ -117,6 +117,7 @@ struct dw_spi { size_t len; void *tx; void *tx_end; + spinlock_t buf_lock; void *rx; void *rx_end; int dma_mapped; diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index cb3c73007ca1..8fe51f7541bb 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -287,7 +287,7 @@ static int fsl_lpspi_config(struct fsl_lpspi_data *fsl_lpspi) fsl_lpspi_set_watermark(fsl_lpspi); - temp = CFGR1_PCSCFG | CFGR1_MASTER | CFGR1_NOSTALL; + temp = CFGR1_PCSCFG | CFGR1_MASTER; if (fsl_lpspi->config.mode & SPI_CS_HIGH) temp |= CFGR1_PCSPOL; writel(temp, fsl_lpspi->base + IMX7ULP_CFGR1); diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c index 8f2e97857e8b..cd784552de7f 100644 --- a/drivers/spi/spi-fsl-spi.c +++ b/drivers/spi/spi-fsl-spi.c @@ -407,7 +407,6 @@ static int fsl_spi_do_one_msg(struct spi_master *master, } m->status = status; - spi_finalize_current_message(master); if (status || !cs_change) { ndelay(nsecs); @@ -415,6 +414,7 @@ static int fsl_spi_do_one_msg(struct spi_master *master, } fsl_spi_setup_transfer(spi, NULL); + spi_finalize_current_message(master); return 0; } @@ -832,9 +832,9 @@ static int of_fsl_spi_probe(struct platform_device *ofdev) if (ret) goto err; - irq = irq_of_parse_and_map(np, 0); - if (!irq) { - ret = -EINVAL; + irq = platform_get_irq(ofdev, 0); + if (irq < 0) { + ret = irq; goto err; } @@ -847,7 +847,6 @@ static int of_fsl_spi_probe(struct platform_device *ofdev) return 0; err: - irq_dispose_mapping(irq); if (type == TYPE_FSL) of_fsl_spi_free_chipselects(dev); return ret; diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c index 7a37090dabbe..2e65b70c7879 100644 --- a/drivers/spi/spi-img-spfi.c +++ b/drivers/spi/spi-img-spfi.c @@ -673,6 +673,8 @@ static int img_spfi_probe(struct platform_device *pdev) dma_release_channel(spfi->tx_ch); if (spfi->rx_ch) dma_release_channel(spfi->rx_ch); + spfi->tx_ch = NULL; + spfi->rx_ch = NULL; dev_warn(spfi->dev, "Failed to get DMA channels, falling back to PIO mode\n"); } else { master->dma_tx = spfi->tx_ch; diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index fd4f18a682e9..0c2867deb36f 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -21,13 +21,10 @@ #include #include #include -#include #include #include #include #include -#include - #define SPI_CFG0_REG 0x0000 #define SPI_CFG1_REG 0x0004 @@ -37,11 +34,8 @@ #define SPI_RX_DATA_REG 0x0014 #define SPI_CMD_REG 0x0018 #define SPI_STATUS0_REG 0x001c -#define SPI_STATUS1_REG 0x0020 #define SPI_PAD_SEL_REG 0x0024 #define SPI_CFG2_REG 0x0028 -#define SPI_TX_SRC_REG_64 0x002c -#define SPI_RX_DST_REG_64 0x0030 #define SPI_CFG0_SCK_HIGH_OFFSET 0 #define SPI_CFG0_SCK_LOW_OFFSET 8 @@ -54,7 +48,7 @@ #define SPI_CFG1_CS_IDLE_OFFSET 0 #define SPI_CFG1_PACKET_LOOP_OFFSET 8 #define SPI_CFG1_PACKET_LENGTH_OFFSET 16 -#define SPI_CFG1_GET_TICK_DLY_OFFSET 29 +#define SPI_CFG1_GET_TICK_DLY_OFFSET 30 #define SPI_CFG1_CS_IDLE_MASK 0xff #define SPI_CFG1_PACKET_LOOP_MASK 0xff00 @@ -87,38 +81,27 @@ #define MTK_SPI_MAX_FIFO_SIZE 32U #define MTK_SPI_PACKET_SIZE 1024 -#define ADDRSHIFT_R_OFFSET (6) -#define ADDRSHIFT_R_MASK (0xFFFFF03F) -#define ADDRSHIFT_W_MASK (0xFFFFFFC0) -#define MTK_SPI_32BIS_MASK (0xFFFFFFFF) -#define MTK_SPI_32BIS_SHIFT (32) - -#define DMA_ADDR_BITS (36) struct mtk_spi_compatible { bool need_pad_sel; /* Must explicitly send dummy Tx bytes to do Rx only transfer */ bool must_tx; - /* some IC design adjust register define */ + /* some IC design adjust cfg register to enhance time accuracy */ bool enhance_timing; - /*some chip support 8GB DRAM access, there are two kinds solutions*/ - bool dma8g_peri_ext; - bool dma8g_spi_ext; }; struct mtk_spi { void __iomem *base; - void __iomem *peri_regs; u32 state; int pad_num; u32 *pad_sel; struct clk *parent_clk, *sel_clk, *spi_clk; struct spi_transfer *cur_transfer; u32 xfer_len; + u32 num_xfered; struct scatterlist *tx_sgl, *rx_sgl; u32 tx_sgl_len, rx_sgl_len; const struct mtk_spi_compatible *dev_comp; - u32 dram_8gb_offset; }; static const struct mtk_spi_compatible mtk_common_compat; @@ -127,27 +110,6 @@ static const struct mtk_spi_compatible mt2712_compat = { .must_tx = true, }; -static const struct mtk_spi_compatible mt3967_compat = { - .need_pad_sel = true, - .enhance_timing = true, - .dma8g_spi_ext = true, - .must_tx = true, -}; - -static const struct mtk_spi_compatible mt6758_compat = { - .need_pad_sel = true, - .enhance_timing = true, - .dma8g_peri_ext = true, - .must_tx = true, -}; - -static const struct mtk_spi_compatible mt6765_compat = { - .need_pad_sel = true, - .enhance_timing = true, - .dma8g_spi_ext = true, - .must_tx = true, -}; - static const struct mtk_spi_compatible mt7622_compat = { .must_tx = true, .enhance_timing = true, @@ -158,12 +120,6 @@ static const struct mtk_spi_compatible mt8173_compat = { .must_tx = true, }; -static const struct mtk_spi_compatible mt8183_compat = { - .need_pad_sel = true, - .must_tx = true, - .enhance_timing = true, -}; - /* * A piece of default chip info unless the platform * supplies it. @@ -173,11 +129,6 @@ static const struct mtk_chip_config mtk_default_chip_info = { .tx_mlsb = 1, .cs_pol = 0, .sample_sel = 0, - - .cs_setuptime = 0, - .cs_holdtime = 0, - .cs_idletime = 0, - .deassert_mode = 0, }; static const struct of_device_id mtk_spi_of_match[] = { @@ -187,127 +138,22 @@ static const struct of_device_id mtk_spi_of_match[] = { { .compatible = "mediatek,mt2712-spi", .data = (void *)&mt2712_compat, }, - { .compatible = "mediatek,mt3967-spi", - .data = (void *)&mt3967_compat, - }, { .compatible = "mediatek,mt6589-spi", .data = (void *)&mtk_common_compat, }, - { .compatible = "mediatek,mt6758-spi", - .data = (void *)&mt6758_compat, - }, - { .compatible = "mediatek,mt6765-spi", - .data = (void *)&mt6765_compat, - }, { .compatible = "mediatek,mt7622-spi", .data = (void *)&mt7622_compat, }, { .compatible = "mediatek,mt8135-spi", .data = (void *)&mtk_common_compat, }, - { .compatible = "mediatek,mt8167-spi", - .data = (void *)&mt2712_compat, - }, { .compatible = "mediatek,mt8173-spi", .data = (void *)&mt8173_compat, }, - { .compatible = "mediatek,mt8183-spi", - .data = (void *)&mt8183_compat, - }, {} }; MODULE_DEVICE_TABLE(of, mtk_spi_of_match); -#define LOG_CLOSE 0 -#define LOG_OPEN 1 -u8 spi_log_status = LOG_CLOSE; - -#define spi_debug(fmt, args...) do { \ - if (spi_log_status == LOG_OPEN) {\ - pr_info("[spi]%s() " fmt, __func__, ##args);\ - } \ -} while (0) - -static ssize_t spi_log_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - char buf_temp[50] = { 0 }; - - if (buf == NULL) { - pr_notice("%s() *buf is NULL\n", __func__); - return -EINVAL; - } - - snprintf(buf_temp, sizeof(buf_temp), "Now spi log %s.\n", - (spi_log_status == LOG_CLOSE)?"disabled":"enabled"); - strncat(buf, buf_temp, strlen(buf_temp)); - - return strlen(buf); -} - -static ssize_t spi_log_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - if (strlen(buf) < 1) { - pr_notice("%s() Invalid input!\n", __func__); - return -EINVAL; - } - - pr_info("[spi]%s buflen:%zu buf:%s\n", __func__, strlen(buf), buf); - if (!strncmp(buf, "1", 1)) { - pr_info("[spi]%s Now enable spi log\n", __func__); - spi_log_status = LOG_OPEN; - } else if (!strncmp(buf, "0", 1)) { - pr_info("[spi]%s Now disable spi log\n", __func__); - spi_log_status = LOG_CLOSE; - } else - pr_info("[spi]%s invalid parameter.Plz Input 1 or 0\n", - __func__); - - return count; -} - -static DEVICE_ATTR(spi_log, 0644, spi_log_show, spi_log_store); - -static void spi_dump_reg(struct mtk_spi *mdata) -{ - spi_debug("||**************%s**************||\n", __func__); - spi_debug("cfg0:0x%.8x\n", readl(mdata->base + SPI_CFG0_REG)); - spi_debug("cfg1:0x%.8x\n", readl(mdata->base + SPI_CFG1_REG)); - spi_debug("cfg2:0x%.8x\n", readl(mdata->base + SPI_CFG2_REG)); - spi_debug("cmd :0x%.8x\n", readl(mdata->base + SPI_CMD_REG)); - spi_debug("tx_s:0x%.8x\n", readl(mdata->base + SPI_TX_SRC_REG)); - spi_debug("rx_d:0x%.8x\n", readl(mdata->base + SPI_RX_DST_REG)); - spi_debug("status1:0x%.8x\n", readl(mdata->base + SPI_STATUS1_REG)); - spi_debug("pad_sel:0x%.8x\n", readl(mdata->base + SPI_PAD_SEL_REG)); - spi_debug("||**************%s end**************||\n", __func__); -} - -static void spi_dump_config(struct spi_master *master, struct spi_message *msg) -{ - struct spi_device *spi = msg->spi; - struct mtk_chip_config *chip_config = spi->controller_data; - struct mtk_spi *mdata = spi_master_get_devdata(master); - - spi_debug("||**************%s**************||\n", __func__); - spi_debug("chip_config->spi_mode:0x%.4x\n", spi->mode); - spi_debug("chip_config->tx_mlsb:%d.\n", chip_config->tx_mlsb); - spi_debug("chip_config->rx_mlsb:%d.\n", chip_config->rx_mlsb); - spi_debug("chip_config->cs_pol:%d.\n", chip_config->cs_pol); - spi_debug("chip_config->sample_sel:%d\n", chip_config->sample_sel); - spi_debug("chip_config->cs_setuptime=%d\n", - chip_config->cs_setuptime); - spi_debug("chip_config->cs_holdtime=%d\n", - chip_config->cs_holdtime); - spi_debug("chip_config->cs_idletime=%d\n", - chip_config->cs_idletime); - spi_debug("chip_config->deassert_mode=%d\n", - chip_config->deassert_mode); - spi_debug("chip_config->chip_select:%d,chip_config->pad_sel:%d\n", - spi->chip_select, mdata->pad_sel[spi->chip_select]); - spi_debug("||**************%s end**************||\n", __func__); -} - static void mtk_spi_reset(struct mtk_spi *mdata) { u32 reg_val; @@ -334,9 +180,6 @@ static int mtk_spi_prepare_message(struct spi_master *master, cpha = spi->mode & SPI_CPHA ? 1 : 0; cpol = spi->mode & SPI_CPOL ? 1 : 0; - spi_debug("cpha:%d cpol:%d. chip_config as below\n", cpha, cpol); - spi_dump_config(master, msg); - reg_val = readl(mdata->base + SPI_CMD_REG); if (cpha) reg_val |= SPI_CMD_CPHA; @@ -384,10 +227,7 @@ static int mtk_spi_prepare_message(struct spi_master *master, reg_val &= ~(SPI_CMD_TX_DMA | SPI_CMD_RX_DMA); /* disable deassert mode */ - if (chip_config->deassert_mode == 1) - reg_val |= SPI_CMD_DEASSERT; - else - reg_val &= ~SPI_CMD_DEASSERT; + reg_val &= ~SPI_CMD_DEASSERT; writel(reg_val, mdata->base + SPI_CMD_REG); @@ -402,14 +242,8 @@ static int mtk_spi_prepare_message(struct spi_master *master, static void mtk_spi_set_cs(struct spi_device *spi, bool enable) { u32 reg_val; - int ret; struct mtk_spi *mdata = spi_master_get_devdata(spi->master); - ret = clk_prepare_enable(mdata->spi_clk); - if (ret < 0) { - pr_info("failed to enable spi_clk (%d)\n", ret); - return; - } reg_val = readl(mdata->base + SPI_CMD_REG); if (!enable) { reg_val |= SPI_CMD_PAUSE_EN; @@ -420,16 +254,13 @@ static void mtk_spi_set_cs(struct spi_device *spi, bool enable) mdata->state = MTK_SPI_IDLE; mtk_spi_reset(mdata); } - clk_disable_unprepare(mdata->spi_clk); } static void mtk_spi_prepare_transfer(struct spi_master *master, - struct spi_transfer *xfer, struct spi_device *spi) + struct spi_transfer *xfer) { u32 spi_clk_hz, div, sck_time, cs_time, reg_val = 0; struct mtk_spi *mdata = spi_master_get_devdata(master); - u32 cs_setuptime, cs_holdtime, cs_idletime = 0; - struct mtk_chip_config *chip_config = spi->controller_data; spi_clk_hz = clk_get_rate(mdata->spi_clk); if (xfer->speed_hz < spi_clk_hz / 2) @@ -440,49 +271,29 @@ static void mtk_spi_prepare_transfer(struct spi_master *master, sck_time = (div + 1) / 2; cs_time = sck_time * 2; - if (chip_config->cs_setuptime) - cs_setuptime = chip_config->cs_setuptime; - else - cs_setuptime = cs_time; - - if (chip_config->cs_holdtime) - cs_holdtime = chip_config->cs_holdtime; - else - cs_holdtime = cs_time; - - if (chip_config->cs_idletime) - cs_idletime = chip_config->cs_idletime; - else - cs_idletime = cs_time; - if (mdata->dev_comp->enhance_timing) { reg_val |= (((sck_time - 1) & 0xffff) << SPI_CFG0_SCK_HIGH_OFFSET); reg_val |= (((sck_time - 1) & 0xffff) << SPI_ADJUST_CFG0_SCK_LOW_OFFSET); writel(reg_val, mdata->base + SPI_CFG2_REG); - - reg_val = 0; - reg_val |= (((cs_holdtime - 1) & 0xffff) + reg_val |= (((cs_time - 1) & 0xffff) << SPI_ADJUST_CFG0_CS_HOLD_OFFSET); - reg_val |= (((cs_setuptime - 1) & 0xffff) + reg_val |= (((cs_time - 1) & 0xffff) << SPI_ADJUST_CFG0_CS_SETUP_OFFSET); writel(reg_val, mdata->base + SPI_CFG0_REG); } else { reg_val |= (((sck_time - 1) & 0xff) << SPI_CFG0_SCK_HIGH_OFFSET); - reg_val |= (((sck_time - 1) & 0xff) << - SPI_CFG0_SCK_LOW_OFFSET); - reg_val |= (((cs_holdtime - 1) & 0xff) << - SPI_CFG0_CS_HOLD_OFFSET); - reg_val |= (((cs_setuptime - 1) & 0xff) << - SPI_CFG0_CS_SETUP_OFFSET); + reg_val |= (((sck_time - 1) & 0xff) << SPI_CFG0_SCK_LOW_OFFSET); + reg_val |= (((cs_time - 1) & 0xff) << SPI_CFG0_CS_HOLD_OFFSET); + reg_val |= (((cs_time - 1) & 0xff) << SPI_CFG0_CS_SETUP_OFFSET); writel(reg_val, mdata->base + SPI_CFG0_REG); } reg_val = readl(mdata->base + SPI_CFG1_REG); reg_val &= ~SPI_CFG1_CS_IDLE_MASK; - reg_val |= (((cs_idletime - 1) & 0xff) << SPI_CFG1_CS_IDLE_OFFSET); + reg_val |= (((cs_time - 1) & 0xff) << SPI_CFG1_CS_IDLE_OFFSET); writel(reg_val, mdata->base + SPI_CFG1_REG); } @@ -557,56 +368,12 @@ static void mtk_spi_update_mdata_len(struct spi_master *master) static void mtk_spi_setup_dma_addr(struct spi_master *master, struct spi_transfer *xfer) { - u32 addr_ext; struct mtk_spi *mdata = spi_master_get_devdata(master); - spi_debug("xfer->tx_dma:0x%llx,xfer->rx_dma:0x%llx\n", - (uint64_t)xfer->tx_dma, (uint64_t)xfer->rx_dma); - if (mdata->dev_comp->dma8g_peri_ext) { - if (mdata->tx_sgl) { - addr_ext = readl(mdata->peri_regs + - mdata->dram_8gb_offset); - addr_ext = ((addr_ext & (ADDRSHIFT_W_MASK)) | - (u32)(cpu_to_le64(xfer->tx_dma) / SZ_1G)); - writel(addr_ext, - mdata->dram_8gb_offset + mdata->peri_regs); - writel((u32)(cpu_to_le64(xfer->tx_dma) % SZ_1G), - mdata->base + SPI_TX_SRC_REG); - } - if (mdata->rx_sgl) { - addr_ext = readl(mdata->peri_regs + - mdata->dram_8gb_offset); - addr_ext = ((addr_ext & (ADDRSHIFT_R_MASK)) | - (u32)((cpu_to_le64(xfer->rx_dma) / SZ_1G) - << ADDRSHIFT_R_OFFSET)); - writel(addr_ext, - mdata->dram_8gb_offset + mdata->peri_regs); - writel((u32)(cpu_to_le64(xfer->rx_dma) % SZ_1G), - mdata->base + SPI_RX_DST_REG); - } - } else if (mdata->dev_comp->dma8g_spi_ext) { - if (mdata->tx_sgl) { - writel((u32)(cpu_to_le64(xfer->tx_dma) & - MTK_SPI_32BIS_MASK), mdata->base + SPI_TX_SRC_REG); -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - writel((u32)(cpu_to_le64(xfer->tx_dma) >> - MTK_SPI_32BIS_SHIFT), mdata->base + SPI_TX_SRC_REG_64); -#endif - } - if (mdata->rx_sgl) { - writel((u32)(cpu_to_le64(xfer->rx_dma) & - MTK_SPI_32BIS_MASK), mdata->base + SPI_RX_DST_REG); -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - writel((u32)(cpu_to_le64(xfer->rx_dma) >> - MTK_SPI_32BIS_SHIFT), mdata->base + SPI_RX_DST_REG_64); -#endif - } - } else { - if (mdata->tx_sgl) - writel(xfer->tx_dma, mdata->base + SPI_TX_SRC_REG); - if (mdata->rx_sgl) - writel(xfer->rx_dma, mdata->base + SPI_RX_DST_REG); - } + if (mdata->tx_sgl) + writel(xfer->tx_dma, mdata->base + SPI_TX_SRC_REG); + if (mdata->rx_sgl) + writel(xfer->rx_dma, mdata->base + SPI_RX_DST_REG); } static int mtk_spi_fifo_transfer(struct spi_master *master, @@ -618,24 +385,21 @@ static int mtk_spi_fifo_transfer(struct spi_master *master, struct mtk_spi *mdata = spi_master_get_devdata(master); mdata->cur_transfer = xfer; - mdata->xfer_len = xfer->len; - mtk_spi_prepare_transfer(master, xfer, spi); + mdata->xfer_len = min(MTK_SPI_MAX_FIFO_SIZE, xfer->len); + mdata->num_xfered = 0; + mtk_spi_prepare_transfer(master, xfer); mtk_spi_setup_packet(master); cnt = xfer->len / 4; - if (xfer->tx_buf) - iowrite32_rep(mdata->base + SPI_TX_DATA_REG, xfer->tx_buf, cnt); + iowrite32_rep(mdata->base + SPI_TX_DATA_REG, xfer->tx_buf, cnt); remainder = xfer->len % 4; - if (xfer->tx_buf && remainder > 0) { + if (remainder > 0) { reg_val = 0; memcpy(®_val, xfer->tx_buf + (cnt * 4), remainder); writel(reg_val, mdata->base + SPI_TX_DATA_REG); } - spi_debug("spi setting Done.Dump reg before Transfer start:\n"); - spi_dump_reg(mdata); - mtk_spi_enable_transfer(master); return 1; @@ -653,8 +417,9 @@ static int mtk_spi_dma_transfer(struct spi_master *master, mdata->tx_sgl_len = 0; mdata->rx_sgl_len = 0; mdata->cur_transfer = xfer; + mdata->num_xfered = 0; - mtk_spi_prepare_transfer(master, xfer, spi); + mtk_spi_prepare_transfer(master, xfer); cmd = readl(mdata->base + SPI_CMD_REG); if (xfer->tx_buf) @@ -680,10 +445,6 @@ static int mtk_spi_dma_transfer(struct spi_master *master, mtk_spi_update_mdata_len(master); mtk_spi_setup_packet(master); mtk_spi_setup_dma_addr(master, xfer); - - spi_debug("spi setting Done.Dump reg before Transfer start:\n"); - spi_dump_reg(mdata); - mtk_spi_enable_transfer(master); return 1; @@ -693,7 +454,6 @@ static int mtk_spi_transfer_one(struct spi_master *master, struct spi_device *spi, struct spi_transfer *xfer) { - spi_debug("xfer->len:%d\n", xfer->len); if (master->can_dma(master, spi, xfer)) return mtk_spi_dma_transfer(master, spi, xfer); else @@ -704,7 +464,10 @@ static bool mtk_spi_can_dma(struct spi_master *master, struct spi_device *spi, struct spi_transfer *xfer) { - return xfer->len > MTK_SPI_MAX_FIFO_SIZE; + /* Buffers for DMA transactions must be 4-byte aligned */ + return (xfer->len > MTK_SPI_MAX_FIFO_SIZE && + (unsigned long)xfer->tx_buf % 4 == 0 && + (unsigned long)xfer->rx_buf % 4 == 0); } static int mtk_spi_setup(struct spi_device *spi) @@ -722,7 +485,7 @@ static int mtk_spi_setup(struct spi_device *spi) static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id) { - u32 cmd, reg_val, cnt, remainder; + u32 cmd, reg_val, cnt, remainder, len; struct spi_master *master = dev_id; struct mtk_spi *mdata = spi_master_get_devdata(master); struct spi_transfer *trans = mdata->cur_transfer; @@ -737,16 +500,43 @@ static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id) if (trans->rx_buf) { cnt = mdata->xfer_len / 4; ioread32_rep(mdata->base + SPI_RX_DATA_REG, - trans->rx_buf, cnt); + trans->rx_buf + mdata->num_xfered, cnt); remainder = mdata->xfer_len % 4; if (remainder > 0) { reg_val = readl(mdata->base + SPI_RX_DATA_REG); - memcpy(trans->rx_buf + (cnt * 4), - ®_val, remainder); + memcpy(trans->rx_buf + + mdata->num_xfered + + (cnt * 4), + ®_val, + remainder); } } - spi_finalize_current_transfer(master); - spi_debug("The last fifo transfer Done.\n"); + + mdata->num_xfered += mdata->xfer_len; + if (mdata->num_xfered == trans->len) { + spi_finalize_current_transfer(master); + return IRQ_HANDLED; + } + + len = trans->len - mdata->num_xfered; + mdata->xfer_len = min(MTK_SPI_MAX_FIFO_SIZE, len); + mtk_spi_setup_packet(master); + + cnt = mdata->xfer_len / 4; + iowrite32_rep(mdata->base + SPI_TX_DATA_REG, + trans->tx_buf + mdata->num_xfered, cnt); + + remainder = mdata->xfer_len % 4; + if (remainder > 0) { + reg_val = 0; + memcpy(®_val, + trans->tx_buf + (cnt * 4) + mdata->num_xfered, + remainder); + writel(reg_val, mdata->base + SPI_TX_DATA_REG); + } + + mtk_spi_enable_transfer(master); + return IRQ_HANDLED; } @@ -778,12 +568,9 @@ static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id) writel(cmd, mdata->base + SPI_CMD_REG); spi_finalize_current_transfer(master); - spi_debug("The last DMA transfer Done.\n"); return IRQ_HANDLED; } - spi_debug("One DMA transfer Done.Start Next\n"); - mtk_spi_update_mdata_len(master); mtk_spi_setup_packet(master); mtk_spi_setup_dma_addr(master, trans); @@ -798,8 +585,7 @@ static int mtk_spi_probe(struct platform_device *pdev) struct mtk_spi *mdata; const struct of_device_id *of_id; struct resource *res; - int i, irq, ret, value; - struct device_node *node_pericfg; + int i, irq, ret; master = spi_alloc_master(&pdev->dev, sizeof(*mdata)); if (!master) { @@ -829,18 +615,6 @@ static int mtk_spi_probe(struct platform_device *pdev) if (mdata->dev_comp->must_tx) master->flags = SPI_MASTER_MUST_TX; - ret = of_property_read_u32(pdev->dev.of_node, - "mediatek,kthread-rt", &value); - if (ret < 0) - dev_notice(&pdev->dev, - "No 'mediatek,kthread-rt' property\n"); - else { - if (value == 1) - master->rt = true; - else - master->rt = false; - } - if (mdata->dev_comp->need_pad_sel) { mdata->pad_num = of_property_count_u32_elems( pdev->dev.of_node, @@ -938,17 +712,16 @@ static int mtk_spi_probe(struct platform_device *pdev) goto err_put_master; } + clk_disable_unprepare(mdata->spi_clk); + pm_runtime_enable(&pdev->dev); ret = devm_spi_register_master(&pdev->dev, master); if (ret) { dev_err(&pdev->dev, "failed to register master (%d)\n", ret); - clk_disable_unprepare(mdata->spi_clk); goto err_disable_runtime_pm; } - clk_disable_unprepare(mdata->spi_clk); - if (mdata->dev_comp->need_pad_sel) { if (mdata->pad_num != master->num_chipselect) { dev_err(&pdev->dev, @@ -979,37 +752,6 @@ static int mtk_spi_probe(struct platform_device *pdev) } } - if (mdata->dev_comp->dma8g_peri_ext) { - node_pericfg = of_find_compatible_node(NULL, NULL, - "mediatek,pericfg"); - if (!node_pericfg) { - dev_notice(&pdev->dev, "error: node_pericfg init fail\n"); - goto err_disable_runtime_pm; - } - mdata->peri_regs = of_iomap(node_pericfg, 0); - if (IS_ERR(*(void **)&(mdata->peri_regs))) { - ret = PTR_ERR(*(void **)&mdata->peri_regs); - dev_notice(&pdev->dev, "error: ms->peri_regs init fail\n"); - mdata->peri_regs = NULL; - goto err_disable_runtime_pm; - } - if (of_property_read_u32(pdev->dev.of_node, - "mediatek,dram-8gb-offset", &mdata->dram_8gb_offset)) { - dev_notice(&pdev->dev, "SPI get dram-8gb-offset failed\n"); - goto err_disable_runtime_pm; - } - } - - ret = device_create_file(&pdev->dev, &dev_attr_spi_log); - if (ret) - dev_notice(&pdev->dev, "SPI sysfs_create_file fail, ret:%d\n", - ret); - - ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(DMA_ADDR_BITS)); - if (ret) - dev_notice(&pdev->dev, "SPI dma_set_mask(%d) failed, ret:%d\n", - DMA_ADDR_BITS, ret); - return 0; err_disable_runtime_pm: diff --git a/drivers/spi/spi-omap2-mcspi.c b/drivers/spi/spi-omap2-mcspi.c index 9bf64e6eca9b..1db4d3c1d2bf 100644 --- a/drivers/spi/spi-omap2-mcspi.c +++ b/drivers/spi/spi-omap2-mcspi.c @@ -298,7 +298,7 @@ static void omap2_mcspi_set_fifo(const struct spi_device *spi, struct omap2_mcspi_cs *cs = spi->controller_state; struct omap2_mcspi *mcspi; unsigned int wcnt; - int max_fifo_depth, fifo_depth, bytes_per_word; + int max_fifo_depth, bytes_per_word; u32 chconf, xferlevel; mcspi = spi_master_get_devdata(master); @@ -314,10 +314,6 @@ static void omap2_mcspi_set_fifo(const struct spi_device *spi, else max_fifo_depth = OMAP2_MCSPI_MAX_FIFODEPTH; - fifo_depth = gcd(t->len, max_fifo_depth); - if (fifo_depth < 2 || fifo_depth % bytes_per_word != 0) - goto disable_fifo; - wcnt = t->len / bytes_per_word; if (wcnt > OMAP2_MCSPI_MAX_FIFOWCNT) goto disable_fifo; @@ -325,16 +321,17 @@ static void omap2_mcspi_set_fifo(const struct spi_device *spi, xferlevel = wcnt << 16; if (t->rx_buf != NULL) { chconf |= OMAP2_MCSPI_CHCONF_FFER; - xferlevel |= (fifo_depth - 1) << 8; + xferlevel |= (bytes_per_word - 1) << 8; } + if (t->tx_buf != NULL) { chconf |= OMAP2_MCSPI_CHCONF_FFET; - xferlevel |= fifo_depth - 1; + xferlevel |= bytes_per_word - 1; } mcspi_write_reg(master, OMAP2_MCSPI_XFERLEVEL, xferlevel); mcspi_write_chconf0(spi, chconf); - mcspi->fifo_depth = fifo_depth; + mcspi->fifo_depth = max_fifo_depth; return; } @@ -601,7 +598,6 @@ omap2_mcspi_txrx_dma(struct spi_device *spi, struct spi_transfer *xfer) struct dma_slave_config cfg; enum dma_slave_buswidth width; unsigned es; - u32 burst; void __iomem *chstat_reg; void __iomem *irqstat_reg; int wait_res; @@ -623,22 +619,14 @@ omap2_mcspi_txrx_dma(struct spi_device *spi, struct spi_transfer *xfer) } count = xfer->len; - burst = 1; - - if (mcspi->fifo_depth > 0) { - if (count > mcspi->fifo_depth) - burst = mcspi->fifo_depth / es; - else - burst = count / es; - } memset(&cfg, 0, sizeof(cfg)); cfg.src_addr = cs->phys + OMAP2_MCSPI_RX0; cfg.dst_addr = cs->phys + OMAP2_MCSPI_TX0; cfg.src_addr_width = width; cfg.dst_addr_width = width; - cfg.src_maxburst = burst; - cfg.dst_maxburst = burst; + cfg.src_maxburst = 1; + cfg.dst_maxburst = 1; rx = xfer->rx_buf; tx = xfer->tx_buf; diff --git a/drivers/spi/spi-pic32.c b/drivers/spi/spi-pic32.c index f8a45af1fa9f..288002f6c613 100644 --- a/drivers/spi/spi-pic32.c +++ b/drivers/spi/spi-pic32.c @@ -320,7 +320,7 @@ static int pic32_spi_dma_transfer(struct pic32_spi *pic32s, desc_rx = dmaengine_prep_slave_sg(master->dma_rx, xfer->rx_sg.sgl, xfer->rx_sg.nents, - DMA_FROM_DEVICE, + DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!desc_rx) { ret = -EINVAL; @@ -330,7 +330,7 @@ static int pic32_spi_dma_transfer(struct pic32_spi *pic32s, desc_tx = dmaengine_prep_slave_sg(master->dma_tx, xfer->tx_sg.sgl, xfer->tx_sg.nents, - DMA_TO_DEVICE, + DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); if (!desc_tx) { ret = -EINVAL; diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 9bf3e5f945c7..b2245cdce230 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1559,7 +1559,13 @@ pxa2xx_spi_init_pdata(struct platform_device *pdev) } ssp->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(ssp->clk)) + return NULL; + ssp->irq = platform_get_irq(pdev, 0); + if (ssp->irq < 0) + return NULL; + ssp->type = type; ssp->pdev = pdev; ssp->port_id = pxa2xx_spi_get_port_id(adev); diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index fdcf3076681b..185bbdce62b1 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -445,6 +445,9 @@ static int rockchip_spi_prepare_dma(struct rockchip_spi *rs) struct dma_slave_config rxconf, txconf; struct dma_async_tx_descriptor *rxdesc, *txdesc; + memset(&rxconf, 0, sizeof(rxconf)); + memset(&txconf, 0, sizeof(txconf)); + spin_lock_irqsave(&rs->lock, flags); rs->state &= ~RXBUSY; rs->state &= ~TXBUSY; diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index db2a529accae..a7bd3c92356b 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -1283,8 +1283,8 @@ static int sh_msiof_spi_probe(struct platform_device *pdev) i = platform_get_irq(pdev, 0); if (i < 0) { - dev_err(&pdev->dev, "cannot get platform IRQ\n"); - ret = -ENOENT; + dev_err(&pdev->dev, "cannot get IRQ\n"); + ret = i; goto err1; } diff --git a/drivers/spi/spi-st-ssc4.c b/drivers/spi/spi-st-ssc4.c index a4e43fc19ece..5df01ffdef46 100644 --- a/drivers/spi/spi-st-ssc4.c +++ b/drivers/spi/spi-st-ssc4.c @@ -385,6 +385,7 @@ static int spi_st_probe(struct platform_device *pdev) return 0; clk_disable: + pm_runtime_disable(&pdev->dev); clk_disable_unprepare(spi_st->clk); put_master: spi_master_put(master); @@ -396,6 +397,8 @@ static int spi_st_remove(struct platform_device *pdev) struct spi_master *master = platform_get_drvdata(pdev); struct spi_st *spi_st = spi_master_get_devdata(master); + pm_runtime_disable(&pdev->dev); + clk_disable_unprepare(spi_st->clk); pinctrl_pm_select_sleep_state(&pdev->dev); diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index 2ad04796ef29..84ff0c507f0b 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -307,10 +307,16 @@ static unsigned tegra_spi_fill_tx_fifo_from_client_txbuf( x |= (u32)(*tx_buf++) << (i * 8); tegra_spi_writel(tspi, x, SPI_TX_FIFO); } + + tspi->cur_tx_pos += written_words * tspi->bytes_per_word; } else { + unsigned int write_bytes; max_n_32bit = min(tspi->curr_dma_words, tx_empty_count); written_words = max_n_32bit; nbytes = written_words * tspi->bytes_per_word; + if (nbytes > t->len - tspi->cur_pos) + nbytes = t->len - tspi->cur_pos; + write_bytes = nbytes; for (count = 0; count < max_n_32bit; count++) { u32 x = 0; @@ -319,8 +325,10 @@ static unsigned tegra_spi_fill_tx_fifo_from_client_txbuf( x |= (u32)(*tx_buf++) << (i * 8); tegra_spi_writel(tspi, x, SPI_TX_FIFO); } + + tspi->cur_tx_pos += write_bytes; } - tspi->cur_tx_pos += written_words * tspi->bytes_per_word; + return written_words; } @@ -344,20 +352,27 @@ static unsigned int tegra_spi_read_rx_fifo_to_client_rxbuf( for (i = 0; len && (i < 4); i++, len--) *rx_buf++ = (x >> i*8) & 0xFF; } - tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word; read_words += tspi->curr_dma_words; + tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word; } else { u32 rx_mask = ((u32)1 << t->bits_per_word) - 1; + u8 bytes_per_word = tspi->bytes_per_word; + unsigned int read_bytes; + len = rx_full_count * bytes_per_word; + if (len > t->len - tspi->cur_pos) + len = t->len - tspi->cur_pos; + read_bytes = len; for (count = 0; count < rx_full_count; count++) { u32 x = tegra_spi_readl(tspi, SPI_RX_FIFO) & rx_mask; - for (i = 0; (i < tspi->bytes_per_word); i++) + for (i = 0; len && (i < bytes_per_word); i++, len--) *rx_buf++ = (x >> (i*8)) & 0xFF; } - tspi->cur_rx_pos += rx_full_count * tspi->bytes_per_word; read_words += rx_full_count; + tspi->cur_rx_pos += read_bytes; } + return read_words; } @@ -372,12 +387,17 @@ static void tegra_spi_copy_client_txbuf_to_spi_txbuf( unsigned len = tspi->curr_dma_words * tspi->bytes_per_word; memcpy(tspi->tx_dma_buf, t->tx_buf + tspi->cur_pos, len); + tspi->cur_tx_pos += tspi->curr_dma_words * tspi->bytes_per_word; } else { unsigned int i; unsigned int count; u8 *tx_buf = (u8 *)t->tx_buf + tspi->cur_tx_pos; unsigned consume = tspi->curr_dma_words * tspi->bytes_per_word; + unsigned int write_bytes; + if (consume > t->len - tspi->cur_pos) + consume = t->len - tspi->cur_pos; + write_bytes = consume; for (count = 0; count < tspi->curr_dma_words; count++) { u32 x = 0; @@ -386,8 +406,9 @@ static void tegra_spi_copy_client_txbuf_to_spi_txbuf( x |= (u32)(*tx_buf++) << (i * 8); tspi->tx_dma_buf[count] = x; } + + tspi->cur_tx_pos += write_bytes; } - tspi->cur_tx_pos += tspi->curr_dma_words * tspi->bytes_per_word; /* Make the dma buffer to read by dma */ dma_sync_single_for_device(tspi->dev, tspi->tx_dma_phys, @@ -405,20 +426,28 @@ static void tegra_spi_copy_spi_rxbuf_to_client_rxbuf( unsigned len = tspi->curr_dma_words * tspi->bytes_per_word; memcpy(t->rx_buf + tspi->cur_rx_pos, tspi->rx_dma_buf, len); + tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word; } else { unsigned int i; unsigned int count; unsigned char *rx_buf = t->rx_buf + tspi->cur_rx_pos; u32 rx_mask = ((u32)1 << t->bits_per_word) - 1; + unsigned consume = tspi->curr_dma_words * tspi->bytes_per_word; + unsigned int read_bytes; + if (consume > t->len - tspi->cur_pos) + consume = t->len - tspi->cur_pos; + read_bytes = consume; for (count = 0; count < tspi->curr_dma_words; count++) { u32 x = tspi->rx_dma_buf[count] & rx_mask; - for (i = 0; (i < tspi->bytes_per_word); i++) + for (i = 0; consume && (i < tspi->bytes_per_word); + i++, consume--) *rx_buf++ = (x >> (i*8)) & 0xFF; } + + tspi->cur_rx_pos += read_bytes; } - tspi->cur_rx_pos += tspi->curr_dma_words * tspi->bytes_per_word; /* Make the dma buffer to read by dma */ dma_sync_single_for_device(tspi->dev, tspi->rx_dma_phys, @@ -470,21 +499,38 @@ static int tegra_spi_start_rx_dma(struct tegra_spi_data *tspi, int len) return 0; } +static int tegra_spi_flush_fifos(struct tegra_spi_data *tspi) +{ + unsigned long timeout = jiffies + HZ; + u32 status; + + status = tegra_spi_readl(tspi, SPI_FIFO_STATUS); + if ((status & SPI_FIFO_EMPTY) != SPI_FIFO_EMPTY) { + status |= SPI_RX_FIFO_FLUSH | SPI_TX_FIFO_FLUSH; + tegra_spi_writel(tspi, status, SPI_FIFO_STATUS); + while ((status & SPI_FIFO_EMPTY) != SPI_FIFO_EMPTY) { + status = tegra_spi_readl(tspi, SPI_FIFO_STATUS); + if (time_after(jiffies, timeout)) { + dev_err(tspi->dev, + "timeout waiting for fifo flush\n"); + return -EIO; + } + + udelay(1); + } + } + + return 0; +} + static int tegra_spi_start_dma_based_transfer( struct tegra_spi_data *tspi, struct spi_transfer *t) { u32 val; unsigned int len; int ret = 0; - u32 status; - - /* Make sure that Rx and Tx fifo are empty */ - status = tegra_spi_readl(tspi, SPI_FIFO_STATUS); - if ((status & SPI_FIFO_EMPTY) != SPI_FIFO_EMPTY) { - dev_err(tspi->dev, "Rx/Tx fifo are not empty status 0x%08x\n", - (unsigned)status); - return -EIO; - } + u8 dma_burst; + struct dma_slave_config dma_sconfig = {0}; val = SPI_DMA_BLK_SET(tspi->curr_dma_words - 1); tegra_spi_writel(tspi, val, SPI_DMA_BLK); @@ -496,12 +542,16 @@ static int tegra_spi_start_dma_based_transfer( len = tspi->curr_dma_words * 4; /* Set attention level based on length of transfer */ - if (len & 0xF) + if (len & 0xF) { val |= SPI_TX_TRIG_1 | SPI_RX_TRIG_1; - else if (((len) >> 4) & 0x1) + dma_burst = 1; + } else if (((len) >> 4) & 0x1) { val |= SPI_TX_TRIG_4 | SPI_RX_TRIG_4; - else + dma_burst = 4; + } else { val |= SPI_TX_TRIG_8 | SPI_RX_TRIG_8; + dma_burst = 8; + } if (tspi->cur_direction & DATA_DIR_TX) val |= SPI_IE_TX; @@ -512,7 +562,18 @@ static int tegra_spi_start_dma_based_transfer( tegra_spi_writel(tspi, val, SPI_DMA_CTL); tspi->dma_control_reg = val; + dma_sconfig.device_fc = true; if (tspi->cur_direction & DATA_DIR_TX) { + dma_sconfig.dst_addr = tspi->phys + SPI_TX_FIFO; + dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + dma_sconfig.dst_maxburst = dma_burst; + ret = dmaengine_slave_config(tspi->tx_dma_chan, &dma_sconfig); + if (ret < 0) { + dev_err(tspi->dev, + "DMA slave config failed: %d\n", ret); + return ret; + } + tegra_spi_copy_client_txbuf_to_spi_txbuf(tspi, t); ret = tegra_spi_start_tx_dma(tspi, len); if (ret < 0) { @@ -523,6 +584,16 @@ static int tegra_spi_start_dma_based_transfer( } if (tspi->cur_direction & DATA_DIR_RX) { + dma_sconfig.src_addr = tspi->phys + SPI_RX_FIFO; + dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + dma_sconfig.src_maxburst = dma_burst; + ret = dmaengine_slave_config(tspi->rx_dma_chan, &dma_sconfig); + if (ret < 0) { + dev_err(tspi->dev, + "DMA slave config failed: %d\n", ret); + return ret; + } + /* Make the dma buffer to read by dma */ dma_sync_single_for_device(tspi->dev, tspi->rx_dma_phys, tspi->dma_buf_size, DMA_FROM_DEVICE); @@ -582,7 +653,6 @@ static int tegra_spi_init_dma_param(struct tegra_spi_data *tspi, u32 *dma_buf; dma_addr_t dma_phys; int ret; - struct dma_slave_config dma_sconfig; dma_chan = dma_request_slave_channel_reason(tspi->dev, dma_to_memory ? "rx" : "tx"); @@ -602,19 +672,6 @@ static int tegra_spi_init_dma_param(struct tegra_spi_data *tspi, return -ENOMEM; } - if (dma_to_memory) { - dma_sconfig.src_addr = tspi->phys + SPI_RX_FIFO; - dma_sconfig.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - dma_sconfig.src_maxburst = 0; - } else { - dma_sconfig.dst_addr = tspi->phys + SPI_TX_FIFO; - dma_sconfig.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; - dma_sconfig.dst_maxburst = 0; - } - - ret = dmaengine_slave_config(dma_chan, &dma_sconfig); - if (ret) - goto scrub; if (dma_to_memory) { tspi->rx_dma_chan = dma_chan; tspi->rx_dma_buf = dma_buf; @@ -625,11 +682,6 @@ static int tegra_spi_init_dma_param(struct tegra_spi_data *tspi, tspi->tx_dma_phys = dma_phys; } return 0; - -scrub: - dma_free_coherent(tspi->dev, tspi->dma_buf_size, dma_buf, dma_phys); - dma_release_channel(dma_chan); - return ret; } static void tegra_spi_deinit_dma_param(struct tegra_spi_data *tspi, @@ -730,6 +782,8 @@ static int tegra_spi_start_transfer_one(struct spi_device *spi, if (tspi->is_packed) command1 |= SPI_PACKED; + else + command1 &= ~SPI_PACKED; command1 &= ~(SPI_CS_SEL_MASK | SPI_TX_EN | SPI_RX_EN); tspi->cur_direction = 0; @@ -748,6 +802,9 @@ static int tegra_spi_start_transfer_one(struct spi_device *spi, dev_dbg(tspi->dev, "The def 0x%x and written 0x%x\n", tspi->def_command1_reg, (unsigned)command1); + ret = tegra_spi_flush_fifos(tspi); + if (ret < 0) + return ret; if (total_fifo_words > SPI_FIFO_DEPTH) ret = tegra_spi_start_dma_based_transfer(tspi, t); else @@ -838,7 +895,17 @@ static int tegra_spi_transfer_one_message(struct spi_master *master, if (WARN_ON(ret == 0)) { dev_err(tspi->dev, "spi transfer timeout, err %d\n", ret); + if (tspi->is_curr_dma_xfer && + (tspi->cur_direction & DATA_DIR_TX)) + dmaengine_terminate_all(tspi->tx_dma_chan); + if (tspi->is_curr_dma_xfer && + (tspi->cur_direction & DATA_DIR_RX)) + dmaengine_terminate_all(tspi->rx_dma_chan); ret = -EIO; + tegra_spi_flush_fifos(tspi); + reset_control_assert(tspi->rst); + udelay(2); + reset_control_deassert(tspi->rst); goto complete_xfer; } @@ -889,6 +956,7 @@ static irqreturn_t handle_cpu_based_xfer(struct tegra_spi_data *tspi) tspi->status_reg); dev_err(tspi->dev, "CpuXfer 0x%08x:0x%08x\n", tspi->command1_reg, tspi->dma_control_reg); + tegra_spi_flush_fifos(tspi); reset_control_assert(tspi->rst); udelay(2); reset_control_deassert(tspi->rst); @@ -961,6 +1029,7 @@ static irqreturn_t handle_dma_based_xfer(struct tegra_spi_data *tspi) tspi->status_reg); dev_err(tspi->dev, "DmaXfer 0x%08x:0x%08x\n", tspi->command1_reg, tspi->dma_control_reg); + tegra_spi_flush_fifos(tspi); reset_control_assert(tspi->rst); udelay(2); reset_control_deassert(tspi->rst); diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index 9831c1106945..62b074b167a9 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -1078,7 +1078,7 @@ static int tegra_slink_probe(struct platform_device *pdev) ret = clk_enable(tspi->clk); if (ret < 0) { dev_err(&pdev->dev, "Clock enable failed %d\n", ret); - goto exit_free_master; + goto exit_clk_unprepare; } spi_irq = platform_get_irq(pdev, 0); @@ -1151,6 +1151,8 @@ exit_free_irq: free_irq(spi_irq, tspi); exit_clk_disable: clk_disable(tspi->clk); +exit_clk_unprepare: + clk_unprepare(tspi->clk); exit_free_master: spi_master_put(master); return ret; @@ -1164,6 +1166,7 @@ static int tegra_slink_remove(struct platform_device *pdev) free_irq(tspi->irq, tspi); clk_disable(tspi->clk); + clk_unprepare(tspi->clk); if (tspi->tx_dma_chan) tegra_slink_deinit_dma_param(tspi, false); diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c index 4389ab80c23e..fa730a871d25 100644 --- a/drivers/spi/spi-topcliff-pch.c +++ b/drivers/spi/spi-topcliff-pch.c @@ -1008,6 +1008,9 @@ static void pch_spi_handle_dma(struct pch_spi_data *data, int *bpw) /* RX */ dma->sg_rx_p = kcalloc(num, sizeof(*dma->sg_rx_p), GFP_ATOMIC); + if (!dma->sg_rx_p) + return; + sg_init_table(dma->sg_rx_p, num); /* Initialize SG table */ /* offset, length setting */ sg = dma->sg_rx_p; @@ -1068,6 +1071,9 @@ static void pch_spi_handle_dma(struct pch_spi_data *data, int *bpw) } dma->sg_tx_p = kcalloc(num, sizeof(*dma->sg_tx_p), GFP_ATOMIC); + if (!dma->sg_tx_p) + return; + sg_init_table(dma->sg_tx_p, num); /* Initialize SG table */ /* offset, length setting */ sg = dma->sg_tx_p; diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index cda10719d1d1..028725573e63 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -634,6 +634,9 @@ static int spidev_release(struct inode *inode, struct file *filp) if (dofree) kfree(spidev); } +#ifdef CONFIG_SPI_SLAVE + spi_slave_abort(spidev->spi); +#endif mutex_unlock(&device_list_lock); return 0; @@ -724,11 +727,9 @@ static int spidev_probe(struct spi_device *spi) * compatible string, it is a Linux implementation thing * rather than a description of the hardware. */ - if (spi->dev.of_node && !of_match_device(spidev_dt_ids, &spi->dev)) { - dev_err(&spi->dev, "buggy DT: spidev listed directly in DT\n"); - WARN_ON(spi->dev.of_node && - !of_match_device(spidev_dt_ids, &spi->dev)); - } + WARN(spi->dev.of_node && + of_device_is_compatible(spi->dev.of_node, "spidev"), + "%pOF: buggy DT: spidev listed directly in DT\n", spi->dev.of_node); spidev_probe_acpi(spi); diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index 924b7f53a382..a53825bca481 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -52,6 +52,8 @@ #include #endif +static atomic_long_t total_heap_bytes; + struct ion_dmabuf_list { struct list_head head; struct mutex lock; /* dmabuf lock */ @@ -389,6 +391,7 @@ exit: mutex_lock(&dev->buffer_lock); ion_buffer_add(dev, buffer); mutex_unlock(&dev->buffer_lock); + atomic_long_add(len, &total_heap_bytes); trace_ion_heap_grow(heap->name, len, atomic_long_read(&heap->total_allocated)); atomic_long_add(len, &heap->total_allocated); @@ -425,6 +428,7 @@ static void _ion_buffer_destroy(struct kref *kref) mutex_lock(&dev->buffer_lock); rb_erase(&buffer->node, &dev->buffers); mutex_unlock(&dev->buffer_lock); + atomic_long_sub(buffer->size, &total_heap_bytes); if (heap->flags & ION_HEAP_FLAG_DEFER_FREE) ion_heap_freelist_add(heap, buffer); @@ -2297,6 +2301,56 @@ void ion_device_add_heap(struct ion_device *dev, struct ion_heap *heap) } EXPORT_SYMBOL(ion_device_add_heap); +static ssize_t +total_heaps_kb_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + u64 size_in_bytes = atomic_long_read(&total_heap_bytes); + + return sprintf(buf, "%llu\n", div_u64(size_in_bytes, 1024)); +} + +static ssize_t +total_pools_kb_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + u64 size_in_bytes = ion_page_pool_nr_pages() * PAGE_SIZE; + + return sprintf(buf, "%llu\n", div_u64(size_in_bytes, 1024)); +} + +static struct kobj_attribute total_heaps_kb_attr = + __ATTR_RO(total_heaps_kb); + +static struct kobj_attribute total_pools_kb_attr = + __ATTR_RO(total_pools_kb); + +static struct attribute *ion_device_attrs[] = { + &total_heaps_kb_attr.attr, + &total_pools_kb_attr.attr, + NULL, +}; + +ATTRIBUTE_GROUPS(ion_device); + +static int ion_init_sysfs(void) +{ + struct kobject *ion_kobj; + int ret; + + ion_kobj = kobject_create_and_add("ion", kernel_kobj); + if (!ion_kobj) + return -ENOMEM; + + ret = sysfs_create_groups(ion_kobj, ion_device_groups); + if (ret) { + kobject_put(ion_kobj); + return ret; + } + + return 0; +} + struct ion_device *ion_device_create(long (*custom_ioctl) (struct ion_client *client, unsigned int cmd, @@ -2316,8 +2370,13 @@ struct ion_device *ion_device_create(long (*custom_ioctl) ret = misc_register(&idev->dev); if (ret) { pr_err("ion: failed to register misc device.\n"); - kfree(idev); - return ERR_PTR(ret); + goto err_reg; + } + + ret = ion_init_sysfs(); + if (ret) { + pr_err("ion: failed to add sysfs attributes.\n"); + goto err_sysfs; } idev->debug_root = debugfs_create_dir("ion", NULL); @@ -2344,6 +2403,12 @@ debugfs_done: plist_head_init(&idev->heaps); idev->clients = RB_ROOT; return idev; + +err_sysfs: + misc_deregister(&idev->dev); +err_reg: + kfree(idev); + return ERR_PTR(ret); } EXPORT_SYMBOL(ion_device_create); diff --git a/drivers/staging/android/ion/ion_page_pool.c b/drivers/staging/android/ion/ion_page_pool.c index 4130f975e647..921072ef0fdd 100644 --- a/drivers/staging/android/ion/ion_page_pool.c +++ b/drivers/staging/android/ion/ion_page_pool.c @@ -27,6 +27,13 @@ static unsigned long long last_alloc_ts; +/* + * We avoid atomic_long_t to minimize cache flushes at the cost of possible + * race which would result in a small accounting inaccuracy that we can + * tolerate. + */ +static long nr_total_pages; + static void *ion_page_pool_alloc_pages(struct ion_page_pool *pool) { unsigned long long start, end; @@ -75,6 +82,10 @@ static int ion_page_pool_add(struct ion_page_pool *pool, struct page *page) list_add_tail(&page->lru, &pool->low_items); pool->low_count++; } + + nr_total_pages += 1 << pool->order; + mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE, + 1 << pool->order); mutex_unlock(&pool->mutex); return 0; } @@ -94,6 +105,9 @@ static struct page *ion_page_pool_remove(struct ion_page_pool *pool, bool high) } list_del(&page->lru); + nr_total_pages -= 1 << pool->order; + mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE, + -(1 << pool->order)); return page; } @@ -141,6 +155,14 @@ static int ion_page_pool_total(struct ion_page_pool *pool, bool high) return count << pool->order; } +long ion_page_pool_nr_pages(void) +{ + /* Correct possible overflow caused by racing writes */ + if (nr_total_pages < 0) + nr_total_pages = 0; + return nr_total_pages; +} + int ion_page_pool_shrink(struct ion_page_pool *pool, gfp_t gfp_mask, int nr_to_scan) { diff --git a/drivers/staging/android/ion/ion_priv.h b/drivers/staging/android/ion/ion_priv.h index c41f6a46c62e..62ea18f19492 100644 --- a/drivers/staging/android/ion/ion_priv.h +++ b/drivers/staging/android/ion/ion_priv.h @@ -474,6 +474,7 @@ struct ion_page_pool *ion_page_pool_create(gfp_t gfp_mask, unsigned int order, void ion_page_pool_destroy(struct ion_page_pool *pool); struct page *ion_page_pool_alloc(struct ion_page_pool *pool); void ion_page_pool_free(struct ion_page_pool *pool, struct page *page); +long ion_page_pool_nr_pages(void); /** ion_page_pool_shrink - shrinks the size of the memory cached in the pool * @pool: the pool diff --git a/drivers/staging/ccree/cc_hw_queue_defs.h b/drivers/staging/ccree/cc_hw_queue_defs.h index 2ae0f655e7a0..b86f47712e30 100644 --- a/drivers/staging/ccree/cc_hw_queue_defs.h +++ b/drivers/staging/ccree/cc_hw_queue_defs.h @@ -467,8 +467,7 @@ static inline void set_flow_mode(struct cc_hw_desc *pdesc, * @pdesc: pointer HW descriptor struct * @mode: Any one of the modes defined in [CC7x-DESC] */ -static inline void set_cipher_mode(struct cc_hw_desc *pdesc, - enum drv_cipher_mode mode) +static inline void set_cipher_mode(struct cc_hw_desc *pdesc, int mode) { pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_MODE, mode); } @@ -479,8 +478,7 @@ static inline void set_cipher_mode(struct cc_hw_desc *pdesc, * @pdesc: pointer HW descriptor struct * @mode: Any one of the modes defined in [CC7x-DESC] */ -static inline void set_cipher_config0(struct cc_hw_desc *pdesc, - enum drv_crypto_direction mode) +static inline void set_cipher_config0(struct cc_hw_desc *pdesc, int mode) { pdesc->word[4] |= FIELD_PREP(WORD4_CIPHER_CONF0, mode); } diff --git a/drivers/staging/comedi/drivers/adv_pci1710.c b/drivers/staging/comedi/drivers/adv_pci1710.c index 2c1b6de30da8..385e14269870 100644 --- a/drivers/staging/comedi/drivers/adv_pci1710.c +++ b/drivers/staging/comedi/drivers/adv_pci1710.c @@ -45,8 +45,8 @@ #define PCI171X_RANGE_UNI BIT(4) #define PCI171X_RANGE_GAIN(x) (((x) & 0x7) << 0) #define PCI171X_MUX_REG 0x04 /* W: A/D multiplexor control */ -#define PCI171X_MUX_CHANH(x) (((x) & 0xf) << 8) -#define PCI171X_MUX_CHANL(x) (((x) & 0xf) << 0) +#define PCI171X_MUX_CHANH(x) (((x) & 0xff) << 8) +#define PCI171X_MUX_CHANL(x) (((x) & 0xff) << 0) #define PCI171X_MUX_CHAN(x) (PCI171X_MUX_CHANH(x) | PCI171X_MUX_CHANL(x)) #define PCI171X_STATUS_REG 0x06 /* R: status register */ #define PCI171X_STATUS_IRQ BIT(11) /* 1=IRQ occurred */ diff --git a/drivers/staging/comedi/drivers/gsc_hpdi.c b/drivers/staging/comedi/drivers/gsc_hpdi.c index e5b948405fd9..a09631f9d813 100644 --- a/drivers/staging/comedi/drivers/gsc_hpdi.c +++ b/drivers/staging/comedi/drivers/gsc_hpdi.c @@ -632,6 +632,11 @@ static int gsc_hpdi_auto_attach(struct comedi_device *dev, dma_alloc_coherent(&pcidev->dev, DMA_BUFFER_SIZE, &devpriv->dio_buffer_phys_addr[i], GFP_KERNEL); + if (!devpriv->dio_buffer[i]) { + dev_warn(dev->class_dev, + "failed to allocate DMA buffer\n"); + return -ENOMEM; + } } /* allocate dma descriptors */ devpriv->dma_desc = dma_alloc_coherent(&pcidev->dev, @@ -639,6 +644,11 @@ static int gsc_hpdi_auto_attach(struct comedi_device *dev, NUM_DMA_DESCRIPTORS, &devpriv->dma_desc_phys_addr, GFP_KERNEL); + if (!devpriv->dma_desc) { + dev_warn(dev->class_dev, + "failed to allocate DMA descriptors\n"); + return -ENOMEM; + } if (devpriv->dma_desc_phys_addr & 0xf) { dev_warn(dev->class_dev, " dma descriptors not quad-word aligned (bug)\n"); diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c index 36361bdf934a..2f82dcb1fd06 100644 --- a/drivers/staging/comedi/drivers/ni_mio_common.c +++ b/drivers/staging/comedi/drivers/ni_mio_common.c @@ -4991,7 +4991,10 @@ static int ni_valid_rtsi_output_source(struct comedi_device *dev, case NI_RTSI_OUTPUT_G_SRC0: case NI_RTSI_OUTPUT_G_GATE0: case NI_RTSI_OUTPUT_RGOUT0: - case NI_RTSI_OUTPUT_RTSI_BRD_0: + case NI_RTSI_OUTPUT_RTSI_BRD(0): + case NI_RTSI_OUTPUT_RTSI_BRD(1): + case NI_RTSI_OUTPUT_RTSI_BRD(2): + case NI_RTSI_OUTPUT_RTSI_BRD(3): return 1; case NI_RTSI_OUTPUT_RTSI_OSC: return (devpriv->is_m_series) ? 1 : 0; @@ -5012,11 +5015,18 @@ static int ni_set_rtsi_routing(struct comedi_device *dev, devpriv->rtsi_trig_a_output_reg |= NISTC_RTSI_TRIG(chan, src); ni_stc_writew(dev, devpriv->rtsi_trig_a_output_reg, NISTC_RTSI_TRIGA_OUT_REG); - } else if (chan < 8) { + } else if (chan < NISTC_RTSI_TRIG_NUM_CHAN(devpriv->is_m_series)) { devpriv->rtsi_trig_b_output_reg &= ~NISTC_RTSI_TRIG_MASK(chan); devpriv->rtsi_trig_b_output_reg |= NISTC_RTSI_TRIG(chan, src); ni_stc_writew(dev, devpriv->rtsi_trig_b_output_reg, NISTC_RTSI_TRIGB_OUT_REG); + } else if (chan != NISTC_RTSI_TRIG_OLD_CLK_CHAN) { + /* probably should never reach this, since the + * ni_valid_rtsi_output_source above errors out if chan is too + * high + */ + dev_err(dev->class_dev, "%s: unknown rtsi channel\n", __func__); + return -EINVAL; } return 2; } @@ -5032,12 +5042,12 @@ static unsigned int ni_get_rtsi_routing(struct comedi_device *dev, } else if (chan < NISTC_RTSI_TRIG_NUM_CHAN(devpriv->is_m_series)) { return NISTC_RTSI_TRIG_TO_SRC(chan, devpriv->rtsi_trig_b_output_reg); - } else { - if (chan == NISTC_RTSI_TRIG_OLD_CLK_CHAN) - return NI_RTSI_OUTPUT_RTSI_OSC; - dev_err(dev->class_dev, "bug! should never get here?\n"); - return 0; + } else if (chan == NISTC_RTSI_TRIG_OLD_CLK_CHAN) { + return NI_RTSI_OUTPUT_RTSI_OSC; } + + dev_err(dev->class_dev, "%s: unknown rtsi channel\n", __func__); + return -EINVAL; } static int ni_rtsi_insn_config(struct comedi_device *dev, diff --git a/drivers/staging/comedi/drivers/usbduxfast.c b/drivers/staging/comedi/drivers/usbduxfast.c index 608403c7586b..f0572d6a5f63 100644 --- a/drivers/staging/comedi/drivers/usbduxfast.c +++ b/drivers/staging/comedi/drivers/usbduxfast.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004-2014 Bernd Porr, mail@berndporr.me.uk + * Copyright (C) 2004-2019 Bernd Porr, mail@berndporr.me.uk * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,7 +17,7 @@ * Description: University of Stirling USB DAQ & INCITE Technology Limited * Devices: [ITL] USB-DUX-FAST (usbduxfast) * Author: Bernd Porr - * Updated: 10 Oct 2014 + * Updated: 16 Nov 2019 * Status: stable */ @@ -31,6 +31,7 @@ * * * Revision history: + * 1.0: Fixed a rounding error in usbduxfast_ai_cmdtest * 0.9: Dropping the first data packet which seems to be from the last transfer. * Buffer overflows in the FX2 are handed over to comedi. * 0.92: Dropping now 4 packets. The quad buffer has to be emptied. @@ -359,6 +360,7 @@ static int usbduxfast_ai_cmdtest(struct comedi_device *dev, struct comedi_cmd *cmd) { int err = 0; + int err2 = 0; unsigned int steps; unsigned int arg; @@ -408,11 +410,16 @@ static int usbduxfast_ai_cmdtest(struct comedi_device *dev, */ steps = (cmd->convert_arg * 30) / 1000; if (cmd->chanlist_len != 1) - err |= comedi_check_trigger_arg_min(&steps, - MIN_SAMPLING_PERIOD); - err |= comedi_check_trigger_arg_max(&steps, MAX_SAMPLING_PERIOD); - arg = (steps * 1000) / 30; - err |= comedi_check_trigger_arg_is(&cmd->convert_arg, arg); + err2 |= comedi_check_trigger_arg_min(&steps, + MIN_SAMPLING_PERIOD); + else + err2 |= comedi_check_trigger_arg_min(&steps, 1); + err2 |= comedi_check_trigger_arg_max(&steps, MAX_SAMPLING_PERIOD); + if (err2) { + err |= err2; + arg = (steps * 1000) / 30; + err |= comedi_check_trigger_arg_is(&cmd->convert_arg, arg); + } if (cmd->stop_src == TRIG_COUNT) err |= comedi_check_trigger_arg_min(&cmd->stop_arg, 1); diff --git a/drivers/staging/fbtft/fbtft-core.c b/drivers/staging/fbtft/fbtft-core.c index 0cbcbad8f074..b81c6dfa5b24 100644 --- a/drivers/staging/fbtft/fbtft-core.c +++ b/drivers/staging/fbtft/fbtft-core.c @@ -780,7 +780,7 @@ struct fb_info *fbtft_framebuffer_alloc(struct fbtft_display *display, fbdefio->deferred_io = fbtft_deferred_io; fb_deferred_io_init(info); - strncpy(info->fix.id, dev->driver->name, 16); + snprintf(info->fix.id, sizeof(info->fix.id), "%s", dev->driver->name); info->fix.type = FB_TYPE_PACKED_PIXELS; info->fix.visual = FB_VISUAL_TRUECOLOR; info->fix.xpanstep = 0; diff --git a/drivers/staging/greybus/audio_manager.c b/drivers/staging/greybus/audio_manager.c index aa6508b44fab..ed7c32542cb3 100644 --- a/drivers/staging/greybus/audio_manager.c +++ b/drivers/staging/greybus/audio_manager.c @@ -90,8 +90,8 @@ void gb_audio_manager_remove_all(void) list_for_each_entry_safe(module, next, &modules_list, list) { list_del(&module->list); - kobject_put(&module->kobj); ida_simple_remove(&module_id, module->id); + kobject_put(&module->kobj); } is_empty = list_empty(&modules_list); diff --git a/drivers/staging/greybus/light.c b/drivers/staging/greybus/light.c index 0f538b8c3a07..4e7575147775 100644 --- a/drivers/staging/greybus/light.c +++ b/drivers/staging/greybus/light.c @@ -1103,21 +1103,21 @@ static void gb_lights_channel_release(struct gb_channel *channel) static void gb_lights_light_release(struct gb_light *light) { int i; - int count; light->ready = false; - count = light->channels_count; - if (light->has_flash) gb_lights_light_v4l2_unregister(light); + light->has_flash = false; - for (i = 0; i < count; i++) { + for (i = 0; i < light->channels_count; i++) gb_lights_channel_release(&light->channels[i]); - light->channels_count--; - } + light->channels_count = 0; + kfree(light->channels); + light->channels = NULL; kfree(light->name); + light->name = NULL; } static void gb_lights_release(struct gb_lights *glights) diff --git a/drivers/staging/iio/addac/adt7316-i2c.c b/drivers/staging/iio/addac/adt7316-i2c.c index f66dd3ebbab1..856bcfa60c6c 100644 --- a/drivers/staging/iio/addac/adt7316-i2c.c +++ b/drivers/staging/iio/addac/adt7316-i2c.c @@ -35,6 +35,8 @@ static int adt7316_i2c_read(void *client, u8 reg, u8 *data) return ret; } + *data = ret; + return 0; } diff --git a/drivers/staging/most/aim-cdev/cdev.c b/drivers/staging/most/aim-cdev/cdev.c index 1e5cbc893496..d000b6ff8a7d 100644 --- a/drivers/staging/most/aim-cdev/cdev.c +++ b/drivers/staging/most/aim-cdev/cdev.c @@ -455,7 +455,9 @@ static int aim_probe(struct most_interface *iface, int channel_id, c->devno = MKDEV(major, current_minor); cdev_init(&c->cdev, &channel_fops); c->cdev.owner = THIS_MODULE; - cdev_add(&c->cdev, c->devno, 1); + retval = cdev_add(&c->cdev, c->devno, 1); + if (retval < 0) + goto err_free_c; c->iface = iface; c->cfg = cfg; c->channel_id = channel_id; @@ -491,6 +493,7 @@ error_create_device: list_del(&c->list); error_alloc_kfifo: cdev_del(&c->cdev); +err_free_c: kfree(c); error_alloc_channel: ida_simple_remove(&minor_id, current_minor); diff --git a/drivers/staging/most/aim-network/networking.c b/drivers/staging/most/aim-network/networking.c index 936f013c350e..6398c27563c9 100644 --- a/drivers/staging/most/aim-network/networking.c +++ b/drivers/staging/most/aim-network/networking.c @@ -85,6 +85,11 @@ static int skb_to_mamac(const struct sk_buff *skb, struct mbo *mbo) unsigned int payload_len = skb->len - ETH_HLEN; unsigned int mdp_len = payload_len + MDP_HDR_LEN; + if (mdp_len < skb->len) { + pr_err("drop: too large packet! (%u)\n", skb->len); + return -EINVAL; + } + if (mbo->buffer_length < mdp_len) { pr_err("drop: too small buffer! (%d for %d)\n", mbo->buffer_length, mdp_len); @@ -132,6 +137,11 @@ static int skb_to_mep(const struct sk_buff *skb, struct mbo *mbo) u8 *buff = mbo->virt_address; unsigned int mep_len = skb->len + MEP_HDR_LEN; + if (mep_len < skb->len) { + pr_err("drop: too large packet! (%u)\n", skb->len); + return -EINVAL; + } + if (mbo->buffer_length < mep_len) { pr_err("drop: too small buffer! (%d for %d)\n", mbo->buffer_length, mep_len); diff --git a/drivers/staging/rtl8188eu/core/rtw_xmit.c b/drivers/staging/rtl8188eu/core/rtw_xmit.c index 904b988ecc4e..7c895af1ba31 100644 --- a/drivers/staging/rtl8188eu/core/rtw_xmit.c +++ b/drivers/staging/rtl8188eu/core/rtw_xmit.c @@ -805,7 +805,7 @@ s32 rtw_make_wlanhdr(struct adapter *padapter, u8 *hdr, struct pkt_attrib *pattr memcpy(pwlanhdr->addr2, get_bssid(pmlmepriv), ETH_ALEN); memcpy(pwlanhdr->addr3, pattrib->src, ETH_ALEN); - if (psta->qos_option) + if (psta && psta->qos_option) qos_option = true; } else if (check_fwstate(pmlmepriv, WIFI_ADHOC_STATE) || check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE)) { @@ -813,7 +813,7 @@ s32 rtw_make_wlanhdr(struct adapter *padapter, u8 *hdr, struct pkt_attrib *pattr memcpy(pwlanhdr->addr2, pattrib->src, ETH_ALEN); memcpy(pwlanhdr->addr3, get_bssid(pmlmepriv), ETH_ALEN); - if (psta->qos_option) + if (psta && psta->qos_option) qos_option = true; } else { RT_TRACE(_module_rtl871x_xmit_c_, _drv_err_, ("fw_state:%x is not allowed to xmit frame\n", get_fwstate(pmlmepriv))); diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index 446310775e90..184fc05a0f8b 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -2051,7 +2051,7 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p) struct ieee_param *param; uint ret = 0; - if (p->length < sizeof(struct ieee_param) || !p->pointer) { + if (!p->pointer || p->length != sizeof(struct ieee_param)) { ret = -EINVAL; goto out; } @@ -2856,7 +2856,7 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p) goto out; } - if (!p->pointer) { + if (!p->pointer || p->length != sizeof(struct ieee_param)) { ret = -EINVAL; goto out; } diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index 2fc7056cbff7..536453358568 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -45,6 +45,7 @@ static const struct usb_device_id rtw_usb_id_tbl[] = { {USB_DEVICE(0x2001, 0x3311)}, /* DLink GO-USB-N150 REV B1 */ {USB_DEVICE(0x2001, 0x331B)}, /* D-Link DWA-121 rev B1 */ {USB_DEVICE(0x2357, 0x010c)}, /* TP-Link TL-WN722N v2 */ + {USB_DEVICE(0x2357, 0x0111)}, /* TP-Link TL-WN727N v5.21 */ {USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */ {USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill RNX-N150NUB */ {} /* Terminating entry */ @@ -78,7 +79,7 @@ static struct dvobj_priv *usb_dvobj_init(struct usb_interface *usb_intf) phost_conf = pusbd->actconfig; pconf_desc = &phost_conf->desc; - phost_iface = &usb_intf->altsetting[0]; + phost_iface = usb_intf->cur_altsetting; piface_desc = &phost_iface->desc; pdvobjpriv->NumInterfaces = pconf_desc->bNumInterfaces; @@ -357,8 +358,10 @@ static struct adapter *rtw_usb_if1_init(struct dvobj_priv *dvobj, } padapter->HalData = kzalloc(sizeof(struct hal_data_8188e), GFP_KERNEL); - if (!padapter->HalData) - DBG_88E("cant not alloc memory for HAL DATA\n"); + if (!padapter->HalData) { + DBG_88E("Failed to allocate memory for HAL data\n"); + goto free_adapter; + } /* step read_chip_version */ rtw_hal_read_chip_version(padapter); diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index aca52654825b..811cada301ac 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -1630,14 +1630,15 @@ static void _rtl92e_hard_data_xmit(struct sk_buff *skb, struct net_device *dev, memcpy((unsigned char *)(skb->cb), &dev, sizeof(dev)); skb_push(skb, priv->rtllib->tx_headroom); ret = _rtl92e_tx(dev, skb); - if (ret != 0) - kfree_skb(skb); if (queue_index != MGNT_QUEUE) { priv->rtllib->stats.tx_bytes += (skb->len - priv->rtllib->tx_headroom); priv->rtllib->stats.tx_packets++; } + + if (ret != 0) + kfree_skb(skb); } static int _rtl92e_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) diff --git a/drivers/staging/rtl8192u/r8192U_core.c b/drivers/staging/rtl8192u/r8192U_core.c index db3eb7ec5809..fbbd1b59dc11 100644 --- a/drivers/staging/rtl8192u/r8192U_core.c +++ b/drivers/staging/rtl8192u/r8192U_core.c @@ -1506,7 +1506,7 @@ short rtl8192_tx(struct net_device *dev, struct sk_buff *skb) (tx_fwinfo_819x_usb *)(skb->data + USB_HWDESC_HEADER_LEN); struct usb_device *udev = priv->udev; int pend; - int status; + int status, rt = -1; struct urb *tx_urb = NULL, *tx_urb_zero = NULL; unsigned int idx_pipe; @@ -1650,8 +1650,10 @@ short rtl8192_tx(struct net_device *dev, struct sk_buff *skb) } if (bSend0Byte) { tx_urb_zero = usb_alloc_urb(0, GFP_ATOMIC); - if (!tx_urb_zero) - return -ENOMEM; + if (!tx_urb_zero) { + rt = -ENOMEM; + goto error; + } usb_fill_bulk_urb(tx_urb_zero, udev, usb_sndbulkpipe(udev, idx_pipe), &zero, 0, tx_zero_isr, dev); @@ -1661,7 +1663,7 @@ short rtl8192_tx(struct net_device *dev, struct sk_buff *skb) "Error TX URB for zero byte %d, error %d", atomic_read(&priv->tx_pending[tcb_desc->queue_index]), status); - return -1; + goto error; } } netif_trans_update(dev); @@ -1672,7 +1674,12 @@ short rtl8192_tx(struct net_device *dev, struct sk_buff *skb) RT_TRACE(COMP_ERR, "Error TX URB %d, error %d", atomic_read(&priv->tx_pending[tcb_desc->queue_index]), status); - return -1; + +error: + dev_kfree_skb_any(skb); + usb_free_urb(tx_urb); + usb_free_urb(tx_urb_zero); + return rt; } static short rtl8192_usb_initendpoints(struct net_device *dev) diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c index b3e266bd57ab..8be4fcc54ad6 100644 --- a/drivers/staging/rtl8712/usb_intf.c +++ b/drivers/staging/rtl8712/usb_intf.c @@ -275,7 +275,7 @@ static uint r8712_usb_dvobj_init(struct _adapter *padapter) pdvobjpriv->padapter = padapter; padapter->EepromAddressSize = 6; - phost_iface = &pintf->altsetting[0]; + phost_iface = pintf->cur_altsetting; piface_desc = &phost_iface->desc; pdvobjpriv->nr_endpoint = piface_desc->bNumEndpoints; if (pusbd->speed == USB_SPEED_HIGH) { diff --git a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c index d0b317077511..f92f9073c507 100644 --- a/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c +++ b/drivers/staging/rtl8723bs/hal/rtl8723bs_xmit.c @@ -486,14 +486,13 @@ int rtl8723bs_xmit_thread(void *context) s32 ret; struct adapter *padapter; struct xmit_priv *pxmitpriv; - u8 thread_name[20] = "RTWHALXT"; - + u8 thread_name[20]; ret = _SUCCESS; padapter = context; pxmitpriv = &padapter->xmitpriv; - rtw_sprintf(thread_name, 20, "%s-"ADPT_FMT, thread_name, ADPT_ARG(padapter)); + rtw_sprintf(thread_name, 20, "RTWHALXT-" ADPT_FMT, ADPT_ARG(padapter)); thread_enter(thread_name); DBG_871X("start "FUNC_ADPT_FMT"\n", FUNC_ADPT_ARG(padapter)); diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c index 1b61da61690b..d51f6c452972 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c @@ -3495,7 +3495,7 @@ static int wpa_supplicant_ioctl(struct net_device *dev, struct iw_point *p) /* down(&ieee->wx_sem); */ - if (p->length < sizeof(struct ieee_param) || !p->pointer) { + if (!p->pointer || p->length != sizeof(struct ieee_param)) { ret = -EINVAL; goto out; } @@ -4340,7 +4340,7 @@ static int rtw_hostapd_ioctl(struct net_device *dev, struct iw_point *p) /* if (p->length < sizeof(struct ieee_param) || !p->pointer) { */ - if (!p->pointer) { + if (!p->pointer || p->length != sizeof(*param)) { ret = -EINVAL; goto out; } diff --git a/drivers/staging/rtl8723bs/os_dep/sdio_intf.c b/drivers/staging/rtl8723bs/os_dep/sdio_intf.c index 943324877707..33e052106ce7 100644 --- a/drivers/staging/rtl8723bs/os_dep/sdio_intf.c +++ b/drivers/staging/rtl8723bs/os_dep/sdio_intf.c @@ -25,18 +25,13 @@ static const struct sdio_device_id sdio_ids[] = { { SDIO_DEVICE(0x024c, 0x0523), }, + { SDIO_DEVICE(0x024c, 0x0525), }, { SDIO_DEVICE(0x024c, 0x0623), }, { SDIO_DEVICE(0x024c, 0x0626), }, { SDIO_DEVICE(0x024c, 0xb723), }, { /* end: all zeroes */ }, }; -static const struct acpi_device_id acpi_ids[] = { - {"OBDA8723", 0x0000}, - {} -}; - MODULE_DEVICE_TABLE(sdio, sdio_ids); -MODULE_DEVICE_TABLE(acpi, acpi_ids); static int rtw_drv_init(struct sdio_func *func, const struct sdio_device_id *id); static void rtw_dev_remove(struct sdio_func *func); diff --git a/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c b/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c index 544f638ed3ef..65edd14a1147 100644 --- a/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c +++ b/drivers/staging/rtlwifi/halmac/halmac_88xx/halmac_func_88xx.c @@ -2492,8 +2492,11 @@ halmac_parse_psd_data_88xx(struct halmac_adapter *halmac_adapter, u8 *c2h_buf, segment_size = (u8)PSD_DATA_GET_SEGMENT_SIZE(c2h_buf); psd_set->data_size = total_size; - if (!psd_set->data) + if (!psd_set->data) { psd_set->data = kzalloc(psd_set->data_size, GFP_KERNEL); + if (!psd_set->data) + return HALMAC_RET_MALLOC_FAIL; + } if (segment_id == 0) psd_set->segment_size = segment_size; diff --git a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c index 377da037f31c..b521752d9aa0 100644 --- a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c +++ b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c @@ -1849,6 +1849,12 @@ static int __init bm2835_mmal_init(void) num_cameras = get_num_cameras(instance, resolutions, MAX_BCM2835_CAMERAS); + + if (num_cameras < 1) { + ret = -ENODEV; + goto cleanup_mmal; + } + if (num_cameras > MAX_BCM2835_CAMERAS) num_cameras = MAX_BCM2835_CAMERAS; @@ -1948,6 +1954,9 @@ cleanup_gdev: pr_info("%s: error %d while loading driver\n", BM2835_MMAL_MODULE_NAME, ret); +cleanup_mmal: + vchiq_mmal_finalise(instance); + return ret; } diff --git a/drivers/staging/vt6656/device.h b/drivers/staging/vt6656/device.h index 74715c854856..41a4f9555d07 100644 --- a/drivers/staging/vt6656/device.h +++ b/drivers/staging/vt6656/device.h @@ -62,6 +62,8 @@ #define RATE_AUTO 12 #define MAX_RATE 12 +#define VNT_B_RATES (BIT(RATE_1M) | BIT(RATE_2M) |\ + BIT(RATE_5M) | BIT(RATE_11M)) /* * device specific @@ -269,6 +271,7 @@ struct vnt_private { u8 mac_hw; /* netdev */ struct usb_device *usb; + struct usb_interface *intf; u64 tsf_time; u8 rx_rate; diff --git a/drivers/staging/vt6656/dpc.c b/drivers/staging/vt6656/dpc.c index 655f0002f880..7b73fa2f8834 100644 --- a/drivers/staging/vt6656/dpc.c +++ b/drivers/staging/vt6656/dpc.c @@ -140,7 +140,7 @@ int vnt_rx_data(struct vnt_private *priv, struct vnt_rcb *ptr_rcb, vnt_rf_rssi_to_dbm(priv, *rssi, &rx_dbm); - priv->bb_pre_ed_rssi = (u8)rx_dbm + 1; + priv->bb_pre_ed_rssi = (u8)-rx_dbm + 1; priv->current_rssi = priv->bb_pre_ed_rssi; frame = skb_data + 8; diff --git a/drivers/staging/vt6656/int.c b/drivers/staging/vt6656/int.c index c6ffbe0e2728..c521729c4192 100644 --- a/drivers/staging/vt6656/int.c +++ b/drivers/staging/vt6656/int.c @@ -107,9 +107,11 @@ static int vnt_int_report_rate(struct vnt_private *priv, u8 pkt_no, u8 tsr) info->status.rates[0].count = tx_retry; - if (!(tsr & (TSR_TMO | TSR_RETRYTMO))) { + if (!(tsr & TSR_TMO)) { info->status.rates[0].idx = idx; - info->flags |= IEEE80211_TX_STAT_ACK; + + if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) + info->flags |= IEEE80211_TX_STAT_ACK; } ieee80211_tx_status_irqsafe(priv->hw, context->skb); diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c index cc6d8778fe5b..e8ccd800c94f 100644 --- a/drivers/staging/vt6656/main_usb.c +++ b/drivers/staging/vt6656/main_usb.c @@ -954,6 +954,7 @@ vt6656_probe(struct usb_interface *intf, const struct usb_device_id *id) priv = hw->priv; priv->hw = hw; priv->usb = udev; + priv->intf = intf; vnt_set_options(priv); @@ -976,6 +977,7 @@ vt6656_probe(struct usb_interface *intf, const struct usb_device_id *id) ieee80211_hw_set(priv->hw, RX_INCLUDES_FCS); ieee80211_hw_set(priv->hw, REPORTS_TX_ACK_STATUS); ieee80211_hw_set(priv->hw, SUPPORTS_PS); + ieee80211_hw_set(priv->hw, PS_NULLFUNC_STACK); priv->hw->max_signal = 100; diff --git a/drivers/staging/vt6656/rxtx.c b/drivers/staging/vt6656/rxtx.c index a44abcce6fb4..f78f31ce6443 100644 --- a/drivers/staging/vt6656/rxtx.c +++ b/drivers/staging/vt6656/rxtx.c @@ -288,11 +288,9 @@ static u16 vnt_rxtx_datahead_g(struct vnt_usb_send_context *tx_context, PK_TYPE_11B, &buf->b); /* Get Duration and TimeStamp */ - if (ieee80211_is_pspoll(hdr->frame_control)) { - __le16 dur = cpu_to_le16(priv->current_aid | BIT(14) | BIT(15)); - - buf->duration_a = dur; - buf->duration_b = dur; + if (ieee80211_is_nullfunc(hdr->frame_control)) { + buf->duration_a = hdr->duration_id; + buf->duration_b = hdr->duration_id; } else { buf->duration_a = vnt_get_duration_le(priv, tx_context->pkt_type, need_ack); @@ -381,10 +379,8 @@ static u16 vnt_rxtx_datahead_ab(struct vnt_usb_send_context *tx_context, tx_context->pkt_type, &buf->ab); /* Get Duration and TimeStampOff */ - if (ieee80211_is_pspoll(hdr->frame_control)) { - __le16 dur = cpu_to_le16(priv->current_aid | BIT(14) | BIT(15)); - - buf->duration = dur; + if (ieee80211_is_nullfunc(hdr->frame_control)) { + buf->duration = hdr->duration_id; } else { buf->duration = vnt_get_duration_le(priv, tx_context->pkt_type, need_ack); @@ -825,10 +821,14 @@ int vnt_tx_packet(struct vnt_private *priv, struct sk_buff *skb) if (info->band == NL80211_BAND_5GHZ) { pkt_type = PK_TYPE_11A; } else { - if (tx_rate->flags & IEEE80211_TX_RC_USE_CTS_PROTECT) - pkt_type = PK_TYPE_11GB; - else - pkt_type = PK_TYPE_11GA; + if (tx_rate->flags & IEEE80211_TX_RC_USE_CTS_PROTECT) { + if (priv->basic_rates & VNT_B_RATES) + pkt_type = PK_TYPE_11GB; + else + pkt_type = PK_TYPE_11GA; + } else { + pkt_type = PK_TYPE_11A; + } } } else { pkt_type = PK_TYPE_11B; diff --git a/drivers/staging/vt6656/wcmd.c b/drivers/staging/vt6656/wcmd.c index b2fc17f1381b..3f6ccdeb6dec 100644 --- a/drivers/staging/vt6656/wcmd.c +++ b/drivers/staging/vt6656/wcmd.c @@ -109,6 +109,7 @@ void vnt_run_command(struct work_struct *work) if (vnt_init(priv)) { /* If fail all ends TODO retry */ dev_err(&priv->usb->dev, "failed to start\n"); + usb_set_intfdata(priv->intf, NULL); ieee80211_free_hw(priv->hw); return; } diff --git a/drivers/staging/wlan-ng/cfg80211.c b/drivers/staging/wlan-ng/cfg80211.c index 42912257e2b9..07b807ceae6d 100644 --- a/drivers/staging/wlan-ng/cfg80211.c +++ b/drivers/staging/wlan-ng/cfg80211.c @@ -490,10 +490,8 @@ static int prism2_connect(struct wiphy *wiphy, struct net_device *dev, /* Set the encryption - we only support wep */ if (is_wep) { if (sme->key) { - if (sme->key_idx >= NUM_WEPKEYS) { - err = -EINVAL; - goto exit; - } + if (sme->key_idx >= NUM_WEPKEYS) + return -EINVAL; result = prism2_domibset_uint32(wlandev, DIDmib_dot11smt_dot11PrivacyTable_dot11WEPDefaultKeyID, diff --git a/drivers/staging/wlan-ng/prism2mgmt.c b/drivers/staging/wlan-ng/prism2mgmt.c index c4aa9e7e7003..be89a0ee44bf 100644 --- a/drivers/staging/wlan-ng/prism2mgmt.c +++ b/drivers/staging/wlan-ng/prism2mgmt.c @@ -945,7 +945,7 @@ int prism2mgmt_flashdl_state(struct wlandevice *wlandev, void *msgp) } } - return 0; + return result; } /*---------------------------------------------------------------- diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 0c00bb27c9c5..c764b292f6ba 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -1767,7 +1767,7 @@ static void cxgbit_fw4_ack(struct cxgbit_sock *csk, struct sk_buff *skb) while (credits) { struct sk_buff *p = cxgbit_sock_peek_wr(csk); - const u32 csum = (__force u32)p->csum; + u32 csum; if (unlikely(!p)) { pr_err("csk 0x%p,%u, cr %u,%u+%u, empty.\n", @@ -1776,6 +1776,7 @@ static void cxgbit_fw4_ack(struct cxgbit_sock *csk, struct sk_buff *skb) break; } + csum = (__force u32)p->csum; if (unlikely(credits < csum)) { pr_warn("csk 0x%p,%u, cr %u,%u+%u, < %u.\n", csk, csk->tid, diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index e2fa3a3bc81d..b6bf605fa5c1 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -78,7 +78,7 @@ static int chap_check_algorithm(const char *a_str) if (!token) goto out; - if (!strncmp(token, "5", 1)) { + if (!strcmp(token, "5")) { pr_debug("Selected MD5 Algorithm\n"); kfree(orig); return CHAP_DIGEST_MD5; diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 84742125f773..cebef8e5a43d 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -85,7 +85,7 @@ transport_lookup_cmd_lun(struct se_cmd *se_cmd, u64 unpacked_lun) goto out_unlock; } - se_cmd->se_lun = rcu_dereference(deve->se_lun); + se_cmd->se_lun = se_lun; se_cmd->pr_res_key = deve->pr_res_key; se_cmd->orig_fe_lun = unpacked_lun; se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD; @@ -176,7 +176,7 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd, u64 unpacked_lun) goto out_unlock; } - se_cmd->se_lun = rcu_dereference(deve->se_lun); + se_cmd->se_lun = se_lun; se_cmd->pr_res_key = deve->pr_res_key; se_cmd->orig_fe_lun = unpacked_lun; se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD; @@ -1151,27 +1151,6 @@ passthrough_parse_cdb(struct se_cmd *cmd, struct se_device *dev = cmd->se_dev; unsigned int size; - /* - * Clear a lun set in the cdb if the initiator talking to use spoke - * and old standards version, as we can't assume the underlying device - * won't choke up on it. - */ - switch (cdb[0]) { - case READ_10: /* SBC - RDProtect */ - case READ_12: /* SBC - RDProtect */ - case READ_16: /* SBC - RDProtect */ - case SEND_DIAGNOSTIC: /* SPC - SELF-TEST Code */ - case VERIFY: /* SBC - VRProtect */ - case VERIFY_16: /* SBC - VRProtect */ - case WRITE_VERIFY: /* SBC - VRProtect */ - case WRITE_VERIFY_12: /* SBC - VRProtect */ - case MAINTENANCE_IN: /* SPC - Parameter Data Format for SA RTPG */ - break; - default: - cdb[1] &= 0x1f; /* clear logical unit number */ - break; - } - /* * For REPORT LUNS we always need to emulate the response, for everything * else, pass it up. diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index 508da345b73f..95aa47ac4dcd 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -131,7 +131,7 @@ static int srp_get_pr_transport_id( memset(buf + 8, 0, leading_zero_bytes); rc = hex2bin(buf + 8 + leading_zero_bytes, p, count); if (rc < 0) { - pr_debug("hex2bin failed for %s: %d\n", __func__, rc); + pr_debug("hex2bin failed for %s: %d\n", p, rc); return rc; } diff --git a/drivers/tee/optee/Kconfig b/drivers/tee/optee/Kconfig index 0126de898036..108600c6eb56 100644 --- a/drivers/tee/optee/Kconfig +++ b/drivers/tee/optee/Kconfig @@ -2,6 +2,7 @@ config OPTEE tristate "OP-TEE" depends on HAVE_ARM_SMCCC + depends on MMU help This implements the OP-TEE Trusted Execution Environment (TEE) driver. diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index edb6e4e9ef3a..834884c370c5 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -590,8 +590,10 @@ static int __init optee_driver_init(void) return -ENODEV; np = of_find_matching_node(fw_np, optee_match); - if (!np) + if (!np || !of_device_is_available(np)) { + of_node_put(np); return -ENODEV; + } optee = optee_probe(np); of_node_put(np); diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 908a8014cf76..aed995ec2c90 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -514,7 +514,7 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, load = 0; total_load += load; - if (trace_thermal_power_cpu_limit_enabled() && load_cpu) + if (load_cpu) load_cpu[i] = load; i++; diff --git a/drivers/thermal/mtk_thermal.c b/drivers/thermal/mtk_thermal.c index 1e61c09153c9..76b92083744c 100644 --- a/drivers/thermal/mtk_thermal.c +++ b/drivers/thermal/mtk_thermal.c @@ -407,7 +407,8 @@ static int mtk_thermal_bank_temperature(struct mtk_thermal_bank *bank) u32 raw; for (i = 0; i < conf->bank_data[bank->id].num_sensors; i++) { - raw = readl(mt->thermal_base + conf->msr[i]); + raw = readl(mt->thermal_base + + conf->msr[conf->bank_data[bank->id].sensors[i]]); temp = raw_to_mcelsius(mt, conf->bank_data[bank->id].sensors[i], @@ -544,7 +545,8 @@ static void mtk_thermal_init_bank(struct mtk_thermal *mt, int num, for (i = 0; i < conf->bank_data[num].num_sensors; i++) writel(conf->sensor_mux_values[conf->bank_data[num].sensors[i]], - mt->thermal_base + conf->adcpnp[i]); + mt->thermal_base + + conf->adcpnp[conf->bank_data[num].sensors[i]]); writel((1 << conf->bank_data[num].num_sensors) - 1, mt->thermal_base + TEMP_MONCTL0); diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index 73e5fee6cf1d..83126e2dce36 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -401,8 +401,8 @@ static irqreturn_t rcar_thermal_irq(int irq, void *data) rcar_thermal_for_each_priv(priv, common) { if (rcar_thermal_had_changed(priv, status)) { rcar_thermal_irq_disable(priv); - schedule_delayed_work(&priv->work, - msecs_to_jiffies(300)); + queue_delayed_work(system_freezable_wq, &priv->work, + msecs_to_jiffies(300)); } } diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 6b2f3b00a117..e1cbf22e1fba 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -301,7 +301,7 @@ static void thermal_zone_device_set_polling(struct thermal_zone_device *tz, &tz->poll_queue, msecs_to_jiffies(delay)); else - cancel_delayed_work_sync(&tz->poll_queue); + cancel_delayed_work(&tz->poll_queue); } static void monitor_thermal_zone(struct thermal_zone_device *tz) diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c index 16c607075ede..af44e6e6b3bf 100644 --- a/drivers/thunderbolt/nhi.c +++ b/drivers/thunderbolt/nhi.c @@ -134,9 +134,20 @@ static void __iomem *ring_options_base(struct tb_ring *ring) return io; } -static void ring_iowrite16desc(struct tb_ring *ring, u32 value, u32 offset) +static void ring_iowrite_cons(struct tb_ring *ring, u16 cons) { - iowrite16(value, ring_desc_base(ring) + offset); + /* + * The other 16-bits in the register is read-only and writes to it + * are ignored by the hardware so we can save one ioread32() by + * filling the read-only bits with zeroes. + */ + iowrite32(cons, ring_desc_base(ring) + 8); +} + +static void ring_iowrite_prod(struct tb_ring *ring, u16 prod) +{ + /* See ring_iowrite_cons() above for explanation */ + iowrite32(prod << 16, ring_desc_base(ring) + 8); } static void ring_iowrite32desc(struct tb_ring *ring, u32 value, u32 offset) @@ -188,7 +199,10 @@ static void ring_write_descriptors(struct tb_ring *ring) descriptor->sof = frame->sof; } ring->head = (ring->head + 1) % ring->size; - ring_iowrite16desc(ring, ring->head, ring->is_tx ? 10 : 8); + if (ring->is_tx) + ring_iowrite_prod(ring, ring->head); + else + ring_iowrite_cons(ring, ring->head); } } @@ -461,7 +475,7 @@ void ring_stop(struct tb_ring *ring) ring_iowrite32options(ring, 0, 0); ring_iowrite64desc(ring, 0, 0); - ring_iowrite16desc(ring, 0, ring->is_tx ? 10 : 8); + ring_iowrite32desc(ring, 0, 8); ring_iowrite32desc(ring, 0, 12); ring->head = 0; ring->tail = 0; diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index fe2384b019ec..9cfc65ca173d 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -240,6 +240,12 @@ static int tb_switch_nvm_read(void *priv, unsigned int offset, void *val, return dma_port_flash_read(sw->dma_port, offset, val, bytes); } +static int tb_switch_nvm_no_read(void *priv, unsigned int offset, void *val, + size_t bytes) +{ + return -EPERM; +} + static int tb_switch_nvm_write(void *priv, unsigned int offset, void *val, size_t bytes) { @@ -285,6 +291,7 @@ static struct nvmem_device *register_nvmem(struct tb_switch *sw, int id, config.read_only = true; } else { config.name = "nvm_non_active"; + config.reg_read = tb_switch_nvm_no_read; config.reg_write = tb_switch_nvm_write; config.root_only = true; } diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c index a1d272ac82bb..c33150fcd964 100644 --- a/drivers/tty/hvc/hvc_vio.c +++ b/drivers/tty/hvc/hvc_vio.c @@ -120,6 +120,14 @@ static int hvterm_raw_get_chars(uint32_t vtermno, char *buf, int count) return got; } +/** + * hvterm_raw_put_chars: send characters to firmware for given vterm adapter + * @vtermno: The virtual terminal number. + * @buf: The characters to send. Because of the underlying hypercall in + * hvc_put_chars(), this buffer must be at least 16 bytes long, even if + * you are sending fewer chars. + * @count: number of chars to send. + */ static int hvterm_raw_put_chars(uint32_t vtermno, const char *buf, int count) { struct hvterm_priv *pv = hvterm_privs[vtermno]; @@ -232,6 +240,7 @@ static const struct hv_ops hvterm_hvsi_ops = { static void udbg_hvc_putc(char c) { int count = -1; + unsigned char bounce_buffer[16]; if (!hvterm_privs[0]) return; @@ -242,7 +251,12 @@ static void udbg_hvc_putc(char c) do { switch(hvterm_privs[0]->proto) { case HV_PROTOCOL_RAW: - count = hvterm_raw_put_chars(0, &c, 1); + /* + * hvterm_raw_put_chars requires at least a 16-byte + * buffer, so go via the bounce buffer + */ + bounce_buffer[0] = c; + count = hvterm_raw_put_chars(0, bounce_buffer, 1); break; case HV_PROTOCOL_HVSI: count = hvterm_hvsi_put_chars(0, &c, 1); diff --git a/drivers/tty/ipwireless/hardware.c b/drivers/tty/ipwireless/hardware.c index a6b8240af6cd..960e9375a1a9 100644 --- a/drivers/tty/ipwireless/hardware.c +++ b/drivers/tty/ipwireless/hardware.c @@ -1516,6 +1516,8 @@ static void ipw_send_setup_packet(struct ipw_hardware *hw) sizeof(struct ipw_setup_get_version_query_packet), ADDR_SETUP_PROT, TL_PROTOCOLID_SETUP, TL_SETUP_SIGNO_GET_VERSION_QRY); + if (!ver_packet) + return; ver_packet->header.length = sizeof(struct tl_setup_get_version_qry); /* diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index 08bd6b965847..19c4aa800c81 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -614,7 +614,7 @@ static ssize_t n_hdlc_tty_read(struct tty_struct *tty, struct file *file, } /* no data */ - if (file->f_flags & O_NONBLOCK) { + if (tty_io_nonblock(tty, file)) { ret = -EAGAIN; break; } @@ -681,7 +681,7 @@ static ssize_t n_hdlc_tty_write(struct tty_struct *tty, struct file *file, if (tbuf) break; - if (file->f_flags & O_NONBLOCK) { + if (tty_io_nonblock(tty, file)) { error = -EAGAIN; break; } @@ -969,6 +969,11 @@ static int __init n_hdlc_init(void) } /* end of init_module() */ +#ifdef CONFIG_SPARC +#undef __exitdata +#define __exitdata +#endif + static const char hdlc_unregister_ok[] __exitdata = KERN_INFO "N_HDLC: line discipline unregistered\n"; static const char hdlc_unregister_fail[] __exitdata = diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c index 305b6490d405..08ac04d08991 100644 --- a/drivers/tty/n_r3964.c +++ b/drivers/tty/n_r3964.c @@ -1080,7 +1080,7 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file, pMsg = remove_msg(pInfo, pClient); if (pMsg == NULL) { /* no messages available. */ - if (file->f_flags & O_NONBLOCK) { + if (tty_io_nonblock(tty, file)) { ret = -EAGAIN; goto unlock; } diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 904fc9c37fde..8214b0326b3a 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1704,7 +1704,7 @@ n_tty_receive_buf_common(struct tty_struct *tty, const unsigned char *cp, down_read(&tty->termios_rwsem); - while (1) { + do { /* * When PARMRK is set, each input char may take up to 3 chars * in the read buf; reduce the buffer space avail by 3x @@ -1746,7 +1746,7 @@ n_tty_receive_buf_common(struct tty_struct *tty, const unsigned char *cp, fp += n; count -= n; rcvd += n; - } + } while (!test_bit(TTY_LDISC_CHANGING, &tty->flags)); tty->receive_room = room; @@ -2213,7 +2213,7 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, break; if (!timeout) break; - if (file->f_flags & O_NONBLOCK) { + if (tty_io_nonblock(tty, file)) { retval = -EAGAIN; break; } @@ -2367,7 +2367,7 @@ static ssize_t n_tty_write(struct tty_struct *tty, struct file *file, } if (!nr) break; - if (file->f_flags & O_NONBLOCK) { + if (tty_io_nonblock(tty, file)) { retval = -EAGAIN; break; } diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 9e26c530d2dd..b3208b1b1028 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -28,6 +28,7 @@ #include #include #include +#include #undef TTY_DEBUG_HANGUP #ifdef TTY_DEBUG_HANGUP @@ -488,6 +489,7 @@ static int pty_bsd_ioctl(struct tty_struct *tty, return -ENOIOCTLCMD; } +#ifdef CONFIG_COMPAT static long pty_bsd_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { @@ -495,8 +497,11 @@ static long pty_bsd_compat_ioctl(struct tty_struct *tty, * PTY ioctls don't require any special translation between 32-bit and * 64-bit userspace, they are already compatible. */ - return pty_bsd_ioctl(tty, cmd, arg); + return pty_bsd_ioctl(tty, cmd, (unsigned long)compat_ptr(arg)); } +#else +#define pty_bsd_compat_ioctl NULL +#endif static int legacy_count = CONFIG_LEGACY_PTY_COUNT; /* @@ -676,6 +681,7 @@ static int pty_unix98_ioctl(struct tty_struct *tty, return -ENOIOCTLCMD; } +#ifdef CONFIG_COMPAT static long pty_unix98_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { @@ -683,8 +689,12 @@ static long pty_unix98_compat_ioctl(struct tty_struct *tty, * PTY ioctls don't require any special translation between 32-bit and * 64-bit userspace, they are already compatible. */ - return pty_unix98_ioctl(tty, cmd, arg); + return pty_unix98_ioctl(tty, cmd, + cmd == TIOCSIG ? arg : (unsigned long)compat_ptr(arg)); } +#else +#define pty_unix98_compat_ioctl NULL +#endif /** * ptm_unix98_lookup - find a pty master diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c index ae2564ecddcd..e6c9ff65402a 100644 --- a/drivers/tty/serdev/core.c +++ b/drivers/tty/serdev/core.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -49,14 +50,32 @@ static const struct device_type serdev_ctrl_type = { static int serdev_device_match(struct device *dev, struct device_driver *drv) { - /* TODO: ACPI and platform matching */ - return of_driver_match_device(dev, drv); + /* TODO: ACPI matching */ + + if (of_driver_match_device(dev, drv)) + return 1; + + if (dev->parent->parent->bus == &platform_bus_type && + dev->parent->parent->bus->match(dev->parent->parent, drv)) + return 1; + + return 0; } static int serdev_uevent(struct device *dev, struct kobj_uevent_env *env) { - /* TODO: ACPI and platform modalias */ - return of_device_uevent_modalias(dev, env); + int rc; + + /* TODO: ACPI modalias */ + + rc = of_device_uevent_modalias(dev, env); + if (rc != -ENODEV) + return rc; + + if (dev->parent->parent->bus == &platform_bus_type) + rc = dev->parent->parent->bus->uevent(dev->parent->parent, env); + + return rc; } /** @@ -153,6 +172,7 @@ int serdev_device_write(struct serdev_device *serdev, unsigned long timeout) { struct serdev_controller *ctrl = serdev->ctrl; + int written = 0; int ret; if (!ctrl || !ctrl->ops->write_buf || @@ -167,14 +187,21 @@ int serdev_device_write(struct serdev_device *serdev, if (ret < 0) break; + written += ret; buf += ret; count -= ret; - } while (count && (timeout = wait_for_completion_timeout(&serdev->write_comp, timeout))); mutex_unlock(&serdev->write_lock); - return ret < 0 ? ret : (count ? -ETIMEDOUT : 0); + + if (ret < 0) + return ret; + + if (timeout == 0 && written == 0) + return -ETIMEDOUT; + + return written; } EXPORT_SYMBOL_GPL(serdev_device_write); @@ -398,16 +425,45 @@ static int of_serdev_register_devices(struct serdev_controller *ctrl) return 0; } +static int platform_serdev_register_devices(struct serdev_controller *ctrl) +{ + struct serdev_device *serdev; + int err; + + if (ctrl->dev.parent->bus != &platform_bus_type) + return -ENODEV; + + serdev = serdev_device_alloc(ctrl); + if (!serdev) { + dev_err(&ctrl->dev, "failed to allocate serdev device for %s\n", + dev_name(ctrl->dev.parent)); + return -ENOMEM; + } + + pm_runtime_no_callbacks(&serdev->dev); + + err = serdev_device_add(serdev); + if (err) { + dev_err(&serdev->dev, + "failure adding device. status %d\n", err); + serdev_device_put(serdev); + } + + return err; +} + + /** - * serdev_controller_add() - Add an serdev controller + * serdev_controller_add_platform() - Add an serdev controller * @ctrl: controller to be registered. + * @platform: whether to permit fallthrough to platform device probe * * Register a controller previously allocated via serdev_controller_alloc() with - * the serdev core. + * the serdev core. Optionally permit probing via a platform device fallback. */ -int serdev_controller_add(struct serdev_controller *ctrl) +int serdev_controller_add_platform(struct serdev_controller *ctrl, bool platform) { - int ret; + int ret, ret_of, ret_platform = -ENODEV; /* Can't register until after driver model init */ if (WARN_ON(!is_registered)) @@ -417,9 +473,16 @@ int serdev_controller_add(struct serdev_controller *ctrl) if (ret) return ret; - ret = of_serdev_register_devices(ctrl); - if (ret) + ret_of = of_serdev_register_devices(ctrl); + if (platform) + ret_platform = platform_serdev_register_devices(ctrl); + if (ret_of && ret_platform) { + dev_dbg(&ctrl->dev, "no devices registered: of:%d " + "platform:%d\n", + ret_of, ret_platform); + ret = -ENODEV; goto out_dev_del; + } dev_dbg(&ctrl->dev, "serdev%d registered: dev:%p\n", ctrl->nr, &ctrl->dev); @@ -429,7 +492,7 @@ out_dev_del: device_del(&ctrl->dev); return ret; }; -EXPORT_SYMBOL_GPL(serdev_controller_add); +EXPORT_SYMBOL_GPL(serdev_controller_add_platform); /* Remove a device associated with a controller */ static int serdev_remove_device(struct device *dev, void *data) diff --git a/drivers/tty/serdev/serdev-ttyport.c b/drivers/tty/serdev/serdev-ttyport.c index 69fc6d9ab490..0669e18ff879 100644 --- a/drivers/tty/serdev/serdev-ttyport.c +++ b/drivers/tty/serdev/serdev-ttyport.c @@ -15,9 +15,15 @@ #include #include #include +#include +#include #define SERPORT_ACTIVE 1 +static char *pdev_tty_port; +module_param(pdev_tty_port, charp, 0644); +MODULE_PARM_DESC(pdev_tty_port, "platform device tty port to claim"); + struct serport { struct tty_port *port; struct tty_struct *tty; @@ -238,9 +244,9 @@ struct device *serdev_tty_port_register(struct tty_port *port, struct device *parent, struct tty_driver *drv, int idx) { - const struct tty_port_client_operations *old_ops; struct serdev_controller *ctrl; struct serport *serport; + bool platform = false; int ret; if (!port || !drv || !parent) @@ -257,11 +263,27 @@ struct device *serdev_tty_port_register(struct tty_port *port, ctrl->ops = &ctrl_ops; - old_ops = port->client_ops; port->client_ops = &client_ops; port->client_data = ctrl; - ret = serdev_controller_add(ctrl); + /* There is not always a way to bind specific platform devices because + * they may be defined on platforms without DT or ACPI. When dealing + * with a platform devices, do not allow direct binding unless it is + * whitelisted by module parameter. If a platform device is otherwise + * described by DT or ACPI it will still be bound and this check will + * be ignored. + */ + if (parent->bus == &platform_bus_type) { + char tty_port_name[7]; + + sprintf(tty_port_name, "%s%d", drv->name, idx); + if (pdev_tty_port && + !strcmp(pdev_tty_port, tty_port_name)) { + platform = true; + } + } + + ret = serdev_controller_add_platform(ctrl, platform); if (ret) goto err_reset_data; @@ -270,7 +292,7 @@ struct device *serdev_tty_port_register(struct tty_port *port, err_reset_data: port->client_data = NULL; - port->client_ops = old_ops; + port->client_ops = &tty_port_default_client_ops; serdev_controller_put(ctrl); return ERR_PTR(ret); @@ -285,8 +307,8 @@ int serdev_tty_port_unregister(struct tty_port *port) return -ENODEV; serdev_controller_remove(ctrl); - port->client_ops = NULL; port->client_data = NULL; + port->client_ops = &tty_port_default_client_ops; serdev_controller_put(ctrl); return 0; diff --git a/drivers/tty/serial/8250/8250_aspeed_vuart.c b/drivers/tty/serial/8250/8250_aspeed_vuart.c index 33a801353114..0a89df390f24 100644 --- a/drivers/tty/serial/8250/8250_aspeed_vuart.c +++ b/drivers/tty/serial/8250/8250_aspeed_vuart.c @@ -256,7 +256,6 @@ static int aspeed_vuart_probe(struct platform_device *pdev) port.port.line = rc; port.port.irq = irq_of_parse_and_map(np, 0); - port.port.irqflags = IRQF_SHARED; port.port.iotype = UPIO_MEM; port.port.type = PORT_16550A; port.port.uartclk = clk; diff --git a/drivers/tty/serial/8250/8250_bcm2835aux.c b/drivers/tty/serial/8250/8250_bcm2835aux.c index a23c7da42ea8..7bbcae75e651 100644 --- a/drivers/tty/serial/8250/8250_bcm2835aux.c +++ b/drivers/tty/serial/8250/8250_bcm2835aux.c @@ -119,7 +119,7 @@ static int bcm2835aux_serial_remove(struct platform_device *pdev) { struct bcm2835aux_data *data = platform_get_drvdata(pdev); - serial8250_unregister_port(data->uart.port.line); + serial8250_unregister_port(data->line); clk_disable_unprepare(data->clk); return 0; diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index d29b512a7d9f..5017a0f46b82 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -181,7 +181,7 @@ static int serial_link_irq_chain(struct uart_8250_port *up) struct hlist_head *h; struct hlist_node *n; struct irq_info *i; - int ret, irq_flags = up->port.flags & UPF_SHARE_IRQ ? IRQF_SHARED : 0; + int ret; mutex_lock(&hash_mutex); @@ -216,9 +216,8 @@ static int serial_link_irq_chain(struct uart_8250_port *up) INIT_LIST_HEAD(&up->list); i->head = &up->list; spin_unlock_irq(&i->lock); - irq_flags |= up->port.irqflags; ret = request_irq(up->port.irq, serial8250_interrupt, - irq_flags, up->port.name, i); + up->port.irqflags, up->port.name, i); if (ret < 0) serial_do_unlink(i, up); } @@ -953,6 +952,21 @@ static struct uart_8250_port *serial8250_find_match_or_unused(struct uart_port * return NULL; } +static void serial_8250_overrun_backoff_work(struct work_struct *work) +{ + struct uart_8250_port *up = + container_of(to_delayed_work(work), struct uart_8250_port, + overrun_backoff); + struct uart_port *port = &up->port; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + up->ier |= UART_IER_RLSI | UART_IER_RDI; + up->port.read_status_mask |= UART_LSR_DR; + serial_out(up, UART_IER, up->ier); + spin_unlock_irqrestore(&port->lock, flags); +} + /** * serial8250_register_8250_port - register a serial port * @up: serial port template @@ -1062,7 +1076,18 @@ int serial8250_register_8250_port(struct uart_8250_port *up) ret = 0; } + + /* Initialise interrupt backoff work if required */ + if (up->overrun_backoff_time_ms > 0) { + uart->overrun_backoff_time_ms = + up->overrun_backoff_time_ms; + INIT_DELAYED_WORK(&uart->overrun_backoff, + serial_8250_overrun_backoff_work); + } else { + uart->overrun_backoff_time_ms = 0; + } } + mutex_unlock(&serial_mutex); return ret; diff --git a/drivers/tty/serial/8250/8250_exar.c b/drivers/tty/serial/8250/8250_exar.c index 411b4b03457b..899f36b59af7 100644 --- a/drivers/tty/serial/8250/8250_exar.c +++ b/drivers/tty/serial/8250/8250_exar.c @@ -27,6 +27,14 @@ #include "8250.h" +#define PCI_DEVICE_ID_ACCES_COM_2S 0x1052 +#define PCI_DEVICE_ID_ACCES_COM_4S 0x105d +#define PCI_DEVICE_ID_ACCES_COM_8S 0x106c +#define PCI_DEVICE_ID_ACCES_COM232_8 0x10a8 +#define PCI_DEVICE_ID_ACCES_COM_2SM 0x10d2 +#define PCI_DEVICE_ID_ACCES_COM_4SM 0x10db +#define PCI_DEVICE_ID_ACCES_COM_8SM 0x10ea + #define PCI_DEVICE_ID_COMMTECH_4224PCI335 0x0002 #define PCI_DEVICE_ID_COMMTECH_4222PCI335 0x0004 #define PCI_DEVICE_ID_COMMTECH_2324PCI335 0x000a @@ -562,6 +570,22 @@ static int __maybe_unused exar_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(exar_pci_pm, exar_suspend, exar_resume); +static const struct exar8250_board acces_com_2x = { + .num_ports = 2, + .setup = pci_xr17c154_setup, +}; + +static const struct exar8250_board acces_com_4x = { + .num_ports = 4, + .setup = pci_xr17c154_setup, +}; + +static const struct exar8250_board acces_com_8x = { + .num_ports = 8, + .setup = pci_xr17c154_setup, +}; + + static const struct exar8250_board pbn_fastcom335_2 = { .num_ports = 2, .setup = pci_fastcom335_setup, @@ -632,6 +656,15 @@ static const struct exar8250_board pbn_exar_XR17V8358 = { } static const struct pci_device_id exar_pci_tbl[] = { + EXAR_DEVICE(ACCESSIO, ACCES_COM_2S, acces_com_2x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_4S, acces_com_4x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_8S, acces_com_8x), + EXAR_DEVICE(ACCESSIO, ACCES_COM232_8, acces_com_8x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_2SM, acces_com_2x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_4SM, acces_com_4x), + EXAR_DEVICE(ACCESSIO, ACCES_COM_8SM, acces_com_8x), + + CONNECT_DEVICE(XR17C152, UART_2_232, pbn_connect), CONNECT_DEVICE(XR17C154, UART_4_232, pbn_connect), CONNECT_DEVICE(XR17C158, UART_8_232, pbn_connect), diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c index 910bfee5a88b..cc138c24ae88 100644 --- a/drivers/tty/serial/8250/8250_fsl.c +++ b/drivers/tty/serial/8250/8250_fsl.c @@ -48,8 +48,29 @@ int fsl8250_handle_irq(struct uart_port *port) lsr = orig_lsr = up->port.serial_in(&up->port, UART_LSR); - if (lsr & (UART_LSR_DR | UART_LSR_BI)) + /* Process incoming characters first */ + if ((lsr & (UART_LSR_DR | UART_LSR_BI)) && + (up->ier & (UART_IER_RLSI | UART_IER_RDI))) { lsr = serial8250_rx_chars(up, lsr); + } + + /* Stop processing interrupts on input overrun */ + if ((orig_lsr & UART_LSR_OE) && (up->overrun_backoff_time_ms > 0)) { + unsigned long delay; + + up->ier = port->serial_in(port, UART_IER); + if (up->ier & (UART_IER_RLSI | UART_IER_RDI)) { + port->ops->stop_rx(port); + } else { + /* Keep restarting the timer until + * the input overrun subsides. + */ + cancel_delayed_work(&up->overrun_backoff); + } + + delay = msecs_to_jiffies(up->overrun_backoff_time_ms); + schedule_delayed_work(&up->overrun_backoff, delay); + } serial8250_modem_status(up); diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index ec510e342e06..c51044ba503c 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -232,6 +232,11 @@ static int of_platform_serial_probe(struct platform_device *ofdev) if (of_property_read_bool(ofdev->dev.of_node, "auto-flow-control")) port8250.capabilities |= UART_CAP_AFE; + if (of_property_read_u32(ofdev->dev.of_node, + "overrun-throttle-ms", + &port8250.overrun_backoff_time_ms) != 0) + port8250.overrun_backoff_time_ms = 0; + ret = serial8250_register_8250_port(&port8250); if (ret < 0) goto err_dispose; diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 839a00e5f2db..fccfc5467d6d 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2267,6 +2267,10 @@ int serial8250_do_startup(struct uart_port *port) } } + /* Check if we need to have shared IRQs */ + if (port->irq && (up->port.flags & UPF_SHARE_IRQ)) + up->port.irqflags |= IRQF_SHARED; + if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) { unsigned char iir1; /* diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 4a4a9f33715c..637f72fb6427 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -829,10 +829,8 @@ __acquires(&uap->port.lock) if (!uap->using_tx_dma) return; - /* Avoid deadlock with the DMA engine callback */ - spin_unlock(&uap->port.lock); - dmaengine_terminate_all(uap->dmatx.chan); - spin_lock(&uap->port.lock); + dmaengine_terminate_async(uap->dmatx.chan); + if (uap->dmatx.queued) { dma_unmap_sg(uap->dmatx.chan->device->dev, &uap->dmatx.sg, 1, DMA_TO_DEVICE); diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c index ed545a61413c..ac56a5131a9c 100644 --- a/drivers/tty/serial/ar933x_uart.c +++ b/drivers/tty/serial/ar933x_uart.c @@ -289,6 +289,10 @@ static void ar933x_uart_set_termios(struct uart_port *port, ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, AR933X_UART_CS_HOST_INT_EN); + /* enable RX and TX ready overide */ + ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, + AR933X_UART_CS_TX_READY_ORIDE | AR933X_UART_CS_RX_READY_ORIDE); + /* reenable the UART */ ar933x_uart_rmw(up, AR933X_UART_CS_REG, AR933X_UART_CS_IF_MODE_M << AR933X_UART_CS_IF_MODE_S, @@ -421,6 +425,10 @@ static int ar933x_uart_startup(struct uart_port *port) ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, AR933X_UART_CS_HOST_INT_EN); + /* enable RX and TX ready overide */ + ar933x_uart_rmw_set(up, AR933X_UART_CS_REG, + AR933X_UART_CS_TX_READY_ORIDE | AR933X_UART_CS_RX_READY_ORIDE); + /* Enable RX interrupts */ up->ier = AR933X_UART_INT_RX_VALID; ar933x_uart_write(up, AR933X_UART_INT_EN_REG, up->ier); diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 9ee41ba0e55b..a00227d312d3 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -498,7 +498,8 @@ static void atmel_stop_tx(struct uart_port *port) atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask); if (atmel_uart_is_half_duplex(port)) - atmel_start_rx(port); + if (!atomic_read(&atmel_port->tasklet_shutdown)) + atmel_start_rx(port); } @@ -2183,27 +2184,6 @@ static void atmel_set_termios(struct uart_port *port, struct ktermios *termios, mode |= ATMEL_US_USMODE_NORMAL; } - /* set the mode, clock divisor, parity, stop bits and data size */ - atmel_uart_writel(port, ATMEL_US_MR, mode); - - /* - * when switching the mode, set the RTS line state according to the - * new mode, otherwise keep the former state - */ - if ((old_mode & ATMEL_US_USMODE) != (mode & ATMEL_US_USMODE)) { - unsigned int rts_state; - - if ((mode & ATMEL_US_USMODE) == ATMEL_US_USMODE_HWHS) { - /* let the hardware control the RTS line */ - rts_state = ATMEL_US_RTSDIS; - } else { - /* force RTS line to low level */ - rts_state = ATMEL_US_RTSEN; - } - - atmel_uart_writel(port, ATMEL_US_CR, rts_state); - } - /* * Set the baud rate: * Fractional baudrate allows to setup output frequency more @@ -2229,6 +2209,28 @@ static void atmel_set_termios(struct uart_port *port, struct ktermios *termios, quot = cd | fp << ATMEL_US_FP_OFFSET; atmel_uart_writel(port, ATMEL_US_BRGR, quot); + + /* set the mode, clock divisor, parity, stop bits and data size */ + atmel_uart_writel(port, ATMEL_US_MR, mode); + + /* + * when switching the mode, set the RTS line state according to the + * new mode, otherwise keep the former state + */ + if ((old_mode & ATMEL_US_USMODE) != (mode & ATMEL_US_USMODE)) { + unsigned int rts_state; + + if ((mode & ATMEL_US_USMODE) == ATMEL_US_USMODE_HWHS) { + /* let the hardware control the RTS line */ + rts_state = ATMEL_US_RTSDIS; + } else { + /* force RTS line to low level */ + rts_state = ATMEL_US_RTSEN; + } + + atmel_uart_writel(port, ATMEL_US_CR, rts_state); + } + atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_RSTSTA | ATMEL_US_RSTRX); atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN | ATMEL_US_RXEN); diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 32a473f9d1d3..16422987ab0f 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -380,8 +380,8 @@ static void lpuart_dma_tx(struct lpuart_port *sport) } sport->dma_tx_desc = dmaengine_prep_slave_sg(sport->dma_tx_chan, sgl, - sport->dma_tx_nents, - DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT); + ret, DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT); if (!sport->dma_tx_desc) { dma_unmap_sg(dev, sgl, sport->dma_tx_nents, DMA_TO_DEVICE); dev_err(dev, "Cannot prepare TX slave DMA!\n"); @@ -532,26 +532,26 @@ static int lpuart32_poll_init(struct uart_port *port) spin_lock_irqsave(&sport->port.lock, flags); /* Disable Rx & Tx */ - writel(0, sport->port.membase + UARTCTRL); + lpuart32_write(&sport->port, UARTCTRL, 0); - temp = readl(sport->port.membase + UARTFIFO); + temp = lpuart32_read(&sport->port, UARTFIFO); /* Enable Rx and Tx FIFO */ - writel(temp | UARTFIFO_RXFE | UARTFIFO_TXFE, - sport->port.membase + UARTFIFO); + lpuart32_write(&sport->port, UARTFIFO, + temp | UARTFIFO_RXFE | UARTFIFO_TXFE); /* flush Tx and Rx FIFO */ - writel(UARTFIFO_TXFLUSH | UARTFIFO_RXFLUSH, - sport->port.membase + UARTFIFO); + lpuart32_write(&sport->port, UARTFIFO, + UARTFIFO_TXFLUSH | UARTFIFO_RXFLUSH); /* explicitly clear RDRF */ - if (readl(sport->port.membase + UARTSTAT) & UARTSTAT_RDRF) { - readl(sport->port.membase + UARTDATA); - writel(UARTFIFO_RXUF, sport->port.membase + UARTFIFO); + if (lpuart32_read(&sport->port, UARTSTAT) & UARTSTAT_RDRF) { + lpuart32_read(&sport->port, UARTDATA); + lpuart32_write(&sport->port, UARTFIFO, UARTFIFO_RXUF); } /* Enable Rx and Tx */ - writel(UARTCTRL_RE | UARTCTRL_TE, sport->port.membase + UARTCTRL); + lpuart32_write(&sport->port, UARTCTRL, UARTCTRL_RE | UARTCTRL_TE); spin_unlock_irqrestore(&sport->port.lock, flags); return 0; @@ -559,18 +559,18 @@ static int lpuart32_poll_init(struct uart_port *port) static void lpuart32_poll_put_char(struct uart_port *port, unsigned char c) { - while (!(readl(port->membase + UARTSTAT) & UARTSTAT_TDRE)) + while (!(lpuart32_read(port, UARTSTAT) & UARTSTAT_TDRE)) barrier(); - writel(c, port->membase + UARTDATA); + lpuart32_write(port, UARTDATA, c); } static int lpuart32_poll_get_char(struct uart_port *port) { - if (!(readl(port->membase + UARTSTAT) & UARTSTAT_RDRF)) + if (!(lpuart32_read(port, UARTSTAT) & UARTSTAT_RDRF)) return NO_POLL_CHAR; - return readl(port->membase + UARTDATA); + return lpuart32_read(port, UARTDATA); } #endif diff --git a/drivers/tty/serial/ifx6x60.c b/drivers/tty/serial/ifx6x60.c index f190a84a0246..d54ebe6b1d50 100644 --- a/drivers/tty/serial/ifx6x60.c +++ b/drivers/tty/serial/ifx6x60.c @@ -1245,6 +1245,9 @@ static int ifx_spi_spi_remove(struct spi_device *spi) struct ifx_spi_device *ifx_dev = spi_get_drvdata(spi); /* stop activity */ tasklet_kill(&ifx_dev->io_work_tasklet); + + pm_runtime_disable(&spi->dev); + /* free irq */ free_irq(gpio_to_irq(ifx_dev->gpio.reset_out), ifx_dev); free_irq(gpio_to_irq(ifx_dev->gpio.srdy), ifx_dev); diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index 4e827e5a52a3..630065b551f5 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -80,7 +80,7 @@ #define UCR1_IDEN (1<<12) /* Idle condition interrupt */ #define UCR1_ICD_REG(x) (((x) & 3) << 10) /* idle condition detect */ #define UCR1_RRDYEN (1<<9) /* Recv ready interrupt enable */ -#define UCR1_RDMAEN (1<<8) /* Recv ready DMA enable */ +#define UCR1_RXDMAEN (1<<8) /* Recv ready DMA enable */ #define UCR1_IREN (1<<7) /* Infrared interface enable */ #define UCR1_TXMPTYEN (1<<6) /* Transimitter empty interrupt enable */ #define UCR1_RTSDEN (1<<5) /* RTS delta interrupt enable */ @@ -352,6 +352,30 @@ static void imx_port_rts_auto(struct imx_port *sport, unsigned long *ucr2) *ucr2 |= UCR2_CTSC; } +/* + * interrupts disabled on entry + */ +static void imx_start_rx(struct uart_port *port) +{ + struct imx_port *sport = (struct imx_port *)port; + unsigned int ucr1, ucr2; + + ucr1 = readl(port->membase + UCR1); + ucr2 = readl(port->membase + UCR2); + + ucr2 |= UCR2_RXEN; + + if (sport->dma_is_enabled) { + ucr1 |= UCR1_RXDMAEN | UCR1_ATDMAEN; + } else { + ucr1 |= UCR1_RRDYEN; + } + + /* Write UCR2 first as it includes RXEN */ + writel(ucr2, port->membase + UCR2); + writel(ucr1, port->membase + UCR1); +} + /* * interrupts disabled on entry */ @@ -378,9 +402,10 @@ static void imx_stop_tx(struct uart_port *port) imx_port_rts_active(sport, &temp); else imx_port_rts_inactive(sport, &temp); - temp |= UCR2_RXEN; writel(temp, port->membase + UCR2); + imx_start_rx(port); + temp = readl(port->membase + UCR4); temp &= ~UCR4_TCEN; writel(temp, port->membase + UCR4); @@ -393,7 +418,7 @@ static void imx_stop_tx(struct uart_port *port) static void imx_stop_rx(struct uart_port *port) { struct imx_port *sport = (struct imx_port *)port; - unsigned long temp; + unsigned long ucr1, ucr2; if (sport->dma_is_enabled && sport->dma_is_rxing) { if (sport->port.suspended) { @@ -404,12 +429,18 @@ static void imx_stop_rx(struct uart_port *port) } } - temp = readl(sport->port.membase + UCR2); - writel(temp & ~UCR2_RXEN, sport->port.membase + UCR2); + ucr1 = readl(sport->port.membase + UCR1); + ucr2 = readl(sport->port.membase + UCR2); - /* disable the `Receiver Ready Interrrupt` */ - temp = readl(sport->port.membase + UCR1); - writel(temp & ~UCR1_RRDYEN, sport->port.membase + UCR1); + if (sport->dma_is_enabled) { + ucr1 &= ~(UCR1_RXDMAEN | UCR1_ATDMAEN); + } else { + ucr1 &= ~UCR1_RRDYEN; + } + writel(ucr1, port->membase + UCR1); + + ucr2 &= ~UCR2_RXEN; + writel(ucr2, port->membase + UCR2); } /* @@ -526,7 +557,7 @@ static void imx_dma_tx(struct imx_port *sport) sport->tx_bytes = uart_circ_chars_pending(xmit); - if (xmit->tail < xmit->head) { + if (xmit->tail < xmit->head || xmit->head == 0) { sport->dma_tx_nents = 1; sg_init_one(sgl, xmit->buf + xmit->tail, sport->tx_bytes); } else { @@ -542,7 +573,7 @@ static void imx_dma_tx(struct imx_port *sport) dev_err(dev, "DMA mapping error for TX.\n"); return; } - desc = dmaengine_prep_slave_sg(chan, sgl, sport->dma_tx_nents, + desc = dmaengine_prep_slave_sg(chan, sgl, ret, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT); if (!desc) { dma_unmap_sg(dev, sgl, sport->dma_tx_nents, @@ -581,10 +612,11 @@ static void imx_start_tx(struct uart_port *port) imx_port_rts_active(sport, &temp); else imx_port_rts_inactive(sport, &temp); - if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) - temp &= ~UCR2_RXEN; writel(temp, port->membase + UCR2); + if (!(port->rs485.flags & SER_RS485_RX_DURING_TX)) + imx_stop_rx(port); + /* enable transmitter and shifter empty irq */ temp = readl(port->membase + UCR4); temp |= UCR4_TCEN; @@ -811,14 +843,42 @@ static void imx_mctrl_check(struct imx_port *sport) static irqreturn_t imx_int(int irq, void *dev_id) { struct imx_port *sport = dev_id; - unsigned int sts; - unsigned int sts2; + unsigned int usr1, usr2, ucr1, ucr2, ucr3, ucr4; irqreturn_t ret = IRQ_NONE; - sts = readl(sport->port.membase + USR1); - sts2 = readl(sport->port.membase + USR2); + usr1 = readl(sport->port.membase + USR1); + usr2 = readl(sport->port.membase + USR2); + ucr1 = readl(sport->port.membase + UCR1); + ucr2 = readl(sport->port.membase + UCR2); + ucr3 = readl(sport->port.membase + UCR3); + ucr4 = readl(sport->port.membase + UCR4); - if (sts & (USR1_RRDY | USR1_AGTIM)) { + /* + * Even if a condition is true that can trigger an irq only handle it if + * the respective irq source is enabled. This prevents some undesired + * actions, for example if a character that sits in the RX FIFO and that + * should be fetched via DMA is tried to be fetched using PIO. Or the + * receiver is currently off and so reading from URXD0 results in an + * exception. So just mask the (raw) status bits for disabled irqs. + */ + if ((ucr1 & UCR1_RRDYEN) == 0) + usr1 &= ~USR1_RRDY; + if ((ucr2 & UCR2_ATEN) == 0) + usr1 &= ~USR1_AGTIM; + if ((ucr1 & UCR1_TXMPTYEN) == 0) + usr1 &= ~USR1_TRDY; + if ((ucr4 & UCR4_TCEN) == 0) + usr2 &= ~USR2_TXDC; + if ((ucr3 & UCR3_DTRDEN) == 0) + usr1 &= ~USR1_DTRD; + if ((ucr1 & UCR1_RTSDEN) == 0) + usr1 &= ~USR1_RTSD; + if ((ucr3 & UCR3_AWAKEN) == 0) + usr1 &= ~USR1_AWAKE; + if ((ucr4 & UCR4_OREN) == 0) + usr2 &= ~USR2_ORE; + + if (usr1 & (USR1_RRDY | USR1_AGTIM)) { if (sport->dma_is_enabled) imx_dma_rxint(sport); else @@ -826,18 +886,15 @@ static irqreturn_t imx_int(int irq, void *dev_id) ret = IRQ_HANDLED; } - if ((sts & USR1_TRDY && - readl(sport->port.membase + UCR1) & UCR1_TXMPTYEN) || - (sts2 & USR2_TXDC && - readl(sport->port.membase + UCR4) & UCR4_TCEN)) { + if ((usr1 & USR1_TRDY) || (usr2 & USR2_TXDC)) { imx_txint(irq, dev_id); ret = IRQ_HANDLED; } - if (sts & USR1_DTRD) { + if (usr1 & USR1_DTRD) { unsigned long flags; - if (sts & USR1_DTRD) + if (usr1 & USR1_DTRD) writel(USR1_DTRD, sport->port.membase + USR1); spin_lock_irqsave(&sport->port.lock, flags); @@ -847,17 +904,17 @@ static irqreturn_t imx_int(int irq, void *dev_id) ret = IRQ_HANDLED; } - if (sts & USR1_RTSD) { + if (usr1 & USR1_RTSD) { imx_rtsint(irq, dev_id); ret = IRQ_HANDLED; } - if (sts & USR1_AWAKE) { + if (usr1 & USR1_AWAKE) { writel(USR1_AWAKE, sport->port.membase + USR1); ret = IRQ_HANDLED; } - if (sts2 & USR2_ORE) { + if (usr2 & USR2_ORE) { sport->port.icount.overrun++; writel(USR2_ORE, sport->port.membase + USR2); ret = IRQ_HANDLED; @@ -1206,7 +1263,7 @@ static void imx_enable_dma(struct imx_port *sport) /* set UCR1 */ temp = readl(sport->port.membase + UCR1); - temp |= UCR1_RDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN; + temp |= UCR1_RXDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN; writel(temp, sport->port.membase + UCR1); temp = readl(sport->port.membase + UCR2); @@ -1224,7 +1281,7 @@ static void imx_disable_dma(struct imx_port *sport) /* clear UCR1 */ temp = readl(sport->port.membase + UCR1); - temp &= ~(UCR1_RDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN); + temp &= ~(UCR1_RXDMAEN | UCR1_TDMAEN | UCR1_ATDMAEN); writel(temp, sport->port.membase + UCR1); /* clear UCR2 */ @@ -1289,11 +1346,9 @@ static int imx_startup(struct uart_port *port) writel(USR1_RTSD | USR1_DTRD, sport->port.membase + USR1); writel(USR2_ORE, sport->port.membase + USR2); - if (sport->dma_is_inited && !sport->dma_is_enabled) - imx_enable_dma(sport); - temp = readl(sport->port.membase + UCR1); - temp |= UCR1_RRDYEN | UCR1_UARTEN; + temp &= ~UCR1_RRDYEN; + temp |= UCR1_UARTEN; if (sport->have_rtscts) temp |= UCR1_RTSDEN; @@ -1332,14 +1387,13 @@ static int imx_startup(struct uart_port *port) */ imx_enable_ms(&sport->port); - /* - * Start RX DMA immediately instead of waiting for RX FIFO interrupts. - * In our iMX53 the average delay for the first reception dropped from - * approximately 35000 microseconds to 1000 microseconds. - */ - if (sport->dma_is_enabled) { - imx_disable_rx_int(sport); + if (sport->dma_is_inited) { + imx_enable_dma(sport); start_rx_dma(sport); + } else { + temp = readl(sport->port.membase + UCR1); + temp |= UCR1_RRDYEN; + writel(temp, sport->port.membase + UCR1); } spin_unlock_irqrestore(&sport->port.lock, flags); @@ -1386,7 +1440,8 @@ static void imx_shutdown(struct uart_port *port) spin_lock_irqsave(&sport->port.lock, flags); temp = readl(sport->port.membase + UCR1); - temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN); + temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN | + UCR1_RXDMAEN | UCR1_ATDMAEN); writel(temp, sport->port.membase + UCR1); spin_unlock_irqrestore(&sport->port.lock, flags); @@ -1659,7 +1714,7 @@ static int imx_poll_init(struct uart_port *port) { struct imx_port *sport = (struct imx_port *)port; unsigned long flags; - unsigned long temp; + unsigned long ucr1, ucr2; int retval; retval = clk_prepare_enable(sport->clk_ipg); @@ -1673,16 +1728,29 @@ static int imx_poll_init(struct uart_port *port) spin_lock_irqsave(&sport->port.lock, flags); - temp = readl(sport->port.membase + UCR1); - if (is_imx1_uart(sport)) - temp |= IMX1_UCR1_UARTCLKEN; - temp |= UCR1_UARTEN | UCR1_RRDYEN; - temp &= ~(UCR1_TXMPTYEN | UCR1_RTSDEN); - writel(temp, sport->port.membase + UCR1); + /* + * Be careful about the order of enabling bits here. First enable the + * receiver (UARTEN + RXEN) and only then the corresponding irqs. + * This prevents that a character that already sits in the RX fifo is + * triggering an irq but the try to fetch it from there results in an + * exception because UARTEN or RXEN is still off. + */ + ucr1 = readl(port->membase + UCR1); + ucr2 = readl(port->membase + UCR2); - temp = readl(sport->port.membase + UCR2); - temp |= UCR2_RXEN; - writel(temp, sport->port.membase + UCR2); + if (is_imx1_uart(sport)) + ucr1 |= IMX1_UCR1_UARTCLKEN; + + ucr1 |= UCR1_UARTEN; + ucr1 &= ~(UCR1_TXMPTYEN | UCR1_RTSDEN | UCR1_RRDYEN); + + ucr2 |= UCR2_RXEN; + + writel(ucr1, sport->port.membase + UCR1); + writel(ucr2, sport->port.membase + UCR2); + + /* now enable irqs */ + writel(ucr1 | UCR1_RRDYEN, sport->port.membase + UCR1); spin_unlock_irqrestore(&sport->port.lock, flags); @@ -1742,11 +1810,8 @@ static int imx_rs485_config(struct uart_port *port, /* Make sure Rx is enabled in case Tx is active with Rx disabled */ if (!(rs485conf->flags & SER_RS485_ENABLED) || - rs485conf->flags & SER_RS485_RX_DURING_TX) { - temp = readl(sport->port.membase + UCR2); - temp |= UCR2_RXEN; - writel(temp, sport->port.membase + UCR2); - } + rs485conf->flags & SER_RS485_RX_DURING_TX) + imx_start_rx(port); port->rs485 = *rs485conf; @@ -1956,7 +2021,7 @@ imx_console_setup(struct console *co, char *options) retval = clk_prepare(sport->clk_per); if (retval) - clk_disable_unprepare(sport->clk_ipg); + clk_unprepare(sport->clk_ipg); error_console: return retval; diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 0969a0d97b2b..cec995ec11ea 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -769,12 +769,9 @@ static void max310x_start_tx(struct uart_port *port) static unsigned int max310x_tx_empty(struct uart_port *port) { - unsigned int lvl, sts; + u8 lvl = max310x_port_read(port, MAX310X_TXFIFOLVL_REG); - lvl = max310x_port_read(port, MAX310X_TXFIFOLVL_REG); - sts = max310x_port_read(port, MAX310X_IRQSTS_REG); - - return ((sts & MAX310X_IRQ_TXEMPTY_BIT) && !lvl) ? TIOCSER_TEMT : 0; + return lvl ? 0 : TIOCSER_TEMT; } static unsigned int max310x_get_mctrl(struct uart_port *port) diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c index 0e0ccc132ab0..77a1f00fe843 100644 --- a/drivers/tty/serial/msm_serial.c +++ b/drivers/tty/serial/msm_serial.c @@ -988,6 +988,7 @@ static unsigned int msm_get_mctrl(struct uart_port *port) static void msm_reset(struct uart_port *port) { struct msm_port *msm_port = UART_TO_MSM(port); + unsigned int mr; /* reset everything */ msm_write(port, UART_CR_CMD_RESET_RX, UART_CR); @@ -995,7 +996,10 @@ static void msm_reset(struct uart_port *port) msm_write(port, UART_CR_CMD_RESET_ERR, UART_CR); msm_write(port, UART_CR_CMD_RESET_BREAK_INT, UART_CR); msm_write(port, UART_CR_CMD_RESET_CTS, UART_CR); - msm_write(port, UART_CR_CMD_SET_RFR, UART_CR); + msm_write(port, UART_CR_CMD_RESET_RFR, UART_CR); + mr = msm_read(port, UART_MR1); + mr &= ~UART_MR1_RX_RDY_CTL; + msm_write(port, mr, UART_MR1); /* Disable DM modes */ if (msm_port->is_uartdm) @@ -1584,6 +1588,7 @@ static void __msm_console_write(struct uart_port *port, const char *s, int num_newlines = 0; bool replaced = false; void __iomem *tf; + int locked = 1; if (is_uartdm) tf = port->membase + UARTDM_TF; @@ -1596,7 +1601,13 @@ static void __msm_console_write(struct uart_port *port, const char *s, num_newlines++; count += num_newlines; - spin_lock(&port->lock); + if (port->sysrq) + locked = 0; + else if (oops_in_progress) + locked = spin_trylock(&port->lock); + else + spin_lock(&port->lock); + if (is_uartdm) msm_reset_dm_count(port, count); @@ -1632,7 +1643,9 @@ static void __msm_console_write(struct uart_port *port, const char *s, iowrite32_rep(tf, buf, 1); i += num_chars; } - spin_unlock(&port->lock); + + if (locked) + spin_unlock(&port->lock); } static void msm_console_write(struct console *co, const char *s, diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index 401c983ec5f3..a10e4aa9e18e 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -581,7 +581,7 @@ static int mvebu_uart_probe(struct platform_device *pdev) port->membase = devm_ioremap_resource(&pdev->dev, reg); if (IS_ERR(port->membase)) - return -PTR_ERR(port->membase); + return PTR_ERR(port->membase); data = devm_kzalloc(&pdev->dev, sizeof(struct mvebu_uart_data), GFP_KERNEL); diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c index 673c8fd7e34f..e83750831f15 100644 --- a/drivers/tty/serial/mxs-auart.c +++ b/drivers/tty/serial/mxs-auart.c @@ -1638,8 +1638,9 @@ static int mxs_auart_request_gpio_irq(struct mxs_auart_port *s) /* * If something went wrong, rollback. + * Be careful: i may be unsigned. */ - while (err && (--i >= 0)) + while (err && (i-- > 0)) if (irq[i] >= 0) free_irq(irq[i], s); diff --git a/drivers/tty/serial/owl-uart.c b/drivers/tty/serial/owl-uart.c index b9c859365334..d774f6013d7b 100644 --- a/drivers/tty/serial/owl-uart.c +++ b/drivers/tty/serial/owl-uart.c @@ -754,7 +754,7 @@ static int __init owl_uart_init(void) return ret; } -static void __init owl_uart_exit(void) +static void __exit owl_uart_exit(void) { platform_driver_unregister(&owl_uart_platform_driver); uart_unregister_driver(&owl_uart_driver); diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c index d9123f995705..15ddcbd1f9d2 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -247,6 +247,7 @@ struct eg20t_port { struct dma_chan *chan_rx; struct scatterlist *sg_tx_p; int nent; + int orig_nent; struct scatterlist sg_rx; int tx_dma_use; void *rx_buf_virt; @@ -801,9 +802,10 @@ static void pch_dma_tx_complete(void *arg) } xmit->tail &= UART_XMIT_SIZE - 1; async_tx_ack(priv->desc_tx); - dma_unmap_sg(port->dev, sg, priv->nent, DMA_TO_DEVICE); + dma_unmap_sg(port->dev, sg, priv->orig_nent, DMA_TO_DEVICE); priv->tx_dma_use = 0; priv->nent = 0; + priv->orig_nent = 0; kfree(priv->sg_tx_p); pch_uart_hal_enable_interrupt(priv, PCH_UART_HAL_TX_INT); } @@ -1027,6 +1029,7 @@ static unsigned int dma_handle_tx(struct eg20t_port *priv) dev_err(priv->port.dev, "%s:dma_map_sg Failed\n", __func__); return 0; } + priv->orig_nent = num; priv->nent = nent; for (i = 0; i < nent; i++, sg++) { diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index f4b8e4e17a86..808373d4e37a 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -1922,7 +1922,11 @@ static int s3c24xx_serial_resume(struct device *dev) if (port) { clk_prepare_enable(ourport->clk); + if (!IS_ERR(ourport->baudclk)) + clk_prepare_enable(ourport->baudclk); s3c24xx_serial_resetport(port, s3c24xx_port_to_cfg(port)); + if (!IS_ERR(ourport->baudclk)) + clk_disable_unprepare(ourport->baudclk); clk_disable_unprepare(ourport->clk); uart_resume_port(&s3c24xx_uart_drv, port); @@ -1945,7 +1949,11 @@ static int s3c24xx_serial_resume_noirq(struct device *dev) if (rx_enabled(port)) uintm &= ~S3C64XX_UINTM_RXD_MSK; clk_prepare_enable(ourport->clk); + if (!IS_ERR(ourport->baudclk)) + clk_prepare_enable(ourport->baudclk); wr_regl(port, S3C64XX_UINTM, uintm); + if (!IS_ERR(ourport->baudclk)) + clk_disable_unprepare(ourport->baudclk); clk_disable_unprepare(ourport->clk); } } diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index e48523da47ac..c1655aba131f 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -333,6 +333,7 @@ struct sc16is7xx_port { struct kthread_worker kworker; struct task_struct *kworker_task; struct kthread_work irq_work; + struct mutex efr_lock; struct sc16is7xx_one p[0]; }; @@ -504,6 +505,21 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) div /= 4; } + /* In an amazing feat of design, the Enhanced Features Register shares + * the address of the Interrupt Identification Register, and is + * switched in by writing a magic value (0xbf) to the Line Control + * Register. Any interrupt firing during this time will see the EFR + * where it expects the IIR to be, leading to "Unexpected interrupt" + * messages. + * + * Prevent this possibility by claiming a mutex while accessing the + * EFR, and claiming the same mutex from within the interrupt handler. + * This is similar to disabling the interrupt, but that doesn't work + * because the bulk of the interrupt processing is run as a workqueue + * job in thread context. + */ + mutex_lock(&s->efr_lock); + lcr = sc16is7xx_port_read(port, SC16IS7XX_LCR_REG); /* Open the LCR divisors for configuration */ @@ -519,6 +535,8 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) /* Put LCR back to the normal mode */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); + mutex_unlock(&s->efr_lock); + sc16is7xx_port_update(port, SC16IS7XX_MCR_REG, SC16IS7XX_MCR_CLKSEL_BIT, prescaler); @@ -701,6 +719,8 @@ static void sc16is7xx_ist(struct kthread_work *ws) { struct sc16is7xx_port *s = to_sc16is7xx_port(ws, irq_work); + mutex_lock(&s->efr_lock); + while (1) { bool keep_polling = false; int i; @@ -710,6 +730,8 @@ static void sc16is7xx_ist(struct kthread_work *ws) if (!keep_polling) break; } + + mutex_unlock(&s->efr_lock); } static irqreturn_t sc16is7xx_irq(int irq, void *dev_id) @@ -904,6 +926,9 @@ static void sc16is7xx_set_termios(struct uart_port *port, if (!(termios->c_cflag & CREAD)) port->ignore_status_mask |= SC16IS7XX_LSR_BRK_ERROR_MASK; + /* As above, claim the mutex while accessing the EFR. */ + mutex_lock(&s->efr_lock); + sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, SC16IS7XX_LCR_CONF_MODE_B); @@ -925,6 +950,8 @@ static void sc16is7xx_set_termios(struct uart_port *port, /* Update LCR register */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); + mutex_unlock(&s->efr_lock); + /* Get baud rate generator configuration */ baud = uart_get_baud_rate(port, termios, old, port->uartclk / 16 / 4 / 0xffff, @@ -1187,6 +1214,7 @@ static int sc16is7xx_probe(struct device *dev, s->regmap = regmap; s->devtype = devtype; dev_set_drvdata(dev, s); + mutex_init(&s->efr_lock); kthread_init_worker(&s->kworker); kthread_init_work(&s->irq_work, sc16is7xx_ist); diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 17e2311f7b00..0ff8de7725cf 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -1125,7 +1125,7 @@ static int uart_break_ctl(struct tty_struct *tty, int break_state) if (!uport) goto out; - if (uport->type != PORT_UNKNOWN) + if (uport->type != PORT_UNKNOWN && uport->ops->break_ctl) uport->ops->break_ctl(uport, break_state); ret = 0; out: @@ -2810,6 +2810,7 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport) if (uport->cons && uport->dev) of_console_check(uport->dev->of_node, uport->cons->name, uport->line); + tty_port_link_device(port, drv->tty_driver, uport->line); uart_configure_port(drv, state, uport); port->console = uart_console(uport); diff --git a/drivers/tty/serial/serial_mctrl_gpio.c b/drivers/tty/serial/serial_mctrl_gpio.c index 42e42e3e7a6e..388f71046849 100644 --- a/drivers/tty/serial/serial_mctrl_gpio.c +++ b/drivers/tty/serial/serial_mctrl_gpio.c @@ -69,6 +69,9 @@ EXPORT_SYMBOL_GPL(mctrl_gpio_set); struct gpio_desc *mctrl_gpio_to_gpiod(struct mctrl_gpios *gpios, enum mctrl_gpio_idx gidx) { + if (gpios == NULL) + return NULL; + return gpios->gpio[gidx]; } EXPORT_SYMBOL_GPL(mctrl_gpio_to_gpiod); diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 03a583264d9e..1e854e1851fb 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -118,35 +118,51 @@ static void stm32_receive_chars(struct uart_port *port, bool threaded) while (stm32_pending_rx(port, &sr, &stm32_port->last_res, threaded)) { sr |= USART_SR_DUMMY_RX; - c = stm32_get_char(port, &sr, &stm32_port->last_res); flag = TTY_NORMAL; - port->icount.rx++; + /* + * Status bits has to be cleared before reading the RDR: + * In FIFO mode, reading the RDR will pop the next data + * (if any) along with its status bits into the SR. + * Not doing so leads to misalignement between RDR and SR, + * and clear status bits of the next rx data. + * + * Clear errors flags for stm32f7 and stm32h7 compatible + * devices. On stm32f4 compatible devices, the error bit is + * cleared by the sequence [read SR - read DR]. + */ + if ((sr & USART_SR_ERR_MASK) && ofs->icr != UNDEF_REG) + writel_relaxed(sr & USART_SR_ERR_MASK, + port->membase + ofs->icr); + + c = stm32_get_char(port, &sr, &stm32_port->last_res); + port->icount.rx++; if (sr & USART_SR_ERR_MASK) { - if (sr & USART_SR_LBD) { - port->icount.brk++; - if (uart_handle_break(port)) - continue; - } else if (sr & USART_SR_ORE) { - if (ofs->icr != UNDEF_REG) - writel_relaxed(USART_ICR_ORECF, - port->membase + - ofs->icr); + if (sr & USART_SR_ORE) { port->icount.overrun++; } else if (sr & USART_SR_PE) { port->icount.parity++; } else if (sr & USART_SR_FE) { - port->icount.frame++; + /* Break detection if character is null */ + if (!c) { + port->icount.brk++; + if (uart_handle_break(port)) + continue; + } else { + port->icount.frame++; + } } sr &= port->read_status_mask; - if (sr & USART_SR_LBD) - flag = TTY_BREAK; - else if (sr & USART_SR_PE) + if (sr & USART_SR_PE) { flag = TTY_PARITY; - else if (sr & USART_SR_FE) - flag = TTY_FRAME; + } else if (sr & USART_SR_FE) { + if (!c) + flag = TTY_BREAK; + else + flag = TTY_FRAME; + } } if (uart_handle_sysrq_char(port, c)) @@ -164,21 +180,6 @@ static void stm32_tx_dma_complete(void *arg) struct uart_port *port = arg; struct stm32_port *stm32port = to_stm32_port(port); struct stm32_usart_offsets *ofs = &stm32port->info->ofs; - unsigned int isr; - int ret; - - ret = readl_relaxed_poll_timeout_atomic(port->membase + ofs->isr, - isr, - (isr & USART_SR_TC), - 10, 100000); - - if (ret) - dev_err(port->dev, "terminal count not set\n"); - - if (ofs->icr == UNDEF_REG) - stm32_clr_bits(port, ofs->isr, USART_SR_TC); - else - stm32_set_bits(port, ofs->icr, USART_CR_TC); stm32_clr_bits(port, ofs->cr3, USART_CR3_DMAT); stm32port->tx_dma_busy = false; @@ -270,7 +271,6 @@ static void stm32_transmit_chars_dma(struct uart_port *port) /* Issue pending DMA TX requests */ dma_async_issue_pending(stm32port->tx_ch); - stm32_clr_bits(port, ofs->isr, USART_SR_TC); stm32_set_bits(port, ofs->cr3, USART_CR3_DMAT); xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1); @@ -294,15 +294,15 @@ static void stm32_transmit_chars(struct uart_port *port) return; } - if (uart_tx_stopped(port)) { - stm32_stop_tx(port); + if (uart_circ_empty(xmit) || uart_tx_stopped(port)) { + stm32_clr_bits(port, ofs->cr1, USART_CR1_TXEIE); return; } - if (uart_circ_empty(xmit)) { - stm32_stop_tx(port); - return; - } + if (ofs->icr == UNDEF_REG) + stm32_clr_bits(port, ofs->isr, USART_SR_TC); + else + writel_relaxed(USART_ICR_TCCF, port->membase + ofs->icr); if (stm32_port->tx_ch) stm32_transmit_chars_dma(port); @@ -313,7 +313,7 @@ static void stm32_transmit_chars(struct uart_port *port) uart_write_wakeup(port); if (uart_circ_empty(xmit)) - stm32_stop_tx(port); + stm32_clr_bits(port, ofs->cr1, USART_CR1_TXEIE); } static irqreturn_t stm32_interrupt(int irq, void *ptr) @@ -447,7 +447,6 @@ static int stm32_startup(struct uart_port *port) { struct stm32_port *stm32_port = to_stm32_port(port); struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; - struct stm32_usart_config *cfg = &stm32_port->info->cfg; const char *name = to_platform_device(port->dev)->name; u32 val; int ret; @@ -458,15 +457,6 @@ static int stm32_startup(struct uart_port *port) if (ret) return ret; - if (cfg->has_wakeup && stm32_port->wakeirq >= 0) { - ret = dev_pm_set_dedicated_wake_irq(port->dev, - stm32_port->wakeirq); - if (ret) { - free_irq(port->irq, port); - return ret; - } - } - val = USART_CR1_RXNEIE | USART_CR1_TE | USART_CR1_RE; if (stm32_port->fifoen) val |= USART_CR1_FIFOEN; @@ -480,15 +470,23 @@ static void stm32_shutdown(struct uart_port *port) struct stm32_port *stm32_port = to_stm32_port(port); struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; struct stm32_usart_config *cfg = &stm32_port->info->cfg; - u32 val; + u32 val, isr; + int ret; val = USART_CR1_TXEIE | USART_CR1_RXNEIE | USART_CR1_TE | USART_CR1_RE; val |= BIT(cfg->uart_enable_bit); if (stm32_port->fifoen) val |= USART_CR1_FIFOEN; + + ret = readl_relaxed_poll_timeout(port->membase + ofs->isr, + isr, (isr & USART_SR_TC), + 10, 100000); + + if (ret) + dev_err(port->dev, "transmission complete not set\n"); + stm32_clr_bits(port, ofs->cr1, val); - dev_pm_clear_wake_irq(port->dev); free_irq(port->irq, port); } @@ -569,14 +567,14 @@ static void stm32_set_termios(struct uart_port *port, struct ktermios *termios, if (termios->c_iflag & INPCK) port->read_status_mask |= USART_SR_PE | USART_SR_FE; if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK)) - port->read_status_mask |= USART_SR_LBD; + port->read_status_mask |= USART_SR_FE; /* Characters to ignore */ port->ignore_status_mask = 0; if (termios->c_iflag & IGNPAR) port->ignore_status_mask = USART_SR_PE | USART_SR_FE; if (termios->c_iflag & IGNBRK) { - port->ignore_status_mask |= USART_SR_LBD; + port->ignore_status_mask |= USART_SR_FE; /* * If we're ignoring parity and break indicators, * ignore overruns too (for real raw support). @@ -895,11 +893,18 @@ static int stm32_serial_probe(struct platform_device *pdev) ret = device_init_wakeup(&pdev->dev, true); if (ret) goto err_uninit; + + ret = dev_pm_set_dedicated_wake_irq(&pdev->dev, + stm32port->wakeirq); + if (ret) + goto err_nowup; + + device_set_wakeup_enable(&pdev->dev, false); } ret = uart_add_one_port(&stm32_usart_driver, &stm32port->port); if (ret) - goto err_nowup; + goto err_wirq; ret = stm32_of_dma_rx_probe(stm32port, pdev); if (ret) @@ -913,6 +918,10 @@ static int stm32_serial_probe(struct platform_device *pdev) return 0; +err_wirq: + if (stm32port->info->cfg.has_wakeup && stm32port->wakeirq >= 0) + dev_pm_clear_wake_irq(&pdev->dev); + err_nowup: if (stm32port->info->cfg.has_wakeup && stm32port->wakeirq >= 0) device_init_wakeup(&pdev->dev, false); @@ -950,8 +959,10 @@ static int stm32_serial_remove(struct platform_device *pdev) TX_BUF_L, stm32_port->tx_buf, stm32_port->tx_dma_buf); - if (cfg->has_wakeup && stm32_port->wakeirq >= 0) + if (cfg->has_wakeup && stm32_port->wakeirq >= 0) { + dev_pm_clear_wake_irq(&pdev->dev); device_init_wakeup(&pdev->dev, false); + } clk_disable_unprepare(stm32_port->clk); diff --git a/drivers/tty/serial/stm32-usart.h b/drivers/tty/serial/stm32-usart.h index ffc0c5285e51..9d087881913a 100644 --- a/drivers/tty/serial/stm32-usart.h +++ b/drivers/tty/serial/stm32-usart.h @@ -108,7 +108,6 @@ struct stm32_usart_info stm32h7_info = { #define USART_SR_RXNE BIT(5) #define USART_SR_TC BIT(6) #define USART_SR_TXE BIT(7) -#define USART_SR_LBD BIT(8) #define USART_SR_CTSIF BIT(9) #define USART_SR_CTS BIT(10) /* F7 */ #define USART_SR_RTOF BIT(11) /* F7 */ @@ -120,8 +119,7 @@ struct stm32_usart_info stm32h7_info = { #define USART_SR_SBKF BIT(18) /* F7 */ #define USART_SR_WUF BIT(20) /* H7 */ #define USART_SR_TEACK BIT(21) /* F7 */ -#define USART_SR_ERR_MASK (USART_SR_LBD | USART_SR_ORE | \ - USART_SR_FE | USART_SR_PE) +#define USART_SR_ERR_MASK (USART_SR_ORE | USART_SR_FE | USART_SR_PE) /* Dummy bits */ #define USART_SR_DUMMY_RX BIT(16) @@ -166,8 +164,6 @@ struct stm32_usart_info stm32h7_info = { /* USART_CR2 */ #define USART_CR2_ADD_MASK GENMASK(3, 0) /* F4 */ #define USART_CR2_ADDM7 BIT(4) /* F7 */ -#define USART_CR2_LBDL BIT(5) -#define USART_CR2_LBDIE BIT(6) #define USART_CR2_LBCL BIT(8) #define USART_CR2_CPHA BIT(9) #define USART_CR2_CPOL BIT(10) @@ -224,12 +220,10 @@ struct stm32_usart_info stm32h7_info = { /* USART_ICR */ #define USART_ICR_PECF BIT(0) /* F7 */ -#define USART_ICR_FFECF BIT(1) /* F7 */ -#define USART_ICR_NCF BIT(2) /* F7 */ +#define USART_ICR_FECF BIT(1) /* F7 */ #define USART_ICR_ORECF BIT(3) /* F7 */ #define USART_ICR_IDLECF BIT(4) /* F7 */ #define USART_ICR_TCCF BIT(6) /* F7 */ -#define USART_ICR_LBDCF BIT(8) /* F7 */ #define USART_ICR_CTSCF BIT(9) /* F7 */ #define USART_ICR_RTOCF BIT(11) /* F7 */ #define USART_ICR_EOBCF BIT(12) /* F7 */ diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index b0da63737aa1..81657f09761c 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -31,6 +31,7 @@ #include #include #include +#include #define CDNS_UART_TTY_NAME "ttyPS" #define CDNS_UART_NAME "xuartps" @@ -39,6 +40,7 @@ #define CDNS_UART_NR_PORTS 2 #define CDNS_UART_FIFO_SIZE 64 /* FIFO size */ #define CDNS_UART_REGISTER_SPACE 0x1000 +#define TX_TIMEOUT 500000 /* Rx Trigger level */ static int rx_trigger_level = 56; @@ -685,18 +687,21 @@ static void cdns_uart_set_termios(struct uart_port *port, unsigned int cval = 0; unsigned int baud, minbaud, maxbaud; unsigned long flags; - unsigned int ctrl_reg, mode_reg; - - spin_lock_irqsave(&port->lock, flags); + unsigned int ctrl_reg, mode_reg, val; + int err; /* Wait for the transmit FIFO to empty before making changes */ if (!(readl(port->membase + CDNS_UART_CR) & CDNS_UART_CR_TX_DIS)) { - while (!(readl(port->membase + CDNS_UART_SR) & - CDNS_UART_SR_TXEMPTY)) { - cpu_relax(); + err = readl_poll_timeout(port->membase + CDNS_UART_SR, + val, (val & CDNS_UART_SR_TXEMPTY), + 1000, TX_TIMEOUT); + if (err) { + dev_err(port->dev, "timed out waiting for tx empty"); + return; } } + spin_lock_irqsave(&port->lock, flags); /* Disable the TX and RX to set baud rate */ ctrl_reg = readl(port->membase + CDNS_UART_CR); @@ -1342,24 +1347,11 @@ static struct uart_driver cdns_uart_uart_driver = { static int cdns_uart_suspend(struct device *device) { struct uart_port *port = dev_get_drvdata(device); - struct tty_struct *tty; - struct device *tty_dev; - int may_wake = 0; + int may_wake; - /* Get the tty which could be NULL so don't assume it's valid */ - tty = tty_port_tty_get(&port->state->port); - if (tty) { - tty_dev = tty->dev; - may_wake = device_may_wakeup(tty_dev); - tty_kref_put(tty); - } + may_wake = device_may_wakeup(device); - /* - * Call the API provided in serial_core.c file which handles - * the suspend. - */ - uart_suspend_port(&cdns_uart_uart_driver, port); - if (!(console_suspend_enabled && !may_wake)) { + if (console_suspend_enabled && may_wake) { unsigned long flags = 0; spin_lock_irqsave(&port->lock, flags); @@ -1374,7 +1366,11 @@ static int cdns_uart_suspend(struct device *device) spin_unlock_irqrestore(&port->lock, flags); } - return 0; + /* + * Call the API provided in serial_core.c file which handles + * the suspend. + */ + return uart_suspend_port(&cdns_uart_uart_driver, port); } /** @@ -1388,17 +1384,9 @@ static int cdns_uart_resume(struct device *device) struct uart_port *port = dev_get_drvdata(device); unsigned long flags = 0; u32 ctrl_reg; - struct tty_struct *tty; - struct device *tty_dev; - int may_wake = 0; + int may_wake; - /* Get the tty which could be NULL so don't assume it's valid */ - tty = tty_port_tty_get(&port->state->port); - if (tty) { - tty_dev = tty->dev; - may_wake = device_may_wakeup(tty_dev); - tty_kref_put(tty); - } + may_wake = device_may_wakeup(device); if (console_suspend_enabled && !may_wake) { struct cdns_uart *cdns_uart = port->private_data; diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c index 636b8ae29b46..9d68f89a2bf8 100644 --- a/drivers/tty/synclink_gt.c +++ b/drivers/tty/synclink_gt.c @@ -1187,14 +1187,13 @@ static long slgt_compat_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct slgt_info *info = tty->driver_data; - int rc = -ENOIOCTLCMD; + int rc; if (sanity_check(info, tty->name, "compat_ioctl")) return -ENODEV; DBGINFO(("%s compat_ioctl() cmd=%08X\n", info->device_name, cmd)); switch (cmd) { - case MGSL_IOCSPARAMS32: rc = set_params32(info, compat_ptr(arg)); break; @@ -1214,18 +1213,11 @@ static long slgt_compat_ioctl(struct tty_struct *tty, case MGSL_IOCWAITGPIO: case MGSL_IOCGXSYNC: case MGSL_IOCGXCTRL: - case MGSL_IOCSTXIDLE: - case MGSL_IOCTXENABLE: - case MGSL_IOCRXENABLE: - case MGSL_IOCTXABORT: - case TIOCMIWAIT: - case MGSL_IOCSIF: - case MGSL_IOCSXSYNC: - case MGSL_IOCSXCTRL: - rc = ioctl(tty, cmd, arg); + rc = ioctl(tty, cmd, (unsigned long)compat_ptr(arg)); break; + default: + rc = ioctl(tty, cmd, arg); } - DBGINFO(("%s compat_ioctl() cmd=%08X rc=%d\n", info->device_name, cmd, rc)); return rc; } @@ -1357,10 +1349,10 @@ static void throttle(struct tty_struct * tty) DBGINFO(("%s throttle\n", info->device_name)); if (I_IXOFF(tty)) send_xchar(tty, STOP_CHAR(tty)); - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->signals &= ~SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -1382,10 +1374,10 @@ static void unthrottle(struct tty_struct * tty) else send_xchar(tty, START_CHAR(tty)); } - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->signals |= SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -2583,8 +2575,8 @@ static void change_params(struct slgt_info *info) info->read_status_mask = IRQ_RXOVER; if (I_INPCK(info->port.tty)) info->read_status_mask |= MASK_PARITY | MASK_FRAMING; - if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) - info->read_status_mask |= MASK_BREAK; + if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) + info->read_status_mask |= MASK_BREAK; if (I_IGNPAR(info->port.tty)) info->ignore_status_mask |= MASK_PARITY | MASK_FRAMING; if (I_IGNBRK(info->port.tty)) { @@ -3215,7 +3207,7 @@ static int tiocmset(struct tty_struct *tty, info->signals &= ~SerialSignal_DTR; spin_lock_irqsave(&info->lock,flags); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); return 0; } @@ -3226,7 +3218,7 @@ static int carrier_raised(struct tty_port *port) struct slgt_info *info = container_of(port, struct slgt_info, port); spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_signals(info); spin_unlock_irqrestore(&info->lock,flags); return (info->signals & SerialSignal_DCD) ? 1 : 0; } @@ -3241,7 +3233,7 @@ static void dtr_rts(struct tty_port *port, int on) info->signals |= SerialSignal_RTS | SerialSignal_DTR; else info->signals &= ~(SerialSignal_RTS | SerialSignal_DTR); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c index 4fed9e7b281f..3c9e314406b4 100644 --- a/drivers/tty/synclinkmp.c +++ b/drivers/tty/synclinkmp.c @@ -1467,10 +1467,10 @@ static void throttle(struct tty_struct * tty) if (I_IXOFF(tty)) send_xchar(tty, STOP_CHAR(tty)); - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->serial_signals &= ~SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -1496,10 +1496,10 @@ static void unthrottle(struct tty_struct * tty) send_xchar(tty, START_CHAR(tty)); } - if (C_CRTSCTS(tty)) { + if (C_CRTSCTS(tty)) { spin_lock_irqsave(&info->lock,flags); info->serial_signals |= SerialSignal_RTS; - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } } @@ -2484,7 +2484,7 @@ static void isr_io_pin( SLMP_INFO *info, u16 status ) if (status & SerialSignal_CTS) { if ( debug_level >= DEBUG_LEVEL_ISR ) printk("CTS tx start..."); - info->port.tty->hw_stopped = 0; + info->port.tty->hw_stopped = 0; tx_start(info); info->pending_bh |= BH_TRANSMIT; return; @@ -2493,7 +2493,7 @@ static void isr_io_pin( SLMP_INFO *info, u16 status ) if (!(status & SerialSignal_CTS)) { if ( debug_level >= DEBUG_LEVEL_ISR ) printk("CTS tx stop..."); - info->port.tty->hw_stopped = 1; + info->port.tty->hw_stopped = 1; tx_stop(info); } } @@ -2820,8 +2820,8 @@ static void change_params(SLMP_INFO *info) info->read_status_mask2 = OVRN; if (I_INPCK(info->port.tty)) info->read_status_mask2 |= PE | FRME; - if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) - info->read_status_mask1 |= BRKD; + if (I_BRKINT(info->port.tty) || I_PARMRK(info->port.tty)) + info->read_status_mask1 |= BRKD; if (I_IGNPAR(info->port.tty)) info->ignore_status_mask2 |= PE | FRME; if (I_IGNBRK(info->port.tty)) { @@ -3191,7 +3191,7 @@ static int tiocmget(struct tty_struct *tty) unsigned long flags; spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_signals(info); spin_unlock_irqrestore(&info->lock,flags); result = ((info->serial_signals & SerialSignal_RTS) ? TIOCM_RTS : 0) | @@ -3229,7 +3229,7 @@ static int tiocmset(struct tty_struct *tty, info->serial_signals &= ~SerialSignal_DTR; spin_lock_irqsave(&info->lock,flags); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); return 0; @@ -3241,7 +3241,7 @@ static int carrier_raised(struct tty_port *port) unsigned long flags; spin_lock_irqsave(&info->lock,flags); - get_signals(info); + get_signals(info); spin_unlock_irqrestore(&info->lock,flags); return (info->serial_signals & SerialSignal_DCD) ? 1 : 0; @@ -3257,7 +3257,7 @@ static void dtr_rts(struct tty_port *port, int on) info->serial_signals |= SerialSignal_RTS | SerialSignal_DTR; else info->serial_signals &= ~(SerialSignal_RTS | SerialSignal_DTR); - set_signals(info); + set_signals(info); spin_unlock_irqrestore(&info->lock,flags); } diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 377b3592384e..4c716ddd6599 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -546,7 +546,6 @@ void __handle_sysrq(int key, bool check_mask) */ orig_log_level = console_loglevel; console_loglevel = CONSOLE_LOGLEVEL_DEFAULT; - pr_info("SysRq : "); op_p = __sysrq_get_key_op(key); if (op_p) { @@ -555,14 +554,15 @@ void __handle_sysrq(int key, bool check_mask) * should not) and is the invoked operation enabled? */ if (!check_mask || sysrq_on_mask(op_p->enable_mask)) { - pr_cont("%s\n", op_p->action_msg); + pr_info("%s\n", op_p->action_msg); console_loglevel = orig_log_level; op_p->handler(key); } else { - pr_cont("This sysrq operation is disabled.\n"); + pr_info("This sysrq operation is disabled.\n"); + console_loglevel = orig_log_level; } } else { - pr_cont("HELP : "); + pr_info("HELP : "); /* Only print the help msg once per handler */ for (i = 0; i < ARRAY_SIZE(sysrq_key_table); i++) { if (sysrq_key_table[i]) { diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 01fcdc7ff077..62dd2abb57fe 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -348,6 +348,11 @@ int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) { int ret; + /* Kindly asking blocked readers to release the read side */ + set_bit(TTY_LDISC_CHANGING, &tty->flags); + wake_up_interruptible_all(&tty->read_wait); + wake_up_interruptible_all(&tty->write_wait); + ret = __tty_ldisc_lock(tty, timeout); if (!ret) return -EBUSY; @@ -358,6 +363,8 @@ int tty_ldisc_lock(struct tty_struct *tty, unsigned long timeout) void tty_ldisc_unlock(struct tty_struct *tty) { clear_bit(TTY_LDISC_HALTED, &tty->flags); + /* Can be cleared here - ldisc_unlock will wake up writers firstly */ + clear_bit(TTY_LDISC_CHANGING, &tty->flags); __tty_ldisc_unlock(tty); } diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index c93a33701d32..dd12c3b86eb4 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -51,10 +51,11 @@ static void tty_port_default_wakeup(struct tty_port *port) } } -static const struct tty_port_client_operations default_client_ops = { +const struct tty_port_client_operations tty_port_default_client_ops = { .receive_buf = tty_port_default_receive_buf, .write_wakeup = tty_port_default_wakeup, }; +EXPORT_SYMBOL_GPL(tty_port_default_client_ops); void tty_port_init(struct tty_port *port) { @@ -67,7 +68,7 @@ void tty_port_init(struct tty_port *port) spin_lock_init(&port->lock); port->close_delay = (50 * HZ) / 100; port->closing_wait = (3000 * HZ) / 100; - port->client_ops = &default_client_ops; + port->client_ops = &tty_port_default_client_ops; kref_init(&port->kref); } EXPORT_SYMBOL(tty_port_init); diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 7506bbcf8259..b9ec4e2828e2 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -1460,7 +1460,7 @@ static void kbd_event(struct input_handle *handle, unsigned int event_type, if (event_type == EV_MSC && event_code == MSC_RAW && HW_RAW(handle->dev)) kbd_rawcode(value); - if (event_type == EV_KEY) + if (event_type == EV_KEY && event_code <= KEY_MAX) kbd_keycode(event_code, value, HW_RAW(handle->dev)); spin_unlock(&kbd_event_lock); diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 7a4c8022c023..91ffe3f2b8a0 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -27,6 +28,8 @@ #include #include +#include + /* Don't take this from : 011-015 on the screen aren't spaces */ #define isspace(c) ((c) == ' ') @@ -41,6 +44,7 @@ static volatile int sel_start = -1; /* cleared by clear_selection */ static int sel_end; static int sel_buffer_lth; static char *sel_buffer; +static DEFINE_MUTEX(sel_lock); /* clear_selection, highlight and highlight_pointer can be called from interrupt (via scrollback/front) */ @@ -153,14 +157,14 @@ static int store_utf8(u16 c, char *p) * The entire selection process is managed under the console_lock. It's * a lot under the lock but its hardly a performance path */ -int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty) +static int __set_selection(const struct tiocl_selection __user *sel, struct tty_struct *tty) { struct vc_data *vc = vc_cons[fg_console].d; int sel_mode, new_sel_start, new_sel_end, spc; char *bp, *obp; int i, ps, pe, multiplier; u16 c; - int mode; + int mode, ret = 0; poke_blanked_console(); @@ -321,7 +325,21 @@ int set_selection(const struct tiocl_selection __user *sel, struct tty_struct *t } } sel_buffer_lth = bp - sel_buffer; - return 0; + + return ret; +} + +int set_selection(const struct tiocl_selection __user *v, struct tty_struct *tty) +{ + int ret; + + mutex_lock(&sel_lock); + console_lock(); + ret = __set_selection(v, tty); + console_unlock(); + mutex_unlock(&sel_lock); + + return ret; } /* Insert the contents of the selection buffer into the @@ -338,6 +356,7 @@ int paste_selection(struct tty_struct *tty) unsigned int count; struct tty_ldisc *ld; DECLARE_WAITQUEUE(wait, current); + int ret = 0; console_lock(); poke_blanked_console(); @@ -349,10 +368,17 @@ int paste_selection(struct tty_struct *tty) tty_buffer_lock_exclusive(&vc->port); add_wait_queue(&vc->paste_wait, &wait); + mutex_lock(&sel_lock); while (sel_buffer && sel_buffer_lth > pasted) { set_current_state(TASK_INTERRUPTIBLE); + if (signal_pending(current)) { + ret = -EINTR; + break; + } if (tty_throttled(tty)) { + mutex_unlock(&sel_lock); schedule(); + mutex_lock(&sel_lock); continue; } __set_current_state(TASK_RUNNING); @@ -361,10 +387,11 @@ int paste_selection(struct tty_struct *tty) count); pasted += count; } + mutex_unlock(&sel_lock); remove_wait_queue(&vc->paste_wait, &wait); __set_current_state(TASK_RUNNING); tty_buffer_unlock_exclusive(&vc->port); tty_ldisc_deref(ld); - return 0; + return ret; } diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 06761fcedeff..826433af4bdd 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2688,9 +2688,7 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) switch (type) { case TIOCL_SETSEL: - console_lock(); ret = set_selection((struct tiocl_selection __user *)(p+1), tty); - console_unlock(); break; case TIOCL_PASTESEL: ret = paste_selection(tty); diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 7b34b0ddbf0e..c320fefab360 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -847,58 +847,49 @@ int vt_ioctl(struct tty_struct *tty, case VT_RESIZEX: { - struct vt_consize __user *vtconsize = up; - ushort ll,cc,vlin,clin,vcol,ccol; + struct vt_consize v; if (!perm) return -EPERM; - if (!access_ok(VERIFY_READ, vtconsize, - sizeof(struct vt_consize))) { - ret = -EFAULT; - break; - } + if (copy_from_user(&v, up, sizeof(struct vt_consize))) + return -EFAULT; /* FIXME: Should check the copies properly */ - __get_user(ll, &vtconsize->v_rows); - __get_user(cc, &vtconsize->v_cols); - __get_user(vlin, &vtconsize->v_vlin); - __get_user(clin, &vtconsize->v_clin); - __get_user(vcol, &vtconsize->v_vcol); - __get_user(ccol, &vtconsize->v_ccol); - vlin = vlin ? vlin : vc->vc_scan_lines; - if (clin) { - if (ll) { - if (ll != vlin/clin) { - /* Parameters don't add up */ - ret = -EINVAL; - break; - } - } else - ll = vlin/clin; + if (!v.v_vlin) + v.v_vlin = vc->vc_scan_lines; + if (v.v_clin) { + int rows = v.v_vlin/v.v_clin; + if (v.v_rows != rows) { + if (v.v_rows) /* Parameters don't add up */ + return -EINVAL; + v.v_rows = rows; + } } - if (vcol && ccol) { - if (cc) { - if (cc != vcol/ccol) { - ret = -EINVAL; - break; - } - } else - cc = vcol/ccol; + if (v.v_vcol && v.v_ccol) { + int cols = v.v_vcol/v.v_ccol; + if (v.v_cols != cols) { + if (v.v_cols) + return -EINVAL; + v.v_cols = cols; + } } - if (clin > 32) { - ret = -EINVAL; - break; - } - + if (v.v_clin > 32) + return -EINVAL; + for (i = 0; i < MAX_NR_CONSOLES; i++) { + struct vc_data *vcp; + if (!vc_cons[i].d) continue; console_lock(); - if (vlin) - vc_cons[i].d->vc_scan_lines = vlin; - if (clin) - vc_cons[i].d->vc_font.height = clin; - vc_cons[i].d->vc_resize_user = 1; - vc_resize(vc_cons[i].d, cc, ll); + vcp = vc_cons[i].d; + if (vcp) { + if (v.v_vlin) + vcp->vc_scan_lines = v.v_vlin; + if (v.v_clin) + vcp->vc_font.height = v.v_clin; + vcp->vc_resize_user = 1; + vc_resize(vcp, v.v_cols, v.v_rows); + } console_unlock(); } break; diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index fb5c9701b1fb..7c18536a3742 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -939,9 +939,12 @@ int __uio_register_device(struct module *owner, atomic_set(&idev->event, 0); ret = uio_get_minor(idev); - if (ret) + if (ret) { + kfree(idev); return ret; + } + device_initialize(&idev->dev); idev->dev.devt = MKDEV(uio_major, idev->minor); idev->dev.class = &uio_class; idev->dev.parent = parent; @@ -952,7 +955,7 @@ int __uio_register_device(struct module *owner, if (ret) goto err_device_create; - ret = device_register(&idev->dev); + ret = device_add(&idev->dev); if (ret) goto err_device_create; @@ -984,9 +987,10 @@ int __uio_register_device(struct module *owner, err_request_irq: uio_dev_del_attributes(idev); err_uio_dev_add_attributes: - device_unregister(&idev->dev); + device_del(&idev->dev); err_device_create: uio_free_minor(idev); + put_device(&idev->dev); return ret; } EXPORT_SYMBOL_GPL(__uio_register_device); diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c index e1134a4d97f3..a00b4aee6c79 100644 --- a/drivers/uio/uio_dmem_genirq.c +++ b/drivers/uio/uio_dmem_genirq.c @@ -135,11 +135,13 @@ static int uio_dmem_genirq_irqcontrol(struct uio_info *dev_info, s32 irq_on) if (irq_on) { if (test_and_clear_bit(0, &priv->flags)) enable_irq(dev_info->irq); + spin_unlock_irqrestore(&priv->lock, flags); } else { - if (!test_and_set_bit(0, &priv->flags)) + if (!test_and_set_bit(0, &priv->flags)) { + spin_unlock_irqrestore(&priv->lock, flags); disable_irq(dev_info->irq); + } } - spin_unlock_irqrestore(&priv->lock, flags); return 0; } diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c index ba7616395db2..f649b7b83200 100644 --- a/drivers/usb/atm/ueagle-atm.c +++ b/drivers/usb/atm/ueagle-atm.c @@ -2167,10 +2167,11 @@ resubmit: /* * Start the modem : init the data and start kernel thread */ -static int uea_boot(struct uea_softc *sc) +static int uea_boot(struct uea_softc *sc, struct usb_interface *intf) { - int ret, size; struct intr_pkt *intr; + int ret = -ENOMEM; + int size; uea_enters(INS_TO_USBDEV(sc)); @@ -2195,6 +2196,11 @@ static int uea_boot(struct uea_softc *sc) if (UEA_CHIP_VERSION(sc) == ADI930) load_XILINX_firmware(sc); + if (intf->cur_altsetting->desc.bNumEndpoints < 1) { + ret = -ENODEV; + goto err0; + } + intr = kmalloc(size, GFP_KERNEL); if (!intr) goto err0; @@ -2206,8 +2212,7 @@ static int uea_boot(struct uea_softc *sc) usb_fill_int_urb(sc->urb_int, sc->usb_dev, usb_rcvintpipe(sc->usb_dev, UEA_INTR_PIPE), intr, size, uea_intr, sc, - sc->usb_dev->actconfig->interface[0]->altsetting[0]. - endpoint[0].desc.bInterval); + intf->cur_altsetting->endpoint[0].desc.bInterval); ret = usb_submit_urb(sc->urb_int, GFP_KERNEL); if (ret < 0) { @@ -2222,6 +2227,7 @@ static int uea_boot(struct uea_softc *sc) sc->kthread = kthread_create(uea_kthread, sc, "ueagle-atm"); if (IS_ERR(sc->kthread)) { uea_err(INS_TO_USBDEV(sc), "failed to create thread\n"); + ret = PTR_ERR(sc->kthread); goto err2; } @@ -2236,7 +2242,7 @@ err1: kfree(intr); err0: uea_leaves(INS_TO_USBDEV(sc)); - return -ENOMEM; + return ret; } /* @@ -2597,7 +2603,7 @@ static int uea_bind(struct usbatm_data *usbatm, struct usb_interface *intf, if (ret < 0) goto error; - ret = uea_boot(sc); + ret = uea_boot(sc, intf); if (ret < 0) goto error_rm_grp; diff --git a/drivers/usb/chipidea/host.c b/drivers/usb/chipidea/host.c index 18cb8e46262d..83683a5627f3 100644 --- a/drivers/usb/chipidea/host.c +++ b/drivers/usb/chipidea/host.c @@ -37,6 +37,7 @@ static int (*orig_bus_suspend)(struct usb_hcd *hcd); struct ehci_ci_priv { struct regulator *reg_vbus; + bool enabled; }; static int ehci_ci_portpower(struct usb_hcd *hcd, int portnum, bool enable) @@ -48,7 +49,7 @@ static int ehci_ci_portpower(struct usb_hcd *hcd, int portnum, bool enable) int ret = 0; int port = HCS_N_PORTS(ehci->hcs_params); - if (priv->reg_vbus) { + if (priv->reg_vbus && enable != priv->enabled) { if (port > 1) { dev_warn(dev, "Not support multi-port regulator control\n"); @@ -64,6 +65,7 @@ static int ehci_ci_portpower(struct usb_hcd *hcd, int portnum, bool enable) enable ? "enable" : "disable", ret); return ret; } + priv->enabled = enable; } if (enable && (ci->platdata->phy_mode == USBPHY_INTERFACE_MODE_HSIC)) { diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c index 10236fe71522..8bf4032226ed 100644 --- a/drivers/usb/chipidea/otg.c +++ b/drivers/usb/chipidea/otg.c @@ -206,14 +206,17 @@ static void ci_otg_work(struct work_struct *work) } pm_runtime_get_sync(ci->dev); + if (ci->id_event) { ci->id_event = false; ci_handle_id_switch(ci); - } else if (ci->b_sess_valid_event) { + } + + if (ci->b_sess_valid_event) { ci->b_sess_valid_event = false; ci_handle_vbus_change(ci); - } else - dev_err(ci->dev, "unexpected event occurs at %s\n", __func__); + } + pm_runtime_put_sync(ci->dev); enable_irq(ci->irq); diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c index 9f4a0185dd60..b7477fd4443a 100644 --- a/drivers/usb/chipidea/usbmisc_imx.c +++ b/drivers/usb/chipidea/usbmisc_imx.c @@ -343,6 +343,8 @@ static int usbmisc_imx6q_init(struct imx_usbmisc_data *data) } else if (data->oc_polarity == 1) { /* High active */ reg &= ~(MX6_BM_OVER_CUR_DIS | MX6_BM_OVER_CUR_POLARITY); + } else { + reg &= ~(MX6_BM_OVER_CUR_DIS); } writel(reg, usbmisc->base + data->index * 4); diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index a593cdfc897f..d5d42dccda10 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -1085,7 +1085,7 @@ static int wdm_post_reset(struct usb_interface *intf) rv = recover_from_urb_loss(desc); mutex_unlock(&desc->wlock); mutex_unlock(&desc->rlock); - return 0; + return rv; } static struct usb_driver wdm_driver = { diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 50836f79f908..5e456a83779d 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -458,6 +458,7 @@ static void usblp_cleanup(struct usblp *usblp) kfree(usblp->readbuf); kfree(usblp->device_id_string); kfree(usblp->statusbuf); + usb_put_intf(usblp->intf); kfree(usblp); } @@ -1120,7 +1121,7 @@ static int usblp_probe(struct usb_interface *intf, init_waitqueue_head(&usblp->wwait); init_usb_anchor(&usblp->urbs); usblp->ifnum = intf->cur_altsetting->desc.bInterfaceNumber; - usblp->intf = intf; + usblp->intf = usb_get_intf(intf); /* Malloc device ID string buffer to the largest expected length, * since we can re-query it on an ioctl and a dynamic string @@ -1209,6 +1210,7 @@ abort: kfree(usblp->readbuf); kfree(usblp->statusbuf); kfree(usblp->device_id_string); + usb_put_intf(usblp->intf); kfree(usblp); abort_ret: return retval; diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index d03d0e46b121..7df7faa3eed5 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -203,9 +203,58 @@ static const unsigned short super_speed_maxpacket_maxes[4] = { [USB_ENDPOINT_XFER_INT] = 1024, }; -static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, - int asnum, struct usb_host_interface *ifp, int num_ep, - unsigned char *buffer, int size) +static bool endpoint_is_duplicate(struct usb_endpoint_descriptor *e1, + struct usb_endpoint_descriptor *e2) +{ + if (e1->bEndpointAddress == e2->bEndpointAddress) + return true; + + if (usb_endpoint_xfer_control(e1) || usb_endpoint_xfer_control(e2)) { + if (usb_endpoint_num(e1) == usb_endpoint_num(e2)) + return true; + } + + return false; +} + +/* + * Check for duplicate endpoint addresses in other interfaces and in the + * altsetting currently being parsed. + */ +static bool config_endpoint_is_duplicate(struct usb_host_config *config, + int inum, int asnum, struct usb_endpoint_descriptor *d) +{ + struct usb_endpoint_descriptor *epd; + struct usb_interface_cache *intfc; + struct usb_host_interface *alt; + int i, j, k; + + for (i = 0; i < config->desc.bNumInterfaces; ++i) { + intfc = config->intf_cache[i]; + + for (j = 0; j < intfc->num_altsetting; ++j) { + alt = &intfc->altsetting[j]; + + if (alt->desc.bInterfaceNumber == inum && + alt->desc.bAlternateSetting != asnum) + continue; + + for (k = 0; k < alt->desc.bNumEndpoints; ++k) { + epd = &alt->endpoint[k].desc; + + if (endpoint_is_duplicate(epd, d)) + return true; + } + } + } + + return false; +} + +static int usb_parse_endpoint(struct device *ddev, int cfgno, + struct usb_host_config *config, int inum, int asnum, + struct usb_host_interface *ifp, int num_ep, + unsigned char *buffer, int size) { unsigned char *buffer0 = buffer; struct usb_endpoint_descriptor *d; @@ -242,13 +291,10 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, goto skip_to_next_endpoint_or_interface_descriptor; /* Check for duplicate endpoint addresses */ - for (i = 0; i < ifp->desc.bNumEndpoints; ++i) { - if (ifp->endpoint[i].desc.bEndpointAddress == - d->bEndpointAddress) { - dev_warn(ddev, "config %d interface %d altsetting %d has a duplicate endpoint with address 0x%X, skipping\n", - cfgno, inum, asnum, d->bEndpointAddress); - goto skip_to_next_endpoint_or_interface_descriptor; - } + if (config_endpoint_is_duplicate(config, inum, asnum, d)) { + dev_warn(ddev, "config %d interface %d altsetting %d has a duplicate endpoint with address 0x%X, skipping\n", + cfgno, inum, asnum, d->bEndpointAddress); + goto skip_to_next_endpoint_or_interface_descriptor; } endpoint = &ifp->endpoint[ifp->desc.bNumEndpoints]; @@ -346,8 +392,17 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, endpoint->desc.wMaxPacketSize = cpu_to_le16(8); } - /* Validate the wMaxPacketSize field */ + /* + * Validate the wMaxPacketSize field. + * Some devices have isochronous endpoints in altsetting 0; + * the USB-2 spec requires such endpoints to have wMaxPacketSize = 0 + * (see the end of section 5.6.3), so don't warn about them. + */ maxp = usb_endpoint_maxp(&endpoint->desc); + if (maxp == 0 && !(usb_endpoint_xfer_isoc(d) && asnum == 0)) { + dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid wMaxPacketSize 0\n", + cfgno, inum, asnum, d->bEndpointAddress); + } /* Find the highest legal maxpacket size for this endpoint */ i = 0; /* additional transactions per microframe */ @@ -517,8 +572,8 @@ static int usb_parse_interface(struct device *ddev, int cfgno, if (((struct usb_descriptor_header *) buffer)->bDescriptorType == USB_DT_INTERFACE) break; - retval = usb_parse_endpoint(ddev, cfgno, inum, asnum, alt, - num_ep, buffer, size); + retval = usb_parse_endpoint(ddev, cfgno, config, inum, asnum, + alt, num_ep, buffer, size); if (retval < 0) return retval; ++n; diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index ae4fad13a442..b26df8102e1c 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -756,8 +756,15 @@ static int claimintf(struct usb_dev_state *ps, unsigned int ifnum) intf = usb_ifnum_to_if(dev, ifnum); if (!intf) err = -ENOENT; - else + else { + unsigned int old_suppress; + + /* suppress uevents while claiming interface */ + old_suppress = dev_get_uevent_suppress(&intf->dev); + dev_set_uevent_suppress(&intf->dev, 1); err = usb_driver_claim_interface(&usbfs_driver, intf, ps); + dev_set_uevent_suppress(&intf->dev, old_suppress); + } if (err == 0) set_bit(ifnum, &ps->ifclaimed); return err; @@ -777,7 +784,13 @@ static int releaseintf(struct usb_dev_state *ps, unsigned int ifnum) if (!intf) err = -ENOENT; else if (test_and_clear_bit(ifnum, &ps->ifclaimed)) { + unsigned int old_suppress; + + /* suppress uevents while releasing interface */ + old_suppress = dev_get_uevent_suppress(&intf->dev); + dev_set_uevent_suppress(&intf->dev, 1); usb_driver_release_interface(&usbfs_driver, intf); + dev_set_uevent_suppress(&intf->dev, old_suppress); err = 0; } return err; diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index b543a4730ef2..6f595960038a 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -36,7 +37,9 @@ #include "otg_whitelist.h" #define USB_VENDOR_GENESYS_LOGIC 0x05e3 +#define USB_VENDOR_SMSC 0x0424 #define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND 0x01 +#define HUB_QUIRK_DISABLE_AUTOSUSPEND 0x02 /* Protect struct usb_device->state and ->children members * Note: Both are also protected by ->dev.sem, except that ->state can @@ -104,6 +107,8 @@ EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem); static void hub_release(struct kref *kref); static int usb_reset_and_verify_device(struct usb_device *udev); static int hub_port_disable(struct usb_hub *hub, int port1, int set_state); +static bool hub_port_warm_reset_required(struct usb_hub *hub, int port1, + u16 portstatus); static inline char *portspeed(struct usb_hub *hub, int portstatus) { @@ -954,13 +959,17 @@ int usb_remove_device(struct usb_device *udev) { struct usb_hub *hub; struct usb_interface *intf; + int ret; if (!udev->parent) /* Can't remove a root hub */ return -EINVAL; hub = usb_hub_to_struct_hub(udev->parent); intf = to_usb_interface(hub->intfdev); - usb_autopm_get_interface(intf); + ret = usb_autopm_get_interface(intf); + if (ret < 0) + return ret; + set_bit(udev->portnum, hub->removed_bits); hub_port_logical_disconnect(hub, udev->portnum); usb_autopm_put_interface(intf); @@ -1110,6 +1119,11 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) USB_PORT_FEAT_ENABLE); } + /* Make sure a warm-reset request is handled by port_event */ + if (type == HUB_RESUME && + hub_port_warm_reset_required(hub, port1, portstatus)) + set_bit(port1, hub->event_bits); + /* * Add debounce if USB3 link is in polling/link training state. * Link will automatically transition to Enabled state after @@ -1157,6 +1171,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) * PORT_OVER_CURRENT is not. So check for any of them. */ if (udev || (portstatus & USB_PORT_STAT_CONNECTION) || + (portchange & USB_PORT_STAT_C_CONNECTION) || (portstatus & USB_PORT_STAT_OVERCURRENT) || (portchange & USB_PORT_STAT_C_OVERCURRENT)) set_bit(port1, hub->change_bits); @@ -1181,11 +1196,6 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) #ifdef CONFIG_PM udev->reset_resume = 1; #endif - /* Don't set the change_bits when the device - * was powered off. - */ - if (test_bit(port1, hub->power_bits)) - set_bit(port1, hub->change_bits); } else { /* The power session is gone; tell hub_wq */ @@ -1677,6 +1687,10 @@ static void hub_disconnect(struct usb_interface *intf) kfree(hub->buffer); pm_suspend_ignore_children(&intf->dev, false); + + if (hub->quirk_disable_autosuspend) + usb_autopm_put_interface(intf); + kref_put(&hub->kref, hub_release); } @@ -1807,6 +1821,11 @@ static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) if (id->driver_info & HUB_QUIRK_CHECK_PORT_AUTOSUSPEND) hub->quirk_check_port_auto_suspend = 1; + if (id->driver_info & HUB_QUIRK_DISABLE_AUTOSUSPEND) { + hub->quirk_disable_autosuspend = 1; + usb_autopm_get_interface_no_resume(intf); + } + if (hub_configure(hub, &desc->endpoint[0].desc) >= 0) return 0; @@ -5176,6 +5195,8 @@ static void hub_event(struct work_struct *work) hub_dev = hub->intfdev; intf = to_usb_interface(hub_dev); + kcov_remote_start_usb((u64)hdev->bus->busnum); + dev_dbg(hub_dev, "state %d ports %d chg %04x evt %04x\n", hdev->state, hdev->maxchild, /* NOTE: expects max 15 ports... */ @@ -5282,9 +5303,15 @@ out_hdev_lock: /* Balance the stuff in kick_hub_wq() and allow autosuspend */ usb_autopm_put_interface(intf); kref_put(&hub->kref, hub_release); + + kcov_remote_stop(); } static const struct usb_device_id hub_id_table[] = { + { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_CLASS, + .idVendor = USB_VENDOR_SMSC, + .bInterfaceClass = USB_CLASS_HUB, + .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_CLASS, .idVendor = USB_VENDOR_GENESYS_LOGIC, @@ -5623,7 +5650,7 @@ re_enumerate_no_bos: /** * usb_reset_device - warn interface drivers and perform a USB port reset - * @udev: device to reset (not in SUSPENDED or NOTATTACHED state) + * @udev: device to reset (not in NOTATTACHED state) * * Warns all drivers bound to registered interfaces (using their pre_reset * method), performs the port reset, and then lets the drivers know that @@ -5651,8 +5678,7 @@ int usb_reset_device(struct usb_device *udev) struct usb_host_config *config = udev->actconfig; struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); - if (udev->state == USB_STATE_NOTATTACHED || - udev->state == USB_STATE_SUSPENDED) { + if (udev->state == USB_STATE_NOTATTACHED) { dev_dbg(&udev->dev, "device reset not allowed in state %d\n", udev->state); return -EINVAL; diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h index 34c1a7e22aae..657bacfbe3a7 100644 --- a/drivers/usb/core/hub.h +++ b/drivers/usb/core/hub.h @@ -69,6 +69,7 @@ struct usb_hub { unsigned quiescing:1; unsigned disconnected:1; unsigned in_reset:1; + unsigned quirk_disable_autosuspend:1; unsigned quirk_check_port_auto_suspend:1; diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index 460c855be0d0..53c1f6e604b1 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -179,7 +179,10 @@ static int usb_port_runtime_resume(struct device *dev) if (!port_dev->is_superspeed && peer) pm_runtime_get_sync(&peer->dev); - usb_autopm_get_interface(intf); + retval = usb_autopm_get_interface(intf); + if (retval < 0) + return retval; + retval = usb_hub_set_port_power(hdev, hub, port1, true); msleep(hub_power_on_good_delay(hub)); if (udev && !retval) { @@ -232,7 +235,10 @@ static int usb_port_runtime_suspend(struct device *dev) if (usb_port_block_power_off) return -EBUSY; - usb_autopm_get_interface(intf); + retval = usb_autopm_get_interface(intf); + if (retval < 0) + return retval; + retval = usb_hub_set_port_power(hdev, hub, port1, false); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_CONNECTION); if (!port_dev->is_superspeed) diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 19e819aa2419..64c03e871f2d 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -86,6 +86,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* Logitech PTZ Pro Camera */ { USB_DEVICE(0x046d, 0x0853), .driver_info = USB_QUIRK_DELAY_INIT }, + /* Logitech Screen Share */ + { USB_DEVICE(0x046d, 0x086c), .driver_info = USB_QUIRK_NO_LPM }, + /* Logitech Quickcam Fusion */ { USB_DEVICE(0x046d, 0x08c1), .driver_info = USB_QUIRK_RESET_RESUME }, @@ -291,6 +294,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, + /* novation SoundControl XL */ + { USB_DEVICE(0x1235, 0x0061), .driver_info = USB_QUIRK_RESET_RESUME }, + { } /* terminating entry must be last */ }; diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index 8b800e34407b..83bd48734af5 100644 --- a/drivers/usb/core/urb.c +++ b/drivers/usb/core/urb.c @@ -45,6 +45,7 @@ void usb_init_urb(struct urb *urb) if (urb) { memset(urb, 0, sizeof(*urb)); kref_init(&urb->kref); + INIT_LIST_HEAD(&urb->urb_list); INIT_LIST_HEAD(&urb->anchor_list); } } diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index e164439b2154..aeb6f7c84ea0 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -2276,6 +2276,7 @@ static unsigned int dwc2_gadget_get_xfersize_ddma(struct dwc2_hsotg_ep *hs_ep) if (status & DEV_DMA_STS_MASK) dev_err(hsotg->dev, "descriptor %d closed with %x\n", i, status & DEV_DMA_STS_MASK); + desc++; } return bytes_rem; @@ -3932,11 +3933,12 @@ static int dwc2_hsotg_ep_enable(struct usb_ep *ep, * a unique tx-fifo even if it is non-periodic. */ if (dir_in && hsotg->dedicated_fifos) { + unsigned fifo_count = dwc2_hsotg_tx_fifo_count(hsotg); u32 fifo_index = 0; u32 fifo_size = UINT_MAX; size = hs_ep->ep.maxpacket * hs_ep->mc; - for (i = 1; i < hsotg->num_of_eps; ++i) { + for (i = 1; i <= fifo_count; ++i) { if (hsotg->fifo_map & (1 << i)) continue; val = dwc2_readl(hsotg->regs + DPTXFSIZN(i)); diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 945330ea8d5c..021899c58028 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -282,8 +282,7 @@ static void dwc3_frame_length_adjustment(struct dwc3 *dwc) reg = dwc3_readl(dwc->regs, DWC3_GFLADJ); dft = reg & DWC3_GFLADJ_30MHZ_MASK; - if (!dev_WARN_ONCE(dwc->dev, dft == dwc->fladj, - "request value same as default, ignoring\n")) { + if (dft != dwc->fladj) { reg &= ~DWC3_GFLADJ_30MHZ_MASK; reg |= DWC3_GFLADJ_30MHZ_SDBND_SEL | dwc->fladj; dwc3_writel(dwc->regs, DWC3_GFLADJ, reg); @@ -1032,6 +1031,9 @@ static void dwc3_core_exit_mode(struct dwc3 *dwc) /* do nothing */ break; } + + /* de-assert DRVVBUS for HOST and OTG mode */ + dwc3_set_prtcap(dwc, DWC3_GCTL_PRTCAP_DEVICE); } static void dwc3_get_properties(struct dwc3 *dwc) @@ -1115,6 +1117,9 @@ static void dwc3_get_properties(struct dwc3 *dwc) device_property_read_u32(dev, "snps,quirk-frame-length-adjustment", &dwc->fladj); + dwc->dis_metastability_quirk = device_property_read_bool(dev, + "snps,dis_metastability_quirk"); + dwc->lpm_nyet_threshold = lpm_nyet_threshold; dwc->tx_de_emphasis = tx_de_emphasis; @@ -1259,7 +1264,8 @@ static int dwc3_probe(struct platform_device *pdev) ret = dwc3_core_init(dwc); if (ret) { - dev_err(dev, "failed to initialize core\n"); + if (ret != -EPROBE_DEFER) + dev_err(dev, "failed to initialize core: %d\n", ret); goto err4; } diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index abd1142c9e4d..40bf0e0768d9 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -869,6 +869,7 @@ struct dwc3_scratchpad_array { * 1 - -3.5dB de-emphasis * 2 - No de-emphasis * 3 - Reserved + * @dis_metastability_quirk: set to disable metastability quirk. * @imod_interval: set the interrupt moderation interval in 250ns * increments or 0 to disable. */ @@ -1025,6 +1026,8 @@ struct dwc3 { unsigned tx_de_emphasis_quirk:1; unsigned tx_de_emphasis:2; + unsigned dis_metastability_quirk:1; + u16 imod_interval; }; diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h index 5e9c070ec874..1b4c2f8bb3da 100644 --- a/drivers/usb/dwc3/debug.h +++ b/drivers/usb/dwc3/debug.h @@ -124,6 +124,35 @@ dwc3_gadget_link_string(enum dwc3_link_state link_state) } } +/** + * dwc3_gadget_hs_link_string - returns highspeed and below link name + * @link_state: link state code + */ +static inline const char * +dwc3_gadget_hs_link_string(enum dwc3_link_state link_state) +{ + switch (link_state) { + case DWC3_LINK_STATE_U0: + return "On"; + case DWC3_LINK_STATE_U2: + return "Sleep"; + case DWC3_LINK_STATE_U3: + return "Suspend"; + case DWC3_LINK_STATE_SS_DIS: + return "Disconnected"; + case DWC3_LINK_STATE_RX_DET: + return "Early Suspend"; + case DWC3_LINK_STATE_RECOV: + return "Recovery"; + case DWC3_LINK_STATE_RESET: + return "Reset"; + case DWC3_LINK_STATE_RESUME: + return "Resume"; + default: + return "UNKNOWN link state\n"; + } +} + /** * dwc3_trb_type_string - returns TRB type as a string * @type: the type of the TRB diff --git a/drivers/usb/dwc3/debugfs.c b/drivers/usb/dwc3/debugfs.c index 4e09be80e59f..0d6a6a168a7e 100644 --- a/drivers/usb/dwc3/debugfs.c +++ b/drivers/usb/dwc3/debugfs.c @@ -436,13 +436,17 @@ static int dwc3_link_state_show(struct seq_file *s, void *unused) unsigned long flags; enum dwc3_link_state state; u32 reg; + u8 speed; spin_lock_irqsave(&dwc->lock, flags); reg = dwc3_readl(dwc->regs, DWC3_DSTS); state = DWC3_DSTS_USBLNKST(reg); - spin_unlock_irqrestore(&dwc->lock, flags); + speed = reg & DWC3_DSTS_CONNECTSPD; - seq_printf(s, "%s\n", dwc3_gadget_link_string(state)); + seq_printf(s, "%s\n", (speed >= DWC3_DSTS_SUPERSPEED) ? + dwc3_gadget_link_string(state) : + dwc3_gadget_hs_link_string(state)); + spin_unlock_irqrestore(&dwc->lock, flags); return 0; } @@ -460,6 +464,8 @@ static ssize_t dwc3_link_state_write(struct file *file, unsigned long flags; enum dwc3_link_state state = 0; char buf[32]; + u32 reg; + u8 speed; if (copy_from_user(&buf, ubuf, min_t(size_t, sizeof(buf) - 1, count))) return -EFAULT; @@ -480,6 +486,15 @@ static ssize_t dwc3_link_state_write(struct file *file, return -EINVAL; spin_lock_irqsave(&dwc->lock, flags); + reg = dwc3_readl(dwc->regs, DWC3_DSTS); + speed = reg & DWC3_DSTS_CONNECTSPD; + + if (speed < DWC3_DSTS_SUPERSPEED && + state != DWC3_LINK_STATE_RECOV) { + spin_unlock_irqrestore(&dwc->lock, flags); + return -EINVAL; + } + dwc3_gadget_set_link_state(dwc, state); spin_unlock_irqrestore(&dwc->lock, flags); diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 89fe53c846ef..cb50806d2459 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -1147,6 +1147,9 @@ static void dwc3_ep0_xfernotready(struct dwc3 *dwc, void dwc3_ep0_interrupt(struct dwc3 *dwc, const struct dwc3_event_depevt *event) { + struct dwc3_ep *dep = dwc->eps[event->endpoint_number]; + u8 cmd; + switch (event->endpoint_event) { case DWC3_DEPEVT_XFERCOMPLETE: dwc3_ep0_xfer_complete(dwc, event); @@ -1159,7 +1162,12 @@ void dwc3_ep0_interrupt(struct dwc3 *dwc, case DWC3_DEPEVT_XFERINPROGRESS: case DWC3_DEPEVT_RXTXFIFOEVT: case DWC3_DEPEVT_STREAMEVT: + break; case DWC3_DEPEVT_EPCMDCMPLT: + cmd = DEPEVT_PARAMETER_CMD(event->parameters); + + if (cmd == DWC3_DEPCMD_ENDTRANSFER) + dep->flags &= ~DWC3_EP_TRANSFER_STARTED; break; } } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 1b99d44e52b9..e96b22d6fa52 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -277,27 +277,36 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned cmd, const struct usb_endpoint_descriptor *desc = dep->endpoint.desc; struct dwc3 *dwc = dep->dwc; u32 timeout = 1000; + u32 saved_config = 0; u32 reg; int cmd_status = 0; - int susphy = false; int ret = -EINVAL; /* - * Synopsys Databook 2.60a states, on section 6.3.2.5.[1-8], that if - * we're issuing an endpoint command, we must check if - * GUSB2PHYCFG.SUSPHY bit is set. If it is, then we need to clear it. + * When operating in USB 2.0 speeds (HS/FS), if GUSB2PHYCFG.ENBLSLPM or + * GUSB2PHYCFG.SUSPHY is set, it must be cleared before issuing an + * endpoint command. * - * We will also set SUSPHY bit to what it was before returning as stated - * by the same section on Synopsys databook. + * Save and clear both GUSB2PHYCFG.ENBLSLPM and GUSB2PHYCFG.SUSPHY + * settings. Restore them after the command is completed. + * + * DWC_usb3 3.30a and DWC_usb31 1.90a programming guide section 3.2.2 */ if (dwc->gadget.speed <= USB_SPEED_HIGH) { reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); if (unlikely(reg & DWC3_GUSB2PHYCFG_SUSPHY)) { - susphy = true; + saved_config |= DWC3_GUSB2PHYCFG_SUSPHY; reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; - dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); } + + if (reg & DWC3_GUSB2PHYCFG_ENBLSLPM) { + saved_config |= DWC3_GUSB2PHYCFG_ENBLSLPM; + reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM; + } + + if (saved_config) + dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); } if (DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_STARTTRANSFER) { @@ -395,9 +404,9 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned cmd, } } - if (unlikely(susphy)) { + if (saved_config) { reg = dwc3_readl(dwc->regs, DWC3_GUSB2PHYCFG(0)); - reg |= DWC3_GUSB2PHYCFG_SUSPHY; + reg |= saved_config; dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); } @@ -2034,7 +2043,8 @@ static void dwc3_gadget_set_speed(struct usb_gadget *g, * STAR#9000525659: Clock Domain Crossing on DCTL in * USB 2.0 Mode */ - if (dwc->revision < DWC3_REVISION_220A) { + if (dwc->revision < DWC3_REVISION_220A && + !dwc->dis_metastability_quirk) { reg |= DWC3_DCFG_SUPERSPEED; } else { switch (speed) { @@ -3265,7 +3275,8 @@ int dwc3_gadget_init(struct dwc3 *dwc) * is less than super speed because we don't have means, yet, to tell * composite.c that we are USB 2.0 + LPM ECN. */ - if (dwc->revision < DWC3_REVISION_220A) + if (dwc->revision < DWC3_REVISION_220A && + !dwc->dis_metastability_quirk) dev_info(dwc->dev, "changing max_speed on rev %08x\n", dwc->revision); diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 09f02d583d6b..a7fcb37253b2 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -442,12 +442,14 @@ static u8 encode_bMaxPower(enum usb_device_speed speed, val = CONFIG_USB_GADGET_VBUS_DRAW; if (!val) return 0; - switch (speed) { - case USB_SPEED_SUPER: - return DIV_ROUND_UP(val, 8); - default: - return DIV_ROUND_UP(val, 2); - } + if (speed < USB_SPEED_SUPER) + return min(val, 500U) / 2; + else + /* + * USB 3.x supports up to 900mA, but since 900 isn't divisible + * by 8 the integral division will effectively cap to 896mA. + */ + return min(val, 900U) / 8; } static int config_buf(struct usb_configuration *config, @@ -845,6 +847,10 @@ static int set_config(struct usb_composite_dev *cdev, /* when we return, be sure our power usage is valid */ power = c->MaxPower ? c->MaxPower : CONFIG_USB_GADGET_VBUS_DRAW; + if (gadget->speed < USB_SPEED_SUPER) + power = min(power, 500U); + else + power = min(power, 900U); done: usb_gadget_vbus_draw(gadget, power); if (result >= 0 && cdev->delayed_status) @@ -2336,7 +2342,7 @@ void composite_resume(struct usb_gadget *gadget) { struct usb_composite_dev *cdev = get_gadget_data(gadget); struct usb_function *f; - u16 maxpower; + unsigned maxpower; /* REVISIT: should we have config level * suspend/resume callbacks? @@ -2350,10 +2356,14 @@ void composite_resume(struct usb_gadget *gadget) f->resume(f); } - maxpower = cdev->config->MaxPower; + maxpower = cdev->config->MaxPower ? + cdev->config->MaxPower : CONFIG_USB_GADGET_VBUS_DRAW; + if (gadget->speed < USB_SPEED_SUPER) + maxpower = min(maxpower, 500U); + else + maxpower = min(maxpower, 900U); - usb_gadget_vbus_draw(gadget, maxpower ? - maxpower : CONFIG_USB_GADGET_VBUS_DRAW); + usb_gadget_vbus_draw(gadget, maxpower); } cdev->suspended = 0; diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 2579b92b34cd..fd2c5e67c937 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1538,6 +1538,7 @@ static void configfs_composite_unbind(struct usb_gadget *gadget) spin_lock_irqsave(&gi->spinlock, flags); gi->unbind = 1; spin_unlock_irqrestore(&gi->spinlock, flags); + kfree(otg_desc[0]); otg_desc[0] = NULL; purge_configs_funcs(gi); @@ -1549,6 +1550,97 @@ static void configfs_composite_unbind(struct usb_gadget *gadget) spin_unlock_irqrestore(&gi->spinlock, flags); } +static int configfs_composite_setup(struct usb_gadget *gadget, + const struct usb_ctrlrequest *ctrl) +{ + struct usb_composite_dev *cdev; + struct gadget_info *gi; + unsigned long flags; + int ret; + + cdev = get_gadget_data(gadget); + if (!cdev) + return 0; + + gi = container_of(cdev, struct gadget_info, cdev); + spin_lock_irqsave(&gi->spinlock, flags); + cdev = get_gadget_data(gadget); + if (!cdev || gi->unbind) { + spin_unlock_irqrestore(&gi->spinlock, flags); + return 0; + } + + ret = composite_setup(gadget, ctrl); + spin_unlock_irqrestore(&gi->spinlock, flags); + return ret; +} + +static void configfs_composite_disconnect(struct usb_gadget *gadget) +{ + struct usb_composite_dev *cdev; + struct gadget_info *gi; + unsigned long flags; + + cdev = get_gadget_data(gadget); + if (!cdev) + return; + + gi = container_of(cdev, struct gadget_info, cdev); + spin_lock_irqsave(&gi->spinlock, flags); + cdev = get_gadget_data(gadget); + if (!cdev || gi->unbind) { + spin_unlock_irqrestore(&gi->spinlock, flags); + return; + } + + composite_disconnect(gadget); + spin_unlock_irqrestore(&gi->spinlock, flags); +} + +static void configfs_composite_suspend(struct usb_gadget *gadget) +{ + struct usb_composite_dev *cdev; + struct gadget_info *gi; + unsigned long flags; + + cdev = get_gadget_data(gadget); + if (!cdev) + return; + + gi = container_of(cdev, struct gadget_info, cdev); + spin_lock_irqsave(&gi->spinlock, flags); + cdev = get_gadget_data(gadget); + if (!cdev || gi->unbind) { + spin_unlock_irqrestore(&gi->spinlock, flags); + return; + } + + composite_suspend(gadget); + spin_unlock_irqrestore(&gi->spinlock, flags); +} + +static void configfs_composite_resume(struct usb_gadget *gadget) +{ + struct usb_composite_dev *cdev; + struct gadget_info *gi; + unsigned long flags; + + cdev = get_gadget_data(gadget); + if (!cdev) + return; + + gi = container_of(cdev, struct gadget_info, cdev); + spin_lock_irqsave(&gi->spinlock, flags); + cdev = get_gadget_data(gadget); + if (!cdev || gi->unbind) { + spin_unlock_irqrestore(&gi->spinlock, flags); + return; + } + + composite_resume(gadget); + spin_unlock_irqrestore(&gi->spinlock, flags); +} + #ifdef CONFIG_USB_CONFIGFS_UEVENT static int android_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *c) @@ -1641,12 +1733,12 @@ static const struct usb_gadget_driver configfs_driver_template = { .reset = android_disconnect, .disconnect = android_disconnect, #else - .setup = composite_setup, - .reset = composite_disconnect, - .disconnect = composite_disconnect, + .setup = configfs_composite_setup, + .reset = configfs_composite_disconnect, + .disconnect = configfs_composite_disconnect, #endif - .suspend = composite_suspend, - .resume = composite_resume, + .suspend = configfs_composite_suspend, + .resume = configfs_composite_resume, .max_speed = USB_SPEED_SUPER, .driver = { @@ -1850,6 +1942,7 @@ static struct config_group *gadgets_make( gi->composite.resume = NULL; gi->composite.max_speed = USB_SPEED_SUPER; + spin_lock_init(&gi->spinlock); mutex_init(&gi->lock); spin_lock_init(&gi->spinlock); INIT_LIST_HEAD(&gi->string_list); diff --git a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c index 4c488d15b6f6..8e3e44382785 100644 --- a/drivers/usb/gadget/function/f_ecm.c +++ b/drivers/usb/gadget/function/f_ecm.c @@ -56,6 +56,7 @@ struct f_ecm { struct usb_ep *notify; struct usb_request *notify_req; u8 notify_state; + atomic_t notify_count; bool is_open; /* FIXME is_open needs some irq-ish locking @@ -384,7 +385,7 @@ static void ecm_do_notify(struct f_ecm *ecm) int status; /* notification already in flight? */ - if (!req) + if (atomic_read(&ecm->notify_count)) return; event = req->buf; @@ -424,10 +425,10 @@ static void ecm_do_notify(struct f_ecm *ecm) event->bmRequestType = 0xA1; event->wIndex = cpu_to_le16(ecm->ctrl_id); - ecm->notify_req = NULL; + atomic_inc(&ecm->notify_count); status = usb_ep_queue(ecm->notify, req, GFP_ATOMIC); if (status < 0) { - ecm->notify_req = req; + atomic_dec(&ecm->notify_count); DBG(cdev, "notify --> %d\n", status); } } @@ -452,17 +453,19 @@ static void ecm_notify_complete(struct usb_ep *ep, struct usb_request *req) switch (req->status) { case 0: /* no fault */ + atomic_dec(&ecm->notify_count); break; case -ECONNRESET: case -ESHUTDOWN: + atomic_set(&ecm->notify_count, 0); ecm->notify_state = ECM_NOTIFY_NONE; break; default: DBG(cdev, "event %02x --> %d\n", event->bNotificationType, req->status); + atomic_dec(&ecm->notify_count); break; } - ecm->notify_req = req; ecm_do_notify(ecm); } @@ -625,8 +628,12 @@ static void ecm_disable(struct usb_function *f) DBG(cdev, "ecm deactivated\n"); - if (ecm->port.in_ep->enabled) + if (ecm->port.in_ep->enabled) { gether_disconnect(&ecm->port); + } else { + ecm->port.in_ep->desc = NULL; + ecm->port.out_ep->desc = NULL; + } usb_ep_disable(ecm->notify); ecm->notify->desc = NULL; @@ -905,6 +912,11 @@ static void ecm_unbind(struct usb_configuration *c, struct usb_function *f) usb_free_all_descriptors(f); + if (atomic_read(&ecm->notify_count)) { + usb_ep_dequeue(ecm->notify, ecm->notify_req); + atomic_set(&ecm->notify_count, 0); + } + kfree(ecm->notify_req->buf); usb_ep_free_request(ecm->notify, ecm->notify_req); } diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 57a30016c357..8b21717f3612 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1259,18 +1259,19 @@ static int ffs_aio_cancel(struct kiocb *kiocb) { struct ffs_io_data *io_data = kiocb->private; struct ffs_epfile *epfile = kiocb->ki_filp->private_data; + unsigned long flags; int value; ENTER(); - spin_lock_irq(&epfile->ffs->eps_lock); + spin_lock_irqsave(&epfile->ffs->eps_lock, flags); if (likely(io_data && io_data->ep && io_data->req)) value = usb_ep_dequeue(io_data->ep, io_data->req); else value = -EINVAL; - spin_unlock_irq(&epfile->ffs->eps_lock); + spin_unlock_irqrestore(&epfile->ffs->eps_lock, flags); return value; } diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index 45b334ceaf2e..5c2d39232bb0 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -58,6 +58,7 @@ struct f_ncm { struct usb_ep *notify; struct usb_request *notify_req; u8 notify_state; + atomic_t notify_count; bool is_open; const struct ndp_parser_opts *parser_opts; @@ -553,7 +554,7 @@ static void ncm_do_notify(struct f_ncm *ncm) int status; /* notification already in flight? */ - if (!req) + if (atomic_read(&ncm->notify_count)) return; event = req->buf; @@ -593,7 +594,8 @@ static void ncm_do_notify(struct f_ncm *ncm) event->bmRequestType = 0xA1; event->wIndex = cpu_to_le16(ncm->ctrl_id); - ncm->notify_req = NULL; + atomic_inc(&ncm->notify_count); + /* * In double buffering if there is a space in FIFO, * completion callback can be called right after the call, @@ -603,7 +605,7 @@ static void ncm_do_notify(struct f_ncm *ncm) status = usb_ep_queue(ncm->notify, req, GFP_ATOMIC); spin_lock(&ncm->lock); if (status < 0) { - ncm->notify_req = req; + atomic_dec(&ncm->notify_count); DBG(cdev, "notify --> %d\n", status); } } @@ -638,17 +640,19 @@ static void ncm_notify_complete(struct usb_ep *ep, struct usb_request *req) case 0: VDBG(cdev, "Notification %02x sent\n", event->bNotificationType); + atomic_dec(&ncm->notify_count); break; case -ECONNRESET: case -ESHUTDOWN: + atomic_set(&ncm->notify_count, 0); ncm->notify_state = NCM_NOTIFY_NONE; break; default: DBG(cdev, "event %02x --> %d\n", event->bNotificationType, req->status); + atomic_dec(&ncm->notify_count); break; } - ncm->notify_req = req; ncm_do_notify(ncm); spin_unlock(&ncm->lock); } @@ -1632,6 +1636,11 @@ static void ncm_unbind(struct usb_configuration *c, struct usb_function *f) ncm_string_defs[0].id = 0; usb_free_all_descriptors(f); + if (atomic_read(&ncm->notify_count)) { + usb_ep_dequeue(ncm->notify, ncm->notify_req); + atomic_set(&ncm->notify_count, 0); + } + kfree(ncm->notify_req->buf); usb_ep_free_request(ncm->notify, ncm->notify_req); } diff --git a/drivers/usb/gadget/function/f_rndis.c b/drivers/usb/gadget/function/f_rndis.c index f0d16f17da0d..bbb05ce6868e 100644 --- a/drivers/usb/gadget/function/f_rndis.c +++ b/drivers/usb/gadget/function/f_rndis.c @@ -753,6 +753,7 @@ static void rndis_disable(struct usb_function *f) gether_disconnect(&rndis->port); usb_ep_disable(rndis->notify); + rndis->notify->desc = NULL; } /*-------------------------------------------------------------------------*/ diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index d1e6764d9d8c..f25c8ee14c7b 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -758,8 +758,10 @@ static int gs_start_io(struct gs_port *port) port->n_read = 0; started = gs_start_rx(port); - /* unblock any pending writes into our circular buffer */ if (started) { + gs_start_tx(port); + /* Unblock any pending writes into our circular buffer, in case + * we didn't in gs_start_tx() */ tty_wakeup(port->port.tty); } else { gs_free_requests(ep, head, &port->read_allocated); @@ -1439,8 +1441,10 @@ int gserial_alloc_line(unsigned char *line_num) __func__, port_num, PTR_ERR(tty_dev)); ret = PTR_ERR(tty_dev); + mutex_lock(&ports[port_num].lock); port = ports[port_num].port; ports[port_num].port = NULL; + mutex_unlock(&ports[port_num].lock); gserial_free_port(port); goto err; } diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index 844cb738bafd..57f6e8a668cf 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -543,6 +543,7 @@ static int uvcg_control_class_allow_link(struct config_item *src, unlock: mutex_unlock(&opts->lock); out: + config_item_put(header); mutex_unlock(su_mutex); return ret; } @@ -578,6 +579,7 @@ static void uvcg_control_class_drop_link(struct config_item *src, unlock: mutex_unlock(&opts->lock); out: + config_item_put(header); mutex_unlock(su_mutex); } @@ -763,6 +765,7 @@ static int uvcg_streaming_header_allow_link(struct config_item *src, format_ptr->fmt = target_fmt; list_add_tail(&format_ptr->entry, &src_hdr->formats); ++src_hdr->num_fmt; + ++target_fmt->linked; out: mutex_unlock(&opts->lock); @@ -800,6 +803,8 @@ static void uvcg_streaming_header_drop_link(struct config_item *src, break; } + --target_fmt->linked; + out: mutex_unlock(&opts->lock); mutex_unlock(su_mutex); @@ -2037,6 +2042,7 @@ static int uvcg_streaming_class_allow_link(struct config_item *src, unlock: mutex_unlock(&opts->lock); out: + config_item_put(header); mutex_unlock(su_mutex); return ret; } @@ -2077,6 +2083,7 @@ static void uvcg_streaming_class_drop_link(struct config_item *src, unlock: mutex_unlock(&opts->lock); out: + config_item_put(header); mutex_unlock(su_mutex); } diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index 0f01c04d7cbd..d6bab12b0b47 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -129,6 +129,21 @@ uvc_video_encode_isoc(struct usb_request *req, struct uvc_video *video, * Request handling */ +static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req) +{ + int ret; + + ret = usb_ep_queue(video->ep, req, GFP_ATOMIC); + if (ret < 0) { + printk(KERN_INFO "Failed to queue request (%d).\n", ret); + /* Isochronous endpoints can't be halted. */ + if (usb_endpoint_xfer_bulk(video->ep->desc)) + usb_ep_set_halt(video->ep); + } + + return ret; +} + /* * I somehow feel that synchronisation won't be easy to achieve here. We have * three events that control USB requests submission: @@ -193,14 +208,13 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req) video->encode(req, video, buf); - if ((ret = usb_ep_queue(ep, req, GFP_ATOMIC)) < 0) { - printk(KERN_INFO "Failed to queue request (%d).\n", ret); - usb_ep_set_halt(ep); - spin_unlock_irqrestore(&video->queue.irqlock, flags); + ret = uvcg_video_ep_queue(video, req); + spin_unlock_irqrestore(&video->queue.irqlock, flags); + + if (ret < 0) { uvcg_queue_cancel(queue, 0); goto requeue; } - spin_unlock_irqrestore(&video->queue.irqlock, flags); return; @@ -320,15 +334,13 @@ int uvcg_video_pump(struct uvc_video *video) video->encode(req, video, buf); /* Queue the USB request */ - ret = usb_ep_queue(video->ep, req, GFP_ATOMIC); + ret = uvcg_video_ep_queue(video, req); + spin_unlock_irqrestore(&queue->irqlock, flags); + if (ret < 0) { - printk(KERN_INFO "Failed to queue request (%d)\n", ret); - usb_ep_set_halt(video->ep); - spin_unlock_irqrestore(&queue->irqlock, flags); uvcg_queue_cancel(queue, 0); break; } - spin_unlock_irqrestore(&queue->irqlock, flags); } spin_lock_irqsave(&video->req_lock, flags); diff --git a/drivers/usb/gadget/legacy/cdc2.c b/drivers/usb/gadget/legacy/cdc2.c index 51c08682de84..5ee25beb52f0 100644 --- a/drivers/usb/gadget/legacy/cdc2.c +++ b/drivers/usb/gadget/legacy/cdc2.c @@ -229,7 +229,7 @@ static struct usb_composite_driver cdc_driver = { .name = "g_cdc", .dev = &device_desc, .strings = dev_strings, - .max_speed = USB_SPEED_HIGH, + .max_speed = USB_SPEED_SUPER, .bind = cdc_bind, .unbind = cdc_unbind, }; diff --git a/drivers/usb/gadget/legacy/g_ffs.c b/drivers/usb/gadget/legacy/g_ffs.c index 6da7316f8e87..54ee4e31645b 100644 --- a/drivers/usb/gadget/legacy/g_ffs.c +++ b/drivers/usb/gadget/legacy/g_ffs.c @@ -153,7 +153,7 @@ static struct usb_composite_driver gfs_driver = { .name = DRIVER_NAME, .dev = &gfs_dev_desc, .strings = gfs_dev_strings, - .max_speed = USB_SPEED_HIGH, + .max_speed = USB_SPEED_SUPER, .bind = gfs_bind, .unbind = gfs_unbind, }; diff --git a/drivers/usb/gadget/legacy/multi.c b/drivers/usb/gadget/legacy/multi.c index a70a406580ea..3b7fc5c7e9c3 100644 --- a/drivers/usb/gadget/legacy/multi.c +++ b/drivers/usb/gadget/legacy/multi.c @@ -486,7 +486,7 @@ static struct usb_composite_driver multi_driver = { .name = "g_multi", .dev = &device_desc, .strings = dev_strings, - .max_speed = USB_SPEED_HIGH, + .max_speed = USB_SPEED_SUPER, .bind = multi_bind, .unbind = multi_unbind, .needs_serial = 1, diff --git a/drivers/usb/gadget/legacy/ncm.c b/drivers/usb/gadget/legacy/ncm.c index 0aba68253e3d..2fb4a847dd52 100644 --- a/drivers/usb/gadget/legacy/ncm.c +++ b/drivers/usb/gadget/legacy/ncm.c @@ -203,7 +203,7 @@ static struct usb_composite_driver ncm_driver = { .name = "g_ncm", .dev = &device_desc, .strings = dev_strings, - .max_speed = USB_SPEED_HIGH, + .max_speed = USB_SPEED_SUPER, .bind = gncm_bind, .unbind = gncm_unbind, }; diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c index cb66f982c313..39676824a2c6 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.c +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c @@ -488,9 +488,11 @@ static void submit_request(struct usba_ep *ep, struct usba_request *req) next_fifo_transaction(ep, req); if (req->last_transaction) { usba_ep_writel(ep, CTL_DIS, USBA_TX_PK_RDY); - usba_ep_writel(ep, CTL_ENB, USBA_TX_COMPLETE); + if (ep_is_control(ep)) + usba_ep_writel(ep, CTL_ENB, USBA_TX_COMPLETE); } else { - usba_ep_writel(ep, CTL_DIS, USBA_TX_COMPLETE); + if (ep_is_control(ep)) + usba_ep_writel(ep, CTL_DIS, USBA_TX_COMPLETE); usba_ep_writel(ep, CTL_ENB, USBA_TX_PK_RDY); } } diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index 821f80d78143..36c65a67f77d 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -107,6 +107,17 @@ int usb_ep_enable(struct usb_ep *ep) if (ep->enabled) goto out; + /* UDC drivers can't handle endpoints with maxpacket size 0 */ + if (usb_endpoint_maxp(ep->desc) == 0) { + /* + * We should log an error message here, but we can't call + * dev_err() because there's no way to find the gadget + * given only ep. + */ + ret = -EINVAL; + goto out; + } + ret = ep->ops->enable(ep, ep->desc); if (ret) goto out; diff --git a/drivers/usb/gadget/udc/dummy_hcd.c b/drivers/usb/gadget/udc/dummy_hcd.c index 7e90f786d923..8ee76524a0b7 100644 --- a/drivers/usb/gadget/udc/dummy_hcd.c +++ b/drivers/usb/gadget/udc/dummy_hcd.c @@ -980,8 +980,18 @@ static int dummy_udc_start(struct usb_gadget *g, struct dummy_hcd *dum_hcd = gadget_to_dummy_hcd(g); struct dummy *dum = dum_hcd->dum; - if (driver->max_speed == USB_SPEED_UNKNOWN) + switch (g->speed) { + /* All the speeds we support */ + case USB_SPEED_LOW: + case USB_SPEED_FULL: + case USB_SPEED_HIGH: + case USB_SPEED_SUPER: + break; + default: + dev_err(dummy_dev(dum_hcd), "Unsupported driver max speed %d\n", + driver->max_speed); return -EINVAL; + } /* * SLAVE side init ... the layer above hardware, which @@ -1325,7 +1335,7 @@ static int dummy_perform_transfer(struct urb *urb, struct dummy_request *req, u32 this_sg; bool next_sg; - to_host = usb_pipein(urb->pipe); + to_host = usb_urb_dir_in(urb); rbuf = req->req.buf + req->req.actual; if (!urb->num_sgs) { @@ -1413,7 +1423,7 @@ top: /* FIXME update emulated data toggle too */ - to_host = usb_pipein(urb->pipe); + to_host = usb_urb_dir_in(urb); if (unlikely(len == 0)) is_short = 1; else { @@ -1770,6 +1780,7 @@ static void dummy_timer(unsigned long _dum_hcd) int i; /* simplistic model for one frame's bandwidth */ + /* FIXME: account for transaction and packet overhead */ switch (dum->gadget.speed) { case USB_SPEED_LOW: total = 8/*bytes*/ * 12/*packets*/; @@ -1784,9 +1795,10 @@ static void dummy_timer(unsigned long _dum_hcd) /* Bus speed is 500000 bytes/ms, so use a little less */ total = 490000; break; - default: + default: /* Can't happen */ dev_err(dummy_dev(dum_hcd), "bogus device speed\n"); - return; + total = 0; + break; } /* FIXME if HZ != 1000 this will probably misbehave ... */ @@ -1814,7 +1826,6 @@ restart: struct dummy_request *req; u8 address; struct dummy_ep *ep = NULL; - int type; int status = -EINPROGRESS; /* stop when we reach URBs queued after the timer interrupt */ @@ -1826,18 +1837,14 @@ restart: goto return_urb; else if (dum_hcd->rh_state != DUMMY_RH_RUNNING) continue; - type = usb_pipetype(urb->pipe); - /* used up this frame's non-periodic bandwidth? - * FIXME there's infinite bandwidth for control and - * periodic transfers ... unrealistic. - */ - if (total <= 0 && type == PIPE_BULK) + /* Used up this frame's bandwidth? */ + if (total <= 0) continue; /* find the gadget's ep for this request (if configured) */ address = usb_pipeendpoint (urb->pipe); - if (usb_pipein(urb->pipe)) + if (usb_urb_dir_in(urb)) address |= USB_DIR_IN; ep = find_endpoint(dum, address); if (!ep) { @@ -2390,7 +2397,7 @@ static inline ssize_t show_urb(char *buf, size_t size, struct urb *urb) s = "?"; break; } s; }), - ep, ep ? (usb_pipein(urb->pipe) ? "in" : "out") : "", + ep, ep ? (usb_urb_dir_in(urb) ? "in" : "out") : "", ({ char *s; \ switch (usb_pipetype(urb->pipe)) { \ case PIPE_CONTROL: \ @@ -2734,7 +2741,7 @@ static struct platform_driver dummy_hcd_driver = { }; /*-------------------------------------------------------------------------*/ -#define MAX_NUM_UDC 2 +#define MAX_NUM_UDC 32 static struct platform_device *the_udc_pdev[MAX_NUM_UDC]; static struct platform_device *the_hcd_pdev[MAX_NUM_UDC]; diff --git a/drivers/usb/gadget/udc/fotg210-udc.c b/drivers/usb/gadget/udc/fotg210-udc.c index d17d7052605b..6866a0be249e 100644 --- a/drivers/usb/gadget/udc/fotg210-udc.c +++ b/drivers/usb/gadget/udc/fotg210-udc.c @@ -744,7 +744,7 @@ static void fotg210_get_status(struct fotg210_udc *fotg210, fotg210->ep0_req->length = 2; spin_unlock(&fotg210->lock); - fotg210_ep_queue(fotg210->gadget.ep0, fotg210->ep0_req, GFP_KERNEL); + fotg210_ep_queue(fotg210->gadget.ep0, fotg210->ep0_req, GFP_ATOMIC); spin_lock(&fotg210->lock); } diff --git a/drivers/usb/gadget/udc/fsl_udc_core.c b/drivers/usb/gadget/udc/fsl_udc_core.c index 7874c112f3fd..ee48c7938d61 100644 --- a/drivers/usb/gadget/udc/fsl_udc_core.c +++ b/drivers/usb/gadget/udc/fsl_udc_core.c @@ -2569,7 +2569,7 @@ static int fsl_udc_remove(struct platform_device *pdev) dma_pool_destroy(udc_controller->td_pool); free_irq(udc_controller->irq, udc_controller); iounmap(dr_regs); - if (pdata->operating_mode == FSL_USB2_DR_DEVICE) + if (res && (pdata->operating_mode == FSL_USB2_DR_DEVICE)) release_mem_region(res->start, resource_size(res)); /* free udc --wait for the release() finished */ diff --git a/drivers/usb/gadget/udc/gr_udc.c b/drivers/usb/gadget/udc/gr_udc.c index 1f9941145746..feb73a1c42ef 100644 --- a/drivers/usb/gadget/udc/gr_udc.c +++ b/drivers/usb/gadget/udc/gr_udc.c @@ -2200,8 +2200,6 @@ static int gr_probe(struct platform_device *pdev) return -ENOMEM; } - spin_lock(&dev->lock); - /* Inside lock so that no gadget can use this udc until probe is done */ retval = usb_add_gadget_udc(dev->dev, &dev->gadget); if (retval) { @@ -2210,15 +2208,21 @@ static int gr_probe(struct platform_device *pdev) } dev->added = 1; - retval = gr_udc_init(dev); - if (retval) - goto out; + spin_lock(&dev->lock); - gr_dfs_create(dev); + retval = gr_udc_init(dev); + if (retval) { + spin_unlock(&dev->lock); + goto out; + } /* Clear all interrupt enables that might be left on since last boot */ gr_disable_interrupts_and_pullup(dev); + spin_unlock(&dev->lock); + + gr_dfs_create(dev); + retval = gr_request_irq(dev, dev->irq); if (retval) { dev_err(dev->dev, "Failed to request irq %d\n", dev->irq); @@ -2247,8 +2251,6 @@ static int gr_probe(struct platform_device *pdev) dev_info(dev->dev, "regs: %p, irq %d\n", dev->regs, dev->irq); out: - spin_unlock(&dev->lock); - if (retval) gr_remove(pdev); diff --git a/drivers/usb/gadget/udc/lpc32xx_udc.c b/drivers/usb/gadget/udc/lpc32xx_udc.c index 6df1aded4503..ac2aa04ca657 100644 --- a/drivers/usb/gadget/udc/lpc32xx_udc.c +++ b/drivers/usb/gadget/udc/lpc32xx_udc.c @@ -1178,11 +1178,11 @@ static void udc_pop_fifo(struct lpc32xx_udc *udc, u8 *data, u32 bytes) tmp = readl(USBD_RXDATA(udc->udp_baseaddr)); bl = bytes - n; - if (bl > 3) - bl = 3; + if (bl > 4) + bl = 4; for (i = 0; i < bl; i++) - data[n + i] = (u8) ((tmp >> (n * 8)) & 0xFF); + data[n + i] = (u8) ((tmp >> (i * 8)) & 0xFF); } break; diff --git a/drivers/usb/gadget/udc/pch_udc.c b/drivers/usb/gadget/udc/pch_udc.c index 84dcbcd756f0..08bbe2c24134 100644 --- a/drivers/usb/gadget/udc/pch_udc.c +++ b/drivers/usb/gadget/udc/pch_udc.c @@ -1523,7 +1523,6 @@ static void pch_udc_free_dma_chain(struct pch_udc_dev *dev, td = phys_to_virt(addr); addr2 = (dma_addr_t)td->next; dma_pool_free(dev->data_requests, td, addr); - td->next = 0x00; addr = addr2; } req->chain_len = 1; diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c index 8f3f055c05fa..477ba3842cc4 100644 --- a/drivers/usb/host/ehci-q.c +++ b/drivers/usb/host/ehci-q.c @@ -40,6 +40,10 @@ /*-------------------------------------------------------------------------*/ +/* PID Codes that are used here, from EHCI specification, Table 3-16. */ +#define PID_CODE_IN 1 +#define PID_CODE_SETUP 2 + /* fill a qtd, returning how much of the buffer we were able to queue up */ static int @@ -203,7 +207,7 @@ static int qtd_copy_status ( int status = -EINPROGRESS; /* count IN/OUT bytes, not SETUP (even short packets) */ - if (likely (QTD_PID (token) != 2)) + if (likely(QTD_PID(token) != PID_CODE_SETUP)) urb->actual_length += length - QTD_LENGTH (token); /* don't modify error codes */ @@ -219,6 +223,13 @@ static int qtd_copy_status ( if (token & QTD_STS_BABBLE) { /* FIXME "must" disable babbling device's port too */ status = -EOVERFLOW; + /* + * When MMF is active and PID Code is IN, queue is halted. + * EHCI Specification, Table 4-13. + */ + } else if ((token & QTD_STS_MMF) && + (QTD_PID(token) == PID_CODE_IN)) { + status = -EPROTO; /* CERR nonzero + halt --> stall */ } else if (QTD_CERR(token)) { status = -EPIPE; diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 238514f8b162..7fd96fb1d31f 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -859,7 +859,7 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, struct xhci_bus_state *bus_state, __le32 __iomem **port_array, u16 wIndex, u32 raw_port_status, - unsigned long flags) + unsigned long *flags) __releases(&xhci->lock) __acquires(&xhci->lock) { @@ -891,6 +891,14 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, status |= USB_PORT_STAT_C_BH_RESET << 16; if ((raw_port_status & PORT_CEC)) status |= USB_PORT_STAT_C_CONFIG_ERROR << 16; + + /* USB3 remote wake resume signaling completed */ + if (bus_state->port_remote_wakeup & (1 << wIndex) && + (raw_port_status & PORT_PLS_MASK) != XDEV_RESUME && + (raw_port_status & PORT_PLS_MASK) != XDEV_RECOVERY) { + bus_state->port_remote_wakeup &= ~(1 << wIndex); + usb_hcd_end_port_resume(&hcd->self, wIndex); + } } if (hcd->speed < HCD_USB3) { @@ -941,12 +949,12 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, xhci_set_link_state(xhci, port_array, wIndex, XDEV_U0); - spin_unlock_irqrestore(&xhci->lock, flags); + spin_unlock_irqrestore(&xhci->lock, *flags); time_left = wait_for_completion_timeout( &bus_state->rexit_done[wIndex], msecs_to_jiffies( XHCI_MAX_REXIT_TIMEOUT_MS)); - spin_lock_irqsave(&xhci->lock, flags); + spin_lock_irqsave(&xhci->lock, *flags); if (time_left) { slot_id = xhci_find_slot_id_by_port(hcd, @@ -1094,7 +1102,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, break; } status = xhci_get_port_status(hcd, bus_state, port_array, - wIndex, temp, flags); + wIndex, temp, &flags); if (status == 0xffffffff) goto error; @@ -1114,7 +1122,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, } port_li = readl(port_array[wIndex] + PORTLI); status = xhci_get_ext_port_status(temp, port_li); - put_unaligned_le32(cpu_to_le32(status), &buf[4]); + put_unaligned_le32(status, &buf[4]); } break; case SetPortFeature: diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index ecebd63bb15b..7c3e0b6ec8c0 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1580,9 +1580,15 @@ int xhci_endpoint_init(struct xhci_hcd *xhci, /* Allow 3 retries for everything but isoc, set CErr = 3 */ if (!usb_endpoint_xfer_isoc(&ep->desc)) err_count = 3; - /* Some devices get this wrong */ - if (usb_endpoint_xfer_bulk(&ep->desc) && udev->speed == USB_SPEED_HIGH) - max_packet = 512; + /* HS bulk max packet should be 512, FS bulk supports 8, 16, 32 or 64 */ + if (usb_endpoint_xfer_bulk(&ep->desc)) { + if (udev->speed == USB_SPEED_HIGH) + max_packet = 512; + if (udev->speed == USB_SPEED_FULL) { + max_packet = rounddown_pow_of_two(max_packet); + max_packet = clamp_val(max_packet, 8, 64); + } + } /* xHCI 1.0 and 1.1 indicates that ctrl ep avg TRB Length should be 8 */ if (usb_endpoint_xfer_control(&ep->desc) && xhci->hci_version >= 0x100) avg_trb_len = 8; @@ -2022,10 +2028,14 @@ no_bw: kfree(xhci->port_array); kfree(xhci->rh_bw); kfree(xhci->ext_caps); + kfree(xhci->usb2_rhub.psi); + kfree(xhci->usb3_rhub.psi); xhci->usb2_ports = NULL; xhci->usb3_ports = NULL; xhci->port_array = NULL; + xhci->usb2_rhub.psi = NULL; + xhci->usb3_rhub.psi = NULL; xhci->rh_bw = NULL; xhci->ext_caps = NULL; diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c index 6e7ddf6cafae..defaf950e631 100644 --- a/drivers/usb/host/xhci-mtk-sch.c +++ b/drivers/usb/host/xhci-mtk-sch.c @@ -122,7 +122,9 @@ static void setup_sch_info(struct usb_device *udev, } if (ep_type == ISOC_IN_EP || ep_type == ISOC_OUT_EP) { - if (esit_pkts <= sch_ep->esit) + if (sch_ep->esit == 1) + sch_ep->pkts = esit_pkts; + else if (esit_pkts <= sch_ep->esit) sch_ep->pkts = 1; else sch_ep->pkts = roundup_pow_of_two(esit_pkts) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 1de006aebec5..908496ed3254 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -53,6 +53,7 @@ #define PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI 0x1aa8 #define PCI_DEVICE_ID_INTEL_APL_XHCI 0x5aa8 #define PCI_DEVICE_ID_INTEL_DNV_XHCI 0x19d0 +#define PCI_DEVICE_ID_INTEL_CML_XHCI 0xa3af #define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9 #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba @@ -191,7 +192,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_M_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_BROXTON_B_XHCI || pdev->device == PCI_DEVICE_ID_INTEL_APL_XHCI || - pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI)) { + pdev->device == PCI_DEVICE_ID_INTEL_DNV_XHCI || + pdev->device == PCI_DEVICE_ID_INTEL_CML_XHCI)) { xhci->quirks |= XHCI_PME_STUCK_QUIRK; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && @@ -284,6 +286,9 @@ static int xhci_pci_setup(struct usb_hcd *hcd) if (!usb_hcd_is_primary_hcd(hcd)) return 0; + if (xhci->quirks & XHCI_PME_STUCK_QUIRK) + xhci_pme_acpi_rtd3_enable(pdev); + xhci_dbg(xhci, "Got SBRN %u\n", (unsigned int) xhci->sbrn); /* Find any debug ports */ @@ -344,9 +349,6 @@ static int xhci_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) HCC_MAX_PSA(xhci->hcc_params) >= 4) xhci->shared_hcd->can_do_streams = 1; - if (xhci->quirks & XHCI_PME_STUCK_QUIRK) - xhci_pme_acpi_rtd3_enable(dev); - /* USB-2 and USB-3 roothubs initialized, allow runtime pm suspend */ pm_runtime_put_noidle(&dev->dev); @@ -497,6 +499,18 @@ static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated) retval = xhci_resume(xhci, hibernated); return retval; } + +static void xhci_pci_shutdown(struct usb_hcd *hcd) +{ + struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct pci_dev *pdev = to_pci_dev(hcd->self.controller); + + xhci_shutdown(hcd); + + /* Yet another workaround for spurious wakeups at shutdown with HSW */ + if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) + pci_set_power_state(pdev, PCI_D3hot); +} #endif /* CONFIG_PM */ /*-------------------------------------------------------------------------*/ @@ -534,6 +548,7 @@ static int __init xhci_pci_init(void) #ifdef CONFIG_PM xhci_pci_hc_driver.pci_suspend = xhci_pci_suspend; xhci_pci_hc_driver.pci_resume = xhci_pci_resume; + xhci_pci_hc_driver.shutdown = xhci_pci_shutdown; #endif return pci_register_driver(&xhci_pci_driver); } diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b4a7d09234cc..65197272bae0 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1645,6 +1645,12 @@ static void handle_port_status(struct xhci_hcd *xhci, if ((major_revision == 0x03) != (hcd->speed >= HCD_USB3)) hcd = xhci->shared_hcd; + if (!hcd) { + xhci_dbg(xhci, "No hcd found for port %u event\n", port_id); + bogus_port_status = true; + goto cleanup; + } + if (major_revision == 0) { xhci_warn(xhci, "Event for port %u not in " "Extended Capabilities, ignoring.\n", @@ -1684,9 +1690,6 @@ static void handle_port_status(struct xhci_hcd *xhci, usb_hcd_resume_root_hub(hcd); } - if (hcd->speed >= HCD_USB3 && (portsc & PORT_PLS_MASK) == XDEV_INACTIVE) - bus_state->port_remote_wakeup &= ~(1 << faked_port_index); - if ((portsc & PORT_PLC) && (portsc & PORT_PLS_MASK) == XDEV_RESUME) { xhci_dbg(xhci, "port resume event for port %d\n", port_id); @@ -1705,6 +1708,7 @@ static void handle_port_status(struct xhci_hcd *xhci, bus_state->port_remote_wakeup |= 1 << faked_port_index; xhci_test_and_clear_bit(xhci, port_array, faked_port_index, PORT_PLC); + usb_hcd_start_port_resume(&hcd->self, faked_port_index); xhci_set_link_state(xhci, port_array, faked_port_index, XDEV_U0); /* Need to wait until the next link state change @@ -1742,8 +1746,6 @@ static void handle_port_status(struct xhci_hcd *xhci, if (slot_id && xhci->devs[slot_id]) xhci_ring_device(xhci, slot_id); if (bus_state->port_remote_wakeup & (1 << faked_port_index)) { - bus_state->port_remote_wakeup &= - ~(1 << faked_port_index); xhci_test_and_clear_bit(xhci, port_array, faked_port_index, PORT_PLC); usb_wakeup_notification(hcd->self.root_hub, @@ -2403,7 +2405,8 @@ static int handle_tx_event(struct xhci_hcd *xhci, case COMP_SUCCESS: if (EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)) == 0) break; - if (xhci->quirks & XHCI_TRUST_TX_LENGTH) + if (xhci->quirks & XHCI_TRUST_TX_LENGTH || + ep_ring->last_td_was_short) trb_comp_code = COMP_SHORT_PACKET; else xhci_warn_ratelimited(xhci, @@ -2766,6 +2769,42 @@ static int xhci_handle_event(struct xhci_hcd *xhci) return 1; } +/* + * Update Event Ring Dequeue Pointer: + * - When all events have finished + * - To avoid "Event Ring Full Error" condition + */ +static void xhci_update_erst_dequeue(struct xhci_hcd *xhci, + union xhci_trb *event_ring_deq) +{ + u64 temp_64; + dma_addr_t deq; + + temp_64 = xhci_read_64(xhci, &xhci->ir_set->erst_dequeue); + /* If necessary, update the HW's version of the event ring deq ptr. */ + if (event_ring_deq != xhci->event_ring->dequeue) { + deq = xhci_trb_virt_to_dma(xhci->event_ring->deq_seg, + xhci->event_ring->dequeue); + if (deq == 0) + xhci_warn(xhci, "WARN something wrong with SW event ring dequeue ptr\n"); + /* + * Per 4.9.4, Software writes to the ERDP register shall + * always advance the Event Ring Dequeue Pointer value. + */ + if ((temp_64 & (u64) ~ERST_PTR_MASK) == + ((u64) deq & (u64) ~ERST_PTR_MASK)) + return; + + /* Update HC event ring dequeue pointer */ + temp_64 &= ERST_PTR_MASK; + temp_64 |= ((u64) deq & (u64) ~ERST_PTR_MASK); + } + + /* Clear the event handler busy flag (RW1C) */ + temp_64 |= ERST_EHB; + xhci_write_64(xhci, temp_64, &xhci->ir_set->erst_dequeue); +} + /* * xHCI spec says we can get an interrupt, and if the HC has an error condition, * we might get bad data out of the event ring. Section 4.10.2.7 has a list of @@ -2777,9 +2816,9 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) union xhci_trb *event_ring_deq; irqreturn_t ret = IRQ_NONE; unsigned long flags; - dma_addr_t deq; u64 temp_64; u32 status; + int event_loop = 0; spin_lock_irqsave(&xhci->lock, flags); /* Check if the xHC generated the interrupt, or the irq is shared */ @@ -2833,24 +2872,14 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) /* FIXME this should be a delayed service routine * that clears the EHB. */ - while (xhci_handle_event(xhci) > 0) {} - - temp_64 = xhci_read_64(xhci, &xhci->ir_set->erst_dequeue); - /* If necessary, update the HW's version of the event ring deq ptr. */ - if (event_ring_deq != xhci->event_ring->dequeue) { - deq = xhci_trb_virt_to_dma(xhci->event_ring->deq_seg, - xhci->event_ring->dequeue); - if (deq == 0) - xhci_warn(xhci, "WARN something wrong with SW event " - "ring dequeue ptr.\n"); - /* Update HC event ring dequeue pointer */ - temp_64 &= ERST_PTR_MASK; - temp_64 |= ((u64) deq & (u64) ~ERST_PTR_MASK); + while (xhci_handle_event(xhci) > 0) { + if (event_loop++ < TRBS_PER_SEGMENT / 2) + continue; + xhci_update_erst_dequeue(xhci, event_ring_deq); + event_loop = 0; } - /* Clear the event handler busy flag (RW1C); event ring is empty. */ - temp_64 |= ERST_EHB; - xhci_write_64(xhci, temp_64, &xhci->ir_set->erst_dequeue); + xhci_update_erst_dequeue(xhci, event_ring_deq); ret = IRQ_HANDLED; out: diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 3f51f9f56d0d..f598bff567ca 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -736,7 +736,7 @@ static void xhci_stop(struct usb_hcd *hcd) * * This will only ever be called with the main usb_hcd (the USB3 roothub). */ -static void xhci_shutdown(struct usb_hcd *hcd) +void xhci_shutdown(struct usb_hcd *hcd) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); @@ -755,11 +755,8 @@ static void xhci_shutdown(struct usb_hcd *hcd) xhci_dbg_trace(xhci, trace_xhci_dbg_init, "xhci_shutdown completed - status = %x", readl(&xhci->op_regs->status)); - - /* Yet another workaround for spurious wakeups at shutdown with HSW */ - if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) - pci_set_power_state(to_pci_dev(hcd->self.sysdev), PCI_D3hot); } +EXPORT_SYMBOL_GPL(xhci_shutdown); #ifdef CONFIG_PM static void xhci_save_registers(struct xhci_hcd *xhci) @@ -930,7 +927,7 @@ static bool xhci_pending_portevent(struct xhci_hcd *xhci) int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup) { int rc = 0; - unsigned int delay = XHCI_MAX_HALT_USEC; + unsigned int delay = XHCI_MAX_HALT_USEC * 2; struct usb_hcd *hcd = xhci_to_hcd(xhci); u32 command; u32 res; diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 925c8d73793d..cdbdaf3dc57a 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -2031,6 +2031,7 @@ int xhci_start(struct xhci_hcd *xhci); int xhci_reset(struct xhci_hcd *xhci); int xhci_run(struct usb_hcd *hcd); int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks); +void xhci_shutdown(struct usb_hcd *hcd); void xhci_init_driver(struct hc_driver *drv, const struct xhci_driver_overrides *over); int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id); diff --git a/drivers/usb/misc/adutux.c b/drivers/usb/misc/adutux.c index c8c8fa3f1f46..45390045c75d 100644 --- a/drivers/usb/misc/adutux.c +++ b/drivers/usb/misc/adutux.c @@ -671,7 +671,7 @@ static int adu_probe(struct usb_interface *interface, init_waitqueue_head(&dev->read_wait); init_waitqueue_head(&dev->write_wait); - res = usb_find_common_endpoints_reverse(&interface->altsetting[0], + res = usb_find_common_endpoints_reverse(interface->cur_altsetting, NULL, NULL, &dev->interrupt_in_endpoint, &dev->interrupt_out_endpoint); diff --git a/drivers/usb/misc/appledisplay.c b/drivers/usb/misc/appledisplay.c index 03be7c75c5be..aad7963e40e7 100644 --- a/drivers/usb/misc/appledisplay.c +++ b/drivers/usb/misc/appledisplay.c @@ -160,8 +160,11 @@ static int appledisplay_bl_update_status(struct backlight_device *bd) pdata->msgdata, 2, ACD_USB_TIMEOUT); mutex_unlock(&pdata->sysfslock); - - return retval; + + if (retval < 0) + return retval; + else + return 0; } static int appledisplay_bl_get_brightness(struct backlight_device *bd) @@ -179,7 +182,12 @@ static int appledisplay_bl_get_brightness(struct backlight_device *bd) 0, pdata->msgdata, 2, ACD_USB_TIMEOUT); - brightness = pdata->msgdata[1]; + if (retval < 2) { + if (retval >= 0) + retval = -EMSGSIZE; + } else { + brightness = pdata->msgdata[1]; + } mutex_unlock(&pdata->sysfslock); if (retval < 0) @@ -314,6 +322,7 @@ error: if (pdata) { if (pdata->urb) { usb_kill_urb(pdata->urb); + cancel_delayed_work_sync(&pdata->work); if (pdata->urbdata) usb_free_coherent(pdata->udev, ACD_URB_BUFFER_LEN, pdata->urbdata, pdata->urb->transfer_dma); diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c index eb0795c5ff7a..3a701c1e9e75 100644 --- a/drivers/usb/misc/chaoskey.c +++ b/drivers/usb/misc/chaoskey.c @@ -396,13 +396,17 @@ static int _chaoskey_fill(struct chaoskey *dev) !dev->reading, (started ? NAK_TIMEOUT : ALEA_FIRST_TIMEOUT) ); - if (result < 0) + if (result < 0) { + usb_kill_urb(dev->urb); goto out; + } - if (result == 0) + if (result == 0) { result = -ETIMEDOUT; - else + usb_kill_urb(dev->urb); + } else { result = dev->valid; + } out: /* Let the device go back to sleep eventually */ usb_autopm_put_interface(dev->interface); @@ -538,7 +542,21 @@ static int chaoskey_suspend(struct usb_interface *interface, static int chaoskey_resume(struct usb_interface *interface) { + struct chaoskey *dev; + struct usb_device *udev = interface_to_usbdev(interface); + usb_dbg(interface, "resume"); + dev = usb_get_intfdata(interface); + + /* + * We may have lost power. + * In that case the device that needs a long time + * for the first requests needs an extended timeout + * again + */ + if (le16_to_cpu(udev->descriptor.idVendor) == ALEA_VENDOR_ID) + dev->reads_started = false; + return 0; } #else diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c index 39d8fedfaf3b..01ef2551be46 100644 --- a/drivers/usb/misc/idmouse.c +++ b/drivers/usb/misc/idmouse.c @@ -341,7 +341,7 @@ static int idmouse_probe(struct usb_interface *interface, int result; /* check if we have gotten the data or the hid interface */ - iface_desc = &interface->altsetting[0]; + iface_desc = interface->cur_altsetting; if (iface_desc->desc.bInterfaceClass != 0x0A) return -ENODEV; diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 7f226cc3ef8a..1ec32e5aa004 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -32,6 +32,14 @@ #define USB_DEVICE_ID_CODEMERCS_IOWPV2 0x1512 /* full speed iowarrior */ #define USB_DEVICE_ID_CODEMERCS_IOW56 0x1503 +/* fuller speed iowarrior */ +#define USB_DEVICE_ID_CODEMERCS_IOW28 0x1504 +#define USB_DEVICE_ID_CODEMERCS_IOW28L 0x1505 +#define USB_DEVICE_ID_CODEMERCS_IOW100 0x1506 + +/* OEMed devices */ +#define USB_DEVICE_ID_CODEMERCS_IOW24SAG 0x158a +#define USB_DEVICE_ID_CODEMERCS_IOW56AM 0x158b /* Get a minor range for your devices from the usb maintainer */ #ifdef CONFIG_USB_DYNAMIC_MINORS @@ -137,6 +145,11 @@ static const struct usb_device_id iowarrior_ids[] = { {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV1)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOWPV2)}, {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW24SAG)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW56AM)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW28L)}, + {USB_DEVICE(USB_VENDOR_ID_CODEMERCS, USB_DEVICE_ID_CODEMERCS_IOW100)}, {} /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, iowarrior_ids); @@ -364,6 +377,7 @@ static ssize_t iowarrior_write(struct file *file, } switch (dev->product_id) { case USB_DEVICE_ID_CODEMERCS_IOW24: + case USB_DEVICE_ID_CODEMERCS_IOW24SAG: case USB_DEVICE_ID_CODEMERCS_IOWPV1: case USB_DEVICE_ID_CODEMERCS_IOWPV2: case USB_DEVICE_ID_CODEMERCS_IOW40: @@ -378,6 +392,10 @@ static ssize_t iowarrior_write(struct file *file, goto exit; break; case USB_DEVICE_ID_CODEMERCS_IOW56: + case USB_DEVICE_ID_CODEMERCS_IOW56AM: + case USB_DEVICE_ID_CODEMERCS_IOW28: + case USB_DEVICE_ID_CODEMERCS_IOW28L: + case USB_DEVICE_ID_CODEMERCS_IOW100: /* The IOW56 uses asynchronous IO and more urbs */ if (atomic_read(&dev->write_busy) == MAX_WRITES_IN_FLIGHT) { /* Wait until we are below the limit for submitted urbs */ @@ -502,6 +520,7 @@ static long iowarrior_ioctl(struct file *file, unsigned int cmd, switch (cmd) { case IOW_WRITE: if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24 || + dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW24SAG || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV1 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOWPV2 || dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW40) { @@ -786,7 +805,11 @@ static int iowarrior_probe(struct usb_interface *interface, goto error; } - if (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) { + if ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100)) { res = usb_find_last_int_out_endpoint(iface_desc, &dev->int_out_endpoint); if (res) { @@ -799,7 +822,11 @@ static int iowarrior_probe(struct usb_interface *interface, /* we have to check the report_size often, so remember it in the endianness suitable for our machine */ dev->report_size = usb_endpoint_maxp(dev->int_in_endpoint); if ((dev->interface->cur_altsetting->desc.bInterfaceNumber == 0) && - (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56)) + ((dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW56AM) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW28L) || + (dev->product_id == USB_DEVICE_ID_CODEMERCS_IOW100))) /* IOWarrior56 has wMaxPacketSize different from report size */ dev->report_size = 7; diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c index fa5cf349ae19..e16af177d467 100644 --- a/drivers/usb/misc/ldusb.c +++ b/drivers/usb/misc/ldusb.c @@ -383,10 +383,7 @@ static int ld_usb_release(struct inode *inode, struct file *file) goto exit; } - if (mutex_lock_interruptible(&dev->mutex)) { - retval = -ERESTARTSYS; - goto exit; - } + mutex_lock(&dev->mutex); if (dev->open_count != 1) { retval = -ENODEV; @@ -470,7 +467,7 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count, /* wait for data */ spin_lock_irq(&dev->rbsl); - if (dev->ring_head == dev->ring_tail) { + while (dev->ring_head == dev->ring_tail) { dev->interrupt_in_done = 0; spin_unlock_irq(&dev->rbsl); if (file->f_flags & O_NONBLOCK) { @@ -480,15 +477,20 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count, retval = wait_event_interruptible(dev->read_wait, dev->interrupt_in_done); if (retval < 0) goto unlock_exit; - } else { - spin_unlock_irq(&dev->rbsl); + + spin_lock_irq(&dev->rbsl); } + spin_unlock_irq(&dev->rbsl); /* actual_buffer contains actual_length + interrupt_in_buffer */ actual_buffer = (size_t *)(dev->ring_buffer + dev->ring_tail * (sizeof(size_t)+dev->interrupt_in_endpoint_size)); + if (*actual_buffer > dev->interrupt_in_endpoint_size) { + retval = -EIO; + goto unlock_exit; + } bytes_to_read = min(count, *actual_buffer); if (bytes_to_read < *actual_buffer) - dev_warn(&dev->intf->dev, "Read buffer overflow, %zd bytes dropped\n", + dev_warn(&dev->intf->dev, "Read buffer overflow, %zu bytes dropped\n", *actual_buffer-bytes_to_read); /* copy one interrupt_in_buffer from ring_buffer into userspace */ @@ -496,11 +498,11 @@ static ssize_t ld_usb_read(struct file *file, char __user *buffer, size_t count, retval = -EFAULT; goto unlock_exit; } - dev->ring_tail = (dev->ring_tail+1) % ring_buffer_size; - retval = bytes_to_read; spin_lock_irq(&dev->rbsl); + dev->ring_tail = (dev->ring_tail + 1) % ring_buffer_size; + if (dev->buffer_overflow) { dev->buffer_overflow = 0; spin_unlock_irq(&dev->rbsl); @@ -563,8 +565,9 @@ static ssize_t ld_usb_write(struct file *file, const char __user *buffer, /* write the data into interrupt_out_buffer from userspace */ bytes_to_write = min(count, write_buffer_size*dev->interrupt_out_endpoint_size); if (bytes_to_write < count) - dev_warn(&dev->intf->dev, "Write buffer overflow, %zd bytes dropped\n", count-bytes_to_write); - dev_dbg(&dev->intf->dev, "%s: count = %zd, bytes_to_write = %zd\n", + dev_warn(&dev->intf->dev, "Write buffer overflow, %zu bytes dropped\n", + count - bytes_to_write); + dev_dbg(&dev->intf->dev, "%s: count = %zu, bytes_to_write = %zu\n", __func__, count, bytes_to_write); if (copy_from_user(dev->interrupt_out_buffer, buffer, bytes_to_write)) { @@ -581,7 +584,7 @@ static ssize_t ld_usb_write(struct file *file, const char __user *buffer, 1 << 8, 0, dev->interrupt_out_buffer, bytes_to_write, - USB_CTRL_SET_TIMEOUT * HZ); + USB_CTRL_SET_TIMEOUT); if (retval < 0) dev_err(&dev->intf->dev, "Couldn't submit HID_REQ_SET_REPORT %d\n", @@ -696,7 +699,9 @@ static int ld_usb_probe(struct usb_interface *intf, const struct usb_device_id * dev_warn(&intf->dev, "Interrupt out endpoint not found (using control endpoint instead)\n"); dev->interrupt_in_endpoint_size = usb_endpoint_maxp(dev->interrupt_in_endpoint); - dev->ring_buffer = kmalloc(ring_buffer_size*(sizeof(size_t)+dev->interrupt_in_endpoint_size), GFP_KERNEL); + dev->ring_buffer = kcalloc(ring_buffer_size, + sizeof(size_t) + dev->interrupt_in_endpoint_size, + GFP_KERNEL); if (!dev->ring_buffer) goto error; dev->interrupt_in_buffer = kmalloc(dev->interrupt_in_endpoint_size, GFP_KERNEL); diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c index 155615aadc9c..a1ed6be87471 100644 --- a/drivers/usb/misc/legousbtower.c +++ b/drivers/usb/misc/legousbtower.c @@ -423,10 +423,7 @@ static int tower_release (struct inode *inode, struct file *file) goto exit; } - if (mutex_lock_interruptible(&dev->lock)) { - retval = -ERESTARTSYS; - goto exit; - } + mutex_lock(&dev->lock); if (dev->open_count != 1) { dev_dbg(&dev->udev->dev, "%s: device not opened exactly once\n", @@ -884,7 +881,7 @@ static int tower_probe (struct usb_interface *interface, const struct usb_device get_version_reply, sizeof(*get_version_reply), 1000); - if (result < sizeof(*get_version_reply)) { + if (result != sizeof(*get_version_reply)) { if (result >= 0) result = -EIO; dev_err(idev, "get version request failed: %d\n", result); diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c index f932f40302df..156aebf62e61 100644 --- a/drivers/usb/mon/mon_bin.c +++ b/drivers/usb/mon/mon_bin.c @@ -1038,12 +1038,18 @@ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg mutex_lock(&rp->fetch_lock); spin_lock_irqsave(&rp->b_lock, flags); - mon_free_buff(rp->b_vec, rp->b_size/CHUNK_SIZE); - kfree(rp->b_vec); - rp->b_vec = vec; - rp->b_size = size; - rp->b_read = rp->b_in = rp->b_out = rp->b_cnt = 0; - rp->cnt_lost = 0; + if (rp->mmap_active) { + mon_free_buff(vec, size/CHUNK_SIZE); + kfree(vec); + ret = -EBUSY; + } else { + mon_free_buff(rp->b_vec, rp->b_size/CHUNK_SIZE); + kfree(rp->b_vec); + rp->b_vec = vec; + rp->b_size = size; + rp->b_read = rp->b_in = rp->b_out = rp->b_cnt = 0; + rp->cnt_lost = 0; + } spin_unlock_irqrestore(&rp->b_lock, flags); mutex_unlock(&rp->fetch_lock); } @@ -1215,13 +1221,21 @@ mon_bin_poll(struct file *file, struct poll_table_struct *wait) static void mon_bin_vma_open(struct vm_area_struct *vma) { struct mon_reader_bin *rp = vma->vm_private_data; + unsigned long flags; + + spin_lock_irqsave(&rp->b_lock, flags); rp->mmap_active++; + spin_unlock_irqrestore(&rp->b_lock, flags); } static void mon_bin_vma_close(struct vm_area_struct *vma) { + unsigned long flags; + struct mon_reader_bin *rp = vma->vm_private_data; + spin_lock_irqsave(&rp->b_lock, flags); rp->mmap_active--; + spin_unlock_irqrestore(&rp->b_lock, flags); } /* @@ -1233,16 +1247,12 @@ static int mon_bin_vma_fault(struct vm_fault *vmf) unsigned long offset, chunk_idx; struct page *pageptr; - mutex_lock(&rp->fetch_lock); offset = vmf->pgoff << PAGE_SHIFT; - if (offset >= rp->b_size) { - mutex_unlock(&rp->fetch_lock); + if (offset >= rp->b_size) return VM_FAULT_SIGBUS; - } chunk_idx = offset / CHUNK_SIZE; pageptr = rp->b_vec[chunk_idx].pg; get_page(pageptr); - mutex_unlock(&rp->fetch_lock); vmf->page = pageptr; return 0; } diff --git a/drivers/usb/mtu3/mtu3_qmu.c b/drivers/usb/mtu3/mtu3_qmu.c index 8f6430304f2e..5b45d631a9e6 100644 --- a/drivers/usb/mtu3/mtu3_qmu.c +++ b/drivers/usb/mtu3/mtu3_qmu.c @@ -393,7 +393,7 @@ static void qmu_tx_zlp_error_handler(struct mtu3 *mtu, u8 epnum) return; } - dev_dbg(mtu->dev, "%s send ZLP for req=%p\n", __func__, mreq); + dev_dbg(mtu->dev, "%s send ZLP for req=%p\n", __func__, req); mtu3_clrbits(mbase, MU3D_EP_TXCR0(mep->epnum), TX_DMAREQEN); diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index ff17e94ef465..dca39c9a13b0 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -1838,6 +1838,9 @@ static const struct attribute_group musb_attr_group = { #define MUSB_QUIRK_B_INVALID_VBUS_91 (MUSB_DEVCTL_BDEVICE | \ (2 << MUSB_DEVCTL_VBUS_SHIFT) | \ MUSB_DEVCTL_SESSION) +#define MUSB_QUIRK_B_DISCONNECT_99 (MUSB_DEVCTL_BDEVICE | \ + (3 << MUSB_DEVCTL_VBUS_SHIFT) | \ + MUSB_DEVCTL_SESSION) #define MUSB_QUIRK_A_DISCONNECT_19 ((3 << MUSB_DEVCTL_VBUS_SHIFT) | \ MUSB_DEVCTL_SESSION) @@ -1860,6 +1863,11 @@ static void musb_pm_runtime_check_session(struct musb *musb) s = MUSB_DEVCTL_FSDEV | MUSB_DEVCTL_LSDEV | MUSB_DEVCTL_HR; switch (devctl & ~s) { + case MUSB_QUIRK_B_DISCONNECT_99: + musb_dbg(musb, "Poll devctl in case of suspend after disconnect\n"); + schedule_delayed_work(&musb->irq_work, + msecs_to_jiffies(1000)); + break; case MUSB_QUIRK_B_INVALID_VBUS_91: if (musb->quirk_retries && !musb->flush_irq_work) { musb_dbg(musb, @@ -2320,6 +2328,9 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl) musb_disable_interrupts(musb); musb_writeb(musb->mregs, MUSB_DEVCTL, 0); + /* MUSB_POWER_SOFTCONN might be already set, JZ4740 does this. */ + musb_writeb(musb->mregs, MUSB_POWER, 0); + /* Init IRQ workqueue before request_irq */ INIT_DELAYED_WORK(&musb->irq_work, musb_irq_work); INIT_DELAYED_WORK(&musb->deassert_reset_work, musb_deassert_reset); diff --git a/drivers/usb/musb/musbhsdma.c b/drivers/usb/musb/musbhsdma.c index 512108e22d2b..1dc35ab31275 100644 --- a/drivers/usb/musb/musbhsdma.c +++ b/drivers/usb/musb/musbhsdma.c @@ -399,7 +399,7 @@ struct dma_controller *musbhs_dma_controller_create(struct musb *musb, controller->controller.channel_abort = dma_channel_abort; if (request_irq(irq, dma_controller_irq, 0, - dev_name(musb->controller), &controller->controller)) { + dev_name(musb->controller), controller)) { dev_err(dev, "request_irq %d failed!\n", irq); musb_dma_controller_destroy(&controller->controller); diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c index 456f3e6ecf03..26e69c2766f5 100644 --- a/drivers/usb/musb/omap2430.c +++ b/drivers/usb/musb/omap2430.c @@ -388,8 +388,6 @@ static const struct musb_platform_ops omap2430_ops = { .init = omap2430_musb_init, .exit = omap2430_musb_exit, - .set_vbus = omap2430_musb_set_vbus, - .enable = omap2430_musb_enable, .disable = omap2430_musb_disable, diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig index 16e2372271ca..75b714ea56e3 100644 --- a/drivers/usb/phy/Kconfig +++ b/drivers/usb/phy/Kconfig @@ -28,7 +28,7 @@ config AB8500_USB in host mode, low speed. config FSL_USB2_OTG - bool "Freescale USB OTG Transceiver Driver" + tristate "Freescale USB OTG Transceiver Driver" depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM=y && PM depends on USB_GADGET || !USB_GADGET # if USB_GADGET=m, this can't be 'y' select USB_PHY diff --git a/drivers/usb/phy/phy-twl6030-usb.c b/drivers/usb/phy/phy-twl6030-usb.c index b5dc077ed7d3..8e14fa221191 100644 --- a/drivers/usb/phy/phy-twl6030-usb.c +++ b/drivers/usb/phy/phy-twl6030-usb.c @@ -413,7 +413,7 @@ static int twl6030_usb_remove(struct platform_device *pdev) { struct twl6030_usb *twl = platform_get_drvdata(pdev); - cancel_delayed_work(&twl->get_status_work); + cancel_delayed_work_sync(&twl->get_status_work); twl6030_interrupt_mask(TWL6030_USBOTG_INT_MASK, REG_INT_MSK_LINE_C); twl6030_interrupt_mask(TWL6030_USBOTG_INT_MASK, diff --git a/drivers/usb/renesas_usbhs/common.h b/drivers/usb/renesas_usbhs/common.h index b8620aa6b72e..8424c165f732 100644 --- a/drivers/usb/renesas_usbhs/common.h +++ b/drivers/usb/renesas_usbhs/common.h @@ -163,11 +163,12 @@ struct usbhs_priv; #define VBSTS (1 << 7) /* VBUS_0 and VBUSIN_0 Input Status */ #define VALID (1 << 3) /* USB Request Receive */ -#define DVSQ_MASK (0x3 << 4) /* Device State */ +#define DVSQ_MASK (0x7 << 4) /* Device State */ #define POWER_STATE (0 << 4) #define DEFAULT_STATE (1 << 4) #define ADDRESS_STATE (2 << 4) #define CONFIGURATION_STATE (3 << 4) +#define SUSPENDED_STATE (4 << 4) #define CTSQ_MASK (0x7) /* Control Transfer Stage */ #define IDLE_SETUP_STAGE 0 /* Idle stage or setup stage */ diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index 0dedb0d91dcc..b27f2135b66d 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -465,12 +465,18 @@ static int usbhsg_irq_dev_state(struct usbhs_priv *priv, { struct usbhsg_gpriv *gpriv = usbhsg_priv_to_gpriv(priv); struct device *dev = usbhsg_gpriv_to_dev(gpriv); + int state = usbhs_status_get_device_state(irq_state); gpriv->gadget.speed = usbhs_bus_get_speed(priv); - dev_dbg(dev, "state = %x : speed : %d\n", - usbhs_status_get_device_state(irq_state), - gpriv->gadget.speed); + dev_dbg(dev, "state = %x : speed : %d\n", state, gpriv->gadget.speed); + + if (gpriv->gadget.speed != USB_SPEED_UNKNOWN && + (state & SUSPENDED_STATE)) { + if (gpriv->driver && gpriv->driver->suspend) + gpriv->driver->suspend(&gpriv->gadget); + usb_gadget_set_state(&gpriv->gadget, USB_STATE_SUSPENDED); + } return 0; } diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 578596d301b8..31cd798d2dac 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -592,9 +592,13 @@ static int ch341_tiocmget(struct tty_struct *tty) static int ch341_reset_resume(struct usb_serial *serial) { struct usb_serial_port *port = serial->port[0]; - struct ch341_private *priv = usb_get_serial_port_data(port); + struct ch341_private *priv; int ret; + priv = usb_get_serial_port_data(port); + if (!priv) + return 0; + /* reconfigure ch341 serial port after bus-reset */ ch341_configure(serial->dev, priv); diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 98e466c3cfca..8dd9852f399d 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -128,6 +128,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */ { USB_DEVICE(0x10C4, 0x8382) }, /* Cygnal Integrated Products, Inc. */ { USB_DEVICE(0x10C4, 0x83A8) }, /* Amber Wireless AMB2560 */ + { USB_DEVICE(0x10C4, 0x83AA) }, /* Mark-10 Digital Force Gauge */ { USB_DEVICE(0x10C4, 0x83D8) }, /* DekTec DTA Plus VHF/UHF Booster/Attenuator */ { USB_DEVICE(0x10C4, 0x8411) }, /* Kyocera GPS Module */ { USB_DEVICE(0x10C4, 0x8418) }, /* IRZ Automation Teleport SG-10 GSM/GPRS Modem */ diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c index 90110de715e0..d0aa4c853f56 100644 --- a/drivers/usb/serial/cypress_m8.c +++ b/drivers/usb/serial/cypress_m8.c @@ -773,7 +773,7 @@ send: usb_fill_int_urb(port->interrupt_out_urb, port->serial->dev, usb_sndintpipe(port->serial->dev, port->interrupt_out_endpointAddress), - port->interrupt_out_buffer, port->interrupt_out_size, + port->interrupt_out_buffer, actual_size, cypress_write_int_callback, port, priv->write_urb_interval); result = usb_submit_urb(port->interrupt_out_urb, GFP_ATOMIC); if (result) { diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 8b5c99df0f2b..a962065227c4 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1028,6 +1028,9 @@ static const struct usb_device_id id_table_combined[] = { /* Sienna devices */ { USB_DEVICE(FTDI_VID, FTDI_SIENNA_PID) }, { USB_DEVICE(ECHELON_VID, ECHELON_U20_PID) }, + /* U-Blox devices */ + { USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ZED_PID) }, + { USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ODIN_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 22d66217cb41..e8373528264c 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1558,3 +1558,10 @@ */ #define UNJO_VID 0x22B7 #define UNJO_ISODEBUG_V1_PID 0x150D + +/* + * U-Blox products (http://www.u-blox.com). + */ +#define UBLOX_VID 0x1546 +#define UBLOX_C099F9P_ZED_PID 0x0502 +#define UBLOX_C099F9P_ODIN_PID 0x0503 diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index 01f3ac7769f3..3705b64ab948 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -652,6 +652,7 @@ static void edge_interrupt_callback(struct urb *urb) struct usb_serial_port *port; unsigned char *data = urb->transfer_buffer; int length = urb->actual_length; + unsigned long flags; int bytes_avail; int position; int txCredits; @@ -683,7 +684,7 @@ static void edge_interrupt_callback(struct urb *urb) if (length > 1) { bytes_avail = data[0] | (data[1] << 8); if (bytes_avail) { - spin_lock(&edge_serial->es_lock); + spin_lock_irqsave(&edge_serial->es_lock, flags); edge_serial->rxBytesAvail += bytes_avail; dev_dbg(dev, "%s - bytes_avail=%d, rxBytesAvail=%d, read_in_progress=%d\n", @@ -706,7 +707,8 @@ static void edge_interrupt_callback(struct urb *urb) edge_serial->read_in_progress = false; } } - spin_unlock(&edge_serial->es_lock); + spin_unlock_irqrestore(&edge_serial->es_lock, + flags); } } /* grab the txcredits for the ports if available */ @@ -718,10 +720,12 @@ static void edge_interrupt_callback(struct urb *urb) if (txCredits) { port = edge_serial->serial->port[portNumber]; edge_port = usb_get_serial_port_data(port); - if (edge_port->open) { - spin_lock(&edge_port->ep_lock); + if (edge_port && edge_port->open) { + spin_lock_irqsave(&edge_port->ep_lock, + flags); edge_port->txCredits += txCredits; - spin_unlock(&edge_port->ep_lock); + spin_unlock_irqrestore(&edge_port->ep_lock, + flags); dev_dbg(dev, "%s - txcredits for port%d = %d\n", __func__, portNumber, edge_port->txCredits); @@ -762,6 +766,7 @@ static void edge_bulk_in_callback(struct urb *urb) int retval; __u16 raw_data_length; int status = urb->status; + unsigned long flags; if (status) { dev_dbg(&urb->dev->dev, "%s - nonzero read bulk status received: %d\n", @@ -781,7 +786,7 @@ static void edge_bulk_in_callback(struct urb *urb) usb_serial_debug_data(dev, __func__, raw_data_length, data); - spin_lock(&edge_serial->es_lock); + spin_lock_irqsave(&edge_serial->es_lock, flags); /* decrement our rxBytes available by the number that we just got */ edge_serial->rxBytesAvail -= raw_data_length; @@ -805,7 +810,7 @@ static void edge_bulk_in_callback(struct urb *urb) edge_serial->read_in_progress = false; } - spin_unlock(&edge_serial->es_lock); + spin_unlock_irqrestore(&edge_serial->es_lock, flags); } @@ -1733,7 +1738,8 @@ static void edge_break(struct tty_struct *tty, int break_state) static void process_rcvd_data(struct edgeport_serial *edge_serial, unsigned char *buffer, __u16 bufferLength) { - struct device *dev = &edge_serial->serial->dev->dev; + struct usb_serial *serial = edge_serial->serial; + struct device *dev = &serial->dev->dev; struct usb_serial_port *port; struct edgeport_port *edge_port; __u16 lastBufferLength; @@ -1838,11 +1844,10 @@ static void process_rcvd_data(struct edgeport_serial *edge_serial, /* spit this data back into the tty driver if this port is open */ - if (rxLen) { - port = edge_serial->serial->port[ - edge_serial->rxPort]; + if (rxLen && edge_serial->rxPort < serial->num_ports) { + port = serial->port[edge_serial->rxPort]; edge_port = usb_get_serial_port_data(port); - if (edge_port->open) { + if (edge_port && edge_port->open) { dev_dbg(dev, "%s - Sending %d bytes to TTY for port %d\n", __func__, rxLen, edge_serial->rxPort); @@ -1850,8 +1855,8 @@ static void process_rcvd_data(struct edgeport_serial *edge_serial, rxLen); edge_port->port->icount.rx += rxLen; } - buffer += rxLen; } + buffer += rxLen; break; case EXPECT_HDR3: /* Expect 3rd byte of status header */ @@ -1886,6 +1891,8 @@ static void process_rcvd_status(struct edgeport_serial *edge_serial, __u8 code = edge_serial->rxStatusCode; /* switch the port pointer to the one being currently talked about */ + if (edge_serial->rxPort >= edge_serial->serial->num_ports) + return; port = edge_serial->serial->port[edge_serial->rxPort]; edge_port = usb_get_serial_port_data(port); if (edge_port == NULL) { @@ -2918,16 +2925,18 @@ static int edge_startup(struct usb_serial *serial) response = 0; if (edge_serial->is_epic) { + struct usb_host_interface *alt; + + alt = serial->interface->cur_altsetting; + /* EPIC thing, set up our interrupt polling now and our read * urb, so that the device knows it really is connected. */ interrupt_in_found = bulk_in_found = bulk_out_found = false; - for (i = 0; i < serial->interface->altsetting[0] - .desc.bNumEndpoints; ++i) { + for (i = 0; i < alt->desc.bNumEndpoints; ++i) { struct usb_endpoint_descriptor *endpoint; int buffer_size; - endpoint = &serial->interface->altsetting[0]. - endpoint[i].desc; + endpoint = &alt->endpoint[i].desc; buffer_size = usb_endpoint_maxp(endpoint); if (!interrupt_in_found && (usb_endpoint_is_int_in(endpoint))) { diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c index f9734a96d516..a3e3b4703f38 100644 --- a/drivers/usb/serial/ir-usb.c +++ b/drivers/usb/serial/ir-usb.c @@ -49,9 +49,10 @@ static int buffer_size; static int xbof = -1; static int ir_startup (struct usb_serial *serial); -static int ir_open(struct tty_struct *tty, struct usb_serial_port *port); -static int ir_prepare_write_buffer(struct usb_serial_port *port, - void *dest, size_t size); +static int ir_write(struct tty_struct *tty, struct usb_serial_port *port, + const unsigned char *buf, int count); +static int ir_write_room(struct tty_struct *tty); +static void ir_write_bulk_callback(struct urb *urb); static void ir_process_read_urb(struct urb *urb); static void ir_set_termios(struct tty_struct *tty, struct usb_serial_port *port, struct ktermios *old_termios); @@ -81,8 +82,9 @@ static struct usb_serial_driver ir_device = { .num_ports = 1, .set_termios = ir_set_termios, .attach = ir_startup, - .open = ir_open, - .prepare_write_buffer = ir_prepare_write_buffer, + .write = ir_write, + .write_room = ir_write_room, + .write_bulk_callback = ir_write_bulk_callback, .process_read_urb = ir_process_read_urb, }; @@ -199,6 +201,9 @@ static int ir_startup(struct usb_serial *serial) struct usb_irda_cs_descriptor *irda_desc; int rates; + if (serial->num_bulk_in < 1 || serial->num_bulk_out < 1) + return -ENODEV; + irda_desc = irda_usb_find_class_desc(serial, 0); if (!irda_desc) { dev_err(&serial->dev->dev, @@ -255,35 +260,102 @@ static int ir_startup(struct usb_serial *serial) return 0; } -static int ir_open(struct tty_struct *tty, struct usb_serial_port *port) +static int ir_write(struct tty_struct *tty, struct usb_serial_port *port, + const unsigned char *buf, int count) { - int i; + struct urb *urb = NULL; + unsigned long flags; + int ret; - for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) - port->write_urbs[i]->transfer_flags = URB_ZERO_PACKET; + if (port->bulk_out_size == 0) + return -EINVAL; - /* Start reading from the device */ - return usb_serial_generic_open(tty, port); -} + if (count == 0) + return 0; -static int ir_prepare_write_buffer(struct usb_serial_port *port, - void *dest, size_t size) -{ - unsigned char *buf = dest; - int count; + count = min(count, port->bulk_out_size - 1); + + spin_lock_irqsave(&port->lock, flags); + if (__test_and_clear_bit(0, &port->write_urbs_free)) { + urb = port->write_urbs[0]; + port->tx_bytes += count; + } + spin_unlock_irqrestore(&port->lock, flags); + + if (!urb) + return 0; /* * The first byte of the packet we send to the device contains an - * inbound header which indicates an additional number of BOFs and + * outbound header which indicates an additional number of BOFs and * a baud rate change. * * See section 5.4.2.2 of the USB IrDA spec. */ - *buf = ir_xbof | ir_baud; + *(u8 *)urb->transfer_buffer = ir_xbof | ir_baud; - count = kfifo_out_locked(&port->write_fifo, buf + 1, size - 1, - &port->lock); - return count + 1; + memcpy(urb->transfer_buffer + 1, buf, count); + + urb->transfer_buffer_length = count + 1; + urb->transfer_flags = URB_ZERO_PACKET; + + ret = usb_submit_urb(urb, GFP_ATOMIC); + if (ret) { + dev_err(&port->dev, "failed to submit write urb: %d\n", ret); + + spin_lock_irqsave(&port->lock, flags); + __set_bit(0, &port->write_urbs_free); + port->tx_bytes -= count; + spin_unlock_irqrestore(&port->lock, flags); + + return ret; + } + + return count; +} + +static void ir_write_bulk_callback(struct urb *urb) +{ + struct usb_serial_port *port = urb->context; + int status = urb->status; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + __set_bit(0, &port->write_urbs_free); + port->tx_bytes -= urb->transfer_buffer_length - 1; + spin_unlock_irqrestore(&port->lock, flags); + + switch (status) { + case 0: + break; + case -ENOENT: + case -ECONNRESET: + case -ESHUTDOWN: + dev_dbg(&port->dev, "write urb stopped: %d\n", status); + return; + case -EPIPE: + dev_err(&port->dev, "write urb stopped: %d\n", status); + return; + default: + dev_err(&port->dev, "nonzero write-urb status: %d\n", status); + break; + } + + usb_serial_port_softint(port); +} + +static int ir_write_room(struct tty_struct *tty) +{ + struct usb_serial_port *port = tty->driver_data; + int count = 0; + + if (port->bulk_out_size == 0) + return 0; + + if (test_bit(0, &port->write_urbs_free)) + count = port->bulk_out_size - 1; + + return count; } static void ir_process_read_urb(struct urb *urb) @@ -336,34 +408,34 @@ static void ir_set_termios(struct tty_struct *tty, switch (baud) { case 2400: - ir_baud = USB_IRDA_BR_2400; + ir_baud = USB_IRDA_LS_2400; break; case 9600: - ir_baud = USB_IRDA_BR_9600; + ir_baud = USB_IRDA_LS_9600; break; case 19200: - ir_baud = USB_IRDA_BR_19200; + ir_baud = USB_IRDA_LS_19200; break; case 38400: - ir_baud = USB_IRDA_BR_38400; + ir_baud = USB_IRDA_LS_38400; break; case 57600: - ir_baud = USB_IRDA_BR_57600; + ir_baud = USB_IRDA_LS_57600; break; case 115200: - ir_baud = USB_IRDA_BR_115200; + ir_baud = USB_IRDA_LS_115200; break; case 576000: - ir_baud = USB_IRDA_BR_576000; + ir_baud = USB_IRDA_LS_576000; break; case 1152000: - ir_baud = USB_IRDA_BR_1152000; + ir_baud = USB_IRDA_LS_1152000; break; case 4000000: - ir_baud = USB_IRDA_BR_4000000; + ir_baud = USB_IRDA_LS_4000000; break; default: - ir_baud = USB_IRDA_BR_9600; + ir_baud = USB_IRDA_LS_9600; baud = 9600; } diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c index 2c5a53bdccd4..55a768487990 100644 --- a/drivers/usb/serial/keyspan.c +++ b/drivers/usb/serial/keyspan.c @@ -1062,6 +1062,8 @@ static void usa49_glocont_callback(struct urb *urb) for (i = 0; i < serial->num_ports; ++i) { port = serial->port[i]; p_priv = usb_get_serial_port_data(port); + if (!p_priv) + continue; if (p_priv->resend_cont) { dev_dbg(&port->dev, "%s - sending setup\n", __func__); @@ -1463,6 +1465,8 @@ static void usa67_glocont_callback(struct urb *urb) for (i = 0; i < serial->num_ports; ++i) { port = serial->port[i]; p_priv = usb_get_serial_port_data(port); + if (!p_priv) + continue; if (p_priv->resend_cont) { dev_dbg(&port->dev, "%s - sending setup\n", __func__); diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 393a91ab56ed..37967f4d93fd 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -1905,10 +1905,6 @@ static int mos7720_startup(struct usb_serial *serial) product = le16_to_cpu(serial->dev->descriptor.idProduct); dev = serial->dev; - /* setting configuration feature to one */ - usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), - (__u8)0x03, 0x00, 0x01, 0x00, NULL, 0x00, 5000); - if (product == MOSCHIP_DEVICE_ID_7715) { struct urb *urb = serial->port[0]->interrupt_in_urb; diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 5e490177cf75..285527f115dd 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -131,11 +131,15 @@ /* This driver also supports * ATEN UC2324 device using Moschip MCS7840 * ATEN UC2322 device using Moschip MCS7820 + * MOXA UPort 2210 device using Moschip MCS7820 */ #define USB_VENDOR_ID_ATENINTL 0x0557 #define ATENINTL_DEVICE_ID_UC2324 0x2011 #define ATENINTL_DEVICE_ID_UC2322 0x7820 +#define USB_VENDOR_ID_MOXA 0x110a +#define MOXA_DEVICE_ID_2210 0x2210 + /* Interrupt Routine Defines */ #define SERIAL_IIR_RLS 0x06 @@ -206,6 +210,7 @@ static const struct usb_device_id id_table[] = { {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL2_4)}, {USB_DEVICE(USB_VENDOR_ID_ATENINTL, ATENINTL_DEVICE_ID_UC2324)}, {USB_DEVICE(USB_VENDOR_ID_ATENINTL, ATENINTL_DEVICE_ID_UC2322)}, + {USB_DEVICE(USB_VENDOR_ID_MOXA, MOXA_DEVICE_ID_2210)}, {} /* terminating entry */ }; MODULE_DEVICE_TABLE(usb, id_table); @@ -2065,6 +2070,7 @@ static int mos7840_probe(struct usb_serial *serial, const struct usb_device_id *id) { u16 product = le16_to_cpu(serial->dev->descriptor.idProduct); + u16 vid = le16_to_cpu(serial->dev->descriptor.idVendor); u8 *buf; int device_type; @@ -2074,6 +2080,11 @@ static int mos7840_probe(struct usb_serial *serial, goto out; } + if (vid == USB_VENDOR_ID_MOXA && product == MOXA_DEVICE_ID_2210) { + device_type = MOSCHIP_DEVICE_ID_7820; + goto out; + } + buf = kzalloc(VENDOR_READ_LENGTH, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -2327,11 +2338,6 @@ out: goto error; } else dev_dbg(&port->dev, "ZLP_REG5 Writing success status%d\n", status); - - /* setting configuration feature to one */ - usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), - 0x03, 0x00, 0x01, 0x00, NULL, 0x00, - MOS_WDR_TIMEOUT); } return 0; error: diff --git a/drivers/usb/serial/opticon.c b/drivers/usb/serial/opticon.c index 58657d64678b..c37572a8bb06 100644 --- a/drivers/usb/serial/opticon.c +++ b/drivers/usb/serial/opticon.c @@ -116,7 +116,7 @@ static int send_control_msg(struct usb_serial_port *port, u8 requesttype, retval = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), requesttype, USB_DIR_OUT|USB_TYPE_VENDOR|USB_RECIP_INTERFACE, - 0, 0, buffer, 1, 0); + 0, 0, buffer, 1, USB_CTRL_SET_TIMEOUT); kfree(buffer); if (retval < 0) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index b9fad046828d..eff353de47cd 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -200,6 +200,7 @@ static void option_instat_callback(struct urb *urb); #define DELL_PRODUCT_5804_MINICARD_ATT 0x819b /* Novatel E371 */ #define DELL_PRODUCT_5821E 0x81d7 +#define DELL_PRODUCT_5821E_ESIM 0x81e0 #define KYOCERA_VENDOR_ID 0x0c88 #define KYOCERA_PRODUCT_KPC650 0x17da @@ -250,6 +251,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_BG96 0x0296 #define QUECTEL_PRODUCT_EP06 0x0306 #define QUECTEL_PRODUCT_EM12 0x0512 +#define QUECTEL_PRODUCT_RM500Q 0x0800 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -569,6 +571,9 @@ static void option_instat_callback(struct urb *urb); /* Interface must have two endpoints */ #define NUMEP2 BIT(16) +/* Device needs ZLP */ +#define ZLP BIT(17) + static const struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_COLT) }, @@ -1047,6 +1052,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(DELL_VENDOR_ID, DELL_PRODUCT_5804_MINICARD_ATT, 0xff, 0xff, 0xff) }, { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5821E), .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5821E_ESIM), + .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */ { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) }, @@ -1101,6 +1108,11 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x30) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x10), + .driver_info = ZLP }, + { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), @@ -1172,6 +1184,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1102, 0xff), /* Telit ME910 (ECM) */ .driver_info = NCTRL(0) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x110a, 0xff), /* Telit ME910G1 */ + .driver_info = NCTRL(0) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), @@ -1196,6 +1210,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1901, 0xff), /* Telit LN940 (MBIM) */ .driver_info = NCTRL(0) }, + { USB_DEVICE(TELIT_VENDOR_ID, 0x9010), /* Telit SBL FN980 flashing device */ + .driver_info = NCTRL(0) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff), .driver_info = RSVD(1) }, @@ -1992,6 +2008,10 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x13) }, { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x14) }, { USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0xa31d, 0xff, 0x06, 0x1b) }, + { USB_DEVICE(0x0489, 0xe0b4), /* Foxconn T77W968 */ + .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, + { USB_DEVICE(0x0489, 0xe0b5), /* Foxconn T77W968 ESIM */ + .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE(0x1508, 0x1001), /* Fibocom NL668 */ .driver_info = RSVD(4) | RSVD(5) | RSVD(6) }, { USB_DEVICE(0x2cb7, 0x0104), /* Fibocom NL678 series */ @@ -2100,6 +2120,9 @@ static int option_attach(struct usb_serial *serial) if (!(device_flags & NCTRL(iface_desc->bInterfaceNumber))) data->use_send_setup = 1; + if (device_flags & ZLP) + data->use_zlp = 1; + spin_lock_init(&data->susp_lock); usb_set_serial_data(serial, data); diff --git a/drivers/usb/serial/quatech2.c b/drivers/usb/serial/quatech2.c index 60e17d1444c3..f16e0b8c1ed4 100644 --- a/drivers/usb/serial/quatech2.c +++ b/drivers/usb/serial/quatech2.c @@ -867,7 +867,10 @@ static void qt2_update_msr(struct usb_serial_port *port, unsigned char *ch) u8 newMSR = (u8) *ch; unsigned long flags; + /* May be called from qt2_process_read_urb() for an unbound port. */ port_priv = usb_get_serial_port_data(port); + if (!port_priv) + return; spin_lock_irqsave(&port_priv->lock, flags); port_priv->shadowMSR = newMSR; @@ -895,7 +898,10 @@ static void qt2_update_lsr(struct usb_serial_port *port, unsigned char *ch) unsigned long flags; u8 newLSR = (u8) *ch; + /* May be called from qt2_process_read_urb() for an unbound port. */ port_priv = usb_get_serial_port_data(port); + if (!port_priv) + return; if (newLSR & UART_LSR_BI) newLSR &= (u8) (UART_LSR_OE | UART_LSR_BI); diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index 57e9f6617084..98c22ace784a 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -780,7 +780,6 @@ static void ti_close(struct usb_serial_port *port) struct ti_port *tport; int port_number; int status; - int do_unlock; unsigned long flags; tdev = usb_get_serial_data(port->serial); @@ -804,16 +803,13 @@ static void ti_close(struct usb_serial_port *port) "%s - cannot send close port command, %d\n" , __func__, status); - /* if mutex_lock is interrupted, continue anyway */ - do_unlock = !mutex_lock_interruptible(&tdev->td_open_close_lock); + mutex_lock(&tdev->td_open_close_lock); --tport->tp_tdev->td_open_port_count; - if (tport->tp_tdev->td_open_port_count <= 0) { + if (tport->tp_tdev->td_open_port_count == 0) { /* last port is closed, shut down interrupt urb */ usb_kill_urb(port->serial->port[0]->interrupt_in_urb); - tport->tp_tdev->td_open_port_count = 0; } - if (do_unlock) - mutex_unlock(&tdev->td_open_close_lock); + mutex_unlock(&tdev->td_open_close_lock); } diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 511242111403..15e05ebf37ac 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -89,6 +89,8 @@ DEVICE(moto_modem, MOTO_IDS); #define MOTOROLA_TETRA_IDS() \ { USB_DEVICE(0x0cad, 0x9011) }, /* Motorola Solutions TETRA PEI */ \ { USB_DEVICE(0x0cad, 0x9012) }, /* MTP6550 */ \ + { USB_DEVICE(0x0cad, 0x9013) }, /* MTP3xxx */ \ + { USB_DEVICE(0x0cad, 0x9015) }, /* MTP85xx */ \ { USB_DEVICE(0x0cad, 0x9016) } /* TPG2200 */ DEVICE(motorola_tetra, MOTOROLA_TETRA_IDS); diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 8115b7cccf1a..3dc3464626fb 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -1332,6 +1332,9 @@ static int usb_serial_register(struct usb_serial_driver *driver) return -EINVAL; } + /* Prevent individual ports from being unbound. */ + driver->driver.suppress_bind_attrs = true; + usb_serial_operations_init(driver); /* Add this device to our list of devices */ diff --git a/drivers/usb/serial/usb-wwan.h b/drivers/usb/serial/usb-wwan.h index d28dab4b9eff..9879773fb39e 100644 --- a/drivers/usb/serial/usb-wwan.h +++ b/drivers/usb/serial/usb-wwan.h @@ -36,6 +36,7 @@ struct usb_wwan_intf_private { spinlock_t susp_lock; unsigned int suspended:1; unsigned int use_send_setup:1; + unsigned int use_zlp:1; int in_flight; unsigned int open_ports; void *private; diff --git a/drivers/usb/serial/usb_wwan.c b/drivers/usb/serial/usb_wwan.c index 59bfcb3da116..95e9576c2fe6 100644 --- a/drivers/usb/serial/usb_wwan.c +++ b/drivers/usb/serial/usb_wwan.c @@ -492,6 +492,7 @@ static struct urb *usb_wwan_setup_urb(struct usb_serial_port *port, void (*callback) (struct urb *)) { struct usb_serial *serial = port->serial; + struct usb_wwan_intf_private *intfdata = usb_get_serial_data(serial); struct urb *urb; urb = usb_alloc_urb(0, GFP_KERNEL); /* No ISO */ @@ -502,6 +503,9 @@ static struct urb *usb_wwan_setup_urb(struct usb_serial_port *port, usb_sndbulkpipe(serial->dev, endpoint) | dir, buf, len, callback, ctx); + if (intfdata->use_zlp && dir == USB_DIR_OUT) + urb->transfer_flags |= URB_ZERO_PACKET; + return urb; } diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c index 55cebc1e6fec..163ede42af20 100644 --- a/drivers/usb/serial/whiteheat.c +++ b/drivers/usb/serial/whiteheat.c @@ -575,6 +575,10 @@ static int firm_send_command(struct usb_serial_port *port, __u8 command, command_port = port->serial->port[COMMAND_PORT]; command_info = usb_get_serial_port_data(command_port); + + if (command_port->bulk_out_size < datasize + 1) + return -EIO; + mutex_lock(&command_info->mutex); command_info->command_finished = false; @@ -648,6 +652,7 @@ static void firm_setup_port(struct tty_struct *tty) struct device *dev = &port->dev; struct whiteheat_port_settings port_settings; unsigned int cflag = tty->termios.c_cflag; + speed_t baud; port_settings.port = port->port_number + 1; @@ -708,11 +713,13 @@ static void firm_setup_port(struct tty_struct *tty) dev_dbg(dev, "%s - XON = %2x, XOFF = %2x\n", __func__, port_settings.xon, port_settings.xoff); /* get the baud rate wanted */ - port_settings.baud = tty_get_baud_rate(tty); - dev_dbg(dev, "%s - baud rate = %d\n", __func__, port_settings.baud); + baud = tty_get_baud_rate(tty); + port_settings.baud = cpu_to_le32(baud); + dev_dbg(dev, "%s - baud rate = %u\n", __func__, baud); /* fixme: should set validated settings */ - tty_encode_baud_rate(tty, port_settings.baud, port_settings.baud); + tty_encode_baud_rate(tty, baud, baud); + /* handle any settings that aren't specified in the tty structure */ port_settings.lloop = 0; diff --git a/drivers/usb/serial/whiteheat.h b/drivers/usb/serial/whiteheat.h index 38065df4d2d8..30169c859a74 100644 --- a/drivers/usb/serial/whiteheat.h +++ b/drivers/usb/serial/whiteheat.h @@ -91,7 +91,7 @@ struct whiteheat_simple { struct whiteheat_port_settings { __u8 port; /* port number (1 to N) */ - __u32 baud; /* any value 7 - 460800, firmware calculates + __le32 baud; /* any value 7 - 460800, firmware calculates best fit; arrives little endian */ __u8 bits; /* 5, 6, 7, or 8 */ __u8 stop; /* 1 or 2, default 1 (2 = 1.5 if bits = 5) */ diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index afb4b0bf47b3..fd5398efce41 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -81,7 +81,6 @@ static const char* host_info(struct Scsi_Host *host) static int slave_alloc (struct scsi_device *sdev) { struct us_data *us = host_to_us(sdev->host); - int maxp; /* * Set the INQUIRY transfer length to 36. We don't use any of @@ -90,15 +89,6 @@ static int slave_alloc (struct scsi_device *sdev) */ sdev->inquiry_len = 36; - /* - * USB has unusual scatter-gather requirements: the length of each - * scatterlist element except the last must be divisible by the - * Bulk maxpacket value. Fortunately this value is always a - * power of 2. Inform the block layer about this requirement. - */ - maxp = usb_maxpacket(us->pusb_dev, us->recv_bulk_pipe, 0); - blk_queue_virt_boundary(sdev->request_queue, maxp - 1); - /* * Some host controllers may have alignment requirements. * We'll play it safe by requiring 512-byte alignment always. diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 21c8925a4116..9d97543449e6 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -46,6 +46,7 @@ struct uas_dev_info { struct scsi_cmnd *cmnd[MAX_CMNDS]; spinlock_t lock; struct work_struct work; + struct work_struct scan_work; /* for async scanning */ }; enum { @@ -115,6 +116,17 @@ out: spin_unlock_irqrestore(&devinfo->lock, flags); } +static void uas_scan_work(struct work_struct *work) +{ + struct uas_dev_info *devinfo = + container_of(work, struct uas_dev_info, scan_work); + struct Scsi_Host *shost = usb_get_intfdata(devinfo->intf); + + dev_dbg(&devinfo->intf->dev, "starting scan\n"); + scsi_scan_host(shost); + dev_dbg(&devinfo->intf->dev, "scan complete\n"); +} + static void uas_add_work(struct uas_cmd_info *cmdinfo) { struct scsi_pointer *scp = (void *)cmdinfo; @@ -796,29 +808,9 @@ static int uas_slave_alloc(struct scsi_device *sdev) { struct uas_dev_info *devinfo = (struct uas_dev_info *)sdev->host->hostdata; - int maxp; sdev->hostdata = devinfo; - /* - * We have two requirements here. We must satisfy the requirements - * of the physical HC and the demands of the protocol, as we - * definitely want no additional memory allocation in this path - * ruling out using bounce buffers. - * - * For a transmission on USB to continue we must never send - * a package that is smaller than maxpacket. Hence the length of each - * scatterlist element except the last must be divisible by the - * Bulk maxpacket value. - * If the HC does not ensure that through SG, - * the upper layer must do that. We must assume nothing - * about the capabilities off the HC, so we use the most - * pessimistic requirement. - */ - - maxp = usb_maxpacket(devinfo->udev, devinfo->data_in_pipe, 0); - blk_queue_virt_boundary(sdev->request_queue, maxp - 1); - /* * The protocol has no requirements on alignment in the strict sense. * Controllers may or may not have alignment restrictions. @@ -852,6 +844,10 @@ static int uas_slave_configure(struct scsi_device *sdev) sdev->wce_default_on = 1; } + /* Some disks cannot handle READ_CAPACITY_16 */ + if (devinfo->flags & US_FL_NO_READ_CAPACITY_16) + sdev->no_read_capacity_16 = 1; + /* * Some disks return the total number of blocks in response * to READ CAPACITY rather than the highest block number. @@ -860,6 +856,12 @@ static int uas_slave_configure(struct scsi_device *sdev) if (devinfo->flags & US_FL_FIX_CAPACITY) sdev->fix_capacity = 1; + /* + * in some cases we have to guess + */ + if (devinfo->flags & US_FL_CAPACITY_HEURISTICS) + sdev->guess_capacity = 1; + /* * Some devices don't like MODE SENSE with page=0x3f, * which is the command used for checking if a device @@ -999,6 +1001,7 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) init_usb_anchor(&devinfo->data_urbs); spin_lock_init(&devinfo->lock); INIT_WORK(&devinfo->work, uas_do_work); + INIT_WORK(&devinfo->scan_work, uas_scan_work); result = uas_configure_endpoints(devinfo); if (result) @@ -1015,7 +1018,9 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id) if (result) goto free_streams; - scsi_scan_host(shost); + /* Submit the delayed_work for SCSI-device scanning */ + schedule_work(&devinfo->scan_work); + return result; free_streams: @@ -1183,6 +1188,12 @@ static void uas_disconnect(struct usb_interface *intf) usb_kill_anchored_urbs(&devinfo->data_urbs); uas_zap_pending(devinfo, DID_NO_CONNECT); + /* + * Prevent SCSI scanning (if it hasn't started yet) + * or wait for the SCSI-scanning routine to stop. + */ + cancel_work_sync(&devinfo->scan_work); + scsi_remove_host(shost); uas_free_streams(devinfo); scsi_host_put(shost); diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index fb69cb64f7d4..df8ee83c3f1a 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -1277,6 +1277,12 @@ UNUSUAL_DEV( 0x090a, 0x1200, 0x0000, 0x9999, USB_SC_RBC, USB_PR_BULK, NULL, 0 ), +UNUSUAL_DEV(0x090c, 0x1000, 0x1100, 0x1100, + "Samsung", + "Flash Drive FIT", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_MAX_SECTORS_64), + /* aeb */ UNUSUAL_DEV( 0x090c, 0x1132, 0x0000, 0xffff, "Feiya", diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index f15aa47c54a9..0eb8c67ee138 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -163,12 +163,15 @@ UNUSUAL_DEV(0x2537, 0x1068, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_IGNORE_UAS), -/* Reported-by: Takeo Nakayama */ +/* + * Initially Reported-by: Takeo Nakayama + * UAS Ignore Reported by Steven Ellis + */ UNUSUAL_DEV(0x357d, 0x7788, 0x0000, 0x9999, "JMicron", "JMS566", USB_SC_DEVICE, USB_PR_DEVICE, NULL, - US_FL_NO_REPORT_OPCODES), + US_FL_NO_REPORT_OPCODES | US_FL_IGNORE_UAS), /* Reported-by: Hans de Goede */ UNUSUAL_DEV(0x4971, 0x1012, 0x0000, 0x9999, diff --git a/drivers/usb/usbip/Kconfig b/drivers/usb/usbip/Kconfig index a20b65cb6678..8276a20ecea7 100644 --- a/drivers/usb/usbip/Kconfig +++ b/drivers/usb/usbip/Kconfig @@ -2,6 +2,7 @@ config USBIP_CORE tristate "USB/IP support" depends on NET select USB_COMMON + select SGL_ALLOC ---help--- This enables pushing USB packets over IP to allow remote machines direct access to USB devices. It provides the diff --git a/drivers/usb/usbip/stub.h b/drivers/usb/usbip/stub.h index 84c0599b45b7..d9d14d875949 100644 --- a/drivers/usb/usbip/stub.h +++ b/drivers/usb/usbip/stub.h @@ -66,7 +66,11 @@ struct stub_priv { unsigned long seqnum; struct list_head list; struct stub_device *sdev; - struct urb *urb; + struct urb **urbs; + struct scatterlist *sgl; + int num_urbs; + int completed_urbs; + int urb_status; int unlinking; }; @@ -100,6 +104,7 @@ extern struct usb_device_driver stub_driver; struct bus_id_priv *get_busid_priv(const char *busid); void put_busid_priv(struct bus_id_priv *bid); int del_match_busid(char *busid); +void stub_free_priv_and_urb(struct stub_priv *priv); void stub_device_cleanup_urbs(struct stub_device *sdev); /* stub_rx.c */ diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c index 108dd65fbfbc..2dc662cf0694 100644 --- a/drivers/usb/usbip/stub_main.c +++ b/drivers/usb/usbip/stub_main.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "usbip_common.h" #include "stub.h" @@ -297,13 +298,49 @@ static struct stub_priv *stub_priv_pop_from_listhead(struct list_head *listhead) struct stub_priv *priv, *tmp; list_for_each_entry_safe(priv, tmp, listhead, list) { - list_del(&priv->list); + list_del_init(&priv->list); return priv; } return NULL; } +void stub_free_priv_and_urb(struct stub_priv *priv) +{ + struct urb *urb; + int i; + + for (i = 0; i < priv->num_urbs; i++) { + urb = priv->urbs[i]; + + if (!urb) + return; + + kfree(urb->setup_packet); + urb->setup_packet = NULL; + + + if (urb->transfer_buffer && !priv->sgl) { + kfree(urb->transfer_buffer); + urb->transfer_buffer = NULL; + } + + if (urb->num_sgs) { + sgl_free(urb->sg); + urb->sg = NULL; + urb->num_sgs = 0; + } + + usb_free_urb(urb); + } + if (!list_empty(&priv->list)) + list_del(&priv->list); + if (priv->sgl) + sgl_free(priv->sgl); + kfree(priv->urbs); + kmem_cache_free(stub_priv_cache, priv); +} + static struct stub_priv *stub_priv_pop(struct stub_device *sdev) { unsigned long flags; @@ -330,25 +367,15 @@ done: void stub_device_cleanup_urbs(struct stub_device *sdev) { struct stub_priv *priv; - struct urb *urb; + int i; dev_dbg(&sdev->udev->dev, "Stub device cleaning up urbs\n"); while ((priv = stub_priv_pop(sdev))) { - urb = priv->urb; - dev_dbg(&sdev->udev->dev, "free urb seqnum %lu\n", - priv->seqnum); - usb_kill_urb(urb); + for (i = 0; i < priv->num_urbs; i++) + usb_kill_urb(priv->urbs[i]); - kmem_cache_free(stub_priv_cache, priv); - - kfree(urb->transfer_buffer); - urb->transfer_buffer = NULL; - - kfree(urb->setup_packet); - urb->setup_packet = NULL; - - usb_free_urb(urb); + stub_free_priv_and_urb(priv); } } diff --git a/drivers/usb/usbip/stub_rx.c b/drivers/usb/usbip/stub_rx.c index 777a4058c407..cb24b22252e4 100644 --- a/drivers/usb/usbip/stub_rx.c +++ b/drivers/usb/usbip/stub_rx.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "usbip_common.h" #include "stub.h" @@ -215,7 +216,7 @@ static void tweak_special_requests(struct urb *urb) static int stub_recv_cmd_unlink(struct stub_device *sdev, struct usbip_header *pdu) { - int ret; + int ret, i; unsigned long flags; struct stub_priv *priv; @@ -260,12 +261,14 @@ static int stub_recv_cmd_unlink(struct stub_device *sdev, * so a driver in a client host will know the failure * of the unlink request ? */ - ret = usb_unlink_urb(priv->urb); - if (ret != -EINPROGRESS) - dev_err(&priv->urb->dev->dev, - "failed to unlink a urb # %lu, ret %d\n", - priv->seqnum, ret); - + for (i = priv->completed_urbs; i < priv->num_urbs; i++) { + ret = usb_unlink_urb(priv->urbs[i]); + if (ret != -EINPROGRESS) + dev_err(&priv->urbs[i]->dev->dev, + "failed to unlink %d/%d urb of seqnum %lu, ret %d\n", + i + 1, priv->num_urbs, + priv->seqnum, ret); + } return 0; } @@ -353,14 +356,6 @@ static int get_pipe(struct stub_device *sdev, struct usbip_header *pdu) epd = &ep->desc; - /* validate transfer_buffer_length */ - if (pdu->u.cmd_submit.transfer_buffer_length > INT_MAX) { - dev_err(&sdev->udev->dev, - "CMD_SUBMIT: -EMSGSIZE transfer_buffer_length %d\n", - pdu->u.cmd_submit.transfer_buffer_length); - return -1; - } - if (usb_endpoint_xfer_control(epd)) { if (dir == USBIP_DIR_OUT) return usb_sndctrlpipe(udev, epnum); @@ -458,93 +453,191 @@ static void masking_bogus_flags(struct urb *urb) urb->transfer_flags &= allowed; } +static int stub_recv_xbuff(struct usbip_device *ud, struct stub_priv *priv) +{ + int ret; + int i; + + for (i = 0; i < priv->num_urbs; i++) { + ret = usbip_recv_xbuff(ud, priv->urbs[i]); + if (ret < 0) + break; + } + + return ret; +} + static void stub_recv_cmd_submit(struct stub_device *sdev, struct usbip_header *pdu) { - int ret; struct stub_priv *priv; struct usbip_device *ud = &sdev->ud; struct usb_device *udev = sdev->udev; + struct scatterlist *sgl = NULL, *sg; + void *buffer = NULL; + unsigned long long buf_len; + int nents; + int num_urbs = 1; int pipe = get_pipe(sdev, pdu); + int use_sg = pdu->u.cmd_submit.transfer_flags & URB_DMA_MAP_SG; + int support_sg = 1; + int np = 0; + int ret, i; if (pipe == -1) return; + /* + * Smatch reported the error case where use_sg is true and buf_len is 0. + * In this case, It adds SDEV_EVENT_ERROR_MALLOC and stub_priv will be + * released by stub event handler and connection will be shut down. + */ priv = stub_priv_alloc(sdev, pdu); if (!priv) return; - /* setup a urb */ - if (usb_pipeisoc(pipe)) - priv->urb = usb_alloc_urb(pdu->u.cmd_submit.number_of_packets, - GFP_KERNEL); - else - priv->urb = usb_alloc_urb(0, GFP_KERNEL); + buf_len = (unsigned long long)pdu->u.cmd_submit.transfer_buffer_length; - if (!priv->urb) { - usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC); - return; + if (use_sg && !buf_len) { + dev_err(&udev->dev, "sg buffer with zero length\n"); + goto err_malloc; } /* allocate urb transfer buffer, if needed */ - if (pdu->u.cmd_submit.transfer_buffer_length > 0 && - pdu->u.cmd_submit.transfer_buffer_length <= INT_MAX) { - priv->urb->transfer_buffer = - kzalloc(pdu->u.cmd_submit.transfer_buffer_length, - GFP_KERNEL); - if (!priv->urb->transfer_buffer) { - usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC); - return; + if (buf_len) { + if (use_sg) { + sgl = sgl_alloc(buf_len, GFP_KERNEL, &nents); + if (!sgl) + goto err_malloc; + + /* Check if the server's HCD supports SG */ + if (!udev->bus->sg_tablesize) { + /* + * If the server's HCD doesn't support SG, break + * a single SG request into several URBs and map + * each SG list entry to corresponding URB + * buffer. The previously allocated SG list is + * stored in priv->sgl (If the server's HCD + * support SG, SG list is stored only in + * urb->sg) and it is used as an indicator that + * the server split single SG request into + * several URBs. Later, priv->sgl is used by + * stub_complete() and stub_send_ret_submit() to + * reassemble the divied URBs. + */ + support_sg = 0; + num_urbs = nents; + priv->completed_urbs = 0; + pdu->u.cmd_submit.transfer_flags &= + ~URB_DMA_MAP_SG; + } + } else { + buffer = kzalloc(buf_len, GFP_KERNEL); + if (!buffer) + goto err_malloc; } } - /* copy urb setup packet */ - priv->urb->setup_packet = kmemdup(&pdu->u.cmd_submit.setup, 8, - GFP_KERNEL); - if (!priv->urb->setup_packet) { - dev_err(&udev->dev, "allocate setup_packet\n"); - usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC); - return; + /* allocate urb array */ + priv->num_urbs = num_urbs; + priv->urbs = kmalloc_array(num_urbs, sizeof(*priv->urbs), GFP_KERNEL); + if (!priv->urbs) + goto err_urbs; + + /* setup a urb */ + if (support_sg) { + if (usb_pipeisoc(pipe)) + np = pdu->u.cmd_submit.number_of_packets; + + priv->urbs[0] = usb_alloc_urb(np, GFP_KERNEL); + if (!priv->urbs[0]) + goto err_urb; + + if (buf_len) { + if (use_sg) { + priv->urbs[0]->sg = sgl; + priv->urbs[0]->num_sgs = nents; + priv->urbs[0]->transfer_buffer = NULL; + } else { + priv->urbs[0]->transfer_buffer = buffer; + } + } + + /* copy urb setup packet */ + priv->urbs[0]->setup_packet = kmemdup(&pdu->u.cmd_submit.setup, + 8, GFP_KERNEL); + if (!priv->urbs[0]->setup_packet) { + usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC); + return; + } + + usbip_pack_pdu(pdu, priv->urbs[0], USBIP_CMD_SUBMIT, 0); + } else { + for_each_sg(sgl, sg, nents, i) { + priv->urbs[i] = usb_alloc_urb(0, GFP_KERNEL); + /* The URBs which is previously allocated will be freed + * in stub_device_cleanup_urbs() if error occurs. + */ + if (!priv->urbs[i]) + goto err_urb; + + usbip_pack_pdu(pdu, priv->urbs[i], USBIP_CMD_SUBMIT, 0); + priv->urbs[i]->transfer_buffer = sg_virt(sg); + priv->urbs[i]->transfer_buffer_length = sg->length; + } + priv->sgl = sgl; } - /* set other members from the base header of pdu */ - priv->urb->context = (void *) priv; - priv->urb->dev = udev; - priv->urb->pipe = pipe; - priv->urb->complete = stub_complete; + for (i = 0; i < num_urbs; i++) { + /* set other members from the base header of pdu */ + priv->urbs[i]->context = (void *) priv; + priv->urbs[i]->dev = udev; + priv->urbs[i]->pipe = pipe; + priv->urbs[i]->complete = stub_complete; - usbip_pack_pdu(pdu, priv->urb, USBIP_CMD_SUBMIT, 0); + /* no need to submit an intercepted request, but harmless? */ + tweak_special_requests(priv->urbs[i]); + masking_bogus_flags(priv->urbs[i]); + } - if (usbip_recv_xbuff(ud, priv->urb) < 0) + if (stub_recv_xbuff(ud, priv) < 0) return; - if (usbip_recv_iso(ud, priv->urb) < 0) + if (usbip_recv_iso(ud, priv->urbs[0]) < 0) return; - /* no need to submit an intercepted request, but harmless? */ - tweak_special_requests(priv->urb); - - masking_bogus_flags(priv->urb); /* urb is now ready to submit */ - ret = usb_submit_urb(priv->urb, GFP_KERNEL); + for (i = 0; i < priv->num_urbs; i++) { + ret = usb_submit_urb(priv->urbs[i], GFP_KERNEL); - if (ret == 0) - usbip_dbg_stub_rx("submit urb ok, seqnum %u\n", - pdu->base.seqnum); - else { - dev_err(&udev->dev, "submit_urb error, %d\n", ret); - usbip_dump_header(pdu); - usbip_dump_urb(priv->urb); + if (ret == 0) + usbip_dbg_stub_rx("submit urb ok, seqnum %u\n", + pdu->base.seqnum); + else { + dev_err(&udev->dev, "submit_urb error, %d\n", ret); + usbip_dump_header(pdu); + usbip_dump_urb(priv->urbs[i]); - /* - * Pessimistic. - * This connection will be discarded. - */ - usbip_event_add(ud, SDEV_EVENT_ERROR_SUBMIT); + /* + * Pessimistic. + * This connection will be discarded. + */ + usbip_event_add(ud, SDEV_EVENT_ERROR_SUBMIT); + break; + } } usbip_dbg_stub_rx("Leave\n"); + return; + +err_urb: + kfree(priv->urbs); +err_urbs: + kfree(buffer); + sgl_free(sgl); +err_malloc: + usbip_event_add(ud, SDEV_EVENT_ERROR_MALLOC); } /* recv a pdu */ diff --git a/drivers/usb/usbip/stub_tx.c b/drivers/usb/usbip/stub_tx.c index 96aa375b80d9..45c34a37432e 100644 --- a/drivers/usb/usbip/stub_tx.c +++ b/drivers/usb/usbip/stub_tx.c @@ -19,25 +19,11 @@ #include #include +#include #include "usbip_common.h" #include "stub.h" -static void stub_free_priv_and_urb(struct stub_priv *priv) -{ - struct urb *urb = priv->urb; - - kfree(urb->setup_packet); - urb->setup_packet = NULL; - - kfree(urb->transfer_buffer); - urb->transfer_buffer = NULL; - - list_del(&priv->list); - kmem_cache_free(stub_priv_cache, priv); - usb_free_urb(urb); -} - /* be in spin_lock_irqsave(&sdev->priv_lock, flags) */ void stub_enqueue_ret_unlink(struct stub_device *sdev, __u32 seqnum, __u32 status) @@ -99,6 +85,22 @@ void stub_complete(struct urb *urb) break; } + /* + * If the server breaks single SG request into the several URBs, the + * URBs must be reassembled before sending completed URB to the vhci. + * Don't wake up the tx thread until all the URBs are completed. + */ + if (priv->sgl) { + priv->completed_urbs++; + + /* Only save the first error status */ + if (urb->status && !priv->urb_status) + priv->urb_status = urb->status; + + if (priv->completed_urbs < priv->num_urbs) + return; + } + /* link a urb to the queue of tx. */ spin_lock_irqsave(&sdev->priv_lock, flags); if (sdev->ud.tcp_socket == NULL) { @@ -170,18 +172,22 @@ static int stub_send_ret_submit(struct stub_device *sdev) size_t total_size = 0; while ((priv = dequeue_from_priv_tx(sdev)) != NULL) { - int ret; - struct urb *urb = priv->urb; + struct urb *urb = priv->urbs[0]; struct usbip_header pdu_header; struct usbip_iso_packet_descriptor *iso_buffer = NULL; struct kvec *iov = NULL; + struct scatterlist *sg; + u32 actual_length = 0; int iovnum = 0; + int ret; + int i; txsize = 0; memset(&pdu_header, 0, sizeof(pdu_header)); memset(&msg, 0, sizeof(msg)); - if (urb->actual_length > 0 && !urb->transfer_buffer) { + if (urb->actual_length > 0 && !urb->transfer_buffer && + !urb->num_sgs) { dev_err(&sdev->udev->dev, "urb: actual_length %d transfer_buffer null\n", urb->actual_length); @@ -190,6 +196,11 @@ static int stub_send_ret_submit(struct stub_device *sdev) if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) iovnum = 2 + urb->number_of_packets; + else if (usb_pipein(urb->pipe) && urb->actual_length > 0 && + urb->num_sgs) + iovnum = 1 + urb->num_sgs; + else if (usb_pipein(urb->pipe) && priv->sgl) + iovnum = 1 + priv->num_urbs; else iovnum = 2; @@ -206,6 +217,15 @@ static int stub_send_ret_submit(struct stub_device *sdev) setup_ret_submit_pdu(&pdu_header, urb); usbip_dbg_stub_tx("setup txdata seqnum: %d\n", pdu_header.base.seqnum); + + if (priv->sgl) { + for (i = 0; i < priv->num_urbs; i++) + actual_length += priv->urbs[i]->actual_length; + + pdu_header.u.ret_submit.status = priv->urb_status; + pdu_header.u.ret_submit.actual_length = actual_length; + } + usbip_header_correct_endian(&pdu_header, 1); iov[iovnum].iov_base = &pdu_header; @@ -214,12 +234,47 @@ static int stub_send_ret_submit(struct stub_device *sdev) txsize += sizeof(pdu_header); /* 2. setup transfer buffer */ - if (usb_pipein(urb->pipe) && + if (usb_pipein(urb->pipe) && priv->sgl) { + /* If the server split a single SG request into several + * URBs because the server's HCD doesn't support SG, + * reassemble the split URB buffers into a single + * return command. + */ + for (i = 0; i < priv->num_urbs; i++) { + iov[iovnum].iov_base = + priv->urbs[i]->transfer_buffer; + iov[iovnum].iov_len = + priv->urbs[i]->actual_length; + iovnum++; + } + txsize += actual_length; + } else if (usb_pipein(urb->pipe) && usb_pipetype(urb->pipe) != PIPE_ISOCHRONOUS && urb->actual_length > 0) { - iov[iovnum].iov_base = urb->transfer_buffer; - iov[iovnum].iov_len = urb->actual_length; - iovnum++; + if (urb->num_sgs) { + unsigned int copy = urb->actual_length; + int size; + + for_each_sg(urb->sg, sg, urb->num_sgs, i) { + if (copy == 0) + break; + + if (copy < sg->length) + size = copy; + else + size = sg->length; + + iov[iovnum].iov_base = sg_virt(sg); + iov[iovnum].iov_len = size; + + iovnum++; + copy -= size; + } + } else { + iov[iovnum].iov_base = urb->transfer_buffer; + iov[iovnum].iov_len = urb->actual_length; + iovnum++; + } txsize += urb->actual_length; } else if (usb_pipein(urb->pipe) && usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS) { diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c index 7f0d22131121..cd01f1e278ee 100644 --- a/drivers/usb/usbip/usbip_common.c +++ b/drivers/usb/usbip/usbip_common.c @@ -695,8 +695,12 @@ EXPORT_SYMBOL_GPL(usbip_pad_iso); /* some members of urb must be substituted before. */ int usbip_recv_xbuff(struct usbip_device *ud, struct urb *urb) { - int ret; + struct scatterlist *sg; + int ret = 0; + int recv; int size; + int copy; + int i; if (ud->side == USBIP_STUB || ud->side == USBIP_VUDC) { /* the direction of urb must be OUT. */ @@ -716,29 +720,51 @@ int usbip_recv_xbuff(struct usbip_device *ud, struct urb *urb) if (!(size > 0)) return 0; - if (size > urb->transfer_buffer_length) { + if (size > urb->transfer_buffer_length) /* should not happen, probably malicious packet */ - if (ud->side == USBIP_STUB) { - usbip_event_add(ud, SDEV_EVENT_ERROR_TCP); - return 0; - } else { - usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); - return -EPIPE; - } - } + goto error; - ret = usbip_recv(ud->tcp_socket, urb->transfer_buffer, size); - if (ret != size) { - dev_err(&urb->dev->dev, "recv xbuf, %d\n", ret); - if (ud->side == USBIP_STUB || ud->side == USBIP_VUDC) { - usbip_event_add(ud, SDEV_EVENT_ERROR_TCP); - } else { - usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); - return -EPIPE; + if (urb->num_sgs) { + copy = size; + for_each_sg(urb->sg, sg, urb->num_sgs, i) { + int recv_size; + + if (copy < sg->length) + recv_size = copy; + else + recv_size = sg->length; + + recv = usbip_recv(ud->tcp_socket, sg_virt(sg), + recv_size); + + if (recv != recv_size) + goto error; + + copy -= recv; + ret += recv; + + if (!copy) + break; } + + if (ret != size) + goto error; + } else { + ret = usbip_recv(ud->tcp_socket, urb->transfer_buffer, size); + if (ret != size) + goto error; } return ret; + +error: + dev_err(&urb->dev->dev, "recv xbuf, %d\n", ret); + if (ud->side == USBIP_STUB || ud->side == USBIP_VUDC) + usbip_event_add(ud, SDEV_EVENT_ERROR_TCP); + else + usbip_event_add(ud, VDEV_EVENT_ERROR_TCP); + + return -EPIPE; } EXPORT_SYMBOL_GPL(usbip_recv_xbuff); diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 84e2d7edaa5c..253e0affd396 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -716,8 +716,11 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag } vdev = &vhci_hcd->vdev[portnum-1]; - /* patch to usb_sg_init() is in 2.5.60 */ - BUG_ON(!urb->transfer_buffer && urb->transfer_buffer_length); + if (!urb->transfer_buffer && !urb->num_sgs && + urb->transfer_buffer_length) { + dev_dbg(dev, "Null URB transfer buffer\n"); + return -EINVAL; + } spin_lock_irqsave(&vhci->lock, flags); @@ -1160,6 +1163,15 @@ static int vhci_setup(struct usb_hcd *hcd) hcd->speed = HCD_USB3; hcd->self.root_hub->speed = USB_SPEED_SUPER; } + + /* + * Support SG. + * sg_tablesize is an arbitrary value to alleviate memory pressure + * on the host. + */ + hcd->self.sg_tablesize = 32; + hcd->self.no_sg_constraint = 1; + return 0; } diff --git a/drivers/usb/usbip/vhci_rx.c b/drivers/usb/usbip/vhci_rx.c index 1343037d00f9..c13dd526b996 100644 --- a/drivers/usb/usbip/vhci_rx.c +++ b/drivers/usb/usbip/vhci_rx.c @@ -91,19 +91,27 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev, usbip_pack_pdu(pdu, urb, USBIP_RET_SUBMIT, 0); /* recv transfer buffer */ - if (usbip_recv_xbuff(ud, urb) < 0) - return; + if (usbip_recv_xbuff(ud, urb) < 0) { + urb->status = -EPROTO; + goto error; + } /* recv iso_packet_descriptor */ - if (usbip_recv_iso(ud, urb) < 0) - return; + if (usbip_recv_iso(ud, urb) < 0) { + urb->status = -EPROTO; + goto error; + } /* restore the padding in iso packets */ usbip_pad_iso(ud, urb); +error: if (usbip_dbg_flag_vhci_rx) usbip_dump_urb(urb); + if (urb->num_sgs) + urb->transfer_flags &= ~URB_DMA_MAP_SG; + usbip_dbg_vhci_rx("now giveback urb %u\n", pdu->base.seqnum); spin_lock_irqsave(&vhci->lock, flags); diff --git a/drivers/usb/usbip/vhci_tx.c b/drivers/usb/usbip/vhci_tx.c index a9a663a578b6..682127d258fd 100644 --- a/drivers/usb/usbip/vhci_tx.c +++ b/drivers/usb/usbip/vhci_tx.c @@ -19,6 +19,7 @@ #include #include +#include #include "usbip_common.h" #include "vhci.h" @@ -64,19 +65,23 @@ static struct vhci_priv *dequeue_from_priv_tx(struct vhci_device *vdev) static int vhci_send_cmd_submit(struct vhci_device *vdev) { + struct usbip_iso_packet_descriptor *iso_buffer = NULL; struct vhci_priv *priv = NULL; + struct scatterlist *sg; struct msghdr msg; - struct kvec iov[3]; + struct kvec *iov; size_t txsize; size_t total_size = 0; + int iovnum; + int err = -ENOMEM; + int i; while ((priv = dequeue_from_priv_tx(vdev)) != NULL) { int ret; struct urb *urb = priv->urb; struct usbip_header pdu_header; - struct usbip_iso_packet_descriptor *iso_buffer = NULL; txsize = 0; memset(&pdu_header, 0, sizeof(pdu_header)); @@ -86,18 +91,45 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev) usbip_dbg_vhci_tx("setup txdata urb seqnum %lu\n", priv->seqnum); + if (urb->num_sgs && usb_pipeout(urb->pipe)) + iovnum = 2 + urb->num_sgs; + else + iovnum = 3; + + iov = kcalloc(iovnum, sizeof(*iov), GFP_KERNEL); + if (!iov) { + usbip_event_add(&vdev->ud, SDEV_EVENT_ERROR_MALLOC); + return -ENOMEM; + } + + if (urb->num_sgs) + urb->transfer_flags |= URB_DMA_MAP_SG; + /* 1. setup usbip_header */ setup_cmd_submit_pdu(&pdu_header, urb); usbip_header_correct_endian(&pdu_header, 1); + iovnum = 0; - iov[0].iov_base = &pdu_header; - iov[0].iov_len = sizeof(pdu_header); + iov[iovnum].iov_base = &pdu_header; + iov[iovnum].iov_len = sizeof(pdu_header); txsize += sizeof(pdu_header); + iovnum++; /* 2. setup transfer buffer */ if (!usb_pipein(urb->pipe) && urb->transfer_buffer_length > 0) { - iov[1].iov_base = urb->transfer_buffer; - iov[1].iov_len = urb->transfer_buffer_length; + if (urb->num_sgs && + !usb_endpoint_xfer_isoc(&urb->ep->desc)) { + for_each_sg(urb->sg, sg, urb->num_sgs, i) { + iov[iovnum].iov_base = sg_virt(sg); + iov[iovnum].iov_len = sg->length; + iovnum++; + } + } else { + iov[iovnum].iov_base = urb->transfer_buffer; + iov[iovnum].iov_len = + urb->transfer_buffer_length; + iovnum++; + } txsize += urb->transfer_buffer_length; } @@ -109,30 +141,43 @@ static int vhci_send_cmd_submit(struct vhci_device *vdev) if (!iso_buffer) { usbip_event_add(&vdev->ud, SDEV_EVENT_ERROR_MALLOC); - return -1; + goto err_iso_buffer; } - iov[2].iov_base = iso_buffer; - iov[2].iov_len = len; + iov[iovnum].iov_base = iso_buffer; + iov[iovnum].iov_len = len; + iovnum++; txsize += len; } - ret = kernel_sendmsg(vdev->ud.tcp_socket, &msg, iov, 3, txsize); + ret = kernel_sendmsg(vdev->ud.tcp_socket, &msg, iov, iovnum, + txsize); if (ret != txsize) { pr_err("sendmsg failed!, ret=%d for %zd\n", ret, txsize); - kfree(iso_buffer); usbip_event_add(&vdev->ud, VDEV_EVENT_ERROR_TCP); - return -1; + err = -EPIPE; + goto err_tx; } + kfree(iov); + /* This is only for isochronous case */ kfree(iso_buffer); + iso_buffer = NULL; + usbip_dbg_vhci_tx("send txdata\n"); total_size += txsize; } return total_size; + +err_tx: + kfree(iso_buffer); +err_iso_buffer: + kfree(iov); + + return err; } static struct vhci_unlink *dequeue_from_unlink_tx(struct vhci_device *vdev) diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index 0212f0ee8aea..e052f62fdea7 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -150,10 +150,10 @@ static int mdev_device_remove_ops(struct mdev_device *mdev, bool force_remove) static int mdev_device_remove_cb(struct device *dev, void *data) { - if (!dev_is_mdev(dev)) - return 0; + if (dev_is_mdev(dev)) + mdev_device_remove(dev, true); - return mdev_device_remove(dev, data ? *(bool *)data : true); + return 0; } /* @@ -182,6 +182,7 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops) /* Check for duplicate */ parent = __find_parent_device(dev); if (parent) { + parent = NULL; ret = -EEXIST; goto add_dev_err; } @@ -240,7 +241,6 @@ EXPORT_SYMBOL(mdev_register_device); void mdev_unregister_device(struct device *dev) { struct mdev_parent *parent; - bool force_remove = true; mutex_lock(&parent_list_lock); parent = __find_parent_device(dev); @@ -254,8 +254,7 @@ void mdev_unregister_device(struct device *dev) list_del(&parent->next); class_compat_remove_link(mdev_bus_compat_class, dev, NULL); - device_for_each_child(dev, (void *)&force_remove, - mdev_device_remove_cb); + device_for_each_child(dev, NULL, mdev_device_remove_cb); parent_remove_sysfs_files(parent); diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 15b1cd4ef5a7..550ab7707b57 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -433,10 +433,14 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type) { if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) { u8 pin; - pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin); - if (IS_ENABLED(CONFIG_VFIO_PCI_INTX) && !vdev->nointx && pin) - return 1; + if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || + vdev->nointx || vdev->pdev->is_virtfn) + return 0; + + pci_read_config_byte(vdev->pdev, PCI_INTERRUPT_PIN, &pin); + + return pin ? 1 : 0; } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX) { u8 pos; u16 flags; @@ -713,6 +717,7 @@ static long vfio_pci_ioctl(void *device_data, { void __iomem *io; size_t size; + u16 orig_cmd; info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); info.flags = 0; @@ -728,15 +733,23 @@ static long vfio_pci_ioctl(void *device_data, break; } - /* Is it really there? */ - io = pci_map_rom(pdev, &size); - if (!io || !size) { - info.size = 0; - break; - } - pci_unmap_rom(pdev, io); + /* + * Is it really there? Enable memory decode for + * implicit access in pci_map_rom(). + */ + pci_read_config_word(pdev, PCI_COMMAND, &orig_cmd); + pci_write_config_word(pdev, PCI_COMMAND, + orig_cmd | PCI_COMMAND_MEMORY); - info.flags = VFIO_REGION_INFO_FLAG_READ; + io = pci_map_rom(pdev, &size); + if (io) { + info.flags = VFIO_REGION_INFO_FLAG_READ; + pci_unmap_rom(pdev, io); + } else { + info.size = 0; + } + + pci_write_config_word(pdev, PCI_COMMAND, orig_cmd); break; } case VFIO_PCI_VGA_REGION_INDEX: diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 115a36f6f403..423ea1f98441 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -1180,8 +1180,10 @@ static int vfio_msi_cap_len(struct vfio_pci_device *vdev, u8 pos) return -ENOMEM; ret = init_pci_cap_msi_perm(vdev->msi_perm, len, flags); - if (ret) + if (ret) { + kfree(vdev->msi_perm); return ret; + } return len; } @@ -1609,6 +1611,15 @@ static int vfio_ecap_init(struct vfio_pci_device *vdev) return 0; } +/* + * Nag about hardware bugs, hopefully to have vendors fix them, but at least + * to collect a list of dependencies for the VF INTx pin quirk below. + */ +static const struct pci_device_id known_bogus_vf_intx_pin[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x270c) }, + {} +}; + /* * For each device we allocate a pci_config_map that indicates the * capability occupying each dword and thus the struct perm_bits we @@ -1674,6 +1685,24 @@ int vfio_config_init(struct vfio_pci_device *vdev) if (pdev->is_virtfn) { *(__le16 *)&vconfig[PCI_VENDOR_ID] = cpu_to_le16(pdev->vendor); *(__le16 *)&vconfig[PCI_DEVICE_ID] = cpu_to_le16(pdev->device); + + /* + * Per SR-IOV spec rev 1.1, 3.4.1.18 the interrupt pin register + * does not apply to VFs and VFs must implement this register + * as read-only with value zero. Userspace is not readily able + * to identify whether a device is a VF and thus that the pin + * definition on the device is bogus should it violate this + * requirement. We already virtualize the pin register for + * other purposes, so we simply need to replace the bogus value + * and consider VFs when we determine INTx IRQ count. + */ + if (vconfig[PCI_INTERRUPT_PIN] && + !pci_match_id(known_bogus_vf_intx_pin, pdev)) + pci_warn(pdev, + "Hardware bug: VF reports bogus INTx pin %d\n", + vconfig[PCI_INTERRUPT_PIN]); + + vconfig[PCI_INTERRUPT_PIN] = 0; /* Gratuitous for good VFs */ } if (!IS_ENABLED(CONFIG_VFIO_PCI_INTX) || vdev->nointx) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 1c46045b0e7f..94594dc63c41 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -297,8 +297,8 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, irq = pci_irq_vector(pdev, vector); if (vdev->ctx[vector].trigger) { - free_irq(irq, vdev->ctx[vector].trigger); irq_bypass_unregister_producer(&vdev->ctx[vector].producer); + free_irq(irq, vdev->ctx[vector].trigger); kfree(vdev->ctx[vector].name); eventfd_ctx_put(vdev->ctx[vector].trigger); vdev->ctx[vector].trigger = NULL; diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index b4c68f3b82be..eba9aaf3cc17 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -409,6 +409,7 @@ static void tce_iommu_release(void *iommu_data) { struct tce_container *container = iommu_data; struct tce_iommu_group *tcegrp; + struct tce_iommu_prereg *tcemem, *tmtmp; long i; while (tce_groups_attached(container)) { @@ -431,13 +432,8 @@ static void tce_iommu_release(void *iommu_data) tce_iommu_free_table(container, tbl); } - while (!list_empty(&container->prereg_list)) { - struct tce_iommu_prereg *tcemem; - - tcemem = list_first_entry(&container->prereg_list, - struct tce_iommu_prereg, next); - WARN_ON_ONCE(tce_iommu_prereg_free(container, tcemem)); - } + list_for_each_entry_safe(tcemem, tmtmp, &container->prereg_list, next) + WARN_ON(tce_iommu_prereg_free(container, tcemem)); tce_iommu_disable(container); if (container->mm) diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 4d11152e60c1..8fe07622ae59 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1025,11 +1025,7 @@ static int vhost_net_release(struct inode *inode, struct file *f) static struct socket *get_raw_socket(int fd) { - struct { - struct sockaddr_ll sa; - char buf[MAX_ADDR_LEN]; - } uaddr; - int uaddr_len = sizeof uaddr, r; + int r; struct socket *sock = sockfd_lookup(fd, &r); if (!sock) @@ -1041,12 +1037,7 @@ static struct socket *get_raw_socket(int fd) goto err; } - r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, - &uaddr_len, 0); - if (r) - goto err; - - if (uaddr.sa.sll_family != AF_PACKET) { + if (sock->sk->sk_family != AF_PACKET) { r = -EPFNOSUPPORT; goto err; } diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 3d7bea15c57b..85edacc0be47 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "vhost.h" @@ -361,7 +362,9 @@ static int vhost_worker(void *data) llist_for_each_entry_safe(work, work_next, node, node) { clear_bit(VHOST_WORK_QUEUED, &work->flags); __set_current_state(TASK_RUNNING); + kcov_remote_start_common(dev->kcov_handle); work->fn(work); + kcov_remote_stop(); if (need_resched()) schedule(); } @@ -521,6 +524,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev) /* No owner, become one */ dev->mm = get_task_mm(current); + dev->kcov_handle = kcov_common_handle(); worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid); if (IS_ERR(worker)) { err = PTR_ERR(worker); @@ -546,6 +550,7 @@ err_worker: if (dev->mm) mmput(dev->mm); dev->mm = NULL; + dev->kcov_handle = 0; err_mm: return err; } @@ -665,6 +670,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) if (dev->worker) { kthread_stop(dev->worker); dev->worker = NULL; + dev->kcov_handle = 0; } if (dev->mm) mmput(dev->mm); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 950c5c4e4ee3..6e8f67ff1e1c 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -175,6 +175,7 @@ struct vhost_dev { wait_queue_head_t wait; int weight; int byte_weight; + u64 kcov_handle; }; bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len); diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index 5b9db5deffbb..6391dc5b0ebe 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -103,7 +103,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, struct iov_iter iov_iter; unsigned out, in; size_t nbytes; - size_t len; + size_t iov_len, payload_len; int head; spin_lock_bh(&vsock->send_pkt_list_lock); @@ -148,8 +148,24 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, break; } - len = iov_length(&vq->iov[out], in); - iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len); + iov_len = iov_length(&vq->iov[out], in); + if (iov_len < sizeof(pkt->hdr)) { + virtio_transport_free_pkt(pkt); + vq_err(vq, "Buffer len [%zu] too small\n", iov_len); + break; + } + + iov_iter_init(&iov_iter, READ, &vq->iov[out], in, iov_len); + payload_len = pkt->len - pkt->off; + + /* If the packet is greater than the space available in the + * buffer, we split it using multiple buffers. + */ + if (payload_len > iov_len - sizeof(pkt->hdr)) + payload_len = iov_len - sizeof(pkt->hdr); + + /* Set the correct length in the header */ + pkt->hdr.len = cpu_to_le32(payload_len); nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter); if (nbytes != sizeof(pkt->hdr)) { @@ -158,33 +174,47 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock, break; } - nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter); - if (nbytes != pkt->len) { + nbytes = copy_to_iter(pkt->buf + pkt->off, payload_len, + &iov_iter); + if (nbytes != payload_len) { virtio_transport_free_pkt(pkt); vq_err(vq, "Faulted on copying pkt buf\n"); break; } - vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len); + vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len); added = true; - if (pkt->reply) { - int val; - - val = atomic_dec_return(&vsock->queued_replies); - - /* Do we have resources to resume tx processing? */ - if (val + 1 == tx_vq->num) - restart_tx = true; - } - /* Deliver to monitoring devices all correctly transmitted * packets. */ virtio_transport_deliver_tap_pkt(pkt); - total_len += pkt->len; - virtio_transport_free_pkt(pkt); + pkt->off += payload_len; + total_len += payload_len; + + /* If we didn't send all the payload we can requeue the packet + * to send it with the next available buffer. + */ + if (pkt->off < pkt->len) { + spin_lock_bh(&vsock->send_pkt_list_lock); + list_add(&pkt->list, &vsock->send_pkt_list); + spin_unlock_bh(&vsock->send_pkt_list_lock); + } else { + if (pkt->reply) { + int val; + + val = atomic_dec_return(&vsock->queued_replies); + + /* Do we have resources to resume tx + * processing? + */ + if (val + 1 == tx_vq->num) + restart_tx = true; + } + + virtio_transport_free_pkt(pkt); + } } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len))); if (added) vhost_signal(&vsock->dev, vq); @@ -406,7 +436,9 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) virtio_transport_deliver_tap_pkt(pkt); /* Only accept correctly addressed packets */ - if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid) + if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid && + le64_to_cpu(pkt->hdr.dst_cid) == + vhost_transport_get_local_cid()) virtio_transport_recv_pkt(pkt); else virtio_transport_free_pkt(pkt); diff --git a/drivers/video/backlight/lm3630a_bl.c b/drivers/video/backlight/lm3630a_bl.c index 2030a6b77a09..ef2553f452ca 100644 --- a/drivers/video/backlight/lm3630a_bl.c +++ b/drivers/video/backlight/lm3630a_bl.c @@ -201,7 +201,7 @@ static int lm3630a_bank_a_update_status(struct backlight_device *bl) LM3630A_LEDA_ENABLE, LM3630A_LEDA_ENABLE); if (ret < 0) goto out_i2c_err; - return bl->props.brightness; + return 0; out_i2c_err: dev_err(pchip->dev, "i2c failed to access\n"); @@ -278,7 +278,7 @@ static int lm3630a_bank_b_update_status(struct backlight_device *bl) LM3630A_LEDB_ENABLE, LM3630A_LEDB_ENABLE); if (ret < 0) goto out_i2c_err; - return bl->props.brightness; + return 0; out_i2c_err: dev_err(pchip->dev, "i2c failed to access REG_CTRL\n"); diff --git a/drivers/video/backlight/lm3639_bl.c b/drivers/video/backlight/lm3639_bl.c index cd50df5807ea..086611c7bc03 100644 --- a/drivers/video/backlight/lm3639_bl.c +++ b/drivers/video/backlight/lm3639_bl.c @@ -400,10 +400,8 @@ static int lm3639_remove(struct i2c_client *client) regmap_write(pchip->regmap, REG_ENABLE, 0x00); - if (&pchip->cdev_torch) - led_classdev_unregister(&pchip->cdev_torch); - if (&pchip->cdev_flash) - led_classdev_unregister(&pchip->cdev_flash); + led_classdev_unregister(&pchip->cdev_torch); + led_classdev_unregister(&pchip->cdev_flash); if (pchip->bled) device_remove_file(&(pchip->bled->dev), &dev_attr_bled_mode); return 0; diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index f09e17b60e45..0fdd6761d6c3 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -1310,6 +1310,9 @@ static int vgacon_font_get(struct vc_data *c, struct console_font *font) static int vgacon_resize(struct vc_data *c, unsigned int width, unsigned int height, unsigned int user) { + if ((width << 1) * height > vga_vram_size) + return -EINVAL; + if (width % 2 || width > screen_info.orig_video_cols || height > (screen_info.orig_video_lines * vga_default_font_height)/ c->vc_font.height) diff --git a/drivers/video/fbdev/chipsfb.c b/drivers/video/fbdev/chipsfb.c index f103665cad43..f9b366d17587 100644 --- a/drivers/video/fbdev/chipsfb.c +++ b/drivers/video/fbdev/chipsfb.c @@ -350,7 +350,7 @@ static void init_chips(struct fb_info *p, unsigned long addr) static int chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent) { struct fb_info *p; - unsigned long addr, size; + unsigned long addr; unsigned short cmd; int rc = -ENODEV; @@ -362,7 +362,6 @@ static int chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent) if ((dp->resource[0].flags & IORESOURCE_MEM) == 0) goto err_disable; addr = pci_resource_start(dp, 0); - size = pci_resource_len(dp, 0); if (addr == 0) goto err_disable; diff --git a/drivers/video/fbdev/core/fbmon.c b/drivers/video/fbdev/core/fbmon.c index 2b2d67328514..ed202f1e13b8 100644 --- a/drivers/video/fbdev/core/fbmon.c +++ b/drivers/video/fbdev/core/fbmon.c @@ -997,97 +997,6 @@ void fb_edid_to_monspecs(unsigned char *edid, struct fb_monspecs *specs) DPRINTK("========================================\n"); } -/** - * fb_edid_add_monspecs() - add monitor video modes from E-EDID data - * @edid: 128 byte array with an E-EDID block - * @spacs: monitor specs to be extended - */ -void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs) -{ - unsigned char *block; - struct fb_videomode *m; - int num = 0, i; - u8 svd[64], edt[(128 - 4) / DETAILED_TIMING_DESCRIPTION_SIZE]; - u8 pos = 4, svd_n = 0; - - if (!edid) - return; - - if (!edid_checksum(edid)) - return; - - if (edid[0] != 0x2 || - edid[2] < 4 || edid[2] > 128 - DETAILED_TIMING_DESCRIPTION_SIZE) - return; - - DPRINTK(" Short Video Descriptors\n"); - - while (pos < edid[2]) { - u8 len = edid[pos] & 0x1f, type = (edid[pos] >> 5) & 7; - pr_debug("Data block %u of %u bytes\n", type, len); - if (type == 2) { - for (i = pos; i < pos + len; i++) { - u8 idx = edid[pos + i] & 0x7f; - svd[svd_n++] = idx; - pr_debug("N%sative mode #%d\n", - edid[pos + i] & 0x80 ? "" : "on-n", idx); - } - } else if (type == 3 && len >= 3) { - /* Check Vendor Specific Data Block. For HDMI, - it is always 00-0C-03 for HDMI Licensing, LLC. */ - if (edid[pos + 1] == 3 && edid[pos + 2] == 0xc && - edid[pos + 3] == 0) - specs->misc |= FB_MISC_HDMI; - } - pos += len + 1; - } - - block = edid + edid[2]; - - DPRINTK(" Extended Detailed Timings\n"); - - for (i = 0; i < (128 - edid[2]) / DETAILED_TIMING_DESCRIPTION_SIZE; - i++, block += DETAILED_TIMING_DESCRIPTION_SIZE) - if (PIXEL_CLOCK != 0) - edt[num++] = block - edid; - - /* Yikes, EDID data is totally useless */ - if (!(num + svd_n)) - return; - - m = kzalloc((specs->modedb_len + num + svd_n) * - sizeof(struct fb_videomode), GFP_KERNEL); - - if (!m) - return; - - memcpy(m, specs->modedb, specs->modedb_len * sizeof(struct fb_videomode)); - - for (i = specs->modedb_len; i < specs->modedb_len + num; i++) { - get_detailed_timing(edid + edt[i - specs->modedb_len], &m[i]); - if (i == specs->modedb_len) - m[i].flag |= FB_MODE_IS_FIRST; - pr_debug("Adding %ux%u@%u\n", m[i].xres, m[i].yres, m[i].refresh); - } - - for (i = specs->modedb_len + num; i < specs->modedb_len + num + svd_n; i++) { - int idx = svd[i - specs->modedb_len - num]; - if (!idx || idx >= ARRAY_SIZE(cea_modes)) { - pr_warn("Reserved SVD code %d\n", idx); - } else if (!cea_modes[idx].xres) { - pr_warn("Unimplemented SVD code %d\n", idx); - } else { - memcpy(&m[i], cea_modes + idx, sizeof(m[i])); - pr_debug("Adding SVD #%d: %ux%u@%u\n", idx, - m[i].xres, m[i].yres, m[i].refresh); - } - } - - kfree(specs->modedb); - specs->modedb = m; - specs->modedb_len = specs->modedb_len + num + svd_n; -} - /* * VESA Generalized Timing Formula (GTF) */ @@ -1497,9 +1406,6 @@ int fb_parse_edid(unsigned char *edid, struct fb_var_screeninfo *var) void fb_edid_to_monspecs(unsigned char *edid, struct fb_monspecs *specs) { } -void fb_edid_add_monspecs(unsigned char *edid, struct fb_monspecs *specs) -{ -} void fb_destroy_modedb(struct fb_videomode *modedb) { } @@ -1607,7 +1513,6 @@ EXPORT_SYMBOL(fb_firmware_edid); EXPORT_SYMBOL(fb_parse_edid); EXPORT_SYMBOL(fb_edid_to_monspecs); -EXPORT_SYMBOL(fb_edid_add_monspecs); EXPORT_SYMBOL(fb_get_mode); EXPORT_SYMBOL(fb_validate_mode); EXPORT_SYMBOL(fb_destroy_modedb); diff --git a/drivers/video/fbdev/core/modedb.c b/drivers/video/fbdev/core/modedb.c index 455a15f70172..a9d76e1b4378 100644 --- a/drivers/video/fbdev/core/modedb.c +++ b/drivers/video/fbdev/core/modedb.c @@ -289,63 +289,6 @@ static const struct fb_videomode modedb[] = { }; #ifdef CONFIG_FB_MODE_HELPERS -const struct fb_videomode cea_modes[65] = { - /* #1: 640x480p@59.94/60Hz */ - [1] = { - NULL, 60, 640, 480, 39722, 48, 16, 33, 10, 96, 2, 0, - FB_VMODE_NONINTERLACED, 0, - }, - /* #3: 720x480p@59.94/60Hz */ - [3] = { - NULL, 60, 720, 480, 37037, 60, 16, 30, 9, 62, 6, 0, - FB_VMODE_NONINTERLACED, 0, - }, - /* #5: 1920x1080i@59.94/60Hz */ - [5] = { - NULL, 60, 1920, 1080, 13763, 148, 88, 15, 2, 44, 5, - FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, - FB_VMODE_INTERLACED, 0, - }, - /* #7: 720(1440)x480iH@59.94/60Hz */ - [7] = { - NULL, 60, 1440, 480, 18554/*37108*/, 114, 38, 15, 4, 124, 3, 0, - FB_VMODE_INTERLACED, 0, - }, - /* #9: 720(1440)x240pH@59.94/60Hz */ - [9] = { - NULL, 60, 1440, 240, 18554, 114, 38, 16, 4, 124, 3, 0, - FB_VMODE_NONINTERLACED, 0, - }, - /* #18: 720x576pH@50Hz */ - [18] = { - NULL, 50, 720, 576, 37037, 68, 12, 39, 5, 64, 5, 0, - FB_VMODE_NONINTERLACED, 0, - }, - /* #19: 1280x720p@50Hz */ - [19] = { - NULL, 50, 1280, 720, 13468, 220, 440, 20, 5, 40, 5, - FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, - FB_VMODE_NONINTERLACED, 0, - }, - /* #20: 1920x1080i@50Hz */ - [20] = { - NULL, 50, 1920, 1080, 13480, 148, 528, 15, 5, 528, 5, - FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, - FB_VMODE_INTERLACED, 0, - }, - /* #32: 1920x1080p@23.98/24Hz */ - [32] = { - NULL, 24, 1920, 1080, 13468, 148, 638, 36, 4, 44, 5, - FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT, - FB_VMODE_NONINTERLACED, 0, - }, - /* #35: (2880)x480p4x@59.94/60Hz */ - [35] = { - NULL, 60, 2880, 480, 9250, 240, 64, 30, 9, 248, 6, 0, - FB_VMODE_NONINTERLACED, 0, - }, -}; - const struct fb_videomode vesa_modes[] = { /* 0 640x350-85 VESA */ { NULL, 85, 640, 350, 31746, 96, 32, 60, 32, 64, 3, diff --git a/drivers/video/fbdev/pxa168fb.c b/drivers/video/fbdev/pxa168fb.c index d059d04c63ac..20195d3dbf08 100644 --- a/drivers/video/fbdev/pxa168fb.c +++ b/drivers/video/fbdev/pxa168fb.c @@ -769,8 +769,8 @@ failed_free_cmap: failed_free_clk: clk_disable_unprepare(fbi->clk); failed_free_fbmem: - dma_free_coherent(fbi->dev, info->fix.smem_len, - info->screen_base, fbi->fb_start_dma); + dma_free_wc(fbi->dev, info->fix.smem_len, + info->screen_base, fbi->fb_start_dma); failed_free_info: kfree(info); @@ -804,7 +804,7 @@ static int pxa168fb_remove(struct platform_device *pdev) irq = platform_get_irq(pdev, 0); - dma_free_wc(fbi->dev, PAGE_ALIGN(info->fix.smem_len), + dma_free_wc(fbi->dev, info->fix.smem_len, info->screen_base, info->fix.smem_start); clk_disable_unprepare(fbi->clk); diff --git a/drivers/video/fbdev/sbuslib.c b/drivers/video/fbdev/sbuslib.c index a436d44f1b7f..01a7110e61a7 100644 --- a/drivers/video/fbdev/sbuslib.c +++ b/drivers/video/fbdev/sbuslib.c @@ -106,11 +106,11 @@ int sbusfb_ioctl_helper(unsigned long cmd, unsigned long arg, struct fbtype __user *f = (struct fbtype __user *) arg; if (put_user(type, &f->fb_type) || - __put_user(info->var.yres, &f->fb_height) || - __put_user(info->var.xres, &f->fb_width) || - __put_user(fb_depth, &f->fb_depth) || - __put_user(0, &f->fb_cmsize) || - __put_user(fb_size, &f->fb_cmsize)) + put_user(info->var.yres, &f->fb_height) || + put_user(info->var.xres, &f->fb_width) || + put_user(fb_depth, &f->fb_depth) || + put_user(0, &f->fb_cmsize) || + put_user(fb_size, &f->fb_cmsize)) return -EFAULT; return 0; } @@ -125,10 +125,10 @@ int sbusfb_ioctl_helper(unsigned long cmd, unsigned long arg, unsigned int index, count, i; if (get_user(index, &c->index) || - __get_user(count, &c->count) || - __get_user(ured, &c->red) || - __get_user(ugreen, &c->green) || - __get_user(ublue, &c->blue)) + get_user(count, &c->count) || + get_user(ured, &c->red) || + get_user(ugreen, &c->green) || + get_user(ublue, &c->blue)) return -EFAULT; cmap.len = 1; @@ -165,13 +165,13 @@ int sbusfb_ioctl_helper(unsigned long cmd, unsigned long arg, u8 red, green, blue; if (get_user(index, &c->index) || - __get_user(count, &c->count) || - __get_user(ured, &c->red) || - __get_user(ugreen, &c->green) || - __get_user(ublue, &c->blue)) + get_user(count, &c->count) || + get_user(ured, &c->red) || + get_user(ugreen, &c->green) || + get_user(ublue, &c->blue)) return -EFAULT; - if (index + count > cmap->len) + if (index > cmap->len || count > cmap->len - index) return -EINVAL; for (i = 0; i < count; i++) { diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c index 111a0ab6280a..ce7c4a269f77 100644 --- a/drivers/video/hdmi.c +++ b/drivers/video/hdmi.c @@ -1036,12 +1036,12 @@ static int hdmi_avi_infoframe_unpack(struct hdmi_avi_infoframe *frame, if (ptr[0] & 0x10) frame->active_aspect = ptr[1] & 0xf; if (ptr[0] & 0x8) { - frame->top_bar = (ptr[5] << 8) + ptr[6]; - frame->bottom_bar = (ptr[7] << 8) + ptr[8]; + frame->top_bar = (ptr[6] << 8) | ptr[5]; + frame->bottom_bar = (ptr[8] << 8) | ptr[7]; } if (ptr[0] & 0x4) { - frame->left_bar = (ptr[9] << 8) + ptr[10]; - frame->right_bar = (ptr[11] << 8) + ptr[12]; + frame->left_bar = (ptr[10] << 8) | ptr[9]; + frame->right_bar = (ptr[12] << 8) | ptr[11]; } frame->scan_mode = ptr[0] & 0x3; diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index cff773f15b7e..89bc931de8df 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -38,6 +38,17 @@ config VIRTIO_PCI_LEGACY If unsure, say Y. +config VIRTIO_PMEM + tristate "Support for virtio pmem driver" + depends on VIRTIO + depends on LIBNVDIMM + help + This driver provides access to virtio-pmem devices, storage devices + that are mapped into the physical address space - similar to NVDIMMs + - with a virtio-based flushing interface. + + If unsure, say Y. + config VIRTIO_BALLOON tristate "Virtio balloon driver" depends on VIRTIO diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index d9873aa014a6..71970773aad1 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -132,6 +132,8 @@ static void set_page_pfns(struct virtio_balloon *vb, { unsigned int i; + BUILD_BUG_ON(VIRTIO_BALLOON_PAGES_PER_PAGE > VIRTIO_BALLOON_ARRAY_PFNS_MAX); + /* * Set balloon pfns pointing at this page. * Note that the first pfn points at start of the page. @@ -492,6 +494,17 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info, get_page(newpage); /* balloon reference */ + /* + * When we migrate a page to a different zone and adjusted the + * managed page count when inflating, we have to fixup the count of + * both involved zones. + */ + if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM) && + page_zone(page) != page_zone(newpage)) { + adjust_managed_page_count(page, 1); + adjust_managed_page_count(newpage, -1); + } + /* balloon's page migration 1st step -- inflate "newpage" */ spin_lock_irqsave(&vb_dev_info->pages_lock, flags); balloon_page_insert(vb_dev_info, newpage); diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c index 3a0468f2ceb0..0073d078e50e 100644 --- a/drivers/virtio/virtio_input.c +++ b/drivers/virtio/virtio_input.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -163,6 +164,15 @@ static void virtinput_cfg_abs(struct virtio_input *vi, int abs) virtio_cread(vi->vdev, struct virtio_input_config, u.abs.flat, &fl); input_set_abs_params(vi->idev, abs, mi, ma, fu, fl); input_abs_set_res(vi->idev, abs, re); + if (abs == ABS_MT_TRACKING_ID) { + unsigned int slot_flags = + test_bit(INPUT_PROP_DIRECT, vi->idev->propbit) ? + INPUT_MT_DIRECT : 0; + + input_mt_init_slots(vi->idev, + ma, /* input max finger */ + slot_flags); + } } static int virtinput_init_vqs(struct virtio_input *vi) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index cc9d421c0929..b82bb0b08161 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -432,7 +432,7 @@ unmap_release: kfree(desc); END_USE(vq); - return -EIO; + return -ENOMEM; } /** diff --git a/drivers/vme/bridges/vme_fake.c b/drivers/vme/bridges/vme_fake.c index 30b3acc93833..e81ec763b555 100644 --- a/drivers/vme/bridges/vme_fake.c +++ b/drivers/vme/bridges/vme_fake.c @@ -418,8 +418,9 @@ static void fake_lm_check(struct fake_driver *bridge, unsigned long long addr, } } -static u8 fake_vmeread8(struct fake_driver *bridge, unsigned long long addr, - u32 aspace, u32 cycle) +static noinline_for_stack u8 fake_vmeread8(struct fake_driver *bridge, + unsigned long long addr, + u32 aspace, u32 cycle) { u8 retval = 0xff; int i; @@ -450,8 +451,9 @@ static u8 fake_vmeread8(struct fake_driver *bridge, unsigned long long addr, return retval; } -static u16 fake_vmeread16(struct fake_driver *bridge, unsigned long long addr, - u32 aspace, u32 cycle) +static noinline_for_stack u16 fake_vmeread16(struct fake_driver *bridge, + unsigned long long addr, + u32 aspace, u32 cycle) { u16 retval = 0xffff; int i; @@ -482,8 +484,9 @@ static u16 fake_vmeread16(struct fake_driver *bridge, unsigned long long addr, return retval; } -static u32 fake_vmeread32(struct fake_driver *bridge, unsigned long long addr, - u32 aspace, u32 cycle) +static noinline_for_stack u32 fake_vmeread32(struct fake_driver *bridge, + unsigned long long addr, + u32 aspace, u32 cycle) { u32 retval = 0xffffffff; int i; @@ -613,8 +616,9 @@ out: return retval; } -static void fake_vmewrite8(struct fake_driver *bridge, u8 *buf, - unsigned long long addr, u32 aspace, u32 cycle) +static noinline_for_stack void fake_vmewrite8(struct fake_driver *bridge, + u8 *buf, unsigned long long addr, + u32 aspace, u32 cycle) { int i; unsigned long long start, end, offset; @@ -643,8 +647,9 @@ static void fake_vmewrite8(struct fake_driver *bridge, u8 *buf, } -static void fake_vmewrite16(struct fake_driver *bridge, u16 *buf, - unsigned long long addr, u32 aspace, u32 cycle) +static noinline_for_stack void fake_vmewrite16(struct fake_driver *bridge, + u16 *buf, unsigned long long addr, + u32 aspace, u32 cycle) { int i; unsigned long long start, end, offset; @@ -673,8 +678,9 @@ static void fake_vmewrite16(struct fake_driver *bridge, u16 *buf, } -static void fake_vmewrite32(struct fake_driver *bridge, u32 *buf, - unsigned long long addr, u32 aspace, u32 cycle) +static noinline_for_stack void fake_vmewrite32(struct fake_driver *bridge, + u32 *buf, unsigned long long addr, + u32 aspace, u32 cycle) { int i; unsigned long long start, end, offset; diff --git a/drivers/w1/slaves/w1_ds2438.c b/drivers/w1/slaves/w1_ds2438.c index bf641a191d07..7c4e33dbee4d 100644 --- a/drivers/w1/slaves/w1_ds2438.c +++ b/drivers/w1/slaves/w1_ds2438.c @@ -186,8 +186,8 @@ static int w1_ds2438_change_config_bit(struct w1_slave *sl, u8 mask, u8 value) return -1; } -static uint16_t w1_ds2438_get_voltage(struct w1_slave *sl, - int adc_input, uint16_t *voltage) +static int w1_ds2438_get_voltage(struct w1_slave *sl, + int adc_input, uint16_t *voltage) { unsigned int retries = W1_DS2438_RETRIES; u8 w1_buf[DS2438_PAGE_SIZE + 1 /*for CRC*/]; @@ -235,6 +235,25 @@ post_unlock: return ret; } +static int w1_ds2438_get_current(struct w1_slave *sl, int16_t *voltage) +{ + u8 w1_buf[DS2438_PAGE_SIZE + 1 /*for CRC*/]; + int ret; + + mutex_lock(&sl->master->bus_mutex); + + if (w1_ds2438_get_page(sl, 0, w1_buf) == 0) { + /* The voltage measured across current sense resistor RSENS. */ + *voltage = (((int16_t) w1_buf[DS2438_CURRENT_MSB]) << 8) | ((int16_t) w1_buf[DS2438_CURRENT_LSB]); + ret = 0; + } else + ret = -1; + + mutex_unlock(&sl->master->bus_mutex); + + return ret; +} + static ssize_t iad_write(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) @@ -257,6 +276,27 @@ static ssize_t iad_write(struct file *filp, struct kobject *kobj, return ret; } +static ssize_t iad_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, + loff_t off, size_t count) +{ + struct w1_slave *sl = kobj_to_w1_slave(kobj); + int ret; + int16_t voltage; + + if (off != 0) + return 0; + if (!buf) + return -EINVAL; + + if (w1_ds2438_get_current(sl, &voltage) == 0) { + ret = snprintf(buf, count, "%i\n", voltage); + } else + ret = -EIO; + + return ret; +} + static ssize_t page0_read(struct file *filp, struct kobject *kobj, struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) @@ -272,9 +312,13 @@ static ssize_t page0_read(struct file *filp, struct kobject *kobj, mutex_lock(&sl->master->bus_mutex); + /* Read no more than page0 size */ + if (count > DS2438_PAGE_SIZE) + count = DS2438_PAGE_SIZE; + if (w1_ds2438_get_page(sl, 0, w1_buf) == 0) { - memcpy(buf, &w1_buf, DS2438_PAGE_SIZE); - ret = DS2438_PAGE_SIZE; + memcpy(buf, &w1_buf, count); + ret = count; } else ret = -EIO; @@ -289,7 +333,6 @@ static ssize_t temperature_read(struct file *filp, struct kobject *kobj, { struct w1_slave *sl = kobj_to_w1_slave(kobj); int ret; - ssize_t c = PAGE_SIZE; int16_t temp; if (off != 0) @@ -298,8 +341,7 @@ static ssize_t temperature_read(struct file *filp, struct kobject *kobj, return -EINVAL; if (w1_ds2438_get_temperature(sl, &temp) == 0) { - c -= snprintf(buf + PAGE_SIZE - c, c, "%d\n", temp); - ret = PAGE_SIZE - c; + ret = snprintf(buf, count, "%i\n", temp); } else ret = -EIO; @@ -312,7 +354,6 @@ static ssize_t vad_read(struct file *filp, struct kobject *kobj, { struct w1_slave *sl = kobj_to_w1_slave(kobj); int ret; - ssize_t c = PAGE_SIZE; uint16_t voltage; if (off != 0) @@ -321,8 +362,7 @@ static ssize_t vad_read(struct file *filp, struct kobject *kobj, return -EINVAL; if (w1_ds2438_get_voltage(sl, DS2438_ADC_INPUT_VAD, &voltage) == 0) { - c -= snprintf(buf + PAGE_SIZE - c, c, "%d\n", voltage); - ret = PAGE_SIZE - c; + ret = snprintf(buf, count, "%u\n", voltage); } else ret = -EIO; @@ -335,7 +375,6 @@ static ssize_t vdd_read(struct file *filp, struct kobject *kobj, { struct w1_slave *sl = kobj_to_w1_slave(kobj); int ret; - ssize_t c = PAGE_SIZE; uint16_t voltage; if (off != 0) @@ -344,15 +383,14 @@ static ssize_t vdd_read(struct file *filp, struct kobject *kobj, return -EINVAL; if (w1_ds2438_get_voltage(sl, DS2438_ADC_INPUT_VDD, &voltage) == 0) { - c -= snprintf(buf + PAGE_SIZE - c, c, "%d\n", voltage); - ret = PAGE_SIZE - c; + ret = snprintf(buf, count, "%u\n", voltage); } else ret = -EIO; return ret; } -static BIN_ATTR(iad, S_IRUGO | S_IWUSR | S_IWGRP, NULL, iad_write, 1); +static BIN_ATTR(iad, S_IRUGO | S_IWUSR | S_IWGRP, iad_read, iad_write, 0); static BIN_ATTR_RO(page0, DS2438_PAGE_SIZE); static BIN_ATTR_RO(temperature, 0/* real length varies */); static BIN_ATTR_RO(vad, 0/* real length varies */); diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 8efc10ab9d0b..d1d201c5d737 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -563,6 +563,7 @@ config MAX63XX_WATCHDOG config MAX77620_WATCHDOG tristate "Maxim Max77620 Watchdog Timer" depends on MFD_MAX77620 || COMPILE_TEST + select WATCHDOG_CORE help This is the driver for the Max77620 watchdog timer. Say 'Y' here to enable the watchdog timer support for diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c index cee7334b2a00..f5835cbd5d41 100644 --- a/drivers/watchdog/aspeed_wdt.c +++ b/drivers/watchdog/aspeed_wdt.c @@ -204,11 +204,6 @@ static int aspeed_wdt_probe(struct platform_device *pdev) if (IS_ERR(wdt->base)) return PTR_ERR(wdt->base); - /* - * The ast2400 wdt can run at PCLK, or 1MHz. The ast2500 only - * runs at 1MHz. We chose to always run at 1MHz, as there's no - * good reason to have a faster watchdog counter. - */ wdt->wdd.info = &aspeed_wdt_info; wdt->wdd.ops = &aspeed_wdt_ops; wdt->wdd.max_hw_heartbeat_ms = WDT_MAX_TIMEOUT_MS; @@ -224,7 +219,16 @@ static int aspeed_wdt_probe(struct platform_device *pdev) return -EINVAL; config = ofdid->data; - wdt->ctrl = WDT_CTRL_1MHZ_CLK; + /* + * On clock rates: + * - ast2400 wdt can run at PCLK, or 1MHz + * - ast2500 only runs at 1MHz, hard coding bit 4 to 1 + * - ast2600 always runs at 1MHz + * + * Set the ast2400 to run at 1MHz as it simplifies the driver. + */ + if (of_device_is_compatible(np, "aspeed,ast2400-wdt")) + wdt->ctrl = WDT_CTRL_1MHZ_CLK; /* * Control reset on a per-device basis to ensure the diff --git a/drivers/watchdog/da9062_wdt.c b/drivers/watchdog/da9062_wdt.c index 9083d3d922b0..79383ff62019 100644 --- a/drivers/watchdog/da9062_wdt.c +++ b/drivers/watchdog/da9062_wdt.c @@ -126,13 +126,6 @@ static int da9062_wdt_stop(struct watchdog_device *wdd) struct da9062_watchdog *wdt = watchdog_get_drvdata(wdd); int ret; - ret = da9062_reset_watchdog_timer(wdt); - if (ret) { - dev_err(wdt->hw->dev, "Failed to ping the watchdog (err = %d)\n", - ret); - return ret; - } - ret = regmap_update_bits(wdt->hw->regmap, DA9062AA_CONTROL_D, DA9062AA_TWDSCALE_MASK, diff --git a/drivers/watchdog/meson_gxbb_wdt.c b/drivers/watchdog/meson_gxbb_wdt.c index 69a5a57f1446..61297a6ab43a 100644 --- a/drivers/watchdog/meson_gxbb_wdt.c +++ b/drivers/watchdog/meson_gxbb_wdt.c @@ -137,8 +137,8 @@ static unsigned int meson_gxbb_wdt_get_timeleft(struct watchdog_device *wdt_dev) reg = readl(data->reg_base + GXBB_WDT_TCNT_REG); - return ((reg >> GXBB_WDT_TCNT_CNT_SHIFT) - - (reg & GXBB_WDT_TCNT_SETUP_MASK)) / 1000; + return ((reg & GXBB_WDT_TCNT_SETUP_MASK) - + (reg >> GXBB_WDT_TCNT_CNT_SHIFT)) / 1000; } static const struct watchdog_ops meson_gxbb_wdt_ops = { diff --git a/drivers/watchdog/rn5t618_wdt.c b/drivers/watchdog/rn5t618_wdt.c index e60f55702ab7..d2e79cf70e77 100644 --- a/drivers/watchdog/rn5t618_wdt.c +++ b/drivers/watchdog/rn5t618_wdt.c @@ -193,6 +193,7 @@ static struct platform_driver rn5t618_wdt_driver = { module_platform_driver(rn5t618_wdt_driver); +MODULE_ALIAS("platform:rn5t618-wdt"); MODULE_AUTHOR("Beniamino Galvani "); MODULE_DESCRIPTION("RN5T618 watchdog driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/watchdog/sama5d4_wdt.c b/drivers/watchdog/sama5d4_wdt.c index 0ae947c3d7bc..d8cf2039c6a4 100644 --- a/drivers/watchdog/sama5d4_wdt.c +++ b/drivers/watchdog/sama5d4_wdt.c @@ -111,9 +111,7 @@ static int sama5d4_wdt_set_timeout(struct watchdog_device *wdd, u32 value = WDT_SEC2TICKS(timeout); wdt->mr &= ~AT91_WDT_WDV; - wdt->mr &= ~AT91_WDT_WDD; wdt->mr |= AT91_WDT_SET_WDV(value); - wdt->mr |= AT91_WDT_SET_WDD(value); /* * WDDIS has to be 0 when updating WDD/WDV. The datasheet states: When @@ -255,7 +253,7 @@ static int sama5d4_wdt_probe(struct platform_device *pdev) timeout = WDT_SEC2TICKS(wdd->timeout); - wdt->mr |= AT91_WDT_SET_WDD(timeout); + wdt->mr |= AT91_WDT_SET_WDD(WDT_SEC2TICKS(MAX_WDT_TIMEOUT)); wdt->mr |= AT91_WDT_SET_WDV(timeout); ret = sama5d4_wdt_init(wdt); diff --git a/drivers/watchdog/w83627hf_wdt.c b/drivers/watchdog/w83627hf_wdt.c index 7817836bff55..4b9365d4de7a 100644 --- a/drivers/watchdog/w83627hf_wdt.c +++ b/drivers/watchdog/w83627hf_wdt.c @@ -50,7 +50,7 @@ static int cr_wdt_csr; /* WDT control & status register */ enum chips { w83627hf, w83627s, w83697hf, w83697ug, w83637hf, w83627thf, w83687thf, w83627ehf, w83627dhg, w83627uhg, w83667hg, w83627dhg_p, w83667hg_b, nct6775, nct6776, nct6779, nct6791, nct6792, nct6793, - nct6795, nct6102 }; + nct6795, nct6796, nct6102 }; static int timeout; /* in seconds */ module_param(timeout, int, 0); @@ -100,6 +100,7 @@ MODULE_PARM_DESC(early_disable, "Disable watchdog at boot time (default=0)"); #define NCT6792_ID 0xc9 #define NCT6793_ID 0xd1 #define NCT6795_ID 0xd3 +#define NCT6796_ID 0xd4 /* also NCT9697D, NCT9698D */ #define W83627HF_WDT_TIMEOUT 0xf6 #define W83697HF_WDT_TIMEOUT 0xf4 @@ -209,6 +210,7 @@ static int w83627hf_init(struct watchdog_device *wdog, enum chips chip) case nct6792: case nct6793: case nct6795: + case nct6796: case nct6102: /* * These chips have a fixed WDTO# output pin (W83627UHG), @@ -407,6 +409,9 @@ static int wdt_find(int addr) case NCT6795_ID: ret = nct6795; break; + case NCT6796_ID: + ret = nct6796; + break; case NCT6102_ID: ret = nct6102; cr_wdt_timeout = NCT6102D_WDT_TIMEOUT; @@ -450,6 +455,7 @@ static int __init wdt_init(void) "NCT6792", "NCT6793", "NCT6795", + "NCT6796", "NCT6102", }; diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c index 0da9943d405f..c310e841561c 100644 --- a/drivers/watchdog/wdat_wdt.c +++ b/drivers/watchdog/wdat_wdt.c @@ -392,7 +392,7 @@ static int wdat_wdt_probe(struct platform_device *pdev) memset(&r, 0, sizeof(r)); r.start = gas->address; - r.end = r.start + gas->access_width - 1; + r.end = r.start + ACPI_ACCESS_BYTE_WIDTH(gas->access_width) - 1; if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) { r.flags = IORESOURCE_MEM; } else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 7d521babc020..3f9260af701f 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -356,7 +356,10 @@ static enum bp_state reserve_additional_memory(void) * callers drop the mutex before trying again. */ mutex_unlock(&balloon_mutex); + /* add_memory_resource() requires the device_hotplug lock */ + lock_device_hotplug(); rc = add_memory_resource(nid, resource, memhp_auto_online); + unlock_device_hotplug(); mutex_lock(&balloon_mutex); if (rc) { @@ -398,7 +401,8 @@ static struct notifier_block xen_memory_nb = { #else static enum bp_state reserve_additional_memory(void) { - balloon_stats.target_pages = balloon_stats.current_pages; + balloon_stats.target_pages = balloon_stats.current_pages + + balloon_stats.target_unpopulated; return BP_ECANCELED; } #endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ diff --git a/drivers/xen/cpu_hotplug.c b/drivers/xen/cpu_hotplug.c index b1357aa4bc55..f192b6f42da9 100644 --- a/drivers/xen/cpu_hotplug.c +++ b/drivers/xen/cpu_hotplug.c @@ -54,7 +54,7 @@ static int vcpu_online(unsigned int cpu) } static void vcpu_hotplug(unsigned int cpu) { - if (!cpu_possible(cpu)) + if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) return; switch (vcpu_online(cpu)) { diff --git a/drivers/xen/preempt.c b/drivers/xen/preempt.c index 08cb419eb4e6..5f6b77ea34fb 100644 --- a/drivers/xen/preempt.c +++ b/drivers/xen/preempt.c @@ -37,7 +37,9 @@ asmlinkage __visible void xen_maybe_preempt_hcall(void) * cpu. */ __this_cpu_write(xen_in_preemptible_hcall, false); - _cond_resched(); + local_irq_enable(); + cond_resched(); + local_irq_disable(); __this_cpu_write(xen_in_preemptible_hcall, true); } } diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index abd6dbc29ac2..58be15c27b6d 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -792,7 +792,7 @@ static int pvcalls_back_poll(struct xenbus_device *dev, mappass->reqcopy = *req; icsk = inet_csk(mappass->sock->sk); queue = &icsk->icsk_accept_queue; - data = queue->rskq_accept_head != NULL; + data = READ_ONCE(queue->rskq_accept_head) != NULL; if (data) { mappass->reqcopy.cmd = 0; ret = 0; diff --git a/drivers/xen/xen-balloon.c b/drivers/xen/xen-balloon.c index cf8ef8cee5a0..112e8b5e6fee 100644 --- a/drivers/xen/xen-balloon.c +++ b/drivers/xen/xen-balloon.c @@ -82,7 +82,7 @@ static void watch_target(struct xenbus_watch *watch, "%llu", &static_max) == 1)) static_max >>= PAGE_SHIFT - 10; else - static_max = new_target; + static_max = balloon_stats.current_pages; target_diff = (xen_pv_domain() || xen_initial_domain()) ? 0 : static_max - balloon_stats.target_pages; diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 9e480fdebe1f..8c250f4a3a97 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -106,7 +106,8 @@ static void pcistub_device_release(struct kref *kref) * is called from "unbind" which takes a device_lock mutex. */ __pci_reset_function_locked(dev); - if (pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state)) + if (dev_data && + pci_load_and_free_saved_state(dev, &dev_data->pci_saved_state)) dev_info(&dev->dev, "Could not reload PCI state\n"); else pci_restore_state(dev); diff --git a/firmware/Makefile b/firmware/Makefile index 168094a3fae7..30e6b738839e 100644 --- a/firmware/Makefile +++ b/firmware/Makefile @@ -19,7 +19,7 @@ quiet_cmd_fwbin = MK_FW $@ PROGBITS=$(if $(CONFIG_ARM),%,@)progbits; \ echo "/* Generated by firmware/Makefile */" > $@;\ echo " .section .rodata" >>$@;\ - echo " .p2align $${ASM_ALIGN}" >>$@;\ + echo " .p2align 4" >>$@;\ echo "_fw_$${FWSTR}_bin:" >>$@;\ echo " .incbin \"$(2)\"" >>$@;\ echo "_fw_end:" >>$@;\ diff --git a/fs/Kconfig b/fs/Kconfig index 6da435421744..1c1ed36c36c2 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -106,6 +106,7 @@ source "fs/quota/Kconfig" source "fs/autofs4/Kconfig" source "fs/fuse/Kconfig" source "fs/overlayfs/Kconfig" +source "fs/incfs/Kconfig" menu "Caches" diff --git a/fs/Makefile b/fs/Makefile index 998b27e210a1..74fa4aa5d470 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -112,6 +112,7 @@ obj-$(CONFIG_ADFS_FS) += adfs/ obj-$(CONFIG_FUSE_FS) += fuse/ obj-$(CONFIG_OVERLAY_FS) += overlayfs/ obj-$(CONFIG_ORANGEFS_FS) += orangefs/ +obj-$(CONFIG_INCREMENTAL_FS) += incfs/ obj-$(CONFIG_UDF_FS) += udf/ obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ obj-$(CONFIG_OMFS_FS) += omfs/ diff --git a/fs/affs/super.c b/fs/affs/super.c index 884bedab7266..789a1c7db5d8 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -559,14 +559,9 @@ affs_remount(struct super_block *sb, int *flags, char *data) int root_block; unsigned long mount_flags; int res = 0; - char *new_opts; char volume[32]; char *prefix = NULL; - new_opts = kstrdup(data, GFP_KERNEL); - if (data && !new_opts) - return -ENOMEM; - pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data); sync_filesystem(sb); @@ -577,7 +572,6 @@ affs_remount(struct super_block *sb, int *flags, char *data) &blocksize, &prefix, volume, &mount_flags)) { kfree(prefix); - kfree(new_opts); return -EINVAL; } diff --git a/fs/afs/super.c b/fs/afs/super.c index 689173c0a682..f8529ddbd587 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -359,6 +359,7 @@ static int afs_fill_super(struct super_block *sb, /* fill in the superblock */ sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; + sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_magic = AFS_FS_MAGIC; sb->s_op = &afs_super_ops; sb->s_xattr = afs_xattr_handlers; diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index 2830e4f48d85..7c6b62a94e7e 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -50,7 +50,7 @@ static int afs_xattr_get_cell(const struct xattr_handler *handler, return namelen; if (namelen > size) return -ERANGE; - memcpy(buffer, cell->name, size); + memcpy(buffer, cell->name, namelen); return namelen; } @@ -104,7 +104,7 @@ static int afs_xattr_get_volume(const struct xattr_handler *handler, return namelen; if (namelen > size) return -ERANGE; - memcpy(buffer, volname, size); + memcpy(buffer, volname, namelen); return namelen; } diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index 141f9bc213a3..94a0017c923b 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -472,9 +472,10 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb, */ flags &= ~AUTOFS_EXP_LEAVES; found = should_expire(expired, mnt, timeout, how); - if (!found || found != expired) - /* Something has changed, continue */ + if (found != expired) { // something has changed, continue + dput(found); goto next; + } if (expired != dentry) dput(dentry); diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 7cde3f46ad26..e996174cbfc0 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -14,13 +14,30 @@ #include #include +static inline bool spacetab(char c) { return c == ' ' || c == '\t'; } +static inline char *next_non_spacetab(char *first, const char *last) +{ + for (; first <= last; first++) + if (!spacetab(*first)) + return first; + return NULL; +} +static inline char *next_terminator(char *first, const char *last) +{ + for (; first <= last; first++) + if (spacetab(*first) || !*first) + return first; + return NULL; +} + static int load_script(struct linux_binprm *bprm) { const char *i_arg, *i_name; - char *cp; + char *cp, *buf_end; struct file *file; int retval; + /* Not ours to exec if we don't start with "#!". */ if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!')) return -ENOEXEC; @@ -33,18 +50,40 @@ static int load_script(struct linux_binprm *bprm) if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) return -ENOENT; - /* - * This section does the #! interpretation. - * Sorta complicated, but hopefully it will work. -TYT - */ - + /* Release since we are not mapping a binary into memory. */ allow_write_access(bprm->file); fput(bprm->file); bprm->file = NULL; - bprm->buf[BINPRM_BUF_SIZE - 1] = '\0'; - if ((cp = strchr(bprm->buf, '\n')) == NULL) - cp = bprm->buf+BINPRM_BUF_SIZE-1; + /* + * This section handles parsing the #! line into separate + * interpreter path and argument strings. We must be careful + * because bprm->buf is not yet guaranteed to be NUL-terminated + * (though the buffer will have trailing NUL padding when the + * file size was smaller than the buffer size). + * + * We do not want to exec a truncated interpreter path, so either + * we find a newline (which indicates nothing is truncated), or + * we find a space/tab/NUL after the interpreter path (which + * itself may be preceded by spaces/tabs). Truncating the + * arguments is fine: the interpreter can re-read the script to + * parse them on its own. + */ + buf_end = bprm->buf + sizeof(bprm->buf) - 1; + cp = strnchr(bprm->buf, sizeof(bprm->buf), '\n'); + if (!cp) { + cp = next_non_spacetab(bprm->buf + 2, buf_end); + if (!cp) + return -ENOEXEC; /* Entire buf is spaces/tabs */ + /* + * If there is no later space/tab/NUL we must assume the + * interpreter path is truncated. + */ + if (!next_terminator(cp, buf_end)) + return -ENOEXEC; + cp = buf_end; + } + /* NUL-terminate the buffer and any trailing spaces/tabs. */ *cp = '\0'; while (cp > bprm->buf) { cp--; diff --git a/fs/block_dev.c b/fs/block_dev.c index 61949e3446e5..ad27f9a1317c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1899,6 +1899,9 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) if (bdev_read_only(I_BDEV(bd_inode))) return -EPERM; + if (IS_SWAPFILE(bd_inode)) + return -ETXTBSY; + if (!iov_iter_count(from)) return 0; diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index e00c8a9fd5bb..72d7589072f5 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -265,16 +265,17 @@ out: } } -static void run_ordered_work(struct __btrfs_workqueue *wq) +static void run_ordered_work(struct __btrfs_workqueue *wq, + struct btrfs_work *self) { struct list_head *list = &wq->ordered_list; struct btrfs_work *work; spinlock_t *lock = &wq->list_lock; unsigned long flags; + void *wtag; + bool free_self = false; while (1) { - void *wtag; - spin_lock_irqsave(lock, flags); if (list_empty(list)) break; @@ -300,16 +301,47 @@ static void run_ordered_work(struct __btrfs_workqueue *wq) list_del(&work->ordered_list); spin_unlock_irqrestore(lock, flags); - /* - * We don't want to call the ordered free functions with the - * lock held though. Save the work as tag for the trace event, - * because the callback could free the structure. - */ - wtag = work; - work->ordered_free(work); - trace_btrfs_all_work_done(wq->fs_info, wtag); + if (work == self) { + /* + * This is the work item that the worker is currently + * executing. + * + * The kernel workqueue code guarantees non-reentrancy + * of work items. I.e., if a work item with the same + * address and work function is queued twice, the second + * execution is blocked until the first one finishes. A + * work item may be freed and recycled with the same + * work function; the workqueue code assumes that the + * original work item cannot depend on the recycled work + * item in that case (see find_worker_executing_work()). + * + * Note that the work of one Btrfs filesystem may depend + * on the work of another Btrfs filesystem via, e.g., a + * loop device. Therefore, we must not allow the current + * work item to be recycled until we are really done, + * otherwise we break the above assumption and can + * deadlock. + */ + free_self = true; + } else { + /* + * We don't want to call the ordered free functions with + * the lock held though. Save the work as tag for the + * trace event, because the callback could free the + * structure. + */ + wtag = work; + work->ordered_free(work); + trace_btrfs_all_work_done(wq->fs_info, wtag); + } } spin_unlock_irqrestore(lock, flags); + + if (free_self) { + wtag = self; + self->ordered_free(self); + trace_btrfs_all_work_done(wq->fs_info, wtag); + } } static void normal_work_helper(struct btrfs_work *work) @@ -337,7 +369,7 @@ static void normal_work_helper(struct btrfs_work *work) work->func(work); if (need_order) { set_bit(WORK_DONE_BIT, &work->flags); - run_ordered_work(wq); + run_ordered_work(wq, work); } if (!need_order) trace_btrfs_all_work_done(wq->fs_info, wtag); diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 7d5a9b51f0d7..4be07cf31d74 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -642,7 +642,6 @@ static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev, static int btrfsic_process_superblock(struct btrfsic_state *state, struct btrfs_fs_devices *fs_devices) { - struct btrfs_fs_info *fs_info = state->fs_info; struct btrfs_super_block *selected_super; struct list_head *dev_head = &fs_devices->devices; struct btrfs_device *device; @@ -713,7 +712,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, break; } - num_copies = btrfs_num_copies(fs_info, next_bytenr, + num_copies = btrfs_num_copies(state->fs_info, next_bytenr, state->metablock_size); if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) pr_info("num_copies(log_bytenr=%llu) = %d\n", diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 27983fd657ab..f5a8c0d26cf3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -334,26 +334,6 @@ struct tree_mod_elem { struct tree_mod_root old_root; }; -static inline void tree_mod_log_read_lock(struct btrfs_fs_info *fs_info) -{ - read_lock(&fs_info->tree_mod_log_lock); -} - -static inline void tree_mod_log_read_unlock(struct btrfs_fs_info *fs_info) -{ - read_unlock(&fs_info->tree_mod_log_lock); -} - -static inline void tree_mod_log_write_lock(struct btrfs_fs_info *fs_info) -{ - write_lock(&fs_info->tree_mod_log_lock); -} - -static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info) -{ - write_unlock(&fs_info->tree_mod_log_lock); -} - /* * Pull a new tree mod seq number for our operation. */ @@ -373,14 +353,12 @@ static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info) u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem) { - tree_mod_log_write_lock(fs_info); - spin_lock(&fs_info->tree_mod_seq_lock); + write_lock(&fs_info->tree_mod_log_lock); if (!elem->seq) { elem->seq = btrfs_inc_tree_mod_seq(fs_info); list_add_tail(&elem->list, &fs_info->tree_mod_seq_list); } - spin_unlock(&fs_info->tree_mod_seq_lock); - tree_mod_log_write_unlock(fs_info); + write_unlock(&fs_info->tree_mod_log_lock); return elem->seq; } @@ -399,7 +377,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, if (!seq_putting) return; - spin_lock(&fs_info->tree_mod_seq_lock); + write_lock(&fs_info->tree_mod_log_lock); list_del(&elem->list); elem->seq = 0; @@ -410,29 +388,27 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, * blocker with lower sequence number exists, we * cannot remove anything from the log */ - spin_unlock(&fs_info->tree_mod_seq_lock); + write_unlock(&fs_info->tree_mod_log_lock); return; } min_seq = cur_elem->seq; } } - spin_unlock(&fs_info->tree_mod_seq_lock); /* * anything that's lower than the lowest existing (read: blocked) * sequence number can be removed from the tree. */ - tree_mod_log_write_lock(fs_info); tm_root = &fs_info->tree_mod_log; for (node = rb_first(tm_root); node; node = next) { next = rb_next(node); tm = rb_entry(node, struct tree_mod_elem, node); - if (tm->seq > min_seq) + if (tm->seq >= min_seq) continue; rb_erase(node, tm_root); kfree(tm); } - tree_mod_log_write_unlock(fs_info); + write_unlock(&fs_info->tree_mod_log_lock); } /* @@ -443,7 +419,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info, * for root replace operations, or the logical address of the affected * block for all other operations. * - * Note: must be called with write lock (tree_mod_log_write_lock). + * Note: must be called with write lock for fs_info::tree_mod_log_lock. */ static noinline int __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) @@ -481,7 +457,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm) * Determines if logging can be omitted. Returns 1 if it can. Otherwise, it * returns zero with the tree_mod_log_lock acquired. The caller must hold * this until all tree mod log insertions are recorded in the rb tree and then - * call tree_mod_log_write_unlock() to release. + * write unlock fs_info::tree_mod_log_lock. */ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) { @@ -491,9 +467,9 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info, if (eb && btrfs_header_level(eb) == 0) return 1; - tree_mod_log_write_lock(fs_info); + write_lock(&fs_info->tree_mod_log_lock); if (list_empty(&(fs_info)->tree_mod_seq_list)) { - tree_mod_log_write_unlock(fs_info); + write_unlock(&fs_info->tree_mod_log_lock); return 1; } @@ -557,7 +533,7 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, } ret = __tree_mod_log_insert(fs_info, tm); - tree_mod_log_write_unlock(fs_info); + write_unlock(&eb->fs_info->tree_mod_log_lock); if (ret) kfree(tm); @@ -621,7 +597,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info, ret = __tree_mod_log_insert(fs_info, tm); if (ret) goto free_tms; - tree_mod_log_write_unlock(fs_info); + write_unlock(&eb->fs_info->tree_mod_log_lock); kfree(tm_list); return 0; @@ -632,7 +608,7 @@ free_tms: kfree(tm_list[i]); } if (locked) - tree_mod_log_write_unlock(fs_info); + write_unlock(&eb->fs_info->tree_mod_log_lock); kfree(tm_list); kfree(tm); @@ -713,7 +689,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info, if (!ret) ret = __tree_mod_log_insert(fs_info, tm); - tree_mod_log_write_unlock(fs_info); + write_unlock(&fs_info->tree_mod_log_lock); if (ret) goto free_tms; kfree(tm_list); @@ -740,7 +716,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, struct tree_mod_elem *cur = NULL; struct tree_mod_elem *found = NULL; - tree_mod_log_read_lock(fs_info); + read_lock(&fs_info->tree_mod_log_lock); tm_root = &fs_info->tree_mod_log; node = tm_root->rb_node; while (node) { @@ -768,7 +744,7 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq, break; } } - tree_mod_log_read_unlock(fs_info); + read_unlock(&fs_info->tree_mod_log_lock); return found; } @@ -849,7 +825,7 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, goto free_tms; } - tree_mod_log_write_unlock(fs_info); + write_unlock(&fs_info->tree_mod_log_lock); kfree(tm_list); return 0; @@ -861,7 +837,7 @@ free_tms: kfree(tm_list[i]); } if (locked) - tree_mod_log_write_unlock(fs_info); + write_unlock(&fs_info->tree_mod_log_lock); kfree(tm_list); return ret; @@ -921,7 +897,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb) goto free_tms; ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems); - tree_mod_log_write_unlock(fs_info); + write_unlock(&eb->fs_info->tree_mod_log_lock); if (ret) goto free_tms; kfree(tm_list); @@ -1279,7 +1255,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, unsigned long p_size = sizeof(struct btrfs_key_ptr); n = btrfs_header_nritems(eb); - tree_mod_log_read_lock(fs_info); + read_lock(&fs_info->tree_mod_log_lock); while (tm && tm->seq >= time_seq) { /* * all the operations are recorded with the operator used for @@ -1334,7 +1310,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, if (tm->logical != first_tm->logical) break; } - tree_mod_log_read_unlock(fs_info); + read_unlock(&fs_info->tree_mod_log_lock); btrfs_set_header_nritems(eb, n); } @@ -2988,6 +2964,10 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key, again: b = get_old_root(root, time_seq); + if (!b) { + ret = -EIO; + goto done; + } level = btrfs_header_level(b); p->locks[level] = BTRFS_READ_LOCK; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 588760c49fe2..5412b12491cb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -869,14 +869,12 @@ struct btrfs_fs_info { struct list_head delayed_iputs; struct mutex cleaner_delayed_iput_mutex; - /* this protects tree_mod_seq_list */ - spinlock_t tree_mod_seq_lock; atomic64_t tree_mod_seq; - struct list_head tree_mod_seq_list; - /* this protects tree_mod_log */ + /* this protects tree_mod_log and tree_mod_seq_list */ rwlock_t tree_mod_log_lock; struct rb_root tree_mod_log; + struct list_head tree_mod_seq_list; atomic_t nr_async_submits; atomic_t async_submit_draining; @@ -2408,32 +2406,6 @@ static inline u32 btrfs_file_extent_inline_item_len( return btrfs_item_size(eb, e) - BTRFS_FILE_EXTENT_INLINE_DATA_START; } -/* this returns the number of file bytes represented by the inline item. - * If an item is compressed, this is the uncompressed size - */ -static inline u32 btrfs_file_extent_inline_len(const struct extent_buffer *eb, - int slot, - const struct btrfs_file_extent_item *fi) -{ - struct btrfs_map_token token; - - btrfs_init_map_token(&token); - /* - * return the space used on disk if this item isn't - * compressed or encoded - */ - if (btrfs_token_file_extent_compression(eb, fi, &token) == 0 && - btrfs_token_file_extent_encryption(eb, fi, &token) == 0 && - btrfs_token_file_extent_other_encoding(eb, fi, &token) == 0) { - return btrfs_file_extent_inline_item_len(eb, - btrfs_item_nr(slot)); - } - - /* otherwise use the ram bytes field */ - return btrfs_token_file_extent_ram_bytes(eb, fi, &token); -} - - /* btrfs_dev_stats_item */ static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb, const struct btrfs_dev_stats_item *ptr, diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 04f39111fafb..87414fc9e268 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -1975,12 +1975,19 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root) } inode_id = delayed_nodes[n - 1]->inode_id + 1; - - for (i = 0; i < n; i++) - refcount_inc(&delayed_nodes[i]->refs); + for (i = 0; i < n; i++) { + /* + * Don't increase refs in case the node is dead and + * about to be removed from the tree in the loop below + */ + if (!refcount_inc_not_zero(&delayed_nodes[i]->refs)) + delayed_nodes[i] = NULL; + } spin_unlock(&root->inode_lock); for (i = 0; i < n; i++) { + if (!delayed_nodes[i]) + continue; __btrfs_kill_delayed_node(delayed_nodes[i]); btrfs_release_delayed_node(delayed_nodes[i]); } diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 93ffa898df6d..45714f1c43a3 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -195,8 +195,6 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans, ref->in_tree = 0; btrfs_put_delayed_ref(ref); atomic_dec(&delayed_refs->num_entries); - if (trans->delayed_ref_updates) - trans->delayed_ref_updates--; } static bool merge_ref(struct btrfs_trans_handle *trans, @@ -283,7 +281,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, if (head->is_data) return; - spin_lock(&fs_info->tree_mod_seq_lock); + read_lock(&fs_info->tree_mod_log_lock); if (!list_empty(&fs_info->tree_mod_seq_list)) { struct seq_list *elem; @@ -291,7 +289,7 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, struct seq_list, list); seq = elem->seq; } - spin_unlock(&fs_info->tree_mod_seq_lock); + read_unlock(&fs_info->tree_mod_log_lock); ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node, list); @@ -319,7 +317,7 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, struct seq_list *elem; int ret = 0; - spin_lock(&fs_info->tree_mod_seq_lock); + read_lock(&fs_info->tree_mod_log_lock); if (!list_empty(&fs_info->tree_mod_seq_list)) { elem = list_first_entry(&fs_info->tree_mod_seq_list, struct seq_list, list); @@ -333,7 +331,7 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, } } - spin_unlock(&fs_info->tree_mod_seq_lock); + read_unlock(&fs_info->tree_mod_log_lock); return ret; } @@ -458,7 +456,6 @@ add_tail: if (ref->action == BTRFS_ADD_DELAYED_REF) list_add_tail(&ref->add_list, &href->ref_add_list); atomic_inc(&root->num_entries); - trans->delayed_ref_updates++; spin_unlock(&href->lock); return ret; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 813834552aa1..6b4fee5c79f9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1679,8 +1679,8 @@ static void end_workqueue_fn(struct btrfs_work *work) bio->bi_status = end_io_wq->status; bio->bi_private = end_io_wq->private; bio->bi_end_io = end_io_wq->end_io; - kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); bio_endio(bio); + kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); } static int cleaner_kthread(void *arg) @@ -2051,7 +2051,7 @@ static void free_root_extent_buffers(struct btrfs_root *root) } /* helper to cleanup tree roots */ -static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) +static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root) { free_root_extent_buffers(info->tree_root); @@ -2060,7 +2060,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) free_root_extent_buffers(info->csum_root); free_root_extent_buffers(info->quota_root); free_root_extent_buffers(info->uuid_root); - if (chunk_root) + if (free_chunk_root) free_root_extent_buffers(info->chunk_root); free_root_extent_buffers(info->free_space_root); } @@ -2455,7 +2455,6 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->fs_roots_radix_lock); spin_lock_init(&fs_info->delayed_iput_lock); spin_lock_init(&fs_info->defrag_inodes_lock); - spin_lock_init(&fs_info->tree_mod_seq_lock); spin_lock_init(&fs_info->super_lock); spin_lock_init(&fs_info->qgroup_op_lock); spin_lock_init(&fs_info->buffer_lock); @@ -2914,6 +2913,7 @@ retry_root_backup: /* do not make disk changes in broken FS or nologreplay is given */ if (btrfs_super_log_root(disk_super) != 0 && !btrfs_test_opt(fs_info, NOLOGREPLAY)) { + btrfs_info(fs_info, "start tree-log replay"); ret = btrfs_replay_log(fs_info, fs_devices); if (ret) { err = ret; @@ -3069,7 +3069,7 @@ fail_block_groups: btrfs_put_block_group_cache(fs_info); fail_tree_roots: - free_root_pointers(fs_info, 1); + free_root_pointers(fs_info, true); invalidate_inode_pages2(fs_info->btree_inode->i_mapping); fail_sb_buffer: @@ -3097,7 +3097,7 @@ recovery_tree_root: if (!btrfs_test_opt(fs_info, USEBACKUPROOT)) goto fail_tree_roots; - free_root_pointers(fs_info, 0); + free_root_pointers(fs_info, false); /* don't use the log in recovery mode, it won't be valid */ btrfs_set_super_log_root(disk_super, 0); @@ -3761,10 +3761,17 @@ void close_ctree(struct btrfs_fs_info *fs_info) invalidate_inode_pages2(fs_info->btree_inode->i_mapping); btrfs_stop_all_workers(fs_info); - btrfs_free_block_groups(fs_info); - clear_bit(BTRFS_FS_OPEN, &fs_info->flags); - free_root_pointers(fs_info, 1); + free_root_pointers(fs_info, true); + + /* + * We must free the block groups after dropping the fs_roots as we could + * have had an IO error and have left over tree log blocks that aren't + * cleaned up until the fs roots are freed. This makes the block group + * accounting appear to be wrong because there's pending reserved bytes, + * so make sure we do the block group cleanup afterwards. + */ + btrfs_free_block_groups(fs_info); iput(fs_info->btree_inode); @@ -4387,7 +4394,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, wake_up(&fs_info->transaction_wait); btrfs_destroy_delayed_inodes(fs_info); - btrfs_assert_delayed_root_empty(fs_info); btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages, EXTENT_DIRTY); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 10dee8245558..fd15f396b3a0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10255,6 +10255,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) btrfs_err(info, "bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups", cache->key.objectid); + btrfs_put_block_group(cache); ret = -EINVAL; goto error; } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 2965bb413ed5..a485e34f2c70 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4037,6 +4037,14 @@ retry: */ scanned = 1; index = 0; + + /* + * If we're looping we could run into a page that is locked by a + * writer and that writer could be waiting on writeback for a + * page in our current bio, and thus deadlock, so flush the + * write bio here. + */ + flush_write_bio(data); goto retry; } @@ -4938,12 +4946,14 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info, return eb; eb = alloc_dummy_extent_buffer(fs_info, start); if (!eb) - return NULL; + return ERR_PTR(-ENOMEM); eb->fs_info = fs_info; again: ret = radix_tree_preload(GFP_NOFS); - if (ret) + if (ret) { + exists = ERR_PTR(ret); goto free_eb; + } spin_lock(&fs_info->buffer_lock); ret = radix_tree_insert(&fs_info->buffer_radix, start >> PAGE_SHIFT, eb); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 2e348fb0b280..c87d673ce334 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -228,6 +228,17 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) struct extent_map *merge = NULL; struct rb_node *rb; + /* + * We can't modify an extent map that is in the tree and that is being + * used by another task, as it can cause that other task to see it in + * inconsistent state during the merging. We always have 1 reference for + * the tree and 1 for this task (which is unpinning the extent map or + * clearing the logging flag), so anything > 2 means it's being used by + * other tasks too. + */ + if (refcount_read(&em->refs) > 2) + return; + if (em->start != 0) { rb = rb_prev(&em->rb_node); if (rb) diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index fdcb41002623..717d82d51bb1 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -288,7 +288,8 @@ found: csum += count * csum_size; nblocks -= count; next: - while (count--) { + while (count > 0) { + count--; disk_bytenr += fs_info->sectorsize; offset += fs_info->sectorsize; page_bytes_left -= fs_info->sectorsize; @@ -955,7 +956,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, btrfs_file_extent_num_bytes(leaf, fi); } else if (type == BTRFS_FILE_EXTENT_INLINE) { size_t size; - size = btrfs_file_extent_inline_len(leaf, slot, fi); + size = btrfs_file_extent_ram_bytes(leaf, fi); extent_end = ALIGN(extent_start + size, fs_info->sectorsize); } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6fbae1357644..725544ec9c84 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -784,8 +784,7 @@ next_slot: btrfs_file_extent_num_bytes(leaf, fi); } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { extent_end = key.offset + - btrfs_file_extent_inline_len(leaf, - path->slots[0], fi); + btrfs_file_extent_ram_bytes(leaf, fi); } else { /* can't happen */ BUG(); @@ -1625,6 +1624,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, break; } + only_release_metadata = false; sector_offset = pos & (fs_info->sectorsize - 1); reserve_bytes = round_up(write_bytes + sector_offset, fs_info->sectorsize); @@ -1778,7 +1778,6 @@ again: set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, EXTENT_NORESERVE, NULL, NULL, GFP_NOFS); - only_release_metadata = false; } btrfs_drop_pages(pages, num_pages); @@ -1882,7 +1881,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); ssize_t err; loff_t pos; - size_t count = iov_iter_count(from); + size_t count; loff_t oldsize; int clean_page = 0; @@ -1890,9 +1889,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, (iocb->ki_flags & IOCB_NOWAIT)) return -EOPNOTSUPP; - if (!inode_trylock(inode)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock(inode)) return -EAGAIN; + } else { inode_lock(inode); } @@ -1903,6 +1903,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, } pos = iocb->ki_pos; + count = iov_iter_count(from); if (iocb->ki_flags & IOCB_NOWAIT) { /* * We will allocate space in case nodatacow is not set, diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 9f31b81a5e27..abeb26d48d0a 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -398,6 +398,12 @@ static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode if (uptodate && !PageUptodate(page)) { btrfs_readpage(NULL, page); lock_page(page); + if (page->mapping != inode->i_mapping) { + btrfs_err(BTRFS_I(inode)->root->fs_info, + "free space cache page truncated"); + io_ctl_drop_pages(io_ctl); + return -EIO; + } if (!PageUptodate(page)) { btrfs_err(BTRFS_I(inode)->root->fs_info, "error reading free space cache"); diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index d02019747d00..2ae32451fb5b 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -26,6 +26,19 @@ #include "inode-map.h" #include "transaction.h" +static void fail_caching_thread(struct btrfs_root *root) +{ + struct btrfs_fs_info *fs_info = root->fs_info; + + btrfs_warn(fs_info, "failed to start inode caching task"); + btrfs_clear_pending_and_info(fs_info, INODE_MAP_CACHE, + "disabling inode map caching"); + spin_lock(&root->ino_cache_lock); + root->ino_cache_state = BTRFS_CACHE_ERROR; + spin_unlock(&root->ino_cache_lock); + wake_up(&root->ino_cache_wait); +} + static int caching_kthread(void *data) { struct btrfs_root *root = data; @@ -42,8 +55,10 @@ static int caching_kthread(void *data) return 0; path = btrfs_alloc_path(); - if (!path) + if (!path) { + fail_caching_thread(root); return -ENOMEM; + } /* Since the commit root is read-only, we can safely skip locking. */ path->skip_locking = 1; @@ -159,6 +174,7 @@ static void start_caching(struct btrfs_root *root) spin_lock(&root->ino_cache_lock); root->ino_cache_state = BTRFS_CACHE_FINISHED; spin_unlock(&root->ino_cache_lock); + wake_up(&root->ino_cache_wait); return; } @@ -177,11 +193,8 @@ static void start_caching(struct btrfs_root *root) tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu", root->root_key.objectid); - if (IS_ERR(tsk)) { - btrfs_warn(fs_info, "failed to start inode caching task"); - btrfs_clear_pending_and_info(fs_info, INODE_MAP_CACHE, - "disabling inode map caching"); - } + if (IS_ERR(tsk)) + fail_caching_thread(root); } int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid) @@ -199,11 +212,14 @@ again: wait_event(root->ino_cache_wait, root->ino_cache_state == BTRFS_CACHE_FINISHED || + root->ino_cache_state == BTRFS_CACHE_ERROR || root->free_ino_ctl->free_space > 0); if (root->ino_cache_state == BTRFS_CACHE_FINISHED && root->free_ino_ctl->free_space == 0) return -ENOSPC; + else if (root->ino_cache_state == BTRFS_CACHE_ERROR) + return btrfs_find_free_objectid(root, objectid); else goto again; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ddc1d1d1a29f..2a196bb134d9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1476,8 +1476,7 @@ next_slot: nocow = 1; } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { extent_end = found_key.offset + - btrfs_file_extent_inline_len(leaf, - path->slots[0], fi); + btrfs_file_extent_ram_bytes(leaf, fi); extent_end = ALIGN(extent_end, fs_info->sectorsize); } else { @@ -4651,8 +4650,8 @@ search_again: BTRFS_I(inode), leaf, fi, found_key.offset); } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { - item_end += btrfs_file_extent_inline_len(leaf, - path->slots[0], fi); + item_end += btrfs_file_extent_ram_bytes(leaf, + fi); trace_btrfs_truncate_show_fi_inline( BTRFS_I(inode), leaf, fi, path->slots[0], @@ -5729,7 +5728,6 @@ static void inode_tree_add(struct inode *inode) static void inode_tree_del(struct inode *inode) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_root *root = BTRFS_I(inode)->root; int empty = 0; @@ -5742,7 +5740,6 @@ static void inode_tree_del(struct inode *inode) spin_unlock(&root->inode_lock); if (empty && btrfs_root_refs(&root->root_item) == 0) { - synchronize_srcu(&fs_info->subvol_srcu); spin_lock(&root->inode_lock); empty = RB_EMPTY_ROOT(&root->inode_tree); spin_unlock(&root->inode_lock); @@ -7169,7 +7166,8 @@ again: extent_start); } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { size_t size; - size = btrfs_file_extent_inline_len(leaf, path->slots[0], item); + + size = btrfs_file_extent_ram_bytes(leaf, item); extent_end = ALIGN(extent_start + size, fs_info->sectorsize); @@ -7220,7 +7218,7 @@ next: if (new_inline) goto out; - size = btrfs_file_extent_inline_len(leaf, path->slots[0], item); + size = btrfs_file_extent_ram_bytes(leaf, item); extent_offset = page_offset(page) + pg_offset - extent_start; copy_size = min_t(u64, PAGE_SIZE - pg_offset, size - extent_offset); @@ -9820,9 +9818,8 @@ static int btrfs_rename_exchange(struct inode *old_dir, return -EXDEV; /* close the race window with snapshot create/destroy ioctl */ - if (old_ino == BTRFS_FIRST_FREE_OBJECTID) - down_read(&fs_info->subvol_sem); - if (new_ino == BTRFS_FIRST_FREE_OBJECTID) + if (old_ino == BTRFS_FIRST_FREE_OBJECTID || + new_ino == BTRFS_FIRST_FREE_OBJECTID) down_read(&fs_info->subvol_sem); /* @@ -9839,6 +9836,9 @@ static int btrfs_rename_exchange(struct inode *old_dir, goto out_notrans; } + if (dest != root) + btrfs_record_root_in_trans(trans, dest); + /* * We need to find a free sequence number both in the source and * in the destination directory for the exchange. @@ -10011,9 +10011,8 @@ out_fail: ret2 = btrfs_end_transaction(trans); ret = ret ? ret : ret2; out_notrans: - if (new_ino == BTRFS_FIRST_FREE_OBJECTID) - up_read(&fs_info->subvol_sem); - if (old_ino == BTRFS_FIRST_FREE_OBJECTID) + if (new_ino == BTRFS_FIRST_FREE_OBJECTID || + old_ino == BTRFS_FIRST_FREE_OBJECTID) up_read(&fs_info->subvol_sem); return ret; @@ -10640,6 +10639,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; u64 cur_offset = start; + u64 clear_offset = start; u64 i_size; u64 cur_bytes; u64 last_alloc = (u64)-1; @@ -10674,6 +10674,15 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, btrfs_end_transaction(trans); break; } + + /* + * We've reserved this space, and thus converted it from + * ->bytes_may_use to ->bytes_reserved. Any error that happens + * from here on out we will only need to clear our reservation + * for the remaining unreserved area, so advance our + * clear_offset by our extent size. + */ + clear_offset += ins.offset; btrfs_dec_block_group_reservations(fs_info, ins.objectid); last_alloc = ins.offset; @@ -10754,9 +10763,9 @@ next: if (own_trans) btrfs_end_transaction(trans); } - if (cur_offset < end) - btrfs_free_reserved_data_space(inode, NULL, cur_offset, - end - cur_offset + 1); + if (clear_offset < end) + btrfs_free_reserved_data_space(inode, NULL, clear_offset, + end - clear_offset + 1); return ret; } diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index dd3b4820ac30..e82b4f3f490c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -580,12 +580,18 @@ static noinline int create_subvol(struct inode *dir, btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2); ret = btrfs_update_inode(trans, root, dir); - BUG_ON(ret); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto fail; + } ret = btrfs_add_root_ref(trans, fs_info, objectid, root->root_key.objectid, btrfs_ino(BTRFS_I(dir)), index, name, namelen); - BUG_ON(ret); + if (ret) { + btrfs_abort_transaction(trans, ret); + goto fail; + } ret = btrfs_uuid_tree_add(trans, fs_info, root_item->uuid, BTRFS_UUID_KEY_SUBVOL, objectid); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a3aca495e33e..d2287ea9fc50 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -838,10 +838,15 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) } btrfs_start_ordered_extent(inode, ordered, 1); end = ordered->file_offset; + /* + * If the ordered extent had an error save the error but don't + * exit without waiting first for all other ordered extents in + * the range to complete. + */ if (test_bit(BTRFS_ORDERED_IOERR, &ordered->flags)) ret = -EIO; btrfs_put_ordered_extent(ordered); - if (ret || end == 0 || end == start) + if (end == 0 || end == start) break; end--; } diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 569205e651c7..47336d4b19d8 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -259,8 +259,8 @@ void btrfs_print_leaf(struct extent_buffer *l) struct btrfs_file_extent_item); if (btrfs_file_extent_type(l, fi) == BTRFS_FILE_EXTENT_INLINE) { - pr_info("\t\tinline extent data size %u\n", - btrfs_file_extent_inline_len(l, i, fi)); + pr_info("\t\tinline extent data size %llu\n", + btrfs_file_extent_ram_bytes(l, fi)); break; } pr_info("\t\textent data disk bytenr %llu nr %llu\n", diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index cb6e8cb0de94..39a00b57ff01 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1928,8 +1928,12 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 nr_old_roots = 0; int ret = 0; + /* + * If quotas get disabled meanwhile, the resouces need to be freed and + * we can't just exit here. + */ if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) - return 0; + goto out_free; if (new_roots) { if (!maybe_fs_roots(new_roots)) diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 3a4e15b39cc1..440c0d5d2050 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -734,21 +734,19 @@ static int reada_start_machine_dev(struct btrfs_device *dev) static void reada_start_machine_worker(struct btrfs_work *work) { struct reada_machine_work *rmw; - struct btrfs_fs_info *fs_info; int old_ioprio; rmw = container_of(work, struct reada_machine_work, work); - fs_info = rmw->fs_info; - - kfree(rmw); old_ioprio = IOPRIO_PRIO_VALUE(task_nice_ioclass(current), task_nice_ioprio(current)); set_task_ioprio(current, BTRFS_IOPRIO_READA); - __reada_start_machine(fs_info); + __reada_start_machine(rmw->fs_info); set_task_ioprio(current, old_ioprio); - atomic_dec(&fs_info->reada_works_cnt); + atomic_dec(&rmw->fs_info->reada_works_cnt); + + kfree(rmw); } static void __reada_start_machine(struct btrfs_fs_info *fs_info) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 9fa6db6a6f7d..d4c00edd16d2 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4587,6 +4587,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) fs_root = read_fs_root(fs_info, reloc_root->root_key.offset); if (IS_ERR(fs_root)) { err = PTR_ERR(fs_root); + list_add_tail(&reloc_root->root_list, &reloc_roots); goto out_free; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 61192c536e6c..2ebae9773978 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2421,14 +2421,13 @@ static void scrub_missing_raid56_worker(struct btrfs_work *work) scrub_write_block_to_dev_replace(sblock); } - scrub_block_put(sblock); - if (sctx->is_dev_replace && sctx->flush_all_writes) { mutex_lock(&sctx->wr_lock); scrub_wr_submit(sctx); mutex_unlock(&sctx->wr_lock); } + scrub_block_put(sblock); scrub_pending_bio_dec(sctx); } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index a5905f97b3db..ca15d65a2070 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -36,6 +36,14 @@ #include "transaction.h" #include "compression.h" +/* + * Maximum number of references an extent can have in order for us to attempt to + * issue clone operations instead of write operations. This currently exists to + * avoid hitting limitations of the backreference walking code (taking a lot of + * time and using too much memory for extents with large number of references). + */ +#define SEND_MAX_EXTENT_REFS 64 + /* * A fs_path is a helper to dynamically build path names with unknown size. * It reallocates the internal buffer on demand. @@ -1324,6 +1332,7 @@ static int find_extent_clone(struct send_ctx *sctx, struct clone_root *cur_clone_root; struct btrfs_key found_key; struct btrfs_path *tmp_path; + struct btrfs_extent_item *ei; int compressed; u32 i; @@ -1373,7 +1382,6 @@ static int find_extent_clone(struct send_ctx *sctx, ret = extent_from_logical(fs_info, disk_byte, tmp_path, &found_key, &flags); up_read(&fs_info->commit_root_sem); - btrfs_release_path(tmp_path); if (ret < 0) goto out; @@ -1382,6 +1390,21 @@ static int find_extent_clone(struct send_ctx *sctx, goto out; } + ei = btrfs_item_ptr(tmp_path->nodes[0], tmp_path->slots[0], + struct btrfs_extent_item); + /* + * Backreference walking (iterate_extent_inodes() below) is currently + * too expensive when an extent has a large number of references, both + * in time spent and used memory. So for now just fallback to write + * operations instead of clone operations when an extent has more than + * a certain amount of references. + */ + if (btrfs_extent_refs(tmp_path->nodes[0], ei) > SEND_MAX_EXTENT_REFS) { + ret = -ENOENT; + goto out; + } + btrfs_release_path(tmp_path); + /* * Setup the clone roots. */ @@ -1522,7 +1545,7 @@ static int read_symlink(struct btrfs_root *root, BUG_ON(compression); off = btrfs_file_extent_inline_start(ei); - len = btrfs_file_extent_inline_len(path->nodes[0], path->slots[0], ei); + len = btrfs_file_extent_ram_bytes(path->nodes[0], ei); ret = fs_path_add_from_extent_buffer(dest, path->nodes[0], off, len); @@ -5172,7 +5195,7 @@ static int clone_range(struct send_ctx *sctx, ei = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); type = btrfs_file_extent_type(leaf, ei); if (type == BTRFS_FILE_EXTENT_INLINE) { - ext_len = btrfs_file_extent_inline_len(leaf, slot, ei); + ext_len = btrfs_file_extent_ram_bytes(leaf, ei); ext_len = PAGE_ALIGN(ext_len); } else { ext_len = btrfs_file_extent_num_bytes(leaf, ei); @@ -5248,8 +5271,7 @@ static int send_write_or_clone(struct send_ctx *sctx, struct btrfs_file_extent_item); type = btrfs_file_extent_type(path->nodes[0], ei); if (type == BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_inline_len(path->nodes[0], - path->slots[0], ei); + len = btrfs_file_extent_ram_bytes(path->nodes[0], ei); /* * it is possible the inline item won't cover the whole page, * but there may be items after this page. Make @@ -5382,7 +5404,7 @@ static int is_extent_unchanged(struct send_ctx *sctx, } if (right_type == BTRFS_FILE_EXTENT_INLINE) { - right_len = btrfs_file_extent_inline_len(eb, slot, ei); + right_len = btrfs_file_extent_ram_bytes(eb, ei); right_len = PAGE_ALIGN(right_len); } else { right_len = btrfs_file_extent_num_bytes(eb, ei); @@ -5503,8 +5525,7 @@ static int get_last_extent(struct send_ctx *sctx, u64 offset) struct btrfs_file_extent_item); type = btrfs_file_extent_type(path->nodes[0], fi); if (type == BTRFS_FILE_EXTENT_INLINE) { - u64 size = btrfs_file_extent_inline_len(path->nodes[0], - path->slots[0], fi); + u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi); extent_end = ALIGN(key.offset + size, sctx->send_root->fs_info->sectorsize); } else { @@ -5567,7 +5588,7 @@ static int range_is_hole_in_parent(struct send_ctx *sctx, fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { - u64 size = btrfs_file_extent_inline_len(leaf, slot, fi); + u64 size = btrfs_file_extent_ram_bytes(leaf, fi); extent_end = ALIGN(key.offset + size, root->fs_info->sectorsize); @@ -5613,8 +5634,7 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, struct btrfs_file_extent_item); type = btrfs_file_extent_type(path->nodes[0], fi); if (type == BTRFS_FILE_EXTENT_INLINE) { - u64 size = btrfs_file_extent_inline_len(path->nodes[0], - path->slots[0], fi); + u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi); extent_end = ALIGN(key->offset + size, sctx->send_root->fs_info->sectorsize); } else { diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 49a02bf091ae..17a8463ef35c 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1801,6 +1801,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) } if (btrfs_super_log_root(fs_info->super_copy) != 0) { + btrfs_warn(fs_info, + "mount required to replay tree-log, cannot remount read-write"); ret = -EINVAL; goto restore; } @@ -1863,7 +1865,7 @@ restore: } /* Used to sort the devices by max_avail(descending sort) */ -static int btrfs_cmp_device_free_bytes(const void *dev_info1, +static inline int btrfs_cmp_device_free_bytes(const void *dev_info1, const void *dev_info2) { if (((struct btrfs_device_info *)dev_info1)->max_avail > @@ -1892,8 +1894,8 @@ static inline void btrfs_descending_sort_devices( * The helper to calc the free space on the devices that can be used to store * file data. */ -static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, - u64 *free_bytes) +static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, + u64 *free_bytes) { struct btrfs_device_info *devices_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; @@ -2114,7 +2116,15 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) */ thresh = 4 * 1024 * 1024; - if (!mixed && total_free_meta - thresh < block_rsv->size) + /* + * We only want to claim there's no available space if we can no longer + * allocate chunks for our metadata profile and our global reserve will + * not fit in the free metadata space. If we aren't ->full then we + * still can allocate chunks and thus are fine using the currently + * calculated f_bavail. + */ + if (!mixed && block_rsv->space_info->full && + total_free_meta - thresh < block_rsv->size) buf->f_bavail = 0; buf->f_type = BTRFS_SUPER_MAGIC; diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index d3f25376a0f8..6c92101e8092 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -115,7 +115,6 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize) spin_lock_init(&fs_info->qgroup_op_lock); spin_lock_init(&fs_info->super_lock); spin_lock_init(&fs_info->fs_roots_radix_lock); - spin_lock_init(&fs_info->tree_mod_seq_lock); mutex_init(&fs_info->qgroup_ioctl_lock); mutex_init(&fs_info->qgroup_rescan_lock); rwlock_init(&fs_info->tree_mod_log_lock); diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index 8444a018cca2..f6c783e959b7 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -475,9 +475,9 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize, root->fs_info->tree_root = root; root->node = alloc_test_extent_buffer(root->fs_info, nodesize); - if (!root->node) { - test_msg("Couldn't allocate dummy buffer\n"); - ret = -ENOMEM; + if (IS_ERR(root->node)) { + test_msg("couldn't allocate dummy buffer\n"); + ret = PTR_ERR(root->node); goto out; } btrfs_set_header_level(root->node, 0); diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index 578fd045e859..eb72cf280546 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -487,9 +487,9 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize) * *cough*backref walking code*cough* */ root->node = alloc_test_extent_buffer(root->fs_info, nodesize); - if (!root->node) { + if (IS_ERR(root->node)) { test_msg("Couldn't allocate dummy buffer\n"); - ret = -ENOMEM; + ret = PTR_ERR(root->node); goto out; } btrfs_set_header_level(root->node, 0); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index fa8f56e6f665..a066ad581976 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1948,6 +1948,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) struct btrfs_transaction *prev_trans = NULL; int ret; + /* + * Some places just start a transaction to commit it. We need to make + * sure that if this commit fails that the abort code actually marks the + * transaction as failed, so set trans->dirty to make the abort code do + * the right thing. + */ + trans->dirty = true; + /* Stop the commit early if ->aborted is set */ if (unlikely(READ_ONCE(cur_trans->aborted))) { ret = cur_trans->aborted; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index e35301e5fe8e..0b62c8080af0 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -619,7 +619,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, if (btrfs_file_extent_disk_bytenr(eb, item) == 0) nbytes = 0; } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { - size = btrfs_file_extent_inline_len(eb, slot, item); + size = btrfs_file_extent_ram_bytes(eb, item); nbytes = btrfs_file_extent_ram_bytes(eb, item); extent_end = ALIGN(start + size, fs_info->sectorsize); @@ -3758,7 +3758,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans, static noinline int copy_items(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, struct btrfs_path *dst_path, - struct btrfs_path *src_path, u64 *last_extent, + struct btrfs_path *src_path, int start_slot, int nr, int inode_only, u64 logged_isize) { @@ -3769,7 +3769,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item *extent; struct btrfs_inode_item *inode_item; struct extent_buffer *src = src_path->nodes[0]; - struct btrfs_key first_key, last_key, key; int ret; struct btrfs_key *ins_keys; u32 *ins_sizes; @@ -3777,9 +3776,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, int i; struct list_head ordered_sums; int skip_csum = inode->flags & BTRFS_INODE_NODATASUM; - bool has_extents = false; - bool need_find_last_extent = true; - bool done = false; INIT_LIST_HEAD(&ordered_sums); @@ -3788,8 +3784,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, if (!ins_data) return -ENOMEM; - first_key.objectid = (u64)-1; - ins_sizes = (u32 *)ins_data; ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32)); @@ -3810,9 +3804,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, src_offset = btrfs_item_ptr_offset(src, start_slot + i); - if (i == nr - 1) - last_key = ins_keys[i]; - if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { inode_item = btrfs_item_ptr(dst_path->nodes[0], dst_path->slots[0], @@ -3826,20 +3817,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, src_offset, ins_sizes[i]); } - /* - * We set need_find_last_extent here in case we know we were - * processing other items and then walk into the first extent in - * the inode. If we don't hit an extent then nothing changes, - * we'll do the last search the next time around. - */ - if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) { - has_extents = true; - if (first_key.objectid == (u64)-1) - first_key = ins_keys[i]; - } else { - need_find_last_extent = false; - } - /* take a reference on file data extents so that truncates * or deletes of this inode don't have to relog the inode * again @@ -3905,169 +3882,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, kfree(sums); } - if (!has_extents) - return ret; - - if (need_find_last_extent && *last_extent == first_key.offset) { - /* - * We don't have any leafs between our current one and the one - * we processed before that can have file extent items for our - * inode (and have a generation number smaller than our current - * transaction id). - */ - need_find_last_extent = false; - } - - /* - * Because we use btrfs_search_forward we could skip leaves that were - * not modified and then assume *last_extent is valid when it really - * isn't. So back up to the previous leaf and read the end of the last - * extent before we go and fill in holes. - */ - if (need_find_last_extent) { - u64 len; - - ret = btrfs_prev_leaf(inode->root, src_path); - if (ret < 0) - return ret; - if (ret) - goto fill_holes; - if (src_path->slots[0]) - src_path->slots[0]--; - src = src_path->nodes[0]; - btrfs_item_key_to_cpu(src, &key, src_path->slots[0]); - if (key.objectid != btrfs_ino(inode) || - key.type != BTRFS_EXTENT_DATA_KEY) - goto fill_holes; - extent = btrfs_item_ptr(src, src_path->slots[0], - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(src, extent) == - BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_inline_len(src, - src_path->slots[0], - extent); - *last_extent = ALIGN(key.offset + len, - fs_info->sectorsize); - } else { - len = btrfs_file_extent_num_bytes(src, extent); - *last_extent = key.offset + len; - } - } -fill_holes: - /* So we did prev_leaf, now we need to move to the next leaf, but a few - * things could have happened - * - * 1) A merge could have happened, so we could currently be on a leaf - * that holds what we were copying in the first place. - * 2) A split could have happened, and now not all of the items we want - * are on the same leaf. - * - * So we need to adjust how we search for holes, we need to drop the - * path and re-search for the first extent key we found, and then walk - * forward until we hit the last one we copied. - */ - if (need_find_last_extent) { - /* btrfs_prev_leaf could return 1 without releasing the path */ - btrfs_release_path(src_path); - ret = btrfs_search_slot(NULL, inode->root, &first_key, - src_path, 0, 0); - if (ret < 0) - return ret; - ASSERT(ret == 0); - src = src_path->nodes[0]; - i = src_path->slots[0]; - } else { - i = start_slot; - } - - /* - * Ok so here we need to go through and fill in any holes we may have - * to make sure that holes are punched for those areas in case they had - * extents previously. - */ - while (!done) { - u64 offset, len; - u64 extent_end; - - if (i >= btrfs_header_nritems(src_path->nodes[0])) { - ret = btrfs_next_leaf(inode->root, src_path); - if (ret < 0) - return ret; - ASSERT(ret == 0); - src = src_path->nodes[0]; - i = 0; - need_find_last_extent = true; - } - - btrfs_item_key_to_cpu(src, &key, i); - if (!btrfs_comp_cpu_keys(&key, &last_key)) - done = true; - if (key.objectid != btrfs_ino(inode) || - key.type != BTRFS_EXTENT_DATA_KEY) { - i++; - continue; - } - extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(src, extent) == - BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_inline_len(src, i, extent); - extent_end = ALIGN(key.offset + len, - fs_info->sectorsize); - } else { - len = btrfs_file_extent_num_bytes(src, extent); - extent_end = key.offset + len; - } - i++; - - if (*last_extent == key.offset) { - *last_extent = extent_end; - continue; - } - offset = *last_extent; - len = key.offset - *last_extent; - ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode), - offset, 0, 0, len, 0, len, 0, 0, 0); - if (ret) - break; - *last_extent = extent_end; - } - - /* - * Check if there is a hole between the last extent found in our leaf - * and the first extent in the next leaf. If there is one, we need to - * log an explicit hole so that at replay time we can punch the hole. - */ - if (ret == 0 && - key.objectid == btrfs_ino(inode) && - key.type == BTRFS_EXTENT_DATA_KEY && - i == btrfs_header_nritems(src_path->nodes[0])) { - ret = btrfs_next_leaf(inode->root, src_path); - need_find_last_extent = true; - if (ret > 0) { - ret = 0; - } else if (ret == 0) { - btrfs_item_key_to_cpu(src_path->nodes[0], &key, - src_path->slots[0]); - if (key.objectid == btrfs_ino(inode) && - key.type == BTRFS_EXTENT_DATA_KEY && - *last_extent < key.offset) { - const u64 len = key.offset - *last_extent; - - ret = btrfs_insert_file_extent(trans, log, - btrfs_ino(inode), - *last_extent, 0, - 0, len, 0, len, - 0, 0, 0); - *last_extent += len; - } - } - } - /* - * Need to let the callers know we dropped the path so they should - * re-search. - */ - if (!ret && need_find_last_extent) - ret = 1; return ret; } @@ -4340,7 +4154,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, const u64 i_size = i_size_read(&inode->vfs_inode); const u64 ino = btrfs_ino(inode); struct btrfs_path *dst_path = NULL; - u64 last_extent = (u64)-1; + bool dropped_extents = false; int ins_nr = 0; int start_slot; int ret; @@ -4362,8 +4176,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, if (slot >= btrfs_header_nritems(leaf)) { if (ins_nr > 0) { ret = copy_items(trans, inode, dst_path, path, - &last_extent, start_slot, - ins_nr, 1, 0); + start_slot, ins_nr, 1, 0); if (ret < 0) goto out; ins_nr = 0; @@ -4387,8 +4200,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, path->slots[0]++; continue; } - if (last_extent == (u64)-1) { - last_extent = key.offset; + if (!dropped_extents) { /* * Avoid logging extent items logged in past fsync calls * and leading to duplicate keys in the log tree. @@ -4402,6 +4214,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, } while (ret == -EAGAIN); if (ret) goto out; + dropped_extents = true; } if (ins_nr == 0) start_slot = slot; @@ -4416,7 +4229,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, } } if (ins_nr > 0) { - ret = copy_items(trans, inode, dst_path, path, &last_extent, + ret = copy_items(trans, inode, dst_path, path, start_slot, ins_nr, 1, 0); if (ret > 0) ret = 0; @@ -4610,13 +4423,8 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, if (slot >= nritems) { if (ins_nr > 0) { - u64 last_extent = 0; - ret = copy_items(trans, inode, dst_path, path, - &last_extent, start_slot, - ins_nr, 1, 0); - /* can't be 1, extent items aren't processed */ - ASSERT(ret <= 0); + start_slot, ins_nr, 1, 0); if (ret < 0) return ret; ins_nr = 0; @@ -4640,13 +4448,8 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, cond_resched(); } if (ins_nr > 0) { - u64 last_extent = 0; - ret = copy_items(trans, inode, dst_path, path, - &last_extent, start_slot, - ins_nr, 1, 0); - /* can't be 1, extent items aren't processed */ - ASSERT(ret <= 0); + start_slot, ins_nr, 1, 0); if (ret < 0) return ret; } @@ -4655,109 +4458,119 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, } /* - * If the no holes feature is enabled we need to make sure any hole between the - * last extent and the i_size of our inode is explicitly marked in the log. This - * is to make sure that doing something like: - * - * 1) create file with 128Kb of data - * 2) truncate file to 64Kb - * 3) truncate file to 256Kb - * 4) fsync file - * 5) - * 6) mount fs and trigger log replay - * - * Will give us a file with a size of 256Kb, the first 64Kb of data match what - * the file had in its first 64Kb of data at step 1 and the last 192Kb of the - * file correspond to a hole. The presence of explicit holes in a log tree is - * what guarantees that log replay will remove/adjust file extent items in the - * fs/subvol tree. - * - * Here we do not need to care about holes between extents, that is already done - * by copy_items(). We also only need to do this in the full sync path, where we - * lookup for extents from the fs/subvol tree only. In the fast path case, we - * lookup the list of modified extent maps and if any represents a hole, we - * insert a corresponding extent representing a hole in the log tree. + * When using the NO_HOLES feature if we punched a hole that causes the + * deletion of entire leafs or all the extent items of the first leaf (the one + * that contains the inode item and references) we may end up not processing + * any extents, because there are no leafs with a generation matching the + * current transaction that have extent items for our inode. So we need to find + * if any holes exist and then log them. We also need to log holes after any + * truncate operation that changes the inode's size. */ -static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_inode *inode, - struct btrfs_path *path) +static int btrfs_log_holes(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_inode *inode, + struct btrfs_path *path) { struct btrfs_fs_info *fs_info = root->fs_info; - int ret; struct btrfs_key key; - u64 hole_start; - u64 hole_size; - struct extent_buffer *leaf; - struct btrfs_root *log = root->log_root; const u64 ino = btrfs_ino(inode); const u64 i_size = i_size_read(&inode->vfs_inode); + u64 prev_extent_end = 0; + int ret; - if (!btrfs_fs_incompat(fs_info, NO_HOLES)) + if (!btrfs_fs_incompat(fs_info, NO_HOLES) || i_size == 0) return 0; key.objectid = ino; key.type = BTRFS_EXTENT_DATA_KEY; - key.offset = (u64)-1; + key.offset = 0; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - ASSERT(ret != 0); if (ret < 0) return ret; - ASSERT(path->slots[0] > 0); - path->slots[0]--; - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - - if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) { - /* inode does not have any extents */ - hole_start = 0; - hole_size = i_size; - } else { + while (true) { struct btrfs_file_extent_item *extent; + struct extent_buffer *leaf = path->nodes[0]; u64 len; - /* - * If there's an extent beyond i_size, an explicit hole was - * already inserted by copy_items(). - */ - if (key.offset >= i_size) - return 0; + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ret; + if (ret > 0) { + ret = 0; + break; + } + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) + break; + + /* We have a hole, log it. */ + if (prev_extent_end < key.offset) { + const u64 hole_len = key.offset - prev_extent_end; + + /* + * Release the path to avoid deadlocks with other code + * paths that search the root while holding locks on + * leafs from the log root. + */ + btrfs_release_path(path); + ret = btrfs_insert_file_extent(trans, root->log_root, + ino, prev_extent_end, 0, + 0, hole_len, 0, hole_len, + 0, 0, 0); + if (ret < 0) + return ret; + + /* + * Search for the same key again in the root. Since it's + * an extent item and we are holding the inode lock, the + * key must still exist. If it doesn't just emit warning + * and return an error to fall back to a transaction + * commit. + */ + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ret; + if (WARN_ON(ret > 0)) + return -ENOENT; + leaf = path->nodes[0]; + } extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, extent) == BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_inline_len(leaf, - path->slots[0], - extent); - ASSERT(len == i_size || - (len == fs_info->sectorsize && - btrfs_file_extent_compression(leaf, extent) != - BTRFS_COMPRESS_NONE) || - (len < i_size && i_size < fs_info->sectorsize)); - return 0; + len = btrfs_file_extent_ram_bytes(leaf, extent); + prev_extent_end = ALIGN(key.offset + len, + fs_info->sectorsize); + } else { + len = btrfs_file_extent_num_bytes(leaf, extent); + prev_extent_end = key.offset + len; } - len = btrfs_file_extent_num_bytes(leaf, extent); - /* Last extent goes beyond i_size, no need to log a hole. */ - if (key.offset + len > i_size) - return 0; - hole_start = key.offset + len; - hole_size = i_size - hole_start; + path->slots[0]++; + cond_resched(); } - btrfs_release_path(path); - /* Last extent ends at i_size. */ - if (hole_size == 0) - return 0; + if (prev_extent_end < i_size) { + u64 hole_len; - hole_size = ALIGN(hole_size, fs_info->sectorsize); - ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0, - hole_size, 0, hole_size, 0, 0, 0); - return ret; + btrfs_release_path(path); + hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize); + ret = btrfs_insert_file_extent(trans, root->log_root, + ino, prev_extent_end, 0, 0, + hole_len, 0, hole_len, + 0, 0, 0); + if (ret < 0) + return ret; + } + + return 0; } /* @@ -4925,7 +4738,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct btrfs_root *log = root->log_root; struct extent_buffer *src = NULL; LIST_HEAD(logged_list); - u64 last_extent = 0; int err = 0; int ret; int nritems; @@ -5099,7 +4911,7 @@ again: ins_start_slot = path->slots[0]; } ret = copy_items(trans, inode, dst_path, path, - &last_extent, ins_start_slot, + ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { @@ -5153,17 +4965,13 @@ again: if (ins_nr == 0) goto next_slot; ret = copy_items(trans, inode, dst_path, path, - &last_extent, ins_start_slot, + ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } ins_nr = 0; - if (ret) { - btrfs_release_path(path); - continue; - } goto next_slot; } @@ -5177,18 +4985,13 @@ again: goto next_slot; } - ret = copy_items(trans, inode, dst_path, path, &last_extent, + ret = copy_items(trans, inode, dst_path, path, ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } - if (ret) { - ins_nr = 0; - btrfs_release_path(path); - continue; - } ins_nr = 1; ins_start_slot = path->slots[0]; next_slot: @@ -5202,13 +5005,12 @@ next_slot: } if (ins_nr) { ret = copy_items(trans, inode, dst_path, path, - &last_extent, ins_start_slot, + ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } - ret = 0; ins_nr = 0; } btrfs_release_path(path); @@ -5223,14 +5025,13 @@ next_key: } } if (ins_nr) { - ret = copy_items(trans, inode, dst_path, path, &last_extent, + ret = copy_items(trans, inode, dst_path, path, ins_start_slot, ins_nr, inode_only, logged_isize); if (ret < 0) { err = ret; goto out_unlock; } - ret = 0; ins_nr = 0; } @@ -5243,7 +5044,7 @@ next_key: if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { btrfs_release_path(path); btrfs_release_path(dst_path); - err = btrfs_log_trailing_hole(trans, root, inode, path); + err = btrfs_log_holes(trans, root, inode, path); if (err) goto out_unlock; } @@ -6018,9 +5819,28 @@ again: wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key); if (IS_ERR(wc.replay_dest)) { ret = PTR_ERR(wc.replay_dest); + + /* + * We didn't find the subvol, likely because it was + * deleted. This is ok, simply skip this log and go to + * the next one. + * + * We need to exclude the root because we can't have + * other log replays overwriting this log as we'll read + * it back in a few more times. This will keep our + * block from being modified, and we'll just bail for + * each subsequent pass. + */ + if (ret == -ENOENT) + ret = btrfs_pin_extent_for_log_replay(fs_info, + log->node->start, + log->node->len); free_extent_buffer(log->node); free_extent_buffer(log->commit_root); kfree(log); + + if (!ret) + goto next; btrfs_handle_fs_error(fs_info, ret, "Couldn't read target root for tree log recovery."); goto error; @@ -6052,7 +5872,6 @@ again: &root->highest_objectid); } - key.offset = found_key.offset - 1; wc.replay_dest->log_root = NULL; free_extent_buffer(log->node); free_extent_buffer(log->commit_root); @@ -6060,9 +5879,10 @@ again: if (ret) goto error; - +next: if (found_key.offset == 0) break; + key.offset = found_key.offset - 1; } btrfs_release_path(path); diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index 726f928238d0..331f3a1ad23b 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -336,6 +336,8 @@ again_search_slot: } if (ret < 0 && ret != -ENOENT) goto out; + key.offset++; + goto again_search_slot; } item_size -= sizeof(subid_le); offset += sizeof(subid_le); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 358e930df4ac..6d34842912e8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7227,6 +7227,8 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info, else btrfs_dev_stat_reset(dev, i); } + btrfs_info(fs_info, "device stats zeroed by %s (%d)", + current->comm, task_pid_nr(current)); } else { for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) if (stats->nr_items > i) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index f6ae6cdf233d..07b805d08e55 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -317,7 +317,6 @@ struct btrfs_bio { u64 map_type; /* get from map_lookup->type */ bio_end_io_t *end_io; struct bio *orig_bio; - unsigned long flags; void *private; atomic_t error; int max_errors; diff --git a/fs/buffer.c b/fs/buffer.c index 713bc25e47f7..613b33f6b175 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -46,6 +46,7 @@ #include #include #include +#include static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, @@ -3129,6 +3130,8 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, */ bio = bio_alloc(GFP_NOIO, 1); + fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); + if (wbc) { wbc_init_bio(wbc, bio); wbc_account_io(wbc, bh->b_page, bh->b_size); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index df95e39ccd45..c3a3ee74e2d8 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -935,6 +935,11 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); + /* remove from inode's cap rbtree, and clear auth cap */ + rb_erase(&cap->ci_node, &ci->i_caps); + if (ci->i_auth_cap == cap) + ci->i_auth_cap = NULL; + /* remove from session list */ spin_lock(&session->s_cap_lock); if (session->s_cap_iterator == cap) { @@ -970,11 +975,6 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release) spin_unlock(&session->s_cap_lock); - /* remove from inode list */ - rb_erase(&cap->ci_node, &ci->i_caps); - if (ci->i_auth_cap == cap) - ci->i_auth_cap = NULL; - if (removed) ceph_put_cap(mdsc, cap); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 879bc0825093..5999d806de78 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -1347,6 +1347,7 @@ retry_lookup: dout(" final dn %p\n", dn); } else if ((req->r_op == CEPH_MDS_OP_LOOKUPSNAP || req->r_op == CEPH_MDS_OP_MKSNAP) && + test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) && !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { struct dentry *dn = req->r_dentry; struct inode *dir = req->r_parent; @@ -1630,7 +1631,6 @@ retry_lookup: if (IS_ERR(realdn)) { err = PTR_ERR(realdn); d_drop(dn); - dn = NULL; goto next_item; } dn = realdn; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index b968334f841e..f36ddfea4997 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2261,8 +2261,7 @@ static int __do_request(struct ceph_mds_client *mdsc, if (!(mdsc->fsc->mount_options->flags & CEPH_MOUNT_OPT_MOUNTWAIT) && !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) { - err = -ENOENT; - pr_info("probably no mds server is up\n"); + err = -EHOSTUNREACH; goto finish; } } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index f0694293b31a..6b10b20bfe32 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -232,6 +232,7 @@ static int parse_fsopt_token(char *c, void *private) return -ENOMEM; break; case Opt_fscache_uniq: +#ifdef CONFIG_CEPH_FSCACHE kfree(fsopt->fscache_uniq); fsopt->fscache_uniq = kstrndup(argstr[0].from, argstr[0].to-argstr[0].from, @@ -240,7 +241,10 @@ static int parse_fsopt_token(char *c, void *private) return -ENOMEM; fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; break; - /* misc */ +#else + pr_err("fscache support is disabled\n"); + return -EINVAL; +#endif case Opt_wsize: if (intval < PAGE_SIZE || intval > CEPH_MAX_WRITE_SIZE) return -EINVAL; @@ -312,8 +316,13 @@ static int parse_fsopt_token(char *c, void *private) fsopt->flags &= ~CEPH_MOUNT_OPT_INO32; break; case Opt_fscache: +#ifdef CONFIG_CEPH_FSCACHE fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE; break; +#else + pr_err("fscache support is disabled\n"); + return -EINVAL; +#endif case Opt_nofscache: fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE; break; @@ -1046,6 +1055,11 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, return res; out_splat: + if (!ceph_mdsmap_is_cluster_available(fsc->mdsc->mdsmap)) { + pr_info("No mds server is up or the cluster is laggy\n"); + err = -EHOSTUNREACH; + } + ceph_mdsc_close_sessions(fsc->mdsc); deactivate_locked_super(sb); goto out_final; diff --git a/fs/char_dev.c b/fs/char_dev.c index 20ce45c7c57c..715d76b00108 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -361,7 +361,7 @@ static struct kobject *cdev_get(struct cdev *p) if (owner && !try_module_get(owner)) return NULL; - kobj = kobject_get(&p->kobj); + kobj = kobject_get_unless_zero(&p->kobj); if (!kobj) module_put(owner); return kobj; diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index b98436f5c7c7..73d428af97a9 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -603,7 +603,7 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode, ((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS)) *pmode |= (S_IXUGO & (*pbits_to_set)); - cifs_dbg(NOISY, "access flags 0x%x mode now 0x%x\n", flags, *pmode); + cifs_dbg(NOISY, "access flags 0x%x mode now %04o\n", flags, *pmode); return; } @@ -632,7 +632,7 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use, if (mode & S_IXUGO) *pace_flags |= SET_FILE_EXEC_RIGHTS; - cifs_dbg(NOISY, "mode: 0x%x, access flags now 0x%x\n", + cifs_dbg(NOISY, "mode: %04o, access flags now 0x%x\n", mode, *pace_flags); return; } diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 7b7ab10a9db1..600bb838c15b 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1210,6 +1210,11 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file); struct cifsInodeInfo { bool can_cache_brlcks; struct list_head llist; /* locks helb by this inode */ + /* + * NOTE: Some code paths call down_read(lock_sem) twice, so + * we must always use use cifs_down_write() instead of down_write() + * for this semaphore to avoid deadlocks. + */ struct rw_semaphore lock_sem; /* protect the fields above */ /* BB add in lists for dirty pages i.e. write caching info for oplock */ struct list_head openFileList; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index ccdb42f71b2e..3a7fb8e750e9 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -149,6 +149,7 @@ extern int cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, const unsigned int xid); extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile); +extern void cifs_down_write(struct rw_semaphore *sem); extern struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct tcon_link *tlink, diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f523a9ca9574..697edc92dff2 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -921,6 +921,7 @@ cifs_demultiplex_thread(void *p) mempool_resize(cifs_req_poolp, length + cifs_min_rcv); set_freezable(); + allow_kernel_signal(SIGKILL); while (server->tcpStatus != CifsExiting) { if (try_to_freeze()) continue; @@ -2320,7 +2321,7 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) task = xchg(&server->tsk, NULL); if (task) - force_sig(SIGKILL, task); + send_sig(SIGKILL, task, 1); } static struct TCP_Server_Info * @@ -3046,8 +3047,10 @@ match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data) { struct cifs_sb_info *old = CIFS_SB(sb); struct cifs_sb_info *new = mnt_data->cifs_sb; - bool old_set = old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH; - bool new_set = new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH; + bool old_set = (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && + old->prepath; + bool new_set = (new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && + new->prepath; if (old_set && new_set && !strcmp(new->prepath, old->prepath)) return 1; @@ -3518,7 +3521,7 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info, cifs_sb->mnt_gid = pvolume_info->linux_gid; cifs_sb->mnt_file_mode = pvolume_info->file_mode; cifs_sb->mnt_dir_mode = pvolume_info->dir_mode; - cifs_dbg(FYI, "file mode: 0x%hx dir mode: 0x%hx\n", + cifs_dbg(FYI, "file mode: %04ho dir mode: %04ho\n", cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode); cifs_sb->actimeo = pvolume_info->actimeo; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index f00a7ce3eb6e..03293e543c07 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -562,7 +562,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (server->ops->close) server->ops->close(xid, tcon, &fid); cifs_del_pending_open(&open); - fput(file); rc = -ENOMEM; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 71a960da7cce..5e75c5f77f4c 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -280,6 +280,13 @@ cifs_has_mand_locks(struct cifsInodeInfo *cinode) return has_locks; } +void +cifs_down_write(struct rw_semaphore *sem) +{ + while (!down_write_trylock(sem)) + msleep(10); +} + struct cifsFileInfo * cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, struct tcon_link *tlink, __u32 oplock) @@ -305,9 +312,6 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, INIT_LIST_HEAD(&fdlocks->locks); fdlocks->cfile = cfile; cfile->llist = fdlocks; - down_write(&cinode->lock_sem); - list_add(&fdlocks->llist, &cinode->llist); - up_write(&cinode->lock_sem); cfile->count = 1; cfile->pid = current->tgid; @@ -331,6 +335,10 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file, oplock = 0; } + cifs_down_write(&cinode->lock_sem); + list_add(&fdlocks->llist, &cinode->llist); + up_write(&cinode->lock_sem); + spin_lock(&tcon->open_file_lock); if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock) oplock = fid->pending_open->oplock; @@ -457,7 +465,7 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler) * Delete any outstanding lock records. We'll lose them when the file * is closed anyway. */ - down_write(&cifsi->lock_sem); + cifs_down_write(&cifsi->lock_sem); list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) { list_del(&li->llist); cifs_del_lock_waiters(li); @@ -714,6 +722,13 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush) if (backup_cred(cifs_sb)) create_options |= CREATE_OPEN_BACKUP_INTENT; + /* O_SYNC also has bit for O_DSYNC so following check picks up either */ + if (cfile->f_flags & O_SYNC) + create_options |= CREATE_WRITE_THROUGH; + + if (cfile->f_flags & O_DIRECT) + create_options |= CREATE_NO_BUFFER; + if (server->ops->get_lease_key) server->ops->get_lease_key(inode, &cfile->fid); @@ -1011,7 +1026,7 @@ static void cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock) { struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry)); - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); list_add_tail(&lock->llist, &cfile->llist->locks); up_write(&cinode->lock_sem); } @@ -1033,7 +1048,7 @@ cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock, try_again: exist = false; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length, lock->type, &conf_lock, CIFS_LOCK_OP); @@ -1055,7 +1070,7 @@ try_again: (lock->blist.next == &lock->blist)); if (!rc) goto try_again; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); list_del_init(&lock->blist); } @@ -1108,7 +1123,7 @@ cifs_posix_lock_set(struct file *file, struct file_lock *flock) return rc; try_again: - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); if (!cinode->can_cache_brlcks) { up_write(&cinode->lock_sem); return rc; @@ -1314,7 +1329,7 @@ cifs_push_locks(struct cifsFileInfo *cfile) int rc = 0; /* we are going to update can_cache_brlcks here - need a write access */ - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); if (!cinode->can_cache_brlcks) { up_write(&cinode->lock_sem); return rc; @@ -1505,7 +1520,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, if (!buf) return -ENOMEM; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); for (i = 0; i < 2; i++) { cur = buf; num = 0; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a35c14105906..bdce714e9448 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1581,7 +1581,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode) struct TCP_Server_Info *server; char *full_path; - cifs_dbg(FYI, "In cifs_mkdir, mode = 0x%hx inode = 0x%p\n", + cifs_dbg(FYI, "In cifs_mkdir, mode = %04ho inode = 0x%p\n", mode, inode); cifs_sb = CIFS_SB(inode->i_sb); @@ -1998,6 +1998,7 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry) struct inode *inode = d_inode(dentry); struct super_block *sb = dentry->d_sb; char *full_path = NULL; + int count = 0; if (inode == NULL) return -ENOENT; @@ -2019,15 +2020,18 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry) full_path, inode, inode->i_count.counter, dentry, cifs_get_time(dentry), jiffies); +again: if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext) rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid); else rc = cifs_get_inode_info(&inode, full_path, NULL, sb, xid, NULL); - + if (rc == -EAGAIN && count++ < 10) + goto again; out: kfree(full_path); free_xid(xid); + return rc; } diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index cc88f4f0325e..bed973330227 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -130,10 +130,6 @@ static const struct smb_to_posix_error mapping_table_ERRSRV[] = { {0, 0} }; -static const struct smb_to_posix_error mapping_table_ERRHRD[] = { - {0, 0} -}; - /* * Convert a string containing text IPv4 or IPv6 address to binary form. * diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index f50d3d0b9b87..483458340b10 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -181,6 +181,9 @@ cifs_get_next_mid(struct TCP_Server_Info *server) /* we do not want to loop forever */ last_mid = cur_mid; cur_mid++; + /* avoid 0xFFFF MID */ + if (cur_mid == 0xffff) + cur_mid++; /* * This nested loop looks more expensive than it is. diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c index 1add404618f0..e270812927cf 100644 --- a/fs/cifs/smb2file.c +++ b/fs/cifs/smb2file.c @@ -69,7 +69,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms, goto out; - if (oparms->tcon->use_resilient) { + if (oparms->tcon->use_resilient) { nr_ioctl_req.Timeout = 0; /* use server default (120 seconds) */ nr_ioctl_req.Reserved = 0; rc = SMB2_ioctl(xid, oparms->tcon, fid->persistent_fid, @@ -139,7 +139,7 @@ smb2_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, cur = buf; - down_write(&cinode->lock_sem); + cifs_down_write(&cinode->lock_sem); list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) { if (flock->fl_start > li->offset || (flock->fl_start + length) < diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 31f01f09d25a..ff2ad15f67d6 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -622,10 +622,10 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) spin_lock(&cifs_tcp_ses_lock); list_for_each(tmp, &server->smb_ses_list) { ses = list_entry(tmp, struct cifs_ses, smb_ses_list); + list_for_each(tmp1, &ses->tcon_list) { tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); - cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks); spin_lock(&tcon->open_file_lock); list_for_each(tmp2, &tcon->openFileList) { cfile = list_entry(tmp2, struct cifsFileInfo, @@ -637,6 +637,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) continue; cifs_dbg(FYI, "file id match, oplock break\n"); + cifs_stats_inc( + &tcon->stats.cifs_stats.num_oplock_brks); cinode = CIFS_I(d_inode(cfile->dentry)); spin_lock(&cfile->file_info_lock); if (!CIFS_CACHE_WRITE(cinode) && @@ -669,9 +671,6 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) return true; } spin_unlock(&tcon->open_file_lock); - spin_unlock(&cifs_tcp_ses_lock); - cifs_dbg(FYI, "No matching file for oplock break\n"); - return true; } } spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 0e1c36c92f60..1c87a429ce72 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -257,9 +257,14 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon) } rc = cifs_negotiate_protocol(0, tcon->ses); - if (!rc && tcon->ses->need_reconnect) + if (!rc && tcon->ses->need_reconnect) { rc = cifs_setup_session(0, tcon->ses, nls_codepage); - + if ((rc == -EACCES) && !tcon->retry) { + rc = -EHOSTDOWN; + mutex_unlock(&tcon->ses->session_mutex); + goto failed; + } + } if (rc || !tcon->need_reconnect) { mutex_unlock(&tcon->ses->session_mutex); goto out; @@ -301,6 +306,7 @@ out: case SMB2_SET_INFO: rc = -EAGAIN; } +failed: unload_nls(nls_codepage); return rc; } @@ -575,6 +581,7 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses) } else if (rsp->DialectRevision == cpu_to_le16(SMB21_PROT_ID)) { /* ops set to 3.0 by default for default so update */ ses->server->ops = &smb21_operations; + ses->server->vals = &smb21_values; } } else if (le16_to_cpu(rsp->DialectRevision) != ses->server->vals->protocol_id) { diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index ea52b98b39fa..f445bc9cdc94 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -161,6 +161,7 @@ struct compat_video_event { unsigned int frame_rate; } u; }; +#define VIDEO_GET_EVENT32 _IOR('o', 28, struct compat_video_event) static int do_video_get_event(struct file *file, unsigned int cmd, struct compat_video_event __user *up) @@ -172,7 +173,7 @@ static int do_video_get_event(struct file *file, if (kevent == NULL) return -EFAULT; - err = do_ioctl(file, cmd, (unsigned long)kevent); + err = do_ioctl(file, VIDEO_GET_EVENT, (unsigned long)kevent); if (!err) { err = convert_in_user(&kevent->type, &up->type); err |= convert_in_user(&kevent->timestamp, &up->timestamp); @@ -191,6 +192,7 @@ struct compat_video_still_picture { compat_uptr_t iFrame; int32_t size; }; +#define VIDEO_STILLPICTURE32 _IOW('o', 30, struct compat_video_still_picture) static int do_video_stillpicture(struct file *file, unsigned int cmd, struct compat_video_still_picture __user *up) @@ -213,7 +215,7 @@ static int do_video_stillpicture(struct file *file, if (err) return -EFAULT; - err = do_ioctl(file, cmd, (unsigned long) up_native); + err = do_ioctl(file, VIDEO_STILLPICTURE, (unsigned long) up_native); return err; } @@ -1476,9 +1478,9 @@ static long do_ioctl_trans(unsigned int cmd, return rtc_ioctl(file, cmd, argp); /* dvb */ - case VIDEO_GET_EVENT: + case VIDEO_GET_EVENT32: return do_video_get_event(file, cmd, argp); - case VIDEO_STILLPICTURE: + case VIDEO_STILLPICTURE32: return do_video_stillpicture(file, cmd, argp); case VIDEO_SET_SPU_PALETTE: return do_video_set_spu_palette(file, cmd, argp); @@ -1575,9 +1577,10 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, #endif case FICLONE: + goto do_ioctl; case FICLONERANGE: case FIDEDUPERANGE: - goto do_ioctl; + goto found_handler; case FIBMAP: case FIGETBSZ: diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index ccc31fa6f1a7..16eb59adf5aa 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -34,6 +34,15 @@ #include #include +struct configfs_fragment { + atomic_t frag_count; + struct rw_semaphore frag_sem; + bool frag_dead; +}; + +void put_fragment(struct configfs_fragment *); +struct configfs_fragment *get_fragment(struct configfs_fragment *); + struct configfs_dirent { atomic_t s_count; int s_dependent_count; @@ -48,6 +57,7 @@ struct configfs_dirent { #ifdef CONFIG_LOCKDEP int s_depth; #endif + struct configfs_fragment *s_frag; }; #define CONFIGFS_ROOT 0x0001 @@ -75,8 +85,8 @@ extern int configfs_create(struct dentry *, umode_t mode, void (*init)(struct in extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); extern int configfs_create_bin_file(struct config_item *, const struct configfs_bin_attribute *); -extern int configfs_make_dirent(struct configfs_dirent *, - struct dentry *, void *, umode_t, int); +extern int configfs_make_dirent(struct configfs_dirent *, struct dentry *, + void *, umode_t, int, struct configfs_fragment *); extern int configfs_dirent_is_ready(struct configfs_dirent *); extern void configfs_hash_and_remove(struct dentry * dir, const char * name); @@ -151,6 +161,7 @@ static inline void release_configfs_dirent(struct configfs_dirent * sd) { if (!(sd->s_type & CONFIGFS_ROOT)) { kfree(sd->s_iattr); + put_fragment(sd->s_frag); kmem_cache_free(configfs_dir_cachep, sd); } } diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index a1985a9ad2d6..c2ef617d2f97 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -164,11 +164,38 @@ configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd) #endif /* CONFIG_LOCKDEP */ +static struct configfs_fragment *new_fragment(void) +{ + struct configfs_fragment *p; + + p = kmalloc(sizeof(struct configfs_fragment), GFP_KERNEL); + if (p) { + atomic_set(&p->frag_count, 1); + init_rwsem(&p->frag_sem); + p->frag_dead = false; + } + return p; +} + +void put_fragment(struct configfs_fragment *frag) +{ + if (frag && atomic_dec_and_test(&frag->frag_count)) + kfree(frag); +} + +struct configfs_fragment *get_fragment(struct configfs_fragment *frag) +{ + if (likely(frag)) + atomic_inc(&frag->frag_count); + return frag; +} + /* * Allocates a new configfs_dirent and links it to the parent configfs_dirent */ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *parent_sd, - void *element, int type) + void *element, int type, + struct configfs_fragment *frag) { struct configfs_dirent * sd; @@ -188,6 +215,7 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *paren kmem_cache_free(configfs_dir_cachep, sd); return ERR_PTR(-ENOENT); } + sd->s_frag = get_fragment(frag); list_add(&sd->s_sibling, &parent_sd->s_children); spin_unlock(&configfs_dirent_lock); @@ -222,11 +250,11 @@ static int configfs_dirent_exists(struct configfs_dirent *parent_sd, int configfs_make_dirent(struct configfs_dirent * parent_sd, struct dentry * dentry, void * element, - umode_t mode, int type) + umode_t mode, int type, struct configfs_fragment *frag) { struct configfs_dirent * sd; - sd = configfs_new_dirent(parent_sd, element, type); + sd = configfs_new_dirent(parent_sd, element, type, frag); if (IS_ERR(sd)) return PTR_ERR(sd); @@ -273,7 +301,8 @@ static void init_symlink(struct inode * inode) * until it is validated by configfs_dir_set_ready() */ -static int configfs_create_dir(struct config_item *item, struct dentry *dentry) +static int configfs_create_dir(struct config_item *item, struct dentry *dentry, + struct configfs_fragment *frag) { int error; umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO; @@ -286,7 +315,8 @@ static int configfs_create_dir(struct config_item *item, struct dentry *dentry) return error; error = configfs_make_dirent(p->d_fsdata, dentry, item, mode, - CONFIGFS_DIR | CONFIGFS_USET_CREATING); + CONFIGFS_DIR | CONFIGFS_USET_CREATING, + frag); if (unlikely(error)) return error; @@ -351,9 +381,10 @@ int configfs_create_link(struct configfs_symlink *sl, { int err = 0; umode_t mode = S_IFLNK | S_IRWXUGO; + struct configfs_dirent *p = parent->d_fsdata; - err = configfs_make_dirent(parent->d_fsdata, dentry, sl, mode, - CONFIGFS_ITEM_LINK); + err = configfs_make_dirent(p, dentry, sl, mode, + CONFIGFS_ITEM_LINK, p->s_frag); if (!err) { err = configfs_create(dentry, mode, init_symlink); if (err) { @@ -612,7 +643,8 @@ static int populate_attrs(struct config_item *item) static int configfs_attach_group(struct config_item *parent_item, struct config_item *item, - struct dentry *dentry); + struct dentry *dentry, + struct configfs_fragment *frag); static void configfs_detach_group(struct config_item *item); static void detach_groups(struct config_group *group) @@ -660,7 +692,8 @@ static void detach_groups(struct config_group *group) * try using vfs_mkdir. Just a thought. */ static int create_default_group(struct config_group *parent_group, - struct config_group *group) + struct config_group *group, + struct configfs_fragment *frag) { int ret; struct configfs_dirent *sd; @@ -676,7 +709,7 @@ static int create_default_group(struct config_group *parent_group, d_add(child, NULL); ret = configfs_attach_group(&parent_group->cg_item, - &group->cg_item, child); + &group->cg_item, child, frag); if (!ret) { sd = child->d_fsdata; sd->s_type |= CONFIGFS_USET_DEFAULT; @@ -690,13 +723,14 @@ static int create_default_group(struct config_group *parent_group, return ret; } -static int populate_groups(struct config_group *group) +static int populate_groups(struct config_group *group, + struct configfs_fragment *frag) { struct config_group *new_group; int ret = 0; list_for_each_entry(new_group, &group->default_groups, group_entry) { - ret = create_default_group(group, new_group); + ret = create_default_group(group, new_group, frag); if (ret) { detach_groups(group); break; @@ -810,11 +844,12 @@ static void link_group(struct config_group *parent_group, struct config_group *g */ static int configfs_attach_item(struct config_item *parent_item, struct config_item *item, - struct dentry *dentry) + struct dentry *dentry, + struct configfs_fragment *frag) { int ret; - ret = configfs_create_dir(item, dentry); + ret = configfs_create_dir(item, dentry, frag); if (!ret) { ret = populate_attrs(item); if (ret) { @@ -844,12 +879,13 @@ static void configfs_detach_item(struct config_item *item) static int configfs_attach_group(struct config_item *parent_item, struct config_item *item, - struct dentry *dentry) + struct dentry *dentry, + struct configfs_fragment *frag) { int ret; struct configfs_dirent *sd; - ret = configfs_attach_item(parent_item, item, dentry); + ret = configfs_attach_item(parent_item, item, dentry, frag); if (!ret) { sd = dentry->d_fsdata; sd->s_type |= CONFIGFS_USET_DIR; @@ -865,7 +901,7 @@ static int configfs_attach_group(struct config_item *parent_item, */ inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD); configfs_adjust_dir_dirent_depth_before_populate(sd); - ret = populate_groups(to_config_group(item)); + ret = populate_groups(to_config_group(item), frag); if (ret) { configfs_detach_item(item); d_inode(dentry)->i_flags |= S_DEAD; @@ -1260,6 +1296,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode struct configfs_dirent *sd; struct config_item_type *type; struct module *subsys_owner = NULL, *new_item_owner = NULL; + struct configfs_fragment *frag; char *name; sd = dentry->d_parent->d_fsdata; @@ -1278,6 +1315,12 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode goto out; } + frag = new_fragment(); + if (!frag) { + ret = -ENOMEM; + goto out; + } + /* Get a working ref for the duration of this function */ parent_item = configfs_get_config_item(dentry->d_parent); type = parent_item->ci_type; @@ -1380,9 +1423,9 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode spin_unlock(&configfs_dirent_lock); if (group) - ret = configfs_attach_group(parent_item, item, dentry); + ret = configfs_attach_group(parent_item, item, dentry, frag); else - ret = configfs_attach_item(parent_item, item, dentry); + ret = configfs_attach_item(parent_item, item, dentry, frag); spin_lock(&configfs_dirent_lock); sd->s_type &= ~CONFIGFS_USET_IN_MKDIR; @@ -1419,6 +1462,7 @@ out_put: * reference. */ config_item_put(parent_item); + put_fragment(frag); out: return ret; @@ -1430,6 +1474,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) struct config_item *item; struct configfs_subsystem *subsys; struct configfs_dirent *sd; + struct configfs_fragment *frag; struct module *subsys_owner = NULL, *dead_item_owner = NULL; int ret; @@ -1487,6 +1532,16 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) } } while (ret == -EAGAIN); + frag = sd->s_frag; + if (down_write_killable(&frag->frag_sem)) { + spin_lock(&configfs_dirent_lock); + configfs_detach_rollback(dentry); + spin_unlock(&configfs_dirent_lock); + return -EINTR; + } + frag->frag_dead = true; + up_write(&frag->frag_sem); + /* Get a working ref for the duration of this function */ item = configfs_get_config_item(dentry); @@ -1587,7 +1642,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file) */ err = -ENOENT; if (configfs_dirent_is_ready(parent_sd)) { - file->private_data = configfs_new_dirent(parent_sd, NULL, 0); + file->private_data = configfs_new_dirent(parent_sd, NULL, 0, NULL); if (IS_ERR(file->private_data)) err = PTR_ERR(file->private_data); else @@ -1743,8 +1798,13 @@ int configfs_register_group(struct config_group *parent_group, { struct configfs_subsystem *subsys = parent_group->cg_subsys; struct dentry *parent; + struct configfs_fragment *frag; int ret; + frag = new_fragment(); + if (!frag) + return -ENOMEM; + mutex_lock(&subsys->su_mutex); link_group(parent_group, group); mutex_unlock(&subsys->su_mutex); @@ -1752,7 +1812,7 @@ int configfs_register_group(struct config_group *parent_group, parent = parent_group->cg_item.ci_dentry; inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); - ret = create_default_group(parent_group, group); + ret = create_default_group(parent_group, group, frag); if (ret) goto err_out; @@ -1760,12 +1820,14 @@ int configfs_register_group(struct config_group *parent_group, configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata); spin_unlock(&configfs_dirent_lock); inode_unlock(d_inode(parent)); + put_fragment(frag); return 0; err_out: inode_unlock(d_inode(parent)); mutex_lock(&subsys->su_mutex); unlink_group(group); mutex_unlock(&subsys->su_mutex); + put_fragment(frag); return ret; } EXPORT_SYMBOL(configfs_register_group); @@ -1781,16 +1843,12 @@ void configfs_unregister_group(struct config_group *group) struct configfs_subsystem *subsys = group->cg_subsys; struct dentry *dentry = group->cg_item.ci_dentry; struct dentry *parent = group->cg_item.ci_parent->ci_dentry; + struct configfs_dirent *sd = dentry->d_fsdata; + struct configfs_fragment *frag = sd->s_frag; - mutex_lock(&subsys->su_mutex); - if (!group->cg_item.ci_parent->ci_group) { - /* - * The parent has already been unlinked and detached - * due to a rmdir. - */ - goto unlink_group; - } - mutex_unlock(&subsys->su_mutex); + down_write(&frag->frag_sem); + frag->frag_dead = true; + up_write(&frag->frag_sem); inode_lock_nested(d_inode(parent), I_MUTEX_PARENT); spin_lock(&configfs_dirent_lock); @@ -1806,7 +1864,6 @@ void configfs_unregister_group(struct config_group *group) dput(dentry); mutex_lock(&subsys->su_mutex); -unlink_group: unlink_group(group); mutex_unlock(&subsys->su_mutex); } @@ -1863,10 +1920,17 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) struct dentry *dentry; struct dentry *root; struct configfs_dirent *sd; + struct configfs_fragment *frag; + + frag = new_fragment(); + if (!frag) + return -ENOMEM; root = configfs_pin_fs(); - if (IS_ERR(root)) + if (IS_ERR(root)) { + put_fragment(frag); return PTR_ERR(root); + } if (!group->cg_item.ci_name) group->cg_item.ci_name = group->cg_item.ci_namebuf; @@ -1882,7 +1946,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) d_add(dentry, NULL); err = configfs_attach_group(sd->s_element, &group->cg_item, - dentry); + dentry, frag); if (err) { BUG_ON(d_inode(dentry)); d_drop(dentry); @@ -1900,6 +1964,7 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) unlink_group(group); configfs_release_fs(); } + put_fragment(frag); return err; } @@ -1909,12 +1974,18 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) struct config_group *group = &subsys->su_group; struct dentry *dentry = group->cg_item.ci_dentry; struct dentry *root = dentry->d_sb->s_root; + struct configfs_dirent *sd = dentry->d_fsdata; + struct configfs_fragment *frag = sd->s_frag; if (dentry->d_parent != root) { pr_err("Tried to unregister non-subsystem!\n"); return; } + down_write(&frag->frag_sem); + frag->frag_dead = true; + up_write(&frag->frag_sem); + inode_lock_nested(d_inode(root), I_MUTEX_PARENT); inode_lock_nested(d_inode(dentry), I_MUTEX_CHILD); diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 39da1103d341..bb0a427517e9 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -53,40 +53,44 @@ struct configfs_buffer { bool write_in_progress; char *bin_buffer; int bin_buffer_size; + int cb_max_size; + struct config_item *item; + struct module *owner; + union { + struct configfs_attribute *attr; + struct configfs_bin_attribute *bin_attr; + }; }; - -/** - * fill_read_buffer - allocate and fill buffer from item. - * @dentry: dentry pointer. - * @buffer: data buffer for file. - * - * Allocate @buffer->page, if it hasn't been already, then call the - * config_item's show() method to fill the buffer with this attribute's - * data. - * This is called only once, on the file's first read. - */ -static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buffer) +static inline struct configfs_fragment *to_frag(struct file *file) { - struct configfs_attribute * attr = to_attr(dentry); - struct config_item * item = to_item(dentry->d_parent); - int ret = 0; - ssize_t count; + struct configfs_dirent *sd = file->f_path.dentry->d_fsdata; + + return sd->s_frag; +} + +static int fill_read_buffer(struct file *file, struct configfs_buffer *buffer) +{ + struct configfs_fragment *frag = to_frag(file); + ssize_t count = -ENOENT; if (!buffer->page) buffer->page = (char *) get_zeroed_page(GFP_KERNEL); if (!buffer->page) return -ENOMEM; - count = attr->show(item, buffer->page); + down_read(&frag->frag_sem); + if (!frag->frag_dead) + count = buffer->attr->show(buffer->item, buffer->page); + up_read(&frag->frag_sem); - BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE); - if (count >= 0) { - buffer->needs_read_fill = 0; - buffer->count = count; - } else - ret = count; - return ret; + if (count < 0) + return count; + if (WARN_ON_ONCE(count > (ssize_t)SIMPLE_ATTR_SIZE)) + return -EIO; + buffer->needs_read_fill = 0; + buffer->count = count; + return 0; } /** @@ -111,12 +115,13 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf static ssize_t configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct configfs_buffer * buffer = file->private_data; + struct configfs_buffer *buffer = file->private_data; ssize_t retval = 0; mutex_lock(&buffer->mutex); if (buffer->needs_read_fill) { - if ((retval = fill_read_buffer(file->f_path.dentry,buffer))) + retval = fill_read_buffer(file, buffer); + if (retval) goto out; } pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", @@ -152,10 +157,8 @@ static ssize_t configfs_read_bin_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + struct configfs_fragment *frag = to_frag(file); struct configfs_buffer *buffer = file->private_data; - struct dentry *dentry = file->f_path.dentry; - struct config_item *item = to_item(dentry->d_parent); - struct configfs_bin_attribute *bin_attr = to_bin_attr(dentry); ssize_t retval = 0; ssize_t len = min_t(size_t, count, PAGE_SIZE); @@ -166,18 +169,23 @@ configfs_read_bin_file(struct file *file, char __user *buf, retval = -ETXTBSY; goto out; } - buffer->read_in_progress = 1; + buffer->read_in_progress = true; if (buffer->needs_read_fill) { /* perform first read with buf == NULL to get extent */ - len = bin_attr->read(item, NULL, 0); + down_read(&frag->frag_sem); + if (!frag->frag_dead) + len = buffer->bin_attr->read(buffer->item, NULL, 0); + else + len = -ENOENT; + up_read(&frag->frag_sem); if (len <= 0) { retval = len; goto out; } /* do not exceed the maximum value */ - if (bin_attr->cb_max_size && len > bin_attr->cb_max_size) { + if (buffer->cb_max_size && len > buffer->cb_max_size) { retval = -EFBIG; goto out; } @@ -190,7 +198,13 @@ configfs_read_bin_file(struct file *file, char __user *buf, buffer->bin_buffer_size = len; /* perform second read to fill buffer */ - len = bin_attr->read(item, buffer->bin_buffer, len); + down_read(&frag->frag_sem); + if (!frag->frag_dead) + len = buffer->bin_attr->read(buffer->item, + buffer->bin_buffer, len); + else + len = -ENOENT; + up_read(&frag->frag_sem); if (len < 0) { retval = len; vfree(buffer->bin_buffer); @@ -240,25 +254,17 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size return error ? -EFAULT : count; } - -/** - * flush_write_buffer - push buffer to config_item. - * @dentry: dentry to the attribute - * @buffer: data buffer for file. - * @count: number of bytes - * - * Get the correct pointers for the config_item and the attribute we're - * dealing with, then call the store() method for the attribute, - * passing the buffer that we acquired in fill_write_buffer(). - */ - static int -flush_write_buffer(struct dentry * dentry, struct configfs_buffer * buffer, size_t count) +flush_write_buffer(struct file *file, struct configfs_buffer *buffer, size_t count) { - struct configfs_attribute * attr = to_attr(dentry); - struct config_item * item = to_item(dentry->d_parent); + struct configfs_fragment *frag = to_frag(file); + int res = -ENOENT; - return attr->store(item, buffer->page, count); + down_read(&frag->frag_sem); + if (!frag->frag_dead) + res = buffer->attr->store(buffer->item, buffer->page, count); + up_read(&frag->frag_sem); + return res; } @@ -282,13 +288,13 @@ flush_write_buffer(struct dentry * dentry, struct configfs_buffer * buffer, size static ssize_t configfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct configfs_buffer * buffer = file->private_data; + struct configfs_buffer *buffer = file->private_data; ssize_t len; mutex_lock(&buffer->mutex); len = fill_write_buffer(buffer, buf, count); if (len > 0) - len = flush_write_buffer(file->f_path.dentry, buffer, len); + len = flush_write_buffer(file, buffer, len); if (len > 0) *ppos += len; mutex_unlock(&buffer->mutex); @@ -313,8 +319,6 @@ configfs_write_bin_file(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct configfs_buffer *buffer = file->private_data; - struct dentry *dentry = file->f_path.dentry; - struct configfs_bin_attribute *bin_attr = to_bin_attr(dentry); void *tbuf = NULL; ssize_t len; @@ -325,13 +329,13 @@ configfs_write_bin_file(struct file *file, const char __user *buf, len = -ETXTBSY; goto out; } - buffer->write_in_progress = 1; + buffer->write_in_progress = true; /* buffer grows? */ if (*ppos + count > buffer->bin_buffer_size) { - if (bin_attr->cb_max_size && - *ppos + count > bin_attr->cb_max_size) { + if (buffer->cb_max_size && + *ppos + count > buffer->cb_max_size) { len = -EFBIG; goto out; } @@ -363,31 +367,51 @@ out: return len; } -static int check_perm(struct inode * inode, struct file * file, int type) +static int __configfs_open_file(struct inode *inode, struct file *file, int type) { - struct config_item *item = configfs_get_config_item(file->f_path.dentry->d_parent); - struct configfs_attribute * attr = to_attr(file->f_path.dentry); - struct configfs_bin_attribute *bin_attr = NULL; - struct configfs_buffer * buffer; - struct configfs_item_operations * ops = NULL; - int error = 0; + struct dentry *dentry = file->f_path.dentry; + struct configfs_fragment *frag = to_frag(file); + struct configfs_attribute *attr; + struct configfs_buffer *buffer; + int error; - if (!item || !attr) - goto Einval; + error = -ENOMEM; + buffer = kzalloc(sizeof(struct configfs_buffer), GFP_KERNEL); + if (!buffer) + goto out; - if (type & CONFIGFS_ITEM_BIN_ATTR) - bin_attr = to_bin_attr(file->f_path.dentry); + error = -ENOENT; + down_read(&frag->frag_sem); + if (unlikely(frag->frag_dead)) + goto out_free_buffer; - /* Grab the module reference for this attribute if we have one */ - if (!try_module_get(attr->ca_owner)) { - error = -ENODEV; - goto Done; + error = -EINVAL; + buffer->item = to_item(dentry->d_parent); + if (!buffer->item) + goto out_free_buffer; + + attr = to_attr(dentry); + if (!attr) + goto out_put_item; + + if (type & CONFIGFS_ITEM_BIN_ATTR) { + buffer->bin_attr = to_bin_attr(dentry); + buffer->cb_max_size = buffer->bin_attr->cb_max_size; + } else { + buffer->attr = attr; } - if (item->ci_type) - ops = item->ci_type->ct_item_ops; - else - goto Eaccess; + buffer->owner = attr->ca_owner; + /* Grab the module reference for this attribute if we have one */ + error = -ENODEV; + if (!try_module_get(buffer->owner)) + goto out_put_item; + + error = -EACCES; + if (!buffer->item->ci_type) + goto out_put_module; + + buffer->ops = buffer->item->ci_type->ct_item_ops; /* File needs write support. * The inode's perms must say it's ok, @@ -395,13 +419,11 @@ static int check_perm(struct inode * inode, struct file * file, int type) */ if (file->f_mode & FMODE_WRITE) { if (!(inode->i_mode & S_IWUGO)) - goto Eaccess; - + goto out_put_module; if ((type & CONFIGFS_ITEM_ATTR) && !attr->store) - goto Eaccess; - - if ((type & CONFIGFS_ITEM_BIN_ATTR) && !bin_attr->write) - goto Eaccess; + goto out_put_module; + if ((type & CONFIGFS_ITEM_BIN_ATTR) && !buffer->bin_attr->write) + goto out_put_module; } /* File needs read support. @@ -410,92 +432,72 @@ static int check_perm(struct inode * inode, struct file * file, int type) */ if (file->f_mode & FMODE_READ) { if (!(inode->i_mode & S_IRUGO)) - goto Eaccess; - + goto out_put_module; if ((type & CONFIGFS_ITEM_ATTR) && !attr->show) - goto Eaccess; - - if ((type & CONFIGFS_ITEM_BIN_ATTR) && !bin_attr->read) - goto Eaccess; + goto out_put_module; + if ((type & CONFIGFS_ITEM_BIN_ATTR) && !buffer->bin_attr->read) + goto out_put_module; } - /* No error? Great, allocate a buffer for the file, and store it - * it in file->private_data for easy access. - */ - buffer = kzalloc(sizeof(struct configfs_buffer),GFP_KERNEL); - if (!buffer) { - error = -ENOMEM; - goto Enomem; - } mutex_init(&buffer->mutex); buffer->needs_read_fill = 1; - buffer->read_in_progress = 0; - buffer->write_in_progress = 0; - buffer->ops = ops; + buffer->read_in_progress = false; + buffer->write_in_progress = false; file->private_data = buffer; - goto Done; + up_read(&frag->frag_sem); + return 0; - Einval: - error = -EINVAL; - goto Done; - Eaccess: - error = -EACCES; - Enomem: - module_put(attr->ca_owner); - Done: - if (error && item) - config_item_put(item); +out_put_module: + module_put(buffer->owner); +out_put_item: + config_item_put(buffer->item); +out_free_buffer: + up_read(&frag->frag_sem); + kfree(buffer); +out: return error; } static int configfs_release(struct inode *inode, struct file *filp) { - struct config_item * item = to_item(filp->f_path.dentry->d_parent); - struct configfs_attribute * attr = to_attr(filp->f_path.dentry); - struct module * owner = attr->ca_owner; - struct configfs_buffer * buffer = filp->private_data; + struct configfs_buffer *buffer = filp->private_data; - if (item) - config_item_put(item); - /* After this point, attr should not be accessed. */ - module_put(owner); - - if (buffer) { - if (buffer->page) - free_page((unsigned long)buffer->page); - mutex_destroy(&buffer->mutex); - kfree(buffer); - } + module_put(buffer->owner); + if (buffer->page) + free_page((unsigned long)buffer->page); + mutex_destroy(&buffer->mutex); + kfree(buffer); return 0; } static int configfs_open_file(struct inode *inode, struct file *filp) { - return check_perm(inode, filp, CONFIGFS_ITEM_ATTR); + return __configfs_open_file(inode, filp, CONFIGFS_ITEM_ATTR); } static int configfs_open_bin_file(struct inode *inode, struct file *filp) { - return check_perm(inode, filp, CONFIGFS_ITEM_BIN_ATTR); + return __configfs_open_file(inode, filp, CONFIGFS_ITEM_BIN_ATTR); } -static int configfs_release_bin_file(struct inode *inode, struct file *filp) +static int configfs_release_bin_file(struct inode *inode, struct file *file) { - struct configfs_buffer *buffer = filp->private_data; - struct dentry *dentry = filp->f_path.dentry; - struct config_item *item = to_item(dentry->d_parent); - struct configfs_bin_attribute *bin_attr = to_bin_attr(dentry); - ssize_t len = 0; - int ret; + struct configfs_buffer *buffer = file->private_data; - buffer->read_in_progress = 0; + buffer->read_in_progress = false; if (buffer->write_in_progress) { - buffer->write_in_progress = 0; - - len = bin_attr->write(item, buffer->bin_buffer, - buffer->bin_buffer_size); + struct configfs_fragment *frag = to_frag(file); + buffer->write_in_progress = false; + down_read(&frag->frag_sem); + if (!frag->frag_dead) { + /* result of ->release() is ignored */ + buffer->bin_attr->write(buffer->item, + buffer->bin_buffer, + buffer->bin_buffer_size); + } + up_read(&frag->frag_sem); /* vfree on NULL is safe */ vfree(buffer->bin_buffer); buffer->bin_buffer = NULL; @@ -503,10 +505,8 @@ static int configfs_release_bin_file(struct inode *inode, struct file *filp) buffer->needs_read_fill = 1; } - ret = configfs_release(inode, filp); - if (len < 0) - return len; - return ret; + configfs_release(inode, file); + return 0; } @@ -541,7 +541,7 @@ int configfs_create_file(struct config_item * item, const struct configfs_attrib inode_lock_nested(d_inode(dir), I_MUTEX_NORMAL); error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, - CONFIGFS_ITEM_ATTR); + CONFIGFS_ITEM_ATTR, parent_sd->s_frag); inode_unlock(d_inode(dir)); return error; @@ -563,7 +563,7 @@ int configfs_create_bin_file(struct config_item *item, inode_lock_nested(dir->d_inode, I_MUTEX_NORMAL); error = configfs_make_dirent(parent_sd, NULL, (void *) bin_attr, mode, - CONFIGFS_ITEM_BIN_ATTR); + CONFIGFS_ITEM_BIN_ATTR, parent_sd->s_frag); inode_unlock(dir->d_inode); return error; diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 9993cdb81e7d..147a6b779ab9 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -157,11 +157,42 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna !type->ct_item_ops->allow_link) goto out_put; + /* + * This is really sick. What they wanted was a hybrid of + * link(2) and symlink(2) - they wanted the target resolved + * at syscall time (as link(2) would've done), be a directory + * (which link(2) would've refused to do) *AND* be a deep + * fucking magic, making the target busy from rmdir POV. + * symlink(2) is nothing of that sort, and the locking it + * gets matches the normal symlink(2) semantics. Without + * attempts to resolve the target (which might very well + * not even exist yet) done prior to locking the parent + * directory. This perversion, OTOH, needs to resolve + * the target, which would lead to obvious deadlocks if + * attempted with any directories locked. + * + * Unfortunately, that garbage is userland ABI and we should've + * said "no" back in 2005. Too late now, so we get to + * play very ugly games with locking. + * + * Try *ANYTHING* of that sort in new code, and you will + * really regret it. Just ask yourself - what could a BOFH + * do to me and do I want to find it out first-hand? + * + * AV, a thoroughly annoyed bastard. + */ + inode_unlock(dir); ret = get_target(symname, &path, &target_item, dentry->d_sb); + inode_lock(dir); if (ret) goto out_put; - ret = type->ct_item_ops->allow_link(parent_item, target_item); + if (dentry->d_inode || d_unhashed(dentry)) + ret = -EEXIST; + else + ret = inode_permission(dir, MAY_WRITE | MAY_EXEC); + if (!ret) + ret = type->ct_item_ops->allow_link(parent_item, target_item); if (!ret) { mutex_lock(&configfs_symlink_mutex); ret = create_link(parent_item, target_item, dentry); diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig index 4bc66f2c571e..97c0a113f4cc 100644 --- a/fs/crypto/Kconfig +++ b/fs/crypto/Kconfig @@ -1,13 +1,8 @@ config FS_ENCRYPTION bool "FS Encryption (Per-file encryption)" select CRYPTO - select CRYPTO_AES - select CRYPTO_CBC - select CRYPTO_ECB - select CRYPTO_XTS - select CRYPTO_CTS - select CRYPTO_SHA512 - select CRYPTO_HMAC + select CRYPTO_HASH + select CRYPTO_BLKCIPHER select KEYS help Enable encryption of files and directories. This @@ -15,3 +10,22 @@ config FS_ENCRYPTION efficient since it avoids caching the encrypted and decrypted pages in the page cache. Currently Ext4, F2FS and UBIFS make use of this feature. + +# Filesystems supporting encryption must select this if FS_ENCRYPTION. This +# allows the algorithms to be built as modules when all the filesystems are. +config FS_ENCRYPTION_ALGS + tristate + select CRYPTO_AES + select CRYPTO_CBC + select CRYPTO_CTS + select CRYPTO_ECB + select CRYPTO_HMAC + select CRYPTO_SHA256 + select CRYPTO_SHA512 + select CRYPTO_XTS + +config FS_ENCRYPTION_INLINE_CRYPT + bool "Enable fscrypt to use inline crypto" + depends on FS_ENCRYPTION && BLK_INLINE_ENCRYPTION + help + Enable fscrypt to use inline encryption hardware if available. diff --git a/fs/crypto/Makefile b/fs/crypto/Makefile index 0a78543f6cec..1a6b0774f3ff 100644 --- a/fs/crypto/Makefile +++ b/fs/crypto/Makefile @@ -10,3 +10,4 @@ fscrypto-y := crypto.o \ policy.o fscrypto-$(CONFIG_BLOCK) += bio.o +fscrypto-$(CONFIG_FS_ENCRYPTION_INLINE_CRYPT) += inline_crypt.o diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index 0eb98a6d6f14..0242088d213f 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -26,7 +26,7 @@ #include #include "fscrypt_private.h" -static void __fscrypt_decrypt_bio(struct bio *bio, bool done) +void fscrypt_decrypt_bio(struct bio *bio) { struct bio_vec *bv; int i; @@ -37,84 +37,158 @@ static void __fscrypt_decrypt_bio(struct bio *bio, bool done) bv->bv_offset); if (ret) SetPageError(page); - else if (done) - SetPageUptodate(page); - if (done) - unlock_page(page); } } - -void fscrypt_decrypt_bio(struct bio *bio) -{ - __fscrypt_decrypt_bio(bio, false); -} EXPORT_SYMBOL(fscrypt_decrypt_bio); -static void completion_pages(struct work_struct *work) -{ - struct fscrypt_ctx *ctx = container_of(work, struct fscrypt_ctx, work); - struct bio *bio = ctx->bio; - - __fscrypt_decrypt_bio(bio, true); - fscrypt_release_ctx(ctx); - bio_put(bio); -} - -void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, struct bio *bio) -{ - INIT_WORK(&ctx->work, completion_pages); - ctx->bio = bio; - fscrypt_enqueue_decrypt_work(&ctx->work); -} -EXPORT_SYMBOL(fscrypt_enqueue_decrypt_bio); - -int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, - sector_t pblk, unsigned int len) +static int fscrypt_zeroout_range_inlinecrypt(const struct inode *inode, + pgoff_t lblk, + sector_t pblk, unsigned int len) { const unsigned int blockbits = inode->i_blkbits; - const unsigned int blocksize = 1 << blockbits; - struct page *ciphertext_page; + const unsigned int blocks_per_page_bits = PAGE_SHIFT - blockbits; + const unsigned int blocks_per_page = 1 << blocks_per_page_bits; + unsigned int i; struct bio *bio; - int ret, err = 0; + int ret, err; - ciphertext_page = fscrypt_alloc_bounce_page(GFP_NOWAIT); - if (!ciphertext_page) - return -ENOMEM; + /* This always succeeds since __GFP_DIRECT_RECLAIM is set. */ + bio = bio_alloc(GFP_NOFS, BIO_MAX_PAGES); - while (len--) { - err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk, - ZERO_PAGE(0), ciphertext_page, - blocksize, 0, GFP_NOFS); - if (err) - goto errout; - - bio = bio_alloc(GFP_NOWAIT, 1); - if (!bio) { - err = -ENOMEM; - goto errout; - } + do { bio_set_dev(bio, inode->i_sb->s_bdev); bio->bi_iter.bi_sector = pblk << (blockbits - 9); bio_set_op_attrs(bio, REQ_OP_WRITE, 0); - ret = bio_add_page(bio, ciphertext_page, blocksize, 0); - if (WARN_ON(ret != blocksize)) { - /* should never happen! */ - bio_put(bio); - err = -EIO; - goto errout; - } + fscrypt_set_bio_crypt_ctx(bio, inode, lblk, GFP_NOFS); + + i = 0; + do { + unsigned int blocks_this_page = + min(len, blocks_per_page); + unsigned int bytes_this_page = + blocks_this_page << blockbits; + + ret = bio_add_page(bio, ZERO_PAGE(0), + bytes_this_page, 0); + if (WARN_ON(ret != bytes_this_page)) { + err = -EIO; + goto out; + } + lblk += blocks_this_page; + pblk += blocks_this_page; + len -= blocks_this_page; + } while (++i != BIO_MAX_PAGES && len != 0); + err = submit_bio_wait(bio); - if (err == 0 && bio->bi_status) - err = -EIO; - bio_put(bio); if (err) - goto errout; - lblk++; - pblk++; - } + goto out; + bio_reset(bio); + } while (len != 0); err = 0; -errout: - fscrypt_free_bounce_page(ciphertext_page); +out: + bio_put(bio); + return err; +} + +/** + * fscrypt_zeroout_range() - zero out a range of blocks in an encrypted file + * @inode: the file's inode + * @lblk: the first file logical block to zero out + * @pblk: the first filesystem physical block to zero out + * @len: number of blocks to zero out + * + * Zero out filesystem blocks in an encrypted regular file on-disk, i.e. write + * ciphertext blocks which decrypt to the all-zeroes block. The blocks must be + * both logically and physically contiguous. It's also assumed that the + * filesystem only uses a single block device, ->s_bdev. + * + * Note that since each block uses a different IV, this involves writing a + * different ciphertext to each block; we can't simply reuse the same one. + * + * Return: 0 on success; -errno on failure. + */ +int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, + sector_t pblk, unsigned int len) +{ + const unsigned int blockbits = inode->i_blkbits; + const unsigned int blocksize = 1 << blockbits; + const unsigned int blocks_per_page_bits = PAGE_SHIFT - blockbits; + const unsigned int blocks_per_page = 1 << blocks_per_page_bits; + struct page *pages[16]; /* write up to 16 pages at a time */ + unsigned int nr_pages; + unsigned int i; + unsigned int offset; + struct bio *bio; + int ret, err; + + if (len == 0) + return 0; + + if (fscrypt_inode_uses_inline_crypto(inode)) + return fscrypt_zeroout_range_inlinecrypt(inode, lblk, pblk, + len); + + BUILD_BUG_ON(ARRAY_SIZE(pages) > BIO_MAX_PAGES); + nr_pages = min_t(unsigned int, ARRAY_SIZE(pages), + (len + blocks_per_page - 1) >> blocks_per_page_bits); + + /* + * We need at least one page for ciphertext. Allocate the first one + * from a mempool, with __GFP_DIRECT_RECLAIM set so that it can't fail. + * + * Any additional page allocations are allowed to fail, as they only + * help performance, and waiting on the mempool for them could deadlock. + */ + for (i = 0; i < nr_pages; i++) { + pages[i] = fscrypt_alloc_bounce_page(i == 0 ? GFP_NOFS : + GFP_NOWAIT | __GFP_NOWARN); + if (!pages[i]) + break; + } + nr_pages = i; + if (WARN_ON(nr_pages <= 0)) + return -EINVAL; + + /* This always succeeds since __GFP_DIRECT_RECLAIM is set. */ + bio = bio_alloc(GFP_NOFS, nr_pages); + + do { + bio_set_dev(bio, inode->i_sb->s_bdev); + bio->bi_iter.bi_sector = pblk << (blockbits - 9); + bio_set_op_attrs(bio, REQ_OP_WRITE, 0); + + i = 0; + offset = 0; + do { + err = fscrypt_crypt_block(inode, FS_ENCRYPT, lblk, + ZERO_PAGE(0), pages[i], + blocksize, offset, GFP_NOFS); + if (err) + goto out; + lblk++; + pblk++; + len--; + offset += blocksize; + if (offset == PAGE_SIZE || len == 0) { + ret = bio_add_page(bio, pages[i++], offset, 0); + if (WARN_ON(ret != offset)) { + err = -EIO; + goto out; + } + offset = 0; + } + } while (i != nr_pages && len != 0); + + err = submit_bio_wait(bio); + if (err) + goto out; + bio_reset(bio); + } while (len != 0); + err = 0; +out: + bio_put(bio); + for (i = 0; i < nr_pages; i++) + fscrypt_free_bounce_page(pages[i]); return err; } EXPORT_SYMBOL(fscrypt_zeroout_range); diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c index 35efeae24efc..ed6ea28dbdad 100644 --- a/fs/crypto/crypto.c +++ b/fs/crypto/crypto.c @@ -24,31 +24,20 @@ #include #include #include -#include -#include -#include #include #include "fscrypt_private.h" static unsigned int num_prealloc_crypto_pages = 32; -static unsigned int num_prealloc_crypto_ctxs = 128; module_param(num_prealloc_crypto_pages, uint, 0444); MODULE_PARM_DESC(num_prealloc_crypto_pages, "Number of crypto pages to preallocate"); -module_param(num_prealloc_crypto_ctxs, uint, 0444); -MODULE_PARM_DESC(num_prealloc_crypto_ctxs, - "Number of crypto contexts to preallocate"); static mempool_t *fscrypt_bounce_page_pool = NULL; -static LIST_HEAD(fscrypt_free_ctxs); -static DEFINE_SPINLOCK(fscrypt_ctx_lock); - static struct workqueue_struct *fscrypt_read_workqueue; static DEFINE_MUTEX(fscrypt_init_mutex); -static struct kmem_cache *fscrypt_ctx_cachep; struct kmem_cache *fscrypt_info_cachep; void fscrypt_enqueue_decrypt_work(struct work_struct *work) @@ -57,62 +46,6 @@ void fscrypt_enqueue_decrypt_work(struct work_struct *work) } EXPORT_SYMBOL(fscrypt_enqueue_decrypt_work); -/** - * fscrypt_release_ctx() - Release a decryption context - * @ctx: The decryption context to release. - * - * If the decryption context was allocated from the pre-allocated pool, return - * it to that pool. Else, free it. - */ -void fscrypt_release_ctx(struct fscrypt_ctx *ctx) -{ - unsigned long flags; - - if (ctx->flags & FS_CTX_REQUIRES_FREE_ENCRYPT_FL) { - kmem_cache_free(fscrypt_ctx_cachep, ctx); - } else { - spin_lock_irqsave(&fscrypt_ctx_lock, flags); - list_add(&ctx->free_list, &fscrypt_free_ctxs); - spin_unlock_irqrestore(&fscrypt_ctx_lock, flags); - } -} -EXPORT_SYMBOL(fscrypt_release_ctx); - -/** - * fscrypt_get_ctx() - Get a decryption context - * @gfp_flags: The gfp flag for memory allocation - * - * Allocate and initialize a decryption context. - * - * Return: A new decryption context on success; an ERR_PTR() otherwise. - */ -struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags) -{ - struct fscrypt_ctx *ctx; - unsigned long flags; - - /* - * First try getting a ctx from the free list so that we don't have to - * call into the slab allocator. - */ - spin_lock_irqsave(&fscrypt_ctx_lock, flags); - ctx = list_first_entry_or_null(&fscrypt_free_ctxs, - struct fscrypt_ctx, free_list); - if (ctx) - list_del(&ctx->free_list); - spin_unlock_irqrestore(&fscrypt_ctx_lock, flags); - if (!ctx) { - ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, gfp_flags); - if (!ctx) - return ERR_PTR(-ENOMEM); - ctx->flags |= FS_CTX_REQUIRES_FREE_ENCRYPT_FL; - } else { - ctx->flags &= ~FS_CTX_REQUIRES_FREE_ENCRYPT_FL; - } - return ctx; -} -EXPORT_SYMBOL(fscrypt_get_ctx); - struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags) { return mempool_alloc(fscrypt_bounce_page_pool, gfp_flags); @@ -137,14 +70,17 @@ EXPORT_SYMBOL(fscrypt_free_bounce_page); void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, const struct fscrypt_info *ci) { + u8 flags = fscrypt_policy_flags(&ci->ci_policy); + memset(iv, 0, ci->ci_mode->ivsize); - iv->lblk_num = cpu_to_le64(lblk_num); - if (fscrypt_is_direct_key_policy(&ci->ci_policy)) + if (flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) { + WARN_ON_ONCE((u32)lblk_num != lblk_num); + lblk_num |= (u64)ci->ci_inode->i_ino << 32; + } else if (flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) { memcpy(iv->nonce, ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE); - - if (ci->ci_essiv_tfm != NULL) - crypto_cipher_encrypt_one(ci->ci_essiv_tfm, iv->raw, iv->raw); + } + iv->lblk_num = cpu_to_le64(lblk_num); } /* Encrypt or decrypt a single filesystem block of file contents */ @@ -158,7 +94,7 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, DECLARE_CRYPTO_WAIT(wait); struct scatterlist dst, src; struct fscrypt_info *ci = inode->i_crypt_info; - struct crypto_skcipher *tfm = ci->ci_ctfm; + struct crypto_skcipher *tfm = ci->ci_key.tfm; int res = 0; if (WARN_ON_ONCE(len <= 0)) @@ -201,7 +137,7 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, * multiple of the filesystem's block size. * @offs: Byte offset within @page of the first block to encrypt. Must be * a multiple of the filesystem's block size. - * @gfp_flags: Memory allocation flags + * @gfp_flags: Memory allocation flags. See details below. * * A new bounce page is allocated, and the specified block(s) are encrypted into * it. In the bounce page, the ciphertext block(s) will be located at the same @@ -211,6 +147,11 @@ int fscrypt_crypt_block(const struct inode *inode, fscrypt_direction_t rw, * * This is for use by the filesystem's ->writepages() method. * + * The bounce page allocation is mempool-backed, so it will always succeed when + * @gfp_flags includes __GFP_DIRECT_RECLAIM, e.g. when it's GFP_NOFS. However, + * only the first page of each bio can be allocated this way. To prevent + * deadlocks, for any additional pages a mask like GFP_NOWAIT must be used. + * * Return: the new encrypted bounce page on success; an ERR_PTR() on failure */ struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, @@ -347,65 +288,6 @@ int fscrypt_decrypt_block_inplace(const struct inode *inode, struct page *page, } EXPORT_SYMBOL(fscrypt_decrypt_block_inplace); -/* - * Validate dentries in encrypted directories to make sure we aren't potentially - * caching stale dentries after a key has been added. - */ -static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) -{ - struct dentry *dir; - int err; - int valid; - - /* - * Plaintext names are always valid, since fscrypt doesn't support - * reverting to ciphertext names without evicting the directory's inode - * -- which implies eviction of the dentries in the directory. - */ - if (!(dentry->d_flags & DCACHE_ENCRYPTED_NAME)) - return 1; - - /* - * Ciphertext name; valid if the directory's key is still unavailable. - * - * Although fscrypt forbids rename() on ciphertext names, we still must - * use dget_parent() here rather than use ->d_parent directly. That's - * because a corrupted fs image may contain directory hard links, which - * the VFS handles by moving the directory's dentry tree in the dcache - * each time ->lookup() finds the directory and it already has a dentry - * elsewhere. Thus ->d_parent can be changing, and we must safely grab - * a reference to some ->d_parent to prevent it from being freed. - */ - - if (flags & LOOKUP_RCU) - return -ECHILD; - - dir = dget_parent(dentry); - err = fscrypt_get_encryption_info(d_inode(dir)); - valid = !fscrypt_has_encryption_key(d_inode(dir)); - dput(dir); - - if (err < 0) - return err; - - return valid; -} - -const struct dentry_operations fscrypt_d_ops = { - .d_revalidate = fscrypt_d_revalidate, -}; - -static void fscrypt_destroy(void) -{ - struct fscrypt_ctx *pos, *n; - - list_for_each_entry_safe(pos, n, &fscrypt_free_ctxs, free_list) - kmem_cache_free(fscrypt_ctx_cachep, pos); - INIT_LIST_HEAD(&fscrypt_free_ctxs); - mempool_destroy(fscrypt_bounce_page_pool); - fscrypt_bounce_page_pool = NULL; -} - /** * fscrypt_initialize() - allocate major buffers for fs encryption. * @cop_flags: fscrypt operations flags @@ -413,11 +295,11 @@ static void fscrypt_destroy(void) * We only call this when we start accessing encrypted files, since it * results in memory getting allocated that wouldn't otherwise be used. * - * Return: Zero on success, non-zero otherwise. + * Return: 0 on success; -errno on failure */ int fscrypt_initialize(unsigned int cop_flags) { - int i, res = -ENOMEM; + int err = 0; /* No need to allocate a bounce page pool if this FS won't use it. */ if (cop_flags & FS_CFLG_OWN_PAGES) @@ -425,29 +307,18 @@ int fscrypt_initialize(unsigned int cop_flags) mutex_lock(&fscrypt_init_mutex); if (fscrypt_bounce_page_pool) - goto already_initialized; - - for (i = 0; i < num_prealloc_crypto_ctxs; i++) { - struct fscrypt_ctx *ctx; - - ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, GFP_NOFS); - if (!ctx) - goto fail; - list_add(&ctx->free_list, &fscrypt_free_ctxs); - } + goto out_unlock; + err = -ENOMEM; fscrypt_bounce_page_pool = mempool_create_page_pool(num_prealloc_crypto_pages, 0); if (!fscrypt_bounce_page_pool) - goto fail; + goto out_unlock; -already_initialized: + err = 0; +out_unlock: mutex_unlock(&fscrypt_init_mutex); - return 0; -fail: - fscrypt_destroy(); - mutex_unlock(&fscrypt_init_mutex); - return res; + return err; } void fscrypt_msg(const struct inode *inode, const char *level, @@ -493,13 +364,9 @@ static int __init fscrypt_init(void) if (!fscrypt_read_workqueue) goto fail; - fscrypt_ctx_cachep = KMEM_CACHE(fscrypt_ctx, SLAB_RECLAIM_ACCOUNT); - if (!fscrypt_ctx_cachep) - goto fail_free_queue; - fscrypt_info_cachep = KMEM_CACHE(fscrypt_info, SLAB_RECLAIM_ACCOUNT); if (!fscrypt_info_cachep) - goto fail_free_ctx; + goto fail_free_queue; err = fscrypt_init_keyring(); if (err) @@ -509,8 +376,6 @@ static int __init fscrypt_init(void) fail_free_info: kmem_cache_destroy(fscrypt_info_cachep); -fail_free_ctx: - kmem_cache_destroy(fscrypt_ctx_cachep); fail_free_queue: destroy_workqueue(fscrypt_read_workqueue); fail: diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index 3da3707c10e3..63bfe5e8accd 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -11,10 +11,88 @@ * This has not yet undergone a rigorous security audit. */ +#include #include +#include +#include #include #include "fscrypt_private.h" +/** + * struct fscrypt_nokey_name - identifier for directory entry when key is absent + * + * When userspace lists an encrypted directory without access to the key, the + * filesystem must present a unique "no-key name" for each filename that allows + * it to find the directory entry again if requested. Naively, that would just + * mean using the ciphertext filenames. However, since the ciphertext filenames + * can contain illegal characters ('\0' and '/'), they must be encoded in some + * way. We use base64. But that can cause names to exceed NAME_MAX (255 + * bytes), so we also need to use a strong hash to abbreviate long names. + * + * The filesystem may also need another kind of hash, the "dirhash", to quickly + * find the directory entry. Since filesystems normally compute the dirhash + * over the on-disk filename (i.e. the ciphertext), it's not computable from + * no-key names that abbreviate the ciphertext using the strong hash to fit in + * NAME_MAX. It's also not computable if it's a keyed hash taken over the + * plaintext (but it may still be available in the on-disk directory entry); + * casefolded directories use this type of dirhash. At least in these cases, + * each no-key name must include the name's dirhash too. + * + * To meet all these requirements, we base64-encode the following + * variable-length structure. It contains the dirhash, or 0's if the filesystem + * didn't provide one; up to 149 bytes of the ciphertext name; and for + * ciphertexts longer than 149 bytes, also the SHA-256 of the remaining bytes. + * + * This ensures that each no-key name contains everything needed to find the + * directory entry again, contains only legal characters, doesn't exceed + * NAME_MAX, is unambiguous unless there's a SHA-256 collision, and that we only + * take the performance hit of SHA-256 on very long filenames (which are rare). + */ +struct fscrypt_nokey_name { + u32 dirhash[2]; + u8 bytes[149]; + u8 sha256[SHA256_DIGEST_SIZE]; +}; /* 189 bytes => 252 bytes base64-encoded, which is <= NAME_MAX (255) */ + +/* + * Decoded size of max-size nokey name, i.e. a name that was abbreviated using + * the strong hash and thus includes the 'sha256' field. This isn't simply + * sizeof(struct fscrypt_nokey_name), as the padding at the end isn't included. + */ +#define FSCRYPT_NOKEY_NAME_MAX offsetofend(struct fscrypt_nokey_name, sha256) + +static struct crypto_shash *sha256_hash_tfm; + +static int fscrypt_do_sha256(const u8 *data, unsigned int data_len, u8 *result) +{ + struct crypto_shash *tfm = READ_ONCE(sha256_hash_tfm); + + if (unlikely(!tfm)) { + struct crypto_shash *prev_tfm; + + tfm = crypto_alloc_shash("sha256", 0, 0); + if (IS_ERR(tfm)) { + fscrypt_err(NULL, + "Error allocating SHA-256 transform: %ld", + PTR_ERR(tfm)); + return PTR_ERR(tfm); + } + prev_tfm = cmpxchg(&sha256_hash_tfm, NULL, tfm); + if (prev_tfm) { + crypto_free_shash(tfm); + tfm = prev_tfm; + } + } + { + SHASH_DESC_ON_STACK(desc, tfm); + + desc->tfm = tfm; + desc->flags = 0; + + return crypto_shash_digest(desc, data, data_len, result); + } +} + static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) { if (str->len == 1 && str->name[0] == '.') @@ -27,20 +105,20 @@ static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) } /** - * fname_encrypt() - encrypt a filename + * fscrypt_fname_encrypt() - encrypt a filename * * The output buffer must be at least as large as the input buffer. * Any extra space is filled with NUL padding before encryption. * * Return: 0 on success, -errno on failure */ -int fname_encrypt(struct inode *inode, const struct qstr *iname, - u8 *out, unsigned int olen) +int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname, + u8 *out, unsigned int olen) { struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); - struct fscrypt_info *ci = inode->i_crypt_info; - struct crypto_skcipher *tfm = ci->ci_ctfm; + const struct fscrypt_info *ci = inode->i_crypt_info; + struct crypto_skcipher *tfm = ci->ci_key.tfm; union fscrypt_iv iv; struct scatterlist sg; int res; @@ -85,15 +163,15 @@ int fname_encrypt(struct inode *inode, const struct qstr *iname, * * Return: 0 on success, -errno on failure */ -static int fname_decrypt(struct inode *inode, - const struct fscrypt_str *iname, - struct fscrypt_str *oname) +static int fname_decrypt(const struct inode *inode, + const struct fscrypt_str *iname, + struct fscrypt_str *oname) { struct skcipher_request *req = NULL; DECLARE_CRYPTO_WAIT(wait); struct scatterlist src_sg, dst_sg; - struct fscrypt_info *ci = inode->i_crypt_info; - struct crypto_skcipher *tfm = ci->ci_ctfm; + const struct fscrypt_info *ci = inode->i_crypt_info; + struct crypto_skcipher *tfm = ci->ci_key.tfm; union fscrypt_iv iv; int res; @@ -206,9 +284,7 @@ int fscrypt_fname_alloc_buffer(const struct inode *inode, u32 max_encrypted_len, struct fscrypt_str *crypto_str) { - const u32 max_encoded_len = - max_t(u32, BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE), - 1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name))); + const u32 max_encoded_len = BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX); u32 max_presented_len; max_presented_len = max(max_encoded_len, max_encrypted_len); @@ -241,19 +317,21 @@ EXPORT_SYMBOL(fscrypt_fname_free_buffer); * * The caller must have allocated sufficient memory for the @oname string. * - * If the key is available, we'll decrypt the disk name; otherwise, we'll encode - * it for presentation. Short names are directly base64-encoded, while long - * names are encoded in fscrypt_digested_name format. + * If the key is available, we'll decrypt the disk name. Otherwise, we'll + * encode it for presentation in fscrypt_nokey_name format. + * See struct fscrypt_nokey_name for details. * * Return: 0 on success, -errno on failure */ -int fscrypt_fname_disk_to_usr(struct inode *inode, - u32 hash, u32 minor_hash, - const struct fscrypt_str *iname, - struct fscrypt_str *oname) +int fscrypt_fname_disk_to_usr(const struct inode *inode, + u32 hash, u32 minor_hash, + const struct fscrypt_str *iname, + struct fscrypt_str *oname) { const struct qstr qname = FSTR_TO_QSTR(iname); - struct fscrypt_digested_name digested_name; + struct fscrypt_nokey_name nokey_name; + u32 size; /* size of the unencoded no-key name */ + int err; if (fscrypt_is_dot_dotdot(&qname)) { oname->name[0] = '.'; @@ -268,24 +346,37 @@ int fscrypt_fname_disk_to_usr(struct inode *inode, if (fscrypt_has_encryption_key(inode)) return fname_decrypt(inode, iname, oname); - if (iname->len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) { - oname->len = base64_encode(iname->name, iname->len, - oname->name); - return 0; - } + /* + * Sanity check that struct fscrypt_nokey_name doesn't have padding + * between fields and that its encoded size never exceeds NAME_MAX. + */ + BUILD_BUG_ON(offsetofend(struct fscrypt_nokey_name, dirhash) != + offsetof(struct fscrypt_nokey_name, bytes)); + BUILD_BUG_ON(offsetofend(struct fscrypt_nokey_name, bytes) != + offsetof(struct fscrypt_nokey_name, sha256)); + BUILD_BUG_ON(BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX) > NAME_MAX); + if (hash) { - digested_name.hash = hash; - digested_name.minor_hash = minor_hash; + nokey_name.dirhash[0] = hash; + nokey_name.dirhash[1] = minor_hash; } else { - digested_name.hash = 0; - digested_name.minor_hash = 0; + nokey_name.dirhash[0] = 0; + nokey_name.dirhash[1] = 0; } - memcpy(digested_name.digest, - FSCRYPT_FNAME_DIGEST(iname->name, iname->len), - FSCRYPT_FNAME_DIGEST_SIZE); - oname->name[0] = '_'; - oname->len = 1 + base64_encode((const u8 *)&digested_name, - sizeof(digested_name), oname->name + 1); + if (iname->len <= sizeof(nokey_name.bytes)) { + memcpy(nokey_name.bytes, iname->name, iname->len); + size = offsetof(struct fscrypt_nokey_name, bytes[iname->len]); + } else { + memcpy(nokey_name.bytes, iname->name, sizeof(nokey_name.bytes)); + /* Compute strong hash of remaining part of name. */ + err = fscrypt_do_sha256(&iname->name[sizeof(nokey_name.bytes)], + iname->len - sizeof(nokey_name.bytes), + nokey_name.sha256); + if (err) + return err; + size = FSCRYPT_NOKEY_NAME_MAX; + } + oname->len = base64_encode((const u8 *)&nokey_name, size, oname->name); return 0; } EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); @@ -306,8 +397,7 @@ EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); * get the disk_name. * * Else, for keyless @lookup operations, @iname is the presented ciphertext, so - * we decode it to get either the ciphertext disk_name (for short names) or the - * fscrypt_digested_name (for long names). Non-@lookup operations will be + * we decode it to get the fscrypt_nokey_name. Non-@lookup operations will be * impossible in this case, so we fail them with ENOKEY. * * If successful, fscrypt_free_filename() must be called later to clean up. @@ -317,8 +407,8 @@ EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct fscrypt_name *fname) { + struct fscrypt_nokey_name *nokey_name; int ret; - int digested; memset(fname, 0, sizeof(struct fscrypt_name)); fname->usr_fname = iname; @@ -342,8 +432,8 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, if (!fname->crypto_buf.name) return -ENOMEM; - ret = fname_encrypt(dir, iname, fname->crypto_buf.name, - fname->crypto_buf.len); + ret = fscrypt_fname_encrypt(dir, iname, fname->crypto_buf.name, + fname->crypto_buf.len); if (ret) goto errout; fname->disk_name.name = fname->crypto_buf.name; @@ -358,40 +448,31 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, * We don't have the key and we are doing a lookup; decode the * user-supplied name */ - if (iname->name[0] == '_') { - if (iname->len != - 1 + BASE64_CHARS(sizeof(struct fscrypt_digested_name))) - return -ENOENT; - digested = 1; - } else { - if (iname->len > - BASE64_CHARS(FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE)) - return -ENOENT; - digested = 0; - } - fname->crypto_buf.name = - kmalloc(max_t(size_t, FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE, - sizeof(struct fscrypt_digested_name)), - GFP_KERNEL); + if (iname->len > BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX)) + return -ENOENT; + + fname->crypto_buf.name = kmalloc(FSCRYPT_NOKEY_NAME_MAX, GFP_KERNEL); if (fname->crypto_buf.name == NULL) return -ENOMEM; - ret = base64_decode(iname->name + digested, iname->len - digested, - fname->crypto_buf.name); - if (ret < 0) { + ret = base64_decode(iname->name, iname->len, fname->crypto_buf.name); + if (ret < (int)offsetof(struct fscrypt_nokey_name, bytes[1]) || + (ret > offsetof(struct fscrypt_nokey_name, sha256) && + ret != FSCRYPT_NOKEY_NAME_MAX)) { ret = -ENOENT; goto errout; } fname->crypto_buf.len = ret; - if (digested) { - const struct fscrypt_digested_name *n = - (const void *)fname->crypto_buf.name; - fname->hash = n->hash; - fname->minor_hash = n->minor_hash; - } else { - fname->disk_name.name = fname->crypto_buf.name; - fname->disk_name.len = fname->crypto_buf.len; + + nokey_name = (void *)fname->crypto_buf.name; + fname->hash = nokey_name->dirhash[0]; + fname->minor_hash = nokey_name->dirhash[1]; + if (ret != FSCRYPT_NOKEY_NAME_MAX) { + /* The full ciphertext filename is available. */ + fname->disk_name.name = nokey_name->bytes; + fname->disk_name.len = + ret - offsetof(struct fscrypt_nokey_name, bytes); } return 0; @@ -400,3 +481,106 @@ errout: return ret; } EXPORT_SYMBOL(fscrypt_setup_filename); + +/** + * fscrypt_match_name() - test whether the given name matches a directory entry + * @fname: the name being searched for + * @de_name: the name from the directory entry + * @de_name_len: the length of @de_name in bytes + * + * Normally @fname->disk_name will be set, and in that case we simply compare + * that to the name stored in the directory entry. The only exception is that + * if we don't have the key for an encrypted directory and the name we're + * looking for is very long, then we won't have the full disk_name and instead + * we'll need to match against a fscrypt_nokey_name that includes a strong hash. + * + * Return: %true if the name matches, otherwise %false. + */ +bool fscrypt_match_name(const struct fscrypt_name *fname, + const u8 *de_name, u32 de_name_len) +{ + const struct fscrypt_nokey_name *nokey_name = + (const void *)fname->crypto_buf.name; + u8 sha256[SHA256_DIGEST_SIZE]; + + if (likely(fname->disk_name.name)) { + if (de_name_len != fname->disk_name.len) + return false; + return !memcmp(de_name, fname->disk_name.name, de_name_len); + } + if (de_name_len <= sizeof(nokey_name->bytes)) + return false; + if (memcmp(de_name, nokey_name->bytes, sizeof(nokey_name->bytes))) + return false; + if (fscrypt_do_sha256(&de_name[sizeof(nokey_name->bytes)], + de_name_len - sizeof(nokey_name->bytes), sha256)) + return false; + return !memcmp(sha256, nokey_name->sha256, sizeof(sha256)); +} +EXPORT_SYMBOL_GPL(fscrypt_match_name); + +/** + * fscrypt_fname_siphash() - calculate the SipHash of a filename + * @dir: the parent directory + * @name: the filename to calculate the SipHash of + * + * Given a plaintext filename @name and a directory @dir which uses SipHash as + * its dirhash method and has had its fscrypt key set up, this function + * calculates the SipHash of that name using the directory's secret dirhash key. + * + * Return: the SipHash of @name using the hash key of @dir + */ +u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name) +{ + const struct fscrypt_info *ci = dir->i_crypt_info; + + WARN_ON(!ci->ci_dirhash_key_initialized); + + return siphash(name->name, name->len, &ci->ci_dirhash_key); +} +EXPORT_SYMBOL_GPL(fscrypt_fname_siphash); + +/* + * Validate dentries in encrypted directories to make sure we aren't potentially + * caching stale dentries after a key has been added. + */ +int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct dentry *dir; + int err; + int valid; + + /* + * Plaintext names are always valid, since fscrypt doesn't support + * reverting to ciphertext names without evicting the directory's inode + * -- which implies eviction of the dentries in the directory. + */ + if (!(dentry->d_flags & DCACHE_ENCRYPTED_NAME)) + return 1; + + /* + * Ciphertext name; valid if the directory's key is still unavailable. + * + * Although fscrypt forbids rename() on ciphertext names, we still must + * use dget_parent() here rather than use ->d_parent directly. That's + * because a corrupted fs image may contain directory hard links, which + * the VFS handles by moving the directory's dentry tree in the dcache + * each time ->lookup() finds the directory and it already has a dentry + * elsewhere. Thus ->d_parent can be changing, and we must safely grab + * a reference to some ->d_parent to prevent it from being freed. + */ + + if (flags & LOOKUP_RCU) + return -ECHILD; + + dir = dget_parent(dentry); + err = fscrypt_get_encryption_info(d_inode(dir)); + valid = !fscrypt_has_encryption_key(d_inode(dir)); + dput(dir); + + if (err < 0) + return err; + + return valid; +} +EXPORT_SYMBOL(fscrypt_d_revalidate); diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index 721838483d84..f78cc4dfb452 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -12,13 +12,16 @@ #define _FSCRYPT_PRIVATE_H #include +#include #include +#include #define CONST_STRLEN(str) (sizeof(str) - 1) #define FS_KEY_DERIVATION_NONCE_SIZE 16 #define FSCRYPT_MIN_KEY_SIZE 16 +#define FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE 128 #define FSCRYPT_CONTEXT_V1 1 #define FSCRYPT_CONTEXT_V2 2 @@ -136,12 +139,6 @@ fscrypt_policy_flags(const union fscrypt_policy *policy) BUG(); } -static inline bool -fscrypt_is_direct_key_policy(const union fscrypt_policy *policy) -{ - return fscrypt_policy_flags(policy) & FSCRYPT_POLICY_FLAG_DIRECT_KEY; -} - /** * For encrypted symlinks, the ciphertext length is stored at the beginning * of the string in little-endian format. @@ -151,6 +148,20 @@ struct fscrypt_symlink_data { char encrypted_path[1]; } __packed; +/** + * struct fscrypt_prepared_key - a key prepared for actual encryption/decryption + * @tfm: crypto API transform object + * @blk_key: key for blk-crypto + * + * Normally only one of the fields will be non-NULL. + */ +struct fscrypt_prepared_key { + struct crypto_skcipher *tfm; +#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT + struct fscrypt_blk_crypto_key *blk_key; +#endif +}; + /* * fscrypt_info - the "encryption key" for an inode * @@ -160,14 +171,19 @@ struct fscrypt_symlink_data { */ struct fscrypt_info { - /* The actual crypto transform used for encryption and decryption */ - struct crypto_skcipher *ci_ctfm; + /* The key in a form prepared for actual encryption/decryption */ + struct fscrypt_prepared_key ci_key; + /* True if the key should be freed when this fscrypt_info is freed */ + bool ci_owns_key; + +#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT /* - * Cipher for ESSIV IV generation. Only set for CBC contents - * encryption, otherwise is NULL. + * True if this inode will use inline encryption (blk-crypto) instead of + * the traditional filesystem-layer encryption. */ - struct crypto_cipher *ci_essiv_tfm; + bool ci_inlinecrypt; +#endif /* * Encryption mode used for this inode. It corresponds to either the @@ -193,10 +209,18 @@ struct fscrypt_info { /* * If non-NULL, then encryption is done using the master key directly - * and ci_ctfm will equal ci_direct_key->dk_ctfm. + * and ci_key will equal ci_direct_key->dk_key. */ struct fscrypt_direct_key *ci_direct_key; + /* + * This inode's hash key for filenames. This is a 128-bit SipHash-2-4 + * key. This is only set for directories that use a keyed dirhash over + * the plaintext filenames -- currently just casefolded directories. + */ + siphash_key_t ci_dirhash_key; + bool ci_dirhash_key_initialized; + /* The encryption policy used by this inode */ union fscrypt_policy ci_policy; @@ -209,25 +233,6 @@ typedef enum { FS_ENCRYPT, } fscrypt_direction_t; -#define FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001 - -static inline bool fscrypt_valid_enc_modes(u32 contents_mode, - u32 filenames_mode) -{ - if (contents_mode == FSCRYPT_MODE_AES_128_CBC && - filenames_mode == FSCRYPT_MODE_AES_128_CTS) - return true; - - if (contents_mode == FSCRYPT_MODE_AES_256_XTS && - filenames_mode == FSCRYPT_MODE_AES_256_CTS) - return true; - - if (contents_mode == FSCRYPT_MODE_ADIANTUM && - filenames_mode == FSCRYPT_MODE_ADIANTUM) - return true; - return false; -} - /* crypto.c */ extern struct kmem_cache *fscrypt_info_cachep; extern int fscrypt_initialize(unsigned int cop_flags); @@ -237,7 +242,6 @@ extern int fscrypt_crypt_block(const struct inode *inode, unsigned int len, unsigned int offs, gfp_t gfp_flags); extern struct page *fscrypt_alloc_bounce_page(gfp_t gfp_flags); -extern const struct dentry_operations fscrypt_d_ops; extern void __printf(3, 4) __cold fscrypt_msg(const struct inode *inode, const char *level, const char *fmt, ...); @@ -258,14 +262,16 @@ union fscrypt_iv { u8 nonce[FS_KEY_DERIVATION_NONCE_SIZE]; }; u8 raw[FSCRYPT_MAX_IV_SIZE]; + __le64 dun[FSCRYPT_MAX_IV_SIZE / sizeof(__le64)]; }; void fscrypt_generate_iv(union fscrypt_iv *iv, u64 lblk_num, const struct fscrypt_info *ci); /* fname.c */ -extern int fname_encrypt(struct inode *inode, const struct qstr *iname, - u8 *out, unsigned int olen); +extern int fscrypt_fname_encrypt(const struct inode *inode, + const struct qstr *iname, + u8 *out, unsigned int olen); extern bool fscrypt_fname_encrypted_size(const struct inode *inode, u32 orig_len, u32 max_len, u32 *encrypted_len_ret); @@ -287,15 +293,107 @@ extern int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key, * output doesn't reveal another. */ #define HKDF_CONTEXT_KEY_IDENTIFIER 1 -#define HKDF_CONTEXT_PER_FILE_KEY 2 -#define HKDF_CONTEXT_PER_MODE_KEY 3 +#define HKDF_CONTEXT_PER_FILE_ENC_KEY 2 +#define HKDF_CONTEXT_DIRECT_KEY 3 +#define HKDF_CONTEXT_IV_INO_LBLK_64_KEY 4 +#define HKDF_CONTEXT_DIRHASH_KEY 5 -extern int fscrypt_hkdf_expand(struct fscrypt_hkdf *hkdf, u8 context, +extern int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context, const u8 *info, unsigned int infolen, u8 *okm, unsigned int okmlen); extern void fscrypt_destroy_hkdf(struct fscrypt_hkdf *hkdf); +/* inline_crypt.c */ +#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT +extern void fscrypt_select_encryption_impl(struct fscrypt_info *ci); + +static inline bool +fscrypt_using_inline_encryption(const struct fscrypt_info *ci) +{ + return ci->ci_inlinecrypt; +} + +extern int fscrypt_prepare_inline_crypt_key( + struct fscrypt_prepared_key *prep_key, + const u8 *raw_key, + unsigned int raw_key_size, + bool is_hw_wrapped, + const struct fscrypt_info *ci); + +extern void fscrypt_destroy_inline_crypt_key( + struct fscrypt_prepared_key *prep_key); + +extern int fscrypt_derive_raw_secret(struct super_block *sb, + const u8 *wrapped_key, + unsigned int wrapped_key_size, + u8 *raw_secret, + unsigned int raw_secret_size); + +/* + * Check whether the crypto transform or blk-crypto key has been allocated in + * @prep_key, depending on which encryption implementation the file will use. + */ +static inline bool +fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key, + const struct fscrypt_info *ci) +{ + /* + * The READ_ONCE() here pairs with the smp_store_release() in + * fscrypt_prepare_key(). (This only matters for the per-mode keys, + * which are shared by multiple inodes.) + */ + if (fscrypt_using_inline_encryption(ci)) + return READ_ONCE(prep_key->blk_key) != NULL; + return READ_ONCE(prep_key->tfm) != NULL; +} + +#else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ + +static inline void fscrypt_select_encryption_impl(struct fscrypt_info *ci) +{ +} + +static inline bool fscrypt_using_inline_encryption( + const struct fscrypt_info *ci) +{ + return false; +} + +static inline int +fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, + const u8 *raw_key, unsigned int raw_key_size, + bool is_hw_wrapped, + const struct fscrypt_info *ci) +{ + WARN_ON(1); + return -EOPNOTSUPP; +} + +static inline void +fscrypt_destroy_inline_crypt_key(struct fscrypt_prepared_key *prep_key) +{ +} + +static inline int fscrypt_derive_raw_secret(struct super_block *sb, + const u8 *wrapped_key, + unsigned int wrapped_key_size, + u8 *raw_secret, + unsigned int raw_secret_size) +{ + fscrypt_warn(NULL, + "kernel built without support for hardware-wrapped keys"); + return -EOPNOTSUPP; +} + +static inline bool +fscrypt_is_key_prepared(struct fscrypt_prepared_key *prep_key, + const struct fscrypt_info *ci) +{ + return READ_ONCE(prep_key->tfm) != NULL; +} +#endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ + /* keyring.c */ /* @@ -312,8 +410,15 @@ struct fscrypt_master_key_secret { /* Size of the raw key in bytes. Set even if ->raw isn't set. */ u32 size; - /* For v1 policy keys: the raw key. Wiped for v2 policy keys. */ - u8 raw[FSCRYPT_MAX_KEY_SIZE]; + /* True if the key in ->raw is a hardware-wrapped key. */ + bool is_hw_wrapped; + + /* + * For v1 policy keys: the raw key. Wiped for v2 policy keys, unless + * ->is_hw_wrapped is true, in which case this contains the wrapped key + * rather than the key with which 'hkdf' was keyed. + */ + u8 raw[FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE]; } __randomize_layout; @@ -385,8 +490,11 @@ struct fscrypt_master_key { struct list_head mk_decrypted_inodes; spinlock_t mk_decrypted_inodes_lock; - /* Per-mode tfms for DIRECT_KEY policies, allocated on-demand */ - struct crypto_skcipher *mk_mode_keys[__FSCRYPT_MODE_MAX + 1]; + /* Per-mode keys for DIRECT_KEY policies, allocated on-demand */ + struct fscrypt_prepared_key mk_direct_keys[__FSCRYPT_MODE_MAX + 1]; + + /* Per-mode keys for IV_INO_LBLK_64 policies, allocated on-demand */ + struct fscrypt_prepared_key mk_iv_ino_lblk_64_keys[__FSCRYPT_MODE_MAX + 1]; } __randomize_layout; @@ -442,22 +550,24 @@ struct fscrypt_mode { const char *cipher_str; int keysize; int ivsize; - bool logged_impl_name; - bool needs_essiv; + int logged_impl_name; + enum blk_crypto_mode_num blk_crypto_mode; }; -static inline bool -fscrypt_mode_supports_direct_key(const struct fscrypt_mode *mode) -{ - return mode->ivsize >= offsetofend(union fscrypt_iv, nonce); -} +extern struct fscrypt_mode fscrypt_modes[]; -extern struct crypto_skcipher * -fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key, - const struct inode *inode); +extern int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, + const u8 *raw_key, unsigned int raw_key_size, + bool is_hw_wrapped, + const struct fscrypt_info *ci); -extern int fscrypt_set_derived_key(struct fscrypt_info *ci, - const u8 *derived_key); +extern void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key); + +extern int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, + const u8 *raw_key); + +extern int fscrypt_derive_dirhash_key(struct fscrypt_info *ci, + const struct fscrypt_master_key *mk); /* keysetup_v1.c */ diff --git a/fs/crypto/hkdf.c b/fs/crypto/hkdf.c index 2c026009c6e7..fd7f67628561 100644 --- a/fs/crypto/hkdf.c +++ b/fs/crypto/hkdf.c @@ -113,7 +113,7 @@ out: * adds to its application-specific info strings to guarantee that it doesn't * accidentally repeat an info string when using HKDF for different purposes.) */ -int fscrypt_hkdf_expand(struct fscrypt_hkdf *hkdf, u8 context, +int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context, const u8 *info, unsigned int infolen, u8 *okm, unsigned int okmlen) { diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c index 30b1ca661249..a6396bf721ac 100644 --- a/fs/crypto/hooks.c +++ b/fs/crypto/hooks.c @@ -4,6 +4,8 @@ * Encryption hooks for higher-level filesystem operations. */ +#include + #include "fscrypt_private.h" /** @@ -115,12 +117,53 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_ENCRYPTED_NAME; spin_unlock(&dentry->d_lock); - d_set_d_op(dentry, &fscrypt_d_ops); } return err; } EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup); +/** + * fscrypt_prepare_setflags() - prepare to change flags with FS_IOC_SETFLAGS + * @inode: the inode on which flags are being changed + * @oldflags: the old flags + * @flags: the new flags + * + * The caller should be holding i_rwsem for write. + * + * Return: 0 on success; -errno if the flags change isn't allowed or if + * another error occurs. + */ +int fscrypt_prepare_setflags(struct inode *inode, + unsigned int oldflags, unsigned int flags) +{ + struct fscrypt_info *ci; + struct fscrypt_master_key *mk; + int err; + + /* + * When the CASEFOLD flag is set on an encrypted directory, we must + * derive the secret key needed for the dirhash. This is only possible + * if the directory uses a v2 encryption policy. + */ + if (IS_ENCRYPTED(inode) && (flags & ~oldflags & FS_CASEFOLD_FL)) { + err = fscrypt_require_key(inode); + if (err) + return err; + ci = inode->i_crypt_info; + if (ci->ci_policy.version != FSCRYPT_POLICY_V2) + return -EINVAL; + mk = ci->ci_master_key->payload.data[0]; + down_read(&mk->mk_secret_sem); + if (is_master_key_secret_present(&mk->mk_secret)) + err = fscrypt_derive_dirhash_key(ci, mk); + else + err = -ENOKEY; + up_read(&mk->mk_secret_sem); + return err; + } + return 0; +} + int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, unsigned int max_len, struct fscrypt_str *disk_link) @@ -187,7 +230,8 @@ int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, ciphertext_len = disk_link->len - sizeof(*sd); sd->len = cpu_to_le16(ciphertext_len); - err = fname_encrypt(inode, &iname, sd->encrypted_path, ciphertext_len); + err = fscrypt_fname_encrypt(inode, &iname, sd->encrypted_path, + ciphertext_len); if (err) goto err_free_sd; diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c new file mode 100644 index 000000000000..cd901697d62a --- /dev/null +++ b/fs/crypto/inline_crypt.c @@ -0,0 +1,354 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Inline encryption support for fscrypt + * + * Copyright 2019 Google LLC + */ + +/* + * With "inline encryption", the block layer handles the decryption/encryption + * as part of the bio, instead of the filesystem doing the crypto itself via + * crypto API. See Documentation/block/inline-encryption.rst. fscrypt still + * provides the key and IV to use. + */ + +#include +#include +#include +#include +#include + +#include "fscrypt_private.h" + +struct fscrypt_blk_crypto_key { + struct blk_crypto_key base; + int num_devs; + struct request_queue *devs[]; +}; + +/* Enable inline encryption for this file if supported. */ +void fscrypt_select_encryption_impl(struct fscrypt_info *ci) +{ + const struct inode *inode = ci->ci_inode; + struct super_block *sb = inode->i_sb; + + /* The file must need contents encryption, not filenames encryption */ + if (!S_ISREG(inode->i_mode)) + return; + + /* blk-crypto must implement the needed encryption algorithm */ + if (ci->ci_mode->blk_crypto_mode == BLK_ENCRYPTION_MODE_INVALID) + return; + + /* The filesystem must be mounted with -o inlinecrypt */ + if (!sb->s_cop->inline_crypt_enabled || + !sb->s_cop->inline_crypt_enabled(sb)) + return; + + ci->ci_inlinecrypt = true; +} + +int fscrypt_prepare_inline_crypt_key(struct fscrypt_prepared_key *prep_key, + const u8 *raw_key, + unsigned int raw_key_size, + bool is_hw_wrapped, + const struct fscrypt_info *ci) +{ + const struct inode *inode = ci->ci_inode; + struct super_block *sb = inode->i_sb; + enum blk_crypto_mode_num crypto_mode = ci->ci_mode->blk_crypto_mode; + int num_devs = 1; + int queue_refs = 0; + struct fscrypt_blk_crypto_key *blk_key; + int err; + int i; + + if (sb->s_cop->get_num_devices) + num_devs = sb->s_cop->get_num_devices(sb); + if (WARN_ON(num_devs < 1)) + return -EINVAL; + + blk_key = kzalloc(struct_size(blk_key, devs, num_devs), GFP_NOFS); + if (!blk_key) + return -ENOMEM; + + blk_key->num_devs = num_devs; + if (num_devs == 1) + blk_key->devs[0] = bdev_get_queue(sb->s_bdev); + else + sb->s_cop->get_devices(sb, blk_key->devs); + + BUILD_BUG_ON(FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE > + BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE); + + err = blk_crypto_init_key(&blk_key->base, raw_key, raw_key_size, + is_hw_wrapped, crypto_mode, sb->s_blocksize); + if (err) { + fscrypt_err(inode, "error %d initializing blk-crypto key", err); + goto fail; + } + + /* + * We have to start using blk-crypto on all the filesystem's devices. + * We also have to save all the request_queue's for later so that the + * key can be evicted from them. This is needed because some keys + * aren't destroyed until after the filesystem was already unmounted + * (namely, the per-mode keys in struct fscrypt_master_key). + */ + for (i = 0; i < num_devs; i++) { + if (!blk_get_queue(blk_key->devs[i])) { + fscrypt_err(inode, "couldn't get request_queue"); + err = -EAGAIN; + goto fail; + } + queue_refs++; + + err = blk_crypto_start_using_mode(crypto_mode, sb->s_blocksize, + blk_key->devs[i]); + if (err) { + fscrypt_err(inode, + "error %d starting to use blk-crypto", err); + goto fail; + } + } + /* + * Pairs with READ_ONCE() in fscrypt_is_key_prepared(). (Only matters + * for the per-mode keys, which are shared by multiple inodes.) + */ + smp_store_release(&prep_key->blk_key, blk_key); + return 0; + +fail: + for (i = 0; i < queue_refs; i++) + blk_put_queue(blk_key->devs[i]); + kzfree(blk_key); + return err; +} + +void fscrypt_destroy_inline_crypt_key(struct fscrypt_prepared_key *prep_key) +{ + struct fscrypt_blk_crypto_key *blk_key = prep_key->blk_key; + int i; + + if (blk_key) { + for (i = 0; i < blk_key->num_devs; i++) { + blk_crypto_evict_key(blk_key->devs[i], &blk_key->base); + blk_put_queue(blk_key->devs[i]); + } + kzfree(blk_key); + } +} + +int fscrypt_derive_raw_secret(struct super_block *sb, + const u8 *wrapped_key, + unsigned int wrapped_key_size, + u8 *raw_secret, unsigned int raw_secret_size) +{ + struct request_queue *q; + + q = sb->s_bdev->bd_queue; + if (!q->ksm) + return -EOPNOTSUPP; + + return keyslot_manager_derive_raw_secret(q->ksm, + wrapped_key, wrapped_key_size, + raw_secret, raw_secret_size); +} + +/** + * fscrypt_inode_uses_inline_crypto - test whether an inode uses inline + * encryption + * @inode: an inode + * + * Return: true if the inode requires file contents encryption and if the + * encryption should be done in the block layer via blk-crypto rather + * than in the filesystem layer. + */ +bool fscrypt_inode_uses_inline_crypto(const struct inode *inode) +{ + return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode) && + inode->i_crypt_info->ci_inlinecrypt; +} +EXPORT_SYMBOL_GPL(fscrypt_inode_uses_inline_crypto); + +/** + * fscrypt_inode_uses_fs_layer_crypto - test whether an inode uses fs-layer + * encryption + * @inode: an inode + * + * Return: true if the inode requires file contents encryption and if the + * encryption should be done in the filesystem layer rather than in the + * block layer via blk-crypto. + */ +bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode) +{ + return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode) && + !inode->i_crypt_info->ci_inlinecrypt; +} +EXPORT_SYMBOL_GPL(fscrypt_inode_uses_fs_layer_crypto); + +static void fscrypt_generate_dun(const struct fscrypt_info *ci, u64 lblk_num, + u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE]) +{ + union fscrypt_iv iv; + int i; + + fscrypt_generate_iv(&iv, lblk_num, ci); + + BUILD_BUG_ON(FSCRYPT_MAX_IV_SIZE > BLK_CRYPTO_MAX_IV_SIZE); + memset(dun, 0, BLK_CRYPTO_MAX_IV_SIZE); + for (i = 0; i < ci->ci_mode->ivsize/sizeof(dun[0]); i++) + dun[i] = le64_to_cpu(iv.dun[i]); +} + +/** + * fscrypt_set_bio_crypt_ctx - prepare a file contents bio for inline encryption + * @bio: a bio which will eventually be submitted to the file + * @inode: the file's inode + * @first_lblk: the first file logical block number in the I/O + * @gfp_mask: memory allocation flags - these must be a waiting mask so that + * bio_crypt_set_ctx can't fail. + * + * If the contents of the file should be encrypted (or decrypted) with inline + * encryption, then assign the appropriate encryption context to the bio. + * + * Normally the bio should be newly allocated (i.e. no pages added yet), as + * otherwise fscrypt_mergeable_bio() won't work as intended. + * + * The encryption context will be freed automatically when the bio is freed. + * + * This function also handles setting bi_skip_dm_default_key when needed. + */ +void fscrypt_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, + u64 first_lblk, gfp_t gfp_mask) +{ + const struct fscrypt_info *ci = inode->i_crypt_info; + u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; + + if (fscrypt_inode_should_skip_dm_default_key(inode)) + bio_set_skip_dm_default_key(bio); + + if (!fscrypt_inode_uses_inline_crypto(inode)) + return; + + fscrypt_generate_dun(ci, first_lblk, dun); + bio_crypt_set_ctx(bio, &ci->ci_key.blk_key->base, dun, gfp_mask); +} +EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx); + +/* Extract the inode and logical block number from a buffer_head. */ +static bool bh_get_inode_and_lblk_num(const struct buffer_head *bh, + const struct inode **inode_ret, + u64 *lblk_num_ret) +{ + struct page *page = bh->b_page; + const struct address_space *mapping; + const struct inode *inode; + + /* + * The ext4 journal (jbd2) can submit a buffer_head it directly created + * for a non-pagecache page. fscrypt doesn't care about these. + */ + mapping = page_mapping(page); + if (!mapping) + return false; + inode = mapping->host; + + *inode_ret = inode; + *lblk_num_ret = ((u64)page->index << (PAGE_SHIFT - inode->i_blkbits)) + + (bh_offset(bh) >> inode->i_blkbits); + return true; +} + +/** + * fscrypt_set_bio_crypt_ctx_bh - prepare a file contents bio for inline + * encryption + * @bio: a bio which will eventually be submitted to the file + * @first_bh: the first buffer_head for which I/O will be submitted + * @gfp_mask: memory allocation flags + * + * Same as fscrypt_set_bio_crypt_ctx(), except this takes a buffer_head instead + * of an inode and block number directly. + */ +void fscrypt_set_bio_crypt_ctx_bh(struct bio *bio, + const struct buffer_head *first_bh, + gfp_t gfp_mask) +{ + const struct inode *inode; + u64 first_lblk; + + if (bh_get_inode_and_lblk_num(first_bh, &inode, &first_lblk)) + fscrypt_set_bio_crypt_ctx(bio, inode, first_lblk, gfp_mask); +} +EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx_bh); + +/** + * fscrypt_mergeable_bio - test whether data can be added to a bio + * @bio: the bio being built up + * @inode: the inode for the next part of the I/O + * @next_lblk: the next file logical block number in the I/O + * + * When building a bio which may contain data which should undergo inline + * encryption (or decryption) via fscrypt, filesystems should call this function + * to ensure that the resulting bio contains only logically contiguous data. + * This will return false if the next part of the I/O cannot be merged with the + * bio because either the encryption key would be different or the encryption + * data unit numbers would be discontiguous. + * + * fscrypt_set_bio_crypt_ctx() must have already been called on the bio. + * + * This function also returns false if the next part of the I/O would need to + * have a different value for the bi_skip_dm_default_key flag. + * + * Return: true iff the I/O is mergeable + */ +bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, + u64 next_lblk) +{ + const struct bio_crypt_ctx *bc = bio->bi_crypt_context; + u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; + + if (!!bc != fscrypt_inode_uses_inline_crypto(inode)) + return false; + if (bio_should_skip_dm_default_key(bio) != + fscrypt_inode_should_skip_dm_default_key(inode)) + return false; + if (!bc) + return true; + + /* + * Comparing the key pointers is good enough, as all I/O for each key + * uses the same pointer. I.e., there's currently no need to support + * merging requests where the keys are the same but the pointers differ. + */ + if (bc->bc_key != &inode->i_crypt_info->ci_key.blk_key->base) + return false; + + fscrypt_generate_dun(inode->i_crypt_info, next_lblk, next_dun); + return bio_crypt_dun_is_contiguous(bc, bio->bi_iter.bi_size, next_dun); +} +EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio); + +/** + * fscrypt_mergeable_bio_bh - test whether data can be added to a bio + * @bio: the bio being built up + * @next_bh: the next buffer_head for which I/O will be submitted + * + * Same as fscrypt_mergeable_bio(), except this takes a buffer_head instead of + * an inode and block number directly. + * + * Return: true iff the I/O is mergeable + */ +bool fscrypt_mergeable_bio_bh(struct bio *bio, + const struct buffer_head *next_bh) +{ + const struct inode *inode; + u64 next_lblk; + + if (!bh_get_inode_and_lblk_num(next_bh, &inode, &next_lblk)) + return !bio->bi_crypt_context && + !bio_should_skip_dm_default_key(bio); + + return fscrypt_mergeable_bio(bio, inode, next_lblk); +} +EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio_bh); diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c index 7f43ca5d30ae..0081fd48e96f 100644 --- a/fs/crypto/keyring.c +++ b/fs/crypto/keyring.c @@ -43,8 +43,10 @@ static void free_master_key(struct fscrypt_master_key *mk) wipe_master_key_secret(&mk->mk_secret); - for (i = 0; i < ARRAY_SIZE(mk->mk_mode_keys); i++) - crypto_free_skcipher(mk->mk_mode_keys[i]); + for (i = 0; i <= __FSCRYPT_MODE_MAX; i++) { + fscrypt_destroy_prepared_key(&mk->mk_direct_keys[i]); + fscrypt_destroy_prepared_key(&mk->mk_iv_ino_lblk_64_keys[i]); + } key_put(mk->mk_users); kzfree(mk); @@ -463,6 +465,114 @@ out_unlock: return err; } +static int fscrypt_provisioning_key_preparse(struct key_preparsed_payload *prep) +{ + const struct fscrypt_provisioning_key_payload *payload = prep->data; + + BUILD_BUG_ON(FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE < FSCRYPT_MAX_KEY_SIZE); + + if (prep->datalen < sizeof(*payload) + FSCRYPT_MIN_KEY_SIZE || + prep->datalen > sizeof(*payload) + FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE) + return -EINVAL; + + if (payload->type != FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR && + payload->type != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) + return -EINVAL; + + if (payload->__reserved) + return -EINVAL; + + prep->payload.data[0] = kmemdup(payload, prep->datalen, GFP_KERNEL); + if (!prep->payload.data[0]) + return -ENOMEM; + + prep->quotalen = prep->datalen; + return 0; +} + +static void fscrypt_provisioning_key_free_preparse( + struct key_preparsed_payload *prep) +{ + kzfree(prep->payload.data[0]); +} + +static void fscrypt_provisioning_key_describe(const struct key *key, + struct seq_file *m) +{ + seq_puts(m, key->description); + if (key_is_positive(key)) { + const struct fscrypt_provisioning_key_payload *payload = + key->payload.data[0]; + + seq_printf(m, ": %u [%u]", key->datalen, payload->type); + } +} + +static void fscrypt_provisioning_key_destroy(struct key *key) +{ + kzfree(key->payload.data[0]); +} + +static struct key_type key_type_fscrypt_provisioning = { + .name = "fscrypt-provisioning", + .preparse = fscrypt_provisioning_key_preparse, + .free_preparse = fscrypt_provisioning_key_free_preparse, + .instantiate = generic_key_instantiate, + .describe = fscrypt_provisioning_key_describe, + .destroy = fscrypt_provisioning_key_destroy, +}; + +/* + * Retrieve the raw key from the Linux keyring key specified by 'key_id', and + * store it into 'secret'. + * + * The key must be of type "fscrypt-provisioning" and must have the field + * fscrypt_provisioning_key_payload::type set to 'type', indicating that it's + * only usable with fscrypt with the particular KDF version identified by + * 'type'. We don't use the "logon" key type because there's no way to + * completely restrict the use of such keys; they can be used by any kernel API + * that accepts "logon" keys and doesn't require a specific service prefix. + * + * The ability to specify the key via Linux keyring key is intended for cases + * where userspace needs to re-add keys after the filesystem is unmounted and + * re-mounted. Most users should just provide the raw key directly instead. + */ +static int get_keyring_key(u32 key_id, u32 type, + struct fscrypt_master_key_secret *secret) +{ + key_ref_t ref; + struct key *key; + const struct fscrypt_provisioning_key_payload *payload; + int err; + + ref = lookup_user_key(key_id, 0, KEY_NEED_SEARCH); + if (IS_ERR(ref)) + return PTR_ERR(ref); + key = key_ref_to_ptr(ref); + + if (key->type != &key_type_fscrypt_provisioning) + goto bad_key; + payload = key->payload.data[0]; + + /* Don't allow fscrypt v1 keys to be used as v2 keys and vice versa. */ + if (payload->type != type) + goto bad_key; + + secret->size = key->datalen - sizeof(*payload); + memcpy(secret->raw, payload->raw, secret->size); + err = 0; + goto out_put; + +bad_key: + err = -EKEYREJECTED; +out_put: + key_ref_put(ref); + return err; +} + +/* Size of software "secret" derived from hardware-wrapped key */ +#define RAW_SECRET_SIZE 32 + /* * Add a master encryption key to the filesystem, causing all files which were * encrypted with it to appear "unlocked" (decrypted) when accessed. @@ -493,6 +603,9 @@ int fscrypt_ioctl_add_key(struct file *filp, void __user *_uarg) struct fscrypt_add_key_arg __user *uarg = _uarg; struct fscrypt_add_key_arg arg; struct fscrypt_master_key_secret secret; + u8 _kdf_key[RAW_SECRET_SIZE]; + u8 *kdf_key; + unsigned int kdf_key_size; int err; if (copy_from_user(&arg, uarg, sizeof(arg))) @@ -501,18 +614,31 @@ int fscrypt_ioctl_add_key(struct file *filp, void __user *_uarg) if (!valid_key_spec(&arg.key_spec)) return -EINVAL; - if (arg.raw_size < FSCRYPT_MIN_KEY_SIZE || - arg.raw_size > FSCRYPT_MAX_KEY_SIZE) - return -EINVAL; - if (memchr_inv(arg.__reserved, 0, sizeof(arg.__reserved))) return -EINVAL; memset(&secret, 0, sizeof(secret)); - secret.size = arg.raw_size; - err = -EFAULT; - if (copy_from_user(secret.raw, uarg->raw, secret.size)) - goto out_wipe_secret; + if (arg.key_id) { + if (arg.raw_size != 0) + return -EINVAL; + err = get_keyring_key(arg.key_id, arg.key_spec.type, &secret); + if (err) + goto out_wipe_secret; + err = -EINVAL; + if (!(arg.__flags & __FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED) && + secret.size > FSCRYPT_MAX_KEY_SIZE) + goto out_wipe_secret; + } else { + if (arg.raw_size < FSCRYPT_MIN_KEY_SIZE || + arg.raw_size > + ((arg.__flags & __FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED) ? + FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE : FSCRYPT_MAX_KEY_SIZE)) + return -EINVAL; + secret.size = arg.raw_size; + err = -EFAULT; + if (copy_from_user(secret.raw, uarg->raw, secret.size)) + goto out_wipe_secret; + } switch (arg.key_spec.type) { case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR: @@ -524,18 +650,37 @@ int fscrypt_ioctl_add_key(struct file *filp, void __user *_uarg) err = -EACCES; if (!capable(CAP_SYS_ADMIN)) goto out_wipe_secret; + + err = -EINVAL; + if (arg.__flags) + goto out_wipe_secret; break; case FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER: - err = fscrypt_init_hkdf(&secret.hkdf, secret.raw, secret.size); + err = -EINVAL; + if (arg.__flags & ~__FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED) + goto out_wipe_secret; + if (arg.__flags & __FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED) { + kdf_key = _kdf_key; + kdf_key_size = RAW_SECRET_SIZE; + err = fscrypt_derive_raw_secret(sb, secret.raw, + secret.size, + kdf_key, kdf_key_size); + if (err) + goto out_wipe_secret; + secret.is_hw_wrapped = true; + } else { + kdf_key = secret.raw; + kdf_key_size = secret.size; + } + err = fscrypt_init_hkdf(&secret.hkdf, kdf_key, kdf_key_size); + /* + * Now that the HKDF context is initialized, the raw HKDF + * key is no longer needed. + */ + memzero_explicit(kdf_key, kdf_key_size); if (err) goto out_wipe_secret; - /* - * Now that the HKDF context is initialized, the raw key is no - * longer needed. - */ - memzero_explicit(secret.raw, secret.size); - /* Calculate the key identifier and return it to userspace. */ err = fscrypt_hkdf_expand(&secret.hkdf, HKDF_CONTEXT_KEY_IDENTIFIER, @@ -664,9 +809,6 @@ static int check_for_busy_inodes(struct super_block *sb, struct list_head *pos; size_t busy_count = 0; unsigned long ino; - struct dentry *dentry; - char _path[256]; - char *path = NULL; spin_lock(&mk->mk_decrypted_inodes_lock); @@ -685,31 +827,45 @@ static int check_for_busy_inodes(struct super_block *sb, struct fscrypt_info, ci_master_key_link)->ci_inode; ino = inode->i_ino; - dentry = d_find_alias(inode); } spin_unlock(&mk->mk_decrypted_inodes_lock); - if (dentry) { - path = dentry_path(dentry, _path, sizeof(_path)); - dput(dentry); - } - if (IS_ERR_OR_NULL(path)) - path = "(unknown)"; - fscrypt_warn(NULL, - "%s: %zu inode(s) still busy after removing key with %s %*phN, including ino %lu (%s)", + "%s: %zu inode(s) still busy after removing key with %s %*phN, including ino %lu", sb->s_id, busy_count, master_key_spec_type(&mk->mk_spec), master_key_spec_len(&mk->mk_spec), (u8 *)&mk->mk_spec.u, - ino, path); + ino); return -EBUSY; } +static BLOCKING_NOTIFIER_HEAD(fscrypt_key_removal_notifiers); + +/* + * Register a function to be executed when the FS_IOC_REMOVE_ENCRYPTION_KEY + * ioctl has removed a key and is about to try evicting inodes. + */ +int fscrypt_register_key_removal_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&fscrypt_key_removal_notifiers, + nb); +} +EXPORT_SYMBOL_GPL(fscrypt_register_key_removal_notifier); + +int fscrypt_unregister_key_removal_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&fscrypt_key_removal_notifiers, + nb); +} +EXPORT_SYMBOL_GPL(fscrypt_unregister_key_removal_notifier); + static int try_to_lock_encrypted_files(struct super_block *sb, struct fscrypt_master_key *mk) { int err1; int err2; + blocking_notifier_call_chain(&fscrypt_key_removal_notifiers, 0, NULL); + /* * An inode can't be evicted while it is dirty or has dirty pages. * Thus, we first have to clean the inodes in ->mk_decrypted_inodes. @@ -976,8 +1132,14 @@ int __init fscrypt_init_keyring(void) if (err) goto err_unregister_fscrypt; + err = register_key_type(&key_type_fscrypt_provisioning); + if (err) + goto err_unregister_fscrypt_user; + return 0; +err_unregister_fscrypt_user: + unregister_key_type(&key_type_fscrypt_user); err_unregister_fscrypt: unregister_key_type(&key_type_fscrypt); return err; diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index e3ba6b45c06c..f4c6d8cb4587 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -8,21 +8,18 @@ * Heavily modified since then. */ -#include -#include #include #include #include "fscrypt_private.h" -static struct crypto_shash *essiv_hash_tfm; - -static struct fscrypt_mode available_modes[] = { +struct fscrypt_mode fscrypt_modes[] = { [FSCRYPT_MODE_AES_256_XTS] = { .friendly_name = "AES-256-XTS", .cipher_str = "xts(aes)", .keysize = 64, .ivsize = 16, + .blk_crypto_mode = BLK_ENCRYPTION_MODE_AES_256_XTS, }, [FSCRYPT_MODE_AES_256_CTS] = { .friendly_name = "AES-256-CTS-CBC", @@ -31,11 +28,11 @@ static struct fscrypt_mode available_modes[] = { .ivsize = 16, }, [FSCRYPT_MODE_AES_128_CBC] = { - .friendly_name = "AES-128-CBC", - .cipher_str = "cbc(aes)", + .friendly_name = "AES-128-CBC-ESSIV", + .cipher_str = "essiv(cbc(aes),sha256)", .keysize = 16, .ivsize = 16, - .needs_essiv = true, + .blk_crypto_mode = BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV, }, [FSCRYPT_MODE_AES_128_CTS] = { .friendly_name = "AES-128-CTS-CBC", @@ -48,6 +45,7 @@ static struct fscrypt_mode available_modes[] = { .cipher_str = "adiantum(xchacha12,aes)", .keysize = 32, .ivsize = 32, + .blk_crypto_mode = BLK_ENCRYPTION_MODE_ADIANTUM, }, }; @@ -56,10 +54,10 @@ select_encryption_mode(const union fscrypt_policy *policy, const struct inode *inode) { if (S_ISREG(inode->i_mode)) - return &available_modes[fscrypt_policy_contents_mode(policy)]; + return &fscrypt_modes[fscrypt_policy_contents_mode(policy)]; if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - return &available_modes[fscrypt_policy_fnames_mode(policy)]; + return &fscrypt_modes[fscrypt_policy_fnames_mode(policy)]; WARN_ONCE(1, "fscrypt: filesystem tried to load encryption info for inode %lu, which is not encryptable (file type %d)\n", inode->i_ino, (inode->i_mode & S_IFMT)); @@ -67,9 +65,9 @@ select_encryption_mode(const union fscrypt_policy *policy, } /* Create a symmetric cipher object for the given encryption mode and key */ -struct crypto_skcipher *fscrypt_allocate_skcipher(struct fscrypt_mode *mode, - const u8 *raw_key, - const struct inode *inode) +static struct crypto_skcipher * +fscrypt_allocate_skcipher(struct fscrypt_mode *mode, const u8 *raw_key, + const struct inode *inode) { struct crypto_skcipher *tfm; int err; @@ -86,18 +84,19 @@ struct crypto_skcipher *fscrypt_allocate_skcipher(struct fscrypt_mode *mode, mode->cipher_str, PTR_ERR(tfm)); return tfm; } - if (unlikely(!mode->logged_impl_name)) { + if (!xchg(&mode->logged_impl_name, 1)) { /* * fscrypt performance can vary greatly depending on which * crypto algorithm implementation is used. Help people debug * performance problems by logging the ->cra_driver_name the - * first time a mode is used. Note that multiple threads can - * race here, but it doesn't really matter. + * first time a mode is used. */ - mode->logged_impl_name = true; pr_info("fscrypt: %s using implementation \"%s\"\n", - mode->friendly_name, - crypto_skcipher_alg(tfm)->base.cra_driver_name); + mode->friendly_name, crypto_skcipher_driver_name(tfm)); + } + if (WARN_ON(crypto_skcipher_ivsize(tfm) != mode->ivsize)) { + err = -EINVAL; + goto err_free_tfm; } crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY); err = crypto_skcipher_setkey(tfm, raw_key, mode->keysize); @@ -111,175 +110,204 @@ err_free_tfm: return ERR_PTR(err); } -static int derive_essiv_salt(const u8 *key, int keysize, u8 *salt) +/* + * Prepare the crypto transform object or blk-crypto key in @prep_key, given the + * raw key, encryption mode, and flag indicating which encryption implementation + * (fs-layer or blk-crypto) will be used. + */ +int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, + const u8 *raw_key, unsigned int raw_key_size, + bool is_hw_wrapped, const struct fscrypt_info *ci) { - struct crypto_shash *tfm = READ_ONCE(essiv_hash_tfm); + struct crypto_skcipher *tfm; - /* init hash transform on demand */ - if (unlikely(!tfm)) { - struct crypto_shash *prev_tfm; + if (fscrypt_using_inline_encryption(ci)) + return fscrypt_prepare_inline_crypt_key(prep_key, + raw_key, raw_key_size, is_hw_wrapped, ci); - tfm = crypto_alloc_shash("sha256", 0, 0); - if (IS_ERR(tfm)) { - if (PTR_ERR(tfm) == -ENOENT) { - fscrypt_warn(NULL, - "Missing crypto API support for SHA-256"); - return -ENOPKG; - } - fscrypt_err(NULL, - "Error allocating SHA-256 transform: %ld", - PTR_ERR(tfm)); - return PTR_ERR(tfm); - } - prev_tfm = cmpxchg(&essiv_hash_tfm, NULL, tfm); - if (prev_tfm) { - crypto_free_shash(tfm); - tfm = prev_tfm; - } - } - - { - SHASH_DESC_ON_STACK(desc, tfm); - desc->tfm = tfm; - desc->flags = 0; - - return crypto_shash_digest(desc, key, keysize, salt); - } -} - -static int init_essiv_generator(struct fscrypt_info *ci, const u8 *raw_key, - int keysize) -{ - int err; - struct crypto_cipher *essiv_tfm; - u8 salt[SHA256_DIGEST_SIZE]; - - if (WARN_ON(ci->ci_mode->ivsize != AES_BLOCK_SIZE)) + if (WARN_ON(is_hw_wrapped || raw_key_size != ci->ci_mode->keysize)) return -EINVAL; - essiv_tfm = crypto_alloc_cipher("aes", 0, 0); - if (IS_ERR(essiv_tfm)) - return PTR_ERR(essiv_tfm); - - ci->ci_essiv_tfm = essiv_tfm; - - err = derive_essiv_salt(raw_key, keysize, salt); - if (err) - goto out; - + tfm = fscrypt_allocate_skcipher(ci->ci_mode, raw_key, ci->ci_inode); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); /* - * Using SHA256 to derive the salt/key will result in AES-256 being - * used for IV generation. File contents encryption will still use the - * configured keysize (AES-128) nevertheless. + * Pairs with READ_ONCE() in fscrypt_is_key_prepared(). (Only matters + * for the per-mode keys, which are shared by multiple inodes.) */ - err = crypto_cipher_setkey(essiv_tfm, salt, sizeof(salt)); - if (err) - goto out; - -out: - memzero_explicit(salt, sizeof(salt)); - return err; -} - -/* Given the per-file key, set up the file's crypto transform object(s) */ -int fscrypt_set_derived_key(struct fscrypt_info *ci, const u8 *derived_key) -{ - struct fscrypt_mode *mode = ci->ci_mode; - struct crypto_skcipher *ctfm; - int err; - - ctfm = fscrypt_allocate_skcipher(mode, derived_key, ci->ci_inode); - if (IS_ERR(ctfm)) - return PTR_ERR(ctfm); - - ci->ci_ctfm = ctfm; - - if (mode->needs_essiv) { - err = init_essiv_generator(ci, derived_key, mode->keysize); - if (err) { - fscrypt_warn(ci->ci_inode, - "Error initializing ESSIV generator: %d", - err); - return err; - } - } + smp_store_release(&prep_key->tfm, tfm); return 0; } -static int setup_per_mode_key(struct fscrypt_info *ci, - struct fscrypt_master_key *mk) +/* Destroy a crypto transform object and/or blk-crypto key. */ +void fscrypt_destroy_prepared_key(struct fscrypt_prepared_key *prep_key) { + crypto_free_skcipher(prep_key->tfm); + fscrypt_destroy_inline_crypt_key(prep_key); +} + +/* Given a per-file encryption key, set up the file's crypto transform object */ +int fscrypt_set_per_file_enc_key(struct fscrypt_info *ci, const u8 *raw_key) +{ + ci->ci_owns_key = true; + return fscrypt_prepare_key(&ci->ci_key, raw_key, ci->ci_mode->keysize, + false /*is_hw_wrapped*/, ci); +} + +static int setup_per_mode_enc_key(struct fscrypt_info *ci, + struct fscrypt_master_key *mk, + struct fscrypt_prepared_key *keys, + u8 hkdf_context, bool include_fs_uuid) +{ + static DEFINE_MUTEX(mode_key_setup_mutex); + const struct inode *inode = ci->ci_inode; + const struct super_block *sb = inode->i_sb; struct fscrypt_mode *mode = ci->ci_mode; - u8 mode_num = mode - available_modes; - struct crypto_skcipher *tfm, *prev_tfm; + const u8 mode_num = mode - fscrypt_modes; + struct fscrypt_prepared_key *prep_key; u8 mode_key[FSCRYPT_MAX_KEY_SIZE]; + u8 hkdf_info[sizeof(mode_num) + sizeof(sb->s_uuid)]; + unsigned int hkdf_infolen = 0; int err; - if (WARN_ON(mode_num >= ARRAY_SIZE(mk->mk_mode_keys))) + if (WARN_ON(mode_num > __FSCRYPT_MODE_MAX)) return -EINVAL; - /* pairs with cmpxchg() below */ - tfm = READ_ONCE(mk->mk_mode_keys[mode_num]); - if (likely(tfm != NULL)) - goto done; + prep_key = &keys[mode_num]; + if (fscrypt_is_key_prepared(prep_key, ci)) { + ci->ci_key = *prep_key; + return 0; + } - BUILD_BUG_ON(sizeof(mode_num) != 1); - err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, - HKDF_CONTEXT_PER_MODE_KEY, - &mode_num, sizeof(mode_num), - mode_key, mode->keysize); + mutex_lock(&mode_key_setup_mutex); + + if (fscrypt_is_key_prepared(prep_key, ci)) + goto done_unlock; + + if (mk->mk_secret.is_hw_wrapped && S_ISREG(inode->i_mode)) { + int i; + + if (!fscrypt_using_inline_encryption(ci)) { + fscrypt_warn(ci->ci_inode, + "Hardware-wrapped keys require inline encryption (-o inlinecrypt)"); + err = -EINVAL; + goto out_unlock; + } + for (i = 0; i <= __FSCRYPT_MODE_MAX; i++) { + if (fscrypt_is_key_prepared(&keys[i], ci)) { + fscrypt_warn(ci->ci_inode, + "Each hardware-wrapped key can only be used with one encryption mode"); + err = -EINVAL; + goto out_unlock; + } + } + err = fscrypt_prepare_key(prep_key, mk->mk_secret.raw, + mk->mk_secret.size, true, ci); + if (err) + goto out_unlock; + } else { + BUILD_BUG_ON(sizeof(mode_num) != 1); + BUILD_BUG_ON(sizeof(sb->s_uuid) != 16); + BUILD_BUG_ON(sizeof(hkdf_info) != 17); + hkdf_info[hkdf_infolen++] = mode_num; + if (include_fs_uuid) { + memcpy(&hkdf_info[hkdf_infolen], &sb->s_uuid, + sizeof(sb->s_uuid)); + hkdf_infolen += sizeof(sb->s_uuid); + } + err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, + hkdf_context, hkdf_info, hkdf_infolen, + mode_key, mode->keysize); + if (err) + goto out_unlock; + err = fscrypt_prepare_key(prep_key, mode_key, mode->keysize, + false /*is_hw_wrapped*/, ci); + memzero_explicit(mode_key, mode->keysize); + if (err) + goto out_unlock; + } +done_unlock: + ci->ci_key = *prep_key; + err = 0; +out_unlock: + mutex_unlock(&mode_key_setup_mutex); + return err; +} + +int fscrypt_derive_dirhash_key(struct fscrypt_info *ci, + const struct fscrypt_master_key *mk) +{ + int err; + + err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, HKDF_CONTEXT_DIRHASH_KEY, + ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE, + (u8 *)&ci->ci_dirhash_key, + sizeof(ci->ci_dirhash_key)); if (err) return err; - tfm = fscrypt_allocate_skcipher(mode, mode_key, ci->ci_inode); - memzero_explicit(mode_key, mode->keysize); - if (IS_ERR(tfm)) - return PTR_ERR(tfm); - - /* pairs with READ_ONCE() above */ - prev_tfm = cmpxchg(&mk->mk_mode_keys[mode_num], NULL, tfm); - if (prev_tfm != NULL) { - crypto_free_skcipher(tfm); - tfm = prev_tfm; - } -done: - ci->ci_ctfm = tfm; + ci->ci_dirhash_key_initialized = true; return 0; } static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, struct fscrypt_master_key *mk) { - u8 derived_key[FSCRYPT_MAX_KEY_SIZE]; int err; + if (mk->mk_secret.is_hw_wrapped && + !(ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64)) { + fscrypt_warn(ci->ci_inode, + "Hardware-wrapped keys are only supported with IV_INO_LBLK_64 policies"); + return -EINVAL; + } + if (ci->ci_policy.v2.flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) { /* - * DIRECT_KEY: instead of deriving per-file keys, the per-file - * nonce will be included in all the IVs. But unlike v1 - * policies, for v2 policies in this case we don't encrypt with - * the master key directly but rather derive a per-mode key. - * This ensures that the master key is consistently used only - * for HKDF, avoiding key reuse issues. + * DIRECT_KEY: instead of deriving per-file encryption keys, the + * per-file nonce will be included in all the IVs. But unlike + * v1 policies, for v2 policies in this case we don't encrypt + * with the master key directly but rather derive a per-mode + * encryption key. This ensures that the master key is + * consistently used only for HKDF, avoiding key reuse issues. */ - if (!fscrypt_mode_supports_direct_key(ci->ci_mode)) { - fscrypt_warn(ci->ci_inode, - "Direct key flag not allowed with %s", - ci->ci_mode->friendly_name); - return -EINVAL; - } - return setup_per_mode_key(ci, mk); - } + err = setup_per_mode_enc_key(ci, mk, mk->mk_direct_keys, + HKDF_CONTEXT_DIRECT_KEY, false); + } else if (ci->ci_policy.v2.flags & + FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) { + /* + * IV_INO_LBLK_64: encryption keys are derived from (master_key, + * mode_num, filesystem_uuid), and inode number is included in + * the IVs. This format is optimized for use with inline + * encryption hardware compliant with the UFS or eMMC standards. + */ + err = setup_per_mode_enc_key(ci, mk, mk->mk_iv_ino_lblk_64_keys, + HKDF_CONTEXT_IV_INO_LBLK_64_KEY, + true); + } else { + u8 derived_key[FSCRYPT_MAX_KEY_SIZE]; - err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, - HKDF_CONTEXT_PER_FILE_KEY, - ci->ci_nonce, FS_KEY_DERIVATION_NONCE_SIZE, - derived_key, ci->ci_mode->keysize); + err = fscrypt_hkdf_expand(&mk->mk_secret.hkdf, + HKDF_CONTEXT_PER_FILE_ENC_KEY, + ci->ci_nonce, + FS_KEY_DERIVATION_NONCE_SIZE, + derived_key, ci->ci_mode->keysize); + if (err) + return err; + + err = fscrypt_set_per_file_enc_key(ci, derived_key); + memzero_explicit(derived_key, ci->ci_mode->keysize); + } if (err) return err; - err = fscrypt_set_derived_key(ci, derived_key); - memzero_explicit(derived_key, ci->ci_mode->keysize); - return err; + /* Derive a secret dirhash key for directories that need it. */ + if (S_ISDIR(ci->ci_inode->i_mode) && IS_CASEFOLDED(ci->ci_inode)) { + err = fscrypt_derive_dirhash_key(ci, mk); + if (err) + return err; + } + + return 0; } /* @@ -300,6 +328,8 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, struct fscrypt_key_specifier mk_spec; int err; + fscrypt_select_encryption_impl(ci); + switch (ci->ci_policy.version) { case FSCRYPT_POLICY_V1: mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR; @@ -389,13 +419,10 @@ static void put_crypt_info(struct fscrypt_info *ci) if (!ci) return; - if (ci->ci_direct_key) { + if (ci->ci_direct_key) fscrypt_put_direct_key(ci->ci_direct_key); - } else if ((ci->ci_ctfm != NULL || ci->ci_essiv_tfm != NULL) && - !fscrypt_is_direct_key_policy(&ci->ci_policy)) { - crypto_free_skcipher(ci->ci_ctfm); - crypto_free_cipher(ci->ci_essiv_tfm); - } + else if (ci->ci_owns_key) + fscrypt_destroy_prepared_key(&ci->ci_key); key = ci->ci_master_key; if (key) { @@ -416,6 +443,7 @@ static void put_crypt_info(struct fscrypt_info *ci) key_invalidate(key); key_put(key); } + memzero_explicit(ci, sizeof(*ci)); kmem_cache_free(fscrypt_info_cachep, ci); } @@ -579,6 +607,15 @@ int fscrypt_drop_inode(struct inode *inode) return 0; mk = ci->ci_master_key->payload.data[0]; + /* + * With proper, non-racy use of FS_IOC_REMOVE_ENCRYPTION_KEY, all inodes + * protected by the key were cleaned by sync_filesystem(). But if + * userspace is still using the files, inodes can be dirtied between + * then and now. We mustn't lose any writes, so skip dirty inodes here. + */ + if (inode->i_state & I_DIRTY_ALL) + return 0; + /* * Note: since we aren't holding ->mk_secret_sem, the result here can * immediately become outdated. But there's no correctness problem with diff --git a/fs/crypto/keysetup_v1.c b/fs/crypto/keysetup_v1.c index 4ae795d61840..3f7bb48f7317 100644 --- a/fs/crypto/keysetup_v1.c +++ b/fs/crypto/keysetup_v1.c @@ -9,7 +9,7 @@ * This file implements compatibility functions for the original encryption * policy version ("v1"), including: * - * - Deriving per-file keys using the AES-128-ECB based KDF + * - Deriving per-file encryption keys using the AES-128-ECB based KDF * (rather than the new method of using HKDF-SHA512) * * - Retrieving fscrypt master keys from process-subscribed keyrings @@ -146,7 +146,7 @@ struct fscrypt_direct_key { struct hlist_node dk_node; refcount_t dk_refcount; const struct fscrypt_mode *dk_mode; - struct crypto_skcipher *dk_ctfm; + struct fscrypt_prepared_key dk_key; u8 dk_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE]; u8 dk_raw[FSCRYPT_MAX_KEY_SIZE]; }; @@ -154,7 +154,7 @@ struct fscrypt_direct_key { static void free_direct_key(struct fscrypt_direct_key *dk) { if (dk) { - crypto_free_skcipher(dk->dk_ctfm); + fscrypt_destroy_prepared_key(&dk->dk_key); kzfree(dk); } } @@ -199,6 +199,8 @@ find_or_insert_direct_key(struct fscrypt_direct_key *to_insert, continue; if (ci->ci_mode != dk->dk_mode) continue; + if (!fscrypt_is_key_prepared(&dk->dk_key, ci)) + continue; if (crypto_memneq(raw_key, dk->dk_raw, ci->ci_mode->keysize)) continue; /* using existing tfm with same (descriptor, mode, raw_key) */ @@ -231,13 +233,10 @@ fscrypt_get_direct_key(const struct fscrypt_info *ci, const u8 *raw_key) return ERR_PTR(-ENOMEM); refcount_set(&dk->dk_refcount, 1); dk->dk_mode = ci->ci_mode; - dk->dk_ctfm = fscrypt_allocate_skcipher(ci->ci_mode, raw_key, - ci->ci_inode); - if (IS_ERR(dk->dk_ctfm)) { - err = PTR_ERR(dk->dk_ctfm); - dk->dk_ctfm = NULL; + err = fscrypt_prepare_key(&dk->dk_key, raw_key, ci->ci_mode->keysize, + false /*is_hw_wrapped*/, ci); + if (err) goto err_free_dk; - } memcpy(dk->dk_descriptor, ci->ci_policy.v1.master_key_descriptor, FSCRYPT_KEY_DESCRIPTOR_SIZE); memcpy(dk->dk_raw, raw_key, ci->ci_mode->keysize); @@ -253,32 +252,13 @@ err_free_dk: static int setup_v1_file_key_direct(struct fscrypt_info *ci, const u8 *raw_master_key) { - const struct fscrypt_mode *mode = ci->ci_mode; struct fscrypt_direct_key *dk; - if (!fscrypt_mode_supports_direct_key(mode)) { - fscrypt_warn(ci->ci_inode, - "Direct key mode not allowed with %s", - mode->friendly_name); - return -EINVAL; - } - - if (ci->ci_policy.v1.contents_encryption_mode != - ci->ci_policy.v1.filenames_encryption_mode) { - fscrypt_warn(ci->ci_inode, - "Direct key mode not allowed with different contents and filenames modes"); - return -EINVAL; - } - - /* ESSIV implies 16-byte IVs which implies !DIRECT_KEY */ - if (WARN_ON(mode->needs_essiv)) - return -EINVAL; - dk = fscrypt_get_direct_key(ci, raw_master_key); if (IS_ERR(dk)) return PTR_ERR(dk); ci->ci_direct_key = dk; - ci->ci_ctfm = dk->dk_ctfm; + ci->ci_key = dk->dk_key; return 0; } @@ -302,7 +282,7 @@ static int setup_v1_file_key_derived(struct fscrypt_info *ci, if (err) goto out; - err = fscrypt_set_derived_key(ci, derived_key); + err = fscrypt_set_per_file_enc_key(ci, derived_key); out: kzfree(derived_key); return err; diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c index 4072ba644595..cf2a9d26ef7d 100644 --- a/fs/crypto/policy.c +++ b/fs/crypto/policy.c @@ -29,13 +29,153 @@ bool fscrypt_policies_equal(const union fscrypt_policy *policy1, return !memcmp(policy1, policy2, fscrypt_policy_size(policy1)); } +static bool fscrypt_valid_enc_modes(u32 contents_mode, u32 filenames_mode) +{ + if (contents_mode == FSCRYPT_MODE_AES_256_XTS && + filenames_mode == FSCRYPT_MODE_AES_256_CTS) + return true; + + if (contents_mode == FSCRYPT_MODE_AES_128_CBC && + filenames_mode == FSCRYPT_MODE_AES_128_CTS) + return true; + + if (contents_mode == FSCRYPT_MODE_ADIANTUM && + filenames_mode == FSCRYPT_MODE_ADIANTUM) + return true; + + return false; +} + +static bool supported_direct_key_modes(const struct inode *inode, + u32 contents_mode, u32 filenames_mode) +{ + const struct fscrypt_mode *mode; + + if (contents_mode != filenames_mode) { + fscrypt_warn(inode, + "Direct key flag not allowed with different contents and filenames modes"); + return false; + } + mode = &fscrypt_modes[contents_mode]; + + if (mode->ivsize < offsetofend(union fscrypt_iv, nonce)) { + fscrypt_warn(inode, "Direct key flag not allowed with %s", + mode->friendly_name); + return false; + } + return true; +} + +static bool supported_iv_ino_lblk_64_policy( + const struct fscrypt_policy_v2 *policy, + const struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + int ino_bits = 64, lblk_bits = 64; + + if (policy->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) { + fscrypt_warn(inode, + "The DIRECT_KEY and IV_INO_LBLK_64 flags are mutually exclusive"); + return false; + } + /* + * It's unsafe to include inode numbers in the IVs if the filesystem can + * potentially renumber inodes, e.g. via filesystem shrinking. + */ + if (!sb->s_cop->has_stable_inodes || + !sb->s_cop->has_stable_inodes(sb)) { + fscrypt_warn(inode, + "Can't use IV_INO_LBLK_64 policy on filesystem '%s' because it doesn't have stable inode numbers", + sb->s_id); + return false; + } + if (sb->s_cop->get_ino_and_lblk_bits) + sb->s_cop->get_ino_and_lblk_bits(sb, &ino_bits, &lblk_bits); + if (ino_bits > 32 || lblk_bits > 32) { + fscrypt_warn(inode, + "Can't use IV_INO_LBLK_64 policy on filesystem '%s' because it doesn't use 32-bit inode and block numbers", + sb->s_id); + return false; + } + return true; +} + +static bool fscrypt_supported_v1_policy(const struct fscrypt_policy_v1 *policy, + const struct inode *inode) +{ + if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode, + policy->filenames_encryption_mode)) { + fscrypt_warn(inode, + "Unsupported encryption modes (contents %d, filenames %d)", + policy->contents_encryption_mode, + policy->filenames_encryption_mode); + return false; + } + + if (policy->flags & ~(FSCRYPT_POLICY_FLAGS_PAD_MASK | + FSCRYPT_POLICY_FLAG_DIRECT_KEY)) { + fscrypt_warn(inode, "Unsupported encryption flags (0x%02x)", + policy->flags); + return false; + } + + if ((policy->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) && + !supported_direct_key_modes(inode, policy->contents_encryption_mode, + policy->filenames_encryption_mode)) + return false; + + if (IS_CASEFOLDED(inode)) { + /* With v1, there's no way to derive dirhash keys. */ + fscrypt_warn(inode, + "v1 policies can't be used on casefolded directories"); + return false; + } + + return true; +} + +static bool fscrypt_supported_v2_policy(const struct fscrypt_policy_v2 *policy, + const struct inode *inode) +{ + if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode, + policy->filenames_encryption_mode)) { + fscrypt_warn(inode, + "Unsupported encryption modes (contents %d, filenames %d)", + policy->contents_encryption_mode, + policy->filenames_encryption_mode); + return false; + } + + if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID) { + fscrypt_warn(inode, "Unsupported encryption flags (0x%02x)", + policy->flags); + return false; + } + + if ((policy->flags & FSCRYPT_POLICY_FLAG_DIRECT_KEY) && + !supported_direct_key_modes(inode, policy->contents_encryption_mode, + policy->filenames_encryption_mode)) + return false; + + if ((policy->flags & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64) && + !supported_iv_ino_lblk_64_policy(policy, inode)) + return false; + + if (memchr_inv(policy->__reserved, 0, sizeof(policy->__reserved))) { + fscrypt_warn(inode, "Reserved bits set in encryption policy"); + return false; + } + + return true; +} + /** * fscrypt_supported_policy - check whether an encryption policy is supported * * Given an encryption policy, check whether all its encryption modes and other - * settings are supported by this kernel. (But we don't currently don't check - * for crypto API support here, so attempting to use an algorithm not configured - * into the crypto API will still fail later.) + * settings are supported by this kernel on the given inode. (But we don't + * currently don't check for crypto API support here, so attempting to use an + * algorithm not configured into the crypto API will still fail later.) * * Return: %true if supported, else %false */ @@ -43,55 +183,10 @@ bool fscrypt_supported_policy(const union fscrypt_policy *policy_u, const struct inode *inode) { switch (policy_u->version) { - case FSCRYPT_POLICY_V1: { - const struct fscrypt_policy_v1 *policy = &policy_u->v1; - - if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode, - policy->filenames_encryption_mode)) { - fscrypt_warn(inode, - "Unsupported encryption modes (contents %d, filenames %d)", - policy->contents_encryption_mode, - policy->filenames_encryption_mode); - return false; - } - - if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID) { - fscrypt_warn(inode, - "Unsupported encryption flags (0x%02x)", - policy->flags); - return false; - } - - return true; - } - case FSCRYPT_POLICY_V2: { - const struct fscrypt_policy_v2 *policy = &policy_u->v2; - - if (!fscrypt_valid_enc_modes(policy->contents_encryption_mode, - policy->filenames_encryption_mode)) { - fscrypt_warn(inode, - "Unsupported encryption modes (contents %d, filenames %d)", - policy->contents_encryption_mode, - policy->filenames_encryption_mode); - return false; - } - - if (policy->flags & ~FSCRYPT_POLICY_FLAGS_VALID) { - fscrypt_warn(inode, - "Unsupported encryption flags (0x%02x)", - policy->flags); - return false; - } - - if (memchr_inv(policy->__reserved, 0, - sizeof(policy->__reserved))) { - fscrypt_warn(inode, - "Reserved bits set in encryption policy"); - return false; - } - - return true; - } + case FSCRYPT_POLICY_V1: + return fscrypt_supported_v1_policy(&policy_u->v1, inode); + case FSCRYPT_POLICY_V2: + return fscrypt_supported_v2_policy(&policy_u->v2, inode); } return false; } diff --git a/fs/dax.c b/fs/dax.c index ddb4981ae32e..34a55754164f 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1057,6 +1057,9 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, lockdep_assert_held(&inode->i_rwsem); } + if (iocb->ki_flags & IOCB_NOWAIT) + flags |= IOMAP_NOWAIT; + while (iov_iter_count(iter)) { ret = iomap_apply(inode, pos, iov_iter_count(iter), flags, ops, iter, dax_iomap_actor); diff --git a/fs/dcache.c b/fs/dcache.c index a3d9dd74356d..2f01b271023d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -270,24 +270,10 @@ static void __d_free(struct rcu_head *head) kmem_cache_free(dentry_cache, dentry); } -static void __d_free_external_name(struct rcu_head *head) -{ - struct external_name *name = container_of(head, struct external_name, - u.head); - - mod_node_page_state(page_pgdat(virt_to_page(name)), - NR_INDIRECTLY_RECLAIMABLE_BYTES, - -ksize(name)); - - kfree(name); -} - static void __d_free_external(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); - - __d_free_external_name(&external_name(dentry)->u.head); - + kfree(external_name(dentry)); kmem_cache_free(dentry_cache, dentry); } @@ -319,7 +305,7 @@ void release_dentry_name_snapshot(struct name_snapshot *name) struct external_name *p; p = container_of(name->name, struct external_name, name[0]); if (unlikely(atomic_dec_and_test(&p->u.count))) - call_rcu(&p->u.head, __d_free_external_name); + kfree_rcu(p, u.head); } } EXPORT_SYMBOL(release_dentry_name_snapshot); @@ -1615,7 +1601,6 @@ EXPORT_SYMBOL(d_invalidate); struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) { - struct external_name *ext = NULL; struct dentry *dentry; char *dname; int err; @@ -1636,13 +1621,15 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) dname = dentry->d_iname; } else if (name->len > DNAME_INLINE_LEN-1) { size_t size = offsetof(struct external_name, name[1]); - ext = kmalloc(size + name->len, GFP_KERNEL_ACCOUNT); - if (!ext) { + struct external_name *p = kmalloc(size + name->len, + GFP_KERNEL_ACCOUNT | + __GFP_RECLAIMABLE); + if (!p) { kmem_cache_free(dentry_cache, dentry); return NULL; } - atomic_set(&ext->u.count, 1); - dname = ext->name; + atomic_set(&p->u.count, 1); + dname = p->name; } else { dname = dentry->d_iname; } @@ -1682,12 +1669,6 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) } } - if (unlikely(ext)) { - pg_data_t *pgdat = page_pgdat(virt_to_page(ext)); - mod_node_page_state(pgdat, NR_INDIRECTLY_RECLAIMABLE_BYTES, - ksize(ext)); - } - this_cpu_inc(nr_dentry); return dentry; @@ -2782,7 +2763,7 @@ static void copy_name(struct dentry *dentry, struct dentry *target) dentry->d_name.hash_len = target->d_name.hash_len; } if (old_name && likely(atomic_dec_and_test(&old_name->u.count))) - call_rcu(&old_name->u.head, __d_free_external_name); + kfree_rcu(old_name, u.head); } static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target) diff --git a/fs/direct-io.c b/fs/direct-io.c index 30bf22c989de..729c59213d2e 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -431,6 +432,7 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, sector_t first_sector, int nr_vecs) { struct bio *bio; + struct inode *inode = dio->inode; /* * bio_alloc() is guaranteed to return a bio when called with @@ -438,6 +440,9 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, */ bio = bio_alloc(GFP_KERNEL, nr_vecs); + fscrypt_set_bio_crypt_ctx(bio, inode, + sdio->cur_page_fs_offset >> inode->i_blkbits, + GFP_KERNEL); bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = first_sector; bio_set_op_attrs(bio, dio->op, dio->op_flags); diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 610f72ae7ad6..9c8c9a09b4a6 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -807,6 +807,7 @@ static int release_lockspace(struct dlm_ls *ls, int force) dlm_delete_debug_file(ls); + idr_destroy(&ls->ls_recover_idr); kfree(ls->ls_recover_buf); /* diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 3fda3832cf6a..0bc43b35d2c5 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -671,7 +671,7 @@ int dlm_ls_stop(struct dlm_ls *ls) int dlm_ls_start(struct dlm_ls *ls) { struct dlm_recover *rv, *rv_old; - struct dlm_config_node *nodes; + struct dlm_config_node *nodes = NULL; int error, count; rv = kzalloc(sizeof(*rv), GFP_NOFS); @@ -680,7 +680,7 @@ int dlm_ls_start(struct dlm_ls *ls) error = dlm_config_nodes(ls->ls_name, &nodes, &count); if (error < 0) - goto fail; + goto fail_rv; spin_lock(&ls->ls_recover_lock); @@ -712,8 +712,9 @@ int dlm_ls_start(struct dlm_ls *ls) return 0; fail: - kfree(rv); kfree(nodes); + fail_rv: + kfree(rv); return error; } diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c index 7cd24bccd4fe..37be29f21d04 100644 --- a/fs/dlm/memory.c +++ b/fs/dlm/memory.c @@ -38,10 +38,8 @@ int __init dlm_memory_init(void) void dlm_memory_exit(void) { - if (lkb_cache) - kmem_cache_destroy(lkb_cache); - if (rsb_cache) - kmem_cache_destroy(rsb_cache); + kmem_cache_destroy(lkb_cache); + kmem_cache_destroy(rsb_cache); } char *dlm_allocate_lvb(struct dlm_ls *ls) @@ -86,8 +84,7 @@ void dlm_free_lkb(struct dlm_lkb *lkb) struct dlm_user_args *ua; ua = lkb->lkb_ua; if (ua) { - if (ua->lksb.sb_lvbptr) - kfree(ua->lksb.sb_lvbptr); + kfree(ua->lksb.sb_lvbptr); kfree(ua); } } diff --git a/fs/dlm/user.c b/fs/dlm/user.c index d18e7a539f11..02de11695d0b 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -25,6 +25,7 @@ #include "lvb_table.h" #include "user.h" #include "ast.h" +#include "config.h" static const char name_prefix[] = "dlm"; static const struct file_operations device_fops; @@ -404,7 +405,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - error = dlm_new_lockspace(params->name, NULL, params->flags, + error = dlm_new_lockspace(params->name, dlm_config.ci_cluster_name, params->flags, DLM_USER_LVB_LEN, NULL, NULL, NULL, &lockspace); if (error) @@ -702,7 +703,7 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, result.version[0] = DLM_DEVICE_VERSION_MAJOR; result.version[1] = DLM_DEVICE_VERSION_MINOR; result.version[2] = DLM_DEVICE_VERSION_PATCH; - memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb)); + memcpy(&result.lksb, &ua->lksb, offsetof(struct dlm_lksb, sb_lvbptr)); result.user_lksb = ua->user_lksb; /* FIXME: dlm1 provides for the user's bastparam/addr to not be updated diff --git a/fs/drop_caches.c b/fs/drop_caches.c index d31b6c72b476..dc1a1d5d825b 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -35,11 +35,11 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) spin_unlock(&inode->i_lock); spin_unlock(&sb->s_inode_list_lock); - cond_resched(); invalidate_mapping_pages(inode->i_mapping, 0, -1); iput(toput_inode); toput_inode = inode; + cond_resched(); spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index bd25ab837011..eed38ae86c6c 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -339,8 +339,10 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat, struct extent_crypt_result ecr; int rc = 0; - BUG_ON(!crypt_stat || !crypt_stat->tfm - || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)); + if (!crypt_stat || !crypt_stat->tfm + || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED)) + return -EINVAL; + if (unlikely(ecryptfs_verbosity > 0)) { ecryptfs_printk(KERN_DEBUG, "Key size [%zd]; key:\n", crypt_stat->key_size); diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index bda65a730790..62d1dea85ef1 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -326,9 +326,9 @@ static int ecryptfs_i_size_read(struct dentry *dentry, struct inode *inode) static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry, struct dentry *lower_dentry) { - struct inode *inode, *lower_inode = d_inode(lower_dentry); + struct path *path = ecryptfs_dentry_to_lower_path(dentry->d_parent); + struct inode *inode, *lower_inode; struct ecryptfs_dentry_info *dentry_info; - struct vfsmount *lower_mnt; int rc = 0; dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL); @@ -340,16 +340,23 @@ static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry, return ERR_PTR(-ENOMEM); } - lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); fsstack_copy_attr_atime(d_inode(dentry->d_parent), - d_inode(lower_dentry->d_parent)); + d_inode(path->dentry)); BUG_ON(!d_count(lower_dentry)); ecryptfs_set_dentry_private(dentry, dentry_info); - dentry_info->lower_path.mnt = lower_mnt; + dentry_info->lower_path.mnt = mntget(path->mnt); dentry_info->lower_path.dentry = lower_dentry; - if (d_really_is_negative(lower_dentry)) { + /* + * negative dentry can go positive under us here - its parent is not + * locked. That's OK and that could happen just as we return from + * ecryptfs_lookup() anyway. Just need to be careful and fetch + * ->d_inode only once - it's not stable here. + */ + lower_inode = READ_ONCE(lower_dentry->d_inode); + + if (!lower_inode) { /* We want to add because we couldn't find in lower */ d_add(dentry, NULL); return NULL; diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index fa218cd64f74..b134315fb69d 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1335,7 +1335,7 @@ parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat, printk(KERN_WARNING "Tag 1 packet contains key larger " "than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES"); rc = -EINVAL; - goto out; + goto out_free; } memcpy((*new_auth_tok)->session_key.encrypted_key, &data[(*packet_size)], (body_size - (ECRYPTFS_SIG_SIZE + 2))); diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 4f457d5c4933..26464f9d9b76 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c @@ -397,6 +397,7 @@ int __init ecryptfs_init_messaging(void) * ecryptfs_message_buf_len), GFP_KERNEL); if (!ecryptfs_msg_ctx_arr) { + kfree(ecryptfs_daemon_hash); rc = -ENOMEM; printk(KERN_ERR "%s: Failed to allocate memory\n", __func__); goto out; diff --git a/fs/exec.c b/fs/exec.c index 41f0090b0dbf..c5d7b2e10492 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1009,7 +1009,7 @@ static int exec_mmap(struct mm_struct *mm) /* Notify parent that we're no longer interested in the old VM */ tsk = current; old_mm = current->mm; - mm_release(tsk, old_mm); + exec_mm_release(tsk, old_mm); if (old_mm) { sync_mm_rss(old_mm); diff --git a/fs/exofs/super.c b/fs/exofs/super.c index c9ec652e2fcd..881d5798a181 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -702,21 +702,18 @@ out: /* * Read the superblock from the OSD and fill in the fields */ -static int exofs_fill_super(struct super_block *sb, void *data, int silent) +static int exofs_fill_super(struct super_block *sb, + struct exofs_mountopt *opts, + struct exofs_sb_info *sbi, + int silent) { struct inode *root; - struct exofs_mountopt *opts = data; - struct exofs_sb_info *sbi; /*extended info */ struct osd_dev *od; /* Master device */ struct exofs_fscb fscb; /*on-disk superblock info */ struct ore_comp comp; unsigned table_count; int ret; - sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); - if (!sbi) - return -ENOMEM; - /* use mount options to fill superblock */ if (opts->is_osdname) { struct osd_dev_info odi = {.systemid_len = 0}; @@ -860,7 +857,9 @@ static struct dentry *exofs_mount(struct file_system_type *type, int flags, const char *dev_name, void *data) { + struct super_block *s; struct exofs_mountopt opts; + struct exofs_sb_info *sbi; int ret; ret = parse_options(data, &opts); @@ -869,9 +868,31 @@ static struct dentry *exofs_mount(struct file_system_type *type, return ERR_PTR(ret); } + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) { + kfree(opts.dev_name); + return ERR_PTR(-ENOMEM); + } + + s = sget(type, NULL, set_anon_super, flags, NULL); + + if (IS_ERR(s)) { + kfree(opts.dev_name); + kfree(sbi); + return ERR_CAST(s); + } + if (!opts.dev_name) opts.dev_name = dev_name; - return mount_nodev(type, flags, &opts, exofs_fill_super); + + + ret = exofs_fill_super(s, &opts, sbi, flags & SB_SILENT ? 1 : 0); + if (ret) { + deactivate_locked_super(s); + return ERR_PTR(ret); + } + s->s_flags |= SB_ACTIVE; + return dget(s->s_root); } /* diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index c22cc9d2a5c9..c08960040dd0 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -147,6 +147,7 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, tmp = lookup_one_len_unlocked(nbuf, parent, strlen(nbuf)); if (IS_ERR(tmp)) { dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp)); + err = PTR_ERR(tmp); goto out_err; } if (tmp != dentry) { @@ -508,26 +509,33 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, * inode is actually connected to the parent. */ err = exportfs_get_name(mnt, target_dir, nbuf, result); - if (!err) { - inode_lock(target_dir->d_inode); - nresult = lookup_one_len(nbuf, target_dir, - strlen(nbuf)); - inode_unlock(target_dir->d_inode); - if (!IS_ERR(nresult)) { - if (nresult->d_inode) { - dput(result); - result = nresult; - } else - dput(nresult); - } + if (err) { + dput(target_dir); + goto err_result; } + inode_lock(target_dir->d_inode); + nresult = lookup_one_len(nbuf, target_dir, strlen(nbuf)); + if (!IS_ERR(nresult)) { + if (unlikely(nresult->d_inode != result->d_inode)) { + dput(nresult); + nresult = ERR_PTR(-ESTALE); + } + } + inode_unlock(target_dir->d_inode); /* * At this point we are done with the parent, but it's pinned * by the child dentry anyway. */ dput(target_dir); + if (IS_ERR(nresult)) { + err = PTR_ERR(nresult); + goto err_result; + } + dput(result); + result = nresult; + /* * And finally make sure the dentry is actually acceptable * to NFSD. diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index a7c87d593083..31c5a7b5f1f3 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -699,10 +699,13 @@ static int ext2_get_blocks(struct inode *inode, if (!partial) { count++; mutex_unlock(&ei->truncate_mutex); - if (err) - goto cleanup; goto got_it; } + + if (err) { + mutex_unlock(&ei->truncate_mutex); + goto cleanup; + } } /* diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 13f470636672..4a338576ebb1 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1077,9 +1077,9 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) if (EXT2_BLOCKS_PER_GROUP(sb) == 0) goto cantfind_ext2; - sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - - le32_to_cpu(es->s_first_data_block) - 1) - / EXT2_BLOCKS_PER_GROUP(sb)) + 1; + sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - + le32_to_cpu(es->s_first_data_block) - 1) + / EXT2_BLOCKS_PER_GROUP(sb)) + 1; db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) / EXT2_DESC_PER_BLOCK(sb); sbi->s_group_desc = kmalloc (db_count * sizeof (struct buffer_head *), GFP_KERNEL); diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index e2cfd33fd759..39cd2c054339 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -37,6 +37,7 @@ config EXT4_FS select CRC16 select CRYPTO select CRYPTO_CRC32C + select FS_ENCRYPTION_ALGS if FS_ENCRYPTION help This is the next generation of the ext3 filesystem. diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 70266a3355dc..fb38f20f869e 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -280,6 +280,7 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, ext4_group_t ngroups = ext4_get_groups_count(sb); struct ext4_group_desc *desc; struct ext4_sb_info *sbi = EXT4_SB(sb); + struct buffer_head *bh_p; if (block_group >= ngroups) { ext4_error(sb, "block_group >= groups_count - block_group = %u," @@ -290,7 +291,14 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); - if (!sbi->s_group_desc[group_desc]) { + bh_p = sbi_array_rcu_deref(sbi, s_group_desc, group_desc); + /* + * sbi_array_rcu_deref returns with rcu unlocked, this is ok since + * the pointer being dereferenced won't be dereferenced again. By + * looking at the usage in add_new_gdb() the value isn't modified, + * just the pointer, and so it remains valid. + */ + if (!bh_p) { ext4_error(sb, "Group descriptor not loaded - " "block_group = %u, group_desc = %u, desc = %u", block_group, group_desc, offset); @@ -298,10 +306,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, } desc = (struct ext4_group_desc *)( - (__u8 *)sbi->s_group_desc[group_desc]->b_data + + (__u8 *)bh_p->b_data + offset * EXT4_DESC_SIZE(sb)); if (bh) - *bh = sbi->s_group_desc[group_desc]; + *bh = bh_p; return desc; } diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 4b65b8d40fd1..ee766e3bed8b 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -77,6 +77,11 @@ int __ext4_check_dir_entry(const char *function, unsigned int line, error_msg = "rec_len is too small for name_len"; else if (unlikely(((char *) de - buf) + rlen > size)) error_msg = "directory entry overrun"; + else if (unlikely(((char *) de - buf) + rlen > + size - EXT4_DIR_REC_LEN(1) && + ((char *) de - buf) + rlen != size)) { + error_msg = "directory entry too close to block end"; + } else if (unlikely(le32_to_cpu(de->inode) > le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count))) error_msg = "inode out of bounds"; @@ -112,7 +117,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) if (IS_ENCRYPTED(inode)) { err = fscrypt_get_encryption_info(inode); - if (err && err != -ENOKEY) + if (err) return err; } @@ -121,12 +126,14 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) if (err != ERR_BAD_DX_DIR) { return err; } - /* - * We don't set the inode dirty flag since it's not - * critical that it get flushed back to the disk. - */ - ext4_clear_inode_flag(file_inode(file), - EXT4_INODE_INDEX); + /* Can we just clear INDEX flag to ignore htree information? */ + if (!ext4_has_metadata_csum(sb)) { + /* + * We don't set the inode dirty flag since it's not + * critical that it gets flushed back to the disk. + */ + ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); + } } if (ext4_has_inline_data(inode)) { @@ -657,51 +664,3 @@ const struct file_operations ext4_dir_operations = { .open = ext4_dir_open, .release = ext4_release_dir, }; - -#ifdef CONFIG_UNICODE -static int ext4_d_compare(const struct dentry *dentry, unsigned int len, - const char *str, const struct qstr *name) -{ - struct qstr qstr = {.name = str, .len = len }; - struct inode *inode = dentry->d_parent->d_inode; - - if (!IS_CASEFOLDED(inode) || !EXT4_SB(inode->i_sb)->s_encoding) { - if (len != name->len) - return -1; - return memcmp(str, name->name, len); - } - - return ext4_ci_compare(inode, name, &qstr, false); -} - -static int ext4_d_hash(const struct dentry *dentry, struct qstr *str) -{ - const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb); - const struct unicode_map *um = sbi->s_encoding; - unsigned char *norm; - int len, ret = 0; - - if (!IS_CASEFOLDED(dentry->d_inode) || !um) - return 0; - - norm = kmalloc(PATH_MAX, GFP_ATOMIC); - if (!norm) - return -ENOMEM; - - len = utf8_casefold(um, str, norm, PATH_MAX); - if (len < 0) { - if (ext4_has_strict_mode(sbi)) - ret = -EINVAL; - goto out; - } - str->hash = full_name_hash(dentry, norm, len); -out: - kfree(norm); - return ret; -} - -const struct dentry_operations ext4_dentry_ops = { - .d_hash = ext4_d_hash, - .d_compare = ext4_d_compare, -}; -#endif diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index c638bb9e45a4..a21220b74b50 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1155,6 +1155,7 @@ struct ext4_inode_info { #define EXT4_MOUNT_DIOREAD_NOLOCK 0x400000 /* Enable support for dio read nolocking */ #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ +#define EXT4_MOUNT_INLINECRYPT 0x4000000 /* Inline encryption support */ #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ @@ -1375,14 +1376,6 @@ struct ext4_super_block { #define EXT4_ENC_UTF8_12_1 1 -/* - * Flags for ext4_sb_info.s_encoding_flags. - */ -#define EXT4_ENC_STRICT_MODE_FL (1 << 0) - -#define ext4_has_strict_mode(sbi) \ - (sbi->s_encoding_flags & EXT4_ENC_STRICT_MODE_FL) - /* * fourth extended-fs super-block data in memory */ @@ -1403,7 +1396,7 @@ struct ext4_sb_info { loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ struct buffer_head * s_sbh; /* Buffer containing the super block */ struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ - struct buffer_head **s_group_desc; + struct buffer_head * __rcu *s_group_desc; unsigned int s_mount_opt; unsigned int s_mount_opt2; unsigned int s_mount_flags; @@ -1434,10 +1427,6 @@ struct ext4_sb_info { struct kobject s_kobj; struct completion s_kobj_unregister; struct super_block *s_sb; -#ifdef CONFIG_UNICODE - struct unicode_map *s_encoding; - __u16 s_encoding_flags; -#endif /* Journaling */ struct journal_s *s_journal; @@ -1467,7 +1456,7 @@ struct ext4_sb_info { #endif /* for buddy allocator */ - struct ext4_group_info ***s_group_info; + struct ext4_group_info ** __rcu *s_group_info; struct inode *s_buddy_cache; spinlock_t s_md_lock; unsigned short *s_mb_offsets; @@ -1517,7 +1506,7 @@ struct ext4_sb_info { unsigned int s_extent_max_zeroout_kb; unsigned int s_log_groups_per_flex; - struct flex_groups *s_flex_groups; + struct flex_groups * __rcu *s_flex_groups; ext4_group_t s_flex_groups_allocated; /* workqueue for reserved extent conversions (buffered io) */ @@ -1557,8 +1546,11 @@ struct ext4_sb_info { struct ratelimit_state s_warning_ratelimit_state; struct ratelimit_state s_msg_ratelimit_state; - /* Barrier between changing inodes' journal flags and writepages ops. */ - struct percpu_rw_semaphore s_journal_flag_rwsem; + /* + * Barrier between writepages ops and changing any inode's JOURNAL_DATA + * or EXTENTS flag. + */ + struct percpu_rw_semaphore s_writepages_rwsem; struct dax_device *s_daxdev; }; @@ -1578,6 +1570,23 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); } +/* + * Returns: sbi->field[index] + * Used to access an array element from the following sbi fields which require + * rcu protection to avoid dereferencing an invalid pointer due to reassignment + * - s_group_desc + * - s_group_info + * - s_flex_group + */ +#define sbi_array_rcu_deref(sbi, field, index) \ +({ \ + typeof(*((sbi)->field)) _v; \ + rcu_read_lock(); \ + _v = ((typeof(_v)*)rcu_dereference((sbi)->field))[index]; \ + rcu_read_unlock(); \ + _v; \ +}) + /* * Inode dynamic state flags */ @@ -1685,6 +1694,7 @@ static inline bool ext4_verity_in_progress(struct inode *inode) #define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010 #define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020 #define EXT4_FEATURE_COMPAT_SPARSE_SUPER2 0x0200 +#define EXT4_FEATURE_COMPAT_STABLE_INODES 0x0800 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 @@ -1781,6 +1791,7 @@ EXT4_FEATURE_COMPAT_FUNCS(xattr, EXT_ATTR) EXT4_FEATURE_COMPAT_FUNCS(resize_inode, RESIZE_INODE) EXT4_FEATURE_COMPAT_FUNCS(dir_index, DIR_INDEX) EXT4_FEATURE_COMPAT_FUNCS(sparse_super2, SPARSE_SUPER2) +EXT4_FEATURE_COMPAT_FUNCS(stable_inodes, STABLE_INODES) EXT4_FEATURE_RO_COMPAT_FUNCS(sparse_super, SPARSE_SUPER) EXT4_FEATURE_RO_COMPAT_FUNCS(large_file, LARGE_FILE) @@ -2481,8 +2492,11 @@ void ext4_insert_dentry(struct inode *inode, struct ext4_filename *fname); static inline void ext4_update_dx_flag(struct inode *inode) { - if (!ext4_has_feature_dir_index(inode->i_sb)) + if (!ext4_has_feature_dir_index(inode->i_sb)) { + /* ext4_iget() should have caught this... */ + WARN_ON_ONCE(ext4_has_feature_metadata_csum(inode->i_sb)); ext4_clear_inode_flag(inode, EXT4_INODE_INDEX); + } } static const unsigned char ext4_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK @@ -2658,6 +2672,7 @@ extern int ext4_generic_delete_entry(handle_t *handle, extern bool ext4_empty_dir(struct inode *inode); /* resize.c */ +extern void ext4_kvfree_array_rcu(void *to_free); extern int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input); extern int ext4_group_extend(struct super_block *sb, @@ -2903,13 +2918,13 @@ static inline struct ext4_group_info *ext4_get_group_info(struct super_block *sb, ext4_group_t group) { - struct ext4_group_info ***grp_info; + struct ext4_group_info **grp_info; long indexv, indexh; BUG_ON(group >= EXT4_SB(sb)->s_groups_count); - grp_info = EXT4_SB(sb)->s_group_info; indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); - return grp_info[indexv][indexh]; + grp_info = sbi_array_rcu_deref(EXT4_SB(sb), s_group_info, indexv); + return grp_info[indexh]; } /* @@ -2959,7 +2974,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) !inode_is_locked(inode)); down_write(&EXT4_I(inode)->i_data_sem); if (newsize > EXT4_I(inode)->i_disksize) - EXT4_I(inode)->i_disksize = newsize; + WRITE_ONCE(EXT4_I(inode)->i_disksize, newsize); up_write(&EXT4_I(inode)->i_data_sem); } @@ -3200,7 +3215,7 @@ static inline void ext4_set_de_type(struct super_block *sb, /* readpages.c */ extern int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, - unsigned nr_pages); + unsigned nr_pages, bool is_readahead); extern int __init ext4_init_post_read_processing(void); extern void ext4_exit_post_read_processing(void); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index c91d723209f4..4f01bd88b301 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -38,9 +38,10 @@ static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; - if (!inode_trylock_shared(inode)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock_shared(inode)) return -EAGAIN; + } else { inode_lock_shared(inode); } /* @@ -188,9 +189,10 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; - if (!inode_trylock(inode)) { - if (iocb->ki_flags & IOCB_NOWAIT) + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock(inode)) return -EAGAIN; + } else { inode_lock(inode); } ret = ext4_write_checks(iocb, from); diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index ed76a6d7a2d8..f3bc69b8d4e5 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -277,7 +277,7 @@ int ext4fs_dirhash(const struct inode *dir, const char *name, int len, struct dx_hash_info *hinfo) { #ifdef CONFIG_UNICODE - const struct unicode_map *um = EXT4_SB(dir->i_sb)->s_encoding; + const struct unicode_map *um = dir->i_sb->s_encoding; int r, dlen; unsigned char *buff; struct qstr qstr = {.name = name, .len = len }; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 6f9355227dc8..7889326b4f9c 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -333,11 +333,13 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) percpu_counter_inc(&sbi->s_freeinodes_counter); if (sbi->s_log_groups_per_flex) { - ext4_group_t f = ext4_flex_group(sbi, block_group); + struct flex_groups *fg; - atomic_inc(&sbi->s_flex_groups[f].free_inodes); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, + ext4_flex_group(sbi, block_group)); + atomic_inc(&fg->free_inodes); if (is_directory) - atomic_dec(&sbi->s_flex_groups[f].used_dirs); + atomic_dec(&fg->used_dirs); } BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata"); fatal = ext4_handle_dirty_metadata(handle, NULL, bh2); @@ -378,12 +380,13 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g, int flex_size, struct orlov_stats *stats) { struct ext4_group_desc *desc; - struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups; if (flex_size > 1) { - stats->free_inodes = atomic_read(&flex_group[g].free_inodes); - stats->free_clusters = atomic64_read(&flex_group[g].free_clusters); - stats->used_dirs = atomic_read(&flex_group[g].used_dirs); + struct flex_groups *fg = sbi_array_rcu_deref(EXT4_SB(sb), + s_flex_groups, g); + stats->free_inodes = atomic_read(&fg->free_inodes); + stats->free_clusters = atomic64_read(&fg->free_clusters); + stats->used_dirs = atomic_read(&fg->used_dirs); return; } @@ -1062,7 +1065,8 @@ got: if (sbi->s_log_groups_per_flex) { ext4_group_t f = ext4_flex_group(sbi, group); - atomic_inc(&sbi->s_flex_groups[f].used_dirs); + atomic_inc(&sbi_array_rcu_deref(sbi, s_flex_groups, + f)->used_dirs); } } if (ext4_has_group_desc_csum(sb)) { @@ -1085,7 +1089,8 @@ got: if (sbi->s_log_groups_per_flex) { flex_group = ext4_flex_group(sbi, group); - atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes); + atomic_dec(&sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_inodes); } inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index b3248f444f48..f737d5b1ca3b 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1439,7 +1439,7 @@ int htree_inlinedir_to_tree(struct file *dir_file, err = ext4_htree_store_dirent(dir_file, hinfo->hash, hinfo->minor_hash, de, &tmp_str); if (err) { - count = err; + ret = err; goto out; } count++; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 07539fdb9f69..8381a07dc7a9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -198,7 +198,12 @@ void ext4_evict_inode(struct inode *inode) { handle_t *handle; int err; - int extra_credits = 3; + /* + * Credits for final inode cleanup and freeing: + * sb + inode (ext4_orphan_del()), block bitmap, group descriptor + * (xattr block freeing), bitmap, group descriptor (inode freeing) + */ + int extra_credits = 6; struct ext4_xattr_inode_array *ea_inode_array = NULL; trace_ext4_evict_inode(inode); @@ -254,8 +259,12 @@ void ext4_evict_inode(struct inode *inode) if (!IS_NOQUOTA(inode)) extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb); + /* + * Block bitmap, group descriptor, and inode are accounted in both + * ext4_blocks_for_truncate() and extra_credits. So subtract 3. + */ handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, - ext4_blocks_for_truncate(inode)+extra_credits); + ext4_blocks_for_truncate(inode) + extra_credits - 3); if (IS_ERR(handle)) { ext4_std_error(inode->i_sb, PTR_ERR(handle)); /* @@ -1225,7 +1234,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len, (block_start < from || block_end > to)) { ll_rw_block(REQ_OP_READ, 0, 1, &bh); *wait_bh++ = bh; - decrypt = IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); + decrypt = fscrypt_inode_uses_fs_layer_crypto(inode); } } /* @@ -2584,7 +2593,7 @@ update_disksize: * truncate are avoided by checking i_size under i_data_sem. */ disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT; - if (disksize > EXT4_I(inode)->i_disksize) { + if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) { int err2; loff_t i_size; @@ -2754,7 +2763,7 @@ static int ext4_writepages(struct address_space *mapping, if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) return -EIO; - percpu_down_read(&sbi->s_journal_flag_rwsem); + percpu_down_read(&sbi->s_writepages_rwsem); trace_ext4_writepages(inode, wbc); if (dax_mapping(mapping)) { @@ -2984,7 +2993,7 @@ unplug: out_writepages: trace_ext4_writepages_result(inode, wbc, ret, nr_to_write - wbc->nr_to_write); - percpu_up_read(&sbi->s_journal_flag_rwsem); + percpu_up_read(&sbi->s_writepages_rwsem); return ret; } @@ -3369,7 +3378,8 @@ static int ext4_readpage(struct file *file, struct page *page) ret = ext4_readpage_inline(inode, page); if (ret == -EAGAIN) - return ext4_mpage_readpages(page->mapping, NULL, page, 1); + return ext4_mpage_readpages(page->mapping, NULL, page, 1, + false); return ret; } @@ -3384,7 +3394,7 @@ ext4_readpages(struct file *file, struct address_space *mapping, if (ext4_has_inline_data(inode)) return 0; - return ext4_mpage_readpages(mapping, pages, NULL, nr_pages); + return ext4_mpage_readpages(mapping, pages, NULL, nr_pages, true); } static void ext4_invalidatepage(struct page *page, unsigned int offset, @@ -3817,7 +3827,13 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter) * writes & truncates and since we take care of writing back page cache, * we are protected against page writeback as well. */ - inode_lock_shared(inode); + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock_shared(inode)) + return -EAGAIN; + } else { + inode_lock_shared(inode); + } + ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, iocb->ki_pos + count - 1); if (ret) @@ -3838,10 +3854,12 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter) ssize_t ret; int rw = iov_iter_rw(iter); -#ifdef CONFIG_FS_ENCRYPTION - if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) - return 0; -#endif + if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENCRYPTED(inode)) { + if (!fscrypt_inode_uses_inline_crypto(inode) || + !IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), + i_blocksize(inode))) + return 0; + } if (fsverity_active(inode)) return 0; @@ -4049,7 +4067,7 @@ static int __ext4_block_zero_page_range(handle_t *handle, /* Uhhuh. Read error. Complain and punt. */ if (!buffer_uptodate(bh)) goto unlock; - if (S_ISREG(inode->i_mode) && IS_ENCRYPTED(inode)) { + if (fscrypt_inode_uses_fs_layer_crypto(inode)) { /* We expect the key to be set. */ BUG_ON(!fscrypt_has_encryption_key(inode)); BUG_ON(blocksize != PAGE_SIZE); @@ -4872,6 +4890,18 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) ret = -EFSCORRUPTED; goto bad_inode; } + /* + * If dir_index is not enabled but there's dir with INDEX flag set, + * we'd normally treat htree data as empty space. But with metadata + * checksumming that corrupts checksums so forbid that. + */ + if (!ext4_has_feature_dir_index(sb) && ext4_has_metadata_csum(sb) && + ext4_test_inode_flag(inode, EXT4_INODE_INDEX)) { + EXT4_ERROR_INODE(inode, + "iget: Dir with htree data on filesystem without dir_index feature."); + ret = -EFSCORRUPTED; + goto bad_inode; + } ei->i_disksize = inode->i_size; #ifdef CONFIG_QUOTA ei->i_reserved_quota = 0; @@ -5368,11 +5398,15 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) offset = inode->i_size & (PAGE_SIZE - 1); /* - * All buffers in the last page remain valid? Then there's nothing to - * do. We do the check mainly to optimize the common PAGE_SIZE == - * blocksize case + * If the page is fully truncated, we don't need to wait for any commit + * (and we even should not as __ext4_journalled_invalidatepage() may + * strip all buffers from the page but keep the page dirty which can then + * confuse e.g. concurrent ext4_writepage() seeing dirty page without + * buffers). Also we don't need to wait for any commit if all buffers in + * the page remain valid. This is most beneficial for the common case of + * blocksize == PAGESIZE. */ - if (offset > PAGE_SIZE - i_blocksize(inode)) + if (!offset || offset > (PAGE_SIZE - i_blocksize(inode))) return; while (1) { page = find_lock_page(inode->i_mapping, @@ -5628,12 +5662,15 @@ int ext4_getattr(const struct path *path, struct kstat *stat, stat->attributes |= STATX_ATTR_IMMUTABLE; if (flags & EXT4_NODUMP_FL) stat->attributes |= STATX_ATTR_NODUMP; + if (flags & EXT4_VERITY_FL) + stat->attributes |= STATX_ATTR_VERITY; stat->attributes_mask |= (STATX_ATTR_APPEND | STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED | STATX_ATTR_IMMUTABLE | - STATX_ATTR_NODUMP); + STATX_ATTR_NODUMP | + STATX_ATTR_VERITY); generic_fillattr(inode, stat); return 0; @@ -5823,8 +5860,23 @@ static int __ext4_expand_extra_isize(struct inode *inode, { struct ext4_inode *raw_inode; struct ext4_xattr_ibody_header *header; + unsigned int inode_size = EXT4_INODE_SIZE(inode->i_sb); + struct ext4_inode_info *ei = EXT4_I(inode); int error; + /* this was checked at iget time, but double check for good measure */ + if ((EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > inode_size) || + (ei->i_extra_isize & 3)) { + EXT4_ERROR_INODE(inode, "bad extra_isize %u (inode size %u)", + ei->i_extra_isize, + EXT4_INODE_SIZE(inode->i_sb)); + return -EFSCORRUPTED; + } + if ((new_extra_isize < ei->i_extra_isize) || + (new_extra_isize < 4) || + (new_extra_isize > inode_size - EXT4_GOOD_OLD_INODE_SIZE)) + return -EINVAL; /* Should never happen */ + raw_inode = ext4_raw_inode(iloc); header = IHDR(inode, raw_inode); @@ -5918,7 +5970,7 @@ int ext4_expand_extra_isize(struct inode *inode, error = ext4_journal_get_write_access(handle, iloc->bh); if (error) { brelse(iloc->bh); - goto out_stop; + goto out_unlock; } error = __ext4_expand_extra_isize(inode, new_extra_isize, iloc, @@ -5928,8 +5980,8 @@ int ext4_expand_extra_isize(struct inode *inode, if (!error) error = rc; +out_unlock: ext4_write_unlock_xattr(inode, &no_expand); -out_stop: ext4_journal_stop(handle); return error; } @@ -6076,7 +6128,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) } } - percpu_down_write(&sbi->s_journal_flag_rwsem); + percpu_down_write(&sbi->s_writepages_rwsem); jbd2_journal_lock_updates(journal); /* @@ -6093,7 +6145,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) err = jbd2_journal_flush(journal); if (err < 0) { jbd2_journal_unlock_updates(journal); - percpu_up_write(&sbi->s_journal_flag_rwsem); + percpu_up_write(&sbi->s_writepages_rwsem); ext4_inode_resume_unlocked_dio(inode); return err; } @@ -6102,7 +6154,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ext4_set_aops(inode); jbd2_journal_unlock_updates(journal); - percpu_up_write(&sbi->s_journal_flag_rwsem); + percpu_up_write(&sbi->s_writepages_rwsem); if (val) up_write(&EXT4_I(inode)->i_mmap_sem); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 3ba07ccb24d2..f852d90a1562 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1247,6 +1247,8 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case FS_IOC_GETFSMAP: case FS_IOC_ENABLE_VERITY: case FS_IOC_MEASURE_VERITY: + case EXT4_IOC_FSGETXATTR: + case EXT4_IOC_FSSETXATTR: break; default: return -ENOIOCTLCMD; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 3ba9a4ae4eac..745a89d30a57 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2389,7 +2389,7 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) { struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned size; - struct ext4_group_info ***new_groupinfo; + struct ext4_group_info ***old_groupinfo, ***new_groupinfo; size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); @@ -2402,13 +2402,16 @@ int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups) ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group"); return -ENOMEM; } - if (sbi->s_group_info) { - memcpy(new_groupinfo, sbi->s_group_info, + rcu_read_lock(); + old_groupinfo = rcu_dereference(sbi->s_group_info); + if (old_groupinfo) + memcpy(new_groupinfo, old_groupinfo, sbi->s_group_info_size * sizeof(*sbi->s_group_info)); - kvfree(sbi->s_group_info); - } - sbi->s_group_info = new_groupinfo; + rcu_read_unlock(); + rcu_assign_pointer(sbi->s_group_info, new_groupinfo); sbi->s_group_info_size = size / sizeof(*sbi->s_group_info); + if (old_groupinfo) + ext4_kvfree_array_rcu(old_groupinfo); ext4_debug("allocated s_groupinfo array for %d meta_bg's\n", sbi->s_group_info_size); return 0; @@ -2420,6 +2423,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, { int i; int metalen = 0; + int idx = group >> EXT4_DESC_PER_BLOCK_BITS(sb); struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_info **meta_group_info; struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); @@ -2438,12 +2442,12 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, "for a buddy group"); goto exit_meta_group_info; } - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = - meta_group_info; + rcu_read_lock(); + rcu_dereference(sbi->s_group_info)[idx] = meta_group_info; + rcu_read_unlock(); } - meta_group_info = - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; + meta_group_info = sbi_array_rcu_deref(sbi, s_group_info, idx); i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_NOFS); @@ -2491,8 +2495,13 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, exit_group_info: /* If a meta_group_info table has been allocated, release it now */ if (group % EXT4_DESC_PER_BLOCK(sb) == 0) { - kfree(sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]); - sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)] = NULL; + struct ext4_group_info ***group_info; + + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); + kfree(group_info[idx]); + group_info[idx] = NULL; + rcu_read_unlock(); } exit_meta_group_info: return -ENOMEM; @@ -2505,6 +2514,7 @@ static int ext4_mb_init_backend(struct super_block *sb) struct ext4_sb_info *sbi = EXT4_SB(sb); int err; struct ext4_group_desc *desc; + struct ext4_group_info ***group_info; struct kmem_cache *cachep; err = ext4_mb_alloc_groupinfo(sb, ngroups); @@ -2539,11 +2549,16 @@ err_freebuddy: while (i-- > 0) kmem_cache_free(cachep, ext4_get_group_info(sb, i)); i = sbi->s_group_info_size; + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); while (i-- > 0) - kfree(sbi->s_group_info[i]); + kfree(group_info[i]); + rcu_read_unlock(); iput(sbi->s_buddy_cache); err_freesgi: - kvfree(sbi->s_group_info); + rcu_read_lock(); + kvfree(rcu_dereference(sbi->s_group_info)); + rcu_read_unlock(); return -ENOMEM; } @@ -2733,7 +2748,7 @@ int ext4_mb_release(struct super_block *sb) ext4_group_t ngroups = ext4_get_groups_count(sb); ext4_group_t i; int num_meta_group_infos; - struct ext4_group_info *grinfo; + struct ext4_group_info *grinfo, ***group_info; struct ext4_sb_info *sbi = EXT4_SB(sb); struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits); @@ -2751,9 +2766,12 @@ int ext4_mb_release(struct super_block *sb) num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); + rcu_read_lock(); + group_info = rcu_dereference(sbi->s_group_info); for (i = 0; i < num_meta_group_infos; i++) - kfree(sbi->s_group_info[i]); - kvfree(sbi->s_group_info); + kfree(group_info[i]); + kvfree(group_info); + rcu_read_unlock(); } kfree(sbi->s_mb_offsets); kfree(sbi->s_mb_maxs); @@ -3052,7 +3070,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, ext4_group_t flex_group = ext4_flex_group(sbi, ac->ac_b_ex.fe_group); atomic64_sub(ac->ac_b_ex.fe_len, - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); @@ -4947,7 +4966,8 @@ do_more: if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); atomic64_add(count_clusters, - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) @@ -5092,7 +5112,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb, if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, block_group); atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed), - &sbi->s_flex_groups[flex_group].free_clusters); + &sbi_array_rcu_deref(sbi, s_flex_groups, + flex_group)->free_clusters); } ext4_mb_unload_buddy(&e4b); diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 78d45c7d3fa7..0d785868cc50 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -434,6 +434,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode) int ext4_ext_migrate(struct inode *inode) { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); handle_t *handle; int retval = 0, i; __le32 *i_data; @@ -458,6 +459,8 @@ int ext4_ext_migrate(struct inode *inode) */ return retval; + percpu_down_write(&sbi->s_writepages_rwsem); + /* * Worst case we can touch the allocation bitmaps, a bgd * block, and a block to link in the orphan list. We do need @@ -468,7 +471,7 @@ int ext4_ext_migrate(struct inode *inode) if (IS_ERR(handle)) { retval = PTR_ERR(handle); - return retval; + goto out_unlock; } goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) * EXT4_INODES_PER_GROUP(inode->i_sb)) + 1; @@ -479,7 +482,7 @@ int ext4_ext_migrate(struct inode *inode) if (IS_ERR(tmp_inode)) { retval = PTR_ERR(tmp_inode); ext4_journal_stop(handle); - return retval; + goto out_unlock; } i_size_write(tmp_inode, i_size_read(inode)); /* @@ -521,7 +524,7 @@ int ext4_ext_migrate(struct inode *inode) */ ext4_orphan_del(NULL, tmp_inode); retval = PTR_ERR(handle); - goto out; + goto out_tmp_inode; } ei = EXT4_I(inode); @@ -602,10 +605,11 @@ err_out: /* Reset the extent details */ ext4_ext_tree_init(handle, tmp_inode); ext4_journal_stop(handle); -out: +out_tmp_inode: unlock_new_inode(tmp_inode); iput(tmp_inode); - +out_unlock: + percpu_up_write(&sbi->s_writepages_rwsem); return retval; } @@ -615,7 +619,8 @@ out: int ext4_ind_migrate(struct inode *inode) { struct ext4_extent_header *eh; - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + struct ext4_super_block *es = sbi->s_es; struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_extent *ex; unsigned int i, len; @@ -639,9 +644,13 @@ int ext4_ind_migrate(struct inode *inode) if (test_opt(inode->i_sb, DELALLOC)) ext4_alloc_da_blocks(inode); + percpu_down_write(&sbi->s_writepages_rwsem); + handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out_unlock; + } down_write(&EXT4_I(inode)->i_data_sem); ret = ext4_ext_check_inode(inode); @@ -676,5 +685,7 @@ int ext4_ind_migrate(struct inode *inode) errout: ext4_journal_stop(handle); up_write(&EXT4_I(inode)->i_data_sem); +out_unlock: + percpu_up_write(&sbi->s_writepages_rwsem); return ret; } diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 38e6a846aac1..0c042bd43246 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -120,10 +120,10 @@ void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp, { __ext4_warning(sb, function, line, "%s", msg); __ext4_warning(sb, function, line, - "MMP failure info: last update time: %llu, last update " - "node: %s, last update device: %s", - (long long unsigned int) le64_to_cpu(mmp->mmp_time), - mmp->mmp_nodename, mmp->mmp_bdevname); + "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s", + (unsigned long long)le64_to_cpu(mmp->mmp_time), + (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename, + (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname); } /* @@ -154,6 +154,7 @@ static int kmmpd(void *data) mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval, EXT4_MMP_MIN_CHECK_INTERVAL); mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval); + BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE); bdevname(bh->b_bdev, mmp->mmp_bdevname); memcpy(mmp->mmp_nodename, init_utsname()->nodename, @@ -375,7 +376,8 @@ skip: /* * Start a kernel thread to update the MMP block periodically. */ - EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%s", + EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, mmpd_data, "kmmpd-%.*s", + (int)sizeof(mmp->mmp_bdevname), bdevname(bh->b_bdev, mmp->mmp_bdevname)); if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) { diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 747912df3931..14a89af91558 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1280,8 +1280,8 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) int ext4_ci_compare(const struct inode *parent, const struct qstr *name, const struct qstr *entry, bool quick) { - const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb); - const struct unicode_map *um = sbi->s_encoding; + const struct super_block *sb = parent->i_sb; + const struct unicode_map *um = sb->s_encoding; int ret; if (quick) @@ -1293,7 +1293,7 @@ int ext4_ci_compare(const struct inode *parent, const struct qstr *name, /* Handle invalid character sequence as either an error * or as an opaque byte sequence. */ - if (ext4_has_strict_mode(sbi)) + if (sb_has_enc_strict_mode(sb)) return -EINVAL; if (name->len != entry->len) @@ -1310,7 +1310,7 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, { int len; - if (!IS_CASEFOLDED(dir) || !EXT4_SB(dir->i_sb)->s_encoding) { + if (!needs_casefold(dir)) { cf_name->name = NULL; return; } @@ -1319,7 +1319,7 @@ void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, if (!cf_name->name) return; - len = utf8_casefold(EXT4_SB(dir->i_sb)->s_encoding, + len = utf8_casefold(dir->i_sb->s_encoding, iname, cf_name->name, EXT4_NAME_LEN); if (len <= 0) { @@ -1356,7 +1356,7 @@ static inline bool ext4_match(const struct inode *parent, #endif #ifdef CONFIG_UNICODE - if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) { + if (needs_casefold(parent)) { if (fname->cf_name.name) { struct qstr cf = {.name = fname->cf_name.name, .len = fname->cf_name.len}; @@ -1505,6 +1505,7 @@ restart: /* * We deal with the read-ahead logic here. */ + cond_resched(); if (ra_ptr >= ra_max) { /* Refill the readahead buffer */ ra_ptr = 0; @@ -1607,6 +1608,7 @@ static struct buffer_head *ext4_lookup_entry(struct inode *dir, struct buffer_head *bh; err = ext4_fname_prepare_lookup(dir, dentry, &fname); + generic_set_encrypted_ci_d_ops(dir, dentry); if (err == -ENOENT) return NULL; if (err) @@ -2172,7 +2174,6 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, struct ext4_dir_entry_2 *de; struct ext4_dir_entry_tail *t; struct super_block *sb; - struct ext4_sb_info *sbi; struct ext4_filename fname; int retval; int dx_fallback=0; @@ -2184,14 +2185,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, csum_size = sizeof(struct ext4_dir_entry_tail); sb = dir->i_sb; - sbi = EXT4_SB(sb); blocksize = sb->s_blocksize; if (!dentry->d_name.len) return -EINVAL; #ifdef CONFIG_UNICODE - if (ext4_has_strict_mode(sbi) && IS_CASEFOLDED(dir) && - sbi->s_encoding && utf8_validate(sbi->s_encoding, &dentry->d_name)) + if (sb_has_enc_strict_mode(sb) && IS_CASEFOLDED(dir) && + sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name)) return -EINVAL; #endif @@ -2213,6 +2213,13 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, retval = ext4_dx_add_entry(handle, &fname, dir, inode); if (!retval || (retval != ERR_BAD_DX_DIR)) goto out; + /* Can we just ignore htree data? */ + if (ext4_has_metadata_csum(sb)) { + EXT4_ERROR_INODE(dir, + "Directory has corrupted htree index."); + retval = -EFSCORRUPTED; + goto out; + } ext4_clear_inode_flag(dir, EXT4_INODE_INDEX); dx_fallback++; ext4_mark_inode_dirty(handle, dir); @@ -2412,7 +2419,7 @@ again: dxroot->info.indirect_levels += 1; dxtrace(printk(KERN_DEBUG "Creating %d level index...\n", - info->indirect_levels)); + dxroot->info.indirect_levels)); err = ext4_handle_dirty_dx_node(handle, dir, frame->bh); if (err) goto journal_error; @@ -2821,7 +2828,7 @@ bool ext4_empty_dir(struct inode *inode) { unsigned int offset; struct buffer_head *bh; - struct ext4_dir_entry_2 *de, *de1; + struct ext4_dir_entry_2 *de; struct super_block *sb; if (ext4_has_inline_data(inode)) { @@ -2846,19 +2853,25 @@ bool ext4_empty_dir(struct inode *inode) return true; de = (struct ext4_dir_entry_2 *) bh->b_data; - de1 = ext4_next_entry(de, sb->s_blocksize); - if (le32_to_cpu(de->inode) != inode->i_ino || - le32_to_cpu(de1->inode) == 0 || - strcmp(".", de->name) || strcmp("..", de1->name)) { - ext4_warning_inode(inode, "directory missing '.' and/or '..'"); + if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, + 0) || + le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) { + ext4_warning_inode(inode, "directory missing '.'"); brelse(bh); return true; } - offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) + - ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize); - de = ext4_next_entry(de1, sb->s_blocksize); + offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); + de = ext4_next_entry(de, sb->s_blocksize); + if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, + offset) || + le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) { + ext4_warning_inode(inode, "directory missing '..'"); + brelse(bh); + return true; + } + offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); while (offset < inode->i_size) { - if ((void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { + if (!(offset & (sb->s_blocksize - 1))) { unsigned int lblock; brelse(bh); lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb); @@ -2869,12 +2882,11 @@ bool ext4_empty_dir(struct inode *inode) } if (IS_ERR(bh)) return true; - de = (struct ext4_dir_entry_2 *) bh->b_data; } + de = (struct ext4_dir_entry_2 *) (bh->b_data + + (offset & (sb->s_blocksize - 1))); if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, offset)) { - de = (struct ext4_dir_entry_2 *)(bh->b_data + - sb->s_blocksize); offset = (offset | (sb->s_blocksize - 1)) + 1; continue; } @@ -2883,7 +2895,6 @@ bool ext4_empty_dir(struct inode *inode) return false; } offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); - de = ext4_next_entry(de, sb->s_blocksize); } brelse(bh); return true; @@ -3195,18 +3206,17 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) if (IS_DIRSYNC(dir)) ext4_handle_sync(handle); - if (inode->i_nlink == 0) { - ext4_warning_inode(inode, "Deleting file '%.*s' with no links", - dentry->d_name.len, dentry->d_name.name); - set_nlink(inode, 1); - } retval = ext4_delete_entry(handle, dir, de, bh); if (retval) goto end_unlink; dir->i_ctime = dir->i_mtime = current_time(dir); ext4_update_dx_flag(dir); ext4_mark_inode_dirty(handle, dir); - drop_nlink(inode); + if (inode->i_nlink == 0) + ext4_warning_inode(inode, "Deleting file '%.*s' with no links", + dentry->d_name.len, dentry->d_name.name); + else + drop_nlink(inode); if (!inode->i_nlink) ext4_orphan_add(handle, inode); inode->i_ctime = current_time(inode); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index b028265a1fbb..e2fad25e3913 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -366,6 +366,7 @@ static int io_submit_init_bio(struct ext4_io_submit *io, bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); if (!bio) return -ENOMEM; + fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); wbc_init_bio(io->io_wbc, bio); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio_set_dev(bio, bh->b_bdev); @@ -383,7 +384,8 @@ static int io_submit_add_bh(struct ext4_io_submit *io, { int ret; - if (io->io_bio && bh->b_blocknr != io->io_next_block) { + if (io->io_bio && (bh->b_blocknr != io->io_next_block || + !fscrypt_mergeable_bio_bh(io->io_bio, bh))) { submit_and_retry: ext4_io_submit(io); } @@ -469,20 +471,29 @@ int ext4_bio_write_page(struct ext4_io_submit *io, bh = head = page_buffers(page); - if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode) && nr_to_submit) { + if (fscrypt_inode_uses_fs_layer_crypto(inode) && nr_to_submit) { gfp_t gfp_flags = GFP_NOFS; + /* + * Since bounce page allocation uses a mempool, we can only use + * a waiting mask (i.e. request guaranteed allocation) on the + * first page of the bio. Otherwise it can deadlock. + */ + if (io->io_bio) + gfp_flags = GFP_NOWAIT | __GFP_NOWARN; retry_encrypt: bounce_page = fscrypt_encrypt_pagecache_blocks(page, PAGE_SIZE, 0, gfp_flags); if (IS_ERR(bounce_page)) { ret = PTR_ERR(bounce_page); - if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { - if (io->io_bio) { + if (ret == -ENOMEM && + (io->io_bio || wbc->sync_mode == WB_SYNC_ALL)) { + gfp_flags = GFP_NOFS; + if (io->io_bio) ext4_io_submit(io); - congestion_wait(BLK_RW_ASYNC, HZ/50); - } - gfp_flags |= __GFP_NOFAIL; + else + gfp_flags |= __GFP_NOFAIL; + congestion_wait(BLK_RW_ASYNC, HZ/50); goto retry_encrypt; } bounce_page = NULL; diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index f74dead3fcc0..eb9c630cdbb7 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -198,7 +198,7 @@ static struct bio_post_read_ctx *get_bio_post_read_ctx(struct inode *inode, unsigned int post_read_steps = 0; struct bio_post_read_ctx *ctx = NULL; - if (IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode)) + if (fscrypt_inode_uses_fs_layer_crypto(inode)) post_read_steps |= 1 << STEP_DECRYPT; if (ext4_need_verity(inode, first_idx)) @@ -250,7 +250,7 @@ ext4_submit_bio_read(struct bio *bio) int ext4_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, - unsigned nr_pages) + unsigned nr_pages, bool is_readahead) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; @@ -259,6 +259,7 @@ int ext4_mpage_readpages(struct address_space *mapping, const unsigned blkbits = inode->i_blkbits; const unsigned blocks_per_page = PAGE_SIZE >> blkbits; const unsigned blocksize = 1 << blkbits; + sector_t next_block; sector_t block_in_file; sector_t last_block; sector_t last_block_in_file; @@ -290,7 +291,8 @@ int ext4_mpage_readpages(struct address_space *mapping, if (page_has_buffers(page)) goto confused; - block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits); + block_in_file = next_block = + (sector_t)page->index << (PAGE_SHIFT - blkbits); last_block = block_in_file + nr_pages * blocks_per_page; last_block_in_file = (ext4_readpage_limit(inode) + blocksize - 1) >> blkbits; @@ -390,7 +392,8 @@ int ext4_mpage_readpages(struct address_space *mapping, * This page will go to BIO. Do we need to send this * BIO off first? */ - if (bio && (last_block_in_bio != blocks[0] - 1)) { + if (bio && (last_block_in_bio != blocks[0] - 1 || + !fscrypt_mergeable_bio(bio, inode, next_block))) { submit_and_realloc: ext4_submit_bio_read(bio); bio = NULL; @@ -402,6 +405,8 @@ int ext4_mpage_readpages(struct address_space *mapping, min_t(int, nr_pages, BIO_MAX_PAGES)); if (!bio) goto set_error_page; + fscrypt_set_bio_crypt_ctx(bio, inode, next_block, + GFP_KERNEL); ctx = get_bio_post_read_ctx(inode, bio, page->index); if (IS_ERR(ctx)) { bio_put(bio); @@ -412,7 +417,8 @@ int ext4_mpage_readpages(struct address_space *mapping, bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); bio->bi_end_io = mpage_end_io; bio->bi_private = ctx; - bio_set_op_attrs(bio, REQ_OP_READ, 0); + bio_set_op_attrs(bio, REQ_OP_READ, + is_readahead ? REQ_RAHEAD : 0); } length = first_hole << blkbits; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 4f7cd78d0364..d42f7471fd5b 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -17,6 +17,33 @@ #include "ext4_jbd2.h" +struct ext4_rcu_ptr { + struct rcu_head rcu; + void *ptr; +}; + +static void ext4_rcu_ptr_callback(struct rcu_head *head) +{ + struct ext4_rcu_ptr *ptr; + + ptr = container_of(head, struct ext4_rcu_ptr, rcu); + kvfree(ptr->ptr); + kfree(ptr); +} + +void ext4_kvfree_array_rcu(void *to_free) +{ + struct ext4_rcu_ptr *ptr = kzalloc(sizeof(*ptr), GFP_KERNEL); + + if (ptr) { + ptr->ptr = to_free; + call_rcu(&ptr->rcu, ext4_rcu_ptr_callback); + return; + } + synchronize_rcu(); + kvfree(to_free); +} + int ext4_resize_begin(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -545,8 +572,8 @@ static int setup_new_flex_group_blocks(struct super_block *sb, brelse(gdb); goto out; } - memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data, - gdb->b_size); + memcpy(gdb->b_data, sbi_array_rcu_deref(sbi, + s_group_desc, j)->b_data, gdb->b_size); set_buffer_uptodate(gdb); err = ext4_handle_dirty_metadata(handle, NULL, gdb); @@ -854,13 +881,15 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, } brelse(dind); - o_group_desc = EXT4_SB(sb)->s_group_desc; + rcu_read_lock(); + o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + rcu_read_unlock(); n_group_desc[gdb_num] = gdb_bh; - EXT4_SB(sb)->s_group_desc = n_group_desc; + rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); EXT4_SB(sb)->s_gdb_count++; - kvfree(o_group_desc); + ext4_kvfree_array_rcu(o_group_desc); le16_add_cpu(&es->s_reserved_gdt_blocks, -1); err = ext4_handle_dirty_super(handle, sb); @@ -904,9 +933,11 @@ static int add_new_gdb_meta_bg(struct super_block *sb, return err; } - o_group_desc = EXT4_SB(sb)->s_group_desc; + rcu_read_lock(); + o_group_desc = rcu_dereference(EXT4_SB(sb)->s_group_desc); memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + rcu_read_unlock(); n_group_desc[gdb_num] = gdb_bh; BUFFER_TRACE(gdb_bh, "get_write_access"); @@ -917,9 +948,9 @@ static int add_new_gdb_meta_bg(struct super_block *sb, return err; } - EXT4_SB(sb)->s_group_desc = n_group_desc; + rcu_assign_pointer(EXT4_SB(sb)->s_group_desc, n_group_desc); EXT4_SB(sb)->s_gdb_count++; - kvfree(o_group_desc); + ext4_kvfree_array_rcu(o_group_desc); return err; } @@ -1183,7 +1214,8 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, * use non-sparse filesystems anymore. This is already checked above. */ if (gdb_off) { - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, + gdb_num); BUFFER_TRACE(gdb_bh, "get_write_access"); err = ext4_journal_get_write_access(handle, gdb_bh); @@ -1265,7 +1297,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, /* * get_write_access() has been called on gdb_bh by ext4_add_new_desc(). */ - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, gdb_num); /* Update group descriptor block for new group */ gdp = (struct ext4_group_desc *)(gdb_bh->b_data + gdb_off * EXT4_DESC_SIZE(sb)); @@ -1393,11 +1425,14 @@ static void ext4_update_super(struct super_block *sb, percpu_counter_read(&sbi->s_freeclusters_counter)); if (ext4_has_feature_flex_bg(sb) && sbi->s_log_groups_per_flex) { ext4_group_t flex_group; + struct flex_groups *fg; + flex_group = ext4_flex_group(sbi, group_data[0].group); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); atomic64_add(EXT4_NUM_B2C(sbi, free_blocks), - &sbi->s_flex_groups[flex_group].free_clusters); + &fg->free_clusters); atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, - &sbi->s_flex_groups[flex_group].free_inodes); + &fg->free_inodes); } /* @@ -1492,7 +1527,8 @@ exit_journal: for (; gdb_num <= gdb_num_end; gdb_num++) { struct buffer_head *gdb_bh; - gdb_bh = sbi->s_group_desc[gdb_num]; + gdb_bh = sbi_array_rcu_deref(sbi, s_group_desc, + gdb_num); if (old_gdb == gdb_bh->b_blocknr) continue; update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 029d6f54b089..649215f535a9 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -901,6 +901,8 @@ static void ext4_put_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; + struct buffer_head **group_desc; + struct flex_groups **flex_groups; int aborted = 0; int i, err; @@ -932,15 +934,23 @@ static void ext4_put_super(struct super_block *sb) if (!sb_rdonly(sb)) ext4_commit_super(sb, 1); + rcu_read_lock(); + group_desc = rcu_dereference(sbi->s_group_desc); for (i = 0; i < sbi->s_gdb_count; i++) - brelse(sbi->s_group_desc[i]); - kvfree(sbi->s_group_desc); - kvfree(sbi->s_flex_groups); + brelse(group_desc[i]); + kvfree(group_desc); + flex_groups = rcu_dereference(sbi->s_flex_groups); + if (flex_groups) { + for (i = 0; i < sbi->s_flex_groups_allocated; i++) + kvfree(flex_groups[i]); + kvfree(flex_groups); + } + rcu_read_unlock(); percpu_counter_destroy(&sbi->s_freeclusters_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - percpu_free_rwsem(&sbi->s_journal_flag_rwsem); + percpu_free_rwsem(&sbi->s_writepages_rwsem); #ifdef CONFIG_QUOTA for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(get_qf_name(sb, sbi, i)); @@ -989,7 +999,7 @@ static void ext4_put_super(struct super_block *sb) kfree(sbi->s_blockgroup_lock); fs_put_dax(sbi->s_daxdev); #ifdef CONFIG_UNICODE - utf8_unload(sbi->s_encoding); + utf8_unload(sb->s_encoding); #endif kfree(sbi); } @@ -1285,6 +1295,23 @@ static bool ext4_dummy_context(struct inode *inode) return DUMMY_ENCRYPTION_ENABLED(EXT4_SB(inode->i_sb)); } +static bool ext4_has_stable_inodes(struct super_block *sb) +{ + return ext4_has_feature_stable_inodes(sb); +} + +static void ext4_get_ino_and_lblk_bits(struct super_block *sb, + int *ino_bits_ret, int *lblk_bits_ret) +{ + *ino_bits_ret = 8 * sizeof(EXT4_SB(sb)->s_es->s_inodes_count); + *lblk_bits_ret = 8 * sizeof(ext4_lblk_t); +} + +static bool ext4_inline_crypt_enabled(struct super_block *sb) +{ + return test_opt(sb, INLINECRYPT); +} + static const struct fscrypt_operations ext4_cryptops = { .key_prefix = "ext4:", .get_context = ext4_get_context, @@ -1292,6 +1319,9 @@ static const struct fscrypt_operations ext4_cryptops = { .dummy_context = ext4_dummy_context, .empty_dir = ext4_empty_dir, .max_namelen = EXT4_NAME_LEN, + .has_stable_inodes = ext4_has_stable_inodes, + .get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits, + .inline_crypt_enabled = ext4_inline_crypt_enabled, }; #endif @@ -1386,6 +1416,7 @@ enum { Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption, + Opt_inlinecrypt, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota, Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, @@ -1479,6 +1510,7 @@ static const match_table_t tokens = { {Opt_noinit_itable, "noinit_itable"}, {Opt_max_dir_size_kb, "max_dir_size_kb=%u"}, {Opt_test_dummy_encryption, "test_dummy_encryption"}, + {Opt_inlinecrypt, "inlinecrypt"}, {Opt_nombcache, "nombcache"}, {Opt_nombcache, "no_mbcache"}, /* for backward compatibility */ {Opt_removed, "check=none"}, /* mount option from ext2/3 */ @@ -1688,6 +1720,11 @@ static const struct mount_opts { {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, {Opt_max_dir_size_kb, 0, MOPT_GTE0}, {Opt_test_dummy_encryption, 0, MOPT_GTE0}, +#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT + {Opt_inlinecrypt, EXT4_MOUNT_INLINECRYPT, MOPT_SET}, +#else + {Opt_inlinecrypt, EXT4_MOUNT_INLINECRYPT, MOPT_NOSUPPORT}, +#endif {Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET}, {Opt_err, 0, 0} }; @@ -1814,6 +1851,13 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, arg = JBD2_DEFAULT_MAX_COMMIT_AGE; sbi->s_commit_interval = HZ * arg; } else if (token == Opt_debug_want_extra_isize) { + if ((arg & 1) || + (arg < 4) || + (arg > (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE))) { + ext4_msg(sb, KERN_ERR, + "Invalid want_extra_isize %d", arg); + return -1; + } sbi->s_want_extra_isize = arg; } else if (token == Opt_max_batch_time) { sbi->s_max_batch_time = arg; @@ -2252,8 +2296,8 @@ done: int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) { struct ext4_sb_info *sbi = EXT4_SB(sb); - struct flex_groups *new_groups; - int size; + struct flex_groups **old_groups, **new_groups; + int size, i, j; if (!sbi->s_log_groups_per_flex) return 0; @@ -2262,22 +2306,37 @@ int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup) if (size <= sbi->s_flex_groups_allocated) return 0; - size = roundup_pow_of_two(size * sizeof(struct flex_groups)); - new_groups = kvzalloc(size, GFP_KERNEL); + new_groups = kvzalloc(roundup_pow_of_two(size * + sizeof(*sbi->s_flex_groups)), GFP_KERNEL); if (!new_groups) { - ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups", - size / (int) sizeof(struct flex_groups)); + ext4_msg(sb, KERN_ERR, + "not enough memory for %d flex group pointers", size); return -ENOMEM; } - - if (sbi->s_flex_groups) { - memcpy(new_groups, sbi->s_flex_groups, - (sbi->s_flex_groups_allocated * - sizeof(struct flex_groups))); - kvfree(sbi->s_flex_groups); + for (i = sbi->s_flex_groups_allocated; i < size; i++) { + new_groups[i] = kvzalloc(roundup_pow_of_two( + sizeof(struct flex_groups)), + GFP_KERNEL); + if (!new_groups[i]) { + for (j = sbi->s_flex_groups_allocated; j < i; j++) + kvfree(new_groups[j]); + kvfree(new_groups); + ext4_msg(sb, KERN_ERR, + "not enough memory for %d flex groups", size); + return -ENOMEM; + } } - sbi->s_flex_groups = new_groups; - sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups); + rcu_read_lock(); + old_groups = rcu_dereference(sbi->s_flex_groups); + if (old_groups) + memcpy(new_groups, old_groups, + (sbi->s_flex_groups_allocated * + sizeof(struct flex_groups *))); + rcu_read_unlock(); + rcu_assign_pointer(sbi->s_flex_groups, new_groups); + sbi->s_flex_groups_allocated = size; + if (old_groups) + ext4_kvfree_array_rcu(old_groups); return 0; } @@ -2285,6 +2344,7 @@ static int ext4_fill_flex_info(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_group_desc *gdp = NULL; + struct flex_groups *fg; ext4_group_t flex_group; int i, err; @@ -2302,12 +2362,11 @@ static int ext4_fill_flex_info(struct super_block *sb) gdp = ext4_get_group_desc(sb, i, NULL); flex_group = ext4_flex_group(sbi, i); - atomic_add(ext4_free_inodes_count(sb, gdp), - &sbi->s_flex_groups[flex_group].free_inodes); + fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group); + atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes); atomic64_add(ext4_free_group_clusters(sb, gdp), - &sbi->s_flex_groups[flex_group].free_clusters); - atomic_add(ext4_used_dirs_count(sb, gdp), - &sbi->s_flex_groups[flex_group].used_dirs); + &fg->free_clusters); + atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs); } return 1; @@ -2897,17 +2956,11 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } -#ifndef CONFIG_QUOTA - if (ext4_has_feature_quota(sb) && !readonly) { +#if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2) + if (!readonly && (ext4_has_feature_quota(sb) || + ext4_has_feature_project(sb))) { ext4_msg(sb, KERN_ERR, - "Filesystem with quota feature cannot be mounted RDWR " - "without CONFIG_QUOTA"); - return 0; - } - if (ext4_has_feature_project(sb) && !readonly) { - ext4_msg(sb, KERN_ERR, - "Filesystem with project quota feature cannot be mounted RDWR " - "without CONFIG_QUOTA"); + "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2"); return 0; } #endif /* CONFIG_QUOTA */ @@ -3495,37 +3548,6 @@ int ext4_calculate_overhead(struct super_block *sb) return 0; } -static void ext4_clamp_want_extra_isize(struct super_block *sb) -{ - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct ext4_super_block *es = sbi->s_es; - - /* determine the minimum size of new large inodes, if present */ - if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE && - sbi->s_want_extra_isize == 0) { - sbi->s_want_extra_isize = sizeof(struct ext4_inode) - - EXT4_GOOD_OLD_INODE_SIZE; - if (ext4_has_feature_extra_isize(sb)) { - if (sbi->s_want_extra_isize < - le16_to_cpu(es->s_want_extra_isize)) - sbi->s_want_extra_isize = - le16_to_cpu(es->s_want_extra_isize); - if (sbi->s_want_extra_isize < - le16_to_cpu(es->s_min_extra_isize)) - sbi->s_want_extra_isize = - le16_to_cpu(es->s_min_extra_isize); - } - } - /* Check if enough inode space is available */ - if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize > - sbi->s_inode_size) { - sbi->s_want_extra_isize = sizeof(struct ext4_inode) - - EXT4_GOOD_OLD_INODE_SIZE; - ext4_msg(sb, KERN_INFO, - "required extra inode space not available"); - } -} - static void ext4_set_resv_clusters(struct super_block *sb) { ext4_fsblk_t resv_clusters; @@ -3560,9 +3582,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) { struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev); char *orig_data = kstrdup(data, GFP_KERNEL); - struct buffer_head *bh; + struct buffer_head *bh, **group_desc; struct ext4_super_block *es = NULL; struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + struct flex_groups **flex_groups; ext4_fsblk_t block; ext4_fsblk_t sb_block = get_sb_block(&data); ext4_fsblk_t logical_sb_block; @@ -3733,6 +3756,75 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) */ sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; + blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); + if (blocksize < EXT4_MIN_BLOCK_SIZE || + blocksize > EXT4_MAX_BLOCK_SIZE) { + ext4_msg(sb, KERN_ERR, + "Unsupported filesystem blocksize %d (%d log_block_size)", + blocksize, le32_to_cpu(es->s_log_block_size)); + goto failed_mount; + } + + if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { + sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; + sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; + } else { + sbi->s_inode_size = le16_to_cpu(es->s_inode_size); + sbi->s_first_ino = le32_to_cpu(es->s_first_ino); + if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) { + ext4_msg(sb, KERN_ERR, "invalid first ino: %u", + sbi->s_first_ino); + goto failed_mount; + } + if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || + (!is_power_of_2(sbi->s_inode_size)) || + (sbi->s_inode_size > blocksize)) { + ext4_msg(sb, KERN_ERR, + "unsupported inode size: %d", + sbi->s_inode_size); + ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize); + goto failed_mount; + } + /* + * i_atime_extra is the last extra field available for + * [acm]times in struct ext4_inode. Checking for that + * field should suffice to ensure we have extra space + * for all three. + */ + if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) + + sizeof(((struct ext4_inode *)0)->i_atime_extra)) { + sb->s_time_gran = 1; + } else { + sb->s_time_gran = NSEC_PER_SEC; + } + } + if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { + sbi->s_want_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; + if (ext4_has_feature_extra_isize(sb)) { + unsigned v, max = (sbi->s_inode_size - + EXT4_GOOD_OLD_INODE_SIZE); + + v = le16_to_cpu(es->s_want_extra_isize); + if (v > max) { + ext4_msg(sb, KERN_ERR, + "bad s_want_extra_isize: %d", v); + goto failed_mount; + } + if (sbi->s_want_extra_isize < v) + sbi->s_want_extra_isize = v; + + v = le16_to_cpu(es->s_min_extra_isize); + if (v > max) { + ext4_msg(sb, KERN_ERR, + "bad s_min_extra_isize: %d", v); + goto failed_mount; + } + if (sbi->s_want_extra_isize < v) + sbi->s_want_extra_isize = v; + } + } + if (sbi->s_es->s_mount_opts[0]) { char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts, sizeof(sbi->s_es->s_mount_opts), @@ -3753,7 +3845,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; #ifdef CONFIG_UNICODE - if (ext4_has_feature_casefold(sb) && !sbi->s_encoding) { + if (ext4_has_feature_casefold(sb) && !sb->s_encoding) { const struct ext4_sb_encodings *encoding_info; struct unicode_map *encoding; __u16 encoding_flags; @@ -3784,8 +3876,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "%s-%s with flags 0x%hx", encoding_info->name, encoding_info->version?:"\b", encoding_flags); - sbi->s_encoding = encoding; - sbi->s_encoding_flags = encoding_flags; + sb->s_encoding = encoding; + sb->s_encoding_flags = encoding_flags; } #endif @@ -3891,14 +3983,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) if (!ext4_feature_set_ok(sb, (sb_rdonly(sb)))) goto failed_mount; - blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); - if (blocksize < EXT4_MIN_BLOCK_SIZE || - blocksize > EXT4_MAX_BLOCK_SIZE) { - ext4_msg(sb, KERN_ERR, - "Unsupported filesystem blocksize %d (%d log_block_size)", - blocksize, le32_to_cpu(es->s_log_block_size)); - goto failed_mount; - } if (le32_to_cpu(es->s_log_block_size) > (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { ext4_msg(sb, KERN_ERR, @@ -3968,29 +4052,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) has_huge_files); sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); - if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { - sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; - sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; - } else { - sbi->s_inode_size = le16_to_cpu(es->s_inode_size); - sbi->s_first_ino = le32_to_cpu(es->s_first_ino); - if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) { - ext4_msg(sb, KERN_ERR, "invalid first ino: %u", - sbi->s_first_ino); - goto failed_mount; - } - if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || - (!is_power_of_2(sbi->s_inode_size)) || - (sbi->s_inode_size > blocksize)) { - ext4_msg(sb, KERN_ERR, - "unsupported inode size: %d", - sbi->s_inode_size); - goto failed_mount; - } - if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) - sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2); - } - sbi->s_desc_size = le16_to_cpu(es->s_desc_size); if (ext4_has_feature_64bit(sb)) { if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || @@ -4174,9 +4235,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } } - sbi->s_group_desc = kvmalloc(db_count * + rcu_assign_pointer(sbi->s_group_desc, + kvmalloc_array(db_count, sizeof(struct buffer_head *), - GFP_KERNEL); + GFP_KERNEL)); if (sbi->s_group_desc == NULL) { ext4_msg(sb, KERN_ERR, "not enough memory"); ret = -ENOMEM; @@ -4192,14 +4254,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } for (i = 0; i < db_count; i++) { + struct buffer_head *bh; + block = descriptor_loc(sb, logical_sb_block, i); - sbi->s_group_desc[i] = sb_bread_unmovable(sb, block); - if (!sbi->s_group_desc[i]) { + bh = sb_bread_unmovable(sb, block); + if (!bh) { ext4_msg(sb, KERN_ERR, "can't read group descriptor %d", i); db_count = i; goto failed_mount2; } + rcu_read_lock(); + rcu_dereference(sbi->s_group_desc)[i] = bh; + rcu_read_unlock(); } sbi->s_gdb_count = db_count; if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { @@ -4430,11 +4497,6 @@ no_journal: goto failed_mount4; } -#ifdef CONFIG_UNICODE - if (sbi->s_encoding) - sb->s_d_op = &ext4_dentry_ops; -#endif - sb->s_root = d_make_root(root); if (!sb->s_root) { ext4_msg(sb, KERN_ERR, "get root dentry failed"); @@ -4445,8 +4507,6 @@ no_journal: if (ext4_setup_super(sb, es, sb_rdonly(sb))) sb->s_flags |= MS_RDONLY; - ext4_clamp_want_extra_isize(sb); - ext4_set_resv_clusters(sb); err = ext4_setup_system_zone(sb); @@ -4484,7 +4544,7 @@ no_journal: err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, GFP_KERNEL); if (!err) - err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem); + err = percpu_init_rwsem(&sbi->s_writepages_rwsem); if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); @@ -4572,13 +4632,19 @@ failed_mount7: ext4_unregister_li_request(sb); failed_mount6: ext4_mb_release(sb); - if (sbi->s_flex_groups) - kvfree(sbi->s_flex_groups); + rcu_read_lock(); + flex_groups = rcu_dereference(sbi->s_flex_groups); + if (flex_groups) { + for (i = 0; i < sbi->s_flex_groups_allocated; i++) + kvfree(flex_groups[i]); + kvfree(flex_groups); + } + rcu_read_unlock(); percpu_counter_destroy(&sbi->s_freeclusters_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); - percpu_free_rwsem(&sbi->s_journal_flag_rwsem); + percpu_free_rwsem(&sbi->s_writepages_rwsem); failed_mount5: ext4_ext_release(sb); ext4_release_system_zone(sb); @@ -4609,15 +4675,18 @@ failed_mount3: if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: + rcu_read_lock(); + group_desc = rcu_dereference(sbi->s_group_desc); for (i = 0; i < db_count; i++) - brelse(sbi->s_group_desc[i]); - kvfree(sbi->s_group_desc); + brelse(group_desc[i]); + kvfree(group_desc); + rcu_read_unlock(); failed_mount: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); #ifdef CONFIG_UNICODE - utf8_unload(sbi->s_encoding); + utf8_unload(sb->s_encoding); #endif #ifdef CONFIG_QUOTA @@ -5235,8 +5304,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } - ext4_clamp_want_extra_isize(sb); - if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^ test_opt(sb, JOURNAL_CHECKSUM)) { ext4_msg(sb, KERN_ERR, "changing journal_checksum " diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index d0d8a9795dd6..bd717248a4bc 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -342,12 +342,57 @@ static int ext4_get_verity_descriptor(struct inode *inode, void *buf, return desc_size; } -static struct page *ext4_read_merkle_tree_page(struct inode *inode, - pgoff_t index) +/* + * Prefetch some pages from the file's Merkle tree. + * + * This is basically a stripped-down version of __do_page_cache_readahead() + * which works on pages past i_size. + */ +static void ext4_merkle_tree_readahead(struct address_space *mapping, + pgoff_t start_index, unsigned long count) { + LIST_HEAD(pages); + unsigned int nr_pages = 0; + struct page *page; + pgoff_t index; + struct blk_plug plug; + + for (index = start_index; index < start_index + count; index++) { + rcu_read_lock(); + page = radix_tree_lookup(&mapping->page_tree, index); + rcu_read_unlock(); + if (!page || radix_tree_exceptional_entry(page)) { + page = __page_cache_alloc(readahead_gfp_mask(mapping)); + if (!page) + break; + page->index = index; + list_add(&page->lru, &pages); + nr_pages++; + } + } + blk_start_plug(&plug); + ext4_mpage_readpages(mapping, &pages, NULL, nr_pages, true); + blk_finish_plug(&plug); +} + +static struct page *ext4_read_merkle_tree_page(struct inode *inode, + pgoff_t index, + unsigned long num_ra_pages) +{ + struct page *page; + index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT; - return read_mapping_page(inode->i_mapping, index, NULL); + page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); + if (!page || !PageUptodate(page)) { + if (page) + put_page(page); + else if (num_ra_pages > 1) + ext4_merkle_tree_readahead(inode->i_mapping, index, + num_ra_pages); + page = read_mapping_page(inode->i_mapping, index, NULL); + } + return page; } static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf, diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index d4519801745e..8f2019d17f34 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -5,6 +5,7 @@ config F2FS_FS select CRYPTO select CRYPTO_CRC32 select F2FS_FS_XATTR if FS_ENCRYPTION + select FS_ENCRYPTION_ALGS if FS_ENCRYPTION help F2FS is based on Log-structured File System (LFS), which supports versatile "flash-friendly" features. The design has been focused on @@ -20,7 +21,7 @@ config F2FS_FS config F2FS_STAT_FS bool "F2FS Status Information" - depends on F2FS_FS && DEBUG_FS + depends on F2FS_FS default y help /sys/kernel/debug/f2fs/ contains information about all the partitions @@ -104,3 +105,28 @@ config F2FS_FAULT_INJECTION Test F2FS to inject faults such as ENOMEM, ENOSPC, and so on. If unsure, say N. + +config F2FS_FS_COMPRESSION + bool "F2FS compression feature" + depends on F2FS_FS + help + Enable filesystem-level compression on f2fs regular files, + multiple back-end compression algorithms are supported. + +config F2FS_FS_LZO + bool "LZO compression support" + depends on F2FS_FS_COMPRESSION + select LZO_COMPRESS + select LZO_DECOMPRESS + default y + help + Support LZO compress algorithm, if unsure, say Y. + +config F2FS_FS_LZ4 + bool "LZ4 compression support" + depends on F2FS_FS_COMPRESSION + select LZ4_COMPRESS + select LZ4_DECOMPRESS + default y + help + Support LZ4 compress algorithm, if unsure, say Y. diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile index 2aaecc63834f..ee7316b42f69 100644 --- a/fs/f2fs/Makefile +++ b/fs/f2fs/Makefile @@ -9,3 +9,4 @@ f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o f2fs-$(CONFIG_FS_VERITY) += verity.o +f2fs-$(CONFIG_F2FS_FS_COMPRESSION) += compress.o diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index a28eac29234d..0cbd5e7b13f9 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -581,7 +581,7 @@ int f2fs_acquire_orphan_inode(struct f2fs_sb_info *sbi) if (time_to_inject(sbi, FAULT_ORPHAN)) { spin_unlock(&im->ino_lock); - f2fs_show_injection_info(FAULT_ORPHAN); + f2fs_show_injection_info(sbi, FAULT_ORPHAN); return -ENOSPC; } @@ -1509,10 +1509,10 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_wait_on_all_pages_writeback(sbi); /* - * invalidate intermediate page cache borrowed from meta inode - * which are used for migration of encrypted inode's blocks. + * invalidate intermediate page cache borrowed from meta inode which are + * used for migration of encrypted or verity inode's blocks. */ - if (f2fs_sb_has_encrypt(sbi)) + if (f2fs_sb_has_encrypt(sbi) || f2fs_sb_has_verity(sbi)) invalidate_mapping_pages(META_MAPPING(sbi), MAIN_BLKADDR(sbi), MAX_BLKADDR(sbi) - 1); diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c new file mode 100644 index 000000000000..f335635bb1aa --- /dev/null +++ b/fs/f2fs/compress.c @@ -0,0 +1,1182 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * f2fs compress support + * + * Copyright (c) 2019 Chao Yu + */ + +#include +#include +#include +#include +#include +#include + +#include "f2fs.h" +#include "node.h" +#include + +/* Some architectures don't have PAGE_KERNEL_RO */ +#ifndef PAGE_KERNEL_RO +#define PAGE_KERNEL_RO PAGE_KERNEL +#endif + +struct f2fs_compress_ops { + int (*init_compress_ctx)(struct compress_ctx *cc); + void (*destroy_compress_ctx)(struct compress_ctx *cc); + int (*compress_pages)(struct compress_ctx *cc); + int (*decompress_pages)(struct decompress_io_ctx *dic); +}; + +static unsigned int offset_in_cluster(struct compress_ctx *cc, pgoff_t index) +{ + return index & (cc->cluster_size - 1); +} + +static pgoff_t cluster_idx(struct compress_ctx *cc, pgoff_t index) +{ + return index >> cc->log_cluster_size; +} + +static pgoff_t start_idx_of_cluster(struct compress_ctx *cc) +{ + return cc->cluster_idx << cc->log_cluster_size; +} + +bool f2fs_is_compressed_page(struct page *page) +{ + if (!PagePrivate(page)) + return false; + if (!page_private(page)) + return false; + if (IS_ATOMIC_WRITTEN_PAGE(page) || IS_DUMMY_WRITTEN_PAGE(page)) + return false; + f2fs_bug_on(F2FS_M_SB(page->mapping), + *((u32 *)page_private(page)) != F2FS_COMPRESSED_PAGE_MAGIC); + return true; +} + +static void f2fs_set_compressed_page(struct page *page, + struct inode *inode, pgoff_t index, void *data, refcount_t *r) +{ + SetPagePrivate(page); + set_page_private(page, (unsigned long)data); + + /* i_crypto_info and iv index */ + page->index = index; + page->mapping = inode->i_mapping; + if (r) + refcount_inc(r); +} + +static void f2fs_put_compressed_page(struct page *page) +{ + set_page_private(page, (unsigned long)NULL); + ClearPagePrivate(page); + page->mapping = NULL; + unlock_page(page); + put_page(page); +} + +static void f2fs_drop_rpages(struct compress_ctx *cc, int len, bool unlock) +{ + int i; + + for (i = 0; i < len; i++) { + if (!cc->rpages[i]) + continue; + if (unlock) + unlock_page(cc->rpages[i]); + else + put_page(cc->rpages[i]); + } +} + +static void f2fs_put_rpages(struct compress_ctx *cc) +{ + f2fs_drop_rpages(cc, cc->cluster_size, false); +} + +static void f2fs_unlock_rpages(struct compress_ctx *cc, int len) +{ + f2fs_drop_rpages(cc, len, true); +} + +static void f2fs_put_rpages_mapping(struct compress_ctx *cc, + struct address_space *mapping, + pgoff_t start, int len) +{ + int i; + + for (i = 0; i < len; i++) { + struct page *page = find_get_page(mapping, start + i); + + put_page(page); + put_page(page); + } +} + +static void f2fs_put_rpages_wbc(struct compress_ctx *cc, + struct writeback_control *wbc, bool redirty, int unlock) +{ + unsigned int i; + + for (i = 0; i < cc->cluster_size; i++) { + if (!cc->rpages[i]) + continue; + if (redirty) + redirty_page_for_writepage(wbc, cc->rpages[i]); + f2fs_put_page(cc->rpages[i], unlock); + } +} + +struct page *f2fs_compress_control_page(struct page *page) +{ + return ((struct compress_io_ctx *)page_private(page))->rpages[0]; +} + +int f2fs_init_compress_ctx(struct compress_ctx *cc) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); + + if (cc->nr_rpages) + return 0; + + cc->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) << + cc->log_cluster_size, GFP_NOFS); + return cc->rpages ? 0 : -ENOMEM; +} + +void f2fs_destroy_compress_ctx(struct compress_ctx *cc) +{ + kfree(cc->rpages); + cc->rpages = NULL; + cc->nr_rpages = 0; + cc->nr_cpages = 0; + cc->cluster_idx = NULL_CLUSTER; +} + +void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page) +{ + unsigned int cluster_ofs; + + if (!f2fs_cluster_can_merge_page(cc, page->index)) + f2fs_bug_on(F2FS_I_SB(cc->inode), 1); + + cluster_ofs = offset_in_cluster(cc, page->index); + cc->rpages[cluster_ofs] = page; + cc->nr_rpages++; + cc->cluster_idx = cluster_idx(cc, page->index); +} + +#ifdef CONFIG_F2FS_FS_LZO +static int lzo_init_compress_ctx(struct compress_ctx *cc) +{ + cc->private = f2fs_kvmalloc(F2FS_I_SB(cc->inode), + LZO1X_MEM_COMPRESS, GFP_NOFS); + if (!cc->private) + return -ENOMEM; + + cc->clen = lzo1x_worst_compress(PAGE_SIZE << cc->log_cluster_size); + return 0; +} + +static void lzo_destroy_compress_ctx(struct compress_ctx *cc) +{ + kvfree(cc->private); + cc->private = NULL; +} + +static int lzo_compress_pages(struct compress_ctx *cc) +{ + int ret; + + ret = lzo1x_1_compress(cc->rbuf, cc->rlen, cc->cbuf->cdata, + &cc->clen, cc->private); + if (ret != LZO_E_OK) { + printk_ratelimited("%sF2FS-fs (%s): lzo compress failed, ret:%d\n", + KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, ret); + return -EIO; + } + return 0; +} + +static int lzo_decompress_pages(struct decompress_io_ctx *dic) +{ + int ret; + + ret = lzo1x_decompress_safe(dic->cbuf->cdata, dic->clen, + dic->rbuf, &dic->rlen); + if (ret != LZO_E_OK) { + printk_ratelimited("%sF2FS-fs (%s): lzo decompress failed, ret:%d\n", + KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, ret); + return -EIO; + } + + if (dic->rlen != PAGE_SIZE << dic->log_cluster_size) { + printk_ratelimited("%sF2FS-fs (%s): lzo invalid rlen:%zu, " + "expected:%lu\n", KERN_ERR, + F2FS_I_SB(dic->inode)->sb->s_id, + dic->rlen, + PAGE_SIZE << dic->log_cluster_size); + return -EIO; + } + return 0; +} + +static const struct f2fs_compress_ops f2fs_lzo_ops = { + .init_compress_ctx = lzo_init_compress_ctx, + .destroy_compress_ctx = lzo_destroy_compress_ctx, + .compress_pages = lzo_compress_pages, + .decompress_pages = lzo_decompress_pages, +}; +#endif + +#ifdef CONFIG_F2FS_FS_LZ4 +static int lz4_init_compress_ctx(struct compress_ctx *cc) +{ + cc->private = f2fs_kvmalloc(F2FS_I_SB(cc->inode), + LZ4_MEM_COMPRESS, GFP_NOFS); + if (!cc->private) + return -ENOMEM; + + cc->clen = LZ4_compressBound(PAGE_SIZE << cc->log_cluster_size); + return 0; +} + +static void lz4_destroy_compress_ctx(struct compress_ctx *cc) +{ + kvfree(cc->private); + cc->private = NULL; +} + +static int lz4_compress_pages(struct compress_ctx *cc) +{ + int len; + + len = LZ4_compress_default(cc->rbuf, cc->cbuf->cdata, cc->rlen, + cc->clen, cc->private); + if (!len) { + printk_ratelimited("%sF2FS-fs (%s): lz4 compress failed\n", + KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id); + return -EIO; + } + cc->clen = len; + return 0; +} + +static int lz4_decompress_pages(struct decompress_io_ctx *dic) +{ + int ret; + + ret = LZ4_decompress_safe(dic->cbuf->cdata, dic->rbuf, + dic->clen, dic->rlen); + if (ret < 0) { + printk_ratelimited("%sF2FS-fs (%s): lz4 decompress failed, ret:%d\n", + KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, ret); + return -EIO; + } + + if (ret != PAGE_SIZE << dic->log_cluster_size) { + printk_ratelimited("%sF2FS-fs (%s): lz4 invalid rlen:%zu, " + "expected:%lu\n", KERN_ERR, + F2FS_I_SB(dic->inode)->sb->s_id, + dic->rlen, + PAGE_SIZE << dic->log_cluster_size); + return -EIO; + } + return 0; +} + +static const struct f2fs_compress_ops f2fs_lz4_ops = { + .init_compress_ctx = lz4_init_compress_ctx, + .destroy_compress_ctx = lz4_destroy_compress_ctx, + .compress_pages = lz4_compress_pages, + .decompress_pages = lz4_decompress_pages, +}; +#endif + +static const struct f2fs_compress_ops *f2fs_cops[COMPRESS_MAX] = { +#ifdef CONFIG_F2FS_FS_LZO + &f2fs_lzo_ops, +#else + NULL, +#endif +#ifdef CONFIG_F2FS_FS_LZ4 + &f2fs_lz4_ops, +#else + NULL, +#endif +}; + +bool f2fs_is_compress_backend_ready(struct inode *inode) +{ + if (!f2fs_compressed_file(inode)) + return true; + return f2fs_cops[F2FS_I(inode)->i_compress_algorithm]; +} + +static struct page *f2fs_grab_page(void) +{ + struct page *page; + + page = alloc_page(GFP_NOFS); + if (!page) + return NULL; + lock_page(page); + return page; +} + +static int f2fs_compress_pages(struct compress_ctx *cc) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); + struct f2fs_inode_info *fi = F2FS_I(cc->inode); + const struct f2fs_compress_ops *cops = + f2fs_cops[fi->i_compress_algorithm]; + unsigned int max_len, nr_cpages; + int i, ret; + + trace_f2fs_compress_pages_start(cc->inode, cc->cluster_idx, + cc->cluster_size, fi->i_compress_algorithm); + + ret = cops->init_compress_ctx(cc); + if (ret) + goto out; + + max_len = COMPRESS_HEADER_SIZE + cc->clen; + cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE); + + cc->cpages = f2fs_kzalloc(sbi, sizeof(struct page *) * + cc->nr_cpages, GFP_NOFS); + if (!cc->cpages) { + ret = -ENOMEM; + goto destroy_compress_ctx; + } + + for (i = 0; i < cc->nr_cpages; i++) { + cc->cpages[i] = f2fs_grab_page(); + if (!cc->cpages[i]) { + ret = -ENOMEM; + goto out_free_cpages; + } + } + + cc->rbuf = vmap(cc->rpages, cc->cluster_size, VM_MAP, PAGE_KERNEL_RO); + if (!cc->rbuf) { + ret = -ENOMEM; + goto out_free_cpages; + } + + cc->cbuf = vmap(cc->cpages, cc->nr_cpages, VM_MAP, PAGE_KERNEL); + if (!cc->cbuf) { + ret = -ENOMEM; + goto out_vunmap_rbuf; + } + + ret = cops->compress_pages(cc); + if (ret) + goto out_vunmap_cbuf; + + max_len = PAGE_SIZE * (cc->cluster_size - 1) - COMPRESS_HEADER_SIZE; + + if (cc->clen > max_len) { + ret = -EAGAIN; + goto out_vunmap_cbuf; + } + + cc->cbuf->clen = cpu_to_le32(cc->clen); + cc->cbuf->chksum = cpu_to_le32(0); + + for (i = 0; i < COMPRESS_DATA_RESERVED_SIZE; i++) + cc->cbuf->reserved[i] = cpu_to_le32(0); + + vunmap(cc->cbuf); + vunmap(cc->rbuf); + + nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE); + + for (i = nr_cpages; i < cc->nr_cpages; i++) { + f2fs_put_compressed_page(cc->cpages[i]); + cc->cpages[i] = NULL; + } + + cc->nr_cpages = nr_cpages; + + trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx, + cc->clen, ret); + return 0; + +out_vunmap_cbuf: + vunmap(cc->cbuf); +out_vunmap_rbuf: + vunmap(cc->rbuf); +out_free_cpages: + for (i = 0; i < cc->nr_cpages; i++) { + if (cc->cpages[i]) + f2fs_put_compressed_page(cc->cpages[i]); + } + kfree(cc->cpages); + cc->cpages = NULL; +destroy_compress_ctx: + cops->destroy_compress_ctx(cc); +out: + trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx, + cc->clen, ret); + return ret; +} + +void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity) +{ + struct decompress_io_ctx *dic = + (struct decompress_io_ctx *)page_private(page); + struct f2fs_sb_info *sbi = F2FS_I_SB(dic->inode); + struct f2fs_inode_info *fi= F2FS_I(dic->inode); + const struct f2fs_compress_ops *cops = + f2fs_cops[fi->i_compress_algorithm]; + int ret; + + dec_page_count(sbi, F2FS_RD_DATA); + + if (bio->bi_status || PageError(page)) + dic->failed = true; + + if (refcount_dec_not_one(&dic->ref)) + return; + + trace_f2fs_decompress_pages_start(dic->inode, dic->cluster_idx, + dic->cluster_size, fi->i_compress_algorithm); + + /* submit partial compressed pages */ + if (dic->failed) { + ret = -EIO; + goto out_free_dic; + } + + dic->rbuf = vmap(dic->tpages, dic->cluster_size, VM_MAP, PAGE_KERNEL); + if (!dic->rbuf) { + ret = -ENOMEM; + goto out_free_dic; + } + + dic->cbuf = vmap(dic->cpages, dic->nr_cpages, VM_MAP, PAGE_KERNEL_RO); + if (!dic->cbuf) { + ret = -ENOMEM; + goto out_vunmap_rbuf; + } + + dic->clen = le32_to_cpu(dic->cbuf->clen); + dic->rlen = PAGE_SIZE << dic->log_cluster_size; + + if (dic->clen > PAGE_SIZE * dic->nr_cpages - COMPRESS_HEADER_SIZE) { + ret = -EFSCORRUPTED; + goto out_vunmap_cbuf; + } + + ret = cops->decompress_pages(dic); + +out_vunmap_cbuf: + vunmap(dic->cbuf); +out_vunmap_rbuf: + vunmap(dic->rbuf); +out_free_dic: + if (!verity) + f2fs_decompress_end_io(dic->rpages, dic->cluster_size, + ret, false); + + trace_f2fs_decompress_pages_end(dic->inode, dic->cluster_idx, + dic->clen, ret); + if (!verity) + f2fs_free_dic(dic); +} + +static bool is_page_in_cluster(struct compress_ctx *cc, pgoff_t index) +{ + if (cc->cluster_idx == NULL_CLUSTER) + return true; + return cc->cluster_idx == cluster_idx(cc, index); +} + +bool f2fs_cluster_is_empty(struct compress_ctx *cc) +{ + return cc->nr_rpages == 0; +} + +static bool f2fs_cluster_is_full(struct compress_ctx *cc) +{ + return cc->cluster_size == cc->nr_rpages; +} + +bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index) +{ + if (f2fs_cluster_is_empty(cc)) + return true; + return is_page_in_cluster(cc, index); +} + +static bool __cluster_may_compress(struct compress_ctx *cc) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); + loff_t i_size = i_size_read(cc->inode); + unsigned nr_pages = DIV_ROUND_UP(i_size, PAGE_SIZE); + int i; + + for (i = 0; i < cc->cluster_size; i++) { + struct page *page = cc->rpages[i]; + + f2fs_bug_on(sbi, !page); + + if (unlikely(f2fs_cp_error(sbi))) + return false; + if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) + return false; + + /* beyond EOF */ + if (page->index >= nr_pages) + return false; + } + return true; +} + +/* return # of compressed block addresses */ +static int f2fs_compressed_blocks(struct compress_ctx *cc) +{ + struct dnode_of_data dn; + int ret; + + set_new_dnode(&dn, cc->inode, NULL, NULL, 0); + ret = f2fs_get_dnode_of_data(&dn, start_idx_of_cluster(cc), + LOOKUP_NODE); + if (ret) { + if (ret == -ENOENT) + ret = 0; + goto fail; + } + + if (dn.data_blkaddr == COMPRESS_ADDR) { + int i; + + ret = 1; + for (i = 1; i < cc->cluster_size; i++) { + block_t blkaddr; + + blkaddr = datablock_addr(dn.inode, + dn.node_page, dn.ofs_in_node + i); + if (blkaddr != NULL_ADDR) + ret++; + } + } +fail: + f2fs_put_dnode(&dn); + return ret; +} + +int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index) +{ + struct compress_ctx cc = { + .inode = inode, + .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, + .cluster_size = F2FS_I(inode)->i_cluster_size, + .cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size, + }; + + return f2fs_compressed_blocks(&cc); +} + +static bool cluster_may_compress(struct compress_ctx *cc) +{ + if (!f2fs_compressed_file(cc->inode)) + return false; + if (f2fs_is_atomic_file(cc->inode)) + return false; + if (f2fs_is_mmap_file(cc->inode)) + return false; + if (!f2fs_cluster_is_full(cc)) + return false; + return __cluster_may_compress(cc); +} + +static void set_cluster_writeback(struct compress_ctx *cc) +{ + int i; + + for (i = 0; i < cc->cluster_size; i++) { + if (cc->rpages[i]) + set_page_writeback(cc->rpages[i]); + } +} + +static void set_cluster_dirty(struct compress_ctx *cc) +{ + int i; + + for (i = 0; i < cc->cluster_size; i++) + if (cc->rpages[i]) + set_page_dirty(cc->rpages[i]); +} + +static int prepare_compress_overwrite(struct compress_ctx *cc, + struct page **pagep, pgoff_t index, void **fsdata) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); + struct address_space *mapping = cc->inode->i_mapping; + struct page *page; + struct dnode_of_data dn; + sector_t last_block_in_bio; + unsigned fgp_flag = FGP_LOCK | FGP_WRITE | FGP_CREAT; + pgoff_t start_idx = start_idx_of_cluster(cc); + int i, ret; + bool prealloc; + +retry: + ret = f2fs_compressed_blocks(cc); + if (ret <= 0) + return ret; + + /* compressed case */ + prealloc = (ret < cc->cluster_size); + + ret = f2fs_init_compress_ctx(cc); + if (ret) + return ret; + + /* keep page reference to avoid page reclaim */ + for (i = 0; i < cc->cluster_size; i++) { + page = f2fs_pagecache_get_page(mapping, start_idx + i, + fgp_flag, GFP_NOFS); + if (!page) { + ret = -ENOMEM; + goto unlock_pages; + } + + if (PageUptodate(page)) + unlock_page(page); + else + f2fs_compress_ctx_add_page(cc, page); + } + + if (!f2fs_cluster_is_empty(cc)) { + struct bio *bio = NULL; + + ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size, + &last_block_in_bio, false); + f2fs_destroy_compress_ctx(cc); + if (ret) + goto release_pages; + if (bio) + f2fs_submit_bio(sbi, bio, DATA); + + ret = f2fs_init_compress_ctx(cc); + if (ret) + goto release_pages; + } + + for (i = 0; i < cc->cluster_size; i++) { + f2fs_bug_on(sbi, cc->rpages[i]); + + page = find_lock_page(mapping, start_idx + i); + f2fs_bug_on(sbi, !page); + + f2fs_wait_on_page_writeback(page, DATA, true, true); + + f2fs_compress_ctx_add_page(cc, page); + f2fs_put_page(page, 0); + + if (!PageUptodate(page)) { + f2fs_unlock_rpages(cc, i + 1); + f2fs_put_rpages_mapping(cc, mapping, start_idx, + cc->cluster_size); + f2fs_destroy_compress_ctx(cc); + goto retry; + } + } + + if (prealloc) { + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + + set_new_dnode(&dn, cc->inode, NULL, NULL, 0); + + for (i = cc->cluster_size - 1; i > 0; i--) { + ret = f2fs_get_block(&dn, start_idx + i); + if (ret) { + i = cc->cluster_size; + break; + } + + if (dn.data_blkaddr != NEW_ADDR) + break; + } + + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + } + + if (likely(!ret)) { + *fsdata = cc->rpages; + *pagep = cc->rpages[offset_in_cluster(cc, index)]; + return cc->cluster_size; + } + +unlock_pages: + f2fs_unlock_rpages(cc, i); +release_pages: + f2fs_put_rpages_mapping(cc, mapping, start_idx, i); + f2fs_destroy_compress_ctx(cc); + return ret; +} + +int f2fs_prepare_compress_overwrite(struct inode *inode, + struct page **pagep, pgoff_t index, void **fsdata) +{ + struct compress_ctx cc = { + .inode = inode, + .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, + .cluster_size = F2FS_I(inode)->i_cluster_size, + .cluster_idx = index >> F2FS_I(inode)->i_log_cluster_size, + .rpages = NULL, + .nr_rpages = 0, + }; + + return prepare_compress_overwrite(&cc, pagep, index, fsdata); +} + +bool f2fs_compress_write_end(struct inode *inode, void *fsdata, + pgoff_t index, unsigned copied) + +{ + struct compress_ctx cc = { + .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, + .cluster_size = F2FS_I(inode)->i_cluster_size, + .rpages = fsdata, + }; + bool first_index = (index == cc.rpages[0]->index); + + if (copied) + set_cluster_dirty(&cc); + + f2fs_put_rpages_wbc(&cc, NULL, false, 1); + f2fs_destroy_compress_ctx(&cc); + + return first_index; +} + +static int f2fs_write_compressed_pages(struct compress_ctx *cc, + int *submitted, + struct writeback_control *wbc, + enum iostat_type io_type) +{ + struct inode *inode = cc->inode; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_io_info fio = { + .sbi = sbi, + .ino = cc->inode->i_ino, + .type = DATA, + .op = REQ_OP_WRITE, + .op_flags = wbc_to_write_flags(wbc), + .old_blkaddr = NEW_ADDR, + .page = NULL, + .encrypted_page = NULL, + .compressed_page = NULL, + .submitted = false, + .need_lock = LOCK_RETRY, + .io_type = io_type, + .io_wbc = wbc, + .encrypted = f2fs_encrypted_file(cc->inode), + }; + struct dnode_of_data dn; + struct node_info ni; + struct compress_io_ctx *cic; + pgoff_t start_idx = start_idx_of_cluster(cc); + unsigned int last_index = cc->cluster_size - 1; + loff_t psize; + int i, err; + + set_new_dnode(&dn, cc->inode, NULL, NULL, 0); + + f2fs_lock_op(sbi); + + err = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); + if (err) + goto out_unlock_op; + + for (i = 0; i < cc->cluster_size; i++) { + if (datablock_addr(dn.inode, dn.node_page, + dn.ofs_in_node + i) == NULL_ADDR) + goto out_put_dnode; + } + + psize = (loff_t)(cc->rpages[last_index]->index + 1) << PAGE_SHIFT; + + err = f2fs_get_node_info(fio.sbi, dn.nid, &ni); + if (err) + goto out_put_dnode; + + fio.version = ni.version; + + cic = f2fs_kzalloc(sbi, sizeof(struct compress_io_ctx), GFP_NOFS); + if (!cic) + goto out_put_dnode; + + cic->magic = F2FS_COMPRESSED_PAGE_MAGIC; + cic->inode = inode; + refcount_set(&cic->ref, 1); + cic->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) << + cc->log_cluster_size, GFP_NOFS); + if (!cic->rpages) + goto out_put_cic; + + cic->nr_rpages = cc->cluster_size; + + for (i = 0; i < cc->nr_cpages; i++) { + f2fs_set_compressed_page(cc->cpages[i], inode, + cc->rpages[i + 1]->index, + cic, i ? &cic->ref : NULL); + fio.compressed_page = cc->cpages[i]; + if (fio.encrypted) { + fio.page = cc->rpages[i + 1]; + err = f2fs_encrypt_one_page(&fio); + if (err) + goto out_destroy_crypt; + if (fscrypt_inode_uses_fs_layer_crypto(inode)) + cc->cpages[i] = fio.encrypted_page; + } + } + + set_cluster_writeback(cc); + + for (i = 0; i < cc->cluster_size; i++) + cic->rpages[i] = cc->rpages[i]; + + for (i = 0; i < cc->cluster_size; i++, dn.ofs_in_node++) { + block_t blkaddr; + + blkaddr = datablock_addr(dn.inode, dn.node_page, + dn.ofs_in_node); + fio.page = cic->rpages[i]; + fio.old_blkaddr = blkaddr; + + /* cluster header */ + if (i == 0) { + if (blkaddr == COMPRESS_ADDR) + fio.compr_blocks++; + if (__is_valid_data_blkaddr(blkaddr)) + f2fs_invalidate_blocks(sbi, blkaddr); + f2fs_update_data_blkaddr(&dn, COMPRESS_ADDR); + goto unlock_continue; + } + + if (fio.compr_blocks && __is_valid_data_blkaddr(blkaddr)) + fio.compr_blocks++; + + if (i > cc->nr_cpages) { + if (__is_valid_data_blkaddr(blkaddr)) { + f2fs_invalidate_blocks(sbi, blkaddr); + f2fs_update_data_blkaddr(&dn, NEW_ADDR); + } + goto unlock_continue; + } + + f2fs_bug_on(fio.sbi, blkaddr == NULL_ADDR); + + if (fio.encrypted && fscrypt_inode_uses_fs_layer_crypto(inode)) + fio.encrypted_page = cc->cpages[i - 1]; + else + fio.compressed_page = cc->cpages[i - 1]; + + cc->cpages[i - 1] = NULL; + f2fs_outplace_write_data(&dn, &fio); + (*submitted)++; +unlock_continue: + inode_dec_dirty_pages(cc->inode); + unlock_page(fio.page); + } + + if (fio.compr_blocks) + f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false); + f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true); + + set_inode_flag(cc->inode, FI_APPEND_WRITE); + if (cc->cluster_idx == 0) + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); + + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + + down_write(&fi->i_sem); + if (fi->last_disk_size < psize) + fi->last_disk_size = psize; + up_write(&fi->i_sem); + + f2fs_put_rpages(cc); + f2fs_destroy_compress_ctx(cc); + return 0; + +out_destroy_crypt: + kfree(cic->rpages); + + for (--i; i >= 0; i--) + fscrypt_finalize_bounce_page(&cc->cpages[i]); + for (i = 0; i < cc->nr_cpages; i++) { + if (!cc->cpages[i]) + continue; + f2fs_put_page(cc->cpages[i], 1); + } +out_put_cic: + kfree(cic); +out_put_dnode: + f2fs_put_dnode(&dn); +out_unlock_op: + f2fs_unlock_op(sbi); + return -EAGAIN; +} + +void f2fs_compress_write_end_io(struct bio *bio, struct page *page) +{ + struct f2fs_sb_info *sbi = bio->bi_private; + struct compress_io_ctx *cic = + (struct compress_io_ctx *)page_private(page); + int i; + + if (unlikely(bio->bi_status)) + mapping_set_error(cic->inode->i_mapping, -EIO); + + f2fs_put_compressed_page(page); + + dec_page_count(sbi, F2FS_WB_DATA); + + if (refcount_dec_not_one(&cic->ref)) + return; + + for (i = 0; i < cic->nr_rpages; i++) { + WARN_ON(!cic->rpages[i]); + clear_cold_data(cic->rpages[i]); + end_page_writeback(cic->rpages[i]); + } + + kfree(cic->rpages); + kfree(cic); +} + +static int f2fs_write_raw_pages(struct compress_ctx *cc, + int *submitted, + struct writeback_control *wbc, + enum iostat_type io_type) +{ + struct address_space *mapping = cc->inode->i_mapping; + int _submitted, compr_blocks, ret; + int i = -1, err = 0; + + compr_blocks = f2fs_compressed_blocks(cc); + if (compr_blocks < 0) { + err = compr_blocks; + goto out_err; + } + + for (i = 0; i < cc->cluster_size; i++) { + if (!cc->rpages[i]) + continue; +retry_write: + if (cc->rpages[i]->mapping != mapping) { + unlock_page(cc->rpages[i]); + continue; + } + + BUG_ON(!PageLocked(cc->rpages[i])); + + ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted, + NULL, NULL, wbc, io_type, + compr_blocks); + if (ret) { + if (ret == AOP_WRITEPAGE_ACTIVATE) { + unlock_page(cc->rpages[i]); + ret = 0; + } else if (ret == -EAGAIN) { + ret = 0; + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); + lock_page(cc->rpages[i]); + clear_page_dirty_for_io(cc->rpages[i]); + goto retry_write; + } + err = ret; + goto out_fail; + } + + *submitted += _submitted; + } + return 0; + +out_fail: + /* TODO: revoke partially updated block addresses */ + BUG_ON(compr_blocks); +out_err: + for (++i; i < cc->cluster_size; i++) { + if (!cc->rpages[i]) + continue; + redirty_page_for_writepage(wbc, cc->rpages[i]); + unlock_page(cc->rpages[i]); + } + return err; +} + +int f2fs_write_multi_pages(struct compress_ctx *cc, + int *submitted, + struct writeback_control *wbc, + enum iostat_type io_type) +{ + struct f2fs_inode_info *fi = F2FS_I(cc->inode); + const struct f2fs_compress_ops *cops = + f2fs_cops[fi->i_compress_algorithm]; + int err; + + *submitted = 0; + if (cluster_may_compress(cc)) { + err = f2fs_compress_pages(cc); + if (err == -EAGAIN) { + goto write; + } else if (err) { + f2fs_put_rpages_wbc(cc, wbc, true, 1); + goto destroy_out; + } + + err = f2fs_write_compressed_pages(cc, submitted, + wbc, io_type); + cops->destroy_compress_ctx(cc); + if (!err) + return 0; + f2fs_bug_on(F2FS_I_SB(cc->inode), err != -EAGAIN); + } +write: + f2fs_bug_on(F2FS_I_SB(cc->inode), *submitted); + + err = f2fs_write_raw_pages(cc, submitted, wbc, io_type); + f2fs_put_rpages_wbc(cc, wbc, false, 0); +destroy_out: + f2fs_destroy_compress_ctx(cc); + return err; +} + +struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode); + struct decompress_io_ctx *dic; + pgoff_t start_idx = start_idx_of_cluster(cc); + int i; + + dic = f2fs_kzalloc(sbi, sizeof(struct decompress_io_ctx), GFP_NOFS); + if (!dic) + return ERR_PTR(-ENOMEM); + + dic->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) << + cc->log_cluster_size, GFP_NOFS); + if (!dic->rpages) { + kfree(dic); + return ERR_PTR(-ENOMEM); + } + + dic->magic = F2FS_COMPRESSED_PAGE_MAGIC; + dic->inode = cc->inode; + refcount_set(&dic->ref, 1); + dic->cluster_idx = cc->cluster_idx; + dic->cluster_size = cc->cluster_size; + dic->log_cluster_size = cc->log_cluster_size; + dic->nr_cpages = cc->nr_cpages; + dic->failed = false; + + for (i = 0; i < dic->cluster_size; i++) + dic->rpages[i] = cc->rpages[i]; + dic->nr_rpages = cc->cluster_size; + + dic->cpages = f2fs_kzalloc(sbi, sizeof(struct page *) * + dic->nr_cpages, GFP_NOFS); + if (!dic->cpages) + goto out_free; + + for (i = 0; i < dic->nr_cpages; i++) { + struct page *page; + + page = f2fs_grab_page(); + if (!page) + goto out_free; + + f2fs_set_compressed_page(page, cc->inode, + start_idx + i + 1, + dic, i ? &dic->ref : NULL); + dic->cpages[i] = page; + } + + dic->tpages = f2fs_kzalloc(sbi, sizeof(struct page *) * + dic->cluster_size, GFP_NOFS); + if (!dic->tpages) + goto out_free; + + for (i = 0; i < dic->cluster_size; i++) { + if (cc->rpages[i]) + continue; + + dic->tpages[i] = f2fs_grab_page(); + if (!dic->tpages[i]) + goto out_free; + } + + for (i = 0; i < dic->cluster_size; i++) { + if (dic->tpages[i]) + continue; + dic->tpages[i] = cc->rpages[i]; + } + + return dic; + +out_free: + f2fs_free_dic(dic); + return ERR_PTR(-ENOMEM); +} + +void f2fs_free_dic(struct decompress_io_ctx *dic) +{ + int i; + + if (dic->tpages) { + for (i = 0; i < dic->cluster_size; i++) { + if (dic->rpages[i]) + continue; + f2fs_put_page(dic->tpages[i], 1); + } + kfree(dic->tpages); + } + + if (dic->cpages) { + for (i = 0; i < dic->nr_cpages; i++) { + if (!dic->cpages[i]) + continue; + f2fs_put_compressed_page(dic->cpages[i]); + } + kfree(dic->cpages); + } + + kfree(dic->rpages); + kfree(dic); +} + +void f2fs_decompress_end_io(struct page **rpages, + unsigned int cluster_size, bool err, bool verity) +{ + int i; + + for (i = 0; i < cluster_size; i++) { + struct page *rpage = rpages[i]; + + if (!rpage) + continue; + + if (err || PageError(rpage)) { + ClearPageUptodate(rpage); + ClearPageError(rpage); + } else { + if (!verity || fsverity_verify_page(rpage)) + SetPageUptodate(rpage); + else + SetPageError(rpage); + } + unlock_page(rpage); + } +} diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8ec4402364ce..f2588db70c8c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -30,7 +30,50 @@ #define NUM_PREALLOC_POST_READ_CTXS 128 static struct kmem_cache *bio_post_read_ctx_cache; +static struct kmem_cache *bio_entry_slab; static mempool_t *bio_post_read_ctx_pool; +static struct bio_set *f2fs_bioset; + +#define F2FS_BIO_POOL_SIZE NR_CURSEG_TYPE + +int __init f2fs_init_bioset(void) +{ + f2fs_bioset = bioset_create(F2FS_BIO_POOL_SIZE, + 0, BIOSET_NEED_BVECS); + if (!f2fs_bioset) + return -ENOMEM; + return 0; +} + +void f2fs_destroy_bioset(void) +{ + bioset_free(f2fs_bioset); +} + +static inline struct bio *__f2fs_bio_alloc(gfp_t gfp_mask, + unsigned int nr_iovecs) +{ + return bio_alloc_bioset(gfp_mask, nr_iovecs, f2fs_bioset); +} + +struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool no_fail) +{ + struct bio *bio; + + if (no_fail) { + /* No failure on bio allocation */ + bio = __f2fs_bio_alloc(GFP_NOIO, npages); + if (!bio) + bio = __f2fs_bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); + return bio; + } + if (time_to_inject(sbi, FAULT_ALLOC_BIO)) { + f2fs_show_injection_info(sbi, FAULT_ALLOC_BIO); + return NULL; + } + + return __f2fs_bio_alloc(GFP_KERNEL, npages); +} static bool __is_cp_guaranteed(struct page *page) { @@ -41,6 +84,9 @@ static bool __is_cp_guaranteed(struct page *page) if (!mapping) return false; + if (f2fs_is_compressed_page(page)) + return false; + inode = mapping->host; sbi = F2FS_I_SB(inode); @@ -73,19 +119,19 @@ static enum count_type __read_io_type(struct page *page) /* postprocessing steps for read bios */ enum bio_post_read_step { - STEP_INITIAL = 0, STEP_DECRYPT, + STEP_DECOMPRESS, STEP_VERITY, }; struct bio_post_read_ctx { struct bio *bio; + struct f2fs_sb_info *sbi; struct work_struct work; - unsigned int cur_step; unsigned int enabled_steps; }; -static void __read_end_io(struct bio *bio) +static void __read_end_io(struct bio *bio, bool compr, bool verity) { struct page *page; struct bio_vec *bv; @@ -94,6 +140,13 @@ static void __read_end_io(struct bio *bio) bio_for_each_segment_all(bv, bio, i) { page = bv->bv_page; +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (compr && f2fs_is_compressed_page(page)) { + f2fs_decompress_pages(bio, page, verity); + continue; + } +#endif + /* PG_error was set if any post_read step failed */ if (bio->bi_status || PageError(page)) { ClearPageUptodate(page); @@ -105,31 +158,107 @@ static void __read_end_io(struct bio *bio) dec_page_count(F2FS_P_SB(page), __read_io_type(page)); unlock_page(page); } - if (bio->bi_private) - mempool_free(bio->bi_private, bio_post_read_ctx_pool); - bio_put(bio); +} + +static void f2fs_release_read_bio(struct bio *bio); +static void __f2fs_read_end_io(struct bio *bio, bool compr, bool verity) +{ + if (!compr) + __read_end_io(bio, false, verity); + f2fs_release_read_bio(bio); +} + +static void f2fs_decompress_bio(struct bio *bio, bool verity) +{ + __read_end_io(bio, true, verity); } static void bio_post_read_processing(struct bio_post_read_ctx *ctx); -static void decrypt_work(struct work_struct *work) +static void f2fs_decrypt_work(struct bio_post_read_ctx *ctx) { - struct bio_post_read_ctx *ctx = - container_of(work, struct bio_post_read_ctx, work); - fscrypt_decrypt_bio(ctx->bio); - - bio_post_read_processing(ctx); } -static void verity_work(struct work_struct *work) +static void f2fs_decompress_work(struct bio_post_read_ctx *ctx) +{ + f2fs_decompress_bio(ctx->bio, ctx->enabled_steps & (1 << STEP_VERITY)); +} + +#ifdef CONFIG_F2FS_FS_COMPRESSION +static void f2fs_verify_pages(struct page **rpages, unsigned int cluster_size) +{ + f2fs_decompress_end_io(rpages, cluster_size, false, true); +} + +static void f2fs_verify_bio(struct bio *bio) +{ + struct page *page = bio->bi_io_vec[0].bv_page; + struct decompress_io_ctx *dic = + (struct decompress_io_ctx *)page_private(page); + + f2fs_verify_pages(dic->rpages, dic->cluster_size); + f2fs_free_dic(dic); +} +#endif + +static void f2fs_verity_work(struct work_struct *work) +{ + struct bio_post_read_ctx *ctx = + container_of(work, struct bio_post_read_ctx, work); + struct bio *bio = ctx->bio; +#ifdef CONFIG_F2FS_FS_COMPRESSION + unsigned int enabled_steps = ctx->enabled_steps; +#endif + + /* + * fsverity_verify_bio() may call readpages() again, and while verity + * will be disabled for this, decryption may still be needed, resulting + * in another bio_post_read_ctx being allocated. So to prevent + * deadlocks we need to release the current ctx to the mempool first. + * This assumes that verity is the last post-read step. + */ + mempool_free(ctx, bio_post_read_ctx_pool); + bio->bi_private = NULL; + +#ifdef CONFIG_F2FS_FS_COMPRESSION + /* previous step is decompression */ + if (enabled_steps & (1 << STEP_DECOMPRESS)) { + f2fs_verify_bio(bio); + f2fs_release_read_bio(bio); + return; + } +#endif + + fsverity_verify_bio(bio); + __f2fs_read_end_io(bio, false, false); +} + +static void f2fs_post_read_work(struct work_struct *work) { struct bio_post_read_ctx *ctx = container_of(work, struct bio_post_read_ctx, work); - fsverity_verify_bio(ctx->bio); + if (ctx->enabled_steps & (1 << STEP_DECRYPT)) + f2fs_decrypt_work(ctx); - bio_post_read_processing(ctx); + if (ctx->enabled_steps & (1 << STEP_DECOMPRESS)) + f2fs_decompress_work(ctx); + + if (ctx->enabled_steps & (1 << STEP_VERITY)) { + INIT_WORK(&ctx->work, f2fs_verity_work); + fsverity_enqueue_verify_work(&ctx->work); + return; + } + + __f2fs_read_end_io(ctx->bio, + ctx->enabled_steps & (1 << STEP_DECOMPRESS), false); +} + +static void f2fs_enqueue_post_read_work(struct f2fs_sb_info *sbi, + struct work_struct *work) +{ + queue_work(sbi->post_read_wq, work); } static void bio_post_read_processing(struct bio_post_read_ctx *ctx) @@ -139,46 +268,41 @@ static void bio_post_read_processing(struct bio_post_read_ctx *ctx) * verity may require reading metadata pages that need decryption, and * we shouldn't recurse to the same workqueue. */ - switch (++ctx->cur_step) { - case STEP_DECRYPT: - if (ctx->enabled_steps & (1 << STEP_DECRYPT)) { - INIT_WORK(&ctx->work, decrypt_work); - fscrypt_enqueue_decrypt_work(&ctx->work); - return; - } - ctx->cur_step++; - /* fall-through */ - case STEP_VERITY: - if (ctx->enabled_steps & (1 << STEP_VERITY)) { - INIT_WORK(&ctx->work, verity_work); - fsverity_enqueue_verify_work(&ctx->work); - return; - } - ctx->cur_step++; - /* fall-through */ - default: - __read_end_io(ctx->bio); + + if (ctx->enabled_steps & (1 << STEP_DECRYPT) || + ctx->enabled_steps & (1 << STEP_DECOMPRESS)) { + INIT_WORK(&ctx->work, f2fs_post_read_work); + f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work); + return; } + + if (ctx->enabled_steps & (1 << STEP_VERITY)) { + INIT_WORK(&ctx->work, f2fs_verity_work); + fsverity_enqueue_verify_work(&ctx->work); + return; + } + + __f2fs_read_end_io(ctx->bio, false, false); } static bool f2fs_bio_post_read_required(struct bio *bio) { - return bio->bi_private && !bio->bi_status; + return bio->bi_private; } static void f2fs_read_end_io(struct bio *bio) { struct page *first_page = bio->bi_io_vec[0].bv_page; + struct f2fs_sb_info *sbi = F2FS_P_SB(first_page); - if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_READ_IO)) { - f2fs_show_injection_info(FAULT_READ_IO); + if (time_to_inject(sbi, FAULT_READ_IO)) { + f2fs_show_injection_info(sbi, FAULT_READ_IO); bio->bi_status = BLK_STS_IOERR; } if (f2fs_bio_post_read_required(bio)) { struct bio_post_read_ctx *ctx = bio->bi_private; - ctx->cur_step = STEP_INITIAL; bio_post_read_processing(ctx); return; } @@ -190,7 +314,7 @@ static void f2fs_read_end_io(struct bio *bio) bio->bi_iter.bi_size); } - __read_end_io(bio); + __f2fs_read_end_io(bio, false, false); } static void f2fs_write_end_io(struct bio *bio) @@ -200,7 +324,7 @@ static void f2fs_write_end_io(struct bio *bio) int i; if (time_to_inject(sbi, FAULT_WRITE_IO)) { - f2fs_show_injection_info(FAULT_WRITE_IO); + f2fs_show_injection_info(sbi, FAULT_WRITE_IO); bio->bi_status = BLK_STS_IOERR; } @@ -221,6 +345,13 @@ static void f2fs_write_end_io(struct bio *bio) fscrypt_finalize_bounce_page(&page); +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_is_compressed_page(page)) { + f2fs_compress_write_end_io(bio, page); + continue; + } +#endif + if (unlikely(bio->bi_status)) { mapping_set_error(page->mapping, -EIO); if (type == F2FS_WB_CP_DATA) @@ -315,6 +446,37 @@ static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages) return bio; } +static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode, + pgoff_t first_idx, + const struct f2fs_io_info *fio, + gfp_t gfp_mask) +{ + /* + * The f2fs garbage collector sets ->encrypted_page when it wants to + * read/write raw data without encryption. + */ + if (!fio || !fio->encrypted_page) + fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask); + else if (fscrypt_inode_should_skip_dm_default_key(inode)) + bio_set_skip_dm_default_key(bio); +} + +static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode, + pgoff_t next_idx, + const struct f2fs_io_info *fio) +{ + /* + * The f2fs garbage collector sets ->encrypted_page when it wants to + * read/write raw data without encryption. + */ + if (fio && fio->encrypted_page) + return !bio_has_crypt_ctx(bio) && + (bio_should_skip_dm_default_key(bio) == + fscrypt_inode_should_skip_dm_default_key(inode)); + + return fscrypt_mergeable_bio(bio, inode, next_idx); +} + static inline void __submit_bio(struct f2fs_sb_info *sbi, struct bio *bio, enum page_type type) { @@ -392,6 +554,12 @@ static void __f2fs_submit_read_bio(struct f2fs_sb_info *sbi, __submit_bio(sbi, bio, type); } +void f2fs_submit_bio(struct f2fs_sb_info *sbi, + struct bio *bio, enum page_type type) +{ + __submit_bio(sbi, bio, type); +} + static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; @@ -414,7 +582,6 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode, struct page *page, nid_t ino) { struct bio_vec *bvec; - struct page *target; int i; if (!bio) @@ -424,10 +591,18 @@ static bool __has_merged_page(struct bio *bio, struct inode *inode, return true; bio_for_each_segment_all(bvec, bio, i) { + struct page *target = bvec->bv_page; - target = bvec->bv_page; - if (fscrypt_is_bounce_page(target)) + if (fscrypt_is_bounce_page(target)) { target = fscrypt_pagecache_page(target); + if (IS_ERR(target)) + continue; + } + if (f2fs_is_compressed_page(target)) { + target = f2fs_compress_control_page(target); + if (IS_ERR(target)) + continue; + } if (inode && inode == target->mapping->host) return true; @@ -525,6 +700,9 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) /* Allocate a new bio */ bio = __bio_alloc(fio, 1); + f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, + fio->page->index, fio, GFP_NOIO); + if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { bio_put(bio); return -EFAULT; @@ -582,6 +760,127 @@ static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio, return io_type_is_mergeable(io, fio); } +static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio, + struct page *page, enum temp_type temp) +{ + struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; + struct bio_entry *be; + + be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS); + be->bio = bio; + bio_get(bio); + + if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) + f2fs_bug_on(sbi, 1); + + down_write(&io->bio_list_lock); + list_add_tail(&be->list, &io->bio_list); + up_write(&io->bio_list_lock); +} + +static void del_bio_entry(struct bio_entry *be) +{ + list_del(&be->list); + kmem_cache_free(bio_entry_slab, be); +} + +static int add_ipu_page(struct f2fs_sb_info *sbi, struct bio **bio, + struct page *page) +{ + enum temp_type temp; + bool found = false; + int ret = -EAGAIN; + + for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) { + struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; + struct list_head *head = &io->bio_list; + struct bio_entry *be; + + down_write(&io->bio_list_lock); + list_for_each_entry(be, head, list) { + if (be->bio != *bio) + continue; + + found = true; + + if (bio_add_page(*bio, page, PAGE_SIZE, 0) == + PAGE_SIZE) { + ret = 0; + break; + } + + /* bio is full */ + del_bio_entry(be); + __submit_bio(sbi, *bio, DATA); + break; + } + up_write(&io->bio_list_lock); + } + + if (ret) { + bio_put(*bio); + *bio = NULL; + } + + return ret; +} + +void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, + struct bio **bio, struct page *page) +{ + enum temp_type temp; + bool found = false; + struct bio *target = bio ? *bio : NULL; + + for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) { + struct f2fs_bio_info *io = sbi->write_io[DATA] + temp; + struct list_head *head = &io->bio_list; + struct bio_entry *be; + + if (list_empty(head)) + continue; + + down_read(&io->bio_list_lock); + list_for_each_entry(be, head, list) { + if (target) + found = (target == be->bio); + else + found = __has_merged_page(be->bio, NULL, + page, 0); + if (found) + break; + } + up_read(&io->bio_list_lock); + + if (!found) + continue; + + found = false; + + down_write(&io->bio_list_lock); + list_for_each_entry(be, head, list) { + if (target) + found = (target == be->bio); + else + found = __has_merged_page(be->bio, NULL, + page, 0); + if (found) { + target = be->bio; + del_bio_entry(be); + break; + } + } + up_write(&io->bio_list_lock); + } + + if (found) + __submit_bio(sbi, target, DATA); + if (bio && *bio) { + bio_put(*bio); + *bio = NULL; + } +} + int f2fs_merge_page_bio(struct f2fs_io_info *fio) { struct bio *bio = *fio->bio; @@ -595,21 +894,23 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) trace_f2fs_submit_page_bio(page, fio); f2fs_trace_ios(fio, 0); - if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block, - fio->new_blkaddr)) { - __submit_bio(fio->sbi, bio, fio->type); - bio = NULL; - } + if (bio && (!page_is_mergeable(fio->sbi, bio, *fio->last_block, + fio->new_blkaddr) || + !f2fs_crypt_mergeable_bio(bio, fio->page->mapping->host, + fio->page->index, fio))) + f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL); alloc_new: if (!bio) { bio = __bio_alloc(fio, BIO_MAX_PAGES); + f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, + fio->page->index, fio, + GFP_NOIO); bio_set_op_attrs(bio, fio->op, fio->op_flags); - } - if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) { - __submit_bio(fio->sbi, bio, fio->type); - bio = NULL; - goto alloc_new; + add_bio_entry(fio->sbi, bio, page, fio->temp); + } else { + if (add_ipu_page(fio->sbi, &bio, page)) + goto alloc_new; } if (fio->io_wbc) @@ -623,19 +924,6 @@ alloc_new: return 0; } -static void f2fs_submit_ipu_bio(struct f2fs_sb_info *sbi, struct bio **bio, - struct page *page) -{ - if (!bio) - return; - - if (!__has_merged_page(*bio, NULL, page, 0)) - return; - - __submit_bio(sbi, *bio, DATA); - *bio = NULL; -} - void f2fs_submit_page_write(struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = fio->sbi; @@ -661,15 +949,23 @@ next: verify_fio_blkaddr(fio); - bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page; + if (fio->encrypted_page) + bio_page = fio->encrypted_page; + else if (fio->compressed_page) + bio_page = fio->compressed_page; + else + bio_page = fio->page; /* set submitted = true as a return value */ fio->submitted = true; inc_page_count(sbi, WB_DATA_TYPE(bio_page)); - if (io->bio && !io_is_mergeable(sbi, io->bio, io, fio, - io->last_block_in_bio, fio->new_blkaddr)) + if (io->bio && + (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio, + fio->new_blkaddr) || + !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host, + fio->page->index, fio))) __submit_merged_bio(io); alloc_new: @@ -682,6 +978,9 @@ alloc_new: goto skip; } io->bio = __bio_alloc(fio, BIO_MAX_PAGES); + f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host, + fio->page->index, fio, + GFP_NOIO); io->fio = *fio; } @@ -725,23 +1024,25 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false); if (!bio) return ERR_PTR(-ENOMEM); + + f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS); + f2fs_target_device(sbi, blkaddr, bio); bio->bi_end_io = f2fs_read_end_io; bio_set_op_attrs(bio, REQ_OP_READ, op_flag); - if (f2fs_encrypted_file(inode)) + if (fscrypt_inode_uses_fs_layer_crypto(inode)) post_read_steps |= 1 << STEP_DECRYPT; - + if (f2fs_compressed_file(inode)) + post_read_steps |= 1 << STEP_DECOMPRESS; if (f2fs_need_verity(inode, first_idx)) post_read_steps |= 1 << STEP_VERITY; if (post_read_steps) { + /* Due to the mempool, this never fails. */ ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS); - if (!ctx) { - bio_put(bio); - return ERR_PTR(-ENOMEM); - } ctx->bio = bio; + ctx->sbi = sbi; ctx->enabled_steps = post_read_steps; bio->bi_private = ctx; } @@ -749,6 +1050,13 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, return bio; } +static void f2fs_release_read_bio(struct bio *bio) +{ + if (bio->bi_private) + mempool_free(bio->bi_private, bio_post_read_ctx_pool); + bio_put(bio); +} + /* This can handle encryption stuffs */ static int f2fs_submit_page_read(struct inode *inode, struct page *page, block_t blkaddr) @@ -1115,19 +1423,6 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) int err = 0; bool direct_io = iocb->ki_flags & IOCB_DIRECT; - /* convert inline data for Direct I/O*/ - if (direct_io) { - err = f2fs_convert_inline_inode(inode); - if (err) - return err; - } - - if (direct_io && allow_outplace_dio(inode, iocb, from)) - return 0; - - if (is_inode_flag_set(inode, FI_NO_PREALLOC)) - return 0; - map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos); map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from)); if (map.m_len > map.m_lblk) @@ -1767,8 +2062,9 @@ zero_out: * This page will go to BIO. Do we need to send this * BIO off first? */ - if (bio && !page_is_mergeable(F2FS_I_SB(inode), bio, - *last_block_in_bio, block_nr)) { + if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio, + *last_block_in_bio, block_nr) || + !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { submit_and_realloc: __f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); bio = NULL; @@ -1807,6 +2103,145 @@ out: return ret; } +#ifdef CONFIG_F2FS_FS_COMPRESSION +int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, + unsigned nr_pages, sector_t *last_block_in_bio, + bool is_readahead) +{ + struct dnode_of_data dn; + struct inode *inode = cc->inode; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct bio *bio = *bio_ret; + unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size; + sector_t last_block_in_file; + const unsigned blkbits = inode->i_blkbits; + const unsigned blocksize = 1 << blkbits; + struct decompress_io_ctx *dic = NULL; + int i; + int ret = 0; + + f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc)); + + last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits; + + /* get rid of pages beyond EOF */ + for (i = 0; i < cc->cluster_size; i++) { + struct page *page = cc->rpages[i]; + + if (!page) + continue; + if ((sector_t)page->index >= last_block_in_file) { + zero_user_segment(page, 0, PAGE_SIZE); + if (!PageUptodate(page)) + SetPageUptodate(page); + } else if (!PageUptodate(page)) { + continue; + } + unlock_page(page); + cc->rpages[i] = NULL; + cc->nr_rpages--; + } + + /* we are done since all pages are beyond EOF */ + if (f2fs_cluster_is_empty(cc)) + goto out; + + set_new_dnode(&dn, inode, NULL, NULL, 0); + ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); + if (ret) + goto out; + + /* cluster was overwritten as normal cluster */ + if (dn.data_blkaddr != COMPRESS_ADDR) + goto out; + + for (i = 1; i < cc->cluster_size; i++) { + block_t blkaddr; + + blkaddr = datablock_addr(dn.inode, dn.node_page, + dn.ofs_in_node + i); + + if (!__is_valid_data_blkaddr(blkaddr)) + break; + + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) { + ret = -EFAULT; + goto out_put_dnode; + } + cc->nr_cpages++; + } + + /* nothing to decompress */ + if (cc->nr_cpages == 0) { + ret = 0; + goto out_put_dnode; + } + + dic = f2fs_alloc_dic(cc); + if (IS_ERR(dic)) { + ret = PTR_ERR(dic); + goto out_put_dnode; + } + + for (i = 0; i < dic->nr_cpages; i++) { + struct page *page = dic->cpages[i]; + block_t blkaddr; + + blkaddr = datablock_addr(dn.inode, dn.node_page, + dn.ofs_in_node + i + 1); + + if (bio && (!page_is_mergeable(sbi, bio, + *last_block_in_bio, blkaddr) || + !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { +submit_and_realloc: + __submit_bio(sbi, bio, DATA); + bio = NULL; + } + + if (!bio) { + bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, + is_readahead ? REQ_RAHEAD : 0, + page->index); + if (IS_ERR(bio)) { + ret = PTR_ERR(bio); + bio = NULL; + dic->failed = true; + if (refcount_sub_and_test(dic->nr_cpages - i, + &dic->ref)) + f2fs_decompress_end_io(dic->rpages, + cc->cluster_size, true, + false); + f2fs_free_dic(dic); + f2fs_put_dnode(&dn); + *bio_ret = bio; + return ret; + } + } + + f2fs_wait_on_block_writeback(inode, blkaddr); + + if (bio_add_page(bio, page, blocksize, 0) < blocksize) + goto submit_and_realloc; + + inc_page_count(sbi, F2FS_RD_DATA); + ClearPageError(page); + *last_block_in_bio = blkaddr; + } + + f2fs_put_dnode(&dn); + + *bio_ret = bio; + return 0; + +out_put_dnode: + f2fs_put_dnode(&dn); +out: + f2fs_decompress_end_io(cc->rpages, cc->cluster_size, true, false); + *bio_ret = bio; + return ret; +} +#endif + /* * This function was originally taken from fs/mpage.c, and customized for f2fs. * Major change was from block_size == page_size in f2fs by default. @@ -1816,7 +2251,7 @@ out: * use ->readpage() or do the necessary surgery to decouple ->readpages() * from read-ahead. */ -static int f2fs_mpage_readpages(struct address_space *mapping, +int f2fs_mpage_readpages(struct address_space *mapping, struct list_head *pages, struct page *page, unsigned nr_pages, bool is_readahead) { @@ -1824,6 +2259,19 @@ static int f2fs_mpage_readpages(struct address_space *mapping, sector_t last_block_in_bio = 0; struct inode *inode = mapping->host; struct f2fs_map_blocks map; +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct compress_ctx cc = { + .inode = inode, + .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, + .cluster_size = F2FS_I(inode)->i_cluster_size, + .cluster_idx = NULL_CLUSTER, + .rpages = NULL, + .cpages = NULL, + .nr_rpages = 0, + .nr_cpages = 0, + }; +#endif + unsigned max_nr_pages = nr_pages; int ret = 0; map.m_pblk = 0; @@ -1847,9 +2295,41 @@ static int f2fs_mpage_readpages(struct address_space *mapping, goto next_page; } - ret = f2fs_read_single_page(inode, page, nr_pages, &map, &bio, - &last_block_in_bio, is_readahead); +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + /* there are remained comressed pages, submit them */ + if (!f2fs_cluster_can_merge_page(&cc, page->index)) { + ret = f2fs_read_multi_pages(&cc, &bio, + max_nr_pages, + &last_block_in_bio, + is_readahead); + f2fs_destroy_compress_ctx(&cc); + if (ret) + goto set_error_page; + } + ret = f2fs_is_compressed_cluster(inode, page->index); + if (ret < 0) + goto set_error_page; + else if (!ret) + goto read_single_page; + + ret = f2fs_init_compress_ctx(&cc); + if (ret) + goto set_error_page; + + f2fs_compress_ctx_add_page(&cc, page); + + goto next_page; + } +read_single_page: +#endif + + ret = f2fs_read_single_page(inode, page, max_nr_pages, &map, + &bio, &last_block_in_bio, is_readahead); if (ret) { +#ifdef CONFIG_F2FS_FS_COMPRESSION +set_error_page: +#endif SetPageError(page); zero_user_segment(page, 0, PAGE_SIZE); unlock_page(page); @@ -1857,6 +2337,19 @@ static int f2fs_mpage_readpages(struct address_space *mapping, next_page: if (pages) put_page(page); + +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + /* last page */ + if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) { + ret = f2fs_read_multi_pages(&cc, &bio, + max_nr_pages, + &last_block_in_bio, + is_readahead); + f2fs_destroy_compress_ctx(&cc); + } + } +#endif } BUG_ON(pages && !list_empty(pages)); if (bio) @@ -1871,6 +2364,11 @@ static int f2fs_read_data_page(struct file *file, struct page *page) trace_f2fs_readpage(page, DATA); + if (!f2fs_is_compress_backend_ready(inode)) { + unlock_page(page); + return -EOPNOTSUPP; + } + /* If the file has inline data, try to read it directly */ if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); @@ -1889,6 +2387,9 @@ static int f2fs_read_data_pages(struct file *file, trace_f2fs_readpages(inode, page, nr_pages); + if (!f2fs_is_compress_backend_ready(inode)) + return 0; + /* If the file has inline data, skip readpages */ if (f2fs_has_inline_data(inode)) return 0; @@ -1896,22 +2397,26 @@ static int f2fs_read_data_pages(struct file *file, return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true); } -static int encrypt_one_page(struct f2fs_io_info *fio) +int f2fs_encrypt_one_page(struct f2fs_io_info *fio) { struct inode *inode = fio->page->mapping->host; - struct page *mpage; + struct page *mpage, *page; gfp_t gfp_flags = GFP_NOFS; if (!f2fs_encrypted_file(inode)) return 0; + page = fio->compressed_page ? fio->compressed_page : fio->page; + /* wait for GCed page writeback via META_MAPPING */ f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); + if (fscrypt_inode_uses_inline_crypto(inode)) + return 0; + retry_encrypt: - fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(fio->page, - PAGE_SIZE, 0, - gfp_flags); + fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page, + PAGE_SIZE, 0, gfp_flags); if (IS_ERR(fio->encrypted_page)) { /* flush pending IOs and wait for a while in the ENOMEM case */ if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { @@ -2071,7 +2576,7 @@ got_it: if (ipu_force || (__is_valid_data_blkaddr(fio->old_blkaddr) && need_inplace_update(fio))) { - err = encrypt_one_page(fio); + err = f2fs_encrypt_one_page(fio); if (err) goto out_writepage; @@ -2082,7 +2587,7 @@ got_it: f2fs_unlock_op(fio->sbi); err = f2fs_inplace_write_data(fio); if (err) { - if (f2fs_encrypted_file(inode)) + if (fscrypt_inode_uses_fs_layer_crypto(inode)) fscrypt_finalize_bounce_page(&fio->encrypted_page); if (PageWriteback(page)) end_page_writeback(page); @@ -2107,13 +2612,16 @@ got_it: fio->version = ni.version; - err = encrypt_one_page(fio); + err = f2fs_encrypt_one_page(fio); if (err) goto out_writepage; set_page_writeback(page); ClearPageError(page); + if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR) + f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false); + /* LFS mode write path */ f2fs_outplace_write_data(&dn, fio); trace_f2fs_do_write_data_page(page, OPU); @@ -2128,18 +2636,19 @@ out: return err; } -static int __write_data_page(struct page *page, bool *submitted, +int f2fs_write_single_data_page(struct page *page, int *submitted, struct bio **bio, sector_t *last_block, struct writeback_control *wbc, - enum iostat_type io_type) + enum iostat_type io_type, + int compr_blocks) { struct inode *inode = page->mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); loff_t i_size = i_size_read(inode); - const pgoff_t end_index = ((unsigned long long) i_size) + const pgoff_t end_index = ((unsigned long long)i_size) >> PAGE_SHIFT; - loff_t psize = (page->index + 1) << PAGE_SHIFT; + loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT; unsigned offset = 0; bool need_balance_fs = false; int err = 0; @@ -2153,6 +2662,7 @@ static int __write_data_page(struct page *page, bool *submitted, .page = page, .encrypted_page = NULL, .submitted = false, + .compr_blocks = compr_blocks, .need_lock = LOCK_RETRY, .io_type = io_type, .io_wbc = wbc, @@ -2177,7 +2687,9 @@ static int __write_data_page(struct page *page, bool *submitted, if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) goto redirty_out; - if (page->index < end_index || f2fs_verity_in_progress(inode)) + if (page->index < end_index || + f2fs_verity_in_progress(inode) || + compr_blocks) goto write; /* @@ -2253,22 +2765,19 @@ out: f2fs_remove_dirty_inode(inode); submitted = NULL; } - unlock_page(page); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && - !F2FS_I(inode)->cp_task) { - f2fs_submit_ipu_bio(sbi, bio, page); + !F2FS_I(inode)->cp_task) f2fs_balance_fs(sbi, need_balance_fs); - } if (unlikely(f2fs_cp_error(sbi))) { - f2fs_submit_ipu_bio(sbi, bio, page); f2fs_submit_merged_write(sbi, DATA); + f2fs_submit_merged_ipu_write(sbi, bio, NULL); submitted = NULL; } if (submitted) - *submitted = fio.submitted; + *submitted = fio.submitted ? 1 : 0; return 0; @@ -2289,7 +2798,23 @@ redirty_out: static int f2fs_write_data_page(struct page *page, struct writeback_control *wbc) { - return __write_data_page(page, NULL, NULL, NULL, wbc, FS_DATA_IO); +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct inode *inode = page->mapping->host; + + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) + goto out; + + if (f2fs_compressed_file(inode)) { + if (f2fs_is_compressed_cluster(inode, page->index)) { + redirty_page_for_writepage(wbc, page); + return AOP_WRITEPAGE_ACTIVATE; + } + } +out: +#endif + + return f2fs_write_single_data_page(page, NULL, NULL, NULL, + wbc, FS_DATA_IO, 0); } /* @@ -2302,11 +2827,27 @@ static int f2fs_write_cache_pages(struct address_space *mapping, enum iostat_type io_type) { int ret = 0; - int done = 0; + int done = 0, retry = 0; struct pagevec pvec; struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); struct bio *bio = NULL; sector_t last_block; +#ifdef CONFIG_F2FS_FS_COMPRESSION + struct inode *inode = mapping->host; + struct compress_ctx cc = { + .inode = inode, + .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, + .cluster_size = F2FS_I(inode)->i_cluster_size, + .cluster_idx = NULL_CLUSTER, + .rpages = NULL, + .nr_rpages = 0, + .cpages = NULL, + .rbuf = NULL, + .cbuf = NULL, + .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size, + .private = NULL, + }; +#endif int nr_pages; pgoff_t uninitialized_var(writeback_index); pgoff_t index; @@ -2316,6 +2857,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping, int range_whole = 0; int tag; int nwritten = 0; + int submitted = 0; + int i; pagevec_init(&pvec, 0); @@ -2345,12 +2888,11 @@ static int f2fs_write_cache_pages(struct address_space *mapping, else tag = PAGECACHE_TAG_DIRTY; retry: + retry = 0; if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag_pages_for_writeback(mapping, index, end); done_index = index; - while (!done && (index <= end)) { - int i; - + while (!done && !retry && (index <= end)) { nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end, tag); if (nr_pages == 0) @@ -2358,15 +2900,62 @@ retry: for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; - bool submitted = false; + bool need_readd; +readd: + need_readd = false; +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + ret = f2fs_init_compress_ctx(&cc); + if (ret) { + done = 1; + break; + } + if (!f2fs_cluster_can_merge_page(&cc, + page->index)) { + ret = f2fs_write_multi_pages(&cc, + &submitted, wbc, io_type); + if (!ret) + need_readd = true; + goto result; + } + + if (unlikely(f2fs_cp_error(sbi))) + goto lock_page; + + if (f2fs_cluster_is_empty(&cc)) { + void *fsdata = NULL; + struct page *pagep; + int ret2; + + ret2 = f2fs_prepare_compress_overwrite( + inode, &pagep, + page->index, &fsdata); + if (ret2 < 0) { + ret = ret2; + done = 1; + break; + } else if (ret2 && + !f2fs_compress_write_end(inode, + fsdata, page->index, + 1)) { + retry = 1; + break; + } + } else { + goto lock_page; + } + } +#endif /* give a priority to WB_SYNC threads */ if (atomic_read(&sbi->wb_sync_req[DATA]) && wbc->sync_mode == WB_SYNC_NONE) { done = 1; break; } - +#ifdef CONFIG_F2FS_FS_COMPRESSION +lock_page: +#endif done_index = page->index; retry_write: lock_page(page); @@ -2383,57 +2972,81 @@ continue_unlock: } if (PageWriteback(page)) { - if (wbc->sync_mode != WB_SYNC_NONE) { + if (wbc->sync_mode != WB_SYNC_NONE) f2fs_wait_on_page_writeback(page, DATA, true, true); - f2fs_submit_ipu_bio(sbi, &bio, page); - } else { + else goto continue_unlock; - } } if (!clear_page_dirty_for_io(page)) goto continue_unlock; - ret = __write_data_page(page, &submitted, &bio, - &last_block, wbc, io_type); +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + get_page(page); + f2fs_compress_ctx_add_page(&cc, page); + continue; + } +#endif + ret = f2fs_write_single_data_page(page, &submitted, + &bio, &last_block, wbc, io_type, 0); + if (ret == AOP_WRITEPAGE_ACTIVATE) + unlock_page(page); +#ifdef CONFIG_F2FS_FS_COMPRESSION +result: +#endif + nwritten += submitted; + wbc->nr_to_write -= submitted; + if (unlikely(ret)) { /* * keep nr_to_write, since vfs uses this to * get # of written pages. */ if (ret == AOP_WRITEPAGE_ACTIVATE) { - unlock_page(page); ret = 0; - continue; + goto next; } else if (ret == -EAGAIN) { ret = 0; if (wbc->sync_mode == WB_SYNC_ALL) { cond_resched(); congestion_wait(BLK_RW_ASYNC, - HZ/50); + HZ/50); goto retry_write; } - continue; + goto next; } done_index = page->index + 1; done = 1; break; - } else if (submitted) { - nwritten++; } - if (--wbc->nr_to_write <= 0 && + if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) { done = 1; break; } +next: + if (need_readd) + goto readd; } pagevec_release(&pvec); cond_resched(); } - - if (!cycled && !done) { +#ifdef CONFIG_F2FS_FS_COMPRESSION + /* flush remained pages in compress cluster */ + if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) { + ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type); + nwritten += submitted; + wbc->nr_to_write -= submitted; + if (ret) { + done = 1; + retry = 0; + } + } +#endif + if ((!cycled && !done) || retry) { cycled = 1; index = 0; end = writeback_index - 1; @@ -2447,7 +3060,7 @@ continue_unlock: NULL, 0, DATA); /* submit cached bio of IPU write */ if (bio) - __submit_bio(sbi, bio, DATA); + f2fs_submit_merged_ipu_write(sbi, &bio, NULL); return ret; } @@ -2457,6 +3070,8 @@ static inline bool __should_serialize_io(struct inode *inode, { if (!S_ISREG(inode->i_mode)) return false; + if (f2fs_compressed_file(inode)) + return true; if (IS_NOQUOTA(inode)) return false; /* to avoid deadlock in path of data flush */ @@ -2552,14 +3167,16 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) struct inode *inode = mapping->host; loff_t i_size = i_size_read(inode); + if (IS_NOQUOTA(inode)) + return; + /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */ if (to > i_size && !f2fs_verity_in_progress(inode)) { down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); down_write(&F2FS_I(inode)->i_mmap_sem); truncate_pagecache(inode, i_size); - if (!IS_NOQUOTA(inode)) - f2fs_truncate_blocks(inode, i_size, true); + f2fs_truncate_blocks(inode, i_size, true); up_write(&F2FS_I(inode)->i_mmap_sem); up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); @@ -2599,6 +3216,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, __do_map_lock(sbi, flag, true); locked = true; } + restart: /* check inline_data */ ipage = f2fs_get_node_page(sbi, inode->i_ino); @@ -2699,6 +3317,24 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, if (err) goto fail; } + +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + int ret; + + *fsdata = NULL; + + ret = f2fs_prepare_compress_overwrite(inode, pagep, + index, fsdata); + if (ret < 0) { + err = ret; + goto fail; + } else if (ret) { + return 0; + } + } +#endif + repeat: /* * Do not use grab_cache_page_write_begin() to avoid deadlock due to @@ -2711,6 +3347,8 @@ repeat: goto fail; } + /* TODO: cluster can be compressed due to race with .writepage */ + *pagep = page; err = prepare_write_begin(sbi, page, pos, len, @@ -2795,6 +3433,16 @@ static int f2fs_write_end(struct file *file, else SetPageUptodate(page); } + +#ifdef CONFIG_F2FS_FS_COMPRESSION + /* overwrite compressed file */ + if (f2fs_compressed_file(inode) && fsdata) { + f2fs_compress_write_end(inode, fsdata, page->index, copied); + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + return copied; + } +#endif + if (!copied) goto unlock_out; @@ -3123,7 +3771,8 @@ int f2fs_migrate_page(struct address_space *mapping, #ifdef CONFIG_SWAP /* Copied from generic_swapfile_activate() to check any holes */ -static int check_swap_activate(struct file *swap_file, unsigned int max) +static int check_swap_activate(struct swap_info_struct *sis, + struct file *swap_file, sector_t *span) { struct address_space *mapping = swap_file->f_mapping; struct inode *inode = mapping->host; @@ -3134,6 +3783,8 @@ static int check_swap_activate(struct file *swap_file, unsigned int max) sector_t last_block; sector_t lowest_block = -1; sector_t highest_block = 0; + int nr_extents = 0; + int ret; blkbits = inode->i_blkbits; blocks_per_page = PAGE_SIZE >> blkbits; @@ -3145,7 +3796,8 @@ static int check_swap_activate(struct file *swap_file, unsigned int max) probe_block = 0; page_no = 0; last_block = i_size_read(inode) >> blkbits; - while ((probe_block + blocks_per_page) <= last_block && page_no < max) { + while ((probe_block + blocks_per_page) <= last_block && + page_no < sis->max) { unsigned block_in_page; sector_t first_block; @@ -3185,13 +3837,27 @@ static int check_swap_activate(struct file *swap_file, unsigned int max) highest_block = first_block; } + /* + * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks + */ + ret = add_swap_extent(sis, page_no, 1, first_block); + if (ret < 0) + goto out; + nr_extents += ret; page_no++; probe_block += blocks_per_page; reprobe: continue; } - return 0; - + ret = nr_extents; + *span = 1 + highest_block - lowest_block; + if (page_no == 0) + page_no = 1; /* force Empty message */ + sis->max = page_no; + sis->pages = page_no - 1; + sis->highest_bit = page_no - 1; +out: + return ret; bad_bmap: pr_err("swapon: swapfile has holes\n"); return -EINVAL; @@ -3213,14 +3879,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, if (ret) return ret; - ret = check_swap_activate(file, sis->max); - if (ret) + if (f2fs_disable_compressed_file(inode)) + return -EINVAL; + + ret = check_swap_activate(sis, file, span); + if (ret < 0) return ret; set_inode_flag(inode, FI_PIN_FILE); f2fs_precache_extents(inode); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); - return 0; + return ret; } static void f2fs_swap_deactivate(struct file *file) @@ -3291,8 +3960,43 @@ fail: return -ENOMEM; } -void __exit f2fs_destroy_post_read_processing(void) +void f2fs_destroy_post_read_processing(void) { mempool_destroy(bio_post_read_ctx_pool); kmem_cache_destroy(bio_post_read_ctx_cache); } + +int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi) +{ + if (!f2fs_sb_has_encrypt(sbi) && + !f2fs_sb_has_verity(sbi) && + !f2fs_sb_has_compression(sbi)) + return 0; + + sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq", + WQ_UNBOUND | WQ_HIGHPRI, + num_online_cpus()); + if (!sbi->post_read_wq) + return -ENOMEM; + return 0; +} + +void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi) +{ + if (sbi->post_read_wq) + destroy_workqueue(sbi->post_read_wq); +} + +int __init f2fs_init_bio_entry_cache(void) +{ + bio_entry_slab = f2fs_kmem_cache_create("bio_entry_slab", + sizeof(struct bio_entry)); + if (!bio_entry_slab) + return -ENOMEM; + return 0; +} + +void f2fs_destroy_bio_entry_cache(void) +{ + kmem_cache_destroy(bio_entry_slab); +} diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 244c5bb23fe2..77a82f93b54c 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -21,9 +21,45 @@ #include "gc.h" static LIST_HEAD(f2fs_stat_list); -static struct dentry *f2fs_debugfs_root; static DEFINE_MUTEX(f2fs_stat_mutex); +#ifdef CONFIG_DEBUG_FS +static struct dentry *f2fs_debugfs_root; +#endif +/* + * This function calculates BDF of every segments + */ +void f2fs_update_sit_info(struct f2fs_sb_info *sbi) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + unsigned long long blks_per_sec, hblks_per_sec, total_vblocks; + unsigned long long bimodal, dist; + unsigned int segno, vblocks; + int ndirty = 0; + + bimodal = 0; + total_vblocks = 0; + blks_per_sec = BLKS_PER_SEC(sbi); + hblks_per_sec = blks_per_sec / 2; + for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { + vblocks = get_valid_blocks(sbi, segno, true); + dist = abs(vblocks - hblks_per_sec); + bimodal += dist * dist; + + if (vblocks > 0 && vblocks < blks_per_sec) { + total_vblocks += vblocks; + ndirty++; + } + } + dist = div_u64(MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec, 100); + si->bimodal = div64_u64(bimodal, dist); + if (si->dirty_count) + si->avg_vblocks = div_u64(total_vblocks, ndirty); + else + si->avg_vblocks = 0; +} + +#ifdef CONFIG_DEBUG_FS static void update_general_status(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); @@ -56,7 +92,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->nquota_files = sbi->nquota_files; si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); - si->aw_cnt = atomic_read(&sbi->aw_cnt); + si->aw_cnt = sbi->atomic_files; si->vw_cnt = atomic_read(&sbi->vw_cnt); si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt); si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt); @@ -94,6 +130,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->inline_xattr = atomic_read(&sbi->inline_xattr); si->inline_inode = atomic_read(&sbi->inline_inode); si->inline_dir = atomic_read(&sbi->inline_dir); + si->compr_inode = atomic_read(&sbi->compr_inode); + si->compr_blocks = atomic_read(&sbi->compr_blocks); si->append = sbi->im[APPEND_INO].ino_num; si->update = sbi->im[UPDATE_INO].ino_num; si->orphans = sbi->im[ORPHAN_INO].ino_num; @@ -114,7 +152,6 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID]; si->avail_nids = NM_I(sbi)->available_nids; si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID]; - si->bg_gc = sbi->bg_gc; si->io_skip_bggc = sbi->io_skip_bggc; si->other_skip_bggc = sbi->other_skip_bggc; si->skipped_atomic_files[BG_GC] = sbi->skipped_atomic_files[BG_GC]; @@ -145,39 +182,6 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->inplace_count = atomic_read(&sbi->inplace_count); } -/* - * This function calculates BDF of every segments - */ -static void update_sit_info(struct f2fs_sb_info *sbi) -{ - struct f2fs_stat_info *si = F2FS_STAT(sbi); - unsigned long long blks_per_sec, hblks_per_sec, total_vblocks; - unsigned long long bimodal, dist; - unsigned int segno, vblocks; - int ndirty = 0; - - bimodal = 0; - total_vblocks = 0; - blks_per_sec = BLKS_PER_SEC(sbi); - hblks_per_sec = blks_per_sec / 2; - for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { - vblocks = get_valid_blocks(sbi, segno, true); - dist = abs(vblocks - hblks_per_sec); - bimodal += dist * dist; - - if (vblocks > 0 && vblocks < blks_per_sec) { - total_vblocks += vblocks; - ndirty++; - } - } - dist = div_u64(MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec, 100); - si->bimodal = div64_u64(bimodal, dist); - if (si->dirty_count) - si->avg_vblocks = div_u64(total_vblocks, ndirty); - else - si->avg_vblocks = 0; -} - /* * This function calculates memory footprint. */ @@ -315,6 +319,8 @@ static int stat_show(struct seq_file *s, void *v) si->inline_inode); seq_printf(s, " - Inline_dentry Inode: %u\n", si->inline_dir); + seq_printf(s, " - Compressed Inode: %u, Blocks: %u\n", + si->compr_inode, si->compr_blocks); seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n", si->orphans, si->append, si->update); seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", @@ -441,7 +447,7 @@ static int stat_show(struct seq_file *s, void *v) si->block_count[LFS], si->segment_count[LFS]); /* segment usage info */ - update_sit_info(si->sbi); + f2fs_update_sit_info(si->sbi); seq_printf(s, "\nBDF: %u, avg. vblocks: %u\n", si->bimodal, si->avg_vblocks); @@ -472,6 +478,7 @@ static const struct file_operations stat_fops = { .llseek = seq_lseek, .release = single_release, }; +#endif int f2fs_build_stats(struct f2fs_sb_info *sbi) { @@ -502,11 +509,12 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->inline_xattr, 0); atomic_set(&sbi->inline_inode, 0); atomic_set(&sbi->inline_dir, 0); + atomic_set(&sbi->compr_inode, 0); + atomic_set(&sbi->compr_blocks, 0); atomic_set(&sbi->inplace_count, 0); for (i = META_CP; i < META_MAX; i++) atomic_set(&sbi->meta_count[i], 0); - atomic_set(&sbi->aw_cnt, 0); atomic_set(&sbi->vw_cnt, 0); atomic_set(&sbi->max_aw_cnt, 0); atomic_set(&sbi->max_vw_cnt, 0); @@ -531,14 +539,18 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi) void __init f2fs_create_root_stats(void) { +#ifdef CONFIG_DEBUG_FS f2fs_debugfs_root = debugfs_create_dir("f2fs", NULL); debugfs_create_file("status", S_IRUGO, f2fs_debugfs_root, NULL, &stat_fops); +#endif } void f2fs_destroy_root_stats(void) { +#ifdef CONFIG_DEBUG_FS debugfs_remove_recursive(f2fs_debugfs_root); f2fs_debugfs_root = NULL; +#endif } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 9440d59b109d..8f83bfa8ffc6 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -108,34 +108,52 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, * Test whether a case-insensitive directory entry matches the filename * being searched for. * + * Only called for encrypted names if the key is available. + * * Returns: 0 if the directory entry matches, more than 0 if it * doesn't match or less than zero on error. */ -int f2fs_ci_compare(const struct inode *parent, const struct qstr *name, - const struct qstr *entry, bool quick) +static int f2fs_ci_compare(const struct inode *parent, const struct qstr *name, + u8 *de_name, size_t de_name_len, bool quick) { - const struct f2fs_sb_info *sbi = F2FS_SB(parent->i_sb); - const struct unicode_map *um = sbi->s_encoding; + const struct super_block *sb = parent->i_sb; + const struct unicode_map *um = sb->s_encoding; + struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len); + struct qstr entry = QSTR_INIT(de_name, de_name_len); int ret; - if (quick) - ret = utf8_strncasecmp_folded(um, name, entry); - else - ret = utf8_strncasecmp(um, name, entry); + if (IS_ENCRYPTED(parent)) { + const struct fscrypt_str encrypted_name = + FSTR_INIT(de_name, de_name_len); + decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL); + if (!decrypted_name.name) + return -ENOMEM; + ret = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name, + &decrypted_name); + if (ret < 0) + goto out; + entry.name = decrypted_name.name; + entry.len = decrypted_name.len; + } + + if (quick) + ret = utf8_strncasecmp_folded(um, name, &entry); + else + ret = utf8_strncasecmp(um, name, &entry); if (ret < 0) { /* Handle invalid character sequence as either an error * or as an opaque byte sequence. */ - if (f2fs_has_strict_mode(sbi)) - return -EINVAL; - - if (name->len != entry->len) - return 1; - - return !!memcmp(name->name, entry->name, name->len); + if (sb_has_enc_strict_mode(sb)) + ret = -EINVAL; + else if (name->len != entry.len) + ret = 1; + else + ret = !!memcmp(name->name, entry.name, entry.len); } - +out: + kfree(decrypted_name.name); return ret; } @@ -154,7 +172,7 @@ static void f2fs_fname_setup_ci_filename(struct inode *dir, if (!cf_name->name) return; - cf_name->len = utf8_casefold(sbi->s_encoding, + cf_name->len = utf8_casefold(dir->i_sb->s_encoding, iname, cf_name->name, F2FS_NAME_LEN); if ((int)cf_name->len <= 0) { @@ -173,24 +191,24 @@ static inline bool f2fs_match_name(struct f2fs_dentry_ptr *d, { #ifdef CONFIG_UNICODE struct inode *parent = d->inode; - struct f2fs_sb_info *sbi = F2FS_I_SB(parent); - struct qstr entry; + u8 *name; + int len; #endif if (de->hash_code != namehash) return false; #ifdef CONFIG_UNICODE - entry.name = d->filename[bit_pos]; - entry.len = de->name_len; + name = d->filename[bit_pos]; + len = le16_to_cpu(de->name_len); - if (sbi->s_encoding && IS_CASEFOLDED(parent)) { + if (needs_casefold(parent)) { if (cf_str->name) { struct qstr cf = {.name = cf_str->name, .len = cf_str->len}; - return !f2fs_ci_compare(parent, &cf, &entry, true); + return !f2fs_ci_compare(parent, &cf, name, len, true); } - return !f2fs_ci_compare(parent, fname->usr_fname, &entry, + return !f2fs_ci_compare(parent, fname->usr_fname, name, len, false); } #endif @@ -357,8 +375,8 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, int err; #ifdef CONFIG_UNICODE - if (f2fs_has_strict_mode(F2FS_I_SB(dir)) && IS_CASEFOLDED(dir) && - utf8_validate(F2FS_I_SB(dir)->s_encoding, child)) { + if (sb_has_enc_strict_mode(dir->i_sb) && IS_CASEFOLDED(dir) && + utf8_validate(dir->i_sb->s_encoding, child)) { *res_page = ERR_PTR(-EINVAL); return NULL; } @@ -578,6 +596,20 @@ next: goto next; } +bool f2fs_has_enough_room(struct inode *dir, struct page *ipage, + struct fscrypt_name *fname) +{ + struct f2fs_dentry_ptr d; + unsigned int bit_pos; + int slots = GET_DENTRY_SLOTS(fname_len(fname)); + + make_dentry_ptr_inline(dir, &d, inline_data_addr(dir, ipage)); + + bit_pos = f2fs_room_for_filename(d.bitmap, slots, d.max); + + return bit_pos < d.max; +} + void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, const struct qstr *name, f2fs_hash_t name_hash, unsigned int bit_pos) @@ -602,13 +634,13 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, const struct qstr *orig_name, + f2fs_hash_t dentry_hash, struct inode *inode, nid_t ino, umode_t mode) { unsigned int bit_pos; unsigned int level; unsigned int current_depth; unsigned long bidx, block; - f2fs_hash_t dentry_hash; unsigned int nbucket, nblock; struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; @@ -618,7 +650,6 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, level = 0; slots = GET_DENTRY_SLOTS(new_name->len); - dentry_hash = f2fs_dentry_hash(dir, new_name, NULL); current_depth = F2FS_I(dir)->i_current_depth; if (F2FS_I(dir)->chash == dentry_hash) { @@ -628,7 +659,7 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, start: if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) { - f2fs_show_injection_info(FAULT_DIR_DEPTH); + f2fs_show_injection_info(F2FS_I_SB(dir), FAULT_DIR_DEPTH); return -ENOSPC; } @@ -704,17 +735,19 @@ int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode) { struct qstr new_name; + f2fs_hash_t dentry_hash; int err = -EAGAIN; new_name.name = fname_name(fname); new_name.len = fname_len(fname); if (f2fs_has_inline_dentry(dir)) - err = f2fs_add_inline_entry(dir, &new_name, fname->usr_fname, + err = f2fs_add_inline_entry(dir, &new_name, fname, inode, ino, mode); + dentry_hash = f2fs_dentry_hash(dir, &new_name, fname); if (err == -EAGAIN) err = f2fs_add_regular_entry(dir, &new_name, fname->usr_fname, - inode, ino, mode); + dentry_hash, inode, ino, mode); f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); return err; @@ -919,8 +952,9 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, bit_pos++; ctx->pos = start_pos + bit_pos; printk_ratelimited( - "%s, invalid namelen(0), ino:%u, run fsck to fix.", - KERN_WARNING, le32_to_cpu(de->ino)); + "%sF2FS-fs (%s): invalid namelen(0), ino:%u, run fsck to fix.", + KERN_WARNING, sbi->sb->s_id, + le32_to_cpu(de->ino)); set_sbi_flag(sbi, SBI_NEED_FSCK); continue; } @@ -986,7 +1020,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) if (IS_ENCRYPTED(inode)) { err = fscrypt_get_encryption_info(inode); - if (err && err != -ENOKEY) + if (err) goto out; err = fscrypt_fname_alloc_buffer(inode, F2FS_NAME_LEN, &fstr); @@ -1062,50 +1096,3 @@ const struct file_operations f2fs_dir_operations = { .compat_ioctl = f2fs_compat_ioctl, #endif }; - -#ifdef CONFIG_UNICODE -static int f2fs_d_compare(const struct dentry *dentry, unsigned int len, - const char *str, const struct qstr *name) -{ - struct qstr qstr = {.name = str, .len = len }; - - if (!IS_CASEFOLDED(dentry->d_parent->d_inode)) { - if (len != name->len) - return -1; - return memcmp(str, name, len); - } - - return f2fs_ci_compare(dentry->d_parent->d_inode, name, &qstr, false); -} - -static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str) -{ - struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); - const struct unicode_map *um = sbi->s_encoding; - unsigned char *norm; - int len, ret = 0; - - if (!IS_CASEFOLDED(dentry->d_inode)) - return 0; - - norm = f2fs_kmalloc(sbi, PATH_MAX, GFP_ATOMIC); - if (!norm) - return -ENOMEM; - - len = utf8_casefold(um, str, norm, PATH_MAX); - if (len < 0) { - if (f2fs_has_strict_mode(sbi)) - ret = -EINVAL; - goto out; - } - str->hash = full_name_hash(dentry, norm, len); -out: - kvfree(norm); - return ret; -} - -const struct dentry_operations f2fs_dentry_ops = { - .d_hash = f2fs_d_hash, - .d_compare = f2fs_d_compare, -}; -#endif diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 624f87d06f0d..ad0ec1b7c141 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -117,6 +117,8 @@ typedef u32 block_t; /* */ typedef u32 nid_t; +#define COMPRESS_EXT_NUM 16 + struct f2fs_mount_info { unsigned int opt; int write_io_size_bits; /* Write IO size bits */ @@ -138,9 +140,18 @@ struct f2fs_mount_info { int alloc_mode; /* segment allocation policy */ int fsync_mode; /* fsync policy */ bool test_dummy_encryption; /* test dummy encryption */ +#ifdef CONFIG_FS_ENCRYPTION + bool inlinecrypt; /* inline encryption enabled */ +#endif block_t unusable_cap; /* Amount of space allowed to be * unusable when disabling checkpoint */ + + /* For compression */ + unsigned char compress_algorithm; /* algorithm type */ + unsigned compress_log_size; /* cluster log size */ + unsigned char compress_ext_cnt; /* extension count */ + unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN]; /* extensions */ }; #define F2FS_FEATURE_ENCRYPT 0x0001 @@ -156,6 +167,7 @@ struct f2fs_mount_info { #define F2FS_FEATURE_VERITY 0x0400 #define F2FS_FEATURE_SB_CHKSUM 0x0800 #define F2FS_FEATURE_CASEFOLD 0x1000 +#define F2FS_FEATURE_COMPRESSION 0x2000 #define __F2FS_HAS_FEATURE(raw_super, mask) \ ((raw_super->feature & cpu_to_le32(mask)) != 0) @@ -710,6 +722,12 @@ struct f2fs_inode_info { int i_inline_xattr_size; /* inline xattr size */ struct timespec i_crtime; /* inode creation time */ struct timespec i_disk_time[4]; /* inode disk times */ + + /* for file compress */ + u64 i_compr_blocks; /* # of compressed blocks */ + unsigned char i_compress_algorithm; /* algorithm type */ + unsigned char i_log_cluster_size; /* log of cluster size */ + unsigned int i_cluster_size; /* cluster size */ }; static inline void get_extent_info(struct extent_info *ext, @@ -888,6 +906,7 @@ enum { CURSEG_WARM_NODE, /* direct node blocks of normal files */ CURSEG_COLD_NODE, /* indirect node blocks */ NO_CHECK_TYPE, + CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */ }; struct flush_cmd { @@ -1015,6 +1034,7 @@ enum need_lock_type { enum cp_reason_type { CP_NO_NEEDED, CP_NON_REGULAR, + CP_COMPRESSED, CP_HARDLINK, CP_SB_NEED_CP, CP_WRONG_PINO, @@ -1053,12 +1073,15 @@ struct f2fs_io_info { block_t old_blkaddr; /* old block address before Cow */ struct page *page; /* page to be written */ struct page *encrypted_page; /* encrypted page */ + struct page *compressed_page; /* compressed page */ struct list_head list; /* serialize IOs */ bool submitted; /* indicate IO submission */ int need_lock; /* indicate we need to lock cp_rwsem */ bool in_list; /* indicate fio is in io_list */ bool is_por; /* indicate IO is from recovery or not */ bool retry; /* need to reallocate block address */ + int compr_blocks; /* # of compressed block addresses */ + bool encrypted; /* indicate file is encrypted */ enum iostat_type io_type; /* io type */ struct writeback_control *io_wbc; /* writeback control */ struct bio **bio; /* bio for ipu */ @@ -1066,6 +1089,11 @@ struct f2fs_io_info { unsigned char version; /* version of the node */ }; +struct bio_entry { + struct bio *bio; + struct list_head list; +}; + #define is_read_io(rw) ((rw) == READ) struct f2fs_bio_info { struct f2fs_sb_info *sbi; /* f2fs superblock */ @@ -1075,6 +1103,8 @@ struct f2fs_bio_info { struct rw_semaphore io_rwsem; /* blocking op for bio */ spinlock_t io_lock; /* serialize DATA/NODE IOs */ struct list_head io_list; /* track fios */ + struct list_head bio_list; /* bio entry list head */ + struct rw_semaphore bio_list_lock; /* lock to protect bio entry list */ }; #define FDEV(i) (sbi->devs[i]) @@ -1159,6 +1189,18 @@ enum fsync_mode { FSYNC_MODE_NOBARRIER, /* fsync behaves nobarrier based on posix */ }; +/* + * this value is set in page as a private data which indicate that + * the page is atomically written, and it is in inmem_pages list. + */ +#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1) +#define DUMMY_WRITTEN_PAGE ((unsigned long)-2) + +#define IS_ATOMIC_WRITTEN_PAGE(page) \ + (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE) +#define IS_DUMMY_WRITTEN_PAGE(page) \ + (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE) + #ifdef CONFIG_FS_ENCRYPTION #define DUMMY_ENCRYPTION_ENABLED(sbi) \ (unlikely(F2FS_OPTION(sbi).test_dummy_encryption)) @@ -1166,6 +1208,75 @@ enum fsync_mode { #define DUMMY_ENCRYPTION_ENABLED(sbi) (0) #endif +/* For compression */ +enum compress_algorithm_type { + COMPRESS_LZO, + COMPRESS_LZ4, + COMPRESS_MAX, +}; + +#define COMPRESS_DATA_RESERVED_SIZE 4 +struct compress_data { + __le32 clen; /* compressed data size */ + __le32 chksum; /* checksum of compressed data */ + __le32 reserved[COMPRESS_DATA_RESERVED_SIZE]; /* reserved */ + u8 cdata[]; /* compressed data */ +}; + +#define COMPRESS_HEADER_SIZE (sizeof(struct compress_data)) + +#define F2FS_COMPRESSED_PAGE_MAGIC 0xF5F2C000 + +/* compress context */ +struct compress_ctx { + struct inode *inode; /* inode the context belong to */ + pgoff_t cluster_idx; /* cluster index number */ + unsigned int cluster_size; /* page count in cluster */ + unsigned int log_cluster_size; /* log of cluster size */ + struct page **rpages; /* pages store raw data in cluster */ + unsigned int nr_rpages; /* total page number in rpages */ + struct page **cpages; /* pages store compressed data in cluster */ + unsigned int nr_cpages; /* total page number in cpages */ + void *rbuf; /* virtual mapped address on rpages */ + struct compress_data *cbuf; /* virtual mapped address on cpages */ + size_t rlen; /* valid data length in rbuf */ + size_t clen; /* valid data length in cbuf */ + void *private; /* payload buffer for specified compression algorithm */ +}; + +/* compress context for write IO path */ +struct compress_io_ctx { + u32 magic; /* magic number to indicate page is compressed */ + struct inode *inode; /* inode the context belong to */ + struct page **rpages; /* pages store raw data in cluster */ + unsigned int nr_rpages; /* total page number in rpages */ + refcount_t ref; /* referrence count of raw page */ +}; + +/* decompress io context for read IO path */ +struct decompress_io_ctx { + u32 magic; /* magic number to indicate page is compressed */ + struct inode *inode; /* inode the context belong to */ + pgoff_t cluster_idx; /* cluster index number */ + unsigned int cluster_size; /* page count in cluster */ + unsigned int log_cluster_size; /* log of cluster size */ + struct page **rpages; /* pages store raw data in cluster */ + unsigned int nr_rpages; /* total page number in rpages */ + struct page **cpages; /* pages store compressed data in cluster */ + unsigned int nr_cpages; /* total page number in cpages */ + struct page **tpages; /* temp pages to pad holes in cluster */ + void *rbuf; /* virtual mapped address on rpages */ + struct compress_data *cbuf; /* virtual mapped address on cpages */ + size_t rlen; /* valid data length in rbuf */ + size_t clen; /* valid data length in cbuf */ + refcount_t ref; /* referrence count of compressed page */ + bool failed; /* indicate IO error during decompression */ +}; + +#define NULL_CLUSTER ((unsigned int)(~0)) +#define MIN_COMPRESS_LOG_SIZE 2 +#define MAX_COMPRESS_LOG_SIZE 8 + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ @@ -1174,10 +1285,6 @@ struct f2fs_sb_info { int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ struct mutex writepages; /* mutex for writepages() */ -#ifdef CONFIG_UNICODE - struct unicode_map *s_encoding; - __u16 s_encoding_flags; -#endif #ifdef CONFIG_BLK_DEV_ZONED unsigned int blocks_per_blkz; /* F2FS blocks per zone */ @@ -1281,17 +1388,22 @@ struct f2fs_sb_info { struct f2fs_mount_info mount_opt; /* mount options */ /* for cleaning operations */ - struct mutex gc_mutex; /* mutex for GC */ + struct rw_semaphore gc_lock; /* + * semaphore for GC, avoid + * race between GC and GC or CP + */ struct f2fs_gc_kthread *gc_thread; /* GC thread */ unsigned int cur_victim_sec; /* current victim section num */ unsigned int gc_mode; /* current GC state */ unsigned int next_victim_seg[2]; /* next segment in victim section */ /* for skip statistic */ + unsigned int atomic_files; /* # of opened atomic file */ unsigned long long skipped_atomic_files[2]; /* FG_GC and BG_GC */ unsigned long long skipped_gc_rwsem; /* FG_GC only */ /* threshold for gc trials on pinned files */ u64 gc_pin_file_threshold; + struct rw_semaphore pin_sem; /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; @@ -1315,11 +1427,11 @@ struct f2fs_sb_info { atomic_t inline_xattr; /* # of inline_xattr inodes */ atomic_t inline_inode; /* # of inline_data inodes */ atomic_t inline_dir; /* # of inline_dentry inodes */ - atomic_t aw_cnt; /* # of atomic writes */ + atomic_t compr_inode; /* # of compressed inodes */ + atomic_t compr_blocks; /* # of compressed blocks */ atomic_t vw_cnt; /* # of volatile writes */ atomic_t max_aw_cnt; /* max # of atomic writes */ atomic_t max_vw_cnt; /* max # of volatile writes */ - int bg_gc; /* background gc calls */ unsigned int io_skip_bggc; /* skip background gc for in-flight IO */ unsigned int other_skip_bggc; /* skip background gc for other reasons */ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ @@ -1353,6 +1465,8 @@ struct f2fs_sb_info { /* Precomputed FS UUID checksum for seeding other checksums */ __u32 s_chksum_seed; + + struct workqueue_struct *post_read_wq; /* post read workqueue */ }; struct f2fs_private_dio { @@ -1363,9 +1477,10 @@ struct f2fs_private_dio { }; #ifdef CONFIG_F2FS_FAULT_INJECTION -#define f2fs_show_injection_info(type) \ - printk_ratelimited("%sF2FS-fs : inject %s in %s of %pF\n", \ - KERN_INFO, f2fs_fault_name[type], \ +#define f2fs_show_injection_info(sbi, type) \ + printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \ + KERN_INFO, sbi->sb->s_id, \ + f2fs_fault_name[type], \ __func__, __builtin_return_address(0)) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { @@ -1385,7 +1500,7 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) return false; } #else -#define f2fs_show_injection_info(type) do { } while (0) +#define f2fs_show_injection_info(sbi, type) do { } while (0) static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) { return false; @@ -1771,7 +1886,7 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, return ret; if (time_to_inject(sbi, FAULT_BLOCK)) { - f2fs_show_injection_info(FAULT_BLOCK); + f2fs_show_injection_info(sbi, FAULT_BLOCK); release = *count; goto release_quota; } @@ -2023,7 +2138,7 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, } if (time_to_inject(sbi, FAULT_BLOCK)) { - f2fs_show_injection_info(FAULT_BLOCK); + f2fs_show_injection_info(sbi, FAULT_BLOCK); goto enospc; } @@ -2138,7 +2253,8 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, return page; if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) { - f2fs_show_injection_info(FAULT_PAGE_ALLOC); + f2fs_show_injection_info(F2FS_M_SB(mapping), + FAULT_PAGE_ALLOC); return NULL; } } @@ -2153,7 +2269,7 @@ static inline struct page *f2fs_pagecache_get_page( int fgp_flags, gfp_t gfp_mask) { if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) { - f2fs_show_injection_info(FAULT_PAGE_GET); + f2fs_show_injection_info(F2FS_M_SB(mapping), FAULT_PAGE_GET); return NULL; } @@ -2209,26 +2325,6 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, return entry; } -static inline struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, - int npages, bool no_fail) -{ - struct bio *bio; - - if (no_fail) { - /* No failure on bio allocation */ - bio = bio_alloc(GFP_NOIO, npages); - if (!bio) - bio = bio_alloc(GFP_NOIO | __GFP_NOFAIL, npages); - return bio; - } - if (time_to_inject(sbi, FAULT_ALLOC_BIO)) { - f2fs_show_injection_info(FAULT_ALLOC_BIO); - return NULL; - } - - return bio_alloc(GFP_KERNEL, npages); -} - static inline bool is_idle(struct f2fs_sb_info *sbi, int type) { if (sbi->gc_mode == GC_URGENT) @@ -2365,11 +2461,13 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) /* * On-disk inode flags (f2fs_inode::i_flags) */ +#define F2FS_COMPR_FL 0x00000004 /* Compress file */ #define F2FS_SYNC_FL 0x00000008 /* Synchronous updates */ #define F2FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ #define F2FS_APPEND_FL 0x00000020 /* writes to file may only append */ #define F2FS_NODUMP_FL 0x00000040 /* do not dump file */ #define F2FS_NOATIME_FL 0x00000080 /* do not update atime */ +#define F2FS_NOCOMP_FL 0x00000400 /* Don't compress */ #define F2FS_INDEX_FL 0x00001000 /* hash-indexed directory */ #define F2FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ @@ -2378,7 +2476,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr) /* Flags that should be inherited by new inodes from their parent. */ #define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \ F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \ - F2FS_CASEFOLD_FL) + F2FS_CASEFOLD_FL | F2FS_COMPR_FL | F2FS_NOCOMP_FL) /* Flags that are appropriate for regular files (all but dir-specific ones). */ #define F2FS_REG_FLMASK (~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \ @@ -2430,6 +2528,8 @@ enum { FI_PIN_FILE, /* indicate file should not be gced */ FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */ FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */ + FI_COMPRESSED_FILE, /* indicate file's data can be compressed */ + FI_MMAP_FILE, /* indicate file was mmapped */ }; static inline void __mark_inode_dirty_flag(struct inode *inode, @@ -2446,6 +2546,7 @@ static inline void __mark_inode_dirty_flag(struct inode *inode, case FI_DATA_EXIST: case FI_INLINE_DOTS: case FI_PIN_FILE: + case FI_COMPRESSED_FILE: f2fs_mark_inode_dirty_sync(inode, true); } } @@ -2601,16 +2702,27 @@ static inline int f2fs_has_inline_xattr(struct inode *inode) return is_inode_flag_set(inode, FI_INLINE_XATTR); } +static inline int f2fs_compressed_file(struct inode *inode) +{ + return S_ISREG(inode->i_mode) && + is_inode_flag_set(inode, FI_COMPRESSED_FILE); +} + static inline unsigned int addrs_per_inode(struct inode *inode) { unsigned int addrs = CUR_ADDRS_PER_INODE(inode) - get_inline_xattr_addrs(inode); - return ALIGN_DOWN(addrs, 1); + + if (!f2fs_compressed_file(inode)) + return addrs; + return ALIGN_DOWN(addrs, F2FS_I(inode)->i_cluster_size); } static inline unsigned int addrs_per_block(struct inode *inode) { - return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, 1); + if (!f2fs_compressed_file(inode)) + return DEF_ADDRS_PER_BLOCK; + return ALIGN_DOWN(DEF_ADDRS_PER_BLOCK, F2FS_I(inode)->i_cluster_size); } static inline void *inline_xattr_addr(struct inode *inode, struct page *page) @@ -2643,6 +2755,11 @@ static inline int f2fs_has_inline_dots(struct inode *inode) return is_inode_flag_set(inode, FI_INLINE_DOTS); } +static inline int f2fs_is_mmap_file(struct inode *inode) +{ + return is_inode_flag_set(inode, FI_MMAP_FILE); +} + static inline bool f2fs_is_pinned_file(struct inode *inode) { return is_inode_flag_set(inode, FI_PIN_FILE); @@ -2703,6 +2820,20 @@ static inline void clear_file(struct inode *inode, int type) f2fs_mark_inode_dirty_sync(inode, true); } +static inline bool f2fs_is_time_consistent(struct inode *inode) +{ + if (!timespec_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) + return false; + if (!timespec_equal(F2FS_I(inode)->i_disk_time + 3, + &F2FS_I(inode)->i_crtime)) + return false; + return true; +} + static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) { bool ret; @@ -2720,14 +2851,7 @@ static inline bool f2fs_skip_inode_update(struct inode *inode, int dsync) i_size_read(inode) & ~PAGE_MASK) return false; - if (!timespec_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime)) - return false; - if (!timespec_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime)) - return false; - if (!timespec_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime)) - return false; - if (!timespec_equal(F2FS_I(inode)->i_disk_time + 3, - &F2FS_I(inode)->i_crtime)) + if (!f2fs_is_time_consistent(inode)) return false; down_read(&F2FS_I(inode)->i_sem); @@ -2763,7 +2887,8 @@ static inline bool f2fs_may_extent_tree(struct inode *inode) struct f2fs_sb_info *sbi = F2FS_I_SB(inode); if (!test_opt(sbi, EXTENT_CACHE) || - is_inode_flag_set(inode, FI_NO_EXTENT)) + is_inode_flag_set(inode, FI_NO_EXTENT) || + is_inode_flag_set(inode, FI_COMPRESSED_FILE)) return false; /* @@ -2782,7 +2907,7 @@ static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, void *ret; if (time_to_inject(sbi, FAULT_KMALLOC)) { - f2fs_show_injection_info(FAULT_KMALLOC); + f2fs_show_injection_info(sbi, FAULT_KMALLOC); return NULL; } @@ -2803,7 +2928,7 @@ static inline void *f2fs_kvmalloc(struct f2fs_sb_info *sbi, size_t size, gfp_t flags) { if (time_to_inject(sbi, FAULT_KVMALLOC)) { - f2fs_show_injection_info(FAULT_KVMALLOC); + f2fs_show_injection_info(sbi, FAULT_KVMALLOC); return NULL; } @@ -2883,7 +3008,8 @@ static inline void verify_blkaddr(struct f2fs_sb_info *sbi, static inline bool __is_valid_data_blkaddr(block_t blkaddr) { - if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) + if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR || + blkaddr == COMPRESS_ADDR) return false; return true; } @@ -2949,11 +3075,6 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, bool hot, bool set); struct dentry *f2fs_get_parent(struct dentry *child); -extern int f2fs_ci_compare(const struct inode *parent, - const struct qstr *name, - const struct qstr *entry, - bool quick); - /* * dir.c */ @@ -2981,11 +3102,13 @@ ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, struct page **page); void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, struct page *page, struct inode *inode); +bool f2fs_has_enough_room(struct inode *dir, struct page *ipage, + struct fscrypt_name *fname); void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, const struct qstr *name, f2fs_hash_t name_hash, unsigned int bit_pos); int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, + const struct qstr *orig_name, f2fs_hash_t dentry_hash, struct inode *inode, nid_t ino, umode_t mode); int f2fs_add_dentry(struct inode *dir, struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode); @@ -3018,7 +3141,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi); * hash.c */ f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, - const struct qstr *name_info, struct fscrypt_name *fname); + const struct qstr *name_info, const struct fscrypt_name *fname); /* * node.c @@ -3101,7 +3224,7 @@ void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi); int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end); -void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); +void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, struct cp_control *cpc); @@ -3185,12 +3308,19 @@ void f2fs_destroy_checkpoint_caches(void); /* * data.c */ -int f2fs_init_post_read_processing(void); -void f2fs_destroy_post_read_processing(void); +int __init f2fs_init_bioset(void); +void f2fs_destroy_bioset(void); +struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool no_fail); +int f2fs_init_bio_entry_cache(void); +void f2fs_destroy_bio_entry_cache(void); +void f2fs_submit_bio(struct f2fs_sb_info *sbi, + struct bio *bio, enum page_type type); void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type); void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi, struct inode *inode, struct page *page, nid_t ino, enum page_type type); +void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, + struct bio **bio, struct page *page); void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi); int f2fs_submit_page_bio(struct f2fs_io_info *fio); int f2fs_merge_page_bio(struct f2fs_io_info *fio); @@ -3205,6 +3335,9 @@ int f2fs_reserve_new_block(struct dnode_of_data *dn); int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index); int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from); int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); +int f2fs_mpage_readpages(struct address_space *mapping, + struct list_head *pages, struct page *page, + unsigned nr_pages, bool is_readahead); struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, int op_flags, bool for_write); struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index); @@ -3218,8 +3351,14 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int create, int flag); int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); +int f2fs_encrypt_one_page(struct f2fs_io_info *fio); bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio); bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio); +int f2fs_write_single_data_page(struct page *page, int *submitted, + struct bio **bio, sector_t *last_block, + struct writeback_control *wbc, + enum iostat_type io_type, + int compr_blocks); void f2fs_invalidate_page(struct page *page, unsigned int offset, unsigned int length); int f2fs_release_page(struct page *page, gfp_t wait); @@ -3229,6 +3368,10 @@ int f2fs_migrate_page(struct address_space *mapping, struct page *newpage, #endif bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len); void f2fs_clear_radix_tree_dirty_tag(struct page *page); +int f2fs_init_post_read_processing(void); +void f2fs_destroy_post_read_processing(void); +int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi); +void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi); /* * gc.c @@ -3275,6 +3418,7 @@ struct f2fs_stat_info { int nr_discard_cmd; unsigned int undiscard_blks; int inline_xattr, inline_inode, inline_dir, append, update, orphans; + int compr_inode, compr_blocks; int aw_cnt, max_aw_cnt, vw_cnt, max_vw_cnt; unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks; unsigned int bimodal, avg_vblocks; @@ -3306,7 +3450,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_cp_count(si) ((si)->cp_count++) #define stat_inc_bg_cp_count(si) ((si)->bg_cp_count++) #define stat_inc_call_count(si) ((si)->call_count++) -#define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) +#define stat_inc_bggc_count(si) ((si)->bg_gc++) #define stat_io_skip_bggc_count(sbi) ((sbi)->io_skip_bggc++) #define stat_other_skip_bggc_count(sbi) ((sbi)->other_skip_bggc++) #define stat_inc_dirty_inode(sbi, type) ((sbi)->ndirty_inode[type]++) @@ -3345,6 +3489,20 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) if (f2fs_has_inline_dentry(inode)) \ (atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \ } while (0) +#define stat_inc_compr_inode(inode) \ + do { \ + if (f2fs_compressed_file(inode)) \ + (atomic_inc(&F2FS_I_SB(inode)->compr_inode)); \ + } while (0) +#define stat_dec_compr_inode(inode) \ + do { \ + if (f2fs_compressed_file(inode)) \ + (atomic_dec(&F2FS_I_SB(inode)->compr_inode)); \ + } while (0) +#define stat_add_compr_blocks(inode, blocks) \ + (atomic_add(blocks, &F2FS_I_SB(inode)->compr_blocks)) +#define stat_sub_compr_blocks(inode, blocks) \ + (atomic_sub(blocks, &F2FS_I_SB(inode)->compr_blocks)) #define stat_inc_meta_count(sbi, blkaddr) \ do { \ if (blkaddr < SIT_I(sbi)->sit_base_addr) \ @@ -3362,13 +3520,9 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) ((sbi)->block_count[(curseg)->alloc_type]++) #define stat_inc_inplace_blocks(sbi) \ (atomic_inc(&(sbi)->inplace_count)) -#define stat_inc_atomic_write(inode) \ - (atomic_inc(&F2FS_I_SB(inode)->aw_cnt)) -#define stat_dec_atomic_write(inode) \ - (atomic_dec(&F2FS_I_SB(inode)->aw_cnt)) #define stat_update_max_atomic_write(inode) \ do { \ - int cur = atomic_read(&F2FS_I_SB(inode)->aw_cnt); \ + int cur = F2FS_I_SB(inode)->atomic_files; \ int max = atomic_read(&F2FS_I_SB(inode)->max_aw_cnt); \ if (cur > max) \ atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \ @@ -3420,6 +3574,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi); void f2fs_destroy_stats(struct f2fs_sb_info *sbi); void __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); +void f2fs_update_sit_info(struct f2fs_sb_info *sbi); #else #define stat_inc_cp_count(si) do { } while (0) #define stat_inc_bg_cp_count(si) do { } while (0) @@ -3429,8 +3584,8 @@ void f2fs_destroy_root_stats(void); #define stat_other_skip_bggc_count(sbi) do { } while (0) #define stat_inc_dirty_inode(sbi, type) do { } while (0) #define stat_dec_dirty_inode(sbi, type) do { } while (0) -#define stat_inc_total_hit(sb) do { } while (0) -#define stat_inc_rbtree_node_hit(sb) do { } while (0) +#define stat_inc_total_hit(sbi) do { } while (0) +#define stat_inc_rbtree_node_hit(sbi) do { } while (0) #define stat_inc_largest_node_hit(sbi) do { } while (0) #define stat_inc_cached_node_hit(sbi) do { } while (0) #define stat_inc_inline_xattr(inode) do { } while (0) @@ -3439,6 +3594,10 @@ void f2fs_destroy_root_stats(void); #define stat_dec_inline_inode(inode) do { } while (0) #define stat_inc_inline_dir(inode) do { } while (0) #define stat_dec_inline_dir(inode) do { } while (0) +#define stat_inc_compr_inode(inode) do { } while (0) +#define stat_dec_compr_inode(inode) do { } while (0) +#define stat_add_compr_blocks(inode, blocks) do { } while (0) +#define stat_sub_compr_blocks(inode, blocks) do { } while (0) #define stat_inc_atomic_write(inode) do { } while (0) #define stat_dec_atomic_write(inode) do { } while (0) #define stat_update_max_atomic_write(inode) do { } while (0) @@ -3458,12 +3617,10 @@ static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } static inline void __init f2fs_create_root_stats(void) { } static inline void f2fs_destroy_root_stats(void) { } +static inline void update_sit_info(struct f2fs_sb_info *sbi) {} #endif extern const struct file_operations f2fs_dir_operations; -#ifdef CONFIG_UNICODE -extern const struct dentry_operations f2fs_dentry_ops; -#endif extern const struct file_operations f2fs_file_operations; extern const struct inode_operations f2fs_file_inode_operations; extern const struct address_space_operations f2fs_dblock_aops; @@ -3486,6 +3643,7 @@ void f2fs_truncate_inline_inode(struct inode *inode, int f2fs_read_inline_data(struct inode *inode, struct page *page); int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); int f2fs_convert_inline_inode(struct inode *inode); +int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry); int f2fs_write_inline_data(struct inode *inode, struct page *page); bool f2fs_recover_inline_data(struct inode *inode, struct page *npage); struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, @@ -3493,7 +3651,7 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir, int f2fs_make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage); int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, + const struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode); void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, struct inode *dir, @@ -3578,7 +3736,85 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode) */ static inline bool f2fs_post_read_required(struct inode *inode) { - return f2fs_encrypted_file(inode) || fsverity_active(inode); + return f2fs_encrypted_file(inode) || fsverity_active(inode) || + f2fs_compressed_file(inode); +} + +/* + * compress.c + */ +#ifdef CONFIG_F2FS_FS_COMPRESSION +bool f2fs_is_compressed_page(struct page *page); +struct page *f2fs_compress_control_page(struct page *page); +int f2fs_prepare_compress_overwrite(struct inode *inode, + struct page **pagep, pgoff_t index, void **fsdata); +bool f2fs_compress_write_end(struct inode *inode, void *fsdata, + pgoff_t index, unsigned copied); +void f2fs_compress_write_end_io(struct bio *bio, struct page *page); +bool f2fs_is_compress_backend_ready(struct inode *inode); +void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity); +bool f2fs_cluster_is_empty(struct compress_ctx *cc); +bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); +void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page); +int f2fs_write_multi_pages(struct compress_ctx *cc, + int *submitted, + struct writeback_control *wbc, + enum iostat_type io_type); +int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index); +int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, + unsigned nr_pages, sector_t *last_block_in_bio, + bool is_readahead); +struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); +void f2fs_free_dic(struct decompress_io_ctx *dic); +void f2fs_decompress_end_io(struct page **rpages, + unsigned int cluster_size, bool err, bool verity); +int f2fs_init_compress_ctx(struct compress_ctx *cc); +void f2fs_destroy_compress_ctx(struct compress_ctx *cc); +void f2fs_init_compress_info(struct f2fs_sb_info *sbi); +#else +static inline bool f2fs_is_compressed_page(struct page *page) { return false; } +static inline bool f2fs_is_compress_backend_ready(struct inode *inode) +{ + if (!f2fs_compressed_file(inode)) + return true; + /* not support compression */ + return false; +} +static inline struct page *f2fs_compress_control_page(struct page *page) +{ + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); +} +#endif + +static inline void set_compress_context(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + + F2FS_I(inode)->i_compress_algorithm = + F2FS_OPTION(sbi).compress_algorithm; + F2FS_I(inode)->i_log_cluster_size = + F2FS_OPTION(sbi).compress_log_size; + F2FS_I(inode)->i_cluster_size = + 1 << F2FS_I(inode)->i_log_cluster_size; + F2FS_I(inode)->i_flags |= F2FS_COMPR_FL; + set_inode_flag(inode, FI_COMPRESSED_FILE); + stat_inc_compr_inode(inode); +} + +static inline u64 f2fs_disable_compressed_file(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + + if (!f2fs_compressed_file(inode)) + return 0; + if (fi->i_compr_blocks) + return fi->i_compr_blocks; + + fi->i_flags &= ~F2FS_COMPR_FL; + clear_inode_flag(inode, FI_COMPRESSED_FILE); + stat_dec_compr_inode(inode); + return 0; } #define F2FS_FEATURE_FUNCS(name, flagname) \ @@ -3599,6 +3835,7 @@ F2FS_FEATURE_FUNCS(lost_found, LOST_FOUND); F2FS_FEATURE_FUNCS(verity, VERITY); F2FS_FEATURE_FUNCS(sb_chksum, SB_CHKSUM); F2FS_FEATURE_FUNCS(casefold, CASEFOLD); +F2FS_FEATURE_FUNCS(compression, COMPRESSION); #ifdef CONFIG_BLK_DEV_ZONED static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi, @@ -3680,6 +3917,30 @@ static inline bool f2fs_may_encrypt(struct inode *inode) #endif } +static inline bool f2fs_may_compress(struct inode *inode) +{ + if (IS_SWAPFILE(inode) || f2fs_is_pinned_file(inode) || + f2fs_is_atomic_file(inode) || + f2fs_is_volatile_file(inode)) + return false; + return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode); +} + +static inline void f2fs_i_compr_blocks_update(struct inode *inode, + u64 blocks, bool add) +{ + int diff = F2FS_I(inode)->i_cluster_size - blocks; + + if (add) { + F2FS_I(inode)->i_compr_blocks += diff; + stat_add_compr_blocks(inode, diff); + } else { + F2FS_I(inode)->i_compr_blocks -= diff; + stat_sub_compr_blocks(inode, diff); + } + f2fs_mark_inode_dirty_sync(inode, true); +} + static inline int block_unaligned_IO(struct inode *inode, struct kiocb *iocb, struct iov_iter *iter) { @@ -3707,10 +3968,18 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int rw = iov_iter_rw(iter); - if (f2fs_post_read_required(inode)) + if (IS_ENABLED(CONFIG_FS_ENCRYPTION) && f2fs_encrypted_file(inode)) { + if (!fscrypt_inode_uses_inline_crypto(inode) || + !IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), + F2FS_BLKSIZE)) + return true; + } + if (fsverity_active(inode)) return true; if (f2fs_is_multi_device(sbi)) return true; + if (f2fs_compressed_file(inode)) + return true; /* * for blkzoned device, fallback direct IO to buffered IO, so * all IOs can be serialized by log-structured write. diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5d370e3bc19a..a5066cda00c4 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -50,8 +50,9 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) struct page *page = vmf->page; struct inode *inode = file_inode(vmf->vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct dnode_of_data dn = { .node_changed = false }; - int err; + struct dnode_of_data dn; + bool need_alloc = true; + int err = 0; if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; @@ -63,6 +64,26 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) goto err; } +#ifdef CONFIG_F2FS_FS_COMPRESSION + if (f2fs_compressed_file(inode)) { + int ret = f2fs_is_compressed_cluster(inode, page->index); + + if (ret < 0) { + err = ret; + goto err; + } else if (ret) { + if (ret < F2FS_I(inode)->i_cluster_size) { + err = -EAGAIN; + goto err; + } + need_alloc = false; + } + } +#endif + /* should do out of any locked page */ + if (need_alloc) + f2fs_balance_fs(sbi, true); + sb_start_pagefault(inode->i_sb); f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); @@ -80,15 +101,17 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) goto out_sem; } - /* block allocation */ - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_get_block(&dn, page->index); - f2fs_put_dnode(&dn); - __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); - if (err) { - unlock_page(page); - goto out_sem; + if (need_alloc) { + /* block allocation */ + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); + set_new_dnode(&dn, inode, NULL, NULL, 0); + err = f2fs_get_block(&dn, page->index); + f2fs_put_dnode(&dn); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); + if (err) { + unlock_page(page); + goto out_sem; + } } /* fill the page */ @@ -158,6 +181,8 @@ static inline enum cp_reason_type need_do_checkpoint(struct inode *inode) if (!S_ISREG(inode->i_mode)) cp_reason = CP_NON_REGULAR; + else if (f2fs_compressed_file(inode)) + cp_reason = CP_COMPRESSED; else if (inode->i_nlink != 1) cp_reason = CP_HARDLINK; else if (is_sbi_flag_set(sbi, SBI_NEED_CP)) @@ -499,6 +524,9 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; + if (!f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; + /* we don't need to use inline_data strictly */ err = f2fs_convert_inline_inode(inode); if (err) @@ -506,6 +534,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); vma->vm_ops = &f2fs_file_vm_ops; + set_inode_flag(inode, FI_MMAP_FILE); return 0; } @@ -516,6 +545,9 @@ static int f2fs_file_open(struct inode *inode, struct file *filp) if (err) return err; + if (!f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; + err = fsverity_file_open(inode, filp); if (err) return err; @@ -532,6 +564,9 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) int nr_free = 0, ofs = dn->ofs_in_node, len = count; __le32 *addr; int base = 0; + bool compressed_cluster = false; + int cluster_index = 0, valid_blocks = 0; + int cluster_size = F2FS_I(dn->inode)->i_cluster_size; if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode)) base = get_extra_isize(dn->inode); @@ -539,26 +574,43 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count) raw_node = F2FS_NODE(dn->node_page); addr = blkaddr_in_node(raw_node) + base + ofs; - for (; count > 0; count--, addr++, dn->ofs_in_node++) { + /* Assumption: truncateion starts with cluster */ + for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) { block_t blkaddr = le32_to_cpu(*addr); + if (f2fs_compressed_file(dn->inode) && + !(cluster_index & (cluster_size - 1))) { + if (compressed_cluster) + f2fs_i_compr_blocks_update(dn->inode, + valid_blocks, false); + compressed_cluster = (blkaddr == COMPRESS_ADDR); + valid_blocks = 0; + } + if (blkaddr == NULL_ADDR) continue; dn->data_blkaddr = NULL_ADDR; f2fs_set_data_blkaddr(dn); - if (__is_valid_data_blkaddr(blkaddr) && - !f2fs_is_valid_blkaddr(sbi, blkaddr, + if (__is_valid_data_blkaddr(blkaddr)) { + if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) - continue; + continue; + if (compressed_cluster) + valid_blocks++; + } - f2fs_invalidate_blocks(sbi, blkaddr); if (dn->ofs_in_node == 0 && IS_INODE(dn->node_page)) clear_inode_flag(dn->inode, FI_FIRST_BLOCK_WRITTEN); + + f2fs_invalidate_blocks(sbi, blkaddr); nr_free++; } + if (compressed_cluster) + f2fs_i_compr_blocks_update(dn->inode, valid_blocks, false); + if (nr_free) { pgoff_t fofs; /* @@ -601,6 +653,9 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, return 0; } + if (f2fs_compressed_file(inode)) + return 0; + page = f2fs_get_lock_data_page(inode, index, true); if (IS_ERR(page)) return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); @@ -616,7 +671,7 @@ truncate_out: return 0; } -int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) +static int do_truncate_blocks(struct inode *inode, u64 from, bool lock) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; @@ -681,6 +736,28 @@ free_partial: return err; } +int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock) +{ + u64 free_from = from; + + /* + * for compressed file, only support cluster size + * aligned truncation. + */ + if (f2fs_compressed_file(inode)) { + size_t cluster_shift = PAGE_SHIFT + + F2FS_I(inode)->i_log_cluster_size; + size_t cluster_mask = (1 << cluster_shift) - 1; + + free_from = from >> cluster_shift; + if (from & cluster_mask) + free_from++; + free_from <<= cluster_shift; + } + + return do_truncate_blocks(inode, free_from, lock); +} + int f2fs_truncate(struct inode *inode) { int err; @@ -695,7 +772,7 @@ int f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) { - f2fs_show_injection_info(FAULT_TRUNCATE); + f2fs_show_injection_info(F2FS_I_SB(inode), FAULT_TRUNCATE); return -EIO; } @@ -740,11 +817,14 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, stat->attributes |= STATX_ATTR_IMMUTABLE; if (flags & F2FS_NODUMP_FL) stat->attributes |= STATX_ATTR_NODUMP; + if (IS_VERITY(inode)) + stat->attributes |= STATX_ATTR_VERITY; stat->attributes_mask |= (STATX_ATTR_APPEND | STATX_ATTR_ENCRYPTED | STATX_ATTR_IMMUTABLE | - STATX_ATTR_NODUMP); + STATX_ATTR_NODUMP | + STATX_ATTR_VERITY); generic_fillattr(inode, stat); @@ -794,6 +874,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; + if ((attr->ia_valid & ATTR_SIZE) && + !f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; + err = setattr_prepare(dentry, attr); if (err) return err; @@ -1034,8 +1118,8 @@ next_dnode: } else if (ret == -ENOENT) { if (dn.max_level == 0) return -ENOENT; - done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - dn.ofs_in_node, - len); + done = min((pgoff_t)ADDRS_PER_BLOCK(inode) - + dn.ofs_in_node, len); blkaddr += done; do_replace += done; goto next; @@ -1150,7 +1234,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, } dn.ofs_in_node++; i++; - new_size = (dst + i) << PAGE_SHIFT; + new_size = (loff_t)(dst + i) << PAGE_SHIFT; if (dst_inode->i_size < new_size) f2fs_i_size_write(dst_inode, new_size); } while (--ilen && (do_replace[i] || blkaddr[i] == NULL_ADDR)); @@ -1198,13 +1282,13 @@ static int __exchange_data_block(struct inode *src_inode, src_blkaddr = f2fs_kvzalloc(F2FS_I_SB(src_inode), array_size(olen, sizeof(block_t)), - GFP_KERNEL); + GFP_NOFS); if (!src_blkaddr) return -ENOMEM; do_replace = f2fs_kvzalloc(F2FS_I_SB(src_inode), array_size(olen, sizeof(int)), - GFP_KERNEL); + GFP_NOFS); if (!do_replace) { kvfree(src_blkaddr); return -ENOMEM; @@ -1556,12 +1640,44 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (off_end) map.m_len++; - if (f2fs_is_pinned_file(inode)) - map.m_seg_type = CURSEG_COLD_DATA; + if (!map.m_len) + return 0; - err = f2fs_map_blocks(inode, &map, 1, (f2fs_is_pinned_file(inode) ? - F2FS_GET_BLOCK_PRE_DIO : - F2FS_GET_BLOCK_PRE_AIO)); + if (f2fs_is_pinned_file(inode)) { + block_t len = (map.m_len >> sbi->log_blocks_per_seg) << + sbi->log_blocks_per_seg; + block_t done = 0; + + if (map.m_len % sbi->blocks_per_seg) + len += sbi->blocks_per_seg; + + map.m_len = sbi->blocks_per_seg; +next_alloc: + if (has_not_enough_free_secs(sbi, 0, + GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { + down_write(&sbi->gc_lock); + err = f2fs_gc(sbi, true, false, NULL_SEGNO); + if (err && err != -ENODATA && err != -EAGAIN) + goto out_err; + } + + down_write(&sbi->pin_sem); + map.m_seg_type = CURSEG_COLD_DATA_PINNED; + f2fs_allocate_new_segments(sbi, CURSEG_COLD_DATA); + err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO); + up_write(&sbi->pin_sem); + + done += map.m_len; + len -= map.m_len; + map.m_lblk += map.m_len; + if (!err && len) + goto next_alloc; + + map.m_len = done; + } else { + err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO); + } +out_err: if (err) { pgoff_t last_off; @@ -1597,6 +1713,8 @@ static long f2fs_fallocate(struct file *file, int mode, return -EIO; if (!f2fs_is_checkpoint_ready(F2FS_I_SB(inode))) return -ENOSPC; + if (!f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; /* f2fs only support ->fallocate for regular file */ if (!S_ISREG(inode->i_mode)) @@ -1606,6 +1724,11 @@ static long f2fs_fallocate(struct file *file, int mode, (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) return -EOPNOTSUPP; + if (f2fs_compressed_file(inode) && + (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | + FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) + return -EOPNOTSUPP; + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE)) @@ -1683,19 +1806,55 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id) static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) { struct f2fs_inode_info *fi = F2FS_I(inode); + u32 masked_flags = fi->i_flags & mask; + + f2fs_bug_on(F2FS_I_SB(inode), (iflags & ~mask)); /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) return -EPERM; - if ((iflags ^ fi->i_flags) & F2FS_CASEFOLD_FL) { + if ((iflags ^ masked_flags) & F2FS_CASEFOLD_FL) { if (!f2fs_sb_has_casefold(F2FS_I_SB(inode))) return -EOPNOTSUPP; if (!f2fs_empty_dir(inode)) return -ENOTEMPTY; } + if (iflags & (F2FS_COMPR_FL | F2FS_NOCOMP_FL)) { + if (!f2fs_sb_has_compression(F2FS_I_SB(inode))) + return -EOPNOTSUPP; + if ((iflags & F2FS_COMPR_FL) && (iflags & F2FS_NOCOMP_FL)) + return -EINVAL; + } + + if ((iflags ^ masked_flags) & F2FS_COMPR_FL) { + if (S_ISREG(inode->i_mode) && + (masked_flags & F2FS_COMPR_FL || i_size_read(inode) || + F2FS_HAS_BLOCKS(inode))) + return -EINVAL; + if (iflags & F2FS_NOCOMP_FL) + return -EINVAL; + if (iflags & F2FS_COMPR_FL) { + int err = f2fs_convert_inline_inode(inode); + + if (err) + return err; + + if (!f2fs_may_compress(inode)) + return -EINVAL; + + set_compress_context(inode); + } + } + if ((iflags ^ masked_flags) & F2FS_NOCOMP_FL) { + if (masked_flags & F2FS_COMPR_FL) + return -EINVAL; + } + fi->i_flags = iflags | (fi->i_flags & ~mask); + f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) && + (fi->i_flags & F2FS_NOCOMP_FL)); if (fi->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); @@ -1721,11 +1880,13 @@ static const struct { u32 iflag; u32 fsflag; } f2fs_fsflags_map[] = { + { F2FS_COMPR_FL, FS_COMPR_FL }, { F2FS_SYNC_FL, FS_SYNC_FL }, { F2FS_IMMUTABLE_FL, FS_IMMUTABLE_FL }, { F2FS_APPEND_FL, FS_APPEND_FL }, { F2FS_NODUMP_FL, FS_NODUMP_FL }, { F2FS_NOATIME_FL, FS_NOATIME_FL }, + { F2FS_NOCOMP_FL, FS_NOCOMP_FL }, { F2FS_INDEX_FL, FS_INDEX_FL }, { F2FS_DIRSYNC_FL, FS_DIRSYNC_FL }, { F2FS_PROJINHERIT_FL, FS_PROJINHERIT_FL }, @@ -1733,11 +1894,13 @@ static const struct { }; #define F2FS_GETTABLE_FS_FL ( \ + FS_COMPR_FL | \ FS_SYNC_FL | \ FS_IMMUTABLE_FL | \ FS_APPEND_FL | \ FS_NODUMP_FL | \ FS_NOATIME_FL | \ + FS_NOCOMP_FL | \ FS_INDEX_FL | \ FS_DIRSYNC_FL | \ FS_PROJINHERIT_FL | \ @@ -1748,11 +1911,13 @@ static const struct { FS_CASEFOLD_FL) #define F2FS_SETTABLE_FS_FL ( \ + FS_COMPR_FL | \ FS_SYNC_FL | \ FS_IMMUTABLE_FL | \ FS_APPEND_FL | \ FS_NODUMP_FL | \ FS_NOATIME_FL | \ + FS_NOCOMP_FL | \ FS_DIRSYNC_FL | \ FS_PROJINHERIT_FL | \ FS_CASEFOLD_FL) @@ -1873,6 +2038,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) inode_lock(inode); + f2fs_disable_compressed_file(inode); + if (f2fs_is_atomic_file(inode)) { if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) ret = -EINVAL; @@ -1901,6 +2068,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) spin_lock(&sbi->inode_lock[ATOMIC_FILE]); if (list_empty(&fi->inmem_ilist)) list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]); + sbi->atomic_files++; spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); /* add inode in inmem_list first and set atomic_file */ @@ -1910,7 +2078,6 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); F2FS_I(inode)->inmem_task = current; - stat_inc_atomic_write(inode); stat_update_max_atomic_write(inode); out: inode_unlock(inode); @@ -2299,12 +2466,12 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg) return ret; if (!sync) { - if (!mutex_trylock(&sbi->gc_mutex)) { + if (!down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; } } else { - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); } ret = f2fs_gc(sbi, sync, true, NULL_SEGNO); @@ -2342,12 +2509,12 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg) do_more: if (!range.sync) { - if (!mutex_trylock(&sbi->gc_mutex)) { + if (!down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; } } else { - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); } ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start)); @@ -2778,7 +2945,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) end_segno = min(start_segno + range.segments, dev_end_segno); while (start_segno < end_segno) { - if (!mutex_trylock(&sbi->gc_mutex)) { + if (!down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; } @@ -3073,10 +3240,16 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) ret = -EAGAIN; goto out; } + ret = f2fs_convert_inline_inode(inode); if (ret) goto out; + if (f2fs_disable_compressed_file(inode)) { + ret = -EOPNOTSUPP; + goto out; + } + set_inode_flag(inode, FI_PIN_FILE); ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; done: @@ -3259,6 +3432,17 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } } +static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + + if (!f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; + + return generic_file_read_iter(iocb, iter); +} + static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; @@ -3270,6 +3454,9 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; } + if (!f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; + if (iocb->ki_flags & IOCB_NOWAIT) { if (!inode_trylock(inode)) { ret = -EAGAIN; @@ -3298,18 +3485,41 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = -EAGAIN; goto out; } - } else { - preallocated = true; - target_size = iocb->ki_pos + iov_iter_count(from); - - err = f2fs_preallocate_blocks(iocb, from); - if (err) { - clear_inode_flag(inode, FI_NO_PREALLOC); - inode_unlock(inode); - ret = err; - goto out; - } + goto write; } + + if (is_inode_flag_set(inode, FI_NO_PREALLOC)) + goto write; + + if (iocb->ki_flags & IOCB_DIRECT) { + /* + * Convert inline data for Direct I/O before entering + * f2fs_direct_IO(). + */ + err = f2fs_convert_inline_inode(inode); + if (err) + goto out_err; + /* + * If force_buffere_io() is true, we have to allocate + * blocks all the time, since f2fs_direct_IO will fall + * back to buffered IO. + */ + if (!f2fs_force_buffered_io(inode, iocb, from) && + allow_outplace_dio(inode, iocb, from)) + goto write; + } + preallocated = true; + target_size = iocb->ki_pos + iov_iter_count(from); + + err = f2fs_preallocate_blocks(iocb, from); + if (err) { +out_err: + clear_inode_flag(inode, FI_NO_PREALLOC); + inode_unlock(inode); + ret = err; + goto out; + } +write: ret = __generic_file_write_iter(iocb, from); clear_inode_flag(inode, FI_NO_PREALLOC); @@ -3381,7 +3591,7 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) const struct file_operations f2fs_file_operations = { .llseek = f2fs_llseek, - .read_iter = generic_file_read_iter, + .read_iter = f2fs_file_read_iter, .write_iter = f2fs_file_write_iter, .open = f2fs_file_open, .release = f2fs_release_file, diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6ba0f4c9abc8..7c28b794e438 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -54,7 +54,7 @@ static int gc_thread_func(void *data) } if (time_to_inject(sbi, FAULT_CHECKPOINT)) { - f2fs_show_injection_info(FAULT_CHECKPOINT); + f2fs_show_injection_info(sbi, FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); } @@ -78,18 +78,18 @@ static int gc_thread_func(void *data) */ if (sbi->gc_mode == GC_URGENT) { wait_ms = gc_th->urgent_sleep_time; - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); goto do_gc; } - if (!mutex_trylock(&sbi->gc_mutex)) { + if (!down_write_trylock(&sbi->gc_lock)) { stat_other_skip_bggc_count(sbi); goto next; } if (!is_idle(sbi, GC_TIME)) { increase_sleep_time(gc_th, &wait_ms); - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); stat_io_skip_bggc_count(sbi); goto next; } @@ -99,7 +99,7 @@ static int gc_thread_func(void *data) else increase_sleep_time(gc_th, &wait_ms); do_gc: - stat_inc_bggc_count(sbi); + stat_inc_bggc_count(sbi->stat_info); /* if return value is not zero, no victim was selected */ if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO)) @@ -1012,8 +1012,14 @@ next_step: block_t start_bidx; nid_t nid = le32_to_cpu(entry->nid); - /* stop BG_GC if there is not enough free sections. */ - if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) + /* + * stop BG_GC if there is not enough free sections. + * Or, stop GC if the segment becomes fully valid caused by + * race condition along with SSR block allocation. + */ + if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) || + get_valid_blocks(sbi, segno, false) == + sbi->blocks_per_seg) return submitted; if (check_valid_map(sbi, segno, off) == 0) @@ -1043,8 +1049,10 @@ next_step: if (phase == 3) { inode = f2fs_iget(sb, dni.ino); - if (IS_ERR(inode) || is_bad_inode(inode)) + if (IS_ERR(inode) || is_bad_inode(inode)) { + set_sbi_flag(sbi, SBI_NEED_FSCK); continue; + } if (!down_write_trylock( &F2FS_I(inode)->i_gc_rwsem[WRITE])) { @@ -1362,7 +1370,7 @@ stop: reserved_segments(sbi), prefree_segments(sbi)); - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); put_gc_inode(&gc_list); @@ -1401,9 +1409,9 @@ static int free_segment_range(struct f2fs_sb_info *sbi, unsigned int start, .iroot = RADIX_TREE_INIT(GFP_NOFS), }; - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); do_garbage_collect(sbi, segno, &gc_list, FG_GC); - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); put_gc_inode(&gc_list); if (get_valid_blocks(sbi, segno, true)) @@ -1437,11 +1445,20 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) raw_sb->segment_count_main = cpu_to_le32(segment_count_main + segs); raw_sb->block_count = cpu_to_le64(block_count + (long long)segs * sbi->blocks_per_seg); + if (f2fs_is_multi_device(sbi)) { + int last_dev = sbi->s_ndevs - 1; + int dev_segs = + le32_to_cpu(raw_sb->devs[last_dev].total_segments); + + raw_sb->devs[last_dev].total_segments = + cpu_to_le32(dev_segs + segs); + } } static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) { int segs = secs * sbi->segs_per_sec; + long long blks = (long long)segs * sbi->blocks_per_seg; long long user_block_count = le64_to_cpu(F2FS_CKPT(sbi)->user_block_count); @@ -1449,8 +1466,20 @@ static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) MAIN_SEGS(sbi) = (int)MAIN_SEGS(sbi) + segs; FREE_I(sbi)->free_sections = (int)FREE_I(sbi)->free_sections + secs; FREE_I(sbi)->free_segments = (int)FREE_I(sbi)->free_segments + segs; - F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + - (long long)segs * sbi->blocks_per_seg); + F2FS_CKPT(sbi)->user_block_count = cpu_to_le64(user_block_count + blks); + + if (f2fs_is_multi_device(sbi)) { + int last_dev = sbi->s_ndevs - 1; + + FDEV(last_dev).total_segments = + (int)FDEV(last_dev).total_segments + segs; + FDEV(last_dev).end_blk = + (long long)FDEV(last_dev).end_blk + blks; +#ifdef CONFIG_BLK_DEV_ZONED + FDEV(last_dev).nr_blkz = (int)FDEV(last_dev).nr_blkz + + (int)(blks >> sbi->log_blocks_per_blkz); +#endif + } } int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) @@ -1465,6 +1494,15 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) if (block_count > old_block_count) return -EINVAL; + if (f2fs_is_multi_device(sbi)) { + int last_dev = sbi->s_ndevs - 1; + __u64 last_segs = FDEV(last_dev).total_segments; + + if (block_count + last_segs * sbi->blocks_per_seg <= + old_block_count) + return -EINVAL; + } + /* new fs size should align to section size */ div_u64_rem(block_count, BLKS_PER_SEC(sbi), &rem); if (rem) diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index 5bc4dcd8fc03..8f7ee4362312 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -68,8 +68,9 @@ static void str2hashbuf(const unsigned char *msg, size_t len, *buf++ = pad; } -static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info, - struct fscrypt_name *fname) +static f2fs_hash_t __f2fs_dentry_hash(const struct inode *dir, + const struct qstr *name_info, + const struct fscrypt_name *fname) { __u32 hash; f2fs_hash_t f2fs_hash; @@ -79,12 +80,17 @@ static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info, size_t len = name_info->len; /* encrypted bigname case */ - if (fname && !fname->disk_name.name) + if (fname && fname->is_ciphertext_name) return cpu_to_le32(fname->hash); if (is_dot_dotdot(name_info)) return 0; + if (IS_CASEFOLDED(dir) && IS_ENCRYPTED(dir)) { + f2fs_hash = cpu_to_le32(fscrypt_fname_siphash(dir, name_info)); + return f2fs_hash; + } + /* Initialize the default seed for the hash checksum functions */ buf[0] = 0x67452301; buf[1] = 0xefcdab89; @@ -106,35 +112,38 @@ static f2fs_hash_t __f2fs_dentry_hash(const struct qstr *name_info, } f2fs_hash_t f2fs_dentry_hash(const struct inode *dir, - const struct qstr *name_info, struct fscrypt_name *fname) + const struct qstr *name_info, const struct fscrypt_name *fname) { #ifdef CONFIG_UNICODE struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); - const struct unicode_map *um = sbi->s_encoding; + const struct unicode_map *um = dir->i_sb->s_encoding; int r, dlen; unsigned char *buff; struct qstr folded; + const struct qstr *name = fname ? fname->usr_fname : name_info; if (!name_info->len || !IS_CASEFOLDED(dir)) goto opaque_seq; + if (IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir)) + goto opaque_seq; + buff = f2fs_kzalloc(sbi, sizeof(char) * PATH_MAX, GFP_KERNEL); if (!buff) return -ENOMEM; - - dlen = utf8_casefold(um, name_info, buff, PATH_MAX); + dlen = utf8_casefold(um, name, buff, PATH_MAX); if (dlen < 0) { kvfree(buff); goto opaque_seq; } folded.name = buff; folded.len = dlen; - r = __f2fs_dentry_hash(&folded, fname); + r = __f2fs_dentry_hash(dir, &folded, fname); kvfree(buff); return r; opaque_seq: #endif - return __f2fs_dentry_hash(name_info, fname); + return __f2fs_dentry_hash(dir, name_info, fname); } diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index e98d53d5e4aa..c68a32369f44 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -386,7 +386,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, struct f2fs_dentry_ptr src, dst; int err; - page = f2fs_grab_cache_page(dir->i_mapping, 0, false); + page = f2fs_grab_cache_page(dir->i_mapping, 0, true); if (!page) { f2fs_put_page(ipage, 1); return -ENOMEM; @@ -483,8 +483,8 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry) ino = le32_to_cpu(de->ino); fake_mode = f2fs_get_de_type(de) << S_SHIFT; - err = f2fs_add_regular_entry(dir, &new_name, NULL, NULL, - ino, fake_mode); + err = f2fs_add_regular_entry(dir, &new_name, NULL, + de->hash_code, NULL, ino, fake_mode); if (err) goto punch_dentry_pages; @@ -548,7 +548,7 @@ recover: return err; } -static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, +static int do_convert_inline_dir(struct inode *dir, struct page *ipage, void *inline_dentry) { if (!F2FS_I(dir)->i_dir_level) @@ -557,8 +557,46 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, return f2fs_move_rehashed_dirents(dir, ipage, inline_dentry); } +int f2fs_try_convert_inline_dir(struct inode *dir, struct dentry *dentry) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + struct fscrypt_name fname; + void *inline_dentry = NULL; + int err = 0; + + if (!f2fs_has_inline_dentry(dir)) + return 0; + + f2fs_lock_op(sbi); + + err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &fname); + if (err) + goto out; + + ipage = f2fs_get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto out; + } + + if (f2fs_has_enough_room(dir, ipage, &fname)) { + f2fs_put_page(ipage, 1); + goto out; + } + + inline_dentry = inline_data_addr(dir, ipage); + + err = do_convert_inline_dir(dir, ipage, inline_dentry); + if (!err) + f2fs_put_page(ipage, 1); +out: + f2fs_unlock_op(sbi); + return err; +} + int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, - const struct qstr *orig_name, + const struct fscrypt_name *fname, struct inode *inode, nid_t ino, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); @@ -569,6 +607,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, struct f2fs_dentry_ptr d; int slots = GET_DENTRY_SLOTS(new_name->len); struct page *page = NULL; + const struct qstr *orig_name = fname->usr_fname; int err = 0; ipage = f2fs_get_node_page(sbi, dir->i_ino); @@ -580,7 +619,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, bit_pos = f2fs_room_for_filename(d.bitmap, slots, d.max); if (bit_pos >= d.max) { - err = f2fs_convert_inline_dir(dir, ipage, inline_dentry); + err = do_convert_inline_dir(dir, ipage, inline_dentry); if (err) return err; err = -EAGAIN; @@ -599,7 +638,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, f2fs_wait_on_page_writeback(ipage, NODE, true, true); - name_hash = f2fs_dentry_hash(dir, new_name, NULL); + name_hash = f2fs_dentry_hash(dir, new_name, fname); f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); set_page_dirty(ipage); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index db4fec30c30d..78c3f1d70f1d 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -200,6 +200,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_inode *ri = F2FS_INODE(node_page); unsigned long long iblocks; iblocks = le64_to_cpu(F2FS_INODE(node_page)->i_blocks); @@ -286,6 +287,19 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) && + fi->i_flags & F2FS_COMPR_FL && + F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, + i_log_cluster_size)) { + if (ri->i_compress_algorithm >= COMPRESS_MAX) + return false; + if (le64_to_cpu(ri->i_compr_blocks) > inode->i_blocks) + return false; + if (ri->i_log_cluster_size < MIN_COMPRESS_LOG_SIZE || + ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) + return false; + } + return true; } @@ -407,6 +421,18 @@ static int do_read_inode(struct inode *inode) fi->i_crtime.tv_nsec = le32_to_cpu(ri->i_crtime_nsec); } + if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) && + (fi->i_flags & F2FS_COMPR_FL)) { + if (F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, + i_log_cluster_size)) { + fi->i_compr_blocks = le64_to_cpu(ri->i_compr_blocks); + fi->i_compress_algorithm = ri->i_compress_algorithm; + fi->i_log_cluster_size = ri->i_log_cluster_size; + fi->i_cluster_size = 1 << fi->i_log_cluster_size; + set_inode_flag(inode, FI_COMPRESSED_FILE); + } + } + F2FS_I(inode)->i_disk_time[0] = inode->i_atime; F2FS_I(inode)->i_disk_time[1] = inode->i_ctime; F2FS_I(inode)->i_disk_time[2] = inode->i_mtime; @@ -416,6 +442,8 @@ static int do_read_inode(struct inode *inode) stat_inc_inline_xattr(inode); stat_inc_inline_inode(inode); stat_inc_inline_dir(inode); + stat_inc_compr_inode(inode); + stat_add_compr_blocks(inode, F2FS_I(inode)->i_compr_blocks); return 0; } @@ -569,6 +597,17 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page) ri->i_crtime_nsec = cpu_to_le32(F2FS_I(inode)->i_crtime.tv_nsec); } + + if (f2fs_sb_has_compression(F2FS_I_SB(inode)) && + F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize, + i_log_cluster_size)) { + ri->i_compr_blocks = + cpu_to_le64(F2FS_I(inode)->i_compr_blocks); + ri->i_compress_algorithm = + F2FS_I(inode)->i_compress_algorithm; + ri->i_log_cluster_size = + F2FS_I(inode)->i_log_cluster_size; + } } __set_inode_rdev(inode, ri); @@ -615,7 +654,11 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) inode->i_ino == F2FS_META_INO(sbi)) return 0; - if (!is_inode_flag_set(inode, FI_DIRTY_INODE)) + /* + * atime could be updated without dirtying f2fs inode in lazytime mode + */ + if (f2fs_is_time_consistent(inode) && + !is_inode_flag_set(inode, FI_DIRTY_INODE)) return 0; if (!f2fs_is_checkpoint_ready(sbi)) @@ -677,7 +720,7 @@ retry: err = f2fs_truncate(inode); if (time_to_inject(sbi, FAULT_EVICT_INODE)) { - f2fs_show_injection_info(FAULT_EVICT_INODE); + f2fs_show_injection_info(sbi, FAULT_EVICT_INODE); err = -EIO; } @@ -707,6 +750,8 @@ no_delete: stat_dec_inline_xattr(inode); stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); + stat_dec_compr_inode(inode); + stat_sub_compr_blocks(inode, F2FS_I(inode)->i_compr_blocks); if (likely(!f2fs_cp_error(sbi) && !is_sbi_flag_set(sbi, SBI_CP_DISABLED))) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index f1716a2a645e..198e77c15f59 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -119,6 +119,13 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) set_inode_flag(inode, FI_PROJ_INHERIT); + if (f2fs_sb_has_compression(sbi)) { + /* Inherit the compression flag in directory */ + if ((F2FS_I(dir)->i_flags & F2FS_COMPR_FL) && + f2fs_may_compress(inode)) + set_compress_context(inode); + } + f2fs_set_inode_flags(inode); trace_f2fs_new_inode(inode, 0); @@ -149,6 +156,9 @@ static inline int is_extension_exist(const unsigned char *s, const char *sub) size_t sublen = strlen(sub); int i; + if (sublen == 1 && *sub == '*') + return 1; + /* * filename format of multimedia file should be defined as: * "filename + '.' + extension + (optional: '.' + temp extension)". @@ -262,6 +272,45 @@ int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name, return 0; } +static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode, + const unsigned char *name) +{ + __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; + unsigned char (*ext)[F2FS_EXTENSION_LEN]; + unsigned int ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + int i, cold_count, hot_count; + + if (!f2fs_sb_has_compression(sbi) || + is_inode_flag_set(inode, FI_COMPRESSED_FILE) || + F2FS_I(inode)->i_flags & F2FS_NOCOMP_FL || + !f2fs_may_compress(inode)) + return; + + down_read(&sbi->sb_lock); + + cold_count = le32_to_cpu(sbi->raw_super->extension_count); + hot_count = sbi->raw_super->hot_ext_count; + + for (i = cold_count; i < cold_count + hot_count; i++) { + if (is_extension_exist(name, extlist[i])) { + up_read(&sbi->sb_lock); + return; + } + } + + up_read(&sbi->sb_lock); + + ext = F2FS_OPTION(sbi).extensions; + + for (i = 0; i < ext_cnt; i++) { + if (!is_extension_exist(name, ext[i])) + continue; + + set_compress_context(inode); + return; + } +} + static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { @@ -286,6 +335,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (!test_opt(sbi, DISABLE_EXT_IDENTIFY)) set_file_temperature(sbi, inode, dentry->d_name.name); + set_compress_inode(sbi, inode, dentry->d_name.name); + inode->i_op = &f2fs_file_inode_operations; inode->i_fop = &f2fs_file_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; @@ -443,6 +494,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, } err = fscrypt_prepare_lookup(dir, dentry, &fname); + generic_set_encrypted_ci_d_ops(dir, dentry); if (err == -ENOENT) goto out_splice; if (err) @@ -797,6 +849,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, if (whiteout) { f2fs_i_links_write(inode, false); + inode->i_state |= I_LINKABLE; *whiteout = inode; } else { d_tmpfile(dentry, inode); @@ -849,12 +902,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); struct inode *whiteout = NULL; - struct page *old_dir_page; + struct page *old_dir_page = NULL; struct page *old_page, *new_page = NULL; struct f2fs_dir_entry *old_dir_entry = NULL; struct f2fs_dir_entry *old_entry; struct f2fs_dir_entry *new_entry; - bool is_old_inline = f2fs_has_inline_dentry(old_dir); int err; if (unlikely(f2fs_cp_error(sbi))) @@ -867,6 +919,26 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, F2FS_I(old_dentry->d_inode)->i_projid))) return -EXDEV; + /* + * If new_inode is null, the below renaming flow will + * add a link in old_dir which can conver inline_dir. + * After then, if we failed to get the entry due to other + * reasons like ENOMEM, we had to remove the new entry. + * Instead of adding such the error handling routine, let's + * simply convert first here. + */ + if (old_dir == new_dir && !new_inode) { + err = f2fs_try_convert_inline_dir(old_dir, new_dentry); + if (err) + return err; + } + + if (flags & RENAME_WHITEOUT) { + err = f2fs_create_whiteout(old_dir, &whiteout); + if (err) + return err; + } + err = dquot_initialize(old_dir); if (err) goto out; @@ -898,17 +970,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } } - if (flags & RENAME_WHITEOUT) { - err = f2fs_create_whiteout(old_dir, &whiteout); - if (err) - goto out_dir; - } - if (new_inode) { err = -ENOTEMPTY; if (old_dir_entry && !f2fs_empty_dir(new_inode)) - goto out_whiteout; + goto out_dir; err = -ENOENT; new_entry = f2fs_find_entry(new_dir, &new_dentry->d_name, @@ -916,7 +982,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!new_entry) { if (IS_ERR(new_page)) err = PTR_ERR(new_page); - goto out_whiteout; + goto out_dir; } f2fs_balance_fs(sbi, true); @@ -928,6 +994,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, goto put_out_dir; f2fs_set_link(new_dir, new_entry, new_page, old_inode); + new_page = NULL; new_inode->i_ctime = current_time(new_inode); down_write(&F2FS_I(new_inode)->i_sem); @@ -948,49 +1015,28 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, err = f2fs_add_link(new_dentry, old_inode); if (err) { f2fs_unlock_op(sbi); - goto out_whiteout; + goto out_dir; } if (old_dir_entry) f2fs_i_links_write(new_dir, true); - - /* - * old entry and new entry can locate in the same inline - * dentry in inode, when attaching new entry in inline dentry, - * it could force inline dentry conversion, after that, - * old_entry and old_page will point to wrong address, in - * order to avoid this, let's do the check and update here. - */ - if (is_old_inline && !f2fs_has_inline_dentry(old_dir)) { - f2fs_put_page(old_page, 0); - old_page = NULL; - - old_entry = f2fs_find_entry(old_dir, - &old_dentry->d_name, &old_page); - if (!old_entry) { - err = -ENOENT; - if (IS_ERR(old_page)) - err = PTR_ERR(old_page); - f2fs_unlock_op(sbi); - goto out_whiteout; - } - } } down_write(&F2FS_I(old_inode)->i_sem); if (!old_dir_entry || whiteout) file_lost_pino(old_inode); else - F2FS_I(old_inode)->i_pino = new_dir->i_ino; + /* adjust dir's i_pino to pass fsck check */ + f2fs_i_pino_write(old_inode, new_dir->i_ino); up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = current_time(old_inode); f2fs_mark_inode_dirty_sync(old_inode, false); f2fs_delete_entry(old_entry, old_page, old_dir, NULL); + old_page = NULL; if (whiteout) { - whiteout->i_state |= I_LINKABLE; set_inode_flag(whiteout, FI_INC_LINK); err = f2fs_add_link(old_dentry, whiteout); if (err) @@ -1024,17 +1070,15 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, put_out_dir: f2fs_unlock_op(sbi); - if (new_page) - f2fs_put_page(new_page, 0); -out_whiteout: - if (whiteout) - iput(whiteout); + f2fs_put_page(new_page, 0); out_dir: if (old_dir_entry) f2fs_put_page(old_dir_page, 0); out_old: f2fs_put_page(old_page, 0); out: + if (whiteout) + iput(whiteout); return err; } @@ -1141,7 +1185,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_set_link(old_dir, old_entry, old_page, new_inode); down_write(&F2FS_I(old_inode)->i_sem); - file_lost_pino(old_inode); + if (!old_dir_entry) + file_lost_pino(old_inode); + else + /* adjust dir's i_pino to pass fsck check */ + f2fs_i_pino_write(old_inode, new_dir->i_ino); up_write(&F2FS_I(old_inode)->i_sem); old_dir->i_ctime = current_time(old_dir); @@ -1156,7 +1204,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_set_link(new_dir, new_entry, new_page, old_inode); down_write(&F2FS_I(new_inode)->i_sem); - file_lost_pino(new_inode); + if (!new_dir_entry) + file_lost_pino(new_inode); + else + /* adjust dir's i_pino to pass fsck check */ + f2fs_i_pino_write(new_inode, old_dir->i_ino); up_write(&F2FS_I(new_inode)->i_sem); new_dir->i_ctime = current_time(new_dir); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 14c1896a6842..494947d282d5 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2349,7 +2349,6 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, if (ret) { up_read(&nm_i->nat_tree_lock); - f2fs_bug_on(sbi, !mount); f2fs_err(sbi, "NAT is corrupt, run fsck to fix it"); return ret; } @@ -2399,7 +2398,7 @@ bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct free_nid *i = NULL; retry: if (time_to_inject(sbi, FAULT_ALLOC_NID)) { - f2fs_show_injection_info(FAULT_ALLOC_NID); + f2fs_show_injection_info(sbi, FAULT_ALLOC_NID); return false; } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 69e660ab6823..f177e2749f19 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -711,7 +711,7 @@ next: f2fs_put_page(page, 1); } if (!err) - f2fs_allocate_new_segments(sbi); + f2fs_allocate_new_segments(sbi, NO_CHECK_TYPE); return err; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 808709581481..dea5e265c8cc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -288,6 +288,8 @@ void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure) struct list_head *head = &sbi->inode_list[ATOMIC_FILE]; struct inode *inode; struct f2fs_inode_info *fi; + unsigned int count = sbi->atomic_files; + unsigned int looped = 0; next: spin_lock(&sbi->inode_lock[ATOMIC_FILE]); if (list_empty(head)) { @@ -296,22 +298,26 @@ next: } fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist); inode = igrab(&fi->vfs_inode); + if (inode) + list_move_tail(&fi->inmem_ilist, head); spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); if (inode) { if (gc_failure) { - if (fi->i_gc_failures[GC_FAILURE_ATOMIC]) - goto drop; - goto skip; + if (!fi->i_gc_failures[GC_FAILURE_ATOMIC]) + goto skip; } -drop: set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); f2fs_drop_inmem_pages(inode); +skip: iput(inode); } -skip: congestion_wait(BLK_RW_ASYNC, HZ/50); cond_resched(); + if (gc_failure) { + if (++looped >= count) + return; + } goto next; } @@ -327,13 +333,15 @@ void f2fs_drop_inmem_pages(struct inode *inode) mutex_unlock(&fi->inmem_lock); } - clear_inode_flag(inode, FI_ATOMIC_FILE); fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0; - stat_dec_atomic_write(inode); spin_lock(&sbi->inode_lock[ATOMIC_FILE]); if (!list_empty(&fi->inmem_ilist)) list_del_init(&fi->inmem_ilist); + if (f2fs_is_atomic_file(inode)) { + clear_inode_flag(inode, FI_ATOMIC_FILE); + sbi->atomic_files--; + } spin_unlock(&sbi->inode_lock[ATOMIC_FILE]); } @@ -480,7 +488,7 @@ int f2fs_commit_inmem_pages(struct inode *inode) void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { if (time_to_inject(sbi, FAULT_CHECKPOINT)) { - f2fs_show_injection_info(FAULT_CHECKPOINT); + f2fs_show_injection_info(sbi, FAULT_CHECKPOINT); f2fs_stop_checkpoint(sbi, false); } @@ -496,7 +504,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) * dir/node pages without enough free segments. */ if (has_not_enough_free_secs(sbi, 0, 0)) { - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); f2fs_gc(sbi, false, false, NULL_SEGNO); } } @@ -1008,8 +1016,9 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, if (dc->error) printk_ratelimited( - "%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d", - KERN_INFO, dc->lstart, dc->start, dc->len, dc->error); + "%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d", + KERN_INFO, sbi->sb->s_id, + dc->lstart, dc->start, dc->len, dc->error); __detach_discard_cmd(dcc, dc); } @@ -1149,7 +1158,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi, dc->len += len; if (time_to_inject(sbi, FAULT_DISCARD)) { - f2fs_show_injection_info(FAULT_DISCARD); + f2fs_show_injection_info(sbi, FAULT_DISCARD); err = -EIO; goto submit; } @@ -2214,7 +2223,7 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) struct sit_info *sit_i = SIT_I(sbi); f2fs_bug_on(sbi, addr == NULL_ADDR); - if (addr == NEW_ADDR) + if (addr == NEW_ADDR || addr == COMPRESS_ADDR) return; invalidate_mapping_pages(META_MAPPING(sbi), addr, addr); @@ -2690,7 +2699,7 @@ unlock: up_read(&SM_I(sbi)->curseg_lock); } -void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) +void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg; unsigned int old_segno; @@ -2699,10 +2708,17 @@ void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) down_write(&SIT_I(sbi)->sentry_lock); for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) { + if (type != NO_CHECK_TYPE && i != type) + continue; + curseg = CURSEG_I(sbi, i); - old_segno = curseg->segno; - SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); - locate_dirty_segment(sbi, old_segno); + if (type == NO_CHECK_TYPE || curseg->next_blkoff || + get_valid_blocks(sbi, curseg->segno, false) || + get_ckpt_valid_blocks(sbi, curseg->segno)) { + old_segno = curseg->segno; + SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true); + locate_dirty_segment(sbi, old_segno); + } } up_write(&SIT_I(sbi)->sentry_lock); @@ -2843,9 +2859,9 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) if (sbi->discard_blks == 0) goto out; - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); err = f2fs_write_checkpoint(sbi, &cpc); - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); if (err) goto out; @@ -3018,7 +3034,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) if (fio->type == DATA) { struct inode *inode = fio->page->mapping->host; - if (is_cold_data(fio->page) || file_is_cold(inode)) + if (is_cold_data(fio->page) || file_is_cold(inode) || + f2fs_compressed_file(inode)) return CURSEG_COLD_DATA; if (file_is_hot(inode) || is_inode_flag_set(inode, FI_HOT_DATA) || @@ -3068,6 +3085,19 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, { struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, type); + bool put_pin_sem = false; + + if (type == CURSEG_COLD_DATA) { + /* GC during CURSEG_COLD_DATA_PINNED allocation */ + if (down_read_trylock(&sbi->pin_sem)) { + put_pin_sem = true; + } else { + type = CURSEG_WARM_DATA; + curseg = CURSEG_I(sbi, type); + } + } else if (type == CURSEG_COLD_DATA_PINNED) { + type = CURSEG_COLD_DATA; + } down_read(&SM_I(sbi)->curseg_lock); @@ -3133,6 +3163,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, mutex_unlock(&curseg->curseg_mutex); up_read(&SM_I(sbi)->curseg_lock); + + if (put_pin_sem) + up_read(&sbi->pin_sem); } static void update_device_state(struct f2fs_io_info *fio) @@ -3255,7 +3288,7 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) stat_inc_inplace_blocks(fio->sbi); - if (fio->bio) + if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE))) err = f2fs_merge_page_bio(fio); else err = f2fs_submit_page_bio(fio); @@ -3379,7 +3412,10 @@ void f2fs_wait_on_page_writeback(struct page *page, if (PageWriteback(page)) { struct f2fs_sb_info *sbi = F2FS_P_SB(page); + /* submit cached LFS IO */ f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type); + /* sbumit cached IPU IO */ + f2fs_submit_merged_ipu_write(sbi, NULL, page); if (ordered) { wait_on_page_writeback(page); f2fs_bug_on(sbi, locked && PageWriteback(page)); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 325781a1ae4d..459dc3901a57 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -200,18 +200,6 @@ struct segment_allocation { void (*allocate_segment)(struct f2fs_sb_info *, int, bool); }; -/* - * this value is set in page as a private data which indicate that - * the page is atomically written, and it is in inmem_pages list. - */ -#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1) -#define DUMMY_WRITTEN_PAGE ((unsigned long)-2) - -#define IS_ATOMIC_WRITTEN_PAGE(page) \ - (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE) -#define IS_DUMMY_WRITTEN_PAGE(page) \ - (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE) - #define MAX_SKIP_GC_COUNT 16 struct inmem_pages { @@ -313,6 +301,8 @@ struct sit_entry_set { */ static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type) { + if (type == CURSEG_COLD_DATA_PINNED) + type = CURSEG_COLD_DATA; return (struct curseg_info *)(SM_I(sbi)->curseg_array + type); } @@ -617,8 +607,10 @@ static inline int utilization(struct f2fs_sb_info *sbi) * threashold, * F2FS_IPU_FSYNC - activated in fsync path only for high performance flash * storages. IPU will be triggered only if the # of dirty - * pages over min_fsync_blocks. - * F2FS_IPUT_DISABLE - disable IPU. (=default option) + * pages over min_fsync_blocks. (=default option) + * F2FS_IPU_ASYNC - do IPU given by asynchronous write requests. + * F2FS_IPU_NOCACHE - disable IPU bio cache. + * F2FS_IPUT_DISABLE - disable IPU. (=default option in LFS mode) */ #define DEF_MIN_IPU_UTIL 70 #define DEF_MIN_FSYNC_BLOCKS 8 @@ -633,6 +625,7 @@ enum { F2FS_IPU_SSR_UTIL, F2FS_IPU_FSYNC, F2FS_IPU_ASYNC, + F2FS_IPU_NOCACHE, }; static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 060570477944..e67f2a86105f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -137,10 +137,14 @@ enum { Opt_alloc, Opt_fsync, Opt_test_dummy_encryption, + Opt_inlinecrypt, Opt_checkpoint_disable, Opt_checkpoint_disable_cap, Opt_checkpoint_disable_cap_perc, Opt_checkpoint_enable, + Opt_compress_algorithm, + Opt_compress_log_size, + Opt_compress_extension, Opt_err, }; @@ -199,10 +203,14 @@ static match_table_t f2fs_tokens = { {Opt_alloc, "alloc_mode=%s"}, {Opt_fsync, "fsync_mode=%s"}, {Opt_test_dummy_encryption, "test_dummy_encryption"}, + {Opt_inlinecrypt, "inlinecrypt"}, {Opt_checkpoint_disable, "checkpoint=disable"}, {Opt_checkpoint_disable_cap, "checkpoint=disable:%u"}, {Opt_checkpoint_disable_cap_perc, "checkpoint=disable:%u%%"}, {Opt_checkpoint_enable, "checkpoint=enable"}, + {Opt_compress_algorithm, "compress_algorithm=%s"}, + {Opt_compress_log_size, "compress_log_size=%u"}, + {Opt_compress_extension, "compress_extension=%s"}, {Opt_err, NULL}, }; @@ -391,8 +399,9 @@ static int parse_options(struct super_block *sb, char *options) { struct f2fs_sb_info *sbi = F2FS_SB(sb); substring_t args[MAX_OPT_ARGS]; + unsigned char (*ext)[F2FS_EXTENSION_LEN]; char *p, *name; - int arg = 0; + int arg = 0, ext_cnt; kuid_t uid; kgid_t gid; #ifdef CONFIG_QUOTA @@ -783,6 +792,13 @@ static int parse_options(struct super_block *sb, char *options) f2fs_info(sbi, "Test dummy encryption mode enabled"); #else f2fs_info(sbi, "Test dummy encryption mount option ignored"); +#endif + break; + case Opt_inlinecrypt: +#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT + F2FS_OPTION(sbi).inlinecrypt = true; +#else + f2fs_info(sbi, "inline encryption not supported"); #endif break; case Opt_checkpoint_disable_cap_perc: @@ -810,6 +826,66 @@ static int parse_options(struct super_block *sb, char *options) case Opt_checkpoint_enable: clear_opt(sbi, DISABLE_CHECKPOINT); break; + case Opt_compress_algorithm: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_err(sbi, "Compression feature if off"); + return -EINVAL; + } + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + if (strlen(name) == 3 && !strcmp(name, "lzo")) { + F2FS_OPTION(sbi).compress_algorithm = + COMPRESS_LZO; + } else if (strlen(name) == 3 && + !strcmp(name, "lz4")) { + F2FS_OPTION(sbi).compress_algorithm = + COMPRESS_LZ4; + } else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; + case Opt_compress_log_size: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_err(sbi, "Compression feature is off"); + return -EINVAL; + } + if (args->from && match_int(args, &arg)) + return -EINVAL; + if (arg < MIN_COMPRESS_LOG_SIZE || + arg > MAX_COMPRESS_LOG_SIZE) { + f2fs_err(sbi, + "Compress cluster log size is out of range"); + return -EINVAL; + } + F2FS_OPTION(sbi).compress_log_size = arg; + break; + case Opt_compress_extension: + if (!f2fs_sb_has_compression(sbi)) { + f2fs_err(sbi, "Compression feature is off"); + return -EINVAL; + } + name = match_strdup(&args[0]); + if (!name) + return -ENOMEM; + + ext = F2FS_OPTION(sbi).extensions; + ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt; + + if (strlen(name) >= F2FS_EXTENSION_LEN || + ext_cnt >= COMPRESS_EXT_NUM) { + f2fs_err(sbi, + "invalid extension length/number"); + kfree(name); + return -EINVAL; + } + + strcpy(ext[ext_cnt], name); + F2FS_OPTION(sbi).compress_ext_cnt++; + kfree(name); + break; default: f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value", p); @@ -1133,6 +1209,8 @@ static void f2fs_put_super(struct super_block *sb) f2fs_destroy_node_manager(sbi); f2fs_destroy_segment_manager(sbi); + f2fs_destroy_post_read_wq(sbi); + kvfree(sbi->ckpt); f2fs_unregister_sysfs(sbi); @@ -1152,7 +1230,7 @@ static void f2fs_put_super(struct super_block *sb) for (i = 0; i < NR_PAGE_TYPE; i++) kvfree(sbi->write_io[i]); #ifdef CONFIG_UNICODE - utf8_unload(sbi->s_encoding); + utf8_unload(sb->s_encoding); #endif kvfree(sbi); } @@ -1177,9 +1255,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync) cpc.reason = __get_cp_reason(sbi); - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); err = f2fs_write_checkpoint(sbi, &cpc); - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); } f2fs_trace_ios(NULL, 1); @@ -1221,9 +1299,11 @@ static int f2fs_statfs_project(struct super_block *sb, return PTR_ERR(dquot); spin_lock(&dquot->dq_dqb_lock); - limit = (dquot->dq_dqb.dqb_bsoftlimit ? - dquot->dq_dqb.dqb_bsoftlimit : - dquot->dq_dqb.dqb_bhardlimit) >> sb->s_blocksize_bits; + limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit, + dquot->dq_dqb.dqb_bhardlimit); + if (limit) + limit >>= sb->s_blocksize_bits; + if (limit && buf->f_blocks > limit) { curblock = dquot->dq_dqb.dqb_curspace >> sb->s_blocksize_bits; buf->f_blocks = limit; @@ -1232,9 +1312,9 @@ static int f2fs_statfs_project(struct super_block *sb, (buf->f_blocks - curblock) : 0; } - limit = dquot->dq_dqb.dqb_isoftlimit ? - dquot->dq_dqb.dqb_isoftlimit : - dquot->dq_dqb.dqb_ihardlimit; + limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit, + dquot->dq_dqb.dqb_ihardlimit); + if (limit && buf->f_files > limit) { buf->f_files = limit; buf->f_ffree = @@ -1340,6 +1420,35 @@ static inline void f2fs_show_quota_options(struct seq_file *seq, #endif } +static inline void f2fs_show_compress_options(struct seq_file *seq, + struct super_block *sb) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + char *algtype = ""; + int i; + + if (!f2fs_sb_has_compression(sbi)) + return; + + switch (F2FS_OPTION(sbi).compress_algorithm) { + case COMPRESS_LZO: + algtype = "lzo"; + break; + case COMPRESS_LZ4: + algtype = "lz4"; + break; + } + seq_printf(seq, ",compress_algorithm=%s", algtype); + + seq_printf(seq, ",compress_log_size=%u", + F2FS_OPTION(sbi).compress_log_size); + + for (i = 0; i < F2FS_OPTION(sbi).compress_ext_cnt; i++) { + seq_printf(seq, ",compress_extension=%s", + F2FS_OPTION(sbi).extensions[i]); + } +} + static int f2fs_show_options(struct seq_file *seq, struct dentry *root) { struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); @@ -1446,6 +1555,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) #ifdef CONFIG_FS_ENCRYPTION if (F2FS_OPTION(sbi).test_dummy_encryption) seq_puts(seq, ",test_dummy_encryption"); + if (F2FS_OPTION(sbi).inlinecrypt) + seq_puts(seq, ",inlinecrypt"); #endif if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_DEFAULT) @@ -1462,6 +1573,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_printf(seq, ",fsync_mode=%s", "strict"); else if (F2FS_OPTION(sbi).fsync_mode == FSYNC_MODE_NOBARRIER) seq_printf(seq, ",fsync_mode=%s", "nobarrier"); + + f2fs_show_compress_options(seq, sbi->sb); return 0; } @@ -1474,8 +1587,14 @@ static void default_options(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT; F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX; F2FS_OPTION(sbi).test_dummy_encryption = false; +#ifdef CONFIG_FS_ENCRYPTION + F2FS_OPTION(sbi).inlinecrypt = false; +#endif F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID); F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID); + F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZO; + F2FS_OPTION(sbi).compress_log_size = MIN_COMPRESS_LOG_SIZE; + F2FS_OPTION(sbi).compress_ext_cnt = 0; set_opt(sbi, BG_GC); set_opt(sbi, INLINE_XATTR); @@ -1524,7 +1643,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) f2fs_update_time(sbi, DISABLE_TIME); while (!f2fs_time_over(sbi, DISABLE_TIME)) { - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); err = f2fs_gc(sbi, true, false, NULL_SEGNO); if (err == -ENODATA) { err = 0; @@ -1546,7 +1665,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) goto restore_flag; } - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); cpc.reason = CP_PAUSE; set_sbi_flag(sbi, SBI_CP_DISABLED); err = f2fs_write_checkpoint(sbi, &cpc); @@ -1558,7 +1677,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) spin_unlock(&sbi->stat_lock); out_unlock: - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); restore_flag: sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */ return err; @@ -1566,12 +1685,12 @@ restore_flag: static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) { - mutex_lock(&sbi->gc_mutex); + down_write(&sbi->gc_lock); f2fs_dirty_to_prefree(sbi); clear_sbi_flag(sbi, SBI_CP_DISABLED); set_sbi_flag(sbi, SBI_IS_DIRTY); - mutex_unlock(&sbi->gc_mutex); + up_write(&sbi->gc_lock); f2fs_sync_fs(sbi->sb, 1); } @@ -2158,7 +2277,7 @@ static int f2fs_dquot_commit(struct dquot *dquot) struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; - down_read(&sbi->quota_sem); + down_read_nested(&sbi->quota_sem, SINGLE_DEPTH_NESTING); ret = dquot_commit(dquot); if (ret < 0) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); @@ -2182,13 +2301,10 @@ static int f2fs_dquot_acquire(struct dquot *dquot) static int f2fs_dquot_release(struct dquot *dquot) { struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); - int ret; + int ret = dquot_release(dquot); - down_read(&sbi->quota_sem); - ret = dquot_release(dquot); if (ret < 0) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); - up_read(&sbi->quota_sem); return ret; } @@ -2196,29 +2312,22 @@ static int f2fs_dquot_mark_dquot_dirty(struct dquot *dquot) { struct super_block *sb = dquot->dq_sb; struct f2fs_sb_info *sbi = F2FS_SB(sb); - int ret; - - down_read(&sbi->quota_sem); - ret = dquot_mark_dquot_dirty(dquot); + int ret = dquot_mark_dquot_dirty(dquot); /* if we are using journalled quota */ if (is_journalled_quota(sbi)) set_sbi_flag(sbi, SBI_QUOTA_NEED_FLUSH); - up_read(&sbi->quota_sem); return ret; } static int f2fs_dquot_commit_info(struct super_block *sb, int type) { struct f2fs_sb_info *sbi = F2FS_SB(sb); - int ret; + int ret = dquot_commit_info(sb, type); - down_read(&sbi->quota_sem); - ret = dquot_commit_info(sb, type); if (ret < 0) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); - up_read(&sbi->quota_sem); return ret; } @@ -2316,13 +2425,54 @@ static bool f2fs_dummy_context(struct inode *inode) return DUMMY_ENCRYPTION_ENABLED(F2FS_I_SB(inode)); } +static bool f2fs_has_stable_inodes(struct super_block *sb) +{ + return true; +} + +static void f2fs_get_ino_and_lblk_bits(struct super_block *sb, + int *ino_bits_ret, int *lblk_bits_ret) +{ + *ino_bits_ret = 8 * sizeof(nid_t); + *lblk_bits_ret = 8 * sizeof(block_t); +} + +static bool f2fs_inline_crypt_enabled(struct super_block *sb) +{ + return F2FS_OPTION(F2FS_SB(sb)).inlinecrypt; +} + +static int f2fs_get_num_devices(struct super_block *sb) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + + if (f2fs_is_multi_device(sbi)) + return sbi->s_ndevs; + return 1; +} + +static void f2fs_get_devices(struct super_block *sb, + struct request_queue **devs) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + int i; + + for (i = 0; i < sbi->s_ndevs; i++) + devs[i] = bdev_get_queue(FDEV(i).bdev); +} + static const struct fscrypt_operations f2fs_cryptops = { - .key_prefix = "f2fs:", - .get_context = f2fs_get_context, - .set_context = f2fs_set_context, - .dummy_context = f2fs_dummy_context, - .empty_dir = f2fs_empty_dir, - .max_namelen = F2FS_NAME_LEN, + .key_prefix = "f2fs:", + .get_context = f2fs_get_context, + .set_context = f2fs_set_context, + .dummy_context = f2fs_dummy_context, + .empty_dir = f2fs_empty_dir, + .max_namelen = F2FS_NAME_LEN, + .has_stable_inodes = f2fs_has_stable_inodes, + .get_ino_and_lblk_bits = f2fs_get_ino_and_lblk_bits, + .inline_crypt_enabled = f2fs_inline_crypt_enabled, + .get_num_devices = f2fs_get_num_devices, + .get_devices = f2fs_get_devices, }; #endif @@ -2612,6 +2762,21 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return -EFSCORRUPTED; } + if (RDEV(0).path[0]) { + block_t dev_seg_count = le32_to_cpu(RDEV(0).total_segments); + int i = 1; + + while (i < MAX_DEVICES && RDEV(i).path[0]) { + dev_seg_count += le32_to_cpu(RDEV(i).total_segments); + i++; + } + if (segment_count != dev_seg_count) { + f2fs_info(sbi, "Segment count (%u) mismatch with total segments from devices (%u)", + segment_count, dev_seg_count); + return -EFSCORRUPTED; + } + } + if (secs_per_zone > total_sections || !secs_per_zone) { f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)", secs_per_zone, total_sections); @@ -2846,6 +3011,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) spin_lock_init(&sbi->dev_lock); init_rwsem(&sbi->sb_lock); + init_rwsem(&sbi->pin_sem); } static int init_percpu_info(struct f2fs_sb_info *sbi) @@ -2961,6 +3127,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, f2fs_err(sbi, "Unable to read %dth superblock", block + 1); err = -EIO; + *recovery = 1; continue; } @@ -2970,6 +3137,7 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock", block + 1); brelse(bh); + *recovery = 1; continue; } @@ -2982,10 +3150,6 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi, brelse(bh); } - /* Fail to read any one of the superblocks*/ - if (err < 0) - *recovery = 1; - /* No valid superblock */ if (!*raw_super) kvfree(super); @@ -3129,17 +3293,11 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) { #ifdef CONFIG_UNICODE - if (f2fs_sb_has_casefold(sbi) && !sbi->s_encoding) { + if (f2fs_sb_has_casefold(sbi) && !sbi->sb->s_encoding) { const struct f2fs_sb_encodings *encoding_info; struct unicode_map *encoding; __u16 encoding_flags; - if (f2fs_sb_has_encrypt(sbi)) { - f2fs_err(sbi, - "Can't mount with encoding and encryption"); - return -EINVAL; - } - if (f2fs_sb_read_encoding(sbi->raw_super, &encoding_info, &encoding_flags)) { f2fs_err(sbi, @@ -3160,9 +3318,8 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) "%s-%s with flags 0x%hx", encoding_info->name, encoding_info->version?:"\b", encoding_flags); - sbi->s_encoding = encoding; - sbi->s_encoding_flags = encoding_flags; - sbi->sb->s_d_op = &f2fs_dentry_ops; + sbi->sb->s_encoding = encoding; + sbi->sb->s_encoding_flags = encoding_flags; } #else if (f2fs_sb_has_casefold(sbi)) { @@ -3304,7 +3461,7 @@ try_onemore: /* init f2fs-specific super block info */ sbi->valid_super_block = valid_super_block; - mutex_init(&sbi->gc_mutex); + init_rwsem(&sbi->gc_lock); mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); mutex_init(&sbi->resize_mutex); @@ -3339,6 +3496,8 @@ try_onemore: sbi->write_io[i][j].bio = NULL; spin_lock_init(&sbi->write_io[i][j].io_lock); INIT_LIST_HEAD(&sbi->write_io[i][j].io_list); + INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list); + init_rwsem(&sbi->write_io[i][j].bio_list_lock); } } @@ -3391,6 +3550,12 @@ try_onemore: goto free_devices; } + err = f2fs_init_post_read_wq(sbi); + if (err) { + f2fs_err(sbi, "Failed to initialize post read workqueue"); + goto free_devices; + } + sbi->total_valid_node_count = le32_to_cpu(sbi->ckpt->valid_node_count); percpu_counter_set(&sbi->total_valid_inode_count, @@ -3611,6 +3776,7 @@ free_nm: f2fs_destroy_node_manager(sbi); free_sm: f2fs_destroy_segment_manager(sbi); + f2fs_destroy_post_read_wq(sbi); free_devices: destroy_device_list(sbi); kvfree(sbi->ckpt); @@ -3627,7 +3793,7 @@ free_bio_info: kvfree(sbi->write_io[i]); #ifdef CONFIG_UNICODE - utf8_unload(sbi->s_encoding); + utf8_unload(sb->s_encoding); #endif free_options: #ifdef CONFIG_QUOTA @@ -3749,9 +3915,17 @@ static int __init init_f2fs_fs(void) err = f2fs_init_post_read_processing(); if (err) goto free_root_stats; - + err = f2fs_init_bio_entry_cache(); + if (err) + goto free_post_read; + err = f2fs_init_bioset(); + if (err) + goto free_bio_enrty_cache; return 0; - +free_bio_enrty_cache: + f2fs_destroy_bio_entry_cache(); +free_post_read: + f2fs_destroy_post_read_processing(); free_root_stats: f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); @@ -3775,6 +3949,8 @@ fail: static void __exit exit_f2fs_fs(void) { + f2fs_destroy_bioset(); + f2fs_destroy_bio_entry_cache(); f2fs_destroy_post_read_processing(); f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index a1023b905df2..a32b3a392fbd 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -25,6 +25,9 @@ enum { DCC_INFO, /* struct discard_cmd_control */ NM_INFO, /* struct f2fs_nm_info */ F2FS_SBI, /* struct f2fs_sb_info */ +#ifdef CONFIG_F2FS_STAT_FS + STAT_INFO, /* struct f2fs_stat_info */ +#endif #ifdef CONFIG_F2FS_FAULT_INJECTION FAULT_INFO_RATE, /* struct f2fs_fault_info */ FAULT_INFO_TYPE, /* struct f2fs_fault_info */ @@ -42,6 +45,9 @@ struct f2fs_attr { int id; }; +static ssize_t f2fs_sbi_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf); + static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) { if (struct_type == GC_THREAD) @@ -58,6 +64,10 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) else if (struct_type == FAULT_INFO_RATE || struct_type == FAULT_INFO_TYPE) return (unsigned char *)&F2FS_OPTION(sbi).fault_info; +#endif +#ifdef CONFIG_F2FS_STAT_FS + else if (struct_type == STAT_INFO) + return (unsigned char *)F2FS_STAT(sbi); #endif return NULL; } @@ -65,35 +75,15 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) static ssize_t dirty_segments_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)(dirty_segments(sbi))); + return sprintf(buf, "%llu\n", + (unsigned long long)(dirty_segments(sbi))); } -static ssize_t unusable_show(struct f2fs_attr *a, +static ssize_t free_segments_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - block_t unusable; - - if (test_opt(sbi, DISABLE_CHECKPOINT)) - unusable = sbi->unusable_block_count; - else - unusable = f2fs_get_unusable_blocks(sbi); - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)unusable); -} - -static ssize_t encoding_show(struct f2fs_attr *a, - struct f2fs_sb_info *sbi, char *buf) -{ -#ifdef CONFIG_UNICODE - if (f2fs_sb_has_casefold(sbi)) - return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n", - sbi->s_encoding->charset, - (sbi->s_encoding->version >> 16) & 0xff, - (sbi->s_encoding->version >> 8) & 0xff, - sbi->s_encoding->version & 0xff); -#endif - return snprintf(buf, PAGE_SIZE, "(none)"); + return sprintf(buf, "%llu\n", + (unsigned long long)(free_segments(sbi))); } static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, @@ -102,10 +92,10 @@ static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a, struct super_block *sb = sbi->sb; if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); + return sprintf(buf, "0\n"); - return snprintf(buf, PAGE_SIZE, "%llu\n", - (unsigned long long)(sbi->kbytes_written + + return sprintf(buf, "%llu\n", + (unsigned long long)(sbi->kbytes_written + BD_PART_WRITTEN(sbi))); } @@ -116,7 +106,7 @@ static ssize_t features_show(struct f2fs_attr *a, int len = 0; if (!sb->s_bdev->bd_part) - return snprintf(buf, PAGE_SIZE, "0\n"); + return sprintf(buf, "0\n"); if (f2fs_sb_has_encrypt(sbi)) len += snprintf(buf, PAGE_SIZE - len, "%s", @@ -154,6 +144,11 @@ static ssize_t features_show(struct f2fs_attr *a, if (f2fs_sb_has_casefold(sbi)) len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", len ? ", " : "", "casefold"); + if (f2fs_sb_has_compression(sbi)) + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "compression"); + len += snprintf(buf + len, PAGE_SIZE - len, "%s%s", + len ? ", " : "", "pin_file"); len += snprintf(buf + len, PAGE_SIZE - len, "\n"); return len; } @@ -161,9 +156,68 @@ static ssize_t features_show(struct f2fs_attr *a, static ssize_t current_reserved_blocks_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return snprintf(buf, PAGE_SIZE, "%u\n", sbi->current_reserved_blocks); + return sprintf(buf, "%u\n", sbi->current_reserved_blocks); } +static ssize_t unusable_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + block_t unusable; + + if (test_opt(sbi, DISABLE_CHECKPOINT)) + unusable = sbi->unusable_block_count; + else + unusable = f2fs_get_unusable_blocks(sbi); + return sprintf(buf, "%llu\n", (unsigned long long)unusable); +} + +static ssize_t encoding_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ +#ifdef CONFIG_UNICODE + struct super_block *sb = sbi->sb; + + if (f2fs_sb_has_casefold(sbi)) + return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n", + sb->s_encoding->charset, + (sb->s_encoding->version >> 16) & 0xff, + (sb->s_encoding->version >> 8) & 0xff, + sb->s_encoding->version & 0xff); +#endif + return sprintf(buf, "(none)"); +} + +#ifdef CONFIG_F2FS_STAT_FS +static ssize_t moved_blocks_foreground_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + + return sprintf(buf, "%llu\n", + (unsigned long long)(si->tot_blks - + (si->bg_data_blks + si->bg_node_blks))); +} + +static ssize_t moved_blocks_background_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + + return sprintf(buf, "%llu\n", + (unsigned long long)(si->bg_data_blks + si->bg_node_blks)); +} + +static ssize_t avg_vblocks_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + struct f2fs_stat_info *si = F2FS_STAT(sbi); + + si->dirty_count = dirty_segments(sbi); + f2fs_update_sit_info(sbi); + return sprintf(buf, "%llu\n", (unsigned long long)(si->avg_vblocks)); +} +#endif + static ssize_t f2fs_sbi_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -197,7 +251,7 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, ui = (unsigned int *)(ptr + a->offset); - return snprintf(buf, PAGE_SIZE, "%u\n", *ui); + return sprintf(buf, "%u\n", *ui); } static ssize_t __sbi_store(struct f2fs_attr *a, @@ -387,6 +441,7 @@ enum feat_id { FEAT_VERITY, FEAT_SB_CHECKSUM, FEAT_CASEFOLD, + FEAT_COMPRESSION, }; static ssize_t f2fs_feature_show(struct f2fs_attr *a, @@ -406,7 +461,8 @@ static ssize_t f2fs_feature_show(struct f2fs_attr *a, case FEAT_VERITY: case FEAT_SB_CHECKSUM: case FEAT_CASEFOLD: - return snprintf(buf, PAGE_SIZE, "supported\n"); + case FEAT_COMPRESSION: + return sprintf(buf, "supported\n"); } return 0; } @@ -435,6 +491,14 @@ static struct f2fs_attr f2fs_attr_##_name = { \ .id = _id, \ } +#define F2FS_STAT_ATTR(_struct_type, _struct_name, _name, _elname) \ +static struct f2fs_attr f2fs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = 0444 }, \ + .show = f2fs_sbi_show, \ + .struct_type = _struct_type, \ + .offset = offsetof(struct _struct_name, _elname), \ +} + F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_urgent_sleep_time, urgent_sleep_time); F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time); @@ -443,6 +507,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, main_blkaddr, main_blkaddr); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); @@ -475,11 +540,21 @@ F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate); F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); #endif F2FS_GENERAL_RO_ATTR(dirty_segments); +F2FS_GENERAL_RO_ATTR(free_segments); F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes); F2FS_GENERAL_RO_ATTR(features); F2FS_GENERAL_RO_ATTR(current_reserved_blocks); F2FS_GENERAL_RO_ATTR(unusable); F2FS_GENERAL_RO_ATTR(encoding); +#ifdef CONFIG_F2FS_STAT_FS +F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count); +F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count); +F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, gc_foreground_calls, call_count); +F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, gc_background_calls, bg_gc); +F2FS_GENERAL_RO_ATTR(moved_blocks_background); +F2FS_GENERAL_RO_ATTR(moved_blocks_foreground); +F2FS_GENERAL_RO_ATTR(avg_vblocks); +#endif #ifdef CONFIG_FS_ENCRYPTION F2FS_FEATURE_RO_ATTR(encryption, FEAT_CRYPTO); @@ -500,6 +575,7 @@ F2FS_FEATURE_RO_ATTR(verity, FEAT_VERITY); #endif F2FS_FEATURE_RO_ATTR(sb_checksum, FEAT_SB_CHECKSUM); F2FS_FEATURE_RO_ATTR(casefold, FEAT_CASEFOLD); +F2FS_FEATURE_RO_ATTR(compression, FEAT_COMPRESSION); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -510,6 +586,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(gc_idle), ATTR_LIST(gc_urgent), ATTR_LIST(reclaim_segments), + ATTR_LIST(main_blkaddr), ATTR_LIST(max_small_discards), ATTR_LIST(discard_granularity), ATTR_LIST(batched_trim_sections), @@ -539,12 +616,22 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(inject_type), #endif ATTR_LIST(dirty_segments), + ATTR_LIST(free_segments), ATTR_LIST(unusable), ATTR_LIST(lifetime_write_kbytes), ATTR_LIST(features), ATTR_LIST(reserved_blocks), ATTR_LIST(current_reserved_blocks), ATTR_LIST(encoding), +#ifdef CONFIG_F2FS_STAT_FS + ATTR_LIST(cp_foreground_calls), + ATTR_LIST(cp_background_calls), + ATTR_LIST(gc_foreground_calls), + ATTR_LIST(gc_background_calls), + ATTR_LIST(moved_blocks_foreground), + ATTR_LIST(moved_blocks_background), + ATTR_LIST(avg_vblocks), +#endif NULL, }; @@ -568,6 +655,7 @@ static struct attribute *f2fs_feat_attrs[] = { #endif ATTR_LIST(sb_checksum), ATTR_LIST(casefold), + ATTR_LIST(compression), NULL, }; @@ -745,10 +833,12 @@ int __init f2fs_init_sysfs(void) ret = kobject_init_and_add(&f2fs_feat, &f2fs_feat_ktype, NULL, "features"); - if (ret) + if (ret) { + kobject_put(&f2fs_feat); kset_unregister(&f2fs_kset); - else + } else { f2fs_proc_root = proc_mkdir("fs/f2fs", NULL); + } return ret; } @@ -769,8 +859,11 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi) init_completion(&sbi->s_kobj_unregister); err = kobject_init_and_add(&sbi->s_kobj, &f2fs_sb_ktype, NULL, "%s", sb->s_id); - if (err) + if (err) { + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); return err; + } if (f2fs_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); @@ -798,4 +891,5 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi) remove_proc_entry(sbi->sb->s_id, f2fs_proc_root); } kobject_del(&sbi->s_kobj); + kobject_put(&sbi->s_kobj); } diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index a401ef72bc82..5905050f7fb8 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -222,12 +222,57 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, return size; } -static struct page *f2fs_read_merkle_tree_page(struct inode *inode, - pgoff_t index) +/* + * Prefetch some pages from the file's Merkle tree. + * + * This is basically a stripped-down version of __do_page_cache_readahead() + * which works on pages past i_size. + */ +static void f2fs_merkle_tree_readahead(struct address_space *mapping, + pgoff_t start_index, unsigned long count) { + LIST_HEAD(pages); + unsigned int nr_pages = 0; + struct page *page; + pgoff_t index; + struct blk_plug plug; + + for (index = start_index; index < start_index + count; index++) { + rcu_read_lock(); + page = radix_tree_lookup(&mapping->page_tree, index); + rcu_read_unlock(); + if (!page || radix_tree_exceptional_entry(page)) { + page = __page_cache_alloc(readahead_gfp_mask(mapping)); + if (!page) + break; + page->index = index; + list_add(&page->lru, &pages); + nr_pages++; + } + } + blk_start_plug(&plug); + f2fs_mpage_readpages(mapping, &pages, NULL, nr_pages, true); + blk_finish_plug(&plug); +} + +static struct page *f2fs_read_merkle_tree_page(struct inode *inode, + pgoff_t index, + unsigned long num_ra_pages) +{ + struct page *page; + index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT; - return read_mapping_page(inode->i_mapping, index, NULL); + page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); + if (!page || !PageUptodate(page)) { + if (page) + put_page(page); + else if (num_ra_pages > 1) + f2fs_merkle_tree_readahead(inode->i_mapping, index, + num_ra_pages); + page = read_mapping_page(inode->i_mapping, index, NULL); + } + return page; } static int f2fs_write_merkle_tree_block(struct inode *inode, const void *buf, diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 181900af2576..296b3189448a 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -539,8 +539,9 @@ out: ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) { struct inode *inode = d_inode(dentry); + nid_t xnid = F2FS_I(inode)->i_xattr_nid; struct f2fs_xattr_entry *entry; - void *base_addr; + void *base_addr, *last_base_addr; int error = 0; size_t rest = buffer_size; @@ -550,6 +551,8 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) if (error) return error; + last_base_addr = (void *)base_addr + XATTR_SIZE(xnid, inode); + list_for_each_xattr(entry, base_addr) { const struct xattr_handler *handler = f2fs_xattr_handler(entry->e_name_index); @@ -557,6 +560,15 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) size_t prefix_len; size_t size; + if ((void *)(entry) + sizeof(__u32) > last_base_addr || + (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) { + f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", + inode->i_ino); + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); + error = -EFSCORRUPTED; + goto cleanup; + } + if (!handler || (handler->list && !handler->list(dentry))) continue; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 5b5cf41acef5..18a87994f490 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -739,6 +739,13 @@ static struct inode *fat_alloc_inode(struct super_block *sb) return NULL; init_rwsem(&ei->truncate_lock); + /* Zeroing to allow iput() even if partial initialized inode. */ + ei->mmu_private = 0; + ei->i_start = 0; + ei->i_logstart = 0; + ei->i_attrs = 0; + ei->i_pos = 0; + return &ei->vfs_inode; } @@ -1369,16 +1376,6 @@ out: return 0; } -static void fat_dummy_inode_init(struct inode *inode) -{ - /* Initialize this dummy inode to work as no-op. */ - MSDOS_I(inode)->mmu_private = 0; - MSDOS_I(inode)->i_start = 0; - MSDOS_I(inode)->i_logstart = 0; - MSDOS_I(inode)->i_attrs = 0; - MSDOS_I(inode)->i_pos = 0; -} - static int fat_read_root(struct inode *inode) { struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); @@ -1823,13 +1820,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, fat_inode = new_inode(sb); if (!fat_inode) goto out_fail; - fat_dummy_inode_init(fat_inode); sbi->fat_inode = fat_inode; fsinfo_inode = new_inode(sb); if (!fsinfo_inode) goto out_fail; - fat_dummy_inode_init(fsinfo_inode); fsinfo_inode->i_ino = MSDOS_FSINFO_INO; sbi->fsinfo_inode = fsinfo_inode; insert_inode_hash(fsinfo_inode); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index ad6b8247d625..0fdd8b37c6b9 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -582,10 +582,13 @@ void wbc_attach_and_unlock_inode(struct writeback_control *wbc, spin_unlock(&inode->i_lock); /* - * A dying wb indicates that the memcg-blkcg mapping has changed - * and a new wb is already serving the memcg. Switch immediately. + * A dying wb indicates that either the blkcg associated with the + * memcg changed or the associated memcg is dying. In the first + * case, a replacement wb should already be available and we should + * refresh the wb immediately. In the second case, trying to + * refresh will keep failing. */ - if (unlikely(wb_dying(wbc->wb))) + if (unlikely(wb_dying(wbc->wb) && !css_is_dying(wbc->wb->memcg_css))) inode_switch_wbs(inode, wbc->wb_id); } diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 5be0339dcceb..42bed87dd5ea 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -107,7 +107,7 @@ static ssize_t fuse_conn_max_background_read(struct file *file, if (!fc) return 0; - val = fc->max_background; + val = READ_ONCE(fc->max_background); fuse_conn_put(fc); return fuse_conn_limit_read(file, buf, len, ppos, val); @@ -144,7 +144,7 @@ static ssize_t fuse_conn_congestion_threshold_read(struct file *file, if (!fc) return 0; - val = fc->congestion_threshold; + val = READ_ONCE(fc->congestion_threshold); fuse_conn_put(fc); return fuse_conn_limit_read(file, buf, len, ppos, val); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b77eb192a6bf..d84819c9174c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -234,7 +234,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) kfree(forget); if (ret == -ENOMEM) goto out; - if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT) + if (ret || fuse_invalid_attr(&outarg.attr) || + (outarg.attr.mode ^ inode->i_mode) & S_IFMT) goto invalid; forget_all_cached_acls(inode); @@ -343,6 +344,12 @@ int fuse_valid_type(int m) S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m); } +bool fuse_invalid_attr(struct fuse_attr *attr) +{ + return !fuse_valid_type(attr->mode) || + attr->size > LLONG_MAX; +} + int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, struct fuse_entry_out *outarg, struct inode **inode) { @@ -374,7 +381,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name err = -EIO; if (!outarg->nodeid) goto out_put_forget; - if (!fuse_valid_type(outarg->attr.mode)) + if (fuse_invalid_attr(&outarg->attr)) goto out_put_forget; *inode = fuse_iget(sb, outarg->nodeid, outarg->generation, @@ -497,7 +504,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, goto out_free_ff; err = -EIO; - if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid)) + if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid) || + fuse_invalid_attr(&outentry.attr)) goto out_free_ff; ff->fh = outopen.fh; @@ -603,7 +611,7 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args, goto out_put_forget_req; err = -EIO; - if (invalid_nodeid(outarg.nodeid)) + if (invalid_nodeid(outarg.nodeid) || fuse_invalid_attr(&outarg.attr)) goto out_put_forget_req; if ((outarg.attr.mode ^ mode) & S_IFMT) @@ -876,7 +884,8 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, spin_lock(&fc->lock); fi->attr_version = ++fc->attr_version; - inc_nlink(inode); + if (likely(inode->i_nlink < UINT_MAX)) + inc_nlink(inode); spin_unlock(&fc->lock); fuse_invalidate_attr(inode); fuse_update_ctime(inode); @@ -956,7 +965,8 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, args.out.args[0].value = &outarg; err = fuse_simple_request(fc, &args); if (!err) { - if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { + if (fuse_invalid_attr(&outarg.attr) || + (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { make_bad_inode(inode); err = -EIO; } else { @@ -1260,7 +1270,7 @@ static int fuse_direntplus_link(struct file *file, if (invalid_nodeid(o->nodeid)) return -EIO; - if (!fuse_valid_type(o->attr.mode)) + if (fuse_invalid_attr(&o->attr)) return -EIO; fc = get_fuse_conn(dir); @@ -1696,6 +1706,19 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, if (attr->ia_valid & ATTR_SIZE) is_truncate = true; + /* Flush dirty data/metadata before non-truncate SETATTR */ + if (is_wb && S_ISREG(inode->i_mode) && + attr->ia_valid & + (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET | + ATTR_TIMES_SET)) { + err = write_inode_now(inode, true); + if (err) + return err; + + fuse_set_nowrite(inode); + fuse_release_nowrite(inode); + } + if (is_truncate) { fuse_set_nowrite(inode); set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); @@ -1724,7 +1747,8 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, goto error; } - if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) { + if (fuse_invalid_attr(&outarg.attr) || + (inode->i_mode ^ outarg.attr.mode) & S_IFMT) { make_bad_inode(inode); err = -EIO; goto error; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 33ba1c4760c8..c551e1451a7b 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -201,7 +201,7 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) { struct fuse_conn *fc = get_fuse_conn(inode); int err; - bool lock_inode = (file->f_flags & O_TRUNC) && + bool is_wb_truncate = (file->f_flags & O_TRUNC) && fc->atomic_o_trunc && fc->writeback_cache; @@ -209,16 +209,20 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) if (err) return err; - if (lock_inode) + if (is_wb_truncate) { inode_lock(inode); + fuse_set_nowrite(inode); + } err = fuse_do_open(fc, get_node_id(inode), file, isdir); if (!err) fuse_finish_open(inode, file); - if (lock_inode) + if (is_wb_truncate) { + fuse_release_nowrite(inode); inode_unlock(inode); + } return err; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 42977d8db3d3..74bae9703ee6 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -899,6 +899,8 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc); */ int fuse_valid_type(int m); +bool fuse_invalid_attr(struct fuse_attr *attr); + /** * Is current process allowed to perform filesystem operation? */ diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 09432b25fe9b..b3a1b16d4e3e 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1445,6 +1445,8 @@ static int do_grow(struct inode *inode, u64 size) } error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT + + (unstuff && + gfs2_is_jdata(ip) ? RES_JDATA : 0) + (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ? 0 : RES_QUOTA), 0); if (error) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index c850579ae5a4..6c6401084d3d 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1255,7 +1255,7 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, if (!(*opened & FILE_OPENED)) return finish_no_open(file, d); dput(d); - return 0; + return excl && (flags & O_CREAT) ? -EEXIST : 0; } BUG_ON(d != NULL); diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 483b82e2be92..a3208511f35a 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -594,6 +594,14 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) list_add(&bd->bd_list, &sdp->sd_log_le_revoke); } +void gfs2_glock_remove_revoke(struct gfs2_glock *gl) +{ + if (atomic_dec_return(&gl->gl_revokes) == 0) { + clear_bit(GLF_LFLUSH, &gl->gl_flags); + gfs2_glock_queue_put(gl); + } +} + void gfs2_write_revokes(struct gfs2_sbd *sdp) { struct gfs2_trans *tr; diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 9499a6049212..3b7b7839ec6a 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -80,6 +80,7 @@ extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); extern int gfs2_logd(void *data); extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); +extern void gfs2_glock_remove_revoke(struct gfs2_glock *gl); extern void gfs2_write_revokes(struct gfs2_sbd *sdp); #endif /* __LOG_DOT_H__ */ diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 049f8c6721b4..a5041e6d2c0d 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -660,10 +660,7 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) bd = list_entry(head->next, struct gfs2_bufdata, bd_list); list_del_init(&bd->bd_list); gl = bd->bd_gl; - if (atomic_dec_return(&gl->gl_revokes) == 0) { - clear_bit(GLF_LFLUSH, &gl->gl_flags); - gfs2_glock_queue_put(gl); - } + gfs2_glock_remove_revoke(gl); kmem_cache_free(gfs2_bufdata_cachep, bd); } } diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index b0eee90738ff..7cb0672294df 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -623,7 +623,10 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) RB_CLEAR_NODE(&rs->rs_node); if (rs->rs_free) { - struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm); + u64 last_block = gfs2_rbm_to_block(&rs->rs_rbm) + + rs->rs_free - 1; + struct gfs2_rbm last_rbm = { .rgd = rs->rs_rbm.rgd, }; + struct gfs2_bitmap *start, *last; /* return reserved blocks to the rgrp */ BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); @@ -634,7 +637,13 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) it will force the number to be recalculated later. */ rgd->rd_extfail_pt += rs->rs_free; rs->rs_free = 0; - clear_bit(GBF_FULL, &bi->bi_flags); + if (gfs2_rbm_from_block(&last_rbm, last_block)) + return; + start = rbm_bi(&rs->rs_rbm); + last = rbm_bi(&last_rbm); + do + clear_bit(GBF_FULL, &start->bi_flags); + while (start++ != last); } } @@ -1201,7 +1210,7 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); rl_flags &= ~GFS2_RDF_MASK; rgd->rd_flags &= GFS2_RDF_MASK; - rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); + rgd->rd_flags |= (rl_flags | GFS2_RDF_CHECK); if (rgd->rd_rgl->rl_unlinked == 0) rgd->rd_flags &= ~GFS2_RDF_CHECK; rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 8e54f2e3a304..c3f3f1ae4e1b 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -845,10 +845,10 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) return error; + flush_workqueue(gfs2_delete_workqueue); kthread_stop(sdp->sd_quotad_process); kthread_stop(sdp->sd_logd_process); - flush_workqueue(gfs2_delete_workqueue); gfs2_quota_sync(sdp->sd_vfs, 0); gfs2_statfs_sync(sdp->sd_vfs, 0); diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index affef3c066e0..69e3402a3cc5 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -277,6 +277,8 @@ void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) list_del_init(&bd->bd_list); gfs2_assert_withdraw(sdp, sdp->sd_log_num_revoke); sdp->sd_log_num_revoke--; + if (bd->bd_gl) + gfs2_glock_remove_revoke(bd->bd_gl); kmem_cache_free(gfs2_bufdata_cachep, bd); tr->tr_num_revoke_rm++; if (--n == 0) diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c index da25c49203cc..896396554bcc 100644 --- a/fs/hfs/brec.c +++ b/fs/hfs/brec.c @@ -445,6 +445,7 @@ skip: /* restore search_key */ hfs_bnode_read_key(node, fd->search_key, 14); } + new_node = NULL; } if (!rec && node->parent) diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 9bdff5e40626..19017d296173 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c @@ -220,6 +220,30 @@ static struct hfs_bnode *hfs_bmap_new_bmap(struct hfs_bnode *prev, u32 idx) return node; } +/* Make sure @tree has enough space for the @rsvd_nodes */ +int hfs_bmap_reserve(struct hfs_btree *tree, int rsvd_nodes) +{ + struct inode *inode = tree->inode; + u32 count; + int res; + + while (tree->free_nodes < rsvd_nodes) { + res = hfs_extend_file(inode); + if (res) + return res; + HFS_I(inode)->phys_size = inode->i_size = + (loff_t)HFS_I(inode)->alloc_blocks * + HFS_SB(tree->sb)->alloc_blksz; + HFS_I(inode)->fs_blocks = inode->i_size >> + tree->sb->s_blocksize_bits; + inode_set_bytes(inode, inode->i_size); + count = inode->i_size >> tree->node_size_shift; + tree->free_nodes += count - tree->node_count; + tree->node_count = count; + } + return 0; +} + struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) { struct hfs_bnode *node, *next_node; @@ -229,26 +253,11 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) u16 off16; u16 len; u8 *data, byte, m; - int i; + int i, res; - while (!tree->free_nodes) { - struct inode *inode = tree->inode; - u32 count; - int res; - - res = hfs_extend_file(inode); - if (res) - return ERR_PTR(res); - HFS_I(inode)->phys_size = inode->i_size = - (loff_t)HFS_I(inode)->alloc_blocks * - HFS_SB(tree->sb)->alloc_blksz; - HFS_I(inode)->fs_blocks = inode->i_size >> - tree->sb->s_blocksize_bits; - inode_set_bytes(inode, inode->i_size); - count = inode->i_size >> tree->node_size_shift; - tree->free_nodes = count - tree->node_count; - tree->node_count = count; - } + res = hfs_bmap_reserve(tree, 1); + if (res) + return ERR_PTR(res); nidx = 0; node = hfs_bnode_find(tree, nidx); diff --git a/fs/hfs/btree.h b/fs/hfs/btree.h index c8b252dbb26c..dcc2aab1b2c4 100644 --- a/fs/hfs/btree.h +++ b/fs/hfs/btree.h @@ -82,6 +82,7 @@ struct hfs_find_data { extern struct hfs_btree *hfs_btree_open(struct super_block *, u32, btree_keycmp); extern void hfs_btree_close(struct hfs_btree *); extern void hfs_btree_write(struct hfs_btree *); +extern int hfs_bmap_reserve(struct hfs_btree *, int); extern struct hfs_bnode * hfs_bmap_alloc(struct hfs_btree *); extern void hfs_bmap_free(struct hfs_bnode *node); diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c index 8a66405b0f8b..d365bf0b8c77 100644 --- a/fs/hfs/catalog.c +++ b/fs/hfs/catalog.c @@ -97,6 +97,14 @@ int hfs_cat_create(u32 cnid, struct inode *dir, const struct qstr *str, struct i if (err) return err; + /* + * Fail early and avoid ENOSPC during the btree operations. We may + * have to split the root node at most once. + */ + err = hfs_bmap_reserve(fd.tree, 2 * fd.tree->depth); + if (err) + goto err2; + hfs_cat_build_key(sb, fd.search_key, cnid, NULL); entry_size = hfs_cat_build_thread(sb, &entry, S_ISDIR(inode->i_mode) ? HFS_CDR_THD : HFS_CDR_FTH, @@ -295,6 +303,14 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, const struct qstr *src_name, return err; dst_fd = src_fd; + /* + * Fail early and avoid ENOSPC during the btree operations. We may + * have to split the root node at most once. + */ + err = hfs_bmap_reserve(src_fd.tree, 2 * src_fd.tree->depth); + if (err) + goto out; + /* find the old dir entry and read the data */ hfs_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); err = hfs_brec_find(&src_fd); diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c index 5d0182654580..263d5028d9d1 100644 --- a/fs/hfs/extent.c +++ b/fs/hfs/extent.c @@ -117,6 +117,10 @@ static int __hfs_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) if (HFS_I(inode)->flags & HFS_FLG_EXT_NEW) { if (res != -ENOENT) return res; + /* Fail early and avoid ENOSPC during the btree operation */ + res = hfs_bmap_reserve(fd->tree, fd->tree->depth + 1); + if (res) + return res; hfs_brec_insert(fd, HFS_I(inode)->cached_extents, sizeof(hfs_extent_rec)); HFS_I(inode)->flags &= ~(HFS_FLG_EXT_DIRTY|HFS_FLG_EXT_NEW); } else { @@ -300,7 +304,7 @@ int hfs_free_fork(struct super_block *sb, struct hfs_cat_file *file, int type) return 0; blocks = 0; - for (i = 0; i < 3; extent++, i++) + for (i = 0; i < 3; i++) blocks += be16_to_cpu(extent[i].count); res = hfs_free_extents(sb, extent, blocks, blocks); @@ -341,7 +345,9 @@ int hfs_get_block(struct inode *inode, sector_t block, ablock = (u32)block / HFS_SB(sb)->fs_div; if (block >= HFS_I(inode)->fs_blocks) { - if (block > HFS_I(inode)->fs_blocks || !create) + if (!create) + return 0; + if (block > HFS_I(inode)->fs_blocks) return -EIO; if (ablock >= HFS_I(inode)->alloc_blocks) { res = hfs_extend_file(inode); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 2538b49cc349..350afd67bd69 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -642,6 +642,8 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr) truncate_setsize(inode, attr->ia_size); hfs_file_truncate(inode); + inode->i_atime = inode->i_mtime = inode->i_ctime = + current_time(inode); } setattr_copy(inode, attr); diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c index 2bab6b3cdba4..e6d554476db4 100644 --- a/fs/hfsplus/attributes.c +++ b/fs/hfsplus/attributes.c @@ -217,6 +217,11 @@ int hfsplus_create_attr(struct inode *inode, if (err) goto failed_init_create_attr; + /* Fail early and avoid ENOSPC during the btree operation */ + err = hfs_bmap_reserve(fd.tree, fd.tree->depth + 1); + if (err) + goto failed_create_attr; + if (name) { err = hfsplus_attr_build_key(sb, fd.search_key, inode->i_ino, name); @@ -313,6 +318,11 @@ int hfsplus_delete_attr(struct inode *inode, const char *name) if (err) return err; + /* Fail early and avoid ENOSPC during the btree operation */ + err = hfs_bmap_reserve(fd.tree, fd.tree->depth); + if (err) + goto out; + if (name) { err = hfsplus_attr_build_key(sb, fd.search_key, inode->i_ino, name); diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index d3f36982f685..0f53a486d2c1 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c @@ -448,6 +448,7 @@ skip: /* restore search_key */ hfs_bnode_read_key(node, fd->search_key, 14); } + new_node = NULL; } if (!rec && node->parent) diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index 3de3bc4918b5..66774f4cb4fd 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c @@ -342,6 +342,34 @@ static struct hfs_bnode *hfs_bmap_new_bmap(struct hfs_bnode *prev, u32 idx) return node; } +/* Make sure @tree has enough space for the @rsvd_nodes */ +int hfs_bmap_reserve(struct hfs_btree *tree, int rsvd_nodes) +{ + struct inode *inode = tree->inode; + struct hfsplus_inode_info *hip = HFSPLUS_I(inode); + u32 count; + int res; + + if (rsvd_nodes <= 0) + return 0; + + while (tree->free_nodes < rsvd_nodes) { + res = hfsplus_file_extend(inode, hfs_bnode_need_zeroout(tree)); + if (res) + return res; + hip->phys_size = inode->i_size = + (loff_t)hip->alloc_blocks << + HFSPLUS_SB(tree->sb)->alloc_blksz_shift; + hip->fs_blocks = + hip->alloc_blocks << HFSPLUS_SB(tree->sb)->fs_shift; + inode_set_bytes(inode, inode->i_size); + count = inode->i_size >> tree->node_size_shift; + tree->free_nodes += count - tree->node_count; + tree->node_count = count; + } + return 0; +} + struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) { struct hfs_bnode *node, *next_node; @@ -351,27 +379,11 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) u16 off16; u16 len; u8 *data, byte, m; - int i; + int i, res; - while (!tree->free_nodes) { - struct inode *inode = tree->inode; - struct hfsplus_inode_info *hip = HFSPLUS_I(inode); - u32 count; - int res; - - res = hfsplus_file_extend(inode, hfs_bnode_need_zeroout(tree)); - if (res) - return ERR_PTR(res); - hip->phys_size = inode->i_size = - (loff_t)hip->alloc_blocks << - HFSPLUS_SB(tree->sb)->alloc_blksz_shift; - hip->fs_blocks = - hip->alloc_blocks << HFSPLUS_SB(tree->sb)->fs_shift; - inode_set_bytes(inode, inode->i_size); - count = inode->i_size >> tree->node_size_shift; - tree->free_nodes = count - tree->node_count; - tree->node_count = count; - } + res = hfs_bmap_reserve(tree, 1); + if (res) + return ERR_PTR(res); nidx = 0; node = hfs_bnode_find(tree, nidx); diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index a196369ba779..35472cba750e 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -265,6 +265,14 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, if (err) return err; + /* + * Fail early and avoid ENOSPC during the btree operations. We may + * have to split the root node at most once. + */ + err = hfs_bmap_reserve(fd.tree, 2 * fd.tree->depth); + if (err) + goto err2; + hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid); entry_size = hfsplus_fill_cat_thread(sb, &entry, S_ISDIR(inode->i_mode) ? @@ -333,6 +341,14 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, const struct qstr *str) if (err) return err; + /* + * Fail early and avoid ENOSPC during the btree operations. We may + * have to split the root node at most once. + */ + err = hfs_bmap_reserve(fd.tree, 2 * (int)fd.tree->depth - 2); + if (err) + goto out; + if (!str) { int len; @@ -433,6 +449,14 @@ int hfsplus_rename_cat(u32 cnid, return err; dst_fd = src_fd; + /* + * Fail early and avoid ENOSPC during the btree operations. We may + * have to split the root node at most twice. + */ + err = hfs_bmap_reserve(src_fd.tree, 4 * (int)src_fd.tree->depth - 1); + if (err) + goto out; + /* find the old dir entry and read the data */ err = hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index e8770935ce6d..58f296bfd438 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c @@ -100,6 +100,10 @@ static int __hfsplus_ext_write_extent(struct inode *inode, if (hip->extent_state & HFSPLUS_EXT_NEW) { if (res != -ENOENT) return res; + /* Fail early and avoid ENOSPC during the btree operation */ + res = hfs_bmap_reserve(fd->tree, fd->tree->depth + 1); + if (res) + return res; hfs_brec_insert(fd, hip->cached_extents, sizeof(hfsplus_extent_rec)); hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW); @@ -233,7 +237,9 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, ablock = iblock >> sbi->fs_shift; if (iblock >= hip->fs_blocks) { - if (iblock > hip->fs_blocks || !create) + if (!create) + return 0; + if (iblock > hip->fs_blocks) return -EIO; if (ablock >= hip->alloc_blocks) { res = hfsplus_file_extend(inode, false); diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index a015044daa05..dbb55d823385 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -312,6 +312,7 @@ static inline unsigned short hfsplus_min_io_size(struct super_block *sb) #define hfs_btree_open hfsplus_btree_open #define hfs_btree_close hfsplus_btree_close #define hfs_btree_write hfsplus_btree_write +#define hfs_bmap_reserve hfsplus_bmap_reserve #define hfs_bmap_alloc hfsplus_bmap_alloc #define hfs_bmap_free hfsplus_bmap_free #define hfs_bnode_read hfsplus_bnode_read @@ -396,6 +397,7 @@ u32 hfsplus_calc_btree_clump_size(u32 block_size, u32 node_size, u64 sectors, struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id); void hfs_btree_close(struct hfs_btree *tree); int hfs_btree_write(struct hfs_btree *tree); +int hfs_bmap_reserve(struct hfs_btree *tree, int rsvd_nodes); struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree); void hfs_bmap_free(struct hfs_bnode *node); diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 190c60efbc99..5b31f4730ee9 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -262,6 +262,7 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr) } truncate_setsize(inode, attr->ia_size); hfsplus_file_truncate(inode); + inode->i_mtime = inode->i_ctime = current_time(inode); } setattr_copy(inode, attr); diff --git a/fs/incfs/Kconfig b/fs/incfs/Kconfig new file mode 100644 index 000000000000..a655d599ea46 --- /dev/null +++ b/fs/incfs/Kconfig @@ -0,0 +1,19 @@ +config INCREMENTAL_FS + tristate "Incremental file system support" + depends on BLOCK + select DECOMPRESS_LZ4 + select CRC32 + select CRYPTO + select CRYPTO_RSA + select CRYPTO_SHA256 + select X509_CERTIFICATE_PARSER + select ASYMMETRIC_KEY_TYPE + select ASYMMETRIC_PUBLIC_KEY_SUBTYPE + select PKCS7_MESSAGE_PARSER + help + Incremental FS is a read-only virtual file system that facilitates execution + of programs while their binaries are still being lazily downloaded over the + network, USB or pigeon post. + + To compile this file system support as a module, choose M here: the + module will be called incrementalfs. diff --git a/fs/incfs/Makefile b/fs/incfs/Makefile new file mode 100644 index 000000000000..8d734bf91ecd --- /dev/null +++ b/fs/incfs/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_INCREMENTAL_FS) += incrementalfs.o + +incrementalfs-y := \ + data_mgmt.o \ + format.o \ + integrity.o \ + main.o \ + vfs.o diff --git a/fs/incfs/data_mgmt.c b/fs/incfs/data_mgmt.c new file mode 100644 index 000000000000..afdb3dfd3355 --- /dev/null +++ b/fs/incfs/data_mgmt.c @@ -0,0 +1,1077 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Google LLC + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "data_mgmt.h" +#include "format.h" +#include "integrity.h" + +struct mount_info *incfs_alloc_mount_info(struct super_block *sb, + struct mount_options *options, + struct path *backing_dir_path) +{ + struct mount_info *mi = NULL; + int error = 0; + + mi = kzalloc(sizeof(*mi), GFP_NOFS); + if (!mi) + return ERR_PTR(-ENOMEM); + + mi->mi_sb = sb; + mi->mi_options = *options; + mi->mi_backing_dir_path = *backing_dir_path; + mi->mi_owner = get_current_cred(); + path_get(&mi->mi_backing_dir_path); + mutex_init(&mi->mi_dir_struct_mutex); + mutex_init(&mi->mi_pending_reads_mutex); + init_waitqueue_head(&mi->mi_pending_reads_notif_wq); + INIT_LIST_HEAD(&mi->mi_reads_list_head); + + if (options->read_log_pages != 0) { + size_t buf_size = PAGE_SIZE * options->read_log_pages; + + spin_lock_init(&mi->mi_log.rl_writer_lock); + init_waitqueue_head(&mi->mi_log.ml_notif_wq); + + mi->mi_log.rl_size = buf_size / sizeof(*mi->mi_log.rl_ring_buf); + mi->mi_log.rl_ring_buf = kzalloc(buf_size, GFP_NOFS); + if (!mi->mi_log.rl_ring_buf) { + error = -ENOMEM; + goto err; + } + } + + return mi; + +err: + incfs_free_mount_info(mi); + return ERR_PTR(error); +} + +void incfs_free_mount_info(struct mount_info *mi) +{ + if (!mi) + return; + + dput(mi->mi_index_dir); + path_put(&mi->mi_backing_dir_path); + mutex_destroy(&mi->mi_dir_struct_mutex); + mutex_destroy(&mi->mi_pending_reads_mutex); + put_cred(mi->mi_owner); + kfree(mi->mi_log.rl_ring_buf); + kfree(mi->log_xattr); + kfree(mi->pending_read_xattr); + kfree(mi); +} + +static void data_file_segment_init(struct data_file_segment *segment) +{ + init_waitqueue_head(&segment->new_data_arrival_wq); + mutex_init(&segment->blockmap_mutex); + INIT_LIST_HEAD(&segment->reads_list_head); +} + +static void data_file_segment_destroy(struct data_file_segment *segment) +{ + mutex_destroy(&segment->blockmap_mutex); +} + +struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf) +{ + struct data_file *df; + struct backing_file_context *bfc; + int md_records; + u64 size; + int error; + int i; + + if (!bf || !mi) + return ERR_PTR(-EFAULT); + + if (!S_ISREG(bf->f_inode->i_mode)) + return ERR_PTR(-EBADF); + + bfc = incfs_alloc_bfc(bf); + if (IS_ERR(bfc)) + return ERR_CAST(bfc); + + df = kzalloc(sizeof(*df), GFP_NOFS); + if (!df) { + error = -ENOMEM; + goto out; + } + + df->df_backing_file_context = bfc; + df->df_mount_info = mi; + for (i = 0; i < ARRAY_SIZE(df->df_segments); i++) + data_file_segment_init(&df->df_segments[i]); + + error = mutex_lock_interruptible(&bfc->bc_mutex); + if (error) + goto out; + error = incfs_read_file_header(bfc, &df->df_metadata_off, + &df->df_id, &size); + mutex_unlock(&bfc->bc_mutex); + + if (error) + goto out; + + df->df_size = size; + if (size > 0) + df->df_block_count = get_blocks_count_for_size(size); + + md_records = incfs_scan_metadata_chain(df); + if (md_records < 0) + error = md_records; + +out: + if (error) { + incfs_free_bfc(bfc); + df->df_backing_file_context = NULL; + incfs_free_data_file(df); + return ERR_PTR(error); + } + return df; +} + +void incfs_free_data_file(struct data_file *df) +{ + int i; + + if (!df) + return; + + incfs_free_mtree(df->df_hash_tree); + for (i = 0; i < ARRAY_SIZE(df->df_segments); i++) + data_file_segment_destroy(&df->df_segments[i]); + incfs_free_bfc(df->df_backing_file_context); + kfree(df); +} + +int make_inode_ready_for_data_ops(struct mount_info *mi, + struct inode *inode, + struct file *backing_file) +{ + struct inode_info *node = get_incfs_node(inode); + struct data_file *df; + int err = 0; + + inode_lock(inode); + if (S_ISREG(inode->i_mode)) { + if (!node->n_file) { + df = incfs_open_data_file(mi, backing_file); + + if (IS_ERR(df)) + err = PTR_ERR(df); + else + node->n_file = df; + } + } else + err = -EBADF; + inode_unlock(inode); + return err; +} + +struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf) +{ + struct dir_file *dir; + + if (!S_ISDIR(bf->f_inode->i_mode)) + return ERR_PTR(-EBADF); + + dir = kzalloc(sizeof(*dir), GFP_NOFS); + if (!dir) + return ERR_PTR(-ENOMEM); + + dir->backing_dir = get_file(bf); + dir->mount_info = mi; + return dir; +} + +void incfs_free_dir_file(struct dir_file *dir) +{ + if (!dir) + return; + if (dir->backing_dir) + fput(dir->backing_dir); + kfree(dir); +} + +static ssize_t decompress(struct mem_range src, struct mem_range dst) +{ + int result = LZ4_decompress_safe(src.data, dst.data, src.len, dst.len); + + if (result < 0) + return -EBADMSG; + + return result; +} + +static void log_block_read(struct mount_info *mi, incfs_uuid_t *id, + int block_index, bool timed_out) +{ + struct read_log *log = &mi->mi_log; + struct read_log_state state; + s64 now_us = ktime_to_us(ktime_get()); + struct read_log_record record = { + .file_id = *id, + .timestamp_us = now_us + }; + + set_block_index(&record, block_index); + set_timed_out(&record, timed_out); + + if (log->rl_size == 0) + return; + + spin_lock(&log->rl_writer_lock); + state = READ_ONCE(log->rl_state); + log->rl_ring_buf[state.next_index] = record; + if (++state.next_index == log->rl_size) { + state.next_index = 0; + ++state.current_pass_no; + } + WRITE_ONCE(log->rl_state, state); + spin_unlock(&log->rl_writer_lock); + + wake_up_all(&log->ml_notif_wq); +} + +static int validate_hash_tree(struct file *bf, struct data_file *df, + int block_index, struct mem_range data, u8 *buf) +{ + u8 digest[INCFS_MAX_HASH_SIZE] = {}; + struct mtree *tree = NULL; + struct incfs_df_signature *sig = NULL; + struct mem_range calc_digest_rng; + struct mem_range saved_digest_rng; + struct mem_range root_hash_rng; + int digest_size; + int hash_block_index = block_index; + int hash_per_block; + int lvl = 0; + int res; + + tree = df->df_hash_tree; + sig = df->df_signature; + if (!tree || !sig) + return 0; + + digest_size = tree->alg->digest_size; + hash_per_block = INCFS_DATA_FILE_BLOCK_SIZE / digest_size; + calc_digest_rng = range(digest, digest_size); + res = incfs_calc_digest(tree->alg, data, calc_digest_rng); + if (res) + return res; + + for (lvl = 0; lvl < tree->depth; lvl++) { + loff_t lvl_off = + tree->hash_level_suboffset[lvl] + sig->hash_offset; + loff_t hash_block_off = lvl_off + + round_down(hash_block_index * digest_size, + INCFS_DATA_FILE_BLOCK_SIZE); + size_t hash_off_in_block = hash_block_index * digest_size + % INCFS_DATA_FILE_BLOCK_SIZE; + struct mem_range buf_range = range(buf, + INCFS_DATA_FILE_BLOCK_SIZE); + ssize_t read_res = incfs_kread(bf, buf, + INCFS_DATA_FILE_BLOCK_SIZE, hash_block_off); + + if (read_res < 0) + return read_res; + if (read_res != INCFS_DATA_FILE_BLOCK_SIZE) + return -EIO; + + saved_digest_rng = range(buf + hash_off_in_block, digest_size); + if (!incfs_equal_ranges(calc_digest_rng, saved_digest_rng)) { + int i; + bool zero = true; + + pr_debug("incfs: Hash mismatch lvl:%d blk:%d\n", + lvl, block_index); + for (i = 0; i < saved_digest_rng.len; ++i) + if (saved_digest_rng.data[i]) { + zero = false; + break; + } + + if (zero) + pr_debug("incfs: Note saved_digest all zero - did you forget to load the hashes?\n"); + return -EBADMSG; + } + + res = incfs_calc_digest(tree->alg, buf_range, calc_digest_rng); + if (res) + return res; + hash_block_index /= hash_per_block; + } + + root_hash_rng = range(tree->root_hash, digest_size); + if (!incfs_equal_ranges(calc_digest_rng, root_hash_rng)) { + pr_debug("incfs: Root hash mismatch blk:%d\n", block_index); + return -EBADMSG; + } + return 0; +} + +static struct data_file_segment *get_file_segment(struct data_file *df, + int block_index) +{ + int seg_idx = block_index % ARRAY_SIZE(df->df_segments); + + return &df->df_segments[seg_idx]; +} + +static bool is_data_block_present(struct data_file_block *block) +{ + return (block->db_backing_file_data_offset != 0) && + (block->db_stored_size != 0); +} + +static int get_data_file_block(struct data_file *df, int index, + struct data_file_block *res_block) +{ + struct incfs_blockmap_entry bme = {}; + struct backing_file_context *bfc = NULL; + loff_t blockmap_off = 0; + u16 flags = 0; + int error = 0; + + if (!df || !res_block) + return -EFAULT; + + blockmap_off = df->df_blockmap_off; + bfc = df->df_backing_file_context; + + if (index < 0 || index >= df->df_block_count || blockmap_off == 0) + return -EINVAL; + + error = incfs_read_blockmap_entry(bfc, index, blockmap_off, &bme); + if (error) + return error; + + flags = le16_to_cpu(bme.me_flags); + res_block->db_backing_file_data_offset = + le16_to_cpu(bme.me_data_offset_hi); + res_block->db_backing_file_data_offset <<= 32; + res_block->db_backing_file_data_offset |= + le32_to_cpu(bme.me_data_offset_lo); + res_block->db_stored_size = le16_to_cpu(bme.me_data_size); + res_block->db_comp_alg = (flags & INCFS_BLOCK_COMPRESSED_LZ4) ? + COMPRESSION_LZ4 : + COMPRESSION_NONE; + return 0; +} + +static bool is_read_done(struct pending_read *read) +{ + return atomic_read_acquire(&read->done) != 0; +} + +static void set_read_done(struct pending_read *read) +{ + atomic_set_release(&read->done, 1); +} + +/* + * Notifies a given data file about pending read from a given block. + * Returns a new pending read entry. + */ +static struct pending_read *add_pending_read(struct data_file *df, + int block_index) +{ + struct pending_read *result = NULL; + struct data_file_segment *segment = NULL; + struct mount_info *mi = NULL; + + segment = get_file_segment(df, block_index); + mi = df->df_mount_info; + + result = kzalloc(sizeof(*result), GFP_NOFS); + if (!result) + return NULL; + + result->file_id = df->df_id; + result->block_index = block_index; + result->timestamp_us = ktime_to_us(ktime_get()); + + mutex_lock(&mi->mi_pending_reads_mutex); + + result->serial_number = ++mi->mi_last_pending_read_number; + mi->mi_pending_reads_count++; + + list_add(&result->mi_reads_list, &mi->mi_reads_list_head); + list_add(&result->segment_reads_list, &segment->reads_list_head); + mutex_unlock(&mi->mi_pending_reads_mutex); + + wake_up_all(&mi->mi_pending_reads_notif_wq); + return result; +} + +/* Notifies a given data file that pending read is completed. */ +static void remove_pending_read(struct data_file *df, struct pending_read *read) +{ + struct mount_info *mi = NULL; + + if (!df || !read) { + WARN_ON(!df); + WARN_ON(!read); + return; + } + + mi = df->df_mount_info; + + mutex_lock(&mi->mi_pending_reads_mutex); + list_del(&read->mi_reads_list); + list_del(&read->segment_reads_list); + + mi->mi_pending_reads_count--; + mutex_unlock(&mi->mi_pending_reads_mutex); + + kfree(read); +} + +static void notify_pending_reads(struct mount_info *mi, + struct data_file_segment *segment, + int index) +{ + struct pending_read *entry = NULL; + + /* Notify pending reads waiting for this block. */ + mutex_lock(&mi->mi_pending_reads_mutex); + list_for_each_entry(entry, &segment->reads_list_head, + segment_reads_list) { + if (entry->block_index == index) + set_read_done(entry); + } + mutex_unlock(&mi->mi_pending_reads_mutex); + wake_up_all(&segment->new_data_arrival_wq); +} + +static int wait_for_data_block(struct data_file *df, int block_index, + int timeout_ms, + struct data_file_block *res_block) +{ + struct data_file_block block = {}; + struct data_file_segment *segment = NULL; + struct pending_read *read = NULL; + struct mount_info *mi = NULL; + int error = 0; + int wait_res = 0; + + if (!df || !res_block) + return -EFAULT; + + if (block_index < 0 || block_index >= df->df_block_count) + return -EINVAL; + + if (df->df_blockmap_off <= 0) + return -ENODATA; + + segment = get_file_segment(df, block_index); + error = mutex_lock_interruptible(&segment->blockmap_mutex); + if (error) + return error; + + /* Look up the given block */ + error = get_data_file_block(df, block_index, &block); + + /* If it's not found, create a pending read */ + if (!error && !is_data_block_present(&block) && timeout_ms != 0) + read = add_pending_read(df, block_index); + + mutex_unlock(&segment->blockmap_mutex); + if (error) + return error; + + /* If the block was found, just return it. No need to wait. */ + if (is_data_block_present(&block)) { + *res_block = block; + return 0; + } + + mi = df->df_mount_info; + + if (timeout_ms == 0) { + log_block_read(mi, &df->df_id, block_index, + true /*timed out*/); + return -ETIME; + } + + if (!read) + return -ENOMEM; + + /* Wait for notifications about block's arrival */ + wait_res = + wait_event_interruptible_timeout(segment->new_data_arrival_wq, + (is_read_done(read)), + msecs_to_jiffies(timeout_ms)); + + /* Woke up, the pending read is no longer needed. */ + remove_pending_read(df, read); + read = NULL; + + if (wait_res == 0) { + /* Wait has timed out */ + log_block_read(mi, &df->df_id, block_index, + true /*timed out*/); + return -ETIME; + } + if (wait_res < 0) { + /* + * Only ERESTARTSYS is really expected here when a signal + * comes while we wait. + */ + return wait_res; + } + + error = mutex_lock_interruptible(&segment->blockmap_mutex); + if (error) + return error; + + /* + * Re-read block's info now, it has just arrived and + * should be available. + */ + error = get_data_file_block(df, block_index, &block); + if (!error) { + if (is_data_block_present(&block)) + *res_block = block; + else { + /* + * Somehow wait finished successfully bug block still + * can't be found. It's not normal. + */ + pr_warn("incfs:Wait succeeded, but block not found.\n"); + error = -ENODATA; + } + } + + mutex_unlock(&segment->blockmap_mutex); + return error; +} + +ssize_t incfs_read_data_file_block(struct mem_range dst, struct data_file *df, + int index, int timeout_ms, + struct mem_range tmp) +{ + loff_t pos; + ssize_t result; + size_t bytes_to_read; + struct mount_info *mi = NULL; + struct file *bf = NULL; + struct data_file_block block = {}; + + if (!dst.data || !df) + return -EFAULT; + + if (tmp.len < 2 * INCFS_DATA_FILE_BLOCK_SIZE) + return -ERANGE; + + mi = df->df_mount_info; + bf = df->df_backing_file_context->bc_file; + + result = wait_for_data_block(df, index, timeout_ms, &block); + if (result < 0) + goto out; + + pos = block.db_backing_file_data_offset; + if (block.db_comp_alg == COMPRESSION_NONE) { + bytes_to_read = min(dst.len, block.db_stored_size); + result = incfs_kread(bf, dst.data, bytes_to_read, pos); + + /* Some data was read, but not enough */ + if (result >= 0 && result != bytes_to_read) + result = -EIO; + } else { + bytes_to_read = min(tmp.len, block.db_stored_size); + result = incfs_kread(bf, tmp.data, bytes_to_read, pos); + if (result == bytes_to_read) { + result = + decompress(range(tmp.data, bytes_to_read), dst); + if (result < 0) { + const char *name = + bf->f_path.dentry->d_name.name; + + pr_warn_once("incfs: Decompression error. %s", + name); + } + } else if (result >= 0) { + /* Some data was read, but not enough */ + result = -EIO; + } + } + + if (result > 0) { + int err = validate_hash_tree(bf, df, index, dst, tmp.data); + + if (err < 0) + result = err; + } + + if (result >= 0) + log_block_read(mi, &df->df_id, index, false /*timed out*/); + +out: + return result; +} + +int incfs_process_new_data_block(struct data_file *df, + struct incfs_fill_block *block, u8 *data) +{ + struct mount_info *mi = NULL; + struct backing_file_context *bfc = NULL; + struct data_file_segment *segment = NULL; + struct data_file_block existing_block = {}; + u16 flags = 0; + int error = 0; + + if (!df || !block) + return -EFAULT; + + bfc = df->df_backing_file_context; + mi = df->df_mount_info; + + if (block->block_index >= df->df_block_count) + return -ERANGE; + + segment = get_file_segment(df, block->block_index); + if (!segment) + return -EFAULT; + if (block->compression == COMPRESSION_LZ4) + flags |= INCFS_BLOCK_COMPRESSED_LZ4; + + error = mutex_lock_interruptible(&segment->blockmap_mutex); + if (error) + return error; + + error = get_data_file_block(df, block->block_index, &existing_block); + if (error) + goto unlock; + if (is_data_block_present(&existing_block)) { + /* Block is already present, nothing to do here */ + goto unlock; + } + + error = mutex_lock_interruptible(&bfc->bc_mutex); + if (!error) { + error = incfs_write_data_block_to_backing_file( + bfc, range(data, block->data_len), block->block_index, + df->df_blockmap_off, flags); + mutex_unlock(&bfc->bc_mutex); + } + if (!error) + notify_pending_reads(mi, segment, block->block_index); + +unlock: + mutex_unlock(&segment->blockmap_mutex); + if (error) + pr_debug("incfs: %s %d error: %d\n", __func__, + block->block_index, error); + return error; +} + +int incfs_read_file_signature(struct data_file *df, struct mem_range dst) +{ + struct file *bf = df->df_backing_file_context->bc_file; + struct incfs_df_signature *sig; + int read_res = 0; + + if (!dst.data) + return -EFAULT; + + sig = df->df_signature; + if (!sig) + return 0; + + if (dst.len < sig->sig_size) + return -E2BIG; + + read_res = incfs_kread(bf, dst.data, sig->sig_size, sig->sig_offset); + + if (read_res < 0) + return read_res; + + if (read_res != sig->sig_size) + return -EIO; + + return read_res; +} + +int incfs_process_new_hash_block(struct data_file *df, + struct incfs_fill_block *block, u8 *data) +{ + struct backing_file_context *bfc = NULL; + struct mount_info *mi = NULL; + struct mtree *hash_tree = NULL; + struct incfs_df_signature *sig = NULL; + loff_t hash_area_base = 0; + loff_t hash_area_size = 0; + int error = 0; + + if (!df || !block) + return -EFAULT; + + if (!(block->flags & INCFS_BLOCK_FLAGS_HASH)) + return -EINVAL; + + bfc = df->df_backing_file_context; + mi = df->df_mount_info; + + if (!df) + return -ENOENT; + + hash_tree = df->df_hash_tree; + sig = df->df_signature; + if (!hash_tree || !sig || sig->hash_offset == 0) + return -ENOTSUPP; + + hash_area_base = sig->hash_offset; + hash_area_size = sig->hash_size; + if (hash_area_size < block->block_index * INCFS_DATA_FILE_BLOCK_SIZE + + block->data_len) { + /* Hash block goes beyond dedicated hash area of this file. */ + return -ERANGE; + } + + error = mutex_lock_interruptible(&bfc->bc_mutex); + if (!error) + error = incfs_write_hash_block_to_backing_file( + bfc, range(data, block->data_len), block->block_index, + hash_area_base); + mutex_unlock(&bfc->bc_mutex); + return error; +} + +static int process_blockmap_md(struct incfs_blockmap *bm, + struct metadata_handler *handler) +{ + struct data_file *df = handler->context; + int error = 0; + loff_t base_off = le64_to_cpu(bm->m_base_offset); + u32 block_count = le32_to_cpu(bm->m_block_count); + + if (!df) + return -EFAULT; + + if (df->df_block_count != block_count) + return -EBADMSG; + + df->df_blockmap_off = base_off; + return error; +} + +static int process_file_attr_md(struct incfs_file_attr *fa, + struct metadata_handler *handler) +{ + struct data_file *df = handler->context; + u16 attr_size = le16_to_cpu(fa->fa_size); + + if (!df) + return -EFAULT; + + if (attr_size > INCFS_MAX_FILE_ATTR_SIZE) + return -E2BIG; + + df->n_attr.fa_value_offset = le64_to_cpu(fa->fa_offset); + df->n_attr.fa_value_size = attr_size; + df->n_attr.fa_crc = le32_to_cpu(fa->fa_crc); + + return 0; +} + +static int process_file_signature_md(struct incfs_file_signature *sg, + struct metadata_handler *handler) +{ + struct data_file *df = handler->context; + struct mtree *hash_tree = NULL; + int error = 0; + struct incfs_df_signature *signature = + kzalloc(sizeof(*signature), GFP_NOFS); + void *buf = 0; + ssize_t read; + + if (!df || !df->df_backing_file_context || + !df->df_backing_file_context->bc_file) { + error = -ENOENT; + goto out; + } + + signature->hash_offset = le64_to_cpu(sg->sg_hash_tree_offset); + signature->hash_size = le32_to_cpu(sg->sg_hash_tree_size); + signature->sig_offset = le64_to_cpu(sg->sg_sig_offset); + signature->sig_size = le32_to_cpu(sg->sg_sig_size); + + buf = kzalloc(signature->sig_size, GFP_NOFS); + if (!buf) { + error = -ENOMEM; + goto out; + } + + read = incfs_kread(df->df_backing_file_context->bc_file, buf, + signature->sig_size, signature->sig_offset); + if (read < 0) { + error = read; + goto out; + } + + if (read != signature->sig_size) { + error = -EINVAL; + goto out; + } + + hash_tree = incfs_alloc_mtree(range(buf, signature->sig_size), + df->df_block_count); + if (IS_ERR(hash_tree)) { + error = PTR_ERR(hash_tree); + hash_tree = NULL; + goto out; + } + if (hash_tree->hash_tree_area_size != signature->hash_size) { + error = -EINVAL; + goto out; + } + if (signature->hash_size > 0 && + handler->md_record_offset <= signature->hash_offset) { + error = -EINVAL; + goto out; + } + if (handler->md_record_offset <= signature->sig_offset) { + error = -EINVAL; + goto out; + } + df->df_hash_tree = hash_tree; + hash_tree = NULL; + df->df_signature = signature; + signature = NULL; +out: + incfs_free_mtree(hash_tree); + kfree(signature); + kfree(buf); + + return error; +} + +int incfs_scan_metadata_chain(struct data_file *df) +{ + struct metadata_handler *handler = NULL; + int result = 0; + int records_count = 0; + int error = 0; + struct backing_file_context *bfc = NULL; + + if (!df || !df->df_backing_file_context) + return -EFAULT; + + bfc = df->df_backing_file_context; + + handler = kzalloc(sizeof(*handler), GFP_NOFS); + if (!handler) + return -ENOMEM; + + /* No writing to the backing file while it's being scanned. */ + error = mutex_lock_interruptible(&bfc->bc_mutex); + if (error) + goto out; + + /* Reading superblock */ + handler->md_record_offset = df->df_metadata_off; + handler->context = df; + handler->handle_blockmap = process_blockmap_md; + handler->handle_file_attr = process_file_attr_md; + handler->handle_signature = process_file_signature_md; + + pr_debug("incfs: Starting reading incfs-metadata records at offset %lld\n", + handler->md_record_offset); + while (handler->md_record_offset > 0) { + error = incfs_read_next_metadata_record(bfc, handler); + if (error) { + pr_warn("incfs: Error during reading incfs-metadata record. Offset: %lld Record #%d Error code: %d\n", + handler->md_record_offset, records_count + 1, + -error); + break; + } + records_count++; + } + if (error) { + pr_debug("incfs: Error %d after reading %d incfs-metadata records.\n", + -error, records_count); + result = error; + } else { + pr_debug("incfs: Finished reading %d incfs-metadata records.\n", + records_count); + result = records_count; + } + mutex_unlock(&bfc->bc_mutex); +out: + kfree(handler); + return result; +} + +/* + * Quickly checks if there are pending reads with a serial number larger + * than a given one. + */ +bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number) +{ + bool result = false; + + mutex_lock(&mi->mi_pending_reads_mutex); + result = (mi->mi_last_pending_read_number > last_number) && + (mi->mi_pending_reads_count > 0); + mutex_unlock(&mi->mi_pending_reads_mutex); + return result; +} + +int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, + struct incfs_pending_read_info *reads, + int reads_size) +{ + int reported_reads = 0; + struct pending_read *entry = NULL; + + if (!mi) + return -EFAULT; + + if (reads_size <= 0) + return 0; + + mutex_lock(&mi->mi_pending_reads_mutex); + + if (mi->mi_last_pending_read_number <= sn_lowerbound + || mi->mi_pending_reads_count == 0) + goto unlock; + + list_for_each_entry(entry, &mi->mi_reads_list_head, mi_reads_list) { + if (entry->serial_number <= sn_lowerbound) + continue; + + reads[reported_reads].file_id = entry->file_id; + reads[reported_reads].block_index = entry->block_index; + reads[reported_reads].serial_number = entry->serial_number; + reads[reported_reads].timestamp_us = entry->timestamp_us; + /* reads[reported_reads].kind = INCFS_READ_KIND_PENDING; */ + + reported_reads++; + if (reported_reads >= reads_size) + break; + } + +unlock: + mutex_unlock(&mi->mi_pending_reads_mutex); + + return reported_reads; +} + +struct read_log_state incfs_get_log_state(struct mount_info *mi) +{ + struct read_log *log = &mi->mi_log; + struct read_log_state result; + + spin_lock(&log->rl_writer_lock); + result = READ_ONCE(log->rl_state); + spin_unlock(&log->rl_writer_lock); + return result; +} + +static u64 calc_record_count(const struct read_log_state *state, int rl_size) +{ + return state->current_pass_no * (u64)rl_size + state->next_index; +} + +int incfs_get_uncollected_logs_count(struct mount_info *mi, + struct read_log_state state) +{ + struct read_log *log = &mi->mi_log; + + u64 count = calc_record_count(&log->rl_state, log->rl_size) - + calc_record_count(&state, log->rl_size); + return min_t(int, count, log->rl_size); +} + +static void fill_pending_read_from_log_record( + struct incfs_pending_read_info *dest, const struct read_log_record *src, + struct read_log_state *state, u64 log_size) +{ + dest->file_id = src->file_id; + dest->block_index = get_block_index(src); + dest->serial_number = + state->current_pass_no * log_size + state->next_index; + dest->timestamp_us = src->timestamp_us; +} + +int incfs_collect_logged_reads(struct mount_info *mi, + struct read_log_state *reader_state, + struct incfs_pending_read_info *reads, + int reads_size) +{ + struct read_log *log = &mi->mi_log; + struct read_log_state live_state = incfs_get_log_state(mi); + u64 read_count = calc_record_count(reader_state, log->rl_size); + u64 written_count = calc_record_count(&live_state, log->rl_size); + int dst_idx; + + if (reader_state->next_index >= log->rl_size || + read_count > written_count) + return -ERANGE; + + if (read_count == written_count) + return 0; + + if (read_count > written_count) { + /* This reader is somehow ahead of the writer. */ + pr_debug("incfs: Log reader is ahead of writer\n"); + *reader_state = live_state; + } + + if (written_count - read_count > log->rl_size) { + /* + * Reading pointer is too far behind, + * start from the record following the write pointer. + */ + pr_debug("incfs: read pointer is behind, moving: %u/%u -> %u/%u / %u\n", + (u32)reader_state->next_index, + (u32)reader_state->current_pass_no, + (u32)live_state.next_index, + (u32)live_state.current_pass_no - 1, (u32)log->rl_size); + + *reader_state = (struct read_log_state){ + .next_index = live_state.next_index, + .current_pass_no = live_state.current_pass_no - 1, + }; + } + + for (dst_idx = 0; dst_idx < reads_size; dst_idx++) { + if (reader_state->next_index == live_state.next_index && + reader_state->current_pass_no == live_state.current_pass_no) + break; + + fill_pending_read_from_log_record( + &reads[dst_idx], + &log->rl_ring_buf[reader_state->next_index], + reader_state, log->rl_size); + + reader_state->next_index++; + if (reader_state->next_index == log->rl_size) { + reader_state->next_index = 0; + reader_state->current_pass_no++; + } + } + return dst_idx; +} + +bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs) +{ + if (lhs.len != rhs.len) + return false; + return memcmp(lhs.data, rhs.data, lhs.len) == 0; +} diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h new file mode 100644 index 000000000000..01045403026a --- /dev/null +++ b/fs/incfs/data_mgmt.h @@ -0,0 +1,367 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ +#ifndef _INCFS_DATA_MGMT_H +#define _INCFS_DATA_MGMT_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "internal.h" + +#define SEGMENTS_PER_FILE 3 + +struct read_log_record { + u32 bitfield; + + u64 timestamp_us; + + incfs_uuid_t file_id; +} __packed; + +#define RLR_BLOCK_INDEX_MASK 0x7fff +#define RLR_TIMED_OUT_MASK 0x8000 + +static inline u32 get_block_index(const struct read_log_record *rlr) +{ + return rlr->bitfield & RLR_BLOCK_INDEX_MASK; +} + +static inline void set_block_index(struct read_log_record *rlr, + u32 block_index) +{ + rlr->bitfield = (rlr->bitfield & ~RLR_BLOCK_INDEX_MASK) + | (block_index & RLR_BLOCK_INDEX_MASK); +} + +static inline bool get_timed_out(const struct read_log_record *rlr) +{ + return (rlr->bitfield & RLR_TIMED_OUT_MASK) == RLR_TIMED_OUT_MASK; +} + +static inline void set_timed_out(struct read_log_record *rlr, bool timed_out) +{ + if (timed_out) + rlr->bitfield |= RLR_TIMED_OUT_MASK; + else + rlr->bitfield &= ~RLR_TIMED_OUT_MASK; +} + +struct read_log_state { + /* Next slot in rl_ring_buf to write to. */ + u32 next_index; + + /* Current number of writer pass over rl_ring_buf */ + u32 current_pass_no; +}; + +/* A ring buffer to save records about data blocks which were recently read. */ +struct read_log { + struct read_log_record *rl_ring_buf; + + struct read_log_state rl_state; + + spinlock_t rl_writer_lock; + + int rl_size; + + /* + * A queue of waiters who want to be notified about reads. + */ + wait_queue_head_t ml_notif_wq; +}; + +struct mount_options { + unsigned int read_timeout_ms; + unsigned int readahead_pages; + unsigned int read_log_pages; + unsigned int read_log_wakeup_count; + bool no_backing_file_cache; + bool no_backing_file_readahead; +}; + +struct mount_info { + struct super_block *mi_sb; + + struct path mi_backing_dir_path; + + struct dentry *mi_index_dir; + + const struct cred *mi_owner; + + struct mount_options mi_options; + + /* This mutex is to be taken before create, rename, delete */ + struct mutex mi_dir_struct_mutex; + + /* + * A queue of waiters who want to be notified about new pending reads. + */ + wait_queue_head_t mi_pending_reads_notif_wq; + + /* + * Protects: + * - reads_list_head + * - mi_pending_reads_count + * - mi_last_pending_read_number + * - data_file_segment.reads_list_head + */ + struct mutex mi_pending_reads_mutex; + + /* List of active pending_read objects */ + struct list_head mi_reads_list_head; + + /* Total number of items in reads_list_head */ + int mi_pending_reads_count; + + /* + * Last serial number that was assigned to a pending read. + * 0 means no pending reads have been seen yet. + */ + int mi_last_pending_read_number; + + /* Temporary buffer for read logger. */ + struct read_log mi_log; + + void *log_xattr; + size_t log_xattr_size; + + void *pending_read_xattr; + size_t pending_read_xattr_size; +}; + +struct data_file_block { + loff_t db_backing_file_data_offset; + + size_t db_stored_size; + + enum incfs_compression_alg db_comp_alg; +}; + +struct pending_read { + incfs_uuid_t file_id; + + s64 timestamp_us; + + atomic_t done; + + int block_index; + + int serial_number; + + struct list_head mi_reads_list; + + struct list_head segment_reads_list; +}; + +struct data_file_segment { + wait_queue_head_t new_data_arrival_wq; + + /* Protects reads and writes from the blockmap */ + /* Good candidate for read/write mutex */ + struct mutex blockmap_mutex; + + /* List of active pending_read objects belonging to this segment */ + /* Protected by mount_info.pending_reads_mutex */ + struct list_head reads_list_head; +}; + +/* + * Extra info associated with a file. Just a few bytes set by a user. + */ +struct file_attr { + loff_t fa_value_offset; + + size_t fa_value_size; + + u32 fa_crc; +}; + + +struct data_file { + struct backing_file_context *df_backing_file_context; + + struct mount_info *df_mount_info; + + incfs_uuid_t df_id; + + /* + * Array of segments used to reduce lock contention for the file. + * Segment is chosen for a block depends on the block's index. + */ + struct data_file_segment df_segments[SEGMENTS_PER_FILE]; + + /* Base offset of the first metadata record. */ + loff_t df_metadata_off; + + /* Base offset of the block map. */ + loff_t df_blockmap_off; + + /* File size in bytes */ + loff_t df_size; + + int df_block_count; /* File size in DATA_FILE_BLOCK_SIZE blocks */ + + struct file_attr n_attr; + + struct mtree *df_hash_tree; + + struct incfs_df_signature *df_signature; +}; + +struct dir_file { + struct mount_info *mount_info; + + struct file *backing_dir; +}; + +struct inode_info { + struct mount_info *n_mount_info; /* A mount, this file belongs to */ + + struct inode *n_backing_inode; + + struct data_file *n_file; + + struct inode n_vfs_inode; +}; + +struct dentry_info { + struct path backing_path; +}; + +struct mount_info *incfs_alloc_mount_info(struct super_block *sb, + struct mount_options *options, + struct path *backing_dir_path); + +void incfs_free_mount_info(struct mount_info *mi); + +struct data_file *incfs_open_data_file(struct mount_info *mi, struct file *bf); +void incfs_free_data_file(struct data_file *df); + +int incfs_scan_metadata_chain(struct data_file *df); + +struct dir_file *incfs_open_dir_file(struct mount_info *mi, struct file *bf); +void incfs_free_dir_file(struct dir_file *dir); + +ssize_t incfs_read_data_file_block(struct mem_range dst, struct data_file *df, + int index, int timeout_ms, + struct mem_range tmp); + +int incfs_read_file_signature(struct data_file *df, struct mem_range dst); + +int incfs_process_new_data_block(struct data_file *df, + struct incfs_fill_block *block, u8 *data); + +int incfs_process_new_hash_block(struct data_file *df, + struct incfs_fill_block *block, u8 *data); + +bool incfs_fresh_pending_reads_exist(struct mount_info *mi, int last_number); + +/* + * Collects pending reads and saves them into the array (reads/reads_size). + * Only reads with serial_number > sn_lowerbound are reported. + * Returns how many reads were saved into the array. + */ +int incfs_collect_pending_reads(struct mount_info *mi, int sn_lowerbound, + struct incfs_pending_read_info *reads, + int reads_size); + +int incfs_collect_logged_reads(struct mount_info *mi, + struct read_log_state *start_state, + struct incfs_pending_read_info *reads, + int reads_size); +struct read_log_state incfs_get_log_state(struct mount_info *mi); +int incfs_get_uncollected_logs_count(struct mount_info *mi, + struct read_log_state state); + +static inline struct inode_info *get_incfs_node(struct inode *inode) +{ + if (!inode) + return NULL; + + if (inode->i_sb->s_magic != INCFS_MAGIC_NUMBER) { + /* This inode doesn't belong to us. */ + pr_warn_once("incfs: %s on an alien inode.", __func__); + return NULL; + } + + return container_of(inode, struct inode_info, n_vfs_inode); +} + +static inline struct data_file *get_incfs_data_file(struct file *f) +{ + struct inode_info *node; + + if (!f) + return NULL; + + if (!S_ISREG(f->f_inode->i_mode)) + return NULL; + + node = get_incfs_node(f->f_inode); + if (!node) + return NULL; + + return node->n_file; +} + +static inline struct dir_file *get_incfs_dir_file(struct file *f) +{ + if (!f) + return NULL; + + if (!S_ISDIR(f->f_inode->i_mode)) + return NULL; + + return (struct dir_file *)f->private_data; +} + +/* + * Make sure that inode_info.n_file is initialized and inode can be used + * for reading and writing data from/to the backing file. + */ +int make_inode_ready_for_data_ops(struct mount_info *mi, + struct inode *inode, + struct file *backing_file); + +static inline struct dentry_info *get_incfs_dentry(const struct dentry *d) +{ + if (!d) + return NULL; + + return (struct dentry_info *)d->d_fsdata; +} + +static inline void get_incfs_backing_path(const struct dentry *d, + struct path *path) +{ + struct dentry_info *di = get_incfs_dentry(d); + + if (!di) { + *path = (struct path) {}; + return; + } + + *path = di->backing_path; + path_get(path); +} + +static inline int get_blocks_count_for_size(u64 size) +{ + if (size == 0) + return 0; + return 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; +} + +bool incfs_equal_ranges(struct mem_range lhs, struct mem_range rhs); + +#endif /* _INCFS_DATA_MGMT_H */ diff --git a/fs/incfs/format.c b/fs/incfs/format.c new file mode 100644 index 000000000000..8c8213ee325d --- /dev/null +++ b/fs/incfs/format.c @@ -0,0 +1,669 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018 Google LLC + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "format.h" + +struct backing_file_context *incfs_alloc_bfc(struct file *backing_file) +{ + struct backing_file_context *result = NULL; + + result = kzalloc(sizeof(*result), GFP_NOFS); + if (!result) + return ERR_PTR(-ENOMEM); + + result->bc_file = get_file(backing_file); + mutex_init(&result->bc_mutex); + return result; +} + +void incfs_free_bfc(struct backing_file_context *bfc) +{ + if (!bfc) + return; + + if (bfc->bc_file) + fput(bfc->bc_file); + + mutex_destroy(&bfc->bc_mutex); + kfree(bfc); +} + +loff_t incfs_get_end_offset(struct file *f) +{ + /* + * This function assumes that file size and the end-offset + * are the same. This is not always true. + */ + return i_size_read(file_inode(f)); +} + +/* + * Truncate the tail of the file to the given length. + * Used to rollback partially successful multistep writes. + */ +static int truncate_backing_file(struct backing_file_context *bfc, + loff_t new_end) +{ + struct inode *inode = NULL; + struct dentry *dentry = NULL; + loff_t old_end = 0; + struct iattr attr; + int result = 0; + + if (!bfc) + return -EFAULT; + + LOCK_REQUIRED(bfc->bc_mutex); + + if (!bfc->bc_file) + return -EFAULT; + + old_end = incfs_get_end_offset(bfc->bc_file); + if (old_end == new_end) + return 0; + if (old_end < new_end) + return -EINVAL; + + inode = bfc->bc_file->f_inode; + dentry = bfc->bc_file->f_path.dentry; + + attr.ia_size = new_end; + attr.ia_valid = ATTR_SIZE; + + inode_lock(inode); + result = notify_change(dentry, &attr, NULL); + inode_unlock(inode); + + return result; +} + +/* Append a given number of zero bytes to the end of the backing file. */ +static int append_zeros(struct backing_file_context *bfc, size_t len) +{ + loff_t file_size = 0; + loff_t new_last_byte_offset = 0; + int res = 0; + + if (!bfc) + return -EFAULT; + + if (len == 0) + return 0; + + LOCK_REQUIRED(bfc->bc_mutex); + + /* + * Allocate only one byte at the new desired end of the file. + * It will increase file size and create a zeroed area of + * a given size. + */ + file_size = incfs_get_end_offset(bfc->bc_file); + new_last_byte_offset = file_size + len - 1; + res = vfs_fallocate(bfc->bc_file, 0, new_last_byte_offset, 1); + if (res) + return res; + + res = vfs_fsync_range(bfc->bc_file, file_size, file_size + len, 1); + return res; +} + +static int write_to_bf(struct backing_file_context *bfc, const void *buf, + size_t count, loff_t pos, bool sync) +{ + ssize_t res = 0; + + res = incfs_kwrite(bfc->bc_file, buf, count, pos); + if (res < 0) + return res; + if (res != count) + return -EIO; + + if (sync) + return vfs_fsync_range(bfc->bc_file, pos, pos + count, 1); + + return 0; +} + +static u32 calc_md_crc(struct incfs_md_header *record) +{ + u32 result = 0; + __le32 saved_crc = record->h_record_crc; + __le64 saved_md_offset = record->h_next_md_offset; + size_t record_size = min_t(size_t, le16_to_cpu(record->h_record_size), + INCFS_MAX_METADATA_RECORD_SIZE); + + /* Zero fields which needs to be excluded from CRC calculation. */ + record->h_record_crc = 0; + record->h_next_md_offset = 0; + result = crc32(0, record, record_size); + + /* Restore excluded fields. */ + record->h_record_crc = saved_crc; + record->h_next_md_offset = saved_md_offset; + + return result; +} + +/* + * Append a given metadata record to the backing file and update a previous + * record to add the new record the the metadata list. + */ +static int append_md_to_backing_file(struct backing_file_context *bfc, + struct incfs_md_header *record) +{ + int result = 0; + loff_t record_offset; + loff_t file_pos; + __le64 new_md_offset; + size_t record_size; + + if (!bfc || !record) + return -EFAULT; + + if (bfc->bc_last_md_record_offset < 0) + return -EINVAL; + + LOCK_REQUIRED(bfc->bc_mutex); + + record_size = le16_to_cpu(record->h_record_size); + file_pos = incfs_get_end_offset(bfc->bc_file); + record->h_prev_md_offset = cpu_to_le64(bfc->bc_last_md_record_offset); + record->h_next_md_offset = 0; + record->h_record_crc = cpu_to_le32(calc_md_crc(record)); + + /* Write the metadata record to the end of the backing file */ + record_offset = file_pos; + new_md_offset = cpu_to_le64(record_offset); + result = write_to_bf(bfc, record, record_size, file_pos, true); + if (result) + return result; + + /* Update next metadata offset in a previous record or a superblock. */ + if (bfc->bc_last_md_record_offset) { + /* + * Find a place in the previous md record where new record's + * offset needs to be saved. + */ + file_pos = bfc->bc_last_md_record_offset + + offsetof(struct incfs_md_header, h_next_md_offset); + } else { + /* + * No metadata yet, file a place to update in the + * file_header. + */ + file_pos = offsetof(struct incfs_file_header, + fh_first_md_offset); + } + result = write_to_bf(bfc, &new_md_offset, sizeof(new_md_offset), + file_pos, true); + if (result) + return result; + + bfc->bc_last_md_record_offset = record_offset; + return result; +} + +/* + * Reserve 0-filled space for the blockmap body, and append + * incfs_blockmap metadata record pointing to it. + */ +int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc, + u32 block_count, loff_t *map_base_off) +{ + struct incfs_blockmap blockmap = {}; + int result = 0; + loff_t file_end = 0; + size_t map_size = block_count * sizeof(struct incfs_blockmap_entry); + + if (!bfc) + return -EFAULT; + + blockmap.m_header.h_md_entry_type = INCFS_MD_BLOCK_MAP; + blockmap.m_header.h_record_size = cpu_to_le16(sizeof(blockmap)); + blockmap.m_header.h_next_md_offset = cpu_to_le64(0); + blockmap.m_block_count = cpu_to_le32(block_count); + + LOCK_REQUIRED(bfc->bc_mutex); + + /* Reserve 0-filled space for the blockmap body in the backing file. */ + file_end = incfs_get_end_offset(bfc->bc_file); + result = append_zeros(bfc, map_size); + if (result) + return result; + + /* Write blockmap metadata record pointing to the body written above. */ + blockmap.m_base_offset = cpu_to_le64(file_end); + result = append_md_to_backing_file(bfc, &blockmap.m_header); + if (result) { + /* Error, rollback file changes */ + truncate_backing_file(bfc, file_end); + } else if (map_base_off) { + *map_base_off = file_end; + } + + return result; +} + +/* + * Write file attribute data and metadata record to the backing file. + */ +int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, + struct mem_range value, struct incfs_file_attr *attr) +{ + struct incfs_file_attr file_attr = {}; + int result = 0; + u32 crc = 0; + loff_t value_offset = 0; + + if (!bfc) + return -EFAULT; + + if (value.len > INCFS_MAX_FILE_ATTR_SIZE) + return -ENOSPC; + + LOCK_REQUIRED(bfc->bc_mutex); + + crc = crc32(0, value.data, value.len); + value_offset = incfs_get_end_offset(bfc->bc_file); + file_attr.fa_header.h_md_entry_type = INCFS_MD_FILE_ATTR; + file_attr.fa_header.h_record_size = cpu_to_le16(sizeof(file_attr)); + file_attr.fa_header.h_next_md_offset = cpu_to_le64(0); + file_attr.fa_size = cpu_to_le16((u16)value.len); + file_attr.fa_offset = cpu_to_le64(value_offset); + file_attr.fa_crc = cpu_to_le32(crc); + + result = write_to_bf(bfc, value.data, value.len, value_offset, true); + if (result) + return result; + + result = append_md_to_backing_file(bfc, &file_attr.fa_header); + if (result) { + /* Error, rollback file changes */ + truncate_backing_file(bfc, value_offset); + } else if (attr) { + *attr = file_attr; + } + + return result; +} + +int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, + struct mem_range sig, u32 tree_size) +{ + struct incfs_file_signature sg = {}; + int result = 0; + loff_t rollback_pos = 0; + loff_t tree_area_pos = 0; + size_t alignment = 0; + + if (!bfc) + return -EFAULT; + + LOCK_REQUIRED(bfc->bc_mutex); + + rollback_pos = incfs_get_end_offset(bfc->bc_file); + + sg.sg_header.h_md_entry_type = INCFS_MD_SIGNATURE; + sg.sg_header.h_record_size = cpu_to_le16(sizeof(sg)); + sg.sg_header.h_next_md_offset = cpu_to_le64(0); + if (sig.data != NULL && sig.len > 0) { + loff_t pos = incfs_get_end_offset(bfc->bc_file); + + sg.sg_sig_size = cpu_to_le32(sig.len); + sg.sg_sig_offset = cpu_to_le64(pos); + + result = write_to_bf(bfc, sig.data, sig.len, pos, false); + if (result) + goto err; + } + + tree_area_pos = incfs_get_end_offset(bfc->bc_file); + if (tree_size > 0) { + if (tree_size > 5 * INCFS_DATA_FILE_BLOCK_SIZE) { + /* + * If hash tree is big enough, it makes sense to + * align in the backing file for faster access. + */ + loff_t offset = round_up(tree_area_pos, PAGE_SIZE); + + alignment = offset - tree_area_pos; + tree_area_pos = offset; + } + + /* + * If root hash is not the only hash in the tree. + * reserve 0-filled space for the tree. + */ + result = append_zeros(bfc, tree_size + alignment); + if (result) + goto err; + + sg.sg_hash_tree_size = cpu_to_le32(tree_size); + sg.sg_hash_tree_offset = cpu_to_le64(tree_area_pos); + } + + /* Write a hash tree metadata record pointing to the hash tree above. */ + result = append_md_to_backing_file(bfc, &sg.sg_header); +err: + if (result) { + /* Error, rollback file changes */ + truncate_backing_file(bfc, rollback_pos); + } + return result; +} + +/* + * Write a backing file header + * It should always be called only on empty file. + * incfs_super_block.s_first_md_offset is 0 for now, but will be updated + * once first metadata record is added. + */ +int incfs_write_fh_to_backing_file(struct backing_file_context *bfc, + incfs_uuid_t *uuid, u64 file_size) +{ + struct incfs_file_header fh = {}; + loff_t file_pos = 0; + + if (!bfc) + return -EFAULT; + + fh.fh_magic = cpu_to_le64(INCFS_MAGIC_NUMBER); + fh.fh_version = cpu_to_le64(INCFS_FORMAT_CURRENT_VER); + fh.fh_header_size = cpu_to_le16(sizeof(fh)); + fh.fh_first_md_offset = cpu_to_le64(0); + fh.fh_data_block_size = cpu_to_le16(INCFS_DATA_FILE_BLOCK_SIZE); + + fh.fh_file_size = cpu_to_le64(file_size); + fh.fh_uuid = *uuid; + + LOCK_REQUIRED(bfc->bc_mutex); + + file_pos = incfs_get_end_offset(bfc->bc_file); + if (file_pos != 0) + return -EEXIST; + + return write_to_bf(bfc, &fh, sizeof(fh), file_pos, true); +} + +/* Write a given data block and update file's blockmap to point it. */ +int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc, + struct mem_range block, int block_index, + loff_t bm_base_off, u16 flags) +{ + struct incfs_blockmap_entry bm_entry = {}; + int result = 0; + loff_t data_offset = 0; + loff_t bm_entry_off = + bm_base_off + sizeof(struct incfs_blockmap_entry) * block_index; + + if (!bfc) + return -EFAULT; + + if (block.len >= (1 << 16) || block_index < 0) + return -EINVAL; + + LOCK_REQUIRED(bfc->bc_mutex); + + data_offset = incfs_get_end_offset(bfc->bc_file); + if (data_offset <= bm_entry_off) { + /* Blockmap entry is beyond the file's end. It is not normal. */ + return -EINVAL; + } + + /* Write the block data at the end of the backing file. */ + result = write_to_bf(bfc, block.data, block.len, data_offset, false); + if (result) + return result; + + /* Update the blockmap to point to the newly written data. */ + bm_entry.me_data_offset_lo = cpu_to_le32((u32)data_offset); + bm_entry.me_data_offset_hi = cpu_to_le16((u16)(data_offset >> 32)); + bm_entry.me_data_size = cpu_to_le16((u16)block.len); + bm_entry.me_flags = cpu_to_le16(flags); + + result = write_to_bf(bfc, &bm_entry, sizeof(bm_entry), + bm_entry_off, false); + return result; +} + +int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, + struct mem_range block, + int block_index, loff_t hash_area_off) +{ + loff_t data_offset = 0; + loff_t file_end = 0; + + + if (!bfc) + return -EFAULT; + + LOCK_REQUIRED(bfc->bc_mutex); + + data_offset = hash_area_off + block_index * INCFS_DATA_FILE_BLOCK_SIZE; + file_end = incfs_get_end_offset(bfc->bc_file); + if (data_offset + block.len > file_end) { + /* Block is located beyond the file's end. It is not normal. */ + return -EINVAL; + } + + return write_to_bf(bfc, block.data, block.len, data_offset, false); +} + +/* Initialize a new image in a given backing file. */ +int incfs_make_empty_backing_file(struct backing_file_context *bfc, + incfs_uuid_t *uuid, u64 file_size) +{ + int result = 0; + + if (!bfc || !bfc->bc_file) + return -EFAULT; + + result = mutex_lock_interruptible(&bfc->bc_mutex); + if (result) + goto out; + + result = truncate_backing_file(bfc, 0); + if (result) + goto out; + + result = incfs_write_fh_to_backing_file(bfc, uuid, file_size); +out: + mutex_unlock(&bfc->bc_mutex); + return result; +} + +int incfs_read_blockmap_entry(struct backing_file_context *bfc, int block_index, + loff_t bm_base_off, + struct incfs_blockmap_entry *bm_entry) +{ + return incfs_read_blockmap_entries(bfc, bm_entry, block_index, 1, + bm_base_off); +} + +int incfs_read_blockmap_entries(struct backing_file_context *bfc, + struct incfs_blockmap_entry *entries, + int start_index, int blocks_number, + loff_t bm_base_off) +{ + loff_t bm_entry_off = + bm_base_off + sizeof(struct incfs_blockmap_entry) * start_index; + const size_t bytes_to_read = sizeof(struct incfs_blockmap_entry) + * blocks_number; + int result = 0; + + if (!bfc || !entries) + return -EFAULT; + + if (start_index < 0 || bm_base_off <= 0) + return -ENODATA; + + result = incfs_kread(bfc->bc_file, entries, bytes_to_read, + bm_entry_off); + if (result < 0) + return result; + if (result < bytes_to_read) + return -EIO; + return 0; +} + + +int incfs_read_file_header(struct backing_file_context *bfc, + loff_t *first_md_off, incfs_uuid_t *uuid, + u64 *file_size) +{ + ssize_t bytes_read = 0; + struct incfs_file_header fh = {}; + + if (!bfc || !first_md_off) + return -EFAULT; + + LOCK_REQUIRED(bfc->bc_mutex); + bytes_read = incfs_kread(bfc->bc_file, &fh, sizeof(fh), 0); + if (bytes_read < 0) + return bytes_read; + + if (bytes_read < sizeof(fh)) + return -EBADMSG; + + if (le64_to_cpu(fh.fh_magic) != INCFS_MAGIC_NUMBER) + return -EILSEQ; + + if (le64_to_cpu(fh.fh_version) > INCFS_FORMAT_CURRENT_VER) + return -EILSEQ; + + if (le16_to_cpu(fh.fh_data_block_size) != INCFS_DATA_FILE_BLOCK_SIZE) + return -EILSEQ; + + if (le16_to_cpu(fh.fh_header_size) != sizeof(fh)) + return -EILSEQ; + + if (first_md_off) + *first_md_off = le64_to_cpu(fh.fh_first_md_offset); + if (uuid) + *uuid = fh.fh_uuid; + if (file_size) + *file_size = le64_to_cpu(fh.fh_file_size); + return 0; +} + +/* + * Read through metadata records from the backing file one by one + * and call provided metadata handlers. + */ +int incfs_read_next_metadata_record(struct backing_file_context *bfc, + struct metadata_handler *handler) +{ + const ssize_t max_md_size = INCFS_MAX_METADATA_RECORD_SIZE; + ssize_t bytes_read = 0; + size_t md_record_size = 0; + loff_t next_record = 0; + loff_t prev_record = 0; + int res = 0; + struct incfs_md_header *md_hdr = NULL; + + if (!bfc || !handler) + return -EFAULT; + + LOCK_REQUIRED(bfc->bc_mutex); + + if (handler->md_record_offset == 0) + return -EPERM; + + memset(&handler->md_buffer, 0, max_md_size); + bytes_read = incfs_kread(bfc->bc_file, &handler->md_buffer, + max_md_size, handler->md_record_offset); + if (bytes_read < 0) + return bytes_read; + if (bytes_read < sizeof(*md_hdr)) + return -EBADMSG; + + md_hdr = &handler->md_buffer.md_header; + next_record = le64_to_cpu(md_hdr->h_next_md_offset); + prev_record = le64_to_cpu(md_hdr->h_prev_md_offset); + md_record_size = le16_to_cpu(md_hdr->h_record_size); + + if (md_record_size > max_md_size) { + pr_warn("incfs: The record is too large. Size: %ld", + md_record_size); + return -EBADMSG; + } + + if (bytes_read < md_record_size) { + pr_warn("incfs: The record hasn't been fully read."); + return -EBADMSG; + } + + if (next_record <= handler->md_record_offset && next_record != 0) { + pr_warn("incfs: Next record (%lld) points back in file.", + next_record); + return -EBADMSG; + } + + if (prev_record != handler->md_prev_record_offset) { + pr_warn("incfs: Metadata chain has been corrupted."); + return -EBADMSG; + } + + if (le32_to_cpu(md_hdr->h_record_crc) != calc_md_crc(md_hdr)) { + pr_warn("incfs: Metadata CRC mismatch."); + return -EBADMSG; + } + + switch (md_hdr->h_md_entry_type) { + case INCFS_MD_NONE: + break; + case INCFS_MD_BLOCK_MAP: + if (handler->handle_blockmap) + res = handler->handle_blockmap( + &handler->md_buffer.blockmap, handler); + break; + case INCFS_MD_FILE_ATTR: + if (handler->handle_file_attr) + res = handler->handle_file_attr( + &handler->md_buffer.file_attr, handler); + break; + case INCFS_MD_SIGNATURE: + if (handler->handle_signature) + res = handler->handle_signature( + &handler->md_buffer.signature, handler); + break; + default: + res = -ENOTSUPP; + break; + } + + if (!res) { + if (next_record == 0) { + /* + * Zero offset for the next record means that the last + * metadata record has just been processed. + */ + bfc->bc_last_md_record_offset = + handler->md_record_offset; + } + handler->md_prev_record_offset = handler->md_record_offset; + handler->md_record_offset = next_record; + } + return res; +} + +ssize_t incfs_kread(struct file *f, void *buf, size_t size, loff_t pos) +{ + return kernel_read(f, buf, size, &pos); +} + +ssize_t incfs_kwrite(struct file *f, const void *buf, size_t size, loff_t pos) +{ + return kernel_write(f, buf, size, &pos); +} diff --git a/fs/incfs/format.h b/fs/incfs/format.h new file mode 100644 index 000000000000..55e6938b30d6 --- /dev/null +++ b/fs/incfs/format.h @@ -0,0 +1,330 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2018 Google LLC + */ + +/* + * Overview + * -------- + * The backbone of the incremental-fs ondisk format is an append only linked + * list of metadata blocks. Each metadata block contains an offset of the next + * one. These blocks describe files and directories on the + * file system. They also represent actions of adding and removing file names + * (hard links). + * + * Every time incremental-fs instance is mounted, it reads through this list + * to recreate filesystem's state in memory. An offset of the first record in + * the metadata list is stored in the superblock at the beginning of the backing + * file. + * + * Most of the backing file is taken by data areas and blockmaps. + * Since data blocks can be compressed and have different sizes, + * single per-file data area can't be pre-allocated. That's why blockmaps are + * needed in order to find a location and size of each data block in + * the backing file. Each time a file is created, a corresponding block map is + * allocated to store future offsets of data blocks. + * + * Whenever a data block is given by data loader to incremental-fs: + * - A data area with the given block is appended to the end of + * the backing file. + * - A record in the blockmap for the given block index is updated to reflect + * its location, size, and compression algorithm. + + * Metadata records + * ---------------- + * incfs_blockmap - metadata record that specifies size and location + * of a blockmap area for a given file. This area + * contains an array of incfs_blockmap_entry-s. + * incfs_file_signature - metadata record that specifies where file signature + * and its hash tree can be found in the backing file. + * + * incfs_file_attr - metadata record that specifies where additional file + * attributes blob can be found. + * + * Metadata header + * --------------- + * incfs_md_header - header of a metadata record. It's always a part + * of other structures and served purpose of metadata + * bookkeeping. + * + * +-----------------------------------------------+ ^ + * | incfs_md_header | | + * | 1. type of body(BLOCKMAP, FILE_ATTR..) | | + * | 2. size of the whole record header + body | | + * | 3. CRC the whole record header + body | | + * | 4. offset of the previous md record |]------+ + * | 5. offset of the next md record (md link) |]---+ + * +-----------------------------------------------+ | + * | Metadata record body with useful data | | + * +-----------------------------------------------+ | + * +---> + * + * Other ondisk structures + * ----------------------- + * incfs_super_block - backing file header + * incfs_blockmap_entry - a record in a blockmap area that describes size + * and location of a data block. + * Data blocks dont have any particular structure, they are written to the + * backing file in a raw form as they come from a data loader. + * + * Backing file layout + * ------------------- + * + * + * +-------------------------------------------+ + * | incfs_super_block |]---+ + * +-------------------------------------------+ | + * | metadata |<---+ + * | incfs_file_signature |]---+ + * +-------------------------------------------+ | + * ......................... | + * +-------------------------------------------+ | metadata + * +------->| blockmap area | | list links + * | | [incfs_blockmap_entry] | | + * | | [incfs_blockmap_entry] | | + * | | [incfs_blockmap_entry] | | + * | +--[| [incfs_blockmap_entry] | | + * | | | [incfs_blockmap_entry] | | + * | | | [incfs_blockmap_entry] | | + * | | +-------------------------------------------+ | + * | | ......................... | + * | | +-------------------------------------------+ | + * | | | metadata |<---+ + * +----|--[| incfs_blockmap |]---+ + * | +-------------------------------------------+ | + * | ......................... | + * | +-------------------------------------------+ | + * +-->| data block | | + * +-------------------------------------------+ | + * ......................... | + * +-------------------------------------------+ | + * | metadata |<---+ + * | incfs_file_attr | + * +-------------------------------------------+ + */ +#ifndef _INCFS_FORMAT_H +#define _INCFS_FORMAT_H +#include +#include +#include + +#include "internal.h" + +#define INCFS_MAX_NAME_LEN 255 +#define INCFS_FORMAT_V1 1 +#define INCFS_FORMAT_CURRENT_VER INCFS_FORMAT_V1 + +enum incfs_metadata_type { + INCFS_MD_NONE = 0, + INCFS_MD_BLOCK_MAP = 1, + INCFS_MD_FILE_ATTR = 2, + INCFS_MD_SIGNATURE = 3 +}; + +/* Header included at the beginning of all metadata records on the disk. */ +struct incfs_md_header { + __u8 h_md_entry_type; + + /* + * Size of the metadata record. + * (e.g. inode, dir entry etc) not just this struct. + */ + __le16 h_record_size; + + /* + * CRC32 of the metadata record. + * (e.g. inode, dir entry etc) not just this struct. + */ + __le32 h_record_crc; + + /* Offset of the next metadata entry if any */ + __le64 h_next_md_offset; + + /* Offset of the previous metadata entry if any */ + __le64 h_prev_md_offset; + +} __packed; + +/* Backing file header */ +struct incfs_file_header { + /* Magic number: INCFS_MAGIC_NUMBER */ + __le64 fh_magic; + + /* Format version: INCFS_FORMAT_CURRENT_VER */ + __le64 fh_version; + + /* sizeof(incfs_file_header) */ + __le16 fh_header_size; + + /* INCFS_DATA_FILE_BLOCK_SIZE */ + __le16 fh_data_block_size; + + /* Padding, also reserved for future use. */ + __le32 fh_dummy; + + /* Offset of the first metadata record */ + __le64 fh_first_md_offset; + + /* + * Put file specific information after this point + */ + + /* Full size of the file's content */ + __le64 fh_file_size; + + /* File uuid */ + incfs_uuid_t fh_uuid; +} __packed; + +enum incfs_block_map_entry_flags { + INCFS_BLOCK_COMPRESSED_LZ4 = (1 << 0), +}; + +/* Block map entry pointing to an actual location of the data block. */ +struct incfs_blockmap_entry { + /* Offset of the actual data block. Lower 32 bits */ + __le32 me_data_offset_lo; + + /* Offset of the actual data block. Higher 16 bits */ + __le16 me_data_offset_hi; + + /* How many bytes the data actually occupies in the backing file */ + __le16 me_data_size; + + /* Block flags from incfs_block_map_entry_flags */ + __le16 me_flags; +} __packed; + +/* Metadata record for locations of file blocks. Type = INCFS_MD_BLOCK_MAP */ +struct incfs_blockmap { + struct incfs_md_header m_header; + + /* Base offset of the array of incfs_blockmap_entry */ + __le64 m_base_offset; + + /* Size of the map entry array in blocks */ + __le32 m_block_count; +} __packed; + +/* Metadata record for file attribute. Type = INCFS_MD_FILE_ATTR */ +struct incfs_file_attr { + struct incfs_md_header fa_header; + + __le64 fa_offset; + + __le16 fa_size; + + __le32 fa_crc; +} __packed; + +/* Metadata record for file signature. Type = INCFS_MD_SIGNATURE */ +struct incfs_file_signature { + struct incfs_md_header sg_header; + + __le32 sg_sig_size; /* The size of the signature. */ + + __le64 sg_sig_offset; /* Signature's offset in the backing file */ + + __le32 sg_hash_tree_size; /* The size of the hash tree. */ + + __le64 sg_hash_tree_offset; /* Hash tree offset in the backing file */ +} __packed; + +/* In memory version of above */ +struct incfs_df_signature { + u32 sig_size; + u64 sig_offset; + u32 hash_size; + u64 hash_offset; +}; + +/* State of the backing file. */ +struct backing_file_context { + /* Protects writes to bc_file */ + struct mutex bc_mutex; + + /* File object to read data from */ + struct file *bc_file; + + /* + * Offset of the last known metadata record in the backing file. + * 0 means there are no metadata records. + */ + loff_t bc_last_md_record_offset; +}; + +struct metadata_handler { + loff_t md_record_offset; + loff_t md_prev_record_offset; + void *context; + + union { + struct incfs_md_header md_header; + struct incfs_blockmap blockmap; + struct incfs_file_attr file_attr; + struct incfs_file_signature signature; + } md_buffer; + + int (*handle_blockmap)(struct incfs_blockmap *bm, + struct metadata_handler *handler); + int (*handle_file_attr)(struct incfs_file_attr *fa, + struct metadata_handler *handler); + int (*handle_signature)(struct incfs_file_signature *sig, + struct metadata_handler *handler); +}; +#define INCFS_MAX_METADATA_RECORD_SIZE \ + FIELD_SIZEOF(struct metadata_handler, md_buffer) + +loff_t incfs_get_end_offset(struct file *f); + +/* Backing file context management */ +struct backing_file_context *incfs_alloc_bfc(struct file *backing_file); + +void incfs_free_bfc(struct backing_file_context *bfc); + +/* Writing stuff */ +int incfs_write_blockmap_to_backing_file(struct backing_file_context *bfc, + u32 block_count, loff_t *map_base_off); + +int incfs_write_fh_to_backing_file(struct backing_file_context *bfc, + incfs_uuid_t *uuid, u64 file_size); + +int incfs_write_data_block_to_backing_file(struct backing_file_context *bfc, + struct mem_range block, + int block_index, loff_t bm_base_off, + u16 flags); + +int incfs_write_hash_block_to_backing_file(struct backing_file_context *bfc, + struct mem_range block, + int block_index, loff_t hash_area_off); + +int incfs_write_file_attr_to_backing_file(struct backing_file_context *bfc, + struct mem_range value, struct incfs_file_attr *attr); + +int incfs_write_signature_to_backing_file(struct backing_file_context *bfc, + struct mem_range sig, u32 tree_size); + +int incfs_make_empty_backing_file(struct backing_file_context *bfc, + incfs_uuid_t *uuid, u64 file_size); + +/* Reading stuff */ +int incfs_read_file_header(struct backing_file_context *bfc, + loff_t *first_md_off, incfs_uuid_t *uuid, + u64 *file_size); + +int incfs_read_blockmap_entry(struct backing_file_context *bfc, int block_index, + loff_t bm_base_off, + struct incfs_blockmap_entry *bm_entry); + +int incfs_read_blockmap_entries(struct backing_file_context *bfc, + struct incfs_blockmap_entry *entries, + int start_index, int blocks_number, + loff_t bm_base_off); + +int incfs_read_next_metadata_record(struct backing_file_context *bfc, + struct metadata_handler *handler); + +ssize_t incfs_kread(struct file *f, void *buf, size_t size, loff_t pos); +ssize_t incfs_kwrite(struct file *f, const void *buf, size_t size, loff_t pos); + +#endif /* _INCFS_FORMAT_H */ diff --git a/fs/incfs/integrity.c b/fs/incfs/integrity.c new file mode 100644 index 000000000000..f8af9a83ea8a --- /dev/null +++ b/fs/incfs/integrity.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 Google LLC + */ +#include +#include +#include +#include +#include + +#include "integrity.h" + +struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id) +{ + static struct incfs_hash_alg sha256 = { + .name = "sha256", + .digest_size = SHA256_DIGEST_SIZE, + .id = INCFS_HASH_TREE_SHA256 + }; + struct incfs_hash_alg *result = NULL; + struct crypto_shash *shash; + + if (id == INCFS_HASH_TREE_SHA256) { + BUILD_BUG_ON(INCFS_MAX_HASH_SIZE < SHA256_DIGEST_SIZE); + result = &sha256; + } + + if (result == NULL) + return ERR_PTR(-ENOENT); + + /* pairs with cmpxchg_release() below */ + shash = smp_load_acquire(&result->shash); + if (shash) + return result; + + shash = crypto_alloc_shash(result->name, 0, 0); + if (IS_ERR(shash)) { + int err = PTR_ERR(shash); + + pr_err("Can't allocate hash alg %s, error code:%d", + result->name, err); + return ERR_PTR(err); + } + + /* pairs with smp_load_acquire() above */ + if (cmpxchg_release(&result->shash, NULL, shash) != NULL) + crypto_free_shash(shash); + + return result; +} + +struct signature_info { + u32 version; + enum incfs_hash_tree_algorithm hash_algorithm; + u8 log2_blocksize; + struct mem_range salt; + struct mem_range root_hash; +}; + +static u32 read_u32(u8 **p, u8 *top, u32 *result) +{ + if (*p + sizeof(u32) > top) + return false; + + *result = le32_to_cpu(*(u32 *)*p); + *p += sizeof(u32); + return true; +} + +static bool read_u8(u8 **p, u8 *top, u8 *result) +{ + if (*p + sizeof(u8) > top) + return false; + + *result = *(u8 *)*p; + *p += sizeof(u8); + return true; +} + +static bool read_mem_range(u8 **p, u8 *top, struct mem_range *range) +{ + u32 len; + + if (!read_u32(p, top, &len) || *p + len > top) + return false; + + range->len = len; + range->data = *p; + *p += len; + return true; +} + +static int incfs_parse_signature(struct mem_range signature, + struct signature_info *si) +{ + u8 *p = signature.data; + u8 *top = signature.data + signature.len; + u32 hash_section_size; + + if (signature.len > INCFS_MAX_SIGNATURE_SIZE) + return -EINVAL; + + if (!read_u32(&p, top, &si->version) || + si->version != INCFS_SIGNATURE_VERSION) + return -EINVAL; + + if (!read_u32(&p, top, &hash_section_size) || + p + hash_section_size > top) + return -EINVAL; + top = p + hash_section_size; + + if (!read_u32(&p, top, &si->hash_algorithm) || + si->hash_algorithm != INCFS_HASH_TREE_SHA256) + return -EINVAL; + + if (!read_u8(&p, top, &si->log2_blocksize) || si->log2_blocksize != 12) + return -EINVAL; + + if (!read_mem_range(&p, top, &si->salt)) + return -EINVAL; + + if (!read_mem_range(&p, top, &si->root_hash)) + return -EINVAL; + + if (p != top) + return -EINVAL; + + return 0; +} + +struct mtree *incfs_alloc_mtree(struct mem_range signature, + int data_block_count) +{ + int error; + struct signature_info si; + struct mtree *result = NULL; + struct incfs_hash_alg *hash_alg = NULL; + int hash_per_block; + int lvl; + int total_blocks = 0; + int blocks_in_level[INCFS_MAX_MTREE_LEVELS]; + int blocks = data_block_count; + + if (data_block_count <= 0) + return ERR_PTR(-EINVAL); + + error = incfs_parse_signature(signature, &si); + if (error) + return ERR_PTR(error); + + hash_alg = incfs_get_hash_alg(si.hash_algorithm); + if (IS_ERR(hash_alg)) + return ERR_PTR(PTR_ERR(hash_alg)); + + if (si.root_hash.len < hash_alg->digest_size) + return ERR_PTR(-EINVAL); + + result = kzalloc(sizeof(*result), GFP_NOFS); + if (!result) + return ERR_PTR(-ENOMEM); + + result->alg = hash_alg; + hash_per_block = INCFS_DATA_FILE_BLOCK_SIZE / result->alg->digest_size; + + /* Calculating tree geometry. */ + /* First pass: calculate how many blocks in each tree level. */ + for (lvl = 0; blocks > 1; lvl++) { + if (lvl >= INCFS_MAX_MTREE_LEVELS) { + pr_err("incfs: too much data in mtree"); + goto err; + } + + blocks = (blocks + hash_per_block - 1) / hash_per_block; + blocks_in_level[lvl] = blocks; + total_blocks += blocks; + } + result->depth = lvl; + result->hash_tree_area_size = total_blocks * INCFS_DATA_FILE_BLOCK_SIZE; + if (result->hash_tree_area_size > INCFS_MAX_HASH_AREA_SIZE) + goto err; + + blocks = 0; + /* Second pass: calculate offset of each level. 0th level goes last. */ + for (lvl = 0; lvl < result->depth; lvl++) { + u32 suboffset; + + blocks += blocks_in_level[lvl]; + suboffset = (total_blocks - blocks) + * INCFS_DATA_FILE_BLOCK_SIZE; + + result->hash_level_suboffset[lvl] = suboffset; + } + + /* Root hash is stored separately from the rest of the tree. */ + memcpy(result->root_hash, si.root_hash.data, hash_alg->digest_size); + return result; + +err: + kfree(result); + return ERR_PTR(-E2BIG); +} + +void incfs_free_mtree(struct mtree *tree) +{ + kfree(tree); +} + +int incfs_calc_digest(struct incfs_hash_alg *alg, struct mem_range data, + struct mem_range digest) +{ + SHASH_DESC_ON_STACK(desc, alg->shash); + + if (!alg || !alg->shash || !data.data || !digest.data) + return -EFAULT; + + if (alg->digest_size > digest.len) + return -EINVAL; + + desc->tfm = alg->shash; + + if (data.len < INCFS_DATA_FILE_BLOCK_SIZE) { + int err; + void *buf = kzalloc(INCFS_DATA_FILE_BLOCK_SIZE, GFP_NOFS); + + if (!buf) + return -ENOMEM; + + memcpy(buf, data.data, data.len); + err = crypto_shash_digest(desc, buf, INCFS_DATA_FILE_BLOCK_SIZE, + digest.data); + kfree(buf); + return err; + } + return crypto_shash_digest(desc, data.data, data.len, digest.data); +} + diff --git a/fs/incfs/integrity.h b/fs/incfs/integrity.h new file mode 100644 index 000000000000..cf79b64da736 --- /dev/null +++ b/fs/incfs/integrity.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ +#ifndef _INCFS_INTEGRITY_H +#define _INCFS_INTEGRITY_H +#include +#include +#include + +#include + +#include "internal.h" + +#define INCFS_MAX_MTREE_LEVELS 8 +#define INCFS_MAX_HASH_AREA_SIZE (1280 * 1024 * 1024) + +struct incfs_hash_alg { + const char *name; + int digest_size; + enum incfs_hash_tree_algorithm id; + + struct crypto_shash *shash; +}; + +/* Merkle tree structure. */ +struct mtree { + struct incfs_hash_alg *alg; + + u8 root_hash[INCFS_MAX_HASH_SIZE]; + + /* Offset of each hash level in the hash area. */ + u32 hash_level_suboffset[INCFS_MAX_MTREE_LEVELS]; + + u32 hash_tree_area_size; + + /* Number of levels in hash_level_suboffset */ + int depth; +}; + +struct incfs_hash_alg *incfs_get_hash_alg(enum incfs_hash_tree_algorithm id); + +struct mtree *incfs_alloc_mtree(struct mem_range signature, + int data_block_count); + +void incfs_free_mtree(struct mtree *tree); + +size_t incfs_get_mtree_depth(enum incfs_hash_tree_algorithm alg, loff_t size); + +size_t incfs_get_mtree_hash_count(enum incfs_hash_tree_algorithm alg, + loff_t size); + +int incfs_calc_digest(struct incfs_hash_alg *alg, struct mem_range data, + struct mem_range digest); + +#endif /* _INCFS_INTEGRITY_H */ diff --git a/fs/incfs/internal.h b/fs/incfs/internal.h new file mode 100644 index 000000000000..0a85eaed41d3 --- /dev/null +++ b/fs/incfs/internal.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2018 Google LLC + */ +#ifndef _INCFS_INTERNAL_H +#define _INCFS_INTERNAL_H +#include + +struct mem_range { + u8 *data; + size_t len; +}; + +static inline struct mem_range range(u8 *data, size_t len) +{ + return (struct mem_range){ .data = data, .len = len }; +} + +#define LOCK_REQUIRED(lock) WARN_ON_ONCE(!mutex_is_locked(&lock)) + +#endif /* _INCFS_INTERNAL_H */ diff --git a/fs/incfs/main.c b/fs/incfs/main.c new file mode 100644 index 000000000000..e65d0d895128 --- /dev/null +++ b/fs/incfs/main.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018 Google LLC + */ +#include +#include +#include + +#include + +#include "vfs.h" + +#define INCFS_NODE_FEATURES "features" + +static struct file_system_type incfs_fs_type = { + .owner = THIS_MODULE, + .name = INCFS_NAME, + .mount = incfs_mount_fs, + .kill_sb = incfs_kill_sb, + .fs_flags = 0 +}; + +static struct kobject *sysfs_root, *featurefs_root; + +static ssize_t corefs_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) +{ + return snprintf(buff, PAGE_SIZE, "supported\n"); +} + +static struct kobj_attribute corefs_attr = __ATTR_RO(corefs); + +static struct attribute *attributes[] = { + &corefs_attr.attr, + NULL, +}; + +static const struct attribute_group attr_group = { + .attrs = attributes, +}; + +static int __init init_sysfs(void) +{ + int res = 0; + + sysfs_root = kobject_create_and_add(INCFS_NAME, fs_kobj); + if (!sysfs_root) + return -ENOMEM; + + featurefs_root = kobject_create_and_add(INCFS_NODE_FEATURES, + sysfs_root); + if (!featurefs_root) + return -ENOMEM; + + res = sysfs_create_group(featurefs_root, &attr_group); + if (res) { + kobject_put(sysfs_root); + sysfs_root = NULL; + } + return res; +} + +static void cleanup_sysfs(void) +{ + if (featurefs_root) { + sysfs_remove_group(featurefs_root, &attr_group); + kobject_put(featurefs_root); + featurefs_root = NULL; + } + + if (sysfs_root) { + kobject_put(sysfs_root); + sysfs_root = NULL; + } +} + +static int __init init_incfs_module(void) +{ + int err = 0; + + err = init_sysfs(); + if (err) + return err; + + err = register_filesystem(&incfs_fs_type); + if (err) + cleanup_sysfs(); + + return err; +} + +static void __exit cleanup_incfs_module(void) +{ + cleanup_sysfs(); + unregister_filesystem(&incfs_fs_type); +} + +module_init(init_incfs_module); +module_exit(cleanup_incfs_module); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Eugene Zemtsov "); +MODULE_DESCRIPTION("Incremental File System"); diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c new file mode 100644 index 000000000000..f911c5eb9290 --- /dev/null +++ b/fs/incfs/vfs.c @@ -0,0 +1,2229 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018 Google LLC + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "vfs.h" +#include "data_mgmt.h" +#include "format.h" +#include "integrity.h" +#include "internal.h" + +#define INCFS_PENDING_READS_INODE 2 +#define INCFS_LOG_INODE 3 +#define INCFS_START_INO_RANGE 10 +#define READ_FILE_MODE 0444 +#define READ_EXEC_FILE_MODE 0555 +#define READ_WRITE_FILE_MODE 0666 + +/* Needed for kernel 4.14 - remove for later kernels */ +typedef unsigned int __poll_t; + +static int incfs_remount_fs(struct super_block *sb, int *flags, char *data); + +static int dentry_revalidate(struct dentry *dentry, unsigned int flags); +static void dentry_release(struct dentry *d); + +static int iterate_incfs_dir(struct file *file, struct dir_context *ctx); +static struct dentry *dir_lookup(struct inode *dir_inode, + struct dentry *dentry, unsigned int flags); +static int dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); +static int dir_unlink(struct inode *dir, struct dentry *dentry); +static int dir_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry); +static int dir_rmdir(struct inode *dir, struct dentry *dentry); +static int dir_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry); + +static int file_open(struct inode *inode, struct file *file); +static int file_release(struct inode *inode, struct file *file); +static int read_single_page(struct file *f, struct page *page); +static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg); + +static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, + loff_t *ppos); +static __poll_t pending_reads_poll(struct file *file, poll_table *wait); +static int pending_reads_open(struct inode *inode, struct file *file); +static int pending_reads_release(struct inode *, struct file *); + +static ssize_t log_read(struct file *f, char __user *buf, size_t len, + loff_t *ppos); +static __poll_t log_poll(struct file *file, poll_table *wait); +static int log_open(struct inode *inode, struct file *file); +static int log_release(struct inode *, struct file *); + +static struct inode *alloc_inode(struct super_block *sb); +static void free_inode(struct inode *inode); +static void evict_inode(struct inode *inode); + +static ssize_t incfs_getxattr(struct dentry *d, const char *name, + void *value, size_t size); +static ssize_t incfs_setxattr(struct dentry *d, const char *name, + const void *value, size_t size, int flags); +static ssize_t incfs_listxattr(struct dentry *d, char *list, size_t size); + +static int show_options(struct seq_file *, struct dentry *); + +static const struct super_operations incfs_super_ops = { + .statfs = simple_statfs, + .remount_fs = incfs_remount_fs, + .alloc_inode = alloc_inode, + .destroy_inode = free_inode, + .evict_inode = evict_inode, + .show_options = show_options +}; + +static int dir_rename_wrap(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + return dir_rename(old_dir, old_dentry, new_dir, new_dentry); +} + +static const struct inode_operations incfs_dir_inode_ops = { + .lookup = dir_lookup, + .mkdir = dir_mkdir, + .rename = dir_rename_wrap, + .unlink = dir_unlink, + .link = dir_link, + .rmdir = dir_rmdir +}; + +static const struct file_operations incfs_dir_fops = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .iterate = iterate_incfs_dir, + .open = file_open, + .release = file_release, + .unlocked_ioctl = dispatch_ioctl, + .compat_ioctl = dispatch_ioctl +}; + +static const struct dentry_operations incfs_dentry_ops = { + .d_revalidate = dentry_revalidate, + .d_release = dentry_release +}; + +static const struct address_space_operations incfs_address_space_ops = { + .readpage = read_single_page, + /* .readpages = readpages */ +}; + +static const struct file_operations incfs_file_ops = { + .open = file_open, + .release = file_release, + .read_iter = generic_file_read_iter, + .mmap = generic_file_mmap, + .splice_read = generic_file_splice_read, + .llseek = generic_file_llseek, + .unlocked_ioctl = dispatch_ioctl, + .compat_ioctl = dispatch_ioctl +}; + +enum FILL_PERMISSION { + CANT_FILL = 0, + CAN_FILL = 1, +}; + +static const struct file_operations incfs_pending_read_file_ops = { + .read = pending_reads_read, + .poll = pending_reads_poll, + .open = pending_reads_open, + .release = pending_reads_release, + .llseek = noop_llseek, + .unlocked_ioctl = dispatch_ioctl, + .compat_ioctl = dispatch_ioctl +}; + +static const struct file_operations incfs_log_file_ops = { + .read = log_read, + .poll = log_poll, + .open = log_open, + .release = log_release, + .llseek = noop_llseek, + .unlocked_ioctl = dispatch_ioctl, + .compat_ioctl = dispatch_ioctl +}; + +static const struct inode_operations incfs_file_inode_ops = { + .setattr = simple_setattr, + .getattr = simple_getattr, + .listxattr = incfs_listxattr +}; + +static int incfs_handler_getxattr(const struct xattr_handler *xh, + struct dentry *d, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + return incfs_getxattr(d, name, buffer, size); +} + +static int incfs_handler_setxattr(const struct xattr_handler *xh, + struct dentry *d, struct inode *inode, + const char *name, const void *buffer, + size_t size, int flags) +{ + return incfs_setxattr(d, name, buffer, size, flags); +} + +static const struct xattr_handler incfs_xattr_handler = { + .prefix = "", /* AKA all attributes */ + .get = incfs_handler_getxattr, + .set = incfs_handler_setxattr, +}; + +static const struct xattr_handler *incfs_xattr_ops[] = { + &incfs_xattr_handler, + NULL, +}; + +/* State of an open .pending_reads file, unique for each file descriptor. */ +struct pending_reads_state { + /* A serial number of the last pending read obtained from this file. */ + int last_pending_read_sn; +}; + +/* State of an open .log file, unique for each file descriptor. */ +struct log_file_state { + struct read_log_state state; +}; + +struct inode_search { + unsigned long ino; + + struct dentry *backing_dentry; +}; + +enum parse_parameter { + Opt_read_timeout, + Opt_readahead_pages, + Opt_no_backing_file_cache, + Opt_no_backing_file_readahead, + Opt_rlog_pages, + Opt_rlog_wakeup_cnt, + Opt_err +}; + +static const char pending_reads_file_name[] = INCFS_PENDING_READS_FILENAME; +static struct mem_range pending_reads_file_name_range = { + .data = (u8 *)pending_reads_file_name, + .len = ARRAY_SIZE(pending_reads_file_name) - 1 +}; + +static const char log_file_name[] = INCFS_LOG_FILENAME; +static struct mem_range log_file_name_range = { + .data = (u8 *)log_file_name, + .len = ARRAY_SIZE(log_file_name) - 1 +}; + +static const match_table_t option_tokens = { + { Opt_read_timeout, "read_timeout_ms=%u" }, + { Opt_readahead_pages, "readahead=%u" }, + { Opt_no_backing_file_cache, "no_bf_cache=%u" }, + { Opt_no_backing_file_readahead, "no_bf_readahead=%u" }, + { Opt_rlog_pages, "rlog_pages=%u" }, + { Opt_rlog_wakeup_cnt, "rlog_wakeup_cnt=%u" }, + { Opt_err, NULL } +}; + +static int parse_options(struct mount_options *opts, char *str) +{ + substring_t args[MAX_OPT_ARGS]; + int value; + char *position; + + if (opts == NULL) + return -EFAULT; + + opts->read_timeout_ms = 1000; /* Default: 1s */ + opts->readahead_pages = 10; + opts->read_log_pages = 2; + opts->read_log_wakeup_count = 10; + opts->no_backing_file_cache = false; + opts->no_backing_file_readahead = false; + if (str == NULL || *str == 0) + return 0; + + while ((position = strsep(&str, ",")) != NULL) { + int token; + + if (!*position) + continue; + + token = match_token(position, option_tokens, args); + + switch (token) { + case Opt_read_timeout: + if (match_int(&args[0], &value)) + return -EINVAL; + opts->read_timeout_ms = value; + break; + case Opt_readahead_pages: + if (match_int(&args[0], &value)) + return -EINVAL; + opts->readahead_pages = value; + break; + case Opt_no_backing_file_cache: + if (match_int(&args[0], &value)) + return -EINVAL; + opts->no_backing_file_cache = (value != 0); + break; + case Opt_no_backing_file_readahead: + if (match_int(&args[0], &value)) + return -EINVAL; + opts->no_backing_file_readahead = (value != 0); + break; + case Opt_rlog_pages: + if (match_int(&args[0], &value)) + return -EINVAL; + opts->read_log_pages = value; + break; + case Opt_rlog_wakeup_cnt: + if (match_int(&args[0], &value)) + return -EINVAL; + opts->read_log_wakeup_count = value; + break; + default: + return -EINVAL; + } + } + + return 0; +} + +static struct super_block *file_superblock(struct file *f) +{ + struct inode *inode = file_inode(f); + + return inode->i_sb; +} + +static struct mount_info *get_mount_info(struct super_block *sb) +{ + struct mount_info *result = sb->s_fs_info; + + WARN_ON(!result); + return result; +} + +/* Read file size from the attribute. Quicker than reading the header */ +static u64 read_size_attr(struct dentry *backing_dentry) +{ + __le64 attr_value; + ssize_t bytes_read; + + bytes_read = vfs_getxattr(backing_dentry, INCFS_XATTR_SIZE_NAME, + (char *)&attr_value, sizeof(attr_value)); + + if (bytes_read != sizeof(attr_value)) + return 0; + + return le64_to_cpu(attr_value); +} + +static int inode_test(struct inode *inode, void *opaque) +{ + struct inode_search *search = opaque; + struct inode_info *node = get_incfs_node(inode); + + if (!node) + return 0; + + if (search->backing_dentry) { + struct inode *backing_inode = d_inode(search->backing_dentry); + + return (node->n_backing_inode == backing_inode) && + inode->i_ino == search->ino; + } + return 1; +} + +static int inode_set(struct inode *inode, void *opaque) +{ + struct inode_search *search = opaque; + struct inode_info *node = get_incfs_node(inode); + + if (search->backing_dentry) { + /* It's a regular inode that has corresponding backing inode */ + struct dentry *backing_dentry = search->backing_dentry; + struct inode *backing_inode = d_inode(backing_dentry); + + fsstack_copy_attr_all(inode, backing_inode); + if (S_ISREG(inode->i_mode)) { + u64 size = read_size_attr(backing_dentry); + + inode->i_size = size; + inode->i_blocks = get_blocks_count_for_size(size); + inode->i_mapping->a_ops = &incfs_address_space_ops; + inode->i_op = &incfs_file_inode_ops; + inode->i_fop = &incfs_file_ops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_size = 0; + inode->i_blocks = 1; + inode->i_mapping->a_ops = &incfs_address_space_ops; + inode->i_op = &incfs_dir_inode_ops; + inode->i_fop = &incfs_dir_fops; + } else { + pr_warn_once("incfs: Unexpected inode type\n"); + return -EBADF; + } + + ihold(backing_inode); + node->n_backing_inode = backing_inode; + node->n_mount_info = get_mount_info(inode->i_sb); + inode->i_ctime = backing_inode->i_ctime; + inode->i_mtime = backing_inode->i_mtime; + inode->i_atime = backing_inode->i_atime; + inode->i_ino = backing_inode->i_ino; + if (backing_inode->i_ino < INCFS_START_INO_RANGE) { + pr_warn("incfs: ino conflict with backing FS %ld\n", + backing_inode->i_ino); + } + + return 0; + } else if (search->ino == INCFS_PENDING_READS_INODE) { + /* It's an inode for .pending_reads pseudo file. */ + + inode->i_ctime = (struct timespec){}; + inode->i_mtime = inode->i_ctime; + inode->i_atime = inode->i_ctime; + inode->i_size = 0; + inode->i_ino = INCFS_PENDING_READS_INODE; + inode->i_private = NULL; + + inode_init_owner(inode, NULL, S_IFREG | READ_WRITE_FILE_MODE); + + inode->i_op = &incfs_file_inode_ops; + inode->i_fop = &incfs_pending_read_file_ops; + + } else if (search->ino == INCFS_LOG_INODE) { + /* It's an inode for .log pseudo file. */ + + inode->i_ctime = (struct timespec){}; + inode->i_mtime = inode->i_ctime; + inode->i_atime = inode->i_ctime; + inode->i_size = 0; + inode->i_ino = INCFS_LOG_INODE; + inode->i_private = NULL; + + inode_init_owner(inode, NULL, S_IFREG | READ_WRITE_FILE_MODE); + + inode->i_op = &incfs_file_inode_ops; + inode->i_fop = &incfs_log_file_ops; + + } else { + /* Unknown inode requested. */ + return -EINVAL; + } + + return 0; +} + +static struct inode *fetch_regular_inode(struct super_block *sb, + struct dentry *backing_dentry) +{ + struct inode *backing_inode = d_inode(backing_dentry); + struct inode_search search = { + .ino = backing_inode->i_ino, + .backing_dentry = backing_dentry + }; + struct inode *inode = iget5_locked(sb, search.ino, inode_test, + inode_set, &search); + + if (!inode) + return ERR_PTR(-ENOMEM); + + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + + return inode; +} + +static ssize_t pending_reads_read(struct file *f, char __user *buf, size_t len, + loff_t *ppos) +{ + struct pending_reads_state *pr_state = f->private_data; + struct mount_info *mi = get_mount_info(file_superblock(f)); + struct incfs_pending_read_info *reads_buf = NULL; + size_t reads_to_collect = len / sizeof(*reads_buf); + int last_known_read_sn = READ_ONCE(pr_state->last_pending_read_sn); + int new_max_sn = last_known_read_sn; + int reads_collected = 0; + ssize_t result = 0; + int i = 0; + + if (!incfs_fresh_pending_reads_exist(mi, last_known_read_sn)) + return 0; + + reads_buf = (struct incfs_pending_read_info *)get_zeroed_page(GFP_NOFS); + if (!reads_buf) + return -ENOMEM; + + reads_to_collect = + min_t(size_t, PAGE_SIZE / sizeof(*reads_buf), reads_to_collect); + + reads_collected = incfs_collect_pending_reads( + mi, last_known_read_sn, reads_buf, reads_to_collect); + if (reads_collected < 0) { + result = reads_collected; + goto out; + } + + for (i = 0; i < reads_collected; i++) + if (reads_buf[i].serial_number > new_max_sn) + new_max_sn = reads_buf[i].serial_number; + + /* + * Just to make sure that we don't accidentally copy more data + * to reads buffer than userspace can handle. + */ + reads_collected = min_t(size_t, reads_collected, reads_to_collect); + result = reads_collected * sizeof(*reads_buf); + + /* Copy reads info to the userspace buffer */ + if (copy_to_user(buf, reads_buf, result)) { + result = -EFAULT; + goto out; + } + + WRITE_ONCE(pr_state->last_pending_read_sn, new_max_sn); + *ppos = 0; +out: + if (reads_buf) + free_page((unsigned long)reads_buf); + return result; +} + + +static __poll_t pending_reads_poll(struct file *file, poll_table *wait) +{ + struct pending_reads_state *state = file->private_data; + struct mount_info *mi = get_mount_info(file_superblock(file)); + __poll_t ret = 0; + + poll_wait(file, &mi->mi_pending_reads_notif_wq, wait); + if (incfs_fresh_pending_reads_exist(mi, + state->last_pending_read_sn)) + ret = EPOLLIN | EPOLLRDNORM; + + return ret; +} + +static int pending_reads_open(struct inode *inode, struct file *file) +{ + struct pending_reads_state *state = NULL; + + state = kzalloc(sizeof(*state), GFP_NOFS); + if (!state) + return -ENOMEM; + + file->private_data = state; + return 0; +} + +static int pending_reads_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static struct inode *fetch_pending_reads_inode(struct super_block *sb) +{ + struct inode_search search = { + .ino = INCFS_PENDING_READS_INODE + }; + struct inode *inode = iget5_locked(sb, search.ino, inode_test, + inode_set, &search); + + if (!inode) + return ERR_PTR(-ENOMEM); + + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + + return inode; +} + +static int log_open(struct inode *inode, struct file *file) +{ + struct log_file_state *log_state = NULL; + struct mount_info *mi = get_mount_info(file_superblock(file)); + + log_state = kzalloc(sizeof(*log_state), GFP_NOFS); + if (!log_state) + return -ENOMEM; + + log_state->state = incfs_get_log_state(mi); + file->private_data = log_state; + return 0; +} + +static int log_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + +static ssize_t log_read(struct file *f, char __user *buf, size_t len, + loff_t *ppos) +{ + struct log_file_state *log_state = f->private_data; + struct mount_info *mi = get_mount_info(file_superblock(f)); + struct incfs_pending_read_info *reads_buf = + (struct incfs_pending_read_info *)__get_free_page(GFP_NOFS); + size_t reads_to_collect = len / sizeof(*reads_buf); + size_t reads_per_page = PAGE_SIZE / sizeof(*reads_buf); + int total_reads_collected = 0; + ssize_t result = 0; + + if (!reads_buf) + return -ENOMEM; + + reads_to_collect = min_t(size_t, mi->mi_log.rl_size, reads_to_collect); + while (reads_to_collect > 0) { + struct read_log_state next_state = READ_ONCE(log_state->state); + int reads_collected = incfs_collect_logged_reads( + mi, &next_state, reads_buf, + min_t(size_t, reads_to_collect, reads_per_page)); + if (reads_collected <= 0) { + result = total_reads_collected ? + total_reads_collected * + sizeof(*reads_buf) : + reads_collected; + goto out; + } + if (copy_to_user(buf, reads_buf, + reads_collected * sizeof(*reads_buf))) { + result = total_reads_collected ? + total_reads_collected * + sizeof(*reads_buf) : + -EFAULT; + goto out; + } + + WRITE_ONCE(log_state->state, next_state); + total_reads_collected += reads_collected; + buf += reads_collected * sizeof(*reads_buf); + reads_to_collect -= reads_collected; + } + + result = total_reads_collected * sizeof(*reads_buf); + *ppos = 0; +out: + if (reads_buf) + free_page((unsigned long)reads_buf); + return result; +} + +static __poll_t log_poll(struct file *file, poll_table *wait) +{ + struct log_file_state *log_state = file->private_data; + struct mount_info *mi = get_mount_info(file_superblock(file)); + int count; + __poll_t ret = 0; + + poll_wait(file, &mi->mi_log.ml_notif_wq, wait); + count = incfs_get_uncollected_logs_count(mi, log_state->state); + if (count >= mi->mi_options.read_log_wakeup_count) + ret = EPOLLIN | EPOLLRDNORM; + + return ret; +} + +static struct inode *fetch_log_inode(struct super_block *sb) +{ + struct inode_search search = { + .ino = INCFS_LOG_INODE + }; + struct inode *inode = iget5_locked(sb, search.ino, inode_test, + inode_set, &search); + + if (!inode) + return ERR_PTR(-ENOMEM); + + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + + return inode; +} + +static int iterate_incfs_dir(struct file *file, struct dir_context *ctx) +{ + struct dir_file *dir = get_incfs_dir_file(file); + int error = 0; + struct mount_info *mi = get_mount_info(file_superblock(file)); + bool root; + + if (!dir) { + error = -EBADF; + goto out; + } + + root = dir->backing_dir->f_inode + == d_inode(mi->mi_backing_dir_path.dentry); + + if (root && ctx->pos == 0) { + if (!dir_emit(ctx, pending_reads_file_name, + ARRAY_SIZE(pending_reads_file_name) - 1, + INCFS_PENDING_READS_INODE, DT_REG)) { + error = -EINVAL; + goto out; + } + ctx->pos++; + } + + if (root && ctx->pos == 1) { + if (!dir_emit(ctx, log_file_name, + ARRAY_SIZE(log_file_name) - 1, + INCFS_LOG_INODE, DT_REG)) { + error = -EINVAL; + goto out; + } + ctx->pos++; + } + + ctx->pos -= 2; + error = iterate_dir(dir->backing_dir, ctx); + ctx->pos += 2; + file->f_pos = dir->backing_dir->f_pos; +out: + if (error) + pr_warn("incfs: %s %s %d\n", __func__, + file->f_path.dentry->d_name.name, error); + return error; +} + +static int incfs_init_dentry(struct dentry *dentry, struct path *path) +{ + struct dentry_info *d_info = NULL; + + if (!dentry || !path) + return -EFAULT; + + d_info = kzalloc(sizeof(*d_info), GFP_NOFS); + if (!d_info) + return -ENOMEM; + + d_info->backing_path = *path; + path_get(path); + + dentry->d_fsdata = d_info; + return 0; +} + +static struct dentry *incfs_lookup_dentry(struct dentry *parent, + const char *name) +{ + struct inode *inode; + struct dentry *result = NULL; + + if (!parent) + return ERR_PTR(-EFAULT); + + inode = d_inode(parent); + inode_lock_nested(inode, I_MUTEX_PARENT); + result = lookup_one_len(name, parent, strlen(name)); + inode_unlock(inode); + + if (IS_ERR(result)) + pr_warn("%s err:%ld\n", __func__, PTR_ERR(result)); + + return result; +} + +static struct dentry *open_or_create_index_dir(struct dentry *backing_dir) +{ + static const char name[] = ".index"; + struct dentry *index_dentry; + struct inode *backing_inode = d_inode(backing_dir); + int err = 0; + + index_dentry = incfs_lookup_dentry(backing_dir, name); + if (!index_dentry) { + return ERR_PTR(-EINVAL); + } else if (IS_ERR(index_dentry)) { + return index_dentry; + } else if (d_really_is_positive(index_dentry)) { + /* Index already exists. */ + return index_dentry; + } + + /* Index needs to be created. */ + inode_lock_nested(backing_inode, I_MUTEX_PARENT); + err = vfs_mkdir(backing_inode, index_dentry, 0777); + inode_unlock(backing_inode); + + if (err) + return ERR_PTR(err); + + if (!d_really_is_positive(index_dentry)) { + dput(index_dentry); + return ERR_PTR(-EINVAL); + } + + return index_dentry; +} + +static int read_single_page(struct file *f, struct page *page) +{ + loff_t offset = 0; + loff_t size = 0; + ssize_t bytes_to_read = 0; + ssize_t read_result = 0; + struct data_file *df = get_incfs_data_file(f); + int result = 0; + void *page_start = kmap(page); + int block_index; + int timeout_ms; + + if (!df) + return -EBADF; + + offset = page_offset(page); + block_index = offset / INCFS_DATA_FILE_BLOCK_SIZE; + size = df->df_size; + timeout_ms = df->df_mount_info->mi_options.read_timeout_ms; + + if (offset < size) { + struct mem_range tmp = { + .len = 2 * INCFS_DATA_FILE_BLOCK_SIZE + }; + + tmp.data = (u8 *)__get_free_pages(GFP_NOFS, get_order(tmp.len)); + bytes_to_read = min_t(loff_t, size - offset, PAGE_SIZE); + read_result = incfs_read_data_file_block( + range(page_start, bytes_to_read), df, block_index, + timeout_ms, tmp); + + free_pages((unsigned long)tmp.data, get_order(tmp.len)); + } else { + bytes_to_read = 0; + read_result = 0; + } + + if (read_result < 0) + result = read_result; + else if (read_result < PAGE_SIZE) + zero_user(page, read_result, PAGE_SIZE - read_result); + + if (result == 0) + SetPageUptodate(page); + else + SetPageError(page); + + flush_dcache_page(page); + kunmap(page); + unlock_page(page); + return result; +} + +static char *file_id_to_str(incfs_uuid_t id) +{ + char *result = kmalloc(1 + sizeof(id.bytes) * 2, GFP_NOFS); + char *end; + + if (!result) + return NULL; + + end = bin2hex(result, id.bytes, sizeof(id.bytes)); + *end = 0; + return result; +} + +static struct mem_range incfs_copy_signature_info_from_user(u8 __user *original, + u64 size) +{ + u8 *result; + + if (!original) + return range(NULL, 0); + + if (size > INCFS_MAX_SIGNATURE_SIZE) + return range(ERR_PTR(-EFAULT), 0); + + result = kzalloc(size, GFP_NOFS); + if (!result) + return range(ERR_PTR(-ENOMEM), 0); + + if (copy_from_user(result, original, size)) { + kfree(result); + return range(ERR_PTR(-EFAULT), 0); + } + + return range(result, size); +} + +static int init_new_file(struct mount_info *mi, struct dentry *dentry, + incfs_uuid_t *uuid, u64 size, struct mem_range attr, + u8 __user *user_signature_info, u64 signature_size) +{ + struct path path = {}; + struct file *new_file; + int error = 0; + struct backing_file_context *bfc = NULL; + u32 block_count; + struct mem_range raw_signature = { NULL }; + struct mtree *hash_tree = NULL; + + if (!mi || !dentry || !uuid) + return -EFAULT; + + /* Resize newly created file to its true size. */ + path = (struct path) { + .mnt = mi->mi_backing_dir_path.mnt, + .dentry = dentry + }; + new_file = dentry_open(&path, O_RDWR | O_NOATIME, mi->mi_owner); + + if (IS_ERR(new_file)) { + error = PTR_ERR(new_file); + goto out; + } + + bfc = incfs_alloc_bfc(new_file); + if (IS_ERR(bfc)) { + error = PTR_ERR(bfc); + bfc = NULL; + goto out; + } + + mutex_lock(&bfc->bc_mutex); + error = incfs_write_fh_to_backing_file(bfc, uuid, size); + if (error) + goto out; + + block_count = (u32)get_blocks_count_for_size(size); + error = incfs_write_blockmap_to_backing_file(bfc, block_count, NULL); + if (error) + goto out; + + /* This fill has data, reserve space for the block map. */ + if (block_count > 0) { + error = incfs_write_blockmap_to_backing_file( + bfc, block_count, NULL); + if (error) + goto out; + } + + if (attr.data && attr.len) { + error = incfs_write_file_attr_to_backing_file(bfc, + attr, NULL); + if (error) + goto out; + } + + if (user_signature_info) { + raw_signature = incfs_copy_signature_info_from_user( + user_signature_info, signature_size); + + if (IS_ERR(raw_signature.data)) { + error = PTR_ERR(raw_signature.data); + raw_signature.data = NULL; + goto out; + } + + hash_tree = incfs_alloc_mtree(raw_signature, block_count); + if (IS_ERR(hash_tree)) { + error = PTR_ERR(hash_tree); + hash_tree = NULL; + goto out; + } + + error = incfs_write_signature_to_backing_file( + bfc, raw_signature, hash_tree->hash_tree_area_size); + if (error) + goto out; + } + +out: + if (bfc) { + mutex_unlock(&bfc->bc_mutex); + incfs_free_bfc(bfc); + } + incfs_free_mtree(hash_tree); + kfree(raw_signature.data); + + if (error) + pr_debug("incfs: %s error: %d\n", __func__, error); + return error; +} + +static int incfs_link(struct dentry *what, struct dentry *where) +{ + struct dentry *parent_dentry = dget_parent(where); + struct inode *pinode = d_inode(parent_dentry); + int error = 0; + + inode_lock_nested(pinode, I_MUTEX_PARENT); + error = vfs_link(what, pinode, where, NULL); + inode_unlock(pinode); + + dput(parent_dentry); + return error; +} + +static int incfs_unlink(struct dentry *dentry) +{ + struct dentry *parent_dentry = dget_parent(dentry); + struct inode *pinode = d_inode(parent_dentry); + int error = 0; + + inode_lock_nested(pinode, I_MUTEX_PARENT); + error = vfs_unlink(pinode, dentry, NULL); + inode_unlock(pinode); + + dput(parent_dentry); + return error; +} + +static int incfs_rmdir(struct dentry *dentry) +{ + struct dentry *parent_dentry = dget_parent(dentry); + struct inode *pinode = d_inode(parent_dentry); + int error = 0; + + inode_lock_nested(pinode, I_MUTEX_PARENT); + error = vfs_rmdir(pinode, dentry); + inode_unlock(pinode); + + dput(parent_dentry); + return error; +} + +static int dir_relative_path_resolve( + struct mount_info *mi, + const char __user *relative_path, + struct path *result_path) +{ + struct path *base_path = &mi->mi_backing_dir_path; + int dir_fd = get_unused_fd_flags(0); + struct file *dir_f = NULL; + int error = 0; + + if (dir_fd < 0) + return dir_fd; + + dir_f = dentry_open(base_path, O_RDONLY | O_NOATIME, mi->mi_owner); + + if (IS_ERR(dir_f)) { + error = PTR_ERR(dir_f); + goto out; + } + fd_install(dir_fd, dir_f); + + if (!relative_path) { + /* No relative path given, just return the base dir. */ + *result_path = *base_path; + path_get(result_path); + goto out; + } + + error = user_path_at_empty(dir_fd, relative_path, + LOOKUP_FOLLOW | LOOKUP_DIRECTORY, result_path, NULL); + +out: + sys_close(dir_fd); + if (error) + pr_debug("incfs: %s %d\n", __func__, error); + return error; +} + +static int validate_name(char *file_name) +{ + struct mem_range name = range(file_name, strlen(file_name)); + int i = 0; + + if (name.len > INCFS_MAX_NAME_LEN) + return -ENAMETOOLONG; + + if (incfs_equal_ranges(pending_reads_file_name_range, name)) + return -EINVAL; + + for (i = 0; i < name.len; i++) + if (name.data[i] == '/') + return -EINVAL; + + return 0; +} + +static int chmod(struct dentry *dentry, umode_t mode) +{ + struct inode *inode = dentry->d_inode; + struct inode *delegated_inode = NULL; + struct iattr newattrs; + int error; + +retry_deleg: + inode_lock(inode); + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; + error = notify_change(dentry, &newattrs, &delegated_inode); + inode_unlock(inode); + if (delegated_inode) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry_deleg; + } + return error; +} + +static long ioctl_create_file(struct mount_info *mi, + struct incfs_new_file_args __user *usr_args) +{ + struct incfs_new_file_args args; + char *file_id_str = NULL; + struct dentry *index_file_dentry = NULL; + struct dentry *named_file_dentry = NULL; + struct path parent_dir_path = {}; + struct inode *index_dir_inode = NULL; + __le64 size_attr_value = 0; + char *file_name = NULL; + char *attr_value = NULL; + int error = 0; + bool locked = false; + + if (!mi || !mi->mi_index_dir) { + error = -EFAULT; + goto out; + } + + if (copy_from_user(&args, usr_args, sizeof(args)) > 0) { + error = -EFAULT; + goto out; + } + + file_name = strndup_user(u64_to_user_ptr(args.file_name), PATH_MAX); + if (IS_ERR(file_name)) { + error = PTR_ERR(file_name); + file_name = NULL; + goto out; + } + + error = validate_name(file_name); + if (error) + goto out; + + file_id_str = file_id_to_str(args.file_id); + if (!file_id_str) { + error = -ENOMEM; + goto out; + } + + error = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (error) + goto out; + locked = true; + + /* Find a directory to put the file into. */ + error = dir_relative_path_resolve(mi, + u64_to_user_ptr(args.directory_path), + &parent_dir_path); + if (error) + goto out; + + if (parent_dir_path.dentry == mi->mi_index_dir) { + /* Can't create a file directly inside .index */ + error = -EBUSY; + goto out; + } + + /* Look up a dentry in the parent dir. It should be negative. */ + named_file_dentry = incfs_lookup_dentry(parent_dir_path.dentry, + file_name); + if (!named_file_dentry) { + error = -EFAULT; + goto out; + } + if (IS_ERR(named_file_dentry)) { + error = PTR_ERR(named_file_dentry); + named_file_dentry = NULL; + goto out; + } + if (d_really_is_positive(named_file_dentry)) { + /* File with this path already exists. */ + error = -EEXIST; + goto out; + } + /* Look up a dentry in the .index dir. It should be negative. */ + index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, file_id_str); + if (!index_file_dentry) { + error = -EFAULT; + goto out; + } + if (IS_ERR(index_file_dentry)) { + error = PTR_ERR(index_file_dentry); + index_file_dentry = NULL; + goto out; + } + if (d_really_is_positive(index_file_dentry)) { + /* File with this ID already exists in index. */ + error = -EEXIST; + goto out; + } + + /* Creating a file in the .index dir. */ + index_dir_inode = d_inode(mi->mi_index_dir); + inode_lock_nested(index_dir_inode, I_MUTEX_PARENT); + error = vfs_create(index_dir_inode, index_file_dentry, args.mode | 0222, + true); + inode_unlock(index_dir_inode); + + if (error) + goto out; + if (!d_really_is_positive(index_file_dentry)) { + error = -EINVAL; + goto out; + } + + error = chmod(index_file_dentry, args.mode | 0222); + if (error) { + pr_debug("incfs: chmod err: %d\n", error); + goto delete_index_file; + } + + /* Save the file's ID as an xattr for easy fetching in future. */ + error = vfs_setxattr(index_file_dentry, INCFS_XATTR_ID_NAME, + file_id_str, strlen(file_id_str), XATTR_CREATE); + if (error) { + pr_debug("incfs: vfs_setxattr err:%d\n", error); + goto delete_index_file; + } + + /* Save the file's size as an xattr for easy fetching in future. */ + size_attr_value = cpu_to_le64(args.size); + error = vfs_setxattr(index_file_dentry, INCFS_XATTR_SIZE_NAME, + (char *)&size_attr_value, sizeof(size_attr_value), + XATTR_CREATE); + if (error) { + pr_debug("incfs: vfs_setxattr err:%d\n", error); + goto delete_index_file; + } + + /* Save the file's attribute as an xattr */ + if (args.file_attr_len && args.file_attr) { + if (args.file_attr_len > INCFS_MAX_FILE_ATTR_SIZE) { + error = -E2BIG; + goto delete_index_file; + } + + attr_value = kmalloc(args.file_attr_len, GFP_NOFS); + if (!attr_value) { + error = -ENOMEM; + goto delete_index_file; + } + + if (copy_from_user(attr_value, + u64_to_user_ptr(args.file_attr), + args.file_attr_len) > 0) { + error = -EFAULT; + goto delete_index_file; + } + + error = vfs_setxattr(index_file_dentry, + INCFS_XATTR_METADATA_NAME, + attr_value, args.file_attr_len, + XATTR_CREATE); + + if (error) + goto delete_index_file; + } + + /* Initializing a newly created file. */ + error = init_new_file(mi, index_file_dentry, &args.file_id, args.size, + range(attr_value, args.file_attr_len), + (u8 __user *)args.signature_info, + args.signature_size); + if (error) + goto delete_index_file; + + /* Linking a file with it's real name from the requested dir. */ + error = incfs_link(index_file_dentry, named_file_dentry); + + if (!error) + goto out; + +delete_index_file: + incfs_unlink(index_file_dentry); + +out: + if (error) + pr_debug("incfs: %s err:%d\n", __func__, error); + + kfree(file_id_str); + kfree(file_name); + kfree(attr_value); + dput(named_file_dentry); + dput(index_file_dentry); + path_put(&parent_dir_path); + if (locked) + mutex_unlock(&mi->mi_dir_struct_mutex); + return error; +} + +static long ioctl_fill_blocks(struct file *f, void __user *arg) +{ + struct incfs_fill_blocks __user *usr_fill_blocks = arg; + struct incfs_fill_blocks fill_blocks; + struct incfs_fill_block *usr_fill_block_array; + struct data_file *df = get_incfs_data_file(f); + const ssize_t data_buf_size = 2 * INCFS_DATA_FILE_BLOCK_SIZE; + u8 *data_buf = NULL; + ssize_t error = 0; + int i = 0; + + if (!df) + return -EBADF; + + if ((uintptr_t)f->private_data != CAN_FILL) + return -EPERM; + + if (copy_from_user(&fill_blocks, usr_fill_blocks, sizeof(fill_blocks))) + return -EFAULT; + + usr_fill_block_array = u64_to_user_ptr(fill_blocks.fill_blocks); + data_buf = (u8 *)__get_free_pages(GFP_NOFS, get_order(data_buf_size)); + if (!data_buf) + return -ENOMEM; + + for (i = 0; i < fill_blocks.count; i++) { + struct incfs_fill_block fill_block = {}; + + if (copy_from_user(&fill_block, &usr_fill_block_array[i], + sizeof(fill_block)) > 0) { + error = -EFAULT; + break; + } + + if (fill_block.data_len > data_buf_size) { + error = -E2BIG; + break; + } + + if (copy_from_user(data_buf, u64_to_user_ptr(fill_block.data), + fill_block.data_len) > 0) { + error = -EFAULT; + break; + } + fill_block.data = 0; /* To make sure nobody uses it. */ + if (fill_block.flags & INCFS_BLOCK_FLAGS_HASH) { + error = incfs_process_new_hash_block(df, &fill_block, + data_buf); + } else { + error = incfs_process_new_data_block(df, &fill_block, + data_buf); + } + if (error) + break; + } + + if (data_buf) + free_pages((unsigned long)data_buf, get_order(data_buf_size)); + + /* + * Only report the error if no records were processed, otherwise + * just return how many were processed successfully. + */ + if (i == 0) + return error; + + return i; +} + +static long ioctl_permit_fill(struct file *f, void __user *arg) +{ + struct incfs_permit_fill __user *usr_permit_fill = arg; + struct incfs_permit_fill permit_fill; + long error = 0; + struct file *file = 0; + + if (f->f_op != &incfs_pending_read_file_ops) + return -EPERM; + + if (copy_from_user(&permit_fill, usr_permit_fill, sizeof(permit_fill))) + return -EFAULT; + + file = fget(permit_fill.file_descriptor); + if (IS_ERR(file)) + return PTR_ERR(file); + + if (file->f_op != &incfs_file_ops) { + error = -EPERM; + goto out; + } + + if (file->f_inode->i_sb != f->f_inode->i_sb) { + error = -EPERM; + goto out; + } + + switch ((uintptr_t)file->private_data) { + case CANT_FILL: + file->private_data = (void *)CAN_FILL; + break; + + case CAN_FILL: + pr_debug("CAN_FILL already set"); + break; + + default: + pr_warn("Invalid file private data"); + error = -EFAULT; + goto out; + } + +out: + fput(file); + return error; +} + +static long ioctl_read_file_signature(struct file *f, void __user *arg) +{ + struct incfs_get_file_sig_args __user *args_usr_ptr = arg; + struct incfs_get_file_sig_args args = {}; + u8 *sig_buffer = NULL; + size_t sig_buf_size = 0; + int error = 0; + int read_result = 0; + struct data_file *df = get_incfs_data_file(f); + + if (!df) + return -EINVAL; + + if (copy_from_user(&args, args_usr_ptr, sizeof(args)) > 0) + return -EINVAL; + + sig_buf_size = args.file_signature_buf_size; + if (sig_buf_size > INCFS_MAX_SIGNATURE_SIZE) + return -E2BIG; + + sig_buffer = kzalloc(sig_buf_size, GFP_NOFS); + if (!sig_buffer) + return -ENOMEM; + + read_result = incfs_read_file_signature(df, + range(sig_buffer, sig_buf_size)); + + if (read_result < 0) { + error = read_result; + goto out; + } + + if (copy_to_user(u64_to_user_ptr(args.file_signature), sig_buffer, + read_result)) { + error = -EFAULT; + goto out; + } + + args.file_signature_len_out = read_result; + if (copy_to_user(args_usr_ptr, &args, sizeof(args))) + error = -EFAULT; + +out: + kfree(sig_buffer); + + return error; +} + +static long dispatch_ioctl(struct file *f, unsigned int req, unsigned long arg) +{ + struct mount_info *mi = get_mount_info(file_superblock(f)); + + switch (req) { + case INCFS_IOC_CREATE_FILE: + return ioctl_create_file(mi, (void __user *)arg); + case INCFS_IOC_FILL_BLOCKS: + return ioctl_fill_blocks(f, (void __user *)arg); + case INCFS_IOC_PERMIT_FILL: + return ioctl_permit_fill(f, (void __user *)arg); + case INCFS_IOC_READ_FILE_SIGNATURE: + return ioctl_read_file_signature(f, (void __user *)arg); + default: + return -EINVAL; + } +} + +static struct dentry *dir_lookup(struct inode *dir_inode, struct dentry *dentry, + unsigned int flags) +{ + struct mount_info *mi = get_mount_info(dir_inode->i_sb); + struct dentry *dir_dentry = NULL; + struct dentry *backing_dentry = NULL; + struct path dir_backing_path = {}; + struct inode_info *dir_info = get_incfs_node(dir_inode); + struct mem_range name_range = + range((u8 *)dentry->d_name.name, dentry->d_name.len); + int err = 0; + + if (d_inode(mi->mi_backing_dir_path.dentry) == + dir_info->n_backing_inode) { + /* We do lookup in the FS root. Show pseudo files. */ + + if (incfs_equal_ranges(pending_reads_file_name_range, + name_range)) { + struct inode *inode = fetch_pending_reads_inode( + dir_inode->i_sb); + + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + + d_add(dentry, inode); + goto out; + } + + if (incfs_equal_ranges(log_file_name_range, name_range)) { + struct inode *inode = fetch_log_inode( + dir_inode->i_sb); + + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + + d_add(dentry, inode); + goto out; + } + } + + dir_dentry = dget_parent(dentry); + get_incfs_backing_path(dir_dentry, &dir_backing_path); + backing_dentry = incfs_lookup_dentry(dir_backing_path.dentry, + dentry->d_name.name); + + if (!backing_dentry || IS_ERR(backing_dentry)) { + err = IS_ERR(backing_dentry) + ? PTR_ERR(backing_dentry) + : -EFAULT; + backing_dentry = NULL; + goto out; + } else { + struct inode *inode = NULL; + struct path backing_path = { + .mnt = dir_backing_path.mnt, + .dentry = backing_dentry + }; + + err = incfs_init_dentry(dentry, &backing_path); + if (err) + goto out; + + if (!d_really_is_positive(backing_dentry)) { + /* + * No such entry found in the backing dir. + * Create a negative entry. + */ + d_add(dentry, NULL); + err = 0; + goto out; + } + + if (d_inode(backing_dentry)->i_sb != + dir_info->n_backing_inode->i_sb) { + /* + * Somehow after the path lookup we ended up in a + * different fs mount. If we keep going it's going + * to end badly. + */ + err = -EXDEV; + goto out; + } + + inode = fetch_regular_inode(dir_inode->i_sb, backing_dentry); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + + d_add(dentry, inode); + } + +out: + dput(dir_dentry); + dput(backing_dentry); + path_put(&dir_backing_path); + if (err) + pr_debug("incfs: %s %s %d\n", __func__, + dentry->d_name.name, err); + return ERR_PTR(err); +} + +static int dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct mount_info *mi = get_mount_info(dir->i_sb); + struct inode_info *dir_node = get_incfs_node(dir); + struct dentry *backing_dentry = NULL; + struct path backing_path = {}; + int err = 0; + + + if (!mi || !dir_node || !dir_node->n_backing_inode) + return -EBADF; + + err = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (err) + return err; + + get_incfs_backing_path(dentry, &backing_path); + backing_dentry = backing_path.dentry; + + if (!backing_dentry) { + err = -EBADF; + goto out; + } + + if (backing_dentry->d_parent == mi->mi_index_dir) { + /* Can't create a subdir inside .index */ + err = -EBUSY; + goto out; + } + + inode_lock_nested(dir_node->n_backing_inode, I_MUTEX_PARENT); + err = vfs_mkdir(dir_node->n_backing_inode, backing_dentry, mode | 0222); + inode_unlock(dir_node->n_backing_inode); + if (!err) { + struct inode *inode = NULL; + + if (d_really_is_negative(backing_dentry)) { + err = -EINVAL; + goto out; + } + + inode = fetch_regular_inode(dir->i_sb, backing_dentry); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + d_instantiate(dentry, inode); + } + +out: + if (d_really_is_negative(dentry)) + d_drop(dentry); + path_put(&backing_path); + mutex_unlock(&mi->mi_dir_struct_mutex); + if (err) + pr_debug("incfs: %s err:%d\n", __func__, err); + return err; +} + +/* Delete file referenced by backing_dentry and also its hardlink from .index */ +static int final_file_delete(struct mount_info *mi, + struct dentry *backing_dentry) +{ + struct dentry *index_file_dentry = NULL; + /* 2 chars per byte of file ID + 1 char for \0 */ + char file_id_str[2 * sizeof(incfs_uuid_t) + 1] = {0}; + ssize_t uuid_size = 0; + int error = 0; + + WARN_ON(!mutex_is_locked(&mi->mi_dir_struct_mutex)); + uuid_size = vfs_getxattr(backing_dentry, INCFS_XATTR_ID_NAME, + file_id_str, 2 * sizeof(incfs_uuid_t)); + if (uuid_size < 0) { + error = uuid_size; + goto out; + } + + if (uuid_size != 2 * sizeof(incfs_uuid_t)) { + error = -EBADMSG; + goto out; + } + + index_file_dentry = incfs_lookup_dentry(mi->mi_index_dir, file_id_str); + if (IS_ERR(index_file_dentry)) { + error = PTR_ERR(index_file_dentry); + goto out; + } + + error = incfs_unlink(backing_dentry); + if (error) + goto out; + + if (d_really_is_positive(index_file_dentry)) + error = incfs_unlink(index_file_dentry); +out: + if (error) + pr_debug("incfs: delete_file_from_index err:%d\n", error); + return error; +} + +static int dir_unlink(struct inode *dir, struct dentry *dentry) +{ + struct mount_info *mi = get_mount_info(dir->i_sb); + struct path backing_path = {}; + struct kstat stat; + int err = 0; + + err = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (err) + return err; + + get_incfs_backing_path(dentry, &backing_path); + if (!backing_path.dentry) { + err = -EBADF; + goto out; + } + + if (backing_path.dentry->d_parent == mi->mi_index_dir) { + /* Direct unlink from .index are not allowed. */ + err = -EBUSY; + goto out; + } + + err = vfs_getattr(&backing_path, &stat, STATX_NLINK, + AT_STATX_SYNC_AS_STAT); + if (err) + goto out; + + if (stat.nlink == 2) { + /* + * This is the last named link to this file. The only one left + * is in .index. Remove them both now. + */ + err = final_file_delete(mi, backing_path.dentry); + } else { + /* There are other links to this file. Remove just this one. */ + err = incfs_unlink(backing_path.dentry); + } + + d_drop(dentry); +out: + path_put(&backing_path); + if (err) + pr_debug("incfs: %s err:%d\n", __func__, err); + mutex_unlock(&mi->mi_dir_struct_mutex); + return err; +} + +static int dir_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry) +{ + struct mount_info *mi = get_mount_info(dir->i_sb); + struct path backing_old_path = {}; + struct path backing_new_path = {}; + int error = 0; + + error = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (error) + return error; + + get_incfs_backing_path(old_dentry, &backing_old_path); + get_incfs_backing_path(new_dentry, &backing_new_path); + + if (backing_new_path.dentry->d_parent == mi->mi_index_dir) { + /* Can't link to .index */ + error = -EBUSY; + goto out; + } + + error = incfs_link(backing_old_path.dentry, backing_new_path.dentry); + if (!error) { + struct inode *inode = NULL; + struct dentry *bdentry = backing_new_path.dentry; + + if (d_really_is_negative(bdentry)) { + error = -EINVAL; + goto out; + } + + inode = fetch_regular_inode(dir->i_sb, bdentry); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); + goto out; + } + d_instantiate(new_dentry, inode); + } + +out: + path_put(&backing_old_path); + path_put(&backing_new_path); + if (error) + pr_debug("incfs: %s err:%d\n", __func__, error); + mutex_unlock(&mi->mi_dir_struct_mutex); + return error; +} + +static int dir_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct mount_info *mi = get_mount_info(dir->i_sb); + struct path backing_path = {}; + int err = 0; + + err = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (err) + return err; + + get_incfs_backing_path(dentry, &backing_path); + if (!backing_path.dentry) { + err = -EBADF; + goto out; + } + + if (backing_path.dentry == mi->mi_index_dir) { + /* Can't delete .index */ + err = -EBUSY; + goto out; + } + + err = incfs_rmdir(backing_path.dentry); + if (!err) + d_drop(dentry); +out: + path_put(&backing_path); + if (err) + pr_debug("incfs: %s err:%d\n", __func__, err); + mutex_unlock(&mi->mi_dir_struct_mutex); + return err; +} + +static int dir_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct mount_info *mi = get_mount_info(old_dir->i_sb); + struct dentry *backing_old_dentry; + struct dentry *backing_new_dentry; + struct dentry *backing_old_dir_dentry; + struct dentry *backing_new_dir_dentry; + struct inode *target_inode; + struct dentry *trap; + int error = 0; + + error = mutex_lock_interruptible(&mi->mi_dir_struct_mutex); + if (error) + return error; + + backing_old_dentry = get_incfs_dentry(old_dentry)->backing_path.dentry; + backing_new_dentry = get_incfs_dentry(new_dentry)->backing_path.dentry; + dget(backing_old_dentry); + dget(backing_new_dentry); + + backing_old_dir_dentry = dget_parent(backing_old_dentry); + backing_new_dir_dentry = dget_parent(backing_new_dentry); + target_inode = d_inode(new_dentry); + + if (backing_old_dir_dentry == mi->mi_index_dir) { + /* Direct moves from .index are not allowed. */ + error = -EBUSY; + goto out; + } + + trap = lock_rename(backing_old_dir_dentry, backing_new_dir_dentry); + + if (trap == backing_old_dentry) { + error = -EINVAL; + goto unlock_out; + } + if (trap == backing_new_dentry) { + error = -ENOTEMPTY; + goto unlock_out; + } + + error = vfs_rename(d_inode(backing_old_dir_dentry), backing_old_dentry, + d_inode(backing_new_dir_dentry), backing_new_dentry, + NULL, 0); + if (error) + goto unlock_out; + if (target_inode) + fsstack_copy_attr_all(target_inode, + get_incfs_node(target_inode)->n_backing_inode); + fsstack_copy_attr_all(new_dir, d_inode(backing_new_dir_dentry)); + if (new_dir != old_dir) + fsstack_copy_attr_all(old_dir, d_inode(backing_old_dir_dentry)); + +unlock_out: + unlock_rename(backing_old_dir_dentry, backing_new_dir_dentry); + +out: + dput(backing_new_dir_dentry); + dput(backing_old_dir_dentry); + dput(backing_new_dentry); + dput(backing_old_dentry); + + mutex_unlock(&mi->mi_dir_struct_mutex); + if (error) + pr_debug("incfs: %s err:%d\n", __func__, error); + return error; +} + + +static int file_open(struct inode *inode, struct file *file) +{ + struct mount_info *mi = get_mount_info(inode->i_sb); + struct file *backing_file = NULL; + struct path backing_path = {}; + int err = 0; + + get_incfs_backing_path(file->f_path.dentry, &backing_path); + backing_file = dentry_open(&backing_path, O_RDWR | O_NOATIME, + mi->mi_owner); + path_put(&backing_path); + + if (IS_ERR(backing_file)) { + err = PTR_ERR(backing_file); + backing_file = NULL; + goto out; + } + + if (S_ISREG(inode->i_mode)) { + err = make_inode_ready_for_data_ops(mi, inode, backing_file); + file->private_data = (void *)CANT_FILL; + } else if (S_ISDIR(inode->i_mode)) { + struct dir_file *dir = NULL; + + dir = incfs_open_dir_file(mi, backing_file); + if (IS_ERR(dir)) + err = PTR_ERR(dir); + else + file->private_data = dir; + } else + err = -EBADF; + +out: + if (err) + pr_debug("incfs: %s name:%s err: %d\n", __func__, + file->f_path.dentry->d_name.name, err); + if (backing_file) + fput(backing_file); + return err; +} + +static int file_release(struct inode *inode, struct file *file) +{ + if (S_ISREG(inode->i_mode)) { + /* Do nothing. + * data_file is released only by inode eviction. + */ + } else if (S_ISDIR(inode->i_mode)) { + struct dir_file *dir = get_incfs_dir_file(file); + + incfs_free_dir_file(dir); + } + + return 0; +} + +static int dentry_revalidate(struct dentry *d, unsigned int flags) +{ + struct path backing_path = {}; + struct inode_info *info = get_incfs_node(d_inode(d)); + struct inode *binode = (info == NULL) ? NULL : info->n_backing_inode; + struct dentry *backing_dentry = NULL; + int result = 0; + + if (flags & LOOKUP_RCU) + return -ECHILD; + + get_incfs_backing_path(d, &backing_path); + backing_dentry = backing_path.dentry; + if (!backing_dentry) + goto out; + + if (d_inode(backing_dentry) != binode) { + /* + * Backing inodes obtained via dentry and inode don't match. + * It indicates that most likely backing dir has changed + * directly bypassing Incremental FS interface. + */ + goto out; + } + + if (backing_dentry->d_flags & DCACHE_OP_REVALIDATE) { + result = backing_dentry->d_op->d_revalidate(backing_dentry, + flags); + } else + result = 1; + +out: + path_put(&backing_path); + return result; +} + +static void dentry_release(struct dentry *d) +{ + struct dentry_info *di = get_incfs_dentry(d); + + if (di) + path_put(&di->backing_path); + d->d_fsdata = NULL; +} + +static struct inode *alloc_inode(struct super_block *sb) +{ + struct inode_info *node = kzalloc(sizeof(*node), GFP_NOFS); + + /* TODO: add a slab-based cache here. */ + if (!node) + return NULL; + inode_init_once(&node->n_vfs_inode); + return &node->n_vfs_inode; +} + +static void free_inode(struct inode *inode) +{ + struct inode_info *node = get_incfs_node(inode); + + kfree(node); +} + +static void evict_inode(struct inode *inode) +{ + struct inode_info *node = get_incfs_node(inode); + + if (node) { + if (node->n_backing_inode) { + iput(node->n_backing_inode); + node->n_backing_inode = NULL; + } + if (node->n_file) { + incfs_free_data_file(node->n_file); + node->n_file = NULL; + } + } + + truncate_inode_pages(&inode->i_data, 0); + clear_inode(inode); +} + +static ssize_t incfs_getxattr(struct dentry *d, const char *name, + void *value, size_t size) +{ + struct dentry_info *di = get_incfs_dentry(d); + struct mount_info *mi = get_mount_info(d->d_sb); + char *stored_value; + size_t stored_size; + + if (di && di->backing_path.dentry) + return vfs_getxattr(di->backing_path.dentry, name, value, size); + + if (strcmp(name, "security.selinux")) + return -ENODATA; + + if (!strcmp(d->d_iname, INCFS_PENDING_READS_FILENAME)) { + stored_value = mi->pending_read_xattr; + stored_size = mi->pending_read_xattr_size; + } else if (!strcmp(d->d_iname, INCFS_LOG_FILENAME)) { + stored_value = mi->log_xattr; + stored_size = mi->log_xattr_size; + } else { + return -ENODATA; + } + + if (!stored_value) + return -ENODATA; + + if (stored_size > size) + return -E2BIG; + + memcpy(value, stored_value, stored_size); + return stored_size; + +} + + +static ssize_t incfs_setxattr(struct dentry *d, const char *name, + const void *value, size_t size, int flags) +{ + struct dentry_info *di = get_incfs_dentry(d); + struct mount_info *mi = get_mount_info(d->d_sb); + void **stored_value; + size_t *stored_size; + + if (di && di->backing_path.dentry) + return vfs_setxattr(di->backing_path.dentry, name, value, size, + flags); + + if (strcmp(name, "security.selinux")) + return -ENODATA; + + if (size > INCFS_MAX_FILE_ATTR_SIZE) + return -E2BIG; + + if (!strcmp(d->d_iname, INCFS_PENDING_READS_FILENAME)) { + stored_value = &mi->pending_read_xattr; + stored_size = &mi->pending_read_xattr_size; + } else if (!strcmp(d->d_iname, INCFS_LOG_FILENAME)) { + stored_value = &mi->log_xattr; + stored_size = &mi->log_xattr_size; + } else { + return -ENODATA; + } + + kfree (*stored_value); + *stored_value = kzalloc(size, GFP_NOFS); + if (!*stored_value) + return -ENOMEM; + + memcpy(*stored_value, value, size); + *stored_size = size; + return 0; +} + +static ssize_t incfs_listxattr(struct dentry *d, char *list, size_t size) +{ + struct dentry_info *di = get_incfs_dentry(d); + + if (!di || !di->backing_path.dentry) + return -ENODATA; + + return vfs_listxattr(di->backing_path.dentry, list, size); +} + +struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, + const char *dev_name, void *data) +{ + struct mount_options options = {}; + struct mount_info *mi = NULL; + struct path backing_dir_path = {}; + struct dentry *index_dir; + struct super_block *src_fs_sb = NULL; + struct inode *root_inode = NULL; + struct super_block *sb = sget(type, NULL, set_anon_super, flags, NULL); + int error = 0; + + if (IS_ERR(sb)) + return ERR_CAST(sb); + + sb->s_op = &incfs_super_ops; + sb->s_d_op = &incfs_dentry_ops; + sb->s_flags |= S_NOATIME; + sb->s_magic = INCFS_MAGIC_NUMBER; + sb->s_time_gran = 1; + sb->s_blocksize = INCFS_DATA_FILE_BLOCK_SIZE; + sb->s_blocksize_bits = blksize_bits(sb->s_blocksize); + sb->s_xattr = incfs_xattr_ops; + + BUILD_BUG_ON(PAGE_SIZE != INCFS_DATA_FILE_BLOCK_SIZE); + + error = parse_options(&options, (char *)data); + if (error != 0) { + pr_err("incfs: Options parsing error. %d\n", error); + goto err; + } + + sb->s_bdi->ra_pages = options.readahead_pages; + if (!dev_name) { + pr_err("incfs: Backing dir is not set, filesystem can't be mounted.\n"); + error = -ENOENT; + goto err; + } + + error = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, + &backing_dir_path); + if (error || backing_dir_path.dentry == NULL || + !d_really_is_positive(backing_dir_path.dentry)) { + pr_err("incfs: Error accessing: %s.\n", + dev_name); + goto err; + } + src_fs_sb = backing_dir_path.dentry->d_sb; + sb->s_maxbytes = src_fs_sb->s_maxbytes; + + mi = incfs_alloc_mount_info(sb, &options, &backing_dir_path); + + if (IS_ERR_OR_NULL(mi)) { + error = PTR_ERR(mi); + pr_err("incfs: Error allocating mount info. %d\n", error); + mi = NULL; + goto err; + } + + index_dir = open_or_create_index_dir(backing_dir_path.dentry); + if (IS_ERR_OR_NULL(index_dir)) { + error = PTR_ERR(index_dir); + pr_err("incfs: Can't find or create .index dir in %s\n", + dev_name); + goto err; + } + mi->mi_index_dir = index_dir; + + sb->s_fs_info = mi; + root_inode = fetch_regular_inode(sb, backing_dir_path.dentry); + if (IS_ERR(root_inode)) { + error = PTR_ERR(root_inode); + goto err; + } + + sb->s_root = d_make_root(root_inode); + if (!sb->s_root) { + error = -ENOMEM; + goto err; + } + error = incfs_init_dentry(sb->s_root, &backing_dir_path); + if (error) + goto err; + + path_put(&backing_dir_path); + sb->s_flags |= SB_ACTIVE; + + pr_debug("infs: mount\n"); + return dget(sb->s_root); +err: + sb->s_fs_info = NULL; + path_put(&backing_dir_path); + incfs_free_mount_info(mi); + deactivate_locked_super(sb); + return ERR_PTR(error); +} + +static int incfs_remount_fs(struct super_block *sb, int *flags, char *data) +{ + struct mount_options options; + struct mount_info *mi = get_mount_info(sb); + int err = 0; + + sync_filesystem(sb); + err = parse_options(&options, (char *)data); + if (err) + return err; + + if (mi->mi_options.read_timeout_ms != options.read_timeout_ms) { + mi->mi_options.read_timeout_ms = options.read_timeout_ms; + pr_debug("incfs: new timeout_ms=%d", options.read_timeout_ms); + } + + pr_debug("infs: remount\n"); + return 0; +} + +void incfs_kill_sb(struct super_block *sb) +{ + struct mount_info *mi = sb->s_fs_info; + + pr_debug("infs: unmount\n"); + incfs_free_mount_info(mi); + generic_shutdown_super(sb); +} + +static int show_options(struct seq_file *m, struct dentry *root) +{ + struct mount_info *mi = get_mount_info(root->d_sb); + + seq_printf(m, ",read_timeout_ms=%u", mi->mi_options.read_timeout_ms); + seq_printf(m, ",readahead=%u", mi->mi_options.readahead_pages); + if (mi->mi_options.read_log_pages != 0) { + seq_printf(m, ",rlog_pages=%u", mi->mi_options.read_log_pages); + seq_printf(m, ",rlog_wakeup_cnt=%u", + mi->mi_options.read_log_wakeup_count); + } + if (mi->mi_options.no_backing_file_cache) + seq_puts(m, ",no_bf_cache"); + if (mi->mi_options.no_backing_file_readahead) + seq_puts(m, ",no_bf_readahead"); + return 0; +} diff --git a/fs/incfs/vfs.h b/fs/incfs/vfs.h new file mode 100644 index 000000000000..eaa490e19072 --- /dev/null +++ b/fs/incfs/vfs.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2018 Google LLC + */ + +#ifndef _INCFS_VFS_H +#define _INCFS_VFS_H + +void incfs_kill_sb(struct super_block *sb); +struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, + const char *dev_name, void *data); + +#endif diff --git a/fs/inode.c b/fs/inode.c index 68e6eeb72b8b..9f1901043c8f 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -656,6 +657,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) struct inode *inode, *next; LIST_HEAD(dispose); +again: spin_lock(&sb->s_inode_list_lock); list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { spin_lock(&inode->i_lock); @@ -678,6 +680,12 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) inode_lru_list_del(inode); spin_unlock(&inode->i_lock); list_add(&inode->i_lru, &dispose); + if (need_resched()) { + spin_unlock(&sb->s_inode_list_lock); + cond_resched(); + dispose_list(&dispose); + goto again; + } } spin_unlock(&sb->s_inode_list_lock); @@ -2158,7 +2166,7 @@ int vfs_ioc_setflags_prepare(struct inode *inode, unsigned int oldflags, !capable(CAP_LINUX_IMMUTABLE)) return -EPERM; - return 0; + return fscrypt_prepare_setflags(inode, oldflags, flags); } EXPORT_SYMBOL(vfs_ioc_setflags_prepare); diff --git a/fs/iomap.c b/fs/iomap.c index 467d98bf7054..1e573a59ea71 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -825,10 +826,13 @@ static blk_qc_t iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos, unsigned len) { + struct inode *inode = file_inode(dio->iocb->ki_filp); struct page *page = ZERO_PAGE(0); struct bio *bio; bio = bio_alloc(GFP_KERNEL, 1); + fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits, + GFP_KERNEL); bio_set_dev(bio, iomap->bdev); bio->bi_iter.bi_sector = iomap->blkno + ((pos - iomap->offset) >> 9); @@ -908,6 +912,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, return 0; bio = bio_alloc(GFP_KERNEL, nr_pages); + fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits, + GFP_KERNEL); bio_set_dev(bio, iomap->bdev); bio->bi_iter.bi_sector = iomap->blkno + ((pos - iomap->offset) >> 9); @@ -941,7 +947,14 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, dio->submit.cookie = submit_bio(bio); } while (nr_pages); - if (need_zeroout) { + /* + * We need to zeroout the tail of a sub-block write if the extent type + * requires zeroing or the write extends beyond EOF. If we don't zero + * the block tail in the latter case, we can expose stale data via mmap + * reads of the EOF block. + */ + if (need_zeroout || + ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) { /* zero out from the end of the write to the end of the block */ pad = pos & (fs_block_size - 1); if (pad) @@ -1046,8 +1059,15 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, } pos += ret; - if (iov_iter_rw(iter) == READ && pos >= dio->i_size) + if (iov_iter_rw(iter) == READ && pos >= dio->i_size) { + /* + * We only report that we've read data up to i_size. + * Revert iter to a state corresponding to that as + * some callers (such as splice code) rely on it. + */ + iov_iter_revert(iter, pos - dio->i_size); break; + } } while ((count = iov_iter_count(iter)) > 0); blk_finish_plug(&plug); diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index fe4fe155b7fb..15d129b7494b 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -168,7 +168,7 @@ void __jbd2_log_wait_for_space(journal_t *journal) "journal space in %s\n", __func__, journal->j_devname); WARN_ON(1); - jbd2_journal_abort(journal, 0); + jbd2_journal_abort(journal, -EIO); } write_lock(&journal->j_state_lock); } else { diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 0567b17a970c..1a4bd8d9636e 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -726,7 +726,6 @@ start_journal_io: submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); } cond_resched(); - stats.run.rs_blocks_logged += bufs; /* Force a new descriptor to be generated next time round the loop. */ @@ -784,7 +783,7 @@ start_journal_io: err = journal_submit_commit_record(journal, commit_transaction, &cbh, crc32_sum); if (err) - __jbd2_journal_abort_hard(journal); + jbd2_journal_abort(journal, err); } blk_finish_plug(&plug); @@ -813,6 +812,7 @@ start_journal_io: if (unlikely(!buffer_uptodate(bh))) err = -EIO; jbd2_unfile_log_bh(bh); + stats.run.rs_blocks_logged++; /* * The list contains temporary buffer heads created by @@ -858,6 +858,7 @@ start_journal_io: BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); clear_buffer_jwrite(bh); jbd2_unfile_log_bh(bh); + stats.run.rs_blocks_logged++; __brelse(bh); /* One for getblk */ /* AKPM: bforget here */ } @@ -875,10 +876,11 @@ start_journal_io: err = journal_submit_commit_record(journal, commit_transaction, &cbh, crc32_sum); if (err) - __jbd2_journal_abort_hard(journal); + jbd2_journal_abort(journal, err); } if (cbh) err = journal_wait_on_commit_record(journal, cbh); + stats.run.rs_blocks_logged++; if (jbd2_has_feature_async_commit(journal) && journal->j_flags & JBD2_BARRIER) { blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL); @@ -971,29 +973,33 @@ restart_loop: * it. */ /* - * A buffer which has been freed while still being journaled by - * a previous transaction. - */ - if (buffer_freed(bh)) { + * A buffer which has been freed while still being journaled + * by a previous transaction, refile the buffer to BJ_Forget of + * the running transaction. If the just committed transaction + * contains "add to orphan" operation, we can completely + * invalidate the buffer now. We are rather through in that + * since the buffer may be still accessible when blocksize < + * pagesize and it is attached to the last partial page. + */ + if (buffer_freed(bh) && !jh->b_next_transaction) { + struct address_space *mapping; + + clear_buffer_freed(bh); + clear_buffer_jbddirty(bh); + /* - * If the running transaction is the one containing - * "add to orphan" operation (b_next_transaction != - * NULL), we have to wait for that transaction to - * commit before we can really get rid of the buffer. - * So just clear b_modified to not confuse transaction - * credit accounting and refile the buffer to - * BJ_Forget of the running transaction. If the just - * committed transaction contains "add to orphan" - * operation, we can completely invalidate the buffer - * now. We are rather through in that since the - * buffer may be still accessible when blocksize < - * pagesize and it is attached to the last partial - * page. + * Block device buffers need to stay mapped all the + * time, so it is enough to clear buffer_jbddirty and + * buffer_freed bits. For the file mapping buffers (i.e. + * journalled data) we need to unmap buffer and clear + * more bits. We also need to be careful about the check + * because the data page mapping can get cleared under + * out hands, which alse need not to clear more bits + * because the page and buffers will be freed and can + * never be reused once we are done with them. */ - jh->b_modified = 0; - if (!jh->b_next_transaction) { - clear_buffer_freed(bh); - clear_buffer_jbddirty(bh); + mapping = READ_ONCE(bh->b_page->mapping); + if (mapping && !sb_is_blkdev_sb(mapping->host->i_sb)) { clear_buffer_mapped(bh); clear_buffer_new(bh); clear_buffer_req(bh); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index a1605a7d3e72..86490da306f6 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1691,6 +1691,11 @@ int jbd2_journal_load(journal_t *journal) journal->j_devname); return -EFSCORRUPTED; } + /* + * clear JBD2_ABORT flag initialized in journal_init_common + * here to update log tail information with the newest seq. + */ + journal->j_flags &= ~JBD2_ABORT; /* OK, we've finished with the dynamic journal bits: * reinitialise the dynamic contents of the superblock in memory @@ -1698,7 +1703,6 @@ int jbd2_journal_load(journal_t *journal) if (journal_reset(journal)) goto recovery_error; - journal->j_flags &= ~JBD2_ABORT; journal->j_flags |= JBD2_LOADED; return 0; @@ -2119,8 +2123,7 @@ static void __journal_abort_soft (journal_t *journal, int errno) if (journal->j_flags & JBD2_ABORT) { write_unlock(&journal->j_state_lock); - if (!old_errno && old_errno != -ESHUTDOWN && - errno == -ESHUTDOWN) + if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) jbd2_journal_update_sb_errno(journal); return; } @@ -2128,12 +2131,10 @@ static void __journal_abort_soft (journal_t *journal, int errno) __jbd2_journal_abort_hard(journal); - if (errno) { - jbd2_journal_update_sb_errno(journal); - write_lock(&journal->j_state_lock); - journal->j_flags |= JBD2_REC_ERR; - write_unlock(&journal->j_state_lock); - } + jbd2_journal_update_sb_errno(journal); + write_lock(&journal->j_state_lock); + journal->j_flags |= JBD2_REC_ERR; + write_unlock(&journal->j_state_lock); } /** @@ -2175,11 +2176,6 @@ static void __journal_abort_soft (journal_t *journal, int errno) * failure to disk. ext3_error, for example, now uses this * functionality. * - * Errors which originate from within the journaling layer will NOT - * supply an errno; a null errno implies that absolutely no further - * writes are done to the journal (unless there are any already in - * progress). - * */ void jbd2_journal_abort(journal_t *journal, int errno) diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 8d8569d7ea1e..826ec9e70d02 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1065,8 +1065,8 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh, /* For undo access buffer must have data copied */ if (undo && !jh->b_committed_data) goto out; - if (jh->b_transaction != handle->h_transaction && - jh->b_next_transaction != handle->h_transaction) + if (READ_ONCE(jh->b_transaction) != handle->h_transaction && + READ_ONCE(jh->b_next_transaction) != handle->h_transaction) goto out; /* * There are two reasons for the barrier here: @@ -2251,14 +2251,16 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, return -EBUSY; } /* - * OK, buffer won't be reachable after truncate. We just set - * j_next_transaction to the running transaction (if there is - * one) and mark buffer as freed so that commit code knows it - * should clear dirty bits when it is done with the buffer. + * OK, buffer won't be reachable after truncate. We just clear + * b_modified to not confuse transaction credit accounting, and + * set j_next_transaction to the running transaction (if there + * is one) and mark buffer as freed so that commit code knows + * it should clear dirty bits when it is done with the buffer. */ set_buffer_freed(bh); if (journal->j_running_transaction && buffer_jbddirty(bh)) jh->b_next_transaction = journal->j_running_transaction; + jh->b_modified = 0; jbd2_journal_put_journal_head(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); @@ -2484,8 +2486,8 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) * our jh reference and thus __jbd2_journal_file_buffer() must not * take a new one. */ - jh->b_transaction = jh->b_next_transaction; - jh->b_next_transaction = NULL; + WRITE_ONCE(jh->b_transaction, jh->b_next_transaction); + WRITE_ONCE(jh->b_next_transaction, NULL); if (buffer_freed(bh)) jlist = BJ_Forget; else if (jh->b_modified) diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index 4d973524c887..224ef034004b 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -1928,8 +1928,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * header ? */ if (tlck->type & tlckTRUNCATE) { - /* This odd declaration suppresses a bogus gcc warning */ - pxd_t pxd = pxd; /* truncated extent of xad */ + pxd_t pxd; /* truncated extent of xad */ int twm; /* diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 9e9117533fd7..8697b750b1c9 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -623,7 +623,6 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, { struct kernfs_node *kn; u32 gen; - int cursor; int ret; name = kstrdup_const(name, GFP_KERNEL); @@ -636,11 +635,11 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, idr_preload(GFP_KERNEL); spin_lock(&kernfs_idr_lock); - cursor = idr_get_cursor(&root->ino_idr); ret = idr_alloc_cyclic(&root->ino_idr, kn, 1, 0, GFP_ATOMIC); - if (ret >= 0 && ret < cursor) + if (ret >= 0 && ret < root->last_ino) root->next_generation++; gen = root->next_generation; + root->last_ino = ret; spin_unlock(&kernfs_idr_lock); idr_preload_end(); if (ret < 0) diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index 5145ae2f0572..d273e3accade 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c @@ -63,6 +63,9 @@ static int kernfs_get_target_path(struct kernfs_node *parent, if (base == kn) break; + if ((s - path) + 3 >= PATH_MAX) + return -ENAMETOOLONG; + strcpy(s, "../"); s += 3; base = base->parent; @@ -79,7 +82,7 @@ static int kernfs_get_target_path(struct kernfs_node *parent, if (len < 2) return -EINVAL; len--; - if ((s - path) + len > PATH_MAX) + if ((s - path) + len >= PATH_MAX) return -ENAMETOOLONG; /* reverse fillup of target string from target to base */ diff --git a/fs/libfs.c b/fs/libfs.c index 49623301e5f0..27def8ba162d 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -16,6 +16,8 @@ #include #include #include /* sync_mapping_buffers */ +#include +#include #include @@ -1219,3 +1221,112 @@ bool is_empty_dir_inode(struct inode *inode) return (inode->i_fop == &empty_dir_operations) && (inode->i_op == &empty_dir_inode_operations); } + +#ifdef CONFIG_UNICODE +bool needs_casefold(const struct inode *dir) +{ + return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding && + (!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir)); +} +EXPORT_SYMBOL(needs_casefold); + +int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name) +{ + const struct dentry *parent = READ_ONCE(dentry->d_parent); + const struct inode *inode = READ_ONCE(parent->d_inode); + const struct super_block *sb = dentry->d_sb; + const struct unicode_map *um = sb->s_encoding; + struct qstr entry = QSTR_INIT(str, len); + int ret; + + if (!inode || !needs_casefold(inode)) + goto fallback; + + ret = utf8_strncasecmp(um, name, &entry); + if (ret >= 0) + return ret; + + if (sb_has_enc_strict_mode(sb)) + return -EINVAL; +fallback: + if (len != name->len) + return 1; + return !!memcmp(str, name->name, len); +} +EXPORT_SYMBOL(generic_ci_d_compare); + +int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str) +{ + const struct inode *inode = READ_ONCE(dentry->d_inode); + struct super_block *sb = dentry->d_sb; + const struct unicode_map *um = sb->s_encoding; + int ret = 0; + + if (!inode || !needs_casefold(inode)) + return 0; + + ret = utf8_casefold_hash(um, dentry, str); + if (ret < 0) + goto err; + + return 0; +err: + if (sb_has_enc_strict_mode(sb)) + ret = -EINVAL; + else + ret = 0; + return ret; +} +EXPORT_SYMBOL(generic_ci_d_hash); + +static const struct dentry_operations generic_ci_dentry_ops = { + .d_hash = generic_ci_d_hash, + .d_compare = generic_ci_d_compare, +}; +#endif + +#ifdef CONFIG_FS_ENCRYPTION +static const struct dentry_operations generic_encrypted_dentry_ops = { + .d_revalidate = fscrypt_d_revalidate, +}; +#endif + +#if IS_ENABLED(CONFIG_UNICODE) && IS_ENABLED(CONFIG_FS_ENCRYPTION) +static const struct dentry_operations generic_encrypted_ci_dentry_ops = { + .d_hash = generic_ci_d_hash, + .d_compare = generic_ci_d_compare, + .d_revalidate = fscrypt_d_revalidate, +}; +#endif + +/** + * generic_set_encrypted_ci_d_ops - helper for setting d_ops for given dentry + * @dir: parent of dentry whose ops to set + * @dentry: detnry to set ops on + * + * This function sets the dentry ops for the given dentry to handle both + * casefolding and encryption of the dentry name. + */ +void generic_set_encrypted_ci_d_ops(struct inode *dir, struct dentry *dentry) +{ +#ifdef CONFIG_FS_ENCRYPTION + if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) { +#ifdef CONFIG_UNICODE + if (dir->i_sb->s_encoding) { + d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops); + return; + } +#endif + d_set_d_op(dentry, &generic_encrypted_dentry_ops); + return; + } +#endif +#ifdef CONFIG_UNICODE + if (dir->i_sb->s_encoding) { + d_set_d_op(dentry, &generic_ci_dentry_ops); + return; + } +#endif +} +EXPORT_SYMBOL(generic_set_encrypted_ci_d_ops); diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c index 00d5ef5f99f7..214a2fa1f1e3 100644 --- a/fs/lockd/clnt4xdr.c +++ b/fs/lockd/clnt4xdr.c @@ -128,24 +128,14 @@ static void encode_netobj(struct xdr_stream *xdr, static int decode_netobj(struct xdr_stream *xdr, struct xdr_netobj *obj) { - u32 length; - __be32 *p; + ssize_t ret; - p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; - length = be32_to_cpup(p++); - if (unlikely(length > XDR_MAX_NETOBJ)) - goto out_size; - obj->len = length; - obj->data = (u8 *)p; + ret = xdr_stream_decode_opaque_inline(xdr, (void *)&obj->data, + XDR_MAX_NETOBJ); + if (unlikely(ret < 0)) + return -EIO; + obj->len = ret; return 0; -out_size: - dprintk("NFS: returned netobj was too long: %u\n", length); - return -EIO; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c index 2c6176387143..747b9c8c940a 100644 --- a/fs/lockd/clntxdr.c +++ b/fs/lockd/clntxdr.c @@ -125,24 +125,14 @@ static void encode_netobj(struct xdr_stream *xdr, static int decode_netobj(struct xdr_stream *xdr, struct xdr_netobj *obj) { - u32 length; - __be32 *p; + ssize_t ret; - p = xdr_inline_decode(xdr, 4); - if (unlikely(p == NULL)) - goto out_overflow; - length = be32_to_cpup(p++); - if (unlikely(length > XDR_MAX_NETOBJ)) - goto out_size; - obj->len = length; - obj->data = (u8 *)p; + ret = xdr_stream_decode_opaque_inline(xdr, (void *)&obj->data, + XDR_MAX_NETOBJ); + if (unlikely(ret < 0)) + return -EIO; + obj->len = ret; return 0; -out_size: - dprintk("NFS: returned netobj was too long: %u\n", length); - return -EIO; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; } /* diff --git a/fs/locks.c b/fs/locks.c index 665e3ce9ab47..1a40e277eb5e 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2691,7 +2691,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, } if (inode) { /* userspace relies on this representation of dev_t */ - seq_printf(f, "%d %02x:%02x:%ld ", fl_pid, + seq_printf(f, "%d %02x:%02x:%lu ", fl_pid, MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino); } else { diff --git a/fs/namei.c b/fs/namei.c index 8da39bd08b7d..3817aa28be5b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1122,7 +1122,8 @@ static int may_linkat(struct path *link) * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory * should be allowed, or not, on files that already * exist. - * @dir: the sticky parent directory + * @dir_mode: mode bits of directory + * @dir_uid: owner of directory * @inode: the inode of the file to open * * Block an O_CREAT open of a FIFO (or a regular file) when: @@ -1138,18 +1139,18 @@ static int may_linkat(struct path *link) * * Returns 0 if the open is allowed, -ve on error. */ -static int may_create_in_sticky(struct dentry * const dir, +static int may_create_in_sticky(umode_t dir_mode, kuid_t dir_uid, struct inode * const inode) { if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) || (!sysctl_protected_regular && S_ISREG(inode->i_mode)) || - likely(!(dir->d_inode->i_mode & S_ISVTX)) || - uid_eq(inode->i_uid, dir->d_inode->i_uid) || + likely(!(dir_mode & S_ISVTX)) || + uid_eq(inode->i_uid, dir_uid) || uid_eq(current_fsuid(), inode->i_uid)) return 0; - if (likely(dir->d_inode->i_mode & 0002) || - (dir->d_inode->i_mode & 0020 && + if (likely(dir_mode & 0002) || + (dir_mode & 0020 && ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) || (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) { return -EACCES; @@ -1480,7 +1481,7 @@ static int follow_dotdot_rcu(struct nameidata *nd) nd->path.dentry = parent; nd->seq = seq; if (unlikely(!path_connected(&nd->path))) - return -ENOENT; + return -ECHILD; break; } else { struct mount *mnt = real_mount(nd->path.mnt); @@ -3379,6 +3380,8 @@ static int do_last(struct nameidata *nd, int *opened) { struct dentry *dir = nd->path.dentry; + kuid_t dir_uid = nd->inode->i_uid; + umode_t dir_mode = nd->inode->i_mode; int open_flag = op->open_flag; bool will_truncate = (open_flag & O_TRUNC) != 0; bool got_write = false; @@ -3514,7 +3517,7 @@ finish_open: error = -EISDIR; if (d_is_dir(nd->path.dentry)) goto out; - error = may_create_in_sticky(dir, + error = may_create_in_sticky(dir_mode, dir_uid, d_backing_inode(nd->path.dentry)); if (unlikely(error)) goto out; diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 5f93cfacb3d1..ac3e06367cb6 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -89,7 +89,7 @@ config NFS_V4 config NFS_SWAP bool "Provide swap over NFS support" default n - depends on NFS_FS + depends on NFS_FS && SWAP select SUNRPC_SWAP help This option enables swapon to work on files located on NFS mounts. diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 606dd3871f66..09b3bcb86d32 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -52,6 +52,16 @@ nfs4_is_valid_delegation(const struct nfs_delegation *delegation, return false; } +struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode) +{ + struct nfs_delegation *delegation; + + delegation = rcu_dereference(NFS_I(inode)->delegation); + if (nfs4_is_valid_delegation(delegation, 0)) + return delegation; + return NULL; +} + static int nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark) { @@ -91,7 +101,7 @@ int nfs4_check_delegation(struct inode *inode, fmode_t flags) return nfs4_do_check_delegation(inode, flags, false); } -static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid) +static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid) { struct inode *inode = state->inode; struct file_lock *fl; @@ -106,7 +116,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ spin_lock(&flctx->flc_lock); restart: list_for_each_entry(fl, list, fl_list) { - if (nfs_file_open_context(fl->fl_file) != ctx) + if (nfs_file_open_context(fl->fl_file)->state != state) continue; spin_unlock(&flctx->flc_lock); status = nfs4_lock_delegation_recall(fl, state, stateid); @@ -153,7 +163,7 @@ again: seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); err = nfs4_open_delegation_recall(ctx, state, stateid, type); if (!err) - err = nfs_delegation_claim_locks(ctx, state, stateid); + err = nfs_delegation_claim_locks(state, stateid); if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) err = -EAGAIN; mutex_unlock(&sp->so_delegreturn_mutex); @@ -224,6 +234,8 @@ static struct inode *nfs_delegation_grab_inode(struct nfs_delegation *delegation spin_lock(&delegation->lock); if (delegation->inode != NULL) inode = igrab(delegation->inode); + if (!inode) + set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags); spin_unlock(&delegation->lock); return inode; } @@ -853,10 +865,11 @@ restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry_rcu(delegation, &server->delegations, super_list) { - if (test_bit(NFS_DELEGATION_RETURNING, - &delegation->flags)) - continue; - if (test_bit(NFS_DELEGATION_NEED_RECLAIM, + if (test_bit(NFS_DELEGATION_INODE_FREEING, + &delegation->flags) || + test_bit(NFS_DELEGATION_RETURNING, + &delegation->flags) || + test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags) == 0) continue; if (!nfs_sb_active(server->super)) @@ -961,10 +974,11 @@ restart: list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry_rcu(delegation, &server->delegations, super_list) { - if (test_bit(NFS_DELEGATION_RETURNING, - &delegation->flags)) - continue; - if (test_bit(NFS_DELEGATION_TEST_EXPIRED, + if (test_bit(NFS_DELEGATION_INODE_FREEING, + &delegation->flags) || + test_bit(NFS_DELEGATION_RETURNING, + &delegation->flags) || + test_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags) == 0) continue; if (!nfs_sb_active(server->super)) diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h index ddaf2644cf13..510c9edcc712 100644 --- a/fs/nfs/delegation.h +++ b/fs/nfs/delegation.h @@ -34,6 +34,7 @@ enum { NFS_DELEGATION_RETURNING, NFS_DELEGATION_REVOKED, NFS_DELEGATION_TEST_EXPIRED, + NFS_DELEGATION_INODE_FREEING, }; int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); @@ -63,6 +64,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid); bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, struct rpc_cred **cred); +struct nfs_delegation *nfs4_get_valid_delegation(const struct inode *inode); void nfs_mark_delegation_referenced(struct nfs_delegation *delegation); int nfs4_have_delegation(struct inode *inode, fmode_t flags); int nfs4_check_delegation(struct inode *inode, fmode_t flags); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b8c8ce34d768..7dba2594421e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -169,6 +169,17 @@ typedef struct { bool eof; } nfs_readdir_descriptor_t; +static +void nfs_readdir_init_array(struct page *page) +{ + struct nfs_cache_array *array; + + array = kmap_atomic(page); + memset(array, 0, sizeof(struct nfs_cache_array)); + array->eof_index = -1; + kunmap_atomic(array); +} + /* * we are freeing strings created by nfs_add_to_readdir_array() */ @@ -181,6 +192,7 @@ void nfs_readdir_clear_array(struct page *page) array = kmap_atomic(page); for (i = 0; i < array->size; i++) kfree(array->array[i].string.name); + array->size = 0; kunmap_atomic(array); } @@ -617,6 +629,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, int status = -ENOMEM; unsigned int array_size = ARRAY_SIZE(pages); + nfs_readdir_init_array(page); + entry.prev_cookie = 0; entry.cookie = desc->last_cookie; entry.eof = 0; @@ -633,8 +647,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, } array = kmap(page); - memset(array, 0, sizeof(struct nfs_cache_array)); - array->eof_index = -1; status = nfs_readdir_alloc_pages(pages, array_size); if (status < 0) @@ -689,6 +701,7 @@ int nfs_readdir_filler(struct file *file, struct page* page) unlock_page(page); return 0; error: + nfs_readdir_clear_array(page); unlock_page(page); return ret; } @@ -696,8 +709,6 @@ int nfs_readdir_filler(struct file *file, struct page* page) static void cache_page_release(nfs_readdir_descriptor_t *desc) { - if (!desc->page->mapping) - nfs_readdir_clear_array(desc->page); put_page(desc->page); desc->page = NULL; } @@ -711,19 +722,28 @@ struct page *get_cache_page(nfs_readdir_descriptor_t *desc) /* * Returns 0 if desc->dir_cookie was found on page desc->page_index + * and locks the page to prevent removal from the page cache. */ static -int find_cache_page(nfs_readdir_descriptor_t *desc) +int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc) { int res; desc->page = get_cache_page(desc); if (IS_ERR(desc->page)) return PTR_ERR(desc->page); - - res = nfs_readdir_search_array(desc); + res = lock_page_killable(desc->page); if (res != 0) - cache_page_release(desc); + goto error; + res = -EAGAIN; + if (desc->page->mapping != NULL) { + res = nfs_readdir_search_array(desc); + if (res == 0) + return 0; + } + unlock_page(desc->page); +error: + cache_page_release(desc); return res; } @@ -738,7 +758,7 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) desc->last_cookie = 0; } do { - res = find_cache_page(desc); + res = find_and_lock_cache_page(desc); } while (res == -EAGAIN); return res; } @@ -777,7 +797,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) desc->eof = 1; kunmap(desc->page); - cache_page_release(desc); dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); return res; @@ -823,13 +842,13 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) status = nfs_do_filldir(desc); + out_release: + nfs_readdir_clear_array(desc->page); + cache_page_release(desc); out: dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); return status; - out_release: - cache_page_release(desc); - goto out; } /* The file offset position represents the dirent entry number. A @@ -894,6 +913,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) break; res = nfs_do_filldir(desc); + unlock_page(desc->page); + cache_page_release(desc); if (res < 0) break; } while (!desc->eof); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 9cdac9945483..9d07b53e1647 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -261,10 +261,10 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, data->ds_commit_index); /* verifier not set so always fail */ - if (verfp->committed < 0) + if (verfp->committed < 0 || data->res.verf->committed <= NFS_UNSTABLE) return 1; - return nfs_direct_cmp_verf(verfp, &data->verf); + return nfs_direct_cmp_verf(verfp, data->res.verf); } /** diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index d6515f1584f3..d78ec99b6c4c 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -131,16 +131,6 @@ FF_LAYOUT_LSEG(struct pnfs_layout_segment *lseg) generic_hdr); } -static inline struct nfs4_deviceid_node * -FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx) -{ - if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt || - FF_LAYOUT_LSEG(lseg)->mirror_array[idx] == NULL || - FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds == NULL) - return NULL; - return &FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds->id_node; -} - static inline struct nfs4_ff_layout_ds * FF_LAYOUT_MIRROR_DS(struct nfs4_deviceid_node *node) { @@ -150,9 +140,25 @@ FF_LAYOUT_MIRROR_DS(struct nfs4_deviceid_node *node) static inline struct nfs4_ff_layout_mirror * FF_LAYOUT_COMP(struct pnfs_layout_segment *lseg, u32 idx) { - if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt) - return NULL; - return FF_LAYOUT_LSEG(lseg)->mirror_array[idx]; + struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); + + if (idx < fls->mirror_array_cnt) + return fls->mirror_array[idx]; + return NULL; +} + +static inline struct nfs4_deviceid_node * +FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx) +{ + struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, idx); + + if (mirror != NULL) { + struct nfs4_ff_layout_ds *mirror_ds = mirror->mirror_ds; + + if (!IS_ERR_OR_NULL(mirror_ds)) + return &mirror_ds->id_node; + } + return NULL; } static inline u32 diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 6cd33bd5da87..f1cb0b7eb05f 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -2373,6 +2373,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, void *data) { struct nfs_commitres *result = data; + struct nfs_writeverf *verf = result->verf; enum nfs_stat status; int error; @@ -2385,7 +2386,9 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, result->op_status = status; if (status != NFS3_OK) goto out_status; - error = decode_writeverf3(xdr, &result->verf->verifier); + error = decode_writeverf3(xdr, &verf->verifier); + if (!error) + verf->committed = NFS_FILE_SYNC; out: return error; out_status: diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6409ff4876cb..4d45786738ab 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1355,8 +1355,6 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode, return 0; if ((delegation->type & fmode) != fmode) return 0; - if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) - return 0; switch (claim) { case NFS4_OPEN_CLAIM_NULL: case NFS4_OPEN_CLAIM_FH: @@ -1615,7 +1613,6 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) { struct nfs4_state *state = opendata->state; - struct nfs_inode *nfsi = NFS_I(state->inode); struct nfs_delegation *delegation; int open_mode = opendata->o_arg.open_flags; fmode_t fmode = opendata->o_arg.fmode; @@ -1632,7 +1629,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) } spin_unlock(&state->owner->so_lock); rcu_read_lock(); - delegation = rcu_dereference(nfsi->delegation); + delegation = nfs4_get_valid_delegation(state->inode); if (!can_open_delegated(delegation, fmode, claim)) { rcu_read_unlock(); break; @@ -2153,7 +2150,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) data->o_arg.open_flags, claim)) goto out_no_action; rcu_read_lock(); - delegation = rcu_dereference(NFS_I(data->state->inode)->delegation); + delegation = nfs4_get_valid_delegation(data->state->inode); if (can_open_delegated(delegation, data->o_arg.fmode, claim)) goto unlock_no_action; rcu_read_unlock(); @@ -2926,6 +2923,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, exception.retry = 1; continue; } + if (status == -NFS4ERR_EXPIRED) { + nfs4_schedule_lease_recovery(server->nfs_client); + exception.retry = 1; + continue; + } if (status == -EAGAIN) { /* We must have found a delegation */ exception.retry = 1; @@ -5655,6 +5657,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, } status = task->tk_status; if (setclientid.sc_cred) { + kfree(clp->cl_acceptor); clp->cl_acceptor = rpcauth_stringify_acceptor(setclientid.sc_cred); put_rpccred(setclientid.sc_cred); } @@ -5799,8 +5802,10 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data) d_data = (struct nfs4_delegreturndata *)data; - if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) + if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task)) { + nfs4_sequence_done(task, &d_data->res.seq_res); return; + } nfs4_setup_sequence(d_data->res.server->nfs_client, &d_data->args.seq_args, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 525684b0056f..0b2d051990e9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4409,11 +4409,14 @@ static int decode_write_verifier(struct xdr_stream *xdr, struct nfs_write_verifi static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res) { + struct nfs_writeverf *verf = res->verf; int status; status = decode_op_hdr(xdr, OP_COMMIT); if (!status) - status = decode_write_verifier(xdr, &res->verf->verifier); + status = decode_write_verifier(xdr, &verf->verifier); + if (!status) + verf->committed = NFS_FILE_SYNC; return status; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index ec04cce31814..83abf3dd7351 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -725,22 +725,35 @@ static int pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, struct nfs_server *server, struct list_head *layout_list) + __must_hold(&clp->cl_lock) + __must_hold(RCU) { struct pnfs_layout_hdr *lo, *next; struct inode *inode; list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) { - if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) + if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) || + test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) || + !list_empty(&lo->plh_bulk_destroy)) continue; + /* If the sb is being destroyed, just bail */ + if (!nfs_sb_active(server->super)) + break; inode = igrab(lo->plh_inode); - if (inode == NULL) - continue; - list_del_init(&lo->plh_layouts); - if (pnfs_layout_add_bulk_destroy_list(inode, layout_list)) - continue; - rcu_read_unlock(); - spin_unlock(&clp->cl_lock); - iput(inode); + if (inode != NULL) { + list_del_init(&lo->plh_layouts); + if (pnfs_layout_add_bulk_destroy_list(inode, + layout_list)) + continue; + rcu_read_unlock(); + spin_unlock(&clp->cl_lock); + iput(inode); + } else { + rcu_read_unlock(); + spin_unlock(&clp->cl_lock); + set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags); + } + nfs_sb_deactive(server->super); spin_lock(&clp->cl_lock); rcu_read_lock(); return -EAGAIN; @@ -778,7 +791,7 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, /* Free all lsegs that are attached to commit buckets */ nfs_commit_inode(inode, 0); pnfs_put_layout_hdr(lo); - iput(inode); + nfs_iput_and_deactive(inode); } return ret; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 87f144f14d1e..965d657086c8 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -99,6 +99,7 @@ enum { NFS_LAYOUT_RETURN_REQUESTED, /* Return this layout ASAP */ NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ + NFS_LAYOUT_INODE_FREEING, /* The inode is being freed */ }; enum layoutdriver_policy_flags { diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 4a3dd66175fe..b0ef37f3e2dd 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -30,12 +30,11 @@ EXPORT_SYMBOL_GPL(pnfs_generic_rw_release); /* Fake up some data that will cause nfs_commit_release to retry the writes. */ void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data) { - struct nfs_page *first = nfs_list_entry(data->pages.next); + struct nfs_writeverf *verf = data->res.verf; data->task.tk_status = 0; - memcpy(&data->verf.verifier, &first->wb_verf, - sizeof(data->verf.verifier)); - data->verf.verifier.data[0]++; /* ensure verifier mismatch */ + memset(&verf->verifier, 0, sizeof(verf->verifier)); + verf->committed = NFS_UNSTABLE; } EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f464f8d9060c..470b761839a5 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1925,7 +1925,7 @@ static int nfs_parse_devname(const char *dev_name, /* kill possible hostname list: not supported */ comma = strchr(dev_name, ','); if (comma != NULL && comma < end) - *comma = 0; + len = comma - dev_name; } if (len > maxnamlen) diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3c1e46f4bce3..89f36040adf6 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -643,7 +643,7 @@ out: return ret; out_launder: nfs_write_error_remove_page(req); - return ret; + return 0; } static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, @@ -783,7 +783,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) struct nfs_inode *nfsi = NFS_I(inode); struct nfs_page *head; - atomic_long_dec(&nfsi->nrequests); if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { head = req->wb_head; @@ -796,8 +795,10 @@ static void nfs_inode_remove_request(struct nfs_page *req) spin_unlock(&mapping->private_lock); } - if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) + if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) { nfs_release_request(req); + atomic_long_dec(&nfsi->nrequests); + } } static void @@ -1806,6 +1807,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) static void nfs_commit_release_pages(struct nfs_commit_data *data) { + const struct nfs_writeverf *verf = data->res.verf; struct nfs_page *req; int status = data->task.tk_status; struct nfs_commit_info cinfo; @@ -1832,7 +1834,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data) /* Okay, COMMIT succeeded, apparently. Check the verifier * returned by the server against all stored verfs. */ - if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) { + if (verf->committed > NFS_UNSTABLE && + !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier)) { /* We have a match */ if (req->wb_page) nfs_inode_remove_request(req); diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index ea45d954e8d7..99add0cf20ff 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -683,7 +683,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task) /* Client gets 2 lease periods to return it */ cutoff = ktime_add_ns(task->tk_start, - nn->nfsd4_lease * NSEC_PER_SEC * 2); + (u64)nn->nfsd4_lease * NSEC_PER_SEC * 2); if (ktime_before(now, cutoff)) { rpc_delay(task, HZ/100); /* 10 mili-seconds */ diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 66eaeb1e8c2c..dc9586feab31 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -661,7 +661,7 @@ struct cld_net { struct cld_upcall { struct list_head cu_list; struct cld_net *cu_net; - struct task_struct *cu_task; + struct completion cu_done; struct cld_msg cu_msg; }; @@ -670,23 +670,18 @@ __cld_pipe_upcall(struct rpc_pipe *pipe, struct cld_msg *cmsg) { int ret; struct rpc_pipe_msg msg; + struct cld_upcall *cup = container_of(cmsg, struct cld_upcall, cu_msg); memset(&msg, 0, sizeof(msg)); msg.data = cmsg; msg.len = sizeof(*cmsg); - /* - * Set task state before we queue the upcall. That prevents - * wake_up_process in the downcall from racing with schedule. - */ - set_current_state(TASK_UNINTERRUPTIBLE); ret = rpc_queue_upcall(pipe, &msg); if (ret < 0) { - set_current_state(TASK_RUNNING); goto out; } - schedule(); + wait_for_completion(&cup->cu_done); if (msg.errno < 0) ret = msg.errno; @@ -753,7 +748,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) if (copy_from_user(&cup->cu_msg, src, mlen) != 0) return -EFAULT; - wake_up_process(cup->cu_task); + complete(&cup->cu_done); return mlen; } @@ -768,7 +763,7 @@ cld_pipe_destroy_msg(struct rpc_pipe_msg *msg) if (msg->errno >= 0) return; - wake_up_process(cup->cu_task); + complete(&cup->cu_done); } static const struct rpc_pipe_ops cld_upcall_ops = { @@ -899,7 +894,7 @@ restart_search: goto restart_search; } } - new->cu_task = current; + init_completion(&new->cu_done); new->cu_msg.cm_vers = CLD_UPCALL_VERSION; put_unaligned(cn->cn_xid++, &new->cu_msg.cm_xid); new->cu_net = cn; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 87ee9cbf7dcb..fca8b2e7fbeb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3058,12 +3058,17 @@ static bool replay_matches_cache(struct svc_rqst *rqstp, (bool)seq->cachethis) return false; /* - * If there's an error than the reply can have fewer ops than - * the call. But if we cached a reply with *more* ops than the - * call you're sending us now, then this new call is clearly not - * really a replay of the old one: + * If there's an error then the reply can have fewer ops than + * the call. */ - if (slot->sl_opcnt < argp->opcnt) + if (slot->sl_opcnt < argp->opcnt && !slot->sl_status) + return false; + /* + * But if we cached a reply with *more* ops than the call you're + * sending us now, then this new call is clearly not really a + * replay of the old one: + */ + if (slot->sl_opcnt > argp->opcnt) return false; /* This is the only check explicitly called by spec: */ if (!same_creds(&rqstp->rq_cred, &slot->sl_cred)) @@ -6035,7 +6040,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, } if (fl_flags & FL_SLEEP) { - nbl->nbl_time = jiffies; + nbl->nbl_time = get_seconds(); spin_lock(&nn->blocked_locks_lock); list_add_tail(&nbl->nbl_list, &lock_sop->lo_blocked); list_add_tail(&nbl->nbl_lru, &nn->blocked_locks_lru); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 133d8bf62a5c..7872b1ead885 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -591,7 +591,7 @@ static inline bool nfsd4_stateid_generation_after(stateid_t *a, stateid_t *b) struct nfsd4_blocked_lock { struct list_head nbl_list; struct list_head nbl_lru; - unsigned long nbl_time; + time_t nbl_time; struct file_lock nbl_lock; struct knfsd_fh nbl_fh; struct nfsd4_callback nbl_cb; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f55527ef21e8..06d1f2edf2ec 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -396,10 +396,23 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, bool get_write_count; bool size_change = (iap->ia_valid & ATTR_SIZE); - if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) + if (iap->ia_valid & ATTR_SIZE) { accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; - if (iap->ia_valid & ATTR_SIZE) ftype = S_IFREG; + } + + /* + * If utimes(2) and friends are called with times not NULL, we should + * not set NFSD_MAY_WRITE bit. Otherwise fh_verify->nfsd_permission + * will return EACCESS, when the caller's effective UID does not match + * the owner of the file, and the caller is not privileged. In this + * situation, we should return EPERM(notify_change will return this). + */ + if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME)) { + accmode |= NFSD_MAY_OWNER_OVERRIDE; + if (!(iap->ia_valid & (ATTR_ATIME_SET | ATTR_MTIME_SET))) + accmode |= NFSD_MAY_WRITE; + } /* Callers that do fh_verify should do the fh_want_write: */ get_write_count = !fhp->fh_dentry; diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 506da82ff3f1..a308f7a7e577 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -90,6 +90,7 @@ void fsnotify_unmount_inodes(struct super_block *sb) iput_inode = inode; + cond_resched(); spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 917fadca8a7b..b73b78771915 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -335,8 +335,8 @@ int ocfs2_acl_chmod(struct inode *inode, struct buffer_head *bh) down_read(&OCFS2_I(inode)->ip_xattr_sem); acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, bh); up_read(&OCFS2_I(inode)->ip_xattr_sem); - if (IS_ERR(acl) || !acl) - return PTR_ERR(acl); + if (IS_ERR_OR_NULL(acl)) + return PTR_ERR_OR_ZERO(acl); ret = __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); if (ret) return ret; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 99550f4bd159..7de0c9562b70 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2054,7 +2054,8 @@ out_write_size: inode->i_mtime = inode->i_ctime = current_time(inode); di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); - ocfs2_update_inode_fsync_trans(handle, inode, 1); + if (handle) + ocfs2_update_inode_fsync_trans(handle, inode, 1); } if (handle) ocfs2_journal_dirty(handle, wc->w_di_bh); @@ -2151,13 +2152,30 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, struct ocfs2_dio_write_ctxt *dwc = NULL; struct buffer_head *di_bh = NULL; u64 p_blkno; - loff_t pos = iblock << inode->i_sb->s_blocksize_bits; + unsigned int i_blkbits = inode->i_sb->s_blocksize_bits; + loff_t pos = iblock << i_blkbits; + sector_t endblk = (i_size_read(inode) - 1) >> i_blkbits; unsigned len, total_len = bh_result->b_size; int ret = 0, first_get_block = 0; len = osb->s_clustersize - (pos & (osb->s_clustersize - 1)); len = min(total_len, len); + /* + * bh_result->b_size is count in get_more_blocks according to write + * "pos" and "end", we need map twice to return different buffer state: + * 1. area in file size, not set NEW; + * 2. area out file size, set NEW. + * + * iblock endblk + * |--------|---------|---------|--------- + * |<-------area in file------->| + */ + + if ((iblock <= endblk) && + ((iblock + ((len - 1) >> i_blkbits)) > endblk)) + len = (endblk - iblock + 1) << i_blkbits; + mlog(0, "get block of %lu at %llu:%u req %u\n", inode->i_ino, pos, len, total_len); @@ -2241,6 +2259,9 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock, if (desc->c_needs_zero) set_buffer_new(bh_result); + if (iblock > endblk) + set_buffer_new(bh_result); + /* May sleep in end_io. It should not happen in a irq context. So defer * it to dio work queue. */ set_buffer_defer_completion(bh_result); diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 9f8250df99f1..f9b84f7a3e4b 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -99,25 +99,34 @@ out: return ret; } +/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it + * will be easier to handle read failure. + */ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, unsigned int nr, struct buffer_head *bhs[]) { int status = 0; unsigned int i; struct buffer_head *bh; + int new_bh = 0; trace_ocfs2_read_blocks_sync((unsigned long long)block, nr); if (!nr) goto bail; + /* Don't put buffer head and re-assign it to NULL if it is allocated + * outside since the caller can't be aware of this alternation! + */ + new_bh = (bhs[0] == NULL); + for (i = 0 ; i < nr ; i++) { if (bhs[i] == NULL) { bhs[i] = sb_getblk(osb->sb, block++); if (bhs[i] == NULL) { status = -ENOMEM; mlog_errno(status); - goto bail; + break; } } bh = bhs[i]; @@ -157,9 +166,26 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, submit_bh(REQ_OP_READ, 0, bh); } +read_failure: for (i = nr; i > 0; i--) { bh = bhs[i - 1]; + if (unlikely(status)) { + if (new_bh && bh) { + /* If middle bh fails, let previous bh + * finish its read and then put it to + * aovoid bh leak + */ + if (!buffer_jbd(bh)) + wait_on_buffer(bh); + put_bh(bh); + bhs[i - 1] = NULL; + } else if (bh && buffer_uptodate(bh)) { + clear_buffer_uptodate(bh); + } + continue; + } + /* No need to wait on the buffer if it's managed by JBD. */ if (!buffer_jbd(bh)) wait_on_buffer(bh); @@ -169,8 +195,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, * so we can safely record this and loop back * to cleanup the other buffers. */ status = -EIO; - put_bh(bh); - bhs[i - 1] = NULL; + goto read_failure; } } @@ -178,6 +203,9 @@ bail: return status; } +/* Caller must provide a bhs[] with all NULL or non-NULL entries, so it + * will be easier to handle read failure. + */ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, struct buffer_head *bhs[], int flags, int (*validate)(struct super_block *sb, @@ -187,6 +215,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, int i, ignore_cache = 0; struct buffer_head *bh; struct super_block *sb = ocfs2_metadata_cache_get_super(ci); + int new_bh = 0; trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags); @@ -212,6 +241,11 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, goto bail; } + /* Don't put buffer head and re-assign it to NULL if it is allocated + * outside since the caller can't be aware of this alternation! + */ + new_bh = (bhs[0] == NULL); + ocfs2_metadata_cache_io_lock(ci); for (i = 0 ; i < nr ; i++) { if (bhs[i] == NULL) { @@ -220,7 +254,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, ocfs2_metadata_cache_io_unlock(ci); status = -ENOMEM; mlog_errno(status); - goto bail; + /* Don't forget to put previous bh! */ + break; } } bh = bhs[i]; @@ -314,16 +349,27 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, } } - status = 0; - +read_failure: for (i = (nr - 1); i >= 0; i--) { bh = bhs[i]; if (!(flags & OCFS2_BH_READAHEAD)) { - if (status) { - /* Clear the rest of the buffers on error */ - put_bh(bh); - bhs[i] = NULL; + if (unlikely(status)) { + /* Clear the buffers on error including those + * ever succeeded in reading + */ + if (new_bh && bh) { + /* If middle bh fails, let previous bh + * finish its read and then put it to + * aovoid bh leak + */ + if (!buffer_jbd(bh)) + wait_on_buffer(bh); + put_bh(bh); + bhs[i] = NULL; + } else if (bh && buffer_uptodate(bh)) { + clear_buffer_uptodate(bh); + } continue; } /* We know this can't have changed as we hold the @@ -341,9 +387,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, * uptodate. */ status = -EIO; clear_buffer_needs_validate(bh); - put_bh(bh); - bhs[i] = NULL; - continue; + goto read_failure; } if (buffer_needs_validate(bh)) { @@ -353,11 +397,8 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, BUG_ON(buffer_jbd(bh)); clear_buffer_needs_validate(bh); status = validate(sb, bh); - if (status) { - put_bh(bh); - bhs[i] = NULL; - continue; - } + if (status) + goto read_failure; } } diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 9b984cae4c4e..1d6dc8422899 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -329,7 +329,7 @@ void dlm_print_one_mle(struct dlm_master_list_entry *mle) { char *buf; - buf = (char *) get_zeroed_page(GFP_NOFS); + buf = (char *) get_zeroed_page(GFP_ATOMIC); if (buf) { dump_mle(mle, buf, PAGE_SIZE - 1); free_page((unsigned long)buf); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 5193218f5889..e961015fb484 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3422,7 +3422,7 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb, * we can recover correctly from node failure. Otherwise, we may get * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set. */ - if (!ocfs2_is_o2cb_active() && + if (ocfs2_userspace_stack(osb) && lockres->l_ops->flags & LOCK_TYPE_USES_LVB) lvb = 1; diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index ab30c005cc4b..9fa98abecfc6 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -290,7 +290,7 @@ static int ocfs2_info_scan_inode_alloc(struct ocfs2_super *osb, if (inode_alloc) inode_lock(inode_alloc); - if (o2info_coherent(&fi->ifi_req)) { + if (inode_alloc && o2info_coherent(&fi->ifi_req)) { status = ocfs2_inode_lock(inode_alloc, &bh, 0); if (status < 0) { mlog_errno(status); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index e5dcea6cee5f..39bb80fb2934 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -231,7 +231,8 @@ void ocfs2_recovery_exit(struct ocfs2_super *osb) /* At this point, we know that no more recovery threads can be * launched, so wait for any recovery completion work to * complete. */ - flush_workqueue(osb->ocfs2_wq); + if (osb->ocfs2_wq) + flush_workqueue(osb->ocfs2_wq); /* * Now that recovery is shut down, and the osb is about to be @@ -1017,7 +1018,8 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) mlog_errno(status); } - if (status == 0) { + /* Shutdown the kernel journal system */ + if (!jbd2_journal_destroy(journal->j_journal) && !status) { /* * Do not toggle if flush was unsuccessful otherwise * will leave dirty metadata in a "clean" journal @@ -1026,9 +1028,6 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) if (status < 0) mlog_errno(status); } - - /* Shutdown the kernel journal system */ - jbd2_journal_destroy(journal->j_journal); journal->j_journal = NULL; OCFS2_I(inode)->ip_open_count--; @@ -1081,6 +1080,14 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed) ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num); + if (replayed) { + jbd2_journal_lock_updates(journal->j_journal); + status = jbd2_journal_flush(journal->j_journal); + jbd2_journal_unlock_updates(journal->j_journal); + if (status < 0) + mlog_errno(status); + } + status = ocfs2_journal_toggle_dirty(osb, 1, replayed); if (status < 0) { mlog_errno(status); diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 497a4171ef61..bfb50fc51528 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -637,9 +637,11 @@ static inline void ocfs2_update_inode_fsync_trans(handle_t *handle, { struct ocfs2_inode_info *oi = OCFS2_I(inode); - oi->i_sync_tid = handle->h_transaction->t_tid; - if (datasync) - oi->i_datasync_tid = handle->h_transaction->t_tid; + if (!is_handle_aborted(handle)) { + oi->i_sync_tid = handle->h_transaction->t_tid; + if (datasync) + oi->i_datasync_tid = handle->h_transaction->t_tid; + } } #endif /* OCFS2_JOURNAL_H */ diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 5d53d0d63d19..ea38677daa06 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -391,7 +391,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) struct ocfs2_dinode *alloc = NULL; cancel_delayed_work(&osb->la_enable_wq); - flush_workqueue(osb->ocfs2_wq); + if (osb->ocfs2_wq) + flush_workqueue(osb->ocfs2_wq); if (osb->local_alloc_state == OCFS2_LA_UNUSED) goto out; diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index f55f82ca3425..1565dd8e8856 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -25,6 +25,7 @@ #include "ocfs2_ioctl.h" #include "alloc.h" +#include "localalloc.h" #include "aops.h" #include "dlmglue.h" #include "extent_map.h" @@ -222,6 +223,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, struct ocfs2_refcount_tree *ref_tree = NULL; u32 new_phys_cpos, new_len; u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); + int need_free = 0; if ((ext_flags & OCFS2_EXT_REFCOUNTED) && *len) { BUG_ON(!ocfs2_is_refcount_inode(inode)); @@ -312,6 +314,7 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, if (!partial) { context->range->me_flags &= ~OCFS2_MOVE_EXT_FL_COMPLETE; ret = -ENOSPC; + need_free = 1; goto out_commit; } } @@ -336,6 +339,20 @@ static int ocfs2_defrag_extent(struct ocfs2_move_extents_context *context, mlog_errno(ret); out_commit: + if (need_free && context->data_ac) { + struct ocfs2_alloc_context *data_ac = context->data_ac; + + if (context->data_ac->ac_which == OCFS2_AC_USE_LOCAL) + ocfs2_free_local_alloc_bits(osb, handle, data_ac, + new_phys_cpos, new_len); + else + ocfs2_free_clusters(handle, + data_ac->ac_inode, + data_ac->ac_bh, + ocfs2_clusters_to_blocks(osb->sb, new_phys_cpos), + new_len); + } + ocfs2_commit_trans(osb, handle); out_unlock_mutex: diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index b39d14cbfa34..d212d09c00b1 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -727,7 +727,7 @@ static int ocfs2_release_dquot(struct dquot *dquot) mutex_lock(&dquot->dq_lock); /* Check whether we are not racing with some other dqget() */ - if (atomic_read(&dquot->dq_count) > 1) + if (dquot_is_busy(dquot)) goto out; /* Running from downconvert thread? Postpone quota processing to wq */ if (current == osb->dc_task) { diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index d6c350ba25b9..c4b029c43464 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -48,12 +48,6 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl"; */ static struct ocfs2_stack_plugin *active_stack; -inline int ocfs2_is_o2cb_active(void) -{ - return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB); -} -EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active); - static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name) { struct ocfs2_stack_plugin *p; diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index e3036e1790e8..f2dce10fae54 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -298,9 +298,6 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin); void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin); -/* In ocfs2_downconvert_lock(), we need to know which stack we are using */ -int ocfs2_is_o2cb_active(void); - extern struct kset *ocfs2_kset; #endif /* STACKGLUE_H */ diff --git a/fs/open.c b/fs/open.c index 65fc76706d2a..3b62683fe332 100644 --- a/fs/open.c +++ b/fs/open.c @@ -836,9 +836,6 @@ cleanup_file: * the return value of d_splice_alias(), then the caller needs to perform dput() * on it after finish_open(). * - * On successful return @file is a fully instantiated open file. After this, if - * an error occurs in ->atomic_open(), it needs to clean up with fput(). - * * Returns zero on success or -errno if the open failed. */ int finish_open(struct file *file, struct dentry *dentry, diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index 1c59dff530de..34d1cc98260d 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -305,6 +305,7 @@ static void *help_start(struct seq_file *m, loff_t *pos) static void *help_next(struct seq_file *m, void *v, loff_t *pos) { + (*pos)++; gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_next: start\n"); return NULL; diff --git a/fs/orangefs/orangefs-sysfs.c b/fs/orangefs/orangefs-sysfs.c index 079a465796f3..bc56df2ae705 100644 --- a/fs/orangefs/orangefs-sysfs.c +++ b/fs/orangefs/orangefs-sysfs.c @@ -323,7 +323,7 @@ static ssize_t sysfs_service_op_show(struct kobject *kobj, /* Can't do a service_operation if the client is not running... */ rc = is_daemon_in_service(); if (rc) { - pr_info("%s: Client not running :%d:\n", + pr_info_ratelimited("%s: Client not running :%d:\n", __func__, is_daemon_in_service()); goto out; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index f2bfc9b15c37..b075fc2fbbfe 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -469,7 +469,7 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, bool origin) { int err; - const struct cred *old_cred; + const struct cred *old_cred, *hold_cred = NULL; struct cred *override_cred; struct dentry *parent = dentry->d_parent; @@ -504,7 +504,7 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, goto out_revert_creds; } } - put_cred(override_creds(override_cred)); + hold_cred = override_creds(override_cred); put_cred(override_cred); if (!ovl_dentry_is_whiteout(dentry)) @@ -515,7 +515,9 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, hardlink); } out_revert_creds: - ovl_revert_creds(old_cred); + ovl_revert_creds(old_cred ?: hold_cred); + if (old_cred && hold_cred) + put_cred(hold_cred); if (!err) { struct inode *realinode = d_inode(ovl_dentry_upper(dentry)); @@ -1043,7 +1045,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, if (newdentry == trap) goto out_dput; - if (WARN_ON(olddentry->d_inode == newdentry->d_inode)) + if (olddentry->d_inode == newdentry->d_inode) goto out_dput; err = 0; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 44f75736f29d..ce48ed6ba052 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -244,6 +244,21 @@ out: return err; } +int __ovl_xattr_get(struct dentry *dentry, struct inode *inode, + const char *name, void *value, size_t size) +{ + ssize_t res; + const struct cred *old_cred; + struct dentry *realdentry = + ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry); + + old_cred = ovl_override_creds(dentry->d_sb); + res = __vfs_getxattr(realdentry, d_inode(realdentry), name, value, + size); + ovl_revert_creds(old_cred); + return res; +} + int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { @@ -524,12 +539,13 @@ unsigned int ovl_get_nlink(struct dentry *lowerdentry, int nlink_diff; int nlink; char buf[13]; - int err; + ssize_t err; if (!lowerdentry || !upperdentry || d_inode(lowerdentry)->i_nlink == 1) return fallback; - err = vfs_getxattr(upperdentry, OVL_XATTR_NLINK, &buf, sizeof(buf) - 1); + err = ovl_vfs_getxattr(upperdentry, OVL_XATTR_NLINK, + &buf, sizeof(buf) - 1); if (err < 0) goto fail; @@ -551,7 +567,7 @@ unsigned int ovl_get_nlink(struct dentry *lowerdentry, return nlink; fail: - pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%i)\n", + pr_warn_ratelimited("overlayfs: failed to get index nlink (%pd2, err=%zi)\n", upperdentry, err); return fallback; } diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 2f08467880cf..6071196a7b63 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -29,10 +29,10 @@ struct ovl_lookup_data { static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, size_t prelen, const char *post) { - int res; + ssize_t res; char *s, *next, *buf = NULL; - res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0); + res = ovl_vfs_getxattr(dentry, OVL_XATTR_REDIRECT, NULL, 0); if (res < 0) { if (res == -ENODATA || res == -EOPNOTSUPP) return 0; @@ -45,7 +45,7 @@ static int ovl_check_redirect(struct dentry *dentry, struct ovl_lookup_data *d, if (res == 0) goto invalid; - res = vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res); + res = ovl_vfs_getxattr(dentry, OVL_XATTR_REDIRECT, buf, res); if (res < 0) goto fail; if (res == 0) @@ -85,7 +85,7 @@ err_free: kfree(buf); return 0; fail: - pr_warn_ratelimited("overlayfs: failed to get redirect (%i)\n", res); + pr_warn_ratelimited("overlayfs: failed to get redirect (%zi)\n", res); goto err_free; invalid: pr_warn_ratelimited("overlayfs: invalid redirect (%s)\n", buf); @@ -99,10 +99,10 @@ static int ovl_acceptable(void *ctx, struct dentry *dentry) static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) { - int res; + ssize_t res; struct ovl_fh *fh = NULL; - res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0); + res = ovl_vfs_getxattr(dentry, OVL_XATTR_ORIGIN, NULL, 0); if (res < 0) { if (res == -ENODATA || res == -EOPNOTSUPP) return NULL; @@ -116,7 +116,7 @@ static struct ovl_fh *ovl_get_origin_fh(struct dentry *dentry) if (!fh) return ERR_PTR(-ENOMEM); - res = vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res); + res = ovl_vfs_getxattr(dentry, OVL_XATTR_ORIGIN, fh, res); if (res < 0) goto fail; @@ -142,10 +142,11 @@ out: return NULL; fail: - pr_warn_ratelimited("overlayfs: failed to get origin (%i)\n", res); + pr_warn_ratelimited("overlayfs: failed to get origin (%zi)\n", res); goto out; invalid: - pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", res, fh); + pr_warn_ratelimited("overlayfs: invalid origin (%*phN)\n", + (int)res, fh); goto out; } diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index c0307da67a52..fdf83a10a9d3 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -188,6 +188,8 @@ void ovl_drop_write(struct dentry *dentry); struct dentry *ovl_workdir(struct dentry *dentry); const struct cred *ovl_override_creds(struct super_block *sb); void ovl_revert_creds(const struct cred *oldcred); +ssize_t ovl_vfs_getxattr(struct dentry *dentry, const char *name, void *buf, + size_t size); struct super_block *ovl_same_sb(struct super_block *sb); bool ovl_can_decode_fh(struct super_block *sb); struct dentry *ovl_indexdir(struct super_block *sb); @@ -280,6 +282,8 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name, const void *value, size_t size, int flags); int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size); +int __ovl_xattr_get(struct dentry *dentry, struct inode *inode, + const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); struct posix_acl *ovl_get_acl(struct inode *inode, int type); int ovl_open_maybe_copy_up(struct dentry *dentry, unsigned int file_flags); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 46cb8314f903..01cd5a2cbe35 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -733,6 +733,14 @@ ovl_posix_acl_xattr_get(const struct xattr_handler *handler, return ovl_xattr_get(dentry, inode, handler->name, buffer, size); } +static int __maybe_unused +__ovl_posix_acl_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + return __ovl_xattr_get(dentry, inode, handler->name, buffer, size); +} + static int __maybe_unused ovl_posix_acl_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, @@ -813,6 +821,13 @@ static int ovl_other_xattr_get(const struct xattr_handler *handler, return ovl_xattr_get(dentry, inode, name, buffer, size); } +static int __ovl_other_xattr_get(const struct xattr_handler *handler, + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) +{ + return __ovl_xattr_get(dentry, inode, name, buffer, size); +} + static int ovl_other_xattr_set(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *value, @@ -826,6 +841,7 @@ ovl_posix_acl_access_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_ACCESS, .flags = ACL_TYPE_ACCESS, .get = ovl_posix_acl_xattr_get, + .__get = __ovl_posix_acl_xattr_get, .set = ovl_posix_acl_xattr_set, }; @@ -834,6 +850,7 @@ ovl_posix_acl_default_xattr_handler = { .name = XATTR_NAME_POSIX_ACL_DEFAULT, .flags = ACL_TYPE_DEFAULT, .get = ovl_posix_acl_xattr_get, + .__get = __ovl_posix_acl_xattr_get, .set = ovl_posix_acl_xattr_set, }; @@ -846,6 +863,7 @@ static const struct xattr_handler ovl_own_xattr_handler = { static const struct xattr_handler ovl_other_xattr_handler = { .prefix = "", /* catch all */ .get = ovl_other_xattr_get, + .__get = __ovl_other_xattr_get, .set = ovl_other_xattr_set, }; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index ce179c2ce616..e2244c134c9c 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -52,6 +52,12 @@ void ovl_revert_creds(const struct cred *old_cred) revert_creds(old_cred); } +ssize_t ovl_vfs_getxattr(struct dentry *dentry, const char *name, void *buf, + size_t size) +{ + return __vfs_getxattr(dentry, d_inode(dentry), name, buf, size); +} + struct super_block *ovl_same_sb(struct super_block *sb) { struct ovl_fs *ofs = sb->s_fs_info; @@ -339,13 +345,13 @@ void ovl_copy_up_end(struct dentry *dentry) bool ovl_check_dir_xattr(struct dentry *dentry, const char *name) { - int res; + ssize_t res; char val; if (!d_is_dir(dentry)) return false; - res = vfs_getxattr(dentry, name, &val, 1); + res = ovl_vfs_getxattr(dentry, name, &val, 1); if (res == 1 && val == 'y') return true; diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index d9d50d3af33b..585651de1f5b 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -50,6 +50,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) long cached; long available; unsigned long pages[NR_LRU_LISTS]; + unsigned long sreclaimable, sunreclaim; int lru; si_meminfo(&i); @@ -65,6 +66,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v) pages[lru] = global_node_page_state(NR_LRU_BASE + lru); available = si_mem_available(); + sreclaimable = global_node_page_state(NR_SLAB_RECLAIMABLE); + sunreclaim = global_node_page_state(NR_SLAB_UNRECLAIMABLE); show_val_kb(m, "MemTotal: ", i.totalram); show_val_kb(m, "MemFree: ", i.freeram); @@ -106,16 +109,17 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "Mapped: ", global_node_page_state(NR_FILE_MAPPED)); show_val_kb(m, "Shmem: ", i.sharedram); - show_val_kb(m, "Slab: ", - global_node_page_state(NR_SLAB_RECLAIMABLE) + - global_node_page_state(NR_SLAB_UNRECLAIMABLE)); - - show_val_kb(m, "SReclaimable: ", - global_node_page_state(NR_SLAB_RECLAIMABLE)); - show_val_kb(m, "SUnreclaim: ", - global_node_page_state(NR_SLAB_UNRECLAIMABLE)); + show_val_kb(m, "KReclaimable: ", sreclaimable + + global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE)); + show_val_kb(m, "Slab: ", sreclaimable + sunreclaim); + show_val_kb(m, "SReclaimable: ", sreclaimable); + show_val_kb(m, "SUnreclaim: ", sunreclaim); seq_printf(m, "KernelStack: %8lu kB\n", global_zone_page_state(NR_KERNEL_STACK_KB)); +#ifdef CONFIG_SHADOW_CALL_STACK + seq_printf(m, "ShadowCallStack:%8lu kB\n", + global_zone_page_state(NR_KERNEL_SCS_BYTES) / 1024); +#endif show_val_kb(m, "PageTables: ", global_zone_page_state(NR_PAGETABLE)); #ifdef CONFIG_QUICKLIST diff --git a/fs/proc/page.c b/fs/proc/page.c index 1491918a33c3..0c952c217118 100644 --- a/fs/proc/page.c +++ b/fs/proc/page.c @@ -42,10 +42,12 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf, return -EINVAL; while (count > 0) { - if (pfn_valid(pfn)) - ppage = pfn_to_page(pfn); - else - ppage = NULL; + /* + * TODO: ZONE_DEVICE support requires to identify + * memmaps that were actually initialized. + */ + ppage = pfn_to_online_page(pfn); + if (!ppage || PageSlab(ppage)) pcount = 0; else @@ -214,10 +216,11 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf, return -EINVAL; while (count > 0) { - if (pfn_valid(pfn)) - ppage = pfn_to_page(pfn); - else - ppage = NULL; + /* + * TODO: ZONE_DEVICE support requires to identify + * memmaps that were actually initialized. + */ + ppage = pfn_to_online_page(pfn); if (put_user(stable_page_flags(ppage), out)) { ret = -EFAULT; @@ -259,10 +262,11 @@ static ssize_t kpagecgroup_read(struct file *file, char __user *buf, return -EINVAL; while (count > 0) { - if (pfn_valid(pfn)) - ppage = pfn_to_page(pfn); - else - ppage = NULL; + /* + * TODO: ZONE_DEVICE support requires to identify + * memmaps that were actually initialized. + */ + ppage = pfn_to_online_page(pfn); if (ppage) ino = page_cgroup_ino(ppage); diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 885d445afa0d..ce400f97370d 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -164,6 +164,16 @@ int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma, return remap_pfn_range(vma, from, pfn, size, prot); } +/* + * Architectures which support memory encryption override this. + */ +ssize_t __weak +copy_oldmem_page_encrypted(unsigned long pfn, char *buf, size_t csize, + unsigned long offset, int userbuf) +{ + return copy_oldmem_page(pfn, buf, csize, offset, userbuf); +} + /* * Copy to either kernel or user space */ diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index eb7b1dcfeac8..1cf9889ca5d0 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -321,6 +321,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record) GFP_KERNEL); if (!tmp_prz) return -ENOMEM; + prz = tmp_prz; free_prz = true; while (cxt->ftrace_read_cnt < cxt->max_ftrace_cnt) { @@ -343,7 +344,6 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record) goto out; } record->id = 0; - prz = tmp_prz; } } @@ -465,6 +465,17 @@ static int notrace ramoops_pstore_write(struct pstore_record *record) prz = cxt->dprzs[cxt->dump_write_cnt]; + /* + * Since this is a new crash dump, we need to reset the buffer in + * case it still has an old dump present. Without this, the new dump + * will get appended, which would seriously confuse anything trying + * to check dump file contents. Specifically, ramoops_read_kmsg_hdr() + * expects to find a dump header in the beginning of buffer data, so + * we must to reset the buffer values, in order to ensure that the + * header will be written to the beginning of the buffer. + */ + persistent_ram_zap(prz); + /* Build header and append record contents. */ hlen = ramoops_write_kmsg_hdr(prz, record); size = record->size; diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 9c81fd973418..30f5da8f4aff 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -491,7 +491,7 @@ int dquot_release(struct dquot *dquot) mutex_lock(&dquot->dq_lock); /* Check whether we are not racing with some other dqget() */ - if (atomic_read(&dquot->dq_count) > 1) + if (dquot_is_busy(dquot)) goto out_dqlock; if (dqopt->ops[dquot->dq_id.type]->release_dqblk) { ret = dqopt->ops[dquot->dq_id.type]->release_dqblk(dquot); @@ -617,7 +617,7 @@ EXPORT_SYMBOL(dquot_scan_active); /* Write all dquot structures to quota files */ int dquot_writeback_dquots(struct super_block *sb, int type) { - struct list_head *dirty; + struct list_head dirty; struct dquot *dquot; struct quota_info *dqopt = sb_dqopt(sb); int cnt; @@ -631,9 +631,10 @@ int dquot_writeback_dquots(struct super_block *sb, int type) if (!sb_has_quota_active(sb, cnt)) continue; spin_lock(&dq_list_lock); - dirty = &dqopt->info[cnt].dqi_dirty_list; - while (!list_empty(dirty)) { - dquot = list_first_entry(dirty, struct dquot, + /* Move list away to avoid livelock. */ + list_replace_init(&dqopt->info[cnt].dqi_dirty_list, &dirty); + while (!list_empty(&dirty)) { + dquot = list_first_entry(&dirty, struct dquot, dq_dirty); WARN_ON(!test_bit(DQ_ACTIVE_B, &dquot->dq_flags)); @@ -975,6 +976,7 @@ static int add_dquot_ref(struct super_block *sb, int type) * later. */ old_inode = inode; + cond_resched(); spin_lock(&sb->s_inode_list_lock); } spin_unlock(&sb->s_inode_list_lock); @@ -2848,68 +2850,73 @@ EXPORT_SYMBOL(dquot_quotactl_sysfile_ops); static int do_proc_dqstats(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - unsigned int type = (int *)table->data - dqstats.stat; + unsigned int type = (unsigned long *)table->data - dqstats.stat; + s64 value = percpu_counter_sum(&dqstats.counter[type]); + + /* Filter negative values for non-monotonic counters */ + if (value < 0 && (type == DQST_ALLOC_DQUOTS || + type == DQST_FREE_DQUOTS)) + value = 0; /* Update global table */ - dqstats.stat[type] = - percpu_counter_sum_positive(&dqstats.counter[type]); - return proc_dointvec(table, write, buffer, lenp, ppos); + dqstats.stat[type] = value; + return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } static struct ctl_table fs_dqstats_table[] = { { .procname = "lookups", .data = &dqstats.stat[DQST_LOOKUPS], - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "drops", .data = &dqstats.stat[DQST_DROPS], - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "reads", .data = &dqstats.stat[DQST_READS], - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "writes", .data = &dqstats.stat[DQST_WRITES], - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "cache_hits", .data = &dqstats.stat[DQST_CACHE_HITS], - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "allocated_dquots", .data = &dqstats.stat[DQST_ALLOC_DQUOTS], - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "free_dquots", .data = &dqstats.stat[DQST_FREE_DQUOTS], - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, { .procname = "syncs", .data = &dqstats.stat[DQST_SYNCS], - .maxlen = sizeof(int), + .maxlen = sizeof(unsigned long), .mode = 0444, .proc_handler = do_proc_dqstats, }, diff --git a/fs/read_write.c b/fs/read_write.c index d9368db7f611..0da6e4f19d7f 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1713,6 +1713,34 @@ static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write) return security_file_permission(file, write ? MAY_WRITE : MAY_READ); } +/* + * Ensure that we don't remap a partial EOF block in the middle of something + * else. Assume that the offsets have already been checked for block + * alignment. + * + * For deduplication we always scale down to the previous block because we + * can't meaningfully compare post-EOF contents. + * + * For clone we only link a partial EOF block above the destination file's EOF. + */ +static int generic_remap_check_len(struct inode *inode_in, + struct inode *inode_out, + loff_t pos_out, + u64 *len, + bool is_dedupe) +{ + u64 blkmask = i_blocksize(inode_in) - 1; + + if ((*len & blkmask) == 0) + return 0; + + if (is_dedupe) + *len &= ~blkmask; + else if (pos_out + *len < i_size_read(inode_out)) + return -EINVAL; + + return 0; +} /* * Check that the two inodes are eligible for cloning, the ranges make @@ -1819,6 +1847,11 @@ int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in, return -EBADE; } + ret = generic_remap_check_len(inode_in, inode_out, pos_out, len, + is_dedupe); + if (ret) + return ret; + return 1; } EXPORT_SYMBOL(vfs_clone_file_prep_inodes); diff --git a/fs/readdir.c b/fs/readdir.c index d336db65a33e..0c357663e33a 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -65,6 +65,40 @@ out: } EXPORT_SYMBOL(iterate_dir); +/* + * POSIX says that a dirent name cannot contain NULL or a '/'. + * + * It's not 100% clear what we should really do in this case. + * The filesystem is clearly corrupted, but returning a hard + * error means that you now don't see any of the other names + * either, so that isn't a perfect alternative. + * + * And if you return an error, what error do you use? Several + * filesystems seem to have decided on EUCLEAN being the error + * code for EFSCORRUPTED, and that may be the error to use. Or + * just EIO, which is perhaps more obvious to users. + * + * In order to see the other file names in the directory, the + * caller might want to make this a "soft" error: skip the + * entry, and return the error at the end instead. + * + * Note that this should likely do a "memchr(name, 0, len)" + * check too, since that would be filesystem corruption as + * well. However, that case can't actually confuse user space, + * which has to do a strlen() on the name anyway to find the + * filename length, and the above "soft error" worry means + * that it's probably better left alone until we have that + * issue clarified. + */ +static int verify_dirent_name(const char *name, int len) +{ + if (!len) + return -EIO; + if (memchr(name, '/', len)) + return -EIO; + return 0; +} + /* * Traditional linux readdir() handling.. * @@ -174,6 +208,9 @@ static int filldir(struct dir_context *ctx, const char *name, int namlen, int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, sizeof(long)); + buf->error = verify_dirent_name(name, namlen); + if (unlikely(buf->error)) + return buf->error; buf->error = -EINVAL; /* only used if we fail.. */ if (reclen > buf->count) return -EINVAL; @@ -260,6 +297,9 @@ static int filldir64(struct dir_context *ctx, const char *name, int namlen, int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, sizeof(u64)); + buf->error = verify_dirent_name(name, namlen); + if (unlikely(buf->error)) + return buf->error; buf->error = -EINVAL; /* only used if we fail.. */ if (reclen > buf->count) return -EINVAL; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 11a48affa882..683496322aa8 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2096,6 +2096,15 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, goto out_inserted_sd; } + /* + * Mark it private if we're creating the privroot + * or something under it. + */ + if (IS_PRIVATE(dir) || dentry == REISERFS_SB(sb)->priv_root) { + inode->i_flags |= S_PRIVATE; + inode->i_opflags &= ~IOP_XATTR; + } + if (reiserfs_posixacl(inode->i_sb)) { reiserfs_write_unlock(inode->i_sb); retval = reiserfs_inherit_default_acl(th, dir, dentry, inode); @@ -2110,8 +2119,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, reiserfs_warning(inode->i_sb, "jdm-13090", "ACLs aren't enabled in the fs, " "but vfs thinks they are!"); - } else if (IS_PRIVATE(dir)) - inode->i_flags |= S_PRIVATE; + } if (security->name) { reiserfs_write_unlock(inode->i_sb); diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 5089dac02660..14ba7a12b89d 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -377,10 +377,13 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, /* * Propagate the private flag so we know we're - * in the priv tree + * in the priv tree. Also clear IOP_XATTR + * since we don't have xattrs on xattr files. */ - if (IS_PRIVATE(dir)) + if (IS_PRIVATE(dir)) { inode->i_flags |= S_PRIVATE; + inode->i_opflags &= ~IOP_XATTR; + } } reiserfs_write_unlock(dir->i_sb); if (retval == IO_ERROR) { diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index eabf85371ece..0efe7c7c4124 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -1168,6 +1168,8 @@ static inline int bmap_would_wrap(unsigned bmap_nr) return bmap_nr > ((1LL << 16) - 1); } +extern const struct xattr_handler *reiserfs_xattr_handlers[]; + /* * this says about version of key of all items (but stat data) the * object consists of diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 0037aea97d39..2946713cb00d 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -2250,7 +2250,8 @@ error_out: /* also releases the path */ unfix_nodes(&s_ins_balance); #ifdef REISERQUOTA_DEBUG - reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, + if (inode) + reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, "reiserquota insert_item(): freeing %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(ih)); #endif diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 4885c7b6e44f..9caf3948417c 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -629,6 +629,7 @@ static void reiserfs_put_super(struct super_block *s) reiserfs_write_unlock(s); mutex_destroy(&REISERFS_SB(s)->lock); destroy_workqueue(REISERFS_SB(s)->commit_wq); + kfree(REISERFS_SB(s)->s_jdev); kfree(s->s_fs_info); s->s_fs_info = NULL; } @@ -1953,7 +1954,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) if (!sbi->s_jdev) { SWARN(silent, s, "", "Cannot allocate memory for " "journal device name"); - goto error; + goto error_unlocked; } } #ifdef CONFIG_QUOTA @@ -2052,6 +2053,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) if (replay_only(s)) goto error_unlocked; + s->s_xattr = reiserfs_xattr_handlers; + if (bdev_read_only(s->s_bdev) && !sb_rdonly(s)) { SWARN(silent, s, "clm-7000", "Detected readonly device, marking FS readonly"); @@ -2241,6 +2244,7 @@ error_unlocked: kfree(qf_names[j]); } #endif + kfree(sbi->s_jdev); kfree(sbi); s->s_fs_info = NULL; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 83423192588c..28f6daf371d3 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -122,13 +122,13 @@ static struct dentry *open_xa_root(struct super_block *sb, int flags) struct dentry *xaroot; if (d_really_is_negative(privroot)) - return ERR_PTR(-ENODATA); + return ERR_PTR(-EOPNOTSUPP); inode_lock_nested(d_inode(privroot), I_MUTEX_XATTR); xaroot = dget(REISERFS_SB(sb)->xattr_root); if (!xaroot) - xaroot = ERR_PTR(-ENODATA); + xaroot = ERR_PTR(-EOPNOTSUPP); else if (d_really_is_negative(xaroot)) { int err = -ENODATA; @@ -319,8 +319,12 @@ static int reiserfs_for_each_xattr(struct inode *inode, out_dir: dput(dir); out: - /* -ENODATA isn't an error */ - if (err == -ENODATA) + /* + * -ENODATA: this object doesn't have any xattrs + * -EOPNOTSUPP: this file system doesn't have xattrs enabled on disk. + * Neither are errors + */ + if (err == -ENODATA || err == -EOPNOTSUPP) err = 0; return err; } @@ -610,6 +614,10 @@ int reiserfs_xattr_set(struct inode *inode, const char *name, int error, error2; size_t jbegin_count = reiserfs_xattr_nblocks(inode, buffer_size); + /* Check before we start a transaction and then do nothing. */ + if (!d_really_is_positive(REISERFS_SB(inode->i_sb)->priv_root)) + return -EOPNOTSUPP; + if (!(flags & XATTR_REPLACE)) jbegin_count += reiserfs_xattr_jcreate_nblocks(inode); @@ -832,8 +840,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) if (d_really_is_negative(dentry)) return -EINVAL; - if (!dentry->d_sb->s_xattr || - get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1) + if (get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1) return -EOPNOTSUPP; dir = open_xa_dir(d_inode(dentry), XATTR_REPLACE); @@ -873,6 +880,7 @@ static int create_privroot(struct dentry *dentry) } d_inode(dentry)->i_flags |= S_PRIVATE; + d_inode(dentry)->i_opflags &= ~IOP_XATTR; reiserfs_info(dentry->d_sb, "Created %s - reserved for xattr " "storage.\n", PRIVROOT_NAME); @@ -886,7 +894,7 @@ static int create_privroot(struct dentry *dentry) { return 0; } #endif /* Actual operations that are exported to VFS-land */ -static const struct xattr_handler *reiserfs_xattr_handlers[] = { +const struct xattr_handler *reiserfs_xattr_handlers[] = { #ifdef CONFIG_REISERFS_FS_XATTR &reiserfs_xattr_user_handler, &reiserfs_xattr_trusted_handler, @@ -957,8 +965,10 @@ int reiserfs_lookup_privroot(struct super_block *s) if (!IS_ERR(dentry)) { REISERFS_SB(s)->priv_root = dentry; d_set_d_op(dentry, &xattr_lookup_poison_ops); - if (d_really_is_positive(dentry)) + if (d_really_is_positive(dentry)) { d_inode(dentry)->i_flags |= S_PRIVATE; + d_inode(dentry)->i_opflags &= ~IOP_XATTR; + } } else err = PTR_ERR(dentry); inode_unlock(d_inode(s->s_root)); @@ -987,7 +997,6 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) } if (d_really_is_positive(privroot)) { - s->s_xattr = reiserfs_xattr_handlers; inode_lock(d_inode(privroot)); if (!REISERFS_SB(s)->xattr_root) { struct dentry *dentry; diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index aa9380bac196..05f666794561 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -320,10 +320,8 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, * would be useless since permissions are ignored, and a pain because * it introduces locking cycles */ - if (IS_PRIVATE(dir)) { - inode->i_flags |= S_PRIVATE; + if (IS_PRIVATE(inode)) goto apply_umask; - } err = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); if (err) diff --git a/fs/sdcardfs/inode.c b/fs/sdcardfs/inode.c index aeedd17e0a7c..d6a256ac2ffe 100644 --- a/fs/sdcardfs/inode.c +++ b/fs/sdcardfs/inode.c @@ -87,6 +87,9 @@ static int sdcardfs_create(struct inode *dir, struct dentry *dentry, lower_dentry_mnt = lower_path.mnt; lower_parent_dentry = lock_parent(lower_dentry); + if (d_is_positive(lower_dentry)) + return -EEXIST; + /* set last 16bytes of mode field to 0664 */ mode = (mode & S_IFMT) | 00664; diff --git a/fs/sdcardfs/lookup.c b/fs/sdcardfs/lookup.c index bc5db71efa0b..886aee279920 100644 --- a/fs/sdcardfs/lookup.c +++ b/fs/sdcardfs/lookup.c @@ -257,7 +257,6 @@ static struct dentry *__sdcardfs_lookup(struct dentry *dentry, struct dentry *lower_dentry; const struct qstr *name; struct path lower_path; - struct qstr dname; struct dentry *ret_dentry = NULL; struct sdcardfs_sb_info *sbi; @@ -316,6 +315,7 @@ put_name: /* no error: handle positive dentries */ if (!err) { +found: /* check if the dentry is an obb dentry * if true, the lower_inode must be replaced with * the inode of the graft path @@ -362,29 +362,26 @@ put_name: if (err && err != -ENOENT) goto out; - /* instatiate a new negative dentry */ - dname.name = name->name; - dname.len = name->len; - - /* See if the low-level filesystem might want - * to use its own hash - */ - lower_dentry = d_hash_and_lookup(lower_dir_dentry, &dname); - - if (IS_ERR(lower_dentry)) - return lower_dentry; - - if (!lower_dentry) { - /* We called vfs_path_lookup earlier, and did not get a negative - * dentry then. Don't confuse the lower filesystem by forcing - * one on it now... - */ - err = -ENOENT; + /* get a (very likely) new negative dentry */ + lower_dentry = lookup_one_len_unlocked(name->name, + lower_dir_dentry, name->len); + if (IS_ERR(lower_dentry)) { + err = PTR_ERR(lower_dentry); goto out; } lower_path.dentry = lower_dentry; lower_path.mnt = mntget(lower_dir_mnt); + + /* + * Check if someone sneakily filled in the dentry when + * we weren't looking. We'll check again in create. + */ + if (unlikely(d_inode_rcu(lower_dentry))) { + err = 0; + goto found; + } + sdcardfs_set_lower_path(dentry, &lower_path); /* diff --git a/fs/sdcardfs/main.c b/fs/sdcardfs/main.c index d890c5711907..19312ee881cc 100644 --- a/fs/sdcardfs/main.c +++ b/fs/sdcardfs/main.c @@ -19,6 +19,7 @@ */ #include "sdcardfs.h" +#include #include #include #include @@ -374,6 +375,9 @@ static int sdcardfs_read_super(struct vfsmount *mnt, struct super_block *sb, list_add(&sb_info->list, &sdcardfs_super_list); mutex_unlock(&sdcardfs_super_list_lock); + sb_info->fscrypt_nb.notifier_call = sdcardfs_on_fscrypt_key_removed; + fscrypt_register_key_removal_notifier(&sb_info->fscrypt_nb); + if (!silent) pr_info("sdcardfs: mounted on top of %s type %s\n", dev_name, lower_sb->s_type->name); @@ -444,6 +448,9 @@ void sdcardfs_kill_sb(struct super_block *sb) if (sb->s_magic == SDCARDFS_SUPER_MAGIC && sb->s_fs_info) { sbi = SDCARDFS_SB(sb); + + fscrypt_unregister_key_removal_notifier(&sbi->fscrypt_nb); + mutex_lock(&sdcardfs_super_list_lock); list_del(&sbi->list); mutex_unlock(&sdcardfs_super_list_lock); diff --git a/fs/sdcardfs/sdcardfs.h b/fs/sdcardfs/sdcardfs.h index 6219771ed71c..c9d855da629f 100644 --- a/fs/sdcardfs/sdcardfs.h +++ b/fs/sdcardfs/sdcardfs.h @@ -150,6 +150,8 @@ extern struct inode *sdcardfs_iget(struct super_block *sb, struct inode *lower_inode, userid_t id); extern int sdcardfs_interpose(struct dentry *dentry, struct super_block *sb, struct path *lower_path, userid_t id); +extern int sdcardfs_on_fscrypt_key_removed(struct notifier_block *nb, + unsigned long action, void *data); /* file private data */ struct sdcardfs_file_info { @@ -223,6 +225,7 @@ struct sdcardfs_sb_info { struct path obbpath; void *pkgl_id; struct list_head list; + struct notifier_block fscrypt_nb; }; /* diff --git a/fs/sdcardfs/super.c b/fs/sdcardfs/super.c index bd2eb0257020..5e7ceae0f9dd 100644 --- a/fs/sdcardfs/super.c +++ b/fs/sdcardfs/super.c @@ -319,6 +319,23 @@ static int sdcardfs_show_options(struct vfsmount *mnt, struct seq_file *m, return 0; }; +int sdcardfs_on_fscrypt_key_removed(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct sdcardfs_sb_info *sbi = container_of(nb, struct sdcardfs_sb_info, + fscrypt_nb); + + /* + * Evict any unused sdcardfs dentries (and hence any unused sdcardfs + * inodes, since sdcardfs doesn't cache unpinned inodes by themselves) + * so that the lower filesystem's encrypted inodes can be evicted. + * This is needed to make the FS_IOC_REMOVE_ENCRYPTION_KEY ioctl + * properly "lock" the files underneath the sdcardfs mount. + */ + shrink_dcache_sb(sbi->sb); + return NOTIFY_OK; +} + const struct super_operations sdcardfs_sops = { .put_super = sdcardfs_put_super, .statfs = sdcardfs_statfs, diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig index dfc6fdf019d7..fe221d7d99d6 100644 --- a/fs/ubifs/Kconfig +++ b/fs/ubifs/Kconfig @@ -7,6 +7,7 @@ config UBIFS_FS select CRYPTO if UBIFS_FS_ZLIB select CRYPTO_LZO if UBIFS_FS_LZO select CRYPTO_DEFLATE if UBIFS_FS_ZLIB + select FS_ENCRYPTION_ALGS if FS_ENCRYPTION depends on MTD_UBI help UBIFS is a file system for flash devices which works on top of UBI. diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 8464a3dd3107..7d5c2cf95353 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -208,6 +208,7 @@ static int dbg_check_name(const struct ubifs_info *c, return 0; } +static void ubifs_set_d_ops(struct inode *dir, struct dentry *dentry); static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { @@ -221,6 +222,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino); err = fscrypt_prepare_lookup(dir, dentry, &nm); + ubifs_set_d_ops(dir, dentry); if (err == -ENOENT) return d_splice_alias(NULL, dentry); if (err) @@ -237,9 +239,9 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, goto out_fname; } - if (nm.hash) { - ubifs_assert(fname_len(&nm) == 0); - ubifs_assert(fname_name(&nm) == NULL); + if (fname_name(&nm) == NULL) { + if (nm.hash & ~UBIFS_S_KEY_HASH_MASK) + goto done; /* ENOENT */ dent_key_init_hash(c, &key, dir->i_ino, nm.hash); err = ubifs_tnc_lookup_dh(c, &key, dent, nm.minor_hash); } else { @@ -537,7 +539,7 @@ static int ubifs_readdir(struct file *file, struct dir_context *ctx) if (encrypted) { err = fscrypt_get_encryption_info(dir); - if (err && err != -ENOKEY) + if (err) return err; err = fscrypt_fname_alloc_buffer(dir, UBIFS_MAX_NLEN, &fstr); @@ -1684,3 +1686,19 @@ const struct file_operations ubifs_dir_operations = { .compat_ioctl = ubifs_compat_ioctl, #endif }; + +#ifdef CONFIG_FS_ENCRYPTION +static const struct dentry_operations ubifs_encrypted_dentry_ops = { + .d_revalidate = fscrypt_d_revalidate, +}; +#endif + +static void ubifs_set_d_ops(struct inode *dir, struct dentry *dentry) +{ +#ifdef CONFIG_FS_ENCRYPTION + if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) { + d_set_d_op(dentry, &ubifs_encrypted_dentry_ops); + return; + } +#endif +} diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index ff44fd90d51b..76fff889a2f7 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -797,7 +797,9 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu, if (page_offset > end_index) break; - page = find_or_create_page(mapping, page_offset, ra_gfp_mask); + page = pagecache_get_page(mapping, page_offset, + FGP_LOCK|FGP_ACCESSED|FGP_CREAT|FGP_NOWAIT, + ra_gfp_mask); if (!page) break; if (!PageUptodate(page)) diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 71c344001574..203067d39855 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -28,6 +28,11 @@ #include #include "ubifs.h" +/* Need to be kept consistent with checked flags in ioctl2ubifs() */ +#define UBIFS_SUPPORTED_IOCTL_FLAGS \ + (FS_COMPR_FL | FS_SYNC_FL | FS_APPEND_FL | \ + FS_IMMUTABLE_FL | FS_DIRSYNC_FL) + /** * ubifs_set_inode_flags - set VFS inode flags. * @inode: VFS inode to set flags for @@ -127,7 +132,8 @@ static int setflags(struct inode *inode, int flags) } } - ui->flags = ioctl2ubifs(flags); + ui->flags &= ~ioctl2ubifs(UBIFS_SUPPORTED_IOCTL_FLAGS); + ui->flags |= ioctl2ubifs(flags); ubifs_set_inode_flags(inode); inode->i_ctime = current_time(inode); release = ui->dirty; @@ -169,6 +175,9 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (get_user(flags, (int __user *) arg)) return -EFAULT; + if (flags & ~UBIFS_SUPPORTED_IOCTL_FLAGS) + return -EOPNOTSUPP; + if (!S_ISDIR(inode->i_mode)) flags &= ~FS_DIRSYNC_FL; diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 69051f7a9606..99d31a64f2a3 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -583,7 +583,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, if (!xent) { dent->ch.node_type = UBIFS_DENT_NODE; - if (nm->hash) + if (fname_name(nm) == NULL) dent_key_init_hash(c, &dent_key, dir->i_ino, nm->hash); else dent_key_init(c, &dent_key, dir->i_ino, nm); @@ -630,7 +630,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, kfree(dent); if (deletion) { - if (nm->hash) + if (fname_name(nm) == NULL) err = ubifs_tnc_remove_dh(c, &dent_key, nm->minor_hash); else err = ubifs_tnc_remove_nm(c, &dent_key, nm); diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index b1f7c0caa3ac..7547be512db2 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h @@ -162,7 +162,6 @@ static inline void dent_key_init(const struct ubifs_info *c, uint32_t hash = c->key_hash(fname_name(nm), fname_len(nm)); ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); - ubifs_assert(!nm->hash && !nm->minor_hash); key->u32[0] = inum; key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS); } diff --git a/fs/udf/super.c b/fs/udf/super.c index 242d960df9a1..51de27685e18 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -2467,17 +2467,29 @@ static unsigned int udf_count_free_table(struct super_block *sb, static unsigned int udf_count_free(struct super_block *sb) { unsigned int accum = 0; - struct udf_sb_info *sbi; + struct udf_sb_info *sbi = UDF_SB(sb); struct udf_part_map *map; + unsigned int part = sbi->s_partition; + int ptype = sbi->s_partmaps[part].s_partition_type; + + if (ptype == UDF_METADATA_MAP25) { + part = sbi->s_partmaps[part].s_type_specific.s_metadata. + s_phys_partition_ref; + } else if (ptype == UDF_VIRTUAL_MAP15 || ptype == UDF_VIRTUAL_MAP20) { + /* + * Filesystems with VAT are append-only and we cannot write to + * them. Let's just report 0 here. + */ + return 0; + } - sbi = UDF_SB(sb); if (sbi->s_lvid_bh) { struct logicalVolIntegrityDesc *lvid = (struct logicalVolIntegrityDesc *) sbi->s_lvid_bh->b_data; - if (le32_to_cpu(lvid->numOfPartitions) > sbi->s_partition) { + if (le32_to_cpu(lvid->numOfPartitions) > part) { accum = le32_to_cpu( - lvid->freeSpaceTable[sbi->s_partition]); + lvid->freeSpaceTable[part]); if (accum == 0xFFFFFFFF) accum = 0; } @@ -2486,7 +2498,7 @@ static unsigned int udf_count_free(struct super_block *sb) if (accum) return accum; - map = &sbi->s_partmaps[sbi->s_partition]; + map = &sbi->s_partmaps[part]; if (map->s_partition_flags & UDF_PART_FLAG_UNALLOC_BITMAP) { accum += udf_count_free_bitmap(sb, map->s_uspace.s_bitmap); diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c index 71ca4d047d65..d18789f27650 100644 --- a/fs/unicode/utf8-core.c +++ b/fs/unicode/utf8-core.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "utf8n.h" @@ -122,9 +123,29 @@ int utf8_casefold(const struct unicode_map *um, const struct qstr *str, } return -EINVAL; } - EXPORT_SYMBOL(utf8_casefold); +int utf8_casefold_hash(const struct unicode_map *um, const void *salt, + struct qstr *str) +{ + const struct utf8data *data = utf8nfdicf(um->version); + struct utf8cursor cur; + int c; + unsigned long hash = init_name_hash(salt); + + if (utf8ncursor(&cur, data, str->name, str->len) < 0) + return -EINVAL; + + while ((c = utf8byte(&cur))) { + if (c < 0) + return c; + hash = partial_name_hash((unsigned char)c, hash); + } + str->hash = end_name_hash(hash); + return 0; +} +EXPORT_SYMBOL(utf8_casefold_hash); + int utf8_normalize(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen) { diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 3d7853e6ba02..05c8986464c9 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1821,13 +1821,12 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api))) goto out; features = uffdio_api.features; - if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES)) { - memset(&uffdio_api, 0, sizeof(uffdio_api)); - if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api))) - goto out; - ret = -EINVAL; - goto out; - } + ret = -EINVAL; + if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES)) + goto err_out; + ret = -EPERM; + if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE)) + goto err_out; /* report all available features and ioctls to userland */ uffdio_api.features = UFFD_API_FEATURES; uffdio_api.ioctls = UFFD_API_IOCTLS; @@ -1840,6 +1839,11 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, ret = 0; out: return ret; +err_out: + memset(&uffdio_api, 0, sizeof(uffdio_api)); + if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api))) + ret = -EFAULT; + goto out; } static long userfaultfd_ioctl(struct file *file, unsigned cmd, diff --git a/fs/verity/enable.c b/fs/verity/enable.c index eabc6ac19906..15e7d14ec2ff 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -8,18 +8,48 @@ #include "fsverity_private.h" #include +#include #include #include #include #include -static int build_merkle_tree_level(struct inode *inode, unsigned int level, +/* + * Read a file data page for Merkle tree construction. Do aggressive readahead, + * since we're sequentially reading the entire file. + */ +static struct page *read_file_data_page(struct file *filp, pgoff_t index, + struct file_ra_state *ra, + unsigned long remaining_pages) +{ + struct page *page; + + page = find_get_page_flags(filp->f_mapping, index, FGP_ACCESSED); + if (!page || !PageUptodate(page)) { + if (page) + put_page(page); + else + page_cache_sync_readahead(filp->f_mapping, ra, filp, + index, remaining_pages); + page = read_mapping_page(filp->f_mapping, index, NULL); + if (IS_ERR(page)) + return page; + } + if (PageReadahead(page)) + page_cache_async_readahead(filp->f_mapping, ra, filp, page, + index, remaining_pages); + return page; +} + +static int build_merkle_tree_level(struct file *filp, unsigned int level, u64 num_blocks_to_hash, const struct merkle_tree_params *params, u8 *pending_hashes, struct ahash_request *req) { + struct inode *inode = file_inode(filp); const struct fsverity_operations *vops = inode->i_sb->s_vop; + struct file_ra_state ra = { 0 }; unsigned int pending_size = 0; u64 dst_block_num; u64 i; @@ -36,6 +66,8 @@ static int build_merkle_tree_level(struct inode *inode, unsigned int level, dst_block_num = 0; /* unused */ } + file_ra_state_init(&ra, filp->f_mapping); + for (i = 0; i < num_blocks_to_hash; i++) { struct page *src_page; @@ -45,7 +77,8 @@ static int build_merkle_tree_level(struct inode *inode, unsigned int level, if (level == 0) { /* Leaf: hashing a data block */ - src_page = read_mapping_page(inode->i_mapping, i, NULL); + src_page = read_file_data_page(filp, i, &ra, + num_blocks_to_hash - i); if (IS_ERR(src_page)) { err = PTR_ERR(src_page); fsverity_err(inode, @@ -54,9 +87,14 @@ static int build_merkle_tree_level(struct inode *inode, unsigned int level, return err; } } else { + unsigned long num_ra_pages = + min_t(unsigned long, num_blocks_to_hash - i, + inode->i_sb->s_bdi->io_pages); + /* Non-leaf: hashing hash block from level below */ src_page = vops->read_merkle_tree_page(inode, - params->level_start[level - 1] + i); + params->level_start[level - 1] + i, + num_ra_pages); if (IS_ERR(src_page)) { err = PTR_ERR(src_page); fsverity_err(inode, @@ -103,17 +141,18 @@ static int build_merkle_tree_level(struct inode *inode, unsigned int level, } /* - * Build the Merkle tree for the given inode using the given parameters, and + * Build the Merkle tree for the given file using the given parameters, and * return the root hash in @root_hash. * * The tree is written to a filesystem-specific location as determined by the * ->write_merkle_tree_block() method. However, the blocks that comprise the * tree are the same for all filesystems. */ -static int build_merkle_tree(struct inode *inode, +static int build_merkle_tree(struct file *filp, const struct merkle_tree_params *params, u8 *root_hash) { + struct inode *inode = file_inode(filp); u8 *pending_hashes; struct ahash_request *req; u64 blocks; @@ -126,9 +165,11 @@ static int build_merkle_tree(struct inode *inode, return 0; } + /* This allocation never fails, since it's mempool-backed. */ + req = fsverity_alloc_hash_request(params->hash_alg, GFP_KERNEL); + pending_hashes = kmalloc(params->block_size, GFP_KERNEL); - req = ahash_request_alloc(params->hash_alg->tfm, GFP_KERNEL); - if (!pending_hashes || !req) + if (!pending_hashes) goto out; /* @@ -139,7 +180,7 @@ static int build_merkle_tree(struct inode *inode, blocks = (inode->i_size + params->block_size - 1) >> params->log_blocksize; for (level = 0; level <= params->num_levels; level++) { - err = build_merkle_tree_level(inode, level, blocks, params, + err = build_merkle_tree_level(filp, level, blocks, params, pending_hashes, req); if (err) goto out; @@ -150,7 +191,7 @@ static int build_merkle_tree(struct inode *inode, err = 0; out: kfree(pending_hashes); - ahash_request_free(req); + fsverity_free_hash_request(params->hash_alg, req); return err; } @@ -175,8 +216,7 @@ static int enable_verity(struct file *filp, /* Get the salt if the user provided one */ if (arg->salt_size && - copy_from_user(desc->salt, - (const u8 __user *)(uintptr_t)arg->salt_ptr, + copy_from_user(desc->salt, u64_to_user_ptr(arg->salt_ptr), arg->salt_size)) { err = -EFAULT; goto out; @@ -185,8 +225,7 @@ static int enable_verity(struct file *filp, /* Get the signature if the user provided one */ if (arg->sig_size && - copy_from_user(desc->signature, - (const u8 __user *)(uintptr_t)arg->sig_ptr, + copy_from_user(desc->signature, u64_to_user_ptr(arg->sig_ptr), arg->sig_size)) { err = -EFAULT; goto out; @@ -227,7 +266,7 @@ static int enable_verity(struct file *filp, */ pr_debug("Building Merkle tree...\n"); BUILD_BUG_ON(sizeof(desc->root_hash) < FS_VERITY_MAX_DIGEST_SIZE); - err = build_merkle_tree(inode, ¶ms, desc->root_hash); + err = build_merkle_tree(filp, ¶ms, desc->root_hash); if (err) { fsverity_err(inode, "Error %d building Merkle tree", err); goto rollback; diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h index e74c79b64d88..4b2c8aed0563 100644 --- a/fs/verity/fsverity_private.h +++ b/fs/verity/fsverity_private.h @@ -16,6 +16,7 @@ #include #include +#include struct ahash_request; @@ -37,11 +38,12 @@ struct fsverity_hash_alg { const char *name; /* crypto API name, e.g. sha256 */ unsigned int digest_size; /* digest size in bytes, e.g. 32 for SHA-256 */ unsigned int block_size; /* block size in bytes, e.g. 64 for SHA-256 */ + mempool_t *req_pool; /* mempool with a preallocated hash request */ }; /* Merkle tree parameters: hash algorithm, initial hash state, and topology */ struct merkle_tree_params { - const struct fsverity_hash_alg *hash_alg; /* the hash algorithm */ + struct fsverity_hash_alg *hash_alg; /* the hash algorithm */ const u8 *hashstate; /* initial hash state or NULL */ unsigned int digest_size; /* same as hash_alg->digest_size */ unsigned int block_size; /* size of data and tree blocks */ @@ -50,6 +52,7 @@ struct merkle_tree_params { unsigned int log_arity; /* log2(hashes_per_block) */ unsigned int num_levels; /* number of levels in Merkle tree */ u64 tree_size; /* Merkle tree size in bytes */ + unsigned long level0_blocks; /* number of blocks in tree level 0 */ /* * Starting block index for each tree level, ordered from leaf level (0) @@ -114,14 +117,18 @@ struct fsverity_signed_digest { extern struct fsverity_hash_alg fsverity_hash_algs[]; -const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, - unsigned int num); -const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, +struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, + unsigned int num); +struct ahash_request *fsverity_alloc_hash_request(struct fsverity_hash_alg *alg, + gfp_t gfp_flags); +void fsverity_free_hash_request(struct fsverity_hash_alg *alg, + struct ahash_request *req); +const u8 *fsverity_prepare_hash_state(struct fsverity_hash_alg *alg, const u8 *salt, size_t salt_size); int fsverity_hash_page(const struct merkle_tree_params *params, const struct inode *inode, struct ahash_request *req, struct page *page, u8 *out); -int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, +int fsverity_hash_buffer(struct fsverity_hash_alg *alg, const void *data, size_t size, u8 *out); void __init fsverity_check_hash_algs(void); diff --git a/fs/verity/hash_algs.c b/fs/verity/hash_algs.c index 31e6d7d2389a..6682e4e6b601 100644 --- a/fs/verity/hash_algs.c +++ b/fs/verity/hash_algs.c @@ -24,6 +24,8 @@ struct fsverity_hash_alg fsverity_hash_algs[] = { }, }; +static DEFINE_MUTEX(fsverity_hash_alg_init_mutex); + /** * fsverity_get_hash_alg() - validate and prepare a hash algorithm * @inode: optional inode for logging purposes @@ -36,8 +38,8 @@ struct fsverity_hash_alg fsverity_hash_algs[] = { * * Return: pointer to the hash alg on success, else an ERR_PTR() */ -const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, - unsigned int num) +struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, + unsigned int num) { struct fsverity_hash_alg *alg; struct crypto_ahash *tfm; @@ -50,10 +52,15 @@ const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, } alg = &fsverity_hash_algs[num]; - /* pairs with cmpxchg() below */ - tfm = READ_ONCE(alg->tfm); - if (likely(tfm != NULL)) + /* pairs with smp_store_release() below */ + if (likely(smp_load_acquire(&alg->tfm) != NULL)) return alg; + + mutex_lock(&fsverity_hash_alg_init_mutex); + + if (alg->tfm != NULL) + goto out_unlock; + /* * Using the shash API would make things a bit simpler, but the ahash * API is preferable as it allows the use of crypto accelerators. @@ -64,12 +71,14 @@ const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, fsverity_warn(inode, "Missing crypto API support for hash algorithm \"%s\"", alg->name); - return ERR_PTR(-ENOPKG); + alg = ERR_PTR(-ENOPKG); + goto out_unlock; } fsverity_err(inode, "Error allocating hash algorithm \"%s\": %ld", alg->name, PTR_ERR(tfm)); - return ERR_CAST(tfm); + alg = ERR_CAST(tfm); + goto out_unlock; } err = -EINVAL; @@ -78,18 +87,63 @@ const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, if (WARN_ON(alg->block_size != crypto_ahash_blocksize(tfm))) goto err_free_tfm; + alg->req_pool = mempool_create_kmalloc_pool(1, + sizeof(struct ahash_request) + + crypto_ahash_reqsize(tfm)); + if (!alg->req_pool) { + err = -ENOMEM; + goto err_free_tfm; + } + pr_info("%s using implementation \"%s\"\n", alg->name, crypto_ahash_driver_name(tfm)); - /* pairs with READ_ONCE() above */ - if (cmpxchg(&alg->tfm, NULL, tfm) != NULL) - crypto_free_ahash(tfm); - - return alg; + /* pairs with smp_load_acquire() above */ + smp_store_release(&alg->tfm, tfm); + goto out_unlock; err_free_tfm: crypto_free_ahash(tfm); - return ERR_PTR(err); + alg = ERR_PTR(err); +out_unlock: + mutex_unlock(&fsverity_hash_alg_init_mutex); + return alg; +} + +/** + * fsverity_alloc_hash_request() - allocate a hash request object + * @alg: the hash algorithm for which to allocate the request + * @gfp_flags: memory allocation flags + * + * This is mempool-backed, so this never fails if __GFP_DIRECT_RECLAIM is set in + * @gfp_flags. However, in that case this might need to wait for all + * previously-allocated requests to be freed. So to avoid deadlocks, callers + * must never need multiple requests at a time to make forward progress. + * + * Return: the request object on success; NULL on failure (but see above) + */ +struct ahash_request *fsverity_alloc_hash_request(struct fsverity_hash_alg *alg, + gfp_t gfp_flags) +{ + struct ahash_request *req = mempool_alloc(alg->req_pool, gfp_flags); + + if (req) + ahash_request_set_tfm(req, alg->tfm); + return req; +} + +/** + * fsverity_free_hash_request() - free a hash request object + * @alg: the hash algorithm + * @req: the hash request object to free + */ +void fsverity_free_hash_request(struct fsverity_hash_alg *alg, + struct ahash_request *req) +{ + if (req) { + ahash_request_zero(req); + mempool_free(req, alg->req_pool); + } } /** @@ -101,7 +155,7 @@ err_free_tfm: * Return: NULL if the salt is empty, otherwise the kmalloc()'ed precomputed * initial hash state on success or an ERR_PTR() on failure. */ -const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, +const u8 *fsverity_prepare_hash_state(struct fsverity_hash_alg *alg, const u8 *salt, size_t salt_size) { u8 *hashstate = NULL; @@ -119,11 +173,8 @@ const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, if (!hashstate) return ERR_PTR(-ENOMEM); - req = ahash_request_alloc(alg->tfm, GFP_KERNEL); - if (!req) { - err = -ENOMEM; - goto err_free; - } + /* This allocation never fails, since it's mempool-backed. */ + req = fsverity_alloc_hash_request(alg, GFP_KERNEL); /* * Zero-pad the salt to the next multiple of the input size of the hash @@ -158,7 +209,7 @@ const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, if (err) goto err_free; out: - ahash_request_free(req); + fsverity_free_hash_request(alg, req); kfree(padded_salt); return hashstate; @@ -229,7 +280,7 @@ int fsverity_hash_page(const struct merkle_tree_params *params, * * Return: 0 on success, -errno on failure */ -int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, +int fsverity_hash_buffer(struct fsverity_hash_alg *alg, const void *data, size_t size, u8 *out) { struct ahash_request *req; @@ -237,9 +288,8 @@ int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, DECLARE_CRYPTO_WAIT(wait); int err; - req = ahash_request_alloc(alg->tfm, GFP_KERNEL); - if (!req) - return -ENOMEM; + /* This allocation never fails, since it's mempool-backed. */ + req = fsverity_alloc_hash_request(alg, GFP_KERNEL); sg_init_one(&sg, data, size); ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | @@ -249,7 +299,7 @@ int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, err = crypto_wait_req(crypto_ahash_digest(req), &wait); - ahash_request_free(req); + fsverity_free_hash_request(alg, req); return err; } diff --git a/fs/verity/open.c b/fs/verity/open.c index 4cdd75acbc97..25b29065d897 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -31,7 +31,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, unsigned int log_blocksize, const u8 *salt, size_t salt_size) { - const struct fsverity_hash_alg *hash_alg; + struct fsverity_hash_alg *hash_alg; int err; u64 blocks; u64 offset; @@ -102,6 +102,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, /* temporarily using level_start[] to store blocks in level */ params->level_start[params->num_levels++] = blocks; } + params->level0_blocks = params->level_start[0]; /* Compute the starting block of each level */ offset = 0; @@ -126,7 +127,7 @@ out_err: * Compute the file measurement by hashing the fsverity_descriptor excluding the * signature and with the sig_size field set to 0. */ -static int compute_file_measurement(const struct fsverity_hash_alg *hash_alg, +static int compute_file_measurement(struct fsverity_hash_alg *hash_alg, struct fsverity_descriptor *desc, u8 *measurement) { diff --git a/fs/verity/verify.c b/fs/verity/verify.c index cf09852e5227..5324270cd7d4 100644 --- a/fs/verity/verify.c +++ b/fs/verity/verify.c @@ -84,7 +84,8 @@ static inline int cmp_hashes(const struct fsverity_info *vi, * Return: true if the page is valid, else false. */ static bool verify_page(struct inode *inode, const struct fsverity_info *vi, - struct ahash_request *req, struct page *data_page) + struct ahash_request *req, struct page *data_page, + unsigned long level0_ra_pages) { const struct merkle_tree_params *params = &vi->tree_params; const unsigned int hsize = params->digest_size; @@ -117,8 +118,8 @@ static bool verify_page(struct inode *inode, const struct fsverity_info *vi, pr_debug_ratelimited("Level %d: hindex=%lu, hoffset=%u\n", level, hindex, hoffset); - hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode, - hindex); + hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode, hindex, + level == 0 ? level0_ra_pages : 0); if (IS_ERR(hpage)) { err = PTR_ERR(hpage); fsverity_err(inode, @@ -191,13 +192,12 @@ bool fsverity_verify_page(struct page *page) struct ahash_request *req; bool valid; - req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS); - if (unlikely(!req)) - return false; + /* This allocation never fails, since it's mempool-backed. */ + req = fsverity_alloc_hash_request(vi->tree_params.hash_alg, GFP_NOFS); - valid = verify_page(inode, vi, req, page); + valid = verify_page(inode, vi, req, page, 0); - ahash_request_free(req); + fsverity_free_hash_request(vi->tree_params.hash_alg, req); return valid; } @@ -222,25 +222,42 @@ void fsverity_verify_bio(struct bio *bio) { struct inode *inode = bio->bi_io_vec->bv_page->mapping->host; const struct fsverity_info *vi = inode->i_verity_info; + const struct merkle_tree_params *params = &vi->tree_params; struct ahash_request *req; struct bio_vec *bv; int i; + unsigned long max_ra_pages = 0; - req = ahash_request_alloc(vi->tree_params.hash_alg->tfm, GFP_NOFS); - if (unlikely(!req)) { + /* This allocation never fails, since it's mempool-backed. */ + req = fsverity_alloc_hash_request(params->hash_alg, GFP_NOFS); + + if (bio->bi_opf & REQ_RAHEAD) { + /* + * If this bio is for data readahead, then we also do readahead + * of the first (largest) level of the Merkle tree. Namely, + * when a Merkle tree page is read, we also try to piggy-back on + * some additional pages -- up to 1/4 the number of data pages. + * + * This improves sequential read performance, as it greatly + * reduces the number of I/O requests made to the Merkle tree. + */ bio_for_each_segment_all(bv, bio, i) - SetPageError(bv->bv_page); - return; + max_ra_pages++; + max_ra_pages /= 4; } bio_for_each_segment_all(bv, bio, i) { struct page *page = bv->bv_page; + unsigned long level0_index = page->index >> params->log_arity; + unsigned long level0_ra_pages = + min(max_ra_pages, params->level0_blocks - level0_index); - if (!PageError(page) && !verify_page(inode, vi, req, page)) + if (!PageError(page) && + !verify_page(inode, vi, req, page, level0_ra_pages)) SetPageError(page); } - ahash_request_free(req); + fsverity_free_hash_request(params->hash_alg, req); } EXPORT_SYMBOL_GPL(fsverity_verify_bio); #endif /* CONFIG_BLOCK */ diff --git a/fs/xattr.c b/fs/xattr.c index 3e5d157d9b8c..35ead9b764c6 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -307,6 +307,9 @@ __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name, handler = xattr_resolve_name(inode, &name); if (IS_ERR(handler)) return PTR_ERR(handler); + if (unlikely(handler->__get)) + return handler->__get(handler, dentry, inode, name, value, + size); if (!handler->get) return -EOPNOTSUPP; return handler->get(handler, dentry, inode, name, value, size); @@ -318,6 +321,7 @@ vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) { struct inode *inode = dentry->d_inode; int error; + const struct xattr_handler *handler; error = xattr_permission(inode, name, MAY_READ); if (error) @@ -340,7 +344,12 @@ vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) return ret; } nolsm: - return __vfs_getxattr(dentry, inode, name, value, size); + handler = xattr_resolve_name(inode, &name); + if (IS_ERR(handler)) + return PTR_ERR(handler); + if (!handler->get) + return -EOPNOTSUPP; + return handler->get(handler, dentry, inode, name, value, size); } EXPORT_SYMBOL_GPL(vfs_getxattr); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 7b25a88569c9..84245d210182 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5556,7 +5556,7 @@ __xfs_bunmapi( * Make sure we don't touch multiple AGF headers out of order * in a single transaction, as that could cause AB-BA deadlocks. */ - if (!wasdel) { + if (!wasdel && !isrt) { agno = XFS_FSB_TO_AGNO(mp, del.br_startblock); if (prev_agno != NULLAGNUMBER && prev_agno > agno) break; diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 16f93d7356b7..e5970ecdfd58 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -58,6 +58,32 @@ static kmem_zone_t *xfs_buf_zone; #define xb_to_gfp(flags) \ ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : GFP_NOFS) | __GFP_NOWARN) +/* + * Locking orders + * + * xfs_buf_ioacct_inc: + * xfs_buf_ioacct_dec: + * b_sema (caller holds) + * b_lock + * + * xfs_buf_stale: + * b_sema (caller holds) + * b_lock + * lru_lock + * + * xfs_buf_rele: + * b_lock + * pag_buf_lock + * lru_lock + * + * xfs_buftarg_wait_rele + * lru_lock + * b_lock (trylock due to inversion) + * + * xfs_buftarg_isolate + * lru_lock + * b_lock (trylock due to inversion) + */ static inline int xfs_buf_is_vmapped( @@ -983,8 +1009,18 @@ xfs_buf_rele( ASSERT(atomic_read(&bp->b_hold) > 0); - release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock); + /* + * We grab the b_lock here first to serialise racing xfs_buf_rele() + * calls. The pag_buf_lock being taken on the last reference only + * serialises against racing lookups in xfs_buf_find(). IOWs, the second + * to last reference we drop here is not serialised against the last + * reference until we take bp->b_lock. Hence if we don't grab b_lock + * first, the last "release" reference can win the race to the lock and + * free the buffer before the second-to-last reference is processed, + * leading to a use-after-free scenario. + */ spin_lock(&bp->b_lock); + release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock); if (!release) { /* * Drop the in-flight state if the buffer is already on the LRU @@ -1702,7 +1738,7 @@ xfs_buftarg_isolate( * zero. If the value is already zero, we need to reclaim the * buffer, otherwise it gets another trip through the LRU. */ - if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { + if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) { spin_unlock(&bp->b_lock); return LRU_ROTATE; } diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index fa0bc4d46065..5f616a6a5358 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -252,6 +252,32 @@ xfs_compat_ioc_bulkstat( int done; int error; + /* + * Output structure handling functions. Depending on the command, + * either the xfs_bstat and xfs_inogrp structures are written out + * to userpace memory via bulkreq.ubuffer. Normally the compat + * functions and structure size are the correct ones to use ... + */ + inumbers_fmt_pf inumbers_func = xfs_inumbers_fmt_compat; + bulkstat_one_pf bs_one_func = xfs_bulkstat_one_compat; + size_t bs_one_size = sizeof(struct compat_xfs_bstat); + +#ifdef CONFIG_X86_X32 + if (in_x32_syscall()) { + /* + * ... but on x32 the input xfs_fsop_bulkreq has pointers + * which must be handled in the "compat" (32-bit) way, while + * the xfs_bstat and xfs_inogrp structures follow native 64- + * bit layout convention. So adjust accordingly, otherwise + * the data written out in compat layout will not match what + * x32 userspace expects. + */ + inumbers_func = xfs_inumbers_fmt; + bs_one_func = xfs_bulkstat_one; + bs_one_size = sizeof(struct xfs_bstat); + } +#endif + /* done = 1 if there are more stats to get and if bulkstat */ /* should be called again (unused here, but used in dmapi) */ @@ -283,15 +309,15 @@ xfs_compat_ioc_bulkstat( if (cmd == XFS_IOC_FSINUMBERS_32) { error = xfs_inumbers(mp, &inlast, &count, - bulkreq.ubuffer, xfs_inumbers_fmt_compat); + bulkreq.ubuffer, inumbers_func); } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { int res; - error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, - sizeof(compat_xfs_bstat_t), NULL, &res); + error = bs_one_func(mp, inlast, bulkreq.ubuffer, + bs_one_size, NULL, &res); } else if (cmd == XFS_IOC_FSBULKSTAT_32) { error = xfs_bulkstat(mp, &inlast, &count, - xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), + bs_one_func, bs_one_size, bulkreq.ubuffer, &done); } else error = -EINVAL; @@ -347,6 +373,7 @@ xfs_compat_attrlist_by_handle( { int error; attrlist_cursor_kern_t *cursor; + compat_xfs_fsop_attrlist_handlereq_t __user *p = arg; compat_xfs_fsop_attrlist_handlereq_t al_hreq; struct dentry *dentry; char *kbuf; @@ -381,6 +408,11 @@ xfs_compat_attrlist_by_handle( if (error) goto out_kfree; + if (copy_to_user(&p->pos, cursor, sizeof(attrlist_cursor_kern_t))) { + error = -EFAULT; + goto out_kfree; + } + if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen)) error = -EFAULT; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index dc95a49d62e7..4e768e606998 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1539,6 +1539,8 @@ out_free_iclog: if (iclog->ic_bp) xfs_buf_free(iclog->ic_bp); kmem_free(iclog); + if (prev_iclog == log->l_iclog) + break; } spinlock_destroy(&log->l_icloglock); xfs_buf_free(log->l_xbuf); diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index a65108594a07..21bc6d2d23ca 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -214,6 +214,9 @@ xfs_fs_rm_xquota( if (XFS_IS_QUOTA_ON(mp)) return -EINVAL; + if (uflags & ~(FS_USER_QUOTA | FS_GROUP_QUOTA | FS_PROJ_QUOTA)) + return -EINVAL; + if (uflags & FS_USER_QUOTA) flags |= XFS_DQ_USER; if (uflags & FS_GROUP_QUOTA) diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 488719d43ca8..cdcb7235e41a 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -1214,13 +1214,11 @@ xfs_rtmount_inodes( xfs_sb_t *sbp; sbp = &mp->m_sb; - if (sbp->sb_rbmino == NULLFSINO) - return 0; error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip); if (error) return error; ASSERT(mp->m_rbmip != NULL); - ASSERT(sbp->sb_rsumino != NULLFSINO); + error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip); if (error) { IRELE(mp->m_rbmip); diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 4f077edb9b81..71fadbe77e21 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -556,11 +556,12 @@ typedef u64 acpi_integer; #define ACPI_MAKE_RSDP_SIG(dest) (memcpy (ACPI_CAST_PTR (char, (dest)), ACPI_SIG_RSDP, 8)) /* - * Algorithm to obtain access bit width. + * Algorithm to obtain access bit or byte width. * Can be used with access_width of struct acpi_generic_address and access_size of * struct acpi_resource_generic_register. */ #define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + 2)) +#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) - 1)) /******************************************************************************* * diff --git a/include/drm/drm_dp_mst_helper.h b/include/drm/drm_dp_mst_helper.h index d55abb75f29a..eec6cba204ea 100644 --- a/include/drm/drm_dp_mst_helper.h +++ b/include/drm/drm_dp_mst_helper.h @@ -313,7 +313,7 @@ struct drm_dp_resource_status_notify { struct drm_dp_query_payload_ack_reply { u8 port_number; - u8 allocated_pbn; + u16 allocated_pbn; }; struct drm_dp_sideband_msg_req_body { diff --git a/include/drm/drm_vma_manager.h b/include/drm/drm_vma_manager.h index d84d52f6d2b1..b54c98f05460 100644 --- a/include/drm/drm_vma_manager.h +++ b/include/drm/drm_vma_manager.h @@ -41,6 +41,7 @@ struct drm_vma_offset_node { rwlock_t vm_lock; struct drm_mm_node vm_node; struct rb_root vm_files; + bool readonly:1; }; struct drm_vma_offset_manager { diff --git a/include/dt-bindings/clock/rk3328-cru.h b/include/dt-bindings/clock/rk3328-cru.h index d2b26a4b43eb..4a9db1b2669b 100644 --- a/include/dt-bindings/clock/rk3328-cru.h +++ b/include/dt-bindings/clock/rk3328-cru.h @@ -178,7 +178,7 @@ #define HCLK_TSP 309 #define HCLK_GMAC 310 #define HCLK_I2S0_8CH 311 -#define HCLK_I2S1_8CH 313 +#define HCLK_I2S1_8CH 312 #define HCLK_I2S2_2CH 313 #define HCLK_SPDIF_8CH 314 #define HCLK_VOP 315 diff --git a/include/dt-bindings/reset/amlogic,meson8b-reset.h b/include/dt-bindings/reset/amlogic,meson8b-reset.h index 614aff2c7aff..a03e86fe2c57 100644 --- a/include/dt-bindings/reset/amlogic,meson8b-reset.h +++ b/include/dt-bindings/reset/amlogic,meson8b-reset.h @@ -95,9 +95,9 @@ #define RESET_VD_RMEM 64 #define RESET_AUDIN 65 #define RESET_DBLK 66 -#define RESET_PIC_DC 66 -#define RESET_PSC 66 -#define RESET_NAND 66 +#define RESET_PIC_DC 67 +#define RESET_PSC 68 +#define RESET_NAND 69 #define RESET_GE2D 70 #define RESET_PARSER_REG 71 #define RESET_PARSER_FETCH 72 diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d7a9700b9333..4bb3bca75004 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -99,7 +99,7 @@ static inline bool has_acpi_companion(struct device *dev) static inline void acpi_preset_companion(struct device *dev, struct acpi_device *parent, u64 addr) { - ACPI_COMPANION_SET(dev, acpi_find_child_device(parent, addr, NULL)); + ACPI_COMPANION_SET(dev, acpi_find_child_device(parent, addr, false)); } static inline const char *acpi_dev_name(struct acpi_device *adev) diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index 1b0a17b22cd3..d560580d9cda 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -23,6 +23,8 @@ struct ahci_host_priv; struct platform_device; struct scsi_host_template; +int ahci_platform_enable_phys(struct ahci_host_priv *hpriv); +void ahci_platform_disable_phys(struct ahci_host_priv *hpriv); int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); void ahci_platform_disable_clks(struct ahci_host_priv *hpriv); int ahci_platform_enable_regulators(struct ahci_host_priv *hpriv); diff --git a/include/linux/atalk.h b/include/linux/atalk.h index 03885e63f92b..2664b9e89f9b 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -108,7 +108,7 @@ static __inline__ struct elapaarp *aarp_hdr(struct sk_buff *skb) #define AARP_RESOLVE_TIME (10 * HZ) extern struct datalink_proto *ddp_dl, *aarp_dl; -extern void aarp_proto_init(void); +extern int aarp_proto_init(void); /* Inter module exports */ diff --git a/include/linux/bio-crypt-ctx.h b/include/linux/bio-crypt-ctx.h new file mode 100644 index 000000000000..8456a409fc21 --- /dev/null +++ b/include/linux/bio-crypt-ctx.h @@ -0,0 +1,229 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ +#ifndef __LINUX_BIO_CRYPT_CTX_H +#define __LINUX_BIO_CRYPT_CTX_H + +enum blk_crypto_mode_num { + BLK_ENCRYPTION_MODE_INVALID, + BLK_ENCRYPTION_MODE_AES_256_XTS, + BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV, + BLK_ENCRYPTION_MODE_ADIANTUM, + BLK_ENCRYPTION_MODE_MAX, +}; + +#ifdef CONFIG_BLOCK +#include + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + +#define BLK_CRYPTO_MAX_KEY_SIZE 64 +#define BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE 128 + +/** + * struct blk_crypto_key - an inline encryption key + * @crypto_mode: encryption algorithm this key is for + * @data_unit_size: the data unit size for all encryption/decryptions with this + * key. This is the size in bytes of each individual plaintext and + * ciphertext. This is always a power of 2. It might be e.g. the + * filesystem block size or the disk sector size. + * @data_unit_size_bits: log2 of data_unit_size + * @size: size of this key in bytes (determined by @crypto_mode) + * @hash: hash of this key, for keyslot manager use only + * @is_hw_wrapped: @raw points to a wrapped key to be used by an inline + * encryption hardware that accepts wrapped keys. + * @raw: the raw bytes of this key. Only the first @size bytes are used. + * + * A blk_crypto_key is immutable once created, and many bios can reference it at + * the same time. It must not be freed until all bios using it have completed. + */ +struct blk_crypto_key { + enum blk_crypto_mode_num crypto_mode; + unsigned int data_unit_size; + unsigned int data_unit_size_bits; + unsigned int size; + unsigned int hash; + bool is_hw_wrapped; + u8 raw[BLK_CRYPTO_MAX_WRAPPED_KEY_SIZE]; +}; + +#define BLK_CRYPTO_MAX_IV_SIZE 32 +#define BLK_CRYPTO_DUN_ARRAY_SIZE (BLK_CRYPTO_MAX_IV_SIZE/sizeof(u64)) + +/** + * struct bio_crypt_ctx - an inline encryption context + * @bc_key: the key, algorithm, and data unit size to use + * @bc_keyslot: the keyslot that has been assigned for this key in @bc_ksm, + * or -1 if no keyslot has been assigned yet. + * @bc_dun: the data unit number (starting IV) to use + * @bc_ksm: the keyslot manager into which the key has been programmed with + * @bc_keyslot, or NULL if this key hasn't yet been programmed. + * + * A bio_crypt_ctx specifies that the contents of the bio will be encrypted (for + * write requests) or decrypted (for read requests) inline by the storage device + * or controller, or by the crypto API fallback. + */ +struct bio_crypt_ctx { + const struct blk_crypto_key *bc_key; + int bc_keyslot; + + /* Data unit number */ + u64 bc_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]; + + /* + * The keyslot manager where the key has been programmed + * with keyslot. + */ + struct keyslot_manager *bc_ksm; +}; + +int bio_crypt_ctx_init(void); + +struct bio_crypt_ctx *bio_crypt_alloc_ctx(gfp_t gfp_mask); + +void bio_crypt_free_ctx(struct bio *bio); + +static inline bool bio_has_crypt_ctx(struct bio *bio) +{ + return bio->bi_crypt_context; +} + +void bio_crypt_clone(struct bio *dst, struct bio *src, gfp_t gfp_mask); + +static inline void bio_crypt_set_ctx(struct bio *bio, + const struct blk_crypto_key *key, + u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE], + gfp_t gfp_mask) +{ + struct bio_crypt_ctx *bc = bio_crypt_alloc_ctx(gfp_mask); + + bc->bc_key = key; + memcpy(bc->bc_dun, dun, sizeof(bc->bc_dun)); + bc->bc_ksm = NULL; + bc->bc_keyslot = -1; + + bio->bi_crypt_context = bc; +} + +void bio_crypt_ctx_release_keyslot(struct bio_crypt_ctx *bc); + +int bio_crypt_ctx_acquire_keyslot(struct bio_crypt_ctx *bc, + struct keyslot_manager *ksm); + +struct request; +bool bio_crypt_should_process(struct request *rq); + +static inline bool bio_crypt_dun_is_contiguous(const struct bio_crypt_ctx *bc, + unsigned int bytes, + u64 next_dun[BLK_CRYPTO_DUN_ARRAY_SIZE]) +{ + int i = 0; + unsigned int inc = bytes >> bc->bc_key->data_unit_size_bits; + + while (i < BLK_CRYPTO_DUN_ARRAY_SIZE) { + if (bc->bc_dun[i] + inc != next_dun[i]) + return false; + inc = ((bc->bc_dun[i] + inc) < inc); + i++; + } + + return true; +} + + +static inline void bio_crypt_dun_increment(u64 dun[BLK_CRYPTO_DUN_ARRAY_SIZE], + unsigned int inc) +{ + int i = 0; + + while (inc && i < BLK_CRYPTO_DUN_ARRAY_SIZE) { + dun[i] += inc; + inc = (dun[i] < inc); + i++; + } +} + +static inline void bio_crypt_advance(struct bio *bio, unsigned int bytes) +{ + struct bio_crypt_ctx *bc = bio->bi_crypt_context; + + if (!bc) + return; + + bio_crypt_dun_increment(bc->bc_dun, + bytes >> bc->bc_key->data_unit_size_bits); +} + +bool bio_crypt_ctx_compatible(struct bio *b_1, struct bio *b_2); + +bool bio_crypt_ctx_mergeable(struct bio *b_1, unsigned int b1_bytes, + struct bio *b_2); + +#else /* CONFIG_BLK_INLINE_ENCRYPTION */ +static inline int bio_crypt_ctx_init(void) +{ + return 0; +} + +static inline bool bio_has_crypt_ctx(struct bio *bio) +{ + return false; +} + +static inline void bio_crypt_clone(struct bio *dst, struct bio *src, + gfp_t gfp_mask) { } + +static inline void bio_crypt_free_ctx(struct bio *bio) { } + +static inline void bio_crypt_advance(struct bio *bio, unsigned int bytes) { } + +static inline bool bio_crypt_ctx_compatible(struct bio *b_1, struct bio *b_2) +{ + return true; +} + +static inline bool bio_crypt_ctx_mergeable(struct bio *b_1, + unsigned int b1_bytes, + struct bio *b_2) +{ + return true; +} + +#endif /* CONFIG_BLK_INLINE_ENCRYPTION */ + +#if IS_ENABLED(CONFIG_DM_DEFAULT_KEY) +static inline void bio_set_skip_dm_default_key(struct bio *bio) +{ + bio->bi_skip_dm_default_key = true; +} + +static inline bool bio_should_skip_dm_default_key(const struct bio *bio) +{ + return bio->bi_skip_dm_default_key; +} + +static inline void bio_clone_skip_dm_default_key(struct bio *dst, + const struct bio *src) +{ + dst->bi_skip_dm_default_key = src->bi_skip_dm_default_key; +} +#else /* CONFIG_DM_DEFAULT_KEY */ +static inline void bio_set_skip_dm_default_key(struct bio *bio) +{ +} + +static inline bool bio_should_skip_dm_default_key(const struct bio *bio) +{ + return false; +} + +static inline void bio_clone_skip_dm_default_key(struct bio *dst, + const struct bio *src) +{ +} +#endif /* !CONFIG_DM_DEFAULT_KEY */ + +#endif /* CONFIG_BLOCK */ + +#endif /* __LINUX_BIO_CRYPT_CTX_H */ diff --git a/include/linux/bio.h b/include/linux/bio.h index e260f000b9ac..2e08e3731376 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -22,6 +22,7 @@ #include #include #include +#include #ifdef CONFIG_BLOCK diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 835c2271196a..10a4dd02221d 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -86,6 +86,14 @@ * contain all bit positions from 0 to 'bits' - 1. */ +/* + * Allocation and deallocation of bitmap. + * Provided in lib/bitmap.c to avoid circular dependency. + */ +extern unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags); +extern unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags); +extern void bitmap_free(const unsigned long *bitmap); + /* * lib/bitmap.c provides these functions: */ @@ -185,8 +193,13 @@ extern int bitmap_print_to_pagebuf(bool list, char *buf, #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) +/* + * The static inlines below do not handle constant nbits==0 correctly, + * so make such users (should any ever turn up) call the out-of-line + * versions. + */ #define small_const_nbits(nbits) \ - (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG) + (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0) static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { @@ -350,7 +363,7 @@ static __always_inline void bitmap_clear(unsigned long *map, unsigned int start, } static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, - unsigned int shift, int nbits) + unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift; diff --git a/include/linux/bitops.h b/include/linux/bitops.h index b767c7ad65c6..c51574fab0b0 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -4,7 +4,8 @@ #include #include -#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_TYPE(long)) extern unsigned int __sw_hweight8(unsigned int w); extern unsigned int __sw_hweight16(unsigned int w); diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h new file mode 100644 index 000000000000..913b367d42bd --- /dev/null +++ b/include/linux/blk-crypto.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ + +#ifndef __LINUX_BLK_CRYPTO_H +#define __LINUX_BLK_CRYPTO_H + +#include + +#define SECTOR_SHIFT 9 + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + +int blk_crypto_submit_bio(struct bio **bio_ptr); + +bool blk_crypto_endio(struct bio *bio); + +int blk_crypto_init_key(struct blk_crypto_key *blk_key, + const u8 *raw_key, unsigned int raw_key_size, + bool is_hw_wrapped, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size); + +int blk_crypto_evict_key(struct request_queue *q, + const struct blk_crypto_key *key); + +#else /* CONFIG_BLK_INLINE_ENCRYPTION */ + +static inline int blk_crypto_submit_bio(struct bio **bio_ptr) +{ + return 0; +} + +static inline bool blk_crypto_endio(struct bio *bio) +{ + return true; +} + +#endif /* CONFIG_BLK_INLINE_ENCRYPTION */ + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK + +int blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, + unsigned int data_unit_size, + struct request_queue *q); + +int blk_crypto_fallback_init(void); + +#else /* CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK */ + +static inline int +blk_crypto_start_using_mode(enum blk_crypto_mode_num mode_num, + unsigned int data_unit_size, + struct request_queue *q) +{ + return 0; +} + +static inline int blk_crypto_fallback_init(void) +{ + return 0; +} + +#endif /* CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK */ + +#endif /* __LINUX_BLK_CRYPTO_H */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index e8d94ca2ddf5..cb62f73cf44f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -17,6 +17,7 @@ struct block_device; struct io_context; struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); +struct bio_crypt_ctx; /* * Block error status values. See block/blk-core:blk_errors for the details. @@ -95,6 +96,14 @@ struct bio { struct blk_issue_stat bi_issue_stat; #endif #endif + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + struct bio_crypt_ctx *bi_crypt_context; +#if IS_ENABLED(CONFIG_DM_DEFAULT_KEY) + bool bi_skip_dm_default_key; +#endif +#endif + union { #if defined(CONFIG_BLK_DEV_INTEGRITY) struct bio_integrity_payload *bi_integrity; /* data integrity */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 94c731913be7..a384d0c81bca 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -43,6 +43,7 @@ struct pr_ops; struct rq_wb; struct blk_queue_stats; struct blk_stat_callback; +struct keyslot_manager; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -343,6 +344,7 @@ struct queue_limits { unsigned int max_sectors; unsigned int max_segment_size; unsigned int physical_block_size; + unsigned int logical_block_size; unsigned int alignment_offset; unsigned int io_min; unsigned int io_opt; @@ -353,7 +355,6 @@ struct queue_limits { unsigned int discard_granularity; unsigned int discard_alignment; - unsigned short logical_block_size; unsigned short max_segments; unsigned short max_integrity_segments; unsigned short max_discard_segments; @@ -542,6 +543,11 @@ struct request_queue { */ unsigned int request_fn_active; +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + /* Inline crypto capabilities */ + struct keyslot_manager *ksm; +#endif + unsigned int rq_timeout; int poll_nsec; @@ -1185,7 +1191,7 @@ extern void blk_queue_max_write_same_sectors(struct request_queue *q, unsigned int max_write_same_sectors); extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, unsigned int max_write_same_sectors); -extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); +extern void blk_queue_logical_block_size(struct request_queue *, unsigned int); extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); extern void blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); @@ -1443,7 +1449,7 @@ static inline unsigned int queue_max_segment_size(struct request_queue *q) return q->limits.max_segment_size; } -static inline unsigned short queue_logical_block_size(struct request_queue *q) +static inline unsigned queue_logical_block_size(struct request_queue *q) { int retval = 512; @@ -1453,7 +1459,7 @@ static inline unsigned short queue_logical_block_size(struct request_queue *q) return retval; } -static inline unsigned short bdev_logical_block_size(struct block_device *bdev) +static inline unsigned int bdev_logical_block_size(struct block_device *bdev) { return queue_logical_block_size(bdev_get_queue(bdev)); } diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 8804753805ac..7bb2d8de9f30 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -116,7 +116,13 @@ extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes); static inline sector_t blk_rq_trace_sector(struct request *rq) { - return blk_rq_is_passthrough(rq) ? 0 : blk_rq_pos(rq); + /* + * Tracing should ignore starting sector for passthrough requests and + * requests where starting sector didn't get set. + */ + if (blk_rq_is_passthrough(rq) || blk_rq_pos(rq) == (sector_t)-1) + return 0; + return blk_rq_pos(rq); } static inline unsigned int blk_rq_trace_nr_sectors(struct request *rq) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 44dfae288fcf..540c44fab023 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -15,27 +15,42 @@ struct bpf_sock_ops_kern; extern struct static_key_false cgroup_bpf_enabled_key; #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) +struct bpf_prog_list { + struct list_head node; + struct bpf_prog *prog; +}; + +struct bpf_prog_array; + struct cgroup_bpf { - /* - * Store two sets of bpf_prog pointers, one for programs that are - * pinned directly to this cgroup, and one for those that are effective - * when this cgroup is accessed. + /* array of effective progs in this cgroup */ + struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE]; + + /* attached progs to this cgroup and attach flags + * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will + * have either zero or one element + * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS */ - struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE]; - struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE]; - bool disallow_override[MAX_BPF_ATTACH_TYPE]; + struct list_head progs[MAX_BPF_ATTACH_TYPE]; + u32 flags[MAX_BPF_ATTACH_TYPE]; + + /* temp storage for effective prog array used by prog_attach/detach */ + struct bpf_prog_array __rcu *inactive; }; void cgroup_bpf_put(struct cgroup *cgrp); -void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent); +int cgroup_bpf_inherit(struct cgroup *cgrp); -int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, - struct bpf_prog *prog, enum bpf_attach_type type, - bool overridable); +int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); +int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); -/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */ -int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, bool overridable); +/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ +int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); +int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, @@ -97,8 +112,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, struct cgroup_bpf {}; static inline void cgroup_bpf_put(struct cgroup *cgrp) {} -static inline void cgroup_bpf_inherit(struct cgroup *cgrp, - struct cgroup *parent) {} +static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c2954ab168af..56e877211868 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -255,6 +255,58 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +/* an array of programs to be executed under rcu_lock. + * + * Typical usage: + * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN); + * + * the structure returned by bpf_prog_array_alloc() should be populated + * with program pointers and the last pointer must be NULL. + * The user has to keep refcnt on the program and make sure the program + * is removed from the array before bpf_prog_put(). + * The 'struct bpf_prog_array *' should only be replaced with xchg() + * since other cpus are walking the array of pointers in parallel. + */ +struct bpf_prog_array { + struct rcu_head rcu; + struct bpf_prog *progs[0]; +}; + +struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); +void bpf_prog_array_free(struct bpf_prog_array __rcu *progs); + +void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, + struct bpf_prog *old_prog); +int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, + struct bpf_prog *exclude_prog, + struct bpf_prog *include_prog, + struct bpf_prog_array **new_array); + +#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \ + ({ \ + struct bpf_prog **_prog, *__prog; \ + struct bpf_prog_array *_array; \ + u32 _ret = 1; \ + rcu_read_lock(); \ + _array = rcu_dereference(array); \ + if (unlikely(check_non_null && !_array))\ + goto _out; \ + _prog = _array->progs; \ + while ((__prog = READ_ONCE(*_prog))) { \ + _ret &= func(__prog, ctx); \ + _prog++; \ + } \ +_out: \ + rcu_read_unlock(); \ + _ret; \ + }) + +#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ + __BPF_PROG_RUN_ARRAY(array, ctx, func, false) + +#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func) \ + __BPF_PROG_RUN_ARRAY(array, ctx, func, true) + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index c0c0b992210e..995903c7055b 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -18,6 +18,7 @@ #include #include #include +#include #include /* @@ -90,6 +91,36 @@ struct can_priv { #define get_can_dlc(i) (min_t(__u8, (i), CAN_MAX_DLC)) #define get_canfd_dlc(i) (min_t(__u8, (i), CANFD_MAX_DLC)) +/* Check for outgoing skbs that have not been created by the CAN subsystem */ +static inline bool can_skb_headroom_valid(struct net_device *dev, + struct sk_buff *skb) +{ + /* af_packet creates a headroom of HH_DATA_MOD bytes which is fine */ + if (WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct can_skb_priv))) + return false; + + /* af_packet does not apply CAN skb specific settings */ + if (skb->ip_summed == CHECKSUM_NONE) { + /* init headroom */ + can_skb_prv(skb)->ifindex = dev->ifindex; + can_skb_prv(skb)->skbcnt = 0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* preform proper loopback on capable devices */ + if (dev->flags & IFF_ECHO) + skb->pkt_type = PACKET_LOOPBACK; + else + skb->pkt_type = PACKET_HOST; + + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + } + + return true; +} + /* Drop a given socketbuffer if it does not contain a valid CAN frame. */ static inline bool can_dropped_invalid_skb(struct net_device *dev, struct sk_buff *skb) @@ -107,6 +138,9 @@ static inline bool can_dropped_invalid_skb(struct net_device *dev, } else goto inval_skb; + if (!can_skb_headroom_valid(dev, skb)) + goto inval_skb; + return false; inval_skb: diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index de0f5fe28490..a22949de5b40 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -61,6 +61,7 @@ struct css_task_iter { struct list_head *mg_tasks_head; struct list_head *dying_tasks_head; + struct list_head *cur_tasks_head; struct css_set *cur_cset; struct css_set *cur_dcset; struct task_struct *cur_task; diff --git a/include/linux/compat.h b/include/linux/compat.h index 23909d12f729..cec96d4794d0 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -324,8 +324,6 @@ struct compat_kexec_segment; struct compat_mq_attr; struct compat_msgbuf; -extern void compat_exit_robust_list(struct task_struct *curr); - asmlinkage long compat_sys_set_robust_list(struct compat_robust_list_head __user *head, compat_size_t len); diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index ef9856bb1994..e8642cc797fd 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -59,3 +59,9 @@ __has_builtin(__builtin_sub_overflow) #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif + +#if __has_feature(shadow_call_stack) +# define __noscs __attribute__((__no_sanitize__("shadow-call-stack"))) +#else +# define __noscs +#endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index a207f820d3b0..226f4ea0e57c 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -158,6 +158,10 @@ struct ftrace_likely_data { #define __malloc #endif +#ifndef __noscs +# define __noscs +#endif + /* * Allow us to avoid 'defined but not used' warnings on functions and data, * as well as force them to be emitted to the assembly file. diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h index edfeaba95429..a2681d17a579 100644 --- a/include/linux/coresight-pmu.h +++ b/include/linux/coresight-pmu.h @@ -23,11 +23,13 @@ /* ETMv3.5/PTM's ETMCR config bit */ #define ETM_OPT_CYCACC 12 +#define ETM_OPT_CTXTID 14 #define ETM_OPT_TS 28 #define ETM_OPT_RETSTK 29 /* ETMv4 CONFIGR programming bits for the ETM OPTs */ #define ETM4_CFG_BIT_CYCACC 4 +#define ETM4_CFG_BIT_CTXTID 6 #define ETM4_CFG_BIT_TS 11 #define ETM4_CFG_BIT_RETSTK 12 diff --git a/include/linux/coresight.h b/include/linux/coresight.h index d950dad5056a..d38d1ac45e13 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -47,6 +47,7 @@ enum coresight_dev_type { CORESIGHT_DEV_TYPE_LINK, CORESIGHT_DEV_TYPE_LINKSINK, CORESIGHT_DEV_TYPE_SOURCE, + CORESIGHT_DEV_TYPE_HELPER, }; enum coresight_dev_subtype_sink { @@ -69,19 +70,30 @@ enum coresight_dev_subtype_source { CORESIGHT_DEV_SUBTYPE_SOURCE_SOFTWARE, }; +enum coresight_dev_subtype_helper { + CORESIGHT_DEV_SUBTYPE_HELPER_NONE, + CORESIGHT_DEV_SUBTYPE_HELPER_CATU, +}; + /** - * struct coresight_dev_subtype - further characterisation of a type + * union coresight_dev_subtype - further characterisation of a type * @sink_subtype: type of sink this component is, as defined - by @coresight_dev_subtype_sink. + * by @coresight_dev_subtype_sink. * @link_subtype: type of link this component is, as defined - by @coresight_dev_subtype_link. + * by @coresight_dev_subtype_link. * @source_subtype: type of source this component is, as defined - by @coresight_dev_subtype_source. + * by @coresight_dev_subtype_source. + * @helper_subtype: type of helper this component is, as defined + * by @coresight_dev_subtype_helper. */ -struct coresight_dev_subtype { - enum coresight_dev_subtype_sink sink_subtype; - enum coresight_dev_subtype_link link_subtype; +union coresight_dev_subtype { + /* We have some devices which acts as LINK and SINK */ + struct { + enum coresight_dev_subtype_sink sink_subtype; + enum coresight_dev_subtype_link link_subtype; + }; enum coresight_dev_subtype_source source_subtype; + enum coresight_dev_subtype_helper helper_subtype; }; /** @@ -120,7 +132,7 @@ struct coresight_platform_data { */ struct coresight_desc { enum coresight_dev_type type; - struct coresight_dev_subtype subtype; + union coresight_dev_subtype subtype; const struct coresight_ops *ops; struct coresight_platform_data *pdata; struct device *dev; @@ -156,21 +168,24 @@ struct coresight_connection { * @orphan: true if the component has connections that haven't been linked. * @enable: 'true' if component is currently part of an active path. * @activated: 'true' only if a _sink_ has been activated. A sink can be - activated but not yet enabled. Enabling for a _sink_ - happens when a source has been selected for that it. + * activated but not yet enabled. Enabling for a _sink_ + * appens when a source has been selected for that it. + * @ea: Device attribute for sink representation under PMU directory. */ struct coresight_device { struct coresight_connection *conns; int nr_inport; int nr_outport; enum coresight_dev_type type; - struct coresight_dev_subtype subtype; + union coresight_dev_subtype subtype; const struct coresight_ops *ops; struct device dev; atomic_t *refcnt; bool orphan; bool enable; /* true only if configured as part of a path */ + /* sink specific fields */ bool activated; /* true only if a sink is part of a path */ + struct dev_ext_attribute *ea; }; #define to_coresight_device(d) container_of(d, struct coresight_device, dev) @@ -178,6 +193,7 @@ struct coresight_device { #define source_ops(csdev) csdev->ops->source_ops #define sink_ops(csdev) csdev->ops->sink_ops #define link_ops(csdev) csdev->ops->link_ops +#define helper_ops(csdev) csdev->ops->helper_ops /** * struct coresight_ops_sink - basic operations for a sink @@ -186,23 +202,16 @@ struct coresight_device { * @disable: disables the sink. * @alloc_buffer: initialises perf's ring buffer for trace collection. * @free_buffer: release memory allocated in @get_config. - * @set_buffer: initialises buffer mechanic before a trace session. - * @reset_buffer: finalises buffer mechanic after a trace session. * @update_buffer: update buffer pointers after a trace session. */ struct coresight_ops_sink { - int (*enable)(struct coresight_device *csdev, u32 mode); - void (*disable)(struct coresight_device *csdev); - void *(*alloc_buffer)(struct coresight_device *csdev, int cpu, - void **pages, int nr_pages, bool overwrite); + int (*enable)(struct coresight_device *csdev, u32 mode, void *data); + int (*disable)(struct coresight_device *csdev); + void *(*alloc_buffer)(struct coresight_device *csdev, + struct perf_event *event, void **pages, + int nr_pages, bool overwrite); void (*free_buffer)(void *config); - int (*set_buffer)(struct coresight_device *csdev, - struct perf_output_handle *handle, - void *sink_config); - unsigned long (*reset_buffer)(struct coresight_device *csdev, - struct perf_output_handle *handle, - void *sink_config); - void (*update_buffer)(struct coresight_device *csdev, + unsigned long (*update_buffer)(struct coresight_device *csdev, struct perf_output_handle *handle, void *sink_config); }; @@ -237,10 +246,25 @@ struct coresight_ops_source { struct perf_event *event); }; +/** + * struct coresight_ops_helper - Operations for a helper device. + * + * All operations could pass in a device specific data, which could + * help the helper device to determine what to do. + * + * @enable : Enable the device + * @disable : Disable the device + */ +struct coresight_ops_helper { + int (*enable)(struct coresight_device *csdev, void *data); + int (*disable)(struct coresight_device *csdev, void *data); +}; + struct coresight_ops { const struct coresight_ops_sink *sink_ops; const struct coresight_ops_link *link_ops; const struct coresight_ops_source *source_ops; + const struct coresight_ops_helper *helper_ops; }; #ifdef CONFIG_CORESIGHT @@ -251,6 +275,14 @@ extern int coresight_enable(struct coresight_device *csdev); extern void coresight_disable(struct coresight_device *csdev); extern int coresight_timeout(void __iomem *addr, u32 offset, int position, int value); + +extern int coresight_claim_device(void __iomem *base); +extern int coresight_claim_device_unlocked(void __iomem *base); + +extern void coresight_disclaim_device(void __iomem *base); +extern void coresight_disclaim_device_unlocked(void __iomem *base); + +extern bool coresight_loses_context_with_cpu(struct device *dev); #else static inline struct coresight_device * coresight_register(struct coresight_desc *desc) { return NULL; } @@ -260,6 +292,23 @@ coresight_enable(struct coresight_device *csdev) { return -ENOSYS; } static inline void coresight_disable(struct coresight_device *csdev) {} static inline int coresight_timeout(void __iomem *addr, u32 offset, int position, int value) { return 1; } +static inline int coresight_claim_device_unlocked(void __iomem *base) +{ + return -EINVAL; +} + +static inline int coresight_claim_device(void __iomem *base) +{ + return -EINVAL; +} + +static inline void coresight_disclaim_device(void __iomem *base) {} +static inline void coresight_disclaim_device_unlocked(void __iomem *base) {} + +static inline bool coresight_loses_context_with_cpu(struct device *dev) +{ + return false; +} #endif #ifdef CONFIG_OF diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 9dc4a6d4eebd..cedfbbad32e3 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -59,6 +59,11 @@ extern ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_mds(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_tsx_async_abort(struct device *dev, + struct device_attribute *attr, + char *buf); +extern ssize_t cpu_show_itlb_multihit(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, @@ -205,28 +210,7 @@ static inline int cpuhp_smt_enable(void) { return 0; } static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; } #endif -/* - * These are used for a global "mitigations=" cmdline option for toggling - * optional CPU mitigations. - */ -enum cpu_mitigations { - CPU_MITIGATIONS_OFF, - CPU_MITIGATIONS_AUTO, - CPU_MITIGATIONS_AUTO_NOSMT, -}; - -extern enum cpu_mitigations cpu_mitigations; - -/* mitigations=off */ -static inline bool cpu_mitigations_off(void) -{ - return cpu_mitigations == CPU_MITIGATIONS_OFF; -} - -/* mitigations=auto,nosmt */ -static inline bool cpu_mitigations_auto_nosmt(void) -{ - return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; -} +extern bool cpu_mitigations_off(void); +extern bool cpu_mitigations_auto_nosmt(void); #endif /* _LINUX_CPU_H_ */ diff --git a/include/linux/cpufeature.h b/include/linux/cpufeature.h index 986c06c88d81..84d3c81b5978 100644 --- a/include/linux/cpufeature.h +++ b/include/linux/cpufeature.h @@ -45,7 +45,7 @@ * 'asm/cpufeature.h' of your favorite architecture. */ #define module_cpu_feature_match(x, __initfunc) \ -static struct cpu_feature const cpu_feature_match_ ## x[] = \ +static struct cpu_feature const __maybe_unused cpu_feature_match_ ## x[] = \ { { .feature = cpu_feature(x) }, { } }; \ MODULE_DEVICE_TABLE(cpu, cpu_feature_match_ ## x); \ \ diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 1149ce35d2a5..cd91e8c75b3d 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -57,6 +57,8 @@ static const struct file_operations __fops = { \ .llseek = no_llseek, \ } +typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); + #if defined(CONFIG_DEBUG_FS) struct dentry *debugfs_lookup(const char *name, struct dentry *parent); @@ -78,7 +80,6 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent); struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, const char *dest); -typedef struct vfsmount *(*debugfs_automount_t)(struct dentry *, void *); struct dentry *debugfs_create_automount(const char *name, struct dentry *parent, debugfs_automount_t f, @@ -207,7 +208,7 @@ static inline struct dentry *debugfs_create_symlink(const char *name, static inline struct dentry *debugfs_create_automount(const char *name, struct dentry *parent, - struct vfsmount *(*f)(void *), + debugfs_automount_t f, void *data) { return ERR_PTR(-ENODEV); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 91a063a1f3b3..248e6ecea471 100755 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -321,6 +321,12 @@ struct dm_target { * on max_io_len boundary. */ bool split_discard_bios:1; + + /* + * Set if inline crypto capabilities from this target's underlying + * device(s) can be exposed via the device-mapper device. + */ + bool may_passthrough_inline_crypto:1; }; /* Each target can link one of these into the table */ diff --git a/include/linux/device.h b/include/linux/device.h index f355fc7ea653..1ab043064875 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -684,7 +684,8 @@ extern unsigned long devm_get_free_pages(struct device *dev, gfp_t gfp_mask, unsigned int order); extern void devm_free_pages(struct device *dev, unsigned long addr); -void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res); +void __iomem *devm_ioremap_resource(struct device *dev, + const struct resource *res); /* allows to add/remove a custom action to devres stack */ int devm_add_action(struct device *dev, void (*action)(void *), void *data); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 7bf3b99e6fbb..9aee5f345e29 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -650,8 +650,7 @@ static inline unsigned int dma_get_max_seg_size(struct device *dev) return SZ_64K; } -static inline unsigned int dma_set_max_seg_size(struct device *dev, - unsigned int size) +static inline int dma_set_max_seg_size(struct device *dev, unsigned int size) { if (dev->dma_parms) { dev->dma_parms->max_segment_size = size; diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 8319101170fc..8089e28539f1 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -677,6 +677,7 @@ struct dma_filter { * @fill_align: alignment shift for memset operations * @dev_id: unique device ID * @dev: struct device reference for dma mapping api + * @owner: owner module (automatically set based on the provided dev) * @src_addr_widths: bit mask of src addr widths the device supports * @dst_addr_widths: bit mask of dst addr widths the device supports * @directions: bit mask of slave direction the device supports since @@ -738,6 +739,7 @@ struct dma_device { int dev_id; struct device *dev; + struct module *owner; u32 src_addr_widths; u32 dst_addr_widths; @@ -1362,8 +1364,11 @@ static inline int dma_get_slave_caps(struct dma_chan *chan, static inline int dmaengine_desc_set_reuse(struct dma_async_tx_descriptor *tx) { struct dma_slave_caps caps; + int ret; - dma_get_slave_caps(tx->chan, &caps); + ret = dma_get_slave_caps(tx->chan, &caps); + if (ret) + return ret; if (caps.descriptor_reuse) { tx->flags |= DMA_CTRL_REUSE; diff --git a/include/linux/edac.h b/include/linux/edac.h index cd75c173fd00..90f72336aea6 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -17,6 +17,7 @@ #include #include #include +#include #define EDAC_DEVICE_NAME_LEN 31 @@ -667,6 +668,6 @@ struct mem_ctl_info { /* * Maximum number of memory controllers in the coherent fabric. */ -#define EDAC_MAX_MCS 16 +#define EDAC_MAX_MCS 2 * MAX_NUMNODES #endif diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 284738996028..e8763a955f90 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -23,6 +23,7 @@ #define NULL_ADDR ((block_t)0) /* used as block_t addresses */ #define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ +#define COMPRESS_ADDR ((block_t)-2) /* used as compressed data flag */ #define F2FS_BYTES_TO_BLK(bytes) ((bytes) >> F2FS_BLKSIZE_BITS) #define F2FS_BLK_TO_BYTES(blk) ((blk) << F2FS_BLKSIZE_BITS) @@ -37,9 +38,6 @@ #define F2FS_MAX_QUOTAS 3 #define F2FS_ENC_UTF8_12_1 1 -#define F2FS_ENC_STRICT_MODE_FL (1 << 0) -#define f2fs_has_strict_mode(sbi) \ - (sbi->s_encoding_flags & F2FS_ENC_STRICT_MODE_FL) #define F2FS_IO_SIZE(sbi) (1 << F2FS_OPTION(sbi).write_io_size_bits) /* Blocks */ #define F2FS_IO_SIZE_KB(sbi) (1 << (F2FS_OPTION(sbi).write_io_size_bits + 2)) /* KB */ @@ -271,6 +269,10 @@ struct f2fs_inode { __le32 i_inode_checksum;/* inode meta checksum */ __le64 i_crtime; /* creation time */ __le32 i_crtime_nsec; /* creation time in nano scale */ + __le64 i_compr_blocks; /* # of compressed blocks */ + __u8 i_compress_algorithm; /* compress algorithm */ + __u8 i_log_cluster_size; /* log of cluster size */ + __le16 i_padding; /* padding */ __le32 i_extra_end[0]; /* for attribute size calculation */ } __packed; __le32 i_addr[DEF_ADDRS_PER_INODE]; /* Pointers to data blocks */ diff --git a/include/linux/fb.h b/include/linux/fb.h index bc24e48e396d..ccd1f74ca6ab 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -725,8 +725,6 @@ extern int fb_parse_edid(unsigned char *edid, struct fb_var_screeninfo *var); extern const unsigned char *fb_firmware_edid(struct device *device); extern void fb_edid_to_monspecs(unsigned char *edid, struct fb_monspecs *specs); -extern void fb_edid_add_monspecs(unsigned char *edid, - struct fb_monspecs *specs); extern void fb_destroy_modedb(struct fb_videomode *modedb); extern int fb_find_mode_cvt(struct fb_videomode *mode, int margins, int rb); extern unsigned char *fb_ddc_read(struct i2c_adapter *adapter); @@ -800,7 +798,6 @@ struct dmt_videomode { extern const char *fb_mode_option; extern const struct fb_videomode vesa_modes[]; -extern const struct fb_videomode cea_modes[65]; extern const struct dmt_videomode dmt_modes[]; struct fb_modelist { diff --git a/include/linux/fs.h b/include/linux/fs.h index 6f5a1ba546fc..90d15f08f4dc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1309,6 +1309,12 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_ACTIVE (1<<30) #define SB_NOUSER (1<<31) +/* These flags relate to encoding and casefolding */ +#define SB_ENC_STRICT_MODE_FL (1 << 0) + +#define sb_has_enc_strict_mode(sb) \ + (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL) + /* * Umount options */ @@ -1377,6 +1383,10 @@ struct super_block { #endif struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ +#ifdef CONFIG_UNICODE + struct unicode_map *s_encoding; + __u16 s_encoding_flags; +#endif struct list_head s_mounts; /* list of mounts; _not_ for fs use */ struct block_device *s_bdev; struct backing_dev_info *s_bdi; @@ -3190,6 +3200,20 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); +#ifdef CONFIG_UNICODE +extern int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str); +extern int generic_ci_d_compare(const struct dentry *dentry, unsigned int len, + const char *str, const struct qstr *name); +extern bool needs_casefold(const struct inode *dir); +#else +static inline bool needs_casefold(const struct inode *dir) +{ + return 0; +} +#endif +extern void generic_set_encrypted_ci_d_ops(struct inode *dir, + struct dentry *dentry); + #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, struct page *, struct page *, @@ -3441,4 +3465,15 @@ static inline void simple_fill_fsxattr(struct fsxattr *fa, __u32 xflags) fa->fsx_xflags = xflags; } +/* + * Flush file data before changing attributes. Caller must hold any locks + * required to prevent further writes to this file until we're done setting + * flags. + */ +static inline int inode_drain_writes(struct inode *inode) +{ + inode_dio_wait(inode); + return filemap_write_and_wait(inode->i_mapping); +} + #endif /* _LINUX_FS_H */ diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 72ea24ce52ab..db2dbc0b972a 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -20,7 +20,6 @@ #define FS_CRYPTO_BLOCK_SIZE 16 -struct fscrypt_ctx; struct fscrypt_info; struct fscrypt_str { @@ -62,18 +61,13 @@ struct fscrypt_operations { bool (*dummy_context)(struct inode *); bool (*empty_dir)(struct inode *); unsigned int max_namelen; -}; - -/* Decryption work */ -struct fscrypt_ctx { - union { - struct { - struct bio *bio; - struct work_struct work; - }; - struct list_head free_list; /* Free list */ - }; - u8 flags; /* Flags */ + bool (*has_stable_inodes)(struct super_block *sb); + void (*get_ino_and_lblk_bits)(struct super_block *sb, + int *ino_bits_ret, int *lblk_bits_ret); + bool (*inline_crypt_enabled)(struct super_block *sb); + int (*get_num_devices)(struct super_block *sb); + void (*get_devices)(struct super_block *sb, + struct request_queue **devs); }; static inline bool fscrypt_has_encryption_key(const struct inode *inode) @@ -82,6 +76,21 @@ static inline bool fscrypt_has_encryption_key(const struct inode *inode) return READ_ONCE(inode->i_crypt_info) != NULL; } +/** + * fscrypt_needs_contents_encryption() - check whether an inode needs + * contents encryption + * + * Return: %true iff the inode is an encrypted regular file and the kernel was + * built with fscrypt support. + * + * If you need to know whether the encrypt bit is set even when the kernel was + * built without fscrypt support, you must use IS_ENCRYPTED() directly instead. + */ +static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) +{ + return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); +} + static inline bool fscrypt_dummy_context_enabled(struct inode *inode) { return inode->i_sb->s_cop->dummy_context && @@ -102,8 +111,6 @@ static inline void fscrypt_handle_d_move(struct dentry *dentry) /* crypto.c */ extern void fscrypt_enqueue_decrypt_work(struct work_struct *); -extern struct fscrypt_ctx *fscrypt_get_ctx(gfp_t); -extern void fscrypt_release_ctx(struct fscrypt_ctx *); extern struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, unsigned int len, @@ -131,6 +138,7 @@ static inline struct page *fscrypt_pagecache_page(struct page *bounce_page) } extern void fscrypt_free_bounce_page(struct page *bounce_page); +extern int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags); /* policy.c */ extern int fscrypt_ioctl_set_policy(struct file *, const void __user *); @@ -146,6 +154,8 @@ extern int fscrypt_ioctl_remove_key(struct file *filp, void __user *arg); extern int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg); extern int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg); +extern int fscrypt_register_key_removal_notifier(struct notifier_block *nb); +extern int fscrypt_unregister_key_removal_notifier(struct notifier_block *nb); /* keysetup.c */ extern int fscrypt_get_encryption_info(struct inode *); @@ -165,87 +175,17 @@ static inline void fscrypt_free_filename(struct fscrypt_name *fname) extern int fscrypt_fname_alloc_buffer(const struct inode *, u32, struct fscrypt_str *); extern void fscrypt_fname_free_buffer(struct fscrypt_str *); -extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, - const struct fscrypt_str *, struct fscrypt_str *); - -#define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 - -/* Extracts the second-to-last ciphertext block; see explanation below */ -#define FSCRYPT_FNAME_DIGEST(name, len) \ - ((name) + round_down((len) - FS_CRYPTO_BLOCK_SIZE - 1, \ - FS_CRYPTO_BLOCK_SIZE)) - -#define FSCRYPT_FNAME_DIGEST_SIZE FS_CRYPTO_BLOCK_SIZE - -/** - * fscrypt_digested_name - alternate identifier for an on-disk filename - * - * When userspace lists an encrypted directory without access to the key, - * filenames whose ciphertext is longer than FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE - * bytes are shown in this abbreviated form (base64-encoded) rather than as the - * full ciphertext (base64-encoded). This is necessary to allow supporting - * filenames up to NAME_MAX bytes, since base64 encoding expands the length. - * - * To make it possible for filesystems to still find the correct directory entry - * despite not knowing the full on-disk name, we encode any filesystem-specific - * 'hash' and/or 'minor_hash' which the filesystem may need for its lookups, - * followed by the second-to-last ciphertext block of the filename. Due to the - * use of the CBC-CTS encryption mode, the second-to-last ciphertext block - * depends on the full plaintext. (Note that ciphertext stealing causes the - * last two blocks to appear "flipped".) This makes accidental collisions very - * unlikely: just a 1 in 2^128 chance for two filenames to collide even if they - * share the same filesystem-specific hashes. - * - * However, this scheme isn't immune to intentional collisions, which can be - * created by anyone able to create arbitrary plaintext filenames and view them - * without the key. Making the "digest" be a real cryptographic hash like - * SHA-256 over the full ciphertext would prevent this, although it would be - * less efficient and harder to implement, especially since the filesystem would - * need to calculate it for each directory entry examined during a search. - */ -struct fscrypt_digested_name { - u32 hash; - u32 minor_hash; - u8 digest[FSCRYPT_FNAME_DIGEST_SIZE]; -}; - -/** - * fscrypt_match_name() - test whether the given name matches a directory entry - * @fname: the name being searched for - * @de_name: the name from the directory entry - * @de_name_len: the length of @de_name in bytes - * - * Normally @fname->disk_name will be set, and in that case we simply compare - * that to the name stored in the directory entry. The only exception is that - * if we don't have the key for an encrypted directory and a filename in it is - * very long, then we won't have the full disk_name and we'll instead need to - * match against the fscrypt_digested_name. - * - * Return: %true if the name matches, otherwise %false. - */ -static inline bool fscrypt_match_name(const struct fscrypt_name *fname, - const u8 *de_name, u32 de_name_len) -{ - if (unlikely(!fname->disk_name.name)) { - const struct fscrypt_digested_name *n = - (const void *)fname->crypto_buf.name; - if (WARN_ON_ONCE(fname->usr_fname->name[0] != '_')) - return false; - if (de_name_len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) - return false; - return !memcmp(FSCRYPT_FNAME_DIGEST(de_name, de_name_len), - n->digest, FSCRYPT_FNAME_DIGEST_SIZE); - } - - if (de_name_len != fname->disk_name.len) - return false; - return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); -} +extern int fscrypt_fname_disk_to_usr(const struct inode *inode, + u32 hash, u32 minor_hash, + const struct fscrypt_str *iname, + struct fscrypt_str *oname); +extern bool fscrypt_match_name(const struct fscrypt_name *fname, + const u8 *de_name, u32 de_name_len); +extern u64 fscrypt_fname_siphash(const struct inode *dir, + const struct qstr *name); /* bio.c */ extern void fscrypt_decrypt_bio(struct bio *); -extern void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, - struct bio *bio); extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, unsigned int); @@ -260,6 +200,8 @@ extern int __fscrypt_prepare_rename(struct inode *old_dir, unsigned int flags); extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname); +extern int fscrypt_prepare_setflags(struct inode *inode, + unsigned int oldflags, unsigned int flags); extern int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, unsigned int max_len, struct fscrypt_str *disk_link); @@ -276,6 +218,11 @@ static inline bool fscrypt_has_encryption_key(const struct inode *inode) return false; } +static inline bool fscrypt_needs_contents_encryption(const struct inode *inode) +{ + return false; +} + static inline bool fscrypt_dummy_context_enabled(struct inode *inode) { return false; @@ -290,16 +237,6 @@ static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work) { } -static inline struct fscrypt_ctx *fscrypt_get_ctx(gfp_t gfp_flags) -{ - return ERR_PTR(-EOPNOTSUPP); -} - -static inline void fscrypt_release_ctx(struct fscrypt_ctx *ctx) -{ - return; -} - static inline struct page *fscrypt_encrypt_pagecache_blocks(struct page *page, unsigned int len, unsigned int offs, @@ -405,6 +342,18 @@ static inline int fscrypt_ioctl_get_key_status(struct file *filp, return -EOPNOTSUPP; } +static inline int fscrypt_register_key_removal_notifier( + struct notifier_block *nb) +{ + return 0; +} + +static inline int fscrypt_unregister_key_removal_notifier( + struct notifier_block *nb) +{ + return 0; +} + /* keysetup.c */ static inline int fscrypt_get_encryption_info(struct inode *inode) { @@ -457,7 +406,7 @@ static inline void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str) return; } -static inline int fscrypt_fname_disk_to_usr(struct inode *inode, +static inline int fscrypt_fname_disk_to_usr(const struct inode *inode, u32 hash, u32 minor_hash, const struct fscrypt_str *iname, struct fscrypt_str *oname) @@ -474,13 +423,15 @@ static inline bool fscrypt_match_name(const struct fscrypt_name *fname, return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); } -/* bio.c */ -static inline void fscrypt_decrypt_bio(struct bio *bio) +static inline u64 fscrypt_fname_siphash(const struct inode *dir, + const struct qstr *name) { + WARN_ON_ONCE(1); + return 0; } -static inline void fscrypt_enqueue_decrypt_bio(struct fscrypt_ctx *ctx, - struct bio *bio) +/* bio.c */ +static inline void fscrypt_decrypt_bio(struct bio *bio) { } @@ -521,6 +472,13 @@ static inline int __fscrypt_prepare_lookup(struct inode *dir, return -EOPNOTSUPP; } +static inline int fscrypt_prepare_setflags(struct inode *inode, + unsigned int oldflags, + unsigned int flags) +{ + return 0; +} + static inline int __fscrypt_prepare_symlink(struct inode *dir, unsigned int len, unsigned int max_len, @@ -547,6 +505,74 @@ static inline const char *fscrypt_get_symlink(struct inode *inode, } #endif /* !CONFIG_FS_ENCRYPTION */ +/* inline_crypt.c */ +#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT +extern bool fscrypt_inode_uses_inline_crypto(const struct inode *inode); + +extern bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode); + +extern void fscrypt_set_bio_crypt_ctx(struct bio *bio, + const struct inode *inode, + u64 first_lblk, gfp_t gfp_mask); + +extern void fscrypt_set_bio_crypt_ctx_bh(struct bio *bio, + const struct buffer_head *first_bh, + gfp_t gfp_mask); + +extern bool fscrypt_mergeable_bio(struct bio *bio, const struct inode *inode, + u64 next_lblk); + +extern bool fscrypt_mergeable_bio_bh(struct bio *bio, + const struct buffer_head *next_bh); + +#else /* CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ +static inline bool fscrypt_inode_uses_inline_crypto(const struct inode *inode) +{ + return false; +} + +static inline bool fscrypt_inode_uses_fs_layer_crypto(const struct inode *inode) +{ + return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); +} + +static inline void fscrypt_set_bio_crypt_ctx(struct bio *bio, + const struct inode *inode, + u64 first_lblk, gfp_t gfp_mask) { } + +static inline void fscrypt_set_bio_crypt_ctx_bh( + struct bio *bio, + const struct buffer_head *first_bh, + gfp_t gfp_mask) { } + +static inline bool fscrypt_mergeable_bio(struct bio *bio, + const struct inode *inode, + u64 next_lblk) +{ + return true; +} + +static inline bool fscrypt_mergeable_bio_bh(struct bio *bio, + const struct buffer_head *next_bh) +{ + return true; +} +#endif /* !CONFIG_FS_ENCRYPTION_INLINE_CRYPT */ + +#if IS_ENABLED(CONFIG_FS_ENCRYPTION) && IS_ENABLED(CONFIG_DM_DEFAULT_KEY) +static inline bool +fscrypt_inode_should_skip_dm_default_key(const struct inode *inode) +{ + return IS_ENCRYPTED(inode) && S_ISREG(inode->i_mode); +} +#else +static inline bool +fscrypt_inode_should_skip_dm_default_key(const struct inode *inode) +{ + return false; +} +#endif + /** * fscrypt_require_key - require an inode's encryption key * @inode: the inode we need the key for @@ -645,8 +671,9 @@ static inline int fscrypt_prepare_rename(struct inode *old_dir, * filenames are presented in encrypted form. Therefore, we'll try to set up * the directory's encryption key, but even without it the lookup can continue. * - * This also installs a custom ->d_revalidate() method which will invalidate the - * dentry if it was created without the key and the key is later added. + * After calling this function, a filesystem should ensure that it's dentry + * operations contain fscrypt_d_revalidate if DCACHE_ENCRYPTED_NAME was set, + * so that the dentry can be invalidated if the key is later added. * * Return: 0 on success; -ENOENT if key is unavailable but the filename isn't a * correctly formed encoded ciphertext name, so a negative dentry should be diff --git a/include/linux/fsverity.h b/include/linux/fsverity.h index 3b6b8ccebe7d..ecc604e61d61 100644 --- a/include/linux/fsverity.h +++ b/include/linux/fsverity.h @@ -77,6 +77,10 @@ struct fsverity_operations { * * @inode: the inode * @index: 0-based index of the page within the Merkle tree + * @num_ra_pages: The number of Merkle tree pages that should be + * prefetched starting at @index if the page at @index + * isn't already cached. Implementations may ignore this + * argument; it's only a performance optimization. * * This can be called at any time on an open verity file, as well as * between ->begin_enable_verity() and ->end_enable_verity(). It may be @@ -87,7 +91,8 @@ struct fsverity_operations { * Return: the page on success, ERR_PTR() on failure */ struct page *(*read_merkle_tree_page)(struct inode *inode, - pgoff_t index); + pgoff_t index, + unsigned long num_ra_pages); /** * Write a Merkle tree block to the given inode. diff --git a/include/linux/futex.h b/include/linux/futex.h index c0fb9a24bbd2..a4b6cba699bf 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -2,7 +2,9 @@ #ifndef _LINUX_FUTEX_H #define _LINUX_FUTEX_H +#include #include + #include struct inode; @@ -12,9 +14,6 @@ struct task_struct; long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3); -extern int -handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi); - /* * Futexes are matched on equal values of this key. * The key type depends on whether it's a shared or private mapping. @@ -54,24 +53,35 @@ union futex_key { #define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } } #ifdef CONFIG_FUTEX -extern void exit_robust_list(struct task_struct *curr); -#ifdef CONFIG_HAVE_FUTEX_CMPXCHG -#define futex_cmpxchg_enabled 1 -#else -extern int futex_cmpxchg_enabled; -#endif -#else -static inline void exit_robust_list(struct task_struct *curr) -{ -} -#endif +enum { + FUTEX_STATE_OK, + FUTEX_STATE_EXITING, + FUTEX_STATE_DEAD, +}; -#ifdef CONFIG_FUTEX_PI -extern void exit_pi_state_list(struct task_struct *curr); -#else -static inline void exit_pi_state_list(struct task_struct *curr) +static inline void futex_init_task(struct task_struct *tsk) { + tsk->robust_list = NULL; +#ifdef CONFIG_COMPAT + tsk->compat_robust_list = NULL; +#endif + INIT_LIST_HEAD(&tsk->pi_state_list); + tsk->pi_state_cache = NULL; + tsk->futex_state = FUTEX_STATE_OK; + mutex_init(&tsk->futex_exit_mutex); } + +void futex_exit_recursive(struct task_struct *tsk); +void futex_exit_release(struct task_struct *tsk); +void futex_exec_release(struct task_struct *tsk); + +long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, + u32 __user *uaddr2, u32 val2, u32 val3); +#else +static inline void futex_init_task(struct task_struct *tsk) { } +static inline void futex_exit_recursive(struct task_struct *tsk) { } +static inline void futex_exit_release(struct task_struct *tsk) { } +static inline void futex_exec_release(struct task_struct *tsk) { } #endif #endif diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 872f930f1b06..dd0a452373e7 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -51,7 +51,8 @@ typedef unsigned long (*genpool_algo_t)(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, - void *data, struct gen_pool *pool); + void *data, struct gen_pool *pool, + unsigned long start_addr); /* * General purpose special memory pool descriptor. @@ -131,24 +132,24 @@ extern void gen_pool_set_algo(struct gen_pool *pool, genpool_algo_t algo, extern unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, void *data, - struct gen_pool *pool); + struct gen_pool *pool, unsigned long start_addr); extern unsigned long gen_pool_fixed_alloc(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, - void *data, struct gen_pool *pool); + void *data, struct gen_pool *pool, unsigned long start_addr); extern unsigned long gen_pool_first_fit_align(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, - void *data, struct gen_pool *pool); + void *data, struct gen_pool *pool, unsigned long start_addr); extern unsigned long gen_pool_first_fit_order_align(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, - void *data, struct gen_pool *pool); + void *data, struct gen_pool *pool, unsigned long start_addr); extern unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, void *data, - struct gen_pool *pool); + struct gen_pool *pool, unsigned long start_addr); extern struct gen_pool *devm_gen_pool_create(struct device *dev, diff --git a/include/linux/gfp.h b/include/linux/gfp.h index d71757463610..7e95665ec014 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -329,6 +329,29 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) return !!(gfp_flags & __GFP_DIRECT_RECLAIM); } +/** + * gfpflags_normal_context - is gfp_flags a normal sleepable context? + * @gfp_flags: gfp_flags to test + * + * Test whether @gfp_flags indicates that the allocation is from the + * %current context and allowed to sleep. + * + * An allocation being allowed to block doesn't mean it owns the %current + * context. When direct reclaim path tries to allocate memory, the + * allocation context is nested inside whatever %current was doing at the + * time of the original allocation. The nested allocation may be allowed + * to block but modifying anything %current owns can corrupt the outer + * context's expectations. + * + * %true result from this function indicates that the allocation context + * can sleep and use anything that's associated with %current. + */ +static inline bool gfpflags_normal_context(const gfp_t gfp_flags) +{ + return (gfp_flags & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC)) == + __GFP_DIRECT_RECLAIM; +} + #ifdef CONFIG_HIGHMEM #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM #else diff --git a/include/linux/gnss.h b/include/linux/gnss.h new file mode 100644 index 000000000000..43546977098c --- /dev/null +++ b/include/linux/gnss.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * GNSS receiver support + * + * Copyright (C) 2018 Johan Hovold + */ + +#ifndef _LINUX_GNSS_H +#define _LINUX_GNSS_H + +#include +#include +#include +#include +#include +#include +#include + +struct gnss_device; + +enum gnss_type { + GNSS_TYPE_NMEA = 0, + GNSS_TYPE_SIRF, + GNSS_TYPE_UBX, + + GNSS_TYPE_COUNT +}; + +struct gnss_operations { + int (*open)(struct gnss_device *gdev); + void (*close)(struct gnss_device *gdev); + int (*write_raw)(struct gnss_device *gdev, const unsigned char *buf, + size_t count); +}; + +struct gnss_device { + struct device dev; + struct cdev cdev; + int id; + + enum gnss_type type; + unsigned long flags; + + struct rw_semaphore rwsem; + const struct gnss_operations *ops; + unsigned int count; + unsigned int disconnected:1; + + struct mutex read_mutex; + struct kfifo read_fifo; + wait_queue_head_t read_queue; + + struct mutex write_mutex; + char *write_buf; +}; + +struct gnss_device *gnss_allocate_device(struct device *parent); +void gnss_put_device(struct gnss_device *gdev); +int gnss_register_device(struct gnss_device *gdev); +void gnss_deregister_device(struct gnss_device *gdev); + +int gnss_insert_raw(struct gnss_device *gdev, const unsigned char *buf, + size_t count); + +static inline void gnss_set_drvdata(struct gnss_device *gdev, void *data) +{ + dev_set_drvdata(&gdev->dev, data); +} + +static inline void *gnss_get_drvdata(struct gnss_device *gdev) +{ + return dev_get_drvdata(&gdev->dev); +} + +#endif /* _LINUX_GNSS_H */ diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index c4a350d83578..79ad4f8b889d 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -404,7 +404,7 @@ static inline int gpiod_to_irq(const struct gpio_desc *desc) static inline struct gpio_desc *gpio_to_desc(unsigned gpio) { - return ERR_PTR(-EINVAL); + return NULL; } static inline int desc_to_gpio(const struct gpio_desc *desc) diff --git a/include/linux/hid.h b/include/linux/hid.h index 06e6e04e6c11..99c327842f12 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -363,6 +363,7 @@ struct hid_item { #define HID_GROUP_RMI 0x0100 #define HID_GROUP_WACOM 0x0101 #define HID_GROUP_LOGITECH_DJ_DEVICE 0x0102 +#define HID_GROUP_STEAM 0x0103 /* * HID protocol status @@ -398,6 +399,7 @@ struct hid_global { struct hid_local { unsigned usage[HID_MAX_USAGES]; /* usage array */ + u8 usage_size[HID_MAX_USAGES]; /* usage size array */ unsigned collection_index[HID_MAX_USAGES]; /* collection index array */ unsigned usage_index; unsigned usage_minimum; @@ -476,7 +478,7 @@ struct hid_report_enum { }; #define HID_MIN_BUFFER_SIZE 64 /* make sure there is at least a packet size of space */ -#define HID_MAX_BUFFER_SIZE 4096 /* 4kb */ +#define HID_MAX_BUFFER_SIZE 8192 /* 8kb */ #define HID_CONTROL_FIFO_SIZE 256 /* to init devices with >100 reports */ #define HID_OUTPUT_FIFO_SIZE 64 diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index ceb4ac236519..822256f2e3fd 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -415,12 +415,18 @@ extern u64 hrtimer_next_event_without(const struct hrtimer *exclude); extern bool hrtimer_active(const struct hrtimer *timer); -/* - * Helper function to check, whether the timer is on one of the queues +/** + * hrtimer_is_queued = check, whether the timer is on one of the queues + * @timer: Timer to check + * + * Returns: True if the timer is queued, false otherwise + * + * The function can be used lockless, but it gives only a current snapshot. */ -static inline int hrtimer_is_queued(struct hrtimer *timer) +static inline bool hrtimer_is_queued(struct hrtimer *timer) { - return timer->state & HRTIMER_STATE_ENQUEUED; + /* The READ_ONCE pairs with the update functions of timer->state */ + return !!(READ_ONCE(timer->state) & HRTIMER_STATE_ENQUEUED); } /* diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index 548fd535fd02..d433f5e292c9 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -28,6 +28,14 @@ static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) return (struct ethhdr *)skb_mac_header(skb); } +/* Prefer this version in TX path, instead of + * skb_reset_mac_header() + eth_hdr() + */ +static inline struct ethhdr *skb_eth_hdr(const struct sk_buff *skb) +{ + return (struct ethhdr *)skb->data; +} + static inline struct ethhdr *inner_eth_hdr(const struct sk_buff *skb) { return (struct ethhdr *)skb_inner_mac_header(skb); diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 39faaaf843e1..c91cf2dee12a 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -2,15 +2,10 @@ #ifndef _INET_DIAG_H_ #define _INET_DIAG_H_ 1 +#include #include -struct net; -struct sock; struct inet_hashinfo; -struct nlattr; -struct nlmsghdr; -struct sk_buff; -struct netlink_callback; struct inet_diag_handler { void (*dump)(struct sk_buff *skb, @@ -62,6 +57,17 @@ int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk); +static inline size_t inet_diag_msg_attrs_size(void) +{ + return nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + + nla_total_size(1) /* INET_DIAG_TOS */ +#if IS_ENABLED(CONFIG_IPV6) + + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(1) /* INET_DIAG_SKV6ONLY */ +#endif + + nla_total_size(4) /* INET_DIAG_MARK */ + + nla_total_size(4); /* INET_DIAG_CLASS_ID */ +} int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_msg *r, int ext, struct user_namespace *user_ns, bool net_admin); diff --git a/include/linux/init.h b/include/linux/init.h index f138e5b918c2..45f1b7723d86 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -153,15 +153,6 @@ extern bool initcall_debug; #ifndef __ASSEMBLY__ -#ifdef CONFIG_LTO_CLANG - /* prepend the variable name with __COUNTER__ to ensure correct ordering */ - #define ___initcall_name2(c, fn, id) __initcall_##c##_##fn##id - #define ___initcall_name1(c, fn, id) ___initcall_name2(c, fn, id) - #define __initcall_name(fn, id) ___initcall_name1(__COUNTER__, fn, id) -#else - #define __initcall_name(fn, id) __initcall_##fn##id -#endif - /* * initcalls are now grouped by functionality into separate * subsections. Ordering inside the subsections is determined @@ -177,10 +168,33 @@ extern bool initcall_debug; * and remove that completely, so the initcall sections have to be marked * as KEEP() in the linker script. */ +#ifdef CONFIG_LTO_CLANG + /* + * With LTO, the compiler doesn't necessarily obey link order for + * initcalls, and the initcall variable needs to be globally unique + * to avoid naming collisions. In order to preserve the correct + * order, we add each variable into its own section and generate a + * linker script (in scripts/link-vmlinux.sh) to ensure the order + * remains correct. We also add a __COUNTER__ prefix to the name, + * so we can retain the order of initcalls within each compilation + * unit, and __LINE__ to make the names more unique. + */ + #define ___lto_initcall(c, l, fn, id, __sec) \ + static initcall_t __initcall_##c##_##l##_##fn##id __used \ + __attribute__((__section__( #__sec \ + __stringify(.init..##c##_##l##_##fn)))) = fn; + #define __lto_initcall(c, l, fn, id, __sec) \ + ___lto_initcall(c, l, fn, id, __sec) -#define __define_initcall(fn, id) \ - static initcall_t __initcall_name(fn, id) __used \ - __attribute__((__section__(".initcall" #id ".init"))) = fn; + #define ___define_initcall(fn, id, __sec) \ + __lto_initcall(__COUNTER__, __LINE__, fn, id, __sec) +#else + #define ___define_initcall(fn, id, __sec) \ + static initcall_t __initcall_##fn##id __used \ + __attribute__((__section__(#__sec ".init"))) = fn; +#endif + +#define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id) /* * Early initcalls run before initializing SMP. @@ -219,13 +233,8 @@ extern bool initcall_debug; #define __exitcall(fn) \ static exitcall_t __exitcall_##fn __exit_call = fn -#define console_initcall(fn) \ - static initcall_t __initcall_##fn \ - __used __section(.con_initcall.init) = fn - -#define security_initcall(fn) \ - static initcall_t __initcall_##fn \ - __used __section(.security_initcall.init) = fn +#define console_initcall(fn) ___define_initcall(fn, con, .con_initcall) +#define security_initcall(fn) ___define_initcall(fn, security, .security_initcall) struct obs_kernel_param { const char *str; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index a6ab2f51f703..4def15c32a01 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -282,7 +282,8 @@ enum { #define QI_DEV_IOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32) #define QI_DEV_IOTLB_QDEP(qdep) (((qdep) & 0x1f) << 16) #define QI_DEV_IOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) -#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52)) +#define QI_DEV_IOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \ + ((u64)((pfsid >> 4) & 0xfff) << 52)) #define QI_DEV_IOTLB_SIZE 1 #define QI_DEV_IOTLB_MAX_INVS 32 @@ -307,7 +308,8 @@ enum { #define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32) #define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 16) #define QI_DEV_EIOTLB_QDEP(qd) ((u64)((qd) & 0x1f) << 4) -#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52)) +#define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | \ + ((u64)((pfsid >> 4) & 0xfff) << 52)) #define QI_DEV_EIOTLB_MAX_INVS 32 #define QI_PGRP_IDX(idx) (((u64)(idx)) << 55) diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 99bc5b3ae26e..733eaf95e207 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -130,7 +130,7 @@ static inline int intel_svm_unbind_mm(struct device *dev, int pasid) BUG(); } -static int intel_svm_is_pasid_valid(struct device *dev, int pasid) +static inline int intel_svm_is_pasid_valid(struct device *dev, int pasid) { return -EINVAL; } diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 845ff8c51564..0fe1fdedb8a1 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -152,7 +152,7 @@ #define GICR_PROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nCnB) #define GICR_PROPBASER_nC GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nC) #define GICR_PROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt) -#define GICR_PROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt) +#define GICR_PROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb) #define GICR_PROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWt) #define GICR_PROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWb) #define GICR_PROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWt) @@ -179,7 +179,7 @@ #define GICR_PENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nCnB) #define GICR_PENDBASER_nC GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nC) #define GICR_PENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt) -#define GICR_PENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt) +#define GICR_PENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb) #define GICR_PENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWt) #define GICR_PENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWb) #define GICR_PENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWt) @@ -238,7 +238,7 @@ #define GICR_VPROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB) #define GICR_VPROPBASER_nC GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC) #define GICR_VPROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt) -#define GICR_VPROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt) +#define GICR_VPROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWb) #define GICR_VPROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt) #define GICR_VPROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb) #define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt) @@ -264,7 +264,7 @@ #define GICR_VPENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB) #define GICR_VPENDBASER_nC GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC) #define GICR_VPENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt) -#define GICR_VPENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt) +#define GICR_VPENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWb) #define GICR_VPENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt) #define GICR_VPENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb) #define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt) @@ -337,7 +337,7 @@ #define GITS_CBASER_nCnB GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nCnB) #define GITS_CBASER_nC GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nC) #define GITS_CBASER_RaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt) -#define GITS_CBASER_RaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt) +#define GITS_CBASER_RaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWb) #define GITS_CBASER_WaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWt) #define GITS_CBASER_WaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWb) #define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt) @@ -361,7 +361,7 @@ #define GITS_BASER_nCnB GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nCnB) #define GITS_BASER_nC GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nC) #define GITS_BASER_RaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt) -#define GITS_BASER_RaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt) +#define GITS_BASER_RaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) #define GITS_BASER_WaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWt) #define GITS_BASER_WaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWb) #define GITS_BASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWt) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 6d64b9b8acd7..7bf6e1f251ed 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1593,7 +1593,7 @@ static inline int jbd2_space_needed(journal_t *journal) static inline unsigned long jbd2_log_space_left(journal_t *journal) { /* Allow for rounding errors */ - unsigned long free = journal->j_free - 32; + long free = journal->j_free - 32; if (journal->j_committing_transaction) { unsigned long committing = atomic_read(&journal-> @@ -1602,7 +1602,7 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal) /* Transaction + control blocks */ free -= committing + (committing >> JBD2_CONTROL_BLOCKS_SHIFT); } - return free; + return max_t(long, free, 0); } /* diff --git a/include/linux/kcov.h b/include/linux/kcov.h index f5d8ce4f4f86..a10e84707d82 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -8,23 +8,64 @@ struct task_struct; #ifdef CONFIG_KCOV -void kcov_task_init(struct task_struct *t); -void kcov_task_exit(struct task_struct *t); - enum kcov_mode { /* Coverage collection is not enabled yet. */ KCOV_MODE_DISABLED = 0, + /* KCOV was initialized, but tracing mode hasn't been chosen yet. */ + KCOV_MODE_INIT = 1, /* * Tracing coverage collection mode. * Covered PCs are collected in a per-task buffer. */ - KCOV_MODE_TRACE = 1, + KCOV_MODE_TRACE_PC = 2, + /* Collecting comparison operands mode. */ + KCOV_MODE_TRACE_CMP = 3, }; +#define KCOV_IN_CTXSW (1 << 30) + +void kcov_task_init(struct task_struct *t); +void kcov_task_exit(struct task_struct *t); + +#define kcov_prepare_switch(t) \ +do { \ + (t)->kcov_mode |= KCOV_IN_CTXSW; \ +} while (0) + +#define kcov_finish_switch(t) \ +do { \ + (t)->kcov_mode &= ~KCOV_IN_CTXSW; \ +} while (0) + +/* See Documentation/dev-tools/kcov.rst for usage details. */ +void kcov_remote_start(u64 handle); +void kcov_remote_stop(void); +u64 kcov_common_handle(void); + +static inline void kcov_remote_start_common(u64 id) +{ + kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_COMMON, id)); +} + +static inline void kcov_remote_start_usb(u64 id) +{ + kcov_remote_start(kcov_remote_handle(KCOV_SUBSYSTEM_USB, id)); +} + #else static inline void kcov_task_init(struct task_struct *t) {} static inline void kcov_task_exit(struct task_struct *t) {} +static inline void kcov_prepare_switch(struct task_struct *t) {} +static inline void kcov_finish_switch(struct task_struct *t) {} +static inline void kcov_remote_start(u64 handle) {} +static inline void kcov_remote_stop(void) {} +static inline u64 kcov_common_handle(void) +{ + return 0; +} +static inline void kcov_remote_start_common(u64 id) {} +static inline void kcov_remote_start_usb(u64 id) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index caf4d07273bb..21c4a3aa5b57 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -186,6 +186,7 @@ struct kernfs_root { /* private fields, do not use outside kernfs proper */ struct idr ino_idr; + u32 last_ino; u32 next_generation; struct kernfs_syscall_ops *syscall_ops; diff --git a/include/linux/key.h b/include/linux/key.h index 8a15cabe928d..afe4d6b90cad 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -345,6 +345,9 @@ static inline key_serial_t key_serial(const struct key *key) extern void key_set_timeout(struct key *, unsigned); +extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags, + key_perm_t perm); + /* * The permissions required on a key that we're looking up. */ diff --git a/include/linux/keyslot-manager.h b/include/linux/keyslot-manager.h new file mode 100644 index 000000000000..2f4aac2851bf --- /dev/null +++ b/include/linux/keyslot-manager.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ + +#ifndef __LINUX_KEYSLOT_MANAGER_H +#define __LINUX_KEYSLOT_MANAGER_H + +#include + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + +struct keyslot_manager; + +/** + * struct keyslot_mgmt_ll_ops - functions to manage keyslots in hardware + * @keyslot_program: Program the specified key into the specified slot in the + * inline encryption hardware. + * @keyslot_evict: Evict key from the specified keyslot in the hardware. + * The key is provided so that e.g. dm layers can evict + * keys from the devices that they map over. + * Returns 0 on success, -errno otherwise. + * @derive_raw_secret: (Optional) Derive a software secret from a + * hardware-wrapped key. Returns 0 on success, -EOPNOTSUPP + * if unsupported on the hardware, or another -errno code. + * + * This structure should be provided by storage device drivers when they set up + * a keyslot manager - this structure holds the function ptrs that the keyslot + * manager will use to manipulate keyslots in the hardware. + */ +struct keyslot_mgmt_ll_ops { + int (*keyslot_program)(struct keyslot_manager *ksm, + const struct blk_crypto_key *key, + unsigned int slot); + int (*keyslot_evict)(struct keyslot_manager *ksm, + const struct blk_crypto_key *key, + unsigned int slot); + int (*derive_raw_secret)(struct keyslot_manager *ksm, + const u8 *wrapped_key, + unsigned int wrapped_key_size, + u8 *secret, unsigned int secret_size); +}; + +struct keyslot_manager *keyslot_manager_create( + struct device *dev, + unsigned int num_slots, + const struct keyslot_mgmt_ll_ops *ksm_ops, + const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], + void *ll_priv_data); + +int keyslot_manager_get_slot_for_key(struct keyslot_manager *ksm, + const struct blk_crypto_key *key); + +void keyslot_manager_get_slot(struct keyslot_manager *ksm, unsigned int slot); + +void keyslot_manager_put_slot(struct keyslot_manager *ksm, unsigned int slot); + +bool keyslot_manager_crypto_mode_supported(struct keyslot_manager *ksm, + enum blk_crypto_mode_num crypto_mode, + unsigned int data_unit_size); + +int keyslot_manager_evict_key(struct keyslot_manager *ksm, + const struct blk_crypto_key *key); + +void keyslot_manager_reprogram_all_keys(struct keyslot_manager *ksm); + +void *keyslot_manager_private(struct keyslot_manager *ksm); + +void keyslot_manager_destroy(struct keyslot_manager *ksm); + +struct keyslot_manager *keyslot_manager_create_passthrough( + struct device *dev, + const struct keyslot_mgmt_ll_ops *ksm_ops, + const unsigned int crypto_mode_supported[BLK_ENCRYPTION_MODE_MAX], + void *ll_priv_data); + +void keyslot_manager_intersect_modes(struct keyslot_manager *parent, + const struct keyslot_manager *child); + +int keyslot_manager_derive_raw_secret(struct keyslot_manager *ksm, + const u8 *wrapped_key, + unsigned int wrapped_key_size, + u8 *secret, unsigned int secret_size); + +#endif /* CONFIG_BLK_INLINE_ENCRYPTION */ + +#endif /* __LINUX_KEYSLOT_MANAGER_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c8b9d3519c8e..30376715a607 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -140,7 +140,7 @@ static inline bool is_error_page(struct page *page) extern struct kmem_cache *kvm_vcpu_cache; -extern spinlock_t kvm_lock; +extern struct mutex kvm_lock; extern struct list_head vm_list; struct kvm_io_range { @@ -695,7 +695,7 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); -unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); +unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu); @@ -890,6 +890,7 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); bool kvm_is_reserved_pfn(kvm_pfn_t pfn); +bool kvm_is_zone_device_pfn(kvm_pfn_t pfn); struct kvm_irq_ack_notifier { struct hlist_node link; @@ -1013,6 +1014,7 @@ enum kvm_stat_kind { struct kvm_stat_data { int offset; + int mode; struct kvm *kvm; }; @@ -1020,6 +1022,7 @@ struct kvm_stats_debugfs_item { const char *name; int offset; enum kvm_stat_kind kind; + int mode; }; extern struct kvm_stats_debugfs_item debugfs_entries[]; extern struct dentry *kvm_debugfs_dir; @@ -1258,4 +1261,10 @@ static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_INVALID_WAKEUPS */ +typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data); + +int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, + uintptr_t data, const char *name, + struct task_struct **thread_ptr); + #endif diff --git a/include/linux/libata.h b/include/linux/libata.h index c5188dc389c8..93838d98e3f3 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1229,6 +1229,7 @@ struct pci_bits { }; extern int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits); +extern void ata_pci_shutdown_one(struct pci_dev *pdev); extern void ata_pci_remove_one(struct pci_dev *pdev); #ifdef CONFIG_PM diff --git a/include/linux/libfdt_env.h b/include/linux/libfdt_env.h index 14997285e53d..8b54c591678e 100644 --- a/include/linux/libfdt_env.h +++ b/include/linux/libfdt_env.h @@ -2,10 +2,14 @@ #ifndef _LIBFDT_ENV_H #define _LIBFDT_ENV_H +#include /* For INT_MAX */ #include #include +#define INT32_MAX S32_MAX +#define UINT32_MAX U32_MAX + typedef __be16 fdt16_t; typedef __be32 fdt32_t; typedef __be64 fdt64_t; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 3eaad2fbf284..84284e3353ed 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -18,6 +18,7 @@ #include #include #include +#include enum { /* when a dimm supports both PMEM and BLK access a label is required */ @@ -36,6 +37,9 @@ enum { /* region flag indicating to direct-map persistent memory by default */ ND_REGION_PAGEMAP = 0, + /* Platform provides asynchronous flush mechanism */ + ND_REGION_ASYNC = 3, + /* mark newly adjusted resources as requiring a label update */ DPA_RESOURCE_ADJUSTED = 1 << 0, }; @@ -53,12 +57,14 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc); +struct device_node; struct nvdimm_bus_descriptor { const struct attribute_group **attr_groups; unsigned long bus_dsm_mask; unsigned long cmd_mask; struct module *module; char *provider_name; + struct device_node *of_node; ndctl_fn ndctl; int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); int (*clear_to_send)(struct nvdimm_bus_descriptor *nd_desc, @@ -90,6 +96,7 @@ struct nd_mapping_desc { int position; }; +struct nd_region; struct nd_region_desc { struct resource *res; struct nd_mapping_desc *mapping; @@ -100,6 +107,8 @@ struct nd_region_desc { int num_lanes; int numa_node; unsigned long flags; + struct device_node *of_node; + int (*flush)(struct nd_region *nd_region, struct bio *bio); }; struct device; @@ -171,7 +180,8 @@ unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr); unsigned int nd_region_acquire_lane(struct nd_region *nd_region); void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); u64 nd_fletcher64(void *addr, size_t len, bool le); -void nvdimm_flush(struct nd_region *nd_region); +int nvdimm_flush(struct nd_region *nd_region, struct bio *bio); +int generic_nvdimm_flush(struct nd_region *nd_region); int nvdimm_has_flush(struct nd_region *nd_region); int nvdimm_has_cache(struct nd_region *nd_region); diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index 3ef96743db8d..1ecd35664e0d 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -72,10 +72,10 @@ static inline void hlist_nulls_add_head(struct hlist_nulls_node *n, struct hlist_nulls_node *first = h->first; n->next = first; - n->pprev = &h->first; + WRITE_ONCE(n->pprev, &h->first); h->first = n; if (!is_a_nulls(first)) - first->pprev = &n->next; + WRITE_ONCE(first->pprev, &n->next); } static inline void __hlist_nulls_del(struct hlist_nulls_node *n) @@ -85,13 +85,13 @@ static inline void __hlist_nulls_del(struct hlist_nulls_node *n) WRITE_ONCE(*pprev, next); if (!is_a_nulls(next)) - next->pprev = pprev; + WRITE_ONCE(next->pprev, pprev); } static inline void hlist_nulls_del(struct hlist_nulls_node *n) { __hlist_nulls_del(n); - n->pprev = LIST_POISON2; + WRITE_ONCE(n->pprev, LIST_POISON2); } /** diff --git a/include/linux/log2.h b/include/linux/log2.h index c373295f359f..df50139a0815 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -67,16 +67,13 @@ unsigned long __rounddown_pow_of_two(unsigned long n) } /** - * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value - * @n - parameter + * const_ilog2 - log base 2 of 32-bit or a 64-bit constant unsigned value + * @n: parameter * - * constant-capable log of base 2 calculation - * - this can be used to initialise global variables from constant data, hence - * the massive ternary operator construction - * - * selects the appropriately-sized optimised version depending on sizeof(n) + * Use this where sparse expects a true constant expression, e.g. for array + * indices. */ -#define ilog2(n) \ +#define const_ilog2(n) \ ( \ __builtin_constant_p(n) ? ( \ (n) < 2 ? 0 : \ @@ -142,10 +139,26 @@ unsigned long __rounddown_pow_of_two(unsigned long n) (n) & (1ULL << 4) ? 4 : \ (n) & (1ULL << 3) ? 3 : \ (n) & (1ULL << 2) ? 2 : \ - 1 ) : \ - (sizeof(n) <= 4) ? \ - __ilog2_u32(n) : \ - __ilog2_u64(n) \ + 1) : \ + -1) + +/** + * ilog2 - log base 2 of 32-bit or a 64-bit unsigned value + * @n: parameter + * + * constant-capable log of base 2 calculation + * - this can be used to initialise global variables from constant data, hence + * the massive ternary operator construction + * + * selects the appropriately-sized optimised version depending on sizeof(n) + */ +#define ilog2(n) \ +( \ + __builtin_constant_p(n) ? \ + const_ilog2(n) : \ + (sizeof(n) <= 4) ? \ + __ilog2_u32(n) : \ + __ilog2_u64(n) \ ) /** diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 7161d8e7ee79..7e9f59aeadb6 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1727,6 +1727,14 @@ union security_list_options { int (*bpf_prog_alloc_security)(struct bpf_prog_aux *aux); void (*bpf_prog_free_security)(struct bpf_prog_aux *aux); #endif /* CONFIG_BPF_SYSCALL */ +#ifdef CONFIG_PERF_EVENTS + int (*perf_event_open)(struct perf_event_attr *attr, int type); + int (*perf_event_alloc)(struct perf_event *event); + void (*perf_event_free)(struct perf_event *event); + int (*perf_event_read)(struct perf_event *event); + int (*perf_event_write)(struct perf_event *event); + +#endif }; struct security_hook_heads { @@ -1955,6 +1963,13 @@ struct security_hook_heads { struct list_head bpf_prog_alloc_security; struct list_head bpf_prog_free_security; #endif /* CONFIG_BPF_SYSCALL */ +#ifdef CONFIG_PERF_EVENTS + struct list_head perf_event_open; + struct list_head perf_event_alloc; + struct list_head perf_event_free; + struct list_head perf_event_read; + struct list_head perf_event_write; +#endif } __randomize_layout; /* diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 58e110aee7ab..d36a02935391 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -316,6 +316,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {} extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); +extern int __add_memory(int nid, u64 start, u64 size); extern int add_memory(int nid, u64 start, u64 size); extern int add_memory_resource(int nid, struct resource *resource, bool online); extern int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock); diff --git a/include/linux/mfd/intel_soc_pmic.h b/include/linux/mfd/intel_soc_pmic.h index 5aacdb017a9f..806a4f095312 100644 --- a/include/linux/mfd/intel_soc_pmic.h +++ b/include/linux/mfd/intel_soc_pmic.h @@ -25,6 +25,7 @@ struct intel_soc_pmic { int irq; struct regmap *regmap; struct regmap_irq_chip_data *irq_chip_data; + struct regmap_irq_chip_data *irq_chip_data_pwrbtn; struct regmap_irq_chip_data *irq_chip_data_tmu; struct regmap_irq_chip_data *irq_chip_data_bcu; struct regmap_irq_chip_data *irq_chip_data_adc; diff --git a/include/linux/mfd/max8997.h b/include/linux/mfd/max8997.h index cf815577bd68..3ae1fe743bc3 100644 --- a/include/linux/mfd/max8997.h +++ b/include/linux/mfd/max8997.h @@ -178,7 +178,6 @@ struct max8997_led_platform_data { struct max8997_platform_data { /* IRQ */ int ono; - int wakeup; /* ---- PMIC ---- */ struct max8997_regulator_data *regulators; diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h index 638222e43e48..93011c61aafd 100644 --- a/include/linux/mfd/mc13xxx.h +++ b/include/linux/mfd/mc13xxx.h @@ -247,6 +247,7 @@ struct mc13xxx_platform_data { #define MC13XXX_ADC0_TSMOD0 (1 << 12) #define MC13XXX_ADC0_TSMOD1 (1 << 13) #define MC13XXX_ADC0_TSMOD2 (1 << 14) +#define MC13XXX_ADC0_CHRGRAWDIV (1 << 15) #define MC13XXX_ADC0_ADINC1 (1 << 16) #define MC13XXX_ADC0_ADINC2 (1 << 17) diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index 6dec43826303..cffb23b8bd70 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -3733,6 +3733,9 @@ enum usb_irq_events { #define TPS65917_REGEN3_CTRL_MODE_ACTIVE 0x01 #define TPS65917_REGEN3_CTRL_MODE_ACTIVE_SHIFT 0x00 +/* POWERHOLD Mask field for PRIMARY_SECONDARY_PAD2 register */ +#define TPS65917_PRIMARY_SECONDARY_PAD2_GPIO_5_MASK 0xC + /* Registers for function RESOURCE */ #define TPS65917_REGEN1_CTRL 0x2 #define TPS65917_PLLEN_CTRL 0x3 diff --git a/include/linux/mfd/rk808.h b/include/linux/mfd/rk808.h index d3156594674c..338e0f6e2226 100644 --- a/include/linux/mfd/rk808.h +++ b/include/linux/mfd/rk808.h @@ -443,7 +443,7 @@ enum { enum { RK805_ID = 0x8050, RK808_ID = 0x0000, - RK818_ID = 0x8181, + RK818_ID = 0x8180, }; struct rk808 { diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 4de703d9e21f..5e1e50b8f8c4 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -56,6 +56,7 @@ #define UHID_MINOR 239 #define USERIO_MINOR 240 #define VHOST_VSOCK_MINOR 241 +#define RFKILL_MINOR 242 #define MISC_DYNAMIC_MINOR 255 struct device; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1d793d86d55f..6ffa181598e6 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8671,8 +8671,6 @@ struct mlx5_ifc_query_lag_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x40]; - struct mlx5_ifc_lagc_bits ctx; }; diff --git a/include/linux/mm.h b/include/linux/mm.h index 040e1bd11a24..98d3ab44cfee 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -581,11 +581,6 @@ static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags) extern void kvfree(const void *addr); -static inline atomic_t *compound_mapcount_ptr(struct page *page) -{ - return &page[1].compound_mapcount; -} - static inline int compound_mapcount(struct page *page) { VM_BUG_ON_PAGE(!PageCompound(page), page); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 10bc2286b4c4..7d24a6dec27d 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -240,6 +240,11 @@ struct page_frag_cache { typedef unsigned long vm_flags_t; +static inline atomic_t *compound_mapcount_ptr(struct page *page) +{ + return &page[1].compound_mapcount; +} + /* * A region containing a mapping of a non-memory backed file under NOMMU * conditions. These are held in a global tree and are pinned by the VMAs that diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 0a7abe8a407f..68bbbd9edc08 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -67,6 +67,8 @@ #define SDIO_VENDOR_ID_TI 0x0097 #define SDIO_DEVICE_ID_TI_WL1271 0x4076 +#define SDIO_VENDOR_ID_TI_WL1251 0x104c +#define SDIO_DEVICE_ID_TI_WL1251 0x9066 #define SDIO_VENDOR_ID_STE 0x0020 #define SDIO_DEVICE_ID_STE_CW1200 0x2280 diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 71b7a8bc82ea..a12bd7d1609f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -142,6 +142,9 @@ enum zone_stat_item { NR_MLOCK, /* mlock()ed pages found and moved off LRU */ NR_PAGETABLE, /* used for pagetables */ NR_KERNEL_STACK_KB, /* measured in KiB */ +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) + NR_KERNEL_SCS_BYTES, /* measured in bytes */ +#endif /* Second 128 byte cacheline */ NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) @@ -181,7 +184,7 @@ enum node_stat_item { NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ NR_DIRTIED, /* page dirtyings since bootup */ NR_WRITTEN, /* page writings since bootup */ - NR_INDIRECTLY_RECLAIMABLE_BYTES, /* measured in bytes */ + NR_KERNEL_MISC_RECLAIMABLE, /* reclaimable non-slab kernel pages */ NR_VM_NODE_STAT_ITEMS }; diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 1c2e8d6b7274..6db347b58644 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -519,9 +519,9 @@ struct platform_device_id { #define MDIO_NAME_SIZE 32 #define MDIO_MODULE_PREFIX "mdio:" -#define MDIO_ID_FMT "%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d" +#define MDIO_ID_FMT "%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u" #define MDIO_ID_ARGS(_id) \ - (_id)>>31, ((_id)>>30) & 1, ((_id)>>29) & 1, ((_id)>>28) & 1, \ + ((_id)>>31) & 1, ((_id)>>30) & 1, ((_id)>>29) & 1, ((_id)>>28) & 1, \ ((_id)>>27) & 1, ((_id)>>26) & 1, ((_id)>>25) & 1, ((_id)>>24) & 1, \ ((_id)>>23) & 1, ((_id)>>22) & 1, ((_id)>>21) & 1, ((_id)>>20) & 1, \ ((_id)>>19) & 1, ((_id)>>18) & 1, ((_id)>>17) & 1, ((_id)>>16) & 1, \ diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 6cd0f6b7658b..aadc2ee050f1 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -401,7 +401,7 @@ static inline struct device_node *mtd_get_of_node(struct mtd_info *mtd) return dev_of_node(&mtd->dev); } -static inline int mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops) +static inline u32 mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops) { return ops->mode == MTD_OPS_AUTO_OOB ? mtd->oobavail : mtd->oobsize; } diff --git a/include/linux/ndctl.h b/include/linux/ndctl.h new file mode 100644 index 000000000000..cd5a293ce3ae --- /dev/null +++ b/include/linux/ndctl.h @@ -0,0 +1,22 @@ +/* + * Copyright (c) 2014-2016, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU Lesser General Public License, + * version 2.1, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT ANY + * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for + * more details. + */ +#ifndef _LINUX_NDCTL_H +#define _LINUX_NDCTL_H + +#include + +enum { + ND_MIN_NAMESPACE_SIZE = PAGE_SIZE, +}; + +#endif /* _LINUX_NDCTL_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f707c3c145a6..f6ff9cd0e24a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1718,6 +1718,11 @@ struct net_device { unsigned char if_port; unsigned char dma; + /* Note : dev->mtu is often read without holding a lock. + * Writers usually hold RTNL. + * It is recommended to use READ_ONCE() to annotate the reads, + * and to use WRITE_ONCE() to annotate the writes. + */ unsigned int mtu; unsigned int min_mtu; unsigned int max_mtu; @@ -3310,6 +3315,7 @@ int dev_set_alias(struct net_device *, const char *, size_t); int dev_change_net_namespace(struct net_device *, struct net *, const char *); int __dev_set_mtu(struct net_device *, int); int dev_set_mtu(struct net_device *, int); +int dev_validate_mtu(struct net_device *dev, int mtu); void dev_set_group(struct net_device *, int); int dev_set_mac_address(struct net_device *, struct sockaddr *); int dev_change_carrier(struct net_device *, bool new_carrier); @@ -3524,7 +3530,7 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) if (debug_value == 0) /* no output */ return 0; /* set low N bits */ - return (1 << debug_value) - 1; + return (1U << debug_value) - 1; } static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 91a533bd3eb1..b7246b7e0bf4 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -445,13 +445,6 @@ ip6addrptr(const struct sk_buff *skb, bool src, struct in6_addr *addr) sizeof(*addr)); } -/* Calculate the bytes required to store the inclusive range of a-b */ -static inline int -bitmap_bytes(u32 a, u32 b) -{ - return 4 * ((((b - a + 8) / 8) + 3) / 4); -} - #include #include #include diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index a726f96010d5..e9c3b98df3e2 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -279,6 +279,8 @@ struct nvme_fc_remote_port { * * Host/Initiator Transport Entrypoints/Parameters: * + * @module: The LLDD module using the interface + * * @localport_delete: The LLDD initiates deletion of a localport via * nvme_fc_deregister_localport(). However, the teardown is * asynchronous. This routine is called upon the completion of the @@ -392,6 +394,8 @@ struct nvme_fc_remote_port { * Value is Mandatory. Allowed to be zero. */ struct nvme_fc_port_template { + struct module *module; + /* initiator-based functions */ void (*localport_delete)(struct nvme_fc_local_port *); void (*remoteport_delete)(struct nvme_fc_remote_port *); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 6900ad07554b..5de46a658548 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -568,12 +568,28 @@ static inline int PageTransCompound(struct page *page) * * Unlike PageTransCompound, this is safe to be called only while * split_huge_pmd() cannot run from under us, like if protected by the - * MMU notifier, otherwise it may result in page->_mapcount < 0 false + * MMU notifier, otherwise it may result in page->_mapcount check false * positives. + * + * We have to treat page cache THP differently since every subpage of it + * would get _mapcount inc'ed once it is PMD mapped. But, it may be PTE + * mapped in the current process so comparing subpage's _mapcount to + * compound_mapcount to filter out PTE mapped case. */ static inline int PageTransCompoundMap(struct page *page) { - return PageTransCompound(page) && atomic_read(&page->_mapcount) < 0; + struct page *head; + + if (!PageTransCompound(page)) + return 0; + + if (PageAnon(page)) + return atomic_read(&page->_mapcount) < 0; + + head = compound_head(page); + /* File THP is PMD mapped and not PTE mapped */ + return atomic_read(&page->_mapcount) == + atomic_read(compound_mapcount_ptr(head)); } /* diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 00ae925e2878..ff924b5fd0b8 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -54,6 +54,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include struct perf_callchain_entry { @@ -408,7 +409,7 @@ struct pmu { /* * Set up pmu-private data structures for an AUX area */ - void *(*setup_aux) (int cpu, void **pages, + void *(*setup_aux) (struct perf_event *event, void **pages, int nr_pages, bool overwrite); /* optional */ @@ -466,7 +467,7 @@ struct pmu { */ struct perf_addr_filter { struct list_head entry; - struct inode *inode; + struct path path; unsigned long offset; unsigned long size; unsigned int range : 1, @@ -489,6 +490,11 @@ struct perf_addr_filters_head { unsigned int nr_file_filters; }; +struct perf_addr_filter_range { + unsigned long start; + unsigned long size; +}; + /** * enum perf_event_active_state - the states of a event */ @@ -677,7 +683,7 @@ struct perf_event { /* address range filters */ struct perf_addr_filters_head addr_filters; /* vma address array for file-based filders */ - unsigned long *addr_filters_offs; + struct perf_addr_filter_range *addr_filter_ranges; unsigned long addr_filters_gen; void (*destroy)(struct perf_event *); @@ -707,6 +713,9 @@ struct perf_event { int cgrp_defer_enabled; #endif +#ifdef CONFIG_SECURITY + void *security; +#endif struct list_head sb_list; #endif /* CONFIG_PERF_EVENTS */ }; @@ -1170,24 +1179,41 @@ extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, int perf_event_max_stack_handler(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -static inline bool perf_paranoid_any(void) -{ - return sysctl_perf_event_paranoid > 2; -} +/* Access to perf_event_open(2) syscall. */ +#define PERF_SECURITY_OPEN 0 -static inline bool perf_paranoid_tracepoint_raw(void) +/* Finer grained perf_event_open(2) access control. */ +#define PERF_SECURITY_CPU 1 +#define PERF_SECURITY_KERNEL 2 +#define PERF_SECURITY_TRACEPOINT 3 + +static inline int perf_is_paranoid(void) { return sysctl_perf_event_paranoid > -1; } -static inline bool perf_paranoid_cpu(void) +static inline int perf_allow_kernel(struct perf_event_attr *attr) { - return sysctl_perf_event_paranoid > 0; + if (sysctl_perf_event_paranoid > 1 && !capable(CAP_SYS_ADMIN)) + return -EACCES; + + return security_perf_event_open(attr, PERF_SECURITY_KERNEL); } -static inline bool perf_paranoid_kernel(void) +static inline int perf_allow_cpu(struct perf_event_attr *attr) { - return sysctl_perf_event_paranoid > 1; + if (sysctl_perf_event_paranoid > 0 && !capable(CAP_SYS_ADMIN)) + return -EACCES; + + return security_perf_event_open(attr, PERF_SECURITY_CPU); +} + +static inline int perf_allow_tracepoint(struct perf_event_attr *attr) +{ + if (sysctl_perf_event_paranoid > -1 && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT); } extern void perf_event_init(void); diff --git a/include/linux/phy.h b/include/linux/phy.h index efc04c2d92c9..8b6850707e62 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -372,6 +372,7 @@ struct phy_c45_device_ids { * is_pseudo_fixed_link: Set to true if this phy is an Ethernet switch, etc. * has_fixups: Set to true if this phy has fixups/quirks. * suspended: Set to true if this phy has been suspended successfully. + * suspended_by_mdio_bus: Set to true if this phy was suspended by MDIO bus. * sysfs_links: Internal boolean tracking sysfs symbolic links setup/removal. * loopback_enabled: Set true if this phy has been loopbacked successfully. * state: state of the PHY for management purposes @@ -410,6 +411,7 @@ struct phy_device { bool is_pseudo_fixed_link; bool has_fixups; bool suspended; + bool suspended_by_mdio_bus; bool sysfs_links; bool loopback_enabled; diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 896cb71a382c..1a1d58ebffbf 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -49,6 +49,7 @@ struct dw_dma_slave { * @data_width: Maximum data width supported by hardware per AHB master * (in bytes, power of 2) * @multi_block: Multi block transfers supported by hardware per channel. + * @protctl: Protection control signals setting per channel. */ struct dw_dma_platform_data { unsigned int nr_channels; @@ -65,6 +66,11 @@ struct dw_dma_platform_data { unsigned char nr_masters; unsigned char data_width[DW_DMA_MAX_NR_MASTERS]; unsigned char multi_block[DW_DMA_MAX_NR_CHANNELS]; +#define CHAN_PROTCTL_PRIVILEGED BIT(0) +#define CHAN_PROTCTL_BUFFERABLE BIT(1) +#define CHAN_PROTCTL_CACHEABLE BIT(2) +#define CHAN_PROTCTL_MASK GENMASK(2, 0) + unsigned char protctl; }; #endif /* _PLATFORM_DATA_DMA_DW_H */ diff --git a/include/linux/platform_data/dma-ep93xx.h b/include/linux/platform_data/dma-ep93xx.h index f8f1f6b952a6..eb9805bb3fe8 100644 --- a/include/linux/platform_data/dma-ep93xx.h +++ b/include/linux/platform_data/dma-ep93xx.h @@ -85,7 +85,7 @@ static inline enum dma_transfer_direction ep93xx_dma_chan_direction(struct dma_chan *chan) { if (!ep93xx_dma_chan_is_m2p(chan)) - return DMA_NONE; + return DMA_TRANS_NONE; /* even channels are for TX, odd for RX */ return (chan->chan_id % 2 == 0) ? DMA_MEM_TO_DEV : DMA_DEV_TO_MEM; diff --git a/include/linux/platform_data/dma-imx-sdma.h b/include/linux/platform_data/dma-imx-sdma.h index 6eaa53cef0bd..30e676b36b24 100644 --- a/include/linux/platform_data/dma-imx-sdma.h +++ b/include/linux/platform_data/dma-imx-sdma.h @@ -51,7 +51,10 @@ struct sdma_script_start_addrs { /* End of v2 array */ s32 zcanfd_2_mcu_addr; s32 zqspi_2_mcu_addr; + s32 mcu_2_ecspi_addr; /* End of v3 array */ + s32 mcu_2_zqspi_addr; + /* End of v4 array */ }; /** diff --git a/include/linux/poll.h b/include/linux/poll.h index d384f12abdd5..c7acd7c09747 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -15,7 +15,11 @@ extern struct ctl_table epoll_table[]; /* for sysctl */ /* ~832 bytes of stack space used max in sys_select/sys_poll before allocating additional memory. */ +#ifdef __clang__ +#define MAX_STACK_ALLOC 768 +#else #define MAX_STACK_ALLOC 832 +#endif #define FRONTEND_STACK_ALLOC 256 #define SELECT_STACK_ALLOC FRONTEND_STACK_ALLOC #define POLL_STACK_ALLOC FRONTEND_STACK_ALLOC diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 38d8225510f1..3097b08c55cb 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -82,29 +82,32 @@ struct posix_clock_operations { * * @ops: Functional interface to the clock * @cdev: Character device instance for this clock - * @kref: Reference count. + * @dev: Pointer to the clock's device. * @rwsem: Protects the 'zombie' field from concurrent access. * @zombie: If 'zombie' is true, then the hardware has disappeared. - * @release: A function to free the structure when the reference count reaches - * zero. May be NULL if structure is statically allocated. * * Drivers should embed their struct posix_clock within a private * structure, obtaining a reference to it during callbacks using * container_of(). + * + * Drivers should supply an initialized but not exposed struct device + * to posix_clock_register(). It is used to manage lifetime of the + * driver's private structure. It's 'release' field should be set to + * a release function for this private structure. */ struct posix_clock { struct posix_clock_operations ops; struct cdev cdev; - struct kref kref; + struct device *dev; struct rw_semaphore rwsem; bool zombie; - void (*release)(struct posix_clock *clk); }; /** * posix_clock_register() - register a new clock - * @clk: Pointer to the clock. Caller must provide 'ops' and 'release' - * @devid: Allocated device id + * @clk: Pointer to the clock. Caller must provide 'ops' field + * @dev: Pointer to the initialized device. Caller must provide + * 'release' field * * A clock driver calls this function to register itself with the * clock device subsystem. If 'clk' points to dynamically allocated @@ -113,7 +116,7 @@ struct posix_clock { * * Returns zero on success, non-zero otherwise. */ -int posix_clock_register(struct posix_clock *clk, dev_t devid); +int posix_clock_register(struct posix_clock *clk, struct device *dev); /** * posix_clock_unregister() - unregister a clock diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index e5380471c2cd..428278a44c7d 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -44,6 +44,9 @@ extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); #else + +#include + static inline int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus) { diff --git a/include/linux/quota.h b/include/linux/quota.h index 5ac9de4fcd6f..aa9a42eceab0 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -263,7 +263,7 @@ enum { }; struct dqstats { - int stat[_DQST_DQSTAT_LAST]; + unsigned long stat[_DQST_DQSTAT_LAST]; struct percpu_counter counter[_DQST_DQSTAT_LAST]; }; diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index a109e6107c06..50ab5d6ccc4e 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -51,6 +51,16 @@ static inline struct dquot *dqgrab(struct dquot *dquot) atomic_inc(&dquot->dq_count); return dquot; } + +static inline bool dquot_is_busy(struct dquot *dquot) +{ + if (test_bit(DQ_MOD_B, &dquot->dq_flags)) + return true; + if (atomic_read(&dquot->dq_count) > 1) + return true; + return false; +} + void dqput(struct dquot *dquot); int dquot_scan_active(struct super_block *sb, int (*fn)(struct dquot *dquot, unsigned long priv), diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index e4b257ff881b..cf64a9492256 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -34,7 +34,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) { if (!hlist_nulls_unhashed(n)) { __hlist_nulls_del(n); - n->pprev = NULL; + WRITE_ONCE(n->pprev, NULL); } } @@ -66,7 +66,7 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) static inline void hlist_nulls_del_rcu(struct hlist_nulls_node *n) { __hlist_nulls_del(n); - n->pprev = LIST_POISON2; + WRITE_ONCE(n->pprev, LIST_POISON2); } /** @@ -94,10 +94,47 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, struct hlist_nulls_node *first = h->first; n->next = first; - n->pprev = &h->first; + WRITE_ONCE(n->pprev, &h->first); rcu_assign_pointer(hlist_nulls_first_rcu(h), n); if (!is_a_nulls(first)) - first->pprev = &n->next; + WRITE_ONCE(first->pprev, &n->next); +} + +/** + * hlist_nulls_add_tail_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the specified hlist_nulls, + * while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() + * or hlist_nulls_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). + */ +static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, + struct hlist_nulls_head *h) +{ + struct hlist_nulls_node *i, *last = NULL; + + /* Note: write side code, so rcu accessors are not needed. */ + for (i = h->first; !is_a_nulls(i); i = i->next) + last = i; + + if (last) { + n->next = last->next; + n->pprev = &last->next; + rcu_assign_pointer(hlist_next_rcu(last), n); + } else { + hlist_nulls_add_head_rcu(n, h); + } } /** diff --git a/include/linux/regulator/ab8500.h b/include/linux/regulator/ab8500.h index d8ecefaf63ca..3f6b8b9ef49d 100644 --- a/include/linux/regulator/ab8500.h +++ b/include/linux/regulator/ab8500.h @@ -38,14 +38,11 @@ enum ab8505_regulator_id { AB8505_LDO_AUX6, AB8505_LDO_INTCORE, AB8505_LDO_ADC, - AB8505_LDO_USB, AB8505_LDO_AUDIO, AB8505_LDO_ANAMIC1, AB8505_LDO_ANAMIC2, AB8505_LDO_AUX8, AB8505_LDO_ANA, - AB8505_SYSCLKREQ_2, - AB8505_SYSCLKREQ_4, AB8505_NUM_REGULATORS, }; diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 25602afd4844..f3f76051e8b0 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -508,7 +508,7 @@ static inline int regulator_get_error_flags(struct regulator *regulator, static inline int regulator_set_load(struct regulator *regulator, int load_uA) { - return REGULATOR_MODE_NORMAL; + return 0; } static inline int regulator_allow_bypass(struct regulator *regulator, diff --git a/include/linux/reset-controller.h b/include/linux/reset-controller.h index adb88f8cefbc..576caaf0c9af 100644 --- a/include/linux/reset-controller.h +++ b/include/linux/reset-controller.h @@ -7,7 +7,7 @@ struct reset_controller_dev; /** - * struct reset_control_ops + * struct reset_control_ops - reset controller driver callbacks * * @reset: for self-deasserting resets, does all necessary * things to reset the device diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index d87dfa41142d..8b7bce207229 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -267,6 +267,16 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, unsigned long offset, unsigned long size, gfp_t gfp_mask); +#ifdef CONFIG_SGL_ALLOC +struct scatterlist *sgl_alloc_order(unsigned long long length, + unsigned int order, bool chainable, + gfp_t gfp, unsigned int *nent_p); +struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, + unsigned int *nent_p); +void sgl_free_order(struct scatterlist *sgl, int order); +void sgl_free(struct scatterlist *sgl); +#endif /* CONFIG_SGL_ALLOC */ + size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen, off_t skip, bool to_buffer); diff --git a/include/linux/sched.h b/include/linux/sched.h index ff0a01b696ff..223c6b9c4083 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1189,6 +1189,8 @@ struct task_struct { #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; + struct mutex futex_exit_mutex; + unsigned int futex_state; #endif #ifdef CONFIG_PERF_EVENTS struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; @@ -1318,8 +1320,10 @@ struct task_struct { #endif /* CONFIG_TRACING */ #ifdef CONFIG_KCOV + /* See kernel/kcov.c for more details. */ + /* Coverage collection mode enabled for this task (0 if disabled): */ - enum kcov_mode kcov_mode; + unsigned int kcov_mode; /* Size of the kcov_area: */ unsigned int kcov_size; @@ -1329,6 +1333,12 @@ struct task_struct { /* KCOV descriptor wired with this task or NULL: */ struct kcov *kcov; + + /* KCOV common handle for remote coverage collection: */ + u64 kcov_handle; + + /* KCOV sequence number: */ + int kcov_sequence; #endif #ifdef CONFIG_MEMCG @@ -1572,7 +1582,6 @@ extern struct pid *cad_pid; */ #define PF_IDLE 0x00000002 /* I am an IDLE thread */ #define PF_EXITING 0x00000004 /* Getting shut down */ -#define PF_EXITPIDONE 0x00000008 /* PI exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ #define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */ diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 9f7cc1d7ec4a..efb9e12e7f91 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -125,8 +125,10 @@ extern struct mm_struct *get_task_mm(struct task_struct *task); * succeeds. */ extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); -/* Remove the current tasks stale references to the old mm_struct */ -extern void mm_release(struct task_struct *, struct mm_struct *); +/* Remove the current tasks stale references to the old mm_struct on exit() */ +extern void exit_mm_release(struct task_struct *, struct mm_struct *); +/* Remove the current tasks stale references to the old mm_struct on exec() */ +extern void exec_mm_release(struct task_struct *, struct mm_struct *); #ifdef CONFIG_MEMCG extern void mm_update_next_owner(struct mm_struct *mm); diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 2d0ed38ed585..15644d84149f 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -39,6 +39,8 @@ void __noreturn do_task_dead(void); extern void proc_caches_init(void); +extern void fork_init(void); + extern void release_task(struct task_struct * p); #ifdef CONFIG_HAVE_COPY_THREAD_TLS diff --git a/include/linux/scs.h b/include/linux/scs.h new file mode 100644 index 000000000000..c5572fd770b0 --- /dev/null +++ b/include/linux/scs.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Shadow Call Stack support. + * + * Copyright (C) 2019 Google LLC + */ + +#ifndef _LINUX_SCS_H +#define _LINUX_SCS_H + +#include +#include +#include + +#ifdef CONFIG_SHADOW_CALL_STACK + +/* + * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit + * architecture) provided ~40% safety margin on stack usage while keeping + * memory allocation overhead reasonable. + */ +#define SCS_SIZE 1024UL +#define GFP_SCS (GFP_KERNEL | __GFP_ZERO) + +/* + * A random number outside the kernel's virtual address space to mark the + * end of the shadow stack. + */ +#define SCS_END_MAGIC 0xaf0194819b1635f6UL + +#define task_scs(tsk) (task_thread_info(tsk)->shadow_call_stack) + +static inline void task_set_scs(struct task_struct *tsk, void *s) +{ + task_scs(tsk) = s; +} + +extern void scs_init(void); +extern void scs_task_reset(struct task_struct *tsk); +extern int scs_prepare(struct task_struct *tsk, int node); +extern bool scs_corrupted(struct task_struct *tsk); +extern void scs_release(struct task_struct *tsk); + +#else /* CONFIG_SHADOW_CALL_STACK */ + +#define task_scs(tsk) NULL + +static inline void task_set_scs(struct task_struct *tsk, void *s) {} +static inline void scs_init(void) {} +static inline void scs_task_reset(struct task_struct *tsk) {} +static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; } +static inline bool scs_corrupted(struct task_struct *tsk) { return false; } +static inline void scs_release(struct task_struct *tsk) {} + +#endif /* CONFIG_SHADOW_CALL_STACK */ + +#endif /* _LINUX_SCS_H */ diff --git a/include/linux/security.h b/include/linux/security.h index 73f1ef625d40..666c75c2269c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1801,5 +1801,42 @@ static inline void free_secdata(void *secdata) { } #endif /* CONFIG_SECURITY */ -#endif /* ! __LINUX_SECURITY_H */ +#ifdef CONFIG_PERF_EVENTS +struct perf_event_attr; +struct perf_event; +#ifdef CONFIG_SECURITY +extern int security_perf_event_open(struct perf_event_attr *attr, int type); +extern int security_perf_event_alloc(struct perf_event *event); +extern void security_perf_event_free(struct perf_event *event); +extern int security_perf_event_read(struct perf_event *event); +extern int security_perf_event_write(struct perf_event *event); +#else +static inline int security_perf_event_open(struct perf_event_attr *attr, + int type) +{ + return 0; +} + +static inline int security_perf_event_alloc(struct perf_event *event) +{ + return 0; +} + +static inline void security_perf_event_free(struct perf_event *event) +{ +} + +static inline int security_perf_event_read(struct perf_event *event) +{ + return 0; +} + +static inline int security_perf_event_write(struct perf_event *event) +{ + return 0; +} +#endif /* CONFIG_SECURITY */ +#endif /* CONFIG_PERF_EVENTS */ + +#endif /* ! __LINUX_SECURITY_H */ diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 09c6e28746f9..ab437dd2e3b9 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -140,6 +140,20 @@ void *__seq_open_private(struct file *, const struct seq_operations *, int); int seq_open_private(struct file *, const struct seq_operations *, int); int seq_release_private(struct inode *, struct file *); +#define DEFINE_SHOW_ATTRIBUTE(__name) \ +static int __name ## _open(struct inode *inode, struct file *file) \ +{ \ + return single_open(file, __name ## _show, inode->i_private); \ +} \ + \ +static const struct file_operations __name ## _fops = { \ + .owner = THIS_MODULE, \ + .open = __name ## _open, \ + .read = seq_read, \ + .llseek = seq_lseek, \ + .release = single_release, \ +} + static inline struct user_namespace *seq_user_ns(struct seq_file *seq) { #ifdef CONFIG_USER_NS diff --git a/include/linux/serdev.h b/include/linux/serdev.h index d609e6dc5bad..49f6e382c94e 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -164,9 +164,21 @@ int serdev_device_add(struct serdev_device *); void serdev_device_remove(struct serdev_device *); struct serdev_controller *serdev_controller_alloc(struct device *, size_t); -int serdev_controller_add(struct serdev_controller *); +int serdev_controller_add_platform(struct serdev_controller *, bool); void serdev_controller_remove(struct serdev_controller *); +/** + * serdev_controller_add() - Add an serdev controller + * @ctrl: controller to be registered. + * + * Register a controller previously allocated via serdev_controller_alloc() with + * the serdev core. + */ +static inline int serdev_controller_add(struct serdev_controller *ctrl) +{ + return serdev_controller_add_platform(ctrl, false); +} + static inline void serdev_controller_write_wakeup(struct serdev_controller *ctrl) { struct serdev_device *serdev = ctrl->serdev; diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index a27ef5f56431..791a6be0e394 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -134,6 +134,10 @@ struct uart_8250_port { void (*dl_write)(struct uart_8250_port *, int); struct uart_8250_em485 *em485; + + /* Serial port overrun backoff */ + struct delayed_work overrun_backoff; + u32 overrun_backoff_time_ms; }; static inline struct uart_8250_port *up_to_u8250p(struct uart_port *up) diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 868b60a79c0b..b2a7b7c15451 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -166,6 +166,7 @@ struct uart_port { struct console *cons; /* struct console, if any */ #if defined(CONFIG_SERIAL_CORE_CONSOLE) || defined(SUPPORT_SYSRQ) unsigned long sysrq; /* sysrq timeout */ + unsigned int sysrq_ch; /* char for sysrq */ #endif /* flags must be updated while holding port mutex */ @@ -474,8 +475,42 @@ uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) } return 0; } +static inline int +uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) +{ + if (port->sysrq) { + if (ch && time_before(jiffies, port->sysrq)) { + port->sysrq_ch = ch; + port->sysrq = 0; + return 1; + } + port->sysrq = 0; + } + return 0; +} +static inline void +uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) +{ + int sysrq_ch; + + sysrq_ch = port->sysrq_ch; + port->sysrq_ch = 0; + + spin_unlock_irqrestore(&port->lock, irqflags); + + if (sysrq_ch) + handle_sysrq(sysrq_ch); +} #else -#define uart_handle_sysrq_char(port,ch) ({ (void)port; 0; }) +static inline int +uart_handle_sysrq_char(struct uart_port *port, unsigned int ch) { return 0; } +static inline int +uart_prepare_sysrq_char(struct uart_port *port, unsigned int ch) { return 0; } +static inline void +uart_unlock_and_check_sysrq(struct uart_port *port, unsigned long irqflags) +{ + spin_unlock_irqrestore(&port->lock, irqflags); +} #endif /* diff --git a/include/linux/signal.h b/include/linux/signal.h index 843bd62b1ead..c4e3eb89a622 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -268,6 +268,9 @@ extern void signal_setup_done(int failed, struct ksignal *ksig, int stepping); extern void exit_signals(struct task_struct *tsk); extern void kernel_sigaction(int, __sighandler_t); +#define SIG_KTHREAD ((__force __sighandler_t)2) +#define SIG_KTHREAD_KERNEL ((__force __sighandler_t)3) + static inline void allow_signal(int sig) { /* @@ -275,7 +278,17 @@ static inline void allow_signal(int sig) * know it'll be handled, so that they don't get converted to * SIGKILL or just silently dropped. */ - kernel_sigaction(sig, (__force __sighandler_t)2); + kernel_sigaction(sig, SIG_KTHREAD); +} + +static inline void allow_kernel_signal(int sig) +{ + /* + * Kernel threads handle their own signals. Let the signal code + * know signals sent by the kernel will be handled, so that they + * don't get silently dropped. + */ + kernel_sigaction(sig, SIG_KTHREAD_KERNEL); } static inline void disallow_signal(int sig) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 965999d3417a..1db3bf571b76 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1230,7 +1230,8 @@ static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 return skb->hash; } -__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb); +__u32 skb_get_hash_perturb(const struct sk_buff *skb, + const siphash_key_t *perturb); static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) { @@ -1347,6 +1348,19 @@ static inline int skb_queue_empty(const struct sk_buff_head *list) return list->next == (const struct sk_buff *) list; } +/** + * skb_queue_empty_lockless - check if a queue is empty + * @list: queue head + * + * Returns true if the queue is empty, false otherwise. + * This variant can be used in lockless contexts. + */ +static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list) +{ + return READ_ONCE(list->next) == (const struct sk_buff *) list; +} + + /** * skb_queue_is_last - check if skb is the last entry in the queue * @list: queue head @@ -1643,7 +1657,7 @@ static inline struct sk_buff *skb_peek_next(struct sk_buff *skb, */ static inline struct sk_buff *skb_peek_tail(const struct sk_buff_head *list_) { - struct sk_buff *skb = list_->prev; + struct sk_buff *skb = READ_ONCE(list_->prev); if (skb == (struct sk_buff *)list_) skb = NULL; @@ -1711,9 +1725,13 @@ static inline void __skb_insert(struct sk_buff *newsk, struct sk_buff *prev, struct sk_buff *next, struct sk_buff_head *list) { - newsk->next = next; - newsk->prev = prev; - next->prev = prev->next = newsk; + /* See skb_queue_empty_lockless() and skb_peek_tail() + * for the opposite READ_ONCE() + */ + WRITE_ONCE(newsk->next, next); + WRITE_ONCE(newsk->prev, prev); + WRITE_ONCE(next->prev, newsk); + WRITE_ONCE(prev->next, newsk); list->qlen++; } @@ -1724,11 +1742,11 @@ static inline void __skb_queue_splice(const struct sk_buff_head *list, struct sk_buff *first = list->next; struct sk_buff *last = list->prev; - first->prev = prev; - prev->next = first; + WRITE_ONCE(first->prev, prev); + WRITE_ONCE(prev->next, first); - last->next = next; - next->prev = last; + WRITE_ONCE(last->next, next); + WRITE_ONCE(next->prev, last); } /** @@ -1869,8 +1887,8 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) next = skb->next; prev = skb->prev; skb->next = skb->prev = NULL; - next->prev = prev; - prev->next = next; + WRITE_ONCE(next->prev, prev); + WRITE_ONCE(prev->next, next); } /** diff --git a/include/linux/slab.h b/include/linux/slab.h index c8f9c967886e..5921f381f5bc 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -267,11 +267,42 @@ static inline const char *__check_heap_object(const void *ptr, #define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? \ (KMALLOC_MIN_SIZE) : 16) -#ifndef CONFIG_SLOB -extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; +/* + * Whenever changing this, take care of that kmalloc_type() and + * create_kmalloc_caches() still work as intended. + */ +enum kmalloc_cache_type { + KMALLOC_NORMAL = 0, + KMALLOC_RECLAIM, #ifdef CONFIG_ZONE_DMA -extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; + KMALLOC_DMA, #endif + NR_KMALLOC_TYPES +}; + +#ifndef CONFIG_SLOB +extern struct kmem_cache * +kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1]; + +static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags) +{ +#ifdef CONFIG_ZONE_DMA + /* + * The most common case is KMALLOC_NORMAL, so test for it + * with a single branch for both flags. + */ + if (likely((flags & (__GFP_DMA | __GFP_RECLAIMABLE)) == 0)) + return KMALLOC_NORMAL; + + /* + * At least one of the flags has to be set. If both are, __GFP_DMA + * is more important. + */ + return flags & __GFP_DMA ? KMALLOC_DMA : KMALLOC_RECLAIM; +#else + return flags & __GFP_RECLAIMABLE ? KMALLOC_RECLAIM : KMALLOC_NORMAL; +#endif +} /* * Figure out which kmalloc slab an allocation of a certain size @@ -476,18 +507,20 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) static __always_inline void *kmalloc(size_t size, gfp_t flags) { if (__builtin_constant_p(size)) { +#ifndef CONFIG_SLOB + unsigned int index; +#endif if (size > KMALLOC_MAX_CACHE_SIZE) return kmalloc_large(size, flags); #ifndef CONFIG_SLOB - if (!(flags & GFP_DMA)) { - int index = kmalloc_index(size); + index = kmalloc_index(size); - if (!index) - return ZERO_SIZE_PTR; + if (!index) + return ZERO_SIZE_PTR; - return kmem_cache_alloc_trace(kmalloc_caches[index], - flags, size); - } + return kmem_cache_alloc_trace( + kmalloc_caches[kmalloc_type(flags)][index], + flags, size); #endif } return __kmalloc(size, flags); @@ -517,13 +550,14 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) { #ifndef CONFIG_SLOB if (__builtin_constant_p(size) && - size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) { + size <= KMALLOC_MAX_CACHE_SIZE) { int i = kmalloc_index(size); if (!i) return ZERO_SIZE_PTR; - return kmem_cache_alloc_node_trace(kmalloc_caches[i], + return kmem_cache_alloc_node_trace( + kmalloc_caches[kmalloc_type(flags)][i], flags, node, size); } #endif diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 01b5028098ef..65b055b49fc7 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -427,5 +427,9 @@ extern int _atomic_dec_and_lock_irqsafe(atomic_t *atomic, spinlock_t *lock, #define atomic_dec_and_lock_irqsafe(atomic, lock, flags) \ __cond_lock(lock, _atomic_dec_and_lock_irqsafe(atomic, \ lock, flags)) +extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, + unsigned long *flags); +#define atomic_dec_and_lock_irqsave(atomic, lock, flags) \ + __cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags))) #endif /* __LINUX_SPINLOCK_H */ diff --git a/include/linux/stat.h b/include/linux/stat.h index 22484e44544d..07295841fccd 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -33,7 +33,8 @@ struct kstat { STATX_ATTR_IMMUTABLE | \ STATX_ATTR_APPEND | \ STATX_ATTR_NODUMP | \ - STATX_ATTR_ENCRYPTED \ + STATX_ATTR_ENCRYPTED | \ + STATX_ATTR_VERITY \ )/* Attrs corresponding to FS_*_FL flags */ u64 ino; dev_t dev; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index d96e74e114c0..c9548a63d09b 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -188,7 +188,6 @@ struct rpc_timer { struct rpc_wait_queue { spinlock_t lock; struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */ - pid_t owner; /* process id of last task serviced */ unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ unsigned char priority; /* current priority */ unsigned char nr; /* # tasks remaining for cookie */ @@ -204,7 +203,6 @@ struct rpc_wait_queue { * from a single cookie. The aim is to improve * performance of NFS operations such as read/write. */ -#define RPC_BATCH_COUNT 16 #define RPC_IS_PRIORITY(q) ((q)->maxpriority > 0) /* diff --git a/include/linux/swap.h b/include/linux/swap.h index 97d0c7929dc4..595a0e1b86b7 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -369,14 +369,8 @@ extern unsigned long vm_total_pages; extern int node_reclaim_mode; extern int sysctl_min_unmapped_ratio; extern int sysctl_min_slab_ratio; -extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int); #else #define node_reclaim_mode 0 -static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask, - unsigned int order) -{ - return 0; -} #endif extern int page_evictable(struct page *page); diff --git a/include/linux/time.h b/include/linux/time.h index 87c36cf1cec2..21086c5143d9 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -301,4 +301,17 @@ static inline bool itimerspec64_valid(const struct itimerspec64 *its) */ #define time_after32(a, b) ((s32)((u32)(b) - (u32)(a)) < 0) #define time_before32(b, a) time_after32(a, b) + +/** + * time_between32 - check if a 32-bit timestamp is within a given time range + * @t: the time which may be within [l,h] + * @l: the lower bound of the range + * @h: the higher bound of the range + * + * time_before32(t, l, h) returns true if @l <= @t <= @h. All operands are + * treated as 32-bit integers. + * + * Equivalent to !(time_before32(@t, @l) || time_after32(@t, @h)). + */ +#define time_between32(t, l, h) ((u32)(h) - (u32)(l) >= (u32)(t) - (u32)(l)) #endif diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 39cc2ac83b66..16cfa4e948e2 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -272,14 +272,37 @@ struct trace_event_call { #ifdef CONFIG_PERF_EVENTS int perf_refcount; struct hlist_head __percpu *perf_events; - struct bpf_prog *prog; - struct perf_event *bpf_prog_owner; + struct bpf_prog_array __rcu *prog_array; int (*perf_perm)(struct trace_event_call *, struct perf_event *); #endif }; +#ifdef CONFIG_PERF_EVENTS +static inline bool bpf_prog_array_valid(struct trace_event_call *call) +{ + /* + * This inline function checks whether call->prog_array + * is valid or not. The function is called in various places, + * outside rcu_read_lock/unlock, as a heuristic to speed up execution. + * + * If this function returns true, and later call->prog_array + * becomes false inside rcu_read_lock/unlock region, + * we bail out then. If this function return false, + * there is a risk that we might miss a few events if the checking + * were delayed until inside rcu_read_lock/unlock region and + * call->prog_array happened to become non-NULL then. + * + * Here, READ_ONCE() is used instead of rcu_access_pointer(). + * rcu_access_pointer() requires the actual definition of + * "struct bpf_prog_array" while READ_ONCE() only needs + * a declaration of the same type. + */ + return !!READ_ONCE(call->prog_array); +} +#endif + static inline const char * trace_event_name(struct trace_event_call *call) { @@ -436,12 +459,23 @@ trace_trigger_soft_disabled(struct trace_event_file *file) } #ifdef CONFIG_BPF_EVENTS -unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); +unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx); +int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog); +void perf_event_detach_bpf_prog(struct perf_event *event); #else -static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { return 1; } + +static inline int +perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} + +static inline void perf_event_detach_bpf_prog(struct perf_event *event) { } + #endif enum { @@ -512,6 +546,7 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, { perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event); } + #endif #endif /* _LINUX_TRACE_EVENT_H */ diff --git a/include/linux/tty.h b/include/linux/tty.h index 1dd587ba6d88..ead308e996c0 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -224,6 +224,8 @@ struct tty_port_client_operations { void (*write_wakeup)(struct tty_port *port); }; +extern const struct tty_port_client_operations tty_port_default_client_ops; + struct tty_port { struct tty_bufhead buf; /* Locked internally */ struct tty_struct *tty; /* Back pointer */ @@ -365,6 +367,7 @@ struct tty_file_private { #define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ #define TTY_HUPPED 18 /* Post driver->hangup() */ #define TTY_HUPPING 19 /* Hangup in progress */ +#define TTY_LDISC_CHANGING 20 /* Change pending - non-block IO */ #define TTY_LDISC_HALTED 22 /* Line discipline is halted */ /* Values for tty->flow_change */ @@ -382,6 +385,12 @@ static inline void tty_set_flow_change(struct tty_struct *tty, int val) smp_mb(); } +static inline bool tty_io_nonblock(struct tty_struct *tty, struct file *file) +{ + return file->f_flags & O_NONBLOCK || + test_bit(TTY_LDISC_CHANGING, &tty->flags); +} + static inline bool tty_io_error(struct tty_struct *tty) { return test_bit(TTY_IO_ERROR, &tty->flags); diff --git a/include/linux/unicode.h b/include/linux/unicode.h index 990aa97d8049..74484d44c755 100644 --- a/include/linux/unicode.h +++ b/include/linux/unicode.h @@ -27,6 +27,9 @@ int utf8_normalize(const struct unicode_map *um, const struct qstr *str, int utf8_casefold(const struct unicode_map *um, const struct qstr *str, unsigned char *dest, size_t dlen); +int utf8_casefold_hash(const struct unicode_map *um, const void *salt, + struct qstr *str); + struct unicode_map *utf8_load(const char *version); void utf8_unload(struct unicode_map *um); diff --git a/include/linux/usb/irda.h b/include/linux/usb/irda.h index 396d2b043e64..556a801efce3 100644 --- a/include/linux/usb/irda.h +++ b/include/linux/usb/irda.h @@ -119,11 +119,22 @@ struct usb_irda_cs_descriptor { * 6 - 115200 bps * 7 - 576000 bps * 8 - 1.152 Mbps - * 9 - 5 mbps + * 9 - 4 Mbps * 10..15 - Reserved */ #define USB_IRDA_STATUS_LINK_SPEED 0x0f +#define USB_IRDA_LS_NO_CHANGE 0 +#define USB_IRDA_LS_2400 1 +#define USB_IRDA_LS_9600 2 +#define USB_IRDA_LS_19200 3 +#define USB_IRDA_LS_38400 4 +#define USB_IRDA_LS_57600 5 +#define USB_IRDA_LS_115200 6 +#define USB_IRDA_LS_576000 7 +#define USB_IRDA_LS_1152000 8 +#define USB_IRDA_LS_4000000 9 + /* The following is a 4-bit value used only for * outbound header: * diff --git a/include/linux/xattr.h b/include/linux/xattr.h index d70f77a4b62a..7f538d232179 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -30,10 +30,13 @@ struct xattr_handler { const char *prefix; int flags; /* fs private flags */ bool (*list)(struct dentry *dentry); - int (*get)(const struct xattr_handler *, struct dentry *dentry, + int (*get)(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size); - int (*set)(const struct xattr_handler *, struct dentry *dentry, + int (*__get)(const struct xattr_handler *handler, struct dentry *dentry, + struct inode *inode, const char *name, void *buffer, + size_t size); + int (*set)(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, const void *buffer, size_t size, int flags); }; diff --git a/include/math-emu/soft-fp.h b/include/math-emu/soft-fp.h index 3f284bc03180..5650c1628383 100644 --- a/include/math-emu/soft-fp.h +++ b/include/math-emu/soft-fp.h @@ -138,7 +138,7 @@ do { \ _FP_FRAC_ADDI_##wc(X, _FP_WORK_ROUND); \ } while (0) -#define _FP_ROUND_ZERO(wc, X) 0 +#define _FP_ROUND_ZERO(wc, X) (void)0 #define _FP_ROUND_PINF(wc, X) \ do { \ diff --git a/include/media/davinci/vpbe.h b/include/media/davinci/vpbe.h index 79a566d7defd..180a05e91497 100644 --- a/include/media/davinci/vpbe.h +++ b/include/media/davinci/vpbe.h @@ -92,7 +92,7 @@ struct vpbe_config { struct encoder_config_info *ext_encoders; /* amplifier information goes here */ struct amp_config_info *amp; - int num_outputs; + unsigned int num_outputs; /* Order is venc outputs followed by LCD and then external encoders */ struct vpbe_output *outputs; }; diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h index 8ffa94009d1a..76002416cead 100644 --- a/include/media/v4l2-device.h +++ b/include/media/v4l2-device.h @@ -268,7 +268,7 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, struct v4l2_subdev *__sd; \ \ __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \ - !(grpid) || __sd->grp_id == (grpid), o, f , \ + (grpid) == 0 || __sd->grp_id == (grpid), o, f , \ ##args); \ } while (0) @@ -280,7 +280,7 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, ({ \ struct v4l2_subdev *__sd; \ __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \ - !(grpid) || __sd->grp_id == (grpid), o, f , \ + (grpid) == 0 || __sd->grp_id == (grpid), o, f , \ ##args); \ }) @@ -294,8 +294,8 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, struct v4l2_subdev *__sd; \ \ __v4l2_device_call_subdevs_p(v4l2_dev, __sd, \ - !(grpmsk) || (__sd->grp_id & (grpmsk)), o, f , \ - ##args); \ + (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \ + f , ##args); \ } while (0) /* @@ -308,8 +308,8 @@ static inline void v4l2_subdev_notify(struct v4l2_subdev *sd, ({ \ struct v4l2_subdev *__sd; \ __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, \ - !(grpmsk) || (__sd->grp_id & (grpmsk)), o, f , \ - ##args); \ + (grpmsk) == 0 || (__sd->grp_id & (grpmsk)), o, \ + f , ##args); \ }) /* diff --git a/include/media/v4l2-rect.h b/include/media/v4l2-rect.h index d2125f0cc7cd..1584c760b993 100644 --- a/include/media/v4l2-rect.h +++ b/include/media/v4l2-rect.h @@ -75,10 +75,10 @@ static inline void v4l2_rect_map_inside(struct v4l2_rect *r, r->left = boundary->left; if (r->top < boundary->top) r->top = boundary->top; - if (r->left + r->width > boundary->width) - r->left = boundary->width - r->width; - if (r->top + r->height > boundary->height) - r->top = boundary->height - r->height; + if (r->left + r->width > boundary->left + boundary->width) + r->left = boundary->left + boundary->width - r->width; + if (r->top + r->height > boundary->top + boundary->height) + r->top = boundary->top + boundary->height - r->height; } /** diff --git a/include/net/bonding.h b/include/net/bonding.h index 04008209506a..b0f20bc0fd4a 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -149,7 +149,6 @@ struct slave { unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS]; s8 link; /* one of BOND_LINK_XXXX */ s8 link_new_state; /* one of BOND_LINK_XXXX */ - s8 new_link; u8 backup:1, /* indicates backup slave. Value corresponds with BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ inactive:1, /* indicates inactive slave */ @@ -523,7 +522,7 @@ static inline void bond_propose_link_state(struct slave *slave, int state) static inline void bond_commit_link_state(struct slave *slave, bool notify) { - if (slave->link == slave->link_new_state) + if (slave->link_new_state == BOND_LINK_NOCHANGE) return; slave->link = slave->link_new_state; diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 71c72a939bf8..c86fcadccbd7 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -134,7 +134,7 @@ static inline void skb_mark_napi_id(struct sk_buff *skb, static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL - sk->sk_napi_id = skb->napi_id; + WRITE_ONCE(sk->sk_napi_id, skb->napi_id); #endif } @@ -143,8 +143,8 @@ static inline void sk_mark_napi_id_once(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL - if (!sk->sk_napi_id) - sk->sk_napi_id = skb->napi_id; + if (!READ_ONCE(sk->sk_napi_id)) + WRITE_ONCE(sk->sk_napi_id, skb->napi_id); #endif } diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 40face2f023c..e3dba0600b63 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2875,6 +2875,9 @@ struct cfg80211_external_auth_params { * * @start_radar_detection: Start radar detection in the driver. * + * @end_cac: End running CAC, probably because a related CAC + * was finished on another phy. + * * @update_ft_ies: Provide updated Fast BSS Transition information to the * driver. If the SME is in the driver/firmware, this information can be * used in building Authentication and Reassociation Request frames. @@ -3183,6 +3186,8 @@ struct cfg80211_ops { struct net_device *dev, struct cfg80211_chan_def *chandef, u32 cac_time_ms); + void (*end_cac)(struct wiphy *wiphy, + struct net_device *dev); int (*update_ft_ies)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_update_ft_ies_params *ftie); int (*crit_proto_start)(struct wiphy *wiphy, @@ -4516,6 +4521,17 @@ static inline const u8 *cfg80211_find_ext_ie(u8 ext_eid, const u8 *ies, int len) const u8 *cfg80211_find_vendor_ie(unsigned int oui, int oui_type, const u8 *ies, int len); +/** + * cfg80211_send_layer2_update - send layer 2 update frame + * + * @dev: network device + * @addr: STA MAC address + * + * Wireless drivers can use this function to update forwarding tables in bridge + * devices upon STA association. + */ +void cfg80211_send_layer2_update(struct net_device *dev, const u8 *addr); + /** * DOC: Regulatory enforcement infrastructure * diff --git a/include/net/dst.h b/include/net/dst.h index cbd5f09c4cc8..8c714eb6e18b 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -110,7 +110,7 @@ struct dst_entry { struct dst_metrics { u32 metrics[RTAX_MAX]; refcount_t refcnt; -}; +} __aligned(4); /* Low pointer bits contain DST_METRICS_FLAGS */ extern const struct dst_metrics dst_default_metrics; u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old); @@ -542,7 +542,16 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu) struct dst_entry *dst = skb_dst(skb); if (dst && dst->ops->update_pmtu) - dst->ops->update_pmtu(dst, NULL, skb, mtu); + dst->ops->update_pmtu(dst, NULL, skb, mtu, true); +} + +/* update dst pmtu but not do neighbor confirm */ +static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu) +{ + struct dst_entry *dst = skb_dst(skb); + + if (dst && dst->ops->update_pmtu) + dst->ops->update_pmtu(dst, NULL, skb, mtu, false); } #endif /* _NET_DST_H */ diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 5ec645f27ee3..443863c7b8da 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -27,7 +27,8 @@ struct dst_ops { struct dst_entry * (*negative_advice)(struct dst_entry *); void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu); + struct sk_buff *skb, u32 mtu, + bool confirm_neigh); void (*redirect)(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); int (*local_out)(struct net *net, struct sock *sk, struct sk_buff *skb); diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 648caf90ec07..b8fd023ba625 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -102,6 +102,7 @@ struct fib_rule_notifier_info { [FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \ [FRA_PRIORITY] = { .type = NLA_U32 }, \ [FRA_FWMARK] = { .type = NLA_U32 }, \ + [FRA_TUN_ID] = { .type = NLA_U64 }, \ [FRA_FWMASK] = { .type = NLA_U32 }, \ [FRA_TABLE] = { .type = NLA_U32 }, \ [FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \ diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 22aba321282d..ddf916e5e57d 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -4,6 +4,8 @@ #include #include +#include +#include #include /** @@ -229,7 +231,7 @@ struct flow_dissector { struct flow_keys { struct flow_dissector_key_control control; #define FLOW_KEYS_HASH_START_FIELD basic - struct flow_dissector_key_basic basic; + struct flow_dissector_key_basic basic __aligned(SIPHASH_ALIGNMENT); struct flow_dissector_key_tags tags; struct flow_dissector_key_vlan vlan; struct flow_dissector_key_keyid keyid; @@ -281,4 +283,12 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec return ((char *)target_container) + flow_dissector->offset[key_id]; } +static inline void +flow_dissector_init_keys(struct flow_dissector_key_control *key_control, + struct flow_dissector_key_basic *key_basic) +{ + memset(key_control, 0, sizeof(*key_control)); + memset(key_basic, 0, sizeof(*key_basic)); +} + #endif diff --git a/include/net/fq.h b/include/net/fq.h index 6d8521a30c5c..2c7687902789 100644 --- a/include/net/fq.h +++ b/include/net/fq.h @@ -70,7 +70,7 @@ struct fq { struct list_head backlogs; spinlock_t lock; u32 flows_cnt; - u32 perturbation; + siphash_key_t perturbation; u32 limit; u32 memory_limit; u32 memory_usage; diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index ac1a2317941e..46903e23eab9 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -105,7 +105,7 @@ static struct fq_flow *fq_flow_classify(struct fq *fq, lockdep_assert_held(&fq->lock); - hash = skb_get_hash_perturb(skb, fq->perturbation); + hash = skb_get_hash_perturb(skb, &fq->perturbation); idx = reciprocal_scale(hash, fq->flows_cnt); flow = &fq->flows[idx]; @@ -255,7 +255,7 @@ static int fq_init(struct fq *fq, int flows_cnt) INIT_LIST_HEAD(&fq->backlogs); spin_lock_init(&fq->lock); fq->flows_cnt = max_t(u32, flows_cnt, 1); - fq->perturbation = prandom_u32(); + get_random_bytes(&fq->perturbation, sizeof(fq->perturbation)); fq->quantum = 300; fq->limit = 8192; fq->memory_limit = 16 << 20; /* 16 MBytes */ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 2dbbbff5e1e3..573ab110c9ec 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -106,12 +106,18 @@ struct inet_bind_hashbucket { struct hlist_head chain; }; -/* - * Sockets can be hashed in established or listening table +/* Sockets can be hashed in established or listening table. + * We must use different 'nulls' end-of-chain value for all hash buckets : + * A socket might transition from ESTABLISH to LISTEN state without + * RCU grace period. A lookup in ehash table needs to handle this case. */ +#define LISTENING_NULLS_BASE (1U << 29) struct inet_listen_hashbucket { spinlock_t lock; - struct hlist_head head; + union { + struct hlist_head head; + struct hlist_nulls_head nulls_head; + }; }; /* This is for listening sockets, thus all sockets which possess wildcards. */ diff --git a/include/net/ip.h b/include/net/ip.h index f60840e1556c..640ee2512e99 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -649,4 +649,9 @@ extern int sysctl_icmp_msgs_burst; int ip_misc_proc_init(void); #endif +static inline bool inetdev_valid_mtu(unsigned int mtu) +{ + return likely(mtu >= IPV4_MIN_MTU); +} + #endif /* _IP_H */ diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 82bc9f0e8a76..f4e5ac8aa366 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -891,6 +891,7 @@ struct netns_ipvs { struct delayed_work defense_work; /* Work handler */ int drop_rate; int drop_counter; + int old_secure_tcp; atomic_t dropentry; /* locks in ctl.c */ spinlock_t dropentry_lock; /* drop entry handling */ diff --git a/include/net/llc.h b/include/net/llc.h index 890a87318014..df282d9b4017 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -66,6 +66,7 @@ struct llc_sap { int sk_count; struct hlist_nulls_head sk_laddr_hash[LLC_SK_LADDR_HASH_ENTRIES]; struct hlist_head sk_dev_hash[LLC_SK_DEV_HASH_ENTRIES]; + struct rcu_head rcu; }; static inline diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h index df528a623548..ea985aa7a6c5 100644 --- a/include/net/llc_conn.h +++ b/include/net/llc_conn.h @@ -104,7 +104,7 @@ void llc_sk_reset(struct sock *sk); /* Access to a connection */ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb); -int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb); +void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb); void llc_conn_rtn_pdu(struct sock *sk, struct sk_buff *skb); void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit); void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 393099b1901a..e89273f9a0bc 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -429,8 +429,8 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { unsigned long now = jiffies; - if (neigh->used != now) - neigh->used = now; + if (READ_ONCE(neigh->used) != now) + WRITE_ONCE(neigh->used, now); if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) return __neigh_event_send(neigh, skb); return 0; @@ -458,7 +458,7 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb do { seq = read_seqbegin(&hh->hh_lock); - hh_len = hh->hh_len; + hh_len = READ_ONCE(hh->hh_len); if (likely(hh_len <= HH_DATA_MOD)) { hh_alen = HH_DATA_MOD; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 59a4f50ffe8d..a9704c57430d 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -759,7 +759,8 @@ struct nft_expr_ops { */ struct nft_expr { const struct nft_expr_ops *ops; - unsigned char data[]; + unsigned char data[] + __attribute__((aligned(__alignof__(u64)))); }; static inline void *nft_expr_priv(const struct nft_expr *expr) diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 23e22054aa60..04aa2c7d35c4 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -181,7 +181,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, static inline bool reqsk_queue_empty(const struct request_sock_queue *queue) { - return queue->rskq_accept_head == NULL; + return READ_ONCE(queue->rskq_accept_head) == NULL; } static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue *queue, @@ -193,7 +193,7 @@ static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue req = queue->rskq_accept_head; if (req) { sk_acceptq_removed(parent); - queue->rskq_accept_head = req->dl_next; + WRITE_ONCE(queue->rskq_accept_head, req->dl_next); if (queue->rskq_accept_head == NULL) queue->rskq_accept_tail = NULL; } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f59acacaa265..37876d842f2e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -305,6 +305,11 @@ static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc) return q; } +static inline struct Qdisc *qdisc_root_bh(const struct Qdisc *qdisc) +{ + return rcu_dereference_bh(qdisc->dev_queue->qdisc); +} + static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc) { return qdisc->dev_queue->qdisc_sleeping; diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 749a42882437..c713bd62428f 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -103,6 +103,8 @@ void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int); /* * sctp/socket.c */ +int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags); int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb); int sctp_inet_listen(struct socket *sock, int backlog); void sctp_write_space(struct sock *sk); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 94c775773f58..c1f71dd464d3 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1181,6 +1181,9 @@ struct sctp_ep_common { /* What socket does this endpoint belong to? */ struct sock *sk; + /* Cache netns and it won't change once set */ + struct net *net; + /* This is where we receive inbound chunks. */ struct sctp_inq inqueue; diff --git a/include/net/sock.h b/include/net/sock.h index 60eef7f1ac05..c6a003bc4737 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -693,6 +693,11 @@ static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_h hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); } +static inline void __sk_nulls_add_node_tail_rcu(struct sock *sk, struct hlist_nulls_head *list) +{ + hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list); +} + static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { sock_hold(sk); @@ -916,8 +921,8 @@ static inline void sk_incoming_cpu_update(struct sock *sk) { int cpu = raw_smp_processor_id(); - if (unlikely(sk->sk_incoming_cpu != cpu)) - sk->sk_incoming_cpu = cpu; + if (unlikely(READ_ONCE(sk->sk_incoming_cpu) != cpu)) + WRITE_ONCE(sk->sk_incoming_cpu, cpu); } static inline void sock_rps_record_flow_hash(__u32 hash) @@ -1232,7 +1237,7 @@ static inline void sk_sockets_allocated_inc(struct sock *sk) percpu_counter_inc(sk->sk_prot->sockets_allocated); } -static inline int +static inline u64 sk_sockets_allocated_read_positive(struct sock *sk) { return percpu_counter_read_positive(sk->sk_prot->sockets_allocated); @@ -2131,12 +2136,17 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, * sk_page_frag - return an appropriate page_frag * @sk: socket * - * If socket allocation mode allows current thread to sleep, it means its - * safe to use the per task page_frag instead of the per socket one. + * Use the per task page_frag instead of the per socket one for + * optimization when we know that we're in the normal context and owns + * everything that's associated with %current. + * + * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest + * inside other socket operations and end up recursing into sk_page_frag() + * while it's already in use. */ static inline struct page_frag *sk_page_frag(struct sock *sk) { - if (gfpflags_allow_blocking(sk->sk_allocation)) + if (gfpflags_normal_context(sk->sk_allocation)) return ¤t->task_frag; return &sk->sk_frag; @@ -2224,7 +2234,7 @@ static inline ktime_t sock_read_timestamp(struct sock *sk) return kt; #else - return sk->sk_stamp; + return READ_ONCE(sk->sk_stamp); #endif } @@ -2235,7 +2245,7 @@ static inline void sock_write_timestamp(struct sock *sk, ktime_t kt) sk->sk_stamp = kt; write_sequnlock(&sk->sk_stamp_seq); #else - sk->sk_stamp = kt; + WRITE_ONCE(sk->sk_stamp, kt); #endif } diff --git a/include/net/tcp.h b/include/net/tcp.h index 45a3b9534a9b..5bd33cb5c590 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -290,7 +290,7 @@ static inline bool tcp_under_memory_pressure(const struct sock *sk) mem_cgroup_under_socket_pressure(sk->sk_memcg)) return true; - return tcp_memory_pressure; + return READ_ONCE(tcp_memory_pressure); } /* * The next routines deal with comparing 32 bit unsigned ints @@ -501,19 +501,27 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); */ static inline void tcp_synq_overflow(const struct sock *sk) { - unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + unsigned long last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp); unsigned long now = jiffies; - if (time_after(now, last_overflow + HZ)) - tcp_sk(sk)->rx_opt.ts_recent_stamp = now; + if (!time_between32(now, last_overflow, last_overflow + HZ)) + WRITE_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp, now); } /* syncookies: no recent synqueue overflow on this listening socket? */ static inline bool tcp_synq_no_recent_overflow(const struct sock *sk) { - unsigned long last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp; + unsigned long last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp); - return time_after(jiffies, last_overflow + TCP_SYNCOOKIE_VALID); + /* If last_overflow <= jiffies <= last_overflow + TCP_SYNCOOKIE_VALID, + * then we're under synflood. However, we have to use + * 'last_overflow - HZ' as lower bound. That's because a concurrent + * tcp_synq_overflow() could update .ts_recent_stamp after we read + * jiffies but before we store .ts_recent_stamp into last_overflow, + * which could lead to rejecting a valid syncookie. + */ + return !time_between32(jiffies, last_overflow - HZ, + last_overflow + TCP_SYNCOOKIE_VALID); } static inline u32 tcp_cookie_time(void) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index b8a5118b6a42..73cc5cfb72e0 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -306,7 +306,7 @@ enum ib_cq_creation_flags { struct ib_cq_init_attr { unsigned int cqe; - int comp_vector; + u32 comp_vector; u32 flags; }; @@ -1120,7 +1120,7 @@ struct ib_qp_init_attr { struct ib_qp_cap cap; enum ib_sig_type sq_sig_type; enum ib_qp_type qp_type; - enum ib_qp_create_flags create_flags; + u32 create_flags; /* * Only needed for special QP types, or when using the RW API. diff --git a/include/scsi/scsi_eh.h b/include/scsi/scsi_eh.h index 2b7e227960e1..91f403341dd7 100644 --- a/include/scsi/scsi_eh.h +++ b/include/scsi/scsi_eh.h @@ -32,6 +32,7 @@ extern int scsi_ioctl_reset(struct scsi_device *, int __user *); struct scsi_eh_save { /* saved state */ int result; + unsigned int resid_len; int eh_eflags; enum dma_data_direction data_direction; unsigned underflow; diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h index 6665cb29e1a2..c2a71fd8dfaf 100644 --- a/include/sound/rawmidi.h +++ b/include/sound/rawmidi.h @@ -92,9 +92,9 @@ struct snd_rawmidi_substream { struct list_head list; /* list of all substream for given stream */ int stream; /* direction */ int number; /* substream number */ - unsigned int opened: 1, /* open flag */ - append: 1, /* append flag (merge more streams) */ - active_sensing: 1; /* send active sensing when close */ + bool opened; /* open flag */ + bool append; /* append flag (merge more streams) */ + bool active_sensing; /* send active sensing when close */ int use_count; /* use counter (for output) */ size_t bytes; struct snd_rawmidi *rmidi; diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 32d0c1fe2bfa..3ebada29a313 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -325,7 +325,7 @@ DECLARE_EVENT_CLASS( __entry->extent_type = btrfs_file_extent_type(l, fi); __entry->compression = btrfs_file_extent_compression(l, fi); __entry->extent_start = start; - __entry->extent_end = (start + btrfs_file_extent_inline_len(l, slot, fi)); + __entry->extent_end = (start + btrfs_file_extent_ram_bytes(l, fi)); ), TP_printk_btrfs( diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 4c282573437d..ed0d788495fc 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -49,6 +49,7 @@ TRACE_DEFINE_ENUM(CP_SYNC); TRACE_DEFINE_ENUM(CP_RECOVERY); TRACE_DEFINE_ENUM(CP_DISCARD); TRACE_DEFINE_ENUM(CP_TRIMMED); +TRACE_DEFINE_ENUM(CP_PAUSE); #define show_block_type(type) \ __print_symbolic(type, \ @@ -134,13 +135,14 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); { CP_SYNC, "Sync" }, \ { CP_RECOVERY, "Recovery" }, \ { CP_DISCARD, "Discard" }, \ - { CP_UMOUNT, "Umount" }, \ + { CP_PAUSE, "Pause" }, \ { CP_TRIMMED, "Trimmed" }) #define show_fsync_cpreason(type) \ __print_symbolic(type, \ { CP_NO_NEEDED, "no needed" }, \ { CP_NON_REGULAR, "non regular" }, \ + { CP_COMPRESSED, "compressed" }, \ { CP_HARDLINK, "hardlink" }, \ { CP_SB_NEED_CP, "sb needs cp" }, \ { CP_WRONG_PINO, "wrong pino" }, \ @@ -158,6 +160,11 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); { F2FS_GOING_DOWN_METAFLUSH, "meta flush" }, \ { F2FS_GOING_DOWN_NEED_FSCK, "need fsck" }) +#define show_compress_algorithm(type) \ + __print_symbolic(type, \ + { COMPRESS_LZO, "LZO" }, \ + { COMPRESS_LZ4, "LZ4" }) + struct f2fs_sb_info; struct f2fs_io_info; struct extent_info; @@ -1720,6 +1727,100 @@ TRACE_EVENT(f2fs_shutdown, __entry->ret) ); +DECLARE_EVENT_CLASS(f2fs_zip_start, + + TP_PROTO(struct inode *inode, pgoff_t cluster_idx, + unsigned int cluster_size, unsigned char algtype), + + TP_ARGS(inode, cluster_idx, cluster_size, algtype), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(pgoff_t, idx) + __field(unsigned int, size) + __field(unsigned int, algtype) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->idx = cluster_idx; + __entry->size = cluster_size; + __entry->algtype = algtype; + ), + + TP_printk("dev = (%d,%d), ino = %lu, cluster_idx:%lu, " + "cluster_size = %u, algorithm = %s", + show_dev_ino(__entry), + __entry->idx, + __entry->size, + show_compress_algorithm(__entry->algtype)) +); + +DECLARE_EVENT_CLASS(f2fs_zip_end, + + TP_PROTO(struct inode *inode, pgoff_t cluster_idx, + unsigned int compressed_size, int ret), + + TP_ARGS(inode, cluster_idx, compressed_size, ret), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(ino_t, ino) + __field(pgoff_t, idx) + __field(unsigned int, size) + __field(unsigned int, ret) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->idx = cluster_idx; + __entry->size = compressed_size; + __entry->ret = ret; + ), + + TP_printk("dev = (%d,%d), ino = %lu, cluster_idx:%lu, " + "compressed_size = %u, ret = %d", + show_dev_ino(__entry), + __entry->idx, + __entry->size, + __entry->ret) +); + +DEFINE_EVENT(f2fs_zip_start, f2fs_compress_pages_start, + + TP_PROTO(struct inode *inode, pgoff_t cluster_idx, + unsigned int cluster_size, unsigned char algtype), + + TP_ARGS(inode, cluster_idx, cluster_size, algtype) +); + +DEFINE_EVENT(f2fs_zip_start, f2fs_decompress_pages_start, + + TP_PROTO(struct inode *inode, pgoff_t cluster_idx, + unsigned int cluster_size, unsigned char algtype), + + TP_ARGS(inode, cluster_idx, cluster_size, algtype) +); + +DEFINE_EVENT(f2fs_zip_end, f2fs_compress_pages_end, + + TP_PROTO(struct inode *inode, pgoff_t cluster_idx, + unsigned int compressed_size, int ret), + + TP_ARGS(inode, cluster_idx, compressed_size, ret) +); + +DEFINE_EVENT(f2fs_zip_end, f2fs_decompress_pages_end, + + TP_PROTO(struct inode *inode, pgoff_t cluster_idx, + unsigned int compressed_size, int ret), + + TP_ARGS(inode, cluster_idx, compressed_size, ret) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 3df781d4fc21..33efaa451a34 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -308,9 +308,14 @@ TRACE_EVENT(sched_switch, (__entry->prev_state & (TASK_REPORT_MAX - 1)) ? __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|", - { 0x01, "S" }, { 0x02, "D" }, { 0x04, "T" }, - { 0x08, "t" }, { 0x10, "X" }, { 0x20, "Z" }, - { 0x40, "P" }, { 0x80, "I" }) : + { TASK_INTERRUPTIBLE, "S" }, + { TASK_UNINTERRUPTIBLE, "D" }, + { __TASK_STOPPED, "T" }, + { __TASK_TRACED, "t" }, + { EXIT_DEAD, "X" }, + { EXIT_ZOMBIE, "Z" }, + { TASK_PARKED, "P" }, + { TASK_DEAD, "I" }) : "R", __entry->prev_state & TASK_REPORT_MAX ? "+" : "", diff --git a/include/trace/events/wbt.h b/include/trace/events/wbt.h index b048694070e2..37342a13c9cb 100644 --- a/include/trace/events/wbt.h +++ b/include/trace/events/wbt.h @@ -33,7 +33,8 @@ TRACE_EVENT(wbt_stat, ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strlcpy(__entry->name, dev_name(bdi->dev), + ARRAY_SIZE(__entry->name)); __entry->rmean = stat[0].mean; __entry->rmin = stat[0].min; __entry->rmax = stat[0].max; @@ -67,7 +68,8 @@ TRACE_EVENT(wbt_lat, ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strlcpy(__entry->name, dev_name(bdi->dev), + ARRAY_SIZE(__entry->name)); __entry->lat = div_u64(lat, 1000); ), @@ -103,7 +105,8 @@ TRACE_EVENT(wbt_step, ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strlcpy(__entry->name, dev_name(bdi->dev), + ARRAY_SIZE(__entry->name)); __entry->msg = msg; __entry->step = step; __entry->window = div_u64(window, 1000); @@ -138,7 +141,8 @@ TRACE_EVENT(wbt_timer, ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strlcpy(__entry->name, dev_name(bdi->dev), + ARRAY_SIZE(__entry->name)); __entry->status = status; __entry->step = step; __entry->inflight = inflight; diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index 2ec9064a2bb7..e5150fc67e91 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -66,7 +66,11 @@ TRACE_EVENT(xen_mc_callback, TP_PROTO(xen_mc_callback_fn_t fn, void *data), TP_ARGS(fn, data), TP_STRUCT__entry( - __field(xen_mc_callback_fn_t, fn) + /* + * Use field_struct to avoid is_signed_type() + * comparison of a function pointer. + */ + __field_struct(xen_mc_callback_fn_t, fn) __field(void *, data) ), TP_fast_assign( diff --git a/include/trace/perf.h b/include/trace/perf.h index e4b249821684..dbc6c74defc3 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -35,7 +35,6 @@ perf_trace_##call(void *__data, proto) \ struct trace_event_call *event_call = __data; \ struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\ struct trace_event_raw_##call *entry; \ - struct bpf_prog *prog = event_call->prog; \ struct pt_regs *__regs; \ u64 __count = 1; \ struct task_struct *__task = NULL; \ @@ -47,8 +46,9 @@ perf_trace_##call(void *__data, proto) \ __data_size = trace_event_get_offsets_##call(&__data_offsets, args); \ \ head = this_cpu_ptr(event_call->perf_events); \ - if (!prog && __builtin_constant_p(!__task) && !__task && \ - hlist_empty(head)) \ + if (!bpf_prog_array_valid(event_call) && \ + __builtin_constant_p(!__task) && !__task && \ + hlist_empty(head)) \ return; \ \ __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ diff --git a/include/uapi/linux/android/binderfs.h b/include/uapi/linux/android/binderfs.h new file mode 100644 index 000000000000..65b2efd1a0a5 --- /dev/null +++ b/include/uapi/linux/android/binderfs.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2018 Canonical Ltd. + * + */ + +#ifndef _UAPI_LINUX_BINDER_CTL_H +#define _UAPI_LINUX_BINDER_CTL_H + +#include +#include +#include + +#define BINDERFS_MAX_NAME 255 + +/** + * struct binderfs_device - retrieve information about a new binder device + * @name: the name to use for the new binderfs binder device + * @major: major number allocated for binderfs binder devices + * @minor: minor number allocated for the new binderfs binder device + * + */ +struct binderfs_device { + char name[BINDERFS_MAX_NAME + 1]; + __u8 major; + __u8 minor; +}; + +/** + * Allocate a new binder device. + */ +#define BINDER_CTL_ADD _IOWR('b', 1, struct binderfs_device) + +#endif /* _UAPI_LINUX_BINDER_CTL_H */ + diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a88b2c458dcc..3dd49ee01094 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -144,11 +144,47 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE -/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command - * to the given target_fd cgroup the descendent cgroup will be able to - * override effective bpf program that was inherited from this cgroup +/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command + * + * NONE(default): No further bpf programs allowed in the subtree. + * + * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, + * the program in this cgroup yields to sub-cgroup program. + * + * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, + * that cgroup program gets run in addition to the program in this cgroup. + * + * Only one program is allowed to be attached to a cgroup with + * NONE or BPF_F_ALLOW_OVERRIDE flag. + * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will + * release old program and attach the new one. Attach flags has to match. + * + * Multiple programs are allowed to be attached to a cgroup with + * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order + * (those that were attached first, run first) + * The programs of sub-cgroup are executed first, then programs of + * this cgroup and then programs of parent cgroup. + * When children program makes decision (like picking TCP CA or sock bind) + * parent program has a chance to override it. + * + * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. + * A cgroup with NONE doesn't allow any programs in sub-cgroups. + * Ex1: + * cgrp1 (MULTI progs A, B) -> + * cgrp2 (OVERRIDE prog C) -> + * cgrp3 (MULTI prog D) -> + * cgrp4 (OVERRIDE prog E) -> + * cgrp5 (NONE prog F) + * the event in cgrp5 triggers execution of F,D,A,B in that order. + * if prog F is detached, the execution is E,D,A,B + * if prog F and D are detached, the execution is E,A,B + * if prog F, E and D are detached, the execution is C,A,B + * + * All eligible programs are executed regardless of return code from + * earlier programs. */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) +#define BPF_F_ALLOW_MULTI (1U << 1) /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will perform strict alignment checking as if the kernel diff --git a/include/uapi/linux/cec-funcs.h b/include/uapi/linux/cec-funcs.h index 28e8a2a86e16..2a114f7a24d4 100644 --- a/include/uapi/linux/cec-funcs.h +++ b/include/uapi/linux/cec-funcs.h @@ -952,7 +952,8 @@ static inline void cec_msg_give_deck_status(struct cec_msg *msg, msg->len = 3; msg->msg[1] = CEC_MSG_GIVE_DECK_STATUS; msg->msg[2] = status_req; - msg->reply = reply ? CEC_MSG_DECK_STATUS : 0; + msg->reply = (reply && status_req != CEC_OP_STATUS_REQ_OFF) ? + CEC_MSG_DECK_STATUS : 0; } static inline void cec_ops_give_deck_status(const struct cec_msg *msg, @@ -1056,7 +1057,8 @@ static inline void cec_msg_give_tuner_device_status(struct cec_msg *msg, msg->len = 3; msg->msg[1] = CEC_MSG_GIVE_TUNER_DEVICE_STATUS; msg->msg[2] = status_req; - msg->reply = reply ? CEC_MSG_TUNER_DEVICE_STATUS : 0; + msg->reply = (reply && status_req != CEC_OP_STATUS_REQ_OFF) ? + CEC_MSG_TUNER_DEVICE_STATUS : 0; } static inline void cec_ops_give_tuner_device_status(const struct cec_msg *msg, diff --git a/include/uapi/linux/cec.h b/include/uapi/linux/cec.h index c3114c989e91..f50dd34e4f7b 100644 --- a/include/uapi/linux/cec.h +++ b/include/uapi/linux/cec.h @@ -789,8 +789,8 @@ struct cec_event { #define CEC_MSG_SELECT_DIGITAL_SERVICE 0x93 #define CEC_MSG_TUNER_DEVICE_STATUS 0x07 /* Recording Flag Operand (rec_flag) */ -#define CEC_OP_REC_FLAG_USED 0 -#define CEC_OP_REC_FLAG_NOT_USED 1 +#define CEC_OP_REC_FLAG_NOT_USED 0 +#define CEC_OP_REC_FLAG_USED 1 /* Tuner Display Info Operand (tuner_display_info) */ #define CEC_OP_TUNER_DISPLAY_INFO_DIGITAL 0 #define CEC_OP_TUNER_DISPLAY_INFO_NONE 1 diff --git a/include/uapi/linux/fscrypt.h b/include/uapi/linux/fscrypt.h index 39ccfe9311c3..0c9a8b81d357 100644 --- a/include/uapi/linux/fscrypt.h +++ b/include/uapi/linux/fscrypt.h @@ -8,6 +8,7 @@ #ifndef _UAPI_LINUX_FSCRYPT_H #define _UAPI_LINUX_FSCRYPT_H +#include #include /* Encryption policy flags */ @@ -17,7 +18,8 @@ #define FSCRYPT_POLICY_FLAGS_PAD_32 0x03 #define FSCRYPT_POLICY_FLAGS_PAD_MASK 0x03 #define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04 -#define FSCRYPT_POLICY_FLAGS_VALID 0x07 +#define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 0x08 +#define FSCRYPT_POLICY_FLAGS_VALID 0x0F /* Encryption algorithms */ #define FSCRYPT_MODE_AES_256_XTS 1 @@ -108,11 +110,25 @@ struct fscrypt_key_specifier { } u; }; +/* + * Payload of Linux keyring key of type "fscrypt-provisioning", referenced by + * fscrypt_add_key_arg::key_id as an alternative to fscrypt_add_key_arg::raw. + */ +struct fscrypt_provisioning_key_payload { + __u32 type; + __u32 __reserved; + __u8 raw[]; +}; + /* Struct passed to FS_IOC_ADD_ENCRYPTION_KEY */ struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; - __u32 __reserved[9]; + __u32 key_id; + __u32 __reserved[7]; + /* N.B.: "temporary" flag, not reserved upstream */ +#define __FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001 + __u32 __flags; __u8 raw[]; }; diff --git a/include/uapi/linux/incrementalfs.h b/include/uapi/linux/incrementalfs.h new file mode 100644 index 000000000000..fd65f575cdf0 --- /dev/null +++ b/include/uapi/linux/incrementalfs.h @@ -0,0 +1,275 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Userspace interface for Incremental FS. + * + * Incremental FS is special-purpose Linux virtual file system that allows + * execution of a program while its binary and resource files are still being + * lazily downloaded over the network, USB etc. + * + * Copyright 2019 Google LLC + */ +#ifndef _UAPI_LINUX_INCREMENTALFS_H +#define _UAPI_LINUX_INCREMENTALFS_H + +#include +#include +#include +#include + +/* ===== constants ===== */ +#define INCFS_NAME "incremental-fs" +#define INCFS_MAGIC_NUMBER (0x5346434e49ul) +#define INCFS_DATA_FILE_BLOCK_SIZE 4096 +#define INCFS_HEADER_VER 1 + +/* TODO: This value is assumed in incfs_copy_signature_info_from_user to be the + * actual signature length. Set back to 64 when fixed. + */ +#define INCFS_MAX_HASH_SIZE 32 +#define INCFS_MAX_FILE_ATTR_SIZE 512 + +#define INCFS_PENDING_READS_FILENAME ".pending_reads" +#define INCFS_LOG_FILENAME ".log" +#define INCFS_XATTR_ID_NAME (XATTR_USER_PREFIX "incfs.id") +#define INCFS_XATTR_SIZE_NAME (XATTR_USER_PREFIX "incfs.size") +#define INCFS_XATTR_METADATA_NAME (XATTR_USER_PREFIX "incfs.metadata") + +#define INCFS_MAX_SIGNATURE_SIZE 8096 +#define INCFS_SIGNATURE_VERSION 2 +#define INCFS_SIGNATURE_SECTIONS 2 + +#define INCFS_IOCTL_BASE_CODE 'g' + +/* ===== ioctl requests on the command dir ===== */ + +/* Create a new file */ +#define INCFS_IOC_CREATE_FILE \ + _IOWR(INCFS_IOCTL_BASE_CODE, 30, struct incfs_new_file_args) + +/* Read file signature */ +#define INCFS_IOC_READ_FILE_SIGNATURE \ + _IOR(INCFS_IOCTL_BASE_CODE, 31, struct incfs_get_file_sig_args) + +/* + * Fill in one or more data block. This may only be called on a handle + * passed as a parameter to INCFS_IOC_PERMIT_FILLING + * + * Returns number of blocks filled in, or error if none were + */ +#define INCFS_IOC_FILL_BLOCKS \ + _IOR(INCFS_IOCTL_BASE_CODE, 32, struct incfs_fill_blocks) + +/* + * Permit INCFS_IOC_FILL_BLOCKS on the given file descriptor + * May only be called on .pending_reads file + * + * Returns 0 on success or error + */ +#define INCFS_IOC_PERMIT_FILL \ + _IOW(INCFS_IOCTL_BASE_CODE, 33, struct incfs_permit_fill) + +enum incfs_compression_alg { + COMPRESSION_NONE = 0, + COMPRESSION_LZ4 = 1 +}; + +enum incfs_block_flags { + INCFS_BLOCK_FLAGS_NONE = 0, + INCFS_BLOCK_FLAGS_HASH = 1, +}; + +typedef struct { + __u8 bytes[16]; +} incfs_uuid_t __attribute__((aligned (8))); + +/* + * Description of a pending read. A pending read - a read call by + * a userspace program for which the filesystem currently doesn't have data. + */ +struct incfs_pending_read_info { + /* Id of a file that is being read from. */ + incfs_uuid_t file_id; + + /* A number of microseconds since system boot to the read. */ + __aligned_u64 timestamp_us; + + /* Index of a file block that is being read. */ + __u32 block_index; + + /* A serial number of this pending read. */ + __u32 serial_number; +}; + +/* + * Description of a data or hash block to add to a data file. + */ +struct incfs_fill_block { + /* Index of a data block. */ + __u32 block_index; + + /* Length of data */ + __u32 data_len; + + /* + * A pointer to an actual data for the block. + * + * Equivalent to: __u8 *data; + */ + __aligned_u64 data; + + /* + * Compression algorithm used to compress the data block. + * Values from enum incfs_compression_alg. + */ + __u8 compression; + + /* Values from enum incfs_block_flags */ + __u8 flags; + + /* Reserved - must be 0 */ + __u16 reserved1; + + /* Reserved - must be 0 */ + __u32 reserved2; + + /* Reserved - must be 0 */ + __aligned_u64 reserved3; +}; + +/* + * Description of a number of blocks to add to a data file + * + * Argument for INCFS_IOC_FILL_BLOCKS + */ +struct incfs_fill_blocks { + /* Number of blocks */ + __u64 count; + + /* A pointer to an array of incfs_fill_block structs */ + __aligned_u64 fill_blocks; +}; + +/* + * Permit INCFS_IOC_FILL_BLOCKS on the given file descriptor + * May only be called on .pending_reads file + * + * Argument for INCFS_IOC_PERMIT_FILL + */ +struct incfs_permit_fill { + /* File to permit fills on */ + __u32 file_descriptor; +}; + +enum incfs_hash_tree_algorithm { + INCFS_HASH_TREE_NONE = 0, + INCFS_HASH_TREE_SHA256 = 1 +}; + +/* + * Create a new file or directory. + */ +struct incfs_new_file_args { + /* Id of a file to create. */ + incfs_uuid_t file_id; + + /* + * Total size of the new file. Ignored if S_ISDIR(mode). + */ + __aligned_u64 size; + + /* + * File mode. Permissions and dir flag. + */ + __u16 mode; + + /* Reserved - must be 0 */ + __u16 reserved1; + + /* Reserved - must be 0 */ + __u32 reserved2; + + /* + * A pointer to a null-terminated relative path to the file's parent + * dir. + * Max length: PATH_MAX + * + * Equivalent to: char *directory_path; + */ + __aligned_u64 directory_path; + + /* + * A pointer to a null-terminated file's name. + * Max length: PATH_MAX + * + * Equivalent to: char *file_name; + */ + __aligned_u64 file_name; + + /* + * A pointer to a file attribute to be set on creation. + * + * Equivalent to: u8 *file_attr; + */ + __aligned_u64 file_attr; + + /* + * Length of the data buffer specfied by file_attr. + * Max value: INCFS_MAX_FILE_ATTR_SIZE + */ + __u32 file_attr_len; + + /* Reserved - must be 0 */ + __u32 reserved4; + + /* + * Points to an APK V4 Signature data blob + * Signature must have two sections + * Format is: + * u32 version + * u32 size_of_hash_info_section + * u8 hash_info_section[] + * u32 size_of_signing_info_section + * u8 signing_info_section[] + * + * Note that incfs does not care about what is in signing_info_section + * + * hash_info_section has following format: + * u32 hash_algorithm; // Must be SHA256 == 1 + * u8 log2_blocksize; // Must be 12 for 4096 byte blocks + * u32 salt_size; + * u8 salt[]; + * u32 hash_size; + * u8 root_hash[]; + */ + __aligned_u64 signature_info; + + /* Size of signature_info */ + __aligned_u64 signature_size; + + /* Reserved - must be 0 */ + __aligned_u64 reserved6; +}; + +/* + * Request a digital signature blob for a given file. + * Argument for INCFS_IOC_READ_FILE_SIGNATURE ioctl + */ +struct incfs_get_file_sig_args { + /* + * A pointer to the data buffer to save an signature blob to. + * + * Equivalent to: u8 *file_signature; + */ + __aligned_u64 file_signature; + + /* Size of the buffer at file_signature. */ + __u32 file_signature_buf_size; + + /* + * Number of bytes save file_signature buffer. + * It is set after ioctl done. + */ + __u32 file_signature_len_out; +}; + +#endif /* _UAPI_LINUX_INCREMENTALFS_H */ diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h index 33eabbb8ada1..1d0350e44ae3 100644 --- a/include/uapi/linux/kcov.h +++ b/include/uapi/linux/kcov.h @@ -4,8 +4,60 @@ #include +/* + * Argument for KCOV_REMOTE_ENABLE ioctl, see Documentation/dev-tools/kcov.rst + * and the comment before kcov_remote_start() for usage details. + */ +struct kcov_remote_arg { + __u32 trace_mode; /* KCOV_TRACE_PC or KCOV_TRACE_CMP */ + __u32 area_size; /* Length of coverage buffer in words */ + __u32 num_handles; /* Size of handles array */ + __aligned_u64 common_handle; + __aligned_u64 handles[0]; +}; + +#define KCOV_REMOTE_MAX_HANDLES 0x100 + #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long) #define KCOV_ENABLE _IO('c', 100) #define KCOV_DISABLE _IO('c', 101) +#define KCOV_REMOTE_ENABLE _IOW('c', 102, struct kcov_remote_arg) + +enum { + /* + * Tracing coverage collection mode. + * Covered PCs are collected in a per-task buffer. + * In new KCOV version the mode is chosen by calling + * ioctl(fd, KCOV_ENABLE, mode). In older versions the mode argument + * was supposed to be 0 in such a call. So, for reasons of backward + * compatibility, we have chosen the value KCOV_TRACE_PC to be 0. + */ + KCOV_TRACE_PC = 0, + /* Collecting comparison operands mode. */ + KCOV_TRACE_CMP = 1, +}; + +/* + * The format for the types of collected comparisons. + * + * Bit 0 shows whether one of the arguments is a compile-time constant. + * Bits 1 & 2 contain log2 of the argument size, up to 8 bytes. + */ +#define KCOV_CMP_CONST (1 << 0) +#define KCOV_CMP_SIZE(n) ((n) << 1) +#define KCOV_CMP_MASK KCOV_CMP_SIZE(3) + +#define KCOV_SUBSYSTEM_COMMON (0x00ull << 56) +#define KCOV_SUBSYSTEM_USB (0x01ull << 56) + +#define KCOV_SUBSYSTEM_MASK (0xffull << 56) +#define KCOV_INSTANCE_MASK (0xffffffffull) + +static inline __u64 kcov_remote_handle(__u64 subsys, __u64 inst) +{ + if (subsys & ~KCOV_SUBSYSTEM_MASK || inst & ~KCOV_INSTANCE_MASK) + return 0; + return subsys | inst; +} #endif /* _LINUX_KCOV_IOCTLS_H */ diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index d94c457845b1..de394b15079d 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -73,6 +73,7 @@ #define DAXFS_MAGIC 0x64646178 #define BINFMTFS_MAGIC 0x42494e4d #define DEVPTS_SUPER_MAGIC 0x1cd1 +#define BINDERFS_SUPER_MAGIC 0x6c6f6f70 #define FUTEXFS_SUPER_MAGIC 0xBAD1DEA #define PIPEFS_MAGIC 0x50495045 #define PROC_SUPER_MAGIC 0x9fa0 diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 145f242c7c90..0303ad623ab4 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -256,10 +256,6 @@ enum nd_driver_flags { ND_DRIVER_DAX_PMEM = 1 << ND_DEVICE_DAX_PMEM, }; -enum { - ND_MIN_NAMESPACE_SIZE = 0x00400000, -}; - enum ars_masks { ARS_STATUS_MASK = 0x0000FFFF, ARS_EXT_STATUS_SHIFT = 16, diff --git a/include/uapi/linux/netfilter/xt_sctp.h b/include/uapi/linux/netfilter/xt_sctp.h index 4bc6d1a08781..b4d804a9fccb 100644 --- a/include/uapi/linux/netfilter/xt_sctp.h +++ b/include/uapi/linux/netfilter/xt_sctp.h @@ -41,19 +41,19 @@ struct xt_sctp_info { #define SCTP_CHUNKMAP_SET(chunkmap, type) \ do { \ (chunkmap)[type / bytes(__u32)] |= \ - 1 << (type % bytes(__u32)); \ + 1u << (type % bytes(__u32)); \ } while (0) #define SCTP_CHUNKMAP_CLEAR(chunkmap, type) \ do { \ (chunkmap)[type / bytes(__u32)] &= \ - ~(1 << (type % bytes(__u32))); \ + ~(1u << (type % bytes(__u32))); \ } while (0) #define SCTP_CHUNKMAP_IS_SET(chunkmap, type) \ ({ \ ((chunkmap)[type / bytes (__u32)] & \ - (1 << (type % bytes (__u32)))) ? 1: 0; \ + (1u << (type % bytes (__u32)))) ? 1: 0; \ }) #define SCTP_CHUNKMAP_RESET(chunkmap) \ diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 7b35e98d3c58..ad80a5c885d5 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -167,8 +167,8 @@ struct statx { #define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */ #define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */ #define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ - #define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ +#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #endif /* _UAPI_LINUX_STAT_H */ diff --git a/include/uapi/linux/usb/charger.h b/include/uapi/linux/usb/charger.h index 5f72af35b3ed..ad22079125bf 100644 --- a/include/uapi/linux/usb/charger.h +++ b/include/uapi/linux/usb/charger.h @@ -14,18 +14,18 @@ * ACA (Accessory Charger Adapters) */ enum usb_charger_type { - UNKNOWN_TYPE, - SDP_TYPE, - DCP_TYPE, - CDP_TYPE, - ACA_TYPE, + UNKNOWN_TYPE = 0, + SDP_TYPE = 1, + DCP_TYPE = 2, + CDP_TYPE = 3, + ACA_TYPE = 4, }; /* USB charger state */ enum usb_charger_state { - USB_CHARGER_DEFAULT, - USB_CHARGER_PRESENT, - USB_CHARGER_ABSENT, + USB_CHARGER_DEFAULT = 0, + USB_CHARGER_PRESENT = 1, + USB_CHARGER_ABSENT = 2, }; #endif /* _UAPI__LINUX_USB_CHARGER_H */ diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 91bf4b6ec822..8fe7585f85b5 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -44,5 +44,6 @@ #define VIRTIO_ID_INPUT 18 /* virtio input */ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ +#define VIRTIO_ID_PMEM 27 /* virtio pmem */ #endif /* _LINUX_VIRTIO_IDS_H */ diff --git a/include/uapi/linux/virtio_pmem.h b/include/uapi/linux/virtio_pmem.h new file mode 100644 index 000000000000..efcd72f2d20d --- /dev/null +++ b/include/uapi/linux/virtio_pmem.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* + * Definitions for virtio-pmem devices. + * + * Copyright (C) 2019 Red Hat, Inc. + * + * Author(s): Pankaj Gupta + */ + +#ifndef _UAPI_LINUX_VIRTIO_PMEM_H +#define _UAPI_LINUX_VIRTIO_PMEM_H + +#include +#include +#include + +struct virtio_pmem_config { + __u64 start; + __u64 size; +}; + +#define VIRTIO_PMEM_REQ_TYPE_FLUSH 0 + +struct virtio_pmem_resp { + /* Host return status corresponding to flush request */ + __u32 ret; +}; + +struct virtio_pmem_req { + /* command type */ + __u32 type; +}; + +#endif diff --git a/init/Kconfig b/init/Kconfig index 4b5d3f03b25b..c8ca71df333b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -18,6 +18,9 @@ config DEFCONFIG_LIST config TOOLS_SUPPORT_RELR bool "Declare tool support for RELR" + # Prevent this from being enabled by default in allyesconfig or + # allmodconfig builds. + depends on !COMPILE_TEST config CONSTRUCTORS bool diff --git a/init/init_task.c b/init/init_task.c index 9325fee7dc82..7da82dd8034f 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -20,6 +21,13 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); struct task_struct init_task = INIT_TASK(init_task); EXPORT_SYMBOL(init_task); +#ifdef CONFIG_SHADOW_CALL_STACK +unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data + __aligned(SCS_SIZE) = { + [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC +}; +#endif + /* * Initial thread structure. Alignment of this is handled by a special * linker map entry. diff --git a/init/main.c b/init/main.c index 219ac9e92bc3..62192c5e508b 100644 --- a/init/main.c +++ b/init/main.c @@ -102,7 +102,6 @@ static int kernel_init(void *); extern void init_IRQ(void); -extern void fork_init(void); extern void radix_tree_init(void); /* diff --git a/ipc/sem.c b/ipc/sem.c index d6dd2dc9ddad..6adc245f3e02 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2248,11 +2248,9 @@ void exit_sem(struct task_struct *tsk) ipc_assert_locked_object(&sma->sem_perm); list_del(&un->list_id); - /* we are the last process using this ulp, acquiring ulp->lock - * isn't required. Besides that, we are also protected against - * IPC_RMID as we hold sma->sem_perm lock now - */ + spin_lock(&ulp->lock); list_del_rcu(&un->list_proc); + spin_unlock(&ulp->lock); /* perform adjustments registered in un */ for (i = 0; i < sma->sem_nsems; i++) { diff --git a/kernel/Makefile b/kernel/Makefile index 7ccfbb98bd2b..273c913d1a9d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -61,9 +61,6 @@ obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += time/ obj-$(CONFIG_FUTEX) += futex.o -ifeq ($(CONFIG_COMPAT),y) -obj-$(CONFIG_FUTEX) += futex_compat.o -endif obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o obj-$(CONFIG_SMP) += smp.o ifneq ($(CONFIG_SMP),y) @@ -116,6 +113,7 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_CPU_PM) += cpu_pm.o obj-$(CONFIG_BPF) += bpf/ obj-$(CONFIG_CFI_CLANG) += cfi.o +obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/kernel/audit.c b/kernel/audit.c index cc187d0e9f9d..e656f46356c8 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1081,13 +1081,11 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature audit_log_end(ab); } -static int audit_set_feature(struct sk_buff *skb) +static int audit_set_feature(struct audit_features *uaf) { - struct audit_features *uaf; int i; BUILD_BUG_ON(AUDIT_LAST_FEATURE + 1 > ARRAY_SIZE(audit_feature_names)); - uaf = nlmsg_data(nlmsg_hdr(skb)); /* if there is ever a version 2 we should handle that here */ @@ -1155,6 +1153,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { u32 seq; void *data; + int data_len; int err; struct audit_buffer *ab; u16 msg_type = nlh->nlmsg_type; @@ -1168,6 +1167,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) seq = nlh->nlmsg_seq; data = nlmsg_data(nlh); + data_len = nlmsg_len(nlh); switch (msg_type) { case AUDIT_GET: { @@ -1191,7 +1191,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) struct audit_status s; memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ - memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh))); + memcpy(&s, data, min_t(size_t, sizeof(s), data_len)); if (s.mask & AUDIT_STATUS_ENABLED) { err = audit_set_enabled(s.enabled); if (err < 0) @@ -1295,7 +1295,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return err; break; case AUDIT_SET_FEATURE: - err = audit_set_feature(skb); + if (data_len < sizeof(struct audit_features)) + return -EINVAL; + err = audit_set_feature(data); if (err) return err; break; @@ -1307,6 +1309,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) err = audit_filter(msg_type, AUDIT_FILTER_USER); if (err == 1) { /* match or error */ + char *str = data; + err = 0; if (msg_type == AUDIT_USER_TTY) { err = tty_audit_push(); @@ -1314,26 +1318,24 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) break; } audit_log_common_recv_msg(&ab, msg_type); - if (msg_type != AUDIT_USER_TTY) + if (msg_type != AUDIT_USER_TTY) { + /* ensure NULL termination */ + str[data_len - 1] = '\0'; audit_log_format(ab, " msg='%.*s'", AUDIT_MESSAGE_TEXT_MAX, - (char *)data); - else { - int size; - + str); + } else { audit_log_format(ab, " data="); - size = nlmsg_len(nlh); - if (size > 0 && - ((unsigned char *)data)[size - 1] == '\0') - size--; - audit_log_n_untrustedstring(ab, data, size); + if (data_len > 0 && str[data_len - 1] == '\0') + data_len--; + audit_log_n_untrustedstring(ab, str, data_len); } audit_log_end(ab); } break; case AUDIT_ADD_RULE: case AUDIT_DEL_RULE: - if (nlmsg_len(nlh) < sizeof(struct audit_rule_data)) + if (data_len < sizeof(struct audit_rule_data)) return -EINVAL; if (audit_enabled == AUDIT_LOCKED) { audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE); @@ -1341,7 +1343,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) audit_log_end(ab); return -EPERM; } - err = audit_rule_change(msg_type, seq, data, nlmsg_len(nlh)); + err = audit_rule_change(msg_type, seq, data, data_len); break; case AUDIT_LIST_RULES: err = audit_list_rules_send(skb, seq); @@ -1355,7 +1357,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) case AUDIT_MAKE_EQUIV: { void *bufp = data; u32 sizes[2]; - size_t msglen = nlmsg_len(nlh); + size_t msglen = data_len; char *old, *new; err = -EINVAL; @@ -1431,7 +1433,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) memset(&s, 0, sizeof(s)); /* guard against past and future API changes */ - memcpy(&s, data, min_t(size_t, sizeof(s), nlmsg_len(nlh))); + memcpy(&s, data, min_t(size_t, sizeof(s), data_len)); /* check if new data is valid */ if ((s.enabled != 0 && s.enabled != 1) || (s.log_passwd != 0 && s.log_passwd != 1)) diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 4a98f6e314a9..35f1d706bd5b 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -365,12 +365,12 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent) struct dentry *d = kern_path_locked(watch->path, parent); if (IS_ERR(d)) return PTR_ERR(d); - inode_unlock(d_backing_inode(parent->dentry)); if (d_is_positive(d)) { /* update watch filter fields */ watch->dev = d->d_sb->s_dev; watch->ino = d_backing_inode(d)->i_ino; } + inode_unlock(d_backing_inode(parent->dentry)); dput(d); return 0; } diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 215c6e1ee026..16cf396ea738 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -435,6 +435,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, bufp = data->buf; for (i = 0; i < data->field_count; i++) { struct audit_field *f = &entry->rule.fields[i]; + u32 f_val; err = -EINVAL; @@ -443,12 +444,12 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, goto exit_free; f->type = data->fields[i]; - f->val = data->values[i]; + f_val = data->values[i]; /* Support legacy tests for a valid loginuid */ - if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) { + if ((f->type == AUDIT_LOGINUID) && (f_val == AUDIT_UID_UNSET)) { f->type = AUDIT_LOGINUID_SET; - f->val = 0; + f_val = 0; entry->rule.pflags |= AUDIT_LOGINUID_LEGACY; } @@ -464,7 +465,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_SUID: case AUDIT_FSUID: case AUDIT_OBJ_UID: - f->uid = make_kuid(current_user_ns(), f->val); + f->uid = make_kuid(current_user_ns(), f_val); if (!uid_valid(f->uid)) goto exit_free; break; @@ -473,12 +474,13 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_SGID: case AUDIT_FSGID: case AUDIT_OBJ_GID: - f->gid = make_kgid(current_user_ns(), f->val); + f->gid = make_kgid(current_user_ns(), f_val); if (!gid_valid(f->gid)) goto exit_free; break; case AUDIT_SESSIONID: case AUDIT_ARCH: + f->val = f_val; entry->rule.arch_f = f; break; case AUDIT_SUBJ_USER: @@ -491,11 +493,13 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, case AUDIT_OBJ_TYPE: case AUDIT_OBJ_LEV_LOW: case AUDIT_OBJ_LEV_HIGH: - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; - + } + entry->rule.buflen += f_val; + f->lsm_str = str; err = security_audit_rule_init(f->type, f->op, str, (void **)&f->lsm_rule); /* Keep currently invalid fields around in case they @@ -504,68 +508,71 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, pr_warn("audit rule for LSM \'%s\' is invalid\n", str); err = 0; - } - if (err) { - kfree(str); + } else if (err) goto exit_free; - } else - f->lsm_str = str; break; case AUDIT_WATCH: - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; - - err = audit_to_watch(&entry->rule, str, f->val, f->op); + } + err = audit_to_watch(&entry->rule, str, f_val, f->op); if (err) { kfree(str); goto exit_free; } + entry->rule.buflen += f_val; break; case AUDIT_DIR: - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); goto exit_free; - entry->rule.buflen += f->val; - + } err = audit_make_tree(&entry->rule, str, f->op); kfree(str); if (err) goto exit_free; + entry->rule.buflen += f_val; break; case AUDIT_INODE: + f->val = f_val; err = audit_to_inode(&entry->rule, f); if (err) goto exit_free; break; case AUDIT_FILTERKEY: - if (entry->rule.filterkey || f->val > AUDIT_MAX_KEY_LEN) + if (entry->rule.filterkey || f_val > AUDIT_MAX_KEY_LEN) goto exit_free; - str = audit_unpack_string(&bufp, &remain, f->val); - if (IS_ERR(str)) - goto exit_free; - entry->rule.buflen += f->val; - entry->rule.filterkey = str; - break; - case AUDIT_EXE: - if (entry->rule.exe || f->val > PATH_MAX) - goto exit_free; - str = audit_unpack_string(&bufp, &remain, f->val); + str = audit_unpack_string(&bufp, &remain, f_val); if (IS_ERR(str)) { err = PTR_ERR(str); goto exit_free; } - entry->rule.buflen += f->val; - - audit_mark = audit_alloc_mark(&entry->rule, str, f->val); + entry->rule.buflen += f_val; + entry->rule.filterkey = str; + break; + case AUDIT_EXE: + if (entry->rule.exe || f_val > PATH_MAX) + goto exit_free; + str = audit_unpack_string(&bufp, &remain, f_val); + if (IS_ERR(str)) { + err = PTR_ERR(str); + goto exit_free; + } + audit_mark = audit_alloc_mark(&entry->rule, str, f_val); if (IS_ERR(audit_mark)) { kfree(str); err = PTR_ERR(audit_mark); goto exit_free; } + entry->rule.buflen += f_val; entry->rule.exe = audit_mark; break; + default: + f->val = f_val; + break; } } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 76d789d6cea0..ffa8d64f6fef 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1102,7 +1102,7 @@ static void audit_log_execve_info(struct audit_context *context, } /* write as much as we can to the audit log */ - if (len_buf > 0) { + if (len_buf >= 0) { /* NOTE: some magic numbers here - basically if we * can't fit a reasonable amount of data into the * existing audit buffer, flush it and start with diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 546113430049..6b7500bbdb53 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -27,129 +27,361 @@ void cgroup_bpf_put(struct cgroup *cgrp) { unsigned int type; - for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) { - struct bpf_prog *prog = cgrp->bpf.prog[type]; + for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { + struct list_head *progs = &cgrp->bpf.progs[type]; + struct bpf_prog_list *pl, *tmp; - if (prog) { - bpf_prog_put(prog); + list_for_each_entry_safe(pl, tmp, progs, node) { + list_del(&pl->node); + bpf_prog_put(pl->prog); + kfree(pl); static_branch_dec(&cgroup_bpf_enabled_key); } + bpf_prog_array_free(cgrp->bpf.effective[type]); } } +/* count number of elements in the list. + * it's slow but the list cannot be long + */ +static u32 prog_list_length(struct list_head *head) +{ + struct bpf_prog_list *pl; + u32 cnt = 0; + + list_for_each_entry(pl, head, node) { + if (!pl->prog) + continue; + cnt++; + } + return cnt; +} + +/* if parent has non-overridable prog attached, + * disallow attaching new programs to the descendent cgroup. + * if parent has overridable or multi-prog, allow attaching + */ +static bool hierarchy_allows_attach(struct cgroup *cgrp, + enum bpf_attach_type type, + u32 new_flags) +{ + struct cgroup *p; + + p = cgroup_parent(cgrp); + if (!p) + return true; + do { + u32 flags = p->bpf.flags[type]; + u32 cnt; + + if (flags & BPF_F_ALLOW_MULTI) + return true; + cnt = prog_list_length(&p->bpf.progs[type]); + WARN_ON_ONCE(cnt > 1); + if (cnt == 1) + return !!(flags & BPF_F_ALLOW_OVERRIDE); + p = cgroup_parent(p); + } while (p); + return true; +} + +/* compute a chain of effective programs for a given cgroup: + * start from the list of programs in this cgroup and add + * all parent programs. + * Note that parent's F_ALLOW_OVERRIDE-type program is yielding + * to programs in this cgroup + */ +static int compute_effective_progs(struct cgroup *cgrp, + enum bpf_attach_type type, + struct bpf_prog_array __rcu **array) +{ + struct bpf_prog_array __rcu *progs; + struct bpf_prog_list *pl; + struct cgroup *p = cgrp; + int cnt = 0; + + /* count number of effective programs by walking parents */ + do { + if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) + cnt += prog_list_length(&p->bpf.progs[type]); + p = cgroup_parent(p); + } while (p); + + progs = bpf_prog_array_alloc(cnt, GFP_KERNEL); + if (!progs) + return -ENOMEM; + + /* populate the array with effective progs */ + cnt = 0; + p = cgrp; + do { + if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) + list_for_each_entry(pl, + &p->bpf.progs[type], node) { + if (!pl->prog) + continue; + rcu_dereference_protected(progs, 1)-> + progs[cnt++] = pl->prog; + } + p = cgroup_parent(p); + } while (p); + + *array = progs; + return 0; +} + +static void activate_effective_progs(struct cgroup *cgrp, + enum bpf_attach_type type, + struct bpf_prog_array __rcu *array) +{ + struct bpf_prog_array __rcu *old_array; + + old_array = xchg(&cgrp->bpf.effective[type], array); + /* free prog array after grace period, since __cgroup_bpf_run_*() + * might be still walking the array + */ + bpf_prog_array_free(old_array); +} + /** * cgroup_bpf_inherit() - inherit effective programs from parent * @cgrp: the cgroup to modify - * @parent: the parent to inherit from */ -void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent) +int cgroup_bpf_inherit(struct cgroup *cgrp) { - unsigned int type; +/* has to use marco instead of const int, since compiler thinks + * that array below is variable length + */ +#define NR ARRAY_SIZE(cgrp->bpf.effective) + struct bpf_prog_array __rcu *arrays[NR] = {}; + int i; - for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) { - struct bpf_prog *e; + for (i = 0; i < NR; i++) + INIT_LIST_HEAD(&cgrp->bpf.progs[i]); - e = rcu_dereference_protected(parent->bpf.effective[type], - lockdep_is_held(&cgroup_mutex)); - rcu_assign_pointer(cgrp->bpf.effective[type], e); - cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type]; - } + for (i = 0; i < NR; i++) + if (compute_effective_progs(cgrp, i, &arrays[i])) + goto cleanup; + + for (i = 0; i < NR; i++) + activate_effective_progs(cgrp, i, arrays[i]); + + return 0; +cleanup: + for (i = 0; i < NR; i++) + bpf_prog_array_free(arrays[i]); + return -ENOMEM; } +#define BPF_CGROUP_MAX_PROGS 64 + /** - * __cgroup_bpf_update() - Update the pinned program of a cgroup, and + * __cgroup_bpf_attach() - Attach the program to a cgroup, and * propagate the change to descendants * @cgrp: The cgroup which descendants to traverse - * @parent: The parent of @cgrp, or %NULL if @cgrp is the root - * @prog: A new program to pin - * @type: Type of pinning operation (ingress/egress) - * - * Each cgroup has a set of two pointers for bpf programs; one for eBPF - * programs it owns, and which is effective for execution. - * - * If @prog is not %NULL, this function attaches a new program to the cgroup - * and releases the one that is currently attached, if any. @prog is then made - * the effective program of type @type in that cgroup. - * - * If @prog is %NULL, the currently attached program of type @type is released, - * and the effective program of the parent cgroup (if any) is inherited to - * @cgrp. - * - * Then, the descendants of @cgrp are walked and the effective program for - * each of them is set to the effective program of @cgrp unless the - * descendant has its own program attached, in which case the subbranch is - * skipped. This ensures that delegated subcgroups with own programs are left - * untouched. + * @prog: A program to attach + * @type: Type of attach operation * * Must be called with cgroup_mutex held. */ -int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, - struct bpf_prog *prog, enum bpf_attach_type type, - bool new_overridable) +int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags) { - struct bpf_prog *old_prog, *effective = NULL; - struct cgroup_subsys_state *pos; - bool overridable = true; + struct list_head *progs = &cgrp->bpf.progs[type]; + struct bpf_prog *old_prog = NULL; + struct cgroup_subsys_state *css; + struct bpf_prog_list *pl; + bool pl_was_allocated; + u32 old_flags; + int err; - if (parent) { - overridable = !parent->bpf.disallow_override[type]; - effective = rcu_dereference_protected(parent->bpf.effective[type], - lockdep_is_held(&cgroup_mutex)); - } + if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) + /* invalid combination */ + return -EINVAL; - if (prog && effective && !overridable) - /* if parent has non-overridable prog attached, disallow - * attaching new programs to descendent cgroup + if (!hierarchy_allows_attach(cgrp, type, flags)) + return -EPERM; + + if (!list_empty(progs) && cgrp->bpf.flags[type] != flags) + /* Disallow attaching non-overridable on top + * of existing overridable in this cgroup. + * Disallow attaching multi-prog if overridable or none */ return -EPERM; - if (prog && effective && overridable != new_overridable) - /* if parent has overridable prog attached, only - * allow overridable programs in descendent cgroup - */ - return -EPERM; + if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) + return -E2BIG; - old_prog = cgrp->bpf.prog[type]; + if (flags & BPF_F_ALLOW_MULTI) { + list_for_each_entry(pl, progs, node) + if (pl->prog == prog) + /* disallow attaching the same prog twice */ + return -EINVAL; - if (prog) { - overridable = new_overridable; - effective = prog; - if (old_prog && - cgrp->bpf.disallow_override[type] == new_overridable) - /* disallow attaching non-overridable on top - * of existing overridable in this cgroup - * and vice versa - */ - return -EPERM; - } - - if (!prog && !old_prog) - /* report error when trying to detach and nothing is attached */ - return -ENOENT; - - cgrp->bpf.prog[type] = prog; - - css_for_each_descendant_pre(pos, &cgrp->self) { - struct cgroup *desc = container_of(pos, struct cgroup, self); - - /* skip the subtree if the descendant has its own program */ - if (desc->bpf.prog[type] && desc != cgrp) { - pos = css_rightmost_descendant(pos); + pl = kmalloc(sizeof(*pl), GFP_KERNEL); + if (!pl) + return -ENOMEM; + pl_was_allocated = true; + pl->prog = prog; + list_add_tail(&pl->node, progs); + } else { + if (list_empty(progs)) { + pl = kmalloc(sizeof(*pl), GFP_KERNEL); + if (!pl) + return -ENOMEM; + pl_was_allocated = true; + list_add_tail(&pl->node, progs); } else { - rcu_assign_pointer(desc->bpf.effective[type], - effective); - desc->bpf.disallow_override[type] = !overridable; + pl = list_first_entry(progs, typeof(*pl), node); + old_prog = pl->prog; + pl_was_allocated = false; } + pl->prog = prog; } - if (prog) - static_branch_inc(&cgroup_bpf_enabled_key); + old_flags = cgrp->bpf.flags[type]; + cgrp->bpf.flags[type] = flags; + /* allocate and recompute effective prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + err = compute_effective_progs(desc, type, &desc->bpf.inactive); + if (err) + goto cleanup; + } + + /* all allocations were successful. Activate all prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + activate_effective_progs(desc, type, desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + static_branch_inc(&cgroup_bpf_enabled_key); if (old_prog) { bpf_prog_put(old_prog); static_branch_dec(&cgroup_bpf_enabled_key); } return 0; + +cleanup: + /* oom while computing effective. Free all computed effective arrays + * since they were not activated + */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + bpf_prog_array_free(desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + /* and cleanup the prog list */ + pl->prog = old_prog; + if (pl_was_allocated) { + list_del(&pl->node); + kfree(pl); + } + return err; +} + +/** + * __cgroup_bpf_detach() - Detach the program from a cgroup, and + * propagate the change to descendants + * @cgrp: The cgroup which descendants to traverse + * @prog: A program to detach or NULL + * @type: Type of detach operation + * + * Must be called with cgroup_mutex held. + */ +int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 unused_flags) +{ + struct list_head *progs = &cgrp->bpf.progs[type]; + u32 flags = cgrp->bpf.flags[type]; + struct bpf_prog *old_prog = NULL; + struct cgroup_subsys_state *css; + struct bpf_prog_list *pl; + int err; + + if (flags & BPF_F_ALLOW_MULTI) { + if (!prog) + /* to detach MULTI prog the user has to specify valid FD + * of the program to be detached + */ + return -EINVAL; + } else { + if (list_empty(progs)) + /* report error when trying to detach and nothing is attached */ + return -ENOENT; + } + + if (flags & BPF_F_ALLOW_MULTI) { + /* find the prog and detach it */ + list_for_each_entry(pl, progs, node) { + if (pl->prog != prog) + continue; + old_prog = prog; + /* mark it deleted, so it's ignored while + * recomputing effective + */ + pl->prog = NULL; + break; + } + if (!old_prog) + return -ENOENT; + } else { + /* to maintain backward compatibility NONE and OVERRIDE cgroups + * allow detaching with invalid FD (prog==NULL) + */ + pl = list_first_entry(progs, typeof(*pl), node); + old_prog = pl->prog; + pl->prog = NULL; + } + + /* allocate and recompute effective prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + err = compute_effective_progs(desc, type, &desc->bpf.inactive); + if (err) + goto cleanup; + } + + /* all allocations were successful. Activate all prog arrays */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + activate_effective_progs(desc, type, desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + /* now can actually delete it from this cgroup list */ + list_del(&pl->node); + kfree(pl); + if (list_empty(progs)) + /* last program was detached, reset flags to zero */ + cgrp->bpf.flags[type] = 0; + + bpf_prog_put(old_prog); + static_branch_dec(&cgroup_bpf_enabled_key); + return 0; + +cleanup: + /* oom while computing effective. Free all computed effective arrays + * since they were not activated + */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + bpf_prog_array_free(desc->bpf.inactive); + desc->bpf.inactive = NULL; + } + + /* and restore back old_prog */ + pl->prog = old_prog; + return err; } /** @@ -171,36 +403,26 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, enum bpf_attach_type type) { - struct bpf_prog *prog; + unsigned int offset = skb->data - skb_network_header(skb); + struct sock *save_sk; struct cgroup *cgrp; - int ret = 0; + int ret; if (!sk || !sk_fullsock(sk)) return 0; - if (sk->sk_family != AF_INET && - sk->sk_family != AF_INET6) + if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) return 0; cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - - rcu_read_lock(); - - prog = rcu_dereference(cgrp->bpf.effective[type]); - if (prog) { - unsigned int offset = skb->data - skb_network_header(skb); - struct sock *save_sk = skb->sk; - - skb->sk = sk; - __skb_push(skb, offset); - ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM; - __skb_pull(skb, offset); - skb->sk = save_sk; - } - - rcu_read_unlock(); - - return ret; + save_sk = skb->sk; + skb->sk = sk; + __skb_push(skb, offset); + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, + bpf_prog_run_save_cb); + __skb_pull(skb, offset); + skb->sk = save_sk; + return ret == 1 ? 0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); @@ -221,19 +443,10 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk, enum bpf_attach_type type) { struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - struct bpf_prog *prog; - int ret = 0; + int ret; - - rcu_read_lock(); - - prog = rcu_dereference(cgrp->bpf.effective[type]); - if (prog) - ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM; - - rcu_read_unlock(); - - return ret; + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN); + return ret == 1 ? 0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); @@ -258,18 +471,10 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, enum bpf_attach_type type) { struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); - struct bpf_prog *prog; - int ret = 0; + int ret; - - rcu_read_lock(); - - prog = rcu_dereference(cgrp->bpf.effective[type]); - if (prog) - ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM; - - rcu_read_unlock(); - - return ret; + ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops, + BPF_PROG_RUN); + return ret == 1 ? 0 : -EPERM; } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 6f812bdfcb0a..cc84110949ac 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1481,6 +1481,118 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err) } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); +static unsigned int __bpf_prog_ret1(const void *ctx, + const struct bpf_insn *insn) +{ + return 1; +} + +static struct bpf_prog_dummy { + struct bpf_prog prog; +} dummy_bpf_prog = { + .prog = { + .bpf_func = __bpf_prog_ret1, + }, +}; + +/* to avoid allocating empty bpf_prog_array for cgroups that + * don't have bpf program attached use one global 'empty_prog_array' + * It will not be modified the caller of bpf_prog_array_alloc() + * (since caller requested prog_cnt == 0) + * that pointer should be 'freed' by bpf_prog_array_free() + */ +static struct { + struct bpf_prog_array hdr; + struct bpf_prog *null_prog; +} empty_prog_array = { + .null_prog = NULL, +}; + +struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) +{ + if (prog_cnt) + return kzalloc(sizeof(struct bpf_prog_array) + + sizeof(struct bpf_prog *) * (prog_cnt + 1), + flags); + + return &empty_prog_array.hdr; +} + +void bpf_prog_array_free(struct bpf_prog_array __rcu *progs) +{ + if (!progs || + progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr) + return; + kfree_rcu(progs, rcu); +} + +void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, + struct bpf_prog *old_prog) +{ + struct bpf_prog **prog = progs->progs; + + for (; *prog; prog++) + if (*prog == old_prog) { + WRITE_ONCE(*prog, &dummy_bpf_prog.prog); + break; + } +} + +int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, + struct bpf_prog *exclude_prog, + struct bpf_prog *include_prog, + struct bpf_prog_array **new_array) +{ + int new_prog_cnt, carry_prog_cnt = 0; + struct bpf_prog **existing_prog; + struct bpf_prog_array *array; + int new_prog_idx = 0; + + /* Figure out how many existing progs we need to carry over to + * the new array. + */ + if (old_array) { + existing_prog = old_array->progs; + for (; *existing_prog; existing_prog++) { + if (*existing_prog != exclude_prog && + *existing_prog != &dummy_bpf_prog.prog) + carry_prog_cnt++; + if (*existing_prog == include_prog) + return -EEXIST; + } + } + + /* How many progs (not NULL) will be in the new array? */ + new_prog_cnt = carry_prog_cnt; + if (include_prog) + new_prog_cnt += 1; + + /* Do we have any prog (not NULL) in the new array? */ + if (!new_prog_cnt) { + *new_array = NULL; + return 0; + } + + /* +1 as the end of prog_array is marked with NULL */ + array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL); + if (!array) + return -ENOMEM; + + /* Fill in the new prog array */ + if (carry_prog_cnt) { + existing_prog = old_array->progs; + for (; *existing_prog; existing_prog++) + if (*existing_prog != exclude_prog && + *existing_prog != &dummy_bpf_prog.prog) + array->progs[new_prog_idx++] = *existing_prog; + } + if (include_prog) + array->progs[new_prog_idx++] = include_prog; + array->progs[new_prog_idx] = NULL; + *new_array = array; + return 0; +} + static void bpf_prog_free_deferred(struct work_struct *work) { struct bpf_prog_aux *aux; diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 0f00b4fd20b5..1a846a636ae1 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -391,8 +391,7 @@ static int dev_map_notification(struct notifier_block *notifier, struct bpf_dtab_netdev *dev, *odev; dev = READ_ONCE(dtab->netdev_map[i]); - if (!dev || - dev->dev->ifindex != netdev->ifindex) + if (!dev || netdev != dev->dev) continue; odev = cmpxchg(&dtab->netdev_map[i], dev, NULL); if (dev == odev) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 34f2573b4d34..89d58554eb99 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -397,12 +397,12 @@ static int map_create(union bpf_attr *attr) err = bpf_map_new_fd(map, f_flags); if (err < 0) { /* failed to allocate fd. - * bpf_map_put() is needed because the above + * bpf_map_put_with_uref() is needed because the above * bpf_map_alloc_id() has published the map * to the userspace and the userspace may * have refcnt-ed it through BPF_MAP_GET_FD_BY_ID. */ - bpf_map_put(map); + bpf_map_put_with_uref(map); return err; } @@ -1242,6 +1242,9 @@ static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach) return 0; } +#define BPF_F_ATTACH_MASK \ + (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI) + static int bpf_prog_attach(const union bpf_attr *attr) { enum bpf_prog_type ptype; @@ -1255,7 +1258,7 @@ static int bpf_prog_attach(const union bpf_attr *attr) if (CHECK_ATTR(BPF_PROG_ATTACH)) return -EINVAL; - if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE) + if (attr->attach_flags & ~BPF_F_ATTACH_MASK) return -EINVAL; switch (attr->attach_type) { @@ -1286,8 +1289,8 @@ static int bpf_prog_attach(const union bpf_attr *attr) return PTR_ERR(cgrp); } - ret = cgroup_bpf_update(cgrp, prog, attr->attach_type, - attr->attach_flags & BPF_F_ALLOW_OVERRIDE); + ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type, + attr->attach_flags); if (ret) bpf_prog_put(prog); cgroup_put(cgrp); @@ -1299,6 +1302,8 @@ static int bpf_prog_attach(const union bpf_attr *attr) static int bpf_prog_detach(const union bpf_attr *attr) { + enum bpf_prog_type ptype; + struct bpf_prog *prog; struct cgroup *cgrp; int ret; @@ -1311,23 +1316,33 @@ static int bpf_prog_detach(const union bpf_attr *attr) switch (attr->attach_type) { case BPF_CGROUP_INET_INGRESS: case BPF_CGROUP_INET_EGRESS: + ptype = BPF_PROG_TYPE_CGROUP_SKB; + break; case BPF_CGROUP_INET_SOCK_CREATE: + ptype = BPF_PROG_TYPE_CGROUP_SOCK; + break; case BPF_CGROUP_SOCK_OPS: - cgrp = cgroup_get_from_fd(attr->target_fd); - if (IS_ERR(cgrp)) - return PTR_ERR(cgrp); - - ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false); - cgroup_put(cgrp); + ptype = BPF_PROG_TYPE_SOCK_OPS; break; case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_VERDICT: - ret = sockmap_get_from_fd(attr, false); - break; + return sockmap_get_from_fd(attr, false); default: return -EINVAL; } + cgrp = cgroup_get_from_fd(attr->target_fd); + if (IS_ERR(cgrp)) + return PTR_ERR(cgrp); + + prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); + if (IS_ERR(prog)) + prog = NULL; + + ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0); + if (prog) + bpf_prog_put(prog); + cgroup_put(cgrp); return ret; } @@ -1448,7 +1463,7 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr) fd = bpf_map_new_fd(map, f_flags); if (fd < 0) - bpf_map_put(map); + bpf_map_put_with_uref(map); return fd; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a4875ff0bab1..615a2e44d2a0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1251,6 +1251,30 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, return check_generic_ptr_alignment(reg, pointer_desc, off, size, strict); } +static int check_ctx_reg(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int regno) +{ + /* Access to ctx or passing it to a helper is only allowed in + * its original, unmodified form. + */ + + if (reg->off) { + verbose("dereference of modified ctx ptr R%d off=%d disallowed\n", + regno, reg->off); + return -EACCES; + } + + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose("variable ctx access var_off=%s disallowed\n", tn_buf); + return -EACCES; + } + + return 0; +} + /* truncate register to smaller size (in bytes) * must be called with size < BPF_REG_SIZE */ @@ -1320,22 +1344,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn verbose("R%d leaks addr into ctx\n", value_regno); return -EACCES; } - /* ctx accesses must be at a fixed offset, so that we can - * determine what type of data were returned. - */ - if (reg->off) { - verbose("dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n", - regno, reg->off, off - reg->off); - return -EACCES; - } - if (!tnum_is_const(reg->var_off) || reg->var_off.value) { - char tn_buf[48]; + err = check_ctx_reg(env, reg, regno); + if (err < 0) + return err; - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose("variable ctx access var_off=%s off=%d size=%d", - tn_buf, off, size); - return -EACCES; - } err = check_ctx_access(env, insn_idx, off, size, t, ®_type); if (!err && t == BPF_READ && value_regno >= 0) { /* ctx access returns either a scalar, or a @@ -1573,6 +1585,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, expected_type = PTR_TO_CTX; if (type != expected_type) goto err_type; + err = check_ctx_reg(env, reg, regno); + if (err < 0) + return err; } else if (arg_type == ARG_PTR_TO_MEM || arg_type == ARG_PTR_TO_UNINIT_MEM) { expected_type = PTR_TO_STACK; @@ -3442,6 +3457,7 @@ static bool may_access_skb(enum bpf_prog_type type) static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) { struct bpf_reg_state *regs = cur_regs(env); + static const int ctx_reg = BPF_REG_6; u8 mode = BPF_MODE(insn->code); int i, err; @@ -3458,11 +3474,11 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) } /* check whether implicit source operand (register R6) is readable */ - err = check_reg_arg(env, BPF_REG_6, SRC_OP); + err = check_reg_arg(env, ctx_reg, SRC_OP); if (err) return err; - if (regs[BPF_REG_6].type != PTR_TO_CTX) { + if (regs[ctx_reg].type != PTR_TO_CTX) { verbose("at the time of BPF_LD_ABS|IND R6 != pointer to skb\n"); return -EINVAL; } @@ -3474,6 +3490,10 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return err; } + err = check_ctx_reg(env, ®s[ctx_reg], ctx_reg); + if (err < 0) + return err; + /* reset caller saved regs to unreadable */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(regs, caller_saved[i]); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index bb0285973cca..a86b94e29d6b 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1902,6 +1902,9 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags) if (ret) goto destroy_root; + ret = cgroup_bpf_inherit(root_cgrp); + WARN_ON_ONCE(ret); + trace_cgroup_setup_root(root); /* @@ -2885,8 +2888,6 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp) for_each_subsys(ss, ssid) { struct cgroup_subsys_state *css = cgroup_css(dsct, ss); - WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt)); - if (!(cgroup_ss_mask(dsct) & (1 << ss->id))) continue; @@ -2896,6 +2897,8 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp) return PTR_ERR(css); } + WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt)); + if (css_visible(css)) { ret = css_populate_dir(css); if (ret) @@ -2931,11 +2934,11 @@ static void cgroup_apply_control_disable(struct cgroup *cgrp) for_each_subsys(ss, ssid) { struct cgroup_subsys_state *css = cgroup_css(dsct, ss); - WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt)); - if (!css) continue; + WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt)); + if (css->parent && !(cgroup_ss_mask(dsct) & (1 << ss->id))) { kill_css(css); @@ -3222,7 +3225,8 @@ static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf, if (strcmp(strstrip(buf), "threaded")) return -EINVAL; - cgrp = cgroup_kn_lock_live(of->kn, false); + /* drain dying csses before we re-apply (threaded) subtree control */ + cgrp = cgroup_kn_lock_live(of->kn, true); if (!cgrp) return -ENOENT; @@ -4137,12 +4141,16 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it) } } while (!css_set_populated(cset) && list_empty(&cset->dying_tasks)); - if (!list_empty(&cset->tasks)) + if (!list_empty(&cset->tasks)) { it->task_pos = cset->tasks.next; - else if (!list_empty(&cset->mg_tasks)) + it->cur_tasks_head = &cset->tasks; + } else if (!list_empty(&cset->mg_tasks)) { it->task_pos = cset->mg_tasks.next; - else + it->cur_tasks_head = &cset->mg_tasks; + } else { it->task_pos = cset->dying_tasks.next; + it->cur_tasks_head = &cset->dying_tasks; + } it->tasks_head = &cset->tasks; it->mg_tasks_head = &cset->mg_tasks; @@ -4200,10 +4208,14 @@ repeat: else it->task_pos = it->task_pos->next; - if (it->task_pos == it->tasks_head) + if (it->task_pos == it->tasks_head) { it->task_pos = it->mg_tasks_head->next; - if (it->task_pos == it->mg_tasks_head) + it->cur_tasks_head = it->mg_tasks_head; + } + if (it->task_pos == it->mg_tasks_head) { it->task_pos = it->dying_tasks_head->next; + it->cur_tasks_head = it->dying_tasks_head; + } if (it->task_pos == it->dying_tasks_head) css_task_iter_advance_css_set(it); } else { @@ -4222,11 +4234,12 @@ repeat: goto repeat; /* and dying leaders w/o live member threads */ - if (!atomic_read(&task->signal->live)) + if (it->cur_tasks_head == it->dying_tasks_head && + !atomic_read(&task->signal->live)) goto repeat; } else { /* skip all dying ones */ - if (task->flags & PF_EXITING) + if (it->cur_tasks_head == it->dying_tasks_head) goto repeat; } } @@ -4335,6 +4348,9 @@ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) struct kernfs_open_file *of = s->private; struct css_task_iter *it = of->priv; + if (pos) + (*pos)++; + return css_task_iter_next(it); } @@ -4350,7 +4366,7 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, * from position 0, so we can simply keep iterating on !0 *pos. */ if (!it) { - if (WARN_ON_ONCE((*pos)++)) + if (WARN_ON_ONCE((*pos))) return ERR_PTR(-EINVAL); it = kzalloc(sizeof(*it), GFP_KERNEL); @@ -4358,10 +4374,11 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, return ERR_PTR(-ENOMEM); of->priv = it; css_task_iter_start(&cgrp->self, iter_flags, it); - } else if (!(*pos)++) { + } else if (!(*pos)) { css_task_iter_end(it); css_task_iter_start(&cgrp->self, iter_flags, it); - } + } else + return it->cur_task; return cgroup_procs_next(s, NULL, NULL); } @@ -4899,6 +4916,9 @@ static struct cgroup *cgroup_create(struct cgroup *parent) cgrp->self.parent = &parent->self; cgrp->root = root; cgrp->level = level; + ret = cgroup_bpf_inherit(cgrp); + if (ret) + goto out_idr_free; spin_lock_irq(&css_set_lock); for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) { @@ -4941,9 +4961,6 @@ static struct cgroup *cgroup_create(struct cgroup *parent) goto out_idr_free; } - if (parent) - cgroup_bpf_inherit(cgrp, parent); - cgroup_propagate_control(cgrp); return cgrp; @@ -5921,6 +5938,10 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd) return; } + /* Don't associate the sock with unrelated interrupted task's cgroup. */ + if (in_interrupt()) + return; + rcu_read_lock(); while (true) { @@ -5945,14 +5966,23 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd) #endif /* CONFIG_SOCK_CGROUP_DATA */ #ifdef CONFIG_CGROUP_BPF -int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, bool overridable) +int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags) { - struct cgroup *parent = cgroup_parent(cgrp); int ret; mutex_lock(&cgroup_mutex); - ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable); + ret = __cgroup_bpf_attach(cgrp, prog, type, flags); + mutex_unlock(&cgroup_mutex); + return ret; +} +int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags) +{ + int ret; + + mutex_lock(&cgroup_mutex); + ret = __cgroup_bpf_detach(cgrp, prog, type, flags); mutex_unlock(&cgroup_mutex); return ret; } diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c index c9960baaa14f..940d2e8db776 100644 --- a/kernel/cgroup/pids.c +++ b/kernel/cgroup/pids.c @@ -48,7 +48,7 @@ struct pids_cgroup { * %PIDS_MAX = (%PID_MAX_LIMIT + 1). */ atomic64_t counter; - int64_t limit; + atomic64_t limit; /* Handle for "pids.events" */ struct cgroup_file events_file; @@ -76,8 +76,8 @@ pids_css_alloc(struct cgroup_subsys_state *parent) if (!pids) return ERR_PTR(-ENOMEM); - pids->limit = PIDS_MAX; atomic64_set(&pids->counter, 0); + atomic64_set(&pids->limit, PIDS_MAX); atomic64_set(&pids->events_limit, 0); return &pids->css; } @@ -149,13 +149,14 @@ static int pids_try_charge(struct pids_cgroup *pids, int num) for (p = pids; parent_pids(p); p = parent_pids(p)) { int64_t new = atomic64_add_return(num, &p->counter); + int64_t limit = atomic64_read(&p->limit); /* * Since new is capped to the maximum number of pid_t, if * p->limit is %PIDS_MAX then we know that this test will never * fail. */ - if (new > p->limit) + if (new > limit) goto revert; } @@ -280,7 +281,7 @@ set_limit: * Limit updates don't need to be mutex'd, since it isn't * critical that any racing fork()s follow the new limit. */ - pids->limit = limit; + atomic64_set(&pids->limit, limit); return nbytes; } @@ -288,7 +289,7 @@ static int pids_max_show(struct seq_file *sf, void *v) { struct cgroup_subsys_state *css = seq_css(sf); struct pids_cgroup *pids = css_pids(css); - int64_t limit = pids->limit; + int64_t limit = atomic64_read(&pids->limit); if (limit >= PIDS_MAX) seq_printf(sf, "%s\n", PIDS_MAX_STR); diff --git a/kernel/cpu.c b/kernel/cpu.c index 7bd8c5ee2cae..51af85ada97f 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -376,6 +376,7 @@ void __init cpu_smt_disable(bool force) pr_info("SMT: Force disabled\n"); cpu_smt_control = CPU_SMT_FORCE_DISABLED; } else { + pr_info("SMT: disabled\n"); cpu_smt_control = CPU_SMT_DISABLED; } } @@ -493,8 +494,7 @@ static int bringup_wait_for_ap(unsigned int cpu) if (WARN_ON_ONCE((!cpu_online(cpu)))) return -ECANCELED; - /* Unpark the stopper thread and the hotplug thread of the target cpu */ - stop_machine_unpark(cpu); + /* Unpark the hotplug thread of the target cpu */ kthread_unpark(st->thread); /* @@ -1063,8 +1063,8 @@ void notify_cpu_starting(unsigned int cpu) /* * Called from the idle task. Wake up the controlling task which brings the - * stopper and the hotplug thread of the upcoming CPU up and then delegates - * the rest of the online bringup to the hotplug thread. + * hotplug thread of the upcoming CPU up and then delegates the rest of the + * online bringup to the hotplug thread. */ void cpuhp_online_idle(enum cpuhp_state state) { @@ -1074,6 +1074,12 @@ void cpuhp_online_idle(enum cpuhp_state state) if (state != CPUHP_AP_ONLINE_IDLE) return; + /* + * Unpart the stopper thread before we start the idle loop (and start + * scheduling); this ensures the stopper task is always available. + */ + stop_machine_unpark(smp_processor_id()); + st->state = CPUHP_AP_ONLINE_IDLE; complete_ap_thread(st, true); } @@ -2316,7 +2322,18 @@ void __init boot_cpu_hotplug_init(void) this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); } -enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; +/* + * These are used for a global "mitigations=" cmdline option for toggling + * optional CPU mitigations. + */ +enum cpu_mitigations { + CPU_MITIGATIONS_OFF, + CPU_MITIGATIONS_AUTO, + CPU_MITIGATIONS_AUTO_NOSMT, +}; + +static enum cpu_mitigations cpu_mitigations __ro_after_init = + CPU_MITIGATIONS_AUTO; static int __init mitigations_parse_cmdline(char *arg) { @@ -2333,3 +2350,17 @@ static int __init mitigations_parse_cmdline(char *arg) return 0; } early_param("mitigations", mitigations_parse_cmdline); + +/* mitigations=off */ +bool cpu_mitigations_off(void) +{ + return cpu_mitigations == CPU_MITIGATIONS_OFF; +} +EXPORT_SYMBOL_GPL(cpu_mitigations_off); + +/* mitigations=auto,nosmt */ +bool cpu_mitigations_auto_nosmt(void) +{ + return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; +} +EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt); diff --git a/kernel/cred.c b/kernel/cred.c index 5ab1f7ec946e..a9f0f8b21d8c 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -220,7 +220,7 @@ struct cred *cred_alloc_blank(void) new->magic = CRED_MAGIC; #endif - if (security_cred_alloc_blank(new, GFP_KERNEL) < 0) + if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0) goto error; return new; @@ -279,7 +279,7 @@ struct cred *prepare_creds(void) new->security = NULL; #endif - if (security_prepare_creds(new, old, GFP_KERNEL) < 0) + if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) goto error; validate_creds(new); return new; @@ -654,7 +654,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) #ifdef CONFIG_SECURITY new->security = NULL; #endif - if (security_prepare_creds(new, old, GFP_KERNEL) < 0) + if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) goto error; put_cred(old); diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index 3083df2a783e..90bd7895dc85 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -2629,7 +2629,7 @@ static int kdb_per_cpu(int argc, const char **argv) diag = kdbgetularg(argv[3], &whichcpu); if (diag) return diag; - if (!cpu_online(whichcpu)) { + if (whichcpu >= nr_cpu_ids || !cpu_online(whichcpu)) { kdb_printf("cpu %ld is not online\n", whichcpu); return KDB_BADCPUNUM; } diff --git a/kernel/events/core.c b/kernel/events/core.c index ff503be82692..728d42109b50 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -397,13 +397,8 @@ static cpumask_var_t perf_online_mask; * 0 - disallow raw tracepoint access for unpriv * 1 - disallow cpu events for unpriv * 2 - disallow kernel profiling for unpriv - * 3 - disallow all unpriv perf event use */ -#ifdef CONFIG_SECURITY_PERF_EVENTS_RESTRICT -int sysctl_perf_event_paranoid __read_mostly = 3; -#else int sysctl_perf_event_paranoid __read_mostly = 2; -#endif /* Minimum for 512 kiB + 1 user control page */ int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */ @@ -1222,6 +1217,7 @@ static void put_ctx(struct perf_event_context *ctx) * perf_event_context::lock * perf_event::mmap_mutex * mmap_sem + * perf_addr_filters_head::lock */ static struct perf_event_context * perf_event_ctx_lock_nested(struct perf_event *event, int nesting) @@ -2728,7 +2724,7 @@ static int perf_event_stop(struct perf_event *event, int restart) * * (p1) when userspace mappings change as a result of (1) or (2) or (3) below, * we update the addresses of corresponding vmas in - * event::addr_filters_offs array and bump the event::addr_filters_gen; + * event::addr_filter_ranges array and bump the event::addr_filters_gen; * (p2) when an event is scheduled in (pmu::add), it calls * perf_event_addr_filters_sync() which calls pmu::addr_filters_sync() * if the generation has changed since the previous call. @@ -3911,8 +3907,9 @@ find_get_context(struct pmu *pmu, struct task_struct *task, if (!task) { /* Must be root to operate on a CPU event: */ - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return ERR_PTR(-EACCES); + err = perf_allow_cpu(&event->attr); + if (err) + return ERR_PTR(err); cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); ctx = &cpuctx->ctx; @@ -4215,6 +4212,8 @@ static void _free_event(struct perf_event *event) unaccount_event(event); + security_perf_event_free(event); + if (event->rb) { /* * Can happen when we close an event with re-directed output. @@ -4237,7 +4236,7 @@ static void _free_event(struct perf_event *event) perf_event_free_bpf_prog(event); perf_addr_filters_splice(event, NULL); - kfree(event->addr_filters_offs); + kfree(event->addr_filter_ranges); if (event->destroy) event->destroy(event); @@ -4634,6 +4633,10 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) struct perf_event_context *ctx; int ret; + ret = security_perf_event_read(event); + if (ret) + return ret; + ctx = perf_event_ctx_lock(event); ret = __perf_read(event, buf, count); perf_event_ctx_unlock(event, ctx); @@ -4767,6 +4770,9 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) if (perf_event_check_period(event, value)) return -EINVAL; + if (!event->attr.freq && (value & (1ULL << 63))) + return -EINVAL; + event_function_call(event, __perf_event_period, &value); return 0; @@ -4879,6 +4885,11 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct perf_event_context *ctx; long ret; + /* Treat ioctl like writes as it is likely a mutating operation. */ + ret = security_perf_event_write(event); + if (ret) + return ret; + ctx = perf_event_ctx_lock(event); ret = _perf_ioctl(event, cmd, arg); perf_event_ctx_unlock(event, ctx); @@ -5339,6 +5350,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; + ret = security_perf_event_read(event); + if (ret) + return ret; + vma_size = vma->vm_end - vma->vm_start; if (vma->vm_pgoff == 0) { @@ -5443,7 +5458,15 @@ accounting: */ user_lock_limit *= num_online_cpus(); - user_locked = atomic_long_read(&user->locked_vm) + user_extra; + user_locked = atomic_long_read(&user->locked_vm); + + /* + * sysctl_perf_event_mlock may have changed, so that + * user->locked_vm > user_lock_limit + */ + if (user_locked > user_lock_limit) + user_locked = user_lock_limit; + user_locked += user_extra; if (user_locked > user_lock_limit) extra = user_locked - user_lock_limit; @@ -5452,7 +5475,7 @@ accounting: lock_limit >>= PAGE_SHIFT; locked = vma->vm_mm->pinned_vm + extra; - if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() && + if ((locked > lock_limit) && perf_is_paranoid() && !capable(CAP_IPC_LOCK)) { ret = -EPERM; goto unlock; @@ -6435,8 +6458,9 @@ static void perf_event_addr_filters_exec(struct perf_event *event, void *data) raw_spin_lock_irqsave(&ifh->lock, flags); list_for_each_entry(filter, &ifh->list, entry) { - if (filter->inode) { - event->addr_filters_offs[count] = 0; + if (filter->path.dentry) { + event->addr_filter_ranges[count].start = 0; + event->addr_filter_ranges[count].size = 0; restart++; } @@ -7102,7 +7126,7 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter, struct file *file, unsigned long offset, unsigned long size) { - if (filter->inode != file_inode(file)) + if (d_inode(filter->path.dentry) != file_inode(file)) return false; if (filter->offset > offset + size) @@ -7114,28 +7138,47 @@ static bool perf_addr_filter_match(struct perf_addr_filter *filter, return true; } +static bool perf_addr_filter_vma_adjust(struct perf_addr_filter *filter, + struct vm_area_struct *vma, + struct perf_addr_filter_range *fr) +{ + unsigned long vma_size = vma->vm_end - vma->vm_start; + unsigned long off = vma->vm_pgoff << PAGE_SHIFT; + struct file *file = vma->vm_file; + + if (!perf_addr_filter_match(filter, file, off, vma_size)) + return false; + + if (filter->offset < off) { + fr->start = vma->vm_start; + fr->size = min(vma_size, filter->size - (off - filter->offset)); + } else { + fr->start = vma->vm_start + filter->offset - off; + fr->size = min(vma->vm_end - fr->start, filter->size); + } + + return true; +} + static void __perf_addr_filters_adjust(struct perf_event *event, void *data) { struct perf_addr_filters_head *ifh = perf_event_addr_filters(event); struct vm_area_struct *vma = data; - unsigned long off = vma->vm_pgoff << PAGE_SHIFT, flags; - struct file *file = vma->vm_file; struct perf_addr_filter *filter; unsigned int restart = 0, count = 0; + unsigned long flags; if (!has_addr_filter(event)) return; - if (!file) + if (!vma->vm_file) return; raw_spin_lock_irqsave(&ifh->lock, flags); list_for_each_entry(filter, &ifh->list, entry) { - if (perf_addr_filter_match(filter, file, off, - vma->vm_end - vma->vm_start)) { - event->addr_filters_offs[count] = vma->vm_start; + if (perf_addr_filter_vma_adjust(filter, vma, + &event->addr_filter_ranges[count])) restart++; - } count++; } @@ -8016,11 +8059,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, struct pt_regs *regs, struct hlist_head *head, struct task_struct *task) { - struct bpf_prog *prog = call->prog; - - if (prog) { + if (bpf_prog_array_valid(call)) { *(struct pt_regs **)raw_data = regs; - if (!trace_call_bpf(prog, raw_data) || hlist_empty(head)) { + if (!trace_call_bpf(call, raw_data) || hlist_empty(head)) { perf_swevent_put_recursion_context(rctx); return; } @@ -8210,13 +8251,11 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) { bool is_kprobe, is_tracepoint, is_syscall_tp; struct bpf_prog *prog; + int ret; if (event->attr.type != PERF_TYPE_TRACEPOINT) return perf_event_set_bpf_handler(event, prog_fd); - if (event->tp_event->prog) - return -EEXIST; - is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE; is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT; is_syscall_tp = is_syscall_trace_event(event->tp_event); @@ -8244,26 +8283,20 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) return -EACCES; } } - event->tp_event->prog = prog; - event->tp_event->bpf_prog_owner = event; - return 0; + ret = perf_event_attach_bpf_prog(event, prog); + if (ret) + bpf_prog_put(prog); + return ret; } static void perf_event_free_bpf_prog(struct perf_event *event) { - struct bpf_prog *prog; - - perf_event_free_bpf_handler(event); - - if (!event->tp_event) + if (event->attr.type != PERF_TYPE_TRACEPOINT) { + perf_event_free_bpf_handler(event); return; - - prog = event->tp_event->prog; - if (prog && event->tp_event->bpf_prog_owner == event) { - event->tp_event->prog = NULL; - bpf_prog_put(prog); } + perf_event_detach_bpf_prog(event); } #else @@ -8323,8 +8356,7 @@ static void free_filters_list(struct list_head *filters) struct perf_addr_filter *filter, *iter; list_for_each_entry_safe(filter, iter, filters, entry) { - if (filter->inode) - iput(filter->inode); + path_put(&filter->path); list_del(&filter->entry); kfree(filter); } @@ -8362,26 +8394,19 @@ static void perf_addr_filters_splice(struct perf_event *event, * @filter; if so, adjust filter's address range. * Called with mm::mmap_sem down for reading. */ -static unsigned long perf_addr_filter_apply(struct perf_addr_filter *filter, - struct mm_struct *mm) +static void perf_addr_filter_apply(struct perf_addr_filter *filter, + struct mm_struct *mm, + struct perf_addr_filter_range *fr) { struct vm_area_struct *vma; for (vma = mm->mmap; vma; vma = vma->vm_next) { - struct file *file = vma->vm_file; - unsigned long off = vma->vm_pgoff << PAGE_SHIFT; - unsigned long vma_size = vma->vm_end - vma->vm_start; - - if (!file) + if (!vma->vm_file) continue; - if (!perf_addr_filter_match(filter, file, off, vma_size)) - continue; - - return vma->vm_start; + if (perf_addr_filter_vma_adjust(filter, vma, fr)) + return; } - - return 0; } /* @@ -8404,26 +8429,29 @@ static void perf_event_addr_filters_apply(struct perf_event *event) if (task == TASK_TOMBSTONE) return; - if (!ifh->nr_file_filters) - return; + if (ifh->nr_file_filters) { + mm = get_task_mm(event->ctx->task); + if (!mm) + goto restart; - mm = get_task_mm(event->ctx->task); - if (!mm) - goto restart; - - down_read(&mm->mmap_sem); + down_read(&mm->mmap_sem); + } raw_spin_lock_irqsave(&ifh->lock, flags); list_for_each_entry(filter, &ifh->list, entry) { - event->addr_filters_offs[count] = 0; + if (filter->path.dentry) { + /* + * Adjust base offset if the filter is associated to a + * binary that needs to be mapped: + */ + event->addr_filter_ranges[count].start = 0; + event->addr_filter_ranges[count].size = 0; - /* - * Adjust base offset if the filter is associated to a binary - * that needs to be mapped: - */ - if (filter->inode) - event->addr_filters_offs[count] = - perf_addr_filter_apply(filter, mm); + perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]); + } else { + event->addr_filter_ranges[count].start = filter->offset; + event->addr_filter_ranges[count].size = filter->size; + } count++; } @@ -8431,9 +8459,11 @@ static void perf_event_addr_filters_apply(struct perf_event *event) event->addr_filters_gen++; raw_spin_unlock_irqrestore(&ifh->lock, flags); - up_read(&mm->mmap_sem); + if (ifh->nr_file_filters) { + up_read(&mm->mmap_sem); - mmput(mm); + mmput(mm); + } restart: perf_event_stop(event, 1); @@ -8494,7 +8524,6 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, { struct perf_addr_filter *filter = NULL; char *start, *orig, *filename = NULL; - struct path path; substring_t args[MAX_OPT_ARGS]; int state = IF_STATE_ACTION, token; unsigned int kernel = 0; @@ -8598,19 +8627,18 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, goto fail_free_name; /* look up the path and grab its inode */ - ret = kern_path(filename, LOOKUP_FOLLOW, &path); + ret = kern_path(filename, LOOKUP_FOLLOW, + &filter->path); if (ret) goto fail_free_name; - filter->inode = igrab(d_inode(path.dentry)); - path_put(&path); kfree(filename); filename = NULL; ret = -EINVAL; - if (!filter->inode || - !S_ISREG(filter->inode->i_mode)) - /* free_filters_list() will iput() */ + if (!filter->path.dentry || + !S_ISREG(d_inode(filter->path.dentry) + ->i_mode)) goto fail; event->addr_filters.nr_file_filters++; @@ -9653,14 +9681,28 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, goto err_pmu; if (has_addr_filter(event)) { - event->addr_filters_offs = kcalloc(pmu->nr_addr_filters, - sizeof(unsigned long), - GFP_KERNEL); - if (!event->addr_filters_offs) { + event->addr_filter_ranges = kcalloc(pmu->nr_addr_filters, + sizeof(struct perf_addr_filter_range), + GFP_KERNEL); + if (!event->addr_filter_ranges) { err = -ENOMEM; goto err_per_task; } + /* + * Clone the parent's vma offsets: they are valid until exec() + * even if the mm is not shared with the parent. + */ + if (event->parent) { + struct perf_addr_filters_head *ifh = perf_event_addr_filters(event); + + raw_spin_lock_irq(&ifh->lock); + memcpy(event->addr_filter_ranges, + event->parent->addr_filter_ranges, + pmu->nr_addr_filters * sizeof(struct perf_addr_filter_range)); + raw_spin_unlock_irq(&ifh->lock); + } + /* force hw sync on the address filters */ event->addr_filters_gen = 1; } @@ -9673,13 +9715,22 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, } } + err = security_perf_event_alloc(event); + if (err) + goto err_callchain_buffer; + /* symmetric to unaccount_event() in _free_event() */ account_event(event); return event; +err_callchain_buffer: + if (!event->parent) { + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + put_callchain_buffers(); + } err_addr_filters: - kfree(event->addr_filters_offs); + kfree(event->addr_filter_ranges); err_per_task: exclusive_event_destroy(event); @@ -9795,9 +9846,11 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, attr->branch_sample_type = mask; } /* privileged levels capture (kernel, hv): check permissions */ - if ((mask & PERF_SAMPLE_BRANCH_PERM_PLM) - && perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + if (mask & PERF_SAMPLE_BRANCH_PERM_PLM) { + ret = perf_allow_kernel(attr); + if (ret) + return ret; + } } if (attr->sample_type & PERF_SAMPLE_REGS_USER) { @@ -10007,16 +10060,19 @@ SYSCALL_DEFINE5(perf_event_open, if (flags & ~PERF_FLAG_ALL) return -EINVAL; - if (perf_paranoid_any() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + /* Do we allow access to perf_event_open(2) ? */ + err = security_perf_event_open(&attr, PERF_SECURITY_OPEN); + if (err) + return err; err = perf_copy_attr(attr_uptr, &attr); if (err) return err; if (!attr.exclude_kernel) { - if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + err = perf_allow_kernel(&attr); + if (err) + return err; } if (attr.namespaces) { @@ -10033,9 +10089,11 @@ SYSCALL_DEFINE5(perf_event_open, } /* Only privileged users can get physical addresses */ - if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR) && - perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + if ((attr.sample_type & PERF_SAMPLE_PHYS_ADDR)) { + err = perf_allow_kernel(&attr); + if (err) + return err; + } if (!attr.sample_max_stack) attr.sample_max_stack = sysctl_perf_event_max_stack; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index f3a69a4f0d57..20f6a50fb40b 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -668,7 +668,7 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event, goto out; } - rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages, + rb->aux_priv = event->pmu->setup_aux(event, rb->aux_pages, nr_pages, overwrite); if (!rb->aux_priv) goto out; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 01941cffa9c2..c74fc9826250 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1854,7 +1854,7 @@ static void handle_trampoline(struct pt_regs *regs) sigill: uprobe_warn(current, "handle uretprobe, sending SIGILL."); - force_sig_info(SIGILL, SEND_SIG_FORCED, current); + force_sig(SIGILL, current); } @@ -1970,7 +1970,7 @@ static void handle_singlestep(struct uprobe_task *utask, struct pt_regs *regs) if (unlikely(err)) { uprobe_warn(current, "execute the probed insn, sending SIGILL."); - force_sig_info(SIGILL, SEND_SIG_FORCED, current); + force_sig(SIGILL, current); } } diff --git a/kernel/exit.c b/kernel/exit.c index 7d4f55ee7b33..101d1e0f63cd 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -497,7 +497,7 @@ static void exit_mm(void) struct mm_struct *mm = current->mm; struct core_state *core_state; - mm_release(current, mm); + exit_mm_release(current, mm); if (!mm) return; sync_mm_rss(mm); @@ -577,10 +577,6 @@ static struct task_struct *find_child_reaper(struct task_struct *father, } write_unlock_irq(&tasklist_lock); - if (unlikely(pid_ns == &init_pid_ns)) { - panic("Attempted to kill init! exitcode=0x%08x\n", - father->signal->group_exit_code ?: father->exit_code); - } list_for_each_entry_safe(p, n, dead, ptrace_entry) { list_del_init(&p->ptrace_entry); @@ -804,32 +800,12 @@ void __noreturn do_exit(long code) */ if (unlikely(tsk->flags & PF_EXITING)) { pr_alert("Fixing recursive fault but reboot is needed!\n"); - /* - * We can do this unlocked here. The futex code uses - * this flag just to verify whether the pi state - * cleanup has been done or not. In the worst case it - * loops once more. We pretend that the cleanup was - * done as there is no way to return. Either the - * OWNER_DIED bit is set by now or we push the blocked - * task into the wait for ever nirwana as well. - */ - tsk->flags |= PF_EXITPIDONE; + futex_exit_recursive(tsk); set_current_state(TASK_UNINTERRUPTIBLE); schedule(); } exit_signals(tsk); /* sets PF_EXITING */ - /* - * Ensure that all new tsk->pi_lock acquisitions must observe - * PF_EXITING. Serializes against futex.c:attach_to_pi_owner(). - */ - smp_mb(); - /* - * Ensure that we must observe the pi_state in exit_mm() -> - * mm_release() -> exit_pi_state_list(). - */ - raw_spin_lock_irq(&tsk->pi_lock); - raw_spin_unlock_irq(&tsk->pi_lock); if (unlikely(in_atomic())) { pr_info("note: %s[%d] exited with preempt_count %d\n", @@ -844,6 +820,14 @@ void __noreturn do_exit(long code) acct_update_integrals(tsk); group_dead = atomic_dec_and_test(&tsk->signal->live); if (group_dead) { + /* + * If the last thread of global init has exited, panic + * immediately to get a useable coredump. + */ + if (unlikely(is_global_init(tsk))) + panic("Attempted to kill init! exitcode=0x%08x\n", + tsk->signal->group_exit_code ?: (int)code); + #ifdef CONFIG_POSIX_TIMERS hrtimer_cancel(&tsk->signal->real_timer); exit_itimers(tsk->signal); @@ -904,12 +888,6 @@ void __noreturn do_exit(long code) * Make sure we are holding no locks: */ debug_check_no_locks_held(); - /* - * We can do this unlocked here. The futex code uses this flag - * just to verify whether the pi state cleanup has been done - * or not. In the worst case it loops once more. - */ - tsk->flags |= PF_EXITPIDONE; if (tsk->io_context) exit_io_context(tsk); diff --git a/kernel/fork.c b/kernel/fork.c index 836d86563e0c..fba0ccfa6457 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -93,6 +93,7 @@ #include #include #include +#include #include #include @@ -170,10 +171,6 @@ static inline void free_task_struct(struct task_struct *tsk) } #endif -void __weak arch_release_thread_stack(unsigned long *stack) -{ -} - #ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR /* @@ -356,7 +353,6 @@ static void release_task_stack(struct task_struct *tsk) return; /* Better to leak the stack than to free prematurely */ account_kernel_stack(tsk, -1); - arch_release_thread_stack(tsk->stack); free_thread_stack(tsk); tsk->stack = NULL; #ifdef CONFIG_VMAP_STACK @@ -375,6 +371,7 @@ void put_task_stack(struct task_struct *tsk) void free_task(struct task_struct *tsk) { cpufreq_task_times_exit(tsk); + scs_release(tsk); #ifndef CONFIG_THREAD_INFO_IN_TASK /* @@ -500,6 +497,8 @@ void __init fork_init(void) NULL, free_vm_stack_cache); #endif + scs_init(); + lockdep_init_task(&init_task); } @@ -555,6 +554,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node) if (err) goto free_stack; + err = scs_prepare(tsk, node); + if (err) + goto free_stack; + #ifdef CONFIG_SECCOMP /* * We must handle setting up seccomp filters once we're under @@ -1176,24 +1179,8 @@ static int wait_for_vfork_done(struct task_struct *child, * restoring the old one. . . * Eric Biederman 10 January 1998 */ -void mm_release(struct task_struct *tsk, struct mm_struct *mm) +static void mm_release(struct task_struct *tsk, struct mm_struct *mm) { - /* Get rid of any futexes when releasing the mm */ -#ifdef CONFIG_FUTEX - if (unlikely(tsk->robust_list)) { - exit_robust_list(tsk); - tsk->robust_list = NULL; - } -#ifdef CONFIG_COMPAT - if (unlikely(tsk->compat_robust_list)) { - compat_exit_robust_list(tsk); - tsk->compat_robust_list = NULL; - } -#endif - if (unlikely(!list_empty(&tsk->pi_state_list))) - exit_pi_state_list(tsk); -#endif - uprobe_free_utask(tsk); /* Get rid of any cached register state */ @@ -1226,6 +1213,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) complete_vfork_done(tsk); } +void exit_mm_release(struct task_struct *tsk, struct mm_struct *mm) +{ + futex_exit_release(tsk); + mm_release(tsk, mm); +} + +void exec_mm_release(struct task_struct *tsk, struct mm_struct *mm) +{ + futex_exec_release(tsk); + mm_release(tsk, mm); +} + /* * Allocate a new mm structure and copy contents from the * mm structure of the passed in task structure. @@ -1975,14 +1974,8 @@ static __latent_entropy struct task_struct *copy_process( #ifdef CONFIG_BLOCK p->plug = NULL; #endif -#ifdef CONFIG_FUTEX - p->robust_list = NULL; -#ifdef CONFIG_COMPAT - p->compat_robust_list = NULL; -#endif - INIT_LIST_HEAD(&p->pi_state_list); - p->pi_state_cache = NULL; -#endif + futex_init_task(p); + /* * sigaltstack should be cleared when sharing the same VM */ diff --git a/kernel/futex.c b/kernel/futex.c index 3fb780f79ec6..3fbb6dc4c91b 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -44,6 +44,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include #include #include @@ -173,8 +174,10 @@ * double_lock_hb() and double_unlock_hb(), respectively. */ -#ifndef CONFIG_HAVE_FUTEX_CMPXCHG -int __read_mostly futex_cmpxchg_enabled; +#ifdef CONFIG_HAVE_FUTEX_CMPXCHG +#define futex_cmpxchg_enabled 1 +#else +static int __read_mostly futex_cmpxchg_enabled; #endif /* @@ -342,6 +345,12 @@ static inline bool should_fail_futex(bool fshared) } #endif /* CONFIG_FAIL_FUTEX */ +#ifdef CONFIG_COMPAT +static void compat_exit_robust_list(struct task_struct *curr); +#else +static inline void compat_exit_robust_list(struct task_struct *curr) { } +#endif + static inline void futex_get_mm(union futex_key *key) { mmgrab(key->private.mm); @@ -891,7 +900,7 @@ static struct task_struct *futex_find_get_task(pid_t pid) * Kernel cleans up PI-state, but userspace is likely hosed. * (Robust-futex cleanup is separate and might save the day for userspace.) */ -void exit_pi_state_list(struct task_struct *curr) +static void exit_pi_state_list(struct task_struct *curr) { struct list_head *next, *head = &curr->pi_state_list; struct futex_pi_state *pi_state; @@ -961,7 +970,8 @@ void exit_pi_state_list(struct task_struct *curr) } raw_spin_unlock_irq(&curr->pi_lock); } - +#else +static inline void exit_pi_state_list(struct task_struct *curr) { } #endif /* @@ -1170,16 +1180,47 @@ out_error: return ret; } +/** + * wait_for_owner_exiting - Block until the owner has exited + * @exiting: Pointer to the exiting task + * + * Caller must hold a refcount on @exiting. + */ +static void wait_for_owner_exiting(int ret, struct task_struct *exiting) +{ + if (ret != -EBUSY) { + WARN_ON_ONCE(exiting); + return; + } + + if (WARN_ON_ONCE(ret == -EBUSY && !exiting)) + return; + + mutex_lock(&exiting->futex_exit_mutex); + /* + * No point in doing state checking here. If the waiter got here + * while the task was in exec()->exec_futex_release() then it can + * have any FUTEX_STATE_* value when the waiter has acquired the + * mutex. OK, if running, EXITING or DEAD if it reached exit() + * already. Highly unlikely and not a problem. Just one more round + * through the futex maze. + */ + mutex_unlock(&exiting->futex_exit_mutex); + + put_task_struct(exiting); +} + static int handle_exit_race(u32 __user *uaddr, u32 uval, struct task_struct *tsk) { u32 uval2; /* - * If PF_EXITPIDONE is not yet set, then try again. + * If the futex exit state is not yet FUTEX_STATE_DEAD, tell the + * caller that the alleged owner is busy. */ - if (tsk && !(tsk->flags & PF_EXITPIDONE)) - return -EAGAIN; + if (tsk && tsk->futex_state != FUTEX_STATE_DEAD) + return -EBUSY; /* * Reread the user space value to handle the following situation: @@ -1197,8 +1238,9 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval, * *uaddr = 0xC0000000; tsk = get_task(PID); * } if (!tsk->flags & PF_EXITING) { * ... attach(); - * tsk->flags |= PF_EXITPIDONE; } else { - * if (!(tsk->flags & PF_EXITPIDONE)) + * tsk->futex_state = } else { + * FUTEX_STATE_DEAD; if (tsk->futex_state != + * FUTEX_STATE_DEAD) * return -EAGAIN; * return -ESRCH; <--- FAIL * } @@ -1229,7 +1271,8 @@ static int handle_exit_race(u32 __user *uaddr, u32 uval, * it after doing proper sanity checks. */ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, - struct futex_pi_state **ps) + struct futex_pi_state **ps, + struct task_struct **exiting) { pid_t pid = uval & FUTEX_TID_MASK; struct futex_pi_state *pi_state; @@ -1254,22 +1297,33 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, } /* - * We need to look at the task state flags to figure out, - * whether the task is exiting. To protect against the do_exit - * change of the task flags, we do this protected by - * p->pi_lock: + * We need to look at the task state to figure out, whether the + * task is exiting. To protect against the change of the task state + * in futex_exit_release(), we do this protected by p->pi_lock: */ raw_spin_lock_irq(&p->pi_lock); - if (unlikely(p->flags & PF_EXITING)) { + if (unlikely(p->futex_state != FUTEX_STATE_OK)) { /* - * The task is on the way out. When PF_EXITPIDONE is - * set, we know that the task has finished the - * cleanup: + * The task is on the way out. When the futex state is + * FUTEX_STATE_DEAD, we know that the task has finished + * the cleanup: */ int ret = handle_exit_race(uaddr, uval, p); raw_spin_unlock_irq(&p->pi_lock); - put_task_struct(p); + /* + * If the owner task is between FUTEX_STATE_EXITING and + * FUTEX_STATE_DEAD then store the task pointer and keep + * the reference on the task struct. The calling code will + * drop all locks, wait for the task to reach + * FUTEX_STATE_DEAD and then drop the refcount. This is + * required to prevent a live lock when the current task + * preempted the exiting task between the two states. + */ + if (ret == -EBUSY) + *exiting = p; + else + put_task_struct(p); return ret; } @@ -1308,7 +1362,8 @@ static int attach_to_pi_owner(u32 __user *uaddr, u32 uval, union futex_key *key, static int lookup_pi_state(u32 __user *uaddr, u32 uval, struct futex_hash_bucket *hb, - union futex_key *key, struct futex_pi_state **ps) + union futex_key *key, struct futex_pi_state **ps, + struct task_struct **exiting) { struct futex_q *top_waiter = futex_top_waiter(hb, key); @@ -1323,7 +1378,7 @@ static int lookup_pi_state(u32 __user *uaddr, u32 uval, * We are the first waiter - try to look up the owner based on * @uval and attach to it. */ - return attach_to_pi_owner(uaddr, uval, key, ps); + return attach_to_pi_owner(uaddr, uval, key, ps, exiting); } static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) @@ -1351,6 +1406,8 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) * lookup * @task: the task to perform the atomic lock work for. This will * be "current" except in the case of requeue pi. + * @exiting: Pointer to store the task pointer of the owner task + * which is in the middle of exiting * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) * * Return: @@ -1359,11 +1416,17 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) * - <0 - error * * The hb->lock and futex_key refs shall be held by the caller. + * + * @exiting is only set when the return value is -EBUSY. If so, this holds + * a refcount on the exiting task on return and the caller needs to drop it + * after waiting for the exit to complete. */ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, union futex_key *key, struct futex_pi_state **ps, - struct task_struct *task, int set_waiters) + struct task_struct *task, + struct task_struct **exiting, + int set_waiters) { u32 uval, newval, vpid = task_pid_vnr(task); struct futex_q *top_waiter; @@ -1433,7 +1496,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, * attach to the owner. If that fails, no harm done, we only * set the FUTEX_WAITERS bit in the user space variable. */ - return attach_to_pi_owner(uaddr, newval, key, ps); + return attach_to_pi_owner(uaddr, newval, key, ps, exiting); } /** @@ -1852,6 +1915,8 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, * @key1: the from futex key * @key2: the to futex key * @ps: address to store the pi_state pointer + * @exiting: Pointer to store the task pointer of the owner task + * which is in the middle of exiting * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) * * Try and get the lock on behalf of the top waiter if we can do it atomically. @@ -1859,16 +1924,20 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit. * hb1 and hb2 must be held by the caller. * + * @exiting is only set when the return value is -EBUSY. If so, this holds + * a refcount on the exiting task on return and the caller needs to drop it + * after waiting for the exit to complete. + * * Return: * - 0 - failed to acquire the lock atomically; * - >0 - acquired the lock, return value is vpid of the top_waiter * - <0 - error */ -static int futex_proxy_trylock_atomic(u32 __user *pifutex, - struct futex_hash_bucket *hb1, - struct futex_hash_bucket *hb2, - union futex_key *key1, union futex_key *key2, - struct futex_pi_state **ps, int set_waiters) +static int +futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, + struct futex_hash_bucket *hb2, union futex_key *key1, + union futex_key *key2, struct futex_pi_state **ps, + struct task_struct **exiting, int set_waiters) { struct futex_q *top_waiter = NULL; u32 curval; @@ -1905,7 +1974,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, */ vpid = task_pid_vnr(top_waiter->task); ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, - set_waiters); + exiting, set_waiters); if (ret == 1) { requeue_pi_wake_futex(top_waiter, key2, hb2); return vpid; @@ -2034,6 +2103,8 @@ retry_private: } if (requeue_pi && (task_count - nr_wake < nr_requeue)) { + struct task_struct *exiting = NULL; + /* * Attempt to acquire uaddr2 and wake the top waiter. If we * intend to requeue waiters, force setting the FUTEX_WAITERS @@ -2041,7 +2112,8 @@ retry_private: * faults rather in the requeue loop below. */ ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, - &key2, &pi_state, nr_requeue); + &key2, &pi_state, + &exiting, nr_requeue); /* * At this point the top_waiter has either taken uaddr2 or is @@ -2068,7 +2140,8 @@ retry_private: * If that call succeeds then we have pi_state and an * initial refcount on it. */ - ret = lookup_pi_state(uaddr2, ret, hb2, &key2, &pi_state); + ret = lookup_pi_state(uaddr2, ret, hb2, &key2, + &pi_state, &exiting); } switch (ret) { @@ -2086,17 +2159,24 @@ retry_private: if (!ret) goto retry; goto out; + case -EBUSY: case -EAGAIN: /* * Two reasons for this: - * - Owner is exiting and we just wait for the + * - EBUSY: Owner is exiting and we just wait for the * exit to complete. - * - The user space value changed. + * - EAGAIN: The user space value changed. */ double_unlock_hb(hb1, hb2); hb_waiters_dec(hb2); put_futex_key(&key2); put_futex_key(&key1); + /* + * Handle the case where the owner is in the middle of + * exiting. Wait for the exit to complete otherwise + * this task might loop forever, aka. live lock. + */ + wait_for_owner_exiting(ret, exiting); cond_resched(); goto retry; default: @@ -2816,6 +2896,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, { struct hrtimer_sleeper timeout, *to = NULL; struct futex_pi_state *pi_state = NULL; + struct task_struct *exiting = NULL; struct rt_mutex_waiter rt_waiter; struct futex_hash_bucket *hb; struct futex_q q = futex_q_init; @@ -2843,7 +2924,8 @@ retry: retry_private: hb = queue_lock(&q); - ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0); + ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, + &exiting, 0); if (unlikely(ret)) { /* * Atomic work succeeded and we got the lock, @@ -2856,15 +2938,22 @@ retry_private: goto out_unlock_put_key; case -EFAULT: goto uaddr_faulted; + case -EBUSY: case -EAGAIN: /* * Two reasons for this: - * - Task is exiting and we just wait for the + * - EBUSY: Task is exiting and we just wait for the * exit to complete. - * - The user space value changed. + * - EAGAIN: The user space value changed. */ queue_unlock(hb); put_futex_key(&q.key); + /* + * Handle the case where the owner is in the middle of + * exiting. Wait for the exit to complete otherwise + * this task might loop forever, aka. live lock. + */ + wait_for_owner_exiting(ret, exiting); cond_resched(); goto retry; default: @@ -3480,11 +3569,16 @@ err_unlock: return ret; } +/* Constants for the pending_op argument of handle_futex_death */ +#define HANDLE_DEATH_PENDING true +#define HANDLE_DEATH_LIST false + /* * Process a futex-list entry, check whether it's owned by the * dying task, and do notification if so: */ -int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi) +static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, + bool pi, bool pending_op) { u32 uval, uninitialized_var(nval), mval; int err; @@ -3497,6 +3591,42 @@ retry: if (get_user(uval, uaddr)) return -1; + /* + * Special case for regular (non PI) futexes. The unlock path in + * user space has two race scenarios: + * + * 1. The unlock path releases the user space futex value and + * before it can execute the futex() syscall to wake up + * waiters it is killed. + * + * 2. A woken up waiter is killed before it can acquire the + * futex in user space. + * + * In both cases the TID validation below prevents a wakeup of + * potential waiters which can cause these waiters to block + * forever. + * + * In both cases the following conditions are met: + * + * 1) task->robust_list->list_op_pending != NULL + * @pending_op == true + * 2) User space futex value == 0 + * 3) Regular futex: @pi == false + * + * If these conditions are met, it is safe to attempt waking up a + * potential waiter without touching the user space futex value and + * trying to set the OWNER_DIED bit. The user space futex value is + * uncontended and the rest of the user space mutex state is + * consistent, so a woken waiter will just take over the + * uncontended futex. Setting the OWNER_DIED bit would create + * inconsistent state and malfunction of the user space owner died + * handling. + */ + if (pending_op && !pi && !uval) { + futex_wake(uaddr, 1, 1, FUTEX_BITSET_MATCH_ANY); + return 0; + } + if ((uval & FUTEX_TID_MASK) != task_pid_vnr(curr)) return 0; @@ -3575,7 +3705,7 @@ static inline int fetch_robust_entry(struct robust_list __user **entry, * * We silently return on any sign of list-walking problem. */ -void exit_robust_list(struct task_struct *curr) +static void exit_robust_list(struct task_struct *curr) { struct robust_list_head __user *head = curr->robust_list; struct robust_list __user *entry, *next_entry, *pending; @@ -3616,10 +3746,11 @@ void exit_robust_list(struct task_struct *curr) * A pending lock might already be on the list, so * don't process it twice: */ - if (entry != pending) + if (entry != pending) { if (handle_futex_death((void __user *)entry + futex_offset, - curr, pi)) + curr, pi, HANDLE_DEATH_LIST)) return; + } if (rc) return; entry = next_entry; @@ -3633,9 +3764,118 @@ void exit_robust_list(struct task_struct *curr) cond_resched(); } - if (pending) + if (pending) { handle_futex_death((void __user *)pending + futex_offset, - curr, pip); + curr, pip, HANDLE_DEATH_PENDING); + } +} + +static void futex_cleanup(struct task_struct *tsk) +{ + if (unlikely(tsk->robust_list)) { + exit_robust_list(tsk); + tsk->robust_list = NULL; + } + +#ifdef CONFIG_COMPAT + if (unlikely(tsk->compat_robust_list)) { + compat_exit_robust_list(tsk); + tsk->compat_robust_list = NULL; + } +#endif + + if (unlikely(!list_empty(&tsk->pi_state_list))) + exit_pi_state_list(tsk); +} + +/** + * futex_exit_recursive - Set the tasks futex state to FUTEX_STATE_DEAD + * @tsk: task to set the state on + * + * Set the futex exit state of the task lockless. The futex waiter code + * observes that state when a task is exiting and loops until the task has + * actually finished the futex cleanup. The worst case for this is that the + * waiter runs through the wait loop until the state becomes visible. + * + * This is called from the recursive fault handling path in do_exit(). + * + * This is best effort. Either the futex exit code has run already or + * not. If the OWNER_DIED bit has been set on the futex then the waiter can + * take it over. If not, the problem is pushed back to user space. If the + * futex exit code did not run yet, then an already queued waiter might + * block forever, but there is nothing which can be done about that. + */ +void futex_exit_recursive(struct task_struct *tsk) +{ + /* If the state is FUTEX_STATE_EXITING then futex_exit_mutex is held */ + if (tsk->futex_state == FUTEX_STATE_EXITING) + mutex_unlock(&tsk->futex_exit_mutex); + tsk->futex_state = FUTEX_STATE_DEAD; +} + +static void futex_cleanup_begin(struct task_struct *tsk) +{ + /* + * Prevent various race issues against a concurrent incoming waiter + * including live locks by forcing the waiter to block on + * tsk->futex_exit_mutex when it observes FUTEX_STATE_EXITING in + * attach_to_pi_owner(). + */ + mutex_lock(&tsk->futex_exit_mutex); + + /* + * Switch the state to FUTEX_STATE_EXITING under tsk->pi_lock. + * + * This ensures that all subsequent checks of tsk->futex_state in + * attach_to_pi_owner() must observe FUTEX_STATE_EXITING with + * tsk->pi_lock held. + * + * It guarantees also that a pi_state which was queued right before + * the state change under tsk->pi_lock by a concurrent waiter must + * be observed in exit_pi_state_list(). + */ + raw_spin_lock_irq(&tsk->pi_lock); + tsk->futex_state = FUTEX_STATE_EXITING; + raw_spin_unlock_irq(&tsk->pi_lock); +} + +static void futex_cleanup_end(struct task_struct *tsk, int state) +{ + /* + * Lockless store. The only side effect is that an observer might + * take another loop until it becomes visible. + */ + tsk->futex_state = state; + /* + * Drop the exit protection. This unblocks waiters which observed + * FUTEX_STATE_EXITING to reevaluate the state. + */ + mutex_unlock(&tsk->futex_exit_mutex); +} + +void futex_exec_release(struct task_struct *tsk) +{ + /* + * The state handling is done for consistency, but in the case of + * exec() there is no way to prevent futher damage as the PID stays + * the same. But for the unlikely and arguably buggy case that a + * futex is held on exec(), this provides at least as much state + * consistency protection which is possible. + */ + futex_cleanup_begin(tsk); + futex_cleanup(tsk); + /* + * Reset the state to FUTEX_STATE_OK. The task is alive and about + * exec a new binary. + */ + futex_cleanup_end(tsk, FUTEX_STATE_OK); +} + +void futex_exit_release(struct task_struct *tsk) +{ + futex_cleanup_begin(tsk); + futex_cleanup(tsk); + futex_cleanup_end(tsk, FUTEX_STATE_DEAD); } long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, @@ -3731,6 +3971,193 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); } +#ifdef CONFIG_COMPAT +/* + * Fetch a robust-list pointer. Bit 0 signals PI futexes: + */ +static inline int +compat_fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, + compat_uptr_t __user *head, unsigned int *pi) +{ + if (get_user(*uentry, head)) + return -EFAULT; + + *entry = compat_ptr((*uentry) & ~1); + *pi = (unsigned int)(*uentry) & 1; + + return 0; +} + +static void __user *futex_uaddr(struct robust_list __user *entry, + compat_long_t futex_offset) +{ + compat_uptr_t base = ptr_to_compat(entry); + void __user *uaddr = compat_ptr(base + futex_offset); + + return uaddr; +} + +/* + * Walk curr->robust_list (very carefully, it's a userspace list!) + * and mark any locks found there dead, and notify any waiters. + * + * We silently return on any sign of list-walking problem. + */ +static void compat_exit_robust_list(struct task_struct *curr) +{ + struct compat_robust_list_head __user *head = curr->compat_robust_list; + struct robust_list __user *entry, *next_entry, *pending; + unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; + unsigned int uninitialized_var(next_pi); + compat_uptr_t uentry, next_uentry, upending; + compat_long_t futex_offset; + int rc; + + if (!futex_cmpxchg_enabled) + return; + + /* + * Fetch the list head (which was registered earlier, via + * sys_set_robust_list()): + */ + if (compat_fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) + return; + /* + * Fetch the relative futex offset: + */ + if (get_user(futex_offset, &head->futex_offset)) + return; + /* + * Fetch any possibly pending lock-add first, and handle it + * if it exists: + */ + if (compat_fetch_robust_entry(&upending, &pending, + &head->list_op_pending, &pip)) + return; + + next_entry = NULL; /* avoid warning with gcc */ + while (entry != (struct robust_list __user *) &head->list) { + /* + * Fetch the next entry in the list before calling + * handle_futex_death: + */ + rc = compat_fetch_robust_entry(&next_uentry, &next_entry, + (compat_uptr_t __user *)&entry->next, &next_pi); + /* + * A pending lock might already be on the list, so + * dont process it twice: + */ + if (entry != pending) { + void __user *uaddr = futex_uaddr(entry, futex_offset); + + if (handle_futex_death(uaddr, curr, pi, + HANDLE_DEATH_LIST)) + return; + } + if (rc) + return; + uentry = next_uentry; + entry = next_entry; + pi = next_pi; + /* + * Avoid excessively long or circular lists: + */ + if (!--limit) + break; + + cond_resched(); + } + if (pending) { + void __user *uaddr = futex_uaddr(pending, futex_offset); + + handle_futex_death(uaddr, curr, pip, HANDLE_DEATH_PENDING); + } +} + +COMPAT_SYSCALL_DEFINE2(set_robust_list, + struct compat_robust_list_head __user *, head, + compat_size_t, len) +{ + if (!futex_cmpxchg_enabled) + return -ENOSYS; + + if (unlikely(len != sizeof(*head))) + return -EINVAL; + + current->compat_robust_list = head; + + return 0; +} + +COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, + compat_uptr_t __user *, head_ptr, + compat_size_t __user *, len_ptr) +{ + struct compat_robust_list_head __user *head; + unsigned long ret; + struct task_struct *p; + + if (!futex_cmpxchg_enabled) + return -ENOSYS; + + rcu_read_lock(); + + ret = -ESRCH; + if (!pid) + p = current; + else { + p = find_task_by_vpid(pid); + if (!p) + goto err_unlock; + } + + ret = -EPERM; + if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) + goto err_unlock; + + head = p->compat_robust_list; + rcu_read_unlock(); + + if (put_user(sizeof(*head), len_ptr)) + return -EFAULT; + return put_user(ptr_to_compat(head), head_ptr); + +err_unlock: + rcu_read_unlock(); + + return ret; +} + +COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + struct compat_timespec __user *, utime, u32 __user *, uaddr2, + u32, val3) +{ + struct timespec ts; + ktime_t t, *tp = NULL; + int val2 = 0; + int cmd = op & FUTEX_CMD_MASK; + + if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || + cmd == FUTEX_WAIT_BITSET || + cmd == FUTEX_WAIT_REQUEUE_PI)) { + if (compat_get_timespec(&ts, utime)) + return -EFAULT; + if (!timespec_valid(&ts)) + return -EINVAL; + + t = timespec_to_ktime(ts); + if (cmd == FUTEX_WAIT) + t = ktime_add_safe(ktime_get(), t); + tp = &t; + } + if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || + cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) + val2 = (int) (unsigned long) utime; + + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); +} +#endif /* CONFIG_COMPAT */ + static void __init futex_detect_cmpxchg(void) { #ifndef CONFIG_HAVE_FUTEX_CMPXCHG diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c deleted file mode 100644 index 83f830acbb5f..000000000000 --- a/kernel/futex_compat.c +++ /dev/null @@ -1,202 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/kernel/futex_compat.c - * - * Futex compatibililty routines. - * - * Copyright 2006, Red Hat, Inc., Ingo Molnar - */ - -#include -#include -#include -#include -#include -#include - -#include - - -/* - * Fetch a robust-list pointer. Bit 0 signals PI futexes: - */ -static inline int -fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry, - compat_uptr_t __user *head, unsigned int *pi) -{ - if (get_user(*uentry, head)) - return -EFAULT; - - *entry = compat_ptr((*uentry) & ~1); - *pi = (unsigned int)(*uentry) & 1; - - return 0; -} - -static void __user *futex_uaddr(struct robust_list __user *entry, - compat_long_t futex_offset) -{ - compat_uptr_t base = ptr_to_compat(entry); - void __user *uaddr = compat_ptr(base + futex_offset); - - return uaddr; -} - -/* - * Walk curr->robust_list (very carefully, it's a userspace list!) - * and mark any locks found there dead, and notify any waiters. - * - * We silently return on any sign of list-walking problem. - */ -void compat_exit_robust_list(struct task_struct *curr) -{ - struct compat_robust_list_head __user *head = curr->compat_robust_list; - struct robust_list __user *entry, *next_entry, *pending; - unsigned int limit = ROBUST_LIST_LIMIT, pi, pip; - unsigned int uninitialized_var(next_pi); - compat_uptr_t uentry, next_uentry, upending; - compat_long_t futex_offset; - int rc; - - if (!futex_cmpxchg_enabled) - return; - - /* - * Fetch the list head (which was registered earlier, via - * sys_set_robust_list()): - */ - if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi)) - return; - /* - * Fetch the relative futex offset: - */ - if (get_user(futex_offset, &head->futex_offset)) - return; - /* - * Fetch any possibly pending lock-add first, and handle it - * if it exists: - */ - if (fetch_robust_entry(&upending, &pending, - &head->list_op_pending, &pip)) - return; - - next_entry = NULL; /* avoid warning with gcc */ - while (entry != (struct robust_list __user *) &head->list) { - /* - * Fetch the next entry in the list before calling - * handle_futex_death: - */ - rc = fetch_robust_entry(&next_uentry, &next_entry, - (compat_uptr_t __user *)&entry->next, &next_pi); - /* - * A pending lock might already be on the list, so - * dont process it twice: - */ - if (entry != pending) { - void __user *uaddr = futex_uaddr(entry, futex_offset); - - if (handle_futex_death(uaddr, curr, pi)) - return; - } - if (rc) - return; - uentry = next_uentry; - entry = next_entry; - pi = next_pi; - /* - * Avoid excessively long or circular lists: - */ - if (!--limit) - break; - - cond_resched(); - } - if (pending) { - void __user *uaddr = futex_uaddr(pending, futex_offset); - - handle_futex_death(uaddr, curr, pip); - } -} - -COMPAT_SYSCALL_DEFINE2(set_robust_list, - struct compat_robust_list_head __user *, head, - compat_size_t, len) -{ - if (!futex_cmpxchg_enabled) - return -ENOSYS; - - if (unlikely(len != sizeof(*head))) - return -EINVAL; - - current->compat_robust_list = head; - - return 0; -} - -COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, - compat_uptr_t __user *, head_ptr, - compat_size_t __user *, len_ptr) -{ - struct compat_robust_list_head __user *head; - unsigned long ret; - struct task_struct *p; - - if (!futex_cmpxchg_enabled) - return -ENOSYS; - - rcu_read_lock(); - - ret = -ESRCH; - if (!pid) - p = current; - else { - p = find_task_by_vpid(pid); - if (!p) - goto err_unlock; - } - - ret = -EPERM; - if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) - goto err_unlock; - - head = p->compat_robust_list; - rcu_read_unlock(); - - if (put_user(sizeof(*head), len_ptr)) - return -EFAULT; - return put_user(ptr_to_compat(head), head_ptr); - -err_unlock: - rcu_read_unlock(); - - return ret; -} - -COMPAT_SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, - struct compat_timespec __user *, utime, u32 __user *, uaddr2, - u32, val3) -{ - struct timespec ts; - ktime_t t, *tp = NULL; - int val2 = 0; - int cmd = op & FUTEX_CMD_MASK; - - if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || - cmd == FUTEX_WAIT_BITSET || - cmd == FUTEX_WAIT_REQUEUE_PI)) { - if (compat_get_timespec(&ts, utime)) - return -EFAULT; - if (!timespec_valid(&ts)) - return -EINVAL; - - t = timespec_to_ktime(ts); - if (cmd == FUTEX_WAIT) - t = ktime_add_safe(ktime_get(), t); - tp = &t; - } - if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || - cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) - val2 = (int) (unsigned long) utime; - - return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); -} diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 4ef7f3b820ce..5230c47fc43e 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -119,8 +119,6 @@ static inline void unregister_handler_proc(unsigned int irq, extern bool irq_can_set_affinity_usr(unsigned int irq); -extern int irq_select_affinity_usr(unsigned int irq); - extern void irq_set_thread_affinity(struct irq_desc *desc); extern int irq_do_set_affinity(struct irq_data *data, diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index ac4644e92b49..b269ae16b10c 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -147,6 +147,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, switch (fwid->type) { case IRQCHIP_FWNODE_NAMED: case IRQCHIP_FWNODE_NAMED_ID: + domain->fwnode = fwnode; domain->name = kstrdup(fwid->name, GFP_KERNEL); if (!domain->name) { kfree(domain); @@ -1537,6 +1538,7 @@ int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg) if (rv) { /* Restore the original irq_data. */ *root_irq_data = *child_irq_data; + kfree(child_irq_data); goto error; } diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 9c86a3e45110..037e8fc1b008 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -382,23 +382,9 @@ int irq_setup_affinity(struct irq_desc *desc) { return irq_select_affinity(irq_desc_get_irq(desc)); } -#endif +#endif /* CONFIG_AUTO_IRQ_AFFINITY */ +#endif /* CONFIG_SMP */ -/* - * Called when a bogus affinity is set via /proc/irq - */ -int irq_select_affinity_usr(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - unsigned long flags; - int ret; - - raw_spin_lock_irqsave(&desc->lock, flags); - ret = irq_setup_affinity(desc); - raw_spin_unlock_irqrestore(&desc->lock, flags); - return ret; -} -#endif /** * irq_set_vcpu_affinity - Set vcpu affinity for the interrupt diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index c010cc0daf79..b031db9d56c6 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -117,6 +117,28 @@ static int irq_affinity_list_proc_show(struct seq_file *m, void *v) return show_irq_affinity(AFFINITY_LIST, m); } +#ifndef CONFIG_AUTO_IRQ_AFFINITY +static inline int irq_select_affinity_usr(unsigned int irq) +{ + /* + * If the interrupt is started up already then this fails. The + * interrupt is assigned to an online CPU already. There is no + * point to move it around randomly. Tell user space that the + * selected mask is bogus. + * + * If not then any change to the affinity is pointless because the + * startup code invokes irq_setup_affinity() which will select + * a online CPU anyway. + */ + return -EINVAL; +} +#else +/* ALPHA magic affinity auto selector. Keep it for historical reasons. */ +static inline int irq_select_affinity_usr(unsigned int irq) +{ + return irq_select_affinity(irq); +} +#endif static ssize_t write_irq_affinity(int type, struct file *file, const char __user *buffer, size_t count, loff_t *pos) diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 06432e1a792a..4ad367a05274 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -314,6 +314,12 @@ static inline void cleanup_symbol_name(char *s) { char *res; +#ifdef CONFIG_THINLTO + /* Filter out hashes from static functions */ + res = strrchr(s, '$'); + if (res) + *res = '\0'; +#endif res = strrchr(s, '.'); if (res && !strcmp(res, ".cfi")) *res = '\0'; diff --git a/kernel/kcov.c b/kernel/kcov.c index f1e060b04ef6..2f0048ef4b64 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -20,34 +21,157 @@ #include #include #include +#include +#include #include +#define kcov_debug(fmt, ...) pr_debug("%s: " fmt, __func__, ##__VA_ARGS__) + +/* Number of 64-bit words written per one comparison: */ +#define KCOV_WORDS_PER_CMP 4 + /* * kcov descriptor (one per opened debugfs file). * State transitions of the descriptor: * - initial state after open() * - then there must be a single ioctl(KCOV_INIT_TRACE) call * - then, mmap() call (several calls are allowed but not useful) - * - then, repeated enable/disable for a task (only one task a time allowed) + * - then, ioctl(KCOV_ENABLE, arg), where arg is + * KCOV_TRACE_PC - to trace only the PCs + * or + * KCOV_TRACE_CMP - to trace only the comparison operands + * - then, ioctl(KCOV_DISABLE) to disable the task. + * Enabling/disabling ioctls can be repeated (only one task a time allowed). */ struct kcov { /* * Reference counter. We keep one for: * - opened file descriptor * - task with enabled coverage (we can't unwire it from another task) + * - each code section for remote coverage collection */ - atomic_t refcount; + refcount_t refcount; /* The lock protects mode, size, area and t. */ spinlock_t lock; enum kcov_mode mode; - /* Size of arena (in long's for KCOV_MODE_TRACE). */ - unsigned size; + /* Size of arena (in long's). */ + unsigned int size; /* Coverage buffer shared with user space. */ void *area; /* Task for which we collect coverage, or NULL. */ struct task_struct *t; + /* Collecting coverage from remote (background) threads. */ + bool remote; + /* Size of remote area (in long's). */ + unsigned int remote_size; + /* + * Sequence is incremented each time kcov is reenabled, used by + * kcov_remote_stop(), see the comment there. + */ + int sequence; }; +struct kcov_remote_area { + struct list_head list; + unsigned int size; +}; + +struct kcov_remote { + u64 handle; + struct kcov *kcov; + struct hlist_node hnode; +}; + +static DEFINE_SPINLOCK(kcov_remote_lock); +static DEFINE_HASHTABLE(kcov_remote_map, 4); +static struct list_head kcov_remote_areas = LIST_HEAD_INIT(kcov_remote_areas); + +/* Must be called with kcov_remote_lock locked. */ +static struct kcov_remote *kcov_remote_find(u64 handle) +{ + struct kcov_remote *remote; + + hash_for_each_possible(kcov_remote_map, remote, hnode, handle) { + if (remote->handle == handle) + return remote; + } + return NULL; +} + +static struct kcov_remote *kcov_remote_add(struct kcov *kcov, u64 handle) +{ + struct kcov_remote *remote; + + if (kcov_remote_find(handle)) + return ERR_PTR(-EEXIST); + remote = kmalloc(sizeof(*remote), GFP_ATOMIC); + if (!remote) + return ERR_PTR(-ENOMEM); + remote->handle = handle; + remote->kcov = kcov; + hash_add(kcov_remote_map, &remote->hnode, handle); + return remote; +} + +/* Must be called with kcov_remote_lock locked. */ +static struct kcov_remote_area *kcov_remote_area_get(unsigned int size) +{ + struct kcov_remote_area *area; + struct list_head *pos; + + kcov_debug("size = %u\n", size); + list_for_each(pos, &kcov_remote_areas) { + area = list_entry(pos, struct kcov_remote_area, list); + if (area->size == size) { + list_del(&area->list); + kcov_debug("rv = %px\n", area); + return area; + } + } + kcov_debug("rv = NULL\n"); + return NULL; +} + +/* Must be called with kcov_remote_lock locked. */ +static void kcov_remote_area_put(struct kcov_remote_area *area, + unsigned int size) +{ + kcov_debug("area = %px, size = %u\n", area, size); + INIT_LIST_HEAD(&area->list); + area->size = size; + list_add(&area->list, &kcov_remote_areas); +} + +static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) +{ + unsigned int mode; + + /* + * We are interested in code coverage as a function of a syscall inputs, + * so we ignore code executed in interrupts. + */ + if (!in_task()) + return false; + mode = READ_ONCE(t->kcov_mode); + /* + * There is some code that runs in interrupts but for which + * in_interrupt() returns false (e.g. preempt_schedule_irq()). + * READ_ONCE()/barrier() effectively provides load-acquire wrt + * interrupts, there are paired barrier()/WRITE_ONCE() in + * kcov_start(). + */ + barrier(); + return mode == needed_mode; +} + +static notrace unsigned long canonicalize_ip(unsigned long ip) +{ +#ifdef CONFIG_RANDOMIZE_BASE + ip -= kaslr_offset(); +#endif + return ip; +} + /* * Entry point from instrumented code. * This is called once per basic-block/edge. @@ -55,64 +179,223 @@ struct kcov { void notrace __sanitizer_cov_trace_pc(void) { struct task_struct *t; - enum kcov_mode mode; + unsigned long *area; + unsigned long ip = canonicalize_ip(_RET_IP_); + unsigned long pos; t = current; - /* - * We are interested in code coverage as a function of a syscall inputs, - * so we ignore code executed in interrupts. - */ - if (!t || !in_task()) + if (!check_kcov_mode(KCOV_MODE_TRACE_PC, t)) return; - mode = READ_ONCE(t->kcov_mode); - if (mode == KCOV_MODE_TRACE) { - unsigned long *area; - unsigned long pos; - unsigned long ip = _RET_IP_; -#ifdef CONFIG_RANDOMIZE_BASE - ip -= kaslr_offset(); -#endif - - /* - * There is some code that runs in interrupts but for which - * in_interrupt() returns false (e.g. preempt_schedule_irq()). - * READ_ONCE()/barrier() effectively provides load-acquire wrt - * interrupts, there are paired barrier()/WRITE_ONCE() in - * kcov_ioctl_locked(). - */ - barrier(); - area = t->kcov_area; - /* The first word is number of subsequent PCs. */ - pos = READ_ONCE(area[0]) + 1; - if (likely(pos < t->kcov_size)) { - area[pos] = ip; - WRITE_ONCE(area[0], pos); - } + area = t->kcov_area; + /* The first 64-bit word is the number of subsequent PCs. */ + pos = READ_ONCE(area[0]) + 1; + if (likely(pos < t->kcov_size)) { + area[pos] = ip; + WRITE_ONCE(area[0], pos); } } EXPORT_SYMBOL(__sanitizer_cov_trace_pc); -static void kcov_get(struct kcov *kcov) +#ifdef CONFIG_KCOV_ENABLE_COMPARISONS +static void notrace write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip) { - atomic_inc(&kcov->refcount); -} + struct task_struct *t; + u64 *area; + u64 count, start_index, end_pos, max_pos; -static void kcov_put(struct kcov *kcov) -{ - if (atomic_dec_and_test(&kcov->refcount)) { - vfree(kcov->area); - kfree(kcov); + t = current; + if (!check_kcov_mode(KCOV_MODE_TRACE_CMP, t)) + return; + + ip = canonicalize_ip(ip); + + /* + * We write all comparison arguments and types as u64. + * The buffer was allocated for t->kcov_size unsigned longs. + */ + area = (u64 *)t->kcov_area; + max_pos = t->kcov_size * sizeof(unsigned long); + + count = READ_ONCE(area[0]); + + /* Every record is KCOV_WORDS_PER_CMP 64-bit words. */ + start_index = 1 + count * KCOV_WORDS_PER_CMP; + end_pos = (start_index + KCOV_WORDS_PER_CMP) * sizeof(u64); + if (likely(end_pos <= max_pos)) { + area[start_index] = type; + area[start_index + 1] = arg1; + area[start_index + 2] = arg2; + area[start_index + 3] = ip; + WRITE_ONCE(area[0], count + 1); } } -void kcov_task_init(struct task_struct *t) +void notrace __sanitizer_cov_trace_cmp1(u8 arg1, u8 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(0), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp1); + +void notrace __sanitizer_cov_trace_cmp2(u16 arg1, u16 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(1), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp2); + +void notrace __sanitizer_cov_trace_cmp4(u32 arg1, u32 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(2), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp4); + +void notrace __sanitizer_cov_trace_cmp8(u64 arg1, u64 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(3), arg1, arg2, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_cmp8); + +void notrace __sanitizer_cov_trace_const_cmp1(u8 arg1, u8 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(0) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp1); + +void notrace __sanitizer_cov_trace_const_cmp2(u16 arg1, u16 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(1) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp2); + +void notrace __sanitizer_cov_trace_const_cmp4(u32 arg1, u32 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(2) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp4); + +void notrace __sanitizer_cov_trace_const_cmp8(u64 arg1, u64 arg2) +{ + write_comp_data(KCOV_CMP_SIZE(3) | KCOV_CMP_CONST, arg1, arg2, + _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_const_cmp8); + +void notrace __sanitizer_cov_trace_switch(u64 val, u64 *cases) +{ + u64 i; + u64 count = cases[0]; + u64 size = cases[1]; + u64 type = KCOV_CMP_CONST; + + switch (size) { + case 8: + type |= KCOV_CMP_SIZE(0); + break; + case 16: + type |= KCOV_CMP_SIZE(1); + break; + case 32: + type |= KCOV_CMP_SIZE(2); + break; + case 64: + type |= KCOV_CMP_SIZE(3); + break; + default: + return; + } + for (i = 0; i < count; i++) + write_comp_data(type, cases[i + 2], val, _RET_IP_); +} +EXPORT_SYMBOL(__sanitizer_cov_trace_switch); +#endif /* ifdef CONFIG_KCOV_ENABLE_COMPARISONS */ + +static void kcov_start(struct task_struct *t, unsigned int size, + void *area, enum kcov_mode mode, int sequence) +{ + kcov_debug("t = %px, size = %u, area = %px\n", t, size, area); + /* Cache in task struct for performance. */ + t->kcov_size = size; + t->kcov_area = area; + /* See comment in check_kcov_mode(). */ + barrier(); + WRITE_ONCE(t->kcov_mode, mode); + t->kcov_sequence = sequence; +} + +static void kcov_stop(struct task_struct *t) { WRITE_ONCE(t->kcov_mode, KCOV_MODE_DISABLED); barrier(); t->kcov_size = 0; t->kcov_area = NULL; +} + +static void kcov_task_reset(struct task_struct *t) +{ + kcov_stop(t); t->kcov = NULL; + t->kcov_sequence = 0; + t->kcov_handle = 0; +} + +void kcov_task_init(struct task_struct *t) +{ + kcov_task_reset(t); + t->kcov_handle = current->kcov_handle; +} + +static void kcov_reset(struct kcov *kcov) +{ + kcov->t = NULL; + kcov->mode = KCOV_MODE_INIT; + kcov->remote = false; + kcov->remote_size = 0; + kcov->sequence++; +} + +static void kcov_remote_reset(struct kcov *kcov) +{ + int bkt; + struct kcov_remote *remote; + struct hlist_node *tmp; + + spin_lock(&kcov_remote_lock); + hash_for_each_safe(kcov_remote_map, bkt, tmp, remote, hnode) { + if (remote->kcov != kcov) + continue; + kcov_debug("removing handle %llx\n", remote->handle); + hash_del(&remote->hnode); + kfree(remote); + } + /* Do reset before unlock to prevent races with kcov_remote_start(). */ + kcov_reset(kcov); + spin_unlock(&kcov_remote_lock); +} + +static void kcov_disable(struct task_struct *t, struct kcov *kcov) +{ + kcov_task_reset(t); + if (kcov->remote) + kcov_remote_reset(kcov); + else + kcov_reset(kcov); +} + +static void kcov_get(struct kcov *kcov) +{ + refcount_inc(&kcov->refcount); +} + +static void kcov_put(struct kcov *kcov) +{ + if (refcount_dec_and_test(&kcov->refcount)) { + kcov_remote_reset(kcov); + vfree(kcov->area); + kfree(kcov); + } } void kcov_task_exit(struct task_struct *t) @@ -122,14 +405,36 @@ void kcov_task_exit(struct task_struct *t) kcov = t->kcov; if (kcov == NULL) return; + spin_lock(&kcov->lock); + kcov_debug("t = %px, kcov->t = %px\n", t, kcov->t); + /* + * For KCOV_ENABLE devices we want to make sure that t->kcov->t == t, + * which comes down to: + * WARN_ON(!kcov->remote && kcov->t != t); + * + * For KCOV_REMOTE_ENABLE devices, the exiting task is either: + * 2. A remote task between kcov_remote_start() and kcov_remote_stop(). + * In this case we should print a warning right away, since a task + * shouldn't be exiting when it's in a kcov coverage collection + * section. Here t points to the task that is collecting remote + * coverage, and t->kcov->t points to the thread that created the + * kcov device. Which means that to detect this case we need to + * check that t != t->kcov->t, and this gives us the following: + * WARN_ON(kcov->remote && kcov->t != t); + * + * 2. The task that created kcov exiting without calling KCOV_DISABLE, + * and then again we can make sure that t->kcov->t == t: + * WARN_ON(kcov->remote && kcov->t != t); + * + * By combining all three checks into one we get: + */ if (WARN_ON(kcov->t != t)) { spin_unlock(&kcov->lock); return; } /* Just to not leave dangling references behind. */ - kcov_task_init(t); - kcov->t = NULL; + kcov_disable(t, kcov); spin_unlock(&kcov->lock); kcov_put(kcov); } @@ -148,7 +453,7 @@ static int kcov_mmap(struct file *filep, struct vm_area_struct *vma) spin_lock(&kcov->lock); size = kcov->size * sizeof(unsigned long); - if (kcov->mode == KCOV_MODE_DISABLED || vma->vm_pgoff != 0 || + if (kcov->mode != KCOV_MODE_INIT || vma->vm_pgoff != 0 || vma->vm_end - vma->vm_start != size) { res = -EINVAL; goto exit; @@ -177,7 +482,9 @@ static int kcov_open(struct inode *inode, struct file *filep) kcov = kzalloc(sizeof(*kcov), GFP_KERNEL); if (!kcov) return -ENOMEM; - atomic_set(&kcov->refcount, 1); + kcov->mode = KCOV_MODE_DISABLED; + kcov->sequence = 1; + refcount_set(&kcov->refcount, 1); spin_lock_init(&kcov->lock); filep->private_data = kcov; return nonseekable_open(inode, filep); @@ -189,14 +496,64 @@ static int kcov_close(struct inode *inode, struct file *filep) return 0; } +static int kcov_get_mode(unsigned long arg) +{ + if (arg == KCOV_TRACE_PC) + return KCOV_MODE_TRACE_PC; + else if (arg == KCOV_TRACE_CMP) +#ifdef CONFIG_KCOV_ENABLE_COMPARISONS + return KCOV_MODE_TRACE_CMP; +#else + return -ENOTSUPP; +#endif + else + return -EINVAL; +} + +/* + * Fault in a lazily-faulted vmalloc area before it can be used by + * __santizer_cov_trace_pc(), to avoid recursion issues if any code on the + * vmalloc fault handling path is instrumented. + */ +static void kcov_fault_in_area(struct kcov *kcov) +{ + unsigned long stride = PAGE_SIZE / sizeof(unsigned long); + unsigned long *area = kcov->area; + unsigned long offset; + + for (offset = 0; offset < kcov->size; offset += stride) + READ_ONCE(area[offset]); +} + +static inline bool kcov_check_handle(u64 handle, bool common_valid, + bool uncommon_valid, bool zero_valid) +{ + if (handle & ~(KCOV_SUBSYSTEM_MASK | KCOV_INSTANCE_MASK)) + return false; + switch (handle & KCOV_SUBSYSTEM_MASK) { + case KCOV_SUBSYSTEM_COMMON: + return (handle & KCOV_INSTANCE_MASK) ? + common_valid : zero_valid; + case KCOV_SUBSYSTEM_USB: + return uncommon_valid; + default: + return false; + } + return false; +} + static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, unsigned long arg) { struct task_struct *t; unsigned long size, unused; + int mode, i; + struct kcov_remote_arg *remote_arg; + struct kcov_remote *remote; switch (cmd) { case KCOV_INIT_TRACE: + kcov_debug("KCOV_INIT_TRACE\n"); /* * Enable kcov in trace mode and setup buffer size. * Must happen before anything else. @@ -212,9 +569,10 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, if (size < 2 || size > INT_MAX / sizeof(unsigned long)) return -EINVAL; kcov->size = size; - kcov->mode = KCOV_MODE_TRACE; + kcov->mode = KCOV_MODE_INIT; return 0; case KCOV_ENABLE: + kcov_debug("KCOV_ENABLE\n"); /* * Enable coverage for the current task. * At this point user must have been enabled trace mode, @@ -222,25 +580,25 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, * at task exit or voluntary by KCOV_DISABLE. After that it can * be enabled for another task. */ - unused = arg; - if (unused != 0 || kcov->mode == KCOV_MODE_DISABLED || - kcov->area == NULL) + if (kcov->mode != KCOV_MODE_INIT || !kcov->area) return -EINVAL; t = current; if (kcov->t != NULL || t->kcov != NULL) return -EBUSY; - /* Cache in task struct for performance. */ - t->kcov_size = kcov->size; - t->kcov_area = kcov->area; - /* See comment in __sanitizer_cov_trace_pc(). */ - barrier(); - WRITE_ONCE(t->kcov_mode, kcov->mode); + mode = kcov_get_mode(arg); + if (mode < 0) + return mode; + kcov_fault_in_area(kcov); + kcov->mode = mode; + kcov_start(t, kcov->size, kcov->area, kcov->mode, + kcov->sequence); t->kcov = kcov; kcov->t = t; - /* This is put either in kcov_task_exit() or in KCOV_DISABLE. */ + /* Put either in kcov_task_exit() or in KCOV_DISABLE. */ kcov_get(kcov); return 0; case KCOV_DISABLE: + kcov_debug("KCOV_DISABLE\n"); /* Disable coverage for the current task. */ unused = arg; if (unused != 0 || current->kcov != kcov) @@ -248,10 +606,65 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, t = current; if (WARN_ON(kcov->t != t)) return -EINVAL; - kcov_task_init(t); - kcov->t = NULL; + kcov_disable(t, kcov); kcov_put(kcov); return 0; + case KCOV_REMOTE_ENABLE: + kcov_debug("KCOV_REMOTE_ENABLE\n"); + if (kcov->mode != KCOV_MODE_INIT || !kcov->area) + return -EINVAL; + t = current; + if (kcov->t != NULL || t->kcov != NULL) + return -EBUSY; + remote_arg = (struct kcov_remote_arg *)arg; + mode = kcov_get_mode(remote_arg->trace_mode); + if (mode < 0) + return mode; + if (remote_arg->area_size > LONG_MAX / sizeof(unsigned long)) + return -EINVAL; + kcov->mode = mode; + t->kcov = kcov; + kcov->t = t; + kcov->remote = true; + kcov->remote_size = remote_arg->area_size; + spin_lock(&kcov_remote_lock); + for (i = 0; i < remote_arg->num_handles; i++) { + kcov_debug("handle %llx\n", remote_arg->handles[i]); + if (!kcov_check_handle(remote_arg->handles[i], + false, true, false)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return -EINVAL; + } + remote = kcov_remote_add(kcov, remote_arg->handles[i]); + if (IS_ERR(remote)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return PTR_ERR(remote); + } + } + if (remote_arg->common_handle) { + kcov_debug("common handle %llx\n", + remote_arg->common_handle); + if (!kcov_check_handle(remote_arg->common_handle, + true, false, false)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return -EINVAL; + } + remote = kcov_remote_add(kcov, + remote_arg->common_handle); + if (IS_ERR(remote)) { + spin_unlock(&kcov_remote_lock); + kcov_disable(t, kcov); + return PTR_ERR(remote); + } + t->kcov_handle = remote_arg->common_handle; + } + spin_unlock(&kcov_remote_lock); + /* Put either in kcov_task_exit() or in KCOV_DISABLE. */ + kcov_get(kcov); + return 0; default: return -ENOTTY; } @@ -261,11 +674,35 @@ static long kcov_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { struct kcov *kcov; int res; + struct kcov_remote_arg *remote_arg = NULL; + unsigned int remote_num_handles; + unsigned long remote_arg_size; + + if (cmd == KCOV_REMOTE_ENABLE) { + if (get_user(remote_num_handles, (unsigned __user *)(arg + + offsetof(struct kcov_remote_arg, num_handles)))) + return -EFAULT; + if (remote_num_handles > KCOV_REMOTE_MAX_HANDLES) + return -EINVAL; + remote_arg_size = sizeof(*remote_arg) + + sizeof(remote_arg->handles[0]) * remote_num_handles; + remote_arg = memdup_user((void __user *)arg, remote_arg_size); + if (IS_ERR(remote_arg)) + return PTR_ERR(remote_arg); + if (remote_arg->num_handles != remote_num_handles) { + kfree(remote_arg); + return -EINVAL; + } + arg = (unsigned long)remote_arg; + } kcov = filep->private_data; spin_lock(&kcov->lock); res = kcov_ioctl_locked(kcov, cmd, arg); spin_unlock(&kcov->lock); + + kfree(remote_arg); + return res; } @@ -277,6 +714,207 @@ static const struct file_operations kcov_fops = { .release = kcov_close, }; +/* + * kcov_remote_start() and kcov_remote_stop() can be used to annotate a section + * of code in a kernel background thread to allow kcov to be used to collect + * coverage from that part of code. + * + * The handle argument of kcov_remote_start() identifies a code section that is + * used for coverage collection. A userspace process passes this handle to + * KCOV_REMOTE_ENABLE ioctl to make the used kcov device start collecting + * coverage for the code section identified by this handle. + * + * The usage of these annotations in the kernel code is different depending on + * the type of the kernel thread whose code is being annotated. + * + * For global kernel threads that are spawned in a limited number of instances + * (e.g. one USB hub_event() worker thread is spawned per USB HCD), each + * instance must be assigned a unique 4-byte instance id. The instance id is + * then combined with a 1-byte subsystem id to get a handle via + * kcov_remote_handle(subsystem_id, instance_id). + * + * For local kernel threads that are spawned from system calls handler when a + * user interacts with some kernel interface (e.g. vhost workers), a handle is + * passed from a userspace process as the common_handle field of the + * kcov_remote_arg struct (note, that the user must generate a handle by using + * kcov_remote_handle() with KCOV_SUBSYSTEM_COMMON as the subsystem id and an + * arbitrary 4-byte non-zero number as the instance id). This common handle + * then gets saved into the task_struct of the process that issued the + * KCOV_REMOTE_ENABLE ioctl. When this proccess issues system calls that spawn + * kernel threads, the common handle must be retrived via kcov_common_handle() + * and passed to the spawned threads via custom annotations. Those kernel + * threads must in turn be annotated with kcov_remote_start(common_handle) and + * kcov_remote_stop(). All of the threads that are spawned by the same process + * obtain the same handle, hence the name "common". + * + * See Documentation/dev-tools/kcov.rst for more details. + * + * Internally, this function looks up the kcov device associated with the + * provided handle, allocates an area for coverage collection, and saves the + * pointers to kcov and area into the current task_struct to allow coverage to + * be collected via __sanitizer_cov_trace_pc() + * In turns kcov_remote_stop() clears those pointers from task_struct to stop + * collecting coverage and copies all collected coverage into the kcov area. + */ +void kcov_remote_start(u64 handle) +{ + struct kcov_remote *remote; + void *area; + struct task_struct *t; + unsigned int size; + enum kcov_mode mode; + int sequence; + + if (WARN_ON(!kcov_check_handle(handle, true, true, true))) + return; + if (WARN_ON(!in_task())) + return; + t = current; + /* + * Check that kcov_remote_start is not called twice + * nor called by user tasks (with enabled kcov). + */ + if (WARN_ON(t->kcov)) + return; + + kcov_debug("handle = %llx\n", handle); + + spin_lock(&kcov_remote_lock); + remote = kcov_remote_find(handle); + if (!remote) { + kcov_debug("no remote found"); + spin_unlock(&kcov_remote_lock); + return; + } + /* Put in kcov_remote_stop(). */ + kcov_get(remote->kcov); + t->kcov = remote->kcov; + /* + * Read kcov fields before unlock to prevent races with + * KCOV_DISABLE / kcov_remote_reset(). + */ + size = remote->kcov->remote_size; + mode = remote->kcov->mode; + sequence = remote->kcov->sequence; + area = kcov_remote_area_get(size); + spin_unlock(&kcov_remote_lock); + + if (!area) { + area = vmalloc(size * sizeof(unsigned long)); + if (!area) { + t->kcov = NULL; + kcov_put(remote->kcov); + return; + } + } + /* Reset coverage size. */ + *(u64 *)area = 0; + + kcov_debug("area = %px, size = %u", area, size); + + kcov_start(t, size, area, mode, sequence); + +} +EXPORT_SYMBOL(kcov_remote_start); + +static void kcov_move_area(enum kcov_mode mode, void *dst_area, + unsigned int dst_area_size, void *src_area) +{ + u64 word_size = sizeof(unsigned long); + u64 count_size, entry_size_log; + u64 dst_len, src_len; + void *dst_entries, *src_entries; + u64 dst_occupied, dst_free, bytes_to_move, entries_moved; + + kcov_debug("%px %u <= %px %lu\n", + dst_area, dst_area_size, src_area, *(unsigned long *)src_area); + + switch (mode) { + case KCOV_MODE_TRACE_PC: + dst_len = READ_ONCE(*(unsigned long *)dst_area); + src_len = *(unsigned long *)src_area; + count_size = sizeof(unsigned long); + entry_size_log = __ilog2_u64(sizeof(unsigned long)); + break; + case KCOV_MODE_TRACE_CMP: + dst_len = READ_ONCE(*(u64 *)dst_area); + src_len = *(u64 *)src_area; + count_size = sizeof(u64); + BUILD_BUG_ON(!is_power_of_2(KCOV_WORDS_PER_CMP)); + entry_size_log = __ilog2_u64(sizeof(u64) * KCOV_WORDS_PER_CMP); + break; + default: + WARN_ON(1); + return; + } + + /* As arm can't divide u64 integers use log of entry size. */ + if (dst_len > ((dst_area_size * word_size - count_size) >> + entry_size_log)) + return; + dst_occupied = count_size + (dst_len << entry_size_log); + dst_free = dst_area_size * word_size - dst_occupied; + bytes_to_move = min(dst_free, src_len << entry_size_log); + dst_entries = dst_area + dst_occupied; + src_entries = src_area + count_size; + memcpy(dst_entries, src_entries, bytes_to_move); + entries_moved = bytes_to_move >> entry_size_log; + + switch (mode) { + case KCOV_MODE_TRACE_PC: + WRITE_ONCE(*(unsigned long *)dst_area, dst_len + entries_moved); + break; + case KCOV_MODE_TRACE_CMP: + WRITE_ONCE(*(u64 *)dst_area, dst_len + entries_moved); + break; + default: + break; + } +} + +/* See the comment before kcov_remote_start() for usage details. */ +void kcov_remote_stop(void) +{ + struct task_struct *t = current; + struct kcov *kcov = t->kcov; + void *area = t->kcov_area; + unsigned int size = t->kcov_size; + int sequence = t->kcov_sequence; + + if (!kcov) { + kcov_debug("no kcov found\n"); + return; + } + + kcov_stop(t); + t->kcov = NULL; + + spin_lock(&kcov->lock); + /* + * KCOV_DISABLE could have been called between kcov_remote_start() + * and kcov_remote_stop(), hence the check. + */ + kcov_debug("move if: %d == %d && %d\n", + sequence, kcov->sequence, (int)kcov->remote); + if (sequence == kcov->sequence && kcov->remote) + kcov_move_area(kcov->mode, kcov->area, kcov->size, area); + spin_unlock(&kcov->lock); + + spin_lock(&kcov_remote_lock); + kcov_remote_area_put(area, size); + spin_unlock(&kcov_remote_lock); + + kcov_put(kcov); +} +EXPORT_SYMBOL(kcov_remote_stop); + +/* See the comment before kcov_remote_start() for usage details. */ +u64 kcov_common_handle(void) +{ + return current->kcov_handle; +} +EXPORT_SYMBOL(kcov_common_handle); + static int __init kcov_init(void) { /* @@ -284,10 +922,8 @@ static int __init kcov_init(void) * there is no need to protect it against removal races. The * use of debugfs_create_file_unsafe() is actually safe here. */ - if (!debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops)) { - pr_err("failed to create kcov in debugfs\n"); - return -ENOMEM; - } + debugfs_create_file_unsafe("kcov", 0600, NULL, NULL, &kcov_fops); + return 0; } diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 8f15665ab616..27cf24e285e0 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -473,6 +473,10 @@ static struct page *kimage_alloc_crash_control_pages(struct kimage *image, } } + /* Ensure that these pages are decrypted if SME is enabled. */ + if (pages) + arch_kexec_post_alloc_pages(page_address(pages), 1 << order, 0); + return pages; } @@ -867,6 +871,7 @@ static int kimage_load_crash_segment(struct kimage *image, result = -ENOMEM; goto out; } + arch_kexec_post_alloc_pages(page_address(page), 1, 0); ptr = kmap(page); ptr += maddr & ~PAGE_MASK; mchunk = min_t(size_t, mbytes, @@ -884,6 +889,7 @@ static int kimage_load_crash_segment(struct kimage *image, result = copy_from_user(ptr, buf, uchunk); kexec_flush_icache_page(page); kunmap(page); + arch_kexec_pre_free_pages(page_address(page), 1); if (result) { result = -EFAULT; goto out; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index f7a4602a76f9..66f1818d4762 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -523,6 +523,8 @@ static void do_unoptimize_kprobes(void) arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); /* Loop free_list for disarming */ list_for_each_entry_safe(op, tmp, &freeing_list, list) { + /* Switching from detour code to origin */ + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; /* Disarm probes if marked disabled */ if (kprobe_disabled(&op->kp)) arch_disarm_kprobe(&op->kp); @@ -544,8 +546,14 @@ static void do_free_cleaned_kprobes(void) struct optimized_kprobe *op, *tmp; list_for_each_entry_safe(op, tmp, &freeing_list, list) { - BUG_ON(!kprobe_unused(&op->kp)); list_del_init(&op->list); + if (WARN_ON_ONCE(!kprobe_unused(&op->kp))) { + /* + * This must not happen, but if there is a kprobe + * still in use, keep it on kprobes hash list. + */ + continue; + } free_aggr_kprobe(&op->kp); } } @@ -617,6 +625,18 @@ void wait_for_kprobe_optimizer(void) mutex_unlock(&kprobe_mutex); } +static bool optprobe_queued_unopt(struct optimized_kprobe *op) +{ + struct optimized_kprobe *_op; + + list_for_each_entry(_op, &unoptimizing_list, list) { + if (op == _op) + return true; + } + + return false; +} + /* Optimize kprobe if p is ready to be optimized */ static void optimize_kprobe(struct kprobe *p) { @@ -638,17 +658,21 @@ static void optimize_kprobe(struct kprobe *p) return; /* Check if it is already optimized. */ - if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) + if (op->kp.flags & KPROBE_FLAG_OPTIMIZED) { + if (optprobe_queued_unopt(op)) { + /* This is under unoptimizing. Just dequeue the probe */ + list_del_init(&op->list); + } return; + } op->kp.flags |= KPROBE_FLAG_OPTIMIZED; - if (!list_empty(&op->list)) - /* This is under unoptimizing. Just dequeue the probe */ - list_del_init(&op->list); - else { - list_add(&op->list, &optimizing_list); - kick_kprobe_optimizer(); - } + /* On unoptimizing/optimizing_list, op must have OPTIMIZED flag */ + if (WARN_ON_ONCE(!list_empty(&op->list))) + return; + + list_add(&op->list, &optimizing_list); + kick_kprobe_optimizer(); } /* Short cut to direct unoptimizing */ @@ -656,6 +680,7 @@ static void force_unoptimize_kprobe(struct optimized_kprobe *op) { lockdep_assert_cpus_held(); arch_unoptimize_kprobe(op); + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; if (kprobe_disabled(&op->kp)) arch_disarm_kprobe(&op->kp); } @@ -669,31 +694,33 @@ static void unoptimize_kprobe(struct kprobe *p, bool force) return; /* This is not an optprobe nor optimized */ op = container_of(p, struct optimized_kprobe, kp); - if (!kprobe_optimized(p)) { - /* Unoptimized or unoptimizing case */ - if (force && !list_empty(&op->list)) { - /* - * Only if this is unoptimizing kprobe and forced, - * forcibly unoptimize it. (No need to unoptimize - * unoptimized kprobe again :) - */ + if (!kprobe_optimized(p)) + return; + + if (!list_empty(&op->list)) { + if (optprobe_queued_unopt(op)) { + /* Queued in unoptimizing queue */ + if (force) { + /* + * Forcibly unoptimize the kprobe here, and queue it + * in the freeing list for release afterwards. + */ + force_unoptimize_kprobe(op); + list_move(&op->list, &freeing_list); + } + } else { + /* Dequeue from the optimizing queue */ list_del_init(&op->list); - force_unoptimize_kprobe(op); + op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; } return; } - op->kp.flags &= ~KPROBE_FLAG_OPTIMIZED; - if (!list_empty(&op->list)) { - /* Dequeue from the optimization queue */ - list_del_init(&op->list); - return; - } /* Optimized kprobe case */ - if (force) + if (force) { /* Forcibly update the code: this is a special case */ force_unoptimize_kprobe(op); - else { + } else { list_add(&op->list, &unoptimizing_list); kick_kprobe_optimizer(); } diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c index 183001d36cfd..8828fae2a835 100644 --- a/kernel/locking/spinlock_debug.c +++ b/kernel/locking/spinlock_debug.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include "sched.h" #ifdef CONFIG_MTK_AEE_FEATURE #include @@ -205,19 +205,19 @@ EXPORT_SYMBOL(__rwlock_init); static void spin_dump(raw_spinlock_t *lock, const char *msg) { - struct task_struct *owner = NULL; + struct task_struct *owner = READ_ONCE(lock->owner); - if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT) - owner = lock->owner; + if (owner == SPINLOCK_OWNER_INIT) + owner = NULL; printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n", msg, raw_smp_processor_id(), current->comm, task_pid_nr(current)); printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, " ".owner_cpu: %d\n", - lock, lock->magic, + lock, READ_ONCE(lock->magic), owner ? owner->comm : "", owner ? task_pid_nr(owner) : -1, - lock->owner_cpu); + READ_ONCE(lock->owner_cpu)); dump_stack(); } @@ -260,17 +260,17 @@ debug_spin_lock_before(raw_spinlock_t *lock) #ifdef CONFIG_DEBUG_LOCK_ALLOC SPIN_BUG_ON(lock->dep_map.name == NULL, lock, "uninitialized"); #endif - SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); - SPIN_BUG_ON(lock->owner == current, lock, "recursion"); - SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(), + SPIN_BUG_ON(READ_ONCE(lock->magic) != SPINLOCK_MAGIC, lock, "bad magic"); + SPIN_BUG_ON(READ_ONCE(lock->owner) == current, lock, "recursion"); + SPIN_BUG_ON(READ_ONCE(lock->owner_cpu) == raw_smp_processor_id(), lock, "cpu recursion"); } static inline void debug_spin_lock_after(raw_spinlock_t *lock) { lock->lock_t = sched_clock(); - lock->owner_cpu = raw_smp_processor_id(); - lock->owner = current; + WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id()); + WRITE_ONCE(lock->owner, current); } static inline void debug_spin_unlock(raw_spinlock_t *lock) @@ -280,8 +280,8 @@ static inline void debug_spin_unlock(raw_spinlock_t *lock) SPIN_BUG_ON(lock->owner != current, lock, "wrong owner"); SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), lock, "wrong CPU"); - lock->owner = SPINLOCK_OWNER_INIT; - lock->owner_cpu = -1; + WRITE_ONCE(lock->owner, SPINLOCK_OWNER_INIT); + WRITE_ONCE(lock->owner_cpu, -1); lock->unlock_t = sched_clock(); spin_lock_check_holding_time(lock); @@ -545,8 +545,8 @@ static inline void debug_write_lock_before(rwlock_t *lock) static inline void debug_write_lock_after(rwlock_t *lock) { - lock->owner_cpu = raw_smp_processor_id(); - lock->owner = current; + WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id()); + WRITE_ONCE(lock->owner, current); } static inline void debug_write_unlock(rwlock_t *lock) @@ -555,8 +555,8 @@ static inline void debug_write_unlock(rwlock_t *lock) RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner"); RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(), lock, "wrong CPU"); - lock->owner = SPINLOCK_OWNER_INIT; - lock->owner_cpu = -1; + WRITE_ONCE(lock->owner, SPINLOCK_OWNER_INIT); + WRITE_ONCE(lock->owner_cpu, -1); } void do_raw_write_lock(rwlock_t *lock) diff --git a/kernel/module.c b/kernel/module.c index d338d37ee808..0919a5f59f72 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1020,6 +1020,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, strlcpy(last_unloaded_module, mod->name, sizeof(last_unloaded_module)); free_module(mod); + /* someone could wait for the module in add_unformed_module() */ + wake_up_all(&module_wq); return 0; out: mutex_unlock(&module_mutex); @@ -1728,6 +1730,8 @@ static int module_add_modinfo_attrs(struct module *mod) error_out: if (i > 0) module_remove_modinfo_attrs(mod, --i); + else + kfree(mod->modinfo_attrs); return error; } diff --git a/kernel/padata.c b/kernel/padata.c index 87540ce72aea..528a251217df 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -34,6 +34,8 @@ #define MAX_OBJ_NUM 1000 +static void padata_free_pd(struct parallel_data *pd); + static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) { int cpu, target_cpu; @@ -292,6 +294,7 @@ static void padata_serial_worker(struct work_struct *serial_work) struct padata_serial_queue *squeue; struct parallel_data *pd; LIST_HEAD(local_list); + int cnt; local_bh_disable(); squeue = container_of(serial_work, struct padata_serial_queue, work); @@ -301,6 +304,8 @@ static void padata_serial_worker(struct work_struct *serial_work) list_replace_init(&squeue->serial.list, &local_list); spin_unlock(&squeue->serial.lock); + cnt = 0; + while (!list_empty(&local_list)) { struct padata_priv *padata; @@ -310,9 +315,12 @@ static void padata_serial_worker(struct work_struct *serial_work) list_del_init(&padata->list); padata->serial(padata); - atomic_dec(&pd->refcnt); + cnt++; } local_bh_enable(); + + if (atomic_sub_and_test(cnt, &pd->refcnt)) + padata_free_pd(pd); } /** @@ -435,7 +443,7 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd); atomic_set(&pd->seq_nr, -1); atomic_set(&pd->reorder_objects, 0); - atomic_set(&pd->refcnt, 0); + atomic_set(&pd->refcnt, 1); pd->pinst = pinst; spin_lock_init(&pd->lock); @@ -460,31 +468,6 @@ static void padata_free_pd(struct parallel_data *pd) kfree(pd); } -/* Flush all objects out of the padata queues. */ -static void padata_flush_queues(struct parallel_data *pd) -{ - int cpu; - struct padata_parallel_queue *pqueue; - struct padata_serial_queue *squeue; - - for_each_cpu(cpu, pd->cpumask.pcpu) { - pqueue = per_cpu_ptr(pd->pqueue, cpu); - flush_work(&pqueue->work); - } - - del_timer_sync(&pd->timer); - - if (atomic_read(&pd->reorder_objects)) - padata_reorder(pd); - - for_each_cpu(cpu, pd->cpumask.cbcpu) { - squeue = per_cpu_ptr(pd->squeue, cpu); - flush_work(&squeue->work); - } - - BUG_ON(atomic_read(&pd->refcnt) != 0); -} - static void __padata_start(struct padata_instance *pinst) { pinst->flags |= PADATA_INIT; @@ -498,10 +481,6 @@ static void __padata_stop(struct padata_instance *pinst) pinst->flags &= ~PADATA_INIT; synchronize_rcu(); - - get_online_cpus(); - padata_flush_queues(pinst->pd); - put_online_cpus(); } /* Replace the internal control structure with a new one. */ @@ -522,8 +501,8 @@ static void padata_replace(struct padata_instance *pinst, if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu)) notification_mask |= PADATA_CPU_SERIAL; - padata_flush_queues(pd_old); - padata_free_pd(pd_old); + if (atomic_dec_and_test(&pd_old->refcnt)) + padata_free_pd(pd_old); if (notification_mask) blocking_notifier_call_chain(&pinst->cpumask_change_notifier, diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 0972a8e09d08..ff2aabb70de9 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -734,8 +734,15 @@ zone_found: * We have found the zone. Now walk the radix tree to find the leaf node * for our PFN. */ + + /* + * If the zone we wish to scan is the the current zone and the + * pfn falls into the current node then we do not need to walk + * the tree. + */ node = bm->cur.node; - if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) + if (zone == bm->cur.zone && + ((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) goto node_found; node = zone->rtree; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 9e861d59a01d..57932647be7c 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -487,6 +487,7 @@ static u32 clear_idx; /* record buffer */ #define LOG_ALIGN __alignof__(struct printk_log) #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) +#define LOG_BUF_LEN_MAX (u32)(1 << 31) static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; static u32 log_buf_len = __LOG_BUF_LEN; @@ -1261,19 +1262,28 @@ void log_buf_vmcoreinfo_setup(void) static unsigned long __initdata new_log_buf_len; /* we practice scaling the ring buffer by powers of 2 */ -static void __init log_buf_len_update(unsigned size) +static void __init log_buf_len_update(u64 size) { + if (size > (u64)LOG_BUF_LEN_MAX) { + size = (u64)LOG_BUF_LEN_MAX; + pr_err("log_buf over 2G is not supported.\n"); + } + if (size) size = roundup_pow_of_two(size); if (size > log_buf_len) - new_log_buf_len = size; + new_log_buf_len = (unsigned long)size; } /* save requested log_buf_len since it's too early to process it */ static int __init log_buf_len_setup(char *str) { - unsigned int size = memparse(str, &str); + u64 size; + if (!str) + return -EINVAL; + + size = memparse(str, &str); log_buf_len_update(size); return 0; @@ -1317,7 +1327,7 @@ void __init setup_log_buf(int early) { unsigned long flags; char *new_log_buf; - int free; + unsigned int free; if (log_buf != __log_buf) return; @@ -1337,7 +1347,7 @@ void __init setup_log_buf(int early) } if (unlikely(!new_log_buf)) { - pr_err("log_buf_len: %ld bytes not available\n", + pr_err("log_buf_len: %lu bytes not available\n", new_log_buf_len); return; } @@ -1350,8 +1360,8 @@ void __init setup_log_buf(int early) memcpy(log_buf, __log_buf, __LOG_BUF_LEN); logbuf_unlock_irqrestore(flags); - pr_info("log_buf_len: %d bytes\n", log_buf_len); - pr_info("early log buf free: %d(%d%%)\n", + pr_info("log_buf_len: %u bytes\n", log_buf_len); + pr_info("early log buf free: %u(%u%%)\n", free, (free * 100) / __LOG_BUF_LEN); } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 09fb3f58a838..43a283041296 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -258,12 +258,17 @@ static int ptrace_check_attach(struct task_struct *child, bool ignore_state) return ret; } -static int ptrace_has_cap(struct user_namespace *ns, unsigned int mode) +static bool ptrace_has_cap(const struct cred *cred, struct user_namespace *ns, + unsigned int mode) { + int ret; + if (mode & PTRACE_MODE_NOAUDIT) - return has_ns_capability_noaudit(current, ns, CAP_SYS_PTRACE); + ret = security_capable(cred, ns, CAP_SYS_PTRACE); else - return has_ns_capability(current, ns, CAP_SYS_PTRACE); + ret = security_capable(cred, ns, CAP_SYS_PTRACE); + + return ret == 0; } /* Returns 0 on success, -errno on denial. */ @@ -315,7 +320,7 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) gid_eq(caller_gid, tcred->sgid) && gid_eq(caller_gid, tcred->gid)) goto ok; - if (ptrace_has_cap(tcred->user_ns, mode)) + if (ptrace_has_cap(cred, tcred->user_ns, mode)) goto ok; rcu_read_unlock(); return -EPERM; @@ -334,7 +339,7 @@ ok: mm = task->mm; if (mm && ((get_dumpable(mm) != SUID_DUMP_USER) && - !ptrace_has_cap(mm->user_ns, mode))) + !ptrace_has_cap(cred, mm->user_ns, mode))) return -EPERM; return security_ptrace_access_check(task, mode); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 01ab898d0d5f..f6b48d68ee4d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -19,12 +19,14 @@ #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -452,10 +454,11 @@ void wake_q_add(struct wake_q_head *head, struct task_struct *task) * its already queued (either by us or someone else) and will get the * wakeup due to that. * - * This cmpxchg() implies a full barrier, which pairs with the write - * barrier implied by the wakeup in wake_up_q(). + * In order to ensure that a pending wakeup will observe our pending + * state, even in the failed case, an explicit smp_mb() must be used. */ - if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL)) + smp_mb__before_atomic(); + if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)) return; head->count++; @@ -3881,6 +3884,7 @@ static inline void prepare_task_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { + kcov_prepare_switch(prev); sched_info_switch(rq, prev, next); perf_event_task_sched_out(prev, next); hook_ca_context_switch(rq, prev, next); @@ -3959,6 +3963,7 @@ static struct rq *finish_task_switch(struct task_struct *prev) smp_mb__after_unlock_lock(); finish_lock_switch(rq, prev); finish_arch_post_lock_switch(); + kcov_finish_switch(current); fire_sched_in_preempt_notifiers(current); if (mm) @@ -6619,10 +6624,11 @@ void init_idle(struct task_struct *idle, int cpu) struct rq *rq = cpu_rq(cpu); unsigned long flags; + __sched_fork(0, idle); + raw_spin_lock_irqsave(&idle->pi_lock, flags); raw_spin_lock(&rq->lock); - __sched_fork(0, idle); idle->state = TASK_RUNNING; idle->se.exec_start = sched_clock(); idle->flags |= PF_IDLE; diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index d2d874fc28e9..196de0dfd455 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -760,7 +760,7 @@ void vtime_account_system(struct task_struct *tsk) write_seqcount_begin(&vtime->seqcount); /* We might have scheduled out from guest path */ - if (current->flags & PF_VCPU) + if (tsk->flags & PF_VCPU) vtime_account_guest(tsk, vtime); else __vtime_account_system(tsk, vtime); @@ -803,7 +803,7 @@ void vtime_guest_enter(struct task_struct *tsk) */ write_seqcount_begin(&vtime->seqcount); __vtime_account_system(tsk, vtime); - current->flags |= PF_VCPU; + tsk->flags |= PF_VCPU; write_seqcount_end(&vtime->seqcount); } EXPORT_SYMBOL_GPL(vtime_guest_enter); @@ -814,7 +814,7 @@ void vtime_guest_exit(struct task_struct *tsk) write_seqcount_begin(&vtime->seqcount); vtime_account_guest(tsk, vtime); - current->flags &= ~PF_VCPU; + tsk->flags &= ~PF_VCPU; write_seqcount_end(&vtime->seqcount); } EXPORT_SYMBOL_GPL(vtime_guest_exit); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bba99325a76e..7762fbc4fd58 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4409,23 +4409,16 @@ static inline u64 sched_cfs_bandwidth_slice(void) } /* - * Replenish runtime according to assigned quota and update expiration time. - * We use sched_clock_cpu directly instead of rq->clock to avoid adding - * additional synchronization around rq->lock. + * Replenish runtime according to assigned quota. We use sched_clock_cpu + * directly instead of rq->clock to avoid adding additional synchronization + * around rq->lock. * * requires cfs_b->lock */ void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) { - u64 now; - - if (cfs_b->quota == RUNTIME_INF) - return; - - now = sched_clock_cpu(smp_processor_id()); - cfs_b->runtime = cfs_b->quota; - cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period); - cfs_b->expires_seq++; + if (cfs_b->quota != RUNTIME_INF) + cfs_b->runtime = cfs_b->quota; } static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg) @@ -4447,8 +4440,7 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) { struct task_group *tg = cfs_rq->tg; struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); - u64 amount = 0, min_amount, expires; - int expires_seq; + u64 amount = 0, min_amount; /* note: this is a positive sum as runtime_remaining <= 0 */ min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining; @@ -4465,61 +4457,17 @@ static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq) cfs_b->idle = 0; } } - expires_seq = cfs_b->expires_seq; - expires = cfs_b->runtime_expires; raw_spin_unlock(&cfs_b->lock); cfs_rq->runtime_remaining += amount; - /* - * we may have advanced our local expiration to account for allowed - * spread between our sched_clock and the one on which runtime was - * issued. - */ - if (cfs_rq->expires_seq != expires_seq) { - cfs_rq->expires_seq = expires_seq; - cfs_rq->runtime_expires = expires; - } return cfs_rq->runtime_remaining > 0; } -/* - * Note: This depends on the synchronization provided by sched_clock and the - * fact that rq->clock snapshots this value. - */ -static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq) -{ - struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg); - - /* if the deadline is ahead of our clock, nothing to do */ - if (likely((s64)(rq_clock(rq_of(cfs_rq)) - cfs_rq->runtime_expires) < 0)) - return; - - if (cfs_rq->runtime_remaining < 0) - return; - - /* - * If the local deadline has passed we have to consider the - * possibility that our sched_clock is 'fast' and the global deadline - * has not truly expired. - * - * Fortunately we can check determine whether this the case by checking - * whether the global deadline(cfs_b->expires_seq) has advanced. - */ - if (cfs_rq->expires_seq == cfs_b->expires_seq) { - /* extend local deadline, drift is bounded above by 2 ticks */ - cfs_rq->runtime_expires += TICK_NSEC; - } else { - /* global deadline is ahead, expiration has passed */ - cfs_rq->runtime_remaining = 0; - } -} - static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) { /* dock delta_exec before expiring quota (as it could span periods) */ cfs_rq->runtime_remaining -= delta_exec; - expire_cfs_rq_runtime(cfs_rq); if (likely(cfs_rq->runtime_remaining > 0)) return; @@ -4738,8 +4686,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) resched_curr(rq); } -static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, - u64 remaining, u64 expires) +static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, u64 remaining) { struct cfs_rq *cfs_rq; u64 runtime; @@ -4764,7 +4711,6 @@ static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b, remaining -= runtime; cfs_rq->runtime_remaining += runtime; - cfs_rq->runtime_expires = expires; /* we check whether we're throttled above */ if (cfs_rq->runtime_remaining > 0) @@ -4789,7 +4735,7 @@ next: */ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) { - u64 runtime, runtime_expires; + u64 runtime; int throttled; /* no need to continue the timer with no bandwidth constraint */ @@ -4817,8 +4763,6 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) /* account preceding periods in which throttling occurred */ cfs_b->nr_throttled += overrun; - runtime_expires = cfs_b->runtime_expires; - /* * This check is repeated as we are holding onto the new bandwidth while * we unthrottle. This can potentially race with an unthrottled group @@ -4831,8 +4775,7 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun) cfs_b->distribute_running = 1; raw_spin_unlock(&cfs_b->lock); /* we can't nest cfs_b->lock while distributing bandwidth */ - runtime = distribute_cfs_runtime(cfs_b, runtime, - runtime_expires); + runtime = distribute_cfs_runtime(cfs_b, runtime); raw_spin_lock(&cfs_b->lock); cfs_b->distribute_running = 0; @@ -4909,8 +4852,7 @@ static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq) return; raw_spin_lock(&cfs_b->lock); - if (cfs_b->quota != RUNTIME_INF && - cfs_rq->runtime_expires == cfs_b->runtime_expires) { + if (cfs_b->quota != RUNTIME_INF) { cfs_b->runtime += slack_runtime; /* we are under rq->lock, defer unthrottling using a timer */ @@ -4942,7 +4884,6 @@ static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) { u64 runtime = 0, slice = sched_cfs_bandwidth_slice(); - u64 expires; /* confirm we're still not at a refresh boundary */ raw_spin_lock(&cfs_b->lock); @@ -4959,7 +4900,6 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice) runtime = cfs_b->runtime; - expires = cfs_b->runtime_expires; if (runtime) cfs_b->distribute_running = 1; @@ -4968,11 +4908,10 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) if (!runtime) return; - runtime = distribute_cfs_runtime(cfs_b, runtime, expires); + runtime = distribute_cfs_runtime(cfs_b, runtime); raw_spin_lock(&cfs_b->lock); - if (expires == cfs_b->runtime_expires) - cfs_b->runtime -= min(runtime, cfs_b->runtime); + cfs_b->runtime -= min(runtime, cfs_b->runtime); cfs_b->distribute_running = 0; raw_spin_unlock(&cfs_b->lock); } @@ -5067,20 +5006,28 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer) if (++count > 3) { u64 new, old = ktime_to_ns(cfs_b->period); - new = (old * 147) / 128; /* ~115% */ - new = min(new, max_cfs_quota_period); + /* + * Grow period by a factor of 2 to avoid losing precision. + * Precision loss in the quota/period ratio can cause __cfs_schedulable + * to fail. + */ + new = old * 2; + if (new < max_cfs_quota_period) { + cfs_b->period = ns_to_ktime(new); + cfs_b->quota *= 2; - cfs_b->period = ns_to_ktime(new); - - /* since max is 1s, this is limited to 1e9^2, which fits in u64 */ - cfs_b->quota *= new; - cfs_b->quota = div64_u64(cfs_b->quota, old); - - pr_warn_ratelimited( - "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n", - smp_processor_id(), - div_u64(new, NSEC_PER_USEC), - div_u64(cfs_b->quota, NSEC_PER_USEC)); + pr_warn_ratelimited( + "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us = %lld, cfs_quota_us = %lld)\n", + smp_processor_id(), + div_u64(new, NSEC_PER_USEC), + div_u64(cfs_b->quota, NSEC_PER_USEC)); + } else { + pr_warn_ratelimited( + "cfs_period_timer[cpu%d]: period too short, but cannot scale up without losing precision (cfs_period_us = %lld, cfs_quota_us = %lld)\n", + smp_processor_id(), + div_u64(old, NSEC_PER_USEC), + div_u64(cfs_b->quota, NSEC_PER_USEC)); + } /* reset count so we don't come right back in here */ count = 0; @@ -11199,13 +11146,22 @@ out_all_pinned: sd->nr_balance_failed = 0; out_one_pinned: + ld_moved = 0; + + /* + * idle_balance() disregards balance intervals, so we could repeatedly + * reach this code, which would lead to balance_interval skyrocketting + * in a short amount of time. Skip the balance_interval increase logic + * to avoid that. + */ + if (env.idle == CPU_NEWLY_IDLE) + goto out; + /* tune up the balancing interval */ if (((env.flags & LBF_ALL_PINNED) && sd->balance_interval < MAX_PINNED_INTERVAL) || (sd->balance_interval < sd->max_interval)) sd->balance_interval *= 2; - - ld_moved = 0; out: return ld_moved; } diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index b2deee9a913c..e451b50d3ceb 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -186,7 +186,8 @@ static void group_init(struct psi_group *group) for_each_possible_cpu(cpu) seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq); - group->avg_next_update = sched_clock() + psi_period; + group->avg_last_update = sched_clock(); + group->avg_next_update = group->avg_last_update + psi_period; INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work); mutex_init(&group->avgs_lock); /* Init trigger-related members */ @@ -482,7 +483,7 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value) u32 remaining; remaining = win->size - elapsed; - growth += div_u64(win->prev_growth * remaining, win->size); + growth += div64_u64(win->prev_growth * remaining, win->size); } return growth; @@ -1211,7 +1212,10 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, if (static_branch_likely(&psi_disabled)) return -EOPNOTSUPP; - buf_size = min(nbytes, (sizeof(buf) - 1)); + if (!nbytes) + return -EINVAL; + + buf_size = min(nbytes, sizeof(buf)); if (copy_from_user(buf, user_buf, buf_size)) return -EFAULT; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ca9c64ab3b64..eab32f244e6b 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -282,8 +282,6 @@ struct cfs_bandwidth { ktime_t period; u64 quota, runtime; s64 hierarchical_quota; - u64 runtime_expires; - int expires_seq; short idle, period_active; struct hrtimer period_timer, slack_timer; @@ -496,8 +494,6 @@ struct cfs_rq { #ifdef CONFIG_CFS_BANDWIDTH int runtime_enabled; - int expires_seq; - u64 runtime_expires; s64 runtime_remaining; u64 throttled_clock, throttled_clock_task; diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 1603b6a59f20..061ffd5a6ebb 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1534,7 +1534,7 @@ void sched_init_numa(void) int level = 0; int i, j, k; - sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL); + sched_domains_numa_distance = kzalloc(sizeof(int) * (nr_node_ids + 1), GFP_KERNEL); if (!sched_domains_numa_distance) return; diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c index ddaed5065cf0..aca061cbf162 100644 --- a/kernel/sched/tune.c +++ b/kernel/sched/tune.c @@ -97,9 +97,9 @@ root_schedtune = { * value */ #ifdef CONFIG_MTK_IO_BOOST -#define BOOSTGROUPS_COUNT 6 +#define BOOSTGROUPS_COUNT 7 #else -#define BOOSTGROUPS_COUNT 5 +#define BOOSTGROUPS_COUNT 6 #endif /* Array of configured boostgroups */ diff --git a/kernel/scs.c b/kernel/scs.c new file mode 100644 index 000000000000..ad74d13f2c0f --- /dev/null +++ b/kernel/scs.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Shadow Call Stack support. + * + * Copyright (C) 2019 Google LLC + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline void *__scs_base(struct task_struct *tsk) +{ + /* + * To minimize risk the of exposure, architectures may clear a + * task's thread_info::shadow_call_stack while that task is + * running, and only save/restore the active shadow call stack + * pointer when the usual register may be clobbered (e.g. across + * context switches). + * + * The shadow call stack is aligned to SCS_SIZE, and grows + * upwards, so we can mask out the low bits to extract the base + * when the task is not running. + */ + return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1)); +} + +static inline unsigned long *scs_magic(void *s) +{ + return (unsigned long *)(s + SCS_SIZE) - 1; +} + +static inline void scs_set_magic(void *s) +{ + *scs_magic(s) = SCS_END_MAGIC; +} + +#ifdef CONFIG_SHADOW_CALL_STACK_VMAP + +/* Matches NR_CACHED_STACKS for VMAP_STACK */ +#define NR_CACHED_SCS 2 +static DEFINE_PER_CPU(void *, scs_cache[NR_CACHED_SCS]); + +static void *scs_alloc(int node) +{ + int i; + void *s; + + for (i = 0; i < NR_CACHED_SCS; i++) { + s = this_cpu_xchg(scs_cache[i], NULL); + if (s) { + memset(s, 0, SCS_SIZE); + goto out; + } + } + + /* + * We allocate a full page for the shadow stack, which should be + * more than we need. Check the assumption nevertheless. + */ + BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE); + + s = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE, + VMALLOC_START, VMALLOC_END, + GFP_SCS, PAGE_KERNEL, 0, + node, __builtin_return_address(0)); + +out: + if (s) + scs_set_magic(s); + /* TODO: poison for KASAN, unpoison in scs_free */ + + return s; +} + +static void scs_free(void *s) +{ + int i; + + for (i = 0; i < NR_CACHED_SCS; i++) + if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL) + return; + + vfree_atomic(s); +} + +static struct page *__scs_page(struct task_struct *tsk) +{ + return vmalloc_to_page(__scs_base(tsk)); +} + +static int scs_cleanup(unsigned int cpu) +{ + int i; + void **cache = per_cpu_ptr(scs_cache, cpu); + + for (i = 0; i < NR_CACHED_SCS; i++) { + vfree(cache[i]); + cache[i] = NULL; + } + + return 0; +} + +void __init scs_init(void) +{ + WARN_ON(cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL, + scs_cleanup) < 0); +} + +#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */ + +static struct kmem_cache *scs_cache; + +static inline void *scs_alloc(int node) +{ + void *s; + + s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node); + if (s) { + scs_set_magic(s); + /* + * Poison the allocation to catch unintentional accesses to + * the shadow stack when KASAN is enabled. + */ + kasan_poison_object_data(scs_cache, s); + } + + return s; +} + +static inline void scs_free(void *s) +{ + kasan_unpoison_object_data(scs_cache, s); + kmem_cache_free(scs_cache, s); +} + +static struct page *__scs_page(struct task_struct *tsk) +{ + return virt_to_page(__scs_base(tsk)); +} + +void __init scs_init(void) +{ + scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE, + 0, NULL); + WARN_ON(!scs_cache); +} + +#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */ + +void scs_task_reset(struct task_struct *tsk) +{ + /* + * Reset the shadow stack to the base address in case the task + * is reused. + */ + task_set_scs(tsk, __scs_base(tsk)); +} + +static void scs_account(struct task_struct *tsk, int account) +{ + mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_BYTES, + account * SCS_SIZE); +} + +int scs_prepare(struct task_struct *tsk, int node) +{ + void *s; + + s = scs_alloc(node); + if (!s) + return -ENOMEM; + + task_set_scs(tsk, s); + scs_account(tsk, 1); + + return 0; +} + +#ifdef CONFIG_DEBUG_STACK_USAGE +static inline unsigned long scs_used(struct task_struct *tsk) +{ + unsigned long *p = __scs_base(tsk); + unsigned long *end = scs_magic(p); + unsigned long s = (unsigned long)p; + + while (p < end && READ_ONCE_NOCHECK(*p)) + p++; + + return (unsigned long)p - s; +} + +static void scs_check_usage(struct task_struct *tsk) +{ + static DEFINE_SPINLOCK(lock); + static unsigned long highest; + unsigned long used = scs_used(tsk); + + if (used <= highest) + return; + + spin_lock(&lock); + + if (used > highest) { + pr_info("%s (%d): highest shadow stack usage: %lu bytes\n", + tsk->comm, task_pid_nr(tsk), used); + highest = used; + } + + spin_unlock(&lock); +} +#else +static inline void scs_check_usage(struct task_struct *tsk) +{ +} +#endif + +bool scs_corrupted(struct task_struct *tsk) +{ + unsigned long *magic = scs_magic(__scs_base(tsk)); + + return READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC; +} + +void scs_release(struct task_struct *tsk) +{ + void *s; + + s = __scs_base(tsk); + if (!s) + return; + + WARN_ON(scs_corrupted(tsk)); + scs_check_usage(tsk); + + scs_account(tsk, -1); + task_set_scs(tsk, NULL); + scs_free(s); +} diff --git a/kernel/signal.c b/kernel/signal.c index 2b123fd32810..a0740a9efe43 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -79,10 +79,19 @@ static int sig_task_ignored(struct task_struct *t, int sig, bool force) handler = sig_handler(t, sig); + /* SIGKILL and SIGSTOP may not be sent to the global init */ + if (unlikely(is_global_init(t) && sig_kernel_only(sig))) + return true; + if (unlikely(t->signal->flags & SIGNAL_UNKILLABLE) && handler == SIG_DFL && !(force && sig_kernel_only(sig))) return 1; + /* Only allow kernel generated signals to this kthread */ + if (unlikely((t->flags & PF_KTHREAD) && + (handler == SIG_KTHREAD_KERNEL) && !force)) + return true; + return sig_handler_ignored(handler, sig); } @@ -372,27 +381,32 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi { struct sigqueue *q = NULL; struct user_struct *user; + int sigpending; /* * Protect access to @t credentials. This can go away when all * callers hold rcu read lock. + * + * NOTE! A pending signal will hold on to the user refcount, + * and we get/put the refcount only when the sigpending count + * changes from/to zero. */ rcu_read_lock(); - user = get_uid(__task_cred(t)->user); - atomic_inc(&user->sigpending); + user = __task_cred(t)->user; + sigpending = atomic_inc_return(&user->sigpending); + if (sigpending == 1) + get_uid(user); rcu_read_unlock(); - if (override_rlimit || - atomic_read(&user->sigpending) <= - task_rlimit(t, RLIMIT_SIGPENDING)) { + if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { q = kmem_cache_alloc(sigqueue_cachep, flags); } else { print_dropped_signal(sig); } if (unlikely(q == NULL)) { - atomic_dec(&user->sigpending); - free_uid(user); + if (atomic_dec_and_test(&user->sigpending)) + free_uid(user); } else { INIT_LIST_HEAD(&q->list); q->flags = 0; @@ -406,8 +420,8 @@ static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) return; - atomic_dec(&q->user->sigpending); - free_uid(q->user); + if (atomic_dec_and_test(&q->user->sigpending)) + free_uid(q->user); kmem_cache_free(sigqueue_cachep, q); } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7004f9a9e30d..c37bdfe479ae 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1501,7 +1501,7 @@ static struct ctl_table vm_table[] = { .procname = "drop_caches", .data = &sysctl_drop_caches, .maxlen = sizeof(int), - .mode = 0644, + .mode = 0200, .proc_handler = drop_caches_sysctl_handler, .extra1 = &one, .extra2 = &four, diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 4559e914452b..390c76d4503c 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -568,25 +568,33 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) { struct signal_struct *sig = tsk->signal; - struct taskstats *stats; + struct taskstats *stats_new, *stats; - if (sig->stats || thread_group_empty(tsk)) - goto ret; + /* Pairs with smp_store_release() below. */ + stats = smp_load_acquire(&sig->stats); + if (stats || thread_group_empty(tsk)) + return stats; /* No problem if kmem_cache_zalloc() fails */ - stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); + stats_new = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); spin_lock_irq(&tsk->sighand->siglock); - if (!sig->stats) { - sig->stats = stats; - stats = NULL; + stats = sig->stats; + if (!stats) { + /* + * Pairs with smp_store_release() above and order the + * kmem_cache_zalloc(). + */ + smp_store_release(&sig->stats, stats_new); + stats = stats_new; + stats_new = NULL; } spin_unlock_irq(&tsk->sighand->siglock); - if (stats) - kmem_cache_free(taskstats_cache, stats); -ret: - return sig->stats; + if (stats_new) + kmem_cache_free(taskstats_cache, stats_new); + + return stats; } /* Send pid data out on exit */ diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index ed3e0686131a..a0068c1a1376 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -91,6 +91,7 @@ static int alarmtimer_rtc_add_device(struct device *dev, unsigned long flags; struct rtc_device *rtc = to_rtc_device(dev); struct wakeup_source *__ws; + int ret = 0; if (rtcdev) return -EBUSY; @@ -105,8 +106,8 @@ static int alarmtimer_rtc_add_device(struct device *dev, spin_lock_irqsave(&rtcdev_lock, flags); if (!rtcdev) { if (!try_module_get(rtc->owner)) { - spin_unlock_irqrestore(&rtcdev_lock, flags); - return -1; + ret = -1; + goto unlock; } rtcdev = rtc; @@ -115,11 +116,12 @@ static int alarmtimer_rtc_add_device(struct device *dev, ws = __ws; __ws = NULL; } +unlock: spin_unlock_irqrestore(&rtcdev_lock, flags); wakeup_source_unregister(__ws); - return 0; + return ret; } static inline void alarmtimer_rtc_timer_init(void) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 3b71d859ee38..825d24df921a 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -280,8 +280,15 @@ static void clocksource_watchdog(unsigned long data) next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); if (next_cpu >= nr_cpu_ids) next_cpu = cpumask_first(cpu_online_mask); - watchdog_timer.expires += WATCHDOG_INTERVAL; - add_timer_on(&watchdog_timer, next_cpu); + + /* + * Arm timer if not already pending: could race with concurrent + * pair clocksource_stop_watchdog() clocksource_start_watchdog(). + */ + if (!timer_pending(&watchdog_timer)) { + watchdog_timer.expires += WATCHDOG_INTERVAL; + add_timer_on(&watchdog_timer, next_cpu); + } out: spin_unlock(&watchdog_lock); } diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 49ea81c230a4..c9a16a8b7ed4 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -875,11 +875,14 @@ EXPORT_SYMBOL_GPL(hrtimer_forward); static int enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base) { + u8 state = timer->state; + debug_activate(timer); base->cpu_base->active_bases |= 1 << base->index; - timer->state |= HRTIMER_STATE_ENQUEUED; + /* Pairs with the lockless read in hrtimer_is_queued() */ + WRITE_ONCE(timer->state, state | HRTIMER_STATE_ENQUEUED); return timerqueue_add(&base->active, &timer->node); } @@ -899,8 +902,9 @@ static void __remove_hrtimer(struct hrtimer *timer, u8 newstate, int reprogram) { struct hrtimer_cpu_base *cpu_base = base->cpu_base; + u8 state = timer->state; - if (!(timer->state & HRTIMER_STATE_ENQUEUED)) + if (!(state & HRTIMER_STATE_ENQUEUED)) goto out; if (!timerqueue_del(&base->active, &timer->node)) @@ -924,7 +928,8 @@ out: * We need to preserve PINNED state here, otherwise we may end up * migrating pinned hrtimers as well. */ - timer->state = newstate | (timer->state & HRTIMER_STATE_PINNED); + /* Pairs with the lockless read in hrtimer_is_queued() */ + WRITE_ONCE(timer->state, newstate | (state & HRTIMER_STATE_PINNED)); } /* @@ -933,8 +938,9 @@ out: static inline int remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart) { - if (hrtimer_is_queued(timer)) { - u8 state = timer->state; + u8 state = timer->state; + + if (state & HRTIMER_STATE_ENQUEUED) { int reprogram; /* diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 17cdc554c9fe..e5706a826c1f 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -27,8 +27,6 @@ #include "posix-timers.h" -static void delete_clock(struct kref *kref); - /* * Returns NULL if the posix_clock instance attached to 'fp' is old and stale. */ @@ -138,7 +136,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) err = 0; if (!err) { - kref_get(&clk->kref); + get_device(clk->dev); fp->private_data = clk; } out: @@ -154,7 +152,7 @@ static int posix_clock_release(struct inode *inode, struct file *fp) if (clk->ops.release) err = clk->ops.release(clk); - kref_put(&clk->kref, delete_clock); + put_device(clk->dev); fp->private_data = NULL; @@ -174,38 +172,35 @@ static const struct file_operations posix_clock_file_operations = { #endif }; -int posix_clock_register(struct posix_clock *clk, dev_t devid) +int posix_clock_register(struct posix_clock *clk, struct device *dev) { int err; - kref_init(&clk->kref); init_rwsem(&clk->rwsem); cdev_init(&clk->cdev, &posix_clock_file_operations); + err = cdev_device_add(&clk->cdev, dev); + if (err) { + pr_err("%s unable to add device %d:%d\n", + dev_name(dev), MAJOR(dev->devt), MINOR(dev->devt)); + return err; + } clk->cdev.owner = clk->ops.owner; - err = cdev_add(&clk->cdev, devid, 1); + clk->dev = dev; - return err; + return 0; } EXPORT_SYMBOL_GPL(posix_clock_register); -static void delete_clock(struct kref *kref) -{ - struct posix_clock *clk = container_of(kref, struct posix_clock, kref); - - if (clk->release) - clk->release(clk); -} - void posix_clock_unregister(struct posix_clock *clk) { - cdev_del(&clk->cdev); + cdev_device_del(&clk->cdev, clk->dev); down_write(&clk->rwsem); clk->zombie = true; up_write(&clk->rwsem); - kref_put(&clk->kref, delete_clock); + put_device(clk->dev); } EXPORT_SYMBOL_GPL(posix_clock_unregister); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f0c53b815ce9..710505b026dd 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -60,8 +60,9 @@ static void tick_do_update_jiffies64(ktime_t now) /* * Do a quick check without holding jiffies_lock: + * The READ_ONCE() pairs with two updates done later in this function. */ - delta = ktime_sub(now, last_jiffies_update); + delta = ktime_sub(now, READ_ONCE(last_jiffies_update)); if (delta < tick_period) return; @@ -72,8 +73,9 @@ static void tick_do_update_jiffies64(ktime_t now) if (delta >= tick_period) { delta = ktime_sub(delta, tick_period); - last_jiffies_update = ktime_add(last_jiffies_update, - tick_period); + /* Pairs with the lockless read in this function. */ + WRITE_ONCE(last_jiffies_update, + ktime_add(last_jiffies_update, tick_period)); /* Slow path for long timeouts */ if (unlikely(delta >= tick_period)) { @@ -81,8 +83,10 @@ static void tick_do_update_jiffies64(ktime_t now) ticks = ktime_divns(delta, incr); - last_jiffies_update = ktime_add_ns(last_jiffies_update, - incr * ticks); + /* Pairs with the lockless read in this function. */ + WRITE_ONCE(last_jiffies_update, + ktime_add_ns(last_jiffies_update, + incr * ticks)); } do_timer(++ticks); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 6cc3941f4435..06ba3bb58301 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -94,7 +94,6 @@ config RING_BUFFER_ALLOW_SWAP config TRACING bool - select DEBUG_FS select RING_BUFFER select STACKTRACE if STACKTRACE_SUPPORT select TRACEPOINTS diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f9dd8fd055a6..7a3df66056de 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -17,7 +17,7 @@ /** * trace_call_bpf - invoke BPF program - * @prog: BPF program + * @call: tracepoint event * @ctx: opaque context pointer * * kprobe handlers execute BPF programs via this helper. @@ -29,7 +29,7 @@ * 1 - store kprobe event into ring buffer * Other values are reserved and currently alias to 1 */ -unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { unsigned int ret; @@ -49,9 +49,22 @@ unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) goto out; } - rcu_read_lock(); - ret = BPF_PROG_RUN(prog, ctx); - rcu_read_unlock(); + /* + * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock + * to all call sites, we did a bpf_prog_array_valid() there to check + * whether call->prog_array is empty or not, which is + * a heurisitc to speed up execution. + * + * If bpf_prog_array_valid() fetched prog_array was + * non-NULL, we go into trace_call_bpf() and do the actual + * proper rcu_dereference() under RCU lock. + * If it turns out that prog_array is NULL then, we bail out. + * For the opposite, if the bpf_prog_array_valid() fetched pointer + * was NULL, you'll skip the prog_array with the risk of missing + * out of events when it was updated in between this and the + * rcu_dereference() which is accepted risk. + */ + ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN); out: __this_cpu_dec(bpf_prog_active); @@ -674,3 +687,62 @@ const struct bpf_verifier_ops perf_event_prog_ops = { .is_valid_access = pe_prog_is_valid_access, .convert_ctx_access = pe_prog_convert_ctx_access, }; + +static DEFINE_MUTEX(bpf_event_mutex); + +int perf_event_attach_bpf_prog(struct perf_event *event, + struct bpf_prog *prog) +{ + struct bpf_prog_array __rcu *old_array; + struct bpf_prog_array *new_array; + int ret = -EEXIST; + + mutex_lock(&bpf_event_mutex); + + if (event->prog) + goto out; + + old_array = rcu_dereference_protected(event->tp_event->prog_array, + lockdep_is_held(&bpf_event_mutex)); + ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array); + if (ret < 0) + goto out; + + /* set the new array to event->tp_event and set event->prog */ + event->prog = prog; + rcu_assign_pointer(event->tp_event->prog_array, new_array); + bpf_prog_array_free(old_array); + +out: + mutex_unlock(&bpf_event_mutex); + return ret; +} + +void perf_event_detach_bpf_prog(struct perf_event *event) +{ + struct bpf_prog_array __rcu *old_array; + struct bpf_prog_array *new_array; + int ret; + + mutex_lock(&bpf_event_mutex); + + if (!event->prog) + goto out; + + old_array = rcu_dereference_protected(event->tp_event->prog_array, + lockdep_is_held(&bpf_event_mutex)); + + ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array); + if (ret < 0) { + bpf_prog_array_delete_safe(old_array, event->prog); + } else { + rcu_assign_pointer(event->tp_event->prog_array, new_array); + bpf_prog_array_free(old_array); + } + + bpf_prog_put(event->prog); + event->prog = NULL; + +out: + mutex_unlock(&bpf_event_mutex); +} diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8905860a6714..5cc253f8fbf7 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -623,8 +623,7 @@ static int function_stat_show(struct seq_file *m, void *v) } #ifdef CONFIG_FUNCTION_GRAPH_TRACER - avg = rec->time; - do_div(avg, rec->counter); + avg = div64_ul(rec->time, rec->counter); if (tracing_thresh && (avg < tracing_thresh)) goto out; #endif @@ -650,7 +649,8 @@ static int function_stat_show(struct seq_file *m, void *v) * Divide only 1000 for ns^2 -> us^2 conversion. * trace_print_graph_duration will divide 1000 again. */ - do_div(stddev, rec->counter * (rec->counter - 1) * 1000); + stddev = div64_ul(stddev, + rec->counter * (rec->counter - 1) * 1000); } trace_seq_init(&s); @@ -5147,8 +5147,8 @@ static const struct file_operations ftrace_notrace_fops = { static DEFINE_MUTEX(graph_lock); -struct ftrace_hash *ftrace_graph_hash = EMPTY_HASH; -struct ftrace_hash *ftrace_graph_notrace_hash = EMPTY_HASH; +struct ftrace_hash __rcu *ftrace_graph_hash = EMPTY_HASH; +struct ftrace_hash __rcu *ftrace_graph_notrace_hash = EMPTY_HASH; enum graph_filter_type { GRAPH_FILTER_NOTRACE = 0, @@ -5420,8 +5420,15 @@ ftrace_graph_release(struct inode *inode, struct file *file) mutex_unlock(&graph_lock); - /* Wait till all users are no longer using the old hash */ - synchronize_sched(); + /* + * We need to do a hard force of sched synchronization. + * This is because we use preempt_disable() to do RCU, but + * the function tracers can be called where RCU is not watching + * (like before user_exit()). We can not rely on the RCU + * infrastructure to do the synchronization, thus we must do it + * ourselves. + */ + schedule_on_each_cpu(ftrace_sync); free_ftrace_hash(old_hash); } @@ -6312,9 +6319,10 @@ static void *fpid_next(struct seq_file *m, void *v, loff_t *pos) struct trace_array *tr = m->private; struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_pids); - if (v == FTRACE_NO_PIDS) + if (v == FTRACE_NO_PIDS) { + (*pos)++; return NULL; - + } return trace_pid_next(pid_list, v, pos); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index cd3e8e9ac239..5f52fc4b0ee3 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1567,6 +1567,7 @@ static __init int init_trace_selftests(void) pr_info("Running postponed tracer tests:\n"); + tracing_selftest_running = true; list_for_each_entry_safe(p, n, &postponed_selftests, list) { ret = run_tracer_selftest(p->type); /* If the test fails, then warn and remove from available_tracers */ @@ -1585,6 +1586,7 @@ static __init int init_trace_selftests(void) list_del(&p->list); kfree(p); } + tracing_selftest_running = false; out: mutex_unlock(&trace_types_lock); @@ -4409,6 +4411,10 @@ int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set) int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled) { + if ((mask == TRACE_ITER_RECORD_TGID) || + (mask == TRACE_ITER_RECORD_CMD)) + lockdep_assert_held(&event_mutex); + /* do nothing if flag is already set */ if (!!(tr->trace_flags & mask) == !!enabled) return 0; @@ -4474,6 +4480,7 @@ static int trace_set_options(struct trace_array *tr, char *option) cmp += 2; } + mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); for (i = 0; trace_options[i]; i++) { @@ -4488,6 +4495,7 @@ static int trace_set_options(struct trace_array *tr, char *option) ret = set_tracer_option(tr, cmp, neg); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); /* * If the first trailing whitespace is replaced with '\0' by strstrip, @@ -7432,9 +7440,11 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, if (val != 0 && val != 1) return -EINVAL; + mutex_lock(&event_mutex); mutex_lock(&trace_types_lock); ret = set_tracer_flag(tr, 1 << index, val); mutex_unlock(&trace_types_lock); + mutex_unlock(&event_mutex); if (ret < 0) return ret; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index dbb212c40a41..c4c61ebb8d05 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -868,22 +868,31 @@ extern void __trace_graph_return(struct trace_array *tr, unsigned long flags, int pc); #ifdef CONFIG_DYNAMIC_FTRACE -extern struct ftrace_hash *ftrace_graph_hash; -extern struct ftrace_hash *ftrace_graph_notrace_hash; +extern struct ftrace_hash __rcu *ftrace_graph_hash; +extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash; static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) { unsigned long addr = trace->func; int ret = 0; + struct ftrace_hash *hash; preempt_disable_notrace(); - if (ftrace_hash_empty(ftrace_graph_hash)) { + /* + * Have to open code "rcu_dereference_sched()" because the + * function graph tracer can be called when RCU is not + * "watching". + * Protected with schedule_on_each_cpu(ftrace_sync) + */ + hash = rcu_dereference_protected(ftrace_graph_hash, !preemptible()); + + if (ftrace_hash_empty(hash)) { ret = 1; goto out; } - if (ftrace_lookup_ip(ftrace_graph_hash, addr)) { + if (ftrace_lookup_ip(hash, addr)) { /* * This needs to be cleared on the return functions @@ -919,10 +928,20 @@ static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) static inline int ftrace_graph_notrace_addr(unsigned long addr) { int ret = 0; + struct ftrace_hash *notrace_hash; preempt_disable_notrace(); - if (ftrace_lookup_ip(ftrace_graph_notrace_hash, addr)) + /* + * Have to open code "rcu_dereference_sched()" because the + * function graph tracer can be called when RCU is not + * "watching". + * Protected with schedule_on_each_cpu(ftrace_sync) + */ + notrace_hash = rcu_dereference_protected(ftrace_graph_notrace_hash, + !preemptible()); + + if (ftrace_lookup_ip(notrace_hash, addr)) ret = 1; preempt_enable_notrace(); diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 13ba2d3f6a91..80b7b194c181 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -7,6 +7,7 @@ #include #include +#include #include "trace.h" static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS]; @@ -24,8 +25,10 @@ static int total_ref_count; static int perf_trace_event_perm(struct trace_event_call *tp_event, struct perf_event *p_event) { + int ret; + if (tp_event->perf_perm) { - int ret = tp_event->perf_perm(tp_event, p_event); + ret = tp_event->perf_perm(tp_event, p_event); if (ret) return ret; } @@ -44,8 +47,9 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event, /* The ftrace function trace is allowed only for root. */ if (ftrace_event_is_function(tp_event)) { - if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) - return -EPERM; + ret = perf_allow_tracepoint(&p_event->attr); + if (ret) + return ret; if (!is_sampling_event(p_event)) return 0; @@ -80,8 +84,9 @@ static int perf_trace_event_perm(struct trace_event_call *tp_event, * ...otherwise raw tracepoint data can be a severe data leak, * only allow root to have these. */ - if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) - return -EPERM; + ret = perf_allow_tracepoint(&p_event->attr); + if (ret) + return ret; return 0; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 24e2a2593ec7..b7157dc95aa2 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -329,7 +329,8 @@ void trace_event_enable_cmd_record(bool enable) struct trace_event_file *file; struct trace_array *tr; - mutex_lock(&event_mutex); + lockdep_assert_held(&event_mutex); + do_for_each_event_file(tr, file) { if (!(file->flags & EVENT_FILE_FL_ENABLED)) @@ -343,7 +344,6 @@ void trace_event_enable_cmd_record(bool enable) clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags); } } while_for_each_event_file(); - mutex_unlock(&event_mutex); } void trace_event_enable_tgid_record(bool enable) @@ -351,7 +351,8 @@ void trace_event_enable_tgid_record(bool enable) struct trace_event_file *file; struct trace_array *tr; - mutex_lock(&event_mutex); + lockdep_assert_held(&event_mutex); + do_for_each_event_file(tr, file) { if (!(file->flags & EVENT_FILE_FL_ENABLED)) continue; @@ -365,7 +366,6 @@ void trace_event_enable_tgid_record(bool enable) &file->flags); } } while_for_each_event_file(); - mutex_unlock(&event_mutex); } static int __ftrace_event_enable_disable(struct trace_event_file *file, diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index e2da180ca172..31e91efe243e 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -127,9 +127,10 @@ static void *trigger_next(struct seq_file *m, void *t, loff_t *pos) { struct trace_event_file *event_file = event_file_data(m->private); - if (t == SHOW_AVAILABLE_TRIGGERS) + if (t == SHOW_AVAILABLE_TRIGGERS) { + (*pos)++; return NULL; - + } return seq_list_next(t, &event_file->triggers, pos); } diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index ea20274a105a..0c23b5615977 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1185,13 +1185,12 @@ static void kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct trace_event_call *call = &tk->tp.call; - struct bpf_prog *prog = call->prog; struct kprobe_trace_entry_head *entry; struct hlist_head *head; int size, __size, dsize; int rctx; - if (prog && !trace_call_bpf(prog, regs)) + if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) return; head = this_cpu_ptr(call->perf_events); @@ -1221,13 +1220,12 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, struct pt_regs *regs) { struct trace_event_call *call = &tk->tp.call; - struct bpf_prog *prog = call->prog; struct kretprobe_trace_entry_head *entry; struct hlist_head *head; int size, __size, dsize; int rctx; - if (prog && !trace_call_bpf(prog, regs)) + if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) return; head = this_cpu_ptr(call->perf_events); diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index e288168661e1..e304196d7c28 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -89,8 +89,10 @@ static void tracing_sched_unregister(void) static void tracing_start_sched_switch(int ops) { - bool sched_register = (!sched_cmdline_ref && !sched_tgid_ref); + bool sched_register; + mutex_lock(&sched_register_mutex); + sched_register = (!sched_cmdline_ref && !sched_tgid_ref); switch (ops) { case RECORD_CMDLINE: diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 0fa9dadf3f4f..a5a4b5663163 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -640,7 +640,7 @@ static void start_wakeup_tracer(struct trace_array *tr) if (ret) { pr_info("wakeup trace: Couldn't activate tracepoint" " probe to kernel_sched_migrate_task\n"); - return; + goto fail_deprobe_sched_switch; } wakeup_reset(tr); @@ -658,6 +658,8 @@ static void start_wakeup_tracer(struct trace_array *tr) printk(KERN_ERR "failed to start wakeup tracer\n"); return; +fail_deprobe_sched_switch: + unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL); fail_deprobe_wake_new: unregister_trace_sched_wakeup_new(probe_wakeup, NULL); fail_deprobe: diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 719a52a4064a..6f9091f874a9 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -196,6 +196,11 @@ check_stack(unsigned long ip, unsigned long *stack) local_irq_restore(flags); } +/* Some archs may not define MCOUNT_INSN_SIZE */ +#ifndef MCOUNT_INSN_SIZE +# define MCOUNT_INSN_SIZE 0 +#endif + static void stack_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *pt_regs) diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c index 75bf1bcb4a8a..92b76f9e25ed 100644 --- a/kernel/trace/trace_stat.c +++ b/kernel/trace/trace_stat.c @@ -278,18 +278,22 @@ static int tracing_stat_init(void) d_tracing = tracing_init_dentry(); if (IS_ERR(d_tracing)) - return 0; + return -ENODEV; stat_dir = tracefs_create_dir("trace_stat", d_tracing); - if (!stat_dir) + if (!stat_dir) { pr_warn("Could not create tracefs 'trace_stat' entry\n"); + return -ENOMEM; + } return 0; } static int init_stat_file(struct stat_session *session) { - if (!stat_dir && tracing_stat_init()) - return -ENODEV; + int ret; + + if (!stat_dir && (ret = tracing_stat_init())) + return ret; session->file = tracefs_create_file(session->ts->name, 0644, stat_dir, @@ -302,7 +306,7 @@ static int init_stat_file(struct stat_session *session) int register_stat_tracer(struct tracer_stat *trace) { struct stat_session *session, *node; - int ret; + int ret = -EINVAL; if (!trace) return -EINVAL; @@ -313,17 +317,15 @@ int register_stat_tracer(struct tracer_stat *trace) /* Already registered? */ mutex_lock(&all_stat_sessions_mutex); list_for_each_entry(node, &all_stat_sessions, session_list) { - if (node->ts == trace) { - mutex_unlock(&all_stat_sessions_mutex); - return -EINVAL; - } + if (node->ts == trace) + goto out; } - mutex_unlock(&all_stat_sessions_mutex); + ret = -ENOMEM; /* Init the session */ session = kzalloc(sizeof(*session), GFP_KERNEL); if (!session) - return -ENOMEM; + goto out; session->ts = trace; INIT_LIST_HEAD(&session->session_list); @@ -332,15 +334,16 @@ int register_stat_tracer(struct tracer_stat *trace) ret = init_stat_file(session); if (ret) { destroy_session(session); - return ret; + goto out; } + ret = 0; /* Register */ - mutex_lock(&all_stat_sessions_mutex); list_add_tail(&session->session_list, &all_stat_sessions); + out: mutex_unlock(&all_stat_sessions_mutex); - return 0; + return ret; } void unregister_stat_tracer(struct tracer_stat *trace) diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index a2a642f2c64f..19bcaaac884b 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c @@ -560,9 +560,10 @@ static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls); static int sys_perf_refcount_enter; static int sys_perf_refcount_exit; -static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs, - struct syscall_metadata *sys_data, - struct syscall_trace_enter *rec) { +static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *regs, + struct syscall_metadata *sys_data, + struct syscall_trace_enter *rec) +{ struct syscall_tp_t { unsigned long long regs; unsigned long syscall_nr; @@ -574,7 +575,7 @@ static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs, param.syscall_nr = rec->nr; for (i = 0; i < sys_data->nb_args; i++) param.args[i] = rec->args[i]; - return trace_call_bpf(prog, ¶m); + return trace_call_bpf(call, ¶m); } static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) @@ -582,7 +583,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) struct syscall_metadata *sys_data; struct syscall_trace_enter *rec; struct hlist_head *head; - struct bpf_prog *prog; + bool valid_prog_array; int syscall_nr; int rctx; int size; @@ -597,9 +598,9 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) if (!sys_data) return; - prog = READ_ONCE(sys_data->enter_event->prog); head = this_cpu_ptr(sys_data->enter_event->perf_events); - if (!prog && hlist_empty(head)) + valid_prog_array = bpf_prog_array_valid(sys_data->enter_event); + if (!valid_prog_array && hlist_empty(head)) return; /* get the size after alignment with the u32 buffer size field */ @@ -615,7 +616,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id) syscall_get_arguments(current, regs, 0, sys_data->nb_args, (unsigned long *)&rec->args); - if ((prog && !perf_call_bpf_enter(prog, regs, sys_data, rec)) || + if ((valid_prog_array && + !perf_call_bpf_enter(sys_data->enter_event, regs, sys_data, rec)) || hlist_empty(head)) { perf_swevent_put_recursion_context(rctx); return; @@ -660,8 +662,9 @@ static void perf_sysenter_disable(struct trace_event_call *call) mutex_unlock(&syscall_trace_lock); } -static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs, - struct syscall_trace_exit *rec) { +static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *regs, + struct syscall_trace_exit *rec) +{ struct syscall_tp_t { unsigned long long regs; unsigned long syscall_nr; @@ -671,7 +674,7 @@ static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs, *(struct pt_regs **)¶m = regs; param.syscall_nr = rec->nr; param.ret = rec->ret; - return trace_call_bpf(prog, ¶m); + return trace_call_bpf(call, ¶m); } static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) @@ -679,7 +682,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; struct hlist_head *head; - struct bpf_prog *prog; + bool valid_prog_array; int syscall_nr; int rctx; int size; @@ -694,9 +697,9 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) if (!sys_data) return; - prog = READ_ONCE(sys_data->exit_event->prog); head = this_cpu_ptr(sys_data->exit_event->perf_events); - if (!prog && hlist_empty(head)) + valid_prog_array = bpf_prog_array_valid(sys_data->exit_event); + if (!valid_prog_array && hlist_empty(head)) return; /* We can probably do that at build time */ @@ -710,7 +713,8 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret) rec->nr = syscall_nr; rec->ret = syscall_get_return_value(current, regs); - if ((prog && !perf_call_bpf_exit(prog, regs, rec)) || + if ((valid_prog_array && + !perf_call_bpf_exit(sys_data->exit_event, regs, rec)) || hlist_empty(head)) { perf_swevent_put_recursion_context(rctx); return; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 10d13fdcabf4..78d56bddaaa5 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1118,13 +1118,12 @@ static void __uprobe_perf_func(struct trace_uprobe *tu, { struct trace_event_call *call = &tu->tp.call; struct uprobe_trace_entry_head *entry; - struct bpf_prog *prog = call->prog; struct hlist_head *head; void *data; int size, esize; int rctx; - if (prog && !trace_call_bpf(prog, regs)) + if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) return; esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 305039b122fa..35b2ba07f3c6 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -90,8 +90,8 @@ static int tracing_map_cmp_atomic64(void *val_a, void *val_b) #define DEFINE_TRACING_MAP_CMP_FN(type) \ static int tracing_map_cmp_##type(void *val_a, void *val_b) \ { \ - type a = *(type *)val_a; \ - type b = *(type *)val_b; \ + type a = (type)(*(u64 *)val_a); \ + type b = (type)(*(u64 *)val_b); \ \ return (a > b) ? 1 : ((a < b) ? -1 : 0); \ } diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 087994b23f8b..e4db5d54c07c 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -164,6 +164,8 @@ static void lockup_detector_update_enable(void) #ifdef CONFIG_SOFTLOCKUP_DETECTOR +#define SOFTLOCKUP_RESET ULONG_MAX + /* Global variables, exported for sysctl */ unsigned int __read_mostly softlockup_panic = CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE; @@ -271,7 +273,7 @@ notrace void touch_softlockup_watchdog_sched(void) * Preemption can be enabled. It doesn't matter which CPU's timestamp * gets zeroed here, so use the raw_ operation. */ - raw_cpu_write(watchdog_touch_ts, 0); + raw_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET); } notrace void touch_softlockup_watchdog(void) @@ -295,14 +297,14 @@ void touch_all_softlockup_watchdogs(void) * the softlockup check. */ for_each_cpu(cpu, &watchdog_allowed_mask) - per_cpu(watchdog_touch_ts, cpu) = 0; + per_cpu(watchdog_touch_ts, cpu) = SOFTLOCKUP_RESET; wq_watchdog_touch(-1); } void touch_softlockup_watchdog_sync(void) { __this_cpu_write(softlockup_touch_sync, true); - __this_cpu_write(watchdog_touch_ts, 0); + __this_cpu_write(watchdog_touch_ts, SOFTLOCKUP_RESET); } static int is_softlockup(unsigned long touch_ts) @@ -354,7 +356,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) /* .. and repeat */ hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); - if (touch_ts == 0) { + if (touch_ts == SOFTLOCKUP_RESET) { if (unlikely(__this_cpu_read(softlockup_touch_sync))) { /* * If the time stamp was touched atomically diff --git a/kernel/workqueue.c b/kernel/workqueue.c index c98eb62b6f1f..6835fbd8205a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1406,14 +1406,16 @@ static void __queue_work(int cpu, struct workqueue_struct *wq, WARN_ON_ONCE(!is_chained_work(wq))) return; retry: - if (req_cpu == WORK_CPU_UNBOUND) - cpu = wq_select_unbound_cpu(raw_smp_processor_id()); - /* pwq which will be used unless @work is executing elsewhere */ - if (!(wq->flags & WQ_UNBOUND)) - pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); - else + if (wq->flags & WQ_UNBOUND) { + if (req_cpu == WORK_CPU_UNBOUND) + cpu = wq_select_unbound_cpu(raw_smp_processor_id()); pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu)); + } else { + if (req_cpu == WORK_CPU_UNBOUND) + cpu = raw_smp_processor_id(); + pwq = per_cpu_ptr(wq->cpu_pwqs, cpu); + } /* * If @work was previously on a different pool, it might still be @@ -1530,8 +1532,10 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, struct work_struct *work = &dwork->work; WARN_ON_ONCE(!wq); - WARN_ON_ONCE(timer->function != delayed_work_timer_fn || - timer->data != (unsigned long)dwork); +#ifndef CONFIG_CFI + WARN_ON_ONCE(timer->function != delayed_work_timer_fn); +#endif + WARN_ON_ONCE(timer->data != (unsigned long)dwork); WARN_ON_ONCE(timer_pending(timer)); WARN_ON_ONCE(!list_empty(&work->entry)); @@ -2389,8 +2393,14 @@ repeat: */ if (need_to_create_worker(pool)) { spin_lock(&wq_mayday_lock); - get_pwq(pwq); - list_move_tail(&pwq->mayday_node, &wq->maydays); + /* + * Queue iff we aren't racing destruction + * and somebody else hasn't queued it already. + */ + if (wq->rescuer && list_empty(&pwq->mayday_node)) { + get_pwq(pwq); + list_add_tail(&pwq->mayday_node, &wq->maydays); + } spin_unlock(&wq_mayday_lock); } } @@ -4110,9 +4120,29 @@ void destroy_workqueue(struct workqueue_struct *wq) struct pool_workqueue *pwq; int node; + /* + * Remove it from sysfs first so that sanity check failure doesn't + * lead to sysfs name conflicts. + */ + workqueue_sysfs_unregister(wq); + /* drain it before proceeding with destruction */ drain_workqueue(wq); + /* kill rescuer, if sanity checks fail, leave it w/o rescuer */ + if (wq->rescuer) { + struct worker *rescuer = wq->rescuer; + + /* this prevents new queueing */ + spin_lock_irq(&wq_mayday_lock); + wq->rescuer = NULL; + spin_unlock_irq(&wq_mayday_lock); + + /* rescuer will empty maydays list before exiting */ + kthread_stop(rescuer->task); + kfree(rescuer); + } + /* sanity checks */ mutex_lock(&wq->mutex); for_each_pwq(pwq, wq) { @@ -4144,11 +4174,6 @@ void destroy_workqueue(struct workqueue_struct *wq) list_del_rcu(&wq->list); mutex_unlock(&wq_pool_mutex); - workqueue_sysfs_unregister(wq); - - if (wq->rescuer) - kthread_stop(wq->rescuer->task); - if (!(wq->flags & WQ_UNBOUND)) { /* * The base ref is never dropped on per-cpu pwqs. Directly @@ -4425,7 +4450,8 @@ static void show_pwq(struct pool_workqueue *pwq) pr_info(" pwq %d:", pool->id); pr_cont_pool_info(pool); - pr_cont(" active=%d/%d%s\n", pwq->nr_active, pwq->max_active, + pr_cont(" active=%d/%d refcnt=%d%s\n", + pwq->nr_active, pwq->max_active, pwq->refcnt, !list_empty(&pwq->mayday_node) ? " MAYDAY" : ""); hash_for_each(pool->busy_hash, bkt, worker, hentry) { diff --git a/lib/Kconfig b/lib/Kconfig index b1445b22a6de..8396c4cfa1ab 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -413,6 +413,10 @@ config HAS_DMA depends on !NO_DMA default y +config SGL_ALLOC + bool + default n + config DMA_NOOP_OPS bool depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7f7f95c4cdfb..8eb1c163a11c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -67,7 +67,7 @@ config DYNAMIC_DEBUG bool "Enable dynamic printk() support" default n depends on PRINTK - depends on DEBUG_FS + depends on (DEBUG_FS || PROC_FS) help Compiles debug level messages into the kernel, which would not @@ -85,8 +85,9 @@ config DYNAMIC_DEBUG Usage: Dynamic debugging is controlled via the 'dynamic_debug/control' file, - which is contained in the 'debugfs' filesystem. Thus, the debugfs - filesystem must first be mounted before making use of this feature. + which is contained in the 'debugfs' filesystem or procfs. + Thus, the debugfs or procfs filesystem must first be mounted before + making use of this feature. We refer the control file as: /dynamic_debug/control. This file contains a list of the debug statements that can be enabled. The format for each line of the file is: @@ -734,16 +735,21 @@ endmenu # "Memory Debugging" config ARCH_HAS_KCOV bool help - KCOV does not have any arch-specific code, but currently it is enabled - only for x86_64. KCOV requires testing on other archs, and most likely - disabling of instrumentation for some early boot code. + An architecture should select this when it can successfully + build and run with CONFIG_KCOV. This typically requires + disabling instrumentation for some early boot code. + +# Upstream uses $(cc-option, -fsanitize-coverage=trace-pc), which requires +# cc-option support. Here we instead check CC in scripts/Makefile.kcov. +config CC_HAS_SANCOV_TRACE_PC + def_bool ARCH_HAS_KCOV config KCOV bool "Code coverage for fuzzing" depends on ARCH_HAS_KCOV + depends on CC_HAS_SANCOV_TRACE_PC || GCC_PLUGINS select DEBUG_FS - select GCC_PLUGINS if !COMPILE_TEST - select GCC_PLUGIN_SANCOV if !COMPILE_TEST + select GCC_PLUGIN_SANCOV if !CC_HAS_SANCOV_TRACE_PC help KCOV exposes kernel code coverage information in a form suitable for coverage-guided fuzzing (randomized testing). @@ -754,10 +760,21 @@ config KCOV For more details, see Documentation/dev-tools/kcov.rst. +# Upstream uses $(cc-option, -fsanitize-coverage=trace-cmp), which requires +# cc-option support. Here we instead check CC in scripts/Makefile.kcov. +config KCOV_ENABLE_COMPARISONS + bool "Enable comparison operands collection by KCOV" + depends on KCOV + help + KCOV also exposes operands of every comparison in the instrumented + code along with operand sizes and PCs of the comparison instructions. + These operands can be used by fuzzing engines to improve the quality + of fuzzing coverage. + config KCOV_INSTRUMENT_ALL bool "Instrument all code by default" depends on KCOV - default y if KCOV + default y help If you are doing generic system call fuzzing (like e.g. syzkaller), then you will want to instrument the whole kernel and you should @@ -1921,6 +1938,16 @@ config TEST_MEMINIT If unsure, say N. +config TEST_STACKINIT + tristate "Test level of stack variable initialization" + help + Test if the kernel is zero-initializing stack variables and + padding. Coverage is controlled by compiler flags, + CONFIG_GCC_PLUGIN_STRUCTLEAK, CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF, + or CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL. + + If unsure, say N. + endmenu # runtime tests config MEMTEST diff --git a/lib/Makefile b/lib/Makefile index b892d0261501..d3ab213bc1f0 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -76,6 +76,10 @@ obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o +obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o +obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o +CFLAGS_test_stackinit.o += $(call cc-disable-warning, switch-unreachable) +obj-$(CONFIG_TEST_STACKINIT) += test_stackinit.o obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) diff --git a/lib/bitmap.c b/lib/bitmap.c index 2a9373ef4054..fbe38a83acb3 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -1212,3 +1213,22 @@ void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int n } EXPORT_SYMBOL(bitmap_copy_le); #endif + +unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags) +{ + return kmalloc_array(BITS_TO_LONGS(nbits), sizeof(unsigned long), + flags); +} +EXPORT_SYMBOL(bitmap_alloc); + +unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags) +{ + return bitmap_alloc(nbits, flags | __GFP_ZERO); +} +EXPORT_SYMBOL(bitmap_zalloc); + +void bitmap_free(const unsigned long *bitmap) +{ + kfree(bitmap); +} +EXPORT_SYMBOL(bitmap_free); diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c index dbb6076af6d8..f7cb553124cb 100644 --- a/lib/dec_and_lock.c +++ b/lib/dec_and_lock.c @@ -51,3 +51,18 @@ int _atomic_dec_and_lock_irqsafe(atomic_t *atomic, spinlock_t *lock, } EXPORT_SYMBOL(_atomic_dec_and_lock_irqsafe); +int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, + unsigned long *flags) +{ + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ + if (atomic_add_unless(atomic, -1, 1)) + return 0; + + /* Otherwise do it the slow way */ + spin_lock_irqsave(lock, *flags); + if (atomic_dec_and_test(atomic)) + return 1; + spin_unlock_irqrestore(lock, *flags); + return 0; +} +EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave); diff --git a/lib/devres.c b/lib/devres.c index 5f2aedd58bc5..40a8b12a8b6b 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -132,7 +132,8 @@ EXPORT_SYMBOL(devm_iounmap); * if (IS_ERR(base)) * return PTR_ERR(base); */ -void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) +void __iomem *devm_ioremap_resource(struct device *dev, + const struct resource *res) { resource_size_t size; const char *name; diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 7f5cdc1e6b29..028e593287a6 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -437,6 +437,7 @@ void debug_dma_dump_mappings(struct device *dev) } spin_unlock_irqrestore(&bucket->lock, flags); + cond_resched(); } } EXPORT_SYMBOL(debug_dma_dump_mappings); diff --git a/lib/dump_stack.c b/lib/dump_stack.c index c5edbedd364d..287ea178f0fa 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -46,7 +46,12 @@ retry: was_locked = 1; } else { local_irq_restore(flags); - cpu_relax(); + /* + * Wait for the lock to release before jumping to + * atomic_cmpxchg() in order to mitigate the thundering herd + * problem. + */ + do { cpu_relax(); } while (atomic_read(&dump_lock) != -1); goto retry; } diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 457b3df52eb1..e3b553e47e37 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -1094,22 +1094,26 @@ static void ddebug_remove_all_tables(void) static __initdata int ddebug_init_success; -static int __init dynamic_debug_init_debugfs(void) +static int __init dynamic_debug_init_control(void) { - struct dentry *dir, *file; + struct proc_dir_entry *procfs_dir; + struct dentry *debugfs_dir; if (!ddebug_init_success) return -ENODEV; - dir = debugfs_create_dir("dynamic_debug", NULL); - if (!dir) - return -ENOMEM; - file = debugfs_create_file("control", 0644, dir, NULL, - &ddebug_proc_fops); - if (!file) { - debugfs_remove(dir); - return -ENOMEM; + /* Create the control file in debugfs if it is enabled */ + if (debugfs_initialized()) { + debugfs_dir = debugfs_create_dir("dynamic_debug", NULL); + debugfs_create_file("control", 0644, debugfs_dir, NULL, + &ddebug_proc_fops); } + + /* Also create the control file in procfs */ + procfs_dir = proc_mkdir("dynamic_debug", NULL); + if (procfs_dir) + proc_create("control", 0644, procfs_dir, &ddebug_proc_fops); + return 0; } @@ -1186,4 +1190,4 @@ out_err: early_initcall(dynamic_debug_init); /* Debugfs setup must be done later */ -fs_initcall(dynamic_debug_init_debugfs); +fs_initcall(dynamic_debug_init_control); diff --git a/lib/genalloc.c b/lib/genalloc.c index ca06adc4f445..7e85d1e37a6e 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -35,6 +35,7 @@ #include #include #include +#include static inline size_t chunk_size(const struct gen_pool_chunk *chunk) { @@ -187,7 +188,7 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy int nbytes = sizeof(struct gen_pool_chunk) + BITS_TO_LONGS(nbits) * sizeof(long); - chunk = kzalloc_node(nbytes, GFP_KERNEL, nid); + chunk = vzalloc_node(nbytes, nid); if (unlikely(chunk == NULL)) return -ENOMEM; @@ -251,7 +252,7 @@ void gen_pool_destroy(struct gen_pool *pool) bit = find_next_bit(chunk->bits, end_bit, 0); BUG_ON(bit < end_bit); - kfree(chunk); + vfree(chunk); } kfree_const(pool->name); kfree(pool); @@ -311,7 +312,7 @@ unsigned long gen_pool_alloc_algo(struct gen_pool *pool, size_t size, end_bit = chunk_size(chunk) >> order; retry: start_bit = algo(chunk->bits, end_bit, start_bit, - nbits, data, pool); + nbits, data, pool, chunk->start_addr); if (start_bit >= end_bit) continue; remain = bitmap_set_ll(chunk->bits, start_bit, nbits); @@ -525,7 +526,7 @@ EXPORT_SYMBOL(gen_pool_set_algo); */ unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, void *data, - struct gen_pool *pool) + struct gen_pool *pool, unsigned long start_addr) { return bitmap_find_next_zero_area(map, size, start, nr, 0); } @@ -543,16 +544,19 @@ EXPORT_SYMBOL(gen_pool_first_fit); */ unsigned long gen_pool_first_fit_align(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, void *data, - struct gen_pool *pool) + struct gen_pool *pool, unsigned long start_addr) { struct genpool_data_align *alignment; - unsigned long align_mask; + unsigned long align_mask, align_off; int order; alignment = data; order = pool->min_alloc_order; align_mask = ((alignment->align + (1UL << order) - 1) >> order) - 1; - return bitmap_find_next_zero_area(map, size, start, nr, align_mask); + align_off = (start_addr & (alignment->align - 1)) >> order; + + return bitmap_find_next_zero_area_off(map, size, start, nr, + align_mask, align_off); } EXPORT_SYMBOL(gen_pool_first_fit_align); @@ -567,7 +571,7 @@ EXPORT_SYMBOL(gen_pool_first_fit_align); */ unsigned long gen_pool_fixed_alloc(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, void *data, - struct gen_pool *pool) + struct gen_pool *pool, unsigned long start_addr) { struct genpool_data_fixed *fixed_data; int order; @@ -601,7 +605,8 @@ EXPORT_SYMBOL(gen_pool_fixed_alloc); */ unsigned long gen_pool_first_fit_order_align(unsigned long *map, unsigned long size, unsigned long start, - unsigned int nr, void *data, struct gen_pool *pool) + unsigned int nr, void *data, struct gen_pool *pool, + unsigned long start_addr) { unsigned long align_mask = roundup_pow_of_two(nr) - 1; @@ -624,7 +629,7 @@ EXPORT_SYMBOL(gen_pool_first_fit_order_align); */ unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size, unsigned long start, unsigned int nr, void *data, - struct gen_pool *pool) + struct gen_pool *pool, unsigned long start_addr) { unsigned long start_bit = size; unsigned long len = size + 1; diff --git a/lib/idr.c b/lib/idr.c index edd9b2be1651..8c1a98d03164 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -111,13 +111,27 @@ void *idr_get_next(struct idr *idr, int *nextid) { struct radix_tree_iter iter; void __rcu **slot; + void *entry = NULL; - slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid); + radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, *nextid) { + entry = rcu_dereference_raw(*slot); + if (!entry) + continue; + if (!radix_tree_deref_retry(entry)) + break; + if (slot != (void *)&idr->idr_rt.rnode && + entry != (void *)RADIX_TREE_INTERNAL_NODE) + break; + slot = radix_tree_iter_retry(&iter); + } if (!slot) return NULL; + if (WARN_ON_ONCE(iter.index > INT_MAX)) + return NULL; + *nextid = iter.index; - return rcu_dereference_raw(*slot); + return entry; } EXPORT_SYMBOL(idr_get_next); diff --git a/lib/kfifo.c b/lib/kfifo.c index 90ba1eb1df06..a94227c55551 100644 --- a/lib/kfifo.c +++ b/lib/kfifo.c @@ -82,7 +82,8 @@ int __kfifo_init(struct __kfifo *fifo, void *buffer, { size /= esize; - size = roundup_pow_of_two(size); + if (!is_power_of_2(size)) + size = rounddown_pow_of_two(size); fifo->in = 0; fifo->out = 0; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index d172f0341b80..ff00c816266b 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -2184,7 +2184,7 @@ void __rcu **idr_get_free_cmn(struct radix_tree_root *root, offset = radix_tree_find_next_bit(node, IDR_FREE, offset + 1); start = next_index(start, node, offset); - if (start > max) + if (start > max || start == 0) return ERR_PTR(-ENOSPC); while (offset == RADIX_TREE_MAP_SIZE) { offset = node->offset + 1; diff --git a/lib/raid6/unroll.awk b/lib/raid6/unroll.awk index c6aa03631df8..0809805a7e23 100644 --- a/lib/raid6/unroll.awk +++ b/lib/raid6/unroll.awk @@ -13,7 +13,7 @@ BEGIN { for (i = 0; i < rep; ++i) { tmp = $0 gsub(/\$\$/, i, tmp) - gsub(/\$\#/, n, tmp) + gsub(/\$#/, n, tmp) gsub(/\$\*/, "$", tmp) print tmp } diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 355f2e90b72c..834c846c5af8 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -317,7 +317,7 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, if (prv) table->nents = ++table->orig_nents; - return -ENOMEM; + return -ENOMEM; } sg_init_table(sg, alloc_size); @@ -433,6 +433,111 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, } EXPORT_SYMBOL(sg_alloc_table_from_pages); +#ifdef CONFIG_SGL_ALLOC + +/** + * sgl_alloc_order - allocate a scatterlist and its pages + * @length: Length in bytes of the scatterlist. Must be at least one + * @order: Second argument for alloc_pages() + * @chainable: Whether or not to allocate an extra element in the scatterlist + * for scatterlist chaining purposes + * @gfp: Memory allocation flags + * @nent_p: [out] Number of entries in the scatterlist that have pages + * + * Returns: A pointer to an initialized scatterlist or %NULL upon failure. + */ +struct scatterlist *sgl_alloc_order(unsigned long long length, + unsigned int order, bool chainable, + gfp_t gfp, unsigned int *nent_p) +{ + struct scatterlist *sgl, *sg; + struct page *page; + unsigned int nent, nalloc; + u32 elem_len; + + nent = round_up(length, PAGE_SIZE << order) >> (PAGE_SHIFT + order); + /* Check for integer overflow */ + if (length > (nent << (PAGE_SHIFT + order))) + return NULL; + nalloc = nent; + if (chainable) { + /* Check for integer overflow */ + if (nalloc + 1 < nalloc) + return NULL; + nalloc++; + } + sgl = kmalloc_array(nalloc, sizeof(struct scatterlist), + (gfp & ~GFP_DMA) | __GFP_ZERO); + if (!sgl) + return NULL; + + sg_init_table(sgl, nent); + sg = sgl; + while (length) { + elem_len = min_t(u64, length, PAGE_SIZE << order); + page = alloc_pages(gfp, order); + if (!page) { + sgl_free(sgl); + return NULL; + } + + sg_set_page(sg, page, elem_len, 0); + length -= elem_len; + sg = sg_next(sg); + } + WARN_ON_ONCE(sg); + if (nent_p) + *nent_p = nent; + return sgl; +} +EXPORT_SYMBOL(sgl_alloc_order); + +/** + * sgl_alloc - allocate a scatterlist and its pages + * @length: Length in bytes of the scatterlist + * @gfp: Memory allocation flags + * @nent_p: [out] Number of entries in the scatterlist + * + * Returns: A pointer to an initialized scatterlist or %NULL upon failure. + */ +struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp, + unsigned int *nent_p) +{ + return sgl_alloc_order(length, 0, false, gfp, nent_p); +} +EXPORT_SYMBOL(sgl_alloc); + +/** + * sgl_free_order - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + * @order: Second argument for __free_pages() + */ +void sgl_free_order(struct scatterlist *sgl, int order) +{ + struct scatterlist *sg; + struct page *page; + + for (sg = sgl; sg; sg = sg_next(sg)) { + page = sg_page(sg); + if (page) + __free_pages(page, order); + } + kfree(sgl); +} +EXPORT_SYMBOL(sgl_free_order); + +/** + * sgl_free - free a scatterlist and its pages + * @sgl: Scatterlist with one or more elements + */ +void sgl_free(struct scatterlist *sgl) +{ + sgl_free_order(sgl, 0); +} +EXPORT_SYMBOL(sgl_free); + +#endif /* CONFIG_SGL_ALLOC */ + void __sg_page_iter_start(struct sg_page_iter *piter, struct scatterlist *sglist, unsigned int nents, unsigned long pgoffset) diff --git a/lib/stackdepot.c b/lib/stackdepot.c index b4596be84a4c..8d0d0da59074 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -96,15 +96,19 @@ static bool init_stack_slab(void **prealloc) return true; if (stack_slabs[depot_index] == NULL) { stack_slabs[depot_index] = *prealloc; + *prealloc = NULL; } else { - stack_slabs[depot_index + 1] = *prealloc; + /* If this is the last depot slab, do not touch the next one. */ + if (depot_index + 1 < STACK_ALLOC_MAX_SLABS) { + stack_slabs[depot_index + 1] = *prealloc; + *prealloc = NULL; + } /* * This smp_store_release pairs with smp_load_acquire() from * |next_slab_inited| above and in depot_save_stack(). */ smp_store_release(&next_slab_inited, 1); } - *prealloc = NULL; return true; } diff --git a/lib/test_kasan.c b/lib/test_kasan.c index d02a539a44aa..2c67ec7c9e17 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -157,6 +157,7 @@ static noinline void __init kmalloc_oob_krealloc_more(void) if (!ptr1 || !ptr2) { pr_err("Allocation failed\n"); kfree(ptr1); + kfree(ptr2); return; } diff --git a/lib/test_meminit.c b/lib/test_meminit.c index 9729f271d150..61d55b74d835 100644 --- a/lib/test_meminit.c +++ b/lib/test_meminit.c @@ -183,6 +183,9 @@ static bool __init check_buf(void *buf, int size, bool want_ctor, return fail; } +#define BULK_SIZE 100 +static void *bulk_array[BULK_SIZE]; + /* * Test kmem_cache with given parameters: * want_ctor - use a constructor; @@ -203,9 +206,24 @@ static int __init do_kmem_cache_size(size_t size, bool want_ctor, want_rcu ? SLAB_TYPESAFE_BY_RCU : 0, want_ctor ? test_ctor : NULL); for (iter = 0; iter < 10; iter++) { + /* Do a test of bulk allocations */ + if (!want_rcu && !want_ctor) { + int ret; + + ret = kmem_cache_alloc_bulk(c, alloc_mask, BULK_SIZE, bulk_array); + if (!ret) { + fail = true; + } else { + int i; + for (i = 0; i < ret; i++) + fail |= check_buf(bulk_array[i], size, want_ctor, want_rcu, want_zero); + kmem_cache_free_bulk(c, ret, bulk_array); + } + } + buf = kmem_cache_alloc(c, alloc_mask); /* Check that buf is zeroed, if it must be. */ - fail = check_buf(buf, size, want_ctor, want_rcu, want_zero); + fail |= check_buf(buf, size, want_ctor, want_rcu, want_zero); fill_with_garbage_skip(buf, size, want_ctor ? CTOR_BYTES : 0); if (!want_rcu) { @@ -297,6 +315,32 @@ out: return 1; } +static int __init do_kmem_cache_size_bulk(int size, int *total_failures) +{ + struct kmem_cache *c; + int i, iter, maxiter = 1024; + int num, bytes; + bool fail = false; + void *objects[10]; + + c = kmem_cache_create("test_cache", size, size, 0, NULL); + for (iter = 0; (iter < maxiter) && !fail; iter++) { + num = kmem_cache_alloc_bulk(c, GFP_KERNEL, ARRAY_SIZE(objects), + objects); + for (i = 0; i < num; i++) { + bytes = count_nonzero_bytes(objects[i], size); + if (bytes) + fail = true; + fill_with_garbage(objects[i], size); + } + + if (num) + kmem_cache_free_bulk(c, num, objects); + } + *total_failures += fail; + return 1; +} + /* * Test kmem_cache allocation by creating caches of different sizes, with and * without constructors, with and without SLAB_TYPESAFE_BY_RCU. @@ -318,6 +362,7 @@ static int __init test_kmemcache(int *total_failures) num_tests += do_kmem_cache_size(size, ctor, rcu, zero, &failures); } + num_tests += do_kmem_cache_size_bulk(size, &failures); } REPORT_FAILURES_IN_FN(); *total_failures += failures; @@ -352,11 +397,16 @@ static int __init test_meminit_init(void) num_tests += test_kmemcache(&failures); num_tests += test_rcu_persistent(&failures); - if (failures == 0) + if (failures == 0) { pr_info("all %d tests passed!\n", num_tests); - else + } else { pr_info("failures: %d out of %d\n", failures, num_tests); - + /* + * Android 4.14 only: if this test is built as part of the + * kernel, make the failure visible. + */ + panic("Test failed!\n"); + } return failures ? -EINVAL : 0; } module_init(test_meminit_init); diff --git a/lib/test_stackinit.c b/lib/test_stackinit.c new file mode 100644 index 000000000000..c589bfa120f3 --- /dev/null +++ b/lib/test_stackinit.c @@ -0,0 +1,402 @@ +// SPDX-Licenses: GPLv2 +/* + * Test cases for compiler-based stack variable zeroing via future + * compiler flags or CONFIG_GCC_PLUGIN_STRUCTLEAK*. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include + +/* Exfiltration buffer. */ +#define MAX_VAR_SIZE 128 +static u8 check_buf[MAX_VAR_SIZE]; + +/* Character array to trigger stack protector in all functions. */ +#define VAR_BUFFER 32 + +/* Volatile mask to convince compiler to copy memory with 0xff. */ +static volatile u8 forced_mask = 0xff; + +/* Location and size tracking to validate fill and test are colocated. */ +static void *fill_start, *target_start; +static size_t fill_size, target_size; + +static bool range_contains(char *haystack_start, size_t haystack_size, + char *needle_start, size_t needle_size) +{ + if (needle_start >= haystack_start && + needle_start + needle_size <= haystack_start + haystack_size) + return true; + return false; +} + +#define DO_NOTHING_TYPE_SCALAR(var_type) var_type +#define DO_NOTHING_TYPE_STRING(var_type) void +#define DO_NOTHING_TYPE_STRUCT(var_type) void + +#define DO_NOTHING_RETURN_SCALAR(ptr) *(ptr) +#define DO_NOTHING_RETURN_STRING(ptr) /**/ +#define DO_NOTHING_RETURN_STRUCT(ptr) /**/ + +#define DO_NOTHING_CALL_SCALAR(var, name) \ + (var) = do_nothing_ ## name(&(var)) +#define DO_NOTHING_CALL_STRING(var, name) \ + do_nothing_ ## name(var) +#define DO_NOTHING_CALL_STRUCT(var, name) \ + do_nothing_ ## name(&(var)) + +#define FETCH_ARG_SCALAR(var) &var +#define FETCH_ARG_STRING(var) var +#define FETCH_ARG_STRUCT(var) &var + +#define FILL_SIZE_STRING 16 + +#define INIT_CLONE_SCALAR /**/ +#define INIT_CLONE_STRING [FILL_SIZE_STRING] +#define INIT_CLONE_STRUCT /**/ + +#define INIT_SCALAR_none /**/ +#define INIT_SCALAR_zero = 0 + +#define INIT_STRING_none [FILL_SIZE_STRING] /**/ +#define INIT_STRING_zero [FILL_SIZE_STRING] = { } + +#define INIT_STRUCT_none /**/ +#define INIT_STRUCT_zero = { } +#define INIT_STRUCT_static_partial = { .two = 0, } +#define INIT_STRUCT_static_all = { .one = arg->one, \ + .two = arg->two, \ + .three = arg->three, \ + .four = arg->four, \ + } +#define INIT_STRUCT_dynamic_partial = { .two = arg->two, } +#define INIT_STRUCT_dynamic_all = { .one = arg->one, \ + .two = arg->two, \ + .three = arg->three, \ + .four = arg->four, \ + } +#define INIT_STRUCT_runtime_partial ; \ + var.two = 0 +#define INIT_STRUCT_runtime_all ; \ + var.one = 0; \ + var.two = 0; \ + var.three = 0; \ + memset(&var.four, 0, \ + sizeof(var.four)) + +/* + * @name: unique string name for the test + * @var_type: type to be tested for zeroing initialization + * @which: is this a SCALAR, STRING, or STRUCT type? + * @init_level: what kind of initialization is performed + * @xfail: is this test expected to fail? + */ +#define DEFINE_TEST_DRIVER(name, var_type, which, xfail) \ +/* Returns 0 on success, 1 on failure. */ \ +static noinline __init int test_ ## name (void) \ +{ \ + var_type zero INIT_CLONE_ ## which; \ + int ignored; \ + u8 sum = 0, i; \ + \ + /* Notice when a new test is larger than expected. */ \ + BUILD_BUG_ON(sizeof(zero) > MAX_VAR_SIZE); \ + \ + /* Fill clone type with zero for per-field init. */ \ + memset(&zero, 0x00, sizeof(zero)); \ + /* Clear entire check buffer for 0xFF overlap test. */ \ + memset(check_buf, 0x00, sizeof(check_buf)); \ + /* Fill stack with 0xFF. */ \ + ignored = leaf_ ##name((unsigned long)&ignored, 1, \ + FETCH_ARG_ ## which(zero)); \ + /* Verify all bytes overwritten with 0xFF. */ \ + for (sum = 0, i = 0; i < target_size; i++) \ + sum += (check_buf[i] != 0xFF); \ + if (sum) { \ + pr_err(#name ": leaf fill was not 0xFF!?\n"); \ + return 1; \ + } \ + /* Clear entire check buffer for later bit tests. */ \ + memset(check_buf, 0x00, sizeof(check_buf)); \ + /* Extract stack-defined variable contents. */ \ + ignored = leaf_ ##name((unsigned long)&ignored, 0, \ + FETCH_ARG_ ## which(zero)); \ + \ + /* Validate that compiler lined up fill and target. */ \ + if (!range_contains(fill_start, fill_size, \ + target_start, target_size)) { \ + pr_err(#name ": stack fill missed target!?\n"); \ + pr_err(#name ": fill %zu wide\n", fill_size); \ + pr_err(#name ": target offset by %d\n", \ + (int)((ssize_t)(uintptr_t)fill_start - \ + (ssize_t)(uintptr_t)target_start)); \ + return 1; \ + } \ + \ + /* Look for any bytes still 0xFF in check region. */ \ + for (sum = 0, i = 0; i < target_size; i++) \ + sum += (check_buf[i] == 0xFF); \ + \ + if (sum == 0) { \ + pr_info(#name " ok\n"); \ + return 0; \ + } else { \ + pr_warn(#name " %sFAIL (uninit bytes: %d)\n", \ + (xfail) ? "X" : "", sum); \ + return (xfail) ? 0 : 1; \ + } \ +} +#define DEFINE_TEST(name, var_type, which, init_level) \ +/* no-op to force compiler into ignoring "uninitialized" vars */\ +static noinline __init DO_NOTHING_TYPE_ ## which(var_type) \ +do_nothing_ ## name(var_type *ptr) \ +{ \ + /* Will always be true, but compiler doesn't know. */ \ + if ((unsigned long)ptr > 0x2) \ + return DO_NOTHING_RETURN_ ## which(ptr); \ + else \ + return DO_NOTHING_RETURN_ ## which(ptr + 1); \ +} \ +static noinline __init int leaf_ ## name(unsigned long sp, \ + bool fill, \ + var_type *arg) \ +{ \ + char buf[VAR_BUFFER]; \ + var_type var INIT_ ## which ## _ ## init_level; \ + \ + target_start = &var; \ + target_size = sizeof(var); \ + /* \ + * Keep this buffer around to make sure we've got a \ + * stack frame of SOME kind... \ + */ \ + memset(buf, (char)(sp & 0xff), sizeof(buf)); \ + /* Fill variable with 0xFF. */ \ + if (fill) { \ + fill_start = &var; \ + fill_size = sizeof(var); \ + memset(fill_start, \ + (char)((sp & 0xff) | forced_mask), \ + fill_size); \ + } \ + \ + /* Silence "never initialized" warnings. */ \ + DO_NOTHING_CALL_ ## which(var, name); \ + \ + /* Exfiltrate "var". */ \ + memcpy(check_buf, target_start, target_size); \ + \ + return (int)buf[0] | (int)buf[sizeof(buf) - 1]; \ +} \ +DEFINE_TEST_DRIVER(name, var_type, which, 0) + +/* Structure with no padding. */ +struct test_packed { + unsigned long one; + unsigned long two; + unsigned long three; + unsigned long four; +}; + +/* Simple structure with padding likely to be covered by compiler. */ +struct test_small_hole { + size_t one; + char two; + /* 3 byte padding hole here. */ + int three; + unsigned long four; +}; + +/* Try to trigger unhandled padding in a structure. */ +struct test_aligned { + u32 internal1; + u64 internal2; +} __aligned(64); + +struct test_big_hole { + u8 one; + u8 two; + u8 three; + /* 61 byte padding hole here. */ + struct test_aligned four; +} __aligned(64); + +struct test_trailing_hole { + char *one; + char *two; + char *three; + char four; + /* "sizeof(unsigned long) - 1" byte padding hole here. */ +}; + +/* Test if STRUCTLEAK is clearing structs with __user fields. */ +struct test_user { + u8 one; + unsigned long two; + char __user *three; + unsigned long four; +}; + +#define DEFINE_SCALAR_TEST(name, init) \ + DEFINE_TEST(name ## _ ## init, name, SCALAR, init) + +#define DEFINE_SCALAR_TESTS(init) \ + DEFINE_SCALAR_TEST(u8, init); \ + DEFINE_SCALAR_TEST(u16, init); \ + DEFINE_SCALAR_TEST(u32, init); \ + DEFINE_SCALAR_TEST(u64, init); \ + DEFINE_TEST(char_array_ ## init, unsigned char, STRING, init) + +#define DEFINE_STRUCT_TEST(name, init) \ + DEFINE_TEST(name ## _ ## init, \ + struct test_ ## name, STRUCT, init) + +#define DEFINE_STRUCT_TESTS(init) \ + DEFINE_STRUCT_TEST(small_hole, init); \ + DEFINE_STRUCT_TEST(big_hole, init); \ + DEFINE_STRUCT_TEST(trailing_hole, init); \ + DEFINE_STRUCT_TEST(packed, init) + +/* These should be fully initialized all the time! */ +DEFINE_SCALAR_TESTS(zero); +DEFINE_STRUCT_TESTS(zero); +/* Static initialization: padding may be left uninitialized. */ +DEFINE_STRUCT_TESTS(static_partial); +DEFINE_STRUCT_TESTS(static_all); +/* Dynamic initialization: padding may be left uninitialized. */ +DEFINE_STRUCT_TESTS(dynamic_partial); +DEFINE_STRUCT_TESTS(dynamic_all); +/* Runtime initialization: padding may be left uninitialized. */ +DEFINE_STRUCT_TESTS(runtime_partial); +DEFINE_STRUCT_TESTS(runtime_all); +/* No initialization without compiler instrumentation. */ +DEFINE_SCALAR_TESTS(none); +DEFINE_STRUCT_TESTS(none); +DEFINE_TEST(user, struct test_user, STRUCT, none); + +/* + * Check two uses through a variable declaration outside either path, + * which was noticed as a special case in porting earlier stack init + * compiler logic. + */ +static int noinline __leaf_switch_none(int path, bool fill) +{ + uint64_t var; + + switch (path) { + + case 1: + target_start = &var; + target_size = sizeof(var); + if (fill) { + fill_start = &var; + fill_size = sizeof(var); + + memset(fill_start, forced_mask | 0x55, fill_size); + } + memcpy(check_buf, target_start, target_size); + break; + case 2: + target_start = &var; + target_size = sizeof(var); + if (fill) { + fill_start = &var; + fill_size = sizeof(var); + + memset(fill_start, forced_mask | 0xaa, fill_size); + } + memcpy(check_buf, target_start, target_size); + break; + default: + var = 5; + return var & forced_mask; + } + return 0; +} + +static noinline __init int leaf_switch_1_none(unsigned long sp, bool fill, + uint64_t *arg) +{ + return __leaf_switch_none(1, fill); +} + +static noinline __init int leaf_switch_2_none(unsigned long sp, bool fill, + uint64_t *arg) +{ + return __leaf_switch_none(2, fill); +} + +/* + * These are expected to fail for most configurations because neither + * GCC nor Clang have a way to perform initialization of variables in + * non-code areas (i.e. in a switch statement before the first "case"). + * https://bugs.llvm.org/show_bug.cgi?id=44916 + */ +DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR, 1); +DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR, 1); + +static int __init test_stackinit_init(void) +{ + unsigned int failures = 0; + +#define test_scalars(init) do { \ + failures += test_u8_ ## init (); \ + failures += test_u16_ ## init (); \ + failures += test_u32_ ## init (); \ + failures += test_u64_ ## init (); \ + failures += test_char_array_ ## init (); \ + } while (0) + +#define test_structs(init) do { \ + failures += test_small_hole_ ## init (); \ + failures += test_big_hole_ ## init (); \ + failures += test_trailing_hole_ ## init (); \ + failures += test_packed_ ## init (); \ + } while (0) + + /* These are explicitly initialized and should always pass. */ + test_scalars(zero); + test_structs(zero); + /* Padding here appears to be accidentally always initialized? */ + test_structs(dynamic_partial); + /* Padding initialization depends on compiler behaviors. */ + test_structs(static_partial); + test_structs(static_all); + test_structs(dynamic_all); + test_structs(runtime_partial); + test_structs(runtime_all); + + /* STRUCTLEAK_BYREF_ALL should cover everything from here down. */ + test_scalars(none); + failures += test_switch_1_none(); + failures += test_switch_2_none(); + + /* STRUCTLEAK_BYREF should cover from here down. */ + test_structs(none); + + /* STRUCTLEAK will only cover this. */ + failures += test_user(); + + if (failures == 0) { + pr_info("all tests passed!\n"); + } else { + pr_err("failures: %u\n", failures); + /* + * Android 4.14 only: if this test is built as part of the + * kernel, make the failure visible. + */ + panic("Test failed!\n"); + } + + return failures ? -EINVAL : 0; +} +module_init(test_stackinit_init); + +static void __exit test_stackinit_exit(void) +{ } +module_exit(test_stackinit_exit); + +MODULE_LICENSE("GPL"); diff --git a/mm/filemap.c b/mm/filemap.c index f411ea676a5c..7c44e1fb18bb 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -340,7 +340,8 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start, .range_end = end, }; - if (!mapping_cap_writeback_dirty(mapping)) + if (!mapping_cap_writeback_dirty(mapping) || + !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) return 0; wbc_attach_fdatawrite_inode(&wbc, mapping->host); @@ -2926,6 +2927,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) unsigned long limit = rlimit(RLIMIT_FSIZE); loff_t pos; + if (IS_SWAPFILE(inode)) + return -ETXTBSY; + if (!iov_iter_count(from)) return 0; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index c0d9cc4c0e6f..0177b2fb50e0 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -163,16 +163,13 @@ static ssize_t enabled_store(struct kobject *kobj, { ssize_t ret = count; - if (!memcmp("always", buf, - min(sizeof("always")-1, count))) { + if (sysfs_streq(buf, "always")) { clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("madvise", buf, - min(sizeof("madvise")-1, count))) { + } else if (sysfs_streq(buf, "madvise")) { clear_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("never", buf, - min(sizeof("never")-1, count))) { + } else if (sysfs_streq(buf, "never")) { clear_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags); } else @@ -236,32 +233,27 @@ static ssize_t defrag_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { - if (!memcmp("always", buf, - min(sizeof("always")-1, count))) { + if (sysfs_streq(buf, "always")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("defer+madvise", buf, - min(sizeof("defer+madvise")-1, count))) { + } else if (sysfs_streq(buf, "defer+madvise")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("defer", buf, - min(sizeof("defer")-1, count))) { + } else if (sysfs_streq(buf, "defer")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("madvise", buf, - min(sizeof("madvise")-1, count))) { + } else if (sysfs_streq(buf, "madvise")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); set_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags); - } else if (!memcmp("never", buf, - min(sizeof("never")-1, count))) { + } else if (sysfs_streq(buf, "never")) { clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags); clear_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_OR_MADV_FLAG, &transparent_hugepage_flags); @@ -502,13 +494,13 @@ void prep_transhuge_page(struct page *page) set_compound_page_dtor(page, TRANSHUGE_PAGE_DTOR); } -unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long len, +static unsigned long __thp_get_unmapped_area(struct file *filp, + unsigned long addr, unsigned long len, loff_t off, unsigned long flags, unsigned long size) { - unsigned long addr; loff_t off_end = off + len; loff_t off_align = round_up(off, size); - unsigned long len_pad; + unsigned long len_pad, ret; if (off_end <= off_align || (off_end - off_align) < size) return 0; @@ -517,30 +509,40 @@ unsigned long __thp_get_unmapped_area(struct file *filp, unsigned long len, if (len_pad < len || (off + len_pad) < off) return 0; - addr = current->mm->get_unmapped_area(filp, 0, len_pad, + ret = current->mm->get_unmapped_area(filp, addr, len_pad, off >> PAGE_SHIFT, flags); - if (IS_ERR_VALUE(addr)) + + /* + * The failure might be due to length padding. The caller will retry + * without the padding. + */ + if (IS_ERR_VALUE(ret)) return 0; - addr += (off - addr) & (size - 1); - return addr; + /* + * Do not try to align to THP boundary if allocation at the address + * hint succeeds. + */ + if (ret == addr) + return addr; + + ret += (off - ret) & (size - 1); + return ret; } unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { + unsigned long ret; loff_t off = (loff_t)pgoff << PAGE_SHIFT; - if (addr) - goto out; if (!IS_DAX(filp->f_mapping->host) || !IS_ENABLED(CONFIG_FS_DAX_PMD)) goto out; - addr = __thp_get_unmapped_area(filp, len, off, flags, PMD_SIZE); - if (addr) - return addr; - - out: + ret = __thp_get_unmapped_area(filp, addr, len, off, flags, PMD_SIZE); + if (ret) + return ret; +out: return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); } EXPORT_SYMBOL_GPL(thp_get_unmapped_area); @@ -2552,7 +2554,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list) unsigned long flags; pgoff_t end; - VM_BUG_ON_PAGE(is_huge_zero_page(page), page); + VM_BUG_ON_PAGE(is_huge_zero_page(head), head); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageCompound(page), page); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 698bfe4852eb..9e8c023b1168 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1081,11 +1081,10 @@ static bool pfn_range_valid_gigantic(struct zone *z, struct page *page; for (i = start_pfn; i < end_pfn; i++) { - if (!pfn_valid(i)) + page = pfn_to_online_page(i); + if (!page) return false; - page = pfn_to_page(i); - if (page_zone(page) != z) return false; diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index eec1150125b9..e430e04997ee 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -196,7 +196,7 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, again: rcu_read_lock(); h_cg = hugetlb_cgroup_from_task(current); - if (!css_tryget_online(&h_cg->css)) { + if (!css_tryget(&h_cg->css)) { rcu_read_unlock(); goto again; } diff --git a/mm/internal.h b/mm/internal.h index 47f32cad37ff..bf8f41dc6e48 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -475,6 +475,16 @@ static inline void mminit_validate_memmodel_limits(unsigned long *start_pfn, #define NODE_RECLAIM_SOME 0 #define NODE_RECLAIM_SUCCESS 1 +#ifdef CONFIG_NUMA +extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int); +#else +static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask, + unsigned int order) +{ + return NODE_RECLAIM_NOSCAN; +} +#endif + extern int hwpoison_filter(struct page *p); extern u32 hwpoison_filter_dev_major; diff --git a/mm/ksm.c b/mm/ksm.c index f50cc573815f..764486ffcd16 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -849,13 +849,13 @@ static int remove_stable_node(struct stable_node *stable_node) return 0; } - if (WARN_ON_ONCE(page_mapped(page))) { - /* - * This should not happen: but if it does, just refuse to let - * merge_across_nodes be switched - there is no need to panic. - */ - err = -EBUSY; - } else { + /* + * Page could be still mapped if this races with __mmput() running in + * between ksm_exit() and exit_mmap(). Just refuse to let + * merge_across_nodes/max_page_sharing be switched. + */ + err = -EBUSY; + if (!page_mapped(page)) { /* * The stable node did not yet appear stale to get_ksm_page(), * since that allows for an unmapped ksm page to be recognized diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9a9ad669977c..6e741913264b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -725,7 +725,7 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) if (unlikely(!memcg)) memcg = root_mem_cgroup; } - } while (!css_tryget_online(&memcg->css)); + } while (!css_tryget(&memcg->css)); rcu_read_unlock(); return memcg; } @@ -5902,19 +5902,9 @@ void mem_cgroup_sk_alloc(struct sock *sk) if (!mem_cgroup_sockets_enabled) return; - /* - * Socket cloning can throw us here with sk_memcg already - * filled. It won't however, necessarily happen from - * process context. So the test for root memcg given - * the current task's memcg won't help us in this case. - * - * Respecting the original socket's memcg is a better - * decision in this case. - */ - if (sk->sk_memcg) { - css_get(&sk->sk_memcg->css); + /* Do not associate the sock with unrelated interrupted task's memcg. */ + if (in_interrupt()) return; - } rcu_read_lock(); memcg = mem_cgroup_from_task(current); diff --git a/mm/memory.c b/mm/memory.c index f363e842bc7b..59a72238ed9f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2548,6 +2548,10 @@ static int do_page_mkwrite(struct vm_fault *vmf) vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; + if (vmf->vma->vm_file && + IS_SWAPFILE(vmf->vma->vm_file->f_mapping->host)) + return VM_FAULT_SIGBUS; + ret = vmf->vma->vm_ops->page_mkwrite(vmf); /* Restore original flags so that caller is not surprised */ vmf->flags = old_flags; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index c9d3a49bd4e2..2d6626ab29d1 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -343,12 +343,8 @@ static unsigned long find_smallest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, unsigned long end_pfn) { - struct mem_section *ms; - for (; start_pfn < end_pfn; start_pfn += PAGES_PER_SECTION) { - ms = __pfn_to_section(start_pfn); - - if (unlikely(!valid_section(ms))) + if (unlikely(!pfn_to_online_page(start_pfn))) continue; if (unlikely(pfn_to_nid(start_pfn) != nid)) @@ -368,15 +364,12 @@ static unsigned long find_biggest_section_pfn(int nid, struct zone *zone, unsigned long start_pfn, unsigned long end_pfn) { - struct mem_section *ms; unsigned long pfn; /* pfn is the end pfn of a memory section. */ pfn = end_pfn - 1; for (; pfn >= start_pfn; pfn -= PAGES_PER_SECTION) { - ms = __pfn_to_section(pfn); - - if (unlikely(!valid_section(ms))) + if (unlikely(!pfn_to_online_page(pfn))) continue; if (unlikely(pfn_to_nid(pfn) != nid)) @@ -398,7 +391,6 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn, unsigned long z = zone_end_pfn(zone); /* zone_end_pfn namespace clash */ unsigned long zone_end_pfn = z; unsigned long pfn; - struct mem_section *ms; int nid = zone_to_nid(zone); zone_span_writelock(zone); @@ -436,9 +428,7 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn, */ pfn = zone_start_pfn; for (; pfn < zone_end_pfn; pfn += PAGES_PER_SECTION) { - ms = __pfn_to_section(pfn); - - if (unlikely(!valid_section(ms))) + if (unlikely(!pfn_to_online_page(pfn))) continue; if (page_zone(pfn_to_page(pfn)) != zone) @@ -459,70 +449,33 @@ static void shrink_zone_span(struct zone *zone, unsigned long start_pfn, zone_span_writeunlock(zone); } -static void shrink_pgdat_span(struct pglist_data *pgdat, - unsigned long start_pfn, unsigned long end_pfn) +static void update_pgdat_span(struct pglist_data *pgdat) { - unsigned long pgdat_start_pfn = pgdat->node_start_pfn; - unsigned long p = pgdat_end_pfn(pgdat); /* pgdat_end_pfn namespace clash */ - unsigned long pgdat_end_pfn = p; - unsigned long pfn; - struct mem_section *ms; - int nid = pgdat->node_id; + unsigned long node_start_pfn = 0, node_end_pfn = 0; + struct zone *zone; - if (pgdat_start_pfn == start_pfn) { - /* - * If the section is smallest section in the pgdat, it need - * shrink pgdat->node_start_pfn and pgdat->node_spanned_pages. - * In this case, we find second smallest valid mem_section - * for shrinking zone. - */ - pfn = find_smallest_section_pfn(nid, NULL, end_pfn, - pgdat_end_pfn); - if (pfn) { - pgdat->node_start_pfn = pfn; - pgdat->node_spanned_pages = pgdat_end_pfn - pfn; + for (zone = pgdat->node_zones; + zone < pgdat->node_zones + MAX_NR_ZONES; zone++) { + unsigned long zone_end_pfn = zone->zone_start_pfn + + zone->spanned_pages; + + /* No need to lock the zones, they can't change. */ + if (!zone->spanned_pages) + continue; + if (!node_end_pfn) { + node_start_pfn = zone->zone_start_pfn; + node_end_pfn = zone_end_pfn; + continue; } - } else if (pgdat_end_pfn == end_pfn) { - /* - * If the section is biggest section in the pgdat, it need - * shrink pgdat->node_spanned_pages. - * In this case, we find second biggest valid mem_section for - * shrinking zone. - */ - pfn = find_biggest_section_pfn(nid, NULL, pgdat_start_pfn, - start_pfn); - if (pfn) - pgdat->node_spanned_pages = pfn - pgdat_start_pfn + 1; + + if (zone_end_pfn > node_end_pfn) + node_end_pfn = zone_end_pfn; + if (zone->zone_start_pfn < node_start_pfn) + node_start_pfn = zone->zone_start_pfn; } - /* - * If the section is not biggest or smallest mem_section in the pgdat, - * it only creates a hole in the pgdat. So in this case, we need not - * change the pgdat. - * But perhaps, the pgdat has only hole data. Thus it check the pgdat - * has only hole or not. - */ - pfn = pgdat_start_pfn; - for (; pfn < pgdat_end_pfn; pfn += PAGES_PER_SECTION) { - ms = __pfn_to_section(pfn); - - if (unlikely(!valid_section(ms))) - continue; - - if (pfn_to_nid(pfn) != nid) - continue; - - /* If the section is current section, it continues the loop */ - if (start_pfn == pfn) - continue; - - /* If we find valid section, we have nothing to do */ - return; - } - - /* The pgdat has no valid section */ - pgdat->node_start_pfn = 0; - pgdat->node_spanned_pages = 0; + pgdat->node_start_pfn = node_start_pfn; + pgdat->node_spanned_pages = node_end_pfn - node_start_pfn; } static void __remove_zone(struct zone *zone, unsigned long start_pfn) @@ -531,9 +484,19 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn) int nr_pages = PAGES_PER_SECTION; unsigned long flags; +#ifdef CONFIG_ZONE_DEVICE + /* + * Zone shrinking code cannot properly deal with ZONE_DEVICE. So + * we will not try to shrink the zones - which is okay as + * set_zone_contiguous() cannot deal with ZONE_DEVICE either way. + */ + if (zone_idx(zone) == ZONE_DEVICE) + return; +#endif + pgdat_resize_lock(zone->zone_pgdat, &flags); shrink_zone_span(zone, start_pfn, start_pfn + nr_pages); - shrink_pgdat_span(pgdat, start_pfn, start_pfn + nr_pages); + update_pgdat_span(pgdat); pgdat_resize_unlock(zone->zone_pgdat, &flags); } @@ -1110,7 +1073,12 @@ static int online_memory_block(struct memory_block *mem, void *arg) return device_online(&mem->dev); } -/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ +/* + * NOTE: The caller must call lock_device_hotplug() to serialize hotplug + * and online/offline operations (triggered e.g. by sysfs). + * + * we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG + */ int __ref add_memory_resource(int nid, struct resource *res, bool online) { u64 start, size; @@ -1203,9 +1171,9 @@ out: mem_hotplug_done(); return ret; } -EXPORT_SYMBOL_GPL(add_memory_resource); -int __ref add_memory(int nid, u64 start, u64 size) +/* requires device_hotplug_lock, see add_memory_resource() */ +int __ref __add_memory(int nid, u64 start, u64 size) { struct resource *res; int ret; @@ -1219,6 +1187,17 @@ int __ref add_memory(int nid, u64 start, u64 size) release_memory_resource(res); return ret; } + +int add_memory(int nid, u64 start, u64 size) +{ + int rc; + + lock_device_hotplug(); + rc = __add_memory(nid, start, size); + unlock_device_hotplug(); + + return rc; +} EXPORT_SYMBOL_GPL(add_memory); #ifdef CONFIG_MEMORY_HOTREMOVE diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 123b88dc3091..6bc6c2f833a4 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2745,6 +2745,9 @@ int mpol_parse_str(char *str, struct mempolicy **mpol) char *flags = strchr(str, '='); int err = 1; + if (flags) + *flags++ = '\0'; /* terminate mode string */ + if (nodelist) { /* NUL-terminate mode or flags string */ *nodelist++ = '\0'; @@ -2755,9 +2758,6 @@ int mpol_parse_str(char *str, struct mempolicy **mpol) } else nodes_clear(nodes); - if (flags) - *flags++ = '\0'; /* terminate mode string */ - for (mode = 0; mode < MPOL_MAX; mode++) { if (!strcmp(str, policy_modes[mode])) { break; diff --git a/mm/mmap.c b/mm/mmap.c index a41e32d87e73..70195ec86fac 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -89,12 +89,6 @@ static void unmap_region(struct mm_struct *mm, * MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes * w: (no) no w: (no) no w: (copy) copy w: (no) no * x: (no) no x: (no) yes x: (no) yes x: (yes) yes - * - * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and - * MAP_PRIVATE: - * r: (no) no - * w: (no) no - * x: (yes) yes */ pgprot_t protection_map[16] __ro_after_init = { __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111, @@ -208,8 +202,6 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) bool populate; LIST_HEAD(uf); - brk = untagged_addr(brk); - if (down_write_killable(&mm->mmap_sem)) return -EINTR; @@ -1510,8 +1502,12 @@ unsigned long do_mmap(struct file *file, unsigned long addr, switch (flags & MAP_TYPE) { case MAP_SHARED: - if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE)) - return -EACCES; + if (prot & PROT_WRITE) { + if (!(file->f_mode & FMODE_WRITE)) + return -EACCES; + if (IS_SWAPFILE(file->f_mapping->host)) + return -ETXTBSY; + } /* * Make sure we don't allow writing to an append-only @@ -1600,8 +1596,6 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, struct file *file = NULL; unsigned long retval; - addr = untagged_addr(addr); - if (!(flags & MAP_ANONYMOUS)) { audit_mmap_fd(fd, flags); file = fget(fd); diff --git a/mm/mprotect.c b/mm/mprotect.c index 2fb38d9c7e12..3979aa727f89 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -148,6 +148,31 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, return pages; } +/* + * Used when setting automatic NUMA hinting protection where it is + * critical that a numa hinting PMD is not confused with a bad PMD. + */ +static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd) +{ + pmd_t pmdval = pmd_read_atomic(pmd); + + /* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + barrier(); +#endif + + if (pmd_none(pmdval)) + return 1; + if (pmd_trans_huge(pmdval)) + return 0; + if (unlikely(pmd_bad(pmdval))) { + pmd_clear_bad(pmd); + return 1; + } + + return 0; +} + static inline unsigned long change_pmd_range(struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, pgprot_t newprot, int dirty_accountable, int prot_numa) @@ -164,8 +189,17 @@ static inline unsigned long change_pmd_range(struct vm_area_struct *vma, unsigned long this_pages; next = pmd_addr_end(addr, end); - if (!is_swap_pmd(*pmd) && !pmd_trans_huge(*pmd) && !pmd_devmap(*pmd) - && pmd_none_or_clear_bad(pmd)) + + /* + * Automatic NUMA balancing walks the tables with mmap_sem + * held for read. It's possible a parallel update to occur + * between pmd_trans_huge() and a pmd_none_or_clear_bad() + * check leading to a false positive and clearing. + * Hence, it's necessary to atomically read the PMD value + * for all the checks. + */ + if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) && + pmd_none_or_clear_bad_unless_trans_huge(pmd)) goto next; /* invoke the mmu notifier if the pmd is populated */ diff --git a/mm/mremap.c b/mm/mremap.c index 9737d473089d..d18f8429596f 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -543,7 +543,6 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, LIST_HEAD(uf_unmap); addr = untagged_addr(addr); - new_addr = untagged_addr(new_addr); if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) return ret; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d69fc6854153..aaf040dc6f4e 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -202,11 +202,11 @@ static void wb_min_max_ratio(struct bdi_writeback *wb, if (this_bw < tot_bw) { if (min) { min *= this_bw; - do_div(min, tot_bw); + min = div64_ul(min, tot_bw); } if (max < 100) { max *= this_bw; - do_div(max, tot_bw); + max = div64_ul(max, tot_bw); } } @@ -2151,6 +2151,13 @@ EXPORT_SYMBOL(tag_pages_for_writeback); * not miss some pages (e.g., because some other process has cleared TOWRITE * tag we set). The rule we follow is that TOWRITE tag can be cleared only * by the process clearing the DIRTY tag (and submitting the page for IO). + * + * To avoid deadlocks between range_cyclic writeback and callers that hold + * pages in PageWriteback to aggregate IO until write_cache_pages() returns, + * we do not loop back to the start of the file. Doing so causes a page + * lock/page writeback access order inversion - we should only ever lock + * multiple pages in ascending page->index order, and looping back to the start + * of the file violates that rule and causes deadlocks. */ int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, @@ -2165,7 +2172,6 @@ int write_cache_pages(struct address_space *mapping, pgoff_t index; pgoff_t end; /* Inclusive */ pgoff_t done_index; - int cycled; int range_whole = 0; int tag; @@ -2173,23 +2179,17 @@ int write_cache_pages(struct address_space *mapping, if (wbc->range_cyclic) { writeback_index = mapping->writeback_index; /* prev offset */ index = writeback_index; - if (index == 0) - cycled = 1; - else - cycled = 0; end = -1; } else { index = wbc->range_start >> PAGE_SHIFT; end = wbc->range_end >> PAGE_SHIFT; if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) range_whole = 1; - cycled = 1; /* ignore range_cyclic tests */ } if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag = PAGECACHE_TAG_TOWRITE; else tag = PAGECACHE_TAG_DIRTY; -retry: if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag_pages_for_writeback(mapping, index, end); done_index = index; @@ -2281,17 +2281,14 @@ continue_unlock: pagevec_release(&pvec); cond_resched(); } - if (!cycled && !done) { - /* - * range_cyclic: - * We hit the last page and there is more work to be done: wrap - * back to the start of the file - */ - cycled = 1; - index = 0; - end = writeback_index - 1; - goto retry; - } + + /* + * If we hit the last page and there is more work to be done: wrap + * back the index back to the start of the file for the next + * time we are called. + */ + if (wbc->range_cyclic && !done) + done_index = 0; if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = done_index; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c2eca9fee330..a8b8c44a794c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4666,6 +4666,7 @@ long si_mem_available(void) unsigned long pagecache; unsigned long wmark_low = 0; unsigned long pages[NR_LRU_LISTS]; + unsigned long reclaimable; struct zone *zone; int lru; @@ -4691,19 +4692,13 @@ long si_mem_available(void) available += pagecache; /* - * Part of the reclaimable slab consists of items that are in use, - * and cannot be freed. Cap this estimate at the low watermark. + * Part of the reclaimable slab and other kernel memory consists of + * items that are in use, and cannot be freed. Cap this estimate at the + * low watermark. */ - available += global_node_page_state(NR_SLAB_RECLAIMABLE) - - min(global_node_page_state(NR_SLAB_RECLAIMABLE) / 2, - wmark_low); - - /* - * Part of the kernel memory, which can be released under memory - * pressure. - */ - available += global_node_page_state(NR_INDIRECTLY_RECLAIMABLE_BYTES) >> - PAGE_SHIFT; + reclaimable = global_node_page_state(NR_SLAB_RECLAIMABLE) + + global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE); + available += reclaimable - min(reclaimable / 2, wmark_low); if (available < 0) available = 0; @@ -4932,6 +4927,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) " managed:%lukB" " mlocked:%lukB" " kernel_stack:%lukB" +#ifdef CONFIG_SHADOW_CALL_STACK + " shadow_call_stack:%lukB" +#endif " pagetables:%lukB" " bounce:%lukB" " free_pcp:%lukB" @@ -4953,6 +4951,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask) K(zone->managed_pages), K(zone_page_state(zone, NR_MLOCK)), zone_page_state(zone, NR_KERNEL_STACK_KB), +#ifdef CONFIG_SHADOW_CALL_STACK + zone_page_state(zone, NR_KERNEL_SCS_BYTES) / 1024, +#endif K(zone_page_state(zone, NR_PAGETABLE)), K(zone_page_state(zone, NR_BOUNCE)), K(free_pcp), diff --git a/mm/page_io.c b/mm/page_io.c index f6e1c0789695..ced871419c2d 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -363,10 +364,19 @@ int swap_readpage(struct page *page, bool do_poll) struct swap_info_struct *sis = page_swap_info(page); blk_qc_t qc; struct gendisk *disk; + unsigned long pflags; VM_BUG_ON_PAGE(!PageSwapCache(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageUptodate(page), page); + + /* + * Count submission time as memory stall. When the device is congested, + * or the submitting cgroup IO-throttled, submission can be a + * significant part of overall IO time. + */ + psi_memstall_enter(&pflags); + if (frontswap_load(page) == 0) { SetPageUptodate(page); unlock_page(page); @@ -384,7 +394,7 @@ int swap_readpage(struct page *page, bool do_poll) #endif count_vm_event(PSWPIN); } - return ret; + goto out; } ret = bdev_read_page(sis->bdev, swap_page_sector(page), page); @@ -397,7 +407,7 @@ int swap_readpage(struct page *page, bool do_poll) current->swap_in++; #endif count_vm_event(PSWPIN); - return 0; + goto out; } ret = 0; @@ -433,6 +443,7 @@ int swap_readpage(struct page *page, bool do_poll) bio_put(bio); out: + psi_memstall_leave(&pflags); return ret; } diff --git a/mm/page_owner.c b/mm/page_owner.c index c7bd67dc107a..f98e891b60ec 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -274,7 +274,8 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m, * not matter as the mixed block count will still be correct */ for (; pfn < end_pfn; ) { - if (!pfn_valid(pfn)) { + page = pfn_to_online_page(pfn); + if (!page) { pfn = ALIGN(pfn + 1, MAX_ORDER_NR_PAGES); continue; } @@ -282,13 +283,13 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m, block_end_pfn = ALIGN(pfn + 1, pageblock_nr_pages); block_end_pfn = min(block_end_pfn, end_pfn); - page = pfn_to_page(pfn); pageblock_mt = get_pageblock_migratetype(page); for (; pfn < block_end_pfn; pfn++) { if (!pfn_valid_within(pfn)) continue; + /* The pageblock is online, no need to recheck. */ page = pfn_to_page(pfn); if (page_zone(page) != zone) diff --git a/mm/shmem.c b/mm/shmem.c index 70851a0ad121..f78ef0be8699 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2052,9 +2052,10 @@ unsigned long shmem_get_unmapped_area(struct file *file, /* * Our priority is to support MAP_SHARED mapped hugely; * and support MAP_PRIVATE mapped hugely too, until it is COWed. - * But if caller specified an address hint, respect that as before. + * But if caller specified an address hint and we allocated area there + * successfully, respect that as before. */ - if (uaddr) + if (uaddr == addr) return addr; if (shmem_huge != SHMEM_HUGE_FORCE) { @@ -2088,7 +2089,7 @@ unsigned long shmem_get_unmapped_area(struct file *file, if (inflated_len < len) return addr; - inflated_addr = get_area(NULL, 0, inflated_len, 0, flags); + inflated_addr = get_area(NULL, uaddr, inflated_len, 0, flags); if (IS_ERR_VALUE(inflated_addr)) return addr; if (inflated_addr & ~PAGE_MASK) @@ -2677,31 +2678,33 @@ static void shmem_tag_pins(struct address_space *mapping) void **slot; pgoff_t start; struct page *page; + unsigned int tagged = 0; lru_add_drain(); start = 0; - rcu_read_lock(); + spin_lock_irq(&mapping->tree_lock); radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) { - page = radix_tree_deref_slot(slot); + page = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); if (!page || radix_tree_exception(page)) { if (radix_tree_deref_retry(page)) { slot = radix_tree_iter_retry(&iter); continue; } } else if (page_count(page) - page_mapcount(page) > 1) { - spin_lock_irq(&mapping->tree_lock); radix_tree_tag_set(&mapping->page_tree, iter.index, SHMEM_TAG_PINNED); - spin_unlock_irq(&mapping->tree_lock); } - if (need_resched()) { - slot = radix_tree_iter_resume(slot, &iter); - cond_resched_rcu(); - } + if (++tagged % 1024) + continue; + + slot = radix_tree_iter_resume(slot, &iter); + spin_unlock_irq(&mapping->tree_lock); + cond_resched(); + spin_lock_irq(&mapping->tree_lock); } - rcu_read_unlock(); + spin_unlock_irq(&mapping->tree_lock); } /* @@ -2914,7 +2917,7 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, } shmem_falloc.waitq = &shmem_falloc_waitq; - shmem_falloc.start = unmap_start >> PAGE_SHIFT; + shmem_falloc.start = (u64)unmap_start >> PAGE_SHIFT; shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; spin_lock(&inode->i_lock); inode->i_private = &shmem_falloc; diff --git a/mm/slab.c b/mm/slab.c index e8b1e6ea211f..5dd23f6fd451 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1280,7 +1280,7 @@ void __init kmem_cache_init(void) * Initialize the caches that provide memory for the kmem_cache_node * structures first. Without this, further allocations will bug. */ - kmalloc_caches[INDEX_NODE] = create_kmalloc_cache( + kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE] = create_kmalloc_cache( kmalloc_info[INDEX_NODE].name, kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS); slab_state = PARTIAL_NODE; @@ -1295,7 +1295,7 @@ void __init kmem_cache_init(void) for_each_online_node(nid) { init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid); - init_list(kmalloc_caches[INDEX_NODE], + init_list(kmalloc_caches[KMALLOC_NORMAL][INDEX_NODE], &init_kmem_cache_node[SIZE_NODE + nid], nid); } } diff --git a/mm/slab_common.c b/mm/slab_common.c index c70bd327329b..20da89561fd2 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -917,14 +917,10 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, return s; } -struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; +struct kmem_cache * +kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1] __ro_after_init; EXPORT_SYMBOL(kmalloc_caches); -#ifdef CONFIG_ZONE_DMA -struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; -EXPORT_SYMBOL(kmalloc_dma_caches); -#endif - /* * Conversion table for small slabs sizes / 8 to the index in the * kmalloc array. This is necessary for slabs < 192 since we have non power @@ -984,12 +980,7 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) index = fls(size - 1); } -#ifdef CONFIG_ZONE_DMA - if (unlikely((flags & GFP_DMA))) - return kmalloc_dma_caches[index]; - -#endif - return kmalloc_caches[index]; + return kmalloc_caches[kmalloc_type(flags)][index]; } /* @@ -1003,15 +994,15 @@ const struct kmalloc_info_struct kmalloc_info[] __initconst = { {"kmalloc-16", 16}, {"kmalloc-32", 32}, {"kmalloc-64", 64}, {"kmalloc-128", 128}, {"kmalloc-256", 256}, {"kmalloc-512", 512}, - {"kmalloc-1024", 1024}, {"kmalloc-2048", 2048}, - {"kmalloc-4096", 4096}, {"kmalloc-8192", 8192}, - {"kmalloc-16384", 16384}, {"kmalloc-32768", 32768}, - {"kmalloc-65536", 65536}, {"kmalloc-131072", 131072}, - {"kmalloc-262144", 262144}, {"kmalloc-524288", 524288}, - {"kmalloc-1048576", 1048576}, {"kmalloc-2097152", 2097152}, - {"kmalloc-4194304", 4194304}, {"kmalloc-8388608", 8388608}, - {"kmalloc-16777216", 16777216}, {"kmalloc-33554432", 33554432}, - {"kmalloc-67108864", 67108864} + {"kmalloc-1k", 1024}, {"kmalloc-2k", 2048}, + {"kmalloc-4k", 4096}, {"kmalloc-8k", 8192}, + {"kmalloc-16k", 16384}, {"kmalloc-32k", 32768}, + {"kmalloc-64k", 65536}, {"kmalloc-128k", 131072}, + {"kmalloc-256k", 262144}, {"kmalloc-512k", 524288}, + {"kmalloc-1M", 1048576}, {"kmalloc-2M", 2097152}, + {"kmalloc-4M", 4194304}, {"kmalloc-8M", 8388608}, + {"kmalloc-16M", 16777216}, {"kmalloc-32M", 33554432}, + {"kmalloc-64M", 67108864} }; /* @@ -1061,9 +1052,36 @@ void __init setup_kmalloc_cache_index_table(void) } } -static void __init new_kmalloc_cache(int idx, unsigned long flags) +static const char * +kmalloc_cache_name(const char *prefix, unsigned int size) { - kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name, + + static const char units[3] = "\0kM"; + int idx = 0; + + while (size >= 1024 && (size % 1024 == 0)) { + size /= 1024; + idx++; + } + + return kasprintf(GFP_NOWAIT, "%s-%u%c", prefix, size, units[idx]); +} + +static void __init +new_kmalloc_cache(int idx, int type, unsigned long flags) +{ + const char *name; + + if (type == KMALLOC_RECLAIM) { + flags |= SLAB_RECLAIM_ACCOUNT; + name = kmalloc_cache_name("kmalloc-rcl", + kmalloc_info[idx].size); + BUG_ON(!name); + } else { + name = kmalloc_info[idx].name; + } + + kmalloc_caches[type][idx] = create_kmalloc_cache(name, kmalloc_info[idx].size, flags); } @@ -1074,21 +1092,25 @@ static void __init new_kmalloc_cache(int idx, unsigned long flags) */ void __init create_kmalloc_caches(unsigned long flags) { - int i; + int i, type; - for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { - if (!kmalloc_caches[i]) - new_kmalloc_cache(i, flags); + for (type = KMALLOC_NORMAL; type <= KMALLOC_RECLAIM; type++) { + for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { + if (!kmalloc_caches[type][i]) + new_kmalloc_cache(i, type, flags); - /* - * Caches that are not of the two-to-the-power-of size. - * These have to be created immediately after the - * earlier power of two caches - */ - if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6) - new_kmalloc_cache(1, flags); - if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7) - new_kmalloc_cache(2, flags); + /* + * Caches that are not of the two-to-the-power-of size. + * These have to be created immediately after the + * earlier power of two caches + */ + if (KMALLOC_MIN_SIZE <= 32 && i == 6 && + !kmalloc_caches[type][1]) + new_kmalloc_cache(1, type, flags); + if (KMALLOC_MIN_SIZE <= 64 && i == 7 && + !kmalloc_caches[type][2]) + new_kmalloc_cache(2, type, flags); + } } /* Kmalloc array is now usable */ @@ -1096,16 +1118,15 @@ void __init create_kmalloc_caches(unsigned long flags) #ifdef CONFIG_ZONE_DMA for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) { - struct kmem_cache *s = kmalloc_caches[i]; + struct kmem_cache *s = kmalloc_caches[KMALLOC_NORMAL][i]; if (s) { int size = kmalloc_size(i); - char *n = kasprintf(GFP_NOWAIT, - "dma-kmalloc-%d", size); + const char *n = kmalloc_cache_name("dma-kmalloc", size); BUG_ON(!n); - kmalloc_dma_caches[i] = create_kmalloc_cache(n, - size, SLAB_CACHE_DMA | flags); + kmalloc_caches[KMALLOC_DMA][i] = create_kmalloc_cache( + n, size, SLAB_CACHE_DMA | flags); } } #endif diff --git a/mm/slub.c b/mm/slub.c index 9795e89ff572..cf3d86ad8c7b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1419,12 +1419,15 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, void *old_tail = *tail ? *tail : *head; int rsize; - if (slab_want_init_on_free(s)) { - void *p = NULL; + /* Head and tail of the reconstructed freelist */ + *head = NULL; + *tail = NULL; - do { - object = next; - next = get_freepointer(s, object); + do { + object = next; + next = get_freepointer(s, object); + + if (slab_want_init_on_free(s)) { /* * Clear the object and the metadata, but don't touch * the redzone. @@ -1434,29 +1437,8 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, : 0; memset((char *)object + s->inuse, 0, s->size - s->inuse - rsize); - set_freepointer(s, object, p); - p = object; - } while (object != old_tail); - } -/* - * Compiler cannot detect this function can be removed if slab_free_hook() - * evaluates to nothing. Thus, catch all relevant config debug options here. - */ -#if defined(CONFIG_LOCKDEP) || \ - defined(CONFIG_DEBUG_KMEMLEAK) || \ - defined(CONFIG_DEBUG_OBJECTS_FREE) || \ - defined(CONFIG_KASAN) - - next = *head; - - /* Head and tail of the reconstructed freelist */ - *head = NULL; - *tail = NULL; - - do { - object = next; - next = get_freepointer(s, object); + } /* If object's reuse doesn't have to be delayed */ if (!slab_free_hook(s, object)) { /* Move object to the new freelist */ @@ -1471,9 +1453,6 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, *tail = NULL; return *head != NULL; -#else - return true; -#endif } static void *setup_object(struct kmem_cache *s, struct page *page, @@ -2694,6 +2673,17 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, return p; } +/* + * If the object has been wiped upon free, make sure it's fully initialized by + * zeroing out freelist pointer. + */ +static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s, + void *obj) +{ + if (unlikely(slab_want_init_on_free(s)) && obj) + memset((void *)((char *)obj + s->offset), 0, sizeof(void *)); +} + /* * Inlined fastpath so that allocation functions (kmalloc, kmem_cache_alloc) * have the fastpath folded into their functions. So no function call @@ -2782,12 +2772,8 @@ redo: prefetch_freepointer(s, next_object); stat(s, ALLOC_FASTPATH); } - /* - * If the object has been wiped upon free, make sure it's fully - * initialized by zeroing out freelist pointer. - */ - if (unlikely(slab_want_init_on_free(s)) && object) - memset(object + s->offset, 0, sizeof(void *)); + + maybe_wipe_obj_freeptr(s, object); if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object) memset(object, 0, s->object_size); @@ -3192,6 +3178,15 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, void *object = c->freelist; if (unlikely(!object)) { + /* + * We may have removed an object from c->freelist using + * the fastpath in the previous iteration; in that case, + * c->tid has not been bumped yet. + * Since ___slab_alloc() may reenable interrupts while + * allocating memory, we should bump c->tid now. + */ + c->tid = next_tid(c->tid); + /* * Invoking slow path likely have side-effect * of re-populating per CPU c->freelist @@ -3202,10 +3197,13 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size, goto error; c = this_cpu_ptr(s->cpu_slab); + maybe_wipe_obj_freeptr(s, p[i]); + continue; /* goto for-loop */ } c->freelist = get_freepointer(s, object); p[i] = object; + maybe_wipe_obj_freeptr(s, p[i]); } c->tid = next_tid(c->tid); local_irq_enable(); @@ -4735,6 +4733,7 @@ static int list_locations(struct kmem_cache *s, char *buf, static void __init resiliency_test(void) { u8 *p; + int type = KMALLOC_NORMAL; BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10); @@ -4747,7 +4746,7 @@ static void __init resiliency_test(void) pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n", p + 16); - validate_slab_cache(kmalloc_caches[4]); + validate_slab_cache(kmalloc_caches[type][4]); /* Hmmm... The next two are dangerous */ p = kzalloc(32, GFP_KERNEL); @@ -4756,33 +4755,33 @@ static void __init resiliency_test(void) p); pr_err("If allocated object is overwritten then not detectable\n\n"); - validate_slab_cache(kmalloc_caches[5]); + validate_slab_cache(kmalloc_caches[type][5]); p = kzalloc(64, GFP_KERNEL); p += 64 + (get_cycles() & 0xff) * sizeof(void *); *p = 0x56; pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", p); pr_err("If allocated object is overwritten then not detectable\n\n"); - validate_slab_cache(kmalloc_caches[6]); + validate_slab_cache(kmalloc_caches[type][6]); pr_err("\nB. Corruption after free\n"); p = kzalloc(128, GFP_KERNEL); kfree(p); *p = 0x78; pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p); - validate_slab_cache(kmalloc_caches[7]); + validate_slab_cache(kmalloc_caches[type][7]); p = kzalloc(256, GFP_KERNEL); kfree(p); p[50] = 0x9a; pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p); - validate_slab_cache(kmalloc_caches[8]); + validate_slab_cache(kmalloc_caches[type][8]); p = kzalloc(512, GFP_KERNEL); kfree(p); p[512] = 0xab; pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p); - validate_slab_cache(kmalloc_caches[9]); + validate_slab_cache(kmalloc_caches[type][9]); } #else #ifdef CONFIG_SYSFS @@ -4872,7 +4871,17 @@ static ssize_t show_slab_objects(struct kmem_cache *s, } } - get_online_mems(); + /* + * It is impossible to take "mem_hotplug_lock" here with "kernfs_mutex" + * already held which will conflict with an existing lock order: + * + * mem_hotplug_lock->slab_mutex->kernfs_mutex + * + * We don't really need mem_hotplug_lock (to hold off + * slab_mem_going_offline_callback) here because slab's memory hot + * unplug code doesn't destroy the kmem_cache->node[] data. + */ + #ifdef CONFIG_SLUB_DEBUG if (flags & SO_ALL) { struct kmem_cache_node *n; @@ -4913,7 +4922,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s, x += sprintf(buf + x, " N%d=%lu", node, nodes[node]); #endif - put_online_mems(); kfree(nodes); return x + sprintf(buf + x, "\n"); } diff --git a/mm/swapfile.c b/mm/swapfile.c index 4f9e522643a2..4cc380d5712b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2375,6 +2375,7 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, list_add_tail(&new_se->list, &sis->first_swap_extent.list); return 1; } +EXPORT_SYMBOL_GPL(add_swap_extent); /* * A `swap extent' is a simple thing which maps a contiguous range of pages @@ -2396,9 +2397,8 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, * requirements, they are simply tossed out - we will never use those blocks * for swapping. * - * For S_ISREG swapfiles we set S_SWAPFILE across the life of the swapon. This - * prevents root from shooting her foot off by ftruncating an in-use swapfile, - * which will scribble on the fs. + * For all swap devices we set S_SWAPFILE across the life of the swapon. This + * prevents users from writing to the swap device, which will corrupt memory. * * The amount of disk space which a single swap extent represents varies. * Typically it is in the 1-4 megabyte range. So we can have hundreds of @@ -2661,13 +2661,14 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) inode = mapping->host; if (S_ISBLK(inode->i_mode)) { struct block_device *bdev = I_BDEV(inode); + set_blocksize(bdev, old_block_size); blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); - } else { - inode_lock(inode); - inode->i_flags &= ~S_SWAPFILE; - inode_unlock(inode); } + + inode_lock(inode); + inode->i_flags &= ~S_SWAPFILE; + inode_unlock(inode); filp_close(swap_file, NULL); /* @@ -2895,11 +2896,11 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) p->flags |= SWP_BLKDEV; } else if (S_ISREG(inode->i_mode)) { p->bdev = inode->i_sb->s_bdev; - inode_lock(inode); - if (IS_SWAPFILE(inode)) - return -EBUSY; - } else - return -EINVAL; + } + + inode_lock(inode); + if (IS_SWAPFILE(inode)) + return -EBUSY; return 0; } @@ -3273,6 +3274,17 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) if (error) goto bad_swap; + /* + * Flush any pending IO and dirty mappings before we start using this + * swap device. + */ + inode->i_flags |= S_SWAPFILE; + error = inode_drain_writes(inode); + if (error) { + inode->i_flags &= ~S_SWAPFILE; + goto bad_swap; + } + mutex_lock(&swapon_mutex); prio = -1; if (swap_flags & SWAP_FLAG_PREFER) @@ -3293,8 +3305,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) atomic_inc(&proc_poll_event); wake_up_interruptible(&proc_poll_wait); - if (S_ISREG(inode->i_mode)) - inode->i_flags |= S_SWAPFILE; error = 0; goto out; bad_swap: @@ -3314,7 +3324,7 @@ bad_swap: kvfree(cluster_info); kvfree(frontswap_map); if (swap_file) { - if (inode && S_ISREG(inode->i_mode)) { + if (inode) { inode_unlock(inode); inode = NULL; } @@ -3327,7 +3337,7 @@ out: } if (name) putname(name); - if (inode && S_ISREG(inode->i_mode)) + if (inode) inode_unlock(inode); if (!error) enable_swap_slots_cache(); diff --git a/mm/util.c b/mm/util.c index 842ba5fb662e..b8e849125ecc 100644 --- a/mm/util.c +++ b/mm/util.c @@ -367,7 +367,8 @@ EXPORT_SYMBOL(vm_mmap); * __GFP_RETRY_MAYFAIL is supported, and it should be used only if kmalloc is * preferable to the vmalloc fallback, due to visible performance drawbacks. * - * Any use of gfp flags outside of GFP_KERNEL should be consulted with mm people. + * Please note that any use of gfp flags outside of GFP_KERNEL is careful to not + * fall back to vmalloc. */ void *kvmalloc_node(size_t size, gfp_t flags, int node) { @@ -378,7 +379,8 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node) * vmalloc uses GFP_KERNEL for some internal allocations (e.g page tables) * so the given set of flags has to be compatible. */ - WARN_ON_ONCE((flags & GFP_KERNEL) != GFP_KERNEL); + if ((flags & GFP_KERNEL) != GFP_KERNEL) + return kmalloc_node(size, flags, node); /* * We want to attempt a large physically contiguous block first because @@ -639,8 +641,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin) * Part of the kernel memory, which can be released * under memory pressure. */ - free += global_node_page_state( - NR_INDIRECTLY_RECLAIMABLE_BYTES) >> PAGE_SHIFT; + free += global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE); /* * Leave reserved pages. The pages are not for anonymous pages. diff --git a/mm/vmscan.c b/mm/vmscan.c index 830191d43354..146c63c44de9 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2480,10 +2480,13 @@ out: /* * Scan types proportional to swappiness and * their relative recent reclaim efficiency. - * Make sure we don't miss the last page - * because of a round-off error. + * Make sure we don't miss the last page on + * the offlined memory cgroups because of a + * round-off error. */ - scan = DIV64_U64_ROUND_UP(scan * fraction[file], + scan = mem_cgroup_online(memcg) ? + div64_u64(scan * fraction[file], denominator) : + DIV64_U64_ROUND_UP(scan * fraction[file], denominator); break; case SCAN_FILE: diff --git a/mm/vmstat.c b/mm/vmstat.c index dda6a7c36e07..5d60abdf23b5 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1046,6 +1046,9 @@ const char * const vmstat_text[] = { "nr_mlock", "nr_page_table_pages", "nr_kernel_stack", +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK) + "nr_shadow_call_stack_bytes", +#endif "nr_bounce", #if IS_ENABLED(CONFIG_ZSMALLOC) "nr_zspages", @@ -1091,7 +1094,7 @@ const char * const vmstat_text[] = { "nr_vmscan_immediate_reclaim", "nr_dirtied", "nr_written", - "", /* nr_indirectly_reclaimable */ + "nr_kernel_misc_reclaimable", /* enum writeback_stat_item counters */ "nr_dirty_threshold", @@ -1683,10 +1686,6 @@ static int vmstat_show(struct seq_file *m, void *arg) unsigned long *l = arg; unsigned long off = l - (unsigned long *)m->private; - /* Skip hidden vmstat items. */ - if (*vmstat_text[off] == '\0') - return 0; - seq_puts(m, vmstat_text[off]); seq_put_decimal_ull(m, " ", *l); seq_putc(m, '\n'); @@ -1816,12 +1815,13 @@ static bool need_update(int cpu) /* * The fast way of checking if there are any vmstat diffs. - * This works because the diffs are byte sized items. */ - if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS)) + if (memchr_inv(p->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS * + sizeof(p->vm_stat_diff[0]))) return true; #ifdef CONFIG_NUMA - if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS)) + if (memchr_inv(p->vm_numa_stat_diff, 0, NR_VM_NUMA_STAT_ITEMS * + sizeof(p->vm_numa_stat_diff[0]))) return true; #endif } @@ -1962,7 +1962,7 @@ void __init init_mm_internals(void) #endif #ifdef CONFIG_PROC_FS proc_create("buddyinfo", 0444, NULL, &buddyinfo_file_operations); - proc_create("pagetypeinfo", 0444, NULL, &pagetypeinfo_file_operations); + proc_create("pagetypeinfo", 0400, NULL, &pagetypeinfo_file_operations); proc_create("vmstat", 0444, NULL, &vmstat_file_operations); proc_create("zoneinfo", 0444, NULL, &zoneinfo_file_operations); #endif diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 7dbe2d214d24..801afc0b8c36 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -2108,6 +2108,11 @@ int zs_page_migrate(struct address_space *mapping, struct page *newpage, zs_pool_dec_isolated(pool); } + if (page_zone(newpage) != page_zone(page)) { + dec_zone_page_state(page, NR_ZSPAGES); + inc_zone_page_state(newpage, NR_ZSPAGES); + } + reset_page(page); put_page(page); page = newpage; diff --git a/net/6lowpan/nhc.c b/net/6lowpan/nhc.c index 4fa2fdda174d..9e56fb98f33c 100644 --- a/net/6lowpan/nhc.c +++ b/net/6lowpan/nhc.c @@ -18,7 +18,7 @@ #include "nhc.h" static struct rb_root rb_root = RB_ROOT; -static struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX]; +static struct lowpan_nhc *lowpan_nexthdr_nhcs[NEXTHDR_MAX + 1]; static DEFINE_SPINLOCK(lowpan_nhc_lock); static int lowpan_nhc_insert(struct lowpan_nhc *nhc) diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 0e7afdf86127..235bed825e3a 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -110,6 +110,7 @@ int vlan_check_real_dev(struct net_device *real_dev, void vlan_setup(struct net_device *dev); int register_vlan_dev(struct net_device *dev); void unregister_vlan_dev(struct net_device *dev, struct list_head *head); +void vlan_dev_uninit(struct net_device *dev); bool vlan_dev_inherit_address(struct net_device *dev, struct net_device *real_dev); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index ac4c93c999b0..ed3717dc2d20 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -610,7 +610,8 @@ static int vlan_dev_init(struct net_device *dev) return 0; } -static void vlan_dev_uninit(struct net_device *dev) +/* Note: this function might be called multiple times for the same device. */ +void vlan_dev_uninit(struct net_device *dev) { struct vlan_priority_tci_mapping *pm; struct vlan_dev_priv *vlan = vlan_dev_priv(dev); diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 5e831de3103e..fdf39dd5e755 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -95,11 +95,13 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], struct ifla_vlan_flags *flags; struct ifla_vlan_qos_mapping *m; struct nlattr *attr; - int rem; + int rem, err; if (data[IFLA_VLAN_FLAGS]) { flags = nla_data(data[IFLA_VLAN_FLAGS]); - vlan_dev_change_flags(dev, flags->flags, flags->mask); + err = vlan_dev_change_flags(dev, flags->flags, flags->mask); + if (err) + return err; } if (data[IFLA_VLAN_INGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) { @@ -110,7 +112,9 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[], if (data[IFLA_VLAN_EGRESS_QOS]) { nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) { m = nla_data(attr); - vlan_dev_set_egress_priority(dev, m->from, m->to); + err = vlan_dev_set_egress_priority(dev, m->from, m->to); + if (err) + return err; } } return 0; @@ -157,10 +161,11 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, return -EINVAL; err = vlan_changelink(dev, tb, data, extack); - if (err < 0) - return err; - - return register_vlan_dev(dev); + if (!err) + err = register_vlan_dev(dev); + if (err) + vlan_dev_uninit(dev); + return err; } static inline size_t vlan_qos_map_size(unsigned int n) diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 8ad3ec2610b6..b9e85a4751a6 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -879,15 +879,24 @@ static struct notifier_block aarp_notifier = { static unsigned char aarp_snap_id[] = { 0x00, 0x00, 0x00, 0x80, 0xF3 }; -void __init aarp_proto_init(void) +int __init aarp_proto_init(void) { + int rc; + aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv); - if (!aarp_dl) + if (!aarp_dl) { printk(KERN_CRIT "Unable to register AARP with SNAP.\n"); + return -ENOMEM; + } setup_timer(&aarp_timer, aarp_expire_timeout, 0); aarp_timer.expires = jiffies + sysctl_aarp_expiry_time; add_timer(&aarp_timer); - register_netdevice_notifier(&aarp_notifier); + rc = register_netdevice_notifier(&aarp_notifier); + if (rc) { + del_timer_sync(&aarp_timer); + unregister_snap_client(aarp_dl); + } + return rc; } /* Remove the AARP entries associated with a device. */ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 475f332b1ad2..b4268bd2e557 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1911,9 +1911,6 @@ static unsigned char ddp_snap_id[] = { 0x08, 0x00, 0x07, 0x80, 0x9B }; EXPORT_SYMBOL(atrtr_get_dev); EXPORT_SYMBOL(atalk_find_dev_addr); -static const char atalk_err_snap[] __initconst = - KERN_CRIT "Unable to register DDP with SNAP.\n"; - /* Called by proto.c on kernel start up */ static int __init atalk_init(void) { @@ -1928,17 +1925,23 @@ static int __init atalk_init(void) goto out_proto; ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv); - if (!ddp_dl) - printk(atalk_err_snap); + if (!ddp_dl) { + pr_crit("Unable to register DDP with SNAP.\n"); + rc = -ENOMEM; + goto out_sock; + } dev_add_pack(<alk_packet_type); dev_add_pack(&ppptalk_packet_type); rc = register_netdevice_notifier(&ddp_notifier); if (rc) - goto out_sock; + goto out_snap; + + rc = aarp_proto_init(); + if (rc) + goto out_dev; - aarp_proto_init(); rc = atalk_proc_init(); if (rc) goto out_aarp; @@ -1952,11 +1955,13 @@ out_proc: atalk_proc_exit(); out_aarp: aarp_cleanup_module(); +out_dev: unregister_netdevice_notifier(&ddp_notifier); -out_sock: +out_snap: dev_remove_pack(&ppptalk_packet_type); dev_remove_pack(<alk_packet_type); unregister_snap_client(ddp_dl); +out_sock: sock_unregister(PF_APPLETALK); out_proto: proto_unregister(&ddp_proto); diff --git a/net/atm/common.c b/net/atm/common.c index 9e812c782a37..0fd2d26d4c6e 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -667,7 +667,7 @@ unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait) mask |= POLLHUP; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; /* writable? */ diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 8b3f9441b3a0..7a723e124dbb 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -149,7 +150,7 @@ static void batadv_iv_ogm_orig_free(struct batadv_orig_node *orig_node) * Return: 0 on success, a negative error code otherwise. */ static int batadv_iv_ogm_orig_add_if(struct batadv_orig_node *orig_node, - int max_if_num) + unsigned int max_if_num) { void *data_ptr; size_t old_size; @@ -193,7 +194,8 @@ unlock: */ static void batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num) + unsigned int max_if_num, + unsigned int del_if_num) { size_t chunk_size; size_t if_offset; @@ -231,7 +233,8 @@ batadv_iv_ogm_drop_bcast_own_entry(struct batadv_orig_node *orig_node, */ static void batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num) + unsigned int max_if_num, + unsigned int del_if_num) { size_t if_offset; void *data_ptr; @@ -268,7 +271,8 @@ batadv_iv_ogm_drop_bcast_own_sum_entry(struct batadv_orig_node *orig_node, * Return: 0 on success, a negative error code otherwise. */ static int batadv_iv_ogm_orig_del_if(struct batadv_orig_node *orig_node, - int max_if_num, int del_if_num) + unsigned int max_if_num, + unsigned int del_if_num) { spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); @@ -302,7 +306,8 @@ static struct batadv_orig_node * batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr) { struct batadv_orig_node *orig_node; - int size, hash_added; + int hash_added; + size_t size; orig_node = batadv_orig_hash_find(bat_priv, addr); if (orig_node) @@ -366,14 +371,18 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) unsigned char *ogm_buff; u32 random_seqno; + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + /* randomize initial seqno to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); atomic_set(&hard_iface->bat_iv.ogm_seqno, random_seqno); hard_iface->bat_iv.ogm_buff_len = BATADV_OGM_HLEN; ogm_buff = kmalloc(hard_iface->bat_iv.ogm_buff_len, GFP_ATOMIC); - if (!ogm_buff) + if (!ogm_buff) { + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); return -ENOMEM; + } hard_iface->bat_iv.ogm_buff = ogm_buff; @@ -385,35 +394,59 @@ static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) batadv_ogm_packet->reserved = 0; batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE; + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); + return 0; } static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface) { + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + kfree(hard_iface->bat_iv.ogm_buff); hard_iface->bat_iv.ogm_buff = NULL; + + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; - unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; + void *ogm_buff; - batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + + ogm_buff = hard_iface->bat_iv.ogm_buff; + if (!ogm_buff) + goto unlock; + + batadv_ogm_packet = ogm_buff; ether_addr_copy(batadv_ogm_packet->orig, hard_iface->net_dev->dev_addr); ether_addr_copy(batadv_ogm_packet->prev_sender, hard_iface->net_dev->dev_addr); + +unlock: + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } static void batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; - unsigned char *ogm_buff = hard_iface->bat_iv.ogm_buff; + void *ogm_buff; - batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + + ogm_buff = hard_iface->bat_iv.ogm_buff; + if (!ogm_buff) + goto unlock; + + batadv_ogm_packet = ogm_buff; batadv_ogm_packet->ttl = BATADV_TTL; + +unlock: + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } /* when do we schedule our own ogm to be sent */ @@ -890,7 +923,7 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) u32 i; size_t word_index; u8 *w; - int if_num; + unsigned int if_num; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -911,7 +944,11 @@ batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) } } -static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) +/** + * batadv_iv_ogm_schedule_buff() - schedule submission of hardif ogm buffer + * @hard_iface: interface whose ogm buffer should be transmitted + */ +static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff; @@ -922,8 +959,10 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) u16 tvlv_len = 0; unsigned long send_time; - if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) || - (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)) + lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex); + + /* interface already disabled by batadv_iv_ogm_iface_disable */ + if (!*ogm_buff) return; /* the interface gets activated here to avoid race conditions between @@ -992,6 +1031,17 @@ out: batadv_hardif_put(primary_if); } +static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) +{ + if (hard_iface->if_status == BATADV_IF_NOT_IN_USE || + hard_iface->if_status == BATADV_IF_TO_BE_REMOVED) + return; + + mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); + batadv_iv_ogm_schedule_buff(hard_iface); + mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); +} + /** * batadv_iv_ogm_orig_update - use OGM to update corresponding data in an * originator @@ -1020,7 +1070,7 @@ batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, struct batadv_neigh_node *tmp_neigh_node = NULL; struct batadv_neigh_node *router = NULL; struct batadv_orig_node *orig_node_tmp; - int if_num; + unsigned int if_num; u8 sum_orig, sum_neigh; u8 *neigh_addr; u8 tq_avg; @@ -1179,7 +1229,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, u8 total_count; u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; - int if_num; + unsigned int if_num; unsigned int tq_asym_penalty, inv_asym_penalty; unsigned int combined_tq; unsigned int tq_iface_penalty; @@ -1220,7 +1270,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, orig_node->last_seen = jiffies; /* find packet count of corresponding one hop neighbor */ - spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); + spin_lock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); if_num = if_incoming->if_num; orig_eq_count = orig_neigh_node->bat_iv.bcast_own_sum[if_num]; neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing); @@ -1230,7 +1280,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, } else { neigh_rq_count = 0; } - spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); + spin_unlock_bh(&orig_neigh_node->bat_iv.ogm_cnt_lock); /* pay attention to not get a value bigger than 100 % */ if (orig_eq_count > neigh_rq_count) @@ -1698,9 +1748,9 @@ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, if (is_my_orig) { unsigned long *word; - int offset; + size_t offset; s32 bit_pos; - s16 if_num; + unsigned int if_num; u8 *weight; orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv, @@ -2477,7 +2527,7 @@ batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1, return ret; } -static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface) +static void batadv_iv_iface_enabled(struct batadv_hard_iface *hard_iface) { /* begin scheduling originator messages on that interface */ batadv_iv_ogm_schedule(hard_iface); @@ -2817,8 +2867,8 @@ unlock: static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .name = "BATMAN_IV", .iface = { - .activate = batadv_iv_iface_activate, .enable = batadv_iv_ogm_iface_enable, + .enabled = batadv_iv_iface_enabled, .disable = batadv_iv_ogm_iface_disable, .update_mac = batadv_iv_ogm_iface_update_mac, .primary_set = batadv_iv_ogm_primary_iface_set, diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 371a1f1651b4..eb8cec14b854 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -19,7 +19,6 @@ #include "main.h" #include -#include #include #include #include @@ -623,11 +622,11 @@ static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1, int ret = 0; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - if (WARN_ON(!ifinfo1)) + if (!ifinfo1) goto err_ifinfo1; ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); - if (WARN_ON(!ifinfo2)) + if (!ifinfo2) goto err_ifinfo2; ret = ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput; @@ -649,11 +648,11 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1, bool ret = false; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - if (WARN_ON(!ifinfo1)) + if (!ifinfo1) goto err_ifinfo1; ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); - if (WARN_ON(!ifinfo2)) + if (!ifinfo2) goto err_ifinfo2; threshold = ifinfo1->bat_v.throughput / 4; @@ -815,7 +814,7 @@ static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv, } orig_gw = batadv_gw_node_get(bat_priv, orig_node); - if (!orig_node) + if (!orig_gw) goto out; if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0) diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index e07f636160b6..cec31769bb3f 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include #include @@ -127,14 +129,12 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb, } /** - * batadv_v_ogm_send - periodic worker broadcasting the own OGM - * @work: work queue item + * batadv_v_ogm_send_softif() - periodic worker broadcasting the own OGM + * @bat_priv: the bat priv with all the soft interface information */ -static void batadv_v_ogm_send(struct work_struct *work) +static void batadv_v_ogm_send_softif(struct batadv_priv *bat_priv) { struct batadv_hard_iface *hard_iface; - struct batadv_priv_bat_v *bat_v; - struct batadv_priv *bat_priv; struct batadv_ogm2_packet *ogm_packet; struct sk_buff *skb, *skb_tmp; unsigned char *ogm_buff; @@ -142,8 +142,7 @@ static void batadv_v_ogm_send(struct work_struct *work) u16 tvlv_len = 0; int ret; - bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work); - bat_priv = container_of(bat_v, struct batadv_priv, bat_v); + lockdep_assert_held(&bat_priv->bat_v.ogm_buff_mutex); if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) goto out; @@ -234,6 +233,23 @@ out: return; } +/** + * batadv_v_ogm_send() - periodic worker broadcasting the own OGM + * @work: work queue item + */ +static void batadv_v_ogm_send(struct work_struct *work) +{ + struct batadv_priv_bat_v *bat_v; + struct batadv_priv *bat_priv; + + bat_v = container_of(work, struct batadv_priv_bat_v, ogm_wq.work); + bat_priv = container_of(bat_v, struct batadv_priv, bat_v); + + mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); + batadv_v_ogm_send_softif(bat_priv); + mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); +} + /** * batadv_v_ogm_iface_enable - prepare an interface for B.A.T.M.A.N. V * @hard_iface: the interface to prepare @@ -260,11 +276,15 @@ void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface) struct batadv_priv *bat_priv = netdev_priv(primary_iface->soft_iface); struct batadv_ogm2_packet *ogm_packet; + mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); if (!bat_priv->bat_v.ogm_buff) - return; + goto unlock; ogm_packet = (struct batadv_ogm2_packet *)bat_priv->bat_v.ogm_buff; ether_addr_copy(ogm_packet->orig, primary_iface->net_dev->dev_addr); + +unlock: + mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); } /** @@ -886,6 +906,8 @@ int batadv_v_ogm_init(struct batadv_priv *bat_priv) atomic_set(&bat_priv->bat_v.ogm_seqno, random_seqno); INIT_DELAYED_WORK(&bat_priv->bat_v.ogm_wq, batadv_v_ogm_send); + mutex_init(&bat_priv->bat_v.ogm_buff_mutex); + return 0; } @@ -897,7 +919,11 @@ void batadv_v_ogm_free(struct batadv_priv *bat_priv) { cancel_delayed_work_sync(&bat_priv->bat_v.ogm_wq); + mutex_lock(&bat_priv->bat_v.ogm_buff_mutex); + kfree(bat_priv->bat_v.ogm_buff); bat_priv->bat_v.ogm_buff = NULL; bat_priv->bat_v.ogm_buff_len = 0; + + mutex_unlock(&bat_priv->bat_v.ogm_buff_mutex); } diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index e32ad47c6efd..4957d4824437 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -18,6 +18,7 @@ #include "debugfs.h" #include "main.h" +#include #include #include #include @@ -338,7 +339,26 @@ out: } /** - * batadv_debugfs_del_hardif - delete the base directory for a hard interface + * batadv_debugfs_rename_hardif() - Fix debugfs path for renamed hardif + * @hard_iface: hard interface which was renamed + */ +void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface) +{ + const char *name = hard_iface->net_dev->name; + struct dentry *dir; + struct dentry *d; + + dir = hard_iface->debug_dir; + if (!dir) + return; + + d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name); + if (!d) + pr_err("Can't rename debugfs dir to %s\n", name); +} + +/** + * batadv_debugfs_del_hardif() - delete the base directory for a hard interface * in debugfs. * @hard_iface: hard interface which is deleted. */ @@ -401,6 +421,30 @@ out: return -ENOMEM; } +/** + * batadv_debugfs_rename_meshif() - Fix debugfs path for renamed softif + * @dev: net_device which was renamed + */ +void batadv_debugfs_rename_meshif(struct net_device *dev) +{ + struct batadv_priv *bat_priv = netdev_priv(dev); + const char *name = dev->name; + struct dentry *dir; + struct dentry *d; + + dir = bat_priv->debug_dir; + if (!dir) + return; + + d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name); + if (!d) + pr_err("Can't rename debugfs dir to %s\n", name); +} + +/** + * batadv_debugfs_del_meshif() - Remove interface dependent debugfs entries + * @dev: netdev struct of the soft interface + */ void batadv_debugfs_del_meshif(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index 9c5d4a65b98c..901bbc357bf4 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -29,8 +29,10 @@ struct net_device; void batadv_debugfs_init(void); void batadv_debugfs_destroy(void); int batadv_debugfs_add_meshif(struct net_device *dev); +void batadv_debugfs_rename_meshif(struct net_device *dev); void batadv_debugfs_del_meshif(struct net_device *dev); int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface); +void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface); void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface); #else @@ -48,6 +50,10 @@ static inline int batadv_debugfs_add_meshif(struct net_device *dev) return 0; } +static inline void batadv_debugfs_rename_meshif(struct net_device *dev) +{ +} + static inline void batadv_debugfs_del_meshif(struct net_device *dev) { } @@ -58,6 +64,11 @@ int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface) return 0; } +static inline +void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface) +{ +} + static inline void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface) { diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 8d1d0fdb157e..1519cbf70150 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -243,6 +243,7 @@ static u32 batadv_hash_dat(const void *data, u32 size) u32 hash = 0; const struct batadv_dat_entry *dat = data; const unsigned char *key; + __be16 vid; u32 i; key = (const unsigned char *)&dat->ip; @@ -252,7 +253,8 @@ static u32 batadv_hash_dat(const void *data, u32 size) hash ^= (hash >> 6); } - key = (const unsigned char *)&dat->vid; + vid = htons(dat->vid); + key = (__force const unsigned char *)&vid; for (i = 0; i < sizeof(dat->vid); i++) { hash += key[i]; hash += (hash << 10); diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c index c6d37d22bd12..788d62073964 100644 --- a/net/batman-adv/fragmentation.c +++ b/net/batman-adv/fragmentation.c @@ -500,6 +500,8 @@ int batadv_frag_send_packet(struct sk_buff *skb, */ if (skb->priority >= 256 && skb->priority <= 263) frag_header.priority = skb->priority - 256; + else + frag_header.priority = 0; ether_addr_copy(frag_header.orig, primary_if->net_dev->dev_addr); ether_addr_copy(frag_header.dest, orig_node->orig); diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 2e1a084b0bd2..9fdfa9984f02 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -738,6 +739,11 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, hard_iface->soft_iface = soft_iface; bat_priv = netdev_priv(hard_iface->soft_iface); + if (bat_priv->num_ifaces >= UINT_MAX) { + ret = -ENOSPC; + goto err_dev; + } + ret = netdev_master_upper_dev_link(hard_iface->net_dev, soft_iface, NULL, NULL); if (ret) @@ -790,6 +796,9 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, batadv_hardif_recalc_extra_skbroom(soft_iface); + if (bat_priv->algo_ops->iface.enabled) + bat_priv->algo_ops->iface.enabled(hard_iface); + out: return 0; @@ -845,7 +854,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, batadv_hardif_recalc_extra_skbroom(hard_iface->soft_iface); /* nobody uses this interface anymore */ - if (!bat_priv->num_ifaces) { + if (bat_priv->num_ifaces == 0) { batadv_gw_check_client_stop(bat_priv); if (autodel == BATADV_IF_CLEANUP_AUTO) @@ -881,7 +890,7 @@ batadv_hardif_add_interface(struct net_device *net_dev) if (ret) goto free_if; - hard_iface->if_num = -1; + hard_iface->if_num = 0; hard_iface->net_dev = net_dev; hard_iface->soft_iface = NULL; hard_iface->if_status = BATADV_IF_NOT_IN_USE; @@ -893,6 +902,7 @@ batadv_hardif_add_interface(struct net_device *net_dev) INIT_LIST_HEAD(&hard_iface->list); INIT_HLIST_HEAD(&hard_iface->neigh_list); + mutex_init(&hard_iface->bat_iv.ogm_buff_mutex); spin_lock_init(&hard_iface->neigh_list_lock); kref_init(&hard_iface->refcount); @@ -950,6 +960,32 @@ void batadv_hardif_remove_interfaces(void) rtnl_unlock(); } +/** + * batadv_hard_if_event_softif() - Handle events for soft interfaces + * @event: NETDEV_* event to handle + * @net_dev: net_device which generated an event + * + * Return: NOTIFY_* result + */ +static int batadv_hard_if_event_softif(unsigned long event, + struct net_device *net_dev) +{ + struct batadv_priv *bat_priv; + + switch (event) { + case NETDEV_REGISTER: + batadv_sysfs_add_meshif(net_dev); + bat_priv = netdev_priv(net_dev); + batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS); + break; + case NETDEV_CHANGENAME: + batadv_debugfs_rename_meshif(net_dev); + break; + } + + return NOTIFY_DONE; +} + static int batadv_hard_if_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -958,12 +994,8 @@ static int batadv_hard_if_event(struct notifier_block *this, struct batadv_hard_iface *primary_if = NULL; struct batadv_priv *bat_priv; - if (batadv_softif_is_valid(net_dev) && event == NETDEV_REGISTER) { - batadv_sysfs_add_meshif(net_dev); - bat_priv = netdev_priv(net_dev); - batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS); - return NOTIFY_DONE; - } + if (batadv_softif_is_valid(net_dev)) + return batadv_hard_if_event_softif(event, net_dev); hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface && (event == NETDEV_REGISTER || @@ -1012,6 +1044,9 @@ static int batadv_hard_if_event(struct notifier_block *this, if (batadv_is_wifi_hardif(hard_iface)) hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS; break; + case NETDEV_CHANGENAME: + batadv_debugfs_rename_hardif(hard_iface); + break; default: break; } diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 8e2a4b205257..653eaadcfefb 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1500,7 +1500,7 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) } int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, - int max_if_num) + unsigned int max_if_num) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_algo_ops *bao = bat_priv->algo_ops; @@ -1535,7 +1535,7 @@ err: } int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, - int max_if_num) + unsigned int max_if_num) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index d94220a6d21a..d6ca52220ec0 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -78,9 +78,9 @@ int batadv_orig_seq_print_text(struct seq_file *seq, void *offset); int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset); int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, - int max_if_num); + unsigned int max_if_num); int batadv_orig_hash_del_if(struct batadv_hard_iface *hard_iface, - int max_if_num); + unsigned int max_if_num); struct batadv_orig_node_vlan * batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, unsigned short vid); diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index cd82cff716c7..f59aac06733e 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -950,14 +950,10 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, struct batadv_orig_node *orig_node = NULL, *orig_node_gw = NULL; int check, hdr_size = sizeof(*unicast_packet); enum batadv_subtype subtype; - struct ethhdr *ethhdr; int ret = NET_RX_DROP; bool is4addr, is_gw; unicast_packet = (struct batadv_unicast_packet *)skb->data; - unicast_4addr_packet = (struct batadv_unicast_4addr_packet *)skb->data; - ethhdr = eth_hdr(skb); - is4addr = unicast_packet->packet_type == BATADV_UNICAST_4ADDR; /* the caller function should have already pulled 2 bytes */ if (is4addr) @@ -977,12 +973,14 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, if (!batadv_check_unicast_ttvn(bat_priv, skb, hdr_size)) goto free_skb; + unicast_packet = (struct batadv_unicast_packet *)skb->data; + /* packet for me */ if (batadv_is_my_mac(bat_priv, unicast_packet->dest)) { /* If this is a unicast packet from another backgone gw, * drop it. */ - orig_addr_gw = ethhdr->h_source; + orig_addr_gw = eth_hdr(skb)->h_source; orig_node_gw = batadv_orig_hash_find(bat_priv, orig_addr_gw); if (orig_node_gw) { is_gw = batadv_bla_is_backbone_gw(skb, orig_node_gw, @@ -997,6 +995,8 @@ int batadv_recv_unicast_packet(struct sk_buff *skb, } if (is4addr) { + unicast_4addr_packet = + (struct batadv_unicast_4addr_packet *)skb->data; subtype = unicast_4addr_packet->subtype; batadv_dat_inc_counter(bat_priv, subtype); diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 2c2670b85fa9..dbc516824175 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -872,7 +872,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, struct batadv_orig_node_vlan *vlan; u8 *tt_change_ptr; - rcu_read_lock(); + spin_lock_bh(&orig_node->vlan_list_lock); hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) { num_vlan++; num_entries += atomic_read(&vlan->tt.num_entries); @@ -910,7 +910,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; out: - rcu_read_unlock(); + spin_unlock_bh(&orig_node->vlan_list_lock); return tvlv_len; } @@ -941,15 +941,20 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_softif_vlan *vlan; u16 num_vlan = 0; - u16 num_entries = 0; + u16 vlan_entries = 0; + u16 total_entries = 0; u16 tvlv_len; u8 *tt_change_ptr; int change_offset; - rcu_read_lock(); + spin_lock_bh(&bat_priv->softif_vlan_list_lock); hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + vlan_entries = atomic_read(&vlan->tt.num_entries); + if (vlan_entries < 1) + continue; + num_vlan++; - num_entries += atomic_read(&vlan->tt.num_entries); + total_entries += vlan_entries; } change_offset = sizeof(**tt_data); @@ -957,7 +962,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, /* if tt_len is negative, allocate the space needed by the full table */ if (*tt_len < 0) - *tt_len = batadv_tt_len(num_entries); + *tt_len = batadv_tt_len(total_entries); tvlv_len = *tt_len; tvlv_len += change_offset; @@ -974,6 +979,10 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1); hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { + vlan_entries = atomic_read(&vlan->tt.num_entries); + if (vlan_entries < 1) + continue; + tt_vlan->vid = htons(vlan->vid); tt_vlan->crc = htonl(vlan->tt.crc); @@ -984,7 +993,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; out: - rcu_read_unlock(); + spin_unlock_bh(&bat_priv->softif_vlan_list_lock); return tvlv_len; } @@ -1544,6 +1553,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, * by a given originator * @entry: the TT global entry to check * @orig_node: the originator to search in the list + * @flags: a pointer to store TT flags for the given @entry received + * from @orig_node * * find out if an orig_node is already in the list of a tt_global_entry. * @@ -1551,7 +1562,8 @@ batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, */ static bool batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, - const struct batadv_orig_node *orig_node) + const struct batadv_orig_node *orig_node, + u8 *flags) { struct batadv_tt_orig_list_entry *orig_entry; bool found = false; @@ -1559,6 +1571,10 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, orig_entry = batadv_tt_global_orig_entry_find(entry, orig_node); if (orig_entry) { found = true; + + if (flags) + *flags = orig_entry->flags; + batadv_tt_orig_list_entry_put(orig_entry); } @@ -1741,7 +1757,7 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, if (!(common->flags & BATADV_TT_CLIENT_TEMP)) goto out; if (batadv_tt_global_entry_has_orig(tt_global_entry, - orig_node)) + orig_node, NULL)) goto out_remove; batadv_tt_global_del_orig_list(tt_global_entry); goto add_orig_entry; @@ -2884,23 +2900,46 @@ unlock: } /** - * batadv_tt_local_valid - verify that given tt entry is a valid one + * batadv_tt_local_valid() - verify local tt entry and get flags * @entry_ptr: to be checked local tt entry * @data_ptr: not used but definition required to satisfy the callback prototype + * @flags: a pointer to store TT flags for this client to + * + * Checks the validity of the given local TT entry. If it is, then the provided + * flags pointer is updated. * * Return: true if the entry is a valid, false otherwise. */ -static bool batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr) +static bool batadv_tt_local_valid(const void *entry_ptr, + const void *data_ptr, + u8 *flags) { const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; if (tt_common_entry->flags & BATADV_TT_CLIENT_NEW) return false; + + if (flags) + *flags = tt_common_entry->flags; + return true; } +/** + * batadv_tt_global_valid() - verify global tt entry and get flags + * @entry_ptr: to be checked global tt entry + * @data_ptr: an orig_node object (may be NULL) + * @flags: a pointer to store TT flags for this client to + * + * Checks the validity of the given global TT entry. If it is, then the provided + * flags pointer is updated either with the common (summed) TT flags if data_ptr + * is NULL or the specific, per originator TT flags otherwise. + * + * Return: true if the entry is a valid, false otherwise. + */ static bool batadv_tt_global_valid(const void *entry_ptr, - const void *data_ptr) + const void *data_ptr, + u8 *flags) { const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; const struct batadv_tt_global_entry *tt_global_entry; @@ -2914,7 +2953,8 @@ static bool batadv_tt_global_valid(const void *entry_ptr, struct batadv_tt_global_entry, common); - return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node); + return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node, + flags); } /** @@ -2924,25 +2964,34 @@ static bool batadv_tt_global_valid(const void *entry_ptr, * @hash: hash table containing the tt entries * @tt_len: expected tvlv tt data buffer length in number of bytes * @tvlv_buff: pointer to the buffer to fill with the TT data - * @valid_cb: function to filter tt change entries + * @valid_cb: function to filter tt change entries and to return TT flags * @cb_data: data passed to the filter function as argument + * + * Fills the tvlv buff with the tt entries from the specified hash. If valid_cb + * is not provided then this becomes a no-op. */ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, struct batadv_hashtable *hash, void *tvlv_buff, u16 tt_len, bool (*valid_cb)(const void *, - const void *), + const void *, + u8 *flags), void *cb_data) { struct batadv_tt_common_entry *tt_common_entry; struct batadv_tvlv_tt_change *tt_change; struct hlist_head *head; u16 tt_tot, tt_num_entries = 0; + u8 flags; + bool ret; u32 i; tt_tot = batadv_tt_entries(tt_len); tt_change = (struct batadv_tvlv_tt_change *)tvlv_buff; + if (!valid_cb) + return; + rcu_read_lock(); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; @@ -2952,11 +3001,12 @@ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, if (tt_tot == tt_num_entries) break; - if ((valid_cb) && (!valid_cb(tt_common_entry, cb_data))) + ret = valid_cb(tt_common_entry, cb_data, &flags); + if (!ret) continue; ether_addr_copy(tt_change->addr, tt_common_entry->addr); - tt_change->flags = tt_common_entry->flags; + tt_change->flags = flags; tt_change->vid = htons(tt_common_entry->vid); memset(tt_change->reserved, 0, sizeof(tt_change->reserved)); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index d5e3968619b8..540a9c5c2270 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include /* for linux/wait.h */ @@ -81,11 +82,13 @@ enum batadv_dhcp_recipient { * @ogm_buff: buffer holding the OGM packet * @ogm_buff_len: length of the OGM packet buffer * @ogm_seqno: OGM sequence number - used to identify each OGM + * @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len */ struct batadv_hard_iface_bat_iv { unsigned char *ogm_buff; int ogm_buff_len; atomic_t ogm_seqno; + struct mutex ogm_buff_mutex; }; /** @@ -155,7 +158,7 @@ enum batadv_hard_iface_wifi_flags { */ struct batadv_hard_iface { struct list_head list; - s16 if_num; + unsigned int if_num; char if_status; u8 num_bcasts; u32 wifi_flags; @@ -989,12 +992,14 @@ struct batadv_softif_vlan { * @ogm_buff: buffer holding the OGM packet * @ogm_buff_len: length of the OGM packet buffer * @ogm_seqno: OGM sequence number - used to identify each OGM + * @ogm_buff_mutex: lock protecting ogm_buff and ogm_buff_len * @ogm_wq: workqueue used to schedule OGM transmissions */ struct batadv_priv_bat_v { unsigned char *ogm_buff; int ogm_buff_len; atomic_t ogm_seqno; + struct mutex ogm_buff_mutex; struct delayed_work ogm_wq; }; @@ -1081,7 +1086,7 @@ struct batadv_priv { atomic_t bcast_seqno; atomic_t bcast_queue_left; atomic_t batman_queue_left; - char num_ifaces; + unsigned int num_ifaces; struct kobject *mesh_obj; struct dentry *debug_dir; struct hlist_head forw_bat_list; @@ -1424,6 +1429,7 @@ struct batadv_forw_packet { * @activate: start routing mechanisms when hard-interface is brought up * (optional) * @enable: init routing info when hard-interface is enabled + * @enabled: notification when hard-interface was enabled (optional) * @disable: de-init routing info when hard-interface is disabled * @update_mac: (re-)init mac addresses of the protocol information * belonging to this hard-interface @@ -1432,6 +1438,7 @@ struct batadv_forw_packet { struct batadv_algo_iface_ops { void (*activate)(struct batadv_hard_iface *hard_iface); int (*enable)(struct batadv_hard_iface *hard_iface); + void (*enabled)(struct batadv_hard_iface *hard_iface); void (*disable)(struct batadv_hard_iface *hard_iface); void (*update_mac)(struct batadv_hard_iface *hard_iface); void (*primary_set)(struct batadv_hard_iface *hard_iface); @@ -1479,9 +1486,10 @@ struct batadv_algo_neigh_ops { */ struct batadv_algo_orig_ops { void (*free)(struct batadv_orig_node *orig_node); - int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num); - int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num, - int del_if_num); + int (*add_if)(struct batadv_orig_node *orig_node, + unsigned int max_if_num); + int (*del_if)(struct batadv_orig_node *orig_node, + unsigned int max_if_num, unsigned int del_if_num); #ifdef CONFIG_BATMAN_ADV_DEBUGFS void (*print)(struct batadv_priv *priv, struct seq_file *seq, struct batadv_hard_iface *hard_iface); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index b216e697deac..b48d54783e5d 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -460,7 +460,7 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock, if (sk->sk_state == BT_LISTEN) return bt_accept_poll(sk); - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= POLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); @@ -470,7 +470,7 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock, if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= POLLHUP; - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; if (sk->sk_state == BT_CLOSED) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index bd41b78d131d..1d085eed72d0 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1054,8 +1054,10 @@ struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst, if (!conn) return ERR_PTR(-ENOMEM); - if (hci_explicit_conn_params_set(hdev, dst, dst_type) < 0) + if (hci_explicit_conn_params_set(hdev, dst, dst_type) < 0) { + hci_conn_del(conn); return ERR_PTR(-EBUSY); + } conn->state = BT_CONNECT; set_bit(HCI_CONN_SCANNING, &conn->flags); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6bc679cd3481..ff80a9d41ce1 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -802,8 +802,8 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt) if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT) { struct hci_cp_le_write_def_data_len cp; - cp.tx_len = hdev->le_max_tx_len; - cp.tx_time = hdev->le_max_tx_time; + cp.tx_len = cpu_to_le16(hdev->le_max_tx_len); + cp.tx_time = cpu_to_le16(hdev->le_max_tx_time); hci_req_add(req, HCI_OP_LE_WRITE_DEF_DATA_LEN, sizeof(cp), &cp); } @@ -4215,7 +4215,14 @@ static void hci_rx_work(struct work_struct *work) hci_send_to_sock(hdev, skb); } - if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { + /* If the device has been opened in HCI_USER_CHANNEL, + * the userspace has exclusive access to device. + * When device is HCI_INIT, we still need to process + * the data packets to the driver in order + * to complete its setup(). + */ + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && + !test_bit(HCI_INIT, &hdev->flags)) { kfree_skb(skb); continue; } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index b73ac149de34..759329bec399 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1095,6 +1095,14 @@ static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) instance_flags = get_adv_instance_flags(hdev, instance); + /* If instance already has the flags set skip adding it once + * again. + */ + if (adv_instance && eir_get_data(adv_instance->adv_data, + adv_instance->adv_data_len, EIR_FLAGS, + NULL)) + goto skip_flags; + /* The Add Advertising command allows userspace to set both the general * and limited discoverable flags. */ @@ -1127,6 +1135,7 @@ static u8 create_instance_adv_data(struct hci_dev *hdev, u8 instance, u8 *ptr) } } +skip_flags: if (adv_instance) { memcpy(ptr, adv_instance->adv_data, adv_instance->adv_data_len); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 4a05235929b9..93093d7c3824 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -826,6 +826,8 @@ static int hci_sock_release(struct socket *sock) if (!sk) return 0; + lock_sock(sk); + switch (hci_pi(sk)->channel) { case HCI_CHANNEL_MONITOR: atomic_dec(&monitor_promisc); @@ -873,6 +875,7 @@ static int hci_sock_release(struct socket *sock) skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); + release_sock(sk); sock_put(sk); return 0; } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 0c2219f483d7..ebdf1b0e576a 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4918,10 +4918,8 @@ void __l2cap_physical_cfm(struct l2cap_chan *chan, int result) BT_DBG("chan %p, result %d, local_amp_id %d, remote_amp_id %d", chan, result, local_amp_id, remote_amp_id); - if (chan->state == BT_DISCONN || chan->state == BT_CLOSED) { - l2cap_chan_unlock(chan); + if (chan->state == BT_DISCONN || chan->state == BT_CLOSED) return; - } if (chan->state != BT_CONNECTED) { l2cap_do_create(chan, result, local_amp_id, remote_amp_id); @@ -6819,6 +6817,16 @@ static int l2cap_le_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) chan->sdu_len = sdu_len; chan->sdu_last_frag = skb; + /* Detect if remote is not able to use the selected MPS */ + if (skb->len + L2CAP_SDULEN_SIZE < chan->mps) { + u16 mps_len = skb->len + L2CAP_SDULEN_SIZE; + + /* Adjust the number of credits */ + BT_DBG("chan->mps %u -> %u", chan->mps, mps_len); + chan->mps = mps_len; + l2cap_chan_le_send_credits(chan); + } + return 0; } diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index f6b6a92f1c48..b7cc322acdc8 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -217,6 +217,12 @@ static int br_set_mac_address(struct net_device *dev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; + /* dev_set_mac_addr() can be called by a master device on bridge's + * NETDEV_UNREGISTER, but since it's being destroyed do nothing + */ + if (dev->reg_state != NETREG_REGISTERED) + return -EBUSY; + spin_lock_bh(&br->lock); if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) { /* Mac address will be changed in br_stp_change_bridge_id(). */ diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 89936e0d55c9..6feab2279143 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -643,6 +643,9 @@ static unsigned int br_nf_forward_arp(void *priv, nf_bridge_pull_encap_header(skb); } + if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr)))) + return NF_DROP; + if (arp_hdr(skb)->ar_pln != 4) { if (IS_VLAN_ARP(skb)) nf_bridge_push_encap_header(skb); diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c index 20cbb727df4d..c217276bd76a 100644 --- a/net/bridge/br_nf_core.c +++ b/net/bridge/br_nf_core.c @@ -26,7 +26,8 @@ #endif static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { } diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index dfc86a0199da..1d8c834d9018 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -19,7 +19,6 @@ static unsigned int ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) { const struct ebt_nat_info *info = par->targinfo; - struct net_device *dev; if (!skb_make_writable(skb, 0)) return EBT_DROP; @@ -32,10 +31,22 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par) else skb->pkt_type = PACKET_MULTICAST; } else { - if (xt_hooknum(par) != NF_BR_BROUTING) - dev = br_port_get_rcu(xt_in(par))->br->dev; - else + const struct net_device *dev; + + switch (xt_hooknum(par)) { + case NF_BR_BROUTING: dev = xt_in(par); + break; + case NF_BR_PRE_ROUTING: + dev = br_port_get_rcu(xt_in(par))->br->dev; + break; + default: + dev = NULL; + break; + } + + if (!dev) /* NF_BR_LOCAL_OUT */ + return info->target; if (ether_addr_equal(info->mac, dev->dev_addr)) skb->pkt_type = PACKET_HOST; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 100b4f88179a..a1834ad7422c 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1876,7 +1876,7 @@ static int ebt_buf_count(struct ebt_entries_buf_state *state, unsigned int sz) } static int ebt_buf_add(struct ebt_entries_buf_state *state, - void *data, unsigned int sz) + const void *data, unsigned int sz) { if (state->buf_kern_start == NULL) goto count_only; @@ -1910,7 +1910,7 @@ enum compat_mwt { EBT_COMPAT_TARGET, }; -static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, +static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt, enum compat_mwt compat_mwt, struct ebt_entries_buf_state *state, const unsigned char *base) @@ -1986,22 +1986,23 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt, /* return size of all matches, watchers or target, including necessary * alignment and padding. */ -static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, +static int ebt_size_mwt(const struct compat_ebt_entry_mwt *match32, unsigned int size_left, enum compat_mwt type, struct ebt_entries_buf_state *state, const void *base) { + const char *buf = (const char *)match32; int growth = 0; - char *buf; if (size_left == 0) return 0; - buf = (char *) match32; - - while (size_left >= sizeof(*match32)) { + do { struct ebt_entry_match *match_kern; int ret; + if (size_left < sizeof(*match32)) + return -EINVAL; + match_kern = (struct ebt_entry_match *) state->buf_kern_start; if (match_kern) { char *tmp; @@ -2038,22 +2039,18 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32, if (match_kern) match_kern->match_size = ret; - /* rule should have no remaining data after target */ - if (type == EBT_COMPAT_TARGET && size_left) - return -EINVAL; - match32 = (struct compat_ebt_entry_mwt *) buf; - } + } while (size_left); return growth; } /* called for all ebt_entry structures. */ -static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, +static int size_entry_mwt(const struct ebt_entry *entry, const unsigned char *base, unsigned int *total, struct ebt_entries_buf_state *state) { - unsigned int i, j, startoff, new_offset = 0; + unsigned int i, j, startoff, next_expected_off, new_offset = 0; /* stores match/watchers/targets & offset of next struct ebt_entry: */ unsigned int offsets[4]; unsigned int *offsets_update = NULL; @@ -2140,11 +2137,13 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, return ret; } - startoff = state->buf_user_offset - startoff; - - if (WARN_ON(*total < startoff)) + next_expected_off = state->buf_user_offset - startoff; + if (next_expected_off != entry->next_offset) return -EINVAL; - *total -= startoff; + + if (*total < entry->next_offset) + return -EINVAL; + *total -= entry->next_offset; return 0; } @@ -2165,7 +2164,9 @@ static int compat_copy_entries(unsigned char *data, unsigned int size_user, if (ret < 0) return ret; - WARN_ON(size_remaining); + if (size_remaining) + return -EINVAL; + return state->buf_kern_offset; } diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 632d5a416d97..df936d2f58bd 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -953,7 +953,7 @@ static unsigned int caif_poll(struct file *file, mask |= POLLRDHUP; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue) || + if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || (sk->sk_shutdown & RCV_SHUTDOWN)) mask |= POLLIN | POLLRDNORM; diff --git a/net/core/datagram.c b/net/core/datagram.c index dcb333e95702..85fcca395fad 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -97,7 +97,7 @@ int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p, if (error) goto out_err; - if (sk->sk_receive_queue.prev != skb) + if (READ_ONCE(sk->sk_receive_queue.prev) != skb) goto out; /* Socket shut down? */ @@ -281,7 +281,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags, break; sk_busy_loop(sk, flags & MSG_DONTWAIT); - } while (sk->sk_receive_queue.prev != *last); + } while (READ_ONCE(sk->sk_receive_queue.prev) != *last); error = -EAGAIN; @@ -844,7 +844,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, mask = 0; /* exceptional events? */ - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= POLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); @@ -854,7 +854,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock, mask |= POLLHUP; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; /* Connection-based need to check for termination and startup */ diff --git a/net/core/dev.c b/net/core/dev.c index ced6757c137c..78285aad1857 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3030,7 +3030,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *de } skb = next; - if (netif_xmit_stopped(txq) && skb) { + if (netif_tx_queue_stopped(txq) && skb) { rc = NETDEV_TX_BUSY; break; } @@ -6863,7 +6863,8 @@ int __dev_set_mtu(struct net_device *dev, int new_mtu) if (ops->ndo_change_mtu) return ops->ndo_change_mtu(dev, new_mtu); - dev->mtu = new_mtu; + /* Pairs with all the lockless reads of dev->mtu in the stack */ + WRITE_ONCE(dev->mtu, new_mtu); return 0; } EXPORT_SYMBOL(__dev_set_mtu); @@ -6882,18 +6883,9 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) if (new_mtu == dev->mtu) return 0; - /* MTU must be positive, and in range */ - if (new_mtu < 0 || new_mtu < dev->min_mtu) { - net_err_ratelimited("%s: Invalid MTU %d requested, hw min %d\n", - dev->name, new_mtu, dev->min_mtu); - return -EINVAL; - } - - if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { - net_err_ratelimited("%s: Invalid MTU %d requested, hw max %d\n", - dev->name, new_mtu, dev->max_mtu); - return -EINVAL; - } + err = dev_validate_mtu(dev, new_mtu); + if (err) + return err; if (!netif_device_present(dev)) return -ENODEV; @@ -7653,8 +7645,10 @@ int register_netdevice(struct net_device *dev) goto err_uninit; ret = netdev_register_kobject(dev); - if (ret) + if (ret) { + dev->reg_state = NETREG_UNREGISTERED; goto err_uninit; + } dev->reg_state = NETREG_REGISTERED; __netdev_update_features(dev); @@ -7753,6 +7747,23 @@ int init_dummy_netdev(struct net_device *dev) EXPORT_SYMBOL_GPL(init_dummy_netdev); +int dev_validate_mtu(struct net_device *dev, int new_mtu) +{ + /* MTU must be positive, and in range */ + if (new_mtu < 0 || new_mtu < dev->min_mtu) { + net_err_ratelimited("%s: Invalid MTU %d requested, hw min %d\n", + dev->name, new_mtu, dev->min_mtu); + return -EINVAL; + } + + if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { + net_err_ratelimited("%s: Invalid MTU %d requested, hw max %d\n", + dev->name, new_mtu, dev->max_mtu); + return -EINVAL; + } + return 0; +} + /** * register_netdev - register a network device * @dev: device to register diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 7822defa5a5d..749d48393d06 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -2343,9 +2343,10 @@ static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr) return ret; } -static int ethtool_get_per_queue_coalesce(struct net_device *dev, - void __user *useraddr, - struct ethtool_per_queue_op *per_queue_opt) +static noinline_for_stack int +ethtool_get_per_queue_coalesce(struct net_device *dev, + void __user *useraddr, + struct ethtool_per_queue_op *per_queue_opt) { u32 bit; int ret; @@ -2375,9 +2376,10 @@ static int ethtool_get_per_queue_coalesce(struct net_device *dev, return 0; } -static int ethtool_set_per_queue_coalesce(struct net_device *dev, - void __user *useraddr, - struct ethtool_per_queue_op *per_queue_opt) +static noinline_for_stack int +ethtool_set_per_queue_coalesce(struct net_device *dev, + void __user *useraddr, + struct ethtool_per_queue_op *per_queue_opt) { u32 bit; int i, ret = 0; @@ -2434,7 +2436,7 @@ roll_back: return ret; } -static int ethtool_set_per_queue(struct net_device *dev, +static int noinline_for_stack ethtool_set_per_queue(struct net_device *dev, void __user *useraddr, u32 sub_cmd) { struct ethtool_per_queue_op per_queue_opt; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 9a6d97c1d810..9bb321df0869 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -799,7 +799,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh = nlmsg_data(nlh); frh->family = ops->family; - frh->table = rule->table; + frh->table = rule->table < 256 ? rule->table : RT_TABLE_COMPAT; if (nla_put_u32(skb, FRA_TABLE, rule->table)) goto nla_put_failure; if (nla_put_u32(skb, FRA_SUPPRESS_PREFIXLEN, rule->suppress_prefixlen)) diff --git a/net/core/filter.c b/net/core/filter.c index a3646230fbee..a00955173328 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3260,6 +3260,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_adjust_room_proto; case BPF_FUNC_skb_change_tail: return &bpf_skb_change_tail_proto; + case BPF_FUNC_skb_change_head: + return &bpf_skb_change_head_proto; case BPF_FUNC_skb_get_tunnel_key: return &bpf_skb_get_tunnel_key_proto; case BPF_FUNC_skb_set_tunnel_key: diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index e2e716003ede..071de3013364 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -450,9 +450,10 @@ bool __skb_flow_dissect(const struct sk_buff *skb, nhoff = skb_network_offset(skb); hlen = skb_headlen(skb); #if IS_ENABLED(CONFIG_NET_DSA) - if (unlikely(skb->dev && netdev_uses_dsa(skb->dev))) { + if (unlikely(skb->dev && netdev_uses_dsa(skb->dev) && + proto == htons(ETH_P_XDSA))) { const struct dsa_device_ops *ops; - int offset; + int offset = 0; ops = skb->dev->dsa_ptr->tag_ops; if (ops->flow_dissect && @@ -889,45 +890,34 @@ out_bad: } EXPORT_SYMBOL(__skb_flow_dissect); -static u32 hashrnd __read_mostly; +static siphash_key_t hashrnd __read_mostly; static __always_inline void __flow_hash_secret_init(void) { net_get_random_once(&hashrnd, sizeof(hashrnd)); } -static __always_inline u32 __flow_hash_words(const u32 *words, u32 length, - u32 keyval) +static const void *flow_keys_hash_start(const struct flow_keys *flow) { - return jhash2(words, length, keyval); -} - -static inline const u32 *flow_keys_hash_start(const struct flow_keys *flow) -{ - const void *p = flow; - - BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % sizeof(u32)); - return (const u32 *)(p + FLOW_KEYS_HASH_OFFSET); + BUILD_BUG_ON(FLOW_KEYS_HASH_OFFSET % SIPHASH_ALIGNMENT); + return &flow->FLOW_KEYS_HASH_START_FIELD; } static inline size_t flow_keys_hash_length(const struct flow_keys *flow) { - size_t diff = FLOW_KEYS_HASH_OFFSET + sizeof(flow->addrs); - BUILD_BUG_ON((sizeof(*flow) - FLOW_KEYS_HASH_OFFSET) % sizeof(u32)); - BUILD_BUG_ON(offsetof(typeof(*flow), addrs) != - sizeof(*flow) - sizeof(flow->addrs)); + size_t len = offsetof(typeof(*flow), addrs) - FLOW_KEYS_HASH_OFFSET; switch (flow->control.addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: - diff -= sizeof(flow->addrs.v4addrs); + len += sizeof(flow->addrs.v4addrs); break; case FLOW_DISSECTOR_KEY_IPV6_ADDRS: - diff -= sizeof(flow->addrs.v6addrs); + len += sizeof(flow->addrs.v6addrs); break; case FLOW_DISSECTOR_KEY_TIPC_ADDRS: - diff -= sizeof(flow->addrs.tipcaddrs); + len += sizeof(flow->addrs.tipcaddrs); break; } - return (sizeof(*flow) - diff) / sizeof(u32); + return len; } __be32 flow_get_u32_src(const struct flow_keys *flow) @@ -993,14 +983,15 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys) } } -static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) +static inline u32 __flow_hash_from_keys(struct flow_keys *keys, + const siphash_key_t *keyval) { u32 hash; __flow_hash_consistentify(keys); - hash = __flow_hash_words(flow_keys_hash_start(keys), - flow_keys_hash_length(keys), keyval); + hash = siphash(flow_keys_hash_start(keys), + flow_keys_hash_length(keys), keyval); if (!hash) hash = 1; @@ -1010,12 +1001,13 @@ static inline u32 __flow_hash_from_keys(struct flow_keys *keys, u32 keyval) u32 flow_hash_from_keys(struct flow_keys *keys) { __flow_hash_secret_init(); - return __flow_hash_from_keys(keys, hashrnd); + return __flow_hash_from_keys(keys, &hashrnd); } EXPORT_SYMBOL(flow_hash_from_keys); static inline u32 ___skb_get_hash(const struct sk_buff *skb, - struct flow_keys *keys, u32 keyval) + struct flow_keys *keys, + const siphash_key_t *keyval) { skb_flow_dissect_flow_keys(skb, keys, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); @@ -1063,7 +1055,7 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb) NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); - return __flow_hash_from_keys(&keys, hashrnd); + return __flow_hash_from_keys(&keys, &hashrnd); } EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); @@ -1083,13 +1075,14 @@ void __skb_get_hash(struct sk_buff *skb) __flow_hash_secret_init(); - hash = ___skb_get_hash(skb, &keys, hashrnd); + hash = ___skb_get_hash(skb, &keys, &hashrnd); __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); } EXPORT_SYMBOL(__skb_get_hash); -__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) +__u32 skb_get_hash_perturb(const struct sk_buff *skb, + const siphash_key_t *perturb) { struct flow_keys keys; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index eb3efeabac91..567e431813e5 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -18,6 +18,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include @@ -361,12 +362,14 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift) ret = kmalloc(sizeof(*ret), GFP_ATOMIC); if (!ret) return NULL; - if (size <= PAGE_SIZE) + if (size <= PAGE_SIZE) { buckets = kzalloc(size, GFP_ATOMIC); - else + } else { buckets = (struct neighbour __rcu **) __get_free_pages(GFP_ATOMIC | __GFP_ZERO, get_order(size)); + kmemleak_alloc(buckets, size, 1, GFP_ATOMIC); + } if (!buckets) { kfree(ret); return NULL; @@ -386,10 +389,12 @@ static void neigh_hash_free_rcu(struct rcu_head *head) size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *); struct neighbour __rcu **buckets = nht->hash_buckets; - if (size <= PAGE_SIZE) + if (size <= PAGE_SIZE) { kfree(buckets); - else + } else { + kmemleak_free(buckets); free_pages((unsigned long)buckets, get_order(size)); + } kfree(nht); } @@ -1089,7 +1094,7 @@ static void neigh_update_hhs(struct neighbour *neigh) if (update) { hh = &neigh->hh; - if (hh->hh_len) { + if (READ_ONCE(hh->hh_len)) { write_seqlock_bh(&hh->hh_lock); update(hh, neigh->dev, neigh->ha); write_sequnlock_bh(&hh->hh_lock); @@ -1350,7 +1355,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) struct net_device *dev = neigh->dev; unsigned int seq; - if (dev->header_ops->cache && !neigh->hh.hh_len) + if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len)) neigh_hh_init(neigh); do { @@ -1872,8 +1877,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, goto nla_put_failure; { unsigned long now = jiffies; - unsigned int flush_delta = now - tbl->last_flush; - unsigned int rand_delta = now - tbl->last_rand; + long flush_delta = now - tbl->last_flush; + long rand_delta = now - tbl->last_rand; struct neigh_hash_table *nht; struct ndt_config ndc = { .ndtc_key_len = tbl->key_len, diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index dee57c5ff738..baf771d2d088 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -911,25 +911,30 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) struct kobject *kobj = &queue->kobj; int error = 0; + /* Kobject_put later will trigger rx_queue_release call which + * decreases dev refcount: Take that reference here + */ + dev_hold(queue->dev); + kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, "rx-%u", index); if (error) - return error; - - dev_hold(queue->dev); + goto err; if (dev->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); - if (error) { - kobject_put(kobj); - return error; - } + if (error) + goto err; } kobject_uevent(kobj, KOBJ_ADD); return error; + +err: + kobject_put(kobj); + return error; } #endif /* CONFIG_SYSFS */ @@ -1322,25 +1327,29 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) struct kobject *kobj = &queue->kobj; int error = 0; + /* Kobject_put later will trigger netdev_queue_release call + * which decreases dev refcount: Take that reference here + */ + dev_hold(queue->dev); + kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, "tx-%u", index); if (error) - return error; - - dev_hold(queue->dev); + goto err; #ifdef CONFIG_BQL error = sysfs_create_group(kobj, &dql_group); - if (error) { - kobject_put(kobj); - return error; - } + if (error) + goto err; #endif kobject_uevent(kobj, KOBJ_ADD); - return 0; + +err: + kobject_put(kobj); + return error; } #endif /* CONFIG_SYSFS */ diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 60b88718b1d4..1af25d53f63c 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -854,7 +854,8 @@ static int __init net_ns_init(void) mutex_unlock(&net_mutex); - register_pernet_subsys(&net_ns_ops); + if (register_pernet_subsys(&net_ns_ops)) + panic("Could not register network namespace subsystems"); rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, RTNL_FLAG_DOIT_UNLOCKED); diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 7bf833598615..67feeb207dad 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -57,30 +57,60 @@ static void cgrp_css_free(struct cgroup_subsys_state *css) kfree(css_cls_state(css)); } +/* + * To avoid freezing of sockets creation for tasks with big number of threads + * and opened sockets lets release file_lock every 1000 iterated descriptors. + * New sockets will already have been created with new classid. + */ + +struct update_classid_context { + u32 classid; + unsigned int batch; +}; + +#define UPDATE_CLASSID_BATCH 1000 + static int update_classid_sock(const void *v, struct file *file, unsigned n) { int err; + struct update_classid_context *ctx = (void *)v; struct socket *sock = sock_from_file(file, &err); if (sock) { spin_lock(&cgroup_sk_update_lock); - sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, - (unsigned long)v); + sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid); spin_unlock(&cgroup_sk_update_lock); } + if (--ctx->batch == 0) { + ctx->batch = UPDATE_CLASSID_BATCH; + return n + 1; + } return 0; } +static void update_classid_task(struct task_struct *p, u32 classid) +{ + struct update_classid_context ctx = { + .classid = classid, + .batch = UPDATE_CLASSID_BATCH + }; + unsigned int fd = 0; + + do { + task_lock(p); + fd = iterate_fd(p->files, fd, update_classid_sock, &ctx); + task_unlock(p); + cond_resched(); + } while (fd); +} + static void cgrp_attach(struct cgroup_taskset *tset) { struct cgroup_subsys_state *css; struct task_struct *p; cgroup_taskset_for_each(p, css, tset) { - task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, - (void *)(unsigned long)css_cls_state(css)->classid); - task_unlock(p); + update_classid_task(p, css_cls_state(css)->classid); } } @@ -102,10 +132,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, css_task_iter_start(css, 0, &it); while ((p = css_task_iter_next(&it))) { - task_lock(p); - iterate_fd(p->files, 0, update_classid_sock, - (void *)(unsigned long)cs->classid); - task_unlock(p); + update_classid_task(p, cs->classid); cond_resched(); } css_task_iter_end(&it); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7c1098cf35f4..7657785552df 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1845,6 +1845,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_MAC]) { struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]); + if (ivm->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_mac) err = ops->ndo_set_vf_mac(dev, ivm->vf, @@ -1856,6 +1858,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_VLAN]) { struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]); + if (ivv->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_vlan) err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, @@ -1888,6 +1892,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (len == 0) return -EINVAL; + if (ivvl[0]->vf >= INT_MAX) + return -EINVAL; err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan, ivvl[0]->qos, ivvl[0]->vlan_proto); if (err < 0) @@ -1898,6 +1904,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]); struct ifla_vf_info ivf; + if (ivt->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_get_vf_config) err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf); @@ -1916,6 +1924,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_RATE]) { struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]); + if (ivt->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_rate) err = ops->ndo_set_vf_rate(dev, ivt->vf, @@ -1928,6 +1938,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_SPOOFCHK]) { struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]); + if (ivs->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_spoofchk) err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, @@ -1939,6 +1951,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_LINK_STATE]) { struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]); + if (ivl->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_link_state) err = ops->ndo_set_vf_link_state(dev, ivl->vf, @@ -1952,6 +1966,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) err = -EOPNOTSUPP; ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]); + if (ivrssq_en->vf >= INT_MAX) + return -EINVAL; if (ops->ndo_set_vf_rss_query_en) err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf, ivrssq_en->setting); @@ -1962,6 +1978,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_TRUST]) { struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]); + if (ivt->vf >= INT_MAX) + return -EINVAL; err = -EOPNOTSUPP; if (ops->ndo_set_vf_trust) err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting); @@ -1972,15 +1990,18 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) if (tb[IFLA_VF_IB_NODE_GUID]) { struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]); + if (ivt->vf >= INT_MAX) + return -EINVAL; if (!ops->ndo_set_vf_guid) return -EOPNOTSUPP; - return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID); } if (tb[IFLA_VF_IB_PORT_GUID]) { struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]); + if (ivt->vf >= INT_MAX) + return -EINVAL; if (!ops->ndo_set_vf_guid) return -EOPNOTSUPP; @@ -2523,8 +2544,17 @@ struct net_device *rtnl_create_link(struct net *net, dev->rtnl_link_ops = ops; dev->rtnl_link_state = RTNL_LINK_INITIALIZING; - if (tb[IFLA_MTU]) - dev->mtu = nla_get_u32(tb[IFLA_MTU]); + if (tb[IFLA_MTU]) { + u32 mtu = nla_get_u32(tb[IFLA_MTU]); + int err; + + err = dev_validate_mtu(dev, mtu); + if (err) { + free_netdev(dev); + return ERR_PTR(err); + } + dev->mtu = mtu; + } if (tb[IFLA_ADDRESS]) { memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), nla_len(tb[IFLA_ADDRESS])); diff --git a/net/core/sock.c b/net/core/sock.c index 6d2f02f10b46..35312b9f6f16 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1039,7 +1039,7 @@ set_rcvbuf: break; case SO_INCOMING_CPU: - sk->sk_incoming_cpu = val; + WRITE_ONCE(sk->sk_incoming_cpu, val); break; case SO_CNX_ADVICE: @@ -1351,7 +1351,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_INCOMING_CPU: - v.val = sk->sk_incoming_cpu; + v.val = READ_ONCE(sk->sk_incoming_cpu); break; case SO_MEMINFO: @@ -1684,7 +1684,10 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) atomic_set(&newsk->sk_zckey, 0); sock_reset_flag(newsk, SOCK_DONE); - mem_cgroup_sk_alloc(newsk); + + /* sk->sk_memcg will be populated at accept() time */ + newsk->sk_memcg = NULL; + cgroup_sk_alloc(&newsk->sk_cgrp_data); rcu_read_lock(); @@ -2165,8 +2168,8 @@ static void sk_leave_memory_pressure(struct sock *sk) } else { unsigned long *memory_pressure = sk->sk_prot->memory_pressure; - if (memory_pressure && *memory_pressure) - *memory_pressure = 0; + if (memory_pressure && READ_ONCE(*memory_pressure)) + WRITE_ONCE(*memory_pressure, 0); } } @@ -2357,7 +2360,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) } if (sk_has_memory_pressure(sk)) { - int alloc; + u64 alloc; if (!sk_under_memory_pressure(sk)) return 1; @@ -3381,7 +3384,7 @@ bool sk_busy_loop_end(void *p, unsigned long start_time) { struct sock *sk = p; - return !skb_queue_empty(&sk->sk_receive_queue) || + return !skb_queue_empty_lockless(&sk->sk_receive_queue) || sk_busy_loop_timeout(sk, start_time); } EXPORT_SYMBOL(sk_busy_loop_end); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 144cd1acd7e3..069e3c4fcc44 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -274,6 +274,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, return ret; } +# ifdef CONFIG_HAVE_EBPF_JIT static int proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, @@ -284,6 +285,7 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, return proc_dointvec_minmax(table, write, buffer, lenp, ppos); } +# endif /* CONFIG_HAVE_EBPF_JIT */ static int proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, diff --git a/net/core/utils.c b/net/core/utils.c index 93066bd0305a..b1823e76b877 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -419,6 +419,23 @@ void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, } EXPORT_SYMBOL(inet_proto_csum_replace4); +/** + * inet_proto_csum_replace16 - update layer 4 header checksum field + * @sum: Layer 4 header checksum field + * @skb: sk_buff for the packet + * @from: old IPv6 address + * @to: new IPv6 address + * @pseudohdr: True if layer 4 header checksum includes pseudoheader + * + * Update layer 4 header as per the update in IPv6 src/dst address. + * + * There is no need to update skb->csum in this function, because update in two + * fields a.) IPv6 src/dst address and b.) L4 header checksum cancels each other + * for skb->csum calculation. Whereas inet_proto_csum_replace4 function needs to + * update skb->csum, because update in 3 fields a.) IPv4 src/dst address, + * b.) IPv4 Header checksum and c.) L4 header checksum results in same diff as + * L4 Header checksum for skb->csum calculation. + */ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, const __be32 *from, const __be32 *to, bool pseudohdr) @@ -430,9 +447,6 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_PARTIAL) { *sum = csum_fold(csum_partial(diff, sizeof(diff), ~csum_unfold(*sum))); - if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) - skb->csum = ~csum_partial(diff, sizeof(diff), - ~skb->csum); } else if (pseudohdr) *sum = ~csum_fold(csum_partial(diff, sizeof(diff), csum_unfold(*sum))); diff --git a/net/dccp/feat.c b/net/dccp/feat.c index f227f002c73d..db87d9f58019 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -738,7 +738,12 @@ static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local, if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len)) return -ENOMEM; - return dccp_feat_push_change(fn, feat, is_local, mandatory, &fval); + if (dccp_feat_push_change(fn, feat, is_local, mandatory, &fval)) { + kfree(fval.sp.vec); + return -ENOMEM; + } + + return 0; } /** diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 8e08cea6f178..176bddacc16e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -121,7 +121,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr, inet->inet_sport, inet->inet_dport); - inet->inet_id = dp->dccps_iss ^ jiffies; + inet->inet_id = prandom_u32(); err = dccp_connect(sk); rt = NULL; @@ -417,7 +417,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; - newinet->inet_id = jiffies; + newinet->inet_id = prandom_u32(); if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) goto put_and_exit; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index df042b6d80b8..22876a197ebe 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -56,7 +56,7 @@ #include #include -#define DN_IFREQ_SIZE (sizeof(struct ifreq) - sizeof(struct sockaddr) + sizeof(struct sockaddr_dn)) +#define DN_IFREQ_SIZE (offsetof(struct ifreq, ifr_ifru) + sizeof(struct sockaddr_dn)) static char dn_rt_all_end_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x04,0x00,0x00}; static char dn_rt_all_rt_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x03,0x00,0x00}; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 0bd3afd01dd2..ccc189bc3617 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -118,7 +118,8 @@ static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb , u32 mtu); + struct sk_buff *skb , u32 mtu, + bool confirm_neigh); static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, @@ -259,7 +260,8 @@ static int dn_dst_gc(struct dst_ops *ops) * advertise to the other end). */ static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct dn_route *rt = (struct dn_route *) dst; struct neighbour *n = rt->n; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 045d8a176279..0796355e74c1 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -62,7 +62,7 @@ static struct dsa_switch_tree *dsa_add_dst(u32 tree) return NULL; dst->tree = tree; INIT_LIST_HEAD(&dst->list); - list_add_tail(&dsa_switch_trees, &dst->list); + list_add_tail(&dst->list, &dsa_switch_trees); kref_init(&dst->refcount); return dst; diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index b8c05f1cf47d..f268c5c3eedb 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -41,10 +41,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) struct dsa_slave_priv *p = netdev_priv(dev); u16 *phdr, hdr; - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; - - if (skb_cow_head(skb, 0) < 0) + if (skb_cow_head(skb, QCA_HDR_LEN) < 0) return NULL; skb_push(skb, QCA_HDR_LEN); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 39a28eb83078..b027bafc086d 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -239,7 +239,12 @@ int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 eth->h_proto = type; memcpy(eth->h_source, dev->dev_addr, ETH_ALEN); memcpy(eth->h_dest, neigh->ha, ETH_ALEN); - hh->hh_len = ETH_HLEN; + + /* Pairs with READ_ONCE() in neigh_resolve_output(), + * neigh_hh_output() and neigh_update_hhs(). + */ + smp_store_release(&hh->hh_len, ETH_HLEN); + return 0; } EXPORT_SYMBOL(eth_header_cache); diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index cfe20f15f618..c962c406d7b1 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -281,6 +281,8 @@ static void send_hsr_supervision_frame(struct hsr_port *master, skb->dev->dev_addr, skb->len) <= 0) goto out; skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); if (hsrVer > 0) { hsr_tag = skb_put(skb, sizeof(struct hsr_tag)); diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 56080da4aa77..5fee6ec7c93d 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -32,6 +32,8 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) rcu_read_lock(); /* hsr->node_db, hsr->ports */ port = hsr_port_get_rcu(skb->dev); + if (!port) + goto finish_pass; if (hsr_addr_is_self(port->hsr, eth_hdr(skb)->h_source)) { /* Directly kill frames sent by ourselves */ diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index ec7a5da56129..e873a6a007f2 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -634,7 +634,7 @@ err_sysctl: void lowpan_net_frag_exit(void) { - inet_frags_fini(&lowpan_frags); lowpan_frags_sysctl_unregister(); unregister_pernet_subsys(&lowpan_frags_ops); + inet_frags_fini(&lowpan_frags); } diff --git a/net/ieee802154/nl_policy.c b/net/ieee802154/nl_policy.c index 35c432668454..040983fc15da 100644 --- a/net/ieee802154/nl_policy.c +++ b/net/ieee802154/nl_policy.c @@ -30,7 +30,13 @@ const struct nla_policy ieee802154_policy[IEEE802154_ATTR_MAX + 1] = { [IEEE802154_ATTR_HW_ADDR] = { .type = NLA_HW_ADDR, }, [IEEE802154_ATTR_PAN_ID] = { .type = NLA_U16, }, [IEEE802154_ATTR_CHANNEL] = { .type = NLA_U8, }, + [IEEE802154_ATTR_BCN_ORD] = { .type = NLA_U8, }, + [IEEE802154_ATTR_SF_ORD] = { .type = NLA_U8, }, + [IEEE802154_ATTR_PAN_COORD] = { .type = NLA_U8, }, + [IEEE802154_ATTR_BAT_EXT] = { .type = NLA_U8, }, + [IEEE802154_ATTR_COORD_REALIGN] = { .type = NLA_U8, }, [IEEE802154_ATTR_PAGE] = { .type = NLA_U8, }, + [IEEE802154_ATTR_DEV_TYPE] = { .type = NLA_U8, }, [IEEE802154_ATTR_COORD_SHORT_ADDR] = { .type = NLA_U16, }, [IEEE802154_ATTR_COORD_HW_ADDR] = { .type = NLA_HW_ADDR, }, [IEEE802154_ATTR_COORD_PAN_ID] = { .type = NLA_U16, }, diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index f0165c5f376b..1c21dc5d6dd4 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1738,6 +1738,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) { unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; + int res; if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES) return; @@ -1749,7 +1750,11 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) memset(opt, 0, sizeof(struct ip_options)); opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); - if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL)) + rcu_read_lock(); + res = __ip_options_compile(dev_net(skb->dev), opt, skb, NULL); + rcu_read_unlock(); + + if (res) return; if (gateway) diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index f915abff1350..d3eddfd13875 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -75,7 +75,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len inet->inet_dport = usin->sin_port; sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); - inet->inet_id = jiffies; + inet->inet_id = prandom_u32(); sk_dst_set(sk, &rt->dst); err = 0; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5264510c9983..5f020c051af9 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1426,11 +1426,6 @@ skip: } } -static bool inetdev_valid_mtu(unsigned int mtu) -{ - return mtu >= IPV4_MIN_MTU; -} - static void inetdev_send_gratuitous_arp(struct net_device *dev, struct in_device *in_dev) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e76b8a7bb891..eff703cb13b6 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1471,8 +1471,8 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local) int ret = 0; unsigned int hash = fib_laddr_hashfn(local); struct hlist_head *head = &fib_info_laddrhash[hash]; + int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; struct net *net = dev_net(dev); - int tb_id = l3mdev_fib_table(dev); struct fib_info *fi; if (!fib_info_laddrhash || local == 0) diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index b798862b6be5..4a5e55e94a9e 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -60,7 +60,9 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version) } EXPORT_SYMBOL_GPL(gre_del_protocol); -/* Fills in tpi and returns header length to be pulled. */ +/* Fills in tpi and returns header length to be pulled. + * Note that caller must use pskb_may_pull() before pulling GRE header. + */ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err, __be16 proto, int nhs) { @@ -86,13 +88,14 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, options = (__be32 *)(greh + 1); if (greh->flags & GRE_CSUM) { - if (skb_checksum_simple_validate(skb)) { + if (!skb_checksum_simple_validate(skb)) { + skb_checksum_try_convert(skb, IPPROTO_GRE, 0, + null_compute_pseudo); + } else if (csum_err) { *csum_err = true; return -EINVAL; } - skb_checksum_try_convert(skb, IPPROTO_GRE, 0, - null_compute_pseudo); options++; } @@ -113,8 +116,14 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + u8 _val, *val; + + val = skb_header_pointer(skb, nhs + hdr_len, + sizeof(_val), &_val); + if (!val) + return -EINVAL; tpi->proto = proto; - if ((*(u8 *)options & 0xF0) != 0x40) + if ((*val & 0xF0) != 0x40) hdr_len += 4; } tpi->hdr_len = hdr_len; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index f9d790b058d2..995ef3d23368 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -254,10 +254,11 @@ bool icmp_global_allow(void) bool rc = false; /* Check if token bucket is empty and cannot be refilled - * without taking the spinlock. + * without taking the spinlock. The READ_ONCE() are paired + * with the following WRITE_ONCE() in this same function. */ - if (!icmp_global.credit) { - delta = min_t(u32, now - icmp_global.stamp, HZ); + if (!READ_ONCE(icmp_global.credit)) { + delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ); if (delta < HZ / 50) return false; } @@ -267,14 +268,14 @@ bool icmp_global_allow(void) if (delta >= HZ / 50) { incr = sysctl_icmp_msgs_per_sec * delta / HZ ; if (incr) - icmp_global.stamp = now; + WRITE_ONCE(icmp_global.stamp, now); } credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst); if (credit) { credit--; rc = true; } - icmp_global.credit = credit; + WRITE_ONCE(icmp_global.credit, credit); spin_unlock(&icmp_global.lock); return rc; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 9d6b172caf6c..7826fba34b14 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -475,8 +475,28 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) } spin_unlock_bh(&queue->fastopenq.lock); } + out: release_sock(sk); + if (newsk && mem_cgroup_sockets_enabled) { + int amt; + + /* atomically get the memory usage, set and charge the + * newsk->sk_memcg. + */ + lock_sock(newsk); + + /* The socket has not been accepted yet, no need to look at + * newsk->sk_wmem_queued. + */ + amt = sk_mem_pages(newsk->sk_forward_alloc + + atomic_read(&newsk->sk_rmem_alloc)); + mem_cgroup_sk_alloc(newsk); + if (newsk->sk_memcg && amt) + mem_cgroup_charge_skmem(newsk->sk_memcg, amt); + + release_sock(newsk); + } if (req) reqsk_put(req); return newsk; @@ -936,7 +956,7 @@ struct sock *inet_csk_reqsk_queue_add(struct sock *sk, req->sk = child; req->dl_next = NULL; if (queue->rskq_accept_head == NULL) - queue->rskq_accept_head = req; + WRITE_ONCE(queue->rskq_accept_head, req); else queue->rskq_accept_tail->dl_next = req; queue->rskq_accept_tail = req; @@ -1088,7 +1108,7 @@ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) if (!dst) goto out; } - dst->ops->update_pmtu(dst, sk, NULL, mtu); + dst->ops->update_pmtu(dst, sk, NULL, mtu, true); dst = __sk_dst_check(sk, 0); if (!dst) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 33edccfebc30..7ba013d6c00a 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -105,13 +105,9 @@ static size_t inet_sk_attr_size(struct sock *sk, aux = handler->idiag_get_aux_size(sk, net_admin); return nla_total_size(sizeof(struct tcp_info)) - + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ - + nla_total_size(1) /* INET_DIAG_TOS */ - + nla_total_size(1) /* INET_DIAG_TCLASS */ - + nla_total_size(4) /* INET_DIAG_MARK */ - + nla_total_size(4) /* INET_DIAG_CLASS_ID */ - + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) + nla_total_size(TCP_CA_NAME_MAX) + nla_total_size(sizeof(struct tcpvegas_info)) @@ -152,6 +148,24 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark)) goto errout; + if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || + ext & (1 << (INET_DIAG_TCLASS - 1))) { + u32 classid = 0; + +#ifdef CONFIG_SOCK_CGROUP_DATA + classid = sock_cgroup_classid(&sk->sk_cgrp_data); +#endif + /* Fallback to socket priority if class id isn't set. + * Classful qdiscs use it as direct reference to class. + * For cgroup2 classid is always zero. + */ + if (!classid) + classid = sk->sk_priority; + + if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) + goto errout; + } + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->idiag_inode = sock_i_ino(sk); @@ -289,24 +303,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto errout; } - if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || - ext & (1 << (INET_DIAG_TCLASS - 1))) { - u32 classid = 0; - -#ifdef CONFIG_SOCK_CGROUP_DATA - classid = sock_cgroup_classid(&sk->sk_cgrp_data); -#endif - /* Fallback to socket priority if class id isn't set. - * Classful qdiscs use it as direct reference to class. - * For cgroup2 classid is always zero. - */ - if (!classid) - classid = sk->sk_priority; - - if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) - goto errout; - } - out: nlmsg_end(skb, nlh); return 0; @@ -911,11 +907,12 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct inet_listen_hashbucket *ilb; + struct hlist_nulls_node *node; num = 0; ilb = &hashinfo->listening_hash[i]; spin_lock(&ilb->lock); - sk_for_each(sk, &ilb->head) { + sk_nulls_for_each(sk, node, &ilb->nulls_head) { struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 24b066c32e06..0af13f5bdc9a 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -193,7 +193,7 @@ static inline int compute_score(struct sock *sk, struct net *net, if (sk->sk_bound_dev_if) score += 4; } - if (sk->sk_incoming_cpu == raw_smp_processor_id()) + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } return score; @@ -219,9 +219,10 @@ struct sock *__inet_lookup_listener(struct net *net, int score, hiscore = 0, matches = 0, reuseport = 0; bool exact_dif = inet_exact_dif_match(net, skb); struct sock *sk, *result = NULL; + struct hlist_nulls_node *node; u32 phash = 0; - sk_for_each_rcu(sk, &ilb->head) { + sk_nulls_for_each_rcu(sk, node, &ilb->nulls_head) { score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { @@ -442,10 +443,11 @@ static int inet_reuseport_add_sock(struct sock *sk, struct inet_listen_hashbucket *ilb) { struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash; + const struct hlist_nulls_node *node; struct sock *sk2; kuid_t uid = sock_i_uid(sk); - sk_for_each_rcu(sk2, &ilb->head) { + sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) { if (sk2 != sk && sk2->sk_family == sk->sk_family && ipv6_only_sock(sk2) == ipv6_only_sock(sk) && @@ -480,9 +482,9 @@ int __inet_hash(struct sock *sk, struct sock *osk) } if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && sk->sk_family == AF_INET6) - hlist_add_tail_rcu(&sk->sk_node, &ilb->head); + __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head); else - hlist_add_head_rcu(&sk->sk_node, &ilb->head); + __sk_nulls_add_node_rcu(sk, &ilb->nulls_head); sock_set_flag(sk, SOCK_RCU_FREE); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); unlock: @@ -525,10 +527,7 @@ void inet_unhash(struct sock *sk) spin_lock_bh(lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); - if (listener) - done = __sk_del_node_init(sk); - else - done = __sk_nulls_del_node_init_rcu(sk); + done = __sk_nulls_del_node_init_rcu(sk); if (done) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); spin_unlock_bh(lock); @@ -664,7 +663,8 @@ void inet_hashinfo_init(struct inet_hashinfo *h) for (i = 0; i < INET_LHTABLE_SIZE; i++) { spin_lock_init(&h->listening_hash[i].lock); - INIT_HLIST_HEAD(&h->listening_hash[i].head); + INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head, + i + LISTENING_NULLS_BASE); } } EXPORT_SYMBOL_GPL(inet_hashinfo_init); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index f9cef27907ed..f94881412d5b 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -159,7 +159,12 @@ static void inet_peer_gc(struct inet_peer_base *base, base->total / inet_peer_threshold * HZ; for (i = 0; i < gc_cnt; i++) { p = gc_stack[i]; - delta = (__u32)jiffies - p->dtime; + + /* The READ_ONCE() pairs with the WRITE_ONCE() + * in inet_putpeer() + */ + delta = (__u32)jiffies - READ_ONCE(p->dtime); + if (delta < ttl || !refcount_dec_if_one(&p->refcnt)) gc_stack[i] = NULL; } @@ -236,7 +241,10 @@ EXPORT_SYMBOL_GPL(inet_getpeer); void inet_putpeer(struct inet_peer *p) { - p->dtime = (__u32)jiffies; + /* The WRITE_ONCE() pairs with itself (we run lockless) + * and the READ_ONCE() in inet_peer_gc() + */ + WRITE_ONCE(p->dtime, (__u32)jiffies); if (refcount_dec_and_test(&p->refcnt)) call_rcu(&p->rcu, inetpeer_free_rcu); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 347be2ea78d4..9940a59306b5 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -230,13 +230,10 @@ static void gre_err(struct sk_buff *skb, u32 info) const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct tnl_ptk_info tpi; - bool csum_err = false; - if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), - iph->ihl * 4) < 0) { - if (!csum_err) /* ignore csum errors. */ - return; - } + if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP), + iph->ihl * 4) < 0) + return; if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, @@ -592,6 +589,9 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, truncate = true; } + if (tun_info->options_len < sizeof(*md)) + goto err_free_rt; + md = ip_tunnel_info_opts(tun_info); if (!md) goto err_free_rt; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 78a1d53e5310..de20564227a1 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1140,15 +1140,19 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, rt = *rtp; if (unlikely(!rt)) return -EFAULT; - /* - * We steal reference to this route, caller should not release it - */ - *rtp = NULL; + cork->fragsize = ip_sk_use_pmtu(sk) ? - dst_mtu(&rt->dst) : rt->dst.dev->mtu; + dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu); + + if (!inetdev_valid_mtu(cork->fragsize)) + return -ENETUNREACH; cork->gso_size = sk->sk_type == SOCK_DGRAM ? ipc->gso_size : 0; + cork->dst = &rt->dst; + /* We stole this route, caller should not release it. */ + *rtp = NULL; + cork->length = 0; cork->ttl = ipc->ttl; cork->tos = ipc->tos; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index fabc299cb875..404dc765f2bf 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -521,7 +521,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, else mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { if (!skb_is_gso(skb) && @@ -661,13 +661,19 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, dst = tnl_params->daddr; if (dst == 0) { /* NBMA tunnel */ + struct ip_tunnel_info *tun_info; if (!skb_dst(skb)) { dev->stats.tx_fifo_errors++; goto tx_error; } - if (skb->protocol == htons(ETH_P_IP)) { + tun_info = skb_tunnel_info(skb); + if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) && + ip_tunnel_info_af(tun_info) == AF_INET && + tun_info->key.u.ipv4.dst) + dst = tun_info->key.u.ipv4.dst; + else if (skb->protocol == htons(ETH_P_IP)) { rt = skb_rtable(skb); dst = rt_nexthop(rt, inner_iph->daddr); } @@ -1196,10 +1202,8 @@ int ip_tunnel_init(struct net_device *dev) iph->version = 4; iph->ihl = 5; - if (tunnel->collect_md) { - dev->features |= NETIF_F_NETNS_LOCAL; + if (tunnel->collect_md) netif_keep_dst(dev); - } return 0; } EXPORT_SYMBOL_GPL(ip_tunnel_init); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index c07065b7e3b0..59384ffe89f7 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -208,8 +208,17 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, int mtu; if (!dst) { - dev->stats.tx_carrier_errors++; - goto tx_error_icmp; + struct rtable *rt; + + fl->u.ip4.flowi4_oif = dev->ifindex; + fl->u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; + rt = __ip_route_output_key(dev_net(dev), &fl->u.ip4); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error_icmp; + } + dst = &rt->dst; + skb_dst_set(skb, dst); } dst_hold(dst); @@ -244,7 +253,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, mtu = dst_mtu(dst); if (skb->len > mtu) { - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 356ae7da4f16..6dd727e0a72f 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -394,10 +394,11 @@ next: ; return 1; } -static inline int check_target(struct arpt_entry *e, const char *name) +static int check_target(struct arpt_entry *e, struct net *net, const char *name) { struct xt_entry_target *t = arpt_get_target(e); struct xt_tgchk_param par = { + .net = net, .table = name, .entryinfo = e, .target = t->u.kernel.target, @@ -409,8 +410,9 @@ static inline int check_target(struct arpt_entry *e, const char *name) return xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); } -static inline int -find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, +static int +find_check_entry(struct arpt_entry *e, struct net *net, const char *name, + unsigned int size, struct xt_percpu_counter_alloc_state *alloc_state) { struct xt_entry_target *t; @@ -429,7 +431,7 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, } t->u.kernel.target = target; - ret = check_target(e, name); + ret = check_target(e, net, name); if (ret) goto err; return 0; @@ -504,12 +506,13 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, return 0; } -static inline void cleanup_entry(struct arpt_entry *e) +static void cleanup_entry(struct arpt_entry *e, struct net *net) { struct xt_tgdtor_param par; struct xt_entry_target *t; t = arpt_get_target(e); + par.net = net; par.target = t->u.kernel.target; par.targinfo = t->data; par.family = NFPROTO_ARP; @@ -522,7 +525,9 @@ static inline void cleanup_entry(struct arpt_entry *e) /* Checks and translates the user-supplied table segment (held in * newinfo). */ -static int translate_table(struct xt_table_info *newinfo, void *entry0, +static int translate_table(struct net *net, + struct xt_table_info *newinfo, + void *entry0, const struct arpt_replace *repl) { struct xt_percpu_counter_alloc_state alloc_state = { 0 }; @@ -586,7 +591,7 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { - ret = find_check_entry(iter, repl->name, repl->size, + ret = find_check_entry(iter, net, repl->name, repl->size, &alloc_state); if (ret != 0) break; @@ -597,7 +602,7 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, xt_entry_foreach(iter, entry0, newinfo->size) { if (i-- == 0) break; - cleanup_entry(iter); + cleanup_entry(iter, net); } return ret; } @@ -922,7 +927,7 @@ static int __do_replace(struct net *net, const char *name, /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries; xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) - cleanup_entry(iter); + cleanup_entry(iter, net); xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, @@ -974,7 +979,7 @@ static int do_replace(struct net *net, const void __user *user, goto free_newinfo; } - ret = translate_table(newinfo, loc_cpu_entry, &tmp); + ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; @@ -986,7 +991,7 @@ static int do_replace(struct net *net, const void __user *user, free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) - cleanup_entry(iter); + cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1149,7 +1154,8 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, } } -static int translate_compat_table(struct xt_table_info **pinfo, +static int translate_compat_table(struct net *net, + struct xt_table_info **pinfo, void **pentry0, const struct compat_arpt_replace *compatr) { @@ -1217,7 +1223,7 @@ static int translate_compat_table(struct xt_table_info **pinfo, repl.num_counters = 0; repl.counters = NULL; repl.size = newinfo->size; - ret = translate_table(newinfo, entry1, &repl); + ret = translate_table(net, newinfo, entry1, &repl); if (ret) goto free_newinfo; @@ -1270,7 +1276,7 @@ static int compat_do_replace(struct net *net, void __user *user, goto free_newinfo; } - ret = translate_compat_table(&newinfo, &loc_cpu_entry, &tmp); + ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; @@ -1282,7 +1288,7 @@ static int compat_do_replace(struct net *net, void __user *user, free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) - cleanup_entry(iter); + cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; @@ -1509,7 +1515,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len return ret; } -static void __arpt_unregister_table(struct xt_table *table) +static void __arpt_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; @@ -1521,7 +1527,7 @@ static void __arpt_unregister_table(struct xt_table *table) /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) - cleanup_entry(iter); + cleanup_entry(iter, net); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); @@ -1546,7 +1552,7 @@ int arpt_register_table(struct net *net, loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(newinfo, loc_cpu_entry, repl); + ret = translate_table(net, newinfo, loc_cpu_entry, repl); if (ret != 0) goto out_free; @@ -1561,7 +1567,7 @@ int arpt_register_table(struct net *net, ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); if (ret != 0) { - __arpt_unregister_table(new_table); + __arpt_unregister_table(net, new_table); *res = NULL; } @@ -1576,7 +1582,7 @@ void arpt_unregister_table(struct net *net, struct xt_table *table, const struct nf_hook_ops *ops) { nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks)); - __arpt_unregister_table(table); + __arpt_unregister_table(net, table); } /* The built-in targets: standard (NULL) and error. */ diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c index 0c366aad89cb..b531fe204323 100644 --- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c @@ -105,12 +105,26 @@ static int masq_device_event(struct notifier_block *this, return NOTIFY_DONE; } +static int inet_cmp(struct nf_conn *ct, void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; + struct net_device *dev = ifa->ifa_dev->dev; + struct nf_conntrack_tuple *tuple; + + if (!device_cmp(ct, (void *)(long)dev->ifindex)) + return 0; + + tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + + return ifa->ifa_address == tuple->dst.u3.ip; +} + static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev; - struct netdev_notifier_info info; + struct net *net = dev_net(idev->dev); /* The masq_dev_notifier will catch the case of the device going * down. So if the inetdev is dead and being destroyed we have @@ -120,8 +134,10 @@ static int masq_inet_event(struct notifier_block *this, if (idev->dead) return NOTIFY_DONE; - netdev_notifier_info_init(&info, idev->dev); - return masq_device_event(this, event, &info); + if (event == NETDEV_DOWN) + nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0); + + return NOTIFY_DONE; } static struct notifier_block masq_dev_notifier = { diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index 6367ecdf76c4..1d84b02ec765 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -99,8 +99,9 @@ static int raw_diag_dump_one(struct sk_buff *in_skb, if (IS_ERR(sk)) return PTR_ERR(sk); - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + 64, + rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64, GFP_KERNEL); if (!rep) { sock_put(sk); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 1af0345879a1..b958202d27ff 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -145,7 +145,8 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu); + struct sk_buff *skb, u32 mtu, + bool confirm_neigh); static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static void ipv4_dst_destroy(struct dst_entry *dst); @@ -1042,7 +1043,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) } static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct rtable *rt = (struct rtable *) dst; struct flowi4 fl4; @@ -2356,14 +2358,17 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, int orig_oif = fl4->flowi4_oif; unsigned int flags = 0; struct rtable *rth; - int err = -ENETUNREACH; + int err; if (fl4->saddr) { - rth = ERR_PTR(-EINVAL); if (ipv4_is_multicast(fl4->saddr) || ipv4_is_lbcast(fl4->saddr) || - ipv4_is_zeronet(fl4->saddr)) + ipv4_is_zeronet(fl4->saddr)) { + rth = ERR_PTR(-EINVAL); goto out; + } + + rth = ERR_PTR(-ENETUNREACH); /* I removed check for oif == dev_out->oif here. It was wrong for two reasons: @@ -2531,7 +2536,8 @@ static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) } static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c1f59a53f68f..0c69b66d93d7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -328,7 +328,7 @@ void tcp_enter_memory_pressure(struct sock *sk) { unsigned long val; - if (tcp_memory_pressure) + if (READ_ONCE(tcp_memory_pressure)) return; val = jiffies; @@ -343,7 +343,7 @@ void tcp_leave_memory_pressure(struct sock *sk) { unsigned long val; - if (!tcp_memory_pressure) + if (!READ_ONCE(tcp_memory_pressure)) return; val = xchg(&tcp_memory_pressure, 0); if (val) @@ -574,7 +574,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) } /* This barrier is coupled with smp_wmb() in tcp_reset() */ smp_rmb(); - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= POLLERR; return mask; @@ -1787,7 +1787,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); - if (sk_can_busy_loop(sk) && skb_queue_empty(&sk->sk_receive_queue) && + if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) && (sk->sk_state == TCP_ESTABLISHED)) sk_busy_loop(sk, nonblock); @@ -2363,9 +2363,11 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; + tp->delivered = 0; tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; tcp_clear_retrans(tp); + tp->total_retrans = 0; inet_csk_delack_init(sk); /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 * issue in __tcp_select_window() @@ -2377,8 +2379,12 @@ int tcp_disconnect(struct sock *sk, int flags) dst_release(sk->sk_rx_dst); sk->sk_rx_dst = NULL; tcp_saved_syn_free(tp); + tp->segs_in = 0; + tp->segs_out = 0; tp->bytes_acked = 0; tp->bytes_received = 0; + tp->data_segs_in = 0; + tp->data_segs_out = 0; /* Clean up fastopen related fields */ tcp_free_fastopen_req(tp); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 06f247ca9197..434ad1e72447 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -678,8 +678,7 @@ static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) * bandwidth sample. Delivered is in packets and interval_us in uS and * ratio will be <<1 for most connections. So delivered is first scaled. */ - bw = (u64)rs->delivered * BW_UNIT; - do_div(bw, rs->interval_us); + bw = div64_long((u64)rs->delivered * BW_UNIT, rs->interval_us); /* If this sample is application-limited, it is likely to have a very * low delivered count that represents application behavior rather than diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 0b7a1d697c8c..67b5ec7554e7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -933,9 +933,10 @@ static void tcp_update_reordering(struct sock *sk, const int metric, /* This must be called before lost_out is incremented */ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) { - if (!tp->retransmit_skb_hint || - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) + if ((!tp->retransmit_skb_hint && tp->retrans_out >= tp->lost_out) || + (tp->retransmit_skb_hint && + before(TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(tp->retransmit_skb_hint)->seq))) tp->retransmit_skb_hint = skb; } @@ -1751,8 +1752,11 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, } /* Ignore very old stuff early */ - if (!after(sp[used_sacks].end_seq, prior_snd_una)) + if (!after(sp[used_sacks].end_seq, prior_snd_una)) { + if (i == 0) + first_sack_index = -1; continue; + } used_sacks++; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0569718e3656..b4f0fc34b0ed 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -245,7 +245,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr); } - inet->inet_id = tp->write_seq ^ jiffies; + inet->inet_id = prandom_u32(); if (tcp_fastopen_defer_connect(sk, &err)) return err; @@ -1368,7 +1368,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = 0; if (inet_opt) inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; - newinet->inet_id = newtp->write_seq ^ jiffies; + newinet->inet_id = prandom_u32(); if (!dst) { dst = inet_csk_route_child_sock(sk, newsk, req); @@ -1936,13 +1936,14 @@ static void *listening_get_next(struct seq_file *seq, void *cur) struct tcp_iter_state *st = seq->private; struct net *net = seq_file_net(seq); struct inet_listen_hashbucket *ilb; + struct hlist_nulls_node *node; struct sock *sk = cur; if (!sk) { get_head: ilb = &tcp_hashinfo.listening_hash[st->bucket]; spin_lock(&ilb->lock); - sk = sk_head(&ilb->head); + sk = sk_nulls_head(&ilb->nulls_head); st->offset = 0; goto get_sk; } @@ -1950,9 +1951,9 @@ get_head: ++st->num; ++st->offset; - sk = sk_next(sk); + sk = sk_nulls_next(sk); get_sk: - sk_for_each_from(sk) { + sk_nulls_for_each_from(sk, node) { if (!net_eq(sock_net(sk), net)) continue; if (sk->sk_family == st->family) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9ce5891db58e..c5aa2c91a2e8 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -708,8 +708,9 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb min_t(unsigned int, eff_sacks, (remaining - TCPOLEN_SACK_BASE_ALIGNED) / TCPOLEN_SACK_PERBLOCK); - size += TCPOLEN_SACK_BASE_ALIGNED + - opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; + if (likely(opts->num_sack_blocks)) + size += TCPOLEN_SACK_BASE_ALIGNED + + opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK; } return size; @@ -2382,6 +2383,14 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (tcp_small_queue_check(sk, skb, 0)) break; + /* Argh, we hit an empty skb(), presumably a thread + * is sleeping in sendmsg()/sk_stream_wait_memory(). + * We do not want to send a pure-ack packet and have + * a strange looking rtx queue with empty packet(s). + */ + if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) + break; + if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) break; @@ -2935,7 +2944,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) if (likely(!err)) { TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; } else if (err != -EBUSY) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs); } return err; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 592d6e9967a9..895129b0928c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -358,7 +358,7 @@ static void tcp_probe_timer(struct sock *sk) return; } - if (icsk->icsk_probes_out > max_probes) { + if (icsk->icsk_probes_out >= max_probes) { abort: tcp_write_err(sk); } else { /* Only send another probe if we didn't close things up. */ @@ -413,6 +413,7 @@ void tcp_retransmit_timer(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct sk_buff *skb; if (tp->fastopen_rsk) { WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && @@ -423,10 +424,13 @@ void tcp_retransmit_timer(struct sock *sk) */ return; } - if (!tp->packets_out) - goto out; - WARN_ON(tcp_write_queue_empty(sk)); + if (!tp->packets_out) + return; + + skb = tcp_rtx_queue_head(sk); + if (WARN_ON_ONCE(!skb)) + return; tp->tlp_high_seq = 0; @@ -459,16 +463,17 @@ void tcp_retransmit_timer(struct sock *sk) goto out; } tcp_enter_loss(sk); - tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1); + tcp_retransmit_skb(sk, skb, 1); __sk_dst_reset(sk); goto out_reset_timer; } + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); if (tcp_write_timeout(sk)) goto out; if (icsk->icsk_retransmits == 0) { - int mib_idx; + int mib_idx = 0; if (icsk->icsk_ca_state == TCP_CA_Recovery) { if (tcp_is_sack(tp)) @@ -483,10 +488,9 @@ void tcp_retransmit_timer(struct sock *sk) mib_idx = LINUX_MIB_TCPSACKFAILURES; else mib_idx = LINUX_MIB_TCPRENOFAILURES; - } else { - mib_idx = LINUX_MIB_TCPTIMEOUTS; } - __NET_INC_STATS(sock_net(sk), mib_idx); + if (mib_idx) + __NET_INC_STATS(sock_net(sk), mib_idx); } tcp_enter_loss(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e4169f2efdef..b93d4e248a6c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -421,7 +421,7 @@ static int compute_score(struct sock *sk, struct net *net, score += 4; } - if (sk->sk_incoming_cpu == raw_smp_processor_id()) + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; return score; } @@ -1216,6 +1216,20 @@ static void udp_set_dev_scratch(struct sk_buff *skb) scratch->_tsize_state |= UDP_SKB_IS_STATELESS; } +static void udp_skb_csum_unnecessary_set(struct sk_buff *skb) +{ + /* We come here after udp_lib_checksum_complete() returned 0. + * This means that __skb_checksum_complete() might have + * set skb->csum_valid to 1. + * On 64bit platforms, we can set csum_unnecessary + * to true, but only if the skb is not shared. + */ +#if BITS_PER_LONG == 64 + if (!skb_shared(skb)) + udp_skb_scratch(skb)->csum_unnecessary = true; +#endif +} + static int udp_skb_truesize(struct sk_buff *skb) { return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS; @@ -1345,7 +1359,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb) * queue contains some other skb */ rmem = atomic_add_return(size, &sk->sk_rmem_alloc); - if (rmem > (size + sk->sk_rcvbuf)) + if (rmem > (size + (unsigned int)sk->sk_rcvbuf)) goto uncharge_drop; spin_lock(&list->lock); @@ -1451,10 +1465,7 @@ static struct sk_buff *__first_packet_length(struct sock *sk, *total += skb->truesize; kfree_skb(skb); } else { - /* the csum related bits could be changed, refresh - * the scratch area - */ - udp_set_dev_scratch(skb); + udp_skb_csum_unnecessary_set(skb); break; } } @@ -1478,7 +1489,7 @@ static int first_packet_length(struct sock *sk) spin_lock_bh(&rcvq->lock); skb = __first_packet_length(sk, rcvq, &total); - if (!skb && !skb_queue_empty(sk_queue)) { + if (!skb && !skb_queue_empty_lockless(sk_queue)) { spin_lock(&sk_queue->lock); skb_queue_splice_tail_init(sk_queue, rcvq); spin_unlock(&sk_queue->lock); @@ -1553,7 +1564,7 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, return skb; } - if (skb_queue_empty(sk_queue)) { + if (skb_queue_empty_lockless(sk_queue)) { spin_unlock_bh(&queue->lock); goto busy_check; } @@ -1580,7 +1591,7 @@ busy_check: break; sk_busy_loop(sk, flags & MSG_DONTWAIT); - } while (!skb_queue_empty(sk_queue)); + } while (!skb_queue_empty_lockless(sk_queue)); /* sk_queue is empty, reader_queue may contain peeked packets */ } while (timeo && @@ -2601,7 +2612,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; - if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) + if (!skb_queue_empty_lockless(&udp_sk(sk)->reader_queue)) mask |= POLLIN | POLLRDNORM; sock_rps_record_flow(sk); diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index d9ad986c7b2c..cc3f6da306c6 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -67,8 +67,9 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, goto out; err = -ENOMEM; - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + 64, + rep = nlmsg_new(nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64, GFP_KERNEL); if (!rep) goto out; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index ea69214a04f7..fcd365d3412e 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -223,12 +223,13 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) } static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, sk, skb, mtu); + path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); } static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8f3d6fa2aae6..08a011e70b8d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3335,6 +3335,10 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_NONE) && (dev->type != ARPHRD_PUREIP)) { /* Alas, we support only Ethernet autoconfiguration. */ + idev = __in6_dev_get(dev); + if (!IS_ERR_OR_NULL(idev) && dev->flags & IFF_UP && + dev->flags & IFF_MULTICAST) + ipv6_mc_up(idev); return; } diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 9a31d13bf180..890adadcda16 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -150,7 +150,7 @@ struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) if (IS_ERR(dst)) return NULL; - dst->ops->update_pmtu(dst, sk, NULL, mtu); + dst->ops->update_pmtu(dst, sk, NULL, mtu, true); dst = inet6_csk_route_socket(sk, &fl6); return IS_ERR(dst) ? NULL : dst; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 6dc93ac28261..24a21979d7df 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -118,7 +118,7 @@ static inline int compute_score(struct sock *sk, struct net *net, if (sk->sk_bound_dev_if) score++; } - if (sk->sk_incoming_cpu == raw_smp_processor_id()) + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } return score; @@ -137,9 +137,10 @@ struct sock *inet6_lookup_listener(struct net *net, int score, hiscore = 0, matches = 0, reuseport = 0; bool exact_dif = inet6_exact_dif_match(net, skb); struct sock *sk, *result = NULL; + struct hlist_nulls_node *node; u32 phash = 0; - sk_for_each(sk, &ilb->head) { + sk_nulls_for_each(sk, node, &ilb->nulls_head) { score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { reuseport = sk->sk_reuseport; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index e5308d7cbd75..d43abeb1e415 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -893,8 +893,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, found++; break; } - if (rt_can_ecmp) - fallback_ins = fallback_ins ?: ins; + fallback_ins = fallback_ins ?: ins; goto next_iter; } @@ -934,7 +933,9 @@ next_iter: } if (fallback_ins && !found) { - /* No ECMP-able route found, replace first non-ECMP one */ + /* No matching route with same ecmp-able-ness found, replace + * first matching route + */ ins = fallback_ins; iter = *ins; found++; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 4228f3b2f347..726ba41133a3 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -527,7 +527,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, /* TooBig packet may have updated dst->dev's mtu */ if (dst && dst_mtu(dst) > dst->dev->mtu) - dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu); + dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false); return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, NEXTHDR_GRE); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 067fc78cc529..c2644405bab1 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -652,7 +652,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rel_info > dst_mtu(skb_dst(skb2))) goto out; - skb_dst_update_pmtu(skb2, rel_info); + skb_dst_update_pmtu_no_confirm(skb2, rel_info); } if (rel_type == ICMP_REDIRECT) skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2); @@ -1138,7 +1138,7 @@ route_lookup: mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ? IPV6_MIN_MTU : IPV4_MIN_MTU); - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { *pmtu = mtu; err = -EMSGSIZE; @@ -1878,10 +1878,8 @@ static int ip6_tnl_dev_init(struct net_device *dev) if (err) return err; ip6_tnl_link_config(t); - if (t->parms.collect_md) { - dev->features |= NETIF_F_NETNS_LOCAL; + if (t->parms.collect_md) netif_keep_dst(dev); - } return 0; } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 6b2416b4a53e..396a0f61f5f8 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -453,8 +453,17 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) int err = -1; int mtu; - if (!dst) - goto tx_err_link_failure; + if (!dst) { + fl->u.ip6.flowi6_oif = dev->ifindex; + fl->u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; + dst = ip6_route_output(dev_net(dev), NULL, &fl->u.ip6); + if (dst->error) { + dst_release(dst); + dst = NULL; + goto tx_err_link_failure; + } + skb_dst_set(skb, dst); + } dst_hold(dst); dst = xfrm_lookup(t->net, dst, fl, NULL, 0); @@ -483,7 +492,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) mtu = dst_mtu(dst); if (skb->len > mtu) { - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IPV6)) { if (mtu < IPV6_MIN_MTU) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 5c91b05c8d8f..8c492471b0da 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -185,9 +185,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = -EBUSY; break; } - } else if (sk->sk_protocol != IPPROTO_TCP) + } else if (sk->sk_protocol == IPPROTO_TCP) { + if (sk->sk_prot != &tcpv6_prot) { + retv = -EBUSY; + break; + } break; - + } else { + break; + } if (sk->sk_state != TCP_ESTABLISHED) { retv = -ENOTCONN; break; diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c index 98f61fcb9108..b0f3745d1bee 100644 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c @@ -88,18 +88,30 @@ static struct notifier_block masq_dev_notifier = { struct masq_dev_work { struct work_struct work; struct net *net; + struct in6_addr addr; int ifindex; }; +static int inet_cmp(struct nf_conn *ct, void *work) +{ + struct masq_dev_work *w = (struct masq_dev_work *)work; + struct nf_conntrack_tuple *tuple; + + if (!device_cmp(ct, (void *)(long)w->ifindex)) + return 0; + + tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + + return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6); +} + static void iterate_cleanup_work(struct work_struct *work) { struct masq_dev_work *w; - long index; w = container_of(work, struct masq_dev_work, work); - index = w->ifindex; - nf_ct_iterate_cleanup_net(w->net, device_cmp, (void *)index, 0, 0); + nf_ct_iterate_cleanup_net(w->net, inet_cmp, (void *)w, 0, 0); put_net(w->net); kfree(w); @@ -148,6 +160,7 @@ static int masq_inet_event(struct notifier_block *this, INIT_WORK(&w->work, iterate_cleanup_work); w->ifindex = dev->ifindex; w->net = net; + w->addr = ifa->addr; schedule_work(&w->work); return NOTIFY_DONE; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index fe797b29ca89..6dea6e92e686 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -593,8 +593,8 @@ err_protocol: void ipv6_frag_exit(void) { - inet_frags_fini(&ip6_frags); ip6_frags_sysctl_unregister(); unregister_pernet_subsys(&ip6_frags_ops); inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); + inet_frags_fini(&ip6_frags); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0757a9a4436a..d836b77ebf58 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -93,7 +93,8 @@ static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu); + struct sk_buff *skb, u32 mtu, + bool confirm_neigh); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static void rt6_dst_from_metrics_check(struct rt6_info *rt); @@ -264,7 +265,8 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) } static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { } @@ -1471,7 +1473,8 @@ static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, - const struct ipv6hdr *iph, u32 mtu) + const struct ipv6hdr *iph, u32 mtu, + bool confirm_neigh) { const struct in6_addr *daddr, *saddr; struct rt6_info *rt6 = (struct rt6_info *)dst; @@ -1489,7 +1492,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, daddr = NULL; saddr = NULL; } - dst_confirm_neigh(dst, daddr); + + if (confirm_neigh) + dst_confirm_neigh(dst, daddr); + mtu = max_t(u32, mtu, IPV6_MIN_MTU); if (mtu >= dst_mtu(dst)) return; @@ -1518,9 +1524,11 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, } static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { - __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu); + __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu, + confirm_neigh); } void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, @@ -1540,7 +1548,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) - __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu)); + __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_update_pmtu); @@ -3271,6 +3279,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, */ cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | NLM_F_REPLACE); + cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE; nhn++; } diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index 825b8e01f947..9a01f72d907f 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -27,6 +27,7 @@ #include #include #include +#include #ifdef CONFIG_IPV6_SEG6_HMAC #include #endif @@ -126,7 +127,8 @@ static bool decap_and_validate(struct sk_buff *skb, int proto) skb_reset_network_header(skb); skb_reset_transport_header(skb); - skb->encapsulation = 0; + if (iptunnel_pull_offloads(skb)) + return false; return true; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index d2529c38e7e4..fb3f917db57a 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -932,7 +932,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, } if (tunnel->parms.iph.daddr) - skb_dst_update_pmtu(skb, mtu); + skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->len > mtu && !skb_is_gso(skb)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7b4ce3f9e2f4..5ec73cf386df 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -718,6 +718,7 @@ static void tcp_v6_init_req(struct request_sock *req, const struct sock *sk_listener, struct sk_buff *skb) { + bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); struct inet_request_sock *ireq = inet_rsk(req); const struct ipv6_pinfo *np = inet6_sk(sk_listener); @@ -725,7 +726,7 @@ static void tcp_v6_init_req(struct request_sock *req, ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; /* So that link locals have meaning */ - if (!sk_listener->sk_bound_dev_if && + if ((!sk_listener->sk_bound_dev_if || l3_slave) && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = tcp_v6_iif(skb); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 89c50dbae3ee..ef4e091c3bdc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -170,7 +170,7 @@ static int compute_score(struct sock *sk, struct net *net, score++; } - if (sk->sk_incoming_cpu == raw_smp_processor_id()) + if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; return score; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 32bdddc18aca..93e167182daf 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -223,12 +223,13 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) } static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, sk, skb, mtu); + path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); } static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk, diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index ca98276c2709..7a9cbc9502d9 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -2446,6 +2446,13 @@ out: return err; } +static void afiucv_iucv_exit(void) +{ + device_unregister(af_iucv_dev); + driver_unregister(&af_iucv_driver); + pr_iucv->iucv_unregister(&af_iucv_handler, 0); +} + static int __init afiucv_init(void) { int err; @@ -2479,11 +2486,18 @@ static int __init afiucv_init(void) err = afiucv_iucv_init(); if (err) goto out_sock; - } else - register_netdevice_notifier(&afiucv_netdev_notifier); + } + + err = register_netdevice_notifier(&afiucv_netdev_notifier); + if (err) + goto out_notifier; + dev_add_pack(&iucv_packet_type); return 0; +out_notifier: + if (pr_iucv) + afiucv_iucv_exit(); out_sock: sock_unregister(PF_IUCV); out_proto: @@ -2497,12 +2511,11 @@ out: static void __exit afiucv_exit(void) { if (pr_iucv) { - device_unregister(af_iucv_dev); - driver_unregister(&af_iucv_driver); - pr_iucv->iucv_unregister(&af_iucv_handler, 0); + afiucv_iucv_exit(); symbol_put(iucv_if); - } else - unregister_netdevice_notifier(&afiucv_netdev_notifier); + } + + unregister_netdevice_notifier(&afiucv_netdev_notifier); dev_remove_pack(&iucv_packet_type); sock_unregister(PF_IUCV); proto_unregister(&iucv_proto); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 12b7752b3193..d169511ea7e6 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -358,8 +358,13 @@ static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, spin_lock_bh(&pn->l2tp_session_hlist_lock); + /* IP encap expects session IDs to be globally unique, while + * UDP encap doesn't. + */ hlist_for_each_entry(session_walk, g_head, global_hlist) - if (session_walk->session_id == session->session_id) { + if (session_walk->session_id == session->session_id && + (session_walk->tunnel->encap == L2TP_ENCAPTYPE_IP || + tunnel->encap == L2TP_ENCAPTYPE_IP)) { err = -EEXIST; goto err_tlock_pnlock; } @@ -1884,7 +1889,8 @@ static __net_exit void l2tp_exit_net(struct net *net) } rcu_read_unlock_bh(); - flush_workqueue(l2tp_wq); + if (l2tp_wq) + flush_workqueue(l2tp_wq); rcu_barrier(); } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 2e472d5c3ea4..d552e8819713 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -113,22 +113,26 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr) * * Send data via reliable llc2 connection. * Returns 0 upon success, non-zero if action did not succeed. + * + * This function always consumes a reference to the skb. */ static int llc_ui_send_data(struct sock* sk, struct sk_buff *skb, int noblock) { struct llc_sock* llc = llc_sk(sk); - int rc = 0; if (unlikely(llc_data_accept_state(llc->state) || llc->remote_busy_flag || llc->p_flag)) { long timeout = sock_sndtimeo(sk, noblock); + int rc; rc = llc_ui_wait_for_busy_core(sk, timeout); + if (rc) { + kfree_skb(skb); + return rc; + } } - if (unlikely(!rc)) - rc = llc_build_and_send_pkt(sk, skb); - return rc; + return llc_build_and_send_pkt(sk, skb); } static void llc_ui_sk_init(struct socket *sock, struct sock *sk) @@ -900,7 +904,7 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name); int flags = msg->msg_flags; int noblock = flags & MSG_DONTWAIT; - struct sk_buff *skb; + struct sk_buff *skb = NULL; size_t size = 0; int rc = -EINVAL, copied = 0, hdrlen; @@ -909,10 +913,10 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) lock_sock(sk); if (addr) { if (msg->msg_namelen < sizeof(*addr)) - goto release; + goto out; } else { if (llc_ui_addr_null(&llc->addr)) - goto release; + goto out; addr = &llc->addr; } /* must bind connection to sap if user hasn't done it. */ @@ -920,7 +924,7 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) /* bind to sap with null dev, exclusive. */ rc = llc_ui_autobind(sock, addr); if (rc) - goto release; + goto out; } hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr); size = hdrlen + len; @@ -929,12 +933,12 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) copied = size - hdrlen; rc = -EINVAL; if (copied < 0) - goto release; + goto out; release_sock(sk); skb = sock_alloc_send_skb(sk, size, noblock, &rc); lock_sock(sk); if (!skb) - goto release; + goto out; skb->dev = llc->dev; skb->protocol = llc_proto_type(addr->sllc_arphrd); skb_reserve(skb, hdrlen); @@ -944,29 +948,31 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (sk->sk_type == SOCK_DGRAM || addr->sllc_ua) { llc_build_and_send_ui_pkt(llc->sap, skb, addr->sllc_mac, addr->sllc_sap); + skb = NULL; goto out; } if (addr->sllc_test) { llc_build_and_send_test_pkt(llc->sap, skb, addr->sllc_mac, addr->sllc_sap); + skb = NULL; goto out; } if (addr->sllc_xid) { llc_build_and_send_xid_pkt(llc->sap, skb, addr->sllc_mac, addr->sllc_sap); + skb = NULL; goto out; } rc = -ENOPROTOOPT; if (!(sk->sk_type == SOCK_STREAM && !addr->sllc_ua)) goto out; rc = llc_ui_send_data(sk, skb, noblock); + skb = NULL; out: - if (rc) { - kfree_skb(skb); -release: + kfree_skb(skb); + if (rc) dprintk("%s: failed sending from %02X to %02X: %d\n", __func__, llc->laddr.lsap, llc->daddr.lsap, rc); - } release_sock(sk); return rc ? : copied; } diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index 4b60f68cb492..8354ae40ec85 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -372,6 +372,7 @@ int llc_conn_ac_send_i_cmd_p_set_1(struct sock *sk, struct sk_buff *skb) llc_pdu_init_as_i_cmd(skb, 1, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); if (likely(!rc)) { + skb_get(skb); llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } @@ -389,7 +390,8 @@ static int llc_conn_ac_send_i_cmd_p_set_0(struct sock *sk, struct sk_buff *skb) llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); if (likely(!rc)) { - rc = llc_conn_send_pdu(sk, skb); + skb_get(skb); + llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } return rc; @@ -406,6 +408,7 @@ int llc_conn_ac_send_i_xxx_x_set_0(struct sock *sk, struct sk_buff *skb) llc_pdu_init_as_i_cmd(skb, 0, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); if (likely(!rc)) { + skb_get(skb); llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } @@ -916,7 +919,8 @@ static int llc_conn_ac_send_i_rsp_f_set_ackpf(struct sock *sk, llc_pdu_init_as_i_cmd(skb, llc->ack_pf, llc->vS, llc->vR); rc = llc_mac_hdr_init(skb, llc->dev->dev_addr, llc->daddr.mac); if (likely(!rc)) { - rc = llc_conn_send_pdu(sk, skb); + skb_get(skb); + llc_conn_send_pdu(sk, skb); llc_conn_ac_inc_vs_by_1(sk, skb); } return rc; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 56c3fb5cc805..7fbc682aff04 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -30,7 +30,7 @@ #endif static int llc_find_offset(int state, int ev_type); -static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *skb); +static void llc_conn_send_pdus(struct sock *sk); static int llc_conn_service(struct sock *sk, struct sk_buff *skb); static int llc_exec_conn_trans_actions(struct sock *sk, struct llc_conn_state_trans *trans, @@ -55,6 +55,8 @@ int sysctl_llc2_busy_timeout = LLC2_BUSY_TIME * HZ; * (executing it's actions and changing state), upper layer will be * indicated or confirmed, if needed. Returns 0 for success, 1 for * failure. The socket lock has to be held before calling this function. + * + * This function always consumes a reference to the skb. */ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) { @@ -62,12 +64,6 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) struct llc_sock *llc = llc_sk(skb->sk); struct llc_conn_state_ev *ev = llc_conn_ev(skb); - /* - * We have to hold the skb, because llc_conn_service will kfree it in - * the sending path and we need to look at the skb->cb, where we encode - * llc_conn_state_ev. - */ - skb_get(skb); ev->ind_prim = ev->cfm_prim = 0; /* * Send event to state machine @@ -75,21 +71,12 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) rc = llc_conn_service(skb->sk, skb); if (unlikely(rc != 0)) { printk(KERN_ERR "%s: llc_conn_service failed\n", __func__); - goto out_kfree_skb; - } - - if (unlikely(!ev->ind_prim && !ev->cfm_prim)) { - /* indicate or confirm not required */ - if (!skb->next) - goto out_kfree_skb; goto out_skb_put; } - if (unlikely(ev->ind_prim && ev->cfm_prim)) /* Paranoia */ - skb_get(skb); - switch (ev->ind_prim) { case LLC_DATA_PRIM: + skb_get(skb); llc_save_primitive(sk, skb, LLC_DATA_PRIM); if (unlikely(sock_queue_rcv_skb(sk, skb))) { /* @@ -106,6 +93,7 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) * skb->sk pointing to the newly created struct sock in * llc_conn_handler. -acme */ + skb_get(skb); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_state_change(sk); break; @@ -121,7 +109,6 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) sk->sk_state_change(sk); } } - kfree_skb(skb); sock_put(sk); break; case LLC_RESET_PRIM: @@ -130,14 +117,11 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) * RESET is not being notified to upper layers for now */ printk(KERN_INFO "%s: received a reset ind!\n", __func__); - kfree_skb(skb); break; default: - if (ev->ind_prim) { + if (ev->ind_prim) printk(KERN_INFO "%s: received unknown %d prim!\n", __func__, ev->ind_prim); - kfree_skb(skb); - } /* No indication */ break; } @@ -179,25 +163,22 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) printk(KERN_INFO "%s: received a reset conf!\n", __func__); break; default: - if (ev->cfm_prim) { + if (ev->cfm_prim) printk(KERN_INFO "%s: received unknown %d prim!\n", __func__, ev->cfm_prim); - break; - } - goto out_skb_put; /* No confirmation */ + /* No confirmation */ + break; } -out_kfree_skb: - kfree_skb(skb); out_skb_put: kfree_skb(skb); return rc; } -int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb) +void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb) { /* queue PDU to send to MAC layer */ skb_queue_tail(&sk->sk_write_queue, skb); - return llc_conn_send_pdus(sk, skb); + llc_conn_send_pdus(sk); } /** @@ -255,7 +236,7 @@ void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit) if (howmany_resend > 0) llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO; /* any PDUs to re-send are queued up; start sending to MAC */ - llc_conn_send_pdus(sk, NULL); + llc_conn_send_pdus(sk); out:; } @@ -296,7 +277,7 @@ void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit) if (howmany_resend > 0) llc->vS = (llc->vS + 1) % LLC_2_SEQ_NBR_MODULO; /* any PDUs to re-send are queued up; start sending to MAC */ - llc_conn_send_pdus(sk, NULL); + llc_conn_send_pdus(sk); out:; } @@ -340,16 +321,12 @@ out: /** * llc_conn_send_pdus - Sends queued PDUs * @sk: active connection - * @hold_skb: the skb held by caller, or NULL if does not care * - * Sends queued pdus to MAC layer for transmission. When @hold_skb is - * NULL, always return 0. Otherwise, return 0 if @hold_skb is sent - * successfully, or 1 for failure. + * Sends queued pdus to MAC layer for transmission. */ -static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *hold_skb) +static void llc_conn_send_pdus(struct sock *sk) { struct sk_buff *skb; - int ret = 0; while ((skb = skb_dequeue(&sk->sk_write_queue)) != NULL) { struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); @@ -361,20 +338,10 @@ static int llc_conn_send_pdus(struct sock *sk, struct sk_buff *hold_skb) skb_queue_tail(&llc_sk(sk)->pdu_unack_q, skb); if (!skb2) break; - dev_queue_xmit(skb2); - } else { - bool is_target = skb == hold_skb; - int rc; - - if (is_target) - skb_get(skb); - rc = dev_queue_xmit(skb); - if (is_target) - ret = rc; + skb = skb2; } + dev_queue_xmit(skb); } - - return ret; } /** diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 260b3dc1b4a2..64d4bef04e73 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -127,9 +127,7 @@ void llc_sap_close(struct llc_sap *sap) list_del_rcu(&sap->node); spin_unlock_bh(&llc_sap_list_lock); - synchronize_rcu(); - - kfree(sap); + kfree_rcu(sap, rcu); } static struct packet_type llc_packet_type __read_mostly = { diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c index 6daf391b3e84..fc4d2bd8816f 100644 --- a/net/llc/llc_if.c +++ b/net/llc/llc_if.c @@ -38,6 +38,8 @@ * closed and -EBUSY when sending data is not permitted in this state or * LLC has send an I pdu with p bit set to 1 and is waiting for it's * response. + * + * This function always consumes a reference to the skb. */ int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb) { @@ -46,20 +48,22 @@ int llc_build_and_send_pkt(struct sock *sk, struct sk_buff *skb) struct llc_sock *llc = llc_sk(sk); if (unlikely(llc->state == LLC_CONN_STATE_ADM)) - goto out; + goto out_free; rc = -EBUSY; if (unlikely(llc_data_accept_state(llc->state) || /* data_conn_refuse */ llc->p_flag)) { llc->failed_data_req = 1; - goto out; + goto out_free; } ev = llc_conn_ev(skb); ev->type = LLC_CONN_EV_TYPE_PRIM; ev->prim = LLC_DATA_PRIM; ev->prim_type = LLC_PRIM_TYPE_REQ; skb->dev = llc->dev; - rc = llc_conn_state_process(sk, skb); -out: + return llc_conn_state_process(sk, skb); + +out_free: + kfree_skb(skb); return rc; } diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index a94bd56bcac6..7ae4cc684d3a 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -58,8 +58,10 @@ int llc_sap_action_send_ui(struct llc_sap *sap, struct sk_buff *skb) ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_ui_cmd(skb); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) + if (likely(!rc)) { + skb_get(skb); rc = dev_queue_xmit(skb); + } return rc; } @@ -81,8 +83,10 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb) ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) + if (likely(!rc)) { + skb_get(skb); rc = dev_queue_xmit(skb); + } return rc; } @@ -135,8 +139,10 @@ int llc_sap_action_send_test_c(struct llc_sap *sap, struct sk_buff *skb) ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_test_cmd(skb); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); - if (likely(!rc)) + if (likely(!rc)) { + skb_get(skb); rc = dev_queue_xmit(skb); + } return rc; } diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index d90928f50226..a7534950e60a 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -197,29 +197,22 @@ out: * After executing actions of the event, upper layer will be indicated * if needed(on receiving an UI frame). sk can be null for the * datalink_proto case. + * + * This function always consumes a reference to the skb. */ static void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); - /* - * We have to hold the skb, because llc_sap_next_state - * will kfree it in the sending path and we need to - * look at the skb->cb, where we encode llc_sap_state_ev. - */ - skb_get(skb); ev->ind_cfm_flag = 0; llc_sap_next_state(sap, skb); - if (ev->ind_cfm_flag == LLC_IND) { - if (skb->sk->sk_state == TCP_LISTEN) - kfree_skb(skb); - else { - llc_save_primitive(skb->sk, skb, ev->prim); - /* queue skb to the user. */ - if (sock_queue_rcv_skb(skb->sk, skb)) - kfree_skb(skb); - } + if (ev->ind_cfm_flag == LLC_IND && skb->sk->sk_state != TCP_LISTEN) { + llc_save_primitive(skb->sk, skb, ev->prim); + + /* queue skb to the user. */ + if (sock_queue_rcv_skb(skb->sk, skb) == 0) + return; } kfree_skb(skb); } diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c index 204a8351efff..c29170e767a8 100644 --- a/net/llc/llc_station.c +++ b/net/llc/llc_station.c @@ -32,7 +32,7 @@ static int llc_stat_ev_rx_null_dsap_xid_c(struct sk_buff *skb) return LLC_PDU_IS_CMD(pdu) && /* command PDU */ LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */ LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_XID && - !pdu->dsap ? 0 : 1; /* NULL DSAP value */ + !pdu->dsap; /* NULL DSAP value */ } static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb) @@ -42,7 +42,7 @@ static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb) return LLC_PDU_IS_CMD(pdu) && /* command PDU */ LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */ LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_TEST && - !pdu->dsap ? 0 : 1; /* NULL DSAP */ + !pdu->dsap; /* NULL DSAP */ } static int llc_station_ac_send_xid_r(struct sk_buff *skb) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8168c667d91d..b1484b8316e8 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1089,50 +1089,6 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) return 0; } -/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */ -struct iapp_layer2_update { - u8 da[ETH_ALEN]; /* broadcast */ - u8 sa[ETH_ALEN]; /* STA addr */ - __be16 len; /* 6 */ - u8 dsap; /* 0 */ - u8 ssap; /* 0 */ - u8 control; - u8 xid_info[3]; -} __packed; - -static void ieee80211_send_layer2_update(struct sta_info *sta) -{ - struct iapp_layer2_update *msg; - struct sk_buff *skb; - - /* Send Level 2 Update Frame to update forwarding tables in layer 2 - * bridge devices */ - - skb = dev_alloc_skb(sizeof(*msg)); - if (!skb) - return; - msg = skb_put(skb, sizeof(*msg)); - - /* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID) - * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */ - - eth_broadcast_addr(msg->da); - memcpy(msg->sa, sta->sta.addr, ETH_ALEN); - msg->len = htons(6); - msg->dsap = 0; - msg->ssap = 0x01; /* NULL LSAP, CR Bit: Response */ - msg->control = 0xaf; /* XID response lsb.1111F101. - * F=0 (no poll command; unsolicited frame) */ - msg->xid_info[0] = 0x81; /* XID format identifier */ - msg->xid_info[1] = 1; /* LLC types/classes: Type 1 LLC */ - msg->xid_info[2] = 0; /* XID sender's receive window size (RW) */ - - skb->dev = sta->sdata->dev; - skb->protocol = eth_type_trans(skb, sta->sdata->dev); - memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx_ni(skb); -} - static int sta_apply_auth_flags(struct ieee80211_local *local, struct sta_info *sta, u32 mask, u32 set) @@ -1442,7 +1398,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, struct sta_info *sta; struct ieee80211_sub_if_data *sdata; int err; - int layer2_update; if (params->vlan) { sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); @@ -1486,18 +1441,12 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, test_sta_flag(sta, WLAN_STA_ASSOC)) rate_control_rate_init(sta); - layer2_update = sdata->vif.type == NL80211_IFTYPE_AP_VLAN || - sdata->vif.type == NL80211_IFTYPE_AP; - err = sta_info_insert_rcu(sta); if (err) { rcu_read_unlock(); return err; } - if (layer2_update) - ieee80211_send_layer2_update(sta); - rcu_read_unlock(); return 0; @@ -1595,10 +1544,11 @@ static int ieee80211_change_station(struct wiphy *wiphy, sta->sdata = vlansdata; ieee80211_check_fast_xmit(sta); - if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { ieee80211_vif_inc_num_mcast(sta->sdata); - - ieee80211_send_layer2_update(sta); + cfg80211_send_layer2_update(sta->sdata->dev, + sta->sta.addr); + } } err = sta_apply_parameters(local, sta, params); @@ -2850,6 +2800,28 @@ static int ieee80211_start_radar_detection(struct wiphy *wiphy, return err; } +static void ieee80211_end_cac(struct wiphy *wiphy, + struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + + mutex_lock(&local->mtx); + list_for_each_entry(sdata, &local->interfaces, list) { + /* it might be waiting for the local->mtx, but then + * by the time it gets it, sdata->wdev.cac_started + * will no longer be true + */ + cancel_delayed_work(&sdata->dfs_cac_timer_work); + + if (sdata->wdev.cac_started) { + ieee80211_vif_release_channel(sdata); + sdata->wdev.cac_started = false; + } + } + mutex_unlock(&local->mtx); +} + static struct cfg80211_beacon_data * cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon) { @@ -3780,6 +3752,7 @@ const struct cfg80211_ops mac80211_config_ops = { #endif .get_channel = ieee80211_cfg_get_channel, .start_radar_detection = ieee80211_start_radar_detection, + .end_cac = ieee80211_end_cac, .channel_switch = ieee80211_channel_switch, .set_qos_map = ieee80211_set_qos_map, .set_ap_chanwidth = ieee80211_set_ap_chanwidth, diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index c813207bb123..928b6b0464b8 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -490,9 +490,14 @@ static ssize_t ieee80211_if_fmt_aqm( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) { struct ieee80211_local *local = sdata->local; - struct txq_info *txqi = to_txq_info(sdata->vif.txq); + struct txq_info *txqi; int len; + if (!sdata->vif.txq) + return 0; + + txqi = to_txq_info(sdata->vif.txq); + spin_lock_bh(&local->fq.lock); rcu_read_lock(); @@ -659,7 +664,9 @@ static void add_common_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(rc_rateidx_vht_mcs_mask_5ghz); DEBUGFS_ADD(hw_queues); - if (sdata->local->ops->wake_tx_queue) + if (sdata->local->ops->wake_tx_queue && + sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_NAN) DEBUGFS_ADD(aqm); } diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index fab0764c315f..994dde6e5f9d 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -326,6 +326,9 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, unsigned long fail_avg = ewma_mesh_fail_avg_read(&sta->mesh->fail_avg); + if (sta->mesh->plink_state != NL80211_PLINK_ESTAB) + return MAX_METRIC; + /* Try to get rate based on HW/SW RC algorithm. * Rate is returned in units of Kbps, correct this * to comply with airtime calculation units diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d91db72b9e9e..36bd59ff49c4 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2430,7 +2430,8 @@ struct sk_buff *ieee80211_ap_probereq_get(struct ieee80211_hw *hw, rcu_read_lock(); ssid = ieee80211_bss_get_ie(cbss, WLAN_EID_SSID); - if (WARN_ON_ONCE(ssid == NULL)) + if (WARN_ONCE(!ssid || ssid[1] > IEEE80211_MAX_SSID_LEN, + "invalid SSID element (len=%d)", ssid ? ssid[1] : -1)) ssid_len = 0; else ssid_len = ssid[1]; @@ -4756,7 +4757,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); ssidie = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID); - if (!ssidie) { + if (!ssidie || ssidie[1] > sizeof(assoc_data->ssid)) { rcu_read_unlock(); kfree(assoc_data); return -EINVAL; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 4a5bdad9f303..7ba4272642c9 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -129,7 +129,7 @@ #define CCK_GROUP \ [MINSTREL_CCK_GROUP] = { \ - .streams = 0, \ + .streams = 1, \ .flags = 0, \ .duration = { \ CCK_DURATION_LIST(false), \ @@ -282,7 +282,8 @@ minstrel_ht_get_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, break; /* short preamble */ - if (!(mi->supported[group] & BIT(idx))) + if ((mi->supported[group] & BIT(idx + 4)) && + (rate->flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE)) idx += 4; } return &mi->groups[group].rates[idx]; @@ -528,7 +529,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) /* (re)Initialize group rate indexes */ for(j = 0; j < MAX_THR_RATES; j++) - tmp_group_tp_rate[j] = group; + tmp_group_tp_rate[j] = MCS_GROUP_RATES * group; for (i = 0; i < MCS_GROUP_RATES; i++) { if (!(mi->supported[group] & BIT(i))) @@ -1077,18 +1078,23 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, return; sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES]; + sample_idx %= MCS_GROUP_RATES; + + if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP] && + (sample_idx >= 4) != txrc->short_preamble) + return; + info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; rate->count = 1; - if (sample_idx / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) { + if (sample_group == &minstrel_mcs_groups[MINSTREL_CCK_GROUP]) { int idx = sample_idx % ARRAY_SIZE(mp->cck_rates); rate->idx = mp->cck_rates[idx]; } else if (sample_group->flags & IEEE80211_TX_RC_VHT_MCS) { ieee80211_rate_set_vht(rate, sample_idx % MCS_GROUP_RATES, sample_group->streams); } else { - rate->idx = sample_idx % MCS_GROUP_RATES + - (sample_group->streams - 1) * 8; + rate->idx = sample_idx + (sample_group->streams - 1) * 8; } rate->flags = sample_group->flags; @@ -1132,7 +1138,6 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs; u16 sta_cap = sta->ht_cap.cap; struct ieee80211_sta_vht_cap *vht_cap = &sta->vht_cap; - struct sta_info *sinfo = container_of(sta, struct sta_info, sta); int use_vht; int n_supported = 0; int ack_dur; @@ -1258,8 +1263,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, if (!n_supported) goto use_legacy; - if (test_sta_flag(sinfo, WLAN_STA_SHORT_PREAMBLE)) - mi->cck_supported_short |= mi->cck_supported_short << 4; + mi->supported[MINSTREL_CCK_GROUP] |= mi->cck_supported_short << 4; /* create an initial rate table with the lowest supported rates */ minstrel_ht_update_stats(mp, mi); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 4a6b3c7b35e3..7c92b1471c34 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3227,9 +3227,18 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) case cpu_to_le16(IEEE80211_STYPE_PROBE_RESP): /* process for all: mesh, mlme, ibss */ break; + case cpu_to_le16(IEEE80211_STYPE_DEAUTH): + if (is_multicast_ether_addr(mgmt->da) && + !is_broadcast_ether_addr(mgmt->da)) + return RX_DROP_MONITOR; + + /* process only for station/IBSS */ + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC) + return RX_DROP_MONITOR; + break; case cpu_to_le16(IEEE80211_STYPE_ASSOC_RESP): case cpu_to_le16(IEEE80211_STYPE_REASSOC_RESP): - case cpu_to_le16(IEEE80211_STYPE_DEAUTH): case cpu_to_le16(IEEE80211_STYPE_DISASSOC): if (is_multicast_ether_addr(mgmt->da) && !is_broadcast_ether_addr(mgmt->da)) @@ -3853,7 +3862,7 @@ void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata) lockdep_assert_held(&local->sta_mtx); - list_for_each_entry_rcu(sta, &local->sta_list, list) { + list_for_each_entry(sta, &local->sta_list, list) { if (sdata != sta->sdata && (!sta->sdata->bss || sta->sdata->bss != sdata->bss)) continue; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f1b496222bda..627dc642f894 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1899,6 +1899,10 @@ int sta_info_move_state(struct sta_info *sta, ieee80211_check_fast_xmit(sta); ieee80211_check_fast_rx(sta); } + if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sta->sdata->vif.type == NL80211_IFTYPE_AP) + cfg80211_send_layer2_update(sta->sdata->dev, + sta->sta.addr); break; default: break; @@ -2313,7 +2317,8 @@ unsigned long ieee80211_sta_last_active(struct sta_info *sta) { struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta); - if (time_after(stats->last_rx, sta->status_stats.last_ack)) + if (!sta->status_stats.last_ack || + time_after(stats->last_rx, sta->status_stats.last_ack)) return stats->last_rx; return sta->status_stats.last_ack; } diff --git a/net/mac80211/status.c b/net/mac80211/status.c index b18466cf466c..fbe7354aeac7 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -856,7 +856,8 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, I802_DEBUG_INC(local->dot11FailedCount); } - if (ieee80211_is_nullfunc(fc) && ieee80211_has_pm(fc) && + if ((ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)) && + ieee80211_has_pm(fc) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS) && !(info->flags & IEEE80211_TX_CTL_INJECTED) && local->ps_sdata && !(local->scanning)) { diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index b3622823bad2..ebd66e8f46b3 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -266,9 +266,21 @@ int ieee80211_tkip_decrypt_data(struct crypto_cipher *tfm, if ((keyid >> 6) != key->conf.keyidx) return TKIP_DECRYPT_INVALID_KEYIDX; - if (rx_ctx->ctx.state != TKIP_STATE_NOT_INIT && - (iv32 < rx_ctx->iv32 || - (iv32 == rx_ctx->iv32 && iv16 <= rx_ctx->iv16))) + /* Reject replays if the received TSC is smaller than or equal to the + * last received value in a valid message, but with an exception for + * the case where a new key has been set and no valid frame using that + * key has yet received and the local RSC was initialized to 0. This + * exception allows the very first frame sent by the transmitter to be + * accepted even if that transmitter were to use TSC 0 (IEEE 802.11 + * described TSC to be initialized to 1 whenever a new key is taken into + * use). + */ + if (iv32 < rx_ctx->iv32 || + (iv32 == rx_ctx->iv32 && + (iv16 < rx_ctx->iv16 || + (iv16 == rx_ctx->iv16 && + (rx_ctx->iv32 || rx_ctx->iv16 || + rx_ctx->ctx.state != TKIP_STATE_NOT_INIT))))) return TKIP_DECRYPT_REPLAY; if (only_iv) { diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 81f120466c38..cd3cdd1a0b57 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -944,16 +944,22 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elem_parse_failed = true; break; case WLAN_EID_VHT_OPERATION: - if (elen >= sizeof(struct ieee80211_vht_operation)) + if (elen >= sizeof(struct ieee80211_vht_operation)) { elems->vht_operation = (void *)pos; - else - elem_parse_failed = true; + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + break; + } + elem_parse_failed = true; break; case WLAN_EID_OPMODE_NOTIF: - if (elen > 0) + if (elen > 0) { elems->opmode_notif = pos; - else - elem_parse_failed = true; + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + break; + } + elem_parse_failed = true; break; case WLAN_EID_MESH_ID: elems->mesh_id = pos; diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 6e558a419f60..6c01166f972b 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -28,7 +28,7 @@ #include "internal.h" static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = { - [MPLS_IPTUNNEL_DST] = { .type = NLA_U32 }, + [MPLS_IPTUNNEL_DST] = { .len = sizeof(u32) }, [MPLS_IPTUNNEL_TTL] = { .type = NLA_U8 }, }; diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 8ad2b52a0b32..3c0e345367a5 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -64,9 +64,9 @@ mtype_destroy(struct ip_set *set) if (SET_WITH_TIMEOUT(set)) del_timer_sync(&map->gc); - ip_set_free(map->members); if (set->dsize && set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); + ip_set_free(map->members); ip_set_free(map); set->data = NULL; @@ -79,7 +79,7 @@ mtype_flush(struct ip_set *set) if (set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set); - memset(map->members, 0, map->memsize); + bitmap_zero(map->members, map->elements); set->elements = 0; set->ext_size = 0; } diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 4783efff0bde..a4c104a4977f 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -40,7 +40,7 @@ MODULE_ALIAS("ip_set_bitmap:ip"); /* Type structure */ struct bitmap_ip { - void *members; /* the set members */ + unsigned long *members; /* the set members */ u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ @@ -222,7 +222,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, u32 first_ip, u32 last_ip, u32 elements, u32 hosts, u8 netmask) { - map->members = ip_set_alloc(map->memsize); + map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN); if (!map->members) return false; map->first_ip = first_ip; @@ -315,7 +315,7 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[], if (!map) return -ENOMEM; - map->memsize = bitmap_bytes(0, elements - 1); + map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_ip; if (!init_map_ip(set, map, first_ip, last_ip, elements, hosts, netmask)) { diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 9a065f672d3a..8e58e7e34981 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -46,7 +46,7 @@ enum { /* Type structure */ struct bitmap_ipmac { - void *members; /* the set members */ + unsigned long *members; /* the set members */ u32 first_ip; /* host byte order, included in range */ u32 last_ip; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ @@ -299,7 +299,7 @@ static bool init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, u32 first_ip, u32 last_ip, u32 elements) { - map->members = ip_set_alloc(map->memsize); + map->members = bitmap_zalloc(elements, GFP_KERNEL | __GFP_NOWARN); if (!map->members) return false; map->first_ip = first_ip; @@ -363,7 +363,7 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[], if (!map) return -ENOMEM; - map->memsize = bitmap_bytes(0, elements - 1); + map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_ipmac; if (!init_map_ipmac(set, map, first_ip, last_ip, elements)) { kfree(map); diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 7f0c733358a4..6771b362a123 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -34,7 +34,7 @@ MODULE_ALIAS("ip_set_bitmap:port"); /* Type structure */ struct bitmap_port { - void *members; /* the set members */ + unsigned long *members; /* the set members */ u16 first_port; /* host byte order, included in range */ u16 last_port; /* host byte order, included in range */ u32 elements; /* number of max elements in the set */ @@ -207,7 +207,7 @@ static bool init_map_port(struct ip_set *set, struct bitmap_port *map, u16 first_port, u16 last_port) { - map->members = ip_set_alloc(map->memsize); + map->members = bitmap_zalloc(map->elements, GFP_KERNEL | __GFP_NOWARN); if (!map->members) return false; map->first_port = first_port; @@ -250,7 +250,7 @@ bitmap_port_create(struct net *net, struct ip_set *set, struct nlattr *tb[], return -ENOMEM; map->elements = elements; - map->memsize = bitmap_bytes(0, map->elements); + map->memsize = BITS_TO_LONGS(elements) * sizeof(unsigned long); set->variant = &bitmap_port; if (!init_map_port(set, map, first_port, last_port)) { kfree(map); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index dbf17d3596a6..c2b21c9c1229 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1639,6 +1639,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, struct ip_set *set; struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {}; int ret = 0; + u32 lineno; if (unlikely(protocol_failed(attr) || !attr[IPSET_ATTR_SETNAME] || @@ -1655,7 +1656,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb, return -IPSET_ERR_PROTOCOL; rcu_read_lock_bh(); - ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0); + ret = set->variant->uadt(set, tb, IPSET_TEST, &lineno, 0, 0); rcu_read_unlock_bh(); /* Userspace can't trigger element to be re-added */ if (ret == -EAGAIN) @@ -1950,8 +1951,9 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } req_version->version = IPSET_PROTOCOL; - ret = copy_to_user(user, req_version, - sizeof(struct ip_set_req_version)); + if (copy_to_user(user, req_version, + sizeof(struct ip_set_req_version))) + ret = -EFAULT; goto done; } case IP_SET_OP_GET_BYNAME: { @@ -2008,7 +2010,8 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len) } /* end of switch(op) */ copy: - ret = copy_to_user(user, data, copylen); + if (copy_to_user(user, data, copylen)) + ret = -EFAULT; done: vfree(data); diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 299edc6add5a..363475b246f6 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -198,21 +198,29 @@ struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app * mutex_lock(&__ip_vs_app_mutex); + /* increase the module use count */ + if (!ip_vs_use_count_inc()) { + err = -ENOENT; + goto out_unlock; + } + list_for_each_entry(a, &ipvs->app_list, a_list) { if (!strcmp(app->name, a->name)) { err = -EEXIST; + /* decrease the module use count */ + ip_vs_use_count_dec(); goto out_unlock; } } a = kmemdup(app, sizeof(*app), GFP_KERNEL); if (!a) { err = -ENOMEM; + /* decrease the module use count */ + ip_vs_use_count_dec(); goto out_unlock; } INIT_LIST_HEAD(&a->incs_list); list_add(&a->a_list, &ipvs->app_list); - /* increase the module use count */ - ip_vs_use_count_inc(); out_unlock: mutex_unlock(&__ip_vs_app_mutex); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 4648dccebf59..5ec80818ace2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -98,7 +98,6 @@ static bool __ip_vs_addr_is_local_v6(struct net *net, static void update_defense_level(struct netns_ipvs *ipvs) { struct sysinfo i; - static int old_secure_tcp = 0; int availmem; int nomem; int to_change = -1; @@ -179,35 +178,35 @@ static void update_defense_level(struct netns_ipvs *ipvs) spin_lock(&ipvs->securetcp_lock); switch (ipvs->sysctl_secure_tcp) { case 0: - if (old_secure_tcp >= 2) + if (ipvs->old_secure_tcp >= 2) to_change = 0; break; case 1: if (nomem) { - if (old_secure_tcp < 2) + if (ipvs->old_secure_tcp < 2) to_change = 1; ipvs->sysctl_secure_tcp = 2; } else { - if (old_secure_tcp >= 2) + if (ipvs->old_secure_tcp >= 2) to_change = 0; } break; case 2: if (nomem) { - if (old_secure_tcp < 2) + if (ipvs->old_secure_tcp < 2) to_change = 1; } else { - if (old_secure_tcp >= 2) + if (ipvs->old_secure_tcp >= 2) to_change = 0; ipvs->sysctl_secure_tcp = 1; } break; case 3: - if (old_secure_tcp < 2) + if (ipvs->old_secure_tcp < 2) to_change = 1; break; } - old_secure_tcp = ipvs->sysctl_secure_tcp; + ipvs->old_secure_tcp = ipvs->sysctl_secure_tcp; if (to_change >= 0) ip_vs_protocol_timeout_change(ipvs, ipvs->sysctl_secure_tcp > 1); @@ -1197,7 +1196,8 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, struct ip_vs_service *svc = NULL; /* increase the module use count */ - ip_vs_use_count_inc(); + if (!ip_vs_use_count_inc()) + return -ENOPROTOOPT; /* Lookup the scheduler by 'u->sched_name' */ if (strcmp(u->sched_name, "none")) { @@ -2395,9 +2395,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (copy_from_user(arg, user, len) != 0) return -EFAULT; - /* increase the module use count */ - ip_vs_use_count_inc(); - /* Handle daemons since they have another lock */ if (cmd == IP_VS_SO_SET_STARTDAEMON || cmd == IP_VS_SO_SET_STOPDAEMON) { @@ -2410,13 +2407,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) ret = -EINVAL; if (strscpy(cfg.mcast_ifn, dm->mcast_ifn, sizeof(cfg.mcast_ifn)) <= 0) - goto out_dec; + return ret; cfg.syncid = dm->syncid; ret = start_sync_thread(ipvs, &cfg, dm->state); } else { ret = stop_sync_thread(ipvs, dm->state); } - goto out_dec; + return ret; } mutex_lock(&__ip_vs_mutex); @@ -2511,10 +2508,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) out_unlock: mutex_unlock(&__ip_vs_mutex); - out_dec: - /* decrease the module use count */ - ip_vs_use_count_dec(); - return ret; } diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c index 0df17caa8af6..714e7e05c102 100644 --- a/net/netfilter/ipvs/ip_vs_pe.c +++ b/net/netfilter/ipvs/ip_vs_pe.c @@ -67,7 +67,8 @@ int register_ip_vs_pe(struct ip_vs_pe *pe) struct ip_vs_pe *tmp; /* increase the module use count */ - ip_vs_use_count_inc(); + if (!ip_vs_use_count_inc()) + return -ENOENT; mutex_lock(&ip_vs_pe_mutex); /* Make sure that the pe with this name doesn't exist diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index a2ff7d746ebf..3bd0ff36dc41 100644 --- a/net/netfilter/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c @@ -184,7 +184,8 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler) } /* increase the module use count */ - ip_vs_use_count_inc(); + if (!ip_vs_use_count_inc()) + return -ENOENT; mutex_lock(&ip_vs_sched_mutex); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index b578ebb3d7ef..b373e053ff9a 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1771,6 +1771,10 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %zd bytes\n", sizeof(struct ip_vs_sync_conn_v0)); + /* increase the module use count */ + if (!ip_vs_use_count_inc()) + return -ENOPROTOOPT; + /* Do not hold one mutex and then to block on another */ for (;;) { rtnl_lock(); @@ -1901,9 +1905,6 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); - /* increase the module use count */ - ip_vs_use_count_inc(); - return 0; out: @@ -1933,11 +1934,17 @@ out: } kfree(ti); } + + /* decrease the module use count */ + ip_vs_use_count_dec(); return result; out_early: mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); + + /* decrease the module use count */ + ip_vs_use_count_dec(); return result; } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 4527921b1c3a..97d411033f8a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -209,7 +209,7 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu) struct rtable *ort = skb_rtable(skb); if (!skb->dev && sk && sk_fullsock(sk)) - ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); + ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu, true); } static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index aa08a5786c05..5581d70752e3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -543,6 +543,18 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, net_eq(net, nf_ct_net(ct)); } +static inline bool +nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2) +{ + return nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple) && + nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple, + &ct2->tuplehash[IP_CT_DIR_REPLY].tuple) && + nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL) && + nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_REPLY) && + net_eq(nf_ct_net(ct1), nf_ct_net(ct2)); +} + /* caller must hold rcu readlock and none of the nf_conntrack_locks */ static void nf_ct_gc_expired(struct nf_conn *ct) { @@ -736,19 +748,21 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, /* This is the conntrack entry already in hashes that won race. */ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); const struct nf_conntrack_l4proto *l4proto; + enum ip_conntrack_info oldinfo; + struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); if (l4proto->allow_clash && - ((ct->status & IPS_NAT_DONE_MASK) == 0) && !nf_ct_is_dying(ct) && atomic_inc_not_zero(&ct->ct_general.use)) { - enum ip_conntrack_info oldinfo; - struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo); - - nf_ct_acct_merge(ct, ctinfo, loser_ct); - nf_conntrack_put(&loser_ct->ct_general); - nf_ct_set(skb, ct, oldinfo); - return NF_ACCEPT; + if (((ct->status & IPS_NAT_DONE_MASK) == 0) || + nf_ct_match(ct, loser_ct)) { + nf_ct_acct_merge(ct, ctinfo, loser_ct); + nf_conntrack_put(&loser_ct->ct_general); + nf_ct_set(skb, ct, oldinfo); + return NF_ACCEPT; + } + nf_ct_put(ct); } NF_CT_STAT_INC(net, drop); return NF_DROP; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c781c9a1a697..39a32edaa92c 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -3422,6 +3422,9 @@ static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list) list_for_each_entry(net, net_exit_list, exit_list) ctnetlink_net_exit(net); + + /* wait for other cpus until they are done with ctnl_notifiers */ + synchronize_rcu(); } static struct pernet_operations ctnetlink_net_ops = { diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 37efcc1c8887..b06ef4c62522 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -138,7 +138,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, goto err; } - if (!skb_dst_force(skb) && state->hook != NF_INET_PRE_ROUTING) { + if (skb_dst(skb) && !skb_dst_force(skb)) { status = -ENETDOWN; goto err; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7ef126489d4e..91490446ebb4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3917,14 +3917,20 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_DATA] == NULL && !(flags & NFT_SET_ELEM_INTERVAL_END)) return -EINVAL; - if (nla[NFTA_SET_ELEM_DATA] != NULL && - flags & NFT_SET_ELEM_INTERVAL_END) - return -EINVAL; } else { if (nla[NFTA_SET_ELEM_DATA] != NULL) return -EINVAL; } + if ((flags & NFT_SET_ELEM_INTERVAL_END) && + (nla[NFTA_SET_ELEM_DATA] || + nla[NFTA_SET_ELEM_OBJREF] || + nla[NFTA_SET_ELEM_TIMEOUT] || + nla[NFTA_SET_ELEM_EXPIRATION] || + nla[NFTA_SET_ELEM_USERDATA] || + nla[NFTA_SET_ELEM_EXPR])) + return -EINVAL; + timeout = 0; if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index d33ce6d5ebce..dd1030f5dd5e 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -733,6 +733,8 @@ static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = { [NFCTH_NAME] = { .type = NLA_NUL_STRING, .len = NF_CT_HELPER_NAME_LEN-1 }, [NFCTH_QUEUE_NUM] = { .type = NLA_U32, }, + [NFCTH_PRIV_DATA_LEN] = { .type = NLA_U32, }, + [NFCTH_STATUS] = { .type = NLA_U32, }, }; static const struct nfnl_callback nfnl_cthelper_cb[NFNL_MSG_CTHELPER_MAX] = { diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 7344ec7fff2a..8281656808ae 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -291,6 +291,24 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) module_put(me); } +static int nft_extension_dump_info(struct sk_buff *skb, int attr, + const void *info, + unsigned int size, unsigned int user_size) +{ + unsigned int info_size, aligned_size = XT_ALIGN(size); + struct nlattr *nla; + + nla = nla_reserve(skb, attr, aligned_size); + if (!nla) + return -1; + + info_size = user_size ? : size; + memcpy(nla_data(nla), info, info_size); + memset(nla_data(nla) + info_size, 0, aligned_size - info_size); + + return 0; +} + static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct xt_target *target = expr->ops->data; @@ -298,7 +316,8 @@ static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_string(skb, NFTA_TARGET_NAME, target->name) || nla_put_be32(skb, NFTA_TARGET_REV, htonl(target->revision)) || - nla_put(skb, NFTA_TARGET_INFO, XT_ALIGN(target->targetsize), info)) + nft_extension_dump_info(skb, NFTA_TARGET_INFO, info, + target->targetsize, target->usersize)) goto nla_put_failure; return 0; @@ -534,7 +553,8 @@ static int __nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr, if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) || nla_put_be32(skb, NFTA_MATCH_REV, htonl(match->revision)) || - nla_put(skb, NFTA_MATCH_INFO, XT_ALIGN(match->matchsize), info)) + nft_extension_dump_info(skb, NFTA_MATCH_INFO, info, + match->matchsize, match->usersize)) goto nla_put_failure; return 0; diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index e110b0ebbf58..19446a89a2a8 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -121,6 +121,7 @@ static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 }, [NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_FLAGS] = { .type = NLA_U32 }, }; static int nft_payload_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 33aa2ac3a62e..73f8f99b1193 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -442,6 +442,23 @@ static bool nft_hash_lookup_fast(const struct net *net, return false; } +static u32 nft_jhash(const struct nft_set *set, const struct nft_hash *priv, + const struct nft_set_ext *ext) +{ + const struct nft_data *key = nft_set_ext_key(ext); + u32 hash, k1; + + if (set->klen == 4) { + k1 = *(u32 *)key; + hash = jhash_1word(k1, priv->seed); + } else { + hash = jhash(key, set->klen, priv->seed); + } + hash = reciprocal_scale(hash, priv->buckets); + + return hash; +} + static int nft_hash_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_set_ext **ext) @@ -451,8 +468,7 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set, u8 genmask = nft_genmask_next(net); u32 hash; - hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed); - hash = reciprocal_scale(hash, priv->buckets); + hash = nft_jhash(set, priv, &this->ext); hlist_for_each_entry(he, &priv->table[hash], node) { if (!memcmp(nft_set_ext_key(&this->ext), nft_set_ext_key(&he->ext), set->klen) && @@ -491,8 +507,7 @@ static void *nft_hash_deactivate(const struct net *net, u8 genmask = nft_genmask_next(net); u32 hash; - hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed); - hash = reciprocal_scale(hash, priv->buckets); + hash = nft_jhash(set, priv, &this->ext); hlist_for_each_entry(he, &priv->table[hash], node) { if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val, set->klen) || diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index b0617905e6e6..6cdc9a0364ea 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -246,6 +246,7 @@ static int idletimer_resume(struct notifier_block *notifier, if (!timer->suspend_time_valid) break; timer->suspend_time_valid = false; + spin_lock_bh(×tamp_lock); if (!timer->active) { spin_unlock_bh(×tamp_lock); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index fe8e8a1622b5..186f97f1c6c0 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -845,6 +845,8 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3); } +#define HASHLIMIT_MAX_SIZE 1048576 + static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, struct xt_hashlimit_htable **hinfo, struct hashlimit_cfg3 *cfg, @@ -855,6 +857,14 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, if (cfg->gc_interval == 0 || cfg->expire == 0) return -EINVAL; + if (cfg->size > HASHLIMIT_MAX_SIZE) { + cfg->size = HASHLIMIT_MAX_SIZE; + pr_info_ratelimited("size too large, truncated to %u\n", cfg->size); + } + if (cfg->max > HASHLIMIT_MAX_SIZE) { + cfg->max = HASHLIMIT_MAX_SIZE; + pr_info_ratelimited("max too large, truncated to %u\n", cfg->max); + } if (par->family == NFPROTO_IPV4) { if (cfg->srcmask > 32 || cfg->dstmask > 32) return -EINVAL; diff --git a/net/netfilter/xt_quota2.c b/net/netfilter/xt_quota2.c index 24b774263aa6..c42724469759 100644 --- a/net/netfilter/xt_quota2.c +++ b/net/netfilter/xt_quota2.c @@ -296,8 +296,8 @@ static void quota_mt2_destroy(const struct xt_mtdtor_param *par) } list_del(&e->list); - remove_proc_entry(e->name, proc_xt_quota); spin_unlock_bh(&counter_list_lock); + remove_proc_entry(e->name, proc_xt_quota); kfree(e); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c67abda5d639..3e4e07559272 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -997,7 +997,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (nlk->netlink_bind && groups) { int group; - for (group = 0; group < nlk->ngroups; group++) { + /* nl_groups is a u32, so cap the maximum groups we can bind */ + for (group = 0; group < BITS_PER_TYPE(u32); group++) { if (!test_bit(group, &groups)) continue; err = nlk->netlink_bind(net, group + 1); @@ -1016,7 +1017,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, netlink_insert(sk, nladdr->nl_pid) : netlink_autobind(sock); if (err) { - netlink_undo_bind(nlk->ngroups, groups, sk); + netlink_undo_bind(BITS_PER_TYPE(u32), groups, sk); goto unlock; } } @@ -2388,7 +2389,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, in_skb->len)) WARN_ON(nla_put_u32(skb, NLMSGERR_ATTR_OFFS, (u8 *)extack->bad_attr - - in_skb->data)); + (u8 *)nlh)); } else { if (extack->cookie_len) WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE, diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 6bf14f4f4b42..ae315dbd3732 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -193,13 +193,20 @@ exit: void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, struct sk_buff *skb) { - u8 gate = hdev->pipes[pipe].gate; u8 status = NFC_HCI_ANY_OK; struct hci_create_pipe_resp *create_info; struct hci_delete_pipe_noti *delete_info; struct hci_all_pipe_cleared_noti *cleared_info; + u8 gate; - pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd); + pr_debug("from pipe %x cmd %x\n", pipe, cmd); + + if (pipe >= NFC_HCI_MAX_PIPES) { + status = NFC_HCI_ANY_E_NOK; + goto exit; + } + + gate = hdev->pipes[pipe].gate; switch (cmd) { case NFC_HCI_ADM_NOTIFY_PIPE_CREATED: @@ -387,8 +394,14 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, struct sk_buff *skb) { int r = 0; - u8 gate = hdev->pipes[pipe].gate; + u8 gate; + if (pipe >= NFC_HCI_MAX_PIPES) { + pr_err("Discarded event %x to invalid pipe %x\n", event, pipe); + goto exit; + } + + gate = hdev->pipes[pipe].gate; if (gate == NFC_HCI_INVALID_GATE) { pr_err("Discarded event %x to unopened pipe %x\n", event, pipe); goto exit; diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index a51bfba19b9e..694a43c05eb9 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -567,11 +567,11 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, if (sk->sk_state == LLCP_LISTEN) return llcp_accept_poll(sk); - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= POLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; if (sk->sk_state == LLCP_CLOSED) diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index 8d104c1db628..6f5addb5225c 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c @@ -348,7 +348,7 @@ static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data, nu->rx_packet_len = -1; nu->rx_skb = nci_skb_alloc(nu->ndev, NCI_MAX_PACKET_SIZE, - GFP_KERNEL); + GFP_ATOMIC); if (!nu->rx_skb) return -ENOMEM; } diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 5f2acd029da5..6199f4334fbd 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -55,7 +55,10 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = { [NFC_ATTR_LLC_SDP] = { .type = NLA_NESTED }, [NFC_ATTR_FIRMWARE_NAME] = { .type = NLA_STRING, .len = NFC_FIRMWARE_NAME_MAXSIZE }, + [NFC_ATTR_SE_INDEX] = { .type = NLA_U32 }, [NFC_ATTR_SE_APDU] = { .type = NLA_BINARY }, + [NFC_ATTR_VENDOR_ID] = { .type = NLA_U32 }, + [NFC_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 }, [NFC_ATTR_VENDOR_DATA] = { .type = NLA_BINARY }, }; @@ -1100,7 +1103,6 @@ static int nfc_genl_llc_set_params(struct sk_buff *skb, struct genl_info *info) local = nfc_llcp_find_local(dev); if (!local) { - nfc_put_device(dev); rc = -ENODEV; goto exit; } @@ -1160,7 +1162,6 @@ static int nfc_genl_llc_sdreq(struct sk_buff *skb, struct genl_info *info) local = nfc_llcp_find_local(dev); if (!local) { - nfc_put_device(dev); rc = -ENODEV; goto exit; } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 0171b27a2b81..737e37b28d93 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -879,6 +879,17 @@ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, } err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype); + if (err == NF_ACCEPT && + ct->status & IPS_SRC_NAT && ct->status & IPS_DST_NAT) { + if (maniptype == NF_NAT_MANIP_SRC) + maniptype = NF_NAT_MANIP_DST; + else + maniptype = NF_NAT_MANIP_SRC; + + err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, + maniptype); + } + /* Mark NAT done if successful and update the flow key. */ if (err == NF_ACCEPT) ovs_nat_update_key(key, skb, maniptype); @@ -1083,7 +1094,8 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key, &info->labels.mask); if (err) return err; - } else if (labels_nonzero(&info->labels.mask)) { + } else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) && + labels_nonzero(&info->labels.mask)) { err = ovs_ct_set_labels(ct, key, &info->labels.value, &info->labels.mask); if (err) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 2de2a923ff2b..3248cf04d0b3 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -724,9 +724,13 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts, { size_t len = NLMSG_ALIGN(sizeof(struct ovs_header)); - /* OVS_FLOW_ATTR_UFID */ + /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback + * see ovs_nla_put_identifier() + */ if (sfid && ovs_identifier_is_ufid(sfid)) len += nla_total_size(sfid->ufid_len); + else + len += nla_total_size(ovs_key_attr_size()); /* OVS_FLOW_ATTR_KEY */ if (!sfid || should_fill_key(sfid, ufid_flags)) @@ -902,7 +906,10 @@ static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, info->snd_portid, info->snd_seq, 0, cmd, ufid_flags); - BUG_ON(retval < 0); + if (WARN_ON_ONCE(retval < 0)) { + kfree_skb(skb); + skb = ERR_PTR(retval); + } return skb; } @@ -1365,7 +1372,10 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) OVS_FLOW_CMD_DEL, ufid_flags); rcu_read_unlock(); - BUG_ON(err < 0); + if (WARN_ON_ONCE(err < 0)) { + kfree_skb(reply); + goto out_free; + } ovs_notify(&dp_flow_genl_family, reply, info); } else { @@ -1373,6 +1383,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) } } +out_free: ovs_flow_free(flow, true); return 0; unlock: diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index b9377afeaba4..1083b5e90134 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -44,7 +44,8 @@ static struct internal_dev *internal_dev_priv(struct net_device *netdev) } /* Called with rcu_read_lock_bh. */ -static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) +static netdev_tx_t +internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) { int len, err; @@ -63,7 +64,7 @@ static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev) } else { netdev->stats.tx_errors++; } - return 0; + return NETDEV_TX_OK; } static int internal_dev_open(struct net_device *netdev) @@ -156,7 +157,7 @@ static void do_setup(struct net_device *netdev) netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH | IFF_PHONY_HEADROOM | IFF_NO_QUEUE; netdev->needs_free_netdev = true; - netdev->priv_destructor = internal_dev_destructor; + netdev->priv_destructor = NULL; netdev->ethtool_ops = &internal_dev_ethtool_ops; netdev->rtnl_link_ops = &internal_dev_link_ops; @@ -178,7 +179,6 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) struct internal_dev *internal_dev; struct net_device *dev; int err; - bool free_vport = true; vport = ovs_vport_alloc(0, &ovs_internal_vport_ops, parms); if (IS_ERR(vport)) { @@ -210,10 +210,9 @@ static struct vport *internal_dev_create(const struct vport_parms *parms) rtnl_lock(); err = register_netdevice(vport->dev); - if (err) { - free_vport = false; + if (err) goto error_unlock; - } + vport->dev->priv_destructor = internal_dev_destructor; dev_set_promiscuity(vport->dev, 1); rtnl_unlock(); @@ -227,8 +226,7 @@ error_unlock: error_free_netdev: free_netdev(dev); error_free_vport: - if (free_vport) - ovs_vport_free(vport); + ovs_vport_free(vport); error: return ERR_PTR(err); } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1f86bf0d1649..387589a4a340 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -593,7 +593,8 @@ static int prb_calc_retire_blk_tmo(struct packet_sock *po, msec = 1; div = ecmd.base.speed / 1000; } - } + } else + return DEFAULT_PRB_RETIRE_TOV; mbits = (blk_size_in_bytes * 8) / (1024 * 1024); @@ -1336,15 +1337,21 @@ static void packet_sock_destruct(struct sock *sk) static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb) { - u32 rxhash; + u32 *history = po->rollover->history; + u32 victim, rxhash; int i, count = 0; rxhash = skb_get_hash(skb); for (i = 0; i < ROLLOVER_HLEN; i++) - if (po->rollover->history[i] == rxhash) + if (READ_ONCE(history[i]) == rxhash) count++; - po->rollover->history[prandom_u32() % ROLLOVER_HLEN] = rxhash; + victim = prandom_u32() % ROLLOVER_HLEN; + + /* Avoid dirtying the cache line if possible */ + if (READ_ONCE(history[victim]) != rxhash) + WRITE_ONCE(history[victim], rxhash); + return count > (ROLLOVER_HLEN >> 1); } @@ -2292,6 +2299,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, TP_STATUS_KERNEL, (macoff+snaplen)); if (!h.raw) goto drop_n_account; + + if (do_vnet && + virtio_net_hdr_from_skb(skb, h.raw + macoff - + sizeof(struct virtio_net_hdr), + vio_le(), true, 0)) + goto drop_n_account; + if (po->tp_version <= TPACKET_V2) { packet_increment_rx_head(po, &po->rx_ring); /* @@ -2304,12 +2318,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, status |= TP_STATUS_LOSING; } - if (do_vnet && - virtio_net_hdr_from_skb(skb, h.raw + macoff - - sizeof(struct virtio_net_hdr), - vio_le(), true, 0)) - goto drop_n_account; - po->stats.stats1.tp_packets++; if (copy_skb) { status |= TP_STATUS_COPY; @@ -3406,20 +3414,29 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { + int copy_len; + /* If the address length field is there to be filled * in, we fill it in now. */ if (sock->type == SOCK_PACKET) { __sockaddr_check_size(sizeof(struct sockaddr_pkt)); msg->msg_namelen = sizeof(struct sockaddr_pkt); + copy_len = msg->msg_namelen; } else { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); + copy_len = msg->msg_namelen; + if (msg->msg_namelen < sizeof(struct sockaddr_ll)) { + memset(msg->msg_name + + offsetof(struct sockaddr_ll, sll_addr), + 0, sizeof(sll->sll_addr)); + msg->msg_namelen = sizeof(struct sockaddr_ll); + } } - memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, - msg->msg_namelen); + memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } if (pkt_sk(sk)->auxdata) { diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 1b050dd17393..a1df36f3bb6e 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -352,9 +352,9 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock, if (sk->sk_state == TCP_CLOSE) return POLLERR; - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; - if (!skb_queue_empty(&pn->ctrlreq_queue)) + if (!skb_queue_empty_lockless(&pn->ctrlreq_queue)) mask |= POLLPRI; if (!mask && sk->sk_state == TCP_CLOSE_WAIT) return POLLHUP; diff --git a/net/psample/psample.c b/net/psample/psample.c index 4cea353221da..30e8239bd774 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -223,7 +223,7 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, data_len = PSAMPLE_MAX_PACKET_SIZE - meta_len - NLA_HDRLEN - NLA_ALIGNTO; - nl_skb = genlmsg_new(meta_len + data_len, GFP_ATOMIC); + nl_skb = genlmsg_new(meta_len + nla_total_size(data_len), GFP_ATOMIC); if (unlikely(!nl_skb)) return; diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c index 9252ad126335..ac46d8961b61 100644 --- a/net/rds/ib_stats.c +++ b/net/rds/ib_stats.c @@ -42,7 +42,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats); static const char *const rds_ib_stat_names[] = { "ib_connect_raced", "ib_listen_closed_stale", - "s_ib_evt_handler_call", + "ib_evt_handler_call", "ib_tasklet_call", "ib_tx_cq_event", "ib_tx_ring_full", diff --git a/net/rds/stats.c b/net/rds/stats.c index 73be187d389e..6bbab4d74c4f 100644 --- a/net/rds/stats.c +++ b/net/rds/stats.c @@ -76,6 +76,8 @@ static const char *const rds_stat_names[] = { "cong_update_received", "cong_send_error", "cong_send_blocked", + "recv_bytes_added_to_sock", + "recv_bytes_freed_fromsock", }; void rds_stats_info_copy(struct rds_info_iterator *iter, diff --git a/net/rfkill/core.c b/net/rfkill/core.c index d223a86e9778..fdca887aa731 100644 --- a/net/rfkill/core.c +++ b/net/rfkill/core.c @@ -993,10 +993,13 @@ static void rfkill_sync_work(struct work_struct *work) int __must_check rfkill_register(struct rfkill *rfkill) { static unsigned long rfkill_no; - struct device *dev = &rfkill->dev; + struct device *dev; int error; - BUG_ON(!rfkill); + if (!rfkill) + return -EINVAL; + + dev = &rfkill->dev; mutex_lock(&rfkill_global_mutex); @@ -1307,10 +1310,12 @@ static const struct file_operations rfkill_fops = { .llseek = no_llseek, }; +#define RFKILL_NAME "rfkill" + static struct miscdevice rfkill_miscdev = { - .name = "rfkill", .fops = &rfkill_fops, - .minor = MISC_DYNAMIC_MINOR, + .name = RFKILL_NAME, + .minor = RFKILL_MINOR, }; static int __init rfkill_init(void) @@ -1362,3 +1367,6 @@ static void __exit rfkill_exit(void) class_unregister(&rfkill_class); } module_exit(rfkill_exit); + +MODULE_ALIAS_MISCDEV(RFKILL_MINOR); +MODULE_ALIAS("devname:" RFKILL_NAME); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 71c7f1dd4599..b5581b0b9480 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -451,6 +451,7 @@ enum rxrpc_call_flag { RXRPC_CALL_SEND_PING, /* A ping will need to be sent */ RXRPC_CALL_PINGING, /* Ping in process */ RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ + RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */ }; /* diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index ddaa471a2607..7021725fa38a 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -505,7 +505,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); - if (conn) + if (conn && !test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) rxrpc_disconnect_call(call); for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { @@ -639,6 +639,7 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) { struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); + rxrpc_put_connection(call->conn); rxrpc_put_peer(call->peer); kfree(call->rxtx_buffer); kfree(call->rxtx_annotations); @@ -660,7 +661,6 @@ void rxrpc_cleanup_call(struct rxrpc_call *call) ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); - ASSERTCMP(call->conn, ==, NULL); /* Clean up the Rx/Tx buffer */ for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 0aa4bf09fb9c..05d17ec63635 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -762,9 +762,9 @@ void rxrpc_disconnect_client_call(struct rxrpc_call *call) struct rxrpc_net *rxnet = rxrpc_net(sock_net(&call->socket->sk)); trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); - call->conn = NULL; spin_lock(&conn->channel_lock); + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); /* Calls that have never actually been assigned a channel can simply be * discarded. If the conn didn't get used either, it will follow @@ -863,7 +863,6 @@ out: spin_unlock(&rxnet->client_conn_cache_lock); out_2: spin_unlock(&conn->channel_lock); - rxrpc_put_connection(conn); _leave(""); return; diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index a48c817b792b..af0232820597 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -207,9 +207,8 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) __rxrpc_disconnect_call(conn, call); spin_unlock(&conn->channel_lock); - call->conn = NULL; + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); conn->idle_timestamp = jiffies; - rxrpc_put_connection(conn); } /* diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index ea506a77f3c8..18ce6f97462b 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -585,8 +585,7 @@ ack: immediate_ack, true, rxrpc_propose_ack_input_data); - if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1) - rxrpc_notify_socket(call); + rxrpc_notify_socket(call); _leave(" [queued]"); } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 5b67cb5d47f0..9619c56ef4cd 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -96,7 +96,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, */ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) { - struct rxrpc_connection *conn = NULL; + struct rxrpc_connection *conn; struct rxrpc_ack_buffer *pkt; struct msghdr msg; struct kvec iov[2]; @@ -106,18 +106,14 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) int ret; u8 reason; - spin_lock_bh(&call->lock); - if (call->conn) - conn = rxrpc_get_connection_maybe(call->conn); - spin_unlock_bh(&call->lock); - if (!conn) + if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) return -ECONNRESET; pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); - if (!pkt) { - rxrpc_put_connection(conn); + if (!pkt) return -ENOMEM; - } + + conn = call->conn; msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; @@ -204,7 +200,6 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) } out: - rxrpc_put_connection(conn); kfree(pkt); return ret; } @@ -214,20 +209,18 @@ out: */ int rxrpc_send_abort_packet(struct rxrpc_call *call) { - struct rxrpc_connection *conn = NULL; + struct rxrpc_connection *conn; struct rxrpc_abort_buffer pkt; struct msghdr msg; struct kvec iov[1]; rxrpc_serial_t serial; int ret; - spin_lock_bh(&call->lock); - if (call->conn) - conn = rxrpc_get_connection_maybe(call->conn); - spin_unlock_bh(&call->lock); - if (!conn) + if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) return -ECONNRESET; + conn = call->conn; + msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; msg.msg_control = NULL; @@ -255,7 +248,6 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1, sizeof(pkt)); - rxrpc_put_connection(conn); return ret; } @@ -404,6 +396,9 @@ send_fragmentable: } break; #endif + + default: + BUG(); } up_write(&conn->params.local->defrag_sem); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 7f749505e699..7d73e8ce6660 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -150,6 +150,9 @@ void rxrpc_error_report(struct sock *sk) struct rxrpc_peer *peer; struct sk_buff *skb; + if (unlikely(!local)) + return; + _enter("%p{%d}", sk, local->debug_id); skb = sock_dequeue_err_skb(sk); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 016e293681b8..a980b49d7a4f 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -586,6 +586,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) case RXRPC_CALL_SERVER_PREALLOC: case RXRPC_CALL_SERVER_SECURING: case RXRPC_CALL_SERVER_ACCEPTING: + rxrpc_put_call(call, rxrpc_call_put); ret = -EBUSY; goto error_release_sock; default: diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 4444d7e755e6..8ae0addb7657 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1072,10 +1072,16 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, static int tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid, int ovr) { - int ret = 0; + int loop, ret; LIST_HEAD(actions); - ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions); + for (loop = 0; loop < 10; loop++) { + ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, + &actions); + if (ret != -EAGAIN) + break; + } + if (ret) return ret; @@ -1122,10 +1128,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, */ if (n->nlmsg_flags & NLM_F_REPLACE) ovr = 1; -replay: ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr); - if (ret == -EAGAIN) - goto replay; break; case RTM_DELACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 529bb064c4a4..dcfaa4f9c7c5 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -371,7 +371,11 @@ static int __init mirred_init_module(void) return err; pr_info("Mirror/redirect action on\n"); - return tcf_register_action(&act_mirred_ops, &mirred_net_ops); + err = tcf_register_action(&act_mirred_ops, &mirred_net_ops); + if (err) + unregister_netdevice_notifier(&mirred_device_notifier); + + return err; } static void __exit mirred_cleanup_module(void) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index b6f6bfad8b2a..fb0caa500ac8 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -46,7 +46,7 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla, int err = -EINVAL; int rem; - if (!nla || !n) + if (!nla) return NULL; keys_ex = kcalloc(n, sizeof(*k), GFP_KERNEL); @@ -163,6 +163,9 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(pattr); + if (!parm->nkeys) + return -EINVAL; + ksize = parm->nkeys * sizeof(struct tc_pedit_key); if (nla_len(pattr) < sizeof(*parm) + ksize) return -EINVAL; @@ -172,8 +175,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, return PTR_ERR(keys_ex); if (!tcf_idr_check(tn, parm->index, a, bind)) { - if (!parm->nkeys) - return -EINVAL; ret = tcf_idr_create(tn, parm->index, est, a, &act_pedit_ops, bind, false); if (ret) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 1879665e5a2b..8974bd25c71e 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -160,6 +160,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (!atomic_read(&head->ht.nelems)) return -1; + flow_dissector_init_keys(&skb_key.control, &skb_key.basic); fl_clear_masked_range(&skb_key, &head->mask); info = skb_tunnel_info(skb); @@ -445,6 +446,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_IP_TOS_MASK] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_IP_TTL] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_IP_TTL_MASK] = { .type = NLA_U8 }, + [TCA_FLOWER_FLAGS] = { .type = NLA_U32 }, }; static void fl_set_key_val(struct nlattr **tb, diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index d8fd152779c8..a985f91e8b47 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -136,6 +136,7 @@ static void *mall_get(struct tcf_proto *tp, u32 handle) static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { [TCA_MATCHALL_UNSPEC] = { .type = NLA_UNSPEC }, [TCA_MATCHALL_CLASSID] = { .type = NLA_U32 }, + [TCA_MATCHALL_FLAGS] = { .type = NLA_U32 }, }; static int mall_set_parms(struct net *net, struct tcf_proto *tp, diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index cf325625c99d..89259819e9ed 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -475,10 +475,8 @@ static u32 gen_tunnel(struct rsvp_head *data) static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { [TCA_RSVP_CLASSID] = { .type = NLA_U32 }, - [TCA_RSVP_DST] = { .type = NLA_BINARY, - .len = RSVP_DST_LEN * sizeof(u32) }, - [TCA_RSVP_SRC] = { .type = NLA_BINARY, - .len = RSVP_DST_LEN * sizeof(u32) }, + [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) }, + [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) }, [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) }, }; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 75c7c7cc7499..796b4e1beb12 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -351,23 +351,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, cp->fall_through = p->fall_through; cp->tp = tp; - if (p->perfect) { - int i; - - if (tcindex_alloc_perfect_hash(cp) < 0) - goto errout; - for (i = 0; i < cp->hash; i++) - cp->perfect[i].res = p->perfect[i].res; - balloc = 1; - } - cp->h = p->h; - - err = tcindex_filter_result_init(&new_filter_result); - if (err < 0) - goto errout1; - if (old_r) - cr = r->res; - if (tb[TCA_TCINDEX_HASH]) cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); @@ -377,6 +360,33 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (tb[TCA_TCINDEX_SHIFT]) cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); + if (!cp->hash) { + /* Hash not specified, use perfect hash if the upper limit + * of the hashing index is below the threshold. + */ + if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) + cp->hash = (cp->mask >> cp->shift) + 1; + else + cp->hash = DEFAULT_HASH_SIZE; + } + + if (p->perfect) { + int i; + + if (tcindex_alloc_perfect_hash(cp) < 0) + goto errout; + for (i = 0; i < min(cp->hash, p->hash); i++) + cp->perfect[i].res = p->perfect[i].res; + balloc = 1; + } + cp->h = p->h; + + err = tcindex_filter_result_init(&new_filter_result); + if (err < 0) + goto errout_alloc; + if (old_r) + cr = r->res; + err = -EBUSY; /* Hash already allocated, make sure that we still meet the @@ -394,16 +404,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (tb[TCA_TCINDEX_FALL_THROUGH]) cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]); - if (!cp->hash) { - /* Hash not specified, use perfect hash if the upper limit - * of the hashing index is below the threshold. - */ - if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) - cp->hash = (cp->mask >> cp->shift) + 1; - else - cp->hash = DEFAULT_HASH_SIZE; - } - if (!cp->perfect && !cp->h) cp->alloc_hash = cp->hash; @@ -502,7 +502,6 @@ errout_alloc: tcindex_free_perfect_hash(cp); else if (balloc == 2) kfree(cp->h); -errout1: tcf_exts_destroy(&new_filter_result.exts); errout: kfree(cp); diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 03b677bc0700..a48dca26f178 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -242,6 +242,9 @@ static int tcf_em_validate(struct tcf_proto *tp, goto errout; if (em->ops->change) { + err = -EINVAL; + if (em_hdr->flags & TCF_EM_SIMPLE) + goto errout; err = em->ops->change(net, data, data_len, em); if (err < 0) goto errout; @@ -267,12 +270,12 @@ static int tcf_em_validate(struct tcf_proto *tp, } em->data = (unsigned long) v; } + em->datalen = data_len; } } em->matchid = em_hdr->matchid; em->flags = em_hdr->flags; - em->datalen = data_len; em->net = net; err = 0; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 637949b576c6..296e95f72eb1 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1695,6 +1695,8 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, cl = cops->find(q, portid); if (!cl) return; + if (!cops->tcf_block) + return; block = cops->tcf_block(q, cl); if (!block) return; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index f50eb87cfe79..66f1d40b910a 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -695,6 +695,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 }, [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 }, [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 }, + [TCA_FQ_ORPHAN_MASK] = { .type = NLA_U32 }, [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 }, }; @@ -734,7 +735,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_FQ_QUANTUM]) { u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]); - if (quantum > 0) + if (quantum > 0 && quantum <= (1 << 20)) q->quantum = quantum; else err = -EINVAL; diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index fa256f8038af..c73475c3a464 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -4,11 +4,11 @@ * Copyright (C) 2013 Nandita Dukkipati */ -#include #include #include #include #include +#include #include #include @@ -125,7 +125,7 @@ struct wdrr_bucket { struct hhf_sched_data { struct wdrr_bucket buckets[WDRR_BUCKET_CNT]; - u32 perturbation; /* hash perturbation */ + siphash_key_t perturbation; /* hash perturbation */ u32 quantum; /* psched_mtu(qdisc_dev(sch)); */ u32 drop_overlimit; /* number of times max qdisc packet * limit was hit @@ -263,7 +263,7 @@ static enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch) } /* Get hashed flow-id of the skb. */ - hash = skb_get_hash_perturb(skb, q->perturbation); + hash = skb_get_hash_perturb(skb, &q->perturbation); /* Check if this packet belongs to an already established HH flow. */ flow_pos = hash & HHF_BIT_MASK; @@ -578,7 +578,7 @@ static int hhf_init(struct Qdisc *sch, struct nlattr *opt) sch->limit = 1000; q->quantum = psched_mtu(qdisc_dev(sch)); - q->perturbation = prandom_u32(); + get_random_bytes(&q->perturbation, sizeof(q->perturbation)); INIT_LIST_HEAD(&q->new_buckets); INIT_LIST_HEAD(&q->old_buckets); diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index f3a3e507422b..442ac9c3f16f 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -191,7 +191,8 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct netdev_queue *dev_queue = mq_queue_get(sch, cl); sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 || + if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats, + &sch->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) return -1; return 0; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 6bcdfe6e7b63..bb8d3fbc13bb 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -366,8 +366,8 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); sch = dev_queue->qdisc_sleeping; - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &sch->bstats) < 0 || + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, + sch->cpu_bstats, &sch->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0) return -1; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index ff4fc3e0facd..65aa03d46857 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -340,7 +340,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl, cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &cl_q->bstats) < 0 || + d, cl_q->cpu_bstats, &cl_q->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) return -1; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 787aa52e5991..328b043edf07 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -431,8 +431,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct netem_skb_cb *cb; struct sk_buff *skb2; struct sk_buff *segs = NULL; - unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb); - int nb = 0; + unsigned int prev_len = qdisc_pkt_len(skb); int count = 1; int rc = NET_XMIT_SUCCESS; int rc_drop = NET_XMIT_DROP; @@ -469,7 +468,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, * skb will be queued. */ if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { - struct Qdisc *rootq = qdisc_root(sch); + struct Qdisc *rootq = qdisc_root_bh(sch); u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ q->duplicate = 0; @@ -489,6 +488,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, segs = netem_segment(skb, sch, to_free); if (!segs) return rc_drop; + qdisc_skb_cb(segs)->pkt_len = segs->len; } else { segs = skb; } @@ -504,6 +504,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb)) { qdisc_drop(skb, sch, to_free); + skb = NULL; goto finish_segs; } @@ -579,6 +580,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, finish_segs: if (segs) { + unsigned int len, last_len; + int nb; + + len = skb ? skb->len : 0; + nb = skb ? 1 : 0; + while (segs) { skb2 = segs->next; segs->next = NULL; @@ -594,9 +601,10 @@ finish_segs: } segs = skb2; } - sch->q.qlen += nb; - if (nb > 1) - qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len); + /* Parent qdiscs accounted for 1 skb of size @prev_len */ + qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len)); + } else if (!skb) { + return NET_XMIT_DROP; } return NET_XMIT_SUCCESS; } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 2dd6c68ae91e..ff6bc7cf6cbd 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -244,8 +244,14 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct prio_sched_data *q = qdisc_priv(sch); unsigned long band = arg - 1; - if (new == NULL) - new = &noop_qdisc; + if (!new) { + new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, arg)); + if (!new) + new = &noop_qdisc; + else + qdisc_hash_add(new, true); + } *old = qdisc_replace(sch, new, &q->queues[band]); return 0; @@ -298,7 +304,7 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl, cl_q = q->queues[cl - 1]; if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), - d, NULL, &cl_q->bstats) < 0 || + d, cl_q->cpu_bstats, &cl_q->bstats) < 0 || gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0) return -1; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index cc39e170b4aa..04f15e0aeaa8 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include @@ -49,7 +49,7 @@ struct sfb_bucket { * (Section 4.4 of SFB reference : moving hash functions) */ struct sfb_bins { - u32 perturbation; /* jhash perturbation */ + siphash_key_t perturbation; /* siphash key */ struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS]; }; @@ -221,7 +221,8 @@ static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_da static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q) { - q->bins[slot].perturbation = prandom_u32(); + get_random_bytes(&q->bins[slot].perturbation, + sizeof(q->bins[slot].perturbation)); } static void sfb_swap_slot(struct sfb_sched_data *q) @@ -317,9 +318,9 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* If using external classifiers, get result and record it. */ if (!sfb_classify(skb, fl, &ret, &salt)) goto other_drop; - sfbhash = jhash_1word(salt, q->bins[slot].perturbation); + sfbhash = siphash_1u32(salt, &q->bins[slot].perturbation); } else { - sfbhash = skb_get_hash_perturb(skb, q->bins[slot].perturbation); + sfbhash = skb_get_hash_perturb(skb, &q->bins[slot].perturbation); } @@ -355,7 +356,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* Inelastic flow */ if (q->double_buffering) { sfbhash = skb_get_hash_perturb(skb, - q->bins[slot].perturbation); + &q->bins[slot].perturbation); if (!sfbhash) sfbhash = 1; sfb_skb_cb(skb)->hashes[slot] = sfbhash; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 3fbf20126045..cbc54ddfe076 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -121,7 +121,7 @@ struct sfq_sched_data { u8 headdrop; u8 maxdepth; /* limit of packets per flow */ - u32 perturbation; + siphash_key_t perturbation; u8 cur_depth; /* depth of longest slot */ u8 flags; unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ @@ -160,7 +160,7 @@ static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index static unsigned int sfq_hash(const struct sfq_sched_data *q, const struct sk_buff *skb) { - return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1); + return skb_get_hash_perturb(skb, &q->perturbation) & (q->divisor - 1); } static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, @@ -609,9 +609,11 @@ static void sfq_perturbation(unsigned long arg) struct Qdisc *sch = (struct Qdisc *)arg; struct sfq_sched_data *q = qdisc_priv(sch); spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + siphash_key_t nkey; + get_random_bytes(&nkey, sizeof(nkey)); spin_lock(root_lock); - q->perturbation = prandom_u32(); + q->perturbation = nkey; if (!q->filter_list && q->tail) sfq_rehash(sch); spin_unlock(root_lock); @@ -690,7 +692,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) del_timer(&q->perturb_timer); if (q->perturb_period) { mod_timer(&q->perturb_timer, jiffies + q->perturb_period); - q->perturbation = prandom_u32(); + get_random_bytes(&q->perturbation, sizeof(q->perturbation)); } sch_tree_unlock(sch); kfree(p); @@ -746,7 +748,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) q->quantum = psched_mtu(qdisc_dev(sch)); q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); q->perturb_period = 0; - q->perturbation = prandom_u32(); + get_random_bytes(&q->perturbation, sizeof(q->perturbation)); if (opt) { int err = sfq_change(sch, opt); diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 23fec3817e0c..dd1a3bd80be5 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -80,6 +80,7 @@ static struct sctp_association *sctp_association_init( /* Discarding const is appropriate here. */ asoc->ep = (struct sctp_endpoint *)ep; asoc->base.sk = (struct sock *)sk; + asoc->base.net = sock_net(sk); sctp_endpoint_hold(asoc->ep); sock_hold(asoc->base.sk); diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 5d4079ef3de6..c71b4191df1e 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -165,6 +165,7 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Remember who we are attached to. */ ep->base.sk = sk; + ep->base.net = sock_net(sk); sock_hold(ep->base.sk); return ep; diff --git a/net/sctp/input.c b/net/sctp/input.c index 0247cc432e02..3c0affecf272 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -813,7 +813,7 @@ static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, if (!sctp_transport_hold(t)) return err; - if (!net_eq(sock_net(t->asoc->base.sk), x->net)) + if (!net_eq(t->asoc->base.net, x->net)) goto out; if (x->lport != htons(t->asoc->base.bind_addr.port)) goto out; @@ -828,7 +828,7 @@ static inline __u32 sctp_hash_obj(const void *data, u32 len, u32 seed) { const struct sctp_transport *t = data; const union sctp_addr *paddr = &t->ipaddr; - const struct net *net = sock_net(t->asoc->base.sk); + const struct net *net = t->asoc->base.net; __be16 lport = htons(t->asoc->base.bind_addr.port); __u32 addr; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 7eb06fa75730..53a66ee1331f 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -974,7 +974,7 @@ static const struct proto_ops inet6_seqpacket_ops = { .owner = THIS_MODULE, .release = inet6_release, .bind = inet6_bind, - .connect = inet_dgram_connect, + .connect = sctp_inet_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = sctp_getname, diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 6af871b1c297..bf39f317953a 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -253,6 +253,7 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb, sa->sin_port = sh->dest; sa->sin_addr.s_addr = ip_hdr(skb)->daddr; } + memset(sa->sin_zero, 0, sizeof(sa->sin_zero)); } /* Initialize an sctp_addr from a socket. */ @@ -261,6 +262,7 @@ static void sctp_v4_from_sk(union sctp_addr *addr, struct sock *sk) addr->v4.sin_family = AF_INET; addr->v4.sin_port = 0; addr->v4.sin_addr.s_addr = inet_sk(sk)->inet_rcv_saddr; + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); } /* Initialize sk->sk_rcv_saddr from sctp_addr. */ @@ -283,6 +285,7 @@ static void sctp_v4_from_addr_param(union sctp_addr *addr, addr->v4.sin_family = AF_INET; addr->v4.sin_port = port; addr->v4.sin_addr.s_addr = param->v4.addr.s_addr; + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); } /* Initialize an address parameter from a sctp_addr and return the length @@ -307,6 +310,7 @@ static void sctp_v4_dst_saddr(union sctp_addr *saddr, struct flowi4 *fl4, saddr->v4.sin_family = AF_INET; saddr->v4.sin_port = port; saddr->v4.sin_addr.s_addr = fl4->saddr; + memset(saddr->v4.sin_zero, 0, sizeof(saddr->v4.sin_zero)); } /* Compare two addresses exactly. */ @@ -329,6 +333,7 @@ static void sctp_v4_inaddr_any(union sctp_addr *addr, __be16 port) addr->v4.sin_family = AF_INET; addr->v4.sin_addr.s_addr = htonl(INADDR_ANY); addr->v4.sin_port = port; + memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero)); } /* Is this a wildcard address? */ @@ -1019,7 +1024,7 @@ static const struct proto_ops inet_seqpacket_ops = { .owner = THIS_MODULE, .release = inet_release, /* Needs to be wrapped... */ .bind = inet_bind, - .connect = inet_dgram_connect, + .connect = sctp_inet_connect, .socketpair = sock_no_socketpair, .accept = inet_accept, .getname = inet_getname, /* Semantics are different. */ diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index 75274a60b77a..6a5a3dfa6c8d 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -221,15 +221,11 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc) addrcnt++; return nla_total_size(sizeof(struct sctp_info)) - + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ - + nla_total_size(1) /* INET_DIAG_TOS */ - + nla_total_size(1) /* INET_DIAG_TCLASS */ - + nla_total_size(4) /* INET_DIAG_MARK */ - + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + nla_total_size(addrlen * asoc->peer.transport_count) + nla_total_size(addrlen * addrcnt) - + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + + inet_diag_msg_attrs_size() + + nla_total_size(sizeof(struct inet_diag_meminfo)) + 64; } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index bf0c61adb09c..482bb0a5d4d3 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1359,8 +1359,10 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, /* Generate an INIT ACK chunk. */ new_obj = sctp_make_init_ack(asoc, chunk, GFP_ATOMIC, 0); - if (!new_obj) - goto nomem; + if (!new_obj) { + error = -ENOMEM; + break; + } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(new_obj)); @@ -1382,7 +1384,8 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, if (!new_obj) { if (cmd->obj.chunk) sctp_chunk_free(cmd->obj.chunk); - goto nomem; + error = -ENOMEM; + break; } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(new_obj)); @@ -1429,8 +1432,10 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, /* Generate a SHUTDOWN chunk. */ new_obj = sctp_make_shutdown(asoc, chunk); - if (!new_obj) - goto nomem; + if (!new_obj) { + error = -ENOMEM; + break; + } sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(new_obj)); break; @@ -1760,11 +1765,17 @@ static int sctp_cmd_interpreter(enum sctp_event event_type, break; } - if (error) + if (error) { + cmd = sctp_next_cmd(commands); + while (cmd) { + if (cmd->verb == SCTP_CMD_REPLY) + sctp_chunk_free(cmd->obj.chunk); + cmd = sctp_next_cmd(commands); + } break; + } } -out: /* If this is in response to a received chunk, wait until * we are done with the packet to open the queue so that we don't * send multiple packets in response to a single request. @@ -1779,8 +1790,5 @@ out: sp->data_ready_signalled = 0; return error; -nomem: - error = -ENOMEM; - goto out; } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index a2e058127ef7..ba29d782af30 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -182,6 +182,16 @@ static inline bool sctp_chunk_length_valid(struct sctp_chunk *chunk, return true; } +/* Check for format error in an ABORT chunk */ +static inline bool sctp_err_chunk_valid(struct sctp_chunk *chunk) +{ + struct sctp_errhdr *err; + + sctp_walk_errors(err, chunk->chunk_hdr); + + return (void *)err == (void *)chunk->chunk_end; +} + /********************************************************** * These are the state functions for handling chunk events. **********************************************************/ @@ -2202,6 +2212,9 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } @@ -2245,6 +2258,9 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Stop the T2-shutdown timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); @@ -2512,6 +2528,9 @@ enum sctp_disposition sctp_sf_do_9_1_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + if (!sctp_err_chunk_valid(chunk)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } @@ -2529,16 +2548,8 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort( /* See if we have an error cause code in the chunk. */ len = ntohs(chunk->chunk_hdr->length); - if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) { - struct sctp_errhdr *err; - - sctp_walk_errors(err, chunk->chunk_hdr); - if ((void *)err != (void *)chunk->chunk_end) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, - commands); - + if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) error = ((struct sctp_errhdr *)chunk->skb->data)->cause; - } sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET)); /* ASSOC_FAILED will DELETE_TCB. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6a2532370545..09cda66d0567 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1076,7 +1076,7 @@ out: */ static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs, - int addrs_size, + int addrs_size, int flags, sctp_assoc_t *assoc_id) { struct net *net = sock_net(sk); @@ -1094,7 +1094,6 @@ static int __sctp_connect(struct sock *sk, union sctp_addr *sa_addr = NULL; void *addr_buf; unsigned short port; - unsigned int f_flags = 0; sp = sctp_sk(sk); ep = sp->ep; @@ -1244,13 +1243,7 @@ static int __sctp_connect(struct sock *sk, sp->pf->to_sk_daddr(sa_addr, sk); sk->sk_err = 0; - /* in-kernel sockets don't generally have a file allocated to them - * if all they do is call sock_create_kern(). - */ - if (sk->sk_socket->file) - f_flags = sk->sk_socket->file->f_flags; - - timeo = sock_sndtimeo(sk, f_flags & O_NONBLOCK); + timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); if (assoc_id) *assoc_id = asoc->assoc_id; @@ -1345,7 +1338,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk, { struct sockaddr *kaddrs; gfp_t gfp = GFP_KERNEL; - int err = 0; + int err = 0, flags = 0; pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n", __func__, sk, addrs, addrs_size); @@ -1365,11 +1358,18 @@ static int __sctp_setsockopt_connectx(struct sock *sk, return -ENOMEM; if (__copy_from_user(kaddrs, addrs, addrs_size)) { - err = -EFAULT; - } else { - err = __sctp_connect(sk, kaddrs, addrs_size, assoc_id); + kfree(kaddrs); + return -EFAULT; } + /* in-kernel sockets don't generally have a file allocated to them + * if all they do is call sock_create_kern(). + */ + if (sk->sk_socket->file) + flags = sk->sk_socket->file->f_flags; + + err = __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id); + kfree(kaddrs); return err; @@ -4166,31 +4166,36 @@ out_nounlock: * len: the size of the address. */ static int sctp_connect(struct sock *sk, struct sockaddr *addr, - int addr_len) + int addr_len, int flags) { - int err = 0; struct sctp_af *af; + int err = -EINVAL; lock_sock(sk); - pr_debug("%s: sk:%p, sockaddr:%p, addr_len:%d\n", __func__, sk, addr, addr_len); /* Validate addr_len before calling common connect/connectx routine. */ af = sctp_get_af_specific(addr->sa_family); - if (!af || addr_len < af->sockaddr_len) { - err = -EINVAL; - } else { - /* Pass correct addr len to common routine (so it knows there - * is only one address being passed. - */ - err = __sctp_connect(sk, addr, af->sockaddr_len, NULL); - } + if (af && addr_len >= af->sockaddr_len) + err = __sctp_connect(sk, addr, af->sockaddr_len, flags, NULL); release_sock(sk); return err; } +int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + if (addr_len < sizeof(uaddr->sa_family)) + return -EINVAL; + + if (uaddr->sa_family == AF_UNSPEC) + return -EOPNOTSUPP; + + return sctp_connect(sock->sk, uaddr, addr_len, flags); +} + /* FIXME: Write comments. */ static int sctp_disconnect(struct sock *sk, int flags) { @@ -7366,7 +7371,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) mask = 0; /* Is there any exceptional events? */ - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= POLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -7375,7 +7380,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) mask |= POLLHUP; /* Is it readable? Reconsider this code with TCP-style support. */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; /* The association is either gone or not ready. */ @@ -7711,7 +7716,7 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, if (sk_can_busy_loop(sk)) { sk_busy_loop(sk, noblock); - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) continue; } @@ -8131,7 +8136,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, newinet->inet_rcv_saddr = inet->inet_rcv_saddr; newinet->inet_dport = htons(asoc->peer.port); newinet->pmtudisc = inet->pmtudisc; - newinet->inet_id = asoc->next_tsn ^ jiffies; + newinet->inet_id = prandom_u32(); newinet->uc_ttl = inet->uc_ttl; newinet->mc_loop = 1; @@ -8298,7 +8303,6 @@ struct proto sctp_prot = { .name = "SCTP", .owner = THIS_MODULE, .close = sctp_close, - .connect = sctp_connect, .disconnect = sctp_disconnect, .accept = sctp_accept, .ioctl = sctp_ioctl, @@ -8313,7 +8317,7 @@ struct proto sctp_prot = { .backlog_rcv = sctp_backlog_rcv, .hash = sctp_hash, .unhash = sctp_unhash, - .get_port = sctp_get_port, + .no_autobind = true, .obj_size = sizeof(struct sctp_sock), .sysctl_mem = sysctl_sctp_mem, .sysctl_rmem = sysctl_sctp_rmem, @@ -8337,7 +8341,6 @@ struct proto sctpv6_prot = { .name = "SCTPv6", .owner = THIS_MODULE, .close = sctp_close, - .connect = sctp_connect, .disconnect = sctp_disconnect, .accept = sctp_accept, .ioctl = sctp_ioctl, @@ -8352,7 +8355,7 @@ struct proto sctpv6_prot = { .backlog_rcv = sctp_backlog_rcv, .hash = sctp_hash, .unhash = sctp_unhash, - .get_port = sctp_get_port, + .no_autobind = true, .obj_size = sizeof(struct sctp6_sock), .sysctl_mem = sysctl_sctp_mem, .sysctl_rmem = sysctl_sctp_rmem, diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 43105cf04bc4..4c55b759a58e 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -210,7 +210,8 @@ void sctp_transport_reset_hb_timer(struct sctp_transport *transport) /* When a data chunk is sent, reset the heartbeat interval. */ expires = jiffies + sctp_transport_timeout(transport); - if (time_before(transport->hb_timer.expires, expires) && + if ((time_before(transport->hb_timer.expires, expires) || + !timer_pending(&transport->hb_timer)) && !mod_timer(&transport->hb_timer, expires + prandom_u32_max(transport->rto))) sctp_transport_hold(transport); @@ -271,7 +272,7 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) pf->af->from_sk(&addr, sk); pf->to_sk_daddr(&t->ipaddr, sk); - dst->ops->update_pmtu(dst, sk, NULL, pmtu); + dst->ops->update_pmtu(dst, sk, NULL, pmtu, true); pf->to_sk_daddr(&addr, sk); dst = sctp_transport_dst_check(t); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index f04a037dc967..0de788fa43e9 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -103,6 +103,8 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) struct smc_link_group *lgr = conn->lgr; int reduced = 0; + if (!lgr) + return; write_lock_bh(&lgr->conns_lock); if (conn->alert_token_local) { reduced = 1; @@ -431,6 +433,8 @@ int smc_conn_create(struct smc_sock *smc, __be32 peer_in_addr, local_contact = SMC_REUSE_CONTACT; conn->lgr = lgr; smc_lgr_register_conn(conn); /* add smc conn to lgr */ + if (delayed_work_pending(&lgr->free_work)) + cancel_delayed_work(&lgr->free_work); write_unlock_bh(&lgr->conns_lock); break; } diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index d2d01cf70224..576c37d86051 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -38,15 +38,14 @@ static void smc_diag_msg_common_fill(struct smc_diag_msg *r, struct sock *sk) { struct smc_sock *smc = smc_sk(sk); + memset(r, 0, sizeof(*r)); r->diag_family = sk->sk_family; + sock_diag_save_cookie(sk, r->id.idiag_cookie); if (!smc->clcsock) return; r->id.idiag_sport = htons(smc->clcsock->sk->sk_num); r->id.idiag_dport = smc->clcsock->sk->sk_dport; r->id.idiag_if = smc->clcsock->sk->sk_bound_dev_if; - sock_diag_save_cookie(sk, r->id.idiag_cookie); - memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); - memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); r->id.idiag_src[0] = smc->clcsock->sk->sk_rcv_saddr; r->id.idiag_dst[0] = smc->clcsock->sk->sk_daddr; } diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 4410d0071515..7d89b0584944 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -513,6 +513,8 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data) struct smc_ib_device *smcibdev; smcibdev = ib_get_client_data(ibdev, &smc_ib_client); + if (!smcibdev || smcibdev->ibdev != ibdev) + return; ib_set_client_data(ibdev, &smc_ib_client, NULL); spin_lock(&smc_ib_devices.lock); list_del_init(&smcibdev->list); /* remove from smc_ib_devices */ diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index de4537f66832..ed6736a1a112 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -223,12 +223,14 @@ int smc_wr_tx_put_slot(struct smc_link *link, pend = container_of(wr_pend_priv, struct smc_wr_tx_pend, priv); if (pend->idx < link->wr_tx_cnt) { + u32 idx = pend->idx; + /* clear the full struct smc_wr_tx_pend including .priv */ memset(&link->wr_tx_pends[pend->idx], 0, sizeof(link->wr_tx_pends[pend->idx])); memset(&link->wr_tx_bufs[pend->idx], 0, sizeof(link->wr_tx_bufs[pend->idx])); - test_and_clear_bit(pend->idx, link->wr_tx_mask); + test_and_clear_bit(idx, link->wr_tx_mask); return 1; } diff --git a/net/socket.c b/net/socket.c index aab65277314d..6a5ec658fcd8 100644 --- a/net/socket.c +++ b/net/socket.c @@ -891,7 +891,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to) .msg_iocb = iocb}; ssize_t res; - if (file->f_flags & O_NONBLOCK) + if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT)) msg.msg_flags = MSG_DONTWAIT; if (iocb->ki_pos != 0) @@ -916,7 +916,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from) if (iocb->ki_pos != 0) return -ESPIPE; - if (file->f_flags & O_NONBLOCK) + if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT)) msg.msg_flags = MSG_DONTWAIT; if (sock->type == SOCK_SEQPACKET) @@ -3267,6 +3267,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCSARP: case SIOCGARP: case SIOCDARP: + case SIOCOUTQNSD: case SIOCATMARK: return sock_do_ioctl(net, sock, cmd, arg); } diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c index 1d74d653e6c0..ad0dcb69395d 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seal.c +++ b/net/sunrpc/auth_gss/gss_krb5_seal.c @@ -63,6 +63,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index cc08cb1292a9..a457e7afb768 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1188,6 +1188,7 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, dprintk("RPC: No creds found!\n"); goto out; } else { + struct timespec64 boot; /* steal creds */ rsci.cred = ud->creds; @@ -1208,6 +1209,9 @@ static int gss_proxy_save_rsc(struct cache_detail *cd, &expiry, GFP_KERNEL); if (status) goto out; + + getboottime64(&boot); + expiry -= boot.tv_sec; } rsci.h.expiry_time = expiry; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 475b453dc7ae..556989b0b5fc 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -54,9 +54,6 @@ static void cache_init(struct cache_head *h, struct cache_detail *detail) h->last_refresh = now; } -static inline int cache_is_valid(struct cache_head *h); -static void cache_fresh_locked(struct cache_head *head, time_t expiry, - struct cache_detail *detail); static void cache_fresh_unlocked(struct cache_head *head, struct cache_detail *detail); @@ -101,9 +98,6 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail, if (cache_is_expired(detail, tmp)) { hlist_del_init(&tmp->cache_list); detail->entries --; - if (cache_is_valid(tmp) == -EAGAIN) - set_bit(CACHE_NEGATIVE, &tmp->flags); - cache_fresh_locked(tmp, 0, detail); freeme = tmp; break; } diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index f9db5fe52d36..aff76fb43430 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -99,37 +99,64 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list); } -static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue) -{ - struct list_head *q = &queue->tasks[queue->priority]; - struct rpc_task *task; - - if (!list_empty(q)) { - task = list_first_entry(q, struct rpc_task, u.tk_wait.list); - if (task->tk_owner == queue->owner) - list_move_tail(&task->u.tk_wait.list, q); - } -} - static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) { if (queue->priority != priority) { - /* Fairness: rotate the list when changing priority */ - rpc_rotate_queue_owner(queue); queue->priority = priority; + queue->nr = 1U << priority; } } -static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid) -{ - queue->owner = pid; - queue->nr = RPC_BATCH_COUNT; -} - static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue) { rpc_set_waitqueue_priority(queue, queue->maxpriority); - rpc_set_waitqueue_owner(queue, 0); +} + +/* + * Add a request to a queue list + */ +static void +__rpc_list_enqueue_task(struct list_head *q, struct rpc_task *task) +{ + struct rpc_task *t; + + list_for_each_entry(t, q, u.tk_wait.list) { + if (t->tk_owner == task->tk_owner) { + list_add_tail(&task->u.tk_wait.links, + &t->u.tk_wait.links); + /* Cache the queue head in task->u.tk_wait.list */ + task->u.tk_wait.list.next = q; + task->u.tk_wait.list.prev = NULL; + return; + } + } + INIT_LIST_HEAD(&task->u.tk_wait.links); + list_add_tail(&task->u.tk_wait.list, q); +} + +/* + * Remove request from a queue list + */ +static void +__rpc_list_dequeue_task(struct rpc_task *task) +{ + struct list_head *q; + struct rpc_task *t; + + if (task->u.tk_wait.list.prev == NULL) { + list_del(&task->u.tk_wait.links); + return; + } + if (!list_empty(&task->u.tk_wait.links)) { + t = list_first_entry(&task->u.tk_wait.links, + struct rpc_task, + u.tk_wait.links); + /* Assume __rpc_list_enqueue_task() cached the queue head */ + q = t->u.tk_wait.list.next; + list_add_tail(&t->u.tk_wait.list, q); + list_del(&task->u.tk_wait.links); + } + list_del(&task->u.tk_wait.list); } /* @@ -139,22 +166,9 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *task, unsigned char queue_priority) { - struct list_head *q; - struct rpc_task *t; - - INIT_LIST_HEAD(&task->u.tk_wait.links); if (unlikely(queue_priority > queue->maxpriority)) queue_priority = queue->maxpriority; - if (queue_priority > queue->priority) - rpc_set_waitqueue_priority(queue, queue_priority); - q = &queue->tasks[queue_priority]; - list_for_each_entry(t, q, u.tk_wait.list) { - if (t->tk_owner == task->tk_owner) { - list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links); - return; - } - } - list_add_tail(&task->u.tk_wait.list, q); + __rpc_list_enqueue_task(&queue->tasks[queue_priority], task); } /* @@ -194,13 +208,7 @@ static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, */ static void __rpc_remove_wait_queue_priority(struct rpc_task *task) { - struct rpc_task *t; - - if (!list_empty(&task->u.tk_wait.links)) { - t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list); - list_move(&t->u.tk_wait.list, &task->u.tk_wait.list); - list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links); - } + __rpc_list_dequeue_task(task); } /* @@ -212,7 +220,8 @@ static void __rpc_remove_wait_queue(struct rpc_wait_queue *queue, struct rpc_tas __rpc_disable_timer(queue, task); if (RPC_IS_PRIORITY(queue)) __rpc_remove_wait_queue_priority(task); - list_del(&task->u.tk_wait.list); + else + list_del(&task->u.tk_wait.list); queue->qlen--; dprintk("RPC: %5u removed from queue %p \"%s\"\n", task->tk_pid, queue, rpc_qname(queue)); @@ -481,17 +490,9 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q * Service a batch of tasks from a single owner. */ q = &queue->tasks[queue->priority]; - if (!list_empty(q)) { - task = list_entry(q->next, struct rpc_task, u.tk_wait.list); - if (queue->owner == task->tk_owner) { - if (--queue->nr) - goto out; - list_move_tail(&task->u.tk_wait.list, q); - } - /* - * Check if we need to switch queues. - */ - goto new_owner; + if (!list_empty(q) && --queue->nr) { + task = list_first_entry(q, struct rpc_task, u.tk_wait.list); + goto out; } /* @@ -503,7 +504,7 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q else q = q - 1; if (!list_empty(q)) { - task = list_entry(q->next, struct rpc_task, u.tk_wait.list); + task = list_first_entry(q, struct rpc_task, u.tk_wait.list); goto new_queue; } } while (q != &queue->tasks[queue->priority]); @@ -513,8 +514,6 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q new_queue: rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0])); -new_owner: - rpc_set_waitqueue_owner(queue, task->tk_owner); out: return task; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index d0282cc88b14..b852c34bb637 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -795,17 +795,11 @@ void xprt_connect(struct rpc_task *task) static void xprt_connect_status(struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt; - - if (task->tk_status == 0) { - xprt->stat.connect_count++; - xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start; + switch (task->tk_status) { + case 0: dprintk("RPC: %5u xprt_connect_status: connection established\n", task->tk_pid); - return; - } - - switch (task->tk_status) { + break; case -ECONNREFUSED: case -ECONNRESET: case -ECONNABORTED: @@ -822,7 +816,7 @@ static void xprt_connect_status(struct rpc_task *task) default: dprintk("RPC: %5u xprt_connect_status: error %d connecting to " "server %s\n", task->tk_pid, -task->tk_status, - xprt->servername); + task->tk_rqstp->rq_xprt->servername); task->tk_status = -EIO; } } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 8cf5ccfe180d..b1b40a1be8c5 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -238,8 +238,12 @@ rpcrdma_connect_worker(struct work_struct *work) if (++xprt->connect_cookie == 0) /* maintain a reserved value */ ++xprt->connect_cookie; if (ep->rep_connected > 0) { - if (!xprt_test_and_set_connected(xprt)) + if (!xprt_test_and_set_connected(xprt)) { + xprt->stat.connect_count++; + xprt->stat.connect_time += (long)jiffies - + xprt->stat.connect_start; xprt_wake_pending_tasks(xprt, 0); + } } else { if (xprt_test_and_clear_connected(xprt)) xprt_wake_pending_tasks(xprt, -ENOTCONN); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 2aaf46599126..c5e991d14888 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -264,6 +264,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) ia->ri_device->name, sap, rpc_get_port(sap)); #endif + init_completion(&ia->ri_remove_done); set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); ep->rep_connected = -ENODEV; xprt_force_disconnect(&xprt->rx_xprt); @@ -319,7 +320,6 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, int rc; init_completion(&ia->ri_done); - init_completion(&ia->ri_remove_done); id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 05a58cc1b0cd..f75b5b7c1fc2 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -127,7 +127,7 @@ static struct ctl_table xs_tunables_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &xprt_min_resvport_limit, - .extra2 = &xprt_max_resvport + .extra2 = &xprt_max_resvport_limit }, { .procname = "max_resvport", @@ -135,7 +135,7 @@ static struct ctl_table xs_tunables_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &xprt_min_resvport, + .extra1 = &xprt_min_resvport_limit, .extra2 = &xprt_max_resvport_limit }, { @@ -1592,6 +1592,9 @@ static void xs_tcp_state_change(struct sock *sk) clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); xprt_clear_connecting(xprt); + xprt->stat.connect_count++; + xprt->stat.connect_time += (long)jiffies - + xprt->stat.connect_start; xprt_wake_pending_tasks(xprt, -EAGAIN); } spin_unlock(&xprt->transport_lock); @@ -1751,11 +1754,17 @@ static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); } -static unsigned short xs_get_random_port(void) +static int xs_get_random_port(void) { - unsigned short range = xprt_max_resvport - xprt_min_resvport + 1; - unsigned short rand = (unsigned short) prandom_u32() % range; - return rand + xprt_min_resvport; + unsigned short min = xprt_min_resvport, max = xprt_max_resvport; + unsigned short range; + unsigned short rand; + + if (max < min) + return -EADDRINUSE; + range = max - min + 1; + rand = (unsigned short) prandom_u32() % range; + return rand + min; } /** @@ -1812,9 +1821,9 @@ static void xs_set_srcport(struct sock_xprt *transport, struct socket *sock) transport->srcport = xs_sock_getport(sock); } -static unsigned short xs_get_srcport(struct sock_xprt *transport) +static int xs_get_srcport(struct sock_xprt *transport) { - unsigned short port = transport->srcport; + int port = transport->srcport; if (port == 0 && transport->xprt.resvport) port = xs_get_random_port(); @@ -1835,7 +1844,7 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) { struct sockaddr_storage myaddr; int err, nloop = 0; - unsigned short port = xs_get_srcport(transport); + int port = xs_get_srcport(transport); unsigned short last; /* @@ -1853,8 +1862,8 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) * transport->xprt.resvport == 1) xs_get_srcport above will * ensure that port is non-zero and we will bind as needed. */ - if (port == 0) - return 0; + if (port <= 0) + return port; memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen); do { @@ -2008,8 +2017,6 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, } /* Tell the socket layer to start connecting... */ - xprt->stat.connect_count++; - xprt->stat.connect_start = jiffies; return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0); } @@ -2041,6 +2048,9 @@ static int xs_local_setup_socket(struct sock_xprt *transport) case 0: dprintk("RPC: xprt %p connected to %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + xprt->stat.connect_count++; + xprt->stat.connect_time += (long)jiffies - + xprt->stat.connect_start; xprt_set_connected(xprt); case -ENOBUFS: break; @@ -2361,8 +2371,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) xs_set_memalloc(xprt); /* Tell the socket layer to start connecting... */ - xprt->stat.connect_count++; - xprt->stat.connect_start = jiffies; set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state); ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); switch (ret) { @@ -3284,12 +3292,8 @@ static int param_set_uint_minmax(const char *val, static int param_set_portnr(const char *val, const struct kernel_param *kp) { - if (kp->arg == &xprt_min_resvport) - return param_set_uint_minmax(val, kp, - RPC_MIN_RESVPORT, - xprt_max_resvport); return param_set_uint_minmax(val, kp, - xprt_min_resvport, + RPC_MIN_RESVPORT, RPC_MAX_RESVPORT); } diff --git a/net/tipc/core.c b/net/tipc/core.c index 67ac10434ba2..35f162ece2b7 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -116,14 +116,6 @@ static int __init tipc_init(void) sysctl_tipc_rmem[1] = RCVBUF_DEF; sysctl_tipc_rmem[2] = RCVBUF_MAX; - err = tipc_netlink_start(); - if (err) - goto out_netlink; - - err = tipc_netlink_compat_start(); - if (err) - goto out_netlink_compat; - err = tipc_register_sysctl(); if (err) goto out_sysctl; @@ -144,8 +136,21 @@ static int __init tipc_init(void) if (err) goto out_bearer; + err = tipc_netlink_start(); + if (err) + goto out_netlink; + + err = tipc_netlink_compat_start(); + if (err) + goto out_netlink_compat; + pr_info("Started in single node mode\n"); return 0; + +out_netlink_compat: + tipc_netlink_stop(); +out_netlink: + tipc_bearer_cleanup(); out_bearer: unregister_pernet_device(&tipc_topsrv_net_ops); out_pernet_topsrv: @@ -155,22 +160,18 @@ out_socket: out_pernet: tipc_unregister_sysctl(); out_sysctl: - tipc_netlink_compat_stop(); -out_netlink_compat: - tipc_netlink_stop(); -out_netlink: pr_err("Unable to start in single node mode\n"); return err; } static void __exit tipc_exit(void) { + tipc_netlink_compat_stop(); + tipc_netlink_stop(); tipc_bearer_cleanup(); unregister_pernet_device(&tipc_topsrv_net_ops); tipc_socket_stop(); unregister_pernet_device(&tipc_net_ops); - tipc_netlink_stop(); - tipc_netlink_compat_stop(); tipc_unregister_sysctl(); pr_info("Deactivated\n"); diff --git a/net/tipc/link.c b/net/tipc/link.c index 631bfc7e9127..82e4e0e152d1 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -811,18 +811,31 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr) */ void link_prepare_wakeup(struct tipc_link *l) { + struct sk_buff_head *wakeupq = &l->wakeupq; + struct sk_buff_head *inputq = l->inputq; struct sk_buff *skb, *tmp; - int imp, i = 0; + struct sk_buff_head tmpq; + int avail[5] = {0,}; + int imp = 0; - skb_queue_walk_safe(&l->wakeupq, skb, tmp) { + __skb_queue_head_init(&tmpq); + + for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) + avail[imp] = l->backlog[imp].limit - l->backlog[imp].len; + + skb_queue_walk_safe(wakeupq, skb, tmp) { imp = TIPC_SKB_CB(skb)->chain_imp; - if (l->backlog[imp].len < l->backlog[imp].limit) { - skb_unlink(skb, &l->wakeupq); - skb_queue_tail(l->inputq, skb); - } else if (i++ > 10) { - break; - } + if (avail[imp] <= 0) + continue; + avail[imp]--; + __skb_unlink(skb, wakeupq); + __skb_queue_tail(&tmpq, skb); } + + spin_lock_bh(&inputq->lock); + skb_queue_splice_tail(&tmpq, inputq); + spin_unlock_bh(&inputq->lock); + } void tipc_link_reset(struct tipc_link *l) @@ -1073,7 +1086,7 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, default: pr_warn("Dropping received illegal msg type\n"); kfree_skb(skb); - return false; + return true; }; } diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index ad4dcc663c6d..fa0522cd683e 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -539,7 +539,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, if (len <= 0) return -EINVAL; - len = min_t(int, len, TIPC_MAX_BEARER_NAME); + len = min_t(int, len, TIPC_MAX_LINK_NAME); if (!string_is_valid(name, len)) return -EINVAL; @@ -821,7 +821,7 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd, if (len <= 0) return -EINVAL; - len = min_t(int, len, TIPC_MAX_BEARER_NAME); + len = min_t(int, len, TIPC_MAX_LINK_NAME); if (!string_is_valid(name, len)) return -EINVAL; @@ -974,6 +974,10 @@ static int tipc_nl_compat_publ_dump(struct tipc_nl_compat_msg *msg, u32 sock) hdr = genlmsg_put(args, 0, 0, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_PUBL_GET); + if (!hdr) { + kfree_skb(args); + return -EMSGSIZE; + } nest = nla_nest_start(args, TIPC_NLA_SOCK); if (!nest) { @@ -1021,8 +1025,11 @@ static int tipc_nl_compat_sk_dump(struct tipc_nl_compat_msg *msg, u32 node; struct nlattr *con[TIPC_NLA_CON_MAX + 1]; - nla_parse_nested(con, TIPC_NLA_CON_MAX, - sock[TIPC_NLA_SOCK_CON], NULL, NULL); + err = nla_parse_nested(con, TIPC_NLA_CON_MAX, + sock[TIPC_NLA_SOCK_CON], NULL, NULL); + + if (err) + return err; node = nla_get_u32(con[TIPC_NLA_CON_NODE]); tipc_tlv_sprintf(msg->rep, " connected to <%u.%u.%u:%u>", diff --git a/net/tipc/node.c b/net/tipc/node.c index 42e9bdcc4bb6..82f8f69f4d6b 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -688,10 +688,10 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) { struct tipc_link_entry *le = &n->links[bearer_id]; + struct tipc_media_addr *maddr = NULL; struct tipc_link *l = le->link; - struct tipc_media_addr *maddr; - struct sk_buff_head xmitq; int old_bearer_id = bearer_id; + struct sk_buff_head xmitq; if (!l) return; @@ -713,7 +713,8 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_node_write_unlock(n); if (delete) tipc_mon_remove_peer(n->net, n->addr, old_bearer_id); - tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); + if (!skb_queue_empty(&xmitq)) + tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); tipc_sk_rcv(n->net, &le->inputq); } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f13fb607c563..d9ec6335c7dc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -487,7 +487,7 @@ static void __tipc_shutdown(struct socket *sock, int error) struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); - long timeout = CONN_TIMEOUT_DEFAULT; + long timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT); u32 dnode = tsk_peer_node(tsk); struct sk_buff *skb; @@ -714,14 +714,14 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, /* fall thru' */ case TIPC_LISTEN: case TIPC_CONNECTING: - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= (POLLIN | POLLRDNORM); break; case TIPC_OPEN: if (!tsk->cong_link_cnt) mask |= POLLOUT; if (tipc_sk_type_connectionless(sk) && - (!skb_queue_empty(&sk->sk_receive_queue))) + (!skb_queue_empty_lockless(&sk->sk_receive_queue))) mask |= (POLLIN | POLLRDNORM); break; case TIPC_DISCONNECTING: diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c index 1a779b1e8510..40f6d82083d7 100644 --- a/net/tipc/sysctl.c +++ b/net/tipc/sysctl.c @@ -37,6 +37,8 @@ #include +static int zero; +static int one = 1; static struct ctl_table_header *tipc_ctl_hdr; static struct ctl_table tipc_table[] = { @@ -45,14 +47,16 @@ static struct ctl_table tipc_table[] = { .data = &sysctl_tipc_rmem, .maxlen = sizeof(sysctl_tipc_rmem), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, }, { .procname = "named_timeout", .data = &sysctl_tipc_named_timeout, .maxlen = sizeof(sysctl_tipc_named_timeout), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, {} }; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 2adfcc6dec5a..091e93798eac 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -225,6 +225,8 @@ static inline void unix_release_addr(struct unix_address *addr) static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp) { + *hashp = 0; + if (len <= sizeof(short) || len > sizeof(*sunaddr)) return -EINVAL; if (!sunaddr || sunaddr->sun_family != AF_UNIX) @@ -642,6 +644,9 @@ static unsigned int unix_poll(struct file *, struct socket *, poll_table *); static unsigned int unix_dgram_poll(struct file *, struct socket *, poll_table *); static int unix_ioctl(struct socket *, unsigned int, unsigned long); +#ifdef CONFIG_COMPAT +static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); +#endif static int unix_shutdown(struct socket *, int); static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); @@ -683,6 +688,9 @@ static const struct proto_ops unix_stream_ops = { .getname = unix_getname, .poll = unix_poll, .ioctl = unix_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = unix_compat_ioctl, +#endif .listen = unix_listen, .shutdown = unix_shutdown, .setsockopt = sock_no_setsockopt, @@ -706,6 +714,9 @@ static const struct proto_ops unix_dgram_ops = { .getname = unix_getname, .poll = unix_dgram_poll, .ioctl = unix_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = unix_compat_ioctl, +#endif .listen = sock_no_listen, .shutdown = unix_shutdown, .setsockopt = sock_no_setsockopt, @@ -728,6 +739,9 @@ static const struct proto_ops unix_seqpacket_ops = { .getname = unix_getname, .poll = unix_dgram_poll, .ioctl = unix_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = unix_compat_ioctl, +#endif .listen = unix_listen, .shutdown = unix_shutdown, .setsockopt = sock_no_setsockopt, @@ -2648,6 +2662,13 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return err; } +#ifdef CONFIG_COMPAT +static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); +} +#endif + static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; @@ -2665,7 +2686,7 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table mask |= POLLRDHUP | POLLIN | POLLRDNORM; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; /* Connection-based need to check for termination and startup */ @@ -2693,7 +2714,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, mask = 0; /* exceptional events? */ - if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) + if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= POLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); @@ -2703,7 +2724,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, mask |= POLLHUP; /* readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= POLLIN | POLLRDNORM; /* Connection-based need to check for termination and startup */ diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 148c949cdfe7..73eac97e19fb 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -107,6 +107,7 @@ #include #include #include +#include #include #include #include @@ -487,9 +488,13 @@ out: static int __vsock_bind_stream(struct vsock_sock *vsk, struct sockaddr_vm *addr) { - static u32 port = LAST_RESERVED_PORT + 1; + static u32 port = 0; struct sockaddr_vm new_addr; + if (!port) + port = LAST_RESERVED_PORT + 1 + + prandom_u32_max(U32_MAX - LAST_RESERVED_PORT); + vsock_addr_init(&new_addr, addr->svm_cid, addr->svm_port); if (addr->svm_port == VMADDR_PORT_ANY) { @@ -880,7 +885,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, * the queue and write as long as the socket isn't shutdown for * sending. */ - if (!skb_queue_empty(&sk->sk_receive_queue) || + if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || (sk->sk_shutdown & RCV_SHUTDOWN)) { mask |= POLLIN | POLLRDNORM; } diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 6614512f8180..736b76ec8cf0 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -144,28 +144,15 @@ struct hvsock { **************************************************************************** * The only valid Service GUIDs, from the perspectives of both the host and * * Linux VM, that can be connected by the other end, must conform to this * - * format: -facb-11e6-bd58-64006a7986d3, and the "port" must be in * - * this range [0, 0x7FFFFFFF]. * + * format: -facb-11e6-bd58-64006a7986d3. * **************************************************************************** * * When we write apps on the host to connect(), the GUID ServiceID is used. * When we write apps in Linux VM to connect(), we only need to specify the * port and the driver will form the GUID and use that to request the host. * - * From the perspective of Linux VM: - * 1. the local ephemeral port (i.e. the local auto-bound port when we call - * connect() without explicit bind()) is generated by __vsock_bind_stream(), - * and the range is [1024, 0xFFFFFFFF). - * 2. the remote ephemeral port (i.e. the auto-generated remote port for - * a connect request initiated by the host's connect()) is generated by - * hvs_remote_addr_init() and the range is [0x80000000, 0xFFFFFFFF). */ -#define MAX_LISTEN_PORT ((u32)0x7FFFFFFF) -#define MAX_VM_LISTEN_PORT MAX_LISTEN_PORT -#define MAX_HOST_LISTEN_PORT MAX_LISTEN_PORT -#define MIN_HOST_EPHEMERAL_PORT (MAX_HOST_LISTEN_PORT + 1) - /* 00000000-facb-11e6-bd58-64006a7986d3 */ static const uuid_le srv_id_template = UUID_LE(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58, @@ -188,33 +175,6 @@ static void hvs_addr_init(struct sockaddr_vm *addr, const uuid_le *svr_id) vsock_addr_init(addr, VMADDR_CID_ANY, port); } -static void hvs_remote_addr_init(struct sockaddr_vm *remote, - struct sockaddr_vm *local) -{ - static u32 host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT; - struct sock *sk; - - vsock_addr_init(remote, VMADDR_CID_ANY, VMADDR_PORT_ANY); - - while (1) { - /* Wrap around ? */ - if (host_ephemeral_port < MIN_HOST_EPHEMERAL_PORT || - host_ephemeral_port == VMADDR_PORT_ANY) - host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT; - - remote->svm_port = host_ephemeral_port++; - - sk = vsock_find_connected_socket(remote, local); - if (!sk) { - /* Found an available ephemeral port */ - return; - } - - /* Release refcnt got in vsock_find_connected_socket */ - sock_put(sk); - } -} - static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan) { set_channel_pending_send_size(chan, @@ -342,12 +302,7 @@ static void hvs_open_connection(struct vmbus_channel *chan) if_type = &chan->offermsg.offer.if_type; if_instance = &chan->offermsg.offer.if_instance; conn_from_host = chan->offermsg.offer.u.pipe.user_def[0]; - - /* The host or the VM should only listen on a port in - * [0, MAX_LISTEN_PORT] - */ - if (!is_valid_srv_id(if_type) || - get_port_by_srv_id(if_type) > MAX_LISTEN_PORT) + if (!is_valid_srv_id(if_type)) return; hvs_addr_init(&addr, conn_from_host ? if_type : if_instance); @@ -372,6 +327,13 @@ static void hvs_open_connection(struct vmbus_channel *chan) new->sk_state = TCP_SYN_SENT; vnew = vsock_sk(new); + + hvs_addr_init(&vnew->local_addr, if_type); + + /* Remote peer is always the host */ + vsock_addr_init(&vnew->remote_addr, + VMADDR_CID_HOST, VMADDR_PORT_ANY); + vnew->remote_addr.svm_port = get_port_by_srv_id(if_instance); hvs_new = vnew->trans; hvs_new->chan = chan; } else { @@ -411,8 +373,6 @@ static void hvs_open_connection(struct vmbus_channel *chan) sk->sk_ack_backlog++; hvs_addr_init(&vnew->local_addr, if_type); - hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr); - hvs_new->vm_srv_id = *if_type; hvs_new->host_srv_id = *if_instance; @@ -717,16 +677,6 @@ static bool hvs_stream_is_active(struct vsock_sock *vsk) static bool hvs_stream_allow(u32 cid, u32 port) { - /* The host's port range [MIN_HOST_EPHEMERAL_PORT, 0xFFFFFFFF) is - * reserved as ephemeral ports, which are used as the host's ports - * when the host initiates connections. - * - * Perform this check in the guest so an immediate error is produced - * instead of a timeout. - */ - if (port > MAX_HOST_LISTEN_PORT) - return false; - if (cid == VMADDR_CID_HOST) return true; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index a8eb0657c1e8..d20f43057323 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -92,8 +92,17 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) struct virtio_vsock_pkt *pkt = opaque; struct af_vsockmon_hdr *hdr; struct sk_buff *skb; + size_t payload_len; + void *payload_buf; - skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + pkt->len, + /* A packet could be split to fit the RX buffer, so we can retrieve + * the payload length from the header and the buffer pointer taking + * care of the offset in the original packet. + */ + payload_len = le32_to_cpu(pkt->hdr.len); + payload_buf = pkt->buf + pkt->off; + + skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len, GFP_ATOMIC); if (!skb) return NULL; @@ -133,8 +142,8 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr)); - if (pkt->len) { - skb_put_data(skb, pkt->buf, pkt->len); + if (payload_len) { + skb_put_data(skb, payload_buf, payload_len); } return skb; diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 63682176c96c..c4bd3ecef508 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -40,6 +40,8 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, cfg80211_sched_dfs_chan_update(rdev); } + schedule_work(&cfg80211_disconnect_work); + return err; } diff --git a/net/wireless/core.h b/net/wireless/core.h index 90f90c7d8bf9..507ec6446eb6 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -429,6 +429,8 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev); bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range, u32 center_freq_khz, u32 bw_khz); +extern struct work_struct cfg80211_disconnect_work; + /** * cfg80211_chandef_dfs_usable - checks if chandef is DFS usable * @wiphy: the wiphy to validate against diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c index a9c0f368db5d..24e18405cdb4 100644 --- a/net/wireless/ethtool.c +++ b/net/wireless/ethtool.c @@ -7,9 +7,13 @@ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct wireless_dev *wdev = dev->ieee80211_ptr; + struct device *pdev = wiphy_dev(wdev->wiphy); - strlcpy(info->driver, wiphy_dev(wdev->wiphy)->driver->name, - sizeof(info->driver)); + if (pdev->driver) + strlcpy(info->driver, pdev->driver->name, + sizeof(info->driver)); + else + strlcpy(info->driver, "N/A", sizeof(info->driver)); strlcpy(info->version, init_utsname()->release, sizeof(info->version)); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0019b6a7bd49..657b72da056b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -283,7 +283,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MNTR_FLAGS] = { /* NLA_NESTED can't be empty */ }, [NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY, .len = IEEE80211_MAX_MESH_ID_LEN }, - [NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_U32 }, + [NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_BINARY, + .len = ETH_ALEN }, [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 }, [NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED }, @@ -320,6 +321,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 }, [NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG }, [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, + [NL80211_ATTR_STATUS_CODE] = { .type = NLA_U16 }, [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, [NL80211_ATTR_PID] = { .type = NLA_U32 }, @@ -345,6 +347,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED }, [NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_PLINK_STATE] = { .type = NLA_U8 }, + [NL80211_ATTR_MEASUREMENT_DURATION] = { .type = NLA_U16 }, + [NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY] = { .type = NLA_FLAG }, [NL80211_ATTR_SCHED_SCAN_INTERVAL] = { .type = NLA_U32 }, [NL80211_ATTR_REKEY_DATA] = { .type = NLA_NESTED }, [NL80211_ATTR_SCAN_SUPP_RATES] = { .type = NLA_NESTED }, @@ -393,6 +397,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MDID] = { .type = NLA_U16 }, [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_ATTR_CRIT_PROT_ID] = { .type = NLA_U16 }, + [NL80211_ATTR_MAX_CRIT_PROT_DURATION] = { .type = NLA_U16 }, [NL80211_ATTR_PEER_AID] = { .type = NLA_U16 }, [NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 }, [NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG }, @@ -418,6 +424,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_USER_PRIO] = { .type = NLA_U8 }, [NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 }, [NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 }, + [NL80211_ATTR_OPER_CLASS] = { .type = NLA_U8 }, [NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN }, [NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG }, [NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 }, @@ -3118,7 +3125,7 @@ static void get_key_callback(void *c, struct key_params *params) params->cipher))) goto nla_put_failure; - if (nla_put_u8(cookie->msg, NL80211_ATTR_KEY_IDX, cookie->idx)) + if (nla_put_u8(cookie->msg, NL80211_KEY_IDX, cookie->idx)) goto nla_put_failure; nla_nest_end(cookie->msg, key); @@ -5506,6 +5513,9 @@ static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->del_mpath) return -EOPNOTSUPP; + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) + return -EOPNOTSUPP; + return rdev_del_mpath(rdev, dev, dst); } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 84f23ae015fc..cb21d445a6ef 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -537,6 +537,10 @@ static inline int rdev_set_wiphy_params(struct cfg80211_registered_device *rdev, u32 changed) { int ret; + + if (!rdev->ops->set_wiphy_params) + return -EOPNOTSUPP; + trace_rdev_set_wiphy_params(&rdev->wiphy, changed); ret = rdev->ops->set_wiphy_params(&rdev->wiphy, changed); trace_rdev_return_int(&rdev->wiphy, ret); @@ -1139,6 +1143,16 @@ rdev_start_radar_detection(struct cfg80211_registered_device *rdev, return ret; } +static inline void +rdev_end_cac(struct cfg80211_registered_device *rdev, + struct net_device *dev) +{ + trace_rdev_end_cac(&rdev->wiphy, dev); + if (rdev->ops->end_cac) + rdev->ops->end_cac(&rdev->wiphy, dev); + trace_rdev_return_void(&rdev->wiphy); +} + static inline int rdev_set_mcast_rate(struct cfg80211_registered_device *rdev, struct net_device *dev, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index b940d5c2003b..b95d1c2bdef7 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1718,21 +1718,22 @@ static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) static void handle_channel_custom(struct wiphy *wiphy, struct ieee80211_channel *chan, - const struct ieee80211_regdomain *regd) + const struct ieee80211_regdomain *regd, + u32 min_bw) { u32 bw_flags = 0; const struct ieee80211_reg_rule *reg_rule = NULL; const struct ieee80211_power_rule *power_rule = NULL; u32 bw; - for (bw = MHZ_TO_KHZ(20); bw >= MHZ_TO_KHZ(5); bw = bw / 2) { + for (bw = MHZ_TO_KHZ(20); bw >= min_bw; bw = bw / 2) { reg_rule = freq_reg_info_regd(MHZ_TO_KHZ(chan->center_freq), regd, bw); if (!IS_ERR(reg_rule)) break; } - if (IS_ERR(reg_rule)) { + if (IS_ERR_OR_NULL(reg_rule)) { pr_debug("Disabling freq %d MHz as custom regd has no rule that fits it\n", chan->center_freq); if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) { @@ -1781,8 +1782,14 @@ static void handle_band_custom(struct wiphy *wiphy, if (!sband) return; + /* + * We currently assume that you always want at least 20 MHz, + * otherwise channel 12 might get enabled if this rule is + * compatible to US, which permits 2402 - 2472 MHz. + */ for (i = 0; i < sband->n_channels; i++) - handle_channel_custom(wiphy, &sband->channels[i], regd); + handle_channel_custom(wiphy, &sband->channels[i], regd, + MHZ_TO_KHZ(20)); } /* Used by drivers prior to wiphy registration */ @@ -2703,8 +2710,54 @@ static void restore_regulatory_settings(bool reset_user) schedule_work(®_work); } +static bool is_wiphy_all_set_reg_flag(enum ieee80211_regulatory_flags flag) +{ + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { + wdev_lock(wdev); + if (!(wdev->wiphy->regulatory_flags & flag)) { + wdev_unlock(wdev); + return false; + } + wdev_unlock(wdev); + } + } + + return true; +} + void regulatory_hint_disconnect(void) { + /* Restore of regulatory settings is not required when wiphy(s) + * ignore IE from connected access point but clearance of beacon hints + * is required when wiphy(s) supports beacon hints. + */ + if (is_wiphy_all_set_reg_flag(REGULATORY_COUNTRY_IE_IGNORE)) { + struct reg_beacon *reg_beacon, *btmp; + + if (is_wiphy_all_set_reg_flag(REGULATORY_DISABLE_BEACON_HINTS)) + return; + + spin_lock_bh(®_pending_beacons_lock); + list_for_each_entry_safe(reg_beacon, btmp, + ®_pending_beacons, list) { + list_del(®_beacon->list); + kfree(reg_beacon); + } + spin_unlock_bh(®_pending_beacons_lock); + + list_for_each_entry_safe(reg_beacon, btmp, + ®_beacon_list, list) { + list_del(®_beacon->list); + kfree(reg_beacon); + } + + return; + } + pr_debug("All devices are disconnected, going to restore regulatory settings\n"); restore_regulatory_settings(false); } @@ -3250,6 +3303,25 @@ bool regulatory_pre_cac_allowed(struct wiphy *wiphy) return pre_cac_allowed; } +static void cfg80211_check_and_end_cac(struct cfg80211_registered_device *rdev) +{ + struct wireless_dev *wdev; + /* If we finished CAC or received radar, we should end any + * CAC running on the same channels. + * the check !cfg80211_chandef_dfs_usable contain 2 options: + * either all channels are available - those the CAC_FINISHED + * event has effected another wdev state, or there is a channel + * in unavailable state in wdev chandef - those the RADAR_DETECTED + * event has effected another wdev state. + * In both cases we should end the CAC on the wdev. + */ + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { + if (wdev->cac_started && + !cfg80211_chandef_dfs_usable(&rdev->wiphy, &wdev->chandef)) + rdev_end_cac(rdev, wdev->netdev); + } +} + void regulatory_propagate_dfs_state(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, enum nl80211_dfs_state dfs_state, @@ -3276,8 +3348,10 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy, cfg80211_set_dfs_state(&rdev->wiphy, chandef, dfs_state); if (event == NL80211_RADAR_DETECTED || - event == NL80211_RADAR_CAC_FINISHED) + event == NL80211_RADAR_CAC_FINISHED) { cfg80211_sched_dfs_chan_update(rdev); + cfg80211_check_and_end_cac(rdev); + } nl80211_radar_notify(rdev, chandef, event, NULL, GFP_KERNEL); } diff --git a/net/wireless/sme.c b/net/wireless/sme.c index d014aea07160..8344153800e2 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -642,11 +642,15 @@ static bool cfg80211_is_all_idle(void) * All devices must be idle as otherwise if you are actively * scanning some new beacon hints could be learned and would * count as new regulatory hints. + * Also if there is any other active beaconing interface we + * need not issue a disconnect hint and reset any info such + * as chan dfs state, etc. */ list_for_each_entry(rdev, &cfg80211_rdev_list, list) { list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { wdev_lock(wdev); - if (wdev->conn || wdev->current_bss) + if (wdev->conn || wdev->current_bss || + cfg80211_beaconing_iface_active(wdev)) is_all_idle = false; wdev_unlock(wdev); } @@ -663,7 +667,7 @@ static void disconnect_work(struct work_struct *work) rtnl_unlock(); } -static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); +DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); /* diff --git a/net/wireless/trace.h b/net/wireless/trace.h index c8c463bab79d..53093eb24b7e 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -607,6 +607,11 @@ DEFINE_EVENT(wiphy_netdev_evt, rdev_flush_pmksa, TP_ARGS(wiphy, netdev) ); +DEFINE_EVENT(wiphy_netdev_evt, rdev_end_cac, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), + TP_ARGS(wiphy, netdev) +); + DECLARE_EVENT_CLASS(station_add_change, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *mac, struct station_parameters *params), diff --git a/net/wireless/util.c b/net/wireless/util.c index 0f6c34ff9b55..935929b45411 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -652,7 +652,7 @@ __frame_add_frag(struct sk_buff *skb, struct page *page, struct skb_shared_info *sh = skb_shinfo(skb); int page_offset; - page_ref_inc(page); + get_page(page); page_offset = ptr - page_address(page); skb_add_rx_frag(skb, sh->nr_frags, page, page_offset, len, size); } @@ -1873,3 +1873,48 @@ EXPORT_SYMBOL(rfc1042_header); const unsigned char bridge_tunnel_header[] __aligned(2) = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; EXPORT_SYMBOL(bridge_tunnel_header); + +/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */ +struct iapp_layer2_update { + u8 da[ETH_ALEN]; /* broadcast */ + u8 sa[ETH_ALEN]; /* STA addr */ + __be16 len; /* 6 */ + u8 dsap; /* 0 */ + u8 ssap; /* 0 */ + u8 control; + u8 xid_info[3]; +} __packed; + +void cfg80211_send_layer2_update(struct net_device *dev, const u8 *addr) +{ + struct iapp_layer2_update *msg; + struct sk_buff *skb; + + /* Send Level 2 Update Frame to update forwarding tables in layer 2 + * bridge devices */ + + skb = dev_alloc_skb(sizeof(*msg)); + if (!skb) + return; + msg = skb_put(skb, sizeof(*msg)); + + /* 802.2 Type 1 Logical Link Control (LLC) Exchange Identifier (XID) + * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */ + + eth_broadcast_addr(msg->da); + ether_addr_copy(msg->sa, addr); + msg->len = htons(6); + msg->dsap = 0; + msg->ssap = 0x01; /* NULL LSAP, CR Bit: Response */ + msg->control = 0xaf; /* XID response lsb.1111F101. + * F=0 (no poll command; unsolicited frame) */ + msg->xid_info[0] = 0x81; /* XID format identifier */ + msg->xid_info[1] = 1; /* LLC types/classes: Type 1 LLC */ + msg->xid_info[2] = 0; /* XID sender's receive window size (RW) */ + + skb->dev = dev; + skb->protocol = eth_type_trans(skb, dev); + memset(skb->cb, 0, sizeof(skb->cb)); + netif_rx_ni(skb); +} +EXPORT_SYMBOL(cfg80211_send_layer2_update); diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 6cdb054484d6..5236a3c2c0cc 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -659,7 +659,8 @@ struct iw_statistics *get_wireless_stats(struct net_device *dev) return NULL; } -static int iw_handler_get_iwstats(struct net_device * dev, +/* noinline to avoid a bogus warning with -O3 */ +static noinline int iw_handler_get_iwstats(struct net_device * dev, struct iw_request_info * info, union iwreq_data * wrqu, char * extra) diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index c67d7a82ab13..73fd0eae08ca 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -202,6 +202,7 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, struct iw_point *data, char *ssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; + int ret = 0; /* call only for station! */ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) @@ -219,7 +220,10 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, if (ie) { data->flags = 1; data->length = ie[1]; - memcpy(ssid, ie + 2, data->length); + if (data->length > IW_ESSID_MAX_SIZE) + ret = -EINVAL; + else + memcpy(ssid, ie + 2, data->length); } rcu_read_unlock(); } else if (wdev->wext.connect.ssid && wdev->wext.connect.ssid_len) { @@ -229,7 +233,7 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, } wdev_unlock(wdev); - return 0; + return ret; } int cfg80211_mgd_wext_siwap(struct net_device *dev, diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 1b830a6ee3ff..f4fa33b84cde 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -100,7 +100,7 @@ int x25_parse_address_block(struct sk_buff *skb, } len = *skb->data; - needed = 1 + (len >> 4) + (len & 0x0f); + needed = 1 + ((len >> 4) + (len & 0x0f) + 1) / 2; if (!pskb_may_pull(skb, needed)) { /* packet is too short to hold the addresses it claims @@ -288,7 +288,7 @@ static struct sock *x25_find_listener(struct x25_address *addr, sk_for_each(s, &x25_list) if ((!strcmp(addr->x25_addr, x25_sk(s)->source_addr.x25_addr) || - !strcmp(addr->x25_addr, + !strcmp(x25_sk(s)->source_addr.x25_addr, null_x25_address.x25_addr)) && s->sk_state == TCP_LISTEN) { /* @@ -684,11 +684,15 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } - len = strlen(addr->sx25_addr.x25_addr); - for (i = 0; i < len; i++) { - if (!isdigit(addr->sx25_addr.x25_addr[i])) { - rc = -EINVAL; - goto out; + /* check for the null_x25_address */ + if (strcmp(addr->sx25_addr.x25_addr, null_x25_address.x25_addr)) { + + len = strlen(addr->sx25_addr.x25_addr); + for (i = 0; i < len; i++) { + if (!isdigit(addr->sx25_addr.x25_addr[i])) { + rc = -EINVAL; + goto out; + } } } @@ -760,6 +764,10 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, if (sk->sk_state == TCP_ESTABLISHED) goto out; + rc = -EALREADY; /* Do nothing if call is already in progress */ + if (sk->sk_state == TCP_SYN_SENT) + goto out; + sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; @@ -806,7 +814,7 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr, /* Now the loop */ rc = -EINPROGRESS; if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) - goto out_put_neigh; + goto out; rc = x25_wait_for_connection_establishment(sk); if (rc) diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 0a0273d21f66..2a4764b0183a 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -130,7 +130,7 @@ struct sec_path *secpath_dup(struct sec_path *src) sp->len = 0; sp->olen = 0; - memset(sp->ovec, 0, sizeof(sp->ovec[XFRM_MAX_OFFLOAD_DEPTH])); + memset(sp->ovec, 0, sizeof(sp->ovec)); if (src) { int i; @@ -245,6 +245,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) else XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEINVALID); + + if (encap_type == -1) + dev_put(skb->dev); goto drop; } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 462a3568bc2c..1d1049a8c683 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -449,6 +449,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) x->type->destructor(x); xfrm_put_type(x->type); } + if (x->xfrag.page) + put_page(x->xfrag.page); xfrm_dev_state_free(x); security_xfrm_state_free(x); kfree(x); diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index c1dc632d4ea4..3460036621e4 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -184,7 +184,7 @@ all: $(LIBBPF) clean: $(MAKE) -C ../../ M=$(CURDIR) clean - @rm -f *~ + @find $(CURDIR) -type f -name '*~' -delete $(LIBBPF): FORCE $(MAKE) -C $(dir $@) $(notdir $@) diff --git a/samples/bpf/cgroup_helpers.c b/samples/bpf/cgroup_helpers.c index 09afaddfc9ba..b5c09cd6c7bd 100644 --- a/samples/bpf/cgroup_helpers.c +++ b/samples/bpf/cgroup_helpers.c @@ -43,7 +43,7 @@ */ int setup_cgroup_environment(void) { - char cgroup_workdir[PATH_MAX + 1]; + char cgroup_workdir[PATH_MAX - 24]; format_cgroup_path(cgroup_workdir, ""); diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c index f58acfc92556..f2f9dbc021b0 100644 --- a/samples/bpf/sockex2_kern.c +++ b/samples/bpf/sockex2_kern.c @@ -14,7 +14,7 @@ struct vlan_hdr { __be16 h_vlan_encapsulated_proto; }; -struct bpf_flow_keys { +struct flow_key_record { __be32 src; __be32 dst; union { @@ -59,7 +59,7 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) } static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto, - struct bpf_flow_keys *flow) + struct flow_key_record *flow) { __u64 verlen; @@ -83,7 +83,7 @@ static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto } static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto, - struct bpf_flow_keys *flow) + struct flow_key_record *flow) { *ip_proto = load_byte(skb, nhoff + offsetof(struct ipv6hdr, nexthdr)); @@ -96,7 +96,8 @@ static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_pro return nhoff; } -static inline bool flow_dissector(struct __sk_buff *skb, struct bpf_flow_keys *flow) +static inline bool flow_dissector(struct __sk_buff *skb, + struct flow_key_record *flow) { __u64 nhoff = ETH_HLEN; __u64 ip_proto; @@ -198,7 +199,7 @@ struct bpf_map_def SEC("maps") hash_map = { SEC("socket2") int bpf_prog2(struct __sk_buff *skb) { - struct bpf_flow_keys flow = {}; + struct flow_key_record flow = {}; struct pair *value; u32 key; diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c index 95907f8d2b17..c527b57d3ec8 100644 --- a/samples/bpf/sockex3_kern.c +++ b/samples/bpf/sockex3_kern.c @@ -61,7 +61,7 @@ struct vlan_hdr { __be16 h_vlan_encapsulated_proto; }; -struct bpf_flow_keys { +struct flow_key_record { __be32 src; __be32 dst; union { @@ -88,7 +88,7 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) } struct globals { - struct bpf_flow_keys flow; + struct flow_key_record flow; }; struct bpf_map_def SEC("maps") percpu_map = { @@ -114,14 +114,14 @@ struct pair { struct bpf_map_def SEC("maps") hash_map = { .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(struct bpf_flow_keys), + .key_size = sizeof(struct flow_key_record), .value_size = sizeof(struct pair), .max_entries = 1024, }; static void update_stats(struct __sk_buff *skb, struct globals *g) { - struct bpf_flow_keys key = g->flow; + struct flow_key_record key = g->flow; struct pair *value; value = bpf_map_lookup_elem(&hash_map, &key); diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index 495ee02e2fb7..741b899b693f 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -13,7 +13,7 @@ #define PARSE_IP_PROG_FD (prog_fd[0]) #define PROG_ARRAY_FD (map_fd[0]) -struct bpf_flow_keys { +struct flow_key_record { __be32 src; __be32 dst; union { @@ -64,7 +64,7 @@ int main(int argc, char **argv) (void) f; for (i = 0; i < 5; i++) { - struct bpf_flow_keys key = {}, next_key; + struct flow_key_record key = {}, next_key; struct pair value; sleep(1); diff --git a/samples/bpf/syscall_tp_kern.c b/samples/bpf/syscall_tp_kern.c index 9149c524d279..8833aacb9c8c 100644 --- a/samples/bpf/syscall_tp_kern.c +++ b/samples/bpf/syscall_tp_kern.c @@ -50,13 +50,27 @@ static __always_inline void count(void *map) SEC("tracepoint/syscalls/sys_enter_open") int trace_enter_open(struct syscalls_enter_open_args *ctx) { - count((void *)&enter_open_map); + count(&enter_open_map); + return 0; +} + +SEC("tracepoint/syscalls/sys_enter_openat") +int trace_enter_open_at(struct syscalls_enter_open_args *ctx) +{ + count(&enter_open_map); return 0; } SEC("tracepoint/syscalls/sys_exit_open") int trace_enter_exit(struct syscalls_exit_open_args *ctx) { - count((void *)&exit_open_map); + count(&exit_open_map); + return 0; +} + +SEC("tracepoint/syscalls/sys_exit_openat") +int trace_enter_exit_at(struct syscalls_exit_open_args *ctx) +{ + count(&exit_open_map); return 0; } diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c index c7d525e5696e..8c7445874662 100644 --- a/samples/bpf/trace_event_user.c +++ b/samples/bpf/trace_event_user.c @@ -34,9 +34,9 @@ static void print_ksym(__u64 addr) return; sym = ksym_search(addr); printf("%s;", sym->name); - if (!strcmp(sym->name, "sys_read")) + if (!strstr(sym->name, "sys_read")) sys_read_seen = true; - else if (!strcmp(sym->name, "sys_write")) + else if (!strstr(sym->name, "sys_write")) sys_write_seen = true; } diff --git a/samples/mei/mei-amt-version.c b/samples/mei/mei-amt-version.c index bb9988914a56..32234481ad7d 100644 --- a/samples/mei/mei-amt-version.c +++ b/samples/mei/mei-amt-version.c @@ -370,7 +370,7 @@ static uint32_t amt_host_if_call(struct amt_host_if *acmd, unsigned int expected_sz) { uint32_t in_buf_sz; - uint32_t out_buf_sz; + ssize_t out_buf_sz; ssize_t written; uint32_t status; struct amt_host_if_resp_header *msg_hdr; diff --git a/samples/pktgen/functions.sh b/samples/pktgen/functions.sh index 205e4cde4601..065a7e296ee3 100644 --- a/samples/pktgen/functions.sh +++ b/samples/pktgen/functions.sh @@ -5,6 +5,8 @@ # Author: Jesper Dangaaard Brouer # License: GPL +set -o errexit + ## -- General shell logging cmds -- function err() { local exitcode=$1 @@ -58,6 +60,7 @@ function pg_set() { function proc_cmd() { local result local proc_file=$1 + local status=0 # after shift, the remaining args are contained in $@ shift local proc_ctrl=${PROC_DIR}/$proc_file @@ -73,13 +76,13 @@ function proc_cmd() { echo "cmd: $@ > $proc_ctrl" fi # Quoting of "$@" is important for space expansion - echo "$@" > "$proc_ctrl" - local status=$? + echo "$@" > "$proc_ctrl" || status=$? - result=$(grep "Result: OK:" $proc_ctrl) - # Due to pgctrl, cannot use exit code $? from grep - if [[ "$result" == "" ]]; then - grep "Result:" $proc_ctrl >&2 + if [[ "$proc_file" != "pgctrl" ]]; then + result=$(grep "Result: OK:" $proc_ctrl) || true + if [[ "$result" == "" ]]; then + grep "Result:" $proc_ctrl >&2 + fi fi if (( $status != 0 )); then err 5 "Write error($status) occurred cmd: \"$@ > $proc_ctrl\"" @@ -105,6 +108,8 @@ function pgset() { fi } +[[ $EUID -eq 0 ]] && trap 'pg_ctrl "reset"' EXIT + ## -- General shell tricks -- function root_check_run_with_sudo() { diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c index ca495686b9c3..f8c7249fa705 100644 --- a/samples/vfio-mdev/mtty.c +++ b/samples/vfio-mdev/mtty.c @@ -171,7 +171,7 @@ static struct mdev_state *find_mdev_state_by_uuid(uuid_le uuid) return NULL; } -void dump_buffer(char *buf, uint32_t count) +void dump_buffer(u8 *buf, uint32_t count) { #if defined(DEBUG) int i; @@ -250,7 +250,7 @@ static void mtty_create_config_space(struct mdev_state *mdev_state) } static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset, - char *buf, u32 count) + u8 *buf, u32 count) { u32 cfg_addr, bar_mask, bar_index = 0; @@ -304,7 +304,7 @@ static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset, } static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, - u16 offset, char *buf, u32 count) + u16 offset, u8 *buf, u32 count) { u8 data = *buf; @@ -475,7 +475,7 @@ static void handle_bar_write(unsigned int index, struct mdev_state *mdev_state, } static void handle_bar_read(unsigned int index, struct mdev_state *mdev_state, - u16 offset, char *buf, u32 count) + u16 offset, u8 *buf, u32 count) { /* Handle read requests by guest */ switch (offset) { @@ -650,7 +650,7 @@ static void mdev_read_base(struct mdev_state *mdev_state) } } -static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count, +static ssize_t mdev_access(struct mdev_device *mdev, u8 *buf, size_t count, loff_t pos, bool is_write) { struct mdev_state *mdev_state; @@ -698,7 +698,7 @@ static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count, #if defined(DEBUG_REGS) pr_info("%s: BAR%d WR @0x%llx %s val:0x%02x dlab:%d\n", __func__, index, offset, wr_reg[offset], - (u8)*buf, mdev_state->s[index].dlab); + *buf, mdev_state->s[index].dlab); #endif handle_bar_write(index, mdev_state, offset, buf, count); } else { @@ -708,7 +708,7 @@ static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count, #if defined(DEBUG_REGS) pr_info("%s: BAR%d RD @0x%llx %s val:0x%02x dlab:%d\n", __func__, index, offset, rd_reg[offset], - (u8)*buf, mdev_state->s[index].dlab); + *buf, mdev_state->s[index].dlab); #endif } break; @@ -827,7 +827,7 @@ ssize_t mtty_read(struct mdev_device *mdev, char __user *buf, size_t count, if (count >= 4 && !(*ppos % 4)) { u32 val; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev, (u8 *)&val, sizeof(val), *ppos, false); if (ret <= 0) goto read_err; @@ -839,7 +839,7 @@ ssize_t mtty_read(struct mdev_device *mdev, char __user *buf, size_t count, } else if (count >= 2 && !(*ppos % 2)) { u16 val; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev, (u8 *)&val, sizeof(val), *ppos, false); if (ret <= 0) goto read_err; @@ -851,7 +851,7 @@ ssize_t mtty_read(struct mdev_device *mdev, char __user *buf, size_t count, } else { u8 val; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev, (u8 *)&val, sizeof(val), *ppos, false); if (ret <= 0) goto read_err; @@ -889,7 +889,7 @@ ssize_t mtty_write(struct mdev_device *mdev, const char __user *buf, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev, (u8 *)&val, sizeof(val), *ppos, true); if (ret <= 0) goto write_err; @@ -901,7 +901,7 @@ ssize_t mtty_write(struct mdev_device *mdev, const char __user *buf, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev, (u8 *)&val, sizeof(val), *ppos, true); if (ret <= 0) goto write_err; @@ -913,7 +913,7 @@ ssize_t mtty_write(struct mdev_device *mdev, const char __user *buf, if (copy_from_user(&val, buf, sizeof(val))) goto write_err; - ret = mdev_access(mdev, (char *)&val, sizeof(val), + ret = mdev_access(mdev, (u8 *)&val, sizeof(val), *ppos, true); if (ret <= 0) goto write_err; diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 48622d8c0e31..392ad77e8423 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -220,7 +220,7 @@ cmd_modversions_c = \ > $(@D)/$(@F).symversions; \ fi; \ else \ - if $(LLVM_DIS) -o=- $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ + if $(LLVM_NM) $(@D)/.tmp_$(@F) | grep -q __ksymtab; then \ $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ > $(@D)/$(@F).symversions; \ fi; \ @@ -594,7 +594,7 @@ cmd_export_list = \ $(filter_export_list) \ >>$(ksyms-lds); \ else \ - $(LLVM_DIS) -o=- $$o | \ + $(LLVM_NM) $$o | \ $(filter_export_list) \ >>$(ksyms-lds); \ fi; \ diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins index b2a95af7df18..708c8f6a5717 100644 --- a/scripts/Makefile.gcc-plugins +++ b/scripts/Makefile.gcc-plugins @@ -14,16 +14,12 @@ ifdef CONFIG_GCC_PLUGINS endif ifdef CONFIG_GCC_PLUGIN_SANCOV - ifeq ($(CFLAGS_KCOV),) # It is needed because of the gcc-plugin.sh and gcc version checks. gcc-plugin-$(CONFIG_GCC_PLUGIN_SANCOV) += sancov_plugin.so - ifneq ($(PLUGINCC),) - CFLAGS_KCOV := $(SANCOV_PLUGIN) - else + ifeq ($(PLUGINCC),) $(warning warning: cannot use CONFIG_KCOV: -fsanitize-coverage=trace-pc is not supported by compiler) endif - endif endif gcc-plugin-$(CONFIG_GCC_PLUGIN_STRUCTLEAK) += structleak_plugin.so @@ -38,7 +34,7 @@ ifdef CONFIG_GCC_PLUGINS GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y)) export PLUGINCC GCC_PLUGINS_CFLAGS GCC_PLUGIN GCC_PLUGIN_SUBDIR - export SANCOV_PLUGIN DISABLE_LATENT_ENTROPY_PLUGIN + export DISABLE_LATENT_ENTROPY_PLUGIN ifneq ($(PLUGINCC),) # SANCOV_PLUGIN can be only in CFLAGS_KCOV because avoid duplication. diff --git a/scripts/Makefile.kcov b/scripts/Makefile.kcov new file mode 100644 index 000000000000..945724d226e8 --- /dev/null +++ b/scripts/Makefile.kcov @@ -0,0 +1,25 @@ +ifdef CONFIG_KCOV + +ifeq ($(call cc-option, -fsanitize-coverage=trace-pc -Werror),) + ifneq ($(CONFIG_COMPILE_TEST),y) + $(error Cannot use CONFIG_KCOV: \ + -fsanitize-coverage=trace-pc is not supported by compiler) + endif +endif + +ifdef CONFIG_KCOV_ENABLE_COMPARISONS + ifeq ($(call cc-option, -fsanitize-coverage=trace-cmp -Werror),) + ifneq ($(CONFIG_COMPILE_TEST),y) + $(error Cannot use CONFIG_KCOV_ENABLE_COMPARISONS: \ + -fsanitize-coverage=trace-cmp is not supported by compiler) + endif + endif +endif + +kcov-flags-$(CONFIG_CC_HAS_SANCOV_TRACE_PC) += -fsanitize-coverage=trace-pc +kcov-flags-$(CONFIG_KCOV_ENABLE_COMPARISONS) += -fsanitize-coverage=trace-cmp +kcov-flags-$(CONFIG_GCC_PLUGIN_SANCOV) += -fplugin=$(objtree)/scripts/gcc-plugins/sancov_plugin.so + +export CFLAGS_KCOV := $(kcov-flags-y) + +endif diff --git a/scripts/gcc-plugins/Makefile b/scripts/gcc-plugins/Makefile index e2ff425f4c7e..ea465799ced5 100644 --- a/scripts/gcc-plugins/Makefile +++ b/scripts/gcc-plugins/Makefile @@ -13,10 +13,6 @@ else export HOST_EXTRACXXFLAGS endif -ifneq ($(CFLAGS_KCOV), $(SANCOV_PLUGIN)) - GCC_PLUGIN := $(filter-out $(SANCOV_PLUGIN), $(GCC_PLUGIN)) -endif - export HOSTLIBS $(obj)/randomize_layout_plugin.o: $(objtree)/$(obj)/randomize_layout_seed.h diff --git a/scripts/gdb/linux/symbols.py b/scripts/gdb/linux/symbols.py index 004b0ac7fa72..4644f1a83b57 100644 --- a/scripts/gdb/linux/symbols.py +++ b/scripts/gdb/linux/symbols.py @@ -99,7 +99,8 @@ lx-symbols command.""" attrs[n]['name'].string(): attrs[n]['address'] for n in range(int(sect_attrs['nsections']))} args = [] - for section_name in [".data", ".data..read_mostly", ".rodata", ".bss"]: + for section_name in [".data", ".data..read_mostly", ".rodata", ".bss", + ".text", ".text.hot", ".text.unlikely"]: address = section_name_to_address.get(section_name) if address: args.append(" -s {name} {addr}".format( diff --git a/scripts/generate_initcall_order.pl b/scripts/generate_initcall_order.pl new file mode 100755 index 000000000000..f772b4a01caa --- /dev/null +++ b/scripts/generate_initcall_order.pl @@ -0,0 +1,250 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 +# +# Generates a linker script that specifies the correct initcall order. +# +# Copyright (C) 2019 Google LLC + +use strict; +use warnings; +use IO::Handle; + +my $nm = $ENV{'LLVM_NM'} || "llvm-nm"; +my $ar = $ENV{'AR'} || "llvm-ar"; +my $objtree = $ENV{'objtree'} || "."; + +## list of all object files to process, in link order +my @objects; +## currently active child processes +my $jobs = {}; # child process pid -> file handle +## results from child processes +my $results = {}; # object index -> { level, function } + +## reads _NPROCESSORS_ONLN to determine the number of processes to start +sub get_online_processors { + open(my $fh, "getconf _NPROCESSORS_ONLN 2>/dev/null |") + or die "$0: failed to execute getconf: $!"; + my $procs = <$fh>; + close($fh); + + if (!($procs =~ /^\d+$/)) { + return 1; + } + + return int($procs); +} + +## finds initcalls defined in an object file, parses level and function name, +## and prints it out to the parent process +sub find_initcalls { + my ($object) = @_; + + die "$0: object file $object doesn't exist?" if (! -f $object); + + open(my $fh, "\"$nm\" -just-symbol-name -defined-only \"$object\" 2>/dev/null |") + or die "$0: failed to execute \"$nm\": $!"; + + my $initcalls = {}; + + while (<$fh>) { + chomp; + + my ($counter, $line, $symbol) = $_ =~ /^__initcall_(\d+)_(\d+)_(.*)$/; + + if (!defined($counter) || !defined($line) || !defined($symbol)) { + next; + } + + my ($function, $level) = $symbol =~ + /^(.*)((early|rootfs|con|security|[0-9])s?)$/; + + die "$0: duplicate initcall counter value in object $object: $_" + if exists($initcalls->{$counter}); + + $initcalls->{$counter} = { + 'level' => $level, + 'line' => $line, + 'function' => $function + }; + } + + close($fh); + + # sort initcalls in each object file numerically by the counter value + # to ensure they are in the order they were defined + foreach my $counter (sort { $a <=> $b } keys(%{$initcalls})) { + print $initcalls->{$counter}->{"level"} . " " . + $counter . " " . + $initcalls->{$counter}->{"line"} . " " . + $initcalls->{$counter}->{"function"} . "\n"; + } +} + +## waits for any child process to complete, reads the results, and adds them to +## the $results array for later processing +sub wait_for_results { + my $pid = wait(); + if ($pid > 0) { + my $fh = $jobs->{$pid}; + + # the child process prints out results in the following format: + # line 1: + # line 2..n: + + my $index = <$fh>; + chomp($index); + + if (!($index =~ /^\d+$/)) { + die "$0: child $pid returned an invalid index: $index"; + } + $index = int($index); + + while (<$fh>) { + chomp; + my ($level, $counter, $line, $function) = $_ =~ + /^([^\ ]+)\ (\d+)\ (\d+)\ (.*)$/; + + if (!defined($level) || + !defined($counter) || + !defined($line) || + !defined($function)) { + die "$0: child $pid returned invalid data"; + } + + if (!exists($results->{$index})) { + $results->{$index} = []; + } + + push (@{$results->{$index}}, { + 'level' => $level, + 'counter' => $counter, + 'line' => $line, + 'function' => $function + }); + } + + close($fh); + delete($jobs->{$pid}); + } +} + +## launches child processes to find initcalls from the object files, waits for +## each process to complete and collects the results +sub process_objects { + my $index = 0; # link order index of the object file + my $njobs = get_online_processors(); + + while (scalar(@objects) > 0) { + my $object = shift(@objects); + + # fork a child process and read it's stdout + my $pid = open(my $fh, '-|'); + + if (!defined($pid)) { + die "$0: failed to fork: $!"; + } elsif ($pid) { + # save the child process pid and the file handle + $jobs->{$pid} = $fh; + } else { + STDOUT->autoflush(1); + print "$index\n"; + find_initcalls("$objtree/$object"); + exit; + } + + $index++; + + # if we reached the maximum number of processes, wait for one + # to complete before launching new ones + if (scalar(keys(%{$jobs})) >= $njobs && scalar(@objects) > 0) { + wait_for_results(); + } + } + + # wait for the remaining children to complete + while (scalar(keys(%{$jobs})) > 0) { + wait_for_results(); + } +} + +## gets a list of actual object files from thin archives, and adds them to +## @objects in link order +sub find_objects { + while (my $file = shift(@ARGV)) { + my $pid = open (my $fh, "\"$ar\" t \"$file\" 2>/dev/null |") + or die "$0: failed to execute $ar: $!"; + + my @output; + + while (<$fh>) { + chomp; + push(@output, $_); + } + + close($fh); + + # if $ar failed, assume we have an object file + if ($? != 0) { + push(@objects, $file); + next; + } + + # if $ar succeeded, read the list of object files + foreach (@output) { + push(@objects, $_); + } + } +} + +## START +find_objects(); +process_objects(); + +## process results and add them to $sections in the correct order +my $sections = {}; + +foreach my $index (sort { $a <=> $b } keys(%{$results})) { + foreach my $result (@{$results->{$index}}) { + my $level = $result->{'level'}; + + if (!exists($sections->{$level})) { + $sections->{$level} = []; + } + + my $fsname = $result->{'counter'} . '_' . + $result->{'line'} . '_' . + $result->{'function'}; + + push(@{$sections->{$level}}, $fsname); + } +} + +if (!keys(%{$sections})) { + exit(0); # no initcalls...? +} + +## print out a linker script that defines the order of initcalls for each +## level +print "SECTIONS {\n"; + +foreach my $level (sort(keys(%{$sections}))) { + my $section; + + if ($level eq 'con') { + $section = '.con_initcall.init'; + } elsif ($level eq 'security') { + $section = '.security_initcall.init'; + } else { + $section = ".initcall${level}.init"; + } + + print "\t${section} : {\n"; + + foreach my $fsname (@{$sections->{$level}}) { + print "\t\t*(${section}..${fsname}) ;\n" + } + + print "\t}\n"; +} + +print "}\n"; diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index b471022c8162..b43531899648 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -510,6 +510,8 @@ static void build_initial_tok_table(void) table[pos] = table[i]; learn_symbol(table[pos].sym, table[pos].len); pos++; + } else { + free(table[i].sym); } } table_cnt = pos; diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 27aac273205b..fa423fcd1a92 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -1238,7 +1238,7 @@ bool conf_set_all_new_symbols(enum conf_def_mode mode) sym_calc_value(csym); if (mode == def_random) - has_changed = randomize_choice_values(csym); + has_changed |= randomize_choice_values(csym); else { set_all_choice_values(csym); has_changed = true; diff --git a/scripts/kconfig/expr.c b/scripts/kconfig/expr.c index ed29bad1f03a..96420b620963 100644 --- a/scripts/kconfig/expr.c +++ b/scripts/kconfig/expr.c @@ -201,6 +201,13 @@ static int expr_eq(struct expr *e1, struct expr *e2) { int res, old_count; + /* + * A NULL expr is taken to be yes, but there's also a different way to + * represent yes. expr_is_yes() checks for either representation. + */ + if (!e1 || !e2) + return expr_is_yes(e1) && expr_is_yes(e2); + if (e1->type != e2->type) return 0; switch (e1->type) { diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index cfa44718cef7..ecba415423ec 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -70,29 +70,30 @@ archive_builtin() fi } -# If CONFIG_LTO_CLANG is selected, collect generated symbol versions into -# .tmp_symversions -modversions() +# If CONFIG_LTO_CLANG is selected, generate a linker script to ensure correct +# ordering of initcalls, and with CONFIG_MODVERSIONS also enabled, collect the +# previously generated symbol versions into the same script. +lto_lds() { if [ -z "${CONFIG_LTO_CLANG}" ]; then return fi - if [ -z "${CONFIG_MODVERSIONS}" ]; then - return + ${srctree}/scripts/generate_initcall_order.pl \ + built-in.o ${KBUILD_VMLINUX_LIBS} \ + > .tmp_lto.lds + + if [ -n "${CONFIG_MODVERSIONS}" ]; then + for a in built-in.o ${KBUILD_VMLINUX_LIBS}; do + for o in $(${AR} t $a); do + if [ -f ${o}.symversions ]; then + cat ${o}.symversions >> .tmp_lto.lds + fi + done + done fi - rm -f .tmp_symversions - - for a in built-in.o ${KBUILD_VMLINUX_LIBS}; do - for o in $(${AR} t $a); do - if [ -f ${o}.symversions ]; then - cat ${o}.symversions >> .tmp_symversions - fi - done - done - - echo "-T .tmp_symversions" + echo "-T .tmp_lto.lds" } # Link of vmlinux.o used for section mismatch analysis @@ -124,7 +125,7 @@ modpost_link() info LD vmlinux.o fi - ${LD} ${LDFLAGS} -r -o ${1} $(modversions) ${objects} + ${LD} ${LDFLAGS} -r -o ${1} $(lto_lds) ${objects} } # If CONFIG_LTO_CLANG is selected, we postpone running recordmcount until @@ -251,7 +252,7 @@ cleanup() rm -f .tmp_System.map rm -f .tmp_kallsyms* rm -f .tmp_version - rm -f .tmp_symversions + rm -f .tmp_lto.lds rm -f .tmp_vmlinux* rm -f built-in.o rm -f System.map diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 30e0aa4665ca..b522afbc3a3b 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1176,6 +1176,15 @@ static const struct sectioncheck *section_mismatch( * refsymname = *.constprop.* * * Pattern 6: + * Hide section mismatch warnings for ELF local symbols. The goal + * is to eliminate false positive modpost warnings caused by + * compiler-generated ELF local symbol names such as ".LANCHOR1". + * Autogenerated symbol names bypass modpost's "Pattern 2" + * whitelisting, which relies on pattern-matching against symbol + * names to work. (One situation where gcc can autogenerate ELF + * local symbols is when "-fsection-anchors" is used.) + * + * Pattern 7: * With CONFIG_CFI_CLANG, clang appends .cfi to all indirectly called * functions and creates a function stub with the original name. This * stub is always placed in .text, even if the actual function with the @@ -1224,6 +1233,10 @@ static int secref_whitelist(const struct sectioncheck *mismatch, return 0; /* Check for pattern 6 */ + if (strstarts(fromsym, ".L")) + return 0; + + /* Check for pattern 7 */ if (match(fromsec, text_sections) && match(tosec, init_exit_sections) && match(tosym, cfi_symbols)) diff --git a/scripts/module-lto.lds b/scripts/module-lto.lds new file mode 100644 index 000000000000..f5ee544a877d --- /dev/null +++ b/scripts/module-lto.lds @@ -0,0 +1,22 @@ +/* + * With CONFIG_LTO_CLANG, LLD always enables -fdata-sections and + * -ffunction-sections, which increases the size of the final module. + * Merge the split sections in the final binary. + */ +SECTIONS { + /* + * LLVM may emit .eh_frame with CONFIG_CFI_CLANG despite + * -fno-asynchronous-unwind-tables. Discard the section. + */ + /DISCARD/ : { + *(.eh_frame) + } + + .bss : { *(.bss .bss[.0-9a-zA-Z_]*) } + .data : { *(.data .data[.0-9a-zA-Z_]*) } + .rela.data : { *(.rela.data .rela.data[.0-9a-zA-Z_]*) } + .rela.rodata : { *(.rela.rodata .rela.rodata[.0-9a-zA-Z_]*) } + .rela.text : { *(.rela.text .rela.text[.0-9a-zA-Z_]*) } + .rodata : { *(.rodata .rodata[.0-9a-zA-Z_]*) } + .text : { *(.text .text[.0-9a-zA-Z_]*) } +} diff --git a/scripts/namespace.pl b/scripts/namespace.pl index 729c547fc9e1..30c43e639db8 100755 --- a/scripts/namespace.pl +++ b/scripts/namespace.pl @@ -65,13 +65,14 @@ use warnings; use strict; use File::Find; +use File::Spec; my $nm = ($ENV{'NM'} || "nm") . " -p"; my $objdump = ($ENV{'OBJDUMP'} || "objdump") . " -s -j .comment"; -my $srctree = ""; -my $objtree = ""; -$srctree = "$ENV{'srctree'}/" if (exists($ENV{'srctree'})); -$objtree = "$ENV{'objtree'}/" if (exists($ENV{'objtree'})); +my $srctree = File::Spec->curdir(); +my $objtree = File::Spec->curdir(); +$srctree = File::Spec->rel2abs($ENV{'srctree'}) if (exists($ENV{'srctree'})); +$objtree = File::Spec->rel2abs($ENV{'objtree'}) if (exists($ENV{'objtree'})); if ($#ARGV != -1) { print STDERR "usage: $0 takes no parameters\n"; @@ -231,9 +232,9 @@ sub do_nm } ($source = $basename) =~ s/\.o$//; if (-e "$source.c" || -e "$source.S") { - $source = "$objtree$File::Find::dir/$source"; + $source = File::Spec->catfile($objtree, $File::Find::dir, $source) } else { - $source = "$srctree$File::Find::dir/$source"; + $source = File::Spec->catfile($srctree, $File::Find::dir, $source) } if (! -e "$source.c" && ! -e "$source.S") { # No obvious source, exclude the object if it is conglomerate diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 69a769904da7..d4e674f6ffaf 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -53,6 +53,10 @@ #define R_AARCH64_ABS64 257 #endif +#define R_ARM_PC24 1 +#define R_ARM_THM_CALL 10 +#define R_ARM_CALL 28 + static int fd_map; /* File descriptor for file being modified. */ static int mmap_failed; /* Boolean flag. */ static char gpfx; /* prefix for global symbol name (sometimes '_') */ @@ -429,6 +433,18 @@ is_mcounted_section_name(char const *const txtname) #define RECORD_MCOUNT_64 #include "recordmcount.h" +static int arm_is_fake_mcount(Elf32_Rel const *rp) +{ + switch (ELF32_R_TYPE(w(rp->r_info))) { + case R_ARM_THM_CALL: + case R_ARM_CALL: + case R_ARM_PC24: + return 0; + } + + return 1; +} + /* 64-bit EM_MIPS has weird ELF64_Rela.r_info. * http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf * We interpret Table 29 Relocation Operation (Elf64_Rel, Elf64_Rela) [p.40] @@ -530,6 +546,7 @@ do_file(char const *const fname) altmcount = "__gnu_mcount_nc"; make_nop = make_nop_arm; rel_type_nop = R_ARM_NONE; + is_fake_mcount32 = arm_is_fake_mcount; break; case EM_AARCH64: reltype = R_AARCH64_ABS64; diff --git a/scripts/setlocalversion b/scripts/setlocalversion index 71f39410691b..365b3c2b8f43 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -73,8 +73,16 @@ scm_version() printf -- '-svn%s' "`git svn find-rev $head`" fi - # Check for uncommitted changes - if git diff-index --name-only HEAD | grep -qv "^scripts/package"; then + # Check for uncommitted changes. + # First, with git-status, but --no-optional-locks is only + # supported in git >= 2.14, so fall back to git-diff-index if + # it fails. Note that git-diff-index does not refresh the + # index, so it may give misleading results. See + # git-update-index(1), git-diff-index(1), and git-status(1). + if { + git --no-optional-locks status -uno --porcelain 2>/dev/null || + git diff-index --name-only HEAD + } | grep -qvE '^(.. )?scripts/package'; then printf '%s' -dirty fi diff --git a/security/Kconfig b/security/Kconfig index 8b6c5e9528e0..3a66cd8363c0 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -18,15 +18,6 @@ config SECURITY_DMESG_RESTRICT If you are unsure how to answer this question, answer N. -config SECURITY_PERF_EVENTS_RESTRICT - bool "Restrict unprivileged use of performance events" - depends on PERF_EVENTS - help - If you say Y here, the kernel.perf_event_paranoid sysctl - will be set to 3 by default, and no unprivileged use of the - perf_event_open syscall will be permitted unless it is - changed. - config SECURITY bool "Enable different security models" depends on SYSFS diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index dd746bd69a9b..c106988c1b25 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -363,6 +363,7 @@ static void aafs_remove(struct dentry *dentry) simple_rmdir(dir, dentry); else simple_unlink(dir, dentry); + d_delete(dentry); dput(dentry); } inode_unlock(dir); diff --git a/security/apparmor/include/context.h b/security/apparmor/include/context.h index 6ae07e9aaa17..812cdec9dd3b 100644 --- a/security/apparmor/include/context.h +++ b/security/apparmor/include/context.h @@ -191,6 +191,8 @@ static inline struct aa_label *begin_current_label_crit_section(void) { struct aa_label *label = aa_current_raw_label(); + might_sleep(); + if (label_is_stale(label)) { label = aa_get_newest_label(label); if (aa_replace_current_label(label) == 0) diff --git a/security/apparmor/label.c b/security/apparmor/label.c index c5b99b954580..ea63710442ae 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -1463,11 +1463,13 @@ static inline bool use_label_hname(struct aa_ns *ns, struct aa_label *label, /* helper macro for snprint routines */ #define update_for_len(total, len, size, str) \ do { \ + size_t ulen = len; \ + \ AA_BUG(len < 0); \ - total += len; \ - len = min(len, size); \ - size -= len; \ - str += len; \ + total += ulen; \ + ulen = min(ulen, size); \ + size -= ulen; \ + str += ulen; \ } while (0) /** @@ -1602,7 +1604,7 @@ int aa_label_snxprint(char *str, size_t size, struct aa_ns *ns, struct aa_ns *prev_ns = NULL; struct label_it i; int count = 0, total = 0; - size_t len; + ssize_t len; AA_BUG(!str && size != 0); AA_BUG(!label); diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 17893fde4487..c58df3375390 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -108,12 +108,12 @@ static int apparmor_ptrace_access_check(struct task_struct *child, struct aa_label *tracer, *tracee; int error; - tracer = begin_current_label_crit_section(); + tracer = __begin_current_label_crit_section(); tracee = aa_get_task_label(child); error = aa_may_ptrace(tracer, tracee, mode == PTRACE_MODE_READ ? AA_PTRACE_READ : AA_PTRACE_TRACE); aa_put_label(tracee); - end_current_label_crit_section(tracer); + __end_current_label_crit_section(tracer); return error; } diff --git a/security/keys/internal.h b/security/keys/internal.h index e3a573840186..dcb4a110a036 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -158,8 +158,6 @@ extern struct key *request_key_and_link(struct key_type *type, extern bool lookup_user_key_possessed(const struct key *key, const struct key_match_data *match_data); -extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags, - key_perm_t perm); #define KEY_LOOKUP_CREATE 0x01 #define KEY_LOOKUP_PARTIAL 0x02 #define KEY_LOOKUP_FOR_UNLINK 0x04 diff --git a/security/keys/key.c b/security/keys/key.c index 87172f99f73e..17244f5f54c6 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -297,6 +297,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, key->gid = gid; key->perm = perm; key->restrict_link = restrict_link; + key->last_used_at = ktime_get_real_seconds(); if (!(flags & KEY_ALLOC_NOT_IN_QUOTA)) key->flags |= 1 << KEY_FLAG_IN_QUOTA; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 5f2993ab2d50..8968a3db1add 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -756,6 +756,7 @@ reget_creds: put_cred(ctx.cred); goto try_again; } +EXPORT_SYMBOL(lookup_user_key); /* * Join the named keyring as the session keyring if possible else attempt to diff --git a/security/security.c b/security/security.c index fb4910f0d0e2..5afd1dc81511 100644 --- a/security/security.c +++ b/security/security.c @@ -1745,3 +1745,30 @@ void security_bpf_prog_free(struct bpf_prog_aux *aux) call_void_hook(bpf_prog_free_security, aux); } #endif /* CONFIG_BPF_SYSCALL */ + +#ifdef CONFIG_PERF_EVENTS +int security_perf_event_open(struct perf_event_attr *attr, int type) +{ + return call_int_hook(perf_event_open, 0, attr, type); +} + +int security_perf_event_alloc(struct perf_event *event) +{ + return call_int_hook(perf_event_alloc, 0, event); +} + +void security_perf_event_free(struct perf_event *event) +{ + call_void_hook(perf_event_free, event); +} + +int security_perf_event_read(struct perf_event *event) +{ + return call_int_hook(perf_event_read, 0, event); +} + +int security_perf_event_write(struct perf_event *event) +{ + return call_int_hook(perf_event_write, 0, event); +} +#endif /* CONFIG_PERF_EVENTS */ diff --git a/security/selinux/Kconfig b/security/selinux/Kconfig index 8af7a690eb40..8297e48a283d 100644 --- a/security/selinux/Kconfig +++ b/security/selinux/Kconfig @@ -99,3 +99,15 @@ config SECURITY_SELINUX_CHECKREQPROT_VALUE via /selinux/checkreqprot if authorized by policy. If you are unsure how to answer this question, answer 0. + +config SECURITY_SELINUX_SIDTAB_HASH_BITS + int "NSA SELinux sidtab hashtable size" + depends on SECURITY_SELINUX + range 8 13 + default 9 + help + This option sets the number of buckets used in the sidtab hashtable + to 2^SECURITY_SELINUX_SIDTAB_HASH_BITS buckets. The number of hash + collisions may be viewed at /sys/fs/selinux/ss/sidtab_hash_stats. If + chain lengths are high (e.g. > 20) then selecting a higher value here + will ensure that lookups times are short and stable. diff --git a/security/selinux/avc.c b/security/selinux/avc.c index e38cbdb53f88..fc75a1abbad9 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -82,14 +82,42 @@ struct avc_callback_node { struct avc_callback_node *next; }; -/* Exported via selinufs */ -unsigned int avc_cache_threshold = AVC_DEF_CACHE_THRESHOLD; - #ifdef CONFIG_SECURITY_SELINUX_AVC_STATS DEFINE_PER_CPU(struct avc_cache_stats, avc_cache_stats) = { 0 }; #endif -static struct avc_cache avc_cache; +struct selinux_avc { + unsigned int avc_cache_threshold; + struct avc_cache avc_cache; +}; + +static struct selinux_avc selinux_avc; + +void selinux_avc_init(struct selinux_avc **avc) +{ + int i; + + selinux_avc.avc_cache_threshold = AVC_DEF_CACHE_THRESHOLD; + for (i = 0; i < AVC_CACHE_SLOTS; i++) { + INIT_HLIST_HEAD(&selinux_avc.avc_cache.slots[i]); + spin_lock_init(&selinux_avc.avc_cache.slots_lock[i]); + } + atomic_set(&selinux_avc.avc_cache.active_nodes, 0); + atomic_set(&selinux_avc.avc_cache.lru_hint, 0); + *avc = &selinux_avc; +} + +unsigned int avc_get_cache_threshold(struct selinux_avc *avc) +{ + return avc->avc_cache_threshold; +} + +void avc_set_cache_threshold(struct selinux_avc *avc, + unsigned int cache_threshold) +{ + avc->avc_cache_threshold = cache_threshold; +} + static struct avc_callback_node *avc_callbacks; static struct kmem_cache *avc_node_cachep; static struct kmem_cache *avc_xperms_data_cachep; @@ -143,13 +171,14 @@ static void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av) * @tsid: target security identifier * @tclass: target security class */ -static void avc_dump_query(struct audit_buffer *ab, u32 ssid, u32 tsid, u16 tclass) +static void avc_dump_query(struct audit_buffer *ab, struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass) { int rc; char *scontext; u32 scontext_len; - rc = security_sid_to_context(ssid, &scontext, &scontext_len); + rc = security_sid_to_context(state, ssid, &scontext, &scontext_len); if (rc) audit_log_format(ab, "ssid=%d", ssid); else { @@ -157,7 +186,7 @@ static void avc_dump_query(struct audit_buffer *ab, u32 ssid, u32 tsid, u16 tcla kfree(scontext); } - rc = security_sid_to_context(tsid, &scontext, &scontext_len); + rc = security_sid_to_context(state, tsid, &scontext, &scontext_len); if (rc) audit_log_format(ab, " tsid=%d", tsid); else { @@ -176,15 +205,6 @@ static void avc_dump_query(struct audit_buffer *ab, u32 ssid, u32 tsid, u16 tcla */ void __init avc_init(void) { - int i; - - for (i = 0; i < AVC_CACHE_SLOTS; i++) { - INIT_HLIST_HEAD(&avc_cache.slots[i]); - spin_lock_init(&avc_cache.slots_lock[i]); - } - atomic_set(&avc_cache.active_nodes, 0); - atomic_set(&avc_cache.lru_hint, 0); - avc_node_cachep = kmem_cache_create("avc_node", sizeof(struct avc_node), 0, SLAB_PANIC, NULL); avc_xperms_cachep = kmem_cache_create("avc_xperms_node", @@ -199,7 +219,7 @@ void __init avc_init(void) 0, SLAB_PANIC, NULL); } -int avc_get_hash_stats(char *page) +int avc_get_hash_stats(struct selinux_avc *avc, char *page) { int i, chain_len, max_chain_len, slots_used; struct avc_node *node; @@ -210,7 +230,7 @@ int avc_get_hash_stats(char *page) slots_used = 0; max_chain_len = 0; for (i = 0; i < AVC_CACHE_SLOTS; i++) { - head = &avc_cache.slots[i]; + head = &avc->avc_cache.slots[i]; if (!hlist_empty(head)) { slots_used++; chain_len = 0; @@ -225,7 +245,7 @@ int avc_get_hash_stats(char *page) return scnprintf(page, PAGE_SIZE, "entries: %d\nbuckets used: %d/%d\n" "longest chain: %d\n", - atomic_read(&avc_cache.active_nodes), + atomic_read(&avc->avc_cache.active_nodes), slots_used, AVC_CACHE_SLOTS, max_chain_len); } @@ -462,11 +482,12 @@ static inline u32 avc_xperms_audit_required(u32 requested, return audited; } -static inline int avc_xperms_audit(u32 ssid, u32 tsid, u16 tclass, - u32 requested, struct av_decision *avd, - struct extended_perms_decision *xpd, - u8 perm, int result, - struct common_audit_data *ad) +static inline int avc_xperms_audit(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, + u32 requested, struct av_decision *avd, + struct extended_perms_decision *xpd, + u8 perm, int result, + struct common_audit_data *ad) { u32 audited, denied; @@ -474,7 +495,7 @@ static inline int avc_xperms_audit(u32 ssid, u32 tsid, u16 tclass, requested, avd, xpd, perm, result, &denied); if (likely(!audited)) return 0; - return slow_avc_audit(ssid, tsid, tclass, requested, + return slow_avc_audit(state, ssid, tsid, tclass, requested, audited, denied, result, ad, 0); } @@ -486,29 +507,30 @@ static void avc_node_free(struct rcu_head *rhead) avc_cache_stats_incr(frees); } -static void avc_node_delete(struct avc_node *node) +static void avc_node_delete(struct selinux_avc *avc, struct avc_node *node) { hlist_del_rcu(&node->list); call_rcu(&node->rhead, avc_node_free); - atomic_dec(&avc_cache.active_nodes); + atomic_dec(&avc->avc_cache.active_nodes); } -static void avc_node_kill(struct avc_node *node) +static void avc_node_kill(struct selinux_avc *avc, struct avc_node *node) { avc_xperms_free(node->ae.xp_node); kmem_cache_free(avc_node_cachep, node); avc_cache_stats_incr(frees); - atomic_dec(&avc_cache.active_nodes); + atomic_dec(&avc->avc_cache.active_nodes); } -static void avc_node_replace(struct avc_node *new, struct avc_node *old) +static void avc_node_replace(struct selinux_avc *avc, + struct avc_node *new, struct avc_node *old) { hlist_replace_rcu(&old->list, &new->list); call_rcu(&old->rhead, avc_node_free); - atomic_dec(&avc_cache.active_nodes); + atomic_dec(&avc->avc_cache.active_nodes); } -static inline int avc_reclaim_node(void) +static inline int avc_reclaim_node(struct selinux_avc *avc) { struct avc_node *node; int hvalue, try, ecx; @@ -517,16 +539,17 @@ static inline int avc_reclaim_node(void) spinlock_t *lock; for (try = 0, ecx = 0; try < AVC_CACHE_SLOTS; try++) { - hvalue = atomic_inc_return(&avc_cache.lru_hint) & (AVC_CACHE_SLOTS - 1); - head = &avc_cache.slots[hvalue]; - lock = &avc_cache.slots_lock[hvalue]; + hvalue = atomic_inc_return(&avc->avc_cache.lru_hint) & + (AVC_CACHE_SLOTS - 1); + head = &avc->avc_cache.slots[hvalue]; + lock = &avc->avc_cache.slots_lock[hvalue]; if (!spin_trylock_irqsave(lock, flags)) continue; rcu_read_lock(); hlist_for_each_entry(node, head, list) { - avc_node_delete(node); + avc_node_delete(avc, node); avc_cache_stats_incr(reclaims); ecx++; if (ecx >= AVC_CACHE_RECLAIM) { @@ -542,7 +565,7 @@ out: return ecx; } -static struct avc_node *avc_alloc_node(void) +static struct avc_node *avc_alloc_node(struct selinux_avc *avc) { struct avc_node *node; @@ -553,8 +576,9 @@ static struct avc_node *avc_alloc_node(void) INIT_HLIST_NODE(&node->list); avc_cache_stats_incr(allocations); - if (atomic_inc_return(&avc_cache.active_nodes) > avc_cache_threshold) - avc_reclaim_node(); + if (atomic_inc_return(&avc->avc_cache.active_nodes) > + avc->avc_cache_threshold) + avc_reclaim_node(avc); out: return node; @@ -568,14 +592,15 @@ static void avc_node_populate(struct avc_node *node, u32 ssid, u32 tsid, u16 tcl memcpy(&node->ae.avd, avd, sizeof(node->ae.avd)); } -static inline struct avc_node *avc_search_node(u32 ssid, u32 tsid, u16 tclass) +static inline struct avc_node *avc_search_node(struct selinux_avc *avc, + u32 ssid, u32 tsid, u16 tclass) { struct avc_node *node, *ret = NULL; int hvalue; struct hlist_head *head; hvalue = avc_hash(ssid, tsid, tclass); - head = &avc_cache.slots[hvalue]; + head = &avc->avc_cache.slots[hvalue]; hlist_for_each_entry_rcu(node, head, list) { if (ssid == node->ae.ssid && tclass == node->ae.tclass && @@ -600,12 +625,13 @@ static inline struct avc_node *avc_search_node(u32 ssid, u32 tsid, u16 tclass) * then this function returns the avc_node. * Otherwise, this function returns NULL. */ -static struct avc_node *avc_lookup(u32 ssid, u32 tsid, u16 tclass) +static struct avc_node *avc_lookup(struct selinux_avc *avc, + u32 ssid, u32 tsid, u16 tclass) { struct avc_node *node; avc_cache_stats_incr(lookups); - node = avc_search_node(ssid, tsid, tclass); + node = avc_search_node(avc, ssid, tsid, tclass); if (node) return node; @@ -614,7 +640,8 @@ static struct avc_node *avc_lookup(u32 ssid, u32 tsid, u16 tclass) return NULL; } -static int avc_latest_notif_update(int seqno, int is_insert) +static int avc_latest_notif_update(struct selinux_avc *avc, + int seqno, int is_insert) { int ret = 0; static DEFINE_SPINLOCK(notif_lock); @@ -622,14 +649,14 @@ static int avc_latest_notif_update(int seqno, int is_insert) spin_lock_irqsave(¬if_lock, flag); if (is_insert) { - if (seqno < avc_cache.latest_notif) { + if (seqno < avc->avc_cache.latest_notif) { printk(KERN_WARNING "SELinux: avc: seqno %d < latest_notif %d\n", - seqno, avc_cache.latest_notif); + seqno, avc->avc_cache.latest_notif); ret = -EAGAIN; } } else { - if (seqno > avc_cache.latest_notif) - avc_cache.latest_notif = seqno; + if (seqno > avc->avc_cache.latest_notif) + avc->avc_cache.latest_notif = seqno; } spin_unlock_irqrestore(¬if_lock, flag); @@ -654,18 +681,19 @@ static int avc_latest_notif_update(int seqno, int is_insert) * the access vectors into a cache entry, returns * avc_node inserted. Otherwise, this function returns NULL. */ -static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass, - struct av_decision *avd, - struct avc_xperms_node *xp_node) +static struct avc_node *avc_insert(struct selinux_avc *avc, + u32 ssid, u32 tsid, u16 tclass, + struct av_decision *avd, + struct avc_xperms_node *xp_node) { struct avc_node *pos, *node = NULL; int hvalue; unsigned long flag; - if (avc_latest_notif_update(avd->seqno, 1)) + if (avc_latest_notif_update(avc, avd->seqno, 1)) goto out; - node = avc_alloc_node(); + node = avc_alloc_node(avc); if (node) { struct hlist_head *head; spinlock_t *lock; @@ -678,15 +706,15 @@ static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass, kmem_cache_free(avc_node_cachep, node); return NULL; } - head = &avc_cache.slots[hvalue]; - lock = &avc_cache.slots_lock[hvalue]; + head = &avc->avc_cache.slots[hvalue]; + lock = &avc->avc_cache.slots_lock[hvalue]; spin_lock_irqsave(lock, flag); hlist_for_each_entry(pos, head, list) { if (pos->ae.ssid == ssid && pos->ae.tsid == tsid && pos->ae.tclass == tclass) { - avc_node_replace(node, pos); + avc_node_replace(avc, node, pos); goto found; } } @@ -724,36 +752,25 @@ static void avc_audit_post_callback(struct audit_buffer *ab, void *a) { struct common_audit_data *ad = a; audit_log_format(ab, " "); - avc_dump_query(ab, ad->selinux_audit_data->ssid, - ad->selinux_audit_data->tsid, - ad->selinux_audit_data->tclass); + avc_dump_query(ab, ad->selinux_audit_data->state, + ad->selinux_audit_data->ssid, + ad->selinux_audit_data->tsid, + ad->selinux_audit_data->tclass); if (ad->selinux_audit_data->denied) { audit_log_format(ab, " permissive=%u", ad->selinux_audit_data->result ? 0 : 1); #if defined(CONFIG_MTK_SELINUX_AEE_WARNING) &&\ defined(MTK_SELINUX_WARNING_ENABLE) - { - struct nlmsghdr *nlh; - char *selinux_data; - - if (ab) { - nlh = nlmsg_hdr(audit_get_skb(ab)); - selinux_data = nlmsg_data(nlh); - if (nlh->nlmsg_type != AUDIT_EOE) { - if (nlh->nlmsg_type == 1400) - mtk_audit_hook(selinux_data); - } - } - } #endif } } /* This is the slow part of avc audit with big stack footprint */ -noinline int slow_avc_audit(u32 ssid, u32 tsid, u16 tclass, - u32 requested, u32 audited, u32 denied, int result, - struct common_audit_data *a, - unsigned flags) +noinline int slow_avc_audit(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, + u32 requested, u32 audited, u32 denied, int result, + struct common_audit_data *a, + unsigned int flags) { struct common_audit_data stack_data; struct selinux_audit_data sad; @@ -781,6 +798,7 @@ noinline int slow_avc_audit(u32 ssid, u32 tsid, u16 tclass, sad.audited = audited; sad.denied = denied; sad.result = result; + sad.state = state; a->selinux_audit_data = &sad; @@ -829,10 +847,11 @@ out: * otherwise, this function updates the AVC entry. The original AVC-entry object * will release later by RCU. */ -static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, - u32 tsid, u16 tclass, u32 seqno, - struct extended_perms_decision *xpd, - u32 flags) +static int avc_update_node(struct selinux_avc *avc, + u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, + u32 tsid, u16 tclass, u32 seqno, + struct extended_perms_decision *xpd, + u32 flags) { int hvalue, rc = 0; unsigned long flag; @@ -840,7 +859,7 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, struct hlist_head *head; spinlock_t *lock; - node = avc_alloc_node(); + node = avc_alloc_node(avc); if (!node) { rc = -ENOMEM; goto out; @@ -849,8 +868,8 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, /* Lock the target slot */ hvalue = avc_hash(ssid, tsid, tclass); - head = &avc_cache.slots[hvalue]; - lock = &avc_cache.slots_lock[hvalue]; + head = &avc->avc_cache.slots[hvalue]; + lock = &avc->avc_cache.slots_lock[hvalue]; spin_lock_irqsave(lock, flag); @@ -866,7 +885,7 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, if (!orig) { rc = -ENOENT; - avc_node_kill(node); + avc_node_kill(avc, node); goto out_unlock; } @@ -879,7 +898,7 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, if (orig->ae.xp_node) { rc = avc_xperms_populate(node, orig->ae.xp_node); if (rc) { - kmem_cache_free(avc_node_cachep, node); + avc_node_kill(avc, node); goto out_unlock; } } @@ -910,7 +929,7 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, avc_add_xperms_decision(node, xpd); break; } - avc_node_replace(node, orig); + avc_node_replace(avc, node, orig); out_unlock: spin_unlock_irqrestore(lock, flag); out: @@ -920,7 +939,7 @@ out: /** * avc_flush - Flush the cache */ -static void avc_flush(void) +static void avc_flush(struct selinux_avc *avc) { struct hlist_head *head; struct avc_node *node; @@ -929,8 +948,8 @@ static void avc_flush(void) int i; for (i = 0; i < AVC_CACHE_SLOTS; i++) { - head = &avc_cache.slots[i]; - lock = &avc_cache.slots_lock[i]; + head = &avc->avc_cache.slots[i]; + lock = &avc->avc_cache.slots_lock[i]; spin_lock_irqsave(lock, flag); /* @@ -939,7 +958,7 @@ static void avc_flush(void) */ rcu_read_lock(); hlist_for_each_entry(node, head, list) - avc_node_delete(node); + avc_node_delete(avc, node); rcu_read_unlock(); spin_unlock_irqrestore(lock, flag); } @@ -949,12 +968,12 @@ static void avc_flush(void) * avc_ss_reset - Flush the cache and revalidate migrated permissions. * @seqno: policy sequence number */ -int avc_ss_reset(u32 seqno) +int avc_ss_reset(struct selinux_avc *avc, u32 seqno) { struct avc_callback_node *c; int rc = 0, tmprc; - avc_flush(); + avc_flush(avc); for (c = avc_callbacks; c; c = c->next) { if (c->events & AVC_CALLBACK_RESET) { @@ -966,7 +985,7 @@ int avc_ss_reset(u32 seqno) } } - avc_latest_notif_update(seqno, 0); + avc_latest_notif_update(avc, seqno, 0); return rc; } @@ -979,30 +998,34 @@ int avc_ss_reset(u32 seqno) * Don't inline this, since it's the slow-path and just * results in a bigger stack frame. */ -static noinline struct avc_node *avc_compute_av(u32 ssid, u32 tsid, - u16 tclass, struct av_decision *avd, - struct avc_xperms_node *xp_node) +static noinline +struct avc_node *avc_compute_av(struct selinux_state *state, + u32 ssid, u32 tsid, + u16 tclass, struct av_decision *avd, + struct avc_xperms_node *xp_node) { rcu_read_unlock(); INIT_LIST_HEAD(&xp_node->xpd_head); - security_compute_av(ssid, tsid, tclass, avd, &xp_node->xp); + security_compute_av(state, ssid, tsid, tclass, avd, &xp_node->xp); rcu_read_lock(); - return avc_insert(ssid, tsid, tclass, avd, xp_node); + return avc_insert(state->avc, ssid, tsid, tclass, avd, xp_node); } -static noinline int avc_denied(u32 ssid, u32 tsid, - u16 tclass, u32 requested, - u8 driver, u8 xperm, unsigned flags, - struct av_decision *avd) +static noinline int avc_denied(struct selinux_state *state, + u32 ssid, u32 tsid, + u16 tclass, u32 requested, + u8 driver, u8 xperm, unsigned int flags, + struct av_decision *avd) { if (flags & AVC_STRICT) return -EACCES; - if (selinux_enforcing && !(avd->flags & AVD_FLAGS_PERMISSIVE)) + if (enforcing_enabled(state) && + !(avd->flags & AVD_FLAGS_PERMISSIVE)) return -EACCES; - avc_update_node(AVC_CALLBACK_GRANT, requested, driver, xperm, ssid, - tsid, tclass, avd->seqno, NULL, flags); + avc_update_node(state->avc, AVC_CALLBACK_GRANT, requested, driver, + xperm, ssid, tsid, tclass, avd->seqno, NULL, flags); return 0; } @@ -1013,8 +1036,9 @@ static noinline int avc_denied(u32 ssid, u32 tsid, * as-is the case with ioctls, then multiple may be chained together and the * driver field is used to specify which set contains the permission. */ -int avc_has_extended_perms(u32 ssid, u32 tsid, u16 tclass, u32 requested, - u8 driver, u8 xperm, struct common_audit_data *ad) +int avc_has_extended_perms(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 requested, + u8 driver, u8 xperm, struct common_audit_data *ad) { struct avc_node *node; struct av_decision avd; @@ -1033,9 +1057,9 @@ int avc_has_extended_perms(u32 ssid, u32 tsid, u16 tclass, u32 requested, rcu_read_lock(); - node = avc_lookup(ssid, tsid, tclass); + node = avc_lookup(state->avc, ssid, tsid, tclass); if (unlikely(!node)) { - node = avc_compute_av(ssid, tsid, tclass, &avd, xp_node); + node = avc_compute_av(state, ssid, tsid, tclass, &avd, xp_node); } else { memcpy(&avd, &node->ae.avd, sizeof(avd)); xp_node = node->ae.xp_node; @@ -1059,11 +1083,12 @@ int avc_has_extended_perms(u32 ssid, u32 tsid, u16 tclass, u32 requested, goto decision; } rcu_read_unlock(); - security_compute_xperms_decision(ssid, tsid, tclass, driver, - &local_xpd); + security_compute_xperms_decision(state, ssid, tsid, tclass, + driver, &local_xpd); rcu_read_lock(); - avc_update_node(AVC_CALLBACK_ADD_XPERMS, requested, driver, xperm, - ssid, tsid, tclass, avd.seqno, &local_xpd, 0); + avc_update_node(state->avc, AVC_CALLBACK_ADD_XPERMS, requested, + driver, xperm, ssid, tsid, tclass, avd.seqno, + &local_xpd, 0); } else { avc_quick_copy_xperms_decision(xperm, &local_xpd, xpd); } @@ -1075,12 +1100,12 @@ int avc_has_extended_perms(u32 ssid, u32 tsid, u16 tclass, u32 requested, decision: denied = requested & ~(avd.allowed); if (unlikely(denied)) - rc = avc_denied(ssid, tsid, tclass, requested, driver, xperm, - AVC_EXTENDED_PERMS, &avd); + rc = avc_denied(state, ssid, tsid, tclass, requested, + driver, xperm, AVC_EXTENDED_PERMS, &avd); rcu_read_unlock(); - rc2 = avc_xperms_audit(ssid, tsid, tclass, requested, + rc2 = avc_xperms_audit(state, ssid, tsid, tclass, requested, &avd, xpd, xperm, rc, ad); if (rc2) return rc2; @@ -1107,10 +1132,11 @@ decision: * auditing, e.g. in cases where a lock must be held for the check but * should be released for the auditing. */ -inline int avc_has_perm_noaudit(u32 ssid, u32 tsid, - u16 tclass, u32 requested, - unsigned flags, - struct av_decision *avd) +inline int avc_has_perm_noaudit(struct selinux_state *state, + u32 ssid, u32 tsid, + u16 tclass, u32 requested, + unsigned int flags, + struct av_decision *avd) { struct avc_node *node; struct avc_xperms_node xp_node; @@ -1121,15 +1147,16 @@ inline int avc_has_perm_noaudit(u32 ssid, u32 tsid, rcu_read_lock(); - node = avc_lookup(ssid, tsid, tclass); + node = avc_lookup(state->avc, ssid, tsid, tclass); if (unlikely(!node)) - node = avc_compute_av(ssid, tsid, tclass, avd, &xp_node); + node = avc_compute_av(state, ssid, tsid, tclass, avd, &xp_node); else memcpy(avd, &node->ae.avd, sizeof(*avd)); denied = requested & ~(avd->allowed); if (unlikely(denied)) - rc = avc_denied(ssid, tsid, tclass, requested, 0, 0, flags, avd); + rc = avc_denied(state, ssid, tsid, tclass, requested, 0, 0, + flags, avd); rcu_read_unlock(); return rc; @@ -1151,39 +1178,43 @@ inline int avc_has_perm_noaudit(u32 ssid, u32 tsid, * permissions are granted, -%EACCES if any permissions are denied, or * another -errno upon other errors. */ -int avc_has_perm(u32 ssid, u32 tsid, u16 tclass, +int avc_has_perm(struct selinux_state *state, u32 ssid, u32 tsid, u16 tclass, u32 requested, struct common_audit_data *auditdata) { struct av_decision avd; int rc, rc2; - rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd); + rc = avc_has_perm_noaudit(state, ssid, tsid, tclass, requested, 0, + &avd); - rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata, 0); + rc2 = avc_audit(state, ssid, tsid, tclass, requested, &avd, rc, + auditdata, 0); if (rc2) return rc2; return rc; } -int avc_has_perm_flags(u32 ssid, u32 tsid, u16 tclass, - u32 requested, struct common_audit_data *auditdata, +int avc_has_perm_flags(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 requested, + struct common_audit_data *auditdata, int flags) { struct av_decision avd; int rc, rc2; - rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd); + rc = avc_has_perm_noaudit(state, ssid, tsid, tclass, requested, 0, + &avd); - rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, + rc2 = avc_audit(state, ssid, tsid, tclass, requested, &avd, rc, auditdata, flags); if (rc2) return rc2; return rc; } -u32 avc_policy_seqno(void) +u32 avc_policy_seqno(struct selinux_state *state) { - return avc_cache.latest_notif; + return state->avc->avc_cache.latest_notif; } void avc_disable(void) @@ -1200,7 +1231,7 @@ void avc_disable(void) * the cache and get that memory back. */ if (avc_node_cachep) { - avc_flush(); + avc_flush(selinux_state.avc); /* kmem_cache_destroy(avc_node_cachep); */ } } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 444739be29e9..134c7b5f8a0b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -98,20 +98,24 @@ #include "audit.h" #include "avc_ss.h" +struct selinux_state selinux_state; + /* SECMARK reference count */ static atomic_t selinux_secmark_refcount = ATOMIC_INIT(0); #ifdef CONFIG_SECURITY_SELINUX_DEVELOP -int selinux_enforcing; +static int selinux_enforcing_boot; static int __init enforcing_setup(char *str) { unsigned long enforcing; if (!kstrtoul(str, 0, &enforcing)) - selinux_enforcing = enforcing ? 1 : 0; + selinux_enforcing_boot = enforcing ? 1 : 0; return 1; } __setup("enforcing=", enforcing_setup); +#else +#define selinux_enforcing_boot 1 #endif #ifdef CONFIG_SECURITY_SELINUX_BOOTPARAM @@ -129,6 +133,19 @@ __setup("selinux=", selinux_enabled_setup); int selinux_enabled = 1; #endif +static unsigned int selinux_checkreqprot_boot = + CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE; + +static int __init checkreqprot_setup(char *str) +{ + unsigned long checkreqprot; + + if (!kstrtoul(str, 0, &checkreqprot)) + selinux_checkreqprot_boot = checkreqprot ? 1 : 0; + return 1; +} +__setup("checkreqprot=", checkreqprot_setup); + static struct kmem_cache *sel_inode_cache; static struct kmem_cache *file_security_cache; @@ -145,7 +162,8 @@ static struct kmem_cache *file_security_cache; */ static int selinux_secmark_enabled(void) { - return (selinux_policycap_alwaysnetwork || atomic_read(&selinux_secmark_refcount)); + return (selinux_policycap_alwaysnetwork() || + atomic_read(&selinux_secmark_refcount)); } /** @@ -160,7 +178,8 @@ static int selinux_secmark_enabled(void) */ static int selinux_peerlbl_enabled(void) { - return (selinux_policycap_alwaysnetwork || netlbl_enabled() || selinux_xfrm_enabled()); + return (selinux_policycap_alwaysnetwork() || + netlbl_enabled() || selinux_xfrm_enabled()); } static int selinux_netcache_avc_callback(u32 event) @@ -264,7 +283,8 @@ static int __inode_security_revalidate(struct inode *inode, might_sleep_if(may_sleep); - if (ss_initialized && isec->initialized != LABEL_INITIALIZED) { + if (selinux_state.initialized && + isec->initialized != LABEL_INITIALIZED) { if (!may_sleep) return -ECHILD; @@ -446,12 +466,14 @@ static int may_context_mount_sb_relabel(u32 sid, const struct task_security_struct *tsec = cred->security; int rc; - rc = avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM, + rc = avc_has_perm(&selinux_state, + tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM, FILESYSTEM__RELABELFROM, NULL); if (rc) return rc; - rc = avc_has_perm(tsec->sid, sid, SECCLASS_FILESYSTEM, + rc = avc_has_perm(&selinux_state, + tsec->sid, sid, SECCLASS_FILESYSTEM, FILESYSTEM__RELABELTO, NULL); return rc; } @@ -462,12 +484,14 @@ static int may_context_mount_inode_relabel(u32 sid, { const struct task_security_struct *tsec = cred->security; int rc; - rc = avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM, + rc = avc_has_perm(&selinux_state, + tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM, FILESYSTEM__RELABELFROM, NULL); if (rc) return rc; - rc = avc_has_perm(sid, sbsec->sid, SECCLASS_FILESYSTEM, + rc = avc_has_perm(&selinux_state, + sid, sbsec->sid, SECCLASS_FILESYSTEM, FILESYSTEM__ASSOCIATE, NULL); return rc; } @@ -480,7 +504,7 @@ static int selinux_is_genfs_special_handling(struct super_block *sb) !strcmp(sb->s_type->name, "debugfs") || !strcmp(sb->s_type->name, "tracefs") || !strcmp(sb->s_type->name, "rootfs") || - (selinux_policycap_cgroupseclabel && + (selinux_policycap_cgroupseclabel() && (!strcmp(sb->s_type->name, "cgroup") || !strcmp(sb->s_type->name, "cgroup2"))); } @@ -608,7 +632,7 @@ static int selinux_get_mnt_opts(const struct super_block *sb, if (!(sbsec->flags & SE_SBINITIALIZED)) return -EINVAL; - if (!ss_initialized) + if (!selinux_state.initialized) return -EINVAL; /* make sure we always check enough bits to cover the mask */ @@ -639,21 +663,25 @@ static int selinux_get_mnt_opts(const struct super_block *sb, i = 0; if (sbsec->flags & FSCONTEXT_MNT) { - rc = security_sid_to_context(sbsec->sid, &context, &len); + rc = security_sid_to_context(&selinux_state, sbsec->sid, + &context, &len); if (rc) goto out_free; opts->mnt_opts[i] = context; opts->mnt_opts_flags[i++] = FSCONTEXT_MNT; } if (sbsec->flags & CONTEXT_MNT) { - rc = security_sid_to_context(sbsec->mntpoint_sid, &context, &len); + rc = security_sid_to_context(&selinux_state, + sbsec->mntpoint_sid, + &context, &len); if (rc) goto out_free; opts->mnt_opts[i] = context; opts->mnt_opts_flags[i++] = CONTEXT_MNT; } if (sbsec->flags & DEFCONTEXT_MNT) { - rc = security_sid_to_context(sbsec->def_sid, &context, &len); + rc = security_sid_to_context(&selinux_state, sbsec->def_sid, + &context, &len); if (rc) goto out_free; opts->mnt_opts[i] = context; @@ -663,7 +691,8 @@ static int selinux_get_mnt_opts(const struct super_block *sb, struct dentry *root = sbsec->sb->s_root; struct inode_security_struct *isec = backing_inode_security(root); - rc = security_sid_to_context(isec->sid, &context, &len); + rc = security_sid_to_context(&selinux_state, isec->sid, + &context, &len); if (rc) goto out_free; opts->mnt_opts[i] = context; @@ -726,7 +755,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, mutex_lock(&sbsec->lock); - if (!ss_initialized) { + if (!selinux_state.initialized) { if (!num_opts) { /* Defer initialization until selinux_complete_init, after the initial policy is loaded and the security @@ -772,7 +801,9 @@ static int selinux_set_mnt_opts(struct super_block *sb, if (flags[i] == SBLABEL_MNT) continue; - rc = security_context_str_to_sid(mount_options[i], &sid, GFP_KERNEL); + rc = security_context_str_to_sid(&selinux_state, + mount_options[i], &sid, + GFP_KERNEL); if (rc) { printk(KERN_WARNING "SELinux: security_context_str_to_sid" "(%s) failed for (dev %s, type %s) errno=%d\n", @@ -839,6 +870,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, !strcmp(sb->s_type->name, "tracefs") || !strcmp(sb->s_type->name, "sysfs") || !strcmp(sb->s_type->name, "pstore") || + !strcmp(sb->s_type->name, "binder") || !strcmp(sb->s_type->name, "cgroup") || !strcmp(sb->s_type->name, "cgroup2")) sbsec->flags |= SE_SBGENFS; @@ -848,7 +880,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, * Determine the labeling behavior to use for this * filesystem type. */ - rc = security_fs_use(sb); + rc = security_fs_use(&selinux_state, sb); if (rc) { printk(KERN_WARNING "%s: security_fs_use(%s) returned %d\n", @@ -873,7 +905,9 @@ static int selinux_set_mnt_opts(struct super_block *sb, } if (sbsec->behavior == SECURITY_FS_USE_XATTR) { sbsec->behavior = SECURITY_FS_USE_MNTPOINT; - rc = security_transition_sid(current_sid(), current_sid(), + rc = security_transition_sid(&selinux_state, + current_sid(), + current_sid(), SECCLASS_FILE, NULL, &sbsec->mntpoint_sid); if (rc) @@ -1009,7 +1043,7 @@ static int selinux_sb_clone_mnt_opts(const struct super_block *oldsb, * if the parent was able to be mounted it clearly had no special lsm * mount options. thus we can safely deal with this superblock later */ - if (!ss_initialized) + if (!selinux_state.initialized) return 0; /* @@ -1039,7 +1073,7 @@ static int selinux_sb_clone_mnt_opts(const struct super_block *oldsb, if (newsbsec->behavior == SECURITY_FS_USE_NATIVE && !(kern_flags & SECURITY_LSM_NATIVE_LABELS) && !set_context) { - rc = security_fs_use(newsb); + rc = security_fs_use(&selinux_state, newsb); if (rc) goto out; } @@ -1322,7 +1356,7 @@ static inline int default_protocol_dgram(int protocol) static inline u16 socket_type_to_security_class(int family, int type, int protocol) { - int extsockclass = selinux_policycap_extsockclass; + int extsockclass = selinux_policycap_extsockclass(); switch (family) { case PF_UNIX: @@ -1496,7 +1530,8 @@ static int selinux_genfs_get_sid(struct dentry *dentry, path++; } } - rc = security_genfs_sid(sb->s_type->name, path, tclass, sid); + rc = security_genfs_sid(&selinux_state, sb->s_type->name, + path, tclass, sid); } free_page((unsigned long)buffer); return rc; @@ -1614,7 +1649,8 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent sid = sbsec->def_sid; rc = 0; } else { - rc = security_context_to_sid_default(context, rc, &sid, + rc = security_context_to_sid_default(&selinux_state, + context, rc, &sid, sbsec->def_sid, GFP_NOFS); if (rc) { @@ -1647,7 +1683,8 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent sid = sbsec->sid; /* Try to obtain a transition SID. */ - rc = security_transition_sid(task_sid, sid, sclass, NULL, &sid); + rc = security_transition_sid(&selinux_state, task_sid, sid, + sclass, NULL, &sid); if (rc) goto out; break; @@ -1765,9 +1802,11 @@ static int cred_has_capability(const struct cred *cred, return -EINVAL; } - rc = avc_has_perm_noaudit(sid, sid, sclass, av, 0, &avd); + rc = avc_has_perm_noaudit(&selinux_state, + sid, sid, sclass, av, 0, &avd); if (audit == SECURITY_CAP_AUDIT) { - int rc2 = avc_audit(sid, sid, sclass, av, &avd, rc, &ad, 0); + int rc2 = avc_audit(&selinux_state, + sid, sid, sclass, av, &avd, rc, &ad, 0); if (rc2) return rc2; } @@ -1793,7 +1832,8 @@ static int inode_has_perm(const struct cred *cred, sid = cred_sid(cred); isec = inode->i_security; - return avc_has_perm(sid, isec->sid, isec->sclass, perms, adp); + return avc_has_perm(&selinux_state, + sid, isec->sid, isec->sclass, perms, adp); } /* Same as inode_has_perm, but pass explicit audit data containing @@ -1866,7 +1906,8 @@ static int file_has_perm(const struct cred *cred, ad.u.file = file; if (sid != fsec->sid) { - rc = avc_has_perm(sid, fsec->sid, + rc = avc_has_perm(&selinux_state, + sid, fsec->sid, SECCLASS_FD, FD__USE, &ad); @@ -1908,7 +1949,8 @@ selinux_determine_inode_label(const struct task_security_struct *tsec, *_new_isid = tsec->create_sid; } else { const struct inode_security_struct *dsec = inode_security(dir); - return security_transition_sid(tsec->sid, dsec->sid, tclass, + return security_transition_sid(&selinux_state, tsec->sid, + dsec->sid, tclass, name, _new_isid); } @@ -1935,7 +1977,8 @@ static int may_create(struct inode *dir, ad.type = LSM_AUDIT_DATA_DENTRY; ad.u.dentry = dentry; - rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR, + rc = avc_has_perm(&selinux_state, + sid, dsec->sid, SECCLASS_DIR, DIR__ADD_NAME | DIR__SEARCH, &ad); if (rc) @@ -1946,11 +1989,13 @@ static int may_create(struct inode *dir, if (rc) return rc; - rc = avc_has_perm(sid, newsid, tclass, FILE__CREATE, &ad); + rc = avc_has_perm(&selinux_state, + sid, newsid, tclass, FILE__CREATE, &ad); if (rc) return rc; - return avc_has_perm(newsid, sbsec->sid, + return avc_has_perm(&selinux_state, + newsid, sbsec->sid, SECCLASS_FILESYSTEM, FILESYSTEM__ASSOCIATE, &ad); } @@ -1979,7 +2024,8 @@ static int may_link(struct inode *dir, av = DIR__SEARCH; av |= (kind ? DIR__REMOVE_NAME : DIR__ADD_NAME); - rc = avc_has_perm(sid, dsec->sid, SECCLASS_DIR, av, &ad); + rc = avc_has_perm(&selinux_state, + sid, dsec->sid, SECCLASS_DIR, av, &ad); if (rc) return rc; @@ -1999,7 +2045,8 @@ static int may_link(struct inode *dir, return 0; } - rc = avc_has_perm(sid, isec->sid, isec->sclass, av, &ad); + rc = avc_has_perm(&selinux_state, + sid, isec->sid, isec->sclass, av, &ad); return rc; } @@ -2023,16 +2070,19 @@ static inline int may_rename(struct inode *old_dir, ad.type = LSM_AUDIT_DATA_DENTRY; ad.u.dentry = old_dentry; - rc = avc_has_perm(sid, old_dsec->sid, SECCLASS_DIR, + rc = avc_has_perm(&selinux_state, + sid, old_dsec->sid, SECCLASS_DIR, DIR__REMOVE_NAME | DIR__SEARCH, &ad); if (rc) return rc; - rc = avc_has_perm(sid, old_isec->sid, + rc = avc_has_perm(&selinux_state, + sid, old_isec->sid, old_isec->sclass, FILE__RENAME, &ad); if (rc) return rc; if (old_is_dir && new_dir != old_dir) { - rc = avc_has_perm(sid, old_isec->sid, + rc = avc_has_perm(&selinux_state, + sid, old_isec->sid, old_isec->sclass, DIR__REPARENT, &ad); if (rc) return rc; @@ -2042,13 +2092,15 @@ static inline int may_rename(struct inode *old_dir, av = DIR__ADD_NAME | DIR__SEARCH; if (d_is_positive(new_dentry)) av |= DIR__REMOVE_NAME; - rc = avc_has_perm(sid, new_dsec->sid, SECCLASS_DIR, av, &ad); + rc = avc_has_perm(&selinux_state, + sid, new_dsec->sid, SECCLASS_DIR, av, &ad); if (rc) return rc; if (d_is_positive(new_dentry)) { new_isec = backing_inode_security(new_dentry); new_is_dir = d_is_dir(new_dentry); - rc = avc_has_perm(sid, new_isec->sid, + rc = avc_has_perm(&selinux_state, + sid, new_isec->sid, new_isec->sclass, (new_is_dir ? DIR__RMDIR : FILE__UNLINK), &ad); if (rc) @@ -2068,7 +2120,8 @@ static int superblock_has_perm(const struct cred *cred, u32 sid = cred_sid(cred); sbsec = sb->s_security; - return avc_has_perm(sid, sbsec->sid, SECCLASS_FILESYSTEM, perms, ad); + return avc_has_perm(&selinux_state, + sid, sbsec->sid, SECCLASS_FILESYSTEM, perms, ad); } /* Convert a Linux mode and permission mask to an access vector. */ @@ -2131,7 +2184,8 @@ static inline u32 open_file_to_av(struct file *file) u32 av = file_to_av(file); struct inode *inode = file_inode(file); - if (selinux_policycap_openperm && inode->i_sb->s_magic != SOCKFS_MAGIC) + if (selinux_policycap_openperm() && + inode->i_sb->s_magic != SOCKFS_MAGIC) av |= FILE__OPEN; return av; @@ -2144,7 +2198,8 @@ static int selinux_binder_set_context_mgr(struct task_struct *mgr) u32 mysid = current_sid(); u32 mgrsid = task_sid(mgr); - return avc_has_perm(mysid, mgrsid, SECCLASS_BINDER, + return avc_has_perm(&selinux_state, + mysid, mgrsid, SECCLASS_BINDER, BINDER__SET_CONTEXT_MGR, NULL); } @@ -2157,13 +2212,15 @@ static int selinux_binder_transaction(struct task_struct *from, int rc; if (mysid != fromsid) { - rc = avc_has_perm(mysid, fromsid, SECCLASS_BINDER, + rc = avc_has_perm(&selinux_state, + mysid, fromsid, SECCLASS_BINDER, BINDER__IMPERSONATE, NULL); if (rc) return rc; } - return avc_has_perm(fromsid, tosid, SECCLASS_BINDER, BINDER__CALL, + return avc_has_perm(&selinux_state, + fromsid, tosid, SECCLASS_BINDER, BINDER__CALL, NULL); } @@ -2173,7 +2230,8 @@ static int selinux_binder_transfer_binder(struct task_struct *from, u32 fromsid = task_sid(from); u32 tosid = task_sid(to); - return avc_has_perm(fromsid, tosid, SECCLASS_BINDER, BINDER__TRANSFER, + return avc_has_perm(&selinux_state, + fromsid, tosid, SECCLASS_BINDER, BINDER__TRANSFER, NULL); } @@ -2192,7 +2250,8 @@ static int selinux_binder_transfer_file(struct task_struct *from, ad.u.path = file->f_path; if (sid != fsec->sid) { - rc = avc_has_perm(sid, fsec->sid, + rc = avc_has_perm(&selinux_state, + sid, fsec->sid, SECCLASS_FD, FD__USE, &ad); @@ -2210,7 +2269,8 @@ static int selinux_binder_transfer_file(struct task_struct *from, return 0; isec = backing_inode_security(dentry); - return avc_has_perm(sid, isec->sid, isec->sclass, file_to_av(file), + return avc_has_perm(&selinux_state, + sid, isec->sid, isec->sclass, file_to_av(file), &ad); } @@ -2221,21 +2281,25 @@ static int selinux_ptrace_access_check(struct task_struct *child, u32 csid = task_sid(child); if (mode & PTRACE_MODE_READ) - return avc_has_perm(sid, csid, SECCLASS_FILE, FILE__READ, NULL); + return avc_has_perm(&selinux_state, + sid, csid, SECCLASS_FILE, FILE__READ, NULL); - return avc_has_perm(sid, csid, SECCLASS_PROCESS, PROCESS__PTRACE, NULL); + return avc_has_perm(&selinux_state, + sid, csid, SECCLASS_PROCESS, PROCESS__PTRACE, NULL); } static int selinux_ptrace_traceme(struct task_struct *parent) { - return avc_has_perm(task_sid(parent), current_sid(), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + task_sid(parent), current_sid(), SECCLASS_PROCESS, PROCESS__PTRACE, NULL); } static int selinux_capget(struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - return avc_has_perm(current_sid(), task_sid(target), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(target), SECCLASS_PROCESS, PROCESS__GETCAP, NULL); } @@ -2244,7 +2308,8 @@ static int selinux_capset(struct cred *new, const struct cred *old, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) { - return avc_has_perm(cred_sid(old), cred_sid(new), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + cred_sid(old), cred_sid(new), SECCLASS_PROCESS, PROCESS__SETCAP, NULL); } @@ -2304,18 +2369,21 @@ static int selinux_syslog(int type) switch (type) { case SYSLOG_ACTION_READ_ALL: /* Read last kernel messages */ case SYSLOG_ACTION_SIZE_BUFFER: /* Return size of the log buffer */ - return avc_has_perm(current_sid(), SECINITSID_KERNEL, + return avc_has_perm(&selinux_state, + current_sid(), SECINITSID_KERNEL, SECCLASS_SYSTEM, SYSTEM__SYSLOG_READ, NULL); case SYSLOG_ACTION_CONSOLE_OFF: /* Disable logging to console */ case SYSLOG_ACTION_CONSOLE_ON: /* Enable logging to console */ /* Set level of messages printed to console */ case SYSLOG_ACTION_CONSOLE_LEVEL: - return avc_has_perm(current_sid(), SECINITSID_KERNEL, + return avc_has_perm(&selinux_state, + current_sid(), SECINITSID_KERNEL, SECCLASS_SYSTEM, SYSTEM__SYSLOG_CONSOLE, NULL); } /* All other syslog types */ - return avc_has_perm(current_sid(), SECINITSID_KERNEL, + return avc_has_perm(&selinux_state, + current_sid(), SECINITSID_KERNEL, SECCLASS_SYSTEM, SYSTEM__SYSLOG_MOD, NULL); } @@ -2376,13 +2444,14 @@ static int check_nnp_nosuid(const struct linux_binprm *bprm, * policy allows the corresponding permission between * the old and new contexts. */ - if (selinux_policycap_nnp_nosuid_transition) { + if (selinux_policycap_nnp_nosuid_transition()) { av = 0; if (nnp) av |= PROCESS2__NNP_TRANSITION; if (nosuid) av |= PROCESS2__NOSUID_TRANSITION; - rc = avc_has_perm(old_tsec->sid, new_tsec->sid, + rc = avc_has_perm(&selinux_state, + old_tsec->sid, new_tsec->sid, SECCLASS_PROCESS2, av, NULL); if (!rc) return 0; @@ -2393,7 +2462,8 @@ static int check_nnp_nosuid(const struct linux_binprm *bprm, * i.e. SIDs that are guaranteed to only be allowed a subset * of the permissions of the current SID. */ - rc = security_bounded_transition(old_tsec->sid, new_tsec->sid); + rc = security_bounded_transition(&selinux_state, old_tsec->sid, + new_tsec->sid); if (!rc) return 0; @@ -2445,8 +2515,8 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm) return rc; } else { /* Check for a default transition on this program. */ - rc = security_transition_sid(old_tsec->sid, isec->sid, - SECCLASS_PROCESS, NULL, + rc = security_transition_sid(&selinux_state, old_tsec->sid, + isec->sid, SECCLASS_PROCESS, NULL, &new_tsec->sid); if (rc) return rc; @@ -2464,25 +2534,29 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm) ad.u.file = bprm->file; if (new_tsec->sid == old_tsec->sid) { - rc = avc_has_perm(old_tsec->sid, isec->sid, + rc = avc_has_perm(&selinux_state, + old_tsec->sid, isec->sid, SECCLASS_FILE, FILE__EXECUTE_NO_TRANS, &ad); if (rc) return rc; } else { /* Check permissions for the transition. */ - rc = avc_has_perm(old_tsec->sid, new_tsec->sid, + rc = avc_has_perm(&selinux_state, + old_tsec->sid, new_tsec->sid, SECCLASS_PROCESS, PROCESS__TRANSITION, &ad); if (rc) return rc; - rc = avc_has_perm(new_tsec->sid, isec->sid, + rc = avc_has_perm(&selinux_state, + new_tsec->sid, isec->sid, SECCLASS_FILE, FILE__ENTRYPOINT, &ad); if (rc) return rc; /* Check for shared state */ if (bprm->unsafe & LSM_UNSAFE_SHARE) { - rc = avc_has_perm(old_tsec->sid, new_tsec->sid, + rc = avc_has_perm(&selinux_state, + old_tsec->sid, new_tsec->sid, SECCLASS_PROCESS, PROCESS__SHARE, NULL); if (rc) @@ -2494,7 +2568,8 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm) if (bprm->unsafe & LSM_UNSAFE_PTRACE) { u32 ptsid = ptrace_parent_sid(); if (ptsid != 0) { - rc = avc_has_perm(ptsid, new_tsec->sid, + rc = avc_has_perm(&selinux_state, + ptsid, new_tsec->sid, SECCLASS_PROCESS, PROCESS__PTRACE, NULL); if (rc) @@ -2508,7 +2583,8 @@ static int selinux_bprm_set_creds(struct linux_binprm *bprm) /* Enable secure mode for SIDs transitions unless the noatsecure permission is granted between the two SIDs, i.e. ahp returns 0. */ - rc = avc_has_perm(old_tsec->sid, new_tsec->sid, + rc = avc_has_perm(&selinux_state, + old_tsec->sid, new_tsec->sid, SECCLASS_PROCESS, PROCESS__NOATSECURE, NULL); bprm->secureexec |= !!rc; @@ -2600,7 +2676,8 @@ static void selinux_bprm_committing_creds(struct linux_binprm *bprm) * higher than the default soft limit for cases where the default is * lower than the hard limit, e.g. RLIMIT_CORE or RLIMIT_STACK. */ - rc = avc_has_perm(new_tsec->osid, new_tsec->sid, SECCLASS_PROCESS, + rc = avc_has_perm(&selinux_state, + new_tsec->osid, new_tsec->sid, SECCLASS_PROCESS, PROCESS__RLIMITINH, NULL); if (rc) { /* protect against do_prlimit() */ @@ -2640,7 +2717,8 @@ static void selinux_bprm_committed_creds(struct linux_binprm *bprm) * This must occur _after_ the task SID has been updated so that any * kill done after the flush will be checked against the new SID. */ - rc = avc_has_perm(osid, sid, SECCLASS_PROCESS, PROCESS__SIGINH, NULL); + rc = avc_has_perm(&selinux_state, + osid, sid, SECCLASS_PROCESS, PROCESS__SIGINH, NULL); if (rc) { if (IS_ENABLED(CONFIG_POSIX_TIMERS)) { memset(&itimer, 0, sizeof itimer); @@ -2804,7 +2882,9 @@ static int selinux_sb_remount(struct super_block *sb, void *data) if (flags[i] == SBLABEL_MNT) continue; - rc = security_context_str_to_sid(mount_options[i], &sid, GFP_KERNEL); + rc = security_context_str_to_sid(&selinux_state, + mount_options[i], &sid, + GFP_KERNEL); if (rc) { printk(KERN_WARNING "SELinux: security_context_str_to_sid" "(%s) failed for (dev %s, type %s) errno=%d\n", @@ -2929,7 +3009,8 @@ static int selinux_dentry_init_security(struct dentry *dentry, int mode, if (rc) return rc; - return security_sid_to_context(newsid, (char **)ctx, ctxlen); + return security_sid_to_context(&selinux_state, newsid, (char **)ctx, + ctxlen); } static int selinux_dentry_create_files_as(struct dentry *dentry, int mode, @@ -2984,14 +3065,15 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, isec->initialized = LABEL_INITIALIZED; } - if (!ss_initialized || !(sbsec->flags & SBLABEL_MNT)) + if (!selinux_state.initialized || !(sbsec->flags & SBLABEL_MNT)) return -EOPNOTSUPP; if (name) *name = XATTR_SELINUX_SUFFIX; if (value && len) { - rc = security_sid_to_context_force(newsid, &context, &clen); + rc = security_sid_to_context_force(&selinux_state, newsid, + &context, &clen); if (rc) return rc; *value = context; @@ -3066,7 +3148,8 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct inode *inode, if (IS_ERR(isec)) return PTR_ERR(isec); - return avc_has_perm_flags(sid, isec->sid, isec->sclass, FILE__READ, &ad, + return avc_has_perm_flags(&selinux_state, + sid, isec->sid, isec->sclass, FILE__READ, &ad, rcu ? MAY_NOT_BLOCK : 0); } @@ -3082,7 +3165,8 @@ static noinline int audit_inode_permission(struct inode *inode, ad.type = LSM_AUDIT_DATA_INODE; ad.u.inode = inode; - rc = slow_avc_audit(current_sid(), isec->sid, isec->sclass, perms, + rc = slow_avc_audit(&selinux_state, + current_sid(), isec->sid, isec->sclass, perms, audited, denied, result, &ad, flags); if (rc) return rc; @@ -3120,7 +3204,8 @@ static int selinux_inode_permission(struct inode *inode, int mask) if (IS_ERR(isec)) return PTR_ERR(isec); - rc = avc_has_perm_noaudit(sid, isec->sid, isec->sclass, perms, 0, &avd); + rc = avc_has_perm_noaudit(&selinux_state, + sid, isec->sid, isec->sclass, perms, 0, &avd); audited = avc_audit_required(perms, &avd, rc, from_access ? FILE__AUDIT_ACCESS : 0, &denied); @@ -3152,7 +3237,7 @@ static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr) ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET)) return dentry_has_perm(cred, dentry, FILE__SETATTR); - if (selinux_policycap_openperm && + if (selinux_policycap_openperm() && inode->i_sb->s_magic != SOCKFS_MAGIC && (ia_valid & ATTR_SIZE) && !(ia_valid & ATTR_FILE)) @@ -3223,12 +3308,14 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name, ad.u.dentry = dentry; isec = backing_inode_security(dentry); - rc = avc_has_perm(sid, isec->sid, isec->sclass, + rc = avc_has_perm(&selinux_state, + sid, isec->sid, isec->sclass, FILE__RELABELFROM, &ad); if (rc) return rc; - rc = security_context_to_sid(value, size, &newsid, GFP_KERNEL); + rc = security_context_to_sid(&selinux_state, value, size, &newsid, + GFP_KERNEL); if (rc == -EINVAL) { if (!has_cap_mac_admin(true)) { struct audit_buffer *ab; @@ -3254,22 +3341,25 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name, return rc; } - rc = security_context_to_sid_force(value, size, &newsid); + rc = security_context_to_sid_force(&selinux_state, value, + size, &newsid); } if (rc) return rc; - rc = avc_has_perm(sid, newsid, isec->sclass, + rc = avc_has_perm(&selinux_state, + sid, newsid, isec->sclass, FILE__RELABELTO, &ad); if (rc) return rc; - rc = security_validate_transition(isec->sid, newsid, sid, - isec->sclass); + rc = security_validate_transition(&selinux_state, isec->sid, newsid, + sid, isec->sclass); if (rc) return rc; - return avc_has_perm(newsid, + return avc_has_perm(&selinux_state, + newsid, sbsec->sid, SECCLASS_FILESYSTEM, FILESYSTEM__ASSOCIATE, @@ -3290,7 +3380,8 @@ static void selinux_inode_post_setxattr(struct dentry *dentry, const char *name, return; } - rc = security_context_to_sid_force(value, size, &newsid); + rc = security_context_to_sid_force(&selinux_state, value, size, + &newsid); if (rc) { printk(KERN_ERR "SELinux: unable to map context to SID" "for (%s, %lu), rc=%d\n", @@ -3358,10 +3449,12 @@ static int selinux_inode_getsecurity(struct inode *inode, const char *name, void */ isec = inode_security(inode); if (has_cap_mac_admin(false)) - error = security_sid_to_context_force(isec->sid, &context, + error = security_sid_to_context_force(&selinux_state, + isec->sid, &context, &size); else - error = security_sid_to_context(isec->sid, &context, &size); + error = security_sid_to_context(&selinux_state, isec->sid, + &context, &size); if (error) return error; error = size; @@ -3391,7 +3484,8 @@ static int selinux_inode_setsecurity(struct inode *inode, const char *name, if (!value || !size) return -EACCES; - rc = security_context_to_sid(value, size, &newsid, GFP_KERNEL); + rc = security_context_to_sid(&selinux_state, value, size, &newsid, + GFP_KERNEL); if (rc) return rc; @@ -3480,7 +3574,7 @@ static int selinux_file_permission(struct file *file, int mask) isec = inode_security(inode); if (sid == fsec->sid && fsec->isid == isec->sid && - fsec->pseqno == avc_policy_seqno()) + fsec->pseqno == avc_policy_seqno(&selinux_state)) /* No change since file_open check. */ return 0; @@ -3520,7 +3614,8 @@ static int ioctl_has_perm(const struct cred *cred, struct file *file, ad.u.op->path = file->f_path; if (ssid != fsec->sid) { - rc = avc_has_perm(ssid, fsec->sid, + rc = avc_has_perm(&selinux_state, + ssid, fsec->sid, SECCLASS_FD, FD__USE, &ad); @@ -3532,8 +3627,9 @@ static int ioctl_has_perm(const struct cred *cred, struct file *file, return 0; isec = inode_security(inode); - rc = avc_has_extended_perms(ssid, isec->sid, isec->sclass, - requested, driver, xperm, &ad); + rc = avc_has_extended_perms(&selinux_state, + ssid, isec->sid, isec->sclass, + requested, driver, xperm, &ad); out: return rc; } @@ -3601,7 +3697,8 @@ static int file_map_prot_check(struct file *file, unsigned long prot, int shared * private file mapping that will also be writable. * This has an additional check. */ - rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, + rc = avc_has_perm(&selinux_state, + sid, sid, SECCLASS_PROCESS, PROCESS__EXECMEM, NULL); if (rc) goto error; @@ -3631,7 +3728,8 @@ static int selinux_mmap_addr(unsigned long addr) if (addr < CONFIG_LSM_MMAP_MIN_ADDR) { u32 sid = current_sid(); - rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT, + rc = avc_has_perm(&selinux_state, + sid, sid, SECCLASS_MEMPROTECT, MEMPROTECT__MMAP_ZERO, NULL); } @@ -3653,7 +3751,7 @@ static int selinux_mmap_file(struct file *file, unsigned long reqprot, return rc; } - if (selinux_checkreqprot) + if (selinux_state.checkreqprot) prot = reqprot; return file_map_prot_check(file, prot, @@ -3667,7 +3765,7 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, const struct cred *cred = current_cred(); u32 sid = cred_sid(cred); - if (selinux_checkreqprot) + if (selinux_state.checkreqprot) prot = reqprot; if (default_noexec && @@ -3675,13 +3773,15 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, int rc = 0; if (vma->vm_start >= vma->vm_mm->start_brk && vma->vm_end <= vma->vm_mm->brk) { - rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, + rc = avc_has_perm(&selinux_state, + sid, sid, SECCLASS_PROCESS, PROCESS__EXECHEAP, NULL); } else if (!vma->vm_file && ((vma->vm_start <= vma->vm_mm->start_stack && vma->vm_end >= vma->vm_mm->start_stack) || vma_is_stack_for_current(vma))) { - rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, + rc = avc_has_perm(&selinux_state, + sid, sid, SECCLASS_PROCESS, PROCESS__EXECSTACK, NULL); } else if (vma->vm_file && vma->anon_vma) { /* @@ -3773,7 +3873,8 @@ static int selinux_file_send_sigiotask(struct task_struct *tsk, else perm = signal_to_av(signum); - return avc_has_perm(fsec->fown_sid, sid, + return avc_has_perm(&selinux_state, + fsec->fown_sid, sid, SECCLASS_PROCESS, perm, NULL); } @@ -3799,7 +3900,7 @@ static int selinux_file_open(struct file *file, const struct cred *cred) * struct as its SID. */ fsec->isid = isec->sid; - fsec->pseqno = avc_policy_seqno(); + fsec->pseqno = avc_policy_seqno(&selinux_state); /* * Since the inode label or policy seqno may have changed * between the selinux_inode_permission check and the saving @@ -3818,7 +3919,8 @@ static int selinux_task_alloc(struct task_struct *task, { u32 sid = current_sid(); - return avc_has_perm(sid, sid, SECCLASS_PROCESS, PROCESS__FORK, NULL); + return avc_has_perm(&selinux_state, + sid, sid, SECCLASS_PROCESS, PROCESS__FORK, NULL); } /* @@ -3892,7 +3994,8 @@ static int selinux_kernel_act_as(struct cred *new, u32 secid) u32 sid = current_sid(); int ret; - ret = avc_has_perm(sid, secid, + ret = avc_has_perm(&selinux_state, + sid, secid, SECCLASS_KERNEL_SERVICE, KERNEL_SERVICE__USE_AS_OVERRIDE, NULL); @@ -3916,7 +4019,8 @@ static int selinux_kernel_create_files_as(struct cred *new, struct inode *inode) u32 sid = current_sid(); int ret; - ret = avc_has_perm(sid, isec->sid, + ret = avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_KERNEL_SERVICE, KERNEL_SERVICE__CREATE_FILES_AS, NULL); @@ -3933,7 +4037,8 @@ static int selinux_kernel_module_request(char *kmod_name) ad.type = LSM_AUDIT_DATA_KMOD; ad.u.kmod_name = kmod_name; - return avc_has_perm(current_sid(), SECINITSID_KERNEL, SECCLASS_SYSTEM, + return avc_has_perm(&selinux_state, + current_sid(), SECINITSID_KERNEL, SECCLASS_SYSTEM, SYSTEM__MODULE_REQUEST, &ad); } @@ -3947,7 +4052,8 @@ static int selinux_kernel_module_from_file(struct file *file) /* init_module */ if (file == NULL) - return avc_has_perm(sid, sid, SECCLASS_SYSTEM, + return avc_has_perm(&selinux_state, + sid, sid, SECCLASS_SYSTEM, SYSTEM__MODULE_LOAD, NULL); /* finit_module */ @@ -3957,13 +4063,15 @@ static int selinux_kernel_module_from_file(struct file *file) fsec = file->f_security; if (sid != fsec->sid) { - rc = avc_has_perm(sid, fsec->sid, SECCLASS_FD, FD__USE, &ad); + rc = avc_has_perm(&selinux_state, + sid, fsec->sid, SECCLASS_FD, FD__USE, &ad); if (rc) return rc; } isec = inode_security(file_inode(file)); - return avc_has_perm(sid, isec->sid, SECCLASS_SYSTEM, + return avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_SYSTEM, SYSTEM__MODULE_LOAD, &ad); } @@ -3985,19 +4093,22 @@ static int selinux_kernel_read_file(struct file *file, static int selinux_task_setpgid(struct task_struct *p, pid_t pgid) { - return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__SETPGID, NULL); } static int selinux_task_getpgid(struct task_struct *p) { - return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__GETPGID, NULL); } static int selinux_task_getsid(struct task_struct *p) { - return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__GETSESSION, NULL); } @@ -4008,19 +4119,22 @@ static void selinux_task_getsecid(struct task_struct *p, u32 *secid) static int selinux_task_setnice(struct task_struct *p, int nice) { - return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__SETSCHED, NULL); } static int selinux_task_setioprio(struct task_struct *p, int ioprio) { - return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__SETSCHED, NULL); } static int selinux_task_getioprio(struct task_struct *p) { - return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__GETSCHED, NULL); } @@ -4035,7 +4149,8 @@ int selinux_task_prlimit(const struct cred *cred, const struct cred *tcred, av |= PROCESS__SETRLIMIT; if (flags & LSM_PRLIMIT_READ) av |= PROCESS__GETRLIMIT; - return avc_has_perm(cred_sid(cred), cred_sid(tcred), + return avc_has_perm(&selinux_state, + cred_sid(cred), cred_sid(tcred), SECCLASS_PROCESS, av, NULL); } @@ -4049,7 +4164,8 @@ static int selinux_task_setrlimit(struct task_struct *p, unsigned int resource, later be used as a safe reset point for the soft limit upon context transitions. See selinux_bprm_committing_creds. */ if (old_rlim->rlim_max != new_rlim->rlim_max) - return avc_has_perm(current_sid(), task_sid(p), + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__SETRLIMIT, NULL); return 0; @@ -4057,19 +4173,22 @@ static int selinux_task_setrlimit(struct task_struct *p, unsigned int resource, static int selinux_task_setscheduler(struct task_struct *p) { - return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__SETSCHED, NULL); } static int selinux_task_getscheduler(struct task_struct *p) { - return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__GETSCHED, NULL); } static int selinux_task_movememory(struct task_struct *p) { - return avc_has_perm(current_sid(), task_sid(p), SECCLASS_PROCESS, + return avc_has_perm(&selinux_state, + current_sid(), task_sid(p), SECCLASS_PROCESS, PROCESS__SETSCHED, NULL); } @@ -4084,7 +4203,8 @@ static int selinux_task_kill(struct task_struct *p, struct siginfo *info, perm = signal_to_av(sig); if (!secid) secid = current_sid(); - return avc_has_perm(secid, task_sid(p), SECCLASS_PROCESS, perm, NULL); + return avc_has_perm(&selinux_state, + secid, task_sid(p), SECCLASS_PROCESS, perm, NULL); } static void selinux_task_to_inode(struct task_struct *p, @@ -4325,7 +4445,8 @@ static int selinux_skb_peerlbl_sid(struct sk_buff *skb, u16 family, u32 *sid) if (unlikely(err)) return -EACCES; - err = security_net_peersid_resolve(nlbl_sid, nlbl_type, xfrm_sid, sid); + err = security_net_peersid_resolve(&selinux_state, nlbl_sid, + nlbl_type, xfrm_sid, sid); if (unlikely(err)) { printk(KERN_WARNING "SELinux: failure in selinux_skb_peerlbl_sid()," @@ -4353,7 +4474,8 @@ static int selinux_conn_sid(u32 sk_sid, u32 skb_sid, u32 *conn_sid) int err = 0; if (skb_sid != SECSID_NULL) - err = security_sid_mls_copy(sk_sid, skb_sid, conn_sid); + err = security_sid_mls_copy(&selinux_state, sk_sid, skb_sid, + conn_sid); else *conn_sid = sk_sid; @@ -4370,8 +4492,8 @@ static int socket_sockcreate_sid(const struct task_security_struct *tsec, return 0; } - return security_transition_sid(tsec->sid, tsec->sid, secclass, NULL, - socksid); + return security_transition_sid(&selinux_state, tsec->sid, tsec->sid, + secclass, NULL, socksid); } static int sock_has_perm(struct sock *sk, u32 perms) @@ -4387,7 +4509,8 @@ static int sock_has_perm(struct sock *sk, u32 perms) ad.u.net = &net; ad.u.net->sk = sk; - return avc_has_perm(current_sid(), sksec->sid, sksec->sclass, perms, + return avc_has_perm(&selinux_state, + current_sid(), sksec->sid, sksec->sclass, perms, &ad); } @@ -4407,7 +4530,8 @@ static int selinux_socket_create(int family, int type, if (rc) return rc; - return avc_has_perm(tsec->sid, newsid, secclass, SOCKET__CREATE, NULL); + return avc_has_perm(&selinux_state, + tsec->sid, newsid, secclass, SOCKET__CREATE, NULL); } static int selinux_socket_post_create(struct socket *sock, int family, @@ -4503,7 +4627,8 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in ad.u.net = &net; ad.u.net->sport = htons(snum); ad.u.net->family = family; - err = avc_has_perm(sksec->sid, sid, + err = avc_has_perm(&selinux_state, + sksec->sid, sid, sksec->sclass, SOCKET__NAME_BIND, &ad); if (err) @@ -4543,7 +4668,8 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in else ad.u.net->v6info.saddr = addr6->sin6_addr; - err = avc_has_perm(sksec->sid, sid, + err = avc_has_perm(&selinux_state, + sksec->sid, sid, sksec->sclass, node_perm, &ad); if (err) goto out; @@ -4597,7 +4723,8 @@ static int selinux_socket_connect(struct socket *sock, struct sockaddr *address, ad.u.net = &net; ad.u.net->dport = htons(snum); ad.u.net->family = sk->sk_family; - err = avc_has_perm(sksec->sid, sid, sksec->sclass, perm, &ad); + err = avc_has_perm(&selinux_state, + sksec->sid, sid, sksec->sclass, perm, &ad); if (err) goto out; } @@ -4698,7 +4825,8 @@ static int selinux_socket_unix_stream_connect(struct sock *sock, ad.u.net = &net; ad.u.net->sk = other; - err = avc_has_perm(sksec_sock->sid, sksec_other->sid, + err = avc_has_perm(&selinux_state, + sksec_sock->sid, sksec_other->sid, sksec_other->sclass, UNIX_STREAM_SOCKET__CONNECTTO, &ad); if (err) @@ -4706,8 +4834,8 @@ static int selinux_socket_unix_stream_connect(struct sock *sock, /* server child socket */ sksec_new->peer_sid = sksec_sock->sid; - err = security_sid_mls_copy(sksec_other->sid, sksec_sock->sid, - &sksec_new->sid); + err = security_sid_mls_copy(&selinux_state, sksec_other->sid, + sksec_sock->sid, &sksec_new->sid); if (err) return err; @@ -4729,7 +4857,8 @@ static int selinux_socket_unix_may_send(struct socket *sock, ad.u.net = &net; ad.u.net->sk = other->sk; - return avc_has_perm(ssec->sid, osec->sid, osec->sclass, SOCKET__SENDTO, + return avc_has_perm(&selinux_state, + ssec->sid, osec->sid, osec->sclass, SOCKET__SENDTO, &ad); } @@ -4744,7 +4873,8 @@ static int selinux_inet_sys_rcv_skb(struct net *ns, int ifindex, err = sel_netif_sid(ns, ifindex, &if_sid); if (err) return err; - err = avc_has_perm(peer_sid, if_sid, + err = avc_has_perm(&selinux_state, + peer_sid, if_sid, SECCLASS_NETIF, NETIF__INGRESS, ad); if (err) return err; @@ -4752,7 +4882,8 @@ static int selinux_inet_sys_rcv_skb(struct net *ns, int ifindex, err = sel_netnode_sid(addrp, family, &node_sid); if (err) return err; - return avc_has_perm(peer_sid, node_sid, + return avc_has_perm(&selinux_state, + peer_sid, node_sid, SECCLASS_NODE, NODE__RECVFROM, ad); } @@ -4775,7 +4906,8 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, return err; if (selinux_secmark_enabled()) { - err = avc_has_perm(sk_sid, skb->secmark, SECCLASS_PACKET, + err = avc_has_perm(&selinux_state, + sk_sid, skb->secmark, SECCLASS_PACKET, PACKET__RECV, &ad); if (err) return err; @@ -4812,7 +4944,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) * to the selinux_sock_rcv_skb_compat() function to deal with the * special handling. We do this in an attempt to keep this function * as fast and as clean as possible. */ - if (!selinux_policycap_netpeer) + if (!selinux_policycap_netpeer()) return selinux_sock_rcv_skb_compat(sk, skb, family); secmark_active = selinux_secmark_enabled(); @@ -4840,7 +4972,8 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) selinux_netlbl_err(skb, family, err, 0); return err; } - err = avc_has_perm(sk_sid, peer_sid, SECCLASS_PEER, + err = avc_has_perm(&selinux_state, + sk_sid, peer_sid, SECCLASS_PEER, PEER__RECV, &ad); if (err) { selinux_netlbl_err(skb, family, err, 0); @@ -4849,7 +4982,8 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) } if (secmark_active) { - err = avc_has_perm(sk_sid, skb->secmark, SECCLASS_PACKET, + err = avc_has_perm(&selinux_state, + sk_sid, skb->secmark, SECCLASS_PACKET, PACKET__RECV, &ad); if (err) return err; @@ -4873,7 +5007,8 @@ static int selinux_socket_getpeersec_stream(struct socket *sock, char __user *op if (peer_sid == SECSID_NULL) return -ENOPROTOOPT; - err = security_sid_to_context(peer_sid, &scontext, &scontext_len); + err = security_sid_to_context(&selinux_state, peer_sid, &scontext, + &scontext_len); if (err) return err; @@ -5039,7 +5174,9 @@ static int selinux_secmark_relabel_packet(u32 sid) __tsec = current_security(); tsid = __tsec->sid; - return avc_has_perm(tsid, sid, SECCLASS_PACKET, PACKET__RELABELTO, NULL); + return avc_has_perm(&selinux_state, + tsid, sid, SECCLASS_PACKET, PACKET__RELABELTO, + NULL); } static void selinux_secmark_refcount_inc(void) @@ -5087,7 +5224,8 @@ static int selinux_tun_dev_create(void) * connections unlike traditional sockets - check the TUN driver to * get a better understanding of why this socket is special */ - return avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__CREATE, + return avc_has_perm(&selinux_state, + sid, sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__CREATE, NULL); } @@ -5095,7 +5233,8 @@ static int selinux_tun_dev_attach_queue(void *security) { struct tun_security_struct *tunsec = security; - return avc_has_perm(current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET, + return avc_has_perm(&selinux_state, + current_sid(), tunsec->sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__ATTACH_QUEUE, NULL); } @@ -5123,11 +5262,13 @@ static int selinux_tun_dev_open(void *security) u32 sid = current_sid(); int err; - err = avc_has_perm(sid, tunsec->sid, SECCLASS_TUN_SOCKET, + err = avc_has_perm(&selinux_state, + sid, tunsec->sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__RELABELFROM, NULL); if (err) return err; - err = avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET, + err = avc_has_perm(&selinux_state, + sid, sid, SECCLASS_TUN_SOCKET, TUN_SOCKET__RELABELTO, NULL); if (err) return err; @@ -5158,7 +5299,8 @@ static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) sk->sk_protocol, nlh->nlmsg_type, secclass_map[sksec->sclass - 1].name, task_pid_nr(current), current->comm); - if (!selinux_enforcing || security_get_allow_unknown()) + if (!enforcing_enabled(&selinux_state) || + security_get_allow_unknown(&selinux_state)) err = 0; } @@ -5188,7 +5330,7 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, u8 netlbl_active; u8 peerlbl_active; - if (!selinux_policycap_netpeer) + if (!selinux_policycap_netpeer()) return NF_ACCEPT; secmark_active = selinux_secmark_enabled(); @@ -5217,7 +5359,8 @@ static unsigned int selinux_ip_forward(struct sk_buff *skb, } if (secmark_active) - if (avc_has_perm(peer_sid, skb->secmark, + if (avc_has_perm(&selinux_state, + peer_sid, skb->secmark, SECCLASS_PACKET, PACKET__FORWARD_IN, &ad)) return NF_DROP; @@ -5329,7 +5472,8 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, return NF_DROP; if (selinux_secmark_enabled()) - if (avc_has_perm(sksec->sid, skb->secmark, + if (avc_has_perm(&selinux_state, + sksec->sid, skb->secmark, SECCLASS_PACKET, PACKET__SEND, &ad)) return NF_DROP_ERR(-ECONNREFUSED); @@ -5357,7 +5501,7 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, * to the selinux_ip_postroute_compat() function to deal with the * special handling. We do this in an attempt to keep this function * as fast and as clean as possible. */ - if (!selinux_policycap_netpeer) + if (!selinux_policycap_netpeer()) return selinux_ip_postroute_compat(skb, ifindex, family); secmark_active = selinux_secmark_enabled(); @@ -5452,7 +5596,8 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, return NF_DROP; if (secmark_active) - if (avc_has_perm(peer_sid, skb->secmark, + if (avc_has_perm(&selinux_state, + peer_sid, skb->secmark, SECCLASS_PACKET, secmark_perm, &ad)) return NF_DROP_ERR(-ECONNREFUSED); @@ -5462,13 +5607,15 @@ static unsigned int selinux_ip_postroute(struct sk_buff *skb, if (sel_netif_sid(dev_net(outdev), ifindex, &if_sid)) return NF_DROP; - if (avc_has_perm(peer_sid, if_sid, + if (avc_has_perm(&selinux_state, + peer_sid, if_sid, SECCLASS_NETIF, NETIF__EGRESS, &ad)) return NF_DROP_ERR(-ECONNREFUSED); if (sel_netnode_sid(addrp, family, &node_sid)) return NF_DROP; - if (avc_has_perm(peer_sid, node_sid, + if (avc_has_perm(&selinux_state, + peer_sid, node_sid, SECCLASS_NODE, NODE__SENDTO, &ad)) return NF_DROP_ERR(-ECONNREFUSED); } @@ -5556,7 +5703,8 @@ static int ipc_has_perm(struct kern_ipc_perm *ipc_perms, ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = ipc_perms->key; - return avc_has_perm(sid, isec->sid, isec->sclass, perms, &ad); + return avc_has_perm(&selinux_state, + sid, isec->sid, isec->sclass, perms, &ad); } static int selinux_msg_msg_alloc_security(struct msg_msg *msg) @@ -5586,7 +5734,8 @@ static int selinux_msg_queue_alloc_security(struct msg_queue *msq) ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = msq->q_perm.key; - rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, + rc = avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_MSGQ, MSGQ__CREATE, &ad); if (rc) { ipc_free_security(&msq->q_perm); @@ -5611,7 +5760,8 @@ static int selinux_msg_queue_associate(struct msg_queue *msq, int msqflg) ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = msq->q_perm.key; - return avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, + return avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_MSGQ, MSGQ__ASSOCIATE, &ad); } @@ -5624,7 +5774,8 @@ static int selinux_msg_queue_msgctl(struct msg_queue *msq, int cmd) case IPC_INFO: case MSG_INFO: /* No specific object, just general system-wide information. */ - return avc_has_perm(current_sid(), SECINITSID_KERNEL, + return avc_has_perm(&selinux_state, + current_sid(), SECINITSID_KERNEL, SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL); case IPC_STAT: case MSG_STAT: @@ -5663,8 +5814,8 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, * Compute new sid based on current process and * message queue this message will be stored in */ - rc = security_transition_sid(sid, isec->sid, SECCLASS_MSG, - NULL, &msec->sid); + rc = security_transition_sid(&selinux_state, sid, isec->sid, + SECCLASS_MSG, NULL, &msec->sid); if (rc) return rc; } @@ -5673,15 +5824,18 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, ad.u.ipc_id = msq->q_perm.key; /* Can this process write to the queue? */ - rc = avc_has_perm(sid, isec->sid, SECCLASS_MSGQ, + rc = avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_MSGQ, MSGQ__WRITE, &ad); if (!rc) /* Can this process send the message */ - rc = avc_has_perm(sid, msec->sid, SECCLASS_MSG, + rc = avc_has_perm(&selinux_state, + sid, msec->sid, SECCLASS_MSG, MSG__SEND, &ad); if (!rc) /* Can the message be put in the queue? */ - rc = avc_has_perm(msec->sid, isec->sid, SECCLASS_MSGQ, + rc = avc_has_perm(&selinux_state, + msec->sid, isec->sid, SECCLASS_MSGQ, MSGQ__ENQUEUE, &ad); return rc; @@ -5703,10 +5857,12 @@ static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = msq->q_perm.key; - rc = avc_has_perm(sid, isec->sid, + rc = avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_MSGQ, MSGQ__READ, &ad); if (!rc) - rc = avc_has_perm(sid, msec->sid, + rc = avc_has_perm(&selinux_state, + sid, msec->sid, SECCLASS_MSG, MSG__RECEIVE, &ad); return rc; } @@ -5728,7 +5884,8 @@ static int selinux_shm_alloc_security(struct shmid_kernel *shp) ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = shp->shm_perm.key; - rc = avc_has_perm(sid, isec->sid, SECCLASS_SHM, + rc = avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_SHM, SHM__CREATE, &ad); if (rc) { ipc_free_security(&shp->shm_perm); @@ -5753,7 +5910,8 @@ static int selinux_shm_associate(struct shmid_kernel *shp, int shmflg) ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = shp->shm_perm.key; - return avc_has_perm(sid, isec->sid, SECCLASS_SHM, + return avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_SHM, SHM__ASSOCIATE, &ad); } @@ -5767,7 +5925,8 @@ static int selinux_shm_shmctl(struct shmid_kernel *shp, int cmd) case IPC_INFO: case SHM_INFO: /* No specific object, just general system-wide information. */ - return avc_has_perm(current_sid(), SECINITSID_KERNEL, + return avc_has_perm(&selinux_state, + current_sid(), SECINITSID_KERNEL, SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL); case IPC_STAT: case SHM_STAT: @@ -5821,7 +5980,8 @@ static int selinux_sem_alloc_security(struct sem_array *sma) ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = sma->sem_perm.key; - rc = avc_has_perm(sid, isec->sid, SECCLASS_SEM, + rc = avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_SEM, SEM__CREATE, &ad); if (rc) { ipc_free_security(&sma->sem_perm); @@ -5846,7 +6006,8 @@ static int selinux_sem_associate(struct sem_array *sma, int semflg) ad.type = LSM_AUDIT_DATA_IPC; ad.u.ipc_id = sma->sem_perm.key; - return avc_has_perm(sid, isec->sid, SECCLASS_SEM, + return avc_has_perm(&selinux_state, + sid, isec->sid, SECCLASS_SEM, SEM__ASSOCIATE, &ad); } @@ -5860,7 +6021,8 @@ static int selinux_sem_semctl(struct sem_array *sma, int cmd) case IPC_INFO: case SEM_INFO: /* No specific object, just general system-wide information. */ - return avc_has_perm(current_sid(), SECINITSID_KERNEL, + return avc_has_perm(&selinux_state, + current_sid(), SECINITSID_KERNEL, SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL); case GETPID: case GETNCNT: @@ -5946,7 +6108,8 @@ static int selinux_getprocattr(struct task_struct *p, __tsec = __task_cred(p)->security; if (current != p) { - error = avc_has_perm(current_sid(), __tsec->sid, + error = avc_has_perm(&selinux_state, + current_sid(), __tsec->sid, SECCLASS_PROCESS, PROCESS__GETATTR, NULL); if (error) goto bad; @@ -5973,7 +6136,7 @@ static int selinux_getprocattr(struct task_struct *p, if (!sid) return 0; - error = security_sid_to_context(sid, value, &len); + error = security_sid_to_context(&selinux_state, sid, value, &len); if (error) return error; return len; @@ -5995,19 +6158,24 @@ static int selinux_setprocattr(const char *name, void *value, size_t size) * Basic control over ability to set these attributes at all. */ if (!strcmp(name, "exec")) - error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS, + error = avc_has_perm(&selinux_state, + mysid, mysid, SECCLASS_PROCESS, PROCESS__SETEXEC, NULL); else if (!strcmp(name, "fscreate")) - error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS, + error = avc_has_perm(&selinux_state, + mysid, mysid, SECCLASS_PROCESS, PROCESS__SETFSCREATE, NULL); else if (!strcmp(name, "keycreate")) - error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS, + error = avc_has_perm(&selinux_state, + mysid, mysid, SECCLASS_PROCESS, PROCESS__SETKEYCREATE, NULL); else if (!strcmp(name, "sockcreate")) - error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS, + error = avc_has_perm(&selinux_state, + mysid, mysid, SECCLASS_PROCESS, PROCESS__SETSOCKCREATE, NULL); else if (!strcmp(name, "current")) - error = avc_has_perm(mysid, mysid, SECCLASS_PROCESS, + error = avc_has_perm(&selinux_state, + mysid, mysid, SECCLASS_PROCESS, PROCESS__SETCURRENT, NULL); else error = -EINVAL; @@ -6020,7 +6188,8 @@ static int selinux_setprocattr(const char *name, void *value, size_t size) str[size-1] = 0; size--; } - error = security_context_to_sid(value, size, &sid, GFP_KERNEL); + error = security_context_to_sid(&selinux_state, value, size, + &sid, GFP_KERNEL); if (error == -EINVAL && !strcmp(name, "fscreate")) { if (!has_cap_mac_admin(true)) { struct audit_buffer *ab; @@ -6039,8 +6208,9 @@ static int selinux_setprocattr(const char *name, void *value, size_t size) return error; } - error = security_context_to_sid_force(value, size, - &sid); + error = security_context_to_sid_force( + &selinux_state, + value, size, &sid); } if (error) return error; @@ -6062,7 +6232,8 @@ static int selinux_setprocattr(const char *name, void *value, size_t size) } else if (!strcmp(name, "fscreate")) { tsec->create_sid = sid; } else if (!strcmp(name, "keycreate")) { - error = avc_has_perm(mysid, sid, SECCLASS_KEY, KEY__CREATE, + error = avc_has_perm(&selinux_state, + mysid, sid, SECCLASS_KEY, KEY__CREATE, NULL); if (error) goto abort_change; @@ -6077,13 +6248,15 @@ static int selinux_setprocattr(const char *name, void *value, size_t size) /* Only allow single threaded processes to change context */ error = -EPERM; if (!current_is_single_threaded()) { - error = security_bounded_transition(tsec->sid, sid); + error = security_bounded_transition(&selinux_state, + tsec->sid, sid); if (error) goto abort_change; } /* Check permissions for the transition. */ - error = avc_has_perm(tsec->sid, sid, SECCLASS_PROCESS, + error = avc_has_perm(&selinux_state, + tsec->sid, sid, SECCLASS_PROCESS, PROCESS__DYNTRANSITION, NULL); if (error) goto abort_change; @@ -6092,7 +6265,8 @@ static int selinux_setprocattr(const char *name, void *value, size_t size) Otherwise, leave SID unchanged and fail. */ ptsid = ptrace_parent_sid(); if (ptsid != 0) { - error = avc_has_perm(ptsid, sid, SECCLASS_PROCESS, + error = avc_has_perm(&selinux_state, + ptsid, sid, SECCLASS_PROCESS, PROCESS__PTRACE, NULL); if (error) goto abort_change; @@ -6119,12 +6293,14 @@ static int selinux_ismaclabel(const char *name) static int selinux_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) { - return security_sid_to_context(secid, secdata, seclen); + return security_sid_to_context(&selinux_state, secid, + secdata, seclen); } static int selinux_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) { - return security_context_to_sid(secdata, seclen, secid, GFP_KERNEL); + return security_context_to_sid(&selinux_state, secdata, seclen, + secid, GFP_KERNEL); } static void selinux_release_secctx(char *secdata, u32 seclen) @@ -6219,7 +6395,8 @@ static int selinux_key_permission(key_ref_t key_ref, key = key_ref_to_ptr(key_ref); ksec = key->security; - return avc_has_perm(sid, ksec->sid, SECCLASS_KEY, perm, NULL); + return avc_has_perm(&selinux_state, + sid, ksec->sid, SECCLASS_KEY, perm, NULL); } static int selinux_key_getsecurity(struct key *key, char **_buffer) @@ -6229,7 +6406,8 @@ static int selinux_key_getsecurity(struct key *key, char **_buffer) unsigned len; int rc; - rc = security_sid_to_context(ksec->sid, &context, &len); + rc = security_sid_to_context(&selinux_state, ksec->sid, + &context, &len); if (!rc) rc = len; *_buffer = context; @@ -6254,7 +6432,8 @@ static int selinux_ib_pkey_access(void *ib_sec, u64 subnet_prefix, u16 pkey_val) ibpkey.subnet_prefix = subnet_prefix; ibpkey.pkey = pkey_val; ad.u.ibpkey = &ibpkey; - return avc_has_perm(sec->sid, sid, + return avc_has_perm(&selinux_state, + sec->sid, sid, SECCLASS_INFINIBAND_PKEY, INFINIBAND_PKEY__ACCESS, &ad); } @@ -6268,7 +6447,8 @@ static int selinux_ib_endport_manage_subnet(void *ib_sec, const char *dev_name, struct ib_security_struct *sec = ib_sec; struct lsm_ibendport_audit ibendport; - err = security_ib_endport_sid(dev_name, port_num, &sid); + err = security_ib_endport_sid(&selinux_state, dev_name, port_num, + &sid); if (err) return err; @@ -6277,7 +6457,8 @@ static int selinux_ib_endport_manage_subnet(void *ib_sec, const char *dev_name, strncpy(ibendport.dev_name, dev_name, sizeof(ibendport.dev_name)); ibendport.port = port_num; ad.u.ibendport = &ibendport; - return avc_has_perm(sec->sid, sid, + return avc_has_perm(&selinux_state, + sec->sid, sid, SECCLASS_INFINIBAND_ENDPORT, INFINIBAND_ENDPORT__MANAGE_SUBNET, &ad); } @@ -6310,11 +6491,13 @@ static int selinux_bpf(int cmd, union bpf_attr *attr, switch (cmd) { case BPF_MAP_CREATE: - ret = avc_has_perm(sid, sid, SECCLASS_BPF, BPF__MAP_CREATE, + ret = avc_has_perm(&selinux_state, + sid, sid, SECCLASS_BPF, BPF__MAP_CREATE, NULL); break; case BPF_PROG_LOAD: - ret = avc_has_perm(sid, sid, SECCLASS_BPF, BPF__PROG_LOAD, + ret = avc_has_perm(&selinux_state, + sid, sid, SECCLASS_BPF, BPF__PROG_LOAD, NULL); break; default: @@ -6354,14 +6537,16 @@ static int bpf_fd_pass(struct file *file, u32 sid) if (file->f_op == &bpf_map_fops) { map = file->private_data; bpfsec = map->security; - ret = avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF, + ret = avc_has_perm(&selinux_state, + sid, bpfsec->sid, SECCLASS_BPF, bpf_map_fmode_to_av(file->f_mode), NULL); if (ret) return ret; } else if (file->f_op == &bpf_prog_fops) { prog = file->private_data; bpfsec = prog->aux->security; - ret = avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF, + ret = avc_has_perm(&selinux_state, + sid, bpfsec->sid, SECCLASS_BPF, BPF__PROG_RUN, NULL); if (ret) return ret; @@ -6375,7 +6560,8 @@ static int selinux_bpf_map(struct bpf_map *map, fmode_t fmode) struct bpf_security_struct *bpfsec; bpfsec = map->security; - return avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF, + return avc_has_perm(&selinux_state, + sid, bpfsec->sid, SECCLASS_BPF, bpf_map_fmode_to_av(fmode), NULL); } @@ -6385,7 +6571,8 @@ static int selinux_bpf_prog(struct bpf_prog *prog) struct bpf_security_struct *bpfsec; bpfsec = prog->aux->security; - return avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF, + return avc_has_perm(&selinux_state, + sid, bpfsec->sid, SECCLASS_BPF, BPF__PROG_RUN, NULL); } @@ -6434,6 +6621,68 @@ static void selinux_bpf_prog_free(struct bpf_prog_aux *aux) } #endif + +#ifdef CONFIG_PERF_EVENTS +static int selinux_perf_event_open(struct perf_event_attr *attr, int type) +{ + u32 requested, sid = current_sid(); + + if (type == PERF_SECURITY_OPEN) + requested = PERF_EVENT__OPEN; + else if (type == PERF_SECURITY_CPU) + requested = PERF_EVENT__CPU; + else if (type == PERF_SECURITY_KERNEL) + requested = PERF_EVENT__KERNEL; + else if (type == PERF_SECURITY_TRACEPOINT) + requested = PERF_EVENT__TRACEPOINT; + else + return -EINVAL; + + return avc_has_perm(&selinux_state, sid, sid, SECCLASS_PERF_EVENT, + requested, NULL); +} + +static int selinux_perf_event_alloc(struct perf_event *event) +{ + struct perf_event_security_struct *perfsec; + + perfsec = kzalloc(sizeof(*perfsec), GFP_KERNEL); + if (!perfsec) + return -ENOMEM; + + perfsec->sid = current_sid(); + event->security = perfsec; + + return 0; +} + +static void selinux_perf_event_free(struct perf_event *event) +{ + struct perf_event_security_struct *perfsec = event->security; + + event->security = NULL; + kfree(perfsec); +} + +static int selinux_perf_event_read(struct perf_event *event) +{ + struct perf_event_security_struct *perfsec = event->security; + u32 sid = current_sid(); + + return avc_has_perm(&selinux_state, sid, perfsec->sid, + SECCLASS_PERF_EVENT, PERF_EVENT__READ, NULL); +} + +static int selinux_perf_event_write(struct perf_event *event) +{ + struct perf_event_security_struct *perfsec = event->security; + u32 sid = current_sid(); + + return avc_has_perm(&selinux_state, sid, perfsec->sid, + SECCLASS_PERF_EVENT, PERF_EVENT__WRITE, NULL); +} +#endif + static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(binder_set_context_mgr, selinux_binder_set_context_mgr), LSM_HOOK_INIT(binder_transaction, selinux_binder_transaction), @@ -6663,6 +6912,14 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(bpf_map_free_security, selinux_bpf_map_free), LSM_HOOK_INIT(bpf_prog_free_security, selinux_bpf_prog_free), #endif + +#ifdef CONFIG_PERF_EVENTS + LSM_HOOK_INIT(perf_event_open, selinux_perf_event_open), + LSM_HOOK_INIT(perf_event_alloc, selinux_perf_event_alloc), + LSM_HOOK_INIT(perf_event_free, selinux_perf_event_free), + LSM_HOOK_INIT(perf_event_read, selinux_perf_event_read), + LSM_HOOK_INIT(perf_event_write, selinux_perf_event_write), +#endif }; static __init int selinux_init(void) @@ -6679,6 +6936,12 @@ static __init int selinux_init(void) printk(KERN_INFO "SELinux: Initializing.\n"); + memset(&selinux_state, 0, sizeof(selinux_state)); + enforcing_set(&selinux_state, selinux_enforcing_boot); + selinux_state.checkreqprot = selinux_checkreqprot_boot; + selinux_ss_init(&selinux_state.ss); + selinux_avc_init(&selinux_state.avc); + /* Set the security state for the initial task. */ cred_init_security(); @@ -6692,6 +6955,12 @@ static __init int selinux_init(void) 0, SLAB_PANIC, NULL); avc_init(); + avtab_cache_init(); + + ebitmap_cache_init(); + + hashtab_cache_init(); + security_add_hooks(selinux_hooks, ARRAY_SIZE(selinux_hooks), "selinux"); if (avc_add_callback(selinux_netcache_avc_callback, AVC_CALLBACK_RESET)) @@ -6700,7 +6969,7 @@ static __init int selinux_init(void) if (avc_add_callback(selinux_lsm_notifier_avc_callback, AVC_CALLBACK_RESET)) panic("SELinux: Unable to register AVC LSM notifier callback\n"); - if (selinux_enforcing) + if (selinux_enforcing_boot) printk(KERN_DEBUG "SELinux: Starting in enforcing mode\n"); else printk(KERN_DEBUG "SELinux: Starting in permissive mode\n"); @@ -6821,23 +7090,22 @@ static void selinux_nf_ip_exit(void) #endif /* CONFIG_NETFILTER */ #ifdef CONFIG_SECURITY_SELINUX_DISABLE -static int selinux_disabled; - -int selinux_disable(void) +int selinux_disable(struct selinux_state *state) { - if (ss_initialized) { + if (state->initialized) { /* Not permitted after initial policy load. */ return -EINVAL; } - if (selinux_disabled) { + if (state->disabled) { /* Only do this once. */ return -EINVAL; } + state->disabled = 1; + printk(KERN_INFO "SELinux: Disabled at runtime.\n"); - selinux_disabled = 1; selinux_enabled = 0; security_delete_hooks(selinux_hooks, ARRAY_SIZE(selinux_hooks)); diff --git a/security/selinux/ibpkey.c b/security/selinux/ibpkey.c index e3614ee5f1c0..0a4b89d48297 100644 --- a/security/selinux/ibpkey.c +++ b/security/selinux/ibpkey.c @@ -152,7 +152,8 @@ static int sel_ib_pkey_sid_slow(u64 subnet_prefix, u16 pkey_num, u32 *sid) return 0; } - ret = security_ib_pkey_sid(subnet_prefix, pkey_num, sid); + ret = security_ib_pkey_sid(&selinux_state, subnet_prefix, pkey_num, + sid); if (ret) goto out; diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h index 8c0850dbca42..a432e8bffbae 100644 --- a/security/selinux/include/avc.h +++ b/security/selinux/include/avc.h @@ -20,12 +20,6 @@ #include "av_permissions.h" #include "security.h" -#ifdef CONFIG_SECURITY_SELINUX_DEVELOP -extern int selinux_enforcing; -#else -#define selinux_enforcing 1 -#endif - /* * An entry in the AVC. */ @@ -58,6 +52,7 @@ struct selinux_audit_data { u32 audited; u32 denied; int result; + struct selinux_state *state; }; /* @@ -102,7 +97,8 @@ static inline u32 avc_audit_required(u32 requested, return audited; } -int slow_avc_audit(u32 ssid, u32 tsid, u16 tclass, +int slow_avc_audit(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 requested, u32 audited, u32 denied, int result, struct common_audit_data *a, unsigned flags); @@ -127,7 +123,8 @@ int slow_avc_audit(u32 ssid, u32 tsid, u16 tclass, * be performed under a lock, to allow the lock to be released * before calling the auditing code. */ -static inline int avc_audit(u32 ssid, u32 tsid, +static inline int avc_audit(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 requested, struct av_decision *avd, int result, @@ -138,31 +135,35 @@ static inline int avc_audit(u32 ssid, u32 tsid, audited = avc_audit_required(requested, avd, result, 0, &denied); if (likely(!audited)) return 0; - return slow_avc_audit(ssid, tsid, tclass, + return slow_avc_audit(state, ssid, tsid, tclass, requested, audited, denied, result, a, flags); } #define AVC_STRICT 1 /* Ignore permissive mode. */ #define AVC_EXTENDED_PERMS 2 /* update extended permissions */ -int avc_has_perm_noaudit(u32 ssid, u32 tsid, +int avc_has_perm_noaudit(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 requested, unsigned flags, struct av_decision *avd); -int avc_has_perm(u32 ssid, u32 tsid, +int avc_has_perm(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 requested, struct common_audit_data *auditdata); -int avc_has_perm_flags(u32 ssid, u32 tsid, +int avc_has_perm_flags(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 requested, struct common_audit_data *auditdata, int flags); -int avc_has_extended_perms(u32 ssid, u32 tsid, u16 tclass, u32 requested, - u8 driver, u8 perm, struct common_audit_data *ad); +int avc_has_extended_perms(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 requested, + u8 driver, u8 perm, struct common_audit_data *ad); -u32 avc_policy_seqno(void); +u32 avc_policy_seqno(struct selinux_state *state); #define AVC_CALLBACK_GRANT 1 #define AVC_CALLBACK_TRY_REVOKE 2 @@ -177,8 +178,11 @@ u32 avc_policy_seqno(void); int avc_add_callback(int (*callback)(u32 event), u32 events); /* Exported to selinuxfs */ -int avc_get_hash_stats(char *page); -extern unsigned int avc_cache_threshold; +struct selinux_avc; +int avc_get_hash_stats(struct selinux_avc *avc, char *page); +unsigned int avc_get_cache_threshold(struct selinux_avc *avc); +void avc_set_cache_threshold(struct selinux_avc *avc, + unsigned int cache_threshold); /* Attempt to free avc node cache */ void avc_disable(void); diff --git a/security/selinux/include/avc_ss.h b/security/selinux/include/avc_ss.h index 3bcc72769b87..88c384c5c09e 100644 --- a/security/selinux/include/avc_ss.h +++ b/security/selinux/include/avc_ss.h @@ -9,7 +9,8 @@ #include "flask.h" -int avc_ss_reset(u32 seqno); +struct selinux_avc; +int avc_ss_reset(struct selinux_avc *avc, u32 seqno); /* Class/perm mapping support */ struct security_class_mapping { @@ -19,11 +20,5 @@ struct security_class_mapping { extern struct security_class_mapping secclass_map[]; -/* - * The security server must be initialized before - * any labeling or access decisions can be provided. - */ -extern int ss_initialized; - #endif /* _SELINUX_AVC_SS_H_ */ diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 5ae315ab060b..34631690b5f9 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -115,7 +115,7 @@ struct security_class_mapping secclass_map[] = { { COMMON_IPC_PERMS, NULL } }, { "netlink_route_socket", { COMMON_SOCK_PERMS, - "nlmsg_read", "nlmsg_write", NULL } }, + "nlmsg_read", "nlmsg_write", "nlmsg_readpriv", NULL } }, { "netlink_tcpdiag_socket", { COMMON_SOCK_PERMS, "nlmsg_read", "nlmsg_write", NULL } }, @@ -241,6 +241,8 @@ struct security_class_mapping secclass_map[] = { { "manage_subnet", NULL } }, { "bpf", {"map_create", "map_read", "map_write", "prog_load", "prog_run"} }, + { "perf_event", + {"open", "cpu", "kernel", "tracepoint", "read", "write"} }, { NULL } }; diff --git a/security/selinux/include/conditional.h b/security/selinux/include/conditional.h index ff4fddca9050..0e30eca02c48 100644 --- a/security/selinux/include/conditional.h +++ b/security/selinux/include/conditional.h @@ -13,10 +13,15 @@ #ifndef _SELINUX_CONDITIONAL_H_ #define _SELINUX_CONDITIONAL_H_ -int security_get_bools(int *len, char ***names, int **values); +#include "security.h" -int security_set_bools(int len, int *values); +int security_get_bools(struct selinux_state *state, + int *len, char ***names, int **values); -int security_get_bool_value(int index); +int security_set_bools(struct selinux_state *state, + int len, int *values); + +int security_get_bool_value(struct selinux_state *state, + int index); #endif diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 3d54468ce334..512908b55ca3 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -151,9 +151,11 @@ struct pkey_security_struct { }; struct bpf_security_struct { - u32 sid; /*SID of bpf obj creater*/ + u32 sid; /* SID of bpf obj creator */ }; -extern unsigned int selinux_checkreqprot; +struct perf_event_security_struct { + u32 sid; /* SID of perf_event obj creator */ +}; #endif /* _SELINUX_OBJSEC_H_ */ diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 02f0412d42f2..f64e33b23cd9 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include "flask.h" #define SECSID_NULL 0x00000000 /* unspecified SID */ @@ -79,14 +81,7 @@ enum { }; #define POLICYDB_CAPABILITY_MAX (__POLICYDB_CAPABILITY_MAX - 1) -extern char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX]; - -extern int selinux_policycap_netpeer; -extern int selinux_policycap_openperm; -extern int selinux_policycap_extsockclass; -extern int selinux_policycap_alwaysnetwork; -extern int selinux_policycap_cgroupseclabel; -extern int selinux_policycap_nnp_nosuid_transition; +extern const char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX]; /* * type_datum properties @@ -98,13 +93,106 @@ extern int selinux_policycap_nnp_nosuid_transition; /* limitation of boundary depth */ #define POLICYDB_BOUNDS_MAXDEPTH 4 -int security_mls_enabled(void); +struct selinux_avc; +struct selinux_ss; -int security_load_policy(void *data, size_t len); -int security_read_policy(void **data, size_t *len); -size_t security_policydb_len(void); +struct selinux_state { + bool disabled; +#ifdef CONFIG_SECURITY_SELINUX_DEVELOP + bool enforcing; +#endif + bool checkreqprot; + bool initialized; + bool policycap[__POLICYDB_CAPABILITY_MAX]; + bool android_netlink_route; + struct selinux_avc *avc; + struct selinux_ss *ss; +}; -int security_policycap_supported(unsigned int req_cap); +void selinux_ss_init(struct selinux_ss **ss); +void selinux_avc_init(struct selinux_avc **avc); + +extern struct selinux_state selinux_state; + +#ifdef CONFIG_SECURITY_SELINUX_DEVELOP +static inline bool enforcing_enabled(struct selinux_state *state) +{ + return state->enforcing; +} + +static inline void enforcing_set(struct selinux_state *state, bool value) +{ + state->enforcing = value; +} +#else +static inline bool enforcing_enabled(struct selinux_state *state) +{ + return true; +} + +static inline void enforcing_set(struct selinux_state *state, bool value) +{ +} +#endif + +static inline bool selinux_policycap_netpeer(void) +{ + struct selinux_state *state = &selinux_state; + + return state->policycap[POLICYDB_CAPABILITY_NETPEER]; +} + +static inline bool selinux_policycap_openperm(void) +{ + struct selinux_state *state = &selinux_state; + + return state->policycap[POLICYDB_CAPABILITY_OPENPERM]; +} + +static inline bool selinux_policycap_extsockclass(void) +{ + struct selinux_state *state = &selinux_state; + + return state->policycap[POLICYDB_CAPABILITY_EXTSOCKCLASS]; +} + +static inline bool selinux_policycap_alwaysnetwork(void) +{ + struct selinux_state *state = &selinux_state; + + return state->policycap[POLICYDB_CAPABILITY_ALWAYSNETWORK]; +} + +static inline bool selinux_policycap_cgroupseclabel(void) +{ + struct selinux_state *state = &selinux_state; + + return state->policycap[POLICYDB_CAPABILITY_CGROUPSECLABEL]; +} + +static inline bool selinux_policycap_nnp_nosuid_transition(void) +{ + struct selinux_state *state = &selinux_state; + + return state->policycap[POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION]; +} + +static inline bool selinux_android_nlroute_getlink(void) +{ + struct selinux_state *state = &selinux_state; + + return state->android_netlink_route; +} + +int security_mls_enabled(struct selinux_state *state); +int security_load_policy(struct selinux_state *state, + void *data, size_t len); +int security_read_policy(struct selinux_state *state, + void **data, size_t *len); +size_t security_policydb_len(struct selinux_state *state); + +int security_policycap_supported(struct selinux_state *state, + unsigned int req_cap); #define SEL_VEC_MAX 32 struct av_decision { @@ -141,76 +229,100 @@ struct extended_perms { /* definitions of av_decision.flags */ #define AVD_FLAGS_PERMISSIVE 0x0001 -void security_compute_av(u32 ssid, u32 tsid, +void security_compute_av(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, struct av_decision *avd, struct extended_perms *xperms); -void security_compute_xperms_decision(u32 ssid, u32 tsid, u16 tclass, - u8 driver, struct extended_perms_decision *xpermd); +void security_compute_xperms_decision(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, + u8 driver, + struct extended_perms_decision *xpermd); -void security_compute_av_user(u32 ssid, u32 tsid, - u16 tclass, struct av_decision *avd); +void security_compute_av_user(struct selinux_state *state, + u32 ssid, u32 tsid, + u16 tclass, struct av_decision *avd); -int security_transition_sid(u32 ssid, u32 tsid, u16 tclass, +int security_transition_sid(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, const struct qstr *qstr, u32 *out_sid); -int security_transition_sid_user(u32 ssid, u32 tsid, u16 tclass, +int security_transition_sid_user(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, const char *objname, u32 *out_sid); -int security_member_sid(u32 ssid, u32 tsid, - u16 tclass, u32 *out_sid); +int security_member_sid(struct selinux_state *state, u32 ssid, u32 tsid, + u16 tclass, u32 *out_sid); -int security_change_sid(u32 ssid, u32 tsid, - u16 tclass, u32 *out_sid); +int security_change_sid(struct selinux_state *state, u32 ssid, u32 tsid, + u16 tclass, u32 *out_sid); -int security_sid_to_context(u32 sid, char **scontext, - u32 *scontext_len); +int security_sid_to_context(struct selinux_state *state, u32 sid, + char **scontext, u32 *scontext_len); -int security_sid_to_context_force(u32 sid, char **scontext, u32 *scontext_len); +int security_sid_to_context_force(struct selinux_state *state, + u32 sid, char **scontext, u32 *scontext_len); -int security_context_to_sid(const char *scontext, u32 scontext_len, +int security_context_to_sid(struct selinux_state *state, + const char *scontext, u32 scontext_len, u32 *out_sid, gfp_t gfp); -int security_context_str_to_sid(const char *scontext, u32 *out_sid, gfp_t gfp); +int security_context_str_to_sid(struct selinux_state *state, + const char *scontext, u32 *out_sid, gfp_t gfp); -int security_context_to_sid_default(const char *scontext, u32 scontext_len, +int security_context_to_sid_default(struct selinux_state *state, + const char *scontext, u32 scontext_len, u32 *out_sid, u32 def_sid, gfp_t gfp_flags); -int security_context_to_sid_force(const char *scontext, u32 scontext_len, +int security_context_to_sid_force(struct selinux_state *state, + const char *scontext, u32 scontext_len, u32 *sid); -int security_get_user_sids(u32 callsid, char *username, +int security_get_user_sids(struct selinux_state *state, + u32 callsid, char *username, u32 **sids, u32 *nel); -int security_port_sid(u8 protocol, u16 port, u32 *out_sid); +int security_port_sid(struct selinux_state *state, + u8 protocol, u16 port, u32 *out_sid); -int security_ib_pkey_sid(u64 subnet_prefix, u16 pkey_num, u32 *out_sid); +int security_ib_pkey_sid(struct selinux_state *state, + u64 subnet_prefix, u16 pkey_num, u32 *out_sid); -int security_ib_endport_sid(const char *dev_name, u8 port_num, u32 *out_sid); +int security_ib_endport_sid(struct selinux_state *state, + const char *dev_name, u8 port_num, u32 *out_sid); -int security_netif_sid(char *name, u32 *if_sid); +int security_netif_sid(struct selinux_state *state, + char *name, u32 *if_sid); -int security_node_sid(u16 domain, void *addr, u32 addrlen, - u32 *out_sid); +int security_node_sid(struct selinux_state *state, + u16 domain, void *addr, u32 addrlen, + u32 *out_sid); -int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid, +int security_validate_transition(struct selinux_state *state, + u32 oldsid, u32 newsid, u32 tasksid, u16 tclass); -int security_validate_transition_user(u32 oldsid, u32 newsid, u32 tasksid, +int security_validate_transition_user(struct selinux_state *state, + u32 oldsid, u32 newsid, u32 tasksid, u16 tclass); -int security_bounded_transition(u32 oldsid, u32 newsid); +int security_bounded_transition(struct selinux_state *state, + u32 oldsid, u32 newsid); -int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid); +int security_sid_mls_copy(struct selinux_state *state, + u32 sid, u32 mls_sid, u32 *new_sid); -int security_net_peersid_resolve(u32 nlbl_sid, u32 nlbl_type, +int security_net_peersid_resolve(struct selinux_state *state, + u32 nlbl_sid, u32 nlbl_type, u32 xfrm_sid, u32 *peer_sid); -int security_get_classes(char ***classes, int *nclasses); -int security_get_permissions(char *class, char ***perms, int *nperms); -int security_get_reject_unknown(void); -int security_get_allow_unknown(void); +int security_get_classes(struct selinux_state *state, + char ***classes, int *nclasses); +int security_get_permissions(struct selinux_state *state, + char *class, char ***perms, int *nperms); +int security_get_reject_unknown(struct selinux_state *state); +int security_get_allow_unknown(struct selinux_state *state); #define SECURITY_FS_USE_XATTR 1 /* use xattr */ #define SECURITY_FS_USE_TRANS 2 /* use transition SIDs, e.g. devpts/tmpfs */ @@ -221,27 +333,31 @@ int security_get_allow_unknown(void); #define SECURITY_FS_USE_NATIVE 7 /* use native label support */ #define SECURITY_FS_USE_MAX 7 /* Highest SECURITY_FS_USE_XXX */ -int security_fs_use(struct super_block *sb); +int security_fs_use(struct selinux_state *state, struct super_block *sb); -int security_genfs_sid(const char *fstype, char *name, u16 sclass, - u32 *sid); +int security_genfs_sid(struct selinux_state *state, + const char *fstype, char *name, u16 sclass, + u32 *sid); #ifdef CONFIG_NETLABEL -int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr, +int security_netlbl_secattr_to_sid(struct selinux_state *state, + struct netlbl_lsm_secattr *secattr, u32 *sid); -int security_netlbl_sid_to_secattr(u32 sid, +int security_netlbl_sid_to_secattr(struct selinux_state *state, + u32 sid, struct netlbl_lsm_secattr *secattr); #else -static inline int security_netlbl_secattr_to_sid( +static inline int security_netlbl_secattr_to_sid(struct selinux_state *state, struct netlbl_lsm_secattr *secattr, u32 *sid) { return -EIDRM; } -static inline int security_netlbl_sid_to_secattr(u32 sid, - struct netlbl_lsm_secattr *secattr) +static inline int security_netlbl_sid_to_secattr(struct selinux_state *state, + u32 sid, + struct netlbl_lsm_secattr *secattr) { return -ENOENT; } @@ -252,7 +368,7 @@ const char *security_get_initial_sid_context(u32 sid); /* * status notifier using mmap interface */ -extern struct page *selinux_kernel_status_page(void); +extern struct page *selinux_kernel_status_page(struct selinux_state *state); #define SELINUX_KERNEL_STATUS_VERSION 1 struct selinux_kernel_status { @@ -266,10 +382,12 @@ struct selinux_kernel_status { */ } __packed; -extern void selinux_status_update_setenforce(int enforcing); -extern void selinux_status_update_policyload(int seqno); +extern void selinux_status_update_setenforce(struct selinux_state *state, + int enforcing); +extern void selinux_status_update_policyload(struct selinux_state *state, + int seqno); extern void selinux_complete_init(void); -extern int selinux_disable(void); +extern int selinux_disable(struct selinux_state *state); extern void exit_sel_fs(void); extern struct path selinux_null; extern struct vfsmount *selinuxfs_mount; @@ -277,5 +395,10 @@ extern void selnl_notify_setenforce(int val); extern void selnl_notify_policyload(u32 seqno); extern int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm); -#endif /* _SELINUX_SECURITY_H_ */ +extern void avtab_cache_init(void); +extern void ebitmap_cache_init(void); +extern void hashtab_cache_init(void); +extern void selinux_nlmsg_init(void); +extern int security_sidtab_hash_stats(struct selinux_state *state, char *page); +#endif /* _SELINUX_SECURITY_H_ */ diff --git a/security/selinux/netif.c b/security/selinux/netif.c index e607b4473ef6..ac65f7417413 100644 --- a/security/selinux/netif.c +++ b/security/selinux/netif.c @@ -163,7 +163,7 @@ static int sel_netif_sid_slow(struct net *ns, int ifindex, u32 *sid) ret = -ENOMEM; goto out; } - ret = security_netif_sid(dev->name, &new->nsec.sid); + ret = security_netif_sid(&selinux_state, dev->name, &new->nsec.sid); if (ret != 0) goto out; new->nsec.ns = ns; diff --git a/security/selinux/netlabel.c b/security/selinux/netlabel.c index aaba6677ee2e..c99884a48fc4 100644 --- a/security/selinux/netlabel.c +++ b/security/selinux/netlabel.c @@ -60,7 +60,7 @@ static int selinux_netlbl_sidlookup_cached(struct sk_buff *skb, { int rc; - rc = security_netlbl_secattr_to_sid(secattr, sid); + rc = security_netlbl_secattr_to_sid(&selinux_state, secattr, sid); if (rc == 0 && (secattr->flags & NETLBL_SECATTR_CACHEABLE) && (secattr->flags & NETLBL_SECATTR_CACHE)) @@ -91,7 +91,8 @@ static struct netlbl_lsm_secattr *selinux_netlbl_sock_genattr(struct sock *sk) secattr = netlbl_secattr_alloc(GFP_ATOMIC); if (secattr == NULL) return NULL; - rc = security_netlbl_sid_to_secattr(sksec->sid, secattr); + rc = security_netlbl_sid_to_secattr(&selinux_state, sksec->sid, + secattr); if (rc != 0) { netlbl_secattr_free(secattr); return NULL; @@ -257,7 +258,8 @@ int selinux_netlbl_skbuff_setsid(struct sk_buff *skb, if (secattr == NULL) { secattr = &secattr_storage; netlbl_secattr_init(secattr); - rc = security_netlbl_sid_to_secattr(sid, secattr); + rc = security_netlbl_sid_to_secattr(&selinux_state, sid, + secattr); if (rc != 0) goto skbuff_setsid_return; } @@ -290,7 +292,8 @@ int selinux_netlbl_inet_conn_request(struct request_sock *req, u16 family) return 0; netlbl_secattr_init(&secattr); - rc = security_netlbl_sid_to_secattr(req->secid, &secattr); + rc = security_netlbl_sid_to_secattr(&selinux_state, req->secid, + &secattr); if (rc != 0) goto inet_conn_request_return; rc = netlbl_req_setattr(req, &secattr); @@ -403,7 +406,8 @@ int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, perm = RAWIP_SOCKET__RECVFROM; } - rc = avc_has_perm(sksec->sid, nlbl_sid, sksec->sclass, perm, ad); + rc = avc_has_perm(&selinux_state, + sksec->sid, nlbl_sid, sksec->sclass, perm, ad); if (rc == 0) return 0; diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c index da923f89d2a9..6dd89b89bc1f 100644 --- a/security/selinux/netnode.c +++ b/security/selinux/netnode.c @@ -215,12 +215,12 @@ static int sel_netnode_sid_slow(void *addr, u16 family, u32 *sid) goto out; switch (family) { case PF_INET: - ret = security_node_sid(PF_INET, + ret = security_node_sid(&selinux_state, PF_INET, addr, sizeof(struct in_addr), sid); new->nsec.addr.ipv4 = *(__be32 *)addr; break; case PF_INET6: - ret = security_node_sid(PF_INET6, + ret = security_node_sid(&selinux_state, PF_INET6, addr, sizeof(struct in6_addr), sid); new->nsec.addr.ipv6 = *(struct in6_addr *)addr; break; diff --git a/security/selinux/netport.c b/security/selinux/netport.c index 3311cc393cb4..9ed4c5064a5e 100644 --- a/security/selinux/netport.c +++ b/security/selinux/netport.c @@ -161,7 +161,7 @@ static int sel_netport_sid_slow(u8 protocol, u16 pnum, u32 *sid) new = kzalloc(sizeof(*new), GFP_ATOMIC); if (new == NULL) goto out; - ret = security_port_sid(protocol, pnum, sid); + ret = security_port_sid(&selinux_state, protocol, pnum, sid); if (ret != 0) goto out; diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index 7b7433a1a34c..6a93edf01cfb 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -28,7 +28,7 @@ struct nlmsg_perm { u32 perm; }; -static const struct nlmsg_perm nlmsg_route_perms[] = +static struct nlmsg_perm nlmsg_route_perms[] = { { RTM_NEWLINK, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, { RTM_DELLINK, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, @@ -195,3 +195,27 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm) return err; } + +static void nlmsg_set_getlink_perm(u32 perm) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(nlmsg_route_perms); i++) { + if (nlmsg_route_perms[i].nlmsg_type == RTM_GETLINK) { + nlmsg_route_perms[i].perm = perm; + break; + } + } +} + +/** + * Use nlmsg_readpriv as the permission for RTM_GETLINK messages if the + * netlink_route_getlink policy capability is set. Otherwise use nlmsg_read. + */ +void selinux_nlmsg_init(void) +{ + if (selinux_android_nlroute_getlink()) + nlmsg_set_getlink_perm(NETLINK_ROUTE_SOCKET__NLMSG_READPRIV); + else + nlmsg_set_getlink_perm(NETLINK_ROUTE_SOCKET__NLMSG_READ); +} diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 00eed842c491..dba17ea51ec0 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -41,34 +42,6 @@ #include "objsec.h" #include "conditional.h" -unsigned int selinux_checkreqprot = CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE; - -static int __init checkreqprot_setup(char *str) -{ - unsigned long checkreqprot; - if (!kstrtoul(str, 0, &checkreqprot)) - selinux_checkreqprot = checkreqprot ? 1 : 0; - return 1; -} -__setup("checkreqprot=", checkreqprot_setup); - -static DEFINE_MUTEX(sel_mutex); - -/* global data for booleans */ -static struct dentry *bool_dir; -static int bool_num; -static char **bool_pending_names; -static int *bool_pending_values; - -/* global data for classes */ -static struct dentry *class_dir; -static unsigned long last_class_ino; - -static char policy_opened; - -/* global data for policy capabilities */ -static struct dentry *policycap_dir; - enum sel_inos { SEL_ROOT_INO = 2, SEL_LOAD, /* load policy */ @@ -93,7 +66,51 @@ enum sel_inos { SEL_INO_NEXT, /* The next inode number to use */ }; -static unsigned long sel_last_ino = SEL_INO_NEXT - 1; +struct selinux_fs_info { + struct dentry *bool_dir; + unsigned int bool_num; + char **bool_pending_names; + unsigned int *bool_pending_values; + struct dentry *class_dir; + unsigned long last_class_ino; + bool policy_opened; + struct dentry *policycap_dir; + struct mutex mutex; + unsigned long last_ino; + struct selinux_state *state; + struct super_block *sb; +}; + +static int selinux_fs_info_create(struct super_block *sb) +{ + struct selinux_fs_info *fsi; + + fsi = kzalloc(sizeof(*fsi), GFP_KERNEL); + if (!fsi) + return -ENOMEM; + + mutex_init(&fsi->mutex); + fsi->last_ino = SEL_INO_NEXT - 1; + fsi->state = &selinux_state; + fsi->sb = sb; + sb->s_fs_info = fsi; + return 0; +} + +static void selinux_fs_info_free(struct super_block *sb) +{ + struct selinux_fs_info *fsi = sb->s_fs_info; + int i; + + if (fsi) { + for (i = 0; i < fsi->bool_num; i++) + kfree(fsi->bool_pending_names[i]); + kfree(fsi->bool_pending_names); + kfree(fsi->bool_pending_values); + } + kfree(sb->s_fs_info); + sb->s_fs_info = NULL; +} #define SEL_INITCON_INO_OFFSET 0x01000000 #define SEL_BOOL_INO_OFFSET 0x02000000 @@ -105,10 +122,12 @@ static unsigned long sel_last_ino = SEL_INO_NEXT - 1; static ssize_t sel_read_enforce(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; char tmpbuf[TMPBUFLEN]; ssize_t length; - length = scnprintf(tmpbuf, TMPBUFLEN, "%d", selinux_enforcing); + length = scnprintf(tmpbuf, TMPBUFLEN, "%d", + enforcing_enabled(fsi->state)); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } @@ -117,9 +136,11 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *page = NULL; ssize_t length; - int new_value; + int old_value, new_value; if (count >= PAGE_SIZE) return -ENOMEM; @@ -138,23 +159,25 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, new_value = !!new_value; - if (new_value != selinux_enforcing) { - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + old_value = enforcing_enabled(state); + if (new_value != old_value) { + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__SETENFORCE, NULL); if (length) goto out; audit_log(current->audit_context, GFP_KERNEL, AUDIT_MAC_STATUS, "enforcing=%d old_enforcing=%d auid=%u ses=%u", - new_value, selinux_enforcing, + new_value, old_value, from_kuid(&init_user_ns, audit_get_loginuid(current)), audit_get_sessionid(current)); - selinux_enforcing = new_value; - if (selinux_enforcing) - avc_ss_reset(0); - selnl_notify_setenforce(selinux_enforcing); - selinux_status_update_setenforce(selinux_enforcing); - if (!selinux_enforcing) + enforcing_set(state, new_value); + if (new_value) + avc_ss_reset(state->avc, 0); + selnl_notify_setenforce(new_value); + selinux_status_update_setenforce(state, new_value); + if (!new_value) call_lsm_notifier(LSM_POLICY_CHANGE, NULL); } length = count; @@ -175,11 +198,14 @@ static const struct file_operations sel_enforce_ops = { static ssize_t sel_read_handle_unknown(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char tmpbuf[TMPBUFLEN]; ssize_t length; ino_t ino = file_inode(filp)->i_ino; int handle_unknown = (ino == SEL_REJECT_UNKNOWN) ? - security_get_reject_unknown() : !security_get_allow_unknown(); + security_get_reject_unknown(state) : + !security_get_allow_unknown(state); length = scnprintf(tmpbuf, TMPBUFLEN, "%d", handle_unknown); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); @@ -192,7 +218,8 @@ static const struct file_operations sel_handle_unknown_ops = { static int sel_open_handle_status(struct inode *inode, struct file *filp) { - struct page *status = selinux_kernel_status_page(); + struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; + struct page *status = selinux_kernel_status_page(fsi->state); if (!status) return -ENOMEM; @@ -248,6 +275,7 @@ static ssize_t sel_write_disable(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; char *page; ssize_t length; int new_value; @@ -268,7 +296,7 @@ static ssize_t sel_write_disable(struct file *file, const char __user *buf, goto out; if (new_value) { - length = selinux_disable(); + length = selinux_disable(fsi->state); if (length) goto out; audit_log(current->audit_context, GFP_KERNEL, AUDIT_MAC_STATUS, @@ -307,9 +335,9 @@ static const struct file_operations sel_policyvers_ops = { }; /* declaration for sel_write_load */ -static int sel_make_bools(void); -static int sel_make_classes(void); -static int sel_make_policycap(void); +static int sel_make_bools(struct selinux_fs_info *fsi); +static int sel_make_classes(struct selinux_fs_info *fsi); +static int sel_make_policycap(struct selinux_fs_info *fsi); /* declaration for sel_make_class_dirs */ static struct dentry *sel_make_dir(struct dentry *dir, const char *name, @@ -318,11 +346,12 @@ static struct dentry *sel_make_dir(struct dentry *dir, const char *name, static ssize_t sel_read_mls(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; char tmpbuf[TMPBUFLEN]; ssize_t length; length = scnprintf(tmpbuf, TMPBUFLEN, "%d", - security_mls_enabled()); + security_mls_enabled(fsi->state)); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } @@ -338,20 +367,23 @@ struct policy_load_memory { static int sel_open_policy(struct inode *inode, struct file *filp) { + struct selinux_fs_info *fsi = inode->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; struct policy_load_memory *plm = NULL; int rc; BUG_ON(filp->private_data); - mutex_lock(&sel_mutex); + mutex_lock(&fsi->mutex); - rc = avc_has_perm(current_sid(), SECINITSID_SECURITY, + rc = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__READ_POLICY, NULL); if (rc) goto err; rc = -EBUSY; - if (policy_opened) + if (fsi->policy_opened) goto err; rc = -ENOMEM; @@ -359,25 +391,25 @@ static int sel_open_policy(struct inode *inode, struct file *filp) if (!plm) goto err; - if (i_size_read(inode) != security_policydb_len()) { + if (i_size_read(inode) != security_policydb_len(state)) { inode_lock(inode); - i_size_write(inode, security_policydb_len()); + i_size_write(inode, security_policydb_len(state)); inode_unlock(inode); } - rc = security_read_policy(&plm->data, &plm->len); + rc = security_read_policy(state, &plm->data, &plm->len); if (rc) goto err; - policy_opened = 1; + fsi->policy_opened = 1; filp->private_data = plm; - mutex_unlock(&sel_mutex); + mutex_unlock(&fsi->mutex); return 0; err: - mutex_unlock(&sel_mutex); + mutex_unlock(&fsi->mutex); if (plm) vfree(plm->data); @@ -387,11 +419,12 @@ err: static int sel_release_policy(struct inode *inode, struct file *filp) { + struct selinux_fs_info *fsi = inode->i_sb->s_fs_info; struct policy_load_memory *plm = filp->private_data; BUG_ON(!plm); - policy_opened = 0; + fsi->policy_opened = 0; vfree(plm->data); kfree(plm); @@ -402,19 +435,21 @@ static int sel_release_policy(struct inode *inode, struct file *filp) static ssize_t sel_read_policy(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; struct policy_load_memory *plm = filp->private_data; int ret; - mutex_lock(&sel_mutex); + mutex_lock(&fsi->mutex); - ret = avc_has_perm(current_sid(), SECINITSID_SECURITY, + ret = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__READ_POLICY, NULL); if (ret) goto out; ret = simple_read_from_buffer(buf, count, ppos, plm->data, plm->len); out: - mutex_unlock(&sel_mutex); + mutex_unlock(&fsi->mutex); return ret; } @@ -468,16 +503,43 @@ static const struct file_operations sel_policy_ops = { .llseek = generic_file_llseek, }; +static int sel_make_policy_nodes(struct selinux_fs_info *fsi) +{ + int ret; + + ret = sel_make_bools(fsi); + if (ret) { + pr_err("SELinux: failed to load policy booleans\n"); + return ret; + } + + ret = sel_make_classes(fsi); + if (ret) { + pr_err("SELinux: failed to load policy classes\n"); + return ret; + } + + ret = sel_make_policycap(fsi); + if (ret) { + pr_err("SELinux: failed to load policy capabilities\n"); + return ret; + } + + return 0; +} + static ssize_t sel_write_load(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; ssize_t length; void *data = NULL; - mutex_lock(&sel_mutex); + mutex_lock(&fsi->mutex); - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__LOAD_POLICY, NULL); if (length) goto out; @@ -500,29 +562,15 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf, if (copy_from_user(data, buf, count) != 0) goto out; - length = security_load_policy(data, count); + length = security_load_policy(fsi->state, data, count); if (length) { pr_warn_ratelimited("SELinux: failed to load policy\n"); goto out; } - length = sel_make_bools(); - if (length) { - pr_err("SELinux: failed to load policy booleans\n"); + length = sel_make_policy_nodes(fsi); + if (length) goto out1; - } - - length = sel_make_classes(); - if (length) { - pr_err("SELinux: failed to load policy classes\n"); - goto out1; - } - - length = sel_make_policycap(); - if (length) { - pr_err("SELinux: failed to load policy capabilities\n"); - goto out1; - } length = count; @@ -532,7 +580,7 @@ out1: from_kuid(&init_user_ns, audit_get_loginuid(current)), audit_get_sessionid(current)); out: - mutex_unlock(&sel_mutex); + mutex_unlock(&fsi->mutex); vfree(data); return length; } @@ -544,20 +592,23 @@ static const struct file_operations sel_load_ops = { static ssize_t sel_write_context(struct file *file, char *buf, size_t size) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *canon = NULL; u32 sid, len; ssize_t length; - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__CHECK_CONTEXT, NULL); if (length) goto out; - length = security_context_to_sid(buf, size, &sid, GFP_KERNEL); + length = security_context_to_sid(state, buf, size, &sid, GFP_KERNEL); if (length) goto out; - length = security_sid_to_context(sid, &canon, &len); + length = security_sid_to_context(state, sid, &canon, &len); if (length) goto out; @@ -578,21 +629,24 @@ out: static ssize_t sel_read_checkreqprot(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; char tmpbuf[TMPBUFLEN]; ssize_t length; - length = scnprintf(tmpbuf, TMPBUFLEN, "%u", selinux_checkreqprot); + length = scnprintf(tmpbuf, TMPBUFLEN, "%u", fsi->state->checkreqprot); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; char *page; ssize_t length; unsigned int new_value; - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__SETCHECKREQPROT, NULL); if (length) @@ -613,7 +667,7 @@ static ssize_t sel_write_checkreqprot(struct file *file, const char __user *buf, if (sscanf(page, "%u", &new_value) != 1) goto out; - selinux_checkreqprot = new_value ? 1 : 0; + fsi->state->checkreqprot = new_value ? 1 : 0; length = count; out: kfree(page); @@ -629,13 +683,16 @@ static ssize_t sel_write_validatetrans(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *oldcon = NULL, *newcon = NULL, *taskcon = NULL; char *req = NULL; u32 osid, nsid, tsid; u16 tclass; int rc; - rc = avc_has_perm(current_sid(), SECINITSID_SECURITY, + rc = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__VALIDATE_TRANS, NULL); if (rc) goto out; @@ -673,19 +730,19 @@ static ssize_t sel_write_validatetrans(struct file *file, if (sscanf(req, "%s %s %hu %s", oldcon, newcon, &tclass, taskcon) != 4) goto out; - rc = security_context_str_to_sid(oldcon, &osid, GFP_KERNEL); + rc = security_context_str_to_sid(state, oldcon, &osid, GFP_KERNEL); if (rc) goto out; - rc = security_context_str_to_sid(newcon, &nsid, GFP_KERNEL); + rc = security_context_str_to_sid(state, newcon, &nsid, GFP_KERNEL); if (rc) goto out; - rc = security_context_str_to_sid(taskcon, &tsid, GFP_KERNEL); + rc = security_context_str_to_sid(state, taskcon, &tsid, GFP_KERNEL); if (rc) goto out; - rc = security_validate_transition_user(osid, nsid, tsid, tclass); + rc = security_validate_transition_user(state, osid, nsid, tsid, tclass); if (!rc) rc = count; out: @@ -755,13 +812,16 @@ static const struct file_operations transaction_ops = { static ssize_t sel_write_access(struct file *file, char *buf, size_t size) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *scon = NULL, *tcon = NULL; u32 ssid, tsid; u16 tclass; struct av_decision avd; ssize_t length; - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__COMPUTE_AV, NULL); if (length) goto out; @@ -780,15 +840,15 @@ static ssize_t sel_write_access(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3) goto out; - length = security_context_str_to_sid(scon, &ssid, GFP_KERNEL); + length = security_context_str_to_sid(state, scon, &ssid, GFP_KERNEL); if (length) goto out; - length = security_context_str_to_sid(tcon, &tsid, GFP_KERNEL); + length = security_context_str_to_sid(state, tcon, &tsid, GFP_KERNEL); if (length) goto out; - security_compute_av_user(ssid, tsid, tclass, &avd); + security_compute_av_user(state, ssid, tsid, tclass, &avd); length = scnprintf(buf, SIMPLE_TRANSACTION_LIMIT, "%x %x %x %x %u %x", @@ -803,6 +863,8 @@ out: static ssize_t sel_write_create(struct file *file, char *buf, size_t size) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *scon = NULL, *tcon = NULL; char *namebuf = NULL, *objname = NULL; u32 ssid, tsid, newsid; @@ -812,7 +874,8 @@ static ssize_t sel_write_create(struct file *file, char *buf, size_t size) u32 len; int nargs; - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__COMPUTE_CREATE, NULL); if (length) @@ -868,20 +931,20 @@ static ssize_t sel_write_create(struct file *file, char *buf, size_t size) objname = namebuf; } - length = security_context_str_to_sid(scon, &ssid, GFP_KERNEL); + length = security_context_str_to_sid(state, scon, &ssid, GFP_KERNEL); if (length) goto out; - length = security_context_str_to_sid(tcon, &tsid, GFP_KERNEL); + length = security_context_str_to_sid(state, tcon, &tsid, GFP_KERNEL); if (length) goto out; - length = security_transition_sid_user(ssid, tsid, tclass, + length = security_transition_sid_user(state, ssid, tsid, tclass, objname, &newsid); if (length) goto out; - length = security_sid_to_context(newsid, &newcon, &len); + length = security_sid_to_context(state, newsid, &newcon, &len); if (length) goto out; @@ -904,6 +967,8 @@ out: static ssize_t sel_write_relabel(struct file *file, char *buf, size_t size) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *scon = NULL, *tcon = NULL; u32 ssid, tsid, newsid; u16 tclass; @@ -911,7 +976,8 @@ static ssize_t sel_write_relabel(struct file *file, char *buf, size_t size) char *newcon = NULL; u32 len; - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__COMPUTE_RELABEL, NULL); if (length) @@ -931,19 +997,19 @@ static ssize_t sel_write_relabel(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3) goto out; - length = security_context_str_to_sid(scon, &ssid, GFP_KERNEL); + length = security_context_str_to_sid(state, scon, &ssid, GFP_KERNEL); if (length) goto out; - length = security_context_str_to_sid(tcon, &tsid, GFP_KERNEL); + length = security_context_str_to_sid(state, tcon, &tsid, GFP_KERNEL); if (length) goto out; - length = security_change_sid(ssid, tsid, tclass, &newsid); + length = security_change_sid(state, ssid, tsid, tclass, &newsid); if (length) goto out; - length = security_sid_to_context(newsid, &newcon, &len); + length = security_sid_to_context(state, newsid, &newcon, &len); if (length) goto out; @@ -962,6 +1028,8 @@ out: static ssize_t sel_write_user(struct file *file, char *buf, size_t size) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *con = NULL, *user = NULL, *ptr; u32 sid, *sids = NULL; ssize_t length; @@ -969,7 +1037,8 @@ static ssize_t sel_write_user(struct file *file, char *buf, size_t size) int i, rc; u32 len, nsids; - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__COMPUTE_USER, NULL); if (length) @@ -989,18 +1058,18 @@ static ssize_t sel_write_user(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s", con, user) != 2) goto out; - length = security_context_str_to_sid(con, &sid, GFP_KERNEL); + length = security_context_str_to_sid(state, con, &sid, GFP_KERNEL); if (length) goto out; - length = security_get_user_sids(sid, user, &sids, &nsids); + length = security_get_user_sids(state, sid, user, &sids, &nsids); if (length) goto out; length = sprintf(buf, "%u", nsids) + 1; ptr = buf + length; for (i = 0; i < nsids; i++) { - rc = security_sid_to_context(sids[i], &newcon, &len); + rc = security_sid_to_context(state, sids[i], &newcon, &len); if (rc) { length = rc; goto out; @@ -1024,6 +1093,8 @@ out: static ssize_t sel_write_member(struct file *file, char *buf, size_t size) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *scon = NULL, *tcon = NULL; u32 ssid, tsid, newsid; u16 tclass; @@ -1031,7 +1102,8 @@ static ssize_t sel_write_member(struct file *file, char *buf, size_t size) char *newcon = NULL; u32 len; - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__COMPUTE_MEMBER, NULL); if (length) @@ -1051,19 +1123,19 @@ static ssize_t sel_write_member(struct file *file, char *buf, size_t size) if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3) goto out; - length = security_context_str_to_sid(scon, &ssid, GFP_KERNEL); + length = security_context_str_to_sid(state, scon, &ssid, GFP_KERNEL); if (length) goto out; - length = security_context_str_to_sid(tcon, &tsid, GFP_KERNEL); + length = security_context_str_to_sid(state, tcon, &tsid, GFP_KERNEL); if (length) goto out; - length = security_member_sid(ssid, tsid, tclass, &newsid); + length = security_member_sid(state, ssid, tsid, tclass, &newsid); if (length) goto out; - length = security_sid_to_context(newsid, &newcon, &len); + length = security_sid_to_context(state, newsid, &newcon, &len); if (length) goto out; @@ -1097,6 +1169,7 @@ static struct inode *sel_make_inode(struct super_block *sb, int mode) static ssize_t sel_read_bool(struct file *filep, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filep)->i_sb->s_fs_info; char *page = NULL; ssize_t length; ssize_t ret; @@ -1104,10 +1177,11 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf, unsigned index = file_inode(filep)->i_ino & SEL_INO_MASK; const char *name = filep->f_path.dentry->d_name.name; - mutex_lock(&sel_mutex); + mutex_lock(&fsi->mutex); ret = -EINVAL; - if (index >= bool_num || strcmp(name, bool_pending_names[index])) + if (index >= fsi->bool_num || strcmp(name, + fsi->bool_pending_names[index])) goto out; ret = -ENOMEM; @@ -1115,16 +1189,16 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf, if (!page) goto out; - cur_enforcing = security_get_bool_value(index); + cur_enforcing = security_get_bool_value(fsi->state, index); if (cur_enforcing < 0) { ret = cur_enforcing; goto out; } length = scnprintf(page, PAGE_SIZE, "%d %d", cur_enforcing, - bool_pending_values[index]); + fsi->bool_pending_values[index]); ret = simple_read_from_buffer(buf, count, ppos, page, length); out: - mutex_unlock(&sel_mutex); + mutex_unlock(&fsi->mutex); free_page((unsigned long)page); return ret; } @@ -1132,22 +1206,25 @@ out: static ssize_t sel_write_bool(struct file *filep, const char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filep)->i_sb->s_fs_info; char *page = NULL; ssize_t length; int new_value; unsigned index = file_inode(filep)->i_ino & SEL_INO_MASK; const char *name = filep->f_path.dentry->d_name.name; - mutex_lock(&sel_mutex); + mutex_lock(&fsi->mutex); - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__SETBOOL, NULL); if (length) goto out; length = -EINVAL; - if (index >= bool_num || strcmp(name, bool_pending_names[index])) + if (index >= fsi->bool_num || strcmp(name, + fsi->bool_pending_names[index])) goto out; length = -ENOMEM; @@ -1173,11 +1250,11 @@ static ssize_t sel_write_bool(struct file *filep, const char __user *buf, if (new_value) new_value = 1; - bool_pending_values[index] = new_value; + fsi->bool_pending_values[index] = new_value; length = count; out: - mutex_unlock(&sel_mutex); + mutex_unlock(&fsi->mutex); kfree(page); return length; } @@ -1192,13 +1269,15 @@ static ssize_t sel_commit_bools_write(struct file *filep, const char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filep)->i_sb->s_fs_info; char *page = NULL; ssize_t length; int new_value; - mutex_lock(&sel_mutex); + mutex_lock(&fsi->mutex); - length = avc_has_perm(current_sid(), SECINITSID_SECURITY, + length = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__SETBOOL, NULL); if (length) @@ -1225,14 +1304,15 @@ static ssize_t sel_commit_bools_write(struct file *filep, goto out; length = 0; - if (new_value && bool_pending_values) - length = security_set_bools(bool_num, bool_pending_values); + if (new_value && fsi->bool_pending_values) + length = security_set_bools(fsi->state, fsi->bool_num, + fsi->bool_pending_values); if (!length) length = count; out: - mutex_unlock(&sel_mutex); + mutex_unlock(&fsi->mutex); kfree(page); return length; } @@ -1250,12 +1330,12 @@ static void sel_remove_entries(struct dentry *de) #define BOOL_DIR_NAME "booleans" -static int sel_make_bools(void) +static int sel_make_bools(struct selinux_fs_info *fsi) { int i, ret; ssize_t len; struct dentry *dentry = NULL; - struct dentry *dir = bool_dir; + struct dentry *dir = fsi->bool_dir; struct inode *inode = NULL; struct inode_security_struct *isec; char **names = NULL, *page; @@ -1264,13 +1344,13 @@ static int sel_make_bools(void) u32 sid; /* remove any existing files */ - for (i = 0; i < bool_num; i++) - kfree(bool_pending_names[i]); - kfree(bool_pending_names); - kfree(bool_pending_values); - bool_num = 0; - bool_pending_names = NULL; - bool_pending_values = NULL; + for (i = 0; i < fsi->bool_num; i++) + kfree(fsi->bool_pending_names[i]); + kfree(fsi->bool_pending_names); + kfree(fsi->bool_pending_values); + fsi->bool_num = 0; + fsi->bool_pending_names = NULL; + fsi->bool_pending_values = NULL; sel_remove_entries(dir); @@ -1279,7 +1359,7 @@ static int sel_make_bools(void) if (!page) goto out; - ret = security_get_bools(&num, &names, &values); + ret = security_get_bools(fsi->state, &num, &names, &values); if (ret) goto out; @@ -1300,7 +1380,8 @@ static int sel_make_bools(void) goto out; isec = (struct inode_security_struct *)inode->i_security; - ret = security_genfs_sid("selinuxfs", page, SECCLASS_FILE, &sid); + ret = security_genfs_sid(fsi->state, "selinuxfs", page, + SECCLASS_FILE, &sid); if (ret) { pr_warn_ratelimited("SELinux: no sid found, defaulting to security isid for %s\n", page); @@ -1313,9 +1394,9 @@ static int sel_make_bools(void) inode->i_ino = i|SEL_BOOL_INO_OFFSET; d_add(dentry, inode); } - bool_num = num; - bool_pending_names = names; - bool_pending_values = values; + fsi->bool_num = num; + fsi->bool_pending_names = names; + fsi->bool_pending_values = values; free_page((unsigned long)page); return 0; @@ -1333,17 +1414,16 @@ out: return ret; } -#define NULL_FILE_NAME "null" - -struct path selinux_null; - static ssize_t sel_read_avc_cache_threshold(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char tmpbuf[TMPBUFLEN]; ssize_t length; - length = scnprintf(tmpbuf, TMPBUFLEN, "%u", avc_cache_threshold); + length = scnprintf(tmpbuf, TMPBUFLEN, "%u", + avc_get_cache_threshold(state->avc)); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } @@ -1352,11 +1432,14 @@ static ssize_t sel_write_avc_cache_threshold(struct file *file, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *page; ssize_t ret; unsigned int new_value; - ret = avc_has_perm(current_sid(), SECINITSID_SECURITY, + ret = avc_has_perm(&selinux_state, + current_sid(), SECINITSID_SECURITY, SECCLASS_SECURITY, SECURITY__SETSECPARAM, NULL); if (ret) @@ -1377,7 +1460,7 @@ static ssize_t sel_write_avc_cache_threshold(struct file *file, if (sscanf(page, "%u", &new_value) != 1) goto out; - avc_cache_threshold = new_value; + avc_set_cache_threshold(state->avc, new_value); ret = count; out: @@ -1388,6 +1471,8 @@ out: static ssize_t sel_read_avc_hash_stats(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; char *page; ssize_t length; @@ -1395,7 +1480,7 @@ static ssize_t sel_read_avc_hash_stats(struct file *filp, char __user *buf, if (!page) return -ENOMEM; - length = avc_get_hash_stats(page); + length = avc_get_hash_stats(state->avc, page); if (length >= 0) length = simple_read_from_buffer(buf, count, ppos, page, length); free_page((unsigned long)page); @@ -1403,6 +1488,32 @@ static ssize_t sel_read_avc_hash_stats(struct file *filp, char __user *buf, return length; } +static ssize_t sel_read_sidtab_hash_stats(struct file *filp, char __user *buf, + size_t count, loff_t *ppos) +{ + struct selinux_fs_info *fsi = file_inode(filp)->i_sb->s_fs_info; + struct selinux_state *state = fsi->state; + char *page; + ssize_t length; + + page = (char *)__get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + length = security_sidtab_hash_stats(state, page); + if (length >= 0) + length = simple_read_from_buffer(buf, count, ppos, page, + length); + free_page((unsigned long)page); + + return length; +} + +static const struct file_operations sel_sidtab_hash_stats_ops = { + .read = sel_read_sidtab_hash_stats, + .llseek = generic_file_llseek, +}; + static const struct file_operations sel_avc_cache_threshold_ops = { .read = sel_read_avc_cache_threshold, .write = sel_write_avc_cache_threshold, @@ -1486,6 +1597,8 @@ static const struct file_operations sel_avc_cache_stats_ops = { static int sel_make_avc_files(struct dentry *dir) { + struct super_block *sb = dir->d_sb; + struct selinux_fs_info *fsi = sb->s_fs_info; int i; static const struct tree_descr files[] = { { "cache_threshold", @@ -1509,7 +1622,38 @@ static int sel_make_avc_files(struct dentry *dir) return -ENOMEM; inode->i_fop = files[i].ops; - inode->i_ino = ++sel_last_ino; + inode->i_ino = ++fsi->last_ino; + d_add(dentry, inode); + } + + return 0; +} + +static int sel_make_ss_files(struct dentry *dir) +{ + struct super_block *sb = dir->d_sb; + struct selinux_fs_info *fsi = sb->s_fs_info; + int i; + static struct tree_descr files[] = { + { "sidtab_hash_stats", &sel_sidtab_hash_stats_ops, S_IRUGO }, + }; + + for (i = 0; i < ARRAY_SIZE(files); i++) { + struct inode *inode; + struct dentry *dentry; + + dentry = d_alloc_name(dir, files[i].name); + if (!dentry) + return -ENOMEM; + + inode = sel_make_inode(dir->d_sb, S_IFREG|files[i].mode); + if (!inode) { + dput(dentry); + return -ENOMEM; + } + + inode->i_fop = files[i].ops; + inode->i_ino = ++fsi->last_ino; d_add(dentry, inode); } @@ -1519,12 +1663,13 @@ static int sel_make_avc_files(struct dentry *dir) static ssize_t sel_read_initcon(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; char *con; u32 sid, len; ssize_t ret; sid = file_inode(file)->i_ino&SEL_INO_MASK; - ret = security_sid_to_context(sid, &con, &len); + ret = security_sid_to_context(fsi->state, sid, &con, &len); if (ret) return ret; @@ -1612,12 +1757,13 @@ static const struct file_operations sel_perm_ops = { static ssize_t sel_read_policycap(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; int value; char tmpbuf[TMPBUFLEN]; ssize_t length; unsigned long i_ino = file_inode(file)->i_ino; - value = security_policycap_supported(i_ino & SEL_INO_MASK); + value = security_policycap_supported(fsi->state, i_ino & SEL_INO_MASK); length = scnprintf(tmpbuf, TMPBUFLEN, "%d", value); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); @@ -1631,10 +1777,11 @@ static const struct file_operations sel_policycap_ops = { static int sel_make_perm_files(char *objclass, int classvalue, struct dentry *dir) { + struct selinux_fs_info *fsi = dir->d_sb->s_fs_info; int i, rc, nperms; char **perms; - rc = security_get_permissions(objclass, &perms, &nperms); + rc = security_get_permissions(fsi->state, objclass, &perms, &nperms); if (rc) return rc; @@ -1668,6 +1815,8 @@ out: static int sel_make_class_dir_entries(char *classname, int index, struct dentry *dir) { + struct super_block *sb = dir->d_sb; + struct selinux_fs_info *fsi = sb->s_fs_info; struct dentry *dentry = NULL; struct inode *inode = NULL; int rc; @@ -1684,7 +1833,7 @@ static int sel_make_class_dir_entries(char *classname, int index, inode->i_ino = sel_class_to_ino(index); d_add(dentry, inode); - dentry = sel_make_dir(dir, "perms", &last_class_ino); + dentry = sel_make_dir(dir, "perms", &fsi->last_class_ino); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -1693,26 +1842,27 @@ static int sel_make_class_dir_entries(char *classname, int index, return rc; } -static int sel_make_classes(void) +static int sel_make_classes(struct selinux_fs_info *fsi) { + int rc, nclasses, i; char **classes; /* delete any existing entries */ - sel_remove_entries(class_dir); + sel_remove_entries(fsi->class_dir); - rc = security_get_classes(&classes, &nclasses); + rc = security_get_classes(fsi->state, &classes, &nclasses); if (rc) return rc; /* +2 since classes are 1-indexed */ - last_class_ino = sel_class_to_ino(nclasses + 2); + fsi->last_class_ino = sel_class_to_ino(nclasses + 2); for (i = 0; i < nclasses; i++) { struct dentry *class_name_dir; - class_name_dir = sel_make_dir(class_dir, classes[i], - &last_class_ino); + class_name_dir = sel_make_dir(fsi->class_dir, classes[i], + &fsi->last_class_ino); if (IS_ERR(class_name_dir)) { rc = PTR_ERR(class_name_dir); goto out; @@ -1732,25 +1882,25 @@ out: return rc; } -static int sel_make_policycap(void) +static int sel_make_policycap(struct selinux_fs_info *fsi) { unsigned int iter; struct dentry *dentry = NULL; struct inode *inode = NULL; - sel_remove_entries(policycap_dir); + sel_remove_entries(fsi->policycap_dir); for (iter = 0; iter <= POLICYDB_CAPABILITY_MAX; iter++) { if (iter < ARRAY_SIZE(selinux_policycap_names)) - dentry = d_alloc_name(policycap_dir, + dentry = d_alloc_name(fsi->policycap_dir, selinux_policycap_names[iter]); else - dentry = d_alloc_name(policycap_dir, "unknown"); + dentry = d_alloc_name(fsi->policycap_dir, "unknown"); if (dentry == NULL) return -ENOMEM; - inode = sel_make_inode(policycap_dir->d_sb, S_IFREG | S_IRUGO); + inode = sel_make_inode(fsi->sb, S_IFREG | 0444); if (inode == NULL) return -ENOMEM; @@ -1789,8 +1939,11 @@ static struct dentry *sel_make_dir(struct dentry *dir, const char *name, return dentry; } +#define NULL_FILE_NAME "null" + static int sel_fill_super(struct super_block *sb, void *data, int silent) { + struct selinux_fs_info *fsi; int ret; struct dentry *dentry; struct inode *inode; @@ -1818,14 +1971,20 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent) S_IWUGO}, /* last one */ {""} }; + + ret = selinux_fs_info_create(sb); + if (ret) + goto err; + ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files); if (ret) goto err; - bool_dir = sel_make_dir(sb->s_root, BOOL_DIR_NAME, &sel_last_ino); - if (IS_ERR(bool_dir)) { - ret = PTR_ERR(bool_dir); - bool_dir = NULL; + fsi = sb->s_fs_info; + fsi->bool_dir = sel_make_dir(sb->s_root, BOOL_DIR_NAME, &fsi->last_ino); + if (IS_ERR(fsi->bool_dir)) { + ret = PTR_ERR(fsi->bool_dir); + fsi->bool_dir = NULL; goto err; } @@ -1839,7 +1998,7 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent) if (!inode) goto err; - inode->i_ino = ++sel_last_ino; + inode->i_ino = ++fsi->last_ino; isec = (struct inode_security_struct *)inode->i_security; isec->sid = SECINITSID_DEVNULL; isec->sclass = SECCLASS_CHR_FILE; @@ -1847,19 +2006,26 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent) init_special_inode(inode, S_IFCHR | S_IRUGO | S_IWUGO, MKDEV(MEM_MAJOR, 3)); d_add(dentry, inode); - selinux_null.dentry = dentry; - dentry = sel_make_dir(sb->s_root, "avc", &sel_last_ino); + dentry = sel_make_dir(sb->s_root, "avc", &fsi->last_ino); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); goto err; } ret = sel_make_avc_files(dentry); + + dentry = sel_make_dir(sb->s_root, "ss", &fsi->last_ino); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto err; + } + + ret = sel_make_ss_files(dentry); if (ret) goto err; - dentry = sel_make_dir(sb->s_root, "initial_contexts", &sel_last_ino); + dentry = sel_make_dir(sb->s_root, "initial_contexts", &fsi->last_ino); if (IS_ERR(dentry)) { ret = PTR_ERR(dentry); goto err; @@ -1869,23 +2035,31 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent) if (ret) goto err; - class_dir = sel_make_dir(sb->s_root, "class", &sel_last_ino); - if (IS_ERR(class_dir)) { - ret = PTR_ERR(class_dir); - class_dir = NULL; + fsi->class_dir = sel_make_dir(sb->s_root, "class", &fsi->last_ino); + if (IS_ERR(fsi->class_dir)) { + ret = PTR_ERR(fsi->class_dir); + fsi->class_dir = NULL; goto err; } - policycap_dir = sel_make_dir(sb->s_root, "policy_capabilities", &sel_last_ino); - if (IS_ERR(policycap_dir)) { - ret = PTR_ERR(policycap_dir); - policycap_dir = NULL; + fsi->policycap_dir = sel_make_dir(sb->s_root, "policy_capabilities", + &fsi->last_ino); + if (IS_ERR(fsi->policycap_dir)) { + ret = PTR_ERR(fsi->policycap_dir); + fsi->policycap_dir = NULL; goto err; } + + ret = sel_make_policy_nodes(fsi); + if (ret) + goto err; return 0; err: printk(KERN_ERR "SELinux: %s: failed while creating inodes\n", __func__); + + selinux_fs_info_free(sb); + return ret; } @@ -1895,16 +2069,25 @@ static struct dentry *sel_mount(struct file_system_type *fs_type, return mount_single(fs_type, flags, data, sel_fill_super); } +static void sel_kill_sb(struct super_block *sb) +{ + selinux_fs_info_free(sb); + kill_litter_super(sb); +} + static struct file_system_type sel_fs_type = { .name = "selinuxfs", .mount = sel_mount, - .kill_sb = kill_litter_super, + .kill_sb = sel_kill_sb, }; struct vfsmount *selinuxfs_mount; +struct path selinux_null; static int __init init_sel_fs(void) { + struct qstr null_name = QSTR_INIT(NULL_FILE_NAME, + sizeof(NULL_FILE_NAME)-1); int err; if (!selinux_enabled) @@ -1926,6 +2109,13 @@ static int __init init_sel_fs(void) err = PTR_ERR(selinuxfs_mount); selinuxfs_mount = NULL; } + selinux_null.dentry = d_hash_and_lookup(selinux_null.mnt->mnt_root, + &null_name); + if (IS_ERR(selinux_null.dentry)) { + pr_err("selinuxfs: could not lookup null!\n"); + err = PTR_ERR(selinux_null.dentry); + selinux_null.dentry = NULL; + } return err; } diff --git a/security/selinux/ss/avtab.c b/security/selinux/ss/avtab.c index 2c3c7d010d8a..a2c9148b0662 100644 --- a/security/selinux/ss/avtab.c +++ b/security/selinux/ss/avtab.c @@ -655,7 +655,8 @@ int avtab_write(struct policydb *p, struct avtab *a, void *fp) return rc; } -void avtab_cache_init(void) + +void __init avtab_cache_init(void) { avtab_node_cachep = kmem_cache_create("avtab_node", sizeof(struct avtab_node), @@ -664,9 +665,3 @@ void avtab_cache_init(void) sizeof(struct avtab_extended_perms), 0, SLAB_PANIC, NULL); } - -void avtab_cache_destroy(void) -{ - kmem_cache_destroy(avtab_node_cachep); - kmem_cache_destroy(avtab_xperms_cachep); -} diff --git a/security/selinux/ss/avtab.h b/security/selinux/ss/avtab.h index 725853cadc42..0d652fad5319 100644 --- a/security/selinux/ss/avtab.h +++ b/security/selinux/ss/avtab.h @@ -114,9 +114,6 @@ struct avtab_node *avtab_search_node(struct avtab *h, struct avtab_key *key); struct avtab_node *avtab_search_node_next(struct avtab_node *node, int specified); -void avtab_cache_init(void); -void avtab_cache_destroy(void); - #define MAX_AVTAB_HASH_BITS 16 #define MAX_AVTAB_HASH_BUCKETS (1 << MAX_AVTAB_HASH_BITS) diff --git a/security/selinux/ss/context.h b/security/selinux/ss/context.h index 2260c44a568c..69fd1943683c 100644 --- a/security/selinux/ss/context.h +++ b/security/selinux/ss/context.h @@ -31,6 +31,7 @@ struct context { u32 len; /* length of string in bytes */ struct mls_range range; char *str; /* string representation if context cannot be mapped. */ + u32 hash; /* a hash of the string representation */ }; static inline void mls_context_init(struct context *c) @@ -136,12 +137,13 @@ static inline int context_cpy(struct context *dst, struct context *src) kfree(dst->str); return rc; } + dst->hash = src->hash; return 0; } static inline void context_destroy(struct context *c) { - c->user = c->role = c->type = 0; + c->user = c->role = c->type = c->hash = 0; kfree(c->str); c->str = NULL; c->len = 0; @@ -150,6 +152,8 @@ static inline void context_destroy(struct context *c) static inline int context_cmp(struct context *c1, struct context *c2) { + if (c1->hash && c2->hash && (c1->hash != c2->hash)) + return 0; if (c1->len && c2->len) return (c1->len == c2->len && !strcmp(c1->str, c2->str)); if (c1->len || c2->len) @@ -160,5 +164,10 @@ static inline int context_cmp(struct context *c1, struct context *c2) mls_context_cmp(c1, c2)); } +static inline unsigned int context_compute_hash(const char *s) +{ + return full_name_hash(NULL, s, strlen(s)); +} + #endif /* _SS_CONTEXT_H_ */ diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index b6a78b09235c..5ae8c61b75bf 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -523,14 +523,9 @@ int ebitmap_write(struct ebitmap *e, void *fp) return 0; } -void ebitmap_cache_init(void) +void __init ebitmap_cache_init(void) { ebitmap_node_cachep = kmem_cache_create("ebitmap_node", sizeof(struct ebitmap_node), 0, SLAB_PANIC, NULL); } - -void ebitmap_cache_destroy(void) -{ - kmem_cache_destroy(ebitmap_node_cachep); -} diff --git a/security/selinux/ss/ebitmap.h b/security/selinux/ss/ebitmap.h index edf4fa39c60a..6aa7cf6a2197 100644 --- a/security/selinux/ss/ebitmap.h +++ b/security/selinux/ss/ebitmap.h @@ -131,9 +131,6 @@ void ebitmap_destroy(struct ebitmap *e); int ebitmap_read(struct ebitmap *e, void *fp); int ebitmap_write(struct ebitmap *e, void *fp); -void ebitmap_cache_init(void); -void ebitmap_cache_destroy(void); - #ifdef CONFIG_NETLABEL int ebitmap_netlbl_export(struct ebitmap *ebmap, struct netlbl_lsm_catmap **catmap); diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c index 6bd6dcd954fa..836aa5b7d29b 100644 --- a/security/selinux/ss/hashtab.c +++ b/security/selinux/ss/hashtab.c @@ -10,6 +10,8 @@ #include #include "hashtab.h" +static struct kmem_cache *hashtab_node_cachep; + struct hashtab *hashtab_create(u32 (*hash_value)(struct hashtab *h, const void *key), int (*keycmp)(struct hashtab *h, const void *key1, const void *key2), u32 size) @@ -58,7 +60,7 @@ int hashtab_insert(struct hashtab *h, void *key, void *datum) if (cur && (h->keycmp(h, key, cur->key) == 0)) return -EEXIST; - newnode = kzalloc(sizeof(*newnode), GFP_KERNEL); + newnode = kmem_cache_zalloc(hashtab_node_cachep, GFP_KERNEL); if (!newnode) return -ENOMEM; newnode->key = key; @@ -107,7 +109,7 @@ void hashtab_destroy(struct hashtab *h) while (cur) { temp = cur; cur = cur->next; - kfree(temp); + kmem_cache_free(hashtab_node_cachep, temp); } h->htable[i] = NULL; } @@ -167,3 +169,10 @@ void hashtab_stat(struct hashtab *h, struct hashtab_info *info) info->slots_used = slots_used; info->max_chain_len = max_chain_len; } + +void __init hashtab_cache_init(void) +{ + hashtab_node_cachep = kmem_cache_create("hashtab_node", + sizeof(struct hashtab_node), + 0, SLAB_PANIC, NULL); +} diff --git a/security/selinux/ss/mls.c b/security/selinux/ss/mls.c index ad982ce8bfa4..18ba0c2328fb 100644 --- a/security/selinux/ss/mls.c +++ b/security/selinux/ss/mls.c @@ -33,20 +33,20 @@ * Return the length in bytes for the MLS fields of the * security context string representation of `context'. */ -int mls_compute_context_len(struct context *context) +int mls_compute_context_len(struct policydb *p, struct context *context) { int i, l, len, head, prev; char *nm; struct ebitmap *e; struct ebitmap_node *node; - if (!policydb.mls_enabled) + if (!p->mls_enabled) return 0; len = 1; /* for the beginning ":" */ for (l = 0; l < 2; l++) { int index_sens = context->range.level[l].sens; - len += strlen(sym_name(&policydb, SYM_LEVELS, index_sens - 1)); + len += strlen(sym_name(p, SYM_LEVELS, index_sens - 1)); /* categories */ head = -2; @@ -56,17 +56,17 @@ int mls_compute_context_len(struct context *context) if (i - prev > 1) { /* one or more negative bits are skipped */ if (head != prev) { - nm = sym_name(&policydb, SYM_CATS, prev); + nm = sym_name(p, SYM_CATS, prev); len += strlen(nm) + 1; } - nm = sym_name(&policydb, SYM_CATS, i); + nm = sym_name(p, SYM_CATS, i); len += strlen(nm) + 1; head = i; } prev = i; } if (prev != head) { - nm = sym_name(&policydb, SYM_CATS, prev); + nm = sym_name(p, SYM_CATS, prev); len += strlen(nm) + 1; } if (l == 0) { @@ -86,7 +86,8 @@ int mls_compute_context_len(struct context *context) * the MLS fields of `context' into the string `*scontext'. * Update `*scontext' to point to the end of the MLS fields. */ -void mls_sid_to_context(struct context *context, +void mls_sid_to_context(struct policydb *p, + struct context *context, char **scontext) { char *scontextp, *nm; @@ -94,7 +95,7 @@ void mls_sid_to_context(struct context *context, struct ebitmap *e; struct ebitmap_node *node; - if (!policydb.mls_enabled) + if (!p->mls_enabled) return; scontextp = *scontext; @@ -103,7 +104,7 @@ void mls_sid_to_context(struct context *context, scontextp++; for (l = 0; l < 2; l++) { - strcpy(scontextp, sym_name(&policydb, SYM_LEVELS, + strcpy(scontextp, sym_name(p, SYM_LEVELS, context->range.level[l].sens - 1)); scontextp += strlen(scontextp); @@ -119,7 +120,7 @@ void mls_sid_to_context(struct context *context, *scontextp++ = '.'; else *scontextp++ = ','; - nm = sym_name(&policydb, SYM_CATS, prev); + nm = sym_name(p, SYM_CATS, prev); strcpy(scontextp, nm); scontextp += strlen(nm); } @@ -127,7 +128,7 @@ void mls_sid_to_context(struct context *context, *scontextp++ = ':'; else *scontextp++ = ','; - nm = sym_name(&policydb, SYM_CATS, i); + nm = sym_name(p, SYM_CATS, i); strcpy(scontextp, nm); scontextp += strlen(nm); head = i; @@ -140,7 +141,7 @@ void mls_sid_to_context(struct context *context, *scontextp++ = '.'; else *scontextp++ = ','; - nm = sym_name(&policydb, SYM_CATS, prev); + nm = sym_name(p, SYM_CATS, prev); strcpy(scontextp, nm); scontextp += strlen(nm); } @@ -217,9 +218,7 @@ int mls_context_isvalid(struct policydb *p, struct context *c) /* * Set the MLS fields in the security context structure * `context' based on the string representation in - * the string `*scontext'. Update `*scontext' to - * point to the end of the string representation of - * the MLS fields. + * the string `scontext'. * * This function modifies the string in place, inserting * NULL characters to terminate the MLS fields. @@ -234,22 +233,21 @@ int mls_context_isvalid(struct policydb *p, struct context *c) */ int mls_context_to_sid(struct policydb *pol, char oldc, - char **scontext, + char *scontext, struct context *context, struct sidtab *s, u32 def_sid) { - - char delim; - char *scontextp, *p, *rngptr; + char *sensitivity, *cur_cat, *next_cat, *rngptr; struct level_datum *levdatum; struct cat_datum *catdatum, *rngdatum; - int l, rc = -EINVAL; + int l, rc, i; + char *rangep[2]; if (!pol->mls_enabled) { - if (def_sid != SECSID_NULL && oldc) - *scontext += strlen(*scontext) + 1; - return 0; + if ((def_sid != SECSID_NULL && oldc) || (*scontext) == '\0') + return 0; + return -EINVAL; } /* @@ -260,113 +258,94 @@ int mls_context_to_sid(struct policydb *pol, struct context *defcon; if (def_sid == SECSID_NULL) - goto out; + return -EINVAL; defcon = sidtab_search(s, def_sid); if (!defcon) - goto out; + return -EINVAL; - rc = mls_context_cpy(context, defcon); - goto out; + return mls_context_cpy(context, defcon); } - /* Extract low sensitivity. */ - scontextp = p = *scontext; - while (*p && *p != ':' && *p != '-') - p++; - - delim = *p; - if (delim != '\0') - *p++ = '\0'; + /* + * If we're dealing with a range, figure out where the two parts + * of the range begin. + */ + rangep[0] = scontext; + rangep[1] = strchr(scontext, '-'); + if (rangep[1]) { + rangep[1][0] = '\0'; + rangep[1]++; + } + /* For each part of the range: */ for (l = 0; l < 2; l++) { - levdatum = hashtab_search(pol->p_levels.table, scontextp); - if (!levdatum) { - rc = -EINVAL; - goto out; - } + /* Split sensitivity and category set. */ + sensitivity = rangep[l]; + if (sensitivity == NULL) + break; + next_cat = strchr(sensitivity, ':'); + if (next_cat) + *(next_cat++) = '\0'; + /* Parse sensitivity. */ + levdatum = hashtab_search(pol->p_levels.table, sensitivity); + if (!levdatum) + return -EINVAL; context->range.level[l].sens = levdatum->level->sens; - if (delim == ':') { - /* Extract category set. */ - while (1) { - scontextp = p; - while (*p && *p != ',' && *p != '-') - p++; - delim = *p; - if (delim != '\0') - *p++ = '\0'; + /* Extract category set. */ + while (next_cat != NULL) { + cur_cat = next_cat; + next_cat = strchr(next_cat, ','); + if (next_cat != NULL) + *(next_cat++) = '\0'; - /* Separate into range if exists */ - rngptr = strchr(scontextp, '.'); - if (rngptr != NULL) { - /* Remove '.' */ - *rngptr++ = '\0'; - } + /* Separate into range if exists */ + rngptr = strchr(cur_cat, '.'); + if (rngptr != NULL) { + /* Remove '.' */ + *rngptr++ = '\0'; + } - catdatum = hashtab_search(pol->p_cats.table, - scontextp); - if (!catdatum) { - rc = -EINVAL; - goto out; - } + catdatum = hashtab_search(pol->p_cats.table, cur_cat); + if (!catdatum) + return -EINVAL; - rc = ebitmap_set_bit(&context->range.level[l].cat, - catdatum->value - 1, 1); + rc = ebitmap_set_bit(&context->range.level[l].cat, + catdatum->value - 1, 1); + if (rc) + return rc; + + /* If range, set all categories in range */ + if (rngptr == NULL) + continue; + + rngdatum = hashtab_search(pol->p_cats.table, rngptr); + if (!rngdatum) + return -EINVAL; + + if (catdatum->value >= rngdatum->value) + return -EINVAL; + + for (i = catdatum->value; i < rngdatum->value; i++) { + rc = ebitmap_set_bit(&context->range.level[l].cat, i, 1); if (rc) - goto out; - - /* If range, set all categories in range */ - if (rngptr) { - int i; - - rngdatum = hashtab_search(pol->p_cats.table, rngptr); - if (!rngdatum) { - rc = -EINVAL; - goto out; - } - - if (catdatum->value >= rngdatum->value) { - rc = -EINVAL; - goto out; - } - - for (i = catdatum->value; i < rngdatum->value; i++) { - rc = ebitmap_set_bit(&context->range.level[l].cat, i, 1); - if (rc) - goto out; - } - } - - if (delim != ',') - break; + return rc; } } - if (delim == '-') { - /* Extract high sensitivity. */ - scontextp = p; - while (*p && *p != ':') - p++; - - delim = *p; - if (delim != '\0') - *p++ = '\0'; - } else - break; } - if (l == 0) { + /* If we didn't see a '-', the range start is also the range end. */ + if (rangep[1] == NULL) { context->range.level[1].sens = context->range.level[0].sens; rc = ebitmap_cpy(&context->range.level[1].cat, &context->range.level[0].cat); if (rc) - goto out; + return rc; } - *scontext = ++p; - rc = 0; -out: - return rc; + + return 0; } /* @@ -375,23 +354,22 @@ out: * the string `str'. This function will allocate temporary memory with the * given constraints of gfp_mask. */ -int mls_from_string(char *str, struct context *context, gfp_t gfp_mask) +int mls_from_string(struct policydb *p, char *str, struct context *context, + gfp_t gfp_mask) { - char *tmpstr, *freestr; + char *tmpstr; int rc; - if (!policydb.mls_enabled) + if (!p->mls_enabled) return -EINVAL; - /* we need freestr because mls_context_to_sid will change - the value of tmpstr */ - tmpstr = freestr = kstrdup(str, gfp_mask); + tmpstr = kstrdup(str, gfp_mask); if (!tmpstr) { rc = -ENOMEM; } else { - rc = mls_context_to_sid(&policydb, ':', &tmpstr, context, + rc = mls_context_to_sid(p, ':', tmpstr, context, NULL, SECSID_NULL); - kfree(freestr); + kfree(tmpstr); } return rc; @@ -417,10 +395,11 @@ int mls_range_set(struct context *context, return rc; } -int mls_setup_user_range(struct context *fromcon, struct user_datum *user, +int mls_setup_user_range(struct policydb *p, + struct context *fromcon, struct user_datum *user, struct context *usercon) { - if (policydb.mls_enabled) { + if (p->mls_enabled) { struct mls_level *fromcon_sen = &(fromcon->range.level[0]); struct mls_level *fromcon_clr = &(fromcon->range.level[1]); struct mls_level *user_low = &(user->range.level[0]); @@ -457,53 +436,52 @@ int mls_setup_user_range(struct context *fromcon, struct user_datum *user, /* * Convert the MLS fields in the security context - * structure `c' from the values specified in the - * policy `oldp' to the values specified in the policy `newp'. + * structure `oldc' from the values specified in the + * policy `oldp' to the values specified in the policy `newp', + * storing the resulting context in `newc'. */ int mls_convert_context(struct policydb *oldp, struct policydb *newp, - struct context *c) + struct context *oldc, + struct context *newc) { struct level_datum *levdatum; struct cat_datum *catdatum; - struct ebitmap bitmap; struct ebitmap_node *node; int l, i; - if (!policydb.mls_enabled) + if (!oldp->mls_enabled || !newp->mls_enabled) return 0; for (l = 0; l < 2; l++) { levdatum = hashtab_search(newp->p_levels.table, sym_name(oldp, SYM_LEVELS, - c->range.level[l].sens - 1)); + oldc->range.level[l].sens - 1)); if (!levdatum) return -EINVAL; - c->range.level[l].sens = levdatum->level->sens; + newc->range.level[l].sens = levdatum->level->sens; - ebitmap_init(&bitmap); - ebitmap_for_each_positive_bit(&c->range.level[l].cat, node, i) { + ebitmap_for_each_positive_bit(&oldc->range.level[l].cat, + node, i) { int rc; catdatum = hashtab_search(newp->p_cats.table, sym_name(oldp, SYM_CATS, i)); if (!catdatum) return -EINVAL; - rc = ebitmap_set_bit(&bitmap, catdatum->value - 1, 1); + rc = ebitmap_set_bit(&newc->range.level[l].cat, + catdatum->value - 1, 1); if (rc) return rc; - - cond_resched(); } - ebitmap_destroy(&c->range.level[l].cat); - c->range.level[l].cat = bitmap; } return 0; } -int mls_compute_sid(struct context *scontext, +int mls_compute_sid(struct policydb *p, + struct context *scontext, struct context *tcontext, u16 tclass, u32 specified, @@ -515,7 +493,7 @@ int mls_compute_sid(struct context *scontext, struct class_datum *cladatum; int default_range = 0; - if (!policydb.mls_enabled) + if (!p->mls_enabled) return 0; switch (specified) { @@ -524,12 +502,12 @@ int mls_compute_sid(struct context *scontext, rtr.source_type = scontext->type; rtr.target_type = tcontext->type; rtr.target_class = tclass; - r = hashtab_search(policydb.range_tr, &rtr); + r = hashtab_search(p->range_tr, &rtr); if (r) return mls_range_set(newcontext, r); - if (tclass && tclass <= policydb.p_classes.nprim) { - cladatum = policydb.class_val_to_struct[tclass - 1]; + if (tclass && tclass <= p->p_classes.nprim) { + cladatum = p->class_val_to_struct[tclass - 1]; if (cladatum) default_range = cladatum->default_range; } @@ -551,7 +529,7 @@ int mls_compute_sid(struct context *scontext, /* Fallthrough */ case AVTAB_CHANGE: - if ((tclass == policydb.process_class) || (sock == true)) + if ((tclass == p->process_class) || (sock == true)) /* Use the process MLS attributes. */ return mls_context_cpy(newcontext, scontext); else @@ -577,10 +555,11 @@ int mls_compute_sid(struct context *scontext, * NetLabel MLS sensitivity level field. * */ -void mls_export_netlbl_lvl(struct context *context, +void mls_export_netlbl_lvl(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr) { - if (!policydb.mls_enabled) + if (!p->mls_enabled) return; secattr->attr.mls.lvl = context->range.level[0].sens - 1; @@ -597,10 +576,11 @@ void mls_export_netlbl_lvl(struct context *context, * NetLabel MLS sensitivity level into the context. * */ -void mls_import_netlbl_lvl(struct context *context, +void mls_import_netlbl_lvl(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr) { - if (!policydb.mls_enabled) + if (!p->mls_enabled) return; context->range.level[0].sens = secattr->attr.mls.lvl + 1; @@ -617,12 +597,13 @@ void mls_import_netlbl_lvl(struct context *context, * MLS category field. Returns zero on success, negative values on failure. * */ -int mls_export_netlbl_cat(struct context *context, +int mls_export_netlbl_cat(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr) { int rc; - if (!policydb.mls_enabled) + if (!p->mls_enabled) return 0; rc = ebitmap_netlbl_export(&context->range.level[0].cat, @@ -645,12 +626,13 @@ int mls_export_netlbl_cat(struct context *context, * negative values on failure. * */ -int mls_import_netlbl_cat(struct context *context, +int mls_import_netlbl_cat(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr) { int rc; - if (!policydb.mls_enabled) + if (!p->mls_enabled) return 0; rc = ebitmap_netlbl_import(&context->range.level[0].cat, diff --git a/security/selinux/ss/mls.h b/security/selinux/ss/mls.h index 131d76266ea5..7954b1e60b64 100644 --- a/security/selinux/ss/mls.h +++ b/security/selinux/ss/mls.h @@ -25,63 +25,76 @@ #include "context.h" #include "policydb.h" -int mls_compute_context_len(struct context *context); -void mls_sid_to_context(struct context *context, char **scontext); +int mls_compute_context_len(struct policydb *p, struct context *context); +void mls_sid_to_context(struct policydb *p, struct context *context, + char **scontext); int mls_context_isvalid(struct policydb *p, struct context *c); int mls_range_isvalid(struct policydb *p, struct mls_range *r); int mls_level_isvalid(struct policydb *p, struct mls_level *l); int mls_context_to_sid(struct policydb *p, char oldc, - char **scontext, + char *scontext, struct context *context, struct sidtab *s, u32 def_sid); -int mls_from_string(char *str, struct context *context, gfp_t gfp_mask); +int mls_from_string(struct policydb *p, char *str, struct context *context, + gfp_t gfp_mask); int mls_range_set(struct context *context, struct mls_range *range); int mls_convert_context(struct policydb *oldp, struct policydb *newp, - struct context *context); + struct context *oldc, + struct context *newc); -int mls_compute_sid(struct context *scontext, +int mls_compute_sid(struct policydb *p, + struct context *scontext, struct context *tcontext, u16 tclass, u32 specified, struct context *newcontext, bool sock); -int mls_setup_user_range(struct context *fromcon, struct user_datum *user, +int mls_setup_user_range(struct policydb *p, + struct context *fromcon, struct user_datum *user, struct context *usercon); #ifdef CONFIG_NETLABEL -void mls_export_netlbl_lvl(struct context *context, +void mls_export_netlbl_lvl(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr); -void mls_import_netlbl_lvl(struct context *context, +void mls_import_netlbl_lvl(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr); -int mls_export_netlbl_cat(struct context *context, +int mls_export_netlbl_cat(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr); -int mls_import_netlbl_cat(struct context *context, +int mls_import_netlbl_cat(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr); #else -static inline void mls_export_netlbl_lvl(struct context *context, +static inline void mls_export_netlbl_lvl(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr) { return; } -static inline void mls_import_netlbl_lvl(struct context *context, +static inline void mls_import_netlbl_lvl(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr) { return; } -static inline int mls_export_netlbl_cat(struct context *context, +static inline int mls_export_netlbl_cat(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr) { return -ENOMEM; } -static inline int mls_import_netlbl_cat(struct context *context, +static inline int mls_import_netlbl_cat(struct policydb *p, + struct context *context, struct netlbl_lsm_secattr *secattr) { return -ENOMEM; diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 9d9f6bb1e56e..2472b2a66f70 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -43,6 +43,7 @@ #include "conditional.h" #include "mls.h" #include "services.h" +#include "flask.h" #define _DEBUG_HASHES @@ -912,13 +913,26 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s) if (!c->context[0].user) { printk(KERN_ERR "SELinux: SID %s was never defined.\n", c->u.name); + sidtab_destroy(s); + goto out; + } + if (c->sid[0] == SECSID_NULL || c->sid[0] > SECINITSID_NUM) { + pr_err("SELinux: Initial SID %s out of range.\n", + c->u.name); + sidtab_destroy(s); + goto out; + } + rc = context_add_hash(p, &c->context[0]); + if (rc) { + sidtab_destroy(s); goto out; } - rc = sidtab_insert(s, c->sid[0], &c->context[0]); + rc = sidtab_set_initial(s, c->sid[0], &c->context[0]); if (rc) { printk(KERN_ERR "SELinux: unable to load initial SID %s.\n", c->u.name); + sidtab_destroy(s); goto out; } } @@ -2386,6 +2400,10 @@ int policydb_read(struct policydb *p, void *fp) p->reject_unknown = !!(le32_to_cpu(buf[1]) & REJECT_UNKNOWN); p->allow_unknown = !!(le32_to_cpu(buf[1]) & ALLOW_UNKNOWN); + if ((le32_to_cpu(buf[1]) & POLICYDB_CONFIG_ANDROID_NETLINK_ROUTE)) { + p->android_netlink_route = 1; + } + if (p->policyvers >= POLICYDB_VERSION_POLCAP) { rc = ebitmap_read(&p->policycaps, fp); if (rc) diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h index 215f8f30ac5a..dbb0ed57ed8b 100644 --- a/security/selinux/ss/policydb.h +++ b/security/selinux/ss/policydb.h @@ -238,6 +238,7 @@ struct genfs { /* The policy database */ struct policydb { int mls_enabled; + int android_netlink_route; /* symbol tables */ struct symtab symtab[SYM_NUM]; @@ -324,6 +325,7 @@ extern int policydb_write(struct policydb *p, void *fp); #define PERM_SYMTAB_SIZE 32 #define POLICYDB_CONFIG_MLS 1 +#define POLICYDB_CONFIG_ANDROID_NETLINK_ROUTE (1 << 31) /* the config flags related to unknown classes/perms are bits 2 and 3 */ #define REJECT_UNKNOWN 0x00000002 diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index b275743e23cc..98c418060032 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -71,7 +71,7 @@ #include "audit.h" /* Policy capability names */ -char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX] = { +const char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX] = { "network_peer_controls", "open_perms", "extended_socket_class", @@ -80,53 +80,32 @@ char *selinux_policycap_names[__POLICYDB_CAPABILITY_MAX] = { "nnp_nosuid_transition" }; -int selinux_policycap_netpeer; -int selinux_policycap_openperm; -int selinux_policycap_extsockclass; -int selinux_policycap_alwaysnetwork; -int selinux_policycap_cgroupseclabel; -int selinux_policycap_nnp_nosuid_transition; +static struct selinux_ss selinux_ss; -static DEFINE_RWLOCK(policy_rwlock); - -static struct sidtab sidtab; -struct policydb policydb; -int ss_initialized; - -/* - * The largest sequence number that has been used when - * providing an access decision to the access vector cache. - * The sequence number only changes when a policy change - * occurs. - */ -static u32 latest_granting; +void selinux_ss_init(struct selinux_ss **ss) +{ + rwlock_init(&selinux_ss.policy_rwlock); + mutex_init(&selinux_ss.status_lock); + *ss = &selinux_ss; +} /* Forward declaration. */ -static int context_struct_to_string(struct context *context, char **scontext, +static int context_struct_to_string(struct policydb *policydb, + struct context *context, + char **scontext, u32 *scontext_len); -static void context_struct_compute_av(struct context *scontext, - struct context *tcontext, - u16 tclass, - struct av_decision *avd, - struct extended_perms *xperms); - -struct selinux_mapping { - u16 value; /* policy value */ - unsigned num_perms; - u32 perms[sizeof(u32) * 8]; -}; - -static struct selinux_mapping *current_mapping; -static u16 current_mapping_size; +static void context_struct_compute_av(struct policydb *policydb, + struct context *scontext, + struct context *tcontext, + u16 tclass, + struct av_decision *avd, + struct extended_perms *xperms); static int selinux_set_mapping(struct policydb *pol, struct security_class_mapping *map, - struct selinux_mapping **out_map_p, - u16 *out_map_size) + struct selinux_map *out_map) { - struct selinux_mapping *out_map = NULL; - size_t size = sizeof(struct selinux_mapping); u16 i, j; unsigned k; bool print_unknown_handle = false; @@ -139,15 +118,15 @@ static int selinux_set_mapping(struct policydb *pol, i++; /* Allocate space for the class records, plus one for class zero */ - out_map = kcalloc(++i, size, GFP_ATOMIC); - if (!out_map) + out_map->mapping = kcalloc(++i, sizeof(*out_map->mapping), GFP_ATOMIC); + if (!out_map->mapping) return -ENOMEM; /* Store the raw class and permission values */ j = 0; while (map[j].name) { struct security_class_mapping *p_in = map + (j++); - struct selinux_mapping *p_out = out_map + j; + struct selinux_mapping *p_out = out_map->mapping + j; /* An empty class string skips ahead */ if (!strcmp(p_in->name, "")) { @@ -157,8 +136,7 @@ static int selinux_set_mapping(struct policydb *pol, p_out->value = string_to_security_class(pol, p_in->name); if (!p_out->value) { - printk(KERN_INFO - "SELinux: Class %s not defined in policy.\n", + pr_info("SELinux: Class %s not defined in policy.\n", p_in->name); if (pol->reject_unknown) goto err; @@ -177,8 +155,7 @@ static int selinux_set_mapping(struct policydb *pol, p_out->perms[k] = string_to_av_perm(pol, p_out->value, p_in->perms[k]); if (!p_out->perms[k]) { - printk(KERN_INFO - "SELinux: Permission %s in class %s not defined in policy.\n", + pr_info("SELinux: Permission %s in class %s not defined in policy.\n", p_in->perms[k], p_in->name); if (pol->reject_unknown) goto err; @@ -191,14 +168,14 @@ static int selinux_set_mapping(struct policydb *pol, } if (print_unknown_handle) - printk(KERN_INFO "SELinux: the above unknown classes and permissions will be %s\n", + pr_info("SELinux: the above unknown classes and permissions will be %s\n", pol->allow_unknown ? "allowed" : "denied"); - *out_map_p = out_map; - *out_map_size = i; + out_map->size = i; return 0; err: - kfree(out_map); + kfree(out_map->mapping); + out_map->mapping = NULL; return -EINVAL; } @@ -206,10 +183,10 @@ err: * Get real, policy values from mapped values */ -static u16 unmap_class(u16 tclass) +static u16 unmap_class(struct selinux_map *map, u16 tclass) { - if (tclass < current_mapping_size) - return current_mapping[tclass].value; + if (tclass < map->size) + return map->mapping[tclass].value; return tclass; } @@ -217,42 +194,44 @@ static u16 unmap_class(u16 tclass) /* * Get kernel value for class from its policy value */ -static u16 map_class(u16 pol_value) +static u16 map_class(struct selinux_map *map, u16 pol_value) { u16 i; - for (i = 1; i < current_mapping_size; i++) { - if (current_mapping[i].value == pol_value) + for (i = 1; i < map->size; i++) { + if (map->mapping[i].value == pol_value) return i; } return SECCLASS_NULL; } -static void map_decision(u16 tclass, struct av_decision *avd, +static void map_decision(struct selinux_map *map, + u16 tclass, struct av_decision *avd, int allow_unknown) { - if (tclass < current_mapping_size) { - unsigned i, n = current_mapping[tclass].num_perms; + if (tclass < map->size) { + struct selinux_mapping *mapping = &map->mapping[tclass]; + unsigned int i, n = mapping->num_perms; u32 result; for (i = 0, result = 0; i < n; i++) { - if (avd->allowed & current_mapping[tclass].perms[i]) + if (avd->allowed & mapping->perms[i]) result |= 1<perms[i]) result |= 1<allowed = result; for (i = 0, result = 0; i < n; i++) - if (avd->auditallow & current_mapping[tclass].perms[i]) + if (avd->auditallow & mapping->perms[i]) result |= 1<auditallow = result; for (i = 0, result = 0; i < n; i++) { - if (avd->auditdeny & current_mapping[tclass].perms[i]) + if (avd->auditdeny & mapping->perms[i]) result |= 1<perms[i]) result |= 1<ss->policydb; + + return p->mls_enabled; } /* @@ -282,7 +263,8 @@ int security_mls_enabled(void) * of the process performing the transition. All other callers of * constraint_expr_eval should pass in NULL for xcontext. */ -static int constraint_expr_eval(struct context *scontext, +static int constraint_expr_eval(struct policydb *policydb, + struct context *scontext, struct context *tcontext, struct context *xcontext, struct constraint_expr *cexpr) @@ -326,8 +308,8 @@ static int constraint_expr_eval(struct context *scontext, case CEXPR_ROLE: val1 = scontext->role; val2 = tcontext->role; - r1 = policydb.role_val_to_struct[val1 - 1]; - r2 = policydb.role_val_to_struct[val2 - 1]; + r1 = policydb->role_val_to_struct[val1 - 1]; + r2 = policydb->role_val_to_struct[val2 - 1]; switch (e->op) { case CEXPR_DOM: s[++sp] = ebitmap_get_bit(&r1->dominates, @@ -472,7 +454,8 @@ static int dump_masked_av_helper(void *k, void *d, void *args) return 0; } -static void security_dump_masked_av(struct context *scontext, +static void security_dump_masked_av(struct policydb *policydb, + struct context *scontext, struct context *tcontext, u16 tclass, u32 permissions, @@ -492,8 +475,8 @@ static void security_dump_masked_av(struct context *scontext, if (!permissions) return; - tclass_name = sym_name(&policydb, SYM_CLASSES, tclass - 1); - tclass_dat = policydb.class_val_to_struct[tclass - 1]; + tclass_name = sym_name(policydb, SYM_CLASSES, tclass - 1); + tclass_dat = policydb->class_val_to_struct[tclass - 1]; common_dat = tclass_dat->comdatum; /* init permission_names */ @@ -507,11 +490,11 @@ static void security_dump_masked_av(struct context *scontext, goto out; /* get scontext/tcontext in text form */ - if (context_struct_to_string(scontext, + if (context_struct_to_string(policydb, scontext, &scontext_name, &length) < 0) goto out; - if (context_struct_to_string(tcontext, + if (context_struct_to_string(policydb, tcontext, &tcontext_name, &length) < 0) goto out; @@ -550,7 +533,8 @@ out: * security_boundary_permission - drops violated permissions * on boundary constraint. */ -static void type_attribute_bounds_av(struct context *scontext, +static void type_attribute_bounds_av(struct policydb *policydb, + struct context *scontext, struct context *tcontext, u16 tclass, struct av_decision *avd) @@ -562,14 +546,14 @@ static void type_attribute_bounds_av(struct context *scontext, struct type_datum *target; u32 masked = 0; - source = flex_array_get_ptr(policydb.type_val_to_struct_array, + source = flex_array_get_ptr(policydb->type_val_to_struct_array, scontext->type - 1); BUG_ON(!source); if (!source->bounds) return; - target = flex_array_get_ptr(policydb.type_val_to_struct_array, + target = flex_array_get_ptr(policydb->type_val_to_struct_array, tcontext->type - 1); BUG_ON(!target); @@ -584,7 +568,7 @@ static void type_attribute_bounds_av(struct context *scontext, tcontextp = &lo_tcontext; } - context_struct_compute_av(&lo_scontext, + context_struct_compute_av(policydb, &lo_scontext, tcontextp, tclass, &lo_avd, @@ -599,7 +583,7 @@ static void type_attribute_bounds_av(struct context *scontext, avd->allowed &= ~masked; /* audit masked permissions */ - security_dump_masked_av(scontext, tcontext, + security_dump_masked_av(policydb, scontext, tcontext, tclass, masked, "bounds"); } @@ -632,11 +616,12 @@ void services_compute_xperms_drivers( * Compute access vectors and extended permissions based on a context * structure pair for the permissions in a particular class. */ -static void context_struct_compute_av(struct context *scontext, - struct context *tcontext, - u16 tclass, - struct av_decision *avd, - struct extended_perms *xperms) +static void context_struct_compute_av(struct policydb *policydb, + struct context *scontext, + struct context *tcontext, + u16 tclass, + struct av_decision *avd, + struct extended_perms *xperms) { struct constraint_node *constraint; struct role_allow *ra; @@ -655,13 +640,13 @@ static void context_struct_compute_av(struct context *scontext, xperms->len = 0; } - if (unlikely(!tclass || tclass > policydb.p_classes.nprim)) { + if (unlikely(!tclass || tclass > policydb->p_classes.nprim)) { if (printk_ratelimit()) - printk(KERN_WARNING "SELinux: Invalid class %hu\n", tclass); + pr_warn("SELinux: Invalid class %hu\n", tclass); return; } - tclass_datum = policydb.class_val_to_struct[tclass - 1]; + tclass_datum = policydb->class_val_to_struct[tclass - 1]; /* * If a specific type enforcement rule was defined for @@ -669,15 +654,18 @@ static void context_struct_compute_av(struct context *scontext, */ avkey.target_class = tclass; avkey.specified = AVTAB_AV | AVTAB_XPERMS; - sattr = flex_array_get(policydb.type_attr_map_array, scontext->type - 1); + sattr = flex_array_get(policydb->type_attr_map_array, + scontext->type - 1); BUG_ON(!sattr); - tattr = flex_array_get(policydb.type_attr_map_array, tcontext->type - 1); + tattr = flex_array_get(policydb->type_attr_map_array, + tcontext->type - 1); BUG_ON(!tattr); ebitmap_for_each_positive_bit(sattr, snode, i) { ebitmap_for_each_positive_bit(tattr, tnode, j) { avkey.source_type = i + 1; avkey.target_type = j + 1; - for (node = avtab_search_node(&policydb.te_avtab, &avkey); + for (node = avtab_search_node(&policydb->te_avtab, + &avkey); node; node = avtab_search_node_next(node, avkey.specified)) { if (node->key.specified == AVTAB_ALLOWED) @@ -691,7 +679,7 @@ static void context_struct_compute_av(struct context *scontext, } /* Check conditional av table for additional permissions */ - cond_compute_av(&policydb.te_cond_avtab, &avkey, + cond_compute_av(&policydb->te_cond_avtab, &avkey, avd, xperms); } @@ -704,7 +692,7 @@ static void context_struct_compute_av(struct context *scontext, constraint = tclass_datum->constraints; while (constraint) { if ((constraint->permissions & (avd->allowed)) && - !constraint_expr_eval(scontext, tcontext, NULL, + !constraint_expr_eval(policydb, scontext, tcontext, NULL, constraint->expr)) { avd->allowed &= ~(constraint->permissions); } @@ -716,16 +704,16 @@ static void context_struct_compute_av(struct context *scontext, * role is changing, then check the (current_role, new_role) * pair. */ - if (tclass == policydb.process_class && - (avd->allowed & policydb.process_trans_perms) && + if (tclass == policydb->process_class && + (avd->allowed & policydb->process_trans_perms) && scontext->role != tcontext->role) { - for (ra = policydb.role_allow; ra; ra = ra->next) { + for (ra = policydb->role_allow; ra; ra = ra->next) { if (scontext->role == ra->role && tcontext->role == ra->new_role) break; } if (!ra) - avd->allowed &= ~policydb.process_trans_perms; + avd->allowed &= ~policydb->process_trans_perms; } /* @@ -733,41 +721,46 @@ static void context_struct_compute_av(struct context *scontext, * constraint, lazy checks have to mask any violated * permission and notice it to userspace via audit. */ - type_attribute_bounds_av(scontext, tcontext, + type_attribute_bounds_av(policydb, scontext, tcontext, tclass, avd); } -static int security_validtrans_handle_fail(struct context *ocontext, +static int security_validtrans_handle_fail(struct selinux_state *state, + struct context *ocontext, struct context *ncontext, struct context *tcontext, u16 tclass) { + struct policydb *p = &state->ss->policydb; char *o = NULL, *n = NULL, *t = NULL; u32 olen, nlen, tlen; - if (context_struct_to_string(ocontext, &o, &olen)) + if (context_struct_to_string(p, ocontext, &o, &olen)) goto out; - if (context_struct_to_string(ncontext, &n, &nlen)) + if (context_struct_to_string(p, ncontext, &n, &nlen)) goto out; - if (context_struct_to_string(tcontext, &t, &tlen)) + if (context_struct_to_string(p, tcontext, &t, &tlen)) goto out; audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR, "op=security_validate_transition seresult=denied" " oldcontext=%s newcontext=%s taskcontext=%s tclass=%s", - o, n, t, sym_name(&policydb, SYM_CLASSES, tclass-1)); + o, n, t, sym_name(p, SYM_CLASSES, tclass-1)); out: kfree(o); kfree(n); kfree(t); - if (!selinux_enforcing) + if (!enforcing_enabled(state)) return 0; return -EPERM; } -static int security_compute_validatetrans(u32 oldsid, u32 newsid, u32 tasksid, +static int security_compute_validatetrans(struct selinux_state *state, + u32 oldsid, u32 newsid, u32 tasksid, u16 orig_tclass, bool user) { + struct policydb *policydb; + struct sidtab *sidtab; struct context *ocontext; struct context *ncontext; struct context *tcontext; @@ -776,41 +769,45 @@ static int security_compute_validatetrans(u32 oldsid, u32 newsid, u32 tasksid, u16 tclass; int rc = 0; - if (!ss_initialized) + + if (!state->initialized) return 0; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + + policydb = &state->ss->policydb; + sidtab = state->ss->sidtab; if (!user) - tclass = unmap_class(orig_tclass); + tclass = unmap_class(&state->ss->map, orig_tclass); else tclass = orig_tclass; - if (!tclass || tclass > policydb.p_classes.nprim) { + if (!tclass || tclass > policydb->p_classes.nprim) { rc = -EINVAL; goto out; } - tclass_datum = policydb.class_val_to_struct[tclass - 1]; + tclass_datum = policydb->class_val_to_struct[tclass - 1]; - ocontext = sidtab_search(&sidtab, oldsid); + ocontext = sidtab_search(sidtab, oldsid); if (!ocontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, oldsid); rc = -EINVAL; goto out; } - ncontext = sidtab_search(&sidtab, newsid); + ncontext = sidtab_search(sidtab, newsid); if (!ncontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, newsid); rc = -EINVAL; goto out; } - tcontext = sidtab_search(&sidtab, tasksid); + tcontext = sidtab_search(sidtab, tasksid); if (!tcontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, tasksid); rc = -EINVAL; goto out; @@ -818,12 +815,13 @@ static int security_compute_validatetrans(u32 oldsid, u32 newsid, u32 tasksid, constraint = tclass_datum->validatetrans; while (constraint) { - if (!constraint_expr_eval(ocontext, ncontext, tcontext, - constraint->expr)) { + if (!constraint_expr_eval(policydb, ocontext, ncontext, + tcontext, constraint->expr)) { if (user) rc = -EPERM; else - rc = security_validtrans_handle_fail(ocontext, + rc = security_validtrans_handle_fail(state, + ocontext, ncontext, tcontext, tclass); @@ -833,22 +831,24 @@ static int security_compute_validatetrans(u32 oldsid, u32 newsid, u32 tasksid, } out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } -int security_validate_transition_user(u32 oldsid, u32 newsid, u32 tasksid, - u16 tclass) +int security_validate_transition_user(struct selinux_state *state, + u32 oldsid, u32 newsid, u32 tasksid, + u16 tclass) { - return security_compute_validatetrans(oldsid, newsid, tasksid, - tclass, true); + return security_compute_validatetrans(state, oldsid, newsid, tasksid, + tclass, true); } -int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid, +int security_validate_transition(struct selinux_state *state, + u32 oldsid, u32 newsid, u32 tasksid, u16 orig_tclass) { - return security_compute_validatetrans(oldsid, newsid, tasksid, - orig_tclass, false); + return security_compute_validatetrans(state, oldsid, newsid, tasksid, + orig_tclass, false); } /* @@ -860,30 +860,36 @@ int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid, * @oldsid : current security identifier * @newsid : destinated security identifier */ -int security_bounded_transition(u32 old_sid, u32 new_sid) +int security_bounded_transition(struct selinux_state *state, + u32 old_sid, u32 new_sid) { + struct policydb *policydb; + struct sidtab *sidtab; struct context *old_context, *new_context; struct type_datum *type; int index; int rc; - if (!ss_initialized) + if (!state->initialized) return 0; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + + policydb = &state->ss->policydb; + sidtab = state->ss->sidtab; rc = -EINVAL; - old_context = sidtab_search(&sidtab, old_sid); + old_context = sidtab_search(sidtab, old_sid); if (!old_context) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %u\n", + pr_err("SELinux: %s: unrecognized SID %u\n", __func__, old_sid); goto out; } rc = -EINVAL; - new_context = sidtab_search(&sidtab, new_sid); + new_context = sidtab_search(sidtab, new_sid); if (!new_context) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %u\n", + pr_err("SELinux: %s: unrecognized SID %u\n", __func__, new_sid); goto out; } @@ -895,7 +901,7 @@ int security_bounded_transition(u32 old_sid, u32 new_sid) index = new_context->type; while (true) { - type = flex_array_get_ptr(policydb.type_val_to_struct_array, + type = flex_array_get_ptr(policydb->type_val_to_struct_array, index - 1); BUG_ON(!type); @@ -917,9 +923,9 @@ int security_bounded_transition(u32 old_sid, u32 new_sid) char *new_name = NULL; u32 length; - if (!context_struct_to_string(old_context, + if (!context_struct_to_string(policydb, old_context, &old_name, &length) && - !context_struct_to_string(new_context, + !context_struct_to_string(policydb, new_context, &new_name, &length)) { audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR, @@ -932,17 +938,17 @@ int security_bounded_transition(u32 old_sid, u32 new_sid) kfree(old_name); } out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } -static void avd_init(struct av_decision *avd) +static void avd_init(struct selinux_state *state, struct av_decision *avd) { avd->allowed = 0; avd->auditallow = 0; avd->auditdeny = 0xffffffff; - avd->seqno = latest_granting; + avd->seqno = state->ss->latest_granting; avd->flags = 0; } @@ -1000,12 +1006,15 @@ void services_compute_xperms_decision(struct extended_perms_decision *xpermd, } } -void security_compute_xperms_decision(u32 ssid, - u32 tsid, - u16 orig_tclass, - u8 driver, - struct extended_perms_decision *xpermd) +void security_compute_xperms_decision(struct selinux_state *state, + u32 ssid, + u32 tsid, + u16 orig_tclass, + u8 driver, + struct extended_perms_decision *xpermd) { + struct policydb *policydb; + struct sidtab *sidtab; u16 tclass; struct context *scontext, *tcontext; struct avtab_key avkey; @@ -1020,60 +1029,64 @@ void security_compute_xperms_decision(u32 ssid, memset(xpermd->auditallow->p, 0, sizeof(xpermd->auditallow->p)); memset(xpermd->dontaudit->p, 0, sizeof(xpermd->dontaudit->p)); - read_lock(&policy_rwlock); - if (!ss_initialized) + read_lock(&state->ss->policy_rwlock); + if (!state->initialized) goto allow; - scontext = sidtab_search(&sidtab, ssid); + policydb = &state->ss->policydb; + sidtab = state->ss->sidtab; + + scontext = sidtab_search(sidtab, ssid); if (!scontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, ssid); goto out; } - tcontext = sidtab_search(&sidtab, tsid); + tcontext = sidtab_search(sidtab, tsid); if (!tcontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, tsid); goto out; } - tclass = unmap_class(orig_tclass); + tclass = unmap_class(&state->ss->map, orig_tclass); if (unlikely(orig_tclass && !tclass)) { - if (policydb.allow_unknown) + if (policydb->allow_unknown) goto allow; goto out; } - if (unlikely(!tclass || tclass > policydb.p_classes.nprim)) { + if (unlikely(!tclass || tclass > policydb->p_classes.nprim)) { pr_warn_ratelimited("SELinux: Invalid class %hu\n", tclass); goto out; } avkey.target_class = tclass; avkey.specified = AVTAB_XPERMS; - sattr = flex_array_get(policydb.type_attr_map_array, + sattr = flex_array_get(policydb->type_attr_map_array, scontext->type - 1); BUG_ON(!sattr); - tattr = flex_array_get(policydb.type_attr_map_array, + tattr = flex_array_get(policydb->type_attr_map_array, tcontext->type - 1); BUG_ON(!tattr); ebitmap_for_each_positive_bit(sattr, snode, i) { ebitmap_for_each_positive_bit(tattr, tnode, j) { avkey.source_type = i + 1; avkey.target_type = j + 1; - for (node = avtab_search_node(&policydb.te_avtab, &avkey); + for (node = avtab_search_node(&policydb->te_avtab, + &avkey); node; node = avtab_search_node_next(node, avkey.specified)) services_compute_xperms_decision(xpermd, node); - cond_compute_xperms(&policydb.te_cond_avtab, + cond_compute_xperms(&policydb->te_cond_avtab, &avkey, xpermd); } } out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return; allow: memset(xpermd->allowed->p, 0xff, sizeof(xpermd->allowed->p)); @@ -1091,94 +1104,109 @@ allow: * Compute a set of access vector decisions based on the * SID pair (@ssid, @tsid) for the permissions in @tclass. */ -void security_compute_av(u32 ssid, +void security_compute_av(struct selinux_state *state, + u32 ssid, u32 tsid, u16 orig_tclass, struct av_decision *avd, struct extended_perms *xperms) { + struct policydb *policydb; + struct sidtab *sidtab; u16 tclass; struct context *scontext = NULL, *tcontext = NULL; - read_lock(&policy_rwlock); - avd_init(avd); + read_lock(&state->ss->policy_rwlock); + avd_init(state, avd); xperms->len = 0; - if (!ss_initialized) + if (!state->initialized) goto allow; - scontext = sidtab_search(&sidtab, ssid); + policydb = &state->ss->policydb; + sidtab = state->ss->sidtab; + + scontext = sidtab_search(sidtab, ssid); if (!scontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, ssid); goto out; } /* permissive domain? */ - if (ebitmap_get_bit(&policydb.permissive_map, scontext->type)) + if (ebitmap_get_bit(&policydb->permissive_map, scontext->type)) avd->flags |= AVD_FLAGS_PERMISSIVE; - tcontext = sidtab_search(&sidtab, tsid); + tcontext = sidtab_search(sidtab, tsid); if (!tcontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, tsid); goto out; } - tclass = unmap_class(orig_tclass); + tclass = unmap_class(&state->ss->map, orig_tclass); if (unlikely(orig_tclass && !tclass)) { - if (policydb.allow_unknown) + if (policydb->allow_unknown) goto allow; goto out; } - context_struct_compute_av(scontext, tcontext, tclass, avd, xperms); - map_decision(orig_tclass, avd, policydb.allow_unknown); + context_struct_compute_av(policydb, scontext, tcontext, tclass, avd, + xperms); + map_decision(&state->ss->map, orig_tclass, avd, + policydb->allow_unknown); out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return; allow: avd->allowed = 0xffffffff; goto out; } -void security_compute_av_user(u32 ssid, +void security_compute_av_user(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, struct av_decision *avd) { + struct policydb *policydb; + struct sidtab *sidtab; struct context *scontext = NULL, *tcontext = NULL; - read_lock(&policy_rwlock); - avd_init(avd); - if (!ss_initialized) + read_lock(&state->ss->policy_rwlock); + avd_init(state, avd); + if (!state->initialized) goto allow; - scontext = sidtab_search(&sidtab, ssid); + policydb = &state->ss->policydb; + sidtab = state->ss->sidtab; + + scontext = sidtab_search(sidtab, ssid); if (!scontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, ssid); goto out; } /* permissive domain? */ - if (ebitmap_get_bit(&policydb.permissive_map, scontext->type)) + if (ebitmap_get_bit(&policydb->permissive_map, scontext->type)) avd->flags |= AVD_FLAGS_PERMISSIVE; - tcontext = sidtab_search(&sidtab, tsid); + tcontext = sidtab_search(sidtab, tsid); if (!tcontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, tsid); goto out; } if (unlikely(!tclass)) { - if (policydb.allow_unknown) + if (policydb->allow_unknown) goto allow; goto out; } - context_struct_compute_av(scontext, tcontext, tclass, avd, NULL); + context_struct_compute_av(policydb, scontext, tcontext, tclass, avd, + NULL); out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return; allow: avd->allowed = 0xffffffff; @@ -1192,7 +1220,9 @@ allow: * to point to this string and set `*scontext_len' to * the length of the string. */ -static int context_struct_to_string(struct context *context, char **scontext, u32 *scontext_len) +static int context_struct_to_string(struct policydb *p, + struct context *context, + char **scontext, u32 *scontext_len) { char *scontextp; @@ -1211,10 +1241,10 @@ static int context_struct_to_string(struct context *context, char **scontext, u3 } /* Compute the size of the context. */ - *scontext_len += strlen(sym_name(&policydb, SYM_USERS, context->user - 1)) + 1; - *scontext_len += strlen(sym_name(&policydb, SYM_ROLES, context->role - 1)) + 1; - *scontext_len += strlen(sym_name(&policydb, SYM_TYPES, context->type - 1)) + 1; - *scontext_len += mls_compute_context_len(context); + *scontext_len += strlen(sym_name(p, SYM_USERS, context->user - 1)) + 1; + *scontext_len += strlen(sym_name(p, SYM_ROLES, context->role - 1)) + 1; + *scontext_len += strlen(sym_name(p, SYM_TYPES, context->type - 1)) + 1; + *scontext_len += mls_compute_context_len(p, context); if (!scontext) return 0; @@ -1229,11 +1259,11 @@ static int context_struct_to_string(struct context *context, char **scontext, u3 * Copy the user name, role name and type name into the context. */ scontextp += sprintf(scontextp, "%s:%s:%s", - sym_name(&policydb, SYM_USERS, context->user - 1), - sym_name(&policydb, SYM_ROLES, context->role - 1), - sym_name(&policydb, SYM_TYPES, context->type - 1)); + sym_name(p, SYM_USERS, context->user - 1), + sym_name(p, SYM_ROLES, context->role - 1), + sym_name(p, SYM_TYPES, context->type - 1)); - mls_sid_to_context(context, &scontextp); + mls_sid_to_context(p, context, &scontextp); *scontextp = 0; @@ -1242,6 +1272,17 @@ static int context_struct_to_string(struct context *context, char **scontext, u3 #include "initial_sid_to_string.h" +int security_sidtab_hash_stats(struct selinux_state *state, char *page) +{ + int rc; + + read_lock(&state->ss->policy_rwlock); + rc = sidtab_hash_stats(state->ss->sidtab, page); + read_unlock(&state->ss->policy_rwlock); + + return rc; +} + const char *security_get_initial_sid_context(u32 sid) { if (unlikely(sid > SECINITSID_NUM)) @@ -1249,9 +1290,12 @@ const char *security_get_initial_sid_context(u32 sid) return initial_sid_to_string[sid]; } -static int security_sid_to_context_core(u32 sid, char **scontext, +static int security_sid_to_context_core(struct selinux_state *state, + u32 sid, char **scontext, u32 *scontext_len, int force) { + struct policydb *policydb; + struct sidtab *sidtab; struct context *context; int rc = 0; @@ -1259,7 +1303,7 @@ static int security_sid_to_context_core(u32 sid, char **scontext, *scontext = NULL; *scontext_len = 0; - if (!ss_initialized) { + if (!state->initialized) { if (sid <= SECINITSID_NUM) { char *scontextp; @@ -1275,25 +1319,28 @@ static int security_sid_to_context_core(u32 sid, char **scontext, *scontext = scontextp; goto out; } - printk(KERN_ERR "SELinux: %s: called before initial " + pr_err("SELinux: %s: called before initial " "load_policy on unknown SID %d\n", __func__, sid); rc = -EINVAL; goto out; } - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + policydb = &state->ss->policydb; + sidtab = state->ss->sidtab; if (force) - context = sidtab_search_force(&sidtab, sid); + context = sidtab_search_force(sidtab, sid); else - context = sidtab_search(&sidtab, sid); + context = sidtab_search(sidtab, sid); if (!context) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, sid); rc = -EINVAL; goto out_unlock; } - rc = context_struct_to_string(context, scontext, scontext_len); + rc = context_struct_to_string(policydb, context, scontext, + scontext_len); out_unlock: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); out: return rc; @@ -1309,14 +1356,18 @@ out: * into a dynamically allocated string of the correct size. Set @scontext * to point to this string and set @scontext_len to the length of the string. */ -int security_sid_to_context(u32 sid, char **scontext, u32 *scontext_len) +int security_sid_to_context(struct selinux_state *state, + u32 sid, char **scontext, u32 *scontext_len) { - return security_sid_to_context_core(sid, scontext, scontext_len, 0); + return security_sid_to_context_core(state, sid, scontext, + scontext_len, 0); } -int security_sid_to_context_force(u32 sid, char **scontext, u32 *scontext_len) +int security_sid_to_context_force(struct selinux_state *state, u32 sid, + char **scontext, u32 *scontext_len) { - return security_sid_to_context_core(sid, scontext, scontext_len, 1); + return security_sid_to_context_core(state, sid, scontext, + scontext_len, 1); } /* @@ -1325,7 +1376,6 @@ int security_sid_to_context_force(u32 sid, char **scontext, u32 *scontext_len) static int string_to_context_struct(struct policydb *pol, struct sidtab *sidtabp, char *scontext, - u32 scontext_len, struct context *ctx, u32 def_sid) { @@ -1386,15 +1436,12 @@ static int string_to_context_struct(struct policydb *pol, ctx->type = typdatum->value; - rc = mls_context_to_sid(pol, oldc, &p, ctx, sidtabp, def_sid); + rc = mls_context_to_sid(pol, oldc, p, ctx, sidtabp, def_sid); if (rc) goto out; - rc = -EINVAL; - if ((p - scontext) < scontext_len) - goto out; - /* Check the validity of the new context. */ + rc = -EINVAL; if (!policydb_context_isvalid(pol, ctx)) goto out; rc = 0; @@ -1404,10 +1451,49 @@ out: return rc; } -static int security_context_to_sid_core(const char *scontext, u32 scontext_len, +int context_add_hash(struct policydb *policydb, + struct context *context) +{ + int rc; + char *str; + int len; + + if (context->str) { + context->hash = context_compute_hash(context->str); + } else { + rc = context_struct_to_string(policydb, context, + &str, &len); + if (rc) + return rc; + context->hash = context_compute_hash(str); + kfree(str); + } + return 0; +} + +static int context_struct_to_sid(struct selinux_state *state, + struct context *context, u32 *sid) +{ + int rc; + struct sidtab *sidtab = state->ss->sidtab; + struct policydb *policydb = &state->ss->policydb; + + if (!context->hash) { + rc = context_add_hash(policydb, context); + if (rc) + return rc; + } + + return sidtab_context_to_sid(sidtab, context, sid); +} + +static int security_context_to_sid_core(struct selinux_state *state, + const char *scontext, u32 scontext_len, u32 *sid, u32 def_sid, gfp_t gfp_flags, int force) { + struct policydb *policydb; + struct sidtab *sidtab; char *scontext2, *str = NULL; struct context context; int rc = 0; @@ -1421,7 +1507,7 @@ static int security_context_to_sid_core(const char *scontext, u32 scontext_len, if (!scontext2) return -ENOMEM; - if (!ss_initialized) { + if (!state->initialized) { int i; for (i = 1; i < SECINITSID_NUM; i++) { @@ -1442,20 +1528,21 @@ static int security_context_to_sid_core(const char *scontext, u32 scontext_len, if (!str) goto out; } - - read_lock(&policy_rwlock); - rc = string_to_context_struct(&policydb, &sidtab, scontext2, - scontext_len, &context, def_sid); + read_lock(&state->ss->policy_rwlock); + policydb = &state->ss->policydb; + sidtab = state->ss->sidtab; + rc = string_to_context_struct(policydb, sidtab, scontext2, + &context, def_sid); if (rc == -EINVAL && force) { context.str = str; context.len = strlen(str) + 1; str = NULL; } else if (rc) goto out_unlock; - rc = sidtab_context_to_sid(&sidtab, &context, sid); + rc = context_struct_to_sid(state, &context, sid); context_destroy(&context); out_unlock: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); out: kfree(scontext2); kfree(str); @@ -1474,16 +1561,19 @@ out: * Returns -%EINVAL if the context is invalid, -%ENOMEM if insufficient * memory is available, or 0 on success. */ -int security_context_to_sid(const char *scontext, u32 scontext_len, u32 *sid, +int security_context_to_sid(struct selinux_state *state, + const char *scontext, u32 scontext_len, u32 *sid, gfp_t gfp) { - return security_context_to_sid_core(scontext, scontext_len, + return security_context_to_sid_core(state, scontext, scontext_len, sid, SECSID_NULL, gfp, 0); } -int security_context_str_to_sid(const char *scontext, u32 *sid, gfp_t gfp) +int security_context_str_to_sid(struct selinux_state *state, + const char *scontext, u32 *sid, gfp_t gfp) { - return security_context_to_sid(scontext, strlen(scontext), sid, gfp); + return security_context_to_sid(state, scontext, strlen(scontext), + sid, gfp); } /** @@ -1504,51 +1594,56 @@ int security_context_str_to_sid(const char *scontext, u32 *sid, gfp_t gfp) * Returns -%EINVAL if the context is invalid, -%ENOMEM if insufficient * memory is available, or 0 on success. */ -int security_context_to_sid_default(const char *scontext, u32 scontext_len, +int security_context_to_sid_default(struct selinux_state *state, + const char *scontext, u32 scontext_len, u32 *sid, u32 def_sid, gfp_t gfp_flags) { - return security_context_to_sid_core(scontext, scontext_len, + return security_context_to_sid_core(state, scontext, scontext_len, sid, def_sid, gfp_flags, 1); } -int security_context_to_sid_force(const char *scontext, u32 scontext_len, +int security_context_to_sid_force(struct selinux_state *state, + const char *scontext, u32 scontext_len, u32 *sid) { - return security_context_to_sid_core(scontext, scontext_len, + return security_context_to_sid_core(state, scontext, scontext_len, sid, SECSID_NULL, GFP_KERNEL, 1); } static int compute_sid_handle_invalid_context( + struct selinux_state *state, struct context *scontext, struct context *tcontext, u16 tclass, struct context *newcontext) { + struct policydb *policydb = &state->ss->policydb; char *s = NULL, *t = NULL, *n = NULL; u32 slen, tlen, nlen; - if (context_struct_to_string(scontext, &s, &slen)) + if (context_struct_to_string(policydb, scontext, &s, &slen)) goto out; - if (context_struct_to_string(tcontext, &t, &tlen)) + if (context_struct_to_string(policydb, tcontext, &t, &tlen)) goto out; - if (context_struct_to_string(newcontext, &n, &nlen)) + if (context_struct_to_string(policydb, newcontext, &n, &nlen)) goto out; audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR, "op=security_compute_sid invalid_context=%s" " scontext=%s" " tcontext=%s" " tclass=%s", - n, s, t, sym_name(&policydb, SYM_CLASSES, tclass-1)); + n, s, t, sym_name(policydb, SYM_CLASSES, tclass-1)); out: kfree(s); kfree(t); kfree(n); - if (!selinux_enforcing) + if (!enforcing_enabled(state)) return 0; return -EACCES; } -static void filename_compute_type(struct policydb *p, struct context *newcontext, +static void filename_compute_type(struct policydb *policydb, + struct context *newcontext, u32 stype, u32 ttype, u16 tclass, const char *objname) { @@ -1560,7 +1655,7 @@ static void filename_compute_type(struct policydb *p, struct context *newcontext * like /dev or /var/run. This bitmap will quickly skip rule searches * if the ttype does not contain any rules. */ - if (!ebitmap_get_bit(&p->filename_trans_ttypes, ttype)) + if (!ebitmap_get_bit(&policydb->filename_trans_ttypes, ttype)) return; ft.stype = stype; @@ -1568,12 +1663,13 @@ static void filename_compute_type(struct policydb *p, struct context *newcontext ft.tclass = tclass; ft.name = objname; - otype = hashtab_search(p->filename_trans, &ft); + otype = hashtab_search(policydb->filename_trans, &ft); if (otype) newcontext->type = otype->otype; } -static int security_compute_sid(u32 ssid, +static int security_compute_sid(struct selinux_state *state, + u32 ssid, u32 tsid, u16 orig_tclass, u32 specified, @@ -1581,6 +1677,8 @@ static int security_compute_sid(u32 ssid, u32 *out_sid, bool kern) { + struct policydb *policydb; + struct sidtab *sidtab; struct class_datum *cladatum = NULL; struct context *scontext = NULL, *tcontext = NULL, newcontext; struct role_trans *roletr = NULL; @@ -1591,7 +1689,7 @@ static int security_compute_sid(u32 ssid, int rc = 0; bool sock; - if (!ss_initialized) { + if (!state->initialized) { switch (orig_tclass) { case SECCLASS_PROCESS: /* kernel value */ *out_sid = ssid; @@ -1605,33 +1703,37 @@ static int security_compute_sid(u32 ssid, context_init(&newcontext); - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); if (kern) { - tclass = unmap_class(orig_tclass); + tclass = unmap_class(&state->ss->map, orig_tclass); sock = security_is_socket_class(orig_tclass); } else { tclass = orig_tclass; - sock = security_is_socket_class(map_class(tclass)); + sock = security_is_socket_class(map_class(&state->ss->map, + tclass)); } - scontext = sidtab_search(&sidtab, ssid); + policydb = &state->ss->policydb; + sidtab = state->ss->sidtab; + + scontext = sidtab_search(sidtab, ssid); if (!scontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, ssid); rc = -EINVAL; goto out_unlock; } - tcontext = sidtab_search(&sidtab, tsid); + tcontext = sidtab_search(sidtab, tsid); if (!tcontext) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, tsid); rc = -EINVAL; goto out_unlock; } - if (tclass && tclass <= policydb.p_classes.nprim) - cladatum = policydb.class_val_to_struct[tclass - 1]; + if (tclass && tclass <= policydb->p_classes.nprim) + cladatum = policydb->class_val_to_struct[tclass - 1]; /* Set the user identity. */ switch (specified) { @@ -1657,7 +1759,7 @@ static int security_compute_sid(u32 ssid, } else if (cladatum && cladatum->default_role == DEFAULT_TARGET) { newcontext.role = tcontext->role; } else { - if ((tclass == policydb.process_class) || (sock == true)) + if ((tclass == policydb->process_class) || (sock == true)) newcontext.role = scontext->role; else newcontext.role = OBJECT_R_VAL; @@ -1669,7 +1771,7 @@ static int security_compute_sid(u32 ssid, } else if (cladatum && cladatum->default_type == DEFAULT_TARGET) { newcontext.type = tcontext->type; } else { - if ((tclass == policydb.process_class) || (sock == true)) { + if ((tclass == policydb->process_class) || (sock == true)) { /* Use the type of process. */ newcontext.type = scontext->type; } else { @@ -1683,11 +1785,11 @@ static int security_compute_sid(u32 ssid, avkey.target_type = tcontext->type; avkey.target_class = tclass; avkey.specified = specified; - avdatum = avtab_search(&policydb.te_avtab, &avkey); + avdatum = avtab_search(&policydb->te_avtab, &avkey); /* If no permanent rule, also check for enabled conditional rules */ if (!avdatum) { - node = avtab_search_node(&policydb.te_cond_avtab, &avkey); + node = avtab_search_node(&policydb->te_cond_avtab, &avkey); for (; node; node = avtab_search_node_next(node, specified)) { if (node->key.specified & AVTAB_ENABLED) { avdatum = &node->datum; @@ -1703,13 +1805,14 @@ static int security_compute_sid(u32 ssid, /* if we have a objname this is a file trans check so check those rules */ if (objname) - filename_compute_type(&policydb, &newcontext, scontext->type, + filename_compute_type(policydb, &newcontext, scontext->type, tcontext->type, tclass, objname); /* Check for class-specific changes. */ if (specified & AVTAB_TRANSITION) { /* Look for a role transition rule. */ - for (roletr = policydb.role_tr; roletr; roletr = roletr->next) { + for (roletr = policydb->role_tr; roletr; + roletr = roletr->next) { if ((roletr->role == scontext->role) && (roletr->type == tcontext->type) && (roletr->tclass == tclass)) { @@ -1722,14 +1825,14 @@ static int security_compute_sid(u32 ssid, /* Set the MLS attributes. This is done last because it may allocate memory. */ - rc = mls_compute_sid(scontext, tcontext, tclass, specified, + rc = mls_compute_sid(policydb, scontext, tcontext, tclass, specified, &newcontext, sock); if (rc) goto out_unlock; /* Check the validity of the context. */ - if (!policydb_context_isvalid(&policydb, &newcontext)) { - rc = compute_sid_handle_invalid_context(scontext, + if (!policydb_context_isvalid(policydb, &newcontext)) { + rc = compute_sid_handle_invalid_context(state, scontext, tcontext, tclass, &newcontext); @@ -1737,9 +1840,9 @@ static int security_compute_sid(u32 ssid, goto out_unlock; } /* Obtain the sid for the context. */ - rc = sidtab_context_to_sid(&sidtab, &newcontext, out_sid); + rc = context_struct_to_sid(state, &newcontext, out_sid); out_unlock: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); context_destroy(&newcontext); out: return rc; @@ -1758,17 +1861,21 @@ out: * if insufficient memory is available, or %0 if the new SID was * computed successfully. */ -int security_transition_sid(u32 ssid, u32 tsid, u16 tclass, +int security_transition_sid(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, const struct qstr *qstr, u32 *out_sid) { - return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION, + return security_compute_sid(state, ssid, tsid, tclass, + AVTAB_TRANSITION, qstr ? qstr->name : NULL, out_sid, true); } -int security_transition_sid_user(u32 ssid, u32 tsid, u16 tclass, +int security_transition_sid_user(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, const char *objname, u32 *out_sid) { - return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION, + return security_compute_sid(state, ssid, tsid, tclass, + AVTAB_TRANSITION, objname, out_sid, false); } @@ -1785,12 +1892,14 @@ int security_transition_sid_user(u32 ssid, u32 tsid, u16 tclass, * if insufficient memory is available, or %0 if the SID was * computed successfully. */ -int security_member_sid(u32 ssid, +int security_member_sid(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 *out_sid) { - return security_compute_sid(ssid, tsid, tclass, AVTAB_MEMBER, NULL, + return security_compute_sid(state, ssid, tsid, tclass, + AVTAB_MEMBER, NULL, out_sid, false); } @@ -1807,145 +1916,130 @@ int security_member_sid(u32 ssid, * if insufficient memory is available, or %0 if the SID was * computed successfully. */ -int security_change_sid(u32 ssid, +int security_change_sid(struct selinux_state *state, + u32 ssid, u32 tsid, u16 tclass, u32 *out_sid) { - return security_compute_sid(ssid, tsid, tclass, AVTAB_CHANGE, NULL, + return security_compute_sid(state, + ssid, tsid, tclass, AVTAB_CHANGE, NULL, out_sid, false); } -/* Clone the SID into the new SID table. */ -static int clone_sid(u32 sid, - struct context *context, - void *arg) -{ - struct sidtab *s = arg; - - if (sid > SECINITSID_NUM) - return sidtab_insert(s, sid, context); - else - return 0; -} - -static inline int convert_context_handle_invalid_context(struct context *context) +static inline int convert_context_handle_invalid_context( + struct selinux_state *state, + struct context *context) { + struct policydb *policydb = &state->ss->policydb; char *s; u32 len; - if (selinux_enforcing) + if (enforcing_enabled(state)) return -EINVAL; - if (!context_struct_to_string(context, &s, &len)) { - printk(KERN_WARNING "SELinux: Context %s would be invalid if enforcing\n", s); + if (!context_struct_to_string(policydb, context, &s, &len)) { + pr_warn("SELinux: Context %s would be invalid if enforcing\n", + s); kfree(s); } return 0; } struct convert_context_args { + struct selinux_state *state; struct policydb *oldp; struct policydb *newp; }; /* * Convert the values in the security context - * structure `c' from the values specified + * structure `oldc' from the values specified * in the policy `p->oldp' to the values specified - * in the policy `p->newp'. Verify that the - * context is valid under the new policy. + * in the policy `p->newp', storing the new context + * in `newc'. Verify that the context is valid + * under the new policy. */ -static int convert_context(u32 key, - struct context *c, - void *p) +static int convert_context(struct context *oldc, struct context *newc, void *p) { struct convert_context_args *args; - struct context oldc; struct ocontext *oc; - struct mls_range *range; struct role_datum *role; struct type_datum *typdatum; struct user_datum *usrdatum; char *s; u32 len; - int rc = 0; - - if (key <= SECINITSID_NUM) - goto out; + int rc; args = p; - if (c->str) { - struct context ctx; - - rc = -ENOMEM; - s = kstrdup(c->str, GFP_KERNEL); + if (oldc->str) { + s = kstrdup(oldc->str, GFP_KERNEL); if (!s) - goto out; + return -ENOMEM; rc = string_to_context_struct(args->newp, NULL, s, - c->len, &ctx, SECSID_NULL); - kfree(s); - if (!rc) { - printk(KERN_INFO "SELinux: Context %s became valid (mapped).\n", - c->str); - /* Replace string with mapped representation. */ - kfree(c->str); - memcpy(c, &ctx, sizeof(*c)); - goto out; - } else if (rc == -EINVAL) { - /* Retain string representation for later mapping. */ - rc = 0; - goto out; - } else { - /* Other error condition, e.g. ENOMEM. */ - printk(KERN_ERR "SELinux: Unable to map context %s, rc = %d.\n", - c->str, -rc); - goto out; + newc, SECSID_NULL); + if (rc == -EINVAL) { + /* + * Retain string representation for later mapping. + * + * IMPORTANT: We need to copy the contents of oldc->str + * back into s again because string_to_context_struct() + * may have garbled it. + */ + memcpy(s, oldc->str, oldc->len); + context_init(newc); + newc->str = s; + newc->len = oldc->len; + newc->hash = oldc->hash; + return 0; } + kfree(s); + if (rc) { + /* Other error condition, e.g. ENOMEM. */ + pr_err("SELinux: Unable to map context %s, rc = %d.\n", + oldc->str, -rc); + return rc; + } + pr_info("SELinux: Context %s became valid (mapped).\n", + oldc->str); + return 0; } - rc = context_cpy(&oldc, c); - if (rc) - goto out; + context_init(newc); /* Convert the user. */ rc = -EINVAL; usrdatum = hashtab_search(args->newp->p_users.table, - sym_name(args->oldp, SYM_USERS, c->user - 1)); + sym_name(args->oldp, + SYM_USERS, oldc->user - 1)); if (!usrdatum) goto bad; - c->user = usrdatum->value; + newc->user = usrdatum->value; /* Convert the role. */ rc = -EINVAL; role = hashtab_search(args->newp->p_roles.table, - sym_name(args->oldp, SYM_ROLES, c->role - 1)); + sym_name(args->oldp, SYM_ROLES, oldc->role - 1)); if (!role) goto bad; - c->role = role->value; + newc->role = role->value; /* Convert the type. */ rc = -EINVAL; typdatum = hashtab_search(args->newp->p_types.table, - sym_name(args->oldp, SYM_TYPES, c->type - 1)); + sym_name(args->oldp, + SYM_TYPES, oldc->type - 1)); if (!typdatum) goto bad; - c->type = typdatum->value; + newc->type = typdatum->value; /* Convert the MLS fields if dealing with MLS policies */ if (args->oldp->mls_enabled && args->newp->mls_enabled) { - rc = mls_convert_context(args->oldp, args->newp, c); + rc = mls_convert_context(args->oldp, args->newp, oldc, newc); if (rc) goto bad; - } else if (args->oldp->mls_enabled && !args->newp->mls_enabled) { - /* - * Switching between MLS and non-MLS policy: - * free any storage used by the MLS fields in the - * context for all existing entries in the sidtab. - */ - mls_context_destroy(c); } else if (!args->oldp->mls_enabled && args->newp->mls_enabled) { /* * Switching between non-MLS and MLS policy: @@ -1959,76 +2053,67 @@ static int convert_context(u32 key, oc = oc->next; rc = -EINVAL; if (!oc) { - printk(KERN_ERR "SELinux: unable to look up" + pr_err("SELinux: unable to look up" " the initial SIDs list\n"); goto bad; } - range = &oc->context[0].range; - rc = mls_range_set(c, range); + rc = mls_range_set(newc, &oc->context[0].range); if (rc) goto bad; } /* Check the validity of the new context. */ - if (!policydb_context_isvalid(args->newp, c)) { - rc = convert_context_handle_invalid_context(&oldc); + if (!policydb_context_isvalid(args->newp, newc)) { + rc = convert_context_handle_invalid_context(args->state, oldc); if (rc) goto bad; } - context_destroy(&oldc); + rc = context_add_hash(args->newp, newc); + if (rc) + goto bad; - rc = 0; -out: - return rc; + return 0; bad: /* Map old representation to string and save it. */ - rc = context_struct_to_string(&oldc, &s, &len); + rc = context_struct_to_string(args->oldp, oldc, &s, &len); if (rc) return rc; - context_destroy(&oldc); - context_destroy(c); - c->str = s; - c->len = len; - printk(KERN_INFO "SELinux: Context %s became invalid (unmapped).\n", - c->str); - rc = 0; - goto out; + context_destroy(newc); + newc->str = s; + newc->len = len; + newc->hash = context_compute_hash(s); + pr_info("SELinux: Context %s became invalid (unmapped).\n", + newc->str); + return 0; } -static void security_load_policycaps(void) +static void security_load_policycaps(struct selinux_state *state) { + struct policydb *p = &state->ss->policydb; unsigned int i; struct ebitmap_node *node; - selinux_policycap_netpeer = ebitmap_get_bit(&policydb.policycaps, - POLICYDB_CAPABILITY_NETPEER); - selinux_policycap_openperm = ebitmap_get_bit(&policydb.policycaps, - POLICYDB_CAPABILITY_OPENPERM); - selinux_policycap_extsockclass = ebitmap_get_bit(&policydb.policycaps, - POLICYDB_CAPABILITY_EXTSOCKCLASS); - selinux_policycap_alwaysnetwork = ebitmap_get_bit(&policydb.policycaps, - POLICYDB_CAPABILITY_ALWAYSNETWORK); - selinux_policycap_cgroupseclabel = - ebitmap_get_bit(&policydb.policycaps, - POLICYDB_CAPABILITY_CGROUPSECLABEL); - selinux_policycap_nnp_nosuid_transition = - ebitmap_get_bit(&policydb.policycaps, - POLICYDB_CAPABILITY_NNP_NOSUID_TRANSITION); + for (i = 0; i < ARRAY_SIZE(state->policycap); i++) + state->policycap[i] = ebitmap_get_bit(&p->policycaps, i); for (i = 0; i < ARRAY_SIZE(selinux_policycap_names); i++) pr_info("SELinux: policy capability %s=%d\n", selinux_policycap_names[i], - ebitmap_get_bit(&policydb.policycaps, i)); + ebitmap_get_bit(&p->policycaps, i)); - ebitmap_for_each_positive_bit(&policydb.policycaps, node, i) { + ebitmap_for_each_positive_bit(&p->policycaps, node, i) { if (i >= ARRAY_SIZE(selinux_policycap_names)) pr_info("SELinux: unknown policy capability %u\n", i); } + + state->android_netlink_route = p->android_netlink_route; + selinux_nlmsg_init(); } -static int security_preserve_bools(struct policydb *p); +static int security_preserve_bools(struct selinux_state *state, + struct policydb *newpolicydb); /** * security_load_policy - Load a security policy configuration. @@ -2040,14 +2125,16 @@ static int security_preserve_bools(struct policydb *p); * This function will flush the access vector cache after * loading the new policy. */ -int security_load_policy(void *data, size_t len) +int security_load_policy(struct selinux_state *state, void *data, size_t len) { + struct policydb *policydb; + struct sidtab *oldsidtab, *newsidtab; struct policydb *oldpolicydb, *newpolicydb; - struct sidtab oldsidtab, newsidtab; - struct selinux_mapping *oldmap, *map = NULL; + struct selinux_mapping *oldmapping; + struct selinux_map newmap; + struct sidtab_convert_params convert_params; struct convert_context_args args; u32 seqno; - u16 map_size; int rc = 0; struct policy_file file = { data, len }, *fp = &file; @@ -2058,123 +2145,126 @@ int security_load_policy(void *data, size_t len) } newpolicydb = oldpolicydb + 1; - if (!ss_initialized) { - avtab_cache_init(); - ebitmap_cache_init(); - rc = policydb_read(&policydb, fp); + policydb = &state->ss->policydb; + + newsidtab = kmalloc(sizeof(*newsidtab), GFP_KERNEL); + if (!newsidtab) { + rc = -ENOMEM; + goto out; + } + + if (!state->initialized) { + rc = policydb_read(policydb, fp); if (rc) { - avtab_cache_destroy(); - ebitmap_cache_destroy(); + kfree(newsidtab); goto out; } - policydb.len = len; - rc = selinux_set_mapping(&policydb, secclass_map, - ¤t_mapping, - ¤t_mapping_size); + policydb->len = len; + rc = selinux_set_mapping(policydb, secclass_map, + &state->ss->map); if (rc) { - policydb_destroy(&policydb); - avtab_cache_destroy(); - ebitmap_cache_destroy(); + kfree(newsidtab); + policydb_destroy(policydb); goto out; } - rc = policydb_load_isids(&policydb, &sidtab); + rc = policydb_load_isids(policydb, newsidtab); if (rc) { - policydb_destroy(&policydb); - avtab_cache_destroy(); - ebitmap_cache_destroy(); + kfree(newsidtab); + policydb_destroy(policydb); goto out; } - security_load_policycaps(); - ss_initialized = 1; - seqno = ++latest_granting; + state->ss->sidtab = newsidtab; + security_load_policycaps(state); + state->initialized = 1; + seqno = ++state->ss->latest_granting; selinux_complete_init(); - avc_ss_reset(seqno); + avc_ss_reset(state->avc, seqno); selnl_notify_policyload(seqno); - selinux_status_update_policyload(seqno); + selinux_status_update_policyload(state, seqno); selinux_netlbl_cache_invalidate(); selinux_xfrm_notify_policyload(); goto out; } -#if 0 - sidtab_hash_eval(&sidtab, "sids"); -#endif - rc = policydb_read(newpolicydb, fp); - if (rc) + if (rc) { + kfree(newsidtab); goto out; + } newpolicydb->len = len; /* If switching between different policy types, log MLS status */ - if (policydb.mls_enabled && !newpolicydb->mls_enabled) - printk(KERN_INFO "SELinux: Disabling MLS support...\n"); - else if (!policydb.mls_enabled && newpolicydb->mls_enabled) - printk(KERN_INFO "SELinux: Enabling MLS support...\n"); + if (policydb->mls_enabled && !newpolicydb->mls_enabled) + pr_info("SELinux: Disabling MLS support...\n"); + else if (!policydb->mls_enabled && newpolicydb->mls_enabled) + pr_info("SELinux: Enabling MLS support...\n"); - rc = policydb_load_isids(newpolicydb, &newsidtab); + rc = policydb_load_isids(newpolicydb, newsidtab); if (rc) { - printk(KERN_ERR "SELinux: unable to load the initial SIDs\n"); + pr_err("SELinux: unable to load the initial SIDs\n"); policydb_destroy(newpolicydb); + kfree(newsidtab); goto out; } - rc = selinux_set_mapping(newpolicydb, secclass_map, &map, &map_size); + rc = selinux_set_mapping(newpolicydb, secclass_map, &newmap); if (rc) goto err; - rc = security_preserve_bools(newpolicydb); + rc = security_preserve_bools(state, newpolicydb); if (rc) { - printk(KERN_ERR "SELinux: unable to preserve booleans\n"); + pr_err("SELinux: unable to preserve booleans\n"); goto err; } - /* Clone the SID table. */ - sidtab_shutdown(&sidtab); - - rc = sidtab_map(&sidtab, clone_sid, &newsidtab); - if (rc) - goto err; + oldsidtab = state->ss->sidtab; /* * Convert the internal representations of contexts * in the new SID table. */ - args.oldp = &policydb; + args.state = state; + args.oldp = policydb; args.newp = newpolicydb; - rc = sidtab_map(&newsidtab, convert_context, &args); + + convert_params.func = convert_context; + convert_params.args = &args; + convert_params.target = newsidtab; + + rc = sidtab_convert(oldsidtab, &convert_params); if (rc) { - printk(KERN_ERR "SELinux: unable to convert the internal" + pr_err("SELinux: unable to convert the internal" " representation of contexts in the new SID" " table\n"); goto err; } /* Save the old policydb and SID table to free later. */ - memcpy(oldpolicydb, &policydb, sizeof(policydb)); - sidtab_set(&oldsidtab, &sidtab); + memcpy(oldpolicydb, policydb, sizeof(*policydb)); /* Install the new policydb and SID table. */ - write_lock_irq(&policy_rwlock); - memcpy(&policydb, newpolicydb, sizeof(policydb)); - sidtab_set(&sidtab, &newsidtab); - security_load_policycaps(); - oldmap = current_mapping; - current_mapping = map; - current_mapping_size = map_size; - seqno = ++latest_granting; - write_unlock_irq(&policy_rwlock); + write_lock_irq(&state->ss->policy_rwlock); + memcpy(policydb, newpolicydb, sizeof(*policydb)); + state->ss->sidtab = newsidtab; + security_load_policycaps(state); + oldmapping = state->ss->map.mapping; + state->ss->map.mapping = newmap.mapping; + state->ss->map.size = newmap.size; + seqno = ++state->ss->latest_granting; + write_unlock_irq(&state->ss->policy_rwlock); /* Free the old policydb and SID table. */ policydb_destroy(oldpolicydb); - sidtab_destroy(&oldsidtab); - kfree(oldmap); + sidtab_destroy(oldsidtab); + kfree(oldsidtab); + kfree(oldmapping); - avc_ss_reset(seqno); + avc_ss_reset(state->avc, seqno); selnl_notify_policyload(seqno); - selinux_status_update_policyload(seqno); + selinux_status_update_policyload(state, seqno); selinux_netlbl_cache_invalidate(); selinux_xfrm_notify_policyload(); @@ -2182,8 +2272,9 @@ int security_load_policy(void *data, size_t len) goto out; err: - kfree(map); - sidtab_destroy(&newsidtab); + kfree(newmap.mapping); + sidtab_destroy(newsidtab); + kfree(newsidtab); policydb_destroy(newpolicydb); out: @@ -2191,13 +2282,14 @@ out: return rc; } -size_t security_policydb_len(void) +size_t security_policydb_len(struct selinux_state *state) { + struct policydb *p = &state->ss->policydb; size_t len; - read_lock(&policy_rwlock); - len = policydb.len; - read_unlock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + len = p->len; + read_unlock(&state->ss->policy_rwlock); return len; } @@ -2208,14 +2300,20 @@ size_t security_policydb_len(void) * @port: port number * @out_sid: security identifier */ -int security_port_sid(u8 protocol, u16 port, u32 *out_sid) +int security_port_sid(struct selinux_state *state, + u8 protocol, u16 port, u32 *out_sid) { + struct policydb *policydb; + struct sidtab *sidtab; struct ocontext *c; int rc = 0; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); - c = policydb.ocontexts[OCON_PORT]; + policydb = &state->ss->policydb; + sidtab = state->ss->sidtab; + + c = policydb->ocontexts[OCON_PORT]; while (c) { if (c->u.port.protocol == protocol && c->u.port.low_port <= port && @@ -2226,8 +2324,7 @@ int security_port_sid(u8 protocol, u16 port, u32 *out_sid) if (c) { if (!c->sid[0]) { - rc = sidtab_context_to_sid(&sidtab, - &c->context[0], + rc = context_struct_to_sid(state, &c->context[0], &c->sid[0]); if (rc) goto out; @@ -2238,7 +2335,7 @@ int security_port_sid(u8 protocol, u16 port, u32 *out_sid) } out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } @@ -2248,14 +2345,18 @@ out: * @pkey_num: pkey number * @out_sid: security identifier */ -int security_ib_pkey_sid(u64 subnet_prefix, u16 pkey_num, u32 *out_sid) +int security_ib_pkey_sid(struct selinux_state *state, + u64 subnet_prefix, u16 pkey_num, u32 *out_sid) { + struct policydb *policydb; struct ocontext *c; int rc = 0; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); - c = policydb.ocontexts[OCON_IBPKEY]; + policydb = &state->ss->policydb; + + c = policydb->ocontexts[OCON_IBPKEY]; while (c) { if (c->u.ibpkey.low_pkey <= pkey_num && c->u.ibpkey.high_pkey >= pkey_num && @@ -2267,7 +2368,7 @@ int security_ib_pkey_sid(u64 subnet_prefix, u16 pkey_num, u32 *out_sid) if (c) { if (!c->sid[0]) { - rc = sidtab_context_to_sid(&sidtab, + rc = context_struct_to_sid(state, &c->context[0], &c->sid[0]); if (rc) @@ -2278,7 +2379,7 @@ int security_ib_pkey_sid(u64 subnet_prefix, u16 pkey_num, u32 *out_sid) *out_sid = SECINITSID_UNLABELED; out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } @@ -2288,14 +2389,20 @@ out: * @port: port number * @out_sid: security identifier */ -int security_ib_endport_sid(const char *dev_name, u8 port_num, u32 *out_sid) +int security_ib_endport_sid(struct selinux_state *state, + const char *dev_name, u8 port_num, u32 *out_sid) { + struct policydb *policydb; + struct sidtab *sidtab; struct ocontext *c; int rc = 0; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); - c = policydb.ocontexts[OCON_IBENDPORT]; + policydb = &state->ss->policydb; + sidtab = state->ss->sidtab; + + c = policydb->ocontexts[OCON_IBENDPORT]; while (c) { if (c->u.ibendport.port == port_num && !strncmp(c->u.ibendport.dev_name, @@ -2308,8 +2415,7 @@ int security_ib_endport_sid(const char *dev_name, u8 port_num, u32 *out_sid) if (c) { if (!c->sid[0]) { - rc = sidtab_context_to_sid(&sidtab, - &c->context[0], + rc = context_struct_to_sid(state, &c->context[0], &c->sid[0]); if (rc) goto out; @@ -2319,7 +2425,7 @@ int security_ib_endport_sid(const char *dev_name, u8 port_num, u32 *out_sid) *out_sid = SECINITSID_UNLABELED; out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } @@ -2328,14 +2434,20 @@ out: * @name: interface name * @if_sid: interface SID */ -int security_netif_sid(char *name, u32 *if_sid) +int security_netif_sid(struct selinux_state *state, + char *name, u32 *if_sid) { + struct policydb *policydb; + struct sidtab *sidtab; int rc = 0; struct ocontext *c; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); - c = policydb.ocontexts[OCON_NETIF]; + policydb = &state->ss->policydb; + sidtab = state->ss->sidtab; + + c = policydb->ocontexts[OCON_NETIF]; while (c) { if (strcmp(name, c->u.name) == 0) break; @@ -2344,13 +2456,11 @@ int security_netif_sid(char *name, u32 *if_sid) if (c) { if (!c->sid[0] || !c->sid[1]) { - rc = sidtab_context_to_sid(&sidtab, - &c->context[0], - &c->sid[0]); + rc = context_struct_to_sid(state, &c->context[0], + &c->sid[0]); if (rc) goto out; - rc = sidtab_context_to_sid(&sidtab, - &c->context[1], + rc = context_struct_to_sid(state, &c->context[1], &c->sid[1]); if (rc) goto out; @@ -2360,7 +2470,7 @@ int security_netif_sid(char *name, u32 *if_sid) *if_sid = SECINITSID_NETIF; out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } @@ -2384,15 +2494,19 @@ static int match_ipv6_addrmask(u32 *input, u32 *addr, u32 *mask) * @addrlen: address length in bytes * @out_sid: security identifier */ -int security_node_sid(u16 domain, +int security_node_sid(struct selinux_state *state, + u16 domain, void *addrp, u32 addrlen, u32 *out_sid) { + struct policydb *policydb; int rc; struct ocontext *c; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + + policydb = &state->ss->policydb; switch (domain) { case AF_INET: { @@ -2404,7 +2518,7 @@ int security_node_sid(u16 domain, addr = *((u32 *)addrp); - c = policydb.ocontexts[OCON_NODE]; + c = policydb->ocontexts[OCON_NODE]; while (c) { if (c->u.node.addr == (addr & c->u.node.mask)) break; @@ -2417,7 +2531,7 @@ int security_node_sid(u16 domain, rc = -EINVAL; if (addrlen != sizeof(u64) * 2) goto out; - c = policydb.ocontexts[OCON_NODE6]; + c = policydb->ocontexts[OCON_NODE6]; while (c) { if (match_ipv6_addrmask(addrp, c->u.node6.addr, c->u.node6.mask)) @@ -2434,7 +2548,7 @@ int security_node_sid(u16 domain, if (c) { if (!c->sid[0]) { - rc = sidtab_context_to_sid(&sidtab, + rc = context_struct_to_sid(state, &c->context[0], &c->sid[0]); if (rc) @@ -2447,7 +2561,7 @@ int security_node_sid(u16 domain, rc = 0; out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } @@ -2467,11 +2581,14 @@ out: * number of elements in the array. */ -int security_get_user_sids(u32 fromsid, +int security_get_user_sids(struct selinux_state *state, + u32 fromsid, char *username, u32 **sids, u32 *nel) { + struct policydb *policydb; + struct sidtab *sidtab; struct context *fromcon, usercon; u32 *mysids = NULL, *mysids2, sid; u32 mynel = 0, maxnel = SIDS_NEL; @@ -2483,20 +2600,23 @@ int security_get_user_sids(u32 fromsid, *sids = NULL; *nel = 0; - if (!ss_initialized) + if (!state->initialized) goto out; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + + policydb = &state->ss->policydb; + sidtab = state->ss->sidtab; context_init(&usercon); rc = -EINVAL; - fromcon = sidtab_search(&sidtab, fromsid); + fromcon = sidtab_search(sidtab, fromsid); if (!fromcon) goto out_unlock; rc = -EINVAL; - user = hashtab_search(policydb.p_users.table, username); + user = hashtab_search(policydb->p_users.table, username); if (!user) goto out_unlock; @@ -2508,15 +2628,21 @@ int security_get_user_sids(u32 fromsid, goto out_unlock; ebitmap_for_each_positive_bit(&user->roles, rnode, i) { - role = policydb.role_val_to_struct[i]; + role = policydb->role_val_to_struct[i]; usercon.role = i + 1; ebitmap_for_each_positive_bit(&role->types, tnode, j) { usercon.type = j + 1; + /* + * The same context struct is reused here so the hash + * must be reset. + */ + usercon.hash = 0; - if (mls_setup_user_range(fromcon, user, &usercon)) + if (mls_setup_user_range(policydb, fromcon, user, + &usercon)) continue; - rc = sidtab_context_to_sid(&sidtab, &usercon, &sid); + rc = context_struct_to_sid(state, &usercon, &sid); if (rc) goto out_unlock; if (mynel < maxnel) { @@ -2536,7 +2662,7 @@ int security_get_user_sids(u32 fromsid, } rc = 0; out_unlock: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); if (rc || !mynel) { kfree(mysids); goto out; @@ -2550,7 +2676,8 @@ out_unlock: } for (i = 0, j = 0; i < mynel; i++) { struct av_decision dummy_avd; - rc = avc_has_perm_noaudit(fromsid, mysids[i], + rc = avc_has_perm_noaudit(state, + fromsid, mysids[i], SECCLASS_PROCESS, /* kernel value */ PROCESS__TRANSITION, AVC_STRICT, &dummy_avd); @@ -2579,11 +2706,13 @@ out: * * The caller must acquire the policy_rwlock before calling this function. */ -static inline int __security_genfs_sid(const char *fstype, +static inline int __security_genfs_sid(struct selinux_state *state, + const char *fstype, char *path, u16 orig_sclass, u32 *sid) { + struct policydb *policydb = &state->ss->policydb; int len; u16 sclass; struct genfs *genfs; @@ -2593,10 +2722,10 @@ static inline int __security_genfs_sid(const char *fstype, while (path[0] == '/' && path[1] == '/') path++; - sclass = unmap_class(orig_sclass); + sclass = unmap_class(&state->ss->map, orig_sclass); *sid = SECINITSID_UNLABELED; - for (genfs = policydb.genfs; genfs; genfs = genfs->next) { + for (genfs = policydb->genfs; genfs; genfs = genfs->next) { cmp = strcmp(fstype, genfs->fstype); if (cmp <= 0) break; @@ -2618,7 +2747,7 @@ static inline int __security_genfs_sid(const char *fstype, goto out; if (!c->sid[0]) { - rc = sidtab_context_to_sid(&sidtab, &c->context[0], &c->sid[0]); + rc = context_struct_to_sid(state, &c->context[0], &c->sid[0]); if (rc) goto out; } @@ -2639,16 +2768,17 @@ out: * Acquire policy_rwlock before calling __security_genfs_sid() and release * it afterward. */ -int security_genfs_sid(const char *fstype, +int security_genfs_sid(struct selinux_state *state, + const char *fstype, char *path, u16 orig_sclass, u32 *sid) { int retval; - read_lock(&policy_rwlock); - retval = __security_genfs_sid(fstype, path, orig_sclass, sid); - read_unlock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + retval = __security_genfs_sid(state, fstype, path, orig_sclass, sid); + read_unlock(&state->ss->policy_rwlock); return retval; } @@ -2656,16 +2786,21 @@ int security_genfs_sid(const char *fstype, * security_fs_use - Determine how to handle labeling for a filesystem. * @sb: superblock in question */ -int security_fs_use(struct super_block *sb) +int security_fs_use(struct selinux_state *state, struct super_block *sb) { + struct policydb *policydb; + struct sidtab *sidtab; int rc = 0; struct ocontext *c; struct superblock_security_struct *sbsec = sb->s_security; const char *fstype = sb->s_type->name; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); - c = policydb.ocontexts[OCON_FSUSE]; + policydb = &state->ss->policydb; + sidtab = state->ss->sidtab; + + c = policydb->ocontexts[OCON_FSUSE]; while (c) { if (strcmp(fstype, c->u.name) == 0) break; @@ -2675,14 +2810,14 @@ int security_fs_use(struct super_block *sb) if (c) { sbsec->behavior = c->v.behavior; if (!c->sid[0]) { - rc = sidtab_context_to_sid(&sidtab, &c->context[0], + rc = context_struct_to_sid(state, &c->context[0], &c->sid[0]); if (rc) goto out; } sbsec->sid = c->sid[0]; } else { - rc = __security_genfs_sid(fstype, "/", SECCLASS_DIR, + rc = __security_genfs_sid(state, fstype, "/", SECCLASS_DIR, &sbsec->sid); if (rc) { sbsec->behavior = SECURITY_FS_USE_NONE; @@ -2693,20 +2828,25 @@ int security_fs_use(struct super_block *sb) } out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } -int security_get_bools(int *len, char ***names, int **values) +int security_get_bools(struct selinux_state *state, + int *len, char ***names, int **values) { + struct policydb *policydb; int i, rc; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + + policydb = &state->ss->policydb; + *names = NULL; *values = NULL; rc = 0; - *len = policydb.p_bools.nprim; + *len = policydb->p_bools.nprim; if (!*len) goto out; @@ -2721,16 +2861,17 @@ int security_get_bools(int *len, char ***names, int **values) goto err; for (i = 0; i < *len; i++) { - (*values)[i] = policydb.bool_val_to_struct[i]->state; + (*values)[i] = policydb->bool_val_to_struct[i]->state; rc = -ENOMEM; - (*names)[i] = kstrdup(sym_name(&policydb, SYM_BOOLS, i), GFP_ATOMIC); + (*names)[i] = kstrdup(sym_name(policydb, SYM_BOOLS, i), + GFP_ATOMIC); if (!(*names)[i]) goto err; } rc = 0; out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; err: if (*names) { @@ -2742,90 +2883,98 @@ err: } -int security_set_bools(int len, int *values) +int security_set_bools(struct selinux_state *state, int len, int *values) { + struct policydb *policydb; int i, rc; int lenp, seqno = 0; struct cond_node *cur; - write_lock_irq(&policy_rwlock); + write_lock_irq(&state->ss->policy_rwlock); + + policydb = &state->ss->policydb; rc = -EFAULT; - lenp = policydb.p_bools.nprim; + lenp = policydb->p_bools.nprim; if (len != lenp) goto out; for (i = 0; i < len; i++) { - if (!!values[i] != policydb.bool_val_to_struct[i]->state) { + if (!!values[i] != policydb->bool_val_to_struct[i]->state) { audit_log(current->audit_context, GFP_ATOMIC, AUDIT_MAC_CONFIG_CHANGE, "bool=%s val=%d old_val=%d auid=%u ses=%u", - sym_name(&policydb, SYM_BOOLS, i), + sym_name(policydb, SYM_BOOLS, i), !!values[i], - policydb.bool_val_to_struct[i]->state, + policydb->bool_val_to_struct[i]->state, from_kuid(&init_user_ns, audit_get_loginuid(current)), audit_get_sessionid(current)); } if (values[i]) - policydb.bool_val_to_struct[i]->state = 1; + policydb->bool_val_to_struct[i]->state = 1; else - policydb.bool_val_to_struct[i]->state = 0; + policydb->bool_val_to_struct[i]->state = 0; } - for (cur = policydb.cond_list; cur; cur = cur->next) { - rc = evaluate_cond_node(&policydb, cur); + for (cur = policydb->cond_list; cur; cur = cur->next) { + rc = evaluate_cond_node(policydb, cur); if (rc) goto out; } - seqno = ++latest_granting; + seqno = ++state->ss->latest_granting; rc = 0; out: - write_unlock_irq(&policy_rwlock); + write_unlock_irq(&state->ss->policy_rwlock); if (!rc) { - avc_ss_reset(seqno); + avc_ss_reset(state->avc, seqno); selnl_notify_policyload(seqno); - selinux_status_update_policyload(seqno); + selinux_status_update_policyload(state, seqno); selinux_xfrm_notify_policyload(); } return rc; } -int security_get_bool_value(int index) +int security_get_bool_value(struct selinux_state *state, + int index) { + struct policydb *policydb; int rc; int len; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + + policydb = &state->ss->policydb; rc = -EFAULT; - len = policydb.p_bools.nprim; + len = policydb->p_bools.nprim; if (index >= len) goto out; - rc = policydb.bool_val_to_struct[index]->state; + rc = policydb->bool_val_to_struct[index]->state; out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } -static int security_preserve_bools(struct policydb *p) +static int security_preserve_bools(struct selinux_state *state, + struct policydb *policydb) { int rc, nbools = 0, *bvalues = NULL, i; char **bnames = NULL; struct cond_bool_datum *booldatum; struct cond_node *cur; - rc = security_get_bools(&nbools, &bnames, &bvalues); + rc = security_get_bools(state, &nbools, &bnames, &bvalues); if (rc) goto out; for (i = 0; i < nbools; i++) { - booldatum = hashtab_search(p->p_bools.table, bnames[i]); + booldatum = hashtab_search(policydb->p_bools.table, bnames[i]); if (booldatum) booldatum->state = bvalues[i]; } - for (cur = p->cond_list; cur; cur = cur->next) { - rc = evaluate_cond_node(p, cur); + for (cur = policydb->cond_list; cur; cur = cur->next) { + rc = evaluate_cond_node(policydb, cur); if (rc) goto out; } @@ -2844,8 +2993,11 @@ out: * security_sid_mls_copy() - computes a new sid based on the given * sid and the mls portion of mls_sid. */ -int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid) +int security_sid_mls_copy(struct selinux_state *state, + u32 sid, u32 mls_sid, u32 *new_sid) { + struct policydb *policydb = &state->ss->policydb; + struct sidtab *sidtab = state->ss->sidtab; struct context *context1; struct context *context2; struct context newcon; @@ -2854,27 +3006,27 @@ int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid) int rc; rc = 0; - if (!ss_initialized || !policydb.mls_enabled) { + if (!state->initialized || !policydb->mls_enabled) { *new_sid = sid; goto out; } context_init(&newcon); - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); rc = -EINVAL; - context1 = sidtab_search(&sidtab, sid); + context1 = sidtab_search(sidtab, sid); if (!context1) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, sid); goto out_unlock; } rc = -EINVAL; - context2 = sidtab_search(&sidtab, mls_sid); + context2 = sidtab_search(sidtab, mls_sid); if (!context2) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, mls_sid); goto out_unlock; } @@ -2887,10 +3039,11 @@ int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid) goto out_unlock; /* Check the validity of the new context. */ - if (!policydb_context_isvalid(&policydb, &newcon)) { - rc = convert_context_handle_invalid_context(&newcon); + if (!policydb_context_isvalid(policydb, &newcon)) { + rc = convert_context_handle_invalid_context(state, &newcon); if (rc) { - if (!context_struct_to_string(&newcon, &s, &len)) { + if (!context_struct_to_string(policydb, &newcon, &s, + &len)) { audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR, "op=security_sid_mls_copy " @@ -2900,10 +3053,9 @@ int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid) goto out_unlock; } } - - rc = sidtab_context_to_sid(&sidtab, &newcon, new_sid); + rc = context_struct_to_sid(state, &newcon, new_sid); out_unlock: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); context_destroy(&newcon); out: return rc; @@ -2929,10 +3081,13 @@ out: * multiple, inconsistent labels | - | SECSID_NULL * */ -int security_net_peersid_resolve(u32 nlbl_sid, u32 nlbl_type, +int security_net_peersid_resolve(struct selinux_state *state, + u32 nlbl_sid, u32 nlbl_type, u32 xfrm_sid, u32 *peer_sid) { + struct policydb *policydb = &state->ss->policydb; + struct sidtab *sidtab = state->ss->sidtab; int rc; struct context *nlbl_ctx; struct context *xfrm_ctx; @@ -2954,25 +3109,27 @@ int security_net_peersid_resolve(u32 nlbl_sid, u32 nlbl_type, return 0; } - /* we don't need to check ss_initialized here since the only way both + /* + * We don't need to check initialized here since the only way both * nlbl_sid and xfrm_sid are not equal to SECSID_NULL would be if the - * security server was initialized and ss_initialized was true */ - if (!policydb.mls_enabled) + * security server was initialized and state->initialized was true. + */ + if (!policydb->mls_enabled) return 0; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); rc = -EINVAL; - nlbl_ctx = sidtab_search(&sidtab, nlbl_sid); + nlbl_ctx = sidtab_search(sidtab, nlbl_sid); if (!nlbl_ctx) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, nlbl_sid); goto out; } rc = -EINVAL; - xfrm_ctx = sidtab_search(&sidtab, xfrm_sid); + xfrm_ctx = sidtab_search(sidtab, xfrm_sid); if (!xfrm_ctx) { - printk(KERN_ERR "SELinux: %s: unrecognized SID %d\n", + pr_err("SELinux: %s: unrecognized SID %d\n", __func__, xfrm_sid); goto out; } @@ -2987,7 +3144,7 @@ int security_net_peersid_resolve(u32 nlbl_sid, u32 nlbl_type, * expressive */ *peer_sid = xfrm_sid; out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } @@ -3004,19 +3161,21 @@ static int get_classes_callback(void *k, void *d, void *args) return 0; } -int security_get_classes(char ***classes, int *nclasses) +int security_get_classes(struct selinux_state *state, + char ***classes, int *nclasses) { + struct policydb *policydb = &state->ss->policydb; int rc; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); rc = -ENOMEM; - *nclasses = policydb.p_classes.nprim; + *nclasses = policydb->p_classes.nprim; *classes = kcalloc(*nclasses, sizeof(**classes), GFP_ATOMIC); if (!*classes) goto out; - rc = hashtab_map(policydb.p_classes.table, get_classes_callback, + rc = hashtab_map(policydb->p_classes.table, get_classes_callback, *classes); if (rc) { int i; @@ -3026,7 +3185,7 @@ int security_get_classes(char ***classes, int *nclasses) } out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } @@ -3043,17 +3202,19 @@ static int get_permissions_callback(void *k, void *d, void *args) return 0; } -int security_get_permissions(char *class, char ***perms, int *nperms) +int security_get_permissions(struct selinux_state *state, + char *class, char ***perms, int *nperms) { + struct policydb *policydb = &state->ss->policydb; int rc, i; struct class_datum *match; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); rc = -EINVAL; - match = hashtab_search(policydb.p_classes.table, class); + match = hashtab_search(policydb->p_classes.table, class); if (!match) { - printk(KERN_ERR "SELinux: %s: unrecognized class %s\n", + pr_err("SELinux: %s: unrecognized class %s\n", __func__, class); goto out; } @@ -3077,25 +3238,25 @@ int security_get_permissions(char *class, char ***perms, int *nperms) goto err; out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; err: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); for (i = 0; i < *nperms; i++) kfree((*perms)[i]); kfree(*perms); return rc; } -int security_get_reject_unknown(void) +int security_get_reject_unknown(struct selinux_state *state) { - return policydb.reject_unknown; + return state->ss->policydb.reject_unknown; } -int security_get_allow_unknown(void) +int security_get_allow_unknown(struct selinux_state *state) { - return policydb.allow_unknown; + return state->ss->policydb.allow_unknown; } /** @@ -3108,13 +3269,15 @@ int security_get_allow_unknown(void) * supported, false (0) if it isn't supported. * */ -int security_policycap_supported(unsigned int req_cap) +int security_policycap_supported(struct selinux_state *state, + unsigned int req_cap) { + struct policydb *policydb = &state->ss->policydb; int rc; - read_lock(&policy_rwlock); - rc = ebitmap_get_bit(&policydb.policycaps, req_cap); - read_unlock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + rc = ebitmap_get_bit(&policydb->policycaps, req_cap); + read_unlock(&state->ss->policy_rwlock); return rc; } @@ -3136,6 +3299,8 @@ void selinux_audit_rule_free(void *vrule) int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) { + struct selinux_state *state = &selinux_state; + struct policydb *policydb = &state->ss->policydb; struct selinux_audit_rule *tmprule; struct role_datum *roledatum; struct type_datum *typedatum; @@ -3145,7 +3310,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) *rule = NULL; - if (!ss_initialized) + if (!state->initialized) return -EOPNOTSUPP; switch (field) { @@ -3178,15 +3343,15 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) context_init(&tmprule->au_ctxt); - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); - tmprule->au_seqno = latest_granting; + tmprule->au_seqno = state->ss->latest_granting; switch (field) { case AUDIT_SUBJ_USER: case AUDIT_OBJ_USER: rc = -EINVAL; - userdatum = hashtab_search(policydb.p_users.table, rulestr); + userdatum = hashtab_search(policydb->p_users.table, rulestr); if (!userdatum) goto out; tmprule->au_ctxt.user = userdatum->value; @@ -3194,7 +3359,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) case AUDIT_SUBJ_ROLE: case AUDIT_OBJ_ROLE: rc = -EINVAL; - roledatum = hashtab_search(policydb.p_roles.table, rulestr); + roledatum = hashtab_search(policydb->p_roles.table, rulestr); if (!roledatum) goto out; tmprule->au_ctxt.role = roledatum->value; @@ -3202,7 +3367,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) case AUDIT_SUBJ_TYPE: case AUDIT_OBJ_TYPE: rc = -EINVAL; - typedatum = hashtab_search(policydb.p_types.table, rulestr); + typedatum = hashtab_search(policydb->p_types.table, rulestr); if (!typedatum) goto out; tmprule->au_ctxt.type = typedatum->value; @@ -3211,14 +3376,15 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) case AUDIT_SUBJ_CLR: case AUDIT_OBJ_LEV_LOW: case AUDIT_OBJ_LEV_HIGH: - rc = mls_from_string(rulestr, &tmprule->au_ctxt, GFP_ATOMIC); + rc = mls_from_string(policydb, rulestr, &tmprule->au_ctxt, + GFP_ATOMIC); if (rc) goto out; break; } rc = 0; out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); if (rc) { selinux_audit_rule_free(tmprule); @@ -3258,6 +3424,7 @@ int selinux_audit_rule_known(struct audit_krule *rule) int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule, struct audit_context *actx) { + struct selinux_state *state = &selinux_state; struct context *ctxt; struct mls_level *level; struct selinux_audit_rule *rule = vrule; @@ -3268,14 +3435,14 @@ int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule, return -ENOENT; } - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); - if (rule->au_seqno < latest_granting) { + if (rule->au_seqno < state->ss->latest_granting) { match = -ESTALE; goto out; } - ctxt = sidtab_search(&sidtab, sid); + ctxt = sidtab_search(state->ss->sidtab, sid); if (unlikely(!ctxt)) { WARN_ONCE(1, "selinux_audit_rule_match: unrecognized SID %d\n", sid); @@ -3359,7 +3526,7 @@ int selinux_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule, } out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return match; } @@ -3433,19 +3600,22 @@ static void security_netlbl_cache_add(struct netlbl_lsm_secattr *secattr, * failure. * */ -int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr, +int security_netlbl_secattr_to_sid(struct selinux_state *state, + struct netlbl_lsm_secattr *secattr, u32 *sid) { + struct policydb *policydb = &state->ss->policydb; + struct sidtab *sidtab = state->ss->sidtab; int rc; struct context *ctx; struct context ctx_new; - if (!ss_initialized) { + if (!state->initialized) { *sid = SECSID_NULL; return 0; } - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); if (secattr->flags & NETLBL_SECATTR_CACHE) *sid = *(u32 *)secattr->cache->data; @@ -3453,7 +3623,7 @@ int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr, *sid = secattr->attr.secid; else if (secattr->flags & NETLBL_SECATTR_MLS_LVL) { rc = -EIDRM; - ctx = sidtab_search(&sidtab, SECINITSID_NETMSG); + ctx = sidtab_search(sidtab, SECINITSID_NETMSG); if (ctx == NULL) goto out; @@ -3461,17 +3631,17 @@ int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr, ctx_new.user = ctx->user; ctx_new.role = ctx->role; ctx_new.type = ctx->type; - mls_import_netlbl_lvl(&ctx_new, secattr); + mls_import_netlbl_lvl(policydb, &ctx_new, secattr); if (secattr->flags & NETLBL_SECATTR_MLS_CAT) { - rc = mls_import_netlbl_cat(&ctx_new, secattr); + rc = mls_import_netlbl_cat(policydb, &ctx_new, secattr); if (rc) goto out; } rc = -EIDRM; - if (!mls_context_isvalid(&policydb, &ctx_new)) + if (!mls_context_isvalid(policydb, &ctx_new)) goto out_free; - rc = sidtab_context_to_sid(&sidtab, &ctx_new, sid); + rc = context_struct_to_sid(state, &ctx_new, sid); if (rc) goto out_free; @@ -3481,12 +3651,12 @@ int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr, } else *sid = SECSID_NULL; - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return 0; out_free: ebitmap_destroy(&ctx_new.range.level[0].cat); out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } @@ -3500,33 +3670,35 @@ out: * Returns zero on success, negative values on failure. * */ -int security_netlbl_sid_to_secattr(u32 sid, struct netlbl_lsm_secattr *secattr) +int security_netlbl_sid_to_secattr(struct selinux_state *state, + u32 sid, struct netlbl_lsm_secattr *secattr) { + struct policydb *policydb = &state->ss->policydb; int rc; struct context *ctx; - if (!ss_initialized) + if (!state->initialized) return 0; - read_lock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); rc = -ENOENT; - ctx = sidtab_search(&sidtab, sid); + ctx = sidtab_search(state->ss->sidtab, sid); if (ctx == NULL) goto out; rc = -ENOMEM; - secattr->domain = kstrdup(sym_name(&policydb, SYM_TYPES, ctx->type - 1), + secattr->domain = kstrdup(sym_name(policydb, SYM_TYPES, ctx->type - 1), GFP_ATOMIC); if (secattr->domain == NULL) goto out; secattr->attr.secid = sid; secattr->flags |= NETLBL_SECATTR_DOMAIN_CPY | NETLBL_SECATTR_SECID; - mls_export_netlbl_lvl(ctx, secattr); - rc = mls_export_netlbl_cat(ctx, secattr); + mls_export_netlbl_lvl(policydb, ctx, secattr); + rc = mls_export_netlbl_cat(policydb, ctx, secattr); out: - read_unlock(&policy_rwlock); + read_unlock(&state->ss->policy_rwlock); return rc; } #endif /* CONFIG_NETLABEL */ @@ -3537,15 +3709,17 @@ out: * @len: length of data in bytes * */ -int security_read_policy(void **data, size_t *len) +int security_read_policy(struct selinux_state *state, + void **data, size_t *len) { + struct policydb *policydb = &state->ss->policydb; int rc; struct policy_file fp; - if (!ss_initialized) + if (!state->initialized) return -EINVAL; - *len = security_policydb_len(); + *len = security_policydb_len(state); *data = vmalloc_user(*len); if (!*data) @@ -3554,9 +3728,9 @@ int security_read_policy(void **data, size_t *len) fp.data = *data; fp.len = *len; - read_lock(&policy_rwlock); - rc = policydb_write(&policydb, &fp); - read_unlock(&policy_rwlock); + read_lock(&state->ss->policy_rwlock); + rc = policydb_write(policydb, &fp); + read_unlock(&state->ss->policy_rwlock); if (rc) return rc; diff --git a/security/selinux/ss/services.h b/security/selinux/ss/services.h index 356bdd36cf6d..fc40640a9725 100644 --- a/security/selinux/ss/services.h +++ b/security/selinux/ss/services.h @@ -8,9 +8,30 @@ #define _SS_SERVICES_H_ #include "policydb.h" -#include "sidtab.h" +#include "context.h" -extern struct policydb policydb; +/* Mapping for a single class */ +struct selinux_mapping { + u16 value; /* policy value for class */ + unsigned int num_perms; /* number of permissions in class */ + u32 perms[sizeof(u32) * 8]; /* policy values for permissions */ +}; + +/* Map for all of the classes, with array size */ +struct selinux_map { + struct selinux_mapping *mapping; /* indexed by class */ + u16 size; /* array size of mapping */ +}; + +struct selinux_ss { + struct sidtab *sidtab; + struct policydb policydb; + rwlock_t policy_rwlock; + u32 latest_granting; + struct selinux_map map; + struct page *status_page; + struct mutex status_lock; +}; void services_compute_xperms_drivers(struct extended_perms *xperms, struct avtab_node *node); @@ -18,5 +39,6 @@ void services_compute_xperms_drivers(struct extended_perms *xperms, void services_compute_xperms_decision(struct extended_perms_decision *xpermd, struct avtab_node *node); -#endif /* _SS_SERVICES_H_ */ +int context_add_hash(struct policydb *policydb, struct context *context); +#endif /* _SS_SERVICES_H_ */ diff --git a/security/selinux/ss/sidtab.c b/security/selinux/ss/sidtab.c index 5be31b7af225..d9d8599e8e63 100644 --- a/security/selinux/ss/sidtab.c +++ b/security/selinux/ss/sidtab.c @@ -2,108 +2,226 @@ /* * Implementation of the SID table type. * - * Author : Stephen Smalley, + * Original author: Stephen Smalley, + * Author: Ondrej Mosnacek, + * + * Copyright (C) 2018 Red Hat, Inc. */ +#include #include #include +#include #include -#include +#include #include "flask.h" #include "security.h" #include "sidtab.h" -#define SIDTAB_HASH(sid) \ -(sid & SIDTAB_HASH_MASK) +#define index_to_sid(index) (index + SECINITSID_NUM + 1) +#define sid_to_index(sid) (sid - (SECINITSID_NUM + 1)) int sidtab_init(struct sidtab *s) { - int i; + u32 i; + + memset(s->roots, 0, sizeof(s->roots)); + + for (i = 0; i < SECINITSID_NUM; i++) + s->isids[i].set = 0; + + s->count = 0; + s->convert = NULL; + hash_init(s->context_to_sid); - s->htable = kmalloc_array(SIDTAB_SIZE, sizeof(*s->htable), GFP_ATOMIC); - if (!s->htable) - return -ENOMEM; - for (i = 0; i < SIDTAB_SIZE; i++) - s->htable[i] = NULL; - s->nel = 0; - s->next_sid = 1; - s->shutdown = 0; spin_lock_init(&s->lock); return 0; } -int sidtab_insert(struct sidtab *s, u32 sid, struct context *context) +static u32 context_to_sid(struct sidtab *s, struct context *context) { - int hvalue; - struct sidtab_node *prev, *cur, *newnode; + struct sidtab_entry_leaf *entry; + u32 sid = 0; - if (!s) - return -ENOMEM; + rcu_read_lock(); + hash_for_each_possible_rcu(s->context_to_sid, entry, list, + context->hash) { + if (context_cmp(&entry->context, context)) { + sid = entry->sid; + break; + } + } + rcu_read_unlock(); + return sid; +} - hvalue = SIDTAB_HASH(sid); - prev = NULL; - cur = s->htable[hvalue]; - while (cur && sid > cur->sid) { - prev = cur; - cur = cur->next; +int sidtab_set_initial(struct sidtab *s, u32 sid, struct context *context) +{ + struct sidtab_isid_entry *entry; + int rc; + + if (sid == 0 || sid > SECINITSID_NUM) + return -EINVAL; + + entry = &s->isids[sid - 1]; + + rc = context_cpy(&entry->leaf.context, context); + if (rc) + return rc; + + entry->set = 1; + + /* + * Multiple initial sids may map to the same context. Check that this + * context is not already represented in the context_to_sid hashtable + * to avoid duplicate entries and long linked lists upon hash + * collision. + */ + if (!context_to_sid(s, context)) { + entry->leaf.sid = sid; + hash_add(s->context_to_sid, &entry->leaf.list, context->hash); } - if (cur && sid == cur->sid) - return -EEXIST; - - newnode = kmalloc(sizeof(*newnode), GFP_ATOMIC); - if (!newnode) - return -ENOMEM; - - newnode->sid = sid; - if (context_cpy(&newnode->context, context)) { - kfree(newnode); - return -ENOMEM; - } - - if (prev) { - newnode->next = prev->next; - wmb(); - prev->next = newnode; - } else { - newnode->next = s->htable[hvalue]; - wmb(); - s->htable[hvalue] = newnode; - } - - s->nel++; - if (sid >= s->next_sid) - s->next_sid = sid + 1; return 0; } +int sidtab_hash_stats(struct sidtab *sidtab, char *page) +{ + int i; + int chain_len = 0; + int slots_used = 0; + int entries = 0; + int max_chain_len = 0; + int cur_bucket = 0; + struct sidtab_entry_leaf *entry; + + rcu_read_lock(); + hash_for_each_rcu(sidtab->context_to_sid, i, entry, list) { + entries++; + if (i == cur_bucket) { + chain_len++; + if (chain_len == 1) + slots_used++; + } else { + cur_bucket = i; + if (chain_len > max_chain_len) + max_chain_len = chain_len; + chain_len = 0; + } + } + rcu_read_unlock(); + + if (chain_len > max_chain_len) + max_chain_len = chain_len; + + return scnprintf(page, PAGE_SIZE, "entries: %d\nbuckets used: %d/%d\n" + "longest chain: %d\n", entries, + slots_used, SIDTAB_HASH_BUCKETS, max_chain_len); +} + +static u32 sidtab_level_from_count(u32 count) +{ + u32 capacity = SIDTAB_LEAF_ENTRIES; + u32 level = 0; + + while (count > capacity) { + capacity <<= SIDTAB_INNER_SHIFT; + ++level; + } + return level; +} + +static int sidtab_alloc_roots(struct sidtab *s, u32 level) +{ + u32 l; + + if (!s->roots[0].ptr_leaf) { + s->roots[0].ptr_leaf = kzalloc(SIDTAB_NODE_ALLOC_SIZE, + GFP_ATOMIC); + if (!s->roots[0].ptr_leaf) + return -ENOMEM; + } + for (l = 1; l <= level; ++l) + if (!s->roots[l].ptr_inner) { + s->roots[l].ptr_inner = kzalloc(SIDTAB_NODE_ALLOC_SIZE, + GFP_ATOMIC); + if (!s->roots[l].ptr_inner) + return -ENOMEM; + s->roots[l].ptr_inner->entries[0] = s->roots[l - 1]; + } + return 0; +} + +static struct sidtab_entry_leaf *sidtab_do_lookup(struct sidtab *s, u32 index, + int alloc) +{ + union sidtab_entry_inner *entry; + u32 level, capacity_shift, leaf_index = index / SIDTAB_LEAF_ENTRIES; + + /* find the level of the subtree we need */ + level = sidtab_level_from_count(index + 1); + capacity_shift = level * SIDTAB_INNER_SHIFT; + + /* allocate roots if needed */ + if (alloc && sidtab_alloc_roots(s, level) != 0) + return NULL; + + /* lookup inside the subtree */ + entry = &s->roots[level]; + while (level != 0) { + capacity_shift -= SIDTAB_INNER_SHIFT; + --level; + + entry = &entry->ptr_inner->entries[leaf_index >> capacity_shift]; + leaf_index &= ((u32)1 << capacity_shift) - 1; + + if (!entry->ptr_inner) { + if (alloc) + entry->ptr_inner = kzalloc(SIDTAB_NODE_ALLOC_SIZE, + GFP_ATOMIC); + if (!entry->ptr_inner) + return NULL; + } + } + if (!entry->ptr_leaf) { + if (alloc) + entry->ptr_leaf = kzalloc(SIDTAB_NODE_ALLOC_SIZE, + GFP_ATOMIC); + if (!entry->ptr_leaf) + return NULL; + } + return &entry->ptr_leaf->entries[index % SIDTAB_LEAF_ENTRIES]; +} + +static struct context *sidtab_lookup(struct sidtab *s, u32 index) +{ + /* read entries only after reading count */ + u32 count = smp_load_acquire(&s->count); + + if (index >= count) + return NULL; + + return &sidtab_do_lookup(s, index, 0)->context; +} + +static struct context *sidtab_lookup_initial(struct sidtab *s, u32 sid) +{ + return s->isids[sid - 1].set ? &s->isids[sid - 1].leaf.context : NULL; +} + static struct context *sidtab_search_core(struct sidtab *s, u32 sid, int force) { - int hvalue; - struct sidtab_node *cur; + struct context *context; - if (!s) - return NULL; - - hvalue = SIDTAB_HASH(sid); - cur = s->htable[hvalue]; - while (cur && sid > cur->sid) - cur = cur->next; - - if (force && cur && sid == cur->sid && cur->context.len) - return &cur->context; - - if (!cur || sid != cur->sid || cur->context.len) { - /* Remap invalid SIDs to the unlabeled SID. */ - sid = SECINITSID_UNLABELED; - hvalue = SIDTAB_HASH(sid); - cur = s->htable[hvalue]; - while (cur && sid > cur->sid) - cur = cur->next; - if (!cur || sid != cur->sid) - return NULL; + if (sid != 0) { + if (sid > SECINITSID_NUM) + context = sidtab_lookup(s, sid_to_index(sid)); + else + context = sidtab_lookup_initial(s, sid); + if (context && (!context->len || force)) + return context; } - return &cur->context; + return sidtab_lookup_initial(s, SECINITSID_UNLABELED); } struct context *sidtab_search(struct sidtab *s, u32 sid) @@ -116,192 +234,252 @@ struct context *sidtab_search_force(struct sidtab *s, u32 sid) return sidtab_search_core(s, sid, 1); } -int sidtab_map(struct sidtab *s, - int (*apply) (u32 sid, - struct context *context, - void *args), - void *args) +int sidtab_context_to_sid(struct sidtab *s, struct context *context, + u32 *sid) { - int i, rc = 0; - struct sidtab_node *cur; + unsigned long flags; + u32 count; + struct sidtab_convert_params *convert; + struct sidtab_entry_leaf *dst, *dst_convert; + int rc; - if (!s) - goto out; + *sid = context_to_sid(s, context); + if (*sid) + return 0; - for (i = 0; i < SIDTAB_SIZE; i++) { - cur = s->htable[i]; - while (cur) { - rc = apply(cur->sid, &cur->context, args); - if (rc) - goto out; - cur = cur->next; + /* lock-free search failed: lock, re-search, and insert if not found */ + spin_lock_irqsave(&s->lock, flags); + + rc = 0; + *sid = context_to_sid(s, context); + if (*sid) + goto out_unlock; + + /* read entries only after reading count */ + count = smp_load_acquire(&s->count); + convert = s->convert; + + /* bail out if we already reached max entries */ + rc = -EOVERFLOW; + if (count >= SIDTAB_MAX) + goto out_unlock; + + /* insert context into new entry */ + rc = -ENOMEM; + dst = sidtab_do_lookup(s, count, 1); + if (!dst) + goto out_unlock; + + dst->sid = index_to_sid(count); + + rc = context_cpy(&dst->context, context); + if (rc) + goto out_unlock; + + /* + * if we are building a new sidtab, we need to convert the context + * and insert it there as well + */ + if (convert) { + rc = -ENOMEM; + dst_convert = sidtab_do_lookup(convert->target, count, 1); + if (!dst_convert) { + context_destroy(&dst->context); + goto out_unlock; } + + rc = convert->func(context, &dst_convert->context, + convert->args); + if (rc) { + context_destroy(&dst->context); + goto out_unlock; + } + dst_convert->sid = index_to_sid(count); + convert->target->count = count + 1; + + hash_add_rcu(convert->target->context_to_sid, + &dst_convert->list, dst_convert->context.hash); } -out: + + if (context->len) + pr_info("SELinux: Context %s is not valid (left unmapped).\n", + context->str); + + *sid = index_to_sid(count); + + /* write entries before updating count */ + smp_store_release(&s->count, count + 1); + hash_add_rcu(s->context_to_sid, &dst->list, dst->context.hash); + + rc = 0; +out_unlock: + spin_unlock_irqrestore(&s->lock, flags); return rc; } -static void sidtab_update_cache(struct sidtab *s, struct sidtab_node *n, int loc) +static void sidtab_convert_hashtable(struct sidtab *s, u32 count) { - BUG_ON(loc >= SIDTAB_CACHE_LEN); + struct sidtab_entry_leaf *entry; + u32 i; + + for (i = 0; i < count; i++) { + entry = sidtab_do_lookup(s, i, 0); + entry->sid = index_to_sid(i); + + hash_add_rcu(s->context_to_sid, &entry->list, + entry->context.hash); - while (loc > 0) { - s->cache[loc] = s->cache[loc - 1]; - loc--; } - s->cache[0] = n; } -static inline u32 sidtab_search_context(struct sidtab *s, - struct context *context) +static int sidtab_convert_tree(union sidtab_entry_inner *edst, + union sidtab_entry_inner *esrc, + u32 *pos, u32 count, u32 level, + struct sidtab_convert_params *convert) { - int i; - struct sidtab_node *cur; + int rc; + u32 i; - for (i = 0; i < SIDTAB_SIZE; i++) { - cur = s->htable[i]; - while (cur) { - if (context_cmp(&cur->context, context)) { - sidtab_update_cache(s, cur, SIDTAB_CACHE_LEN - 1); - return cur->sid; - } - cur = cur->next; + if (level != 0) { + if (!edst->ptr_inner) { + edst->ptr_inner = kzalloc(SIDTAB_NODE_ALLOC_SIZE, + GFP_KERNEL); + if (!edst->ptr_inner) + return -ENOMEM; } + i = 0; + while (i < SIDTAB_INNER_ENTRIES && *pos < count) { + rc = sidtab_convert_tree(&edst->ptr_inner->entries[i], + &esrc->ptr_inner->entries[i], + pos, count, level - 1, + convert); + if (rc) + return rc; + i++; + } + } else { + if (!edst->ptr_leaf) { + edst->ptr_leaf = kzalloc(SIDTAB_NODE_ALLOC_SIZE, + GFP_KERNEL); + if (!edst->ptr_leaf) + return -ENOMEM; + } + i = 0; + while (i < SIDTAB_LEAF_ENTRIES && *pos < count) { + rc = convert->func(&esrc->ptr_leaf->entries[i].context, + &edst->ptr_leaf->entries[i].context, + convert->args); + if (rc) + return rc; + (*pos)++; + i++; + } + cond_resched(); } + return 0; } -static inline u32 sidtab_search_cache(struct sidtab *s, struct context *context) +int sidtab_convert(struct sidtab *s, struct sidtab_convert_params *params) { - int i; - struct sidtab_node *node; - - for (i = 0; i < SIDTAB_CACHE_LEN; i++) { - node = s->cache[i]; - if (unlikely(!node)) - return 0; - if (context_cmp(&node->context, context)) { - sidtab_update_cache(s, node, i); - return node->sid; - } - } - return 0; -} - -int sidtab_context_to_sid(struct sidtab *s, - struct context *context, - u32 *out_sid) -{ - u32 sid; - int ret = 0; unsigned long flags; + u32 count, level, pos; + int rc; - *out_sid = SECSID_NULL; + spin_lock_irqsave(&s->lock, flags); - sid = sidtab_search_cache(s, context); - if (!sid) - sid = sidtab_search_context(s, context); - if (!sid) { - spin_lock_irqsave(&s->lock, flags); - /* Rescan now that we hold the lock. */ - sid = sidtab_search_context(s, context); - if (sid) - goto unlock_out; - /* No SID exists for the context. Allocate a new one. */ - if (s->next_sid == UINT_MAX || s->shutdown) { - ret = -ENOMEM; - goto unlock_out; - } - sid = s->next_sid++; - if (context->len) - printk(KERN_INFO - "SELinux: Context %s is not valid (left unmapped).\n", - context->str); - ret = sidtab_insert(s, sid, context); - if (ret) - s->next_sid--; -unlock_out: + /* concurrent policy loads are not allowed */ + if (s->convert) { spin_unlock_irqrestore(&s->lock, flags); + return -EBUSY; } - if (ret) - return ret; + count = s->count; + level = sidtab_level_from_count(count); + + /* allocate last leaf in the new sidtab (to avoid race with + * live convert) + */ + rc = sidtab_do_lookup(params->target, count - 1, 1) ? 0 : -ENOMEM; + if (rc) { + spin_unlock_irqrestore(&s->lock, flags); + return rc; + } + + /* set count in case no new entries are added during conversion */ + params->target->count = count; + + /* enable live convert of new entries */ + s->convert = params; + + /* we can safely convert the tree outside the lock */ + spin_unlock_irqrestore(&s->lock, flags); + + pr_info("SELinux: Converting %u SID table entries...\n", count); + + /* convert all entries not covered by live convert */ + pos = 0; + rc = sidtab_convert_tree(¶ms->target->roots[level], + &s->roots[level], &pos, count, level, params); + if (rc) { + /* we need to keep the old table - disable live convert */ + spin_lock_irqsave(&s->lock, flags); + s->convert = NULL; + spin_unlock_irqrestore(&s->lock, flags); + return rc; + } + /* + * The hashtable can also be modified in sidtab_context_to_sid() + * so we must re-acquire the lock here. + */ + spin_lock_irqsave(&s->lock, flags); + sidtab_convert_hashtable(params->target, count); + spin_unlock_irqrestore(&s->lock, flags); - *out_sid = sid; return 0; } -void sidtab_hash_eval(struct sidtab *h, char *tag) +static void sidtab_destroy_tree(union sidtab_entry_inner entry, u32 level) { - int i, chain_len, slots_used, max_chain_len; - struct sidtab_node *cur; + u32 i; - slots_used = 0; - max_chain_len = 0; - for (i = 0; i < SIDTAB_SIZE; i++) { - cur = h->htable[i]; - if (cur) { - slots_used++; - chain_len = 0; - while (cur) { - chain_len++; - cur = cur->next; - } + if (level != 0) { + struct sidtab_node_inner *node = entry.ptr_inner; - if (chain_len > max_chain_len) - max_chain_len = chain_len; - } + if (!node) + return; + + for (i = 0; i < SIDTAB_INNER_ENTRIES; i++) + sidtab_destroy_tree(node->entries[i], level - 1); + kfree(node); + } else { + struct sidtab_node_leaf *node = entry.ptr_leaf; + + if (!node) + return; + + for (i = 0; i < SIDTAB_LEAF_ENTRIES; i++) + context_destroy(&node->entries[i].context); + kfree(node); } - - printk(KERN_DEBUG "%s: %d entries and %d/%d buckets used, longest " - "chain length %d\n", tag, h->nel, slots_used, SIDTAB_SIZE, - max_chain_len); } void sidtab_destroy(struct sidtab *s) { - int i; - struct sidtab_node *cur, *temp; + u32 i, level; - if (!s) - return; + for (i = 0; i < SECINITSID_NUM; i++) + if (s->isids[i].set) + context_destroy(&s->isids[i].leaf.context); - for (i = 0; i < SIDTAB_SIZE; i++) { - cur = s->htable[i]; - while (cur) { - temp = cur; - cur = cur->next; - context_destroy(&temp->context); - kfree(temp); - } - s->htable[i] = NULL; - } - kfree(s->htable); - s->htable = NULL; - s->nel = 0; - s->next_sid = 1; -} - -void sidtab_set(struct sidtab *dst, struct sidtab *src) -{ - unsigned long flags; - int i; - - spin_lock_irqsave(&src->lock, flags); - dst->htable = src->htable; - dst->nel = src->nel; - dst->next_sid = src->next_sid; - dst->shutdown = 0; - for (i = 0; i < SIDTAB_CACHE_LEN; i++) - dst->cache[i] = NULL; - spin_unlock_irqrestore(&src->lock, flags); -} - -void sidtab_shutdown(struct sidtab *s) -{ - unsigned long flags; - - spin_lock_irqsave(&s->lock, flags); - s->shutdown = 1; - spin_unlock_irqrestore(&s->lock, flags); + level = SIDTAB_MAX_LEVEL; + while (level && !s->roots[level].ptr_inner) + --level; + + sidtab_destroy_tree(s->roots[level], level); + /* + * The context_to_sid hashtable's objects are all shared + * with the isids array and context tree, and so don't need + * to be cleaned up here. + */ } diff --git a/security/selinux/ss/sidtab.h b/security/selinux/ss/sidtab.h index a1a1d2617b6f..8564ec0f3599 100644 --- a/security/selinux/ss/sidtab.h +++ b/security/selinux/ss/sidtab.h @@ -1,56 +1,112 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * A security identifier table (sidtab) is a hash table + * A security identifier table (sidtab) is a lookup table * of security context structures indexed by SID value. * - * Author : Stephen Smalley, + * Original author: Stephen Smalley, + * Author: Ondrej Mosnacek, + * + * Copyright (C) 2018 Red Hat, Inc. */ #ifndef _SS_SIDTAB_H_ #define _SS_SIDTAB_H_ -#include "context.h" +#include +#include +#include -struct sidtab_node { - u32 sid; /* security identifier */ - struct context context; /* security context structure */ - struct sidtab_node *next; +#include "context.h" +#include "flask.h" + +struct sidtab_entry_leaf { + u32 sid; + struct context context; + struct hlist_node list; }; -#define SIDTAB_HASH_BITS 7 -#define SIDTAB_HASH_BUCKETS (1 << SIDTAB_HASH_BITS) -#define SIDTAB_HASH_MASK (SIDTAB_HASH_BUCKETS-1) +struct sidtab_node_inner; +struct sidtab_node_leaf; -#define SIDTAB_SIZE SIDTAB_HASH_BUCKETS +union sidtab_entry_inner { + struct sidtab_node_inner *ptr_inner; + struct sidtab_node_leaf *ptr_leaf; +}; + +/* align node size to page boundary */ +#define SIDTAB_NODE_ALLOC_SHIFT PAGE_SHIFT +#define SIDTAB_NODE_ALLOC_SIZE PAGE_SIZE + +#define size_to_shift(size) ((size) == 1 ? 1 : (const_ilog2((size) - 1) + 1)) + +#define SIDTAB_INNER_SHIFT \ + (SIDTAB_NODE_ALLOC_SHIFT - size_to_shift(sizeof(union sidtab_entry_inner))) +#define SIDTAB_INNER_ENTRIES ((size_t)1 << SIDTAB_INNER_SHIFT) +#define SIDTAB_LEAF_ENTRIES \ + (SIDTAB_NODE_ALLOC_SIZE / sizeof(struct sidtab_entry_leaf)) + +#define SIDTAB_MAX_BITS 32 +#define SIDTAB_MAX U32_MAX +/* ensure enough tree levels for SIDTAB_MAX entries */ +#define SIDTAB_MAX_LEVEL \ + DIV_ROUND_UP(SIDTAB_MAX_BITS - size_to_shift(SIDTAB_LEAF_ENTRIES), \ + SIDTAB_INNER_SHIFT) + +struct sidtab_node_leaf { + struct sidtab_entry_leaf entries[SIDTAB_LEAF_ENTRIES]; +}; + +struct sidtab_node_inner { + union sidtab_entry_inner entries[SIDTAB_INNER_ENTRIES]; +}; + +struct sidtab_isid_entry { + int set; + struct sidtab_entry_leaf leaf; +}; + +struct sidtab_convert_params { + int (*func)(struct context *oldc, struct context *newc, void *args); + void *args; + struct sidtab *target; +}; + +#define SIDTAB_HASH_BITS CONFIG_SECURITY_SELINUX_SIDTAB_HASH_BITS +#define SIDTAB_HASH_BUCKETS (1 << SIDTAB_HASH_BITS) struct sidtab { - struct sidtab_node **htable; - unsigned int nel; /* number of elements */ - unsigned int next_sid; /* next SID to allocate */ - unsigned char shutdown; -#define SIDTAB_CACHE_LEN 3 - struct sidtab_node *cache[SIDTAB_CACHE_LEN]; + /* + * lock-free read access only for as many items as a prior read of + * 'count' + */ + union sidtab_entry_inner roots[SIDTAB_MAX_LEVEL + 1]; + /* + * access atomically via {READ|WRITE}_ONCE(); only increment under + * spinlock + */ + u32 count; + /* access only under spinlock */ + struct sidtab_convert_params *convert; spinlock_t lock; + + /* index == SID - 1 (no entry for SECSID_NULL) */ + struct sidtab_isid_entry isids[SECINITSID_NUM]; + + /* Hash table for fast reverse context-to-sid lookups. */ + DECLARE_HASHTABLE(context_to_sid, SIDTAB_HASH_BITS); }; int sidtab_init(struct sidtab *s); -int sidtab_insert(struct sidtab *s, u32 sid, struct context *context); +int sidtab_set_initial(struct sidtab *s, u32 sid, struct context *context); struct context *sidtab_search(struct sidtab *s, u32 sid); struct context *sidtab_search_force(struct sidtab *s, u32 sid); -int sidtab_map(struct sidtab *s, - int (*apply) (u32 sid, - struct context *context, - void *args), - void *args); +int sidtab_convert(struct sidtab *s, struct sidtab_convert_params *params); -int sidtab_context_to_sid(struct sidtab *s, - struct context *context, - u32 *sid); +int sidtab_context_to_sid(struct sidtab *s, struct context *context, u32 *sid); -void sidtab_hash_eval(struct sidtab *h, char *tag); void sidtab_destroy(struct sidtab *s); -void sidtab_set(struct sidtab *dst, struct sidtab *src); -void sidtab_shutdown(struct sidtab *s); + +int sidtab_hash_stats(struct sidtab *sidtab, char *page); #endif /* _SS_SIDTAB_H_ */ diff --git a/security/selinux/ss/status.c b/security/selinux/ss/status.c index d982365f9d1a..a121de45ac0e 100644 --- a/security/selinux/ss/status.c +++ b/security/selinux/ss/status.c @@ -35,8 +35,6 @@ * In most cases, application shall confirm the kernel status is not * changed without any system call invocations. */ -static struct page *selinux_status_page; -static DEFINE_MUTEX(selinux_status_lock); /* * selinux_kernel_status_page @@ -44,21 +42,21 @@ static DEFINE_MUTEX(selinux_status_lock); * It returns a reference to selinux_status_page. If the status page is * not allocated yet, it also tries to allocate it at the first time. */ -struct page *selinux_kernel_status_page(void) +struct page *selinux_kernel_status_page(struct selinux_state *state) { struct selinux_kernel_status *status; struct page *result = NULL; - mutex_lock(&selinux_status_lock); - if (!selinux_status_page) { - selinux_status_page = alloc_page(GFP_KERNEL|__GFP_ZERO); + mutex_lock(&state->ss->status_lock); + if (!state->ss->status_page) { + state->ss->status_page = alloc_page(GFP_KERNEL|__GFP_ZERO); - if (selinux_status_page) { - status = page_address(selinux_status_page); + if (state->ss->status_page) { + status = page_address(state->ss->status_page); status->version = SELINUX_KERNEL_STATUS_VERSION; status->sequence = 0; - status->enforcing = selinux_enforcing; + status->enforcing = enforcing_enabled(state); /* * NOTE: the next policyload event shall set * a positive value on the status->policyload, @@ -66,11 +64,12 @@ struct page *selinux_kernel_status_page(void) * So, application can know it was updated. */ status->policyload = 0; - status->deny_unknown = !security_get_allow_unknown(); + status->deny_unknown = + !security_get_allow_unknown(state); } } - result = selinux_status_page; - mutex_unlock(&selinux_status_lock); + result = state->ss->status_page; + mutex_unlock(&state->ss->status_lock); return result; } @@ -80,13 +79,14 @@ struct page *selinux_kernel_status_page(void) * * It updates status of the current enforcing/permissive mode. */ -void selinux_status_update_setenforce(int enforcing) +void selinux_status_update_setenforce(struct selinux_state *state, + int enforcing) { struct selinux_kernel_status *status; - mutex_lock(&selinux_status_lock); - if (selinux_status_page) { - status = page_address(selinux_status_page); + mutex_lock(&state->ss->status_lock); + if (state->ss->status_page) { + status = page_address(state->ss->status_page); status->sequence++; smp_wmb(); @@ -96,7 +96,7 @@ void selinux_status_update_setenforce(int enforcing) smp_wmb(); status->sequence++; } - mutex_unlock(&selinux_status_lock); + mutex_unlock(&state->ss->status_lock); } /* @@ -105,22 +105,23 @@ void selinux_status_update_setenforce(int enforcing) * It updates status of the times of policy reloaded, and current * setting of deny_unknown. */ -void selinux_status_update_policyload(int seqno) +void selinux_status_update_policyload(struct selinux_state *state, + int seqno) { struct selinux_kernel_status *status; - mutex_lock(&selinux_status_lock); - if (selinux_status_page) { - status = page_address(selinux_status_page); + mutex_lock(&state->ss->status_lock); + if (state->ss->status_page) { + status = page_address(state->ss->status_page); status->sequence++; smp_wmb(); status->policyload = seqno; - status->deny_unknown = !security_get_allow_unknown(); + status->deny_unknown = !security_get_allow_unknown(state); smp_wmb(); status->sequence++; } - mutex_unlock(&selinux_status_lock); + mutex_unlock(&state->ss->status_lock); } diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 56e354fcdfc6..fa1430cfe691 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -101,11 +101,13 @@ static int selinux_xfrm_alloc_user(struct xfrm_sec_ctx **ctxp, ctx->ctx_len = str_len; memcpy(ctx->ctx_str, &uctx[1], str_len); ctx->ctx_str[str_len] = '\0'; - rc = security_context_to_sid(ctx->ctx_str, str_len, &ctx->ctx_sid, gfp); + rc = security_context_to_sid(&selinux_state, ctx->ctx_str, str_len, + &ctx->ctx_sid, gfp); if (rc) goto err; - rc = avc_has_perm(tsec->sid, ctx->ctx_sid, + rc = avc_has_perm(&selinux_state, + tsec->sid, ctx->ctx_sid, SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, NULL); if (rc) goto err; @@ -141,7 +143,8 @@ static int selinux_xfrm_delete(struct xfrm_sec_ctx *ctx) if (!ctx) return 0; - return avc_has_perm(tsec->sid, ctx->ctx_sid, + return avc_has_perm(&selinux_state, + tsec->sid, ctx->ctx_sid, SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, NULL); } @@ -163,7 +166,8 @@ int selinux_xfrm_policy_lookup(struct xfrm_sec_ctx *ctx, u32 fl_secid, u8 dir) if (!selinux_authorizable_ctx(ctx)) return -EINVAL; - rc = avc_has_perm(fl_secid, ctx->ctx_sid, + rc = avc_has_perm(&selinux_state, + fl_secid, ctx->ctx_sid, SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, NULL); return (rc == -EACCES ? -ESRCH : rc); } @@ -202,7 +206,8 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, /* We don't need a separate SA Vs. policy polmatch check since the SA * is now of the same label as the flow and a flow Vs. policy polmatch * check had already happened in selinux_xfrm_policy_lookup() above. */ - return (avc_has_perm(fl->flowi_secid, state_sid, + return (avc_has_perm(&selinux_state, + fl->flowi_secid, state_sid, SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, NULL) ? 0 : 1); } @@ -352,7 +357,8 @@ int selinux_xfrm_state_alloc_acquire(struct xfrm_state *x, if (secid == 0) return -EINVAL; - rc = security_sid_to_context(secid, &ctx_str, &str_len); + rc = security_sid_to_context(&selinux_state, secid, &ctx_str, + &str_len); if (rc) return rc; @@ -420,7 +426,8 @@ int selinux_xfrm_sock_rcv_skb(u32 sk_sid, struct sk_buff *skb, /* This check even when there's no association involved is intended, * according to Trent Jaeger, to make sure a process can't engage in * non-IPsec communication unless explicitly allowed by policy. */ - return avc_has_perm(sk_sid, peer_sid, + return avc_has_perm(&selinux_state, + sk_sid, peer_sid, SECCLASS_ASSOCIATION, ASSOCIATION__RECVFROM, ad); } @@ -463,6 +470,6 @@ int selinux_xfrm_postroute_last(u32 sk_sid, struct sk_buff *skb, /* This check even when there's no association involved is intended, * according to Trent Jaeger, to make sure a process can't engage in * non-IPsec communication unless explicitly allowed by policy. */ - return avc_has_perm(sk_sid, SECINITSID_UNLABELED, + return avc_has_perm(&selinux_state, sk_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, ad); } diff --git a/sound/aoa/codecs/onyx.c b/sound/aoa/codecs/onyx.c index d2d96ca082b7..6224fd3bbf7c 100644 --- a/sound/aoa/codecs/onyx.c +++ b/sound/aoa/codecs/onyx.c @@ -74,8 +74,10 @@ static int onyx_read_register(struct onyx *onyx, u8 reg, u8 *value) return 0; } v = i2c_smbus_read_byte_data(onyx->i2c, reg); - if (v < 0) + if (v < 0) { + *value = 0; return -1; + } *value = (u8)v; onyx->cache[ONYX_REG_CONTROL-FIRSTREGISTER] = *value; return 0; diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index 2e2d18468491..7ae8e24dc1e6 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -529,7 +529,7 @@ static int snd_compress_check_input(struct snd_compr_params *params) { /* first let's check the buffer parameter's */ if (params->buffer.fragment_size == 0 || - params->buffer.fragments > INT_MAX / params->buffer.fragment_size || + params->buffer.fragments > U32_MAX / params->buffer.fragment_size || params->buffer.fragments == 0) return -EINVAL; diff --git a/sound/core/control.c b/sound/core/control.c index 36571cd49be3..a0ce22164957 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1467,8 +1467,9 @@ static int call_tlv_handler(struct snd_ctl_file *file, int op_flag, if (kctl->tlv.c == NULL) return -ENXIO; - /* When locked, this is unavailable. */ - if (vd->owner != NULL && vd->owner != file) + /* Write and command operations are not allowed for locked element. */ + if (op_flag != SNDRV_CTL_TLV_OP_READ && + vd->owner != NULL && vd->owner != file) return -EPERM; return kctl->tlv.c(kctl, op_flag, size, buf); diff --git a/sound/core/oss/linear.c b/sound/core/oss/linear.c index 2045697f449d..797d838a2f9e 100644 --- a/sound/core/oss/linear.c +++ b/sound/core/oss/linear.c @@ -107,6 +107,8 @@ static snd_pcm_sframes_t linear_transfer(struct snd_pcm_plugin *plugin, } } #endif + if (frames > dst_channels[0].frames) + frames = dst_channels[0].frames; convert(plugin, src_channels, dst_channels, frames); return frames; } diff --git a/sound/core/oss/mulaw.c b/sound/core/oss/mulaw.c index 7915564bd394..3788906421a7 100644 --- a/sound/core/oss/mulaw.c +++ b/sound/core/oss/mulaw.c @@ -269,6 +269,8 @@ static snd_pcm_sframes_t mulaw_transfer(struct snd_pcm_plugin *plugin, } } #endif + if (frames > dst_channels[0].frames) + frames = dst_channels[0].frames; data = (struct mulaw_priv *)plugin->extra_data; data->func(plugin, src_channels, dst_channels, frames); return frames; diff --git a/sound/core/oss/pcm_plugin.c b/sound/core/oss/pcm_plugin.c index 617845d4a811..b8ab46b8298d 100644 --- a/sound/core/oss/pcm_plugin.c +++ b/sound/core/oss/pcm_plugin.c @@ -111,7 +111,7 @@ int snd_pcm_plug_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t frames) while (plugin->next) { if (plugin->dst_frames) frames = plugin->dst_frames(plugin, frames); - if (snd_BUG_ON(frames <= 0)) + if (snd_BUG_ON((snd_pcm_sframes_t)frames <= 0)) return -ENXIO; plugin = plugin->next; err = snd_pcm_plugin_alloc(plugin, frames); @@ -123,7 +123,7 @@ int snd_pcm_plug_alloc(struct snd_pcm_substream *plug, snd_pcm_uframes_t frames) while (plugin->prev) { if (plugin->src_frames) frames = plugin->src_frames(plugin, frames); - if (snd_BUG_ON(frames <= 0)) + if (snd_BUG_ON((snd_pcm_sframes_t)frames <= 0)) return -ENXIO; plugin = plugin->prev; err = snd_pcm_plugin_alloc(plugin, frames); diff --git a/sound/core/oss/route.c b/sound/core/oss/route.c index c8171f5783c8..72dea04197ef 100644 --- a/sound/core/oss/route.c +++ b/sound/core/oss/route.c @@ -57,6 +57,8 @@ static snd_pcm_sframes_t route_transfer(struct snd_pcm_plugin *plugin, return -ENXIO; if (frames == 0) return 0; + if (frames > dst_channels[0].frames) + frames = dst_channels[0].frames; nsrcs = plugin->src_format.channels; ndsts = plugin->dst_format.channels; diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index 729a85a6211d..80453266a2de 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -1803,11 +1803,14 @@ void snd_pcm_period_elapsed(struct snd_pcm_substream *substream) struct snd_pcm_runtime *runtime; unsigned long flags; - if (PCM_RUNTIME_CHECK(substream)) + if (snd_BUG_ON(!substream)) return; - runtime = substream->runtime; snd_pcm_stream_lock_irqsave(substream, flags); + if (PCM_RUNTIME_CHECK(substream)) + goto _unlock; + runtime = substream->runtime; + if (!snd_pcm_running(substream) || snd_pcm_update_hw_ptr0(substream, 1) < 0) goto _end; @@ -1818,6 +1821,7 @@ void snd_pcm_period_elapsed(struct snd_pcm_substream *substream) #endif _end: kill_fasync(&runtime->fasync, SIGIO, POLL_IN); + _unlock: snd_pcm_stream_unlock_irqrestore(substream, flags); } EXPORT_SYMBOL(snd_pcm_period_elapsed); diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 182e4afd21eb..14b1ee29509d 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -720,6 +720,10 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, while (runtime->boundary * 2 <= LONG_MAX - runtime->buffer_size) runtime->boundary *= 2; + /* clear the buffer for avoiding possible kernel info leaks */ + if (runtime->dma_area && !substream->ops->copy_user) + memset(runtime->dma_area, 0, runtime->dma_bytes); + snd_pcm_timer_resolution_change(substream); snd_pcm_set_state(substream, SNDRV_PCM_STATE_SETUP); diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c index 92b0d4523a07..6fe93d5f6f71 100644 --- a/sound/core/seq/seq_clientmgr.c +++ b/sound/core/seq/seq_clientmgr.c @@ -564,7 +564,7 @@ static int update_timestamp_of_queue(struct snd_seq_event *event, event->queue = queue; event->flags &= ~SNDRV_SEQ_TIME_STAMP_MASK; if (real_time) { - event->time.time = snd_seq_timer_get_cur_time(q->timer); + event->time.time = snd_seq_timer_get_cur_time(q->timer, true); event->flags |= SNDRV_SEQ_TIME_STAMP_REAL; } else { event->time.tick = snd_seq_timer_get_cur_tick(q->timer); @@ -1639,7 +1639,7 @@ static int snd_seq_ioctl_get_queue_status(struct snd_seq_client *client, tmr = queue->timer; status->events = queue->tickq->cells + queue->timeq->cells; - status->time = snd_seq_timer_get_cur_time(tmr); + status->time = snd_seq_timer_get_cur_time(tmr, true); status->tick = snd_seq_timer_get_cur_tick(tmr); status->running = tmr->running; diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index 1a6dc4ff44a6..ea1aa0796276 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -261,6 +261,8 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) { unsigned long flags; struct snd_seq_event_cell *cell; + snd_seq_tick_time_t cur_tick; + snd_seq_real_time_t cur_time; if (q == NULL) return; @@ -277,17 +279,18 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) __again: /* Process tick queue... */ + cur_tick = snd_seq_timer_get_cur_tick(q->timer); for (;;) { - cell = snd_seq_prioq_cell_out(q->tickq, - &q->timer->tick.cur_tick); + cell = snd_seq_prioq_cell_out(q->tickq, &cur_tick); if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); } /* Process time queue... */ + cur_time = snd_seq_timer_get_cur_time(q->timer, false); for (;;) { - cell = snd_seq_prioq_cell_out(q->timeq, &q->timer->cur_time); + cell = snd_seq_prioq_cell_out(q->timeq, &cur_time); if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); @@ -415,6 +418,7 @@ int snd_seq_queue_check_access(int queueid, int client) int snd_seq_queue_set_owner(int queueid, int client, int locked) { struct snd_seq_queue *q = queueptr(queueid); + unsigned long flags; if (q == NULL) return -EINVAL; @@ -424,8 +428,10 @@ int snd_seq_queue_set_owner(int queueid, int client, int locked) return -EPERM; } + spin_lock_irqsave(&q->owner_lock, flags); q->locked = locked ? 1 : 0; q->owner = client; + spin_unlock_irqrestore(&q->owner_lock, flags); queue_access_unlock(q); queuefree(q); @@ -564,15 +570,17 @@ void snd_seq_queue_client_termination(int client) unsigned long flags; int i; struct snd_seq_queue *q; + bool matched; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { if ((q = queueptr(i)) == NULL) continue; spin_lock_irqsave(&q->owner_lock, flags); - if (q->owner == client) + matched = (q->owner == client); + if (matched) q->klocked = 1; spin_unlock_irqrestore(&q->owner_lock, flags); - if (q->owner == client) { + if (matched) { if (q->timer->running) snd_seq_timer_stop(q->timer); snd_seq_timer_reset(q->timer); @@ -764,6 +772,8 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry, int i, bpm; struct snd_seq_queue *q; struct snd_seq_timer *tmr; + bool locked; + int owner; for (i = 0; i < SNDRV_SEQ_MAX_QUEUES; i++) { if ((q = queueptr(i)) == NULL) @@ -775,9 +785,14 @@ void snd_seq_info_queues_read(struct snd_info_entry *entry, else bpm = 0; + spin_lock_irq(&q->owner_lock); + locked = q->locked; + owner = q->owner; + spin_unlock_irq(&q->owner_lock); + snd_iprintf(buffer, "queue %d: [%s]\n", q->queue, q->name); - snd_iprintf(buffer, "owned by client : %d\n", q->owner); - snd_iprintf(buffer, "lock status : %s\n", q->locked ? "Locked" : "Free"); + snd_iprintf(buffer, "owned by client : %d\n", owner); + snd_iprintf(buffer, "lock status : %s\n", locked ? "Locked" : "Free"); snd_iprintf(buffer, "queued time events : %d\n", snd_seq_prioq_avail(q->timeq)); snd_iprintf(buffer, "queued tick events : %d\n", snd_seq_prioq_avail(q->tickq)); snd_iprintf(buffer, "timer state : %s\n", tmr->running ? "Running" : "Stopped"); diff --git a/sound/core/seq/seq_system.c b/sound/core/seq/seq_system.c index 8ce1d0b40dce..ce1f1e4727ab 100644 --- a/sound/core/seq/seq_system.c +++ b/sound/core/seq/seq_system.c @@ -123,6 +123,7 @@ int __init snd_seq_system_client_init(void) { struct snd_seq_port_callback pcallbacks; struct snd_seq_port_info *port; + int err; port = kzalloc(sizeof(*port), GFP_KERNEL); if (!port) @@ -144,7 +145,10 @@ int __init snd_seq_system_client_init(void) port->flags = SNDRV_SEQ_PORT_FLG_GIVEN_PORT; port->addr.client = sysclient; port->addr.port = SNDRV_SEQ_PORT_SYSTEM_TIMER; - snd_seq_kernel_client_ctl(sysclient, SNDRV_SEQ_IOCTL_CREATE_PORT, port); + err = snd_seq_kernel_client_ctl(sysclient, SNDRV_SEQ_IOCTL_CREATE_PORT, + port); + if (err < 0) + goto error_port; /* register announcement port */ strcpy(port->name, "Announce"); @@ -154,16 +158,24 @@ int __init snd_seq_system_client_init(void) port->flags = SNDRV_SEQ_PORT_FLG_GIVEN_PORT; port->addr.client = sysclient; port->addr.port = SNDRV_SEQ_PORT_SYSTEM_ANNOUNCE; - snd_seq_kernel_client_ctl(sysclient, SNDRV_SEQ_IOCTL_CREATE_PORT, port); + err = snd_seq_kernel_client_ctl(sysclient, SNDRV_SEQ_IOCTL_CREATE_PORT, + port); + if (err < 0) + goto error_port; announce_port = port->addr.port; kfree(port); return 0; + + error_port: + snd_seq_system_client_done(); + kfree(port); + return err; } /* unregister our internal client */ -void __exit snd_seq_system_client_done(void) +void snd_seq_system_client_done(void) { int oldsysclient = sysclient; diff --git a/sound/core/seq/seq_timer.c b/sound/core/seq/seq_timer.c index b80985fbc334..bd5e5a5d52a8 100644 --- a/sound/core/seq/seq_timer.c +++ b/sound/core/seq/seq_timer.c @@ -436,14 +436,15 @@ int snd_seq_timer_continue(struct snd_seq_timer *tmr) } /* return current 'real' time. use timeofday() to get better granularity. */ -snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr) +snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr, + bool adjust_ktime) { snd_seq_real_time_t cur_time; unsigned long flags; spin_lock_irqsave(&tmr->lock, flags); cur_time = tmr->cur_time; - if (tmr->running) { + if (adjust_ktime && tmr->running) { struct timespec64 tm; ktime_get_ts64(&tm); @@ -460,7 +461,13 @@ snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr) high PPQ values) */ snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr) { - return tmr->tick.cur_tick; + snd_seq_tick_time_t cur_tick; + unsigned long flags; + + spin_lock_irqsave(&tmr->lock, flags); + cur_tick = tmr->tick.cur_tick; + spin_unlock_irqrestore(&tmr->lock, flags); + return cur_tick; } @@ -479,15 +486,19 @@ void snd_seq_info_timer_read(struct snd_info_entry *entry, q = queueptr(idx); if (q == NULL) continue; - if ((tmr = q->timer) == NULL || - (ti = tmr->timeri) == NULL) { - queuefree(q); - continue; - } + mutex_lock(&q->timer_mutex); + tmr = q->timer; + if (!tmr) + goto unlock; + ti = tmr->timeri; + if (!ti) + goto unlock; snd_iprintf(buffer, "Timer for queue %i : %s\n", q->queue, ti->timer->name); resolution = snd_timer_resolution(ti) * tmr->ticks; snd_iprintf(buffer, " Period time : %lu.%09lu\n", resolution / 1000000000, resolution % 1000000000); snd_iprintf(buffer, " Skew : %u / %u\n", tmr->skew, tmr->skew_base); +unlock: + mutex_unlock(&q->timer_mutex); queuefree(q); } } diff --git a/sound/core/seq/seq_timer.h b/sound/core/seq/seq_timer.h index 9506b661fe5b..5d47d559465e 100644 --- a/sound/core/seq/seq_timer.h +++ b/sound/core/seq/seq_timer.h @@ -135,7 +135,8 @@ int snd_seq_timer_set_ppq(struct snd_seq_timer *tmr, int ppq); int snd_seq_timer_set_position_tick(struct snd_seq_timer *tmr, snd_seq_tick_time_t position); int snd_seq_timer_set_position_time(struct snd_seq_timer *tmr, snd_seq_real_time_t position); int snd_seq_timer_set_skew(struct snd_seq_timer *tmr, unsigned int skew, unsigned int base); -snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr); +snd_seq_real_time_t snd_seq_timer_get_cur_time(struct snd_seq_timer *tmr, + bool adjust_ktime); snd_seq_tick_time_t snd_seq_timer_get_cur_tick(struct snd_seq_timer *tmr); extern int seq_default_timer_class; diff --git a/sound/core/timer.c b/sound/core/timer.c index a4b1764b9ac5..22589a073423 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -88,6 +88,9 @@ static LIST_HEAD(snd_timer_slave_list); /* lock for slave active lists */ static DEFINE_SPINLOCK(slave_active_lock); +#define MAX_SLAVE_INSTANCES 1000 +static int num_slaves; + static DEFINE_MUTEX(register_mutex); static int snd_timer_free(struct snd_timer *timer); @@ -240,7 +243,8 @@ static int snd_timer_check_master(struct snd_timer_instance *master) return 0; } -static int snd_timer_close_locked(struct snd_timer_instance *timeri); +static int snd_timer_close_locked(struct snd_timer_instance *timeri, + struct device **card_devp_to_put); /* * open a timer instance @@ -252,38 +256,42 @@ int snd_timer_open(struct snd_timer_instance **ti, { struct snd_timer *timer; struct snd_timer_instance *timeri = NULL; + struct device *card_dev_to_put = NULL; int err; + mutex_lock(®ister_mutex); if (tid->dev_class == SNDRV_TIMER_CLASS_SLAVE) { /* open a slave instance */ if (tid->dev_sclass <= SNDRV_TIMER_SCLASS_NONE || tid->dev_sclass > SNDRV_TIMER_SCLASS_OSS_SEQUENCER) { pr_debug("ALSA: timer: invalid slave class %i\n", tid->dev_sclass); - return -EINVAL; + err = -EINVAL; + goto unlock; + } + if (num_slaves >= MAX_SLAVE_INSTANCES) { + err = -EBUSY; + goto unlock; } - mutex_lock(®ister_mutex); timeri = snd_timer_instance_new(owner, NULL); if (!timeri) { - mutex_unlock(®ister_mutex); - return -ENOMEM; + err = -ENOMEM; + goto unlock; } timeri->slave_class = tid->dev_sclass; timeri->slave_id = tid->device; timeri->flags |= SNDRV_TIMER_IFLG_SLAVE; list_add_tail(&timeri->open_list, &snd_timer_slave_list); + num_slaves++; err = snd_timer_check_slave(timeri); if (err < 0) { - snd_timer_close_locked(timeri); + snd_timer_close_locked(timeri, &card_dev_to_put); timeri = NULL; } - mutex_unlock(®ister_mutex); - *ti = timeri; - return err; + goto unlock; } /* open a master instance */ - mutex_lock(®ister_mutex); timer = snd_timer_find(tid); #ifdef CONFIG_MODULES if (!timer) { @@ -294,26 +302,26 @@ int snd_timer_open(struct snd_timer_instance **ti, } #endif if (!timer) { - mutex_unlock(®ister_mutex); - return -ENODEV; + err = -ENODEV; + goto unlock; } if (!list_empty(&timer->open_list_head)) { struct snd_timer_instance *t = list_entry(timer->open_list_head.next, struct snd_timer_instance, open_list); if (t->flags & SNDRV_TIMER_IFLG_EXCLUSIVE) { - mutex_unlock(®ister_mutex); - return -EBUSY; + err = -EBUSY; + goto unlock; } } if (timer->num_instances >= timer->max_instances) { - mutex_unlock(®ister_mutex); - return -EBUSY; + err = -EBUSY; + goto unlock; } timeri = snd_timer_instance_new(owner, timer); if (!timeri) { - mutex_unlock(®ister_mutex); - return -ENOMEM; + err = -ENOMEM; + goto unlock; } /* take a card refcount for safe disconnection */ if (timer->card) @@ -322,16 +330,16 @@ int snd_timer_open(struct snd_timer_instance **ti, timeri->slave_id = slave_id; if (list_empty(&timer->open_list_head) && timer->hw.open) { - int err = timer->hw.open(timer); + err = timer->hw.open(timer); if (err) { kfree(timeri->owner); kfree(timeri); + timeri = NULL; if (timer->card) - put_device(&timer->card->card_dev); + card_dev_to_put = &timer->card->card_dev; module_put(timer->module); - mutex_unlock(®ister_mutex); - return err; + goto unlock; } } @@ -339,10 +347,15 @@ int snd_timer_open(struct snd_timer_instance **ti, timer->num_instances++; err = snd_timer_check_master(timeri); if (err < 0) { - snd_timer_close_locked(timeri); + snd_timer_close_locked(timeri, &card_dev_to_put); timeri = NULL; } + + unlock: mutex_unlock(®ister_mutex); + /* put_device() is called after unlock for avoiding deadlock */ + if (card_dev_to_put) + put_device(card_dev_to_put); *ti = timeri; return err; } @@ -352,12 +365,15 @@ EXPORT_SYMBOL(snd_timer_open); * close a timer instance * call this with register_mutex down. */ -static int snd_timer_close_locked(struct snd_timer_instance *timeri) +static int snd_timer_close_locked(struct snd_timer_instance *timeri, + struct device **card_devp_to_put) { struct snd_timer *timer = NULL; struct snd_timer_instance *slave, *tmp; list_del(&timeri->open_list); + if (timeri->flags & SNDRV_TIMER_IFLG_SLAVE) + num_slaves--; /* force to stop the timer */ snd_timer_stop(timeri); @@ -404,7 +420,7 @@ static int snd_timer_close_locked(struct snd_timer_instance *timeri) timer->hw.close(timer); /* release a card refcount for safe disconnection */ if (timer->card) - put_device(&timer->card->card_dev); + *card_devp_to_put = &timer->card->card_dev; module_put(timer->module); } @@ -416,14 +432,18 @@ static int snd_timer_close_locked(struct snd_timer_instance *timeri) */ int snd_timer_close(struct snd_timer_instance *timeri) { + struct device *card_dev_to_put = NULL; int err; if (snd_BUG_ON(!timeri)) return -ENXIO; mutex_lock(®ister_mutex); - err = snd_timer_close_locked(timeri); + err = snd_timer_close_locked(timeri, &card_dev_to_put); mutex_unlock(®ister_mutex); + /* put_device() is called after unlock for avoiding deadlock */ + if (card_dev_to_put) + put_device(card_dev_to_put); return err; } EXPORT_SYMBOL(snd_timer_close); diff --git a/sound/drivers/dummy.c b/sound/drivers/dummy.c index c0939a0164a6..aeb65d7d4cb3 100644 --- a/sound/drivers/dummy.c +++ b/sound/drivers/dummy.c @@ -933,7 +933,7 @@ static void print_formats(struct snd_dummy *dummy, { int i; - for (i = 0; i < SNDRV_PCM_FORMAT_LAST; i++) { + for (i = 0; i <= SNDRV_PCM_FORMAT_LAST; i++) { if (dummy->pcm_hw.formats & (1ULL << i)) snd_iprintf(buffer, " %s", snd_pcm_format_name(i)); } diff --git a/sound/firewire/bebob/bebob_focusrite.c b/sound/firewire/bebob/bebob_focusrite.c index 52b8b61ecddd..62d989edd129 100644 --- a/sound/firewire/bebob/bebob_focusrite.c +++ b/sound/firewire/bebob/bebob_focusrite.c @@ -28,6 +28,8 @@ #define SAFFIRE_CLOCK_SOURCE_SPDIF 1 /* clock sources as returned from register of Saffire Pro 10 and 26 */ +#define SAFFIREPRO_CLOCK_SOURCE_SELECT_MASK 0x000000ff +#define SAFFIREPRO_CLOCK_SOURCE_DETECT_MASK 0x0000ff00 #define SAFFIREPRO_CLOCK_SOURCE_INTERNAL 0 #define SAFFIREPRO_CLOCK_SOURCE_SKIP 1 /* never used on hardware */ #define SAFFIREPRO_CLOCK_SOURCE_SPDIF 2 @@ -190,6 +192,7 @@ saffirepro_both_clk_src_get(struct snd_bebob *bebob, unsigned int *id) map = saffirepro_clk_maps[1]; /* In a case that this driver cannot handle the value of register. */ + value &= SAFFIREPRO_CLOCK_SOURCE_SELECT_MASK; if (value >= SAFFIREPRO_CLOCK_SOURCE_COUNT || map[value] < 0) { err = -EIO; goto end; diff --git a/sound/firewire/bebob/bebob_stream.c b/sound/firewire/bebob/bebob_stream.c index 4d3034a68bdf..be2c056eb62d 100644 --- a/sound/firewire/bebob/bebob_stream.c +++ b/sound/firewire/bebob/bebob_stream.c @@ -253,8 +253,7 @@ end: return err; } -static unsigned int -map_data_channels(struct snd_bebob *bebob, struct amdtp_stream *s) +static int map_data_channels(struct snd_bebob *bebob, struct amdtp_stream *s) { unsigned int sec, sections, ch, channels; unsigned int pcm, midi, location; diff --git a/sound/firewire/isight.c b/sound/firewire/isight.c index 5826aa8362f1..9edb26ab16e9 100644 --- a/sound/firewire/isight.c +++ b/sound/firewire/isight.c @@ -639,7 +639,7 @@ static int isight_probe(struct fw_unit *unit, if (!isight->audio_base) { dev_err(&unit->device, "audio unit base not found\n"); err = -ENXIO; - goto err_unit; + goto error; } fw_iso_resources_init(&isight->resources, unit); @@ -668,12 +668,12 @@ static int isight_probe(struct fw_unit *unit, dev_set_drvdata(&unit->device, isight); return 0; - -err_unit: - fw_unit_put(isight->unit); - mutex_destroy(&isight->mutex); error: snd_card_free(card); + + mutex_destroy(&isight->mutex); + fw_unit_put(isight->unit); + return err; } diff --git a/sound/firewire/motu/motu-proc.c b/sound/firewire/motu/motu-proc.c index 4edc064999ed..706f1e982e36 100644 --- a/sound/firewire/motu/motu-proc.c +++ b/sound/firewire/motu/motu-proc.c @@ -17,7 +17,7 @@ static const char *const clock_names[] = { [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_OPT] = "S/PDIF on optical interface", [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_OPT_A] = "S/PDIF on optical interface A", [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_OPT_B] = "S/PDIF on optical interface B", - [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_COAX] = "S/PCIF on coaxial interface", + [SND_MOTU_CLOCK_SOURCE_SPDIF_ON_COAX] = "S/PDIF on coaxial interface", [SND_MOTU_CLOCK_SOURCE_AESEBU_ON_XLR] = "AESEBU on XLR interface", [SND_MOTU_CLOCK_SOURCE_WORD_ON_BNC] = "Word clock on BNC interface", }; diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c index 3377f0bc2828..778b42ba90b8 100644 --- a/sound/hda/hdac_controller.c +++ b/sound/hda/hdac_controller.c @@ -442,8 +442,6 @@ static void azx_int_disable(struct hdac_bus *bus) list_for_each_entry(azx_dev, &bus->stream_list, list) snd_hdac_stream_updateb(azx_dev, SD_CTL, SD_INT_MASK, 0); - synchronize_irq(bus->irq); - /* disable SIE for all streams */ snd_hdac_chip_writeb(bus, INTCTL, 0); diff --git a/sound/hda/hdmi_chmap.c b/sound/hda/hdmi_chmap.c index f21633cd9b38..acbe61b8db7b 100644 --- a/sound/hda/hdmi_chmap.c +++ b/sound/hda/hdmi_chmap.c @@ -249,7 +249,7 @@ void snd_hdac_print_channel_allocation(int spk_alloc, char *buf, int buflen) for (i = 0, j = 0; i < ARRAY_SIZE(cea_speaker_allocation_names); i++) { if (spk_alloc & (1 << i)) - j += snprintf(buf + j, buflen - j, " %s", + j += scnprintf(buf + j, buflen - j, " %s", cea_speaker_allocation_names[i]); } buf[j] = '\0'; /* necessary when j == 0 */ diff --git a/sound/i2c/cs8427.c b/sound/i2c/cs8427.c index 7e21621e492a..7fd1b4000883 100644 --- a/sound/i2c/cs8427.c +++ b/sound/i2c/cs8427.c @@ -118,7 +118,7 @@ static int snd_cs8427_send_corudata(struct snd_i2c_device *device, struct cs8427 *chip = device->private_data; char *hw_data = udata ? chip->playback.hw_udata : chip->playback.hw_status; - char data[32]; + unsigned char data[32]; int err, idx; if (!memcmp(hw_data, ndata, count)) diff --git a/sound/isa/cs423x/cs4236.c b/sound/isa/cs423x/cs4236.c index 70559e59d18f..7d4e18cb6351 100644 --- a/sound/isa/cs423x/cs4236.c +++ b/sound/isa/cs423x/cs4236.c @@ -293,7 +293,8 @@ static int snd_cs423x_pnp_init_mpu(int dev, struct pnp_dev *pdev) } else { mpu_port[dev] = pnp_port_start(pdev, 0); if (mpu_irq[dev] >= 0 && - pnp_irq_valid(pdev, 0) && pnp_irq(pdev, 0) >= 0) { + pnp_irq_valid(pdev, 0) && + pnp_irq(pdev, 0) != (resource_size_t)-1) { mpu_irq[dev] = pnp_irq(pdev, 0); } else { mpu_irq[dev] = -1; /* disable interrupt */ diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c index 8db1890605f6..c175b2cf63f7 100644 --- a/sound/pci/hda/hda_bind.c +++ b/sound/pci/hda/hda_bind.c @@ -42,6 +42,10 @@ static void hda_codec_unsol_event(struct hdac_device *dev, unsigned int ev) { struct hda_codec *codec = container_of(dev, struct hda_codec, core); + /* ignore unsol events during shutdown */ + if (codec->bus->shutdown) + return; + if (codec->patch_ops.unsol_event) codec->patch_ops.unsol_event(codec, ev); } diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index e3f3351da480..a6f7561e7bb9 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -4002,7 +4002,7 @@ void snd_print_pcm_bits(int pcm, char *buf, int buflen) for (i = 0, j = 0; i < ARRAY_SIZE(bits); i++) if (pcm & (AC_SUPPCM_BITS_8 << i)) - j += snprintf(buf + j, buflen - j, " %d", bits[i]); + j += scnprintf(buf + j, buflen - j, " %d", bits[i]); buf[j] = '\0'; /* necessary when j == 0 */ } diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c index 8fcb421193e0..fa261b27d858 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -883,7 +883,7 @@ static int azx_rirb_get_response(struct hdac_bus *bus, unsigned int addr, return -EAGAIN; /* give a chance to retry */ } - dev_WARN(chip->card->dev, + dev_err(chip->card->dev, "azx_get_response timeout, switching to single_cmd mode: last cmd=0x%08x\n", bus->last_cmd[addr]); chip->single_cmd = 1; diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h index 8a9dd4767b1e..63cc10604afc 100644 --- a/sound/pci/hda/hda_controller.h +++ b/sound/pci/hda/hda_controller.h @@ -176,11 +176,10 @@ struct azx { #define azx_bus(chip) (&(chip)->bus.core) #define bus_to_azx(_bus) container_of(_bus, struct azx, bus.core) -#ifdef CONFIG_X86 -#define azx_snoop(chip) ((chip)->snoop) -#else -#define azx_snoop(chip) true -#endif +static inline bool azx_snoop(struct azx *chip) +{ + return !IS_ENABLED(CONFIG_X86) || chip->snoop; +} /* * macros for easy use diff --git a/sound/pci/hda/hda_eld.c b/sound/pci/hda/hda_eld.c index ba7fe9b6655c..864cc8c9ada0 100644 --- a/sound/pci/hda/hda_eld.c +++ b/sound/pci/hda/hda_eld.c @@ -373,7 +373,7 @@ static void hdmi_print_pcm_rates(int pcm, char *buf, int buflen) for (i = 0, j = 0; i < ARRAY_SIZE(alsa_rates); i++) if (pcm & (1 << i)) - j += snprintf(buf + j, buflen - j, " %d", + j += scnprintf(buf + j, buflen - j, " %d", alsa_rates[i]); buf[j] = '\0'; /* necessary when j == 0 */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index b42ab80ee607..890793ad85ca 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1413,9 +1413,9 @@ static int azx_free(struct azx *chip) } if (bus->chip_init) { - azx_stop_chip(chip); azx_clear_irq_pending(chip); azx_stop_all_streams(chip); + azx_stop_chip(chip); } if (bus->irq >= 0) @@ -1450,8 +1450,11 @@ static int azx_free(struct azx *chip) static int azx_dev_disconnect(struct snd_device *device) { struct azx *chip = device->device_data; + struct hdac_bus *bus = azx_bus(chip); chip->bus.shutdown = 1; + cancel_work_sync(&bus->unsol_work); + return 0; } diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c index 9b7efece4484..2a173de7ca02 100644 --- a/sound/pci/hda/hda_sysfs.c +++ b/sound/pci/hda/hda_sysfs.c @@ -221,7 +221,7 @@ static ssize_t init_verbs_show(struct device *dev, mutex_lock(&codec->user_mutex); for (i = 0; i < codec->init_verbs.used; i++) { struct hda_verb *v = snd_array_elem(&codec->init_verbs, i); - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "0x%02x 0x%03x 0x%04x\n", v->nid, v->verb, v->param); } @@ -271,7 +271,7 @@ static ssize_t hints_show(struct device *dev, mutex_lock(&codec->user_mutex); for (i = 0; i < codec->hints.used; i++) { struct hda_hint *hint = snd_array_elem(&codec->hints, i); - len += snprintf(buf + len, PAGE_SIZE - len, + len += scnprintf(buf + len, PAGE_SIZE - len, "%s = %s\n", hint->key, hint->val); } mutex_unlock(&codec->user_mutex); diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 119f3b504765..92f5f452bee2 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -1300,13 +1300,14 @@ struct scp_msg { static void dspio_clear_response_queue(struct hda_codec *codec) { + unsigned long timeout = jiffies + msecs_to_jiffies(1000); unsigned int dummy = 0; - int status = -1; + int status; /* clear all from the response queue */ do { status = dspio_read(codec, &dummy); - } while (status == 0); + } while (status == 0 && time_before(jiffies, timeout)); } static int dspio_get_response_data(struct hda_codec *codec) @@ -4424,12 +4425,14 @@ static void ca0132_process_dsp_response(struct hda_codec *codec, struct ca0132_spec *spec = codec->spec; codec_dbg(codec, "ca0132_process_dsp_response\n"); + snd_hda_power_up_pm(codec); if (spec->wait_scp) { if (dspio_get_response_data(codec) >= 0) spec->wait_scp = 0; } dspio_clear_response_queue(codec); + snd_hda_power_down_pm(codec); } static void hp_callback(struct hda_codec *codec, struct hda_jack_callback *cb) @@ -4440,7 +4443,7 @@ static void hp_callback(struct hda_codec *codec, struct hda_jack_callback *cb) /* Delay enabling the HP amp, to let the mic-detection * state machine run. */ - cancel_delayed_work_sync(&spec->unsol_hp_work); + cancel_delayed_work(&spec->unsol_hp_work); schedule_delayed_work(&spec->unsol_hp_work, msecs_to_jiffies(500)); tbl = snd_hda_jack_tbl_get(codec, cb->nid); if (tbl) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 49be42d27761..9cc9304ff21a 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -960,6 +960,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x837f, "HP ProBook 470 G5", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x103c, 0x8402, "HP ProBook 645 G4", CXT_FIXUP_MUTE_LED_GPIO), SND_PCI_QUIRK(0x103c, 0x8455, "HP Z2 G4", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x138d, "Asus", CXT_FIXUP_HEADPHONE_MIC_PIN), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT_FIXUP_OLPC_XO), @@ -968,6 +969,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x17aa, 0x215f, "Lenovo T510", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21ce, "Lenovo T420", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21cf, "Lenovo T520", CXT_PINCFG_LENOVO_TP410), + SND_PCI_QUIRK(0x17aa, 0x21d2, "Lenovo T420s", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21da, "Lenovo X220", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x21db, "Lenovo X220-tablet", CXT_PINCFG_LENOVO_TP410), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo IdeaPad Z560", CXT_FIXUP_MUTE_LED_EAPD), diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index f21405597215..12913368c231 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -2574,9 +2574,12 @@ static int alloc_intel_hdmi(struct hda_codec *codec) /* parse and post-process for Intel codecs */ static int parse_intel_hdmi(struct hda_codec *codec) { - int err; + int err, retries = 3; + + do { + err = hdmi_parse_codec(codec); + } while (err < 0 && retries--); - err = hdmi_parse_codec(codec); if (err < 0) { generic_spec_free(codec); return err; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ab7bc7ebb721..5a7afbeb612d 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -333,9 +333,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0215: case 0x10ec0233: case 0x10ec0235: - case 0x10ec0236: case 0x10ec0255: - case 0x10ec0256: case 0x10ec0257: case 0x10ec0282: case 0x10ec0283: @@ -347,6 +345,11 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0300: alc_update_coef_idx(codec, 0x10, 1<<9, 0); break; + case 0x10ec0236: + case 0x10ec0256: + alc_write_coef_idx(codec, 0x36, 0x5757); + alc_update_coef_idx(codec, 0x10, 1<<9, 0); + break; case 0x10ec0275: alc_update_coef_idx(codec, 0xe, 0, 1<<0); break; @@ -359,6 +362,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0700: case 0x10ec0701: case 0x10ec0703: + case 0x10ec0711: alc_update_coef_idx(codec, 0x10, 1<<15, 0); break; case 0x10ec0662: @@ -374,6 +378,10 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0672: alc_update_coef_idx(codec, 0xd, 0, 1<<14); /* EAPD Ctrl */ break; + case 0x10ec0222: + case 0x10ec0623: + alc_update_coef_idx(codec, 0x19, 1<<13, 0); + break; case 0x10ec0668: alc_update_coef_idx(codec, 0x7, 3<<13, 0); break; @@ -389,6 +397,7 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) break; case 0x10ec0899: case 0x10ec0900: + case 0x10ec0b00: case 0x10ec1168: case 0x10ec1220: alc_update_coef_idx(codec, 0x7, 1<<1, 0); @@ -2382,6 +2391,7 @@ static int patch_alc882(struct hda_codec *codec) case 0x10ec0882: case 0x10ec0885: case 0x10ec0900: + case 0x10ec0b00: case 0x10ec1220: break; default: @@ -2756,6 +2766,7 @@ enum { ALC269_TYPE_ALC225, ALC269_TYPE_ALC294, ALC269_TYPE_ALC300, + ALC269_TYPE_ALC623, ALC269_TYPE_ALC700, }; @@ -2791,6 +2802,7 @@ static int alc269_parse_auto_config(struct hda_codec *codec) case ALC269_TYPE_ALC225: case ALC269_TYPE_ALC294: case ALC269_TYPE_ALC300: + case ALC269_TYPE_ALC623: case ALC269_TYPE_ALC700: ssids = alc269_ssids; break; @@ -3245,7 +3257,9 @@ static void alc294_init(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - if (!spec->done_hp_init) { + /* required only at boot or S4 resume time */ + if (!spec->done_hp_init || + codec->core.dev.power.power_state.event == PM_EVENT_RESTORE) { alc294_hp_init(codec); spec->done_hp_init = true; } @@ -6611,6 +6625,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x312f, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x313c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION), SND_PCI_QUIRK(0x17aa, 0x3151, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI), @@ -7269,9 +7285,13 @@ static int patch_alc269(struct hda_codec *codec) spec->codec_variant = ALC269_TYPE_ALC300; spec->gen.mixer_nid = 0; /* no loopback on ALC300 */ break; + case 0x10ec0623: + spec->codec_variant = ALC269_TYPE_ALC623; + break; case 0x10ec0700: case 0x10ec0701: case 0x10ec0703: + case 0x10ec0711: spec->codec_variant = ALC269_TYPE_ALC700; spec->gen.mixer_nid = 0; /* ALC700 does not have any loopback mixer path */ alc_update_coef_idx(codec, 0x4a, 1 << 15, 0); /* Combo jack auto trigger control */ @@ -8348,6 +8368,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0298, "ALC298", patch_alc269), HDA_CODEC_ENTRY(0x10ec0299, "ALC299", patch_alc269), HDA_CODEC_ENTRY(0x10ec0300, "ALC300", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0623, "ALC623", patch_alc269), HDA_CODEC_REV_ENTRY(0x10ec0861, 0x100340, "ALC660", patch_alc861), HDA_CODEC_ENTRY(0x10ec0660, "ALC660-VD", patch_alc861vd), HDA_CODEC_ENTRY(0x10ec0861, "ALC861", patch_alc861), @@ -8365,6 +8386,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0700, "ALC700", patch_alc269), HDA_CODEC_ENTRY(0x10ec0701, "ALC701", patch_alc269), HDA_CODEC_ENTRY(0x10ec0703, "ALC703", patch_alc269), + HDA_CODEC_ENTRY(0x10ec0711, "ALC711", patch_alc269), HDA_CODEC_ENTRY(0x10ec0867, "ALC891", patch_alc662), HDA_CODEC_ENTRY(0x10ec0880, "ALC880", patch_alc880), HDA_CODEC_ENTRY(0x10ec0882, "ALC882", patch_alc882), @@ -8379,6 +8401,7 @@ static const struct hda_device_id snd_hda_id_realtek[] = { HDA_CODEC_ENTRY(0x10ec0892, "ALC892", patch_alc662), HDA_CODEC_ENTRY(0x10ec0899, "ALC898", patch_alc882), HDA_CODEC_ENTRY(0x10ec0900, "ALC1150", patch_alc882), + HDA_CODEC_ENTRY(0x10ec0b00, "ALCS1200A", patch_alc882), HDA_CODEC_ENTRY(0x10ec1168, "ALC1220", patch_alc882), HDA_CODEC_ENTRY(0x10ec1220, "ALC1220", patch_alc882), {} /* terminator */ diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 63d15b545b33..7cd147411b22 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -77,6 +77,7 @@ enum { STAC_DELL_M6_BOTH, STAC_DELL_EQ, STAC_ALIENWARE_M17X, + STAC_ELO_VUPOINT_15MX, STAC_92HD89XX_HP_FRONT_JACK, STAC_92HD89XX_HP_Z1_G2_RIGHT_MIC_JACK, STAC_92HD73XX_ASUS_MOBO, @@ -1897,6 +1898,18 @@ static void stac92hd73xx_fixup_no_jd(struct hda_codec *codec, codec->no_jack_detect = 1; } + +static void stac92hd73xx_disable_automute(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct sigmatel_spec *spec = codec->spec; + + if (action != HDA_FIXUP_ACT_PRE_PROBE) + return; + + spec->gen.suppress_auto_mute = 1; +} + static const struct hda_fixup stac92hd73xx_fixups[] = { [STAC_92HD73XX_REF] = { .type = HDA_FIXUP_FUNC, @@ -1922,6 +1935,10 @@ static const struct hda_fixup stac92hd73xx_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = stac92hd73xx_fixup_alienware_m17x, }, + [STAC_ELO_VUPOINT_15MX] = { + .type = HDA_FIXUP_FUNC, + .v.func = stac92hd73xx_disable_automute, + }, [STAC_92HD73XX_INTEL] = { .type = HDA_FIXUP_PINS, .v.pins = intel_dg45id_pin_configs, @@ -1960,6 +1977,7 @@ static const struct hda_model_fixup stac92hd73xx_models[] = { { .id = STAC_DELL_M6_BOTH, .name = "dell-m6" }, { .id = STAC_DELL_EQ, .name = "dell-eq" }, { .id = STAC_ALIENWARE_M17X, .name = "alienware" }, + { .id = STAC_ELO_VUPOINT_15MX, .name = "elo-vupoint-15mx" }, { .id = STAC_92HD73XX_ASUS_MOBO, .name = "asus-mobo" }, {} }; @@ -2009,6 +2027,8 @@ static const struct snd_pci_quirk stac92hd73xx_fixup_tbl[] = { "Alienware M17x", STAC_ALIENWARE_M17X), SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x0490, "Alienware M17x R3", STAC_DELL_EQ), + SND_PCI_QUIRK(0x1059, 0x1011, + "ELO VuPoint 15MX", STAC_ELO_VUPOINT_15MX), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1927, "HP Z1 G2", STAC_92HD89XX_HP_Z1_G2_RIGHT_MIC_JACK), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x2b17, diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c index 057c2f394ea7..41ea8e7b389a 100644 --- a/sound/pci/ice1712/ice1724.c +++ b/sound/pci/ice1712/ice1724.c @@ -661,6 +661,7 @@ static int snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate, unsigned long flags; unsigned char mclk_change; unsigned int i, old_rate; + bool call_set_rate = false; if (rate > ice->hw_rates->list[ice->hw_rates->count - 1]) return -EINVAL; @@ -684,7 +685,7 @@ static int snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate, * setting clock rate for internal clock mode */ old_rate = ice->get_rate(ice); if (force || (old_rate != rate)) - ice->set_rate(ice, rate); + call_set_rate = true; else if (rate == ice->cur_rate) { spin_unlock_irqrestore(&ice->reg_lock, flags); return 0; @@ -692,12 +693,14 @@ static int snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate, } ice->cur_rate = rate; + spin_unlock_irqrestore(&ice->reg_lock, flags); + + if (call_set_rate) + ice->set_rate(ice, rate); /* setting master clock */ mclk_change = ice->set_mclk(ice, rate); - spin_unlock_irqrestore(&ice->reg_lock, flags); - if (mclk_change && ice->gpio.i2s_mclk_changed) ice->gpio.i2s_mclk_changed(ice); if (ice->gpio.set_pro_rate) diff --git a/sound/pci/intel8x0m.c b/sound/pci/intel8x0m.c index 3a4769a97d29..a626ee18628e 100644 --- a/sound/pci/intel8x0m.c +++ b/sound/pci/intel8x0m.c @@ -1171,16 +1171,6 @@ static int snd_intel8x0m_create(struct snd_card *card, } port_inited: - if (request_irq(pci->irq, snd_intel8x0m_interrupt, IRQF_SHARED, - KBUILD_MODNAME, chip)) { - dev_err(card->dev, "unable to grab IRQ %d\n", pci->irq); - snd_intel8x0m_free(chip); - return -EBUSY; - } - chip->irq = pci->irq; - pci_set_master(pci); - synchronize_irq(chip->irq); - /* initialize offsets */ chip->bdbars_count = 2; tbl = intel_regs; @@ -1224,11 +1214,21 @@ static int snd_intel8x0m_create(struct snd_card *card, chip->int_sta_reg = ICH_REG_GLOB_STA; chip->int_sta_mask = int_sta_masks; + pci_set_master(pci); + if ((err = snd_intel8x0m_chip_init(chip, 1)) < 0) { snd_intel8x0m_free(chip); return err; } + if (request_irq(pci->irq, snd_intel8x0m_interrupt, IRQF_SHARED, + KBUILD_MODNAME, chip)) { + dev_err(card->dev, "unable to grab IRQ %d\n", pci->irq); + snd_intel8x0m_free(chip); + return -EBUSY; + } + chip->irq = pci->irq; + if ((err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops)) < 0) { snd_intel8x0m_free(chip); return err; diff --git a/sound/sh/aica.c b/sound/sh/aica.c index fdc680ae8aa0..d9acf551a898 100644 --- a/sound/sh/aica.c +++ b/sound/sh/aica.c @@ -117,10 +117,10 @@ static void spu_memset(u32 toi, u32 what, int length) } /* spu_memload - write to SPU address space */ -static void spu_memload(u32 toi, void *from, int length) +static void spu_memload(u32 toi, const void *from, int length) { unsigned long flags; - u32 *froml = from; + const u32 *froml = from; u32 __iomem *to = (u32 __iomem *) (SPU_MEMORY_BASE + toi); int i; u32 val; diff --git a/sound/sh/sh_dac_audio.c b/sound/sh/sh_dac_audio.c index 834b2574786f..6251b5e1b64a 100644 --- a/sound/sh/sh_dac_audio.c +++ b/sound/sh/sh_dac_audio.c @@ -190,7 +190,6 @@ static int snd_sh_dac_pcm_copy(struct snd_pcm_substream *substream, { /* channel is not used (interleaved data) */ struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); - struct snd_pcm_runtime *runtime = substream->runtime; if (copy_from_user_toio(chip->data_buffer + pos, src, count)) return -EFAULT; @@ -210,7 +209,6 @@ static int snd_sh_dac_pcm_copy_kernel(struct snd_pcm_substream *substream, { /* channel is not used (interleaved data) */ struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); - struct snd_pcm_runtime *runtime = substream->runtime; memcpy_toio(chip->data_buffer + pos, src, count); chip->buffer_end = chip->data_buffer + pos + count; @@ -229,7 +227,6 @@ static int snd_sh_dac_pcm_silence(struct snd_pcm_substream *substream, { /* channel is not used (interleaved data) */ struct snd_sh_dac *chip = snd_pcm_substream_chip(substream); - struct snd_pcm_runtime *runtime = substream->runtime; memset_io(chip->data_buffer + pos, 0, count); chip->buffer_end = chip->data_buffer + pos + count; diff --git a/sound/soc/atmel/Kconfig b/sound/soc/atmel/Kconfig index 4a56f3dfba51..23887613b5c3 100644 --- a/sound/soc/atmel/Kconfig +++ b/sound/soc/atmel/Kconfig @@ -25,6 +25,8 @@ config SND_ATMEL_SOC_DMA config SND_ATMEL_SOC_SSC_DMA tristate + select SND_ATMEL_SOC_DMA + select SND_ATMEL_SOC_PDC config SND_ATMEL_SOC_SSC tristate diff --git a/sound/soc/codecs/cs4349.c b/sound/soc/codecs/cs4349.c index 0a749c79ef57..1d38e53dc95c 100644 --- a/sound/soc/codecs/cs4349.c +++ b/sound/soc/codecs/cs4349.c @@ -380,6 +380,7 @@ static struct i2c_driver cs4349_i2c_driver = { .driver = { .name = "cs4349", .of_match_table = cs4349_of_match, + .pm = &cs4349_runtime_pm, }, .id_table = cs4349_i2c_id, .probe = cs4349_i2c_probe, diff --git a/sound/soc/codecs/es8328.c b/sound/soc/codecs/es8328.c index bcdb8914ec16..e2f44fa46262 100644 --- a/sound/soc/codecs/es8328.c +++ b/sound/soc/codecs/es8328.c @@ -231,7 +231,7 @@ static const struct soc_enum es8328_rline_enum = ARRAY_SIZE(es8328_line_texts), es8328_line_texts); static const struct snd_kcontrol_new es8328_right_line_controls = - SOC_DAPM_ENUM("Route", es8328_lline_enum); + SOC_DAPM_ENUM("Route", es8328_rline_enum); /* Left Mixer */ static const struct snd_kcontrol_new es8328_left_mixer_controls[] = { diff --git a/sound/soc/codecs/hdac_hdmi.c b/sound/soc/codecs/hdac_hdmi.c index e824d47cc22b..1c3626347e12 100644 --- a/sound/soc/codecs/hdac_hdmi.c +++ b/sound/soc/codecs/hdac_hdmi.c @@ -1408,6 +1408,12 @@ static int hdac_hdmi_create_dais(struct hdac_device *hdac, if (ret) return ret; + /* Filter out 44.1, 88.2 and 176.4Khz */ + rates &= ~(SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_88200 | + SNDRV_PCM_RATE_176400); + if (!rates) + return -EINVAL; + sprintf(dai_name, "intel-hdmi-hifi%d", i+1); hdmi_dais[i].name = devm_kstrdup(&hdac->dev, dai_name, GFP_KERNEL); diff --git a/sound/soc/codecs/msm8916-wcd-analog.c b/sound/soc/codecs/msm8916-wcd-analog.c index 0b9b014b4bb6..3633eb30dd13 100644 --- a/sound/soc/codecs/msm8916-wcd-analog.c +++ b/sound/soc/codecs/msm8916-wcd-analog.c @@ -303,7 +303,7 @@ struct pm8916_wcd_analog_priv { }; static const char *const adc2_mux_text[] = { "ZERO", "INP2", "INP3" }; -static const char *const rdac2_mux_text[] = { "ZERO", "RX2", "RX1" }; +static const char *const rdac2_mux_text[] = { "RX1", "RX2" }; static const char *const hph_text[] = { "ZERO", "Switch", }; static const struct soc_enum hph_enum = SOC_ENUM_SINGLE_VIRT( @@ -318,7 +318,7 @@ static const struct soc_enum adc2_enum = SOC_ENUM_SINGLE_VIRT( /* RDAC2 MUX */ static const struct soc_enum rdac2_mux_enum = SOC_ENUM_SINGLE( - CDC_D_CDC_CONN_HPHR_DAC_CTL, 0, 3, rdac2_mux_text); + CDC_D_CDC_CONN_HPHR_DAC_CTL, 0, 2, rdac2_mux_text); static const struct snd_kcontrol_new spkr_switch[] = { SOC_DAPM_SINGLE("Switch", CDC_A_SPKR_DAC_CTL, 7, 1, 0) @@ -876,10 +876,10 @@ static const struct snd_soc_dapm_widget pm8916_wcd_analog_dapm_widgets[] = { SND_SOC_DAPM_SUPPLY("MIC BIAS External1", CDC_A_MICB_1_EN, 7, 0, pm8916_wcd_analog_enable_micbias_ext1, - SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_SUPPLY("MIC BIAS External2", CDC_A_MICB_2_EN, 7, 0, pm8916_wcd_analog_enable_micbias_ext2, - SND_SOC_DAPM_POST_PMU | SND_SOC_DAPM_POST_PMD), + SND_SOC_DAPM_POST_PMU), SND_SOC_DAPM_ADC_E("ADC1", NULL, CDC_A_TX_1_EN, 7, 0, pm8916_wcd_analog_enable_adc, diff --git a/sound/soc/codecs/nau8540.c b/sound/soc/codecs/nau8540.c index f9c9933acffb..c0c64f90a61b 100644 --- a/sound/soc/codecs/nau8540.c +++ b/sound/soc/codecs/nau8540.c @@ -548,7 +548,7 @@ static int nau8540_calc_fll_param(unsigned int fll_in, fvco_max = 0; fvco_sel = ARRAY_SIZE(mclk_src_scaling); for (i = 0; i < ARRAY_SIZE(mclk_src_scaling); i++) { - fvco = 256 * fs * 2 * mclk_src_scaling[i].param; + fvco = 256ULL * fs * 2 * mclk_src_scaling[i].param; if (fvco > NAU_FVCO_MIN && fvco < NAU_FVCO_MAX && fvco_max < fvco) { fvco_max = fvco; diff --git a/sound/soc/codecs/pcm512x.c b/sound/soc/codecs/pcm512x.c index 68feae262476..940bdc30753d 100644 --- a/sound/soc/codecs/pcm512x.c +++ b/sound/soc/codecs/pcm512x.c @@ -1438,13 +1438,15 @@ int pcm512x_probe(struct device *dev, struct regmap *regmap) } pcm512x->sclk = devm_clk_get(dev, NULL); - if (PTR_ERR(pcm512x->sclk) == -EPROBE_DEFER) - return -EPROBE_DEFER; + if (PTR_ERR(pcm512x->sclk) == -EPROBE_DEFER) { + ret = -EPROBE_DEFER; + goto err; + } if (!IS_ERR(pcm512x->sclk)) { ret = clk_prepare_enable(pcm512x->sclk); if (ret != 0) { dev_err(dev, "Failed to enable SCLK: %d\n", ret); - return ret; + goto err; } } diff --git a/sound/soc/codecs/rt5677.c b/sound/soc/codecs/rt5677.c index 1cd20b88a3a9..82ee8f4b965b 100644 --- a/sound/soc/codecs/rt5677.c +++ b/sound/soc/codecs/rt5677.c @@ -297,6 +297,7 @@ static bool rt5677_volatile_register(struct device *dev, unsigned int reg) case RT5677_I2C_MASTER_CTRL7: case RT5677_I2C_MASTER_CTRL8: case RT5677_HAP_GENE_CTRL2: + case RT5677_PWR_ANLG2: /* Modified by DSP firmware */ case RT5677_PWR_DSP_ST: case RT5677_PRIV_DATA: case RT5677_ASRC_22: diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 10764c1e854e..ca8a70ab22a8 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1314,7 +1314,7 @@ static int sgtl5000_set_power_regs(struct snd_soc_codec *codec) * Searching for a suitable index solving this formula: * idx = 40 * log10(vag_val / lo_cagcntrl) + 15 */ - vol_quot = (vag * 100) / lo_vag; + vol_quot = lo_vag ? (vag * 100) / lo_vag : 0; lo_vol = 0; for (i = 0; i < ARRAY_SIZE(vol_quot_table); i++) { if (vol_quot >= vol_quot_table[i]) diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index 54a87a905eb6..cc95c15ceceb 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -924,23 +924,31 @@ static int aic31xx_set_dai_fmt(struct snd_soc_dai *codec_dai, return -EINVAL; } + /* signal polarity */ + switch (fmt & SND_SOC_DAIFMT_INV_MASK) { + case SND_SOC_DAIFMT_NB_NF: + break; + case SND_SOC_DAIFMT_IB_NF: + iface_reg2 |= AIC31XX_BCLKINV_MASK; + break; + default: + dev_err(codec->dev, "Invalid DAI clock signal polarity\n"); + return -EINVAL; + } + /* interface format */ switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: break; case SND_SOC_DAIFMT_DSP_A: - dsp_a_val = 0x1; + dsp_a_val = 0x1; /* fall through */ case SND_SOC_DAIFMT_DSP_B: - /* NOTE: BCLKINV bit value 1 equas NB and 0 equals IB */ - switch (fmt & SND_SOC_DAIFMT_INV_MASK) { - case SND_SOC_DAIFMT_NB_NF: - iface_reg2 |= AIC31XX_BCLKINV_MASK; - break; - case SND_SOC_DAIFMT_IB_NF: - break; - default: - return -EINVAL; - } + /* + * NOTE: This CODEC samples on the falling edge of BCLK in + * DSP mode, this is inverted compared to what most DAIs + * expect, so we invert for this mode + */ + iface_reg2 ^= AIC31XX_BCLKINV_MASK; iface_reg1 |= (AIC31XX_DSP_MODE << AIC31XX_IFACE1_DATATYPE_SHIFT); break; diff --git a/sound/soc/codecs/wm8737.c b/sound/soc/codecs/wm8737.c index f0cb1c4afe3c..c5a8d758f58b 100644 --- a/sound/soc/codecs/wm8737.c +++ b/sound/soc/codecs/wm8737.c @@ -170,7 +170,7 @@ SOC_DOUBLE("Polarity Invert Switch", WM8737_ADC_CONTROL, 5, 6, 1, 0), SOC_SINGLE("3D Switch", WM8737_3D_ENHANCE, 0, 1, 0), SOC_SINGLE("3D Depth", WM8737_3D_ENHANCE, 1, 15, 0), SOC_ENUM("3D Low Cut-off", low_3d), -SOC_ENUM("3D High Cut-off", low_3d), +SOC_ENUM("3D High Cut-off", high_3d), SOC_SINGLE_TLV("3D ADC Volume", WM8737_3D_ENHANCE, 7, 1, 1, adc_tlv), SOC_SINGLE("Noise Gate Switch", WM8737_NOISE_GATE, 0, 1, 0), diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index fd2731d171dd..0e8008d38161 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2791,7 +2791,7 @@ static int fll_factors(struct _fll_div *fll_div, unsigned int Fref, if (target % Fref == 0) { fll_div->theta = 0; - fll_div->lambda = 0; + fll_div->lambda = 1; } else { gcd_fll = gcd(target, fratio * Fref); @@ -2861,7 +2861,7 @@ static int wm8962_set_fll(struct snd_soc_codec *codec, int fll_id, int source, return -EINVAL; } - if (fll_div.theta || fll_div.lambda) + if (fll_div.theta) fll1 |= WM8962_FLL_FRAC; /* Stop the FLL while we reconfigure */ diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index d632a0511d62..158ce68bc9bf 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -1169,8 +1169,7 @@ static unsigned int wmfw_convert_flags(unsigned int in, unsigned int len) } if (in) { - if (in & WMFW_CTL_FLAG_READABLE) - out |= rd; + out |= rd; if (in & WMFW_CTL_FLAG_WRITEABLE) out |= wr; if (in & WMFW_CTL_FLAG_VOLATILE) diff --git a/sound/soc/davinci/davinci-mcasp.c b/sound/soc/davinci/davinci-mcasp.c index 0480ec4c8035..e10e03800cce 100644 --- a/sound/soc/davinci/davinci-mcasp.c +++ b/sound/soc/davinci/davinci-mcasp.c @@ -882,14 +882,13 @@ static int mcasp_i2s_hw_param(struct davinci_mcasp *mcasp, int stream, active_slots = hweight32(mcasp->tdm_mask[stream]); active_serializers = (channels + active_slots - 1) / active_slots; - if (active_serializers == 1) { + if (active_serializers == 1) active_slots = channels; - for (i = 0; i < total_slots; i++) { - if ((1 << i) & mcasp->tdm_mask[stream]) { - mask |= (1 << i); - if (--active_slots <= 0) - break; - } + for (i = 0; i < total_slots; i++) { + if ((1 << i) & mcasp->tdm_mask[stream]) { + mask |= (1 << i); + if (--active_slots <= 0) + break; } } } else { @@ -1748,7 +1747,8 @@ static int davinci_mcasp_get_dma_type(struct davinci_mcasp *mcasp) PTR_ERR(chan)); return PTR_ERR(chan); } - BUG_ON(!chan->device || !chan->device->dev); + if (WARN_ON(!chan->device || !chan->device->dev)) + return -EINVAL; if (chan->device->dev->of_node) ret = of_property_read_string(chan->device->dev->of_node, @@ -1894,6 +1894,10 @@ static int davinci_mcasp_probe(struct platform_device *pdev) if (irq >= 0) { irq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_common", dev_name(&pdev->dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err; + } ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, davinci_mcasp_common_irq_handler, IRQF_ONESHOT | IRQF_SHARED, @@ -1911,6 +1915,10 @@ static int davinci_mcasp_probe(struct platform_device *pdev) if (irq >= 0) { irq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_rx", dev_name(&pdev->dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err; + } ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, davinci_mcasp_rx_irq_handler, IRQF_ONESHOT, irq_name, mcasp); @@ -1926,6 +1934,10 @@ static int davinci_mcasp_probe(struct platform_device *pdev) if (irq >= 0) { irq_name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%s_tx", dev_name(&pdev->dev)); + if (!irq_name) { + ret = -ENOMEM; + goto err; + } ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, davinci_mcasp_tx_irq_handler, IRQF_ONESHOT, irq_name, mcasp); @@ -2009,8 +2021,10 @@ static int davinci_mcasp_probe(struct platform_device *pdev) GFP_KERNEL); if (!mcasp->chconstr[SNDRV_PCM_STREAM_PLAYBACK].list || - !mcasp->chconstr[SNDRV_PCM_STREAM_CAPTURE].list) - return -ENOMEM; + !mcasp->chconstr[SNDRV_PCM_STREAM_CAPTURE].list) { + ret = -ENOMEM; + goto err; + } ret = davinci_mcasp_set_ch_constraints(mcasp); if (ret) diff --git a/sound/soc/fsl/imx-sgtl5000.c b/sound/soc/fsl/imx-sgtl5000.c index 8e525f7ac08d..3d99a8579c99 100644 --- a/sound/soc/fsl/imx-sgtl5000.c +++ b/sound/soc/fsl/imx-sgtl5000.c @@ -119,7 +119,8 @@ static int imx_sgtl5000_probe(struct platform_device *pdev) codec_dev = of_find_i2c_device_by_node(codec_np); if (!codec_dev) { dev_err(&pdev->dev, "failed to find codec platform device\n"); - return -EPROBE_DEFER; + ret = -EPROBE_DEFER; + goto fail; } data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); diff --git a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c index 41cb1fefbd42..405196283688 100644 --- a/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c +++ b/sound/soc/intel/boards/kbl_rt5663_rt5514_max98927.c @@ -422,6 +422,9 @@ static int kabylake_dmic_startup(struct snd_pcm_substream *substream) snd_pcm_hw_constraint_list(runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS, dmic_constraints); + runtime->hw.formats = SNDRV_PCM_FMTBIT_S16_LE; + snd_pcm_hw_constraint_msbits(runtime, 0, 16, 16); + return snd_pcm_hw_constraint_list(substream->runtime, 0, SNDRV_PCM_HW_PARAM_RATE, &constraints_rates); } diff --git a/sound/soc/intel/skylake/skl-debug.c b/sound/soc/intel/skylake/skl-debug.c index 1987f78ea91e..71c6bbf37b6c 100644 --- a/sound/soc/intel/skylake/skl-debug.c +++ b/sound/soc/intel/skylake/skl-debug.c @@ -42,8 +42,8 @@ static ssize_t skl_print_pins(struct skl_module_pin *m_pin, char *buf, int i; ssize_t ret = 0; - for (i = 0; i < max_pin; i++) - ret += snprintf(buf + size, MOD_BUF - size, + for (i = 0; i < max_pin; i++) { + ret += scnprintf(buf + size, MOD_BUF - size, "%s %d\n\tModule %d\n\tInstance %d\n\t" "In-used %s\n\tType %s\n" "\tState %d\n\tIndex %d\n", @@ -53,13 +53,15 @@ static ssize_t skl_print_pins(struct skl_module_pin *m_pin, char *buf, m_pin[i].in_use ? "Used" : "Unused", m_pin[i].is_dynamic ? "Dynamic" : "Static", m_pin[i].pin_state, i); + size += ret; + } return ret; } static ssize_t skl_print_fmt(struct skl_module_fmt *fmt, char *buf, ssize_t size, bool direction) { - return snprintf(buf + size, MOD_BUF - size, + return scnprintf(buf + size, MOD_BUF - size, "%s\n\tCh %d\n\tFreq %d\n\tBit depth %d\n\t" "Valid bit depth %d\n\tCh config %#x\n\tInterleaving %d\n\t" "Sample Type %d\n\tCh Map %#x\n", @@ -81,16 +83,16 @@ static ssize_t module_read(struct file *file, char __user *user_buf, if (!buf) return -ENOMEM; - ret = snprintf(buf, MOD_BUF, "Module:\n\tUUID %pUL\n\tModule id %d\n" + ret = scnprintf(buf, MOD_BUF, "Module:\n\tUUID %pUL\n\tModule id %d\n" "\tInstance id %d\n\tPvt_id %d\n", mconfig->guid, mconfig->id.module_id, mconfig->id.instance_id, mconfig->id.pvt_id); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Resources:\n\tMCPS %#x\n\tIBS %#x\n\tOBS %#x\t\n", mconfig->mcps, mconfig->ibs, mconfig->obs); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Module data:\n\tCore %d\n\tIn queue %d\n\t" "Out queue %d\n\tType %s\n", mconfig->core_id, mconfig->max_in_queue, @@ -100,38 +102,38 @@ static ssize_t module_read(struct file *file, char __user *user_buf, ret += skl_print_fmt(mconfig->in_fmt, buf, ret, true); ret += skl_print_fmt(mconfig->out_fmt, buf, ret, false); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Fixup:\n\tParams %#x\n\tConverter %#x\n", mconfig->params_fixup, mconfig->converter); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Module Gateway:\n\tType %#x\n\tVbus %#x\n\tHW conn %#x\n\tSlot %#x\n", mconfig->dev_type, mconfig->vbus_id, mconfig->hw_conn_type, mconfig->time_slot); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Pipeline:\n\tID %d\n\tPriority %d\n\tConn Type %d\n\t" "Pages %#x\n", mconfig->pipe->ppl_id, mconfig->pipe->pipe_priority, mconfig->pipe->conn_type, mconfig->pipe->memory_pages); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tParams:\n\t\tHost DMA %d\n\t\tLink DMA %d\n", mconfig->pipe->p_params->host_dma_id, mconfig->pipe->p_params->link_dma_id); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tPCM params:\n\t\tCh %d\n\t\tFreq %d\n\t\tFormat %d\n", mconfig->pipe->p_params->ch, mconfig->pipe->p_params->s_freq, mconfig->pipe->p_params->s_fmt); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tLink %#x\n\tStream %#x\n", mconfig->pipe->p_params->linktype, mconfig->pipe->p_params->stream); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "\tState %d\n\tPassthru %s\n", mconfig->pipe->state, mconfig->pipe->passthru ? "true" : "false"); @@ -141,7 +143,7 @@ static ssize_t module_read(struct file *file, char __user *user_buf, ret += skl_print_pins(mconfig->m_out_pin, buf, mconfig->max_out_queue, ret, false); - ret += snprintf(buf + ret, MOD_BUF - ret, + ret += scnprintf(buf + ret, MOD_BUF - ret, "Other:\n\tDomain %d\n\tHomogenous Input %s\n\t" "Homogenous Output %s\n\tIn Queue Mask %d\n\t" "Out Queue Mask %d\n\tDMA ID %d\n\tMem Pages %d\n\t" @@ -199,7 +201,7 @@ static ssize_t fw_softreg_read(struct file *file, char __user *user_buf, __ioread32_copy(d->fw_read_buff, fw_reg_addr, w0_stat_sz >> 2); for (offset = 0; offset < FW_REG_SIZE; offset += 16) { - ret += snprintf(tmp + ret, FW_REG_BUF - ret, "%#.4x: ", offset); + ret += scnprintf(tmp + ret, FW_REG_BUF - ret, "%#.4x: ", offset); hex_dump_to_buffer(d->fw_read_buff + offset, 16, 16, 4, tmp + ret, FW_REG_BUF - ret, 0); ret += strlen(tmp + ret); diff --git a/sound/soc/kirkwood/kirkwood-i2s.c b/sound/soc/kirkwood/kirkwood-i2s.c index 105a73cc5158..149b7cba10fb 100644 --- a/sound/soc/kirkwood/kirkwood-i2s.c +++ b/sound/soc/kirkwood/kirkwood-i2s.c @@ -569,10 +569,6 @@ static int kirkwood_i2s_dev_probe(struct platform_device *pdev) return PTR_ERR(priv->clk); } - err = clk_prepare_enable(priv->clk); - if (err < 0) - return err; - priv->extclk = devm_clk_get(&pdev->dev, "extclk"); if (IS_ERR(priv->extclk)) { if (PTR_ERR(priv->extclk) == -EPROBE_DEFER) @@ -588,6 +584,10 @@ static int kirkwood_i2s_dev_probe(struct platform_device *pdev) } } + err = clk_prepare_enable(priv->clk); + if (err < 0) + return err; + /* Some sensible defaults - this reflects the powerup values */ priv->ctl_play = KIRKWOOD_PLAYCTL_SIZE_24; priv->ctl_rec = KIRKWOOD_RECCTL_SIZE_24; diff --git a/sound/soc/qcom/apq8016_sbc.c b/sound/soc/qcom/apq8016_sbc.c index d49adc822a11..8e6b88d68ca6 100644 --- a/sound/soc/qcom/apq8016_sbc.c +++ b/sound/soc/qcom/apq8016_sbc.c @@ -163,41 +163,52 @@ static struct apq8016_sbc_data *apq8016_sbc_parse_of(struct snd_soc_card *card) if (!cpu || !codec) { dev_err(dev, "Can't find cpu/codec DT node\n"); - return ERR_PTR(-EINVAL); + ret = -EINVAL; + goto error; } link->cpu_of_node = of_parse_phandle(cpu, "sound-dai", 0); if (!link->cpu_of_node) { dev_err(card->dev, "error getting cpu phandle\n"); - return ERR_PTR(-EINVAL); + ret = -EINVAL; + goto error; } ret = snd_soc_of_get_dai_name(cpu, &link->cpu_dai_name); if (ret) { dev_err(card->dev, "error getting cpu dai name\n"); - return ERR_PTR(ret); + goto error; } ret = snd_soc_of_get_dai_link_codecs(dev, codec, link); if (ret < 0) { dev_err(card->dev, "error getting codec dai name\n"); - return ERR_PTR(ret); + goto error; } link->platform_of_node = link->cpu_of_node; ret = of_property_read_string(np, "link-name", &link->name); if (ret) { dev_err(card->dev, "error getting codec dai_link name\n"); - return ERR_PTR(ret); + goto error; } link->stream_name = link->name; link->init = apq8016_sbc_dai_init; link++; + + of_node_put(cpu); + of_node_put(codec); } return data; + + error: + of_node_put(np); + of_node_put(cpu); + of_node_put(codec); + return ERR_PTR(ret); } static const struct snd_soc_dapm_widget apq8016_sbc_dapm_widgets[] = { diff --git a/sound/soc/rockchip/rockchip_i2s.c b/sound/soc/rockchip/rockchip_i2s.c index 66fc13a2396a..0e07e3dea7de 100644 --- a/sound/soc/rockchip/rockchip_i2s.c +++ b/sound/soc/rockchip/rockchip_i2s.c @@ -676,7 +676,7 @@ static int rockchip_i2s_probe(struct platform_device *pdev) ret = devm_snd_dmaengine_pcm_register(&pdev->dev, NULL, 0); if (ret) { dev_err(&pdev->dev, "Could not register PCM\n"); - return ret; + goto err_suspend; } return 0; diff --git a/sound/soc/sh/rcar/core.c b/sound/soc/sh/rcar/core.c index 710c01cd2ad2..f203c0878e69 100644 --- a/sound/soc/sh/rcar/core.c +++ b/sound/soc/sh/rcar/core.c @@ -676,6 +676,7 @@ static int rsnd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) } /* set format */ + rdai->bit_clk_inv = 0; switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) { case SND_SOC_DAIFMT_I2S: rdai->sys_delay = 0; @@ -1277,6 +1278,18 @@ int rsnd_kctrl_new(struct rsnd_mod *mod, }; int ret; + /* + * 1) Avoid duplicate register for DVC with MIX case + * 2) Allow duplicate register for MIX + * 3) re-register if card was rebinded + */ + list_for_each_entry(kctrl, &card->controls, list) { + struct rsnd_kctrl_cfg *c = kctrl->private_data; + + if (c == cfg) + return 0; + } + if (size > RSND_MAX_CHANNELS) return -EINVAL; diff --git a/sound/soc/sh/rcar/rsnd.h b/sound/soc/sh/rcar/rsnd.h index 1768a0ae469d..c68b31483c7b 100644 --- a/sound/soc/sh/rcar/rsnd.h +++ b/sound/soc/sh/rcar/rsnd.h @@ -432,6 +432,7 @@ struct rsnd_dai_stream { char name[RSND_DAI_NAME_SIZE]; struct snd_pcm_substream *substream; struct rsnd_mod *mod[RSND_MOD_MAX]; + struct rsnd_mod *dma; struct rsnd_dai *rdai; u32 parent_ssi_status; }; diff --git a/sound/soc/sh/rcar/ssi.c b/sound/soc/sh/rcar/ssi.c index 60cc550c5a4c..cae9ed6a0cdb 100644 --- a/sound/soc/sh/rcar/ssi.c +++ b/sound/soc/sh/rcar/ssi.c @@ -66,7 +66,6 @@ struct rsnd_ssi { struct rsnd_mod mod; - struct rsnd_mod *dma; u32 flags; u32 cr_own; @@ -868,7 +867,6 @@ static int rsnd_ssi_dma_probe(struct rsnd_mod *mod, struct rsnd_dai_stream *io, struct rsnd_priv *priv) { - struct rsnd_ssi *ssi = rsnd_mod_to_ssi(mod); int ret; /* @@ -883,7 +881,7 @@ static int rsnd_ssi_dma_probe(struct rsnd_mod *mod, return ret; /* SSI probe might be called many times in MUX multi path */ - ret = rsnd_dma_attach(io, mod, &ssi->dma); + ret = rsnd_dma_attach(io, mod, &io->dma); return ret; } diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 9fbbd8f0b897..aa4d2d3ef9df 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -4481,7 +4481,7 @@ static void soc_dapm_shutdown_dapm(struct snd_soc_dapm_context *dapm) continue; if (w->power) { dapm_seq_insert(w, &down_list, false); - w->power = 0; + w->new_power = 0; powerdown = 1; } } diff --git a/sound/soc/soc-jack.c b/sound/soc/soc-jack.c index 99902ae1a2d9..b04ecc633da3 100644 --- a/sound/soc/soc-jack.c +++ b/sound/soc/soc-jack.c @@ -127,10 +127,9 @@ void snd_soc_jack_report(struct snd_soc_jack *jack, int status, int mask) unsigned int sync = 0; int enable; - trace_snd_soc_jack_report(jack, mask, status); - if (!jack) return; + trace_snd_soc_jack_report(jack, mask, status); dapm = &jack->card->dapm; diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 939b5950c06d..73481d8e9c76 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -48,8 +48,8 @@ static bool snd_soc_dai_stream_valid(struct snd_soc_dai *dai, int stream) else codec_stream = &dai->driver->capture; - /* If the codec specifies any rate at all, it supports the stream. */ - return codec_stream->rates; + /* If the codec specifies any channels at all, it supports the stream */ + return codec_stream->channels_min; } /** @@ -1587,7 +1587,7 @@ static void dpcm_init_runtime_hw(struct snd_pcm_runtime *runtime, u64 formats) { runtime->hw.rate_min = stream->rate_min; - runtime->hw.rate_max = stream->rate_max; + runtime->hw.rate_max = min_not_zero(stream->rate_max, UINT_MAX); runtime->hw.channels_min = stream->channels_min; runtime->hw.channels_max = stream->channels_max; if (runtime->hw.formats) @@ -2132,42 +2132,81 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, } EXPORT_SYMBOL_GPL(dpcm_be_dai_trigger); +static int dpcm_dai_trigger_fe_be(struct snd_pcm_substream *substream, + int cmd, bool fe_first) +{ + struct snd_soc_pcm_runtime *fe = substream->private_data; + int ret; + + /* call trigger on the frontend before the backend. */ + if (fe_first) { + dev_dbg(fe->dev, "ASoC: pre trigger FE %s cmd %d\n", + fe->dai_link->name, cmd); + + ret = soc_pcm_trigger(substream, cmd); + if (ret < 0) + return ret; + + ret = dpcm_be_dai_trigger(fe, substream->stream, cmd); + return ret; + } + + /* call trigger on the frontend after the backend. */ + ret = dpcm_be_dai_trigger(fe, substream->stream, cmd); + if (ret < 0) + return ret; + + dev_dbg(fe->dev, "ASoC: post trigger FE %s cmd %d\n", + fe->dai_link->name, cmd); + + ret = soc_pcm_trigger(substream, cmd); + + return ret; +} + static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd) { struct snd_soc_pcm_runtime *fe = substream->private_data; - int stream = substream->stream, ret; + int stream = substream->stream; + int ret = 0; enum snd_soc_dpcm_trigger trigger = fe->dai_link->trigger[stream]; fe->dpcm[stream].runtime_update = SND_SOC_DPCM_UPDATE_FE; switch (trigger) { case SND_SOC_DPCM_TRIGGER_PRE: - /* call trigger on the frontend before the backend. */ - - dev_dbg(fe->dev, "ASoC: pre trigger FE %s cmd %d\n", - fe->dai_link->name, cmd); - - ret = soc_pcm_trigger(substream, cmd); - if (ret < 0) { - dev_err(fe->dev,"ASoC: trigger FE failed %d\n", ret); - goto out; + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + ret = dpcm_dai_trigger_fe_be(substream, cmd, true); + break; + case SNDRV_PCM_TRIGGER_STOP: + case SNDRV_PCM_TRIGGER_SUSPEND: + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + ret = dpcm_dai_trigger_fe_be(substream, cmd, false); + break; + default: + ret = -EINVAL; + break; } - - ret = dpcm_be_dai_trigger(fe, substream->stream, cmd); break; case SND_SOC_DPCM_TRIGGER_POST: - /* call trigger on the frontend after the backend. */ - - ret = dpcm_be_dai_trigger(fe, substream->stream, cmd); - if (ret < 0) { - dev_err(fe->dev,"ASoC: trigger FE failed %d\n", ret); - goto out; + switch (cmd) { + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_RESUME: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + ret = dpcm_dai_trigger_fe_be(substream, cmd, false); + break; + case SNDRV_PCM_TRIGGER_STOP: + case SNDRV_PCM_TRIGGER_SUSPEND: + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + ret = dpcm_dai_trigger_fe_be(substream, cmd, true); + break; + default: + ret = -EINVAL; + break; } - - dev_dbg(fe->dev, "ASoC: post trigger FE %s cmd %d\n", - fe->dai_link->name, cmd); - - ret = soc_pcm_trigger(substream, cmd); break; case SND_SOC_DPCM_TRIGGER_BESPOKE: /* bespoke trigger() - handles both FE and BEs */ @@ -2176,10 +2215,6 @@ static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd) fe->dai_link->name, cmd); ret = soc_pcm_bespoke_trigger(substream, cmd); - if (ret < 0) { - dev_err(fe->dev,"ASoC: trigger FE failed %d\n", ret); - goto out; - } break; default: dev_err(fe->dev, "ASoC: invalid trigger cmd %d for %s\n", cmd, @@ -2188,6 +2223,12 @@ static int dpcm_fe_dai_do_trigger(struct snd_pcm_substream *substream, int cmd) goto out; } + if (ret < 0) { + dev_err(fe->dev, "ASoC: trigger FE cmd: %d failed: %d\n", + cmd, ret); + goto out; + } + switch (cmd) { case SNDRV_PCM_TRIGGER_START: case SNDRV_PCM_TRIGGER_RESUME: @@ -2952,16 +2993,16 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, unsigned long flags; /* FE state */ - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, "[%s - %s]\n", fe->dai_link->name, stream ? "Capture" : "Playback"); - offset += snprintf(buf + offset, size - offset, "State: %s\n", + offset += scnprintf(buf + offset, size - offset, "State: %s\n", dpcm_state_string(fe->dpcm[stream].state)); if ((fe->dpcm[stream].state >= SND_SOC_DPCM_STATE_HW_PARAMS) && (fe->dpcm[stream].state <= SND_SOC_DPCM_STATE_STOP)) - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, "Hardware Params: " "Format = %s, Channels = %d, Rate = %d\n", snd_pcm_format_name(params_format(params)), @@ -2969,10 +3010,10 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, params_rate(params)); /* BEs state */ - offset += snprintf(buf + offset, size - offset, "Backends:\n"); + offset += scnprintf(buf + offset, size - offset, "Backends:\n"); if (list_empty(&fe->dpcm[stream].be_clients)) { - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, " No active DSP links\n"); goto out; } @@ -2982,16 +3023,16 @@ static ssize_t dpcm_show_state(struct snd_soc_pcm_runtime *fe, struct snd_soc_pcm_runtime *be = dpcm->be; params = &dpcm->hw_params; - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, "- %s\n", be->dai_link->name); - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, " State: %s\n", dpcm_state_string(be->dpcm[stream].state)); if ((be->dpcm[stream].state >= SND_SOC_DPCM_STATE_HW_PARAMS) && (be->dpcm[stream].state <= SND_SOC_DPCM_STATE_STOP)) - offset += snprintf(buf + offset, size - offset, + offset += scnprintf(buf + offset, size - offset, " Hardware Params: " "Format = %s, Channels = %d, Rate = %d\n", snd_pcm_format_name(params_format(params)), diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 2d5cf263515b..1a912f72bddd 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -1921,6 +1921,7 @@ static int soc_tplg_pcm_elems_load(struct soc_tplg *tplg, int count = hdr->count; int i; bool abi_match; + int ret; if (tplg->pass != SOC_TPLG_PASS_PCM_DAI) return 0; @@ -1957,7 +1958,12 @@ static int soc_tplg_pcm_elems_load(struct soc_tplg *tplg, } /* create the FE DAIs and DAI links */ - soc_tplg_pcm_create(tplg, _pcm); + ret = soc_tplg_pcm_create(tplg, _pcm); + if (ret < 0) { + if (!abi_match) + kfree(_pcm); + return ret; + } /* offset by version-specific struct size and * real priv data size @@ -2171,8 +2177,11 @@ static int soc_tplg_link_elems_load(struct soc_tplg *tplg, } ret = soc_tplg_link_config(tplg, _link); - if (ret < 0) + if (ret < 0) { + if (!abi_match) + kfree(_link); return ret; + } /* offset by version-specific struct size and * real priv data size @@ -2324,7 +2333,7 @@ static int soc_tplg_manifest_load(struct soc_tplg *tplg, { struct snd_soc_tplg_manifest *manifest, *_manifest; bool abi_match; - int err; + int ret = 0; if (tplg->pass != SOC_TPLG_PASS_MANIFEST) return 0; @@ -2337,19 +2346,19 @@ static int soc_tplg_manifest_load(struct soc_tplg *tplg, _manifest = manifest; } else { abi_match = false; - err = manifest_new_ver(tplg, manifest, &_manifest); - if (err < 0) - return err; + ret = manifest_new_ver(tplg, manifest, &_manifest); + if (ret < 0) + return ret; } /* pass control to component driver for optional further init */ if (tplg->comp && tplg->ops && tplg->ops->manifest) - return tplg->ops->manifest(tplg->comp, _manifest); + ret = tplg->ops->manifest(tplg->comp, _manifest); if (!abi_match) /* free the duplicated one */ kfree(_manifest); - return 0; + return ret; } /* validate header magic, size and type */ diff --git a/sound/soc/sti/uniperif_player.c b/sound/soc/sti/uniperif_player.c index d8b6936e544e..908f13623f8c 100644 --- a/sound/soc/sti/uniperif_player.c +++ b/sound/soc/sti/uniperif_player.c @@ -226,7 +226,6 @@ static void uni_player_set_channel_status(struct uniperif *player, * sampling frequency. If no sample rate is already specified, then * set one. */ - mutex_lock(&player->ctrl_lock); if (runtime) { switch (runtime->rate) { case 22050: @@ -303,7 +302,6 @@ static void uni_player_set_channel_status(struct uniperif *player, player->stream_settings.iec958.status[3 + (n * 4)] << 24; SET_UNIPERIF_CHANNEL_STA_REGN(player, n, status); } - mutex_unlock(&player->ctrl_lock); /* Update the channel status */ if (player->ver < SND_ST_UNIPERIF_VERSION_UNI_PLR_TOP_1_0) @@ -365,8 +363,10 @@ static int uni_player_prepare_iec958(struct uniperif *player, SET_UNIPERIF_CTRL_ZERO_STUFF_HW(player); + mutex_lock(&player->ctrl_lock); /* Update the channel status */ uni_player_set_channel_status(player, runtime); + mutex_unlock(&player->ctrl_lock); /* Clear the user validity user bits */ SET_UNIPERIF_USER_VALIDITY_VALIDITY_LR(player, 0); @@ -598,7 +598,6 @@ static int uni_player_ctl_iec958_put(struct snd_kcontrol *kcontrol, iec958->status[1] = ucontrol->value.iec958.status[1]; iec958->status[2] = ucontrol->value.iec958.status[2]; iec958->status[3] = ucontrol->value.iec958.status[3]; - mutex_unlock(&player->ctrl_lock); spin_lock_irqsave(&player->irq_lock, flags); if (player->substream && player->substream->runtime) @@ -608,6 +607,8 @@ static int uni_player_ctl_iec958_put(struct snd_kcontrol *kcontrol, uni_player_set_channel_status(player, NULL); spin_unlock_irqrestore(&player->irq_lock, flags); + mutex_unlock(&player->ctrl_lock); + return 0; } diff --git a/sound/soc/stm/stm32_i2s.c b/sound/soc/stm/stm32_i2s.c index 6d0bf78d114d..aa2b1196171a 100644 --- a/sound/soc/stm/stm32_i2s.c +++ b/sound/soc/stm/stm32_i2s.c @@ -246,8 +246,8 @@ static irqreturn_t stm32_i2s_isr(int irq, void *devid) return IRQ_NONE; } - regmap_update_bits(i2s->regmap, STM32_I2S_IFCR_REG, - I2S_IFCR_MASK, flags); + regmap_write_bits(i2s->regmap, STM32_I2S_IFCR_REG, + I2S_IFCR_MASK, flags); if (flags & I2S_SR_OVR) { dev_dbg(&pdev->dev, "Overrun\n"); @@ -276,7 +276,6 @@ static bool stm32_i2s_readable_reg(struct device *dev, unsigned int reg) case STM32_I2S_CFG2_REG: case STM32_I2S_IER_REG: case STM32_I2S_SR_REG: - case STM32_I2S_IFCR_REG: case STM32_I2S_TXDR_REG: case STM32_I2S_RXDR_REG: case STM32_I2S_CGFR_REG: @@ -488,7 +487,7 @@ static int stm32_i2s_configure(struct snd_soc_dai *cpu_dai, { struct stm32_i2s_data *i2s = snd_soc_dai_get_drvdata(cpu_dai); int format = params_width(params); - u32 cfgr, cfgr_mask, cfg1, cfg1_mask; + u32 cfgr, cfgr_mask, cfg1; unsigned int fthlv; int ret; @@ -501,7 +500,7 @@ static int stm32_i2s_configure(struct snd_soc_dai *cpu_dai, switch (format) { case 16: cfgr = I2S_CGFR_DATLEN_SET(I2S_I2SMOD_DATLEN_16); - cfgr_mask = I2S_CGFR_DATLEN_MASK; + cfgr_mask = I2S_CGFR_DATLEN_MASK | I2S_CGFR_CHLEN; break; case 32: cfgr = I2S_CGFR_DATLEN_SET(I2S_I2SMOD_DATLEN_32) | @@ -529,15 +528,11 @@ static int stm32_i2s_configure(struct snd_soc_dai *cpu_dai, if (ret < 0) return ret; - cfg1 = I2S_CFG1_RXDMAEN | I2S_CFG1_TXDMAEN; - cfg1_mask = cfg1; - fthlv = STM32_I2S_FIFO_SIZE * I2S_FIFO_TH_ONE_QUARTER / 4; - cfg1 |= I2S_CFG1_FTHVL_SET(fthlv - 1); - cfg1_mask |= I2S_CFG1_FTHVL_MASK; + cfg1 = I2S_CFG1_FTHVL_SET(fthlv - 1); return regmap_update_bits(i2s->regmap, STM32_I2S_CFG1_REG, - cfg1_mask, cfg1); + I2S_CFG1_FTHVL_MASK, cfg1); } static int stm32_i2s_startup(struct snd_pcm_substream *substream, @@ -551,8 +546,8 @@ static int stm32_i2s_startup(struct snd_pcm_substream *substream, i2s->refcount++; spin_unlock(&i2s->lock_fd); - return regmap_update_bits(i2s->regmap, STM32_I2S_IFCR_REG, - I2S_IFCR_MASK, I2S_IFCR_MASK); + return regmap_write_bits(i2s->regmap, STM32_I2S_IFCR_REG, + I2S_IFCR_MASK, I2S_IFCR_MASK); } static int stm32_i2s_hw_params(struct snd_pcm_substream *substream, @@ -589,6 +584,10 @@ static int stm32_i2s_trigger(struct snd_pcm_substream *substream, int cmd, /* Enable i2s */ dev_dbg(cpu_dai->dev, "start I2S\n"); + cfg1_mask = I2S_CFG1_RXDMAEN | I2S_CFG1_TXDMAEN; + regmap_update_bits(i2s->regmap, STM32_I2S_CFG1_REG, + cfg1_mask, cfg1_mask); + ret = regmap_update_bits(i2s->regmap, STM32_I2S_CR1_REG, I2S_CR1_SPE, I2S_CR1_SPE); if (ret < 0) { @@ -603,8 +602,8 @@ static int stm32_i2s_trigger(struct snd_pcm_substream *substream, int cmd, return ret; } - regmap_update_bits(i2s->regmap, STM32_I2S_IFCR_REG, - I2S_IFCR_MASK, I2S_IFCR_MASK); + regmap_write_bits(i2s->regmap, STM32_I2S_IFCR_REG, + I2S_IFCR_MASK, I2S_IFCR_MASK); if (playback_flg) { ier = I2S_IER_UDRIE; diff --git a/sound/soc/stm/stm32_spdifrx.c b/sound/soc/stm/stm32_spdifrx.c index 84cc5678beba..7bc57651e186 100644 --- a/sound/soc/stm/stm32_spdifrx.c +++ b/sound/soc/stm/stm32_spdifrx.c @@ -213,6 +213,7 @@ * @slave_config: dma slave channel runtime config pointer * @phys_addr: SPDIFRX registers physical base address * @lock: synchronization enabling lock + * @irq_lock: prevent race condition with IRQ on stream state * @cs: channel status buffer * @ub: user data buffer * @irq: SPDIFRX interrupt line @@ -233,6 +234,7 @@ struct stm32_spdifrx_data { struct dma_slave_config slave_config; dma_addr_t phys_addr; spinlock_t lock; /* Sync enabling lock */ + spinlock_t irq_lock; /* Prevent race condition on stream state */ unsigned char cs[SPDIFRX_CS_BYTES_NB]; unsigned char ub[SPDIFRX_UB_BYTES_NB]; int irq; @@ -313,6 +315,7 @@ static void stm32_spdifrx_dma_ctrl_stop(struct stm32_spdifrx_data *spdifrx) static int stm32_spdifrx_start_sync(struct stm32_spdifrx_data *spdifrx) { int cr, cr_mask, imr, ret; + unsigned long flags; /* Enable IRQs */ imr = SPDIFRX_IMR_IFEIE | SPDIFRX_IMR_SYNCDIE | SPDIFRX_IMR_PERRIE; @@ -320,7 +323,7 @@ static int stm32_spdifrx_start_sync(struct stm32_spdifrx_data *spdifrx) if (ret) return ret; - spin_lock(&spdifrx->lock); + spin_lock_irqsave(&spdifrx->lock, flags); spdifrx->refcount++; @@ -353,7 +356,7 @@ static int stm32_spdifrx_start_sync(struct stm32_spdifrx_data *spdifrx) "Failed to start synchronization\n"); } - spin_unlock(&spdifrx->lock); + spin_unlock_irqrestore(&spdifrx->lock, flags); return ret; } @@ -361,11 +364,12 @@ static int stm32_spdifrx_start_sync(struct stm32_spdifrx_data *spdifrx) static void stm32_spdifrx_stop(struct stm32_spdifrx_data *spdifrx) { int cr, cr_mask, reg; + unsigned long flags; - spin_lock(&spdifrx->lock); + spin_lock_irqsave(&spdifrx->lock, flags); if (--spdifrx->refcount) { - spin_unlock(&spdifrx->lock); + spin_unlock_irqrestore(&spdifrx->lock, flags); return; } @@ -384,7 +388,7 @@ static void stm32_spdifrx_stop(struct stm32_spdifrx_data *spdifrx) regmap_read(spdifrx->regmap, STM32_SPDIFRX_DR, ®); regmap_read(spdifrx->regmap, STM32_SPDIFRX_CSR, ®); - spin_unlock(&spdifrx->lock); + spin_unlock_irqrestore(&spdifrx->lock, flags); } static int stm32_spdifrx_dma_ctrl_register(struct device *dev, @@ -644,7 +648,6 @@ static const struct regmap_config stm32_h7_spdifrx_regmap_conf = { static irqreturn_t stm32_spdifrx_isr(int irq, void *devid) { struct stm32_spdifrx_data *spdifrx = (struct stm32_spdifrx_data *)devid; - struct snd_pcm_substream *substream = spdifrx->substream; struct platform_device *pdev = spdifrx->pdev; unsigned int cr, mask, sr, imr; unsigned int flags; @@ -712,14 +715,19 @@ static irqreturn_t stm32_spdifrx_isr(int irq, void *devid) regmap_update_bits(spdifrx->regmap, STM32_SPDIFRX_CR, SPDIFRX_CR_SPDIFEN_MASK, cr); - if (substream) - snd_pcm_stop(substream, SNDRV_PCM_STATE_DISCONNECTED); + spin_lock(&spdifrx->irq_lock); + if (spdifrx->substream) + snd_pcm_stop(spdifrx->substream, + SNDRV_PCM_STATE_DISCONNECTED); + spin_unlock(&spdifrx->irq_lock); return IRQ_HANDLED; } - if (err_xrun && substream) - snd_pcm_stop_xrun(substream); + spin_lock(&spdifrx->irq_lock); + if (err_xrun && spdifrx->substream) + snd_pcm_stop_xrun(spdifrx->substream); + spin_unlock(&spdifrx->irq_lock); return IRQ_HANDLED; } @@ -728,9 +736,12 @@ static int stm32_spdifrx_startup(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai) { struct stm32_spdifrx_data *spdifrx = snd_soc_dai_get_drvdata(cpu_dai); + unsigned long flags; int ret; + spin_lock_irqsave(&spdifrx->irq_lock, flags); spdifrx->substream = substream; + spin_unlock_irqrestore(&spdifrx->irq_lock, flags); ret = clk_prepare_enable(spdifrx->kclk); if (ret) @@ -802,8 +813,12 @@ static void stm32_spdifrx_shutdown(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai) { struct stm32_spdifrx_data *spdifrx = snd_soc_dai_get_drvdata(cpu_dai); + unsigned long flags; + spin_lock_irqsave(&spdifrx->irq_lock, flags); spdifrx->substream = NULL; + spin_unlock_irqrestore(&spdifrx->irq_lock, flags); + clk_disable_unprepare(spdifrx->kclk); } @@ -908,6 +923,7 @@ static int stm32_spdifrx_probe(struct platform_device *pdev) spdifrx->pdev = pdev; init_completion(&spdifrx->cs_completion); spin_lock_init(&spdifrx->lock); + spin_lock_init(&spdifrx->irq_lock); platform_set_drvdata(pdev, spdifrx); diff --git a/sound/soc/sunxi/sun4i-i2s.c b/sound/soc/sunxi/sun4i-i2s.c index da0a2083e12a..d2802fd8c1dd 100644 --- a/sound/soc/sunxi/sun4i-i2s.c +++ b/sound/soc/sunxi/sun4i-i2s.c @@ -80,8 +80,8 @@ #define SUN4I_I2S_CLK_DIV_MCLK_MASK GENMASK(3, 0) #define SUN4I_I2S_CLK_DIV_MCLK(mclk) ((mclk) << 0) -#define SUN4I_I2S_RX_CNT_REG 0x28 -#define SUN4I_I2S_TX_CNT_REG 0x2c +#define SUN4I_I2S_TX_CNT_REG 0x28 +#define SUN4I_I2S_RX_CNT_REG 0x2c #define SUN4I_I2S_TX_CHAN_SEL_REG 0x30 #define SUN4I_I2S_CHAN_SEL(num_chan) (((num_chan) - 1) << 0) diff --git a/sound/soc/sunxi/sun8i-codec.c b/sound/soc/sunxi/sun8i-codec.c index 7a312168f864..a031f25031b4 100644 --- a/sound/soc/sunxi/sun8i-codec.c +++ b/sound/soc/sunxi/sun8i-codec.c @@ -71,6 +71,7 @@ #define SUN8I_SYS_SR_CTRL_AIF1_FS_MASK GENMASK(15, 12) #define SUN8I_SYS_SR_CTRL_AIF2_FS_MASK GENMASK(11, 8) +#define SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT_MASK GENMASK(3, 2) #define SUN8I_AIF1CLK_CTRL_AIF1_WORD_SIZ_MASK GENMASK(5, 4) #define SUN8I_AIF1CLK_CTRL_AIF1_LRCK_DIV_MASK GENMASK(8, 6) #define SUN8I_AIF1CLK_CTRL_AIF1_BCLK_DIV_MASK GENMASK(12, 9) @@ -221,7 +222,7 @@ static int sun8i_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) return -EINVAL; } regmap_update_bits(scodec->regmap, SUN8I_AIF1CLK_CTRL, - BIT(SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT), + SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT_MASK, value << SUN8I_AIF1CLK_CTRL_AIF1_DATA_FMT); return 0; diff --git a/sound/soc/tegra/tegra_sgtl5000.c b/sound/soc/tegra/tegra_sgtl5000.c index 45a4aa9d2a47..901457da25ec 100644 --- a/sound/soc/tegra/tegra_sgtl5000.c +++ b/sound/soc/tegra/tegra_sgtl5000.c @@ -149,14 +149,14 @@ static int tegra_sgtl5000_driver_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Property 'nvidia,i2s-controller' missing/invalid\n"); ret = -EINVAL; - goto err; + goto err_put_codec_of_node; } tegra_sgtl5000_dai.platform_of_node = tegra_sgtl5000_dai.cpu_of_node; ret = tegra_asoc_utils_init(&machine->util_data, &pdev->dev); if (ret) - goto err; + goto err_put_cpu_of_node; ret = snd_soc_register_card(card); if (ret) { @@ -169,6 +169,13 @@ static int tegra_sgtl5000_driver_probe(struct platform_device *pdev) err_fini_utils: tegra_asoc_utils_fini(&machine->util_data); +err_put_cpu_of_node: + of_node_put(tegra_sgtl5000_dai.cpu_of_node); + tegra_sgtl5000_dai.cpu_of_node = NULL; + tegra_sgtl5000_dai.platform_of_node = NULL; +err_put_codec_of_node: + of_node_put(tegra_sgtl5000_dai.codec_of_node); + tegra_sgtl5000_dai.codec_of_node = NULL; err: return ret; } @@ -183,6 +190,12 @@ static int tegra_sgtl5000_driver_remove(struct platform_device *pdev) tegra_asoc_utils_fini(&machine->util_data); + of_node_put(tegra_sgtl5000_dai.cpu_of_node); + tegra_sgtl5000_dai.cpu_of_node = NULL; + tegra_sgtl5000_dai.platform_of_node = NULL; + of_node_put(tegra_sgtl5000_dai.codec_of_node); + tegra_sgtl5000_dai.codec_of_node = NULL; + return ret; } diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 3502ad6201d2..6ea14b347304 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -417,6 +417,9 @@ static void snd_complete_urb(struct urb *urb) } prepare_outbound_urb(ep, ctx); + /* can be stopped during prepare callback */ + if (unlikely(!test_bit(EP_FLAG_RUNNING, &ep->flags))) + goto exit_clear; } else { retire_inbound_urb(ep, ctx); /* can be stopped during retire callback */ diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 5d48f8e74c56..e6e4c3b9d9d3 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1052,7 +1052,8 @@ static int get_min_max_with_quirks(struct usb_mixer_elem_info *cval, if (cval->min + cval->res < cval->max) { int last_valid_res = cval->res; int saved, test, check; - get_cur_mix_raw(cval, minchn, &saved); + if (get_cur_mix_raw(cval, minchn, &saved) < 0) + goto no_res_check; for (;;) { test = saved; if (test < cval->max) @@ -1072,6 +1073,7 @@ static int get_min_max_with_quirks(struct usb_mixer_elem_info *cval, snd_usb_set_cur_mix_value(cval, minchn, 0, saved); } +no_res_check: cval->initialized = 1; } @@ -2630,7 +2632,9 @@ int snd_usb_create_mixer(struct snd_usb_audio *chip, int ctrlif, (err = snd_usb_mixer_status_create(mixer)) < 0) goto _error; - snd_usb_mixer_apply_create_quirk(mixer); + err = snd_usb_mixer_apply_create_quirk(mixer); + if (err < 0) + goto _error; err = snd_device_new(chip->card, SNDRV_DEV_CODEC, mixer, &dev_ops); if (err < 0) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index d32727c74a16..c892b4d1e733 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3293,19 +3293,14 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), .ifnum = 0, .type = QUIRK_AUDIO_STANDARD_MIXER, }, - /* Capture */ - { - .ifnum = 1, - .type = QUIRK_IGNORE_INTERFACE, - }, /* Playback */ { - .ifnum = 2, + .ifnum = 1, .type = QUIRK_AUDIO_FIXED_ENDPOINT, .data = &(const struct audioformat) { .formats = SNDRV_PCM_FMTBIT_S16_LE, .channels = 2, - .iface = 2, + .iface = 1, .altsetting = 1, .altset_idx = 1, .attributes = UAC_EP_CS_ATTR_FILL_MAX | diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index ad14d6b78bdc..4872c27f6054 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1143,6 +1143,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */ case USB_ID(0x0556, 0x0014): /* Phoenix Audio TMX320VC */ case USB_ID(0x05A3, 0x9420): /* ELP HD USB Camera */ + case USB_ID(0x05a7, 0x1020): /* Bose Companion 5 */ case USB_ID(0x074D, 0x3553): /* Outlaw RR2150 (Micronas UAC3553B) */ case USB_ID(0x1395, 0x740a): /* Sennheiser DECT */ case USB_ID(0x1901, 0x0191): /* GE B850V3 CP2114 audio interface */ @@ -1150,6 +1151,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x1de7, 0x0014): /* Phoenix Audio TMX320 */ case USB_ID(0x1de7, 0x0114): /* Phoenix Audio MT202pcs */ case USB_ID(0x21B4, 0x0081): /* AudioQuest DragonFly */ + case USB_ID(0x2912, 0x30c8): /* Audioengine D1 */ return true; } return false; diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c index f4b3cda412fc..e75271e731b2 100644 --- a/sound/usb/usx2y/usX2Yhwdep.c +++ b/sound/usb/usx2y/usX2Yhwdep.c @@ -131,7 +131,7 @@ static int snd_usX2Y_hwdep_dsp_status(struct snd_hwdep *hw, info->num_dsps = 2; // 0: Prepad Data, 1: FPGA Code if (us428->chip_status & USX2Y_STAT_CHIP_INIT) info->chip_ready = 1; - info->version = USX2Y_DRIVER_VERSION; + info->version = USX2Y_DRIVER_VERSION; return 0; } diff --git a/tools/gpio/Build b/tools/gpio/Build index 620c1937d957..4141f35837db 100644 --- a/tools/gpio/Build +++ b/tools/gpio/Build @@ -1,3 +1,4 @@ +gpio-utils-y += gpio-utils.o lsgpio-y += lsgpio.o gpio-utils.o gpio-hammer-y += gpio-hammer.o gpio-utils.o gpio-event-mon-y += gpio-event-mon.o gpio-utils.o diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile index 240eda014b37..6a73c06e069c 100644 --- a/tools/gpio/Makefile +++ b/tools/gpio/Makefile @@ -3,7 +3,11 @@ include ../scripts/Makefile.include bindir ?= /usr/bin -ifeq ($(srctree),) +# This will work when gpio is built in tools env. where srctree +# isn't set and when invoked from selftests build, where srctree +# is set to ".". building_out_of_srctree is undefined for in srctree +# builds +ifndef building_out_of_srctree srctree := $(patsubst %/,%,$(dir $(CURDIR))) srctree := $(patsubst %/,%,$(dir $(srctree))) endif @@ -31,11 +35,15 @@ $(OUTPUT)include/linux/gpio.h: ../../include/uapi/linux/gpio.h prepare: $(OUTPUT)include/linux/gpio.h +GPIO_UTILS_IN := $(output)gpio-utils-in.o +$(GPIO_UTILS_IN): prepare FORCE + $(Q)$(MAKE) $(build)=gpio-utils + # # lsgpio # LSGPIO_IN := $(OUTPUT)lsgpio-in.o -$(LSGPIO_IN): prepare FORCE +$(LSGPIO_IN): prepare FORCE $(OUTPUT)gpio-utils-in.o $(Q)$(MAKE) $(build)=lsgpio $(OUTPUT)lsgpio: $(LSGPIO_IN) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ @@ -44,7 +52,7 @@ $(OUTPUT)lsgpio: $(LSGPIO_IN) # gpio-hammer # GPIO_HAMMER_IN := $(OUTPUT)gpio-hammer-in.o -$(GPIO_HAMMER_IN): prepare FORCE +$(GPIO_HAMMER_IN): prepare FORCE $(OUTPUT)gpio-utils-in.o $(Q)$(MAKE) $(build)=gpio-hammer $(OUTPUT)gpio-hammer: $(GPIO_HAMMER_IN) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ @@ -53,7 +61,7 @@ $(OUTPUT)gpio-hammer: $(GPIO_HAMMER_IN) # gpio-event-mon # GPIO_EVENT_MON_IN := $(OUTPUT)gpio-event-mon-in.o -$(GPIO_EVENT_MON_IN): prepare FORCE +$(GPIO_EVENT_MON_IN): prepare FORCE $(OUTPUT)gpio-utils-in.o $(Q)$(MAKE) $(build)=gpio-event-mon $(OUTPUT)gpio-event-mon: $(GPIO_EVENT_MON_IN) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index edfeaba95429..a2681d17a579 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -23,11 +23,13 @@ /* ETMv3.5/PTM's ETMCR config bit */ #define ETM_OPT_CYCACC 12 +#define ETM_OPT_CTXTID 14 #define ETM_OPT_TS 28 #define ETM_OPT_RETSTK 29 /* ETMv4 CONFIGR programming bits for the ETM OPTs */ #define ETM4_CFG_BIT_CYCACC 4 +#define ETM4_CFG_BIT_CTXTID 6 #define ETM4_CFG_BIT_TS 11 #define ETM4_CFG_BIT_RETSTK 12 diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h index 6c3e2cc274c5..0ec646f127dc 100644 --- a/tools/include/linux/string.h +++ b/tools/include/linux/string.h @@ -14,7 +14,15 @@ int strtobool(const char *s, bool *res); * However uClibc headers also define __GLIBC__ hence the hack below */ #if defined(__GLIBC__) && !defined(__UCLIBC__) +// pragma diagnostic was introduced in gcc 4.6 +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wredundant-decls" +#endif extern size_t strlcpy(char *dest, const char *src, size_t size); +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +#pragma GCC diagnostic pop +#endif #endif char *str_error_r(int errnum, char *buf, size_t buflen); diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index c0d653d36c0f..fb02aa4591eb 100755 --- a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -261,6 +261,7 @@ class ArchX86(Arch): def __init__(self, exit_reasons): self.sc_perf_evt_open = 298 self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reason_field = 'exit_reason' self.exit_reasons = exit_reasons @@ -276,6 +277,7 @@ class ArchPPC(Arch): # numbers depend on the wordsize. char_ptr_size = ctypes.sizeof(ctypes.c_char_p) self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 + self.exit_reason_field = 'exit_nr' self.exit_reasons = {} @@ -283,6 +285,7 @@ class ArchA64(Arch): def __init__(self): self.sc_perf_evt_open = 241 self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reason_field = 'esr_ec' self.exit_reasons = AARCH64_EXIT_REASONS @@ -290,6 +293,7 @@ class ArchS390(Arch): def __init__(self): self.sc_perf_evt_open = 331 self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reason_field = None self.exit_reasons = None ARCH = Arch.get_arch() @@ -513,8 +517,8 @@ class TracepointProvider(Provider): """ filters = {} filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) - if ARCH.exit_reasons: - filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) + if ARCH.exit_reason_field and ARCH.exit_reasons: + filters['kvm_exit'] = (ARCH.exit_reason_field, ARCH.exit_reasons) return filters def get_available_fields(self): diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c index b24afc0e6e81..45b50b89009a 100644 --- a/tools/lib/api/fs/fs.c +++ b/tools/lib/api/fs/fs.c @@ -210,6 +210,7 @@ static bool fs__env_override(struct fs *fs) size_t name_len = strlen(fs->name); /* name + "_PATH" + '\0' */ char upper_name[name_len + 5 + 1]; + memcpy(upper_name, fs->name, name_len); mem_toupper(upper_name, name_len); strcpy(&upper_name[name_len], "_PATH"); @@ -219,7 +220,8 @@ static bool fs__env_override(struct fs *fs) return false; fs->found = true; - strncpy(fs->path, override_path, sizeof(fs->path)); + strncpy(fs->path, override_path, sizeof(fs->path) - 1); + fs->path[sizeof(fs->path) - 1] = '\0'; return true; } diff --git a/tools/lib/string.c b/tools/lib/string.c index 93b3d4b6feac..ee0afcbdd696 100644 --- a/tools/lib/string.c +++ b/tools/lib/string.c @@ -95,6 +95,10 @@ int strtobool(const char *s, bool *res) * If libc has strlcpy() then that version will override this * implementation: */ +#ifdef __clang__ +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wignored-attributes" +#endif size_t __weak strlcpy(char *dest, const char *src, size_t size) { size_t ret = strlen(src); @@ -106,3 +110,6 @@ size_t __weak strlcpy(char *dest, const char *src, size_t size) } return ret; } +#ifdef __clang__ +#pragma clang diagnostic pop +#endif diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index ed61fb3a46c0..5dbb0dde208c 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -20,9 +20,17 @@ MAKEFLAGS += --no-print-directory LIBFILE = $(OUTPUT)libsubcmd.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) -CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC +CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -fPIC -ifeq ($(CC_NO_CLANG), 0) +ifeq ($(DEBUG),0) + ifeq ($(feature-fortify-source), 1) + CFLAGS += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 + endif +endif + +ifeq ($(DEBUG),1) + CFLAGS += -O0 +else ifeq ($(CC_NO_CLANG), 0) CFLAGS += -O3 else CFLAGS += -O6 diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 8107f060fa84..a0ac01c647f5 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -115,6 +115,7 @@ EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION) LIB_TARGET = libtraceevent.a libtraceevent.so.$(EVENT_PARSE_VERSION) LIB_INSTALL = libtraceevent.a libtraceevent.so* +LIB_INSTALL := $(addprefix $(OUTPUT),$(LIB_INSTALL)) INCLUDES = -I. -I $(srctree)/tools/include $(CONFIG_INCLUDES) diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 5e10ba796a6f..569bceff5f51 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1492,8 +1492,10 @@ static int copy_filter_type(struct event_filter *filter, if (strcmp(str, "TRUE") == 0 || strcmp(str, "FALSE") == 0) { /* Add trivial event */ arg = allocate_arg(); - if (arg == NULL) + if (arg == NULL) { + free(str); return -1; + } arg->type = FILTER_ARG_BOOLEAN; if (strcmp(str, "TRUE") == 0) @@ -1502,8 +1504,11 @@ static int copy_filter_type(struct event_filter *filter, arg->boolean.value = 0; filter_type = add_filter_type(filter, event->id); - if (filter_type == NULL) + if (filter_type == NULL) { + free(str); + free_arg(arg); return -1; + } filter_type->filter = arg; diff --git a/tools/objtool/arch/x86/lib/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt index e0b85930dd77..5cb9f009f2be 100644 --- a/tools/objtool/arch/x86/lib/x86-opcode-map.txt +++ b/tools/objtool/arch/x86/lib/x86-opcode-map.txt @@ -333,7 +333,7 @@ AVXcode: 1 06: CLTS 07: SYSRET (o64) 08: INVD -09: WBINVD +09: WBINVD | WBNOINVD (F3) 0a: 0b: UD2 (1B) 0c: @@ -364,7 +364,7 @@ AVXcode: 1 # a ModR/M byte. 1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev 1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv -1c: +1c: Grp20 (1A),(1C) 1d: 1e: 1f: NOP Ev @@ -792,6 +792,8 @@ f3: Grp17 (1A) f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) +f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3) +f9: MOVDIRI My,Gy EndTable Table: 3-byte opcode 2 (0x0f 0x3a) @@ -907,7 +909,7 @@ EndTable GrpTable: Grp3_2 0: TEST Ev,Iz -1: +1: TEST Ev,Iz 2: NOT Ev 3: NEG Ev 4: MUL rAX,Ev @@ -943,9 +945,9 @@ GrpTable: Grp6 EndTable GrpTable: Grp7 -0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) -1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) -2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) +0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | PCONFIG (101),(11B) | ENCLV (000),(11B) +1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ENCLS (111),(11B) +2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B) 3: LIDT Ms 4: SMSW Mw/Rv 5: rdpkru (110),(11B) | wrpkru (111),(11B) @@ -1020,7 +1022,7 @@ GrpTable: Grp15 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) 4: XSAVE | ptwrite Ey (F3),(11B) 5: XRSTOR | lfence (11B) -6: XSAVEOPT | clwb (66) | mfence (11B) +6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) 7: clflush | clflushopt (66) | sfence (11B) EndTable @@ -1051,6 +1053,10 @@ GrpTable: Grp19 6: vscatterpf1qps/d Wx (66),(ev) EndTable +GrpTable: Grp20 +0: cldemote Mb +EndTable + # AMD's Prefetch Group GrpTable: GrpP 0: PREFETCH diff --git a/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk b/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk index b02a36b2c14f..a42015b305f4 100644 --- a/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk +++ b/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk @@ -69,7 +69,7 @@ BEGIN { lprefix1_expr = "\\((66|!F3)\\)" lprefix2_expr = "\\(F3\\)" - lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" + lprefix3_expr = "\\((F2|!F3|66&F2)\\)" lprefix_expr = "\\((66|F2|F3)\\)" max_lprefix = 4 @@ -257,7 +257,7 @@ function convert_operands(count,opnd, i,j,imm,mod) return add_flags(imm, mod) } -/^[0-9a-f]+\:/ { +/^[0-9a-f]+:/ { if (NR == 1) next # get index diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c index 9074b477bff0..7002df55826f 100644 --- a/tools/pci/pcitest.c +++ b/tools/pci/pcitest.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include @@ -45,15 +44,13 @@ struct pci_test { static int run_test(struct pci_test *test) { - long ret; + int ret = -EINVAL; int fd; - struct timespec start, end; - double time; fd = open(test->device, O_RDWR); if (fd < 0) { perror("can't open PCI Endpoint Test device"); - return fd; + return -ENODEV; } if (test->barnum >= 0 && test->barnum <= 5) { diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 32e64a8a6443..264d458bfe2a 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -528,8 +528,8 @@ tot_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused, { struct c2c_hist_entry *c2c_left; struct c2c_hist_entry *c2c_right; - unsigned int tot_hitm_left; - unsigned int tot_hitm_right; + uint64_t tot_hitm_left; + uint64_t tot_hitm_right; c2c_left = container_of(left, struct c2c_hist_entry, he); c2c_right = container_of(right, struct c2c_hist_entry, he); @@ -562,7 +562,8 @@ __f ## _cmp(struct perf_hpp_fmt *fmt __maybe_unused, \ \ c2c_left = container_of(left, struct c2c_hist_entry, he); \ c2c_right = container_of(right, struct c2c_hist_entry, he); \ - return c2c_left->stats.__f - c2c_right->stats.__f; \ + return (uint64_t) c2c_left->stats.__f - \ + (uint64_t) c2c_right->stats.__f; \ } #define STAT_FN(__f) \ @@ -615,7 +616,8 @@ ld_llcmiss_cmp(struct perf_hpp_fmt *fmt __maybe_unused, c2c_left = container_of(left, struct c2c_hist_entry, he); c2c_right = container_of(right, struct c2c_hist_entry, he); - return llc_miss(&c2c_left->stats) - llc_miss(&c2c_right->stats); + return (uint64_t) llc_miss(&c2c_left->stats) - + (uint64_t) llc_miss(&c2c_right->stats); } static uint64_t total_records(struct c2c_stats *stats) @@ -2454,6 +2456,7 @@ static int build_cl_output(char *cl_sort, bool no_source) bool add_sym = false; bool add_dso = false; bool add_src = false; + int ret = 0; if (!buf) return -ENOMEM; @@ -2472,7 +2475,8 @@ static int build_cl_output(char *cl_sort, bool no_source) add_dso = true; } else if (strcmp(tok, "offset")) { pr_err("unrecognized sort token: %s\n", tok); - return -EINVAL; + ret = -EINVAL; + goto err; } } @@ -2495,13 +2499,15 @@ static int build_cl_output(char *cl_sort, bool no_source) add_sym ? "symbol," : "", add_dso ? "dso," : "", add_src ? "cl_srcline," : "", - "node") < 0) - return -ENOMEM; + "node") < 0) { + ret = -ENOMEM; + goto err; + } c2c.show_src = add_src; - +err: free(buf); - return 0; + return ret; } static int setup_coalesce(const char *coalesce, bool no_source) diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 9e693ce4b73b..ce786f363476 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -687,6 +687,7 @@ static char *compact_gfp_flags(char *gfp_flags) new = realloc(new_flags, len + strlen(cpt) + 2); if (new == NULL) { free(new_flags); + free(orig_flags); return NULL; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 4ddb0726eebc..429c3e140dc3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -342,6 +342,13 @@ static int report__setup_sample_type(struct report *rep) PERF_SAMPLE_BRANCH_ANY)) rep->nonany_branch_mode = true; +#if !defined(HAVE_LIBUNWIND_SUPPORT) && !defined(HAVE_DWARF_SUPPORT) + if (dwarf_callchain_users) { + ui__warning("Please install libunwind or libdw " + "development packages during the perf build.\n"); + } +#endif + return 0; } @@ -735,6 +742,7 @@ int cmd_report(int argc, const char **argv) struct stat st; bool has_br_stack = false; int branch_mode = -1; + int last_key = 0; bool branch_call_mode = false; char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT; const char * const report_usage[] = { @@ -1041,7 +1049,8 @@ repeat: else use_browser = 0; - if (setup_sorting(session->evlist) < 0) { + if ((last_key != K_SWITCH_INPUT_DATA) && + (setup_sorting(session->evlist) < 0)) { if (sort_order) parse_options_usage(report_usage, options, "s", 1); if (field_order) @@ -1101,6 +1110,7 @@ repeat: ret = __cmd_report(&report); if (ret == K_SWITCH_INPUT_DATA) { perf_session__delete(session); + last_key = K_SWITCH_INPUT_DATA; goto repeat; } else ret = 0; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 76789523429a..09c4380bc225 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -355,7 +355,7 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, "selected. Hence, no address to lookup the source line number.\n"); return -EINVAL; } - if (PRINT_FIELD(BRSTACKINSN) && + if (PRINT_FIELD(BRSTACKINSN) && !allow_user_set && !(perf_evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) { pr_err("Display of branch stack assembler requested, but non all-branch filter set\n" diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 94a7cabe9b82..6f9f247b4516 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -342,12 +342,12 @@ static struct fixed { const char *name; const char *event; } fixed[] = { - { "inst_retired.any", "event=0xc0" }, - { "inst_retired.any_p", "event=0xc0" }, - { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03" }, - { "cpu_clk_unhalted.thread", "event=0x3c" }, - { "cpu_clk_unhalted.core", "event=0x3c" }, - { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" }, + { "inst_retired.any", "event=0xc0,period=2000003" }, + { "inst_retired.any_p", "event=0xc0,period=2000003" }, + { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03,period=2000003" }, + { "cpu_clk_unhalted.thread", "event=0x3c,period=2000003" }, + { "cpu_clk_unhalted.core", "event=0x3c,period=2000003" }, + { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1,period=2000003" }, { NULL, NULL}, }; diff --git a/tools/perf/tests/perf-hooks.c b/tools/perf/tests/perf-hooks.c index a693bcf017ea..44c16fd11bf6 100644 --- a/tools/perf/tests/perf-hooks.c +++ b/tools/perf/tests/perf-hooks.c @@ -20,12 +20,11 @@ static void sigsegv_handler(int sig __maybe_unused) static void the_hook(void *_hook_flags) { int *hook_flags = _hook_flags; - int *p = NULL; *hook_flags = 1234; /* Generate a segfault, test perf_hooks__recover */ - *p = 0; + raise(SIGSEGV); } int test__perf_hooks(struct test *test __maybe_unused, int subtest __maybe_unused) diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 89c8e1604ca7..94fe5464bc6f 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -104,6 +104,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused if (perf_evlist__mmap(evlist, 128, true) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); + err = -1; goto out_delete_evlist; } diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 628ad5f7eddb..49a87fb64156 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -3142,6 +3142,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, continue; } + actions->ms.map = map; top = pstack__peek(browser->pstack); if (top == &browser->hists->dso_filter) { /* diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index f5acda13dcfa..289ef63208fb 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -320,21 +320,51 @@ bool die_is_func_def(Dwarf_Die *dw_die) dwarf_attr(dw_die, DW_AT_declaration, &attr) == NULL); } +/** + * die_entrypc - Returns entry PC (the lowest address) of a DIE + * @dw_die: a DIE + * @addr: where to store entry PC + * + * Since dwarf_entrypc() does not return entry PC if the DIE has only address + * range, we have to use this to retrieve the lowest address from the address + * range attribute. + */ +int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr) +{ + Dwarf_Addr base, end; + + if (!addr) + return -EINVAL; + + if (dwarf_entrypc(dw_die, addr) == 0) + return 0; + + return dwarf_ranges(dw_die, 0, &base, addr, &end) < 0 ? -ENOENT : 0; +} + /** * die_is_func_instance - Ensure that this DIE is an instance of a subprogram * @dw_die: a DIE * * Ensure that this DIE is an instance (which has an entry address). - * This returns true if @dw_die is a function instance. If not, you need to - * call die_walk_instances() to find actual instances. + * This returns true if @dw_die is a function instance. If not, the @dw_die + * must be a prototype. You can use die_walk_instances() to find actual + * instances. **/ bool die_is_func_instance(Dwarf_Die *dw_die) { Dwarf_Addr tmp; + Dwarf_Attribute attr_mem; + int tag = dwarf_tag(dw_die); - /* Actually gcc optimizes non-inline as like as inlined */ - return !dwarf_func_inline(dw_die) && dwarf_entrypc(dw_die, &tmp) == 0; + if (tag != DW_TAG_subprogram && + tag != DW_TAG_inlined_subroutine) + return false; + + return dwarf_entrypc(dw_die, &tmp) == 0 || + dwarf_attr(dw_die, DW_AT_ranges, &attr_mem) != NULL; } + /** * die_get_data_member_location - Get the data-member offset * @mb_die: a DIE of a member of a data structure @@ -611,6 +641,9 @@ static int __die_walk_instances_cb(Dwarf_Die *inst, void *data) Dwarf_Die *origin; int tmp; + if (!die_is_func_instance(inst)) + return DIE_FIND_CB_CONTINUE; + attr = dwarf_attr(inst, DW_AT_abstract_origin, &attr_mem); if (attr == NULL) return DIE_FIND_CB_CONTINUE; @@ -682,15 +715,14 @@ static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data) if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) { fname = die_get_call_file(in_die); lineno = die_get_call_lineno(in_die); - if (fname && lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) { + if (fname && lineno > 0 && die_entrypc(in_die, &addr) == 0) { lw->retval = lw->callback(fname, lineno, addr, lw->data); if (lw->retval != 0) return DIE_FIND_CB_END; } + if (!lw->recursive) + return DIE_FIND_CB_SIBLING; } - if (!lw->recursive) - /* Don't need to search recursively */ - return DIE_FIND_CB_SIBLING; if (addr) { fname = dwarf_decl_file(in_die); @@ -723,7 +755,7 @@ static int __die_walk_funclines(Dwarf_Die *sp_die, bool recursive, /* Handle function declaration line */ fname = dwarf_decl_file(sp_die); if (fname && dwarf_decl_line(sp_die, &lineno) == 0 && - dwarf_entrypc(sp_die, &addr) == 0) { + die_entrypc(sp_die, &addr) == 0) { lw.retval = callback(fname, lineno, addr, data); if (lw.retval != 0) goto done; @@ -737,6 +769,10 @@ static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data) { struct __line_walk_param *lw = data; + /* + * Since inlined function can include another inlined function in + * the same file, we need to walk in it recursively. + */ lw->retval = __die_walk_funclines(sp_die, true, lw->callback, lw->data); if (lw->retval != 0) return DWARF_CB_ABORT; @@ -761,11 +797,12 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) Dwarf_Lines *lines; Dwarf_Line *line; Dwarf_Addr addr; - const char *fname, *decf = NULL; + const char *fname, *decf = NULL, *inf = NULL; int lineno, ret = 0; int decl = 0, inl; Dwarf_Die die_mem, *cu_die; size_t nlines, i; + bool flag; /* Get the CU die */ if (dwarf_tag(rt_die) != DW_TAG_compile_unit) { @@ -796,6 +833,12 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) "Possible error in debuginfo.\n"); continue; } + /* Skip end-of-sequence */ + if (dwarf_lineendsequence(line, &flag) != 0 || flag) + continue; + /* Skip Non statement line-info */ + if (dwarf_linebeginstatement(line, &flag) != 0 || !flag) + continue; /* Filter lines based on address */ if (rt_die != cu_die) { /* @@ -805,13 +848,21 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) */ if (!dwarf_haspc(rt_die, addr)) continue; + if (die_find_inlinefunc(rt_die, addr, &die_mem)) { + /* Call-site check */ + inf = die_get_call_file(&die_mem); + if ((inf && !strcmp(inf, decf)) && + die_get_call_lineno(&die_mem) == lineno) + goto found; + dwarf_decl_line(&die_mem, &inl); if (inl != decl || decf != dwarf_decl_file(&die_mem)) continue; } } +found: /* Get source line */ fname = dwarf_linesrc(line, NULL, NULL); @@ -826,8 +877,9 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) */ if (rt_die != cu_die) /* - * Don't need walk functions recursively, because nested - * inlined functions don't have lines of the specified DIE. + * Don't need walk inlined functions recursively, because + * inner inlined functions don't have the lines of the + * specified function. */ ret = __die_walk_funclines(rt_die, false, callback, data); else { @@ -1002,7 +1054,7 @@ static int die_get_var_innermost_scope(Dwarf_Die *sp_die, Dwarf_Die *vr_die, bool first = true; const char *name; - ret = dwarf_entrypc(sp_die, &entry); + ret = die_entrypc(sp_die, &entry); if (ret) return ret; @@ -1065,7 +1117,7 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf) bool first = true; const char *name; - ret = dwarf_entrypc(sp_die, &entry); + ret = die_entrypc(sp_die, &entry); if (ret) return ret; diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 8ac53bf1ec4e..ee15fac4e1d0 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -41,6 +41,9 @@ int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, /* Get DW_AT_linkage_name (should be NULL for C binary) */ const char *die_get_linkage_name(Dwarf_Die *dw_die); +/* Get the lowest PC in DIE (including range list) */ +int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr); + /* Ensure that this DIE is a subprogram and definition (not declaration) */ bool die_is_func_def(Dwarf_Die *dw_die); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 5b8bc1fd943d..c1f9615b02f7 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1504,7 +1504,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) return 0; } -static int hist_entry__sort(struct hist_entry *a, struct hist_entry *b) +static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b) { struct hists *hists = a->hists; struct perf_hpp_fmt *fmt; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index b99d68943f25..595f91f46811 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -317,10 +317,10 @@ static inline void perf_hpp__prepend_sort_field(struct perf_hpp_fmt *format) list_for_each_entry_safe(format, tmp, &(_list)->sorts, sort_list) #define hists__for_each_format(hists, format) \ - perf_hpp_list__for_each_format((hists)->hpp_list, fmt) + perf_hpp_list__for_each_format((hists)->hpp_list, format) #define hists__for_each_sort_list(hists, format) \ - perf_hpp_list__for_each_sort_list((hists)->hpp_list, fmt) + perf_hpp_list__for_each_sort_list((hists)->hpp_list, format) extern struct perf_hpp_fmt perf_hpp__format[]; diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 4e7bd2750122..63db9872c880 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include "symbol.h" +#include #include #include #include @@ -737,6 +738,8 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp } after->start = map->end; + after->pgoff += map->end - pos->start; + assert(pos->map_ip(pos, map->end) == after->map_ip(after, map->end)); __map_groups__insert(pos->groups, after); if (verbose >= 2 && !use_browser) map__fprintf(after, fp); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 29e2bb304168..096c52f296d7 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1253,8 +1253,15 @@ static int __parse_events_add_pmu(struct parse_events_state *parse_state, if (get_config_terms(head_config, &config_terms)) return -ENOMEM; - if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) + if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { + struct perf_evsel_config_term *pos, *tmp; + + list_for_each_entry_safe(pos, tmp, &config_terms, list) { + list_del_init(&pos->list); + free(pos); + } return -EINVAL; + } evsel = __add_event(list, &parse_state->idx, &attr, get_config_name(head_config), pmu, diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index c9319f8d17a6..f732e3af2bd4 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -34,7 +34,7 @@ int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); static inline const char *perf_reg_name(int id __maybe_unused) { - return NULL; + return "unknown"; } static inline int perf_reg_value(u64 *valp __maybe_unused, diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index a5731de0e5eb..893193bd28c1 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -615,38 +615,26 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod, const char *function, struct probe_trace_point *tp) { - Dwarf_Addr eaddr, highaddr; + Dwarf_Addr eaddr; GElf_Sym sym; const char *symbol; /* Verify the address is correct */ - if (dwarf_entrypc(sp_die, &eaddr) != 0) { - pr_warning("Failed to get entry address of %s\n", - dwarf_diename(sp_die)); - return -ENOENT; - } - if (dwarf_highpc(sp_die, &highaddr) != 0) { - pr_warning("Failed to get end address of %s\n", - dwarf_diename(sp_die)); - return -ENOENT; - } - if (paddr > highaddr) { - pr_warning("Offset specified is greater than size of %s\n", + if (!dwarf_haspc(sp_die, paddr)) { + pr_warning("Specified offset is out of %s\n", dwarf_diename(sp_die)); return -EINVAL; } - symbol = dwarf_diename(sp_die); + /* Try to get actual symbol name from symtab */ + symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL); if (!symbol) { - /* Try to get the symbol name from symtab */ - symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL); - if (!symbol) { - pr_warning("Failed to find symbol at 0x%lx\n", - (unsigned long)paddr); - return -ENOENT; - } - eaddr = sym.st_value; + pr_warning("Failed to find symbol at 0x%lx\n", + (unsigned long)paddr); + return -ENOENT; } + eaddr = sym.st_value; + tp->offset = (unsigned long)(paddr - eaddr); tp->address = (unsigned long)paddr; tp->symbol = strdup(symbol); @@ -767,6 +755,16 @@ static int find_best_scope_cb(Dwarf_Die *fn_die, void *data) return 0; } +/* Return innermost DIE */ +static int find_inner_scope_cb(Dwarf_Die *fn_die, void *data) +{ + struct find_scope_param *fsp = data; + + memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die)); + fsp->found = true; + return 1; +} + /* Find an appropriate scope fits to given conditions */ static Dwarf_Die *find_best_scope(struct probe_finder *pf, Dwarf_Die *die_mem) { @@ -778,8 +776,13 @@ static Dwarf_Die *find_best_scope(struct probe_finder *pf, Dwarf_Die *die_mem) .die_mem = die_mem, .found = false, }; + int ret; - cu_walk_functions_at(&pf->cu_die, pf->addr, find_best_scope_cb, &fsp); + ret = cu_walk_functions_at(&pf->cu_die, pf->addr, find_best_scope_cb, + &fsp); + if (!ret && !fsp.found) + cu_walk_functions_at(&pf->cu_die, pf->addr, + find_inner_scope_cb, &fsp); return fsp.found ? die_mem : NULL; } @@ -953,7 +956,7 @@ static int probe_point_inline_cb(Dwarf_Die *in_die, void *data) ret = find_probe_point_lazy(in_die, pf); else { /* Get probe address */ - if (dwarf_entrypc(in_die, &addr) != 0) { + if (die_entrypc(in_die, &addr) != 0) { pr_warning("Failed to get entry address of %s.\n", dwarf_diename(in_die)); return -ENOENT; @@ -1005,7 +1008,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) param->retval = find_probe_point_by_line(pf); } else if (die_is_func_instance(sp_die)) { /* Instances always have the entry address */ - dwarf_entrypc(sp_die, &pf->addr); + die_entrypc(sp_die, &pf->addr); /* But in some case the entry address is 0 */ if (pf->addr == 0) { pr_debug("%s has no entry PC. Skipped\n", @@ -1417,6 +1420,18 @@ error: return DIE_FIND_CB_END; } +static bool available_var_finder_overlap(struct available_var_finder *af) +{ + int i; + + for (i = 0; i < af->nvls; i++) { + if (af->pf.addr == af->vls[i].point.address) + return true; + } + return false; + +} + /* Add a found vars into available variables list */ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf) { @@ -1427,6 +1442,14 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf) Dwarf_Die die_mem; int ret; + /* + * For some reason (e.g. different column assigned to same address), + * this callback can be called with the address which already passed. + * Ignore it first. + */ + if (available_var_finder_overlap(af)) + return 0; + /* Check number of tevs */ if (af->nvls == af->max_vls) { pr_warning("Too many( > %d) probe point found.\n", af->max_vls); @@ -1570,7 +1593,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, /* Get function entry information */ func = basefunc = dwarf_diename(&spdie); if (!func || - dwarf_entrypc(&spdie, &baseaddr) != 0 || + die_entrypc(&spdie, &baseaddr) != 0 || dwarf_decl_line(&spdie, &baseline) != 0) { lineno = 0; goto post; @@ -1587,7 +1610,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, while (die_find_top_inlinefunc(&spdie, (Dwarf_Addr)addr, &indie)) { /* There is an inline function */ - if (dwarf_entrypc(&indie, &_addr) == 0 && + if (die_entrypc(&indie, &_addr) == 0 && _addr == addr) { /* * addr is at an inline function entry. diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c index 9005fbe0780e..23092fd6451d 100644 --- a/tools/perf/util/strbuf.c +++ b/tools/perf/util/strbuf.c @@ -109,7 +109,6 @@ static int strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap) return ret; } len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved); - va_end(ap_saved); if (len > strbuf_avail(sb)) { pr_debug("this should not happen, your vsnprintf is broken"); va_end(ap_saved); diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config index f304be71c278..fc116c060b98 100644 --- a/tools/power/acpi/Makefile.config +++ b/tools/power/acpi/Makefile.config @@ -18,7 +18,7 @@ include $(srctree)/../../scripts/Makefile.include OUTPUT=$(srctree)/ ifeq ("$(origin O)", "command line") - OUTPUT := $(O)/power/acpi/ + OUTPUT := $(O)/tools/power/acpi/ endif #$(info Determined 'OUTPUT' to be $(OUTPUT)) diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index 943b6b614683..bed0794e3295 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -139,7 +139,7 @@ static int ap_insert_action(char *argument, u32 to_be_done) current_action++; if (current_action > AP_MAX_ACTIONS) { - fprintf(stderr, "Too many table options (max %u)\n", + fprintf(stderr, "Too many table options (max %d)\n", AP_MAX_ACTIONS); return (-1); } diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c index f794d6bbb7e9..3e4ff4a1cdf4 100644 --- a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c +++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c @@ -40,7 +40,6 @@ static cstate_t hsw_ext_cstates[HSW_EXT_CSTATE_COUNT] = { { .name = "PC9", .desc = N_("Processor Package C9"), - .desc = N_("Processor Package C2"), .id = PC9, .range = RANGE_PACKAGE, .get_count_percent = hsw_ext_get_count_percent, diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 0c8b61f8398e..3bdd6a463819 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -1345,7 +1345,7 @@ sub reboot { } else { # Make sure everything has been written to disk - run_ssh("sync"); + run_ssh("sync", 10); if (defined($time)) { start_monitor; diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 8e61aad0ca3f..07cec1b5a0d8 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -177,6 +177,57 @@ void idr_get_next_test(void) idr_destroy(&idr); } +static inline void *idr_mk_value(unsigned long v) +{ + BUG_ON((long)v < 0); + return (void *)((v & 1) | 2 | (v << 1)); +} + +DEFINE_IDR(find_idr); + +static void *idr_throbber(void *arg) +{ + time_t start = time(NULL); + int id = *(int *)arg; + + rcu_register_thread(); + do { + idr_alloc(&find_idr, idr_mk_value(id), id, id + 1, GFP_KERNEL); + idr_remove(&find_idr, id); + } while (time(NULL) < start + 10); + rcu_unregister_thread(); + + return NULL; +} + +void idr_find_test_1(int anchor_id, int throbber_id) +{ + pthread_t throbber; + time_t start = time(NULL); + + pthread_create(&throbber, NULL, idr_throbber, &throbber_id); + + BUG_ON(idr_alloc(&find_idr, idr_mk_value(anchor_id), anchor_id, + anchor_id + 1, GFP_KERNEL) != anchor_id); + + do { + int id = 0; + void *entry = idr_get_next(&find_idr, &id); + BUG_ON(entry != idr_mk_value(id)); + } while (time(NULL) < start + 11); + + pthread_join(throbber, NULL); + + idr_remove(&find_idr, anchor_id); + BUG_ON(!idr_is_empty(&find_idr)); +} + +void idr_find_test(void) +{ + idr_find_test_1(100000, 0); + idr_find_test_1(0, 100000); +} + void idr_checks(void) { unsigned long i; @@ -234,6 +285,7 @@ void idr_checks(void) idr_null_test(); idr_nowait_test(); idr_get_next_test(); + idr_find_test(); } /* diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 913539aea645..9babb3fef8e2 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -7281,7 +7281,7 @@ static struct bpf_test tests[] = { offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .errstr = "dereference of modified ctx ptr R1 off=68+8, ctx+const is allowed, ctx+const+const is not", + .errstr = "dereference of modified ctx ptr", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, @@ -7944,6 +7944,62 @@ static struct bpf_test tests[] = { .errstr = "BPF_XADD stores into R2 packet", .prog_type = BPF_PROG_TYPE_XDP, }, + { + "pass unmodified ctx pointer to helper", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_update), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "pass modified ctx pointer to helper, 1", + .insns = { + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_update), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "dereference of modified ctx ptr", + }, + { + "pass modified ctx pointer to helper, 2", + .insns = { + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_get_socket_cookie), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result_unpriv = REJECT, + .result = REJECT, + .errstr_unpriv = "dereference of modified ctx ptr", + .errstr = "dereference of modified ctx ptr", + }, + { + "pass modified ctx pointer to helper, 3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_csum_update), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "variable ctx access var_off=(0x0; 0x4)", + }, }; static int probe_filter_length(const struct bpf_insn *fp) diff --git a/tools/testing/selftests/filesystems/incfs/.gitignore b/tools/testing/selftests/filesystems/incfs/.gitignore new file mode 100644 index 000000000000..4cba9c219a92 --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/.gitignore @@ -0,0 +1 @@ +incfs_test \ No newline at end of file diff --git a/tools/testing/selftests/filesystems/incfs/Makefile b/tools/testing/selftests/filesystems/incfs/Makefile new file mode 100644 index 000000000000..1f13573d3617 --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/Makefile @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -lssl -lcrypto -llz4 +CFLAGS += -I../../../../../usr/include/ +CFLAGS += -I../../../../include/uapi/ +CFLAGS += -I../../../../lib + +EXTRA_SOURCES := utils.c +CFLAGS += $(EXTRA_SOURCES) + +TEST_GEN_PROGS := incfs_test + +include ../../lib.mk + +$(OUTPUT)incfs_test: incfs_test.c $(EXTRA_SOURCES) +all: $(OUTPUT)incfs_test + +clean: + rm -rf $(OUTPUT)incfs_test *.o diff --git a/tools/testing/selftests/filesystems/incfs/config b/tools/testing/selftests/filesystems/incfs/config new file mode 100644 index 000000000000..b6749837a318 --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/config @@ -0,0 +1 @@ +CONFIG_INCREMENTAL_FS=y \ No newline at end of file diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c new file mode 100644 index 000000000000..7031561c0173 --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -0,0 +1,2193 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018 Google LLC + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../kselftest.h" + +#include "lz4.h" +#include "utils.h" + +#define TEST_FAILURE 1 +#define TEST_SUCCESS 0 +#define INCFS_MAX_MTREE_LEVELS 8 + +#define INCFS_ROOT_INODE 0 + +struct hash_block { + char data[INCFS_DATA_FILE_BLOCK_SIZE]; +}; + +struct test_signature { + void *data; + size_t size; + + char add_data[100]; + size_t add_data_size; +}; + +struct test_file { + int index; + incfs_uuid_t id; + char *name; + off_t size; + char root_hash[INCFS_MAX_HASH_SIZE]; + struct hash_block *mtree; + int mtree_block_count; + struct test_signature sig; +}; + +struct test_files_set { + struct test_file *files; + int files_count; +}; + +struct linux_dirent64 { + uint64_t d_ino; + int64_t d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[0]; +} __packed; + +struct test_files_set get_test_files_set(void) +{ + static struct test_file files[] = { + { .index = 0, .name = "file_one_byte", .size = 1 }, + { .index = 1, + .name = "file_one_block", + .size = INCFS_DATA_FILE_BLOCK_SIZE }, + { .index = 2, + .name = "file_one_and_a_half_blocks", + .size = INCFS_DATA_FILE_BLOCK_SIZE + + INCFS_DATA_FILE_BLOCK_SIZE / 2 }, + { .index = 3, + .name = "file_three", + .size = 300 * INCFS_DATA_FILE_BLOCK_SIZE + 3 }, + { .index = 4, + .name = "file_four", + .size = 400 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, + { .index = 5, + .name = "file_five", + .size = 500 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, + { .index = 6, + .name = "file_six", + .size = 600 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, + { .index = 7, + .name = "file_seven", + .size = 700 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, + { .index = 8, + .name = "file_eight", + .size = 800 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, + { .index = 9, + .name = "file_nine", + .size = 900 * INCFS_DATA_FILE_BLOCK_SIZE + 7 }, + { .index = 10, .name = "file_big", .size = 500 * 1024 * 1024 } + }; + return (struct test_files_set){ .files = files, + .files_count = ARRAY_SIZE(files) }; +} + +struct test_files_set get_small_test_files_set(void) +{ + static struct test_file files[] = { + { .index = 0, .name = "file_one_byte", .size = 1 }, + { .index = 1, + .name = "file_one_block", + .size = INCFS_DATA_FILE_BLOCK_SIZE }, + { .index = 2, + .name = "file_one_and_a_half_blocks", + .size = INCFS_DATA_FILE_BLOCK_SIZE + + INCFS_DATA_FILE_BLOCK_SIZE / 2 }, + { .index = 3, + .name = "file_three", + .size = 300 * INCFS_DATA_FILE_BLOCK_SIZE + 3 }, + { .index = 4, + .name = "file_four", + .size = 400 * INCFS_DATA_FILE_BLOCK_SIZE + 7 } + }; + return (struct test_files_set){ .files = files, + .files_count = ARRAY_SIZE(files) }; +} + +static int get_file_block_seed(int file, int block) +{ + return 7919 * file + block; +} + +static loff_t min(loff_t a, loff_t b) +{ + return a < b ? a : b; +} + +static pid_t flush_and_fork(void) +{ + fflush(stdout); + return fork(); +} + +static void print_error(char *msg) +{ + ksft_print_msg("%s: %s\n", msg, strerror(errno)); +} + +static int wait_for_process(pid_t pid) +{ + int status; + int wait_res; + + wait_res = waitpid(pid, &status, 0); + if (wait_res <= 0) { + print_error("Can't wait for the child"); + return -EINVAL; + } + if (!WIFEXITED(status)) { + ksft_print_msg("Unexpected child status pid=%d\n", pid); + return -EINVAL; + } + status = WEXITSTATUS(status); + if (status != 0) + return status; + return 0; +} + +static void rnd_buf(uint8_t *data, size_t len, unsigned int seed) +{ + int i; + + for (i = 0; i < len; i++) { + seed = 1103515245 * seed + 12345; + data[i] = (uint8_t)(seed >> (i % 13)); + } +} + +char *bin2hex(char *dst, const void *src, size_t count) +{ + const unsigned char *_src = src; + static const char hex_asc[] = "0123456789abcdef"; + + while (count--) { + unsigned char x = *_src++; + + *dst++ = hex_asc[(x & 0xf0) >> 4]; + *dst++ = hex_asc[(x & 0x0f)]; + } + *dst = 0; + return dst; +} + +static char *get_index_filename(const char *mnt_dir, incfs_uuid_t id) +{ + char path[FILENAME_MAX]; + char str_id[1 + 2 * sizeof(id)]; + + bin2hex(str_id, id.bytes, sizeof(id.bytes)); + snprintf(path, ARRAY_SIZE(path), "%s/.index/%s", mnt_dir, str_id); + + return strdup(path); +} + +int open_file_by_id(const char *mnt_dir, incfs_uuid_t id, bool use_ioctl) +{ + char *path = get_index_filename(mnt_dir, id); + int cmd_fd = open_commands_file(mnt_dir); + int fd = open(path, O_RDWR); + struct incfs_permit_fill permit_fill = { + .file_descriptor = fd, + }; + int error = 0; + + if (fd < 0) { + print_error("Can't open file by id."); + error = -errno; + goto out; + } + + if (use_ioctl && ioctl(cmd_fd, INCFS_IOC_PERMIT_FILL, &permit_fill)) { + print_error("Failed to call PERMIT_FILL"); + error = -errno; + goto out; + } + + if (ioctl(fd, INCFS_IOC_PERMIT_FILL, &permit_fill) != -1 || + errno != EPERM) { + print_error( + "Successfully called PERMIT_FILL on non pending_read file"); + return -errno; + goto out; + } + +out: + free(path); + close(cmd_fd); + + if (error) { + close(fd); + return error; + } + + return fd; +} + +int get_file_attr(char *mnt_dir, incfs_uuid_t id, char *value, int size) +{ + char *path = get_index_filename(mnt_dir, id); + int res; + + res = getxattr(path, INCFS_XATTR_METADATA_NAME, value, size); + if (res < 0) + res = -errno; + + free(path); + return res; +} + +static bool same_id(incfs_uuid_t *id1, incfs_uuid_t *id2) +{ + return !memcmp(id1->bytes, id2->bytes, sizeof(id1->bytes)); +} + +static int emit_test_blocks(char *mnt_dir, struct test_file *file, + int blocks[], int count) +{ + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; + uint8_t comp_data[2 * INCFS_DATA_FILE_BLOCK_SIZE]; + int block_count = (count > 32) ? 32 : count; + int data_buf_size = 2 * INCFS_DATA_FILE_BLOCK_SIZE * block_count; + uint8_t *data_buf = malloc(data_buf_size); + uint8_t *current_data = data_buf; + uint8_t *data_end = data_buf + data_buf_size; + struct incfs_fill_block *block_buf = + calloc(block_count, sizeof(struct incfs_fill_block)); + struct incfs_fill_blocks fill_blocks = { + .count = block_count, + .fill_blocks = ptr_to_u64(block_buf), + }; + ssize_t write_res = 0; + int fd; + int error = 0; + int i = 0; + int blocks_written = 0; + + for (i = 0; i < block_count; i++) { + int block_index = blocks[i]; + bool compress = (file->index + block_index) % 2 == 0; + int seed = get_file_block_seed(file->index, block_index); + off_t block_offset = + ((off_t)block_index) * INCFS_DATA_FILE_BLOCK_SIZE; + size_t block_size = 0; + + if (block_offset > file->size) { + error = -EINVAL; + break; + } + if (file->size - block_offset > + INCFS_DATA_FILE_BLOCK_SIZE) + block_size = INCFS_DATA_FILE_BLOCK_SIZE; + else + block_size = file->size - block_offset; + + rnd_buf(data, block_size, seed); + if (compress) { + size_t comp_size = LZ4_compress_default( + (char *)data, (char *)comp_data, block_size, + ARRAY_SIZE(comp_data)); + + if (comp_size <= 0) { + error = -EBADMSG; + break; + } + if (current_data + comp_size > data_end) { + error = -ENOMEM; + break; + } + memcpy(current_data, comp_data, comp_size); + block_size = comp_size; + block_buf[i].compression = COMPRESSION_LZ4; + } else { + if (current_data + block_size > data_end) { + error = -ENOMEM; + break; + } + memcpy(current_data, data, block_size); + block_buf[i].compression = COMPRESSION_NONE; + } + + block_buf[i].block_index = block_index; + block_buf[i].data_len = block_size; + block_buf[i].data = ptr_to_u64(current_data); + current_data += block_size; + } + + if (!error) { + fd = open_file_by_id(mnt_dir, file->id, false); + if (fd < 0) { + error = -errno; + goto out; + } + write_res = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); + if (write_res >= 0) { + ksft_print_msg("Wrote to file via normal fd error\n"); + error = -EPERM; + goto out; + } + + close(fd); + fd = open_file_by_id(mnt_dir, file->id, true); + if (fd < 0) { + error = -errno; + goto out; + } + write_res = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); + if (write_res < 0) + error = -errno; + else + blocks_written = write_res; + } + if (error) { + ksft_print_msg( + "Writing data block error. Write returned: %d. Error:%s\n", + write_res, strerror(-error)); + } + +out: + free(block_buf); + free(data_buf); + close(fd); + return (error < 0) ? error : blocks_written; +} + +static int emit_test_block(char *mnt_dir, struct test_file *file, + int block_index) +{ + int res = emit_test_blocks(mnt_dir, file, &block_index, 1); + + if (res == 0) + return -EINVAL; + if (res == 1) + return 0; + return res; +} + +static void shuffle(int array[], int count, unsigned int seed) +{ + int i; + + for (i = 0; i < count - 1; i++) { + int items_left = count - i; + int shuffle_index; + int v; + + seed = 1103515245 * seed + 12345; + shuffle_index = i + seed % items_left; + + v = array[shuffle_index]; + array[shuffle_index] = array[i]; + array[i] = v; + } +} + +static int emit_test_file_data(char *mount_dir, struct test_file *file) +{ + int i; + int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + int *block_indexes = NULL; + int result = 0; + int blocks_written = 0; + + if (file->size == 0) + return 0; + + block_indexes = calloc(block_cnt, sizeof(*block_indexes)); + for (i = 0; i < block_cnt; i++) + block_indexes[i] = i; + shuffle(block_indexes, block_cnt, file->index); + + for (i = 0; i < block_cnt; i += blocks_written) { + blocks_written = emit_test_blocks(mount_dir, file, + block_indexes + i, block_cnt - i); + if (blocks_written < 0) { + result = blocks_written; + goto out; + } + if (blocks_written == 0) { + result = -EIO; + goto out; + } + } +out: + free(block_indexes); + return result; +} + +static loff_t read_whole_file(char *filename) +{ + int fd = -1; + loff_t result; + loff_t bytes_read = 0; + uint8_t buff[16 * 1024]; + + fd = open(filename, O_RDONLY); + if (fd <= 0) + return fd; + + while (1) { + int read_result = read(fd, buff, ARRAY_SIZE(buff)); + + if (read_result < 0) { + print_error("Error during reading from a file."); + result = -errno; + goto cleanup; + } else if (read_result == 0) + break; + + bytes_read += read_result; + } + result = bytes_read; + +cleanup: + close(fd); + return result; +} + +static int read_test_file(uint8_t *buf, size_t len, char *filename, + int block_idx) +{ + int fd = -1; + int result; + int bytes_read = 0; + size_t bytes_to_read = len; + off_t offset = ((off_t)block_idx) * INCFS_DATA_FILE_BLOCK_SIZE; + + fd = open(filename, O_RDONLY); + if (fd <= 0) + return fd; + + if (lseek(fd, offset, SEEK_SET) != offset) { + print_error("Seek error"); + return -errno; + } + + while (bytes_read < bytes_to_read) { + int read_result = + read(fd, buf + bytes_read, bytes_to_read - bytes_read); + if (read_result < 0) { + result = -errno; + goto cleanup; + } else if (read_result == 0) + break; + + bytes_read += read_result; + } + result = bytes_read; + +cleanup: + close(fd); + return result; +} + +static char *create_backing_dir(char *mount_dir) +{ + struct stat st; + char backing_dir_name[255]; + + snprintf(backing_dir_name, ARRAY_SIZE(backing_dir_name), "%s-src", + mount_dir); + + if (stat(backing_dir_name, &st) == 0) { + if (S_ISDIR(st.st_mode)) { + int error = delete_dir_tree(backing_dir_name); + + if (error) { + ksft_print_msg( + "Can't delete existing backing dir. %d\n", + error); + return NULL; + } + } else { + if (unlink(backing_dir_name)) { + print_error("Can't clear backing dir"); + return NULL; + } + } + } + + if (mkdir(backing_dir_name, 0777)) { + if (errno != EEXIST) { + print_error("Can't open/create backing dir"); + return NULL; + } + } + + return strdup(backing_dir_name); +} + +static int validate_test_file_content_with_seed(char *mount_dir, + struct test_file *file, + unsigned int shuffle_seed) +{ + int error = -1; + char *filename = concat_file_name(mount_dir, file->name); + off_t size = file->size; + loff_t actual_size = get_file_size(filename); + int block_cnt = 1 + (size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + int *block_indexes = NULL; + int i; + + block_indexes = alloca(sizeof(int) * block_cnt); + for (i = 0; i < block_cnt; i++) + block_indexes[i] = i; + + if (shuffle_seed != 0) + shuffle(block_indexes, block_cnt, shuffle_seed); + + if (actual_size != size) { + ksft_print_msg( + "File size doesn't match. name: %s expected size:%ld actual size:%ld\n", + filename, size, actual_size); + error = -1; + goto failure; + } + + for (i = 0; i < block_cnt; i++) { + int block_idx = block_indexes[i]; + uint8_t expected_block[INCFS_DATA_FILE_BLOCK_SIZE]; + uint8_t actual_block[INCFS_DATA_FILE_BLOCK_SIZE]; + int seed = get_file_block_seed(file->index, block_idx); + size_t bytes_to_compare = min( + (off_t)INCFS_DATA_FILE_BLOCK_SIZE, + size - ((off_t)block_idx) * INCFS_DATA_FILE_BLOCK_SIZE); + int read_result = + read_test_file(actual_block, INCFS_DATA_FILE_BLOCK_SIZE, + filename, block_idx); + if (read_result < 0) { + ksft_print_msg( + "Error reading block %d from file %s. Error: %s\n", + block_idx, filename, strerror(-read_result)); + error = read_result; + goto failure; + } + rnd_buf(expected_block, INCFS_DATA_FILE_BLOCK_SIZE, seed); + if (memcmp(expected_block, actual_block, bytes_to_compare)) { + ksft_print_msg( + "File contents don't match. name: %s block:%d\n", + file->name, block_idx); + error = -2; + goto failure; + } + } + free(filename); + return 0; + +failure: + free(filename); + return error; +} + +static int validate_test_file_content(char *mount_dir, struct test_file *file) +{ + return validate_test_file_content_with_seed(mount_dir, file, 0); +} + +static int data_producer(char *mount_dir, struct test_files_set *test_set) +{ + int ret = 0; + int timeout_ms = 1000; + struct incfs_pending_read_info prs[100] = {}; + int prs_size = ARRAY_SIZE(prs); + int fd = open_commands_file(mount_dir); + + if (fd < 0) + return -errno; + + while ((ret = wait_for_pending_reads(fd, timeout_ms, prs, prs_size)) > + 0) { + int read_count = ret; + int i; + + for (i = 0; i < read_count; i++) { + int j = 0; + struct test_file *file = NULL; + + for (j = 0; j < test_set->files_count; j++) { + bool same = same_id(&(test_set->files[j].id), + &(prs[i].file_id)); + + if (same) { + file = &test_set->files[j]; + break; + } + } + if (!file) { + ksft_print_msg( + "Unknown file in pending reads.\n"); + break; + } + + ret = emit_test_block(mount_dir, file, + prs[i].block_index); + if (ret < 0) { + ksft_print_msg("Emitting test data error: %s\n", + strerror(-ret)); + break; + } + } + } + close(fd); + return ret; +} + +static int build_mtree(struct test_file *file) +{ + char data[INCFS_DATA_FILE_BLOCK_SIZE] = {}; + const int digest_size = SHA256_DIGEST_SIZE; + const int hash_per_block = INCFS_DATA_FILE_BLOCK_SIZE / digest_size; + int block_count = 0; + int hash_block_count = 0; + int total_tree_block_count = 0; + int tree_lvl_index[INCFS_MAX_MTREE_LEVELS] = {}; + int tree_lvl_count[INCFS_MAX_MTREE_LEVELS] = {}; + int levels_count = 0; + int i, level; + + if (file->size == 0) + return 0; + + block_count = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + hash_block_count = block_count; + for (i = 0; hash_block_count > 1; i++) { + hash_block_count = (hash_block_count + hash_per_block - 1) + / hash_per_block; + tree_lvl_count[i] = hash_block_count; + total_tree_block_count += hash_block_count; + } + levels_count = i; + + for (i = 0; i < levels_count; i++) { + int prev_lvl_base = (i == 0) ? total_tree_block_count : + tree_lvl_index[i - 1]; + + tree_lvl_index[i] = prev_lvl_base - tree_lvl_count[i]; + } + + file->mtree_block_count = total_tree_block_count; + if (block_count == 1) { + int seed = get_file_block_seed(file->index, 0); + + memset(data, 0, INCFS_DATA_FILE_BLOCK_SIZE); + rnd_buf((uint8_t *)data, file->size, seed); + sha256(data, INCFS_DATA_FILE_BLOCK_SIZE, file->root_hash); + return 0; + } + + file->mtree = calloc(total_tree_block_count, sizeof(*file->mtree)); + /* Build level 0 hashes. */ + for (i = 0; i < block_count; i++) { + off_t offset = i * INCFS_DATA_FILE_BLOCK_SIZE; + size_t block_size = INCFS_DATA_FILE_BLOCK_SIZE; + int block_index = tree_lvl_index[0] + + i / hash_per_block; + int block_off = (i % hash_per_block) * digest_size; + int seed = get_file_block_seed(file->index, i); + char *hash_ptr = file->mtree[block_index].data + block_off; + + if (file->size - offset < block_size) { + block_size = file->size - offset; + memset(data, 0, INCFS_DATA_FILE_BLOCK_SIZE); + } + + rnd_buf((uint8_t *)data, block_size, seed); + sha256(data, INCFS_DATA_FILE_BLOCK_SIZE, hash_ptr); + } + + /* Build higher levels of hash tree. */ + for (level = 1; level < levels_count; level++) { + int prev_lvl_base = tree_lvl_index[level - 1]; + int prev_lvl_count = tree_lvl_count[level - 1]; + + for (i = 0; i < prev_lvl_count; i++) { + int block_index = + i / hash_per_block + tree_lvl_index[level]; + int block_off = (i % hash_per_block) * digest_size; + char *hash_ptr = + file->mtree[block_index].data + block_off; + + sha256(file->mtree[i + prev_lvl_base].data, + INCFS_DATA_FILE_BLOCK_SIZE, hash_ptr); + } + } + + /* Calculate root hash from the top block */ + sha256(file->mtree[0].data, + INCFS_DATA_FILE_BLOCK_SIZE, file->root_hash); + + return 0; +} + +static int load_hash_tree(const char *mount_dir, struct test_file *file) +{ + int err; + int i; + int fd; + struct incfs_fill_blocks fill_blocks = { + .count = file->mtree_block_count, + }; + struct incfs_fill_block *fill_block_array = + calloc(fill_blocks.count, sizeof(struct incfs_fill_block)); + + if (fill_blocks.count == 0) + return 0; + + if (!fill_block_array) + return -ENOMEM; + fill_blocks.fill_blocks = ptr_to_u64(fill_block_array); + + for (i = 0; i < fill_blocks.count; i++) { + fill_block_array[i] = (struct incfs_fill_block){ + .block_index = i, + .data_len = INCFS_DATA_FILE_BLOCK_SIZE, + .data = ptr_to_u64(file->mtree[i].data), + .flags = INCFS_BLOCK_FLAGS_HASH + }; + } + + fd = open_file_by_id(mount_dir, file->id, false); + if (fd < 0) { + err = errno; + goto failure; + } + + err = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); + close(fd); + if (err >= 0) { + err = -EPERM; + goto failure; + } + + fd = open_file_by_id(mount_dir, file->id, true); + if (fd < 0) { + err = errno; + goto failure; + } + + err = ioctl(fd, INCFS_IOC_FILL_BLOCKS, &fill_blocks); + close(fd); + if (err < fill_blocks.count) + err = errno; + else { + err = 0; + free(file->mtree); + } + +failure: + free(fill_block_array); + return err; +} + +static int cant_touch_index_test(char *mount_dir) +{ + char *file_name = "test_file"; + int file_size = 123; + incfs_uuid_t file_id; + char *index_path = concat_file_name(mount_dir, ".index"); + char *subdir = concat_file_name(index_path, "subdir"); + char *dst_name = concat_file_name(mount_dir, "something"); + char *filename_in_index = NULL; + char *file_path = concat_file_name(mount_dir, file_name); + char *backing_dir; + int cmd_fd = -1; + int err; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + free(backing_dir); + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + + err = mkdir(subdir, 0777); + if (err == 0 || errno != EBUSY) { + print_error("Shouldn't be able to crate subdir in index\n"); + goto failure; + } + + err = emit_file(cmd_fd, ".index", file_name, &file_id, + file_size, NULL); + if (err != -EBUSY) { + print_error("Shouldn't be able to crate a file in index\n"); + goto failure; + } + + err = emit_file(cmd_fd, NULL, file_name, &file_id, + file_size, NULL); + if (err < 0) + goto failure; + filename_in_index = get_index_filename(mount_dir, file_id); + + err = unlink(filename_in_index); + if (err == 0 || errno != EBUSY) { + print_error("Shouldn't be delete from index\n"); + goto failure; + } + + + err = rename(filename_in_index, dst_name); + if (err == 0 || errno != EBUSY) { + print_error("Shouldn't be able to move from index\n"); + goto failure; + } + + free(filename_in_index); + filename_in_index = concat_file_name(index_path, "abc"); + err = link(file_path, filename_in_index); + if (err == 0 || errno != EBUSY) { + print_error("Shouldn't be able to link inside index\n"); + goto failure; + } + + close(cmd_fd); + free(subdir); + free(index_path); + free(dst_name); + free(filename_in_index); + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + free(subdir); + free(dst_name); + free(index_path); + free(filename_in_index); + close(cmd_fd); + umount(mount_dir); + return TEST_FAILURE; +} + +static bool iterate_directory(char *dir_to_iterate, bool root, int file_count) +{ + struct expected_name { + const char *name; + bool root_only; + bool found; + } names[] = { + {INCFS_LOG_FILENAME, true, false}, + {INCFS_PENDING_READS_FILENAME, true, false}, + {".index", true, false}, + {"..", false, false}, + {".", false, false}, + }; + + bool pass = true, found; + int i; + + /* Test directory iteration */ + int fd = open(dir_to_iterate, O_RDONLY | O_DIRECTORY); + + if (fd < 0) { + print_error("Can't open directory\n"); + return false; + } + + for (;;) { + /* Enough space for one dirent - no name over 30 */ + char buf[sizeof(struct linux_dirent64) + NAME_MAX]; + struct linux_dirent64 *dirent = (struct linux_dirent64 *) buf; + int nread; + int i; + + for (i = 0; i < NAME_MAX; ++i) { + nread = syscall(__NR_getdents64, fd, buf, + sizeof(struct linux_dirent64) + i); + + if (nread >= 0) + break; + if (errno != EINVAL) + break; + } + + if (nread == 0) + break; + if (nread < 0) { + print_error("Error iterating directory\n"); + pass = false; + goto failure; + } + + /* Expected size is rounded up to 8 byte boundary. Not sure if + * this is universal truth or just happenstance, but useful test + * for the moment + */ + if (nread != (((sizeof(struct linux_dirent64) + + strlen(dirent->d_name) + 1) + 7) & ~7)) { + print_error("Wrong dirent size"); + pass = false; + goto failure; + } + + found = false; + for (i = 0; i < sizeof(names) / sizeof(*names); ++i) + if (!strcmp(dirent->d_name, names[i].name)) { + if (names[i].root_only && !root) { + print_error("Root file error"); + pass = false; + goto failure; + } + + if (names[i].found) { + print_error("File appears twice"); + pass = false; + goto failure; + } + + names[i].found = true; + found = true; + break; + } + + if (!found) + --file_count; + } + + for (i = 0; i < sizeof(names) / sizeof(*names); ++i) { + if (!names[i].found) + if (root || !names[i].root_only) { + print_error("Expected file not present"); + pass = false; + goto failure; + } + } + + if (file_count) { + print_error("Wrong number of files\n"); + pass = false; + goto failure; + } + +failure: + close(fd); + return pass; +} + +static int basic_file_ops_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + char *subdir1 = concat_file_name(mount_dir, "subdir1"); + char *subdir2 = concat_file_name(mount_dir, "subdir2"); + char *backing_dir; + int cmd_fd = -1; + int i, err; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + free(backing_dir); + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + err = mkdir(subdir1, 0777); + if (err < 0 && errno != EEXIST) { + print_error("Can't create subdir1\n"); + goto failure; + } + + err = mkdir(subdir2, 0777); + if (err < 0 && errno != EEXIST) { + print_error("Can't create subdir2\n"); + goto failure; + } + + /* Create all test files in subdir1 directory */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + loff_t size; + char *file_path = concat_file_name(subdir1, file->name); + + err = emit_file(cmd_fd, "subdir1", file->name, &file->id, + file->size, NULL); + if (err < 0) + goto failure; + + size = get_file_size(file_path); + free(file_path); + if (size != file->size) { + ksft_print_msg("Wrong size %lld of %s.\n", + size, file->name); + goto failure; + } + } + + if (!iterate_directory(subdir1, false, file_num)) + goto failure; + + /* Link the files to subdir2 */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *src_name = concat_file_name(subdir1, file->name); + char *dst_name = concat_file_name(subdir2, file->name); + loff_t size; + + err = link(src_name, dst_name); + if (err < 0) { + print_error("Can't move file\n"); + goto failure; + } + + size = get_file_size(dst_name); + if (size != file->size) { + ksft_print_msg("Wrong size %lld of %s.\n", + size, file->name); + goto failure; + } + free(src_name); + free(dst_name); + } + + /* Move the files from subdir2 to the mount dir */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *src_name = concat_file_name(subdir2, file->name); + char *dst_name = concat_file_name(mount_dir, file->name); + loff_t size; + + err = rename(src_name, dst_name); + if (err < 0) { + print_error("Can't move file\n"); + goto failure; + } + + size = get_file_size(dst_name); + if (size != file->size) { + ksft_print_msg("Wrong size %lld of %s.\n", + size, file->name); + goto failure; + } + free(src_name); + free(dst_name); + } + + /* +2 because there are 2 subdirs */ + if (!iterate_directory(mount_dir, true, file_num + 2)) + goto failure; + + /* Open and close all files from the mount dir */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *path = concat_file_name(mount_dir, file->name); + int fd; + + fd = open(path, O_RDWR); + free(path); + if (fd <= 0) { + print_error("Can't open file"); + goto failure; + } + if (close(fd)) { + print_error("Can't close file"); + goto failure; + } + } + + /* Delete all files from the mount dir */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *path = concat_file_name(mount_dir, file->name); + + err = unlink(path); + free(path); + if (err < 0) { + print_error("Can't unlink file"); + goto failure; + } + } + + err = delete_dir_tree(subdir1); + if (err) { + ksft_print_msg("Error deleting subdir1 %d", err); + goto failure; + } + + err = rmdir(subdir2); + if (err) { + print_error("Error deleting subdir2"); + goto failure; + } + + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + umount(mount_dir); + return TEST_FAILURE; +} + +static int dynamic_files_and_data_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + const int missing_file_idx = 5; + int cmd_fd = -1; + char *backing_dir; + int i; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + free(backing_dir); + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Check that test files don't exist in the filesystem. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *filename = concat_file_name(mount_dir, file->name); + + if (access(filename, F_OK) != -1) { + ksft_print_msg( + "File %s somehow already exists in a clean FS.\n", + filename); + goto failure; + } + free(filename); + } + + /* Write test data into the command file. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int res; + + build_mtree(file); + res = emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL); + if (res < 0) { + ksft_print_msg("Error %s emiting file %s.\n", + strerror(-res), file->name); + goto failure; + } + + /* Skip writing data to one file so we can check */ + /* that it's missing later. */ + if (i == missing_file_idx) + continue; + + res = emit_test_file_data(mount_dir, file); + if (res) { + ksft_print_msg("Error %s emiting data for %s.\n", + strerror(-res), file->name); + goto failure; + } + } + + /* Validate contents of the FS */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (i == missing_file_idx) { + /* No data has been written to this file. */ + /* Check for read error; */ + uint8_t buf; + char *filename = + concat_file_name(mount_dir, file->name); + int res = read_test_file(&buf, 1, filename, 0); + + free(filename); + if (res > 0) { + ksft_print_msg( + "Data present, even though never writtern.\n"); + goto failure; + } + if (res != -ETIME) { + ksft_print_msg("Wrong error code: %d.\n", res); + goto failure; + } + } else { + if (validate_test_file_content(mount_dir, file) < 0) + goto failure; + } + } + + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + umount(mount_dir); + return TEST_FAILURE; +} + +static int concurrent_reads_and_writes_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + /* Validate each file from that many child processes. */ + const int child_multiplier = 3; + int cmd_fd = -1; + char *backing_dir; + int status; + int i; + pid_t producer_pid; + pid_t *child_pids = alloca(child_multiplier * file_num * sizeof(pid_t)); + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + free(backing_dir); + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Tell FS about the files, without actually providing the data. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int res; + + res = emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL); + if (res) + goto failure; + } + + /* Start child processes acessing data in the files */ + for (i = 0; i < file_num * child_multiplier; i++) { + struct test_file *file = &test.files[i / child_multiplier]; + pid_t child_pid = flush_and_fork(); + + if (child_pid == 0) { + /* This is a child process, do the data validation. */ + int ret = validate_test_file_content_with_seed( + mount_dir, file, i); + if (ret >= 0) { + /* Zero exit status if data is valid. */ + exit(0); + } + + /* Positive status if validation error found. */ + exit(-ret); + } else if (child_pid > 0) { + child_pids[i] = child_pid; + } else { + print_error("Fork error"); + goto failure; + } + } + + producer_pid = flush_and_fork(); + if (producer_pid == 0) { + int ret; + /* + * This is a child that should provide data to + * pending reads. + */ + + ret = data_producer(mount_dir, &test); + exit(-ret); + } else { + status = wait_for_process(producer_pid); + if (status != 0) { + ksft_print_msg("Data produces failed. %d(%s) ", status, + strerror(status)); + goto failure; + } + } + + /* Check that all children has finished with 0 exit status */ + for (i = 0; i < file_num * child_multiplier; i++) { + struct test_file *file = &test.files[i / child_multiplier]; + + status = wait_for_process(child_pids[i]); + if (status != 0) { + ksft_print_msg( + "Validation for the file %s failed with code %d (%s)\n", + file->name, status, strerror(status)); + goto failure; + } + } + + /* Check that there are no pending reads left */ + { + struct incfs_pending_read_info prs[1] = {}; + int timeout = 0; + int read_count = wait_for_pending_reads(cmd_fd, timeout, prs, + ARRAY_SIZE(prs)); + + if (read_count) { + ksft_print_msg( + "Pending reads pending when all data written\n"); + goto failure; + } + } + + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + umount(mount_dir); + return TEST_FAILURE; +} + +static int work_after_remount_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + const int file_num_stage1 = file_num / 2; + const int file_num_stage2 = file_num; + char *backing_dir = NULL; + int i = 0; + int cmd_fd = -1; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Write first half of the data into the command file. (stage 1) */ + for (i = 0; i < file_num_stage1; i++) { + struct test_file *file = &test.files[i]; + + build_mtree(file); + if (emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL)) + goto failure; + + if (emit_test_file_data(mount_dir, file)) + goto failure; + } + + /* Unmount and mount again, to see that data is persistent. */ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Write the second half of the data into the command file. (stage 2) */ + for (; i < file_num_stage2; i++) { + struct test_file *file = &test.files[i]; + int res = emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL); + + if (res) + goto failure; + + if (emit_test_file_data(mount_dir, file)) + goto failure; + } + + /* Validate contents of the FS */ + for (i = 0; i < file_num_stage2; i++) { + struct test_file *file = &test.files[i]; + + if (validate_test_file_content(mount_dir, file) < 0) + goto failure; + } + + /* Delete all files */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *filename = concat_file_name(mount_dir, file->name); + char *filename_in_index = get_index_filename(mount_dir, + file->id); + + if (access(filename, F_OK) != 0) { + ksft_print_msg("File %s is not visible.\n", filename); + goto failure; + } + + if (access(filename_in_index, F_OK) != 0) { + ksft_print_msg("File %s is not visible.\n", + filename_in_index); + goto failure; + } + + unlink(filename); + + if (access(filename, F_OK) != -1) { + ksft_print_msg("File %s is still present.\n", filename); + goto failure; + } + + if (access(filename_in_index, F_OK) != 0) { + ksft_print_msg("File %s is still present.\n", + filename_in_index); + goto failure; + } + free(filename); + free(filename_in_index); + } + + /* Unmount and mount again, to see that deleted files stay deleted. */ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Validate all deleted files are still deleted. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *filename = concat_file_name(mount_dir, file->name); + + if (access(filename, F_OK) != -1) { + ksft_print_msg("File %s is still visible.\n", filename); + goto failure; + } + free(filename); + } + + /* Final unmount */ + close(cmd_fd); + free(backing_dir); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +static int attribute_test(char *mount_dir) +{ + char file_attr[] = "metadata123123"; + char attr_buf[INCFS_MAX_FILE_ATTR_SIZE] = {}; + int cmd_fd = -1; + incfs_uuid_t file_id; + int attr_res = 0; + char *backing_dir; + + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + if (emit_file(cmd_fd, NULL, "file", &file_id, 12, file_attr)) + goto failure; + + /* Test attribute values */ + attr_res = get_file_attr(mount_dir, file_id, attr_buf, + ARRAY_SIZE(attr_buf)); + if (attr_res != strlen(file_attr)) { + ksft_print_msg("Get file attr error: %d\n", attr_res); + goto failure; + } + if (strcmp(attr_buf, file_attr) != 0) { + ksft_print_msg("Incorrect file attr value: '%s'", attr_buf); + goto failure; + } + + /* Unmount and mount again, to see that attributes are persistent. */ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Test attribute values again after remount*/ + attr_res = get_file_attr(mount_dir, file_id, attr_buf, + ARRAY_SIZE(attr_buf)); + if (attr_res != strlen(file_attr)) { + ksft_print_msg("Get dir attr error: %d\n", attr_res); + goto failure; + } + if (strcmp(attr_buf, file_attr) != 0) { + ksft_print_msg("Incorrect file attr value: '%s'", attr_buf); + goto failure; + } + + /* Final unmount */ + close(cmd_fd); + free(backing_dir); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +static int child_procs_waiting_for_data_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + int cmd_fd = -1; + int i; + pid_t *child_pids = alloca(file_num * sizeof(pid_t)); + char *backing_dir; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. (10s wait time) */ + if (mount_fs(mount_dir, backing_dir, 10000) != 0) + goto failure; + + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Tell FS about the files, without actually providing the data. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL); + } + + /* Start child processes acessing data in the files */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + pid_t child_pid = flush_and_fork(); + + if (child_pid == 0) { + /* This is a child process, do the data validation. */ + int ret = validate_test_file_content(mount_dir, file); + + if (ret >= 0) { + /* Zero exit status if data is valid. */ + exit(0); + } + + /* Positive status if validation error found. */ + exit(-ret); + } else if (child_pid > 0) { + child_pids[i] = child_pid; + } else { + print_error("Fork error"); + goto failure; + } + } + + /* Write test data into the command file. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (emit_test_file_data(mount_dir, file)) + goto failure; + } + + /* Check that all children has finished with 0 exit status */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int status = wait_for_process(child_pids[i]); + + if (status != 0) { + ksft_print_msg( + "Validation for the file %s failed with code %d (%s)\n", + file->name, status, strerror(status)); + goto failure; + } + } + + close(cmd_fd); + free(backing_dir); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +static int multiple_providers_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + const int producer_count = 5; + int cmd_fd = -1; + int status; + int i; + pid_t *producer_pids = alloca(producer_count * sizeof(pid_t)); + char *backing_dir; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. (10s wait time) */ + if (mount_fs(mount_dir, backing_dir, 10000) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Tell FS about the files, without actually providing the data. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL) < 0) + goto failure; + } + + /* Start producer processes */ + for (i = 0; i < producer_count; i++) { + pid_t producer_pid = flush_and_fork(); + + if (producer_pid == 0) { + int ret; + /* + * This is a child that should provide data to + * pending reads. + */ + + ret = data_producer(mount_dir, &test); + exit(-ret); + } else if (producer_pid > 0) { + producer_pids[i] = producer_pid; + } else { + print_error("Fork error"); + goto failure; + } + } + + /* Validate FS content */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + char *filename = concat_file_name(mount_dir, file->name); + loff_t read_result = read_whole_file(filename); + + free(filename); + if (read_result != file->size) { + ksft_print_msg( + "Error validating file %s. Result: %ld\n", + file->name, read_result); + goto failure; + } + } + + /* Check that all producers has finished with 0 exit status */ + for (i = 0; i < producer_count; i++) { + status = wait_for_process(producer_pids[i]); + if (status != 0) { + ksft_print_msg("Producer %d failed with code (%s)\n", i, + strerror(status)); + goto failure; + } + } + + close(cmd_fd); + free(backing_dir); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +static int hash_tree_test(char *mount_dir) +{ + char *backing_dir; + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + const int corrupted_file_idx = 5; + int i = 0; + int cmd_fd = -1; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + /* Mount FS and release the backing file. */ + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Write hashes and data. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + int res; + + build_mtree(file); + res = crypto_emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, file->root_hash, + file->sig.add_data); + + if (i == corrupted_file_idx) { + /* Corrupt third blocks hash */ + file->mtree[0].data[2 * SHA256_DIGEST_SIZE] ^= 0xff; + } + if (emit_test_file_data(mount_dir, file)) + goto failure; + + res = load_hash_tree(mount_dir, file); + if (res) { + ksft_print_msg("Can't load hashes for %s. error: %s\n", + file->name, strerror(-res)); + goto failure; + } + } + + /* Validate data */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (i == corrupted_file_idx) { + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; + char *filename = + concat_file_name(mount_dir, file->name); + int res; + + res = read_test_file(data, INCFS_DATA_FILE_BLOCK_SIZE, + filename, 2); + free(filename); + if (res != -EBADMSG) { + ksft_print_msg("Hash violation missed1. %d\n", + res); + goto failure; + } + } else if (validate_test_file_content(mount_dir, file) < 0) + goto failure; + } + + /* Unmount and mount again, to that hashes are persistent. */ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + if (mount_fs(mount_dir, backing_dir, 50) != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + /* Validate data again */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (i == corrupted_file_idx) { + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; + char *filename = + concat_file_name(mount_dir, file->name); + int res; + + res = read_test_file(data, INCFS_DATA_FILE_BLOCK_SIZE, + filename, 2); + free(filename); + if (res != -EBADMSG) { + ksft_print_msg("Hash violation missed2. %d\n", + res); + goto failure; + } + } else if (validate_test_file_content(mount_dir, file) < 0) + goto failure; + } + + /* Final unmount */ + close(cmd_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + return TEST_SUCCESS; + +failure: + close(cmd_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +static int validate_logs(char *mount_dir, int log_fd, struct test_file *file) +{ + uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE]; + struct incfs_pending_read_info prs[100] = {}; + int prs_size = ARRAY_SIZE(prs); + int block_cnt = 1 + (file->size - 1) / INCFS_DATA_FILE_BLOCK_SIZE; + int res; + int read_count; + int i; + char *filename = concat_file_name(mount_dir, file->name); + int fd; + + fd = open(filename, O_RDONLY); + free(filename); + if (fd <= 0) + return TEST_FAILURE; + + if (block_cnt > prs_size) + block_cnt = prs_size; + + for (i = 0; i < block_cnt; i++) { + res = pread(fd, data, sizeof(data), + INCFS_DATA_FILE_BLOCK_SIZE * i); + if (res <= 0) + goto failure; + } + + read_count = wait_for_pending_reads(log_fd, 0, prs, prs_size); + if (read_count < 0) { + ksft_print_msg("Error reading logged reads %s.\n", + strerror(-read_count)); + goto failure; + } + + if (read_count != block_cnt) { + ksft_print_msg("Bad log read count %s %d %d.\n", file->name, + read_count, block_cnt); + goto failure; + } + + for (i = 0; i < read_count; i++) { + struct incfs_pending_read_info *read = &prs[i]; + + if (!same_id(&read->file_id, &file->id)) { + ksft_print_msg("Bad log read ino %s\n", file->name); + goto failure; + } + + if (read->block_index != i) { + ksft_print_msg("Bad log read ino %s %d %d.\n", + file->name, read->block_index, i); + goto failure; + } + + if (i != 0) { + unsigned long psn = prs[i - 1].serial_number; + + if (read->serial_number != psn + 1) { + ksft_print_msg("Bad log read sn %s %d %d.\n", + file->name, read->serial_number, + psn); + goto failure; + } + } + + if (read->timestamp_us == 0) { + ksft_print_msg("Bad log read timestamp %s.\n", + file->name); + goto failure; + } + } + close(fd); + return TEST_SUCCESS; + +failure: + close(fd); + return TEST_FAILURE; +} + +static int read_log_test(char *mount_dir) +{ + struct test_files_set test = get_test_files_set(); + const int file_num = test.files_count; + int i = 0; + int cmd_fd = -1, log_fd = -1; + char *backing_dir; + + backing_dir = create_backing_dir(mount_dir); + if (!backing_dir) + goto failure; + + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0") != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + log_fd = open_log_file(mount_dir); + if (cmd_fd < 0) + ksft_print_msg("Can't open log file.\n"); + + /* Write data. */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (emit_file(cmd_fd, NULL, file->name, &file->id, + file->size, NULL)) + goto failure; + + if (emit_test_file_data(mount_dir, file)) + goto failure; + } + + /* Validate data */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (validate_logs(mount_dir, log_fd, file)) + goto failure; + } + + /* Unmount and mount again, to see that logs work after remount. */ + close(cmd_fd); + close(log_fd); + cmd_fd = -1; + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + if (mount_fs_opt(mount_dir, backing_dir, "readahead=0") != 0) + goto failure; + + cmd_fd = open_commands_file(mount_dir); + if (cmd_fd < 0) + goto failure; + + log_fd = open_log_file(mount_dir); + if (cmd_fd < 0) + ksft_print_msg("Can't open log file.\n"); + + /* Validate data again */ + for (i = 0; i < file_num; i++) { + struct test_file *file = &test.files[i]; + + if (validate_logs(mount_dir, log_fd, file)) + goto failure; + } + + /* Final unmount */ + close(cmd_fd); + close(log_fd); + free(backing_dir); + if (umount(mount_dir) != 0) { + print_error("Can't unmout FS"); + goto failure; + } + + return TEST_SUCCESS; + +failure: + close(cmd_fd); + close(log_fd); + free(backing_dir); + umount(mount_dir); + return TEST_FAILURE; +} + +static char *setup_mount_dir() +{ + struct stat st; + char *current_dir = getcwd(NULL, 0); + char *mount_dir = concat_file_name(current_dir, "incfs-mount-dir"); + + free(current_dir); + if (stat(mount_dir, &st) == 0) { + if (S_ISDIR(st.st_mode)) + return mount_dir; + + ksft_print_msg("%s is a file, not a dir.\n", mount_dir); + return NULL; + } + + if (mkdir(mount_dir, 0777)) { + print_error("Can't create mount dir."); + return NULL; + } + + return mount_dir; +} + +int main(int argc, char *argv[]) +{ + char *mount_dir = NULL; + int fails = 0; + int i; + int fd, count; + + // Seed randomness pool for testing on QEMU + // NOTE - this abuses the concept of randomness - do *not* ever do this + // on a machine for production use - the device will think it has good + // randomness when it does not. + fd = open("/dev/urandom", O_WRONLY); + count = 4096; + for (int i = 0; i < 128; ++i) + ioctl(fd, RNDADDTOENTCNT, &count); + close(fd); + + ksft_print_header(); + + if (geteuid() != 0) + ksft_print_msg("Not a root, might fail to mount.\n"); + + mount_dir = setup_mount_dir(); + if (mount_dir == NULL) + ksft_exit_fail_msg("Can't create a mount dir\n"); + +#define MAKE_TEST(test) \ + { \ + test, #test \ + } + struct { + int (*pfunc)(char *dir); + const char *name; + } cases[] = { + MAKE_TEST(basic_file_ops_test), + MAKE_TEST(cant_touch_index_test), + MAKE_TEST(dynamic_files_and_data_test), + MAKE_TEST(concurrent_reads_and_writes_test), + MAKE_TEST(attribute_test), + MAKE_TEST(work_after_remount_test), + MAKE_TEST(child_procs_waiting_for_data_test), + MAKE_TEST(multiple_providers_test), + MAKE_TEST(hash_tree_test), + MAKE_TEST(read_log_test), + }; +#undef MAKE_TEST + + /* Bring back for kernel 5.x */ + /* ksft_set_plan(ARRAY_SIZE(cases)); */ + + for (i = 0; i < ARRAY_SIZE(cases); ++i) { + ksft_print_msg("Running %s\n", cases[i].name); + if (cases[i].pfunc(mount_dir) == TEST_SUCCESS) + ksft_test_result_pass("%s\n", cases[i].name); + else { + ksft_test_result_fail("%s\n", cases[i].name); + fails++; + } + } + + umount2(mount_dir, MNT_FORCE); + rmdir(mount_dir); + + if (fails > 0) + ksft_exit_pass(); + else + ksft_exit_pass(); + return 0; +} diff --git a/tools/testing/selftests/filesystems/incfs/utils.c b/tools/testing/selftests/filesystems/incfs/utils.c new file mode 100644 index 000000000000..3a72fa5d5e9a --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/utils.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2018 Google LLC + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "utils.h" + +int mount_fs(const char *mount_dir, const char *backing_dir, + int read_timeout_ms) +{ + static const char fs_name[] = INCFS_NAME; + char mount_options[512]; + int result; + + snprintf(mount_options, ARRAY_SIZE(mount_options), + "read_timeout_ms=%u", + read_timeout_ms); + + result = mount(backing_dir, mount_dir, fs_name, 0, mount_options); + if (result != 0) + perror("Error mounting fs."); + return result; +} + +int mount_fs_opt(const char *mount_dir, const char *backing_dir, + const char *opt) +{ + static const char fs_name[] = INCFS_NAME; + int result; + + result = mount(backing_dir, mount_dir, fs_name, 0, opt); + if (result != 0) + perror("Error mounting fs."); + return result; +} + +struct hash_section { + uint32_t algorithm; + uint8_t log2_blocksize; + uint32_t salt_size; + /* no salt */ + uint32_t hash_size; + uint8_t hash[SHA256_DIGEST_SIZE]; +} __packed; + +struct signature_blob { + uint32_t version; + uint32_t hash_section_size; + struct hash_section hash_section; + uint32_t signing_section_size; + uint8_t signing_section[]; +} __packed; + +size_t format_signature(void **buf, const char *root_hash, const char *add_data) +{ + size_t size = sizeof(struct signature_blob) + strlen(add_data) + 1; + struct signature_blob *sb = malloc(size); + + *sb = (struct signature_blob){ + .version = INCFS_SIGNATURE_VERSION, + .hash_section_size = sizeof(struct hash_section), + .hash_section = + (struct hash_section){ + .algorithm = INCFS_HASH_TREE_SHA256, + .log2_blocksize = 12, + .salt_size = 0, + .hash_size = SHA256_DIGEST_SIZE, + }, + .signing_section_size = sizeof(uint32_t) + strlen(add_data) + 1, + }; + + memcpy(sb->hash_section.hash, root_hash, SHA256_DIGEST_SIZE); + memcpy((char *)sb->signing_section, add_data, strlen(add_data) + 1); + *buf = sb; + return size; +} + +int crypto_emit_file(int fd, const char *dir, const char *filename, + incfs_uuid_t *id_out, size_t size, const char *root_hash, + const char *add_data) +{ + int mode = __S_IFREG | 0555; + void *signature; + int error = 0; + + struct incfs_new_file_args args = { + .size = size, + .mode = mode, + .file_name = ptr_to_u64(filename), + .directory_path = ptr_to_u64(dir), + .file_attr = 0, + .file_attr_len = 0 + }; + + args.signature_size = format_signature(&signature, root_hash, add_data); + args.signature_info = ptr_to_u64(signature); + + md5(filename, strlen(filename), (char *)args.file_id.bytes); + + if (ioctl(fd, INCFS_IOC_CREATE_FILE, &args) != 0) { + error = -errno; + goto out; + } + + *id_out = args.file_id; + +out: + free(signature); + return error; +} + +int emit_file(int fd, const char *dir, const char *filename, + incfs_uuid_t *id_out, size_t size, const char *attr) +{ + int mode = __S_IFREG | 0555; + struct incfs_new_file_args args = { .size = size, + .mode = mode, + .file_name = ptr_to_u64(filename), + .directory_path = ptr_to_u64(dir), + .signature_info = ptr_to_u64(NULL), + .signature_size = 0, + .file_attr = ptr_to_u64(attr), + .file_attr_len = + attr ? strlen(attr) : 0 }; + + md5(filename, strlen(filename), (char *)args.file_id.bytes); + + if (ioctl(fd, INCFS_IOC_CREATE_FILE, &args) != 0) + return -errno; + + *id_out = args.file_id; + return 0; +} + +int get_file_bmap(int cmd_fd, int ino, unsigned char *buf, int buf_size) +{ + return 0; +} + +int get_file_signature(int fd, unsigned char *buf, int buf_size) +{ + struct incfs_get_file_sig_args args = { + .file_signature = ptr_to_u64(buf), + .file_signature_buf_size = buf_size + }; + + if (ioctl(fd, INCFS_IOC_READ_FILE_SIGNATURE, &args) == 0) + return args.file_signature_len_out; + return -errno; +} + +loff_t get_file_size(const char *name) +{ + struct stat st; + + if (stat(name, &st) == 0) + return st.st_size; + return -ENOENT; +} + +int open_commands_file(const char *mount_dir) +{ + char cmd_file[255]; + int cmd_fd; + + snprintf(cmd_file, ARRAY_SIZE(cmd_file), + "%s/%s", mount_dir, INCFS_PENDING_READS_FILENAME); + cmd_fd = open(cmd_file, O_RDONLY); + + if (cmd_fd < 0) + perror("Can't open commands file"); + return cmd_fd; +} + +int open_log_file(const char *mount_dir) +{ + char cmd_file[255]; + int cmd_fd; + + snprintf(cmd_file, ARRAY_SIZE(cmd_file), "%s/.log", mount_dir); + cmd_fd = open(cmd_file, O_RDWR); + if (cmd_fd < 0) + perror("Can't open log file"); + return cmd_fd; +} + +int wait_for_pending_reads(int fd, int timeout_ms, + struct incfs_pending_read_info *prs, int prs_count) +{ + ssize_t read_res = 0; + + if (timeout_ms > 0) { + int poll_res = 0; + struct pollfd pollfd = { + .fd = fd, + .events = POLLIN + }; + + poll_res = poll(&pollfd, 1, timeout_ms); + if (poll_res < 0) + return -errno; + if (poll_res == 0) + return 0; + if (!(pollfd.revents | POLLIN)) + return 0; + } + + read_res = read(fd, prs, prs_count * sizeof(*prs)); + if (read_res < 0) + return -errno; + + return read_res / sizeof(*prs); +} + +char *concat_file_name(const char *dir, char *file) +{ + char full_name[FILENAME_MAX] = ""; + + if (snprintf(full_name, ARRAY_SIZE(full_name), "%s/%s", dir, file) < 0) + return NULL; + return strdup(full_name); +} + +int delete_dir_tree(const char *dir_path) +{ + DIR *dir = NULL; + struct dirent *dp; + int result = 0; + + dir = opendir(dir_path); + if (!dir) { + result = -errno; + goto out; + } + + while ((dp = readdir(dir))) { + char *full_path; + + if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")) + continue; + + full_path = concat_file_name(dir_path, dp->d_name); + if (dp->d_type == DT_DIR) + result = delete_dir_tree(full_path); + else + result = unlink(full_path); + free(full_path); + if (result) + goto out; + } + +out: + if (dir) + closedir(dir); + if (!result) + rmdir(dir_path); + return result; +} + +void sha256(const char *data, size_t dsize, char *hash) +{ + SHA256_CTX ctx; + + SHA256_Init(&ctx); + SHA256_Update(&ctx, data, dsize); + SHA256_Final((unsigned char *)hash, &ctx); +} + +void md5(const char *data, size_t dsize, char *hash) +{ + MD5_CTX ctx; + + MD5_Init(&ctx); + MD5_Update(&ctx, data, dsize); + MD5_Final((unsigned char *)hash, &ctx); +} diff --git a/tools/testing/selftests/filesystems/incfs/utils.h b/tools/testing/selftests/filesystems/incfs/utils.h new file mode 100644 index 000000000000..23c8a099662a --- /dev/null +++ b/tools/testing/selftests/filesystems/incfs/utils.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2019 Google LLC + */ +#include +#include + +#include "../../include/uapi/linux/incrementalfs.h" + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) + +#define __packed __attribute__((__packed__)) + +#ifdef __LP64__ +#define ptr_to_u64(p) ((__u64)p) +#else +#define ptr_to_u64(p) ((__u64)(__u32)p) +#endif + +#define SHA256_DIGEST_SIZE 32 + +int mount_fs(const char *mount_dir, const char *backing_dir, + int read_timeout_ms); + +int mount_fs_opt(const char *mount_dir, const char *backing_dir, + const char *opt); + +int get_file_bmap(int cmd_fd, int ino, unsigned char *buf, int buf_size); + +int get_file_signature(int fd, unsigned char *buf, int buf_size); + +int emit_node(int fd, char *filename, int *ino_out, int parent_ino, + size_t size, mode_t mode, char *attr); + +int emit_file(int fd, const char *dir, const char *filename, + incfs_uuid_t *id_out, size_t size, const char *attr); + +int crypto_emit_file(int fd, const char *dir, const char *filename, + incfs_uuid_t *id_out, size_t size, const char *root_hash, + const char *add_data); + +loff_t get_file_size(const char *name); + +int open_commands_file(const char *mount_dir); + +int open_log_file(const char *mount_dir); + +int wait_for_pending_reads(int fd, int timeout_ms, + struct incfs_pending_read_info *prs, int prs_count); + +char *concat_file_name(const char *dir, char *file); + +void sha256(const char *data, size_t dsize, char *hash); + +void md5(const char *data, size_t dsize, char *hash); + +int delete_dir_tree(const char *path); diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc index 231bcd2c4eb5..1e7ac6f3362f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc @@ -71,8 +71,11 @@ test_badarg "\$stackp" "\$stack0+10" "\$stack1-10" echo "r ${PROBEFUNC} \$retval" > kprobe_events ! echo "p ${PROBEFUNC} \$retval" > kprobe_events +# $comm was introduced in 4.8, older kernels reject it. +if grep -A1 "fetcharg:" README | grep -q '\$comm' ; then : "Comm access" test_goodarg "\$comm" +fi : "Indirect memory access" test_goodarg "+0(${GOODREG})" "-0(${GOODREG})" "+10(\$stack)" \ diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c index ee9382bdfadc..c5587844fbb8 100644 --- a/tools/testing/selftests/ipc/msgque.c +++ b/tools/testing/selftests/ipc/msgque.c @@ -1,9 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE #include #include #include #include -#include +#include #include #include "../kselftest.h" @@ -73,7 +74,7 @@ int restore_queue(struct msgque_data *msgque) return 0; destroy: - if (msgctl(id, IPC_RMID, 0)) + if (msgctl(id, IPC_RMID, NULL)) printf("Failed to destroy queue: %d\n", -errno); return ret; } @@ -120,7 +121,7 @@ int check_and_destroy_queue(struct msgque_data *msgque) ret = 0; err: - if (msgctl(msgque->msq_id, IPC_RMID, 0)) { + if (msgctl(msgque->msq_id, IPC_RMID, NULL)) { printf("Failed to destroy queue: %d\n", -errno); return -errno; } @@ -129,7 +130,7 @@ err: int dump_queue(struct msgque_data *msgque) { - struct msqid64_ds ds; + struct msqid_ds ds; int kern_id; int i, ret; @@ -246,7 +247,7 @@ int main(int argc, char **argv) return ksft_exit_pass(); err_destroy: - if (msgctl(msgque.msq_id, IPC_RMID, 0)) { + if (msgctl(msgque.msq_id, IPC_RMID, NULL)) { printf("Failed to destroy queue: %d\n", -errno); return ksft_exit_fail(); } diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 5bef05d6ba39..c9be64dc681d 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -54,17 +54,20 @@ else $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS)) endif +define INSTALL_SINGLE_RULE + $(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH)) + $(if $(INSTALL_LIST),@echo rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/) + $(if $(INSTALL_LIST),@rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/) +endef + define INSTALL_RULE - @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ - mkdir -p ${INSTALL_PATH}; \ - echo "rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/"; \ - rsync -a $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(INSTALL_PATH)/; \ - fi - @if [ "X$(TEST_GEN_PROGS)$(TEST_CUSTOM_PROGS)$(TEST_GEN_PROGS_EXTENDED)$(TEST_GEN_FILES)" != "X" ]; then \ - mkdir -p ${INSTALL_PATH}; \ - echo "rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/"; \ - rsync -a $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(INSTALL_PATH)/; \ - fi + $(eval INSTALL_LIST = $(TEST_PROGS)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_FILES)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_GEN_PROGS)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_CUSTOM_PROGS)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_GEN_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE) + $(eval INSTALL_LIST = $(TEST_GEN_FILES)) $(INSTALL_SINGLE_RULE) endef install: all diff --git a/tools/testing/selftests/net/reuseport_dualstack.c b/tools/testing/selftests/net/reuseport_dualstack.c index fe3230c55986..fb7a59ed759e 100644 --- a/tools/testing/selftests/net/reuseport_dualstack.c +++ b/tools/testing/selftests/net/reuseport_dualstack.c @@ -129,7 +129,7 @@ static void test(int *rcv_fds, int count, int proto) { struct epoll_event ev; int epfd, i, test_fd; - uint16_t test_family; + int test_family; socklen_t len; epfd = epoll_create(1); @@ -146,6 +146,7 @@ static void test(int *rcv_fds, int count, int proto) send_from_v4(proto); test_fd = receive_once(epfd, proto); + len = sizeof(test_family); if (getsockopt(test_fd, SOL_SOCKET, SO_DOMAIN, &test_family, &len)) error(1, errno, "failed to read socket domain"); if (test_family != AF_INET) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 891130daac7c..8a5066d98e72 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -195,6 +195,26 @@ kci_test_route_get() echo "PASS: route get" } +kci_test_addrlft() +{ + for i in $(seq 10 100) ;do + lft=$(((RANDOM%3) + 1)) + ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1)) + check_err $? + done + + sleep 5 + + ip addr show dev "$devdummy" | grep "10.23.11." + if [ $? -eq 0 ]; then + echo "FAIL: preferred_lft addresses remaining" + check_err 1 + return + fi + + echo "PASS: preferred_lft addresses have expired" +} + kci_test_addrlabel() { ret=0 @@ -245,6 +265,7 @@ kci_test_rtnl() kci_test_polrouting kci_test_route_get + kci_test_addrlft kci_test_tc kci_test_gre kci_test_bridge diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile index 1be547434a49..7e0c175b8297 100644 --- a/tools/testing/selftests/powerpc/cache_shape/Makefile +++ b/tools/testing/selftests/powerpc/cache_shape/Makefile @@ -1,11 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := cache_shape - -all: $(TEST_PROGS) - -$(TEST_PROGS): ../harness.c ../utils.c +TEST_GEN_PROGS := cache_shape include ../../lib.mk -clean: - rm -f $(TEST_PROGS) *.o +$(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index bf315bcbe663..fae8c52cf8f0 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -3,6 +3,7 @@ noarg: $(MAKE) -C ../ TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao +TEST_GEN_PROGS_EXTENDED := tlbie_test TEST_GEN_FILES := tempfile include ../../lib.mk @@ -14,3 +15,4 @@ $(OUTPUT)/prot_sao: ../utils.c $(OUTPUT)/tempfile: dd if=/dev/zero of=$@ bs=64k count=1 +$(OUTPUT)/tlbie_test: LDLIBS += -lpthread diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c new file mode 100644 index 000000000000..f85a0938ab25 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c @@ -0,0 +1,734 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2019, Nick Piggin, Gautham R. Shenoy, Aneesh Kumar K.V, IBM Corp. + */ + +/* + * + * Test tlbie/mtpidr race. We have 4 threads doing flush/load/compare/store + * sequence in a loop. The same threads also rung a context switch task + * that does sched_yield() in loop. + * + * The snapshot thread mark the mmap area PROT_READ in between, make a copy + * and copy it back to the original area. This helps us to detect if any + * store continued to happen after we marked the memory PROT_READ. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline void dcbf(volatile unsigned int *addr) +{ + __asm__ __volatile__ ("dcbf %y0; sync" : : "Z"(*(unsigned char *)addr) : "memory"); +} + +static void err_msg(char *msg) +{ + + time_t now; + time(&now); + printf("=================================\n"); + printf(" Error: %s\n", msg); + printf(" %s", ctime(&now)); + printf("=================================\n"); + exit(1); +} + +static char *map1; +static char *map2; +static pid_t rim_process_pid; + +/* + * A "rim-sequence" is defined to be the sequence of the following + * operations performed on a memory word: + * 1) FLUSH the contents of that word. + * 2) LOAD the contents of that word. + * 3) COMPARE the contents of that word with the content that was + * previously stored at that word + * 4) STORE new content into that word. + * + * The threads in this test that perform the rim-sequence are termed + * as rim_threads. + */ + +/* + * A "corruption" is defined to be the failed COMPARE operation in a + * rim-sequence. + * + * A rim_thread that detects a corruption informs about it to all the + * other rim_threads, and the mem_snapshot thread. + */ +static volatile unsigned int corruption_found; + +/* + * This defines the maximum number of rim_threads in this test. + * + * The THREAD_ID_BITS denote the number of bits required + * to represent the thread_ids [0..MAX_THREADS - 1]. + * We are being a bit paranoid here and set it to 8 bits, + * though 6 bits suffice. + * + */ +#define MAX_THREADS 64 +#define THREAD_ID_BITS 8 +#define THREAD_ID_MASK ((1 << THREAD_ID_BITS) - 1) +static unsigned int rim_thread_ids[MAX_THREADS]; +static pthread_t rim_threads[MAX_THREADS]; + + +/* + * Each rim_thread works on an exclusive "chunk" of size + * RIM_CHUNK_SIZE. + * + * The ith rim_thread works on the ith chunk. + * + * The ith chunk begins at + * map1 + (i * RIM_CHUNK_SIZE) + */ +#define RIM_CHUNK_SIZE 1024 +#define BITS_PER_BYTE 8 +#define WORD_SIZE (sizeof(unsigned int)) +#define WORD_BITS (WORD_SIZE * BITS_PER_BYTE) +#define WORDS_PER_CHUNK (RIM_CHUNK_SIZE/WORD_SIZE) + +static inline char *compute_chunk_start_addr(unsigned int thread_id) +{ + char *chunk_start; + + chunk_start = (char *)((unsigned long)map1 + + (thread_id * RIM_CHUNK_SIZE)); + + return chunk_start; +} + +/* + * The "word-offset" of a word-aligned address inside a chunk, is + * defined to be the number of words that precede the address in that + * chunk. + * + * WORD_OFFSET_BITS denote the number of bits required to represent + * the word-offsets of all the word-aligned addresses of a chunk. + */ +#define WORD_OFFSET_BITS (__builtin_ctz(WORDS_PER_CHUNK)) +#define WORD_OFFSET_MASK ((1 << WORD_OFFSET_BITS) - 1) + +static inline unsigned int compute_word_offset(char *start, unsigned int *addr) +{ + unsigned int delta_bytes, ret; + delta_bytes = (unsigned long)addr - (unsigned long)start; + + ret = delta_bytes/WORD_SIZE; + + return ret; +} + +/* + * A "sweep" is defined to be the sequential execution of the + * rim-sequence by a rim_thread on its chunk one word at a time, + * starting from the first word of its chunk and ending with the last + * word of its chunk. + * + * Each sweep of a rim_thread is uniquely identified by a sweep_id. + * SWEEP_ID_BITS denote the number of bits required to represent + * the sweep_ids of rim_threads. + * + * As to why SWEEP_ID_BITS are computed as a function of THREAD_ID_BITS, + * WORD_OFFSET_BITS, and WORD_BITS, see the "store-pattern" below. + */ +#define SWEEP_ID_BITS (WORD_BITS - (THREAD_ID_BITS + WORD_OFFSET_BITS)) +#define SWEEP_ID_MASK ((1 << SWEEP_ID_BITS) - 1) + +/* + * A "store-pattern" is the word-pattern that is stored into a word + * location in the 4)STORE step of the rim-sequence. + * + * In the store-pattern, we shall encode: + * + * - The thread-id of the rim_thread performing the store + * (The most significant THREAD_ID_BITS) + * + * - The word-offset of the address into which the store is being + * performed (The next WORD_OFFSET_BITS) + * + * - The sweep_id of the current sweep in which the store is + * being performed. (The lower SWEEP_ID_BITS) + * + * Store Pattern: 32 bits + * |------------------|--------------------|---------------------------------| + * | Thread id | Word offset | sweep_id | + * |------------------|--------------------|---------------------------------| + * THREAD_ID_BITS WORD_OFFSET_BITS SWEEP_ID_BITS + * + * In the store pattern, the (Thread-id + Word-offset) uniquely identify the + * address to which the store is being performed i.e, + * address == map1 + + * (Thread-id * RIM_CHUNK_SIZE) + (Word-offset * WORD_SIZE) + * + * And the sweep_id in the store pattern identifies the time when the + * store was performed by the rim_thread. + * + * We shall use this property in the 3)COMPARE step of the + * rim-sequence. + */ +#define SWEEP_ID_SHIFT 0 +#define WORD_OFFSET_SHIFT (SWEEP_ID_BITS) +#define THREAD_ID_SHIFT (WORD_OFFSET_BITS + SWEEP_ID_BITS) + +/* + * Compute the store pattern for a given thread with id @tid, at + * location @addr in the sweep identified by @sweep_id + */ +static inline unsigned int compute_store_pattern(unsigned int tid, + unsigned int *addr, + unsigned int sweep_id) +{ + unsigned int ret = 0; + char *start = compute_chunk_start_addr(tid); + unsigned int word_offset = compute_word_offset(start, addr); + + ret += (tid & THREAD_ID_MASK) << THREAD_ID_SHIFT; + ret += (word_offset & WORD_OFFSET_MASK) << WORD_OFFSET_SHIFT; + ret += (sweep_id & SWEEP_ID_MASK) << SWEEP_ID_SHIFT; + return ret; +} + +/* Extract the thread-id from the given store-pattern */ +static inline unsigned int extract_tid(unsigned int pattern) +{ + unsigned int ret; + + ret = (pattern >> THREAD_ID_SHIFT) & THREAD_ID_MASK; + return ret; +} + +/* Extract the word-offset from the given store-pattern */ +static inline unsigned int extract_word_offset(unsigned int pattern) +{ + unsigned int ret; + + ret = (pattern >> WORD_OFFSET_SHIFT) & WORD_OFFSET_MASK; + + return ret; +} + +/* Extract the sweep-id from the given store-pattern */ +static inline unsigned int extract_sweep_id(unsigned int pattern) + +{ + unsigned int ret; + + ret = (pattern >> SWEEP_ID_SHIFT) & SWEEP_ID_MASK; + + return ret; +} + +/************************************************************ + * * + * Logging the output of the verification * + * * + ************************************************************/ +#define LOGDIR_NAME_SIZE 100 +static char logdir[LOGDIR_NAME_SIZE]; + +static FILE *fp[MAX_THREADS]; +static const char logfilename[] ="Thread-%02d-Chunk"; + +static inline void start_verification_log(unsigned int tid, + unsigned int *addr, + unsigned int cur_sweep_id, + unsigned int prev_sweep_id) +{ + FILE *f; + char logfile[30]; + char path[LOGDIR_NAME_SIZE + 30]; + char separator[2] = "/"; + char *chunk_start = compute_chunk_start_addr(tid); + unsigned int size = RIM_CHUNK_SIZE; + + sprintf(logfile, logfilename, tid); + strcpy(path, logdir); + strcat(path, separator); + strcat(path, logfile); + f = fopen(path, "w"); + + if (!f) { + err_msg("Unable to create logfile\n"); + } + + fp[tid] = f; + + fprintf(f, "----------------------------------------------------------\n"); + fprintf(f, "PID = %d\n", rim_process_pid); + fprintf(f, "Thread id = %02d\n", tid); + fprintf(f, "Chunk Start Addr = 0x%016lx\n", (unsigned long)chunk_start); + fprintf(f, "Chunk Size = %d\n", size); + fprintf(f, "Next Store Addr = 0x%016lx\n", (unsigned long)addr); + fprintf(f, "Current sweep-id = 0x%08x\n", cur_sweep_id); + fprintf(f, "Previous sweep-id = 0x%08x\n", prev_sweep_id); + fprintf(f, "----------------------------------------------------------\n"); +} + +static inline void log_anamoly(unsigned int tid, unsigned int *addr, + unsigned int expected, unsigned int observed) +{ + FILE *f = fp[tid]; + + fprintf(f, "Thread %02d: Addr 0x%lx: Expected 0x%x, Observed 0x%x\n", + tid, (unsigned long)addr, expected, observed); + fprintf(f, "Thread %02d: Expected Thread id = %02d\n", tid, extract_tid(expected)); + fprintf(f, "Thread %02d: Observed Thread id = %02d\n", tid, extract_tid(observed)); + fprintf(f, "Thread %02d: Expected Word offset = %03d\n", tid, extract_word_offset(expected)); + fprintf(f, "Thread %02d: Observed Word offset = %03d\n", tid, extract_word_offset(observed)); + fprintf(f, "Thread %02d: Expected sweep-id = 0x%x\n", tid, extract_sweep_id(expected)); + fprintf(f, "Thread %02d: Observed sweep-id = 0x%x\n", tid, extract_sweep_id(observed)); + fprintf(f, "----------------------------------------------------------\n"); +} + +static inline void end_verification_log(unsigned int tid, unsigned nr_anamolies) +{ + FILE *f = fp[tid]; + char logfile[30]; + char path[LOGDIR_NAME_SIZE + 30]; + char separator[] = "/"; + + fclose(f); + + if (nr_anamolies == 0) { + remove(path); + return; + } + + sprintf(logfile, logfilename, tid); + strcpy(path, logdir); + strcat(path, separator); + strcat(path, logfile); + + printf("Thread %02d chunk has %d corrupted words. For details check %s\n", + tid, nr_anamolies, path); +} + +/* + * When a COMPARE step of a rim-sequence fails, the rim_thread informs + * everyone else via the shared_memory pointed to by + * corruption_found variable. On seeing this, every thread verifies the + * content of its chunk as follows. + * + * Suppose a thread identified with @tid was about to store (but not + * yet stored) to @next_store_addr in its current sweep identified + * @cur_sweep_id. Let @prev_sweep_id indicate the previous sweep_id. + * + * This implies that for all the addresses @addr < @next_store_addr, + * Thread @tid has already performed a store as part of its current + * sweep. Hence we expect the content of such @addr to be: + * |-------------------------------------------------| + * | tid | word_offset(addr) | cur_sweep_id | + * |-------------------------------------------------| + * + * Since Thread @tid is yet to perform stores on address + * @next_store_addr and above, we expect the content of such an + * address @addr to be: + * |-------------------------------------------------| + * | tid | word_offset(addr) | prev_sweep_id | + * |-------------------------------------------------| + * + * The verifier function @verify_chunk does this verification and logs + * any anamolies that it finds. + */ +static void verify_chunk(unsigned int tid, unsigned int *next_store_addr, + unsigned int cur_sweep_id, + unsigned int prev_sweep_id) +{ + unsigned int *iter_ptr; + unsigned int size = RIM_CHUNK_SIZE; + unsigned int expected; + unsigned int observed; + char *chunk_start = compute_chunk_start_addr(tid); + + int nr_anamolies = 0; + + start_verification_log(tid, next_store_addr, + cur_sweep_id, prev_sweep_id); + + for (iter_ptr = (unsigned int *)chunk_start; + (unsigned long)iter_ptr < (unsigned long)chunk_start + size; + iter_ptr++) { + unsigned int expected_sweep_id; + + if (iter_ptr < next_store_addr) { + expected_sweep_id = cur_sweep_id; + } else { + expected_sweep_id = prev_sweep_id; + } + + expected = compute_store_pattern(tid, iter_ptr, expected_sweep_id); + + dcbf((volatile unsigned int*)iter_ptr); //Flush before reading + observed = *iter_ptr; + + if (observed != expected) { + nr_anamolies++; + log_anamoly(tid, iter_ptr, expected, observed); + } + } + + end_verification_log(tid, nr_anamolies); +} + +static void set_pthread_cpu(pthread_t th, int cpu) +{ + cpu_set_t run_cpu_mask; + struct sched_param param; + + CPU_ZERO(&run_cpu_mask); + CPU_SET(cpu, &run_cpu_mask); + pthread_setaffinity_np(th, sizeof(cpu_set_t), &run_cpu_mask); + + param.sched_priority = 1; + if (0 && sched_setscheduler(0, SCHED_FIFO, ¶m) == -1) { + /* haven't reproduced with this setting, it kills random preemption which may be a factor */ + fprintf(stderr, "could not set SCHED_FIFO, run as root?\n"); + } +} + +static void set_mycpu(int cpu) +{ + cpu_set_t run_cpu_mask; + struct sched_param param; + + CPU_ZERO(&run_cpu_mask); + CPU_SET(cpu, &run_cpu_mask); + sched_setaffinity(0, sizeof(cpu_set_t), &run_cpu_mask); + + param.sched_priority = 1; + if (0 && sched_setscheduler(0, SCHED_FIFO, ¶m) == -1) { + fprintf(stderr, "could not set SCHED_FIFO, run as root?\n"); + } +} + +static volatile int segv_wait; + +static void segv_handler(int signo, siginfo_t *info, void *extra) +{ + while (segv_wait) { + sched_yield(); + } + +} + +static void set_segv_handler(void) +{ + struct sigaction sa; + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = segv_handler; + + if (sigaction(SIGSEGV, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } +} + +int timeout = 0; +/* + * This function is executed by every rim_thread. + * + * This function performs sweeps over the exclusive chunks of the + * rim_threads executing the rim-sequence one word at a time. + */ +static void *rim_fn(void *arg) +{ + unsigned int tid = *((unsigned int *)arg); + + int size = RIM_CHUNK_SIZE; + char *chunk_start = compute_chunk_start_addr(tid); + + unsigned int prev_sweep_id; + unsigned int cur_sweep_id = 0; + + /* word access */ + unsigned int pattern = cur_sweep_id; + unsigned int *pattern_ptr = &pattern; + unsigned int *w_ptr, read_data; + + set_segv_handler(); + + /* + * Let us initialize the chunk: + * + * Each word-aligned address addr in the chunk, + * is initialized to : + * |-------------------------------------------------| + * | tid | word_offset(addr) | 0 | + * |-------------------------------------------------| + */ + for (w_ptr = (unsigned int *)chunk_start; + (unsigned long)w_ptr < (unsigned long)(chunk_start) + size; + w_ptr++) { + + *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id); + *w_ptr = *pattern_ptr; + } + + while (!corruption_found && !timeout) { + prev_sweep_id = cur_sweep_id; + cur_sweep_id = cur_sweep_id + 1; + + for (w_ptr = (unsigned int *)chunk_start; + (unsigned long)w_ptr < (unsigned long)(chunk_start) + size; + w_ptr++) { + unsigned int old_pattern; + + /* + * Compute the pattern that we would have + * stored at this location in the previous + * sweep. + */ + old_pattern = compute_store_pattern(tid, w_ptr, prev_sweep_id); + + /* + * FLUSH:Ensure that we flush the contents of + * the cache before loading + */ + dcbf((volatile unsigned int*)w_ptr); //Flush + + /* LOAD: Read the value */ + read_data = *w_ptr; //Load + + /* + * COMPARE: Is it the same as what we had stored + * in the previous sweep ? It better be! + */ + if (read_data != old_pattern) { + /* No it isn't! Tell everyone */ + corruption_found = 1; + } + + /* + * Before performing a store, let us check if + * any rim_thread has found a corruption. + */ + if (corruption_found || timeout) { + /* + * Yes. Someone (including us!) has found + * a corruption :( + * + * Let us verify that our chunk is + * correct. + */ + /* But first, let us allow the dust to settle down! */ + verify_chunk(tid, w_ptr, cur_sweep_id, prev_sweep_id); + + return 0; + } + + /* + * Compute the new pattern that we are going + * to write to this location + */ + *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id); + + /* + * STORE: Now let us write this pattern into + * the location + */ + *w_ptr = *pattern_ptr; + } + } + + return NULL; +} + + +static unsigned long start_cpu = 0; +static unsigned long nrthreads = 4; + +static pthread_t mem_snapshot_thread; + +static void *mem_snapshot_fn(void *arg) +{ + int page_size = getpagesize(); + size_t size = page_size; + void *tmp = malloc(size); + + while (!corruption_found && !timeout) { + /* Stop memory migration once corruption is found */ + segv_wait = 1; + + mprotect(map1, size, PROT_READ); + + /* + * Load from the working alias (map1). Loading from map2 + * also fails. + */ + memcpy(tmp, map1, size); + + /* + * Stores must go via map2 which has write permissions, but + * the corrupted data tends to be seen in the snapshot buffer, + * so corruption does not appear to be introduced at the + * copy-back via map2 alias here. + */ + memcpy(map2, tmp, size); + /* + * Before releasing other threads, must ensure the copy + * back to + */ + asm volatile("sync" ::: "memory"); + mprotect(map1, size, PROT_READ|PROT_WRITE); + asm volatile("sync" ::: "memory"); + segv_wait = 0; + + usleep(1); /* This value makes a big difference */ + } + + return 0; +} + +void alrm_sighandler(int sig) +{ + timeout = 1; +} + +int main(int argc, char *argv[]) +{ + int c; + int page_size = getpagesize(); + time_t now; + int i, dir_error; + pthread_attr_t attr; + key_t shm_key = (key_t) getpid(); + int shmid, run_time = 20 * 60; + struct sigaction sa_alrm; + + snprintf(logdir, LOGDIR_NAME_SIZE, + "/tmp/logdir-%u", (unsigned int)getpid()); + while ((c = getopt(argc, argv, "r:hn:l:t:")) != -1) { + switch(c) { + case 'r': + start_cpu = strtoul(optarg, NULL, 10); + break; + case 'h': + printf("%s [-r ] [-n ] [-l ] [-t ]\n", argv[0]); + exit(0); + break; + case 'n': + nrthreads = strtoul(optarg, NULL, 10); + break; + case 'l': + strncpy(logdir, optarg, LOGDIR_NAME_SIZE - 1); + break; + case 't': + run_time = strtoul(optarg, NULL, 10); + break; + default: + printf("invalid option\n"); + exit(0); + break; + } + } + + if (nrthreads > MAX_THREADS) + nrthreads = MAX_THREADS; + + shmid = shmget(shm_key, page_size, IPC_CREAT|0666); + if (shmid < 0) { + err_msg("Failed shmget\n"); + } + + map1 = shmat(shmid, NULL, 0); + if (map1 == (void *) -1) { + err_msg("Failed shmat"); + } + + map2 = shmat(shmid, NULL, 0); + if (map2 == (void *) -1) { + err_msg("Failed shmat"); + } + + dir_error = mkdir(logdir, 0755); + + if (dir_error) { + err_msg("Failed mkdir"); + } + + printf("start_cpu list:%lu\n", start_cpu); + printf("number of worker threads:%lu + 1 snapshot thread\n", nrthreads); + printf("Allocated address:0x%016lx + secondary map:0x%016lx\n", (unsigned long)map1, (unsigned long)map2); + printf("logdir at : %s\n", logdir); + printf("Timeout: %d seconds\n", run_time); + + time(&now); + printf("=================================\n"); + printf(" Starting Test\n"); + printf(" %s", ctime(&now)); + printf("=================================\n"); + + for (i = 0; i < nrthreads; i++) { + if (1 && !fork()) { + prctl(PR_SET_PDEATHSIG, SIGKILL); + set_mycpu(start_cpu + i); + for (;;) + sched_yield(); + exit(0); + } + } + + + sa_alrm.sa_handler = &alrm_sighandler; + sigemptyset(&sa_alrm.sa_mask); + sa_alrm.sa_flags = 0; + + if (sigaction(SIGALRM, &sa_alrm, 0) == -1) { + err_msg("Failed signal handler registration\n"); + } + + alarm(run_time); + + pthread_attr_init(&attr); + for (i = 0; i < nrthreads; i++) { + rim_thread_ids[i] = i; + pthread_create(&rim_threads[i], &attr, rim_fn, &rim_thread_ids[i]); + set_pthread_cpu(rim_threads[i], start_cpu + i); + } + + pthread_create(&mem_snapshot_thread, &attr, mem_snapshot_fn, map1); + set_pthread_cpu(mem_snapshot_thread, start_cpu + i); + + + pthread_join(mem_snapshot_thread, NULL); + for (i = 0; i < nrthreads; i++) { + pthread_join(rim_threads[i], NULL); + } + + if (!timeout) { + time(&now); + printf("=================================\n"); + printf(" Data Corruption Detected\n"); + printf(" %s", ctime(&now)); + printf(" See logfiles in %s\n", logdir); + printf("=================================\n"); + return 1; + } + return 0; +} diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile index a7cbd5082e27..4213978f3ee2 100644 --- a/tools/testing/selftests/powerpc/signal/Makefile +++ b/tools/testing/selftests/powerpc/signal/Makefile @@ -1,14 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := signal signal_tm - -all: $(TEST_PROGS) - -$(TEST_PROGS): ../harness.c ../utils.c signal.S +TEST_GEN_PROGS := signal signal_tm CFLAGS += -maltivec -signal_tm: CFLAGS += -mhtm +$(OUTPUT)/signal_tm: CFLAGS += -mhtm include ../../lib.mk -clean: - rm -f $(TEST_PROGS) *.o +$(TEST_GEN_PROGS): ../harness.c ../utils.c signal.S diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile index 30b8ff8fb82e..e4cedfe9753d 100644 --- a/tools/testing/selftests/powerpc/switch_endian/Makefile +++ b/tools/testing/selftests/powerpc/switch_endian/Makefile @@ -7,6 +7,7 @@ EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S include ../../lib.mk +$(OUTPUT)/switch_endian_test: ASFLAGS += -I $(OUTPUT) $(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S $(OUTPUT)/check-reversed.o: $(OUTPUT)/check.o diff --git a/tools/testing/selftests/rseq/settings b/tools/testing/selftests/rseq/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/rseq/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/size/get_size.c b/tools/testing/selftests/size/get_size.c index d4b59ab979a0..f55943b6d1e2 100644 --- a/tools/testing/selftests/size/get_size.c +++ b/tools/testing/selftests/size/get_size.c @@ -12,23 +12,35 @@ * own execution. It also attempts to have as few dependencies * on kernel features as possible. * - * It should be statically linked, with startup libs avoided. - * It uses no library calls, and only the following 3 syscalls: + * It should be statically linked, with startup libs avoided. It uses + * no library calls except the syscall() function for the following 3 + * syscalls: * sysinfo(), write(), and _exit() * * For output, it avoids printf (which in some C libraries * has large external dependencies) by implementing it's own * number output and print routines, and using __builtin_strlen() + * + * The test may crash if any of the above syscalls fails because in some + * libc implementations (e.g. the GNU C Library) errno is saved in + * thread-local storage, which does not get initialized due to avoiding + * startup libs. */ #include #include +#include #define STDOUT_FILENO 1 static int print(const char *s) { - return write(STDOUT_FILENO, s, __builtin_strlen(s)); + size_t len = 0; + + while (s[len] != '\0') + len++; + + return syscall(SYS_write, STDOUT_FILENO, s, len); } static inline char *num_to_str(unsigned long num, char *buf, int len) @@ -80,12 +92,12 @@ void _start(void) print("TAP version 13\n"); print("# Testing system size.\n"); - ccode = sysinfo(&info); + ccode = syscall(SYS_sysinfo, &info); if (ccode < 0) { print("not ok 1"); print(test_name); print(" ---\n reason: \"could not get sysinfo\"\n ...\n"); - _exit(ccode); + syscall(SYS_exit, ccode); } print("ok 1"); print(test_name); @@ -101,5 +113,5 @@ void _start(void) print(" ...\n"); print("1..1\n"); - _exit(0); + syscall(SYS_exit, 0); } diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index 6e290874b70e..f1c6e025cbe5 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -89,7 +89,13 @@ int main(int argc, char *argv[]) fd = open("/dev/watchdog", O_WRONLY); if (fd == -1) { - printf("Watchdog device not enabled.\n"); + if (errno == ENOENT) + printf("Watchdog device not enabled.\n"); + else if (errno == EACCES) + printf("Run watchdog as root.\n"); + else + printf("Watchdog device open failed %s\n", + strerror(errno)); exit(-1); } @@ -103,7 +109,7 @@ int main(int argc, char *argv[]) printf("Last boot is caused by: %s.\n", (flags != 0) ? "Watchdog" : "Power-On-Reset"); else - printf("WDIOC_GETBOOTSTATUS errno '%s'\n", strerror(errno)); + printf("WDIOC_GETBOOTSTATUS error '%s'\n", strerror(errno)); break; case 'd': flags = WDIOS_DISABLECARD; @@ -111,7 +117,7 @@ int main(int argc, char *argv[]) if (!ret) printf("Watchdog card disabled.\n"); else - printf("WDIOS_DISABLECARD errno '%s'\n", strerror(errno)); + printf("WDIOS_DISABLECARD error '%s'\n", strerror(errno)); break; case 'e': flags = WDIOS_ENABLECARD; @@ -119,7 +125,7 @@ int main(int argc, char *argv[]) if (!ret) printf("Watchdog card enabled.\n"); else - printf("WDIOS_ENABLECARD errno '%s'\n", strerror(errno)); + printf("WDIOS_ENABLECARD error '%s'\n", strerror(errno)); break; case 'p': ping_rate = strtoul(optarg, NULL, 0); @@ -133,7 +139,7 @@ int main(int argc, char *argv[]) if (!ret) printf("Watchdog timeout set to %u seconds.\n", flags); else - printf("WDIOC_SETTIMEOUT errno '%s'\n", strerror(errno)); + printf("WDIOC_SETTIMEOUT error '%s'\n", strerror(errno)); break; default: usage(argv[0]); diff --git a/tools/usb/usbip/libsrc/usbip_host_common.c b/tools/usb/usbip/libsrc/usbip_host_common.c index 6ff7b601f854..4bb905925b0e 100644 --- a/tools/usb/usbip/libsrc/usbip_host_common.c +++ b/tools/usb/usbip/libsrc/usbip_host_common.c @@ -43,7 +43,7 @@ static int32_t read_attr_usbip_status(struct usbip_usb_device *udev) int size; int fd; int length; - char status; + char status[2] = { 0 }; int value = 0; size = snprintf(status_attr_path, sizeof(status_attr_path), @@ -61,15 +61,15 @@ static int32_t read_attr_usbip_status(struct usbip_usb_device *udev) return -1; } - length = read(fd, &status, 1); + length = read(fd, status, 1); if (length < 0) { err("error reading attribute %s", status_attr_path); close(fd); return -1; } - value = atoi(&status); - + value = atoi(status); + close(fd); return value; } diff --git a/tools/usb/usbip/src/usbip_network.c b/tools/usb/usbip/src/usbip_network.c index b4c37e76a6e0..187dfaa67d0a 100644 --- a/tools/usb/usbip/src/usbip_network.c +++ b/tools/usb/usbip/src/usbip_network.c @@ -62,39 +62,39 @@ void usbip_setup_port_number(char *arg) info("using port %d (\"%s\")", usbip_port, usbip_port_string); } -void usbip_net_pack_uint32_t(int pack, uint32_t *num) +uint32_t usbip_net_pack_uint32_t(int pack, uint32_t num) { uint32_t i; if (pack) - i = htonl(*num); + i = htonl(num); else - i = ntohl(*num); + i = ntohl(num); - *num = i; + return i; } -void usbip_net_pack_uint16_t(int pack, uint16_t *num) +uint16_t usbip_net_pack_uint16_t(int pack, uint16_t num) { uint16_t i; if (pack) - i = htons(*num); + i = htons(num); else - i = ntohs(*num); + i = ntohs(num); - *num = i; + return i; } void usbip_net_pack_usb_device(int pack, struct usbip_usb_device *udev) { - usbip_net_pack_uint32_t(pack, &udev->busnum); - usbip_net_pack_uint32_t(pack, &udev->devnum); - usbip_net_pack_uint32_t(pack, &udev->speed); + udev->busnum = usbip_net_pack_uint32_t(pack, udev->busnum); + udev->devnum = usbip_net_pack_uint32_t(pack, udev->devnum); + udev->speed = usbip_net_pack_uint32_t(pack, udev->speed); - usbip_net_pack_uint16_t(pack, &udev->idVendor); - usbip_net_pack_uint16_t(pack, &udev->idProduct); - usbip_net_pack_uint16_t(pack, &udev->bcdDevice); + udev->idVendor = usbip_net_pack_uint16_t(pack, udev->idVendor); + udev->idProduct = usbip_net_pack_uint16_t(pack, udev->idProduct); + udev->bcdDevice = usbip_net_pack_uint16_t(pack, udev->bcdDevice); } void usbip_net_pack_usb_interface(int pack __attribute__((unused)), @@ -141,6 +141,14 @@ ssize_t usbip_net_send(int sockfd, void *buff, size_t bufflen) return usbip_net_xmit(sockfd, buff, bufflen, 1); } +static inline void usbip_net_pack_op_common(int pack, + struct op_common *op_common) +{ + op_common->version = usbip_net_pack_uint16_t(pack, op_common->version); + op_common->code = usbip_net_pack_uint16_t(pack, op_common->code); + op_common->status = usbip_net_pack_uint32_t(pack, op_common->status); +} + int usbip_net_send_op_common(int sockfd, uint32_t code, uint32_t status) { struct op_common op_common; @@ -152,7 +160,7 @@ int usbip_net_send_op_common(int sockfd, uint32_t code, uint32_t status) op_common.code = code; op_common.status = status; - PACK_OP_COMMON(1, &op_common); + usbip_net_pack_op_common(1, &op_common); rc = usbip_net_send(sockfd, &op_common, sizeof(op_common)); if (rc < 0) { @@ -176,7 +184,7 @@ int usbip_net_recv_op_common(int sockfd, uint16_t *code) goto err; } - PACK_OP_COMMON(0, &op_common); + usbip_net_pack_op_common(0, &op_common); if (op_common.version != USBIP_VERSION) { dbg("version mismatch: %d %d", op_common.version, diff --git a/tools/usb/usbip/src/usbip_network.h b/tools/usb/usbip/src/usbip_network.h index 7032687621d3..8e8330c0f1c9 100644 --- a/tools/usb/usbip/src/usbip_network.h +++ b/tools/usb/usbip/src/usbip_network.h @@ -34,12 +34,6 @@ struct op_common { } __attribute__((packed)); -#define PACK_OP_COMMON(pack, op_common) do {\ - usbip_net_pack_uint16_t(pack, &(op_common)->version);\ - usbip_net_pack_uint16_t(pack, &(op_common)->code);\ - usbip_net_pack_uint32_t(pack, &(op_common)->status);\ -} while (0) - /* ---------------------------------------------------------------------- */ /* Dummy Code */ #define OP_UNSPEC 0x00 @@ -165,11 +159,11 @@ struct op_devlist_reply_extra { } while (0) #define PACK_OP_DEVLIST_REPLY(pack, reply) do {\ - usbip_net_pack_uint32_t(pack, &(reply)->ndev);\ + (reply)->ndev = usbip_net_pack_uint32_t(pack, (reply)->ndev);\ } while (0) -void usbip_net_pack_uint32_t(int pack, uint32_t *num); -void usbip_net_pack_uint16_t(int pack, uint16_t *num); +uint32_t usbip_net_pack_uint32_t(int pack, uint32_t num); +uint16_t usbip_net_pack_uint16_t(int pack, uint16_t num); void usbip_net_pack_usb_device(int pack, struct usbip_usb_device *udev); void usbip_net_pack_usb_interface(int pack, struct usbip_usb_interface *uinf); diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c index 3caee91bca08..878e0edb2e1b 100644 --- a/virt/kvm/arm/mmio.c +++ b/virt/kvm/arm/mmio.c @@ -117,6 +117,9 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) data = (data ^ mask) - mask; } + if (!vcpu->arch.mmio_decode.sixty_four) + data = data & 0xffffffff; + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, &data); data = vcpu_data_host_to_guest(vcpu, data, len); @@ -137,6 +140,7 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) unsigned long rt; int access_size; bool sign_extend; + bool sixty_four; if (kvm_vcpu_dabt_iss1tw(vcpu)) { /* page table accesses IO mem: tell guest to fix its TTBR */ @@ -150,11 +154,13 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) *is_write = kvm_vcpu_dabt_iswrite(vcpu); sign_extend = kvm_vcpu_dabt_issext(vcpu); + sixty_four = kvm_vcpu_dabt_issf(vcpu); rt = kvm_vcpu_dabt_get_rd(vcpu); *len = access_size; vcpu->arch.mmio_decode.sign_extend = sign_extend; vcpu->arch.mmio_decode.rt = rt; + vcpu->arch.mmio_decode.sixty_four = sixty_four; return 0; } diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index 1f4cac53b923..3814cdad643a 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -375,7 +375,8 @@ static void stage2_flush_memslot(struct kvm *kvm, pgd = kvm->arch.pgd + stage2_pgd_index(addr); do { next = stage2_pgd_addr_end(addr, end); - stage2_flush_puds(kvm, pgd, addr, next); + if (!stage2_pgd_none(*pgd)) + stage2_flush_puds(kvm, pgd, addr, next); } while (pgd++, addr = next, addr != end); } @@ -1735,7 +1736,8 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) if (!kvm->arch.pgd) return 0; trace_kvm_test_age_hva(hva); - return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); + return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE, + kvm_test_age_hva_handler, NULL); } void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 526d808ecbbd..8354ec4ef912 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -2210,7 +2210,8 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) target_addr = (u32)(val >> KVM_ITS_CTE_RDBASE_SHIFT); coll_id = val & KVM_ITS_CTE_ICID_MASK; - if (target_addr >= atomic_read(&kvm->online_vcpus)) + if (target_addr != COLLECTION_NOT_MAPPED && + target_addr >= atomic_read(&kvm->online_vcpus)) return -EINVAL; collection = find_collection(its, coll_id); diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 084edc9dc553..f16a55012ea3 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -331,8 +331,8 @@ retry: int vgic_v3_save_pending_tables(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - int last_byte_offset = -1; struct vgic_irq *irq; + gpa_t last_ptr = ~(gpa_t)0; int ret; u8 val; @@ -352,11 +352,11 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) bit_nr = irq->intid % BITS_PER_BYTE; ptr = pendbase + byte_offset; - if (byte_offset != last_byte_offset) { + if (ptr != last_ptr) { ret = kvm_read_guest_lock(kvm, ptr, &val, 1); if (ret) return ret; - last_byte_offset = byte_offset; + last_ptr = ptr; } stored = val & (1U << bit_nr); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 89fd40e57cae..71f77ae6c2a6 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -92,7 +93,7 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink); * kvm->lock --> kvm->slots_lock --> kvm->irq_lock */ -DEFINE_SPINLOCK(kvm_lock); +DEFINE_MUTEX(kvm_lock); static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); @@ -141,10 +142,30 @@ __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, { } +bool kvm_is_zone_device_pfn(kvm_pfn_t pfn) +{ + /* + * The metadata used by is_zone_device_page() to determine whether or + * not a page is ZONE_DEVICE is guaranteed to be valid if and only if + * the device has been pinned, e.g. by get_user_pages(). WARN if the + * page_count() is zero to help detect bad usage of this helper. + */ + if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn)))) + return false; + + return is_zone_device_page(pfn_to_page(pfn)); +} + bool kvm_is_reserved_pfn(kvm_pfn_t pfn) { + /* + * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting + * perspective they are "normal" pages, albeit with slightly different + * usage rules. + */ if (pfn_valid(pfn)) - return PageReserved(pfn_to_page(pfn)); + return PageReserved(pfn_to_page(pfn)) && + !kvm_is_zone_device_pfn(pfn); return true; } @@ -596,8 +617,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) stat_data->kvm = kvm; stat_data->offset = p->offset; + stat_data->mode = p->mode ? p->mode : 0644; kvm->debugfs_stat_data[p - debugfs_entries] = stat_data; - if (!debugfs_create_file(p->name, 0644, + if (!debugfs_create_file(p->name, stat_data->mode, kvm->debugfs_dentry, stat_data, stat_fops_per_vm[p->kind])) @@ -606,6 +628,23 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) return 0; } +/* + * Called after the VM is otherwise initialized, but just before adding it to + * the vm_list. + */ +int __weak kvm_arch_post_init_vm(struct kvm *kvm) +{ + return 0; +} + +/* + * Called just after removing the VM from the vm_list, but before doing any + * other destruction. + */ +void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm) +{ +} + static struct kvm *kvm_create_vm(unsigned long type) { int r, i; @@ -660,22 +699,31 @@ static struct kvm *kvm_create_vm(unsigned long type) rcu_assign_pointer(kvm->buses[i], kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL)); if (!kvm->buses[i]) - goto out_err; + goto out_err_no_mmu_notifier; } r = kvm_init_mmu_notifier(kvm); + if (r) + goto out_err_no_mmu_notifier; + + r = kvm_arch_post_init_vm(kvm); if (r) goto out_err; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_add(&kvm->vm_list, &vm_list); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); preempt_notifier_inc(); return kvm; out_err: +#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) + if (kvm->mmu_notifier.ops) + mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); +#endif +out_err_no_mmu_notifier: cleanup_srcu_struct(&kvm->irq_srcu); out_err_no_irq_srcu: cleanup_srcu_struct(&kvm->srcu); @@ -715,9 +763,11 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm); kvm_destroy_vm_debugfs(kvm); kvm_arch_sync_events(kvm); - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_del(&kvm->vm_list); - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); + kvm_arch_pre_destroy_vm(kvm); + kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { struct kvm_io_bus *bus = kvm_get_bus(kvm, i); @@ -1227,14 +1277,14 @@ bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); -unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn) +unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn) { struct vm_area_struct *vma; unsigned long addr, size; size = PAGE_SIZE; - addr = gfn_to_hva(kvm, gfn); + addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gfn, NULL); if (kvm_is_error_hva(addr)) return PAGE_SIZE; @@ -1700,7 +1750,7 @@ static void kvm_release_pfn_dirty(kvm_pfn_t pfn) void kvm_set_pfn_dirty(kvm_pfn_t pfn) { - if (!kvm_is_reserved_pfn(pfn)) { + if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) { struct page *page = pfn_to_page(pfn); if (!PageReserved(page)) @@ -1711,7 +1761,7 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); void kvm_set_pfn_accessed(kvm_pfn_t pfn) { - if (!kvm_is_reserved_pfn(pfn)) + if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) mark_page_accessed(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); @@ -1977,12 +2027,12 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, if (slots->generation != ghc->generation) __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); - if (unlikely(!ghc->memslot)) - return kvm_write_guest(kvm, gpa, data, len); - if (kvm_is_error_hva(ghc->hva)) return -EFAULT; + if (unlikely(!ghc->memslot)) + return kvm_write_guest(kvm, gpa, data, len); + r = __copy_to_user((void __user *)ghc->hva + offset, data, len); if (r) return -EFAULT; @@ -2010,12 +2060,12 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, if (slots->generation != ghc->generation) __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); - if (unlikely(!ghc->memslot)) - return kvm_read_guest(kvm, ghc->gpa, data, len); - if (kvm_is_error_hva(ghc->hva)) return -EFAULT; + if (unlikely(!ghc->memslot)) + return kvm_read_guest(kvm, ghc->gpa, data, len); + r = __copy_from_user(data, (void __user *)ghc->hva, len); if (r) return -EFAULT; @@ -3713,7 +3763,9 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file, if (!refcount_inc_not_zero(&stat_data->kvm->users_count)) return -ENOENT; - if (simple_attr_open(inode, file, get, set, fmt)) { + if (simple_attr_open(inode, file, get, + stat_data->mode & S_IWUGO ? set : NULL, + fmt)) { kvm_put_kvm(stat_data->kvm); return -ENOMEM; } @@ -3827,13 +3879,13 @@ static int vm_stat_get(void *_offset, u64 *val) u64 tmp_val; *val = 0; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val); *val += tmp_val; } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -3846,12 +3898,12 @@ static int vm_stat_clear(void *_offset, u64 val) if (val) return -EINVAL; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vm_stat_clear_per_vm((void *)&stat_tmp, 0); } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -3866,13 +3918,13 @@ static int vcpu_stat_get(void *_offset, u64 *val) u64 tmp_val; *val = 0; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val); *val += tmp_val; } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -3885,12 +3937,12 @@ static int vcpu_stat_clear(void *_offset, u64 val) if (val) return -EINVAL; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) { stat_tmp.kvm = kvm; vcpu_stat_clear_per_vm((void *)&stat_tmp, 0); } - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); return 0; } @@ -3911,7 +3963,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) if (!kvm_dev.this_device || !kvm) return; - spin_lock(&kvm_lock); + mutex_lock(&kvm_lock); if (type == KVM_EVENT_CREATE_VM) { kvm_createvm_count++; kvm_active_vms++; @@ -3920,7 +3972,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) } created = kvm_createvm_count; active = kvm_active_vms; - spin_unlock(&kvm_lock); + mutex_unlock(&kvm_lock); env = kzalloc(sizeof(*env), GFP_KERNEL); if (!env) @@ -3937,7 +3989,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) } add_uevent_var(env, "PID=%d", kvm->userspace_pid); - if (kvm->debugfs_dentry) { + if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) { char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL); if (p) { @@ -3964,7 +4016,8 @@ static int kvm_init_debug(void) kvm_debugfs_num_entries = 0; for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) { - if (!debugfs_create_file(p->name, 0644, kvm_debugfs_dir, + int mode = p->mode ? p->mode : 0644; + if (!debugfs_create_file(p->name, mode, kvm_debugfs_dir, (void *)(long)p->offset, stat_fops[p->kind])) goto out_dir; @@ -4151,3 +4204,86 @@ void kvm_exit(void) kvm_vfio_ops_exit(); } EXPORT_SYMBOL_GPL(kvm_exit); + +struct kvm_vm_worker_thread_context { + struct kvm *kvm; + struct task_struct *parent; + struct completion init_done; + kvm_vm_thread_fn_t thread_fn; + uintptr_t data; + int err; +}; + +static int kvm_vm_worker_thread(void *context) +{ + /* + * The init_context is allocated on the stack of the parent thread, so + * we have to locally copy anything that is needed beyond initialization + */ + struct kvm_vm_worker_thread_context *init_context = context; + struct kvm *kvm = init_context->kvm; + kvm_vm_thread_fn_t thread_fn = init_context->thread_fn; + uintptr_t data = init_context->data; + int err; + + err = kthread_park(current); + /* kthread_park(current) is never supposed to return an error */ + WARN_ON(err != 0); + if (err) + goto init_complete; + + err = cgroup_attach_task_all(init_context->parent, current); + if (err) { + kvm_err("%s: cgroup_attach_task_all failed with err %d\n", + __func__, err); + goto init_complete; + } + + set_user_nice(current, task_nice(init_context->parent)); + +init_complete: + init_context->err = err; + complete(&init_context->init_done); + init_context = NULL; + + if (err) + return err; + + /* Wait to be woken up by the spawner before proceeding. */ + kthread_parkme(); + + if (!kthread_should_stop()) + err = thread_fn(kvm, data); + + return err; +} + +int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, + uintptr_t data, const char *name, + struct task_struct **thread_ptr) +{ + struct kvm_vm_worker_thread_context init_context = {}; + struct task_struct *thread; + + *thread_ptr = NULL; + init_context.kvm = kvm; + init_context.parent = current; + init_context.thread_fn = thread_fn; + init_context.data = data; + init_completion(&init_context.init_done); + + thread = kthread_run(kvm_vm_worker_thread, &init_context, + "%s-%d", name, task_pid_nr(current)); + if (IS_ERR(thread)) + return PTR_ERR(thread); + + /* kthread_run is never supposed to return NULL */ + WARN_ON(thread == NULL); + + wait_for_completion(&init_context.init_done); + + if (!init_context.err) + *thread_ptr = thread; + + return init_context.err; +}